ICU-7168 Implementation of ListFormatter, port from Java

X-SVN-Rev: 32247
This commit is contained in:
Umesh Nair 2012-08-27 20:14:25 +00:00
parent 8131a32512
commit 484c465ddf
8 changed files with 707 additions and 5 deletions

View file

@ -104,7 +104,7 @@ rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.
serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
uidna.o usprep.o uts46.o punycode.o \
util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o mutex.o dtintrv.o ucnvsel.o propsvec.o \
ulist.o uloc_tag.o icudataver.o icuplug.o
ulist.o uloc_tag.o icudataver.o icuplug.o listformatter.o
## Header files to install
HEADERS = $(srcdir)/unicode/*.h

View file

@ -0,0 +1,329 @@
/*
*******************************************************************************
*
* Copyright (C) 2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: listformatter.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2012aug27
* created by: Umesh P. Nair
*/
#include "unicode/listformatter.h"
#include "mutex.h"
#include "hash.h"
#include "cstring.h"
#include "ulocimp.h"
#include "charstr.h"
#include "ucln_cmn.h"
U_NAMESPACE_BEGIN
/**
 * The four list patterns for one locale. Each pattern contains the
 * placeholders "{0}" and "{1}".
 */
struct ListFormatData : public UMemory {
// Used when the list has exactly two items.
UnicodeString twoPattern;
// Joins the first two items of a list with more than two items.
UnicodeString startPattern;
// Appends each item between the second and the last one.
UnicodeString middlePattern;
// Appends the last item of a list with more than two items.
UnicodeString endPattern;
};
// Maps a locale name to its ListFormatData. Created on first use by
// initializeHash() and deleted by uprv_listformatter_cleanup().
static Hashtable* listPatternHash = NULL;
// Held by getListFormatData() while it checks for and creates listPatternHash.
static UMTX listFormatterMutex = NULL;
// Pattern placeholders as UTF-16 code units. The arrays are not NUL-terminated,
// so callers pass the length (3) explicitly.
static UChar FIRST_PARAMETER[] = { 0x7b, 0x30, 0x7d }; // "{0}"
static UChar SECOND_PARAMETER[] = { 0x7b, 0x31, 0x7d }; // "{1}"
U_CDECL_BEGIN
/**
 * Cleanup callback registered with ucln_common_registerCleanup().
 * Frees the pattern table and resets the global pointer to NULL.
 *
 * @return Always TRUE.
 */
static UBool U_CALLCONV uprv_listformatter_cleanup() {
    Hashtable* table = listPatternHash;
    listPatternHash = NULL;
    delete table;
    return TRUE;
}
/**
 * Value deleter installed on the pattern table.
 *
 * @param obj A ListFormatData stored in the table, passed as an untyped pointer.
 */
static void U_CALLCONV
uprv_deleteListFormatData(void *obj) {
    ListFormatData* entry = static_cast<ListFormatData *>(obj);
    delete entry;
}
U_CDECL_END
/**
 * Creates the global pattern table and fills it with the built-in list
 * patterns for every supported locale. The empty locale name ("") is the
 * root entry.
 *
 * If any entry cannot be added, the table is deleted again and the global
 * pointer is reset to NULL, so a later call retries from scratch instead of
 * silently working with an incomplete table.
 *
 * @param errorCode ICU error code. Nothing is done if it already indicates a
 *                  failure; set to U_MEMORY_ALLOCATION_ERROR if the table
 *                  cannot be allocated, or to whatever error adding an entry
 *                  reported.
 */
void ListFormatter::initializeHash(UErrorCode& errorCode) {
    if (U_FAILURE(errorCode)) {
        return;
    }
    listPatternHash = new Hashtable();
    if (listPatternHash == NULL) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    // The table owns its values; they are freed together with the table.
    listPatternHash->setValueDeleter(uprv_deleteListFormatData);
    ucln_common_registerCleanup(UCLN_COMMON_LIST_FORMATTER, uprv_listformatter_cleanup);
    // Arguments: locale, two-item pattern, start, middle, end.
    // Each addDataToHash() call is a no-op once errorCode indicates a failure.
    addDataToHash("af", "{0} en {1}", "{0}, {1}", "{0}, {1}", "{0} en {1}", errorCode);
    addDataToHash("am", "{0} \\u12a5\\u1293 {1}", "{0}, {1}", "{0}, {1}", "{0}, \\u12a5\\u1293 {1}", errorCode);
    addDataToHash("ar", "{0} \\u0648 {1}", "{0}\\u060c {1}", "{0}\\u060c {1}", "{0}\\u060c \\u0648 {1}", errorCode);
    addDataToHash("bg", "{0} \\u0438 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0438 {1}", errorCode);
    addDataToHash("bn", "{0} \\u098f\\u09ac\\u0982 {1}", "{0}, {1}", "{0}, {1}", "{0}, \\u098f\\u09ac\\u0982 {1}", errorCode);
    addDataToHash("bs", "{0} i {1}", "{0}, {1}", "{0}, {1}", "{0} i {1}", errorCode);
    addDataToHash("ca", "{0} i {1}", "{0}, {1}", "{0}, {1}", "{0} i {1}", errorCode);
    addDataToHash("cs", "{0} a {1}", "{0}, {1}", "{0}, {1}", "{0} a {1}", errorCode);
    addDataToHash("da", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
    addDataToHash("de", "{0} und {1}", "{0}, {1}", "{0}, {1}", "{0} und {1}", errorCode);
    addDataToHash("ee", "{0} kple {1}", "{0}, {1}", "{0}, {1}", "{0}, kple {1}", errorCode);
    addDataToHash("el", "{0} \\u03ba\\u03b1\\u03b9 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u03ba\\u03b1\\u03b9 {1}", errorCode);
    addDataToHash("en", "{0} and {1}", "{0}, {1}", "{0}, {1}", "{0}, and {1}", errorCode);
    addDataToHash("es", "{0} y {1}", "{0}, {1}", "{0}, {1}", "{0} y {1}", errorCode);
    addDataToHash("et", "{0} ja {1}", "{0}, {1}", "{0}, {1}", "{0} ja {1}", errorCode);
    addDataToHash("eu", "{0} eta {1}", "{0}, {1}", "{0}, {1}", "{0} eta {1}", errorCode);
    addDataToHash("fa", "{0} \\u0648 {1}", "{0}\\u060c\\u200f {1}", "{0}\\u060c\\u200f {1}", "{0}\\u060c \\u0648 {1}", errorCode);
    addDataToHash("fi", "{0} ja {1}", "{0}, {1}", "{0}, {1}", "{0} ja {1}", errorCode);
    addDataToHash("fil", "{0} at {1}", "{0}, {1}", "{0}, {1}", "{0} at {1}", errorCode);
    addDataToHash("fo", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
    addDataToHash("fr", "{0} et {1}", "{0}, {1}", "{0}, {1}", "{0} et {1}", errorCode);
    addDataToHash("fur", "{0} e {1}", "{0}, {1}", "{0}, {1}", "{0} e {1}", errorCode);
    addDataToHash("gd", "{0} agus {1}", "{0}, {1}", "{0}, {1}", "{0}, agus {1}", errorCode);
    addDataToHash("gl", "{0} e {1}", "{0}, {1}", "{0}, {1}", "{0} e {1}", errorCode);
    addDataToHash("gsw", "{0} und {1}", "{0}, {1}", "{0}, {1}", "{0} und {1}", errorCode);
    addDataToHash("gu", "{0} \\u0a85\\u0aa8\\u0ac7 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0a85\\u0aa8\\u0ac7 {1}", errorCode);
    addDataToHash("he", "{0} \\u05d5-{1}", "{0}, {1}", "{0}, {1}", "{0} \\u05d5-{1}", errorCode);
    addDataToHash("hi", "{0} \\u0914\\u0930 {1}", "{0}, {1}", "{0}, {1}", "{0}, \\u0914\\u0930 {1}", errorCode);
    addDataToHash("hr", "{0} i {1}", "{0}, {1}", "{0}, {1}", "{0} i {1}", errorCode);
    addDataToHash("hu", "{0} \\u00e9s {1}", "{0}, {1}", "{0}, {1}", "{0} \\u00e9s {1}", errorCode);
    addDataToHash("id", "{0} dan {1}", "{0}, {1}", "{0}, {1}", "{0}, dan {1}", errorCode);
    addDataToHash("is", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
    addDataToHash("it", "{0} e {1}", "{0}, {1}", "{0}, {1}", "{0}, e {1}", errorCode);
    addDataToHash("ja", "{0}\\u3001{1}", "{0}\\u3001{1}", "{0}\\u3001{1}", "{0}\\u3001{1}", errorCode);
    addDataToHash("ka", "{0} \\u10d3\\u10d0 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u10d3\\u10d0 {1}", errorCode);
    addDataToHash("kea", "{0} y {1}", "{0}, {1}", "{0}, {1}", "{0} y {1}", errorCode);
    addDataToHash("kl", "{0} aamma {1}", "{0} aamma {1}", "{0}, {1}", "{0}, {1}", errorCode);
    addDataToHash("kn", "{0} \\u0cae\\u0ca4\\u0ccd\\u0ca4\\u0cc1 {1}", "{0}, {1}", "{0}, {1}",
        "{0}, \\u0cae\\u0ca4\\u0ccd\\u0ca4\\u0cc1 {1}", errorCode);
    addDataToHash("ko", "{0} \\ubc0f {1}", "{0}, {1}", "{0}, {1}", "{0} \\ubc0f {1}", errorCode);
    addDataToHash("ksh", "{0} un {1}", "{0}, {1}", "{0}, {1}", "{0} un {1}", errorCode);
    addDataToHash("lt", "{0} ir {1}", "{0}, {1}", "{0}, {1}", "{0} ir {1}", errorCode);
    addDataToHash("lv", "{0} un {1}", "{0}, {1}", "{0}, {1}", "{0} un {1}", errorCode);
    addDataToHash("ml", "{0} \\u0d15\\u0d42\\u0d1f\\u0d3e\\u0d24\\u0d46 {1}", "{0}, {1}", "{0}, {1}",
        "{0}, {1} \\u0d0e\\u0d28\\u0d4d\\u0d28\\u0d3f\\u0d35", errorCode);
    addDataToHash("mr", "{0} \\u0906\\u0923\\u093f {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0906\\u0923\\u093f {1}", errorCode);
    addDataToHash("ms", "{0} dan {1}", "{0}, {1}", "{0}, {1}", "{0}, dan {1}", errorCode);
    addDataToHash("nb", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
    addDataToHash("nl", "{0} en {1}", "{0}, {1}", "{0}, {1}", "{0} en {1}", errorCode);
    addDataToHash("nn", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
    addDataToHash("pl", "{0} i {1}", "{0}; {1}", "{0}; {1}", "{0} i {1}", errorCode);
    addDataToHash("pt", "{0} e {1}", "{0}, {1}", "{0}, {1}", "{0} e {1}", errorCode);
    addDataToHash("ro", "{0} \\u015fi {1}", "{0}, {1}", "{0}, {1}", "{0} \\u015fi {1}", errorCode);
    addDataToHash("", "{0}, {1}", "{0}, {1}", "{0}, {1}", "{0}, {1}", errorCode); // root
    addDataToHash("ru", "{0} \\u0438 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0438 {1}", errorCode);
    addDataToHash("se", "{0} ja {1}", "{0}, {1}", "{0}, {1}", "{0} ja {1}", errorCode);
    addDataToHash("sk", "{0} a {1}", "{0}, {1}", "{0}, {1}", "{0} a {1}", errorCode);
    addDataToHash("sl", "{0} in {1}", "{0}, {1}", "{0}, {1}", "{0} in {1}", errorCode);
    addDataToHash("sr", "{0} \\u0438 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0438 {1}", errorCode);
    addDataToHash("sr_Cyrl", "{0} \\u0438 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0438 {1}", errorCode);
    addDataToHash("sr_Latn", "{0} i {1}", "{0}, {1}", "{0}, {1}", "{0} i {1}", errorCode);
    addDataToHash("sv", "{0} och {1}", "{0}, {1}", "{0}, {1}", "{0} och {1}", errorCode);
    addDataToHash("sw", "{0} na {1}", "{0}, {1}", "{0}, {1}", "{0}, na {1}", errorCode);
    addDataToHash("ta", "{0} \\u0bae\\u0bb1\\u0bcd\\u0bb1\\u0bc1\\u0bae\\u0bcd {1}", "{0}, {1}", "{0}, {1}",
        "{0} \\u0bae\\u0bb1\\u0bcd\\u0bb1\\u0bc1\\u0bae\\u0bcd {1}", errorCode);
    addDataToHash("te", "{0} \\u0c2e\\u0c30\\u0c3f\\u0c2f\\u0c41 {1}", "{0}, {1}", "{0}, {1}",
        "{0} \\u0c2e\\u0c30\\u0c3f\\u0c2f\\u0c41 {1}", errorCode);
    addDataToHash("th", "{0}\\u0e41\\u0e25\\u0e30{1}", "{0} {1}", "{0} {1}", "{0} \\u0e41\\u0e25\\u0e30{1}", errorCode);
    addDataToHash("tr", "{0} ve {1}", "{0}, {1}", "{0}, {1}", "{0} ve {1}", errorCode);
    addDataToHash("uk", "{0} \\u0442\\u0430 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0442\\u0430 {1}", errorCode);
    addDataToHash("ur", "{0} \\u0627\\u0648\\u0631 {1}", "{0}\\u060c {1}", "{0}\\u060c {1}",
        "{0}\\u060c \\u0627\\u0648\\u0631 {1}", errorCode);
    addDataToHash("vi", "{0} v\\u00e0 {1}", "{0}, {1}", "{0}, {1}", "{0} v\\u00e0 {1}", errorCode);
    addDataToHash("wae", "{0} und {1}", "{0}, {1}", "{0}, {1}", "{0} und {1}", errorCode);
    addDataToHash("zh", "{0}\\u548c{1}", "{0}\\u3001{1}", "{0}\\u3001{1}", "{0}\\u548c{1}", errorCode);
    addDataToHash("zu", "I-{0} ne-{1}", "{0}, {1}", "{0}, {1}", "{0}, no-{1}", errorCode);
    if (U_FAILURE(errorCode)) {
        // Roll back: callers treat a non-NULL table as fully initialized, so a
        // half-filled table must not stay published.
        delete listPatternHash;
        listPatternHash = NULL;
    }
}
void ListFormatter::addDataToHash(
const char* locale,
const char* two,
const char* start,
const char* middle,
const char* end,
UErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return;
}
UnicodeString key(locale, -1, US_INV);
ListFormatData* value = new ListFormatData();
if (value == NULL) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
value->twoPattern = UnicodeString(two, -1, US_INV).unescape();
value->startPattern = UnicodeString(start, -1, US_INV).unescape();
value->middlePattern = UnicodeString(middle, -1, US_INV).unescape();
value->endPattern = UnicodeString(end, -1, US_INV).unescape();
listPatternHash->put(key, value, errorCode);
}
/**
 * Looks up the list patterns stored under the exact name of a locale,
 * creating the global pattern table first if it does not exist yet.
 *
 * @param locale    Locale whose getName() is used as the lookup key.
 * @param errorCode ICU error code, set if the table could not be created.
 * @return The stored patterns, or NULL if the table could not be created or
 *         has no entry for this exact name.
 */
const ListFormatData* ListFormatter::getListFormatData(
        const Locale& locale, UErrorCode& errorCode) {
    {
        // The table is created at most once; the mutex covers only this
        // check-and-create step.
        Mutex lock(&listFormatterMutex);
        if (listPatternHash == NULL) {
            initializeHash(errorCode);
            if (U_FAILURE(errorCode)) {
                return NULL;
            }
        }
    }
    const UnicodeString localeKey(locale.getName(), -1, US_INV);
    const void* entry = listPatternHash->get(localeKey);
    return static_cast<const ListFormatData*>(entry);
}
/**
 * Creates a ListFormatter for the default locale by delegating to the
 * locale-taking overload.
 *
 * @param errorCode ICU error code, passed through to the other overload.
 * @return A new ListFormatter, or NULL on failure.
 */
ListFormatter* ListFormatter::createInstance(UErrorCode& errorCode) {
    // A default-constructed Locale is the default locale.
    const Locale defaultLocale;
    return createInstance(defaultLocale, errorCode);
}
/**
 * Creates a ListFormatter for a locale, walking the fallback chain until a
 * locale with list pattern data is found and using root as the last resort.
 *
 * The pattern table is keyed by locale IDs without keywords, so the search
 * starts from the base name of the requested locale. Without this, a locale
 * such as "de_DE@collation=phonebook" never matches an entry and ends at
 * "@collation=phonebook", whose fallback is itself.
 *
 * @param locale    The requested locale.
 * @param errorCode ICU error code, set on allocation failure or if the
 *                  pattern data cannot be loaded.
 * @return A new ListFormatter owned by the caller, or NULL on failure.
 */
ListFormatter* ListFormatter::createInstance(const Locale& locale, UErrorCode& errorCode) {
    Locale tempLocale(locale.getBaseName());
    for (;;) {
        const ListFormatData* listFormatData = getListFormatData(tempLocale, errorCode);
        if (U_FAILURE(errorCode)) {
            return NULL;
        }
        if (listFormatData != NULL) {
            ListFormatter* p = new ListFormatter(tempLocale, listFormatData);
            if (p == NULL) {
                errorCode = U_MEMORY_ALLOCATION_ERROR;
                return NULL;
            }
            return p;
        }
        Locale correctLocale;
        getFallbackLocale(tempLocale, correctLocale, errorCode);
        if (U_FAILURE(errorCode)) {
            return NULL;
        }
        // A fallback that does not change the locale would loop forever;
        // treat it like a bogus fallback and go straight to root.
        if (correctLocale.isBogus() || correctLocale == tempLocale) {
            if (tempLocale == Locale::getRoot()) {
                // Root itself has no data: recursing again cannot succeed.
                errorCode = U_MISSING_RESOURCE_ERROR;
                return NULL;
            }
            return createInstance(Locale::getRoot(), errorCode);
        }
        tempLocale = correctLocale;
    }
}
/**
 * Stores the locale and a pointer to its pattern data. The data is not
 * copied and is never freed by this object.
 */
ListFormatter::ListFormatter(
        const Locale& listFormatterLocale,
        const ListFormatData* listFormatterData)
        : locale(listFormatterLocale),
          data(listFormatterData) {
}
// Nothing to release here: the destructor does not free the pattern data.
ListFormatter::~ListFormatter() {
}
void ListFormatter::getFallbackLocale(const Locale& in, Locale& out, UErrorCode& errorCode) {
if (uprv_strcmp(in.getName(), "zh_TW") == 0) {
out = Locale::getTraditionalChinese();
} else {
const char* localeString = in.getName();
const char* extStart = locale_getKeywordsStart(localeString);
if (extStart == NULL) {
extStart = uprv_strchr(localeString, 0);
}
const char* last = extStart;
// TODO: Check whether uloc_getParent() will work here.
while (last > localeString && *(last - 1) != '_') {
--last;
}
// Truncate empty segment.
while (last > localeString) {
if (*(last-1) != '_') {
break;
}
--last;
}
size_t localePortionLen = last - localeString;
CharString fullLocale;
fullLocale.append(localeString, localePortionLen, errorCode).append(extStart, errorCode);
if (U_FAILURE(errorCode)) {
return;
}
out = Locale(fullLocale.data());
}
}
/**
 * Joins the items with this formatter's patterns and appends the result.
 *
 * One item is appended as is. Two items use twoPattern. Longer lists use
 * startPattern for the first pair, middlePattern for each further item and
 * endPattern for the last one.
 *
 * @param items     The strings to join.
 * @param nItems    Number of entries in items; nothing is appended if <= 0.
 * @param appendTo  Receives the joined list; unchanged on failure.
 * @param errorCode ICU error code, set if a pattern cannot be applied.
 * @return appendTo.
 */
UnicodeString& ListFormatter::format(const UnicodeString items[], int32_t nItems,
        UnicodeString& appendTo, UErrorCode& errorCode) const {
    if (U_FAILURE(errorCode) || nItems <= 0) {
        return appendTo;
    }
    UnicodeString joined = items[0];
    if (nItems == 2) {
        addNewString(data->twoPattern, joined, items[1], errorCode);
    } else if (nItems > 2) {
        const int32_t lastIndex = nItems - 1;
        addNewString(data->startPattern, joined, items[1], errorCode);
        int32_t index = 2;
        while (index < lastIndex) {
            addNewString(data->middlePattern, joined, items[index], errorCode);
            ++index;
        }
        addNewString(data->endPattern, joined, items[lastIndex], errorCode);
    }
    if (U_FAILURE(errorCode)) {
        return appendTo;
    }
    appendTo += joined;
    return appendTo;
}
/**
* Joins originalString and nextString using the pattern pat and puts the result in
* originalString.
*/
void ListFormatter::addNewString(const UnicodeString& pat, UnicodeString& originalString,
const UnicodeString& nextString, UErrorCode& errorCode) const {
if (U_FAILURE(errorCode)) {
return;
}
int32_t p0Offset = pat.indexOf(FIRST_PARAMETER, 3, 0);
if (p0Offset < 0) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
int32_t p1Offset = pat.indexOf(SECOND_PARAMETER, 3, 0);
if (p1Offset < 0) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
int32_t i, j;
const UnicodeString* firstString;
const UnicodeString* secondString;
if (p0Offset < p1Offset) {
i = p0Offset;
j = p1Offset;
firstString = &originalString;
secondString = &nextString;
} else {
i = p1Offset;
j = p0Offset;
firstString = &nextString;
secondString = &originalString;
}
UnicodeString result = UnicodeString(pat, 0, i) + *firstString;
result += UnicodeString(pat, i+3, j-i-3);
result += *secondString;
result += UnicodeString(pat, j+3);
originalString = result;
}
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(ListFormatter)
U_NAMESPACE_END

View file

@ -1,7 +1,7 @@
/*
******************************************************************************
* *
* Copyright (C) 2001-2010, International Business Machines *
* Copyright (C) 2001-2012, International Business Machines *
* Corporation and others. All Rights Reserved. *
* *
******************************************************************************
@ -49,6 +49,7 @@ typedef enum ECleanupCommonType {
UCLN_COMMON_UCNV_IO,
UCLN_COMMON_UDATA,
UCLN_COMMON_PUTIL,
UCLN_COMMON_LIST_FORMATTER,
UCLN_COMMON_COUNT /* This must be last */
} ECleanupCommonType;

View file

@ -0,0 +1,122 @@
/*
*******************************************************************************
*
* Copyright (C) 2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: listformatter.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 20120426
* created by: Umesh P. Nair
*/
#ifndef __LISTFORMATTER_H__
#define __LISTFORMATTER_H__
#include "unicode/unistr.h"
#include "unicode/locid.h"
U_NAMESPACE_BEGIN
/** @internal */
class Hashtable;
/**
 * Declared with the same class-key (struct) as its definition in
 * listformatter.cpp; a class/struct mismatch triggers compiler warnings and
 * can change symbol names on compilers that encode the class-key.
 * @internal
 */
struct ListFormatData;
/**
* \file
* \brief C++ API: API for formatting a list.
*/
/**
* An immutable class for formatting a list, using data from CLDR (or supplied
* separately).
*
* Example: Input data ["Alice", "Bob", "Charlie", "Delta"] will be formatted
* as "Alice, Bob, Charlie, and Delta" in English.
*
* The ListFormatter class is not intended for public subclassing.
*/
class U_COMMON_API ListFormatter : public UObject{
public:
/**
* Creates a ListFormatter appropriate for the default locale.
*
* @param errorCode ICU error code, set if the formatter could not be created
* (for example on memory allocation failure).
* @return Pointer to a ListFormatter object for the default locale,
* created from internal data derived from CLDR data.
* @draft ICU 50
*/
static ListFormatter* createInstance(UErrorCode& errorCode);
/**
* Creates a ListFormatter appropriate for a locale. If there is no data for
* the locale itself, its fallback locales are tried, ending with root.
*
* @param locale The locale.
* @param errorCode ICU error code, set if the formatter could not be created
* (for example on memory allocation failure).
* @return A ListFormatter object created from internal data derived from
* CLDR data.
* @draft ICU 50
*/
static ListFormatter* createInstance(const Locale& locale, UErrorCode& errorCode);
/**
* Destructor.
*
* @draft ICU 50
*/
virtual ~ListFormatter();
/**
* Formats a list of strings.
*
* @param items An array of strings to be combined and formatted.
* @param n_items Length of the array items.
* @param appendTo The string to which the result should be appended to.
* @param errorCode ICU error code, set if there is an error.
* @return Formatted string combining the elements of items, appended to appendTo.
* @draft ICU 50
*/
UnicodeString& format(const UnicodeString items[], int32_t n_items,
UnicodeString& appendTo, UErrorCode& errorCode) const;
/**
* Gets the fallback locale for a given locale.
* TODO: Consider moving this to the Locale class.
* @param in The input locale.
* @param out The output locale after fallback.
* @param errorCode ICU error code, set if the fallback locale name could not be built.
* @internal For testing.
*/
static void getFallbackLocale(const Locale& in, Locale& out, UErrorCode& errorCode);
private:
// Creates the shared table of list patterns and fills it with the built-in data.
static void initializeHash(UErrorCode& errorCode);
// Adds the four patterns of one locale to the shared table.
static void addDataToHash(const char* locale, const char* two, const char* start, const char* middle, const char* end, UErrorCode& errorCode);
// Returns the patterns stored for exactly this locale name, or NULL if there are none.
static const ListFormatData* getListFormatData(const Locale& locale, UErrorCode& errorCode);
// Private constructors and assignment: instances are obtained through createInstance().
ListFormatter();
ListFormatter(const Locale& listFormatterLocale, const ListFormatData* listFormatterData);
ListFormatter(const ListFormatter&);
ListFormatter& operator = (const ListFormatter&);
// Joins originalString and newString using pattern; the result replaces originalString.
void addNewString(const UnicodeString& pattern, UnicodeString& originalString,
const UnicodeString& newString, UErrorCode& errorCode) const;
virtual UClassID getDynamicClassID() const;
// The locale whose pattern data is used by this formatter.
Locale locale;
// Pattern data for the locale; not freed by this object's destructor.
const ListFormatData* data;
};
U_NAMESPACE_END
#endif

View file

@ -55,7 +55,7 @@ itrbnf.o itrbnfrt.o itrbnfp.o ucaconf.o icusvtst.o \
uobjtest.o idnaref.o idnaconf.o nptrans.o punyref.o testidn.o testidna.o uts46test.o \
incaltst.o calcasts.o v32test.o uvectest.o textfile.o tokiter.o utxttest.o \
windttst.o winnmtst.o winutil.o csdetest.o tzrulets.o tzoffloc.o tzfmttst.o ssearch.o dtifmtts.o \
tufmtts.o itspoof.o simplethread.o bidiconf.o locnmtst.o dcfmtest.o alphaindextst.o
tufmtts.o itspoof.o simplethread.o bidiconf.o locnmtst.o dcfmtest.o alphaindextst.o listformattertest.o
DEPS = $(OBJECTS:.o=.d)

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2010, International Business Machines
* COPYRIGHT:
* Copyright (c) 1997-2012, International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************/
@ -55,6 +55,7 @@
#include "tufmtts.h" // TimeUnitTest
#include "locnmtst.h" // LocaleDisplayNamesTest
#include "dcfmtest.h" // DecimalFormatTest
#include "listformattertest.h" // ListFormatterTest
#define TESTCLASS(id, TestClass) \
case id: \
@ -131,6 +132,7 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
TESTCLASS(41,DecimalFormatTest);
#endif
TESTCLASS(42,ListFormatterTest);
default: name = ""; break; //needed to end loop
}

View file

@ -0,0 +1,190 @@
/*
*******************************************************************************
*
* Copyright (C) 2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: listformattertest.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2012aug27
* created by: Umesh P. Nair
*/
#include "listformattertest.h"
#include <string.h>
// Sets up the strings shared by all test cases: a prefix that is already in
// the output buffer before formatting, and four list items.
ListFormatterTest::ListFormatterTest()
        : prefix("Prefix: ", -1, US_INV),
          one("Alice", -1, US_INV),
          two("Bob", -1, US_INV),
          three("Charlie", -1, US_INV),
          four("Delta", -1, US_INV) {
}
void ListFormatterTest::CheckFormatting(const ListFormatter* formatter, UnicodeString data[], int32_t dataSize,
const UnicodeString& expected_result) {
UnicodeString actualResult(prefix);
UErrorCode errorCode = U_ZERO_ERROR;
formatter->format(data, dataSize, actualResult, errorCode);
UnicodeString expectedStringWithPrefix = prefix + expected_result;
if (expectedStringWithPrefix != actualResult) {
errln(UnicodeString("Expected: |") + expectedStringWithPrefix + "|, Actual: |" + actualResult + "|");
}
}
void ListFormatterTest::CheckFourCases(const char* locale_string, UnicodeString one, UnicodeString two,
UnicodeString three, UnicodeString four, UnicodeString results[4]) {
UErrorCode errorCode = U_ZERO_ERROR;
ListFormatter* formatter = ListFormatter::createInstance(Locale(locale_string), errorCode);
if (formatter == NULL || U_FAILURE(errorCode)) {
errln("Allocation problem\n");
return;
}
UnicodeString input1[] = {one};
CheckFormatting(formatter, input1, 1, results[0]);
UnicodeString input2[] = {one, two};
CheckFormatting(formatter, input2, 2, results[1]);
UnicodeString input3[] = {one, two, three};
CheckFormatting(formatter, input3, 3, results[2]);
UnicodeString input4[] = {one, two, three, four};
CheckFormatting(formatter, input4, 4, results[3]);
}
void ListFormatterTest::TestLocaleFallback() {
const char* testData[][4] = {
{"en_US", "en", "", ""}, // ULocale.getFallback("") should return ""
{"EN_us_Var", "en_US", "en", ""}, // Case is always normalized
{"de_DE@collation=phonebook", "de@collation=phonebook", "@collation=phonebook", "@collation=phonebook"}, // Keyword is preserved
{"en__POSIX", "en", "", ""}, // Trailing empty segment should be truncated
{"_US_POSIX", "_US", "", ""}, // Same as above
{"root", "", "", ""}, // No canonicalization
};
for (int i = 0; i < 6; ++i) {
for(int j = 1; j < 4; ++j) {
Locale in(testData[i][j-1]);
Locale out;
UErrorCode errorCode;
ListFormatter::getFallbackLocale(in, out, errorCode);
if (U_FAILURE(errorCode)) {
errln("Error in getLocaleFallback: %s", u_errorName(errorCode));
}
if (::strcmp(testData[i][j], out.getName())) {
errln("Expected: |%s|, Actual: |%s|\n", testData[i][j], out.getName());
}
}
}
}
// Formatting in the root locale: every item is separated by ", ".
void ListFormatterTest::TestRoot() {
    UnicodeString results[4];
    results[0] = one;
    results[1] = results[0] + ", " + two;
    results[2] = results[1] + ", " + three;
    results[3] = results[2] + ", " + four;
    CheckFourCases("", one, two, three, four, results);
}
// A locale without any list data ("ex_PY") must produce the root patterns.
void ListFormatterTest::TestBogus() {
    UnicodeString results[4];
    results[0] = one;
    results[1] = results[0] + ", " + two;
    results[2] = results[1] + ", " + three;
    results[3] = results[2] + ", " + four;
    CheckFourCases("ex_PY", one, two, three, four, results);
}
// Formatting in English.
// "and" is used before the last element, and all elements up to (and including) the penultimate are followed by a comma.
void ListFormatterTest::TestEnglish() {
UnicodeString results[4] = {
one,
one + " and " + two,
one + ", " + two + ", and " + three,
one + ", " + two + ", " + three + ", and " + four
};
CheckFourCases("en", one, two, three, four, results);
}
void ListFormatterTest::TestEnglishUS() {
UnicodeString results[4] = {
one,
one + " and " + two,
one + ", " + two + ", and " + three,
one + ", " + two + ", " + three + ", and " + four
};
CheckFourCases("en_US", one, two, three, four, results);
}
// Formatting in Russian.
// "\\u0438" is used before the last element, and all elements up to (but not including) the penultimate are followed by a comma.
void ListFormatterTest::TestRussian() {
UnicodeString and_string = UnicodeString(" \\u0438 ", -1, US_INV).unescape();
UnicodeString results[4] = {
one,
one + and_string + two,
one + ", " + two + and_string + three,
one + ", " + two + ", " + three + and_string + four
};
CheckFourCases("ru", one, two, three, four, results);
}
// Formatting in Malayalam.
// For two elements, "\\u0d15\\u0d42\\u0d1f\\u0d3e\\u0d24\\u0d46" is inserted in between.
// For more than two elements, comma is inserted between all elements up to (and including) the penultimate,
// and the word \\u0d0e\\u0d28\\u0d4d\\u0d28\\u0d3f\\u0d35 is inserted in the end.
void ListFormatterTest::TestMalayalam() {
UnicodeString pair_string = UnicodeString(" \\u0d15\\u0d42\\u0d1f\\u0d3e\\u0d24\\u0d46 ", -1, US_INV).unescape();
UnicodeString total_string = UnicodeString(" \\u0d0e\\u0d28\\u0d4d\\u0d28\\u0d3f\\u0d35", -1, US_INV).unescape();
UnicodeString results[4] = {
one,
one + pair_string + two,
one + ", " + two + ", " + three + total_string,
one + ", " + two + ", " + three + ", " + four + total_string
};
CheckFourCases("ml", one, two, three, four, results);
}
// Formatting in Zulu.
// "and" is used before the last element, and all elements up to (and including) the penultimate are followed by a comma.
void ListFormatterTest::TestZulu() {
UnicodeString results[4] = {
one,
"I-" + one + " ne-" + two,
one + ", " + two + ", no-" + three,
one + ", " + two + ", " + three + ", no-" + four
};
CheckFourCases("zu", one, two, three, four, results);
}
// Test dispatcher: sets name to the test with the given index and runs that
// test if exec is true. An empty name marks the end of the list.
void ListFormatterTest::runIndexedTest(int32_t index, UBool exec,
        const char* &name, char* /*par */) {
    switch (index) {
    case 0:
        name = "TestRoot";
        if (exec) { TestRoot(); }
        break;
    case 1:
        name = "TestBogus";
        if (exec) { TestBogus(); }
        break;
    case 2:
        name = "TestEnglish";
        if (exec) { TestEnglish(); }
        break;
    case 3:
        name = "TestEnglishUS";
        if (exec) { TestEnglishUS(); }
        break;
    case 4:
        name = "TestRussian";
        if (exec) { TestRussian(); }
        break;
    case 5:
        name = "TestMalayalam";
        if (exec) { TestMalayalam(); }
        break;
    case 6:
        name = "TestZulu";
        if (exec) { TestZulu(); }
        break;
    case 7:
        name = "TestLocaleFallback";
        if (exec) { TestLocaleFallback(); }
        break;
    default:
        name = "";
        break;
    }
}

View file

@ -0,0 +1,58 @@
/*
*******************************************************************************
*
* Copyright (C) 2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: listformattertest.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2012aug27
* created by: Umesh P. Nair
*/
#ifndef __LISTFORMATTERTEST_H__
#define __LISTFORMATTERTEST_H__
#include "unicode/listformatter.h"
#include "intltest.h"
// Tests for ListFormatter: locale fallback and list formatting in several locales.
class ListFormatterTest : public IntlTest {
public:
ListFormatterTest();
virtual ~ListFormatterTest() {}
// Sets name to the test with the given index and runs it if exec is true.
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
// Checks ListFormatter::getFallbackLocale() against fixed fallback chains.
void TestLocaleFallback();
// Each test below formats lists of one to four items in one locale.
void TestRoot();
void TestBogus();
void TestEnglish();
void TestEnglishUS();
void TestRussian();
void TestMalayalam();
void TestZulu();
private:
// Formats data with formatter and compares the output with expected_result.
void CheckFormatting(const ListFormatter* formatter, UnicodeString data[], int32_t data_size, const UnicodeString& expected_result);
// Checks lists of one, two, three and four items for one locale against results[0..3].
void CheckFourCases(
const char* locale_string,
UnicodeString one,
UnicodeString two,
UnicodeString three,
UnicodeString four,
UnicodeString results[4]);
private:
// Reused test data.
const UnicodeString prefix;
const UnicodeString one;
const UnicodeString two;
const UnicodeString three;
const UnicodeString four;
};
#endif