diff --git a/icu4c/source/common/Makefile.in b/icu4c/source/common/Makefile.in index c3f82cc192f..f8efcf9d094 100644 --- a/icu4c/source/common/Makefile.in +++ b/icu4c/source/common/Makefile.in @@ -104,7 +104,7 @@ rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb. serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \ uidna.o usprep.o uts46.o punycode.o \ util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o mutex.o dtintrv.o ucnvsel.o propsvec.o \ -ulist.o uloc_tag.o icudataver.o icuplug.o +ulist.o uloc_tag.o icudataver.o icuplug.o listformatter.o ## Header files to install HEADERS = $(srcdir)/unicode/*.h diff --git a/icu4c/source/common/listformatter.cpp b/icu4c/source/common/listformatter.cpp new file mode 100644 index 00000000000..1daf7b18391 --- /dev/null +++ b/icu4c/source/common/listformatter.cpp @@ -0,0 +1,329 @@ +/* +******************************************************************************* +* +* Copyright (C) 2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: listformatter.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2012aug27 +* created by: Umesh P. 
Nair
+*/
+
+#include "unicode/listformatter.h"
+#include "mutex.h"
+#include "hash.h"
+#include "cstring.h"
+#include "ulocimp.h"
+#include "charstr.h"
+#include "ucln_cmn.h"
+
+U_NAMESPACE_BEGIN
+
+// The four patterns used to join list items for one locale. Each pattern
+// contains the placeholders "{0}" and "{1}".
+struct ListFormatData : public UMemory {
+    UnicodeString twoPattern;     // used when the list has exactly two items
+    UnicodeString startPattern;   // joins the first two items of a longer list
+    UnicodeString middlePattern;  // joins each further item except the last
+    UnicodeString endPattern;     // joins the last item of a longer list
+};
+
+// Locale ID -> ListFormatData*, built on first use by initializeHash().
+static Hashtable* listPatternHash = NULL;
+static UMTX listFormatterMutex = NULL;
+// Placeholder texts. The arrays are not NUL-terminated; callers pass the length (3).
+static const UChar FIRST_PARAMETER[] = { 0x7b, 0x30, 0x7d };  // "{0}"
+static const UChar SECOND_PARAMETER[] = { 0x7b, 0x31, 0x7d };  // "{1}"
+
+U_CDECL_BEGIN
+// Library cleanup callback: releases the pattern table.
+static UBool U_CALLCONV uprv_listformatter_cleanup() {
+    delete listPatternHash;
+    listPatternHash = NULL;
+    return TRUE;
+}
+
+// Value deleter installed on listPatternHash.
+static void U_CALLCONV
+uprv_deleteListFormatData(void *obj) {
+    delete static_cast<ListFormatData *>(obj);
+}
+
+U_CDECL_END
+
+/**
+ * Creates listPatternHash and fills it with the built-in pattern data.
+ * On any failure the table is released again and listPatternHash is reset to
+ * NULL, so that a later call never sees a partially populated table.
+ * The only caller in this file holds listFormatterMutex while calling.
+ */
+void ListFormatter::initializeHash(UErrorCode& errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+
+    listPatternHash = new Hashtable();
+    if (listPatternHash == NULL) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    listPatternHash->setValueDeleter(uprv_deleteListFormatData);
+    ucln_common_registerCleanup(UCLN_COMMON_LIST_FORMATTER, uprv_listformatter_cleanup);
+
+    addDataToHash("af", "{0} en {1}", "{0}, {1}", "{0}, {1}", "{0} en {1}", errorCode);
+    addDataToHash("am", "{0} \\u12a5\\u1293 {1}", "{0}, {1}", "{0}, {1}", "{0}, \\u12a5\\u1293 {1}", errorCode);
+    addDataToHash("ar", "{0} \\u0648 {1}", "{0}\\u060c {1}", "{0}\\u060c {1}", "{0}\\u060c \\u0648 {1}", errorCode);
+    addDataToHash("bg", "{0} \\u0438 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0438 {1}", errorCode);
+    addDataToHash("bn", "{0} \\u098f\\u09ac\\u0982 {1}", "{0}, {1}", "{0}, {1}", "{0}, \\u098f\\u09ac\\u0982 {1}", errorCode);
+    addDataToHash("bs", "{0} i {1}", "{0}, {1}", "{0}, {1}", "{0} i {1}", errorCode);
+    addDataToHash("ca", "{0} i {1}", "{0}, {1}", "{0}, {1}", "{0} i {1}", errorCode);
+    addDataToHash("cs", "{0} a {1}", "{0}, {1}", "{0}, {1}", "{0} a {1}", errorCode);
+    addDataToHash("da", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
+    addDataToHash("de", "{0} und {1}", "{0}, {1}", "{0}, {1}", "{0} und {1}", errorCode);
+    addDataToHash("ee", "{0} kple {1}", "{0}, {1}", "{0}, {1}", "{0}, kple {1}", errorCode);
+    addDataToHash("el", "{0} \\u03ba\\u03b1\\u03b9 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u03ba\\u03b1\\u03b9 {1}", errorCode);
+    addDataToHash("en", "{0} and {1}", "{0}, {1}", "{0}, {1}", "{0}, and {1}", errorCode);
+    addDataToHash("es", "{0} y {1}", "{0}, {1}", "{0}, {1}", "{0} y {1}", errorCode);
+    addDataToHash("et", "{0} ja {1}", "{0}, {1}", "{0}, {1}", "{0} ja {1}", errorCode);
+    addDataToHash("eu", "{0} eta {1}", "{0}, {1}", "{0}, {1}", "{0} eta {1}", errorCode);
+    addDataToHash("fa", "{0} \\u0648 {1}", "{0}\\u060c\\u200f {1}", "{0}\\u060c\\u200f {1}", "{0}\\u060c \\u0648 {1}", errorCode);
+    addDataToHash("fi", "{0} ja {1}", "{0}, {1}", "{0}, {1}", "{0} ja {1}", errorCode);
+    addDataToHash("fil", "{0} at {1}", "{0}, {1}", "{0}, {1}", "{0} at {1}", errorCode);
+    addDataToHash("fo", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
+    addDataToHash("fr", "{0} et {1}", "{0}, {1}", "{0}, {1}", "{0} et {1}", errorCode);
+    addDataToHash("fur", "{0} e {1}", "{0}, {1}", "{0}, {1}", "{0} e {1}", errorCode);
+    addDataToHash("gd", "{0} agus {1}", "{0}, {1}", "{0}, {1}", "{0}, agus {1}", errorCode);
+    addDataToHash("gl", "{0} e {1}", "{0}, {1}", "{0}, {1}", "{0} e {1}", errorCode);
+    addDataToHash("gsw", "{0} und {1}", "{0}, {1}", "{0}, {1}", "{0} und {1}", errorCode);
+    addDataToHash("gu", "{0} \\u0a85\\u0aa8\\u0ac7 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0a85\\u0aa8\\u0ac7 {1}", errorCode);
+    addDataToHash("he", "{0} \\u05d5-{1}", "{0}, {1}", "{0}, {1}", "{0} \\u05d5-{1}", errorCode);
+    addDataToHash("hi", "{0} \\u0914\\u0930 {1}", "{0}, {1}", "{0}, {1}", "{0}, \\u0914\\u0930 {1}", errorCode);
+    addDataToHash("hr", "{0} i {1}", "{0}, {1}", "{0}, {1}", "{0} i {1}", errorCode);
+    addDataToHash("hu", "{0} \\u00e9s {1}", "{0}, {1}", "{0}, {1}", "{0} \\u00e9s {1}", errorCode);
+    addDataToHash("id", "{0} dan {1}", "{0}, {1}", "{0}, {1}", "{0}, dan {1}", errorCode);
+    addDataToHash("is", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
+    addDataToHash("it", "{0} e {1}", "{0}, {1}", "{0}, {1}", "{0}, e {1}", errorCode);
+    addDataToHash("ja", "{0}\\u3001{1}", "{0}\\u3001{1}", "{0}\\u3001{1}", "{0}\\u3001{1}", errorCode);
+    addDataToHash("ka", "{0} \\u10d3\\u10d0 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u10d3\\u10d0 {1}", errorCode);
+    addDataToHash("kea", "{0} y {1}", "{0}, {1}", "{0}, {1}", "{0} y {1}", errorCode);
+    addDataToHash("kl", "{0} aamma {1}", "{0} aamma {1}", "{0}, {1}", "{0}, {1}", errorCode);
+    addDataToHash("kn", "{0} \\u0cae\\u0ca4\\u0ccd\\u0ca4\\u0cc1 {1}", "{0}, {1}", "{0}, {1}",
+                  "{0}, \\u0cae\\u0ca4\\u0ccd\\u0ca4\\u0cc1 {1}", errorCode);
+    addDataToHash("ko", "{0} \\ubc0f {1}", "{0}, {1}", "{0}, {1}", "{0} \\ubc0f {1}", errorCode);
+    addDataToHash("ksh", "{0} un {1}", "{0}, {1}", "{0}, {1}", "{0} un {1}", errorCode);
+    addDataToHash("lt", "{0} ir {1}", "{0}, {1}", "{0}, {1}", "{0} ir {1}", errorCode);
+    addDataToHash("lv", "{0} un {1}", "{0}, {1}", "{0}, {1}", "{0} un {1}", errorCode);
+    addDataToHash("ml", "{0} \\u0d15\\u0d42\\u0d1f\\u0d3e\\u0d24\\u0d46 {1}", "{0}, {1}", "{0}, {1}",
+                  "{0}, {1} \\u0d0e\\u0d28\\u0d4d\\u0d28\\u0d3f\\u0d35", errorCode);
+    addDataToHash("mr", "{0} \\u0906\\u0923\\u093f {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0906\\u0923\\u093f {1}", errorCode);
+    addDataToHash("ms", "{0} dan {1}", "{0}, {1}", "{0}, {1}", "{0}, dan {1}", errorCode);
+    addDataToHash("nb", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
+    addDataToHash("nl", "{0} en {1}", "{0}, {1}", "{0}, {1}", "{0} en {1}", errorCode);
+    addDataToHash("nn", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
+    addDataToHash("pl", "{0} i {1}", "{0}; {1}", "{0}; {1}", "{0} i {1}", errorCode);
+    addDataToHash("pt", "{0} e {1}", "{0}, {1}", "{0}, {1}", "{0} e {1}", errorCode);
+    addDataToHash("ro", "{0} \\u015fi {1}", "{0}, {1}", "{0}, {1}", "{0} \\u015fi {1}", errorCode);
+    addDataToHash("", "{0}, {1}", "{0}, {1}", "{0}, {1}", "{0}, {1}", errorCode); // root
+    addDataToHash("ru", "{0} \\u0438 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0438 {1}", errorCode);
+    addDataToHash("se", "{0} ja {1}", "{0}, {1}", "{0}, {1}", "{0} ja {1}", errorCode);
+    addDataToHash("sk", "{0} a {1}", "{0}, {1}", "{0}, {1}", "{0} a {1}", errorCode);
+    addDataToHash("sl", "{0} in {1}", "{0}, {1}", "{0}, {1}", "{0} in {1}", errorCode);
+    addDataToHash("sr", "{0} \\u0438 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0438 {1}", errorCode);
+    addDataToHash("sr_Cyrl", "{0} \\u0438 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0438 {1}", errorCode);
+    addDataToHash("sr_Latn", "{0} i {1}", "{0}, {1}", "{0}, {1}", "{0} i {1}", errorCode);
+    addDataToHash("sv", "{0} och {1}", "{0}, {1}", "{0}, {1}", "{0} och {1}", errorCode);
+    addDataToHash("sw", "{0} na {1}", "{0}, {1}", "{0}, {1}", "{0}, na {1}", errorCode);
+    addDataToHash("ta", "{0} \\u0bae\\u0bb1\\u0bcd\\u0bb1\\u0bc1\\u0bae\\u0bcd {1}", "{0}, {1}", "{0}, {1}",
+                  "{0} \\u0bae\\u0bb1\\u0bcd\\u0bb1\\u0bc1\\u0bae\\u0bcd {1}", errorCode);
+    addDataToHash("te", "{0} \\u0c2e\\u0c30\\u0c3f\\u0c2f\\u0c41 {1}", "{0}, {1}", "{0}, {1}",
+                  "{0} \\u0c2e\\u0c30\\u0c3f\\u0c2f\\u0c41 {1}", errorCode);
+    addDataToHash("th", "{0}\\u0e41\\u0e25\\u0e30{1}", "{0} {1}", "{0} {1}", "{0} \\u0e41\\u0e25\\u0e30{1}", errorCode);
+    addDataToHash("tr", "{0} ve {1}", "{0}, {1}", "{0}, {1}", "{0} ve {1}", errorCode);
+    addDataToHash("uk", "{0} \\u0442\\u0430 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0442\\u0430 {1}", errorCode);
+    addDataToHash("ur", "{0} \\u0627\\u0648\\u0631 {1}", "{0}\\u060c {1}", "{0}\\u060c {1}",
+                  "{0}\\u060c \\u0627\\u0648\\u0631 {1}", errorCode);
+    addDataToHash("vi", "{0} v\\u00e0 {1}", "{0}, {1}", "{0}, {1}", "{0} v\\u00e0 {1}", errorCode);
+    addDataToHash("wae", "{0} und {1}", "{0}, {1}", "{0}, {1}", "{0} und {1}", errorCode);
+    addDataToHash("zh", "{0}\\u548c{1}", "{0}\\u3001{1}", "{0}\\u3001{1}", "{0}\\u548c{1}", errorCode);
+    addDataToHash("zu", "I-{0} ne-{1}", "{0}, {1}", "{0}, {1}", "{0}, no-{1}", errorCode);
+
+    // addDataToHash() becomes a no-op after the first failure, so a failure
+    // here means the table is incomplete. Drop it rather than leave a
+    // non-NULL table that getListFormatData() would treat as fully built.
+    if (U_FAILURE(errorCode)) {
+        delete listPatternHash;
+        listPatternHash = NULL;
+    }
+}
+
+/**
+ * Adds the four patterns for one locale to listPatternHash.
+ * The pattern arguments are C strings that may contain \\uXXXX escapes; they
+ * are unescaped before being stored. Does nothing if errorCode already
+ * indicates a failure.
+ */
+void ListFormatter::addDataToHash(
+    const char* locale,
+    const char* two,
+    const char* start,
+    const char* middle,
+    const char* end,
+    UErrorCode& errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    UnicodeString key(locale, -1, US_INV);
+    ListFormatData* value = new ListFormatData();
+    if (value == NULL) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    value->twoPattern = UnicodeString(two, -1, US_INV).unescape();
+    value->startPattern = UnicodeString(start, -1, US_INV).unescape();
+    value->middlePattern = UnicodeString(middle, -1, US_INV).unescape();
+    value->endPattern = UnicodeString(end, -1, US_INV).unescape();
+    listPatternHash->put(key, value, errorCode);
+}
+
+/**
+ * Returns the pattern data stored for exactly this locale, or NULL if the
+ * table has no entry for it (or if building the table failed, in which case
+ * errorCode is set). The table is built on the first call, under
+ * listFormatterMutex.
+ */
+const ListFormatData* ListFormatter::getListFormatData(
+        const Locale& locale, UErrorCode& errorCode) {
+    {
+        Mutex m(&listFormatterMutex);
+        if (listPatternHash == NULL) {
+            initializeHash(errorCode);
+            if (U_FAILURE(errorCode)) {
+                return NULL;
+            }
+        }
+    }
+
+    // The table is keyed by plain locale IDs, so look up the base name:
+    // keywords such as "@collation=phonebook" must not prevent a match.
+    UnicodeString key(locale.getBaseName(), -1, US_INV);
+    return static_cast<const ListFormatData*>(listPatternHash->get(key));
+}
+
+ListFormatter* ListFormatter::createInstance(UErrorCode& errorCode) {
+    Locale locale;  // The default locale.
+    return createInstance(locale, errorCode);
+}
+
+/**
+ * Walks the fallback chain of the given locale until pattern data is found
+ * and returns a new formatter for it; the caller owns the result.
+ * Returns NULL and sets errorCode on failure.
+ */
+ListFormatter* ListFormatter::createInstance(const Locale& locale, UErrorCode& errorCode) {
+    Locale tempLocale = locale;
+    for (;;) {
+        const ListFormatData* listFormatData = getListFormatData(tempLocale, errorCode);
+        if (U_FAILURE(errorCode)) {
+            return NULL;
+        }
+        if (listFormatData != NULL) {
+            ListFormatter* p = new ListFormatter(tempLocale, listFormatData);
+            if (p == NULL) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+                return NULL;
+            }
+            return p;
+        }
+        Locale correctLocale;
+        getFallbackLocale(tempLocale, correctLocale, errorCode);
+        if (U_FAILURE(errorCode)) {
+            return NULL;
+        }
+        if (correctLocale.isBogus() || correctLocale == tempLocale) {
+            // The fallback made no progress (getFallbackLocale() can return
+            // its input unchanged). Jump straight to root instead of looping
+            // forever on the same locale.
+            if (tempLocale == Locale::getRoot()) {
+                // Even root has no data; give up instead of retrying root.
+                errorCode = U_MISSING_RESOURCE_ERROR;
+                return NULL;
+            }
+            tempLocale = Locale::getRoot();
+        } else {
+            tempLocale = correctLocale;
+        }
+    }
+}
+
+ListFormatter::ListFormatter(const Locale& listFormatterLocale, const ListFormatData* listFormatterData)
+        : locale(listFormatterLocale), data(listFormatterData) {
+}
+
+ListFormatter::~ListFormatter() {}
+
+/**
+ * Computes the fallback of a locale by removing the last '_'-separated
+ * segment (and any empty segment left behind) from the part of the ID that
+ * precedes the keywords; the keywords are carried over unchanged.
+ * Leaves out untouched if errorCode indicates a failure.
+ */
+void ListFormatter::getFallbackLocale(const Locale& in, Locale& out, UErrorCode& errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    if (uprv_strcmp(in.getName(), "zh_TW") == 0) {
+        // NOTE(review): if Locale::getTraditionalChinese() is itself "zh_TW",
+        // this maps the input onto itself; createInstance() guards against
+        // that. Confirm which locale was intended here.
+        out = Locale::getTraditionalChinese();
+    } else {
+        const char* localeString = in.getName();
+        const char* extStart = locale_getKeywordsStart(localeString);
+        if (extStart == NULL) {
+            // No keywords: treat the terminating NUL as the keyword start.
+            extStart = uprv_strchr(localeString, 0);
+        }
+        const char* last = extStart;
+
+        // TODO: Check whether uloc_getParent() will work here.
+        // Drop the last segment: back up to just after the previous '_'.
+        while (last > localeString && *(last - 1) != '_') {
+            --last;
+        }
+
+        // Truncate empty segment.
+        while (last > localeString && *(last - 1) == '_') {
+            --last;
+        }
+
+        int32_t localePortionLen = static_cast<int32_t>(last - localeString);
+        CharString fullLocale;
+        fullLocale.append(localeString, localePortionLen, errorCode).append(extStart, errorCode);
+
+        if (U_FAILURE(errorCode)) {
+            return;
+        }
+        out = Locale(fullLocale.data());
+    }
+}
+
+/**
+ * Joins the items with this formatter's patterns and appends the result to
+ * appendTo. Nothing is appended when nItems <= 0 or when an error occurs.
+ * A NULL items array with nItems > 0 sets U_ILLEGAL_ARGUMENT_ERROR.
+ */
+UnicodeString& ListFormatter::format(const UnicodeString items[], int32_t nItems,
+                                     UnicodeString& appendTo, UErrorCode& errorCode) const {
+    if (U_FAILURE(errorCode)) {
+        return appendTo;
+    }
+
+    if (nItems > 0) {
+        if (items == NULL) {
+            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+            return appendTo;
+        }
+        UnicodeString newString = items[0];
+        if (nItems == 2) {
+            addNewString(data->twoPattern, newString, items[1], errorCode);
+        } else if (nItems > 2) {
+            addNewString(data->startPattern, newString, items[1], errorCode);
+            for (int32_t i = 2; i < nItems - 1; ++i) {
+                addNewString(data->middlePattern, newString, items[i], errorCode);
+            }
+            addNewString(data->endPattern, newString, items[nItems - 1], errorCode);
+        }
+        if (U_SUCCESS(errorCode)) {
+            appendTo += newString;
+        }
+    }
+    return appendTo;
+}
+
+/**
+ * Joins originalString and nextString using the pattern pat and puts the result in
+ * originalString.
+ */
+void ListFormatter::addNewString(const UnicodeString& pat, UnicodeString& originalString,
+                                 const UnicodeString& nextString, UErrorCode& errorCode) const {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+
+    // Both placeholders are three UChars long.
+    static const int32_t kPlaceholderLength = 3;
+
+    // The pattern must contain both "{0}" and "{1}".
+    const int32_t posOfOriginal = pat.indexOf(FIRST_PARAMETER, kPlaceholderLength, 0);
+    const int32_t posOfNext =
+        posOfOriginal < 0 ? -1 : pat.indexOf(SECOND_PARAMETER, kPlaceholderLength, 0);
+    if (posOfNext < 0) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    // The placeholders may appear in either order; work out which argument
+    // is substituted first.
+    const UBool originalComesFirst = posOfOriginal < posOfNext;
+    const int32_t leftPos = originalComesFirst ? posOfOriginal : posOfNext;
+    const int32_t rightPos = originalComesFirst ? posOfNext : posOfOriginal;
+    const UnicodeString* leftText = originalComesFirst ? &originalString : &nextString;
+    const UnicodeString* rightText = originalComesFirst ? &nextString : &originalString;
+
+    // prefix + left argument + text between the placeholders + right argument + suffix
+    const int32_t afterLeft = leftPos + kPlaceholderLength;
+    const int32_t afterRight = rightPos + kPlaceholderLength;
+    UnicodeString combined(pat, 0, leftPos);
+    combined.append(*leftText);
+    combined.append(pat, afterLeft, rightPos - afterLeft);
+    combined.append(*rightText);
+    combined.append(pat, afterRight, pat.length() - afterRight);
+
+    // Assigned only at the end because originalString is also an input.
+    originalString = combined;
+}
+
+UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(ListFormatter)
+
+U_NAMESPACE_END
diff --git a/icu4c/source/common/ucln_cmn.h b/icu4c/source/common/ucln_cmn.h
index e53a6717eb4..e35653a6a30 100644
--- a/icu4c/source/common/ucln_cmn.h
+++ b/icu4c/source/common/ucln_cmn.h
@@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 * *
-* Copyright (C) 2001-2010, International Business Machines *
+* Copyright (C) 2001-2012, International Business Machines *
 * Corporation and others. All Rights Reserved.
*
 * *
 ******************************************************************************
@@ -49,6 +49,7 @@ typedef enum ECleanupCommonType {
     UCLN_COMMON_UCNV_IO,
     UCLN_COMMON_UDATA,
     UCLN_COMMON_PUTIL,
+    UCLN_COMMON_LIST_FORMATTER,
     UCLN_COMMON_COUNT /* This must be last */
 } ECleanupCommonType;
 
diff --git a/icu4c/source/common/unicode/listformatter.h b/icu4c/source/common/unicode/listformatter.h
new file mode 100644
index 00000000000..52591bdd522
--- /dev/null
+++ b/icu4c/source/common/unicode/listformatter.h
@@ -0,0 +1,122 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: listformatter.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 20120426
+* created by: Umesh P. Nair
+*/
+
+#ifndef __LISTFORMATTER_H__
+#define __LISTFORMATTER_H__
+
+#include "unicode/unistr.h"
+#include "unicode/locid.h"
+
+
+U_NAMESPACE_BEGIN
+
+/** @internal */
+class Hashtable;
+
+/** @internal */
+struct ListFormatData;
+
+/**
+ * \file
+ * \brief C++ API: API for formatting a list.
+ */
+
+
+/**
+ * An immutable class for formatting a list, using data from CLDR (or supplied
+ * separately).
+ *
+ * Example: Input data ["Alice", "Bob", "Charlie", "Delta"] will be formatted
+ * as "Alice, Bob, Charlie, and Delta" in English.
+ *
+ * The ListFormatter class is not intended for public subclassing.
+ */
+class U_COMMON_API ListFormatter : public UObject{
+
+  public:
+    /**
+     * Creates a ListFormatter appropriate for the default locale.
+     *
+     * @param errorCode ICU error code, set if no data available for default locale.
+     * @return Pointer to a ListFormatter object for the default locale,
+     *     created from internal data derived from CLDR data.
+     * @draft ICU 50
+     */
+    static ListFormatter* createInstance(UErrorCode& errorCode);
+
+    /**
+     * Creates a ListFormatter appropriate for a locale.
+     *
+     * @param locale The locale.
+     * @param errorCode ICU error code, set if no data available for the given locale.
+     * @return A ListFormatter object created from internal data derived from
+     *     CLDR data.
+     * @draft ICU 50
+     */
+    static ListFormatter* createInstance(const Locale& locale, UErrorCode& errorCode);
+
+
+    /**
+     * Destructor.
+     *
+     * @draft ICU 50
+     */
+    virtual ~ListFormatter();
+
+
+    /**
+     * Formats a list of strings.
+     *
+     * @param items     An array of strings to be combined and formatted.
+     * @param n_items   Length of the array items.
+     * @param appendTo  The string to which the result should be appended to.
+     * @param errorCode ICU error code, set if there is an error.
+     * @return          Formatted string combining the elements of items, appended to appendTo.
+     * @draft ICU 50
+     */
+    UnicodeString& format(const UnicodeString items[], int32_t n_items,
+        UnicodeString& appendTo, UErrorCode& errorCode) const;
+
+    /**
+     * Gets the fallback locale for a given locale.
+     * TODO: Consider moving this to the Locale class.
+     * @param in The input locale.
+     * @param out The output locale after fallback.
+     * @param errorCode ICU error code, set if there is an error.
+     * @internal For testing.
+     */
+    static void getFallbackLocale(const Locale& in, Locale& out, UErrorCode& errorCode);
+
+  private:
+    static void initializeHash(UErrorCode& errorCode);
+    static void addDataToHash(const char* locale, const char* two, const char* start, const char* middle, const char* end, UErrorCode& errorCode);
+    static const ListFormatData* getListFormatData(const Locale& locale, UErrorCode& errorCode);
+
+    ListFormatter();
+    ListFormatter(const Locale& listFormatterLocale, const ListFormatData* listFormatterData);
+    ListFormatter(const ListFormatter&);
+
+    ListFormatter& operator = (const ListFormatter&);
+    void addNewString(const UnicodeString& pattern, UnicodeString& originalString,
+                      const UnicodeString& newString, UErrorCode& errorCode) const;
+    virtual UClassID getDynamicClassID() const;
+
+    Locale locale;
+    // Pattern data for `locale`; this class never deletes it.
+    const ListFormatData* data;
+};
+
+U_NAMESPACE_END
+
+#endif
diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in
index d4f7e7c3131..b664c97b70c 100644
--- a/icu4c/source/test/intltest/Makefile.in
+++ b/icu4c/source/test/intltest/Makefile.in
@@ -55,7 +55,7 @@ itrbnf.o itrbnfrt.o itrbnfp.o ucaconf.o icusvtst.o \
 uobjtest.o idnaref.o idnaconf.o nptrans.o punyref.o testidn.o testidna.o uts46test.o \
 incaltst.o calcasts.o v32test.o uvectest.o textfile.o tokiter.o utxttest.o \
 windttst.o winnmtst.o winutil.o csdetest.o tzrulets.o tzoffloc.o tzfmttst.o ssearch.o dtifmtts.o \
-tufmtts.o itspoof.o simplethread.o bidiconf.o locnmtst.o dcfmtest.o alphaindextst.o
+tufmtts.o itspoof.o simplethread.o bidiconf.o locnmtst.o dcfmtest.o alphaindextst.o listformattertest.o
 
 DEPS = $(OBJECTS:.o=.d)
 
diff --git a/icu4c/source/test/intltest/itformat.cpp b/icu4c/source/test/intltest/itformat.cpp
index 467b6b20fca..5e9d38f98d1 100644
--- a/icu4c/source/test/intltest/itformat.cpp
+++ b/icu4c/source/test/intltest/itformat.cpp
@@ -1,6 +1,6 @@
 /********************************************************************
- * COPYRIGHT:
- * Copyright (c)
1997-2010, International Business Machines + * COPYRIGHT: + * Copyright (c) 1997-2012, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************/ @@ -55,6 +55,7 @@ #include "tufmtts.h" // TimeUnitTest #include "locnmtst.h" // LocaleDisplayNamesTest #include "dcfmtest.h" // DecimalFormatTest +#include "listformattertest.h" // ListFormatterTest #define TESTCLASS(id, TestClass) \ case id: \ @@ -131,6 +132,7 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam #if !UCONFIG_NO_REGULAR_EXPRESSIONS TESTCLASS(41,DecimalFormatTest); #endif + TESTCLASS(42,ListFormatterTest); default: name = ""; break; //needed to end loop } diff --git a/icu4c/source/test/intltest/listformattertest.cpp b/icu4c/source/test/intltest/listformattertest.cpp new file mode 100644 index 00000000000..ec7b3cfe450 --- /dev/null +++ b/icu4c/source/test/intltest/listformattertest.cpp @@ -0,0 +1,190 @@ +/* +******************************************************************************* +* +* Copyright (C) 2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: listformattertest.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2012aug27 +* created by: Umesh P. 
Nair
+*/
+
+#include "listformattertest.h"
+#include <string.h>
+
+ListFormatterTest::ListFormatterTest() :
+        prefix("Prefix: ", -1, US_INV),
+        one("Alice", -1, US_INV), two("Bob", -1, US_INV),
+        three("Charlie", -1, US_INV), four("Delta", -1, US_INV) {
+}
+
+// Formats data[0..dataSize) onto a copy of the shared prefix and reports an
+// error unless the result equals prefix + expected_result. Starting from a
+// non-empty string checks that format() appends rather than overwrites.
+void ListFormatterTest::CheckFormatting(const ListFormatter* formatter, UnicodeString data[], int32_t dataSize,
+                                        const UnicodeString& expected_result) {
+    UnicodeString actualResult(prefix);
+    UErrorCode errorCode = U_ZERO_ERROR;
+    formatter->format(data, dataSize, actualResult, errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("ListFormatter::format failed: %s", u_errorName(errorCode));
+        return;
+    }
+    UnicodeString expectedStringWithPrefix = prefix + expected_result;
+    if (expectedStringWithPrefix != actualResult) {
+        errln(UnicodeString("Expected: |") + expectedStringWithPrefix +  "|, Actual: |" + actualResult + "|");
+    }
+}
+
+// Creates a formatter for locale_string and checks the lists of length
+// 1, 2, 3 and 4 against results[0..3].
+void ListFormatterTest::CheckFourCases(const char* locale_string, UnicodeString one, UnicodeString two,
+        UnicodeString three, UnicodeString four, UnicodeString results[4]) {
+    UErrorCode errorCode = U_ZERO_ERROR;
+    ListFormatter* formatter = ListFormatter::createInstance(Locale(locale_string), errorCode);
+    if (formatter == NULL || U_FAILURE(errorCode)) {
+        errln("Allocation problem\n");
+        delete formatter;  // no-op when NULL
+        return;
+    }
+    UnicodeString input1[] = {one};
+    CheckFormatting(formatter, input1, 1, results[0]);
+
+    UnicodeString input2[] = {one, two};
+    CheckFormatting(formatter, input2, 2, results[1]);
+
+    UnicodeString input3[] = {one, two, three};
+    CheckFormatting(formatter, input3, 3, results[2]);
+
+    UnicodeString input4[] = {one, two, three, four};
+    CheckFormatting(formatter, input4, 4, results[3]);
+
+    // createInstance() returns a new object that the caller owns.
+    delete formatter;
+}
+
+
+// Each row is a fallback chain: the fallback of column j-1 must be column j.
+void ListFormatterTest::TestLocaleFallback() {
+    const char* testData[][4] = {
+        {"en_US", "en", "", ""},  // ULocale.getFallback("") should return ""
+        {"EN_us_Var", "en_US", "en", ""},  // Case is always normalized
+        {"de_DE@collation=phonebook", "de@collation=phonebook", "@collation=phonebook", "@collation=phonebook"},  // Keyword is preserved
+        {"en__POSIX", "en", "", ""},  // Trailing empty segment should be truncated
+        {"_US_POSIX", "_US", "", ""},  // Same as above
+        {"root", "", "", ""},  // No canonicalization
+    };
+    const int32_t testCount = (int32_t)(sizeof(testData) / sizeof(testData[0]));
+    for (int32_t i = 0; i < testCount; ++i) {
+        for (int32_t j = 1; j < 4; ++j) {
+            Locale in(testData[i][j-1]);
+            Locale out;
+            // Must start out as success: the function under test only ever
+            // sets the code on failure.
+            UErrorCode errorCode = U_ZERO_ERROR;
+            ListFormatter::getFallbackLocale(in, out, errorCode);
+            if (U_FAILURE(errorCode)) {
+                errln("Error in getLocaleFallback: %s", u_errorName(errorCode));
+            }
+
+            if (::strcmp(testData[i][j], out.getName())) {
+                errln("Expected: |%s|, Actual: |%s|\n", testData[i][j], out.getName());
+            }
+        }
+    }
+}
+
+void ListFormatterTest::TestRoot() {
+    UnicodeString results[4] = {
+        one,
+        one + ", " + two,
+        one + ", " + two + ", " + three,
+        one + ", " + two + ", " + three + ", " + four
+    };
+
+    CheckFourCases("", one, two, three, four, results);
+}
+
+// Bogus locale should fallback to root.
+void ListFormatterTest::TestBogus() {
+    UnicodeString results[4] = {
+        one,
+        one + ", " + two,
+        one + ", " + two + ", " + three,
+        one + ", " + two + ", " + three + ", " + four
+    };
+
+    CheckFourCases("ex_PY", one, two, three, four, results);
+}
+
+// Formatting in English.
+// "and" is used before the last element, and all elements up to (and including) the penultimate are followed by a comma.
+void ListFormatterTest::TestEnglish() {
+    UnicodeString results[4] = {
+        one,
+        one + " and " + two,
+        one + ", " + two + ", and " + three,
+        one + ", " + two + ", " + three + ", and " + four
+    };
+
+    CheckFourCases("en", one, two, three, four, results);
+}
+
+// "en_US" has no data of its own and must fall back to the "en" patterns.
+void ListFormatterTest::TestEnglishUS() {
+    UnicodeString results[4] = {
+        one,
+        one + " and " + two,
+        one + ", " + two + ", and " + three,
+        one + ", " + two + ", " + three + ", and " + four
+    };
+
+    CheckFourCases("en_US", one, two, three, four, results);
+}
+
+// Formatting in Russian.
+// "\\u0438" is used before the last element, and all elements up to (but not including) the penultimate are followed by a comma.
+void ListFormatterTest::TestRussian() {
+    const UnicodeString conjunction = UnicodeString(" \\u0438 ", -1, US_INV).unescape();
+
+    UnicodeString expected[4];
+    expected[0] = one;
+    expected[1] = one + conjunction + two;
+    expected[2] = one + ", " + two + conjunction + three;
+    expected[3] = one + ", " + two + ", " + three + conjunction + four;
+
+    CheckFourCases("ru", one, two, three, four, expected);
+}
+
+// Formatting in Malayalam.
+// A pair is joined by the word "\\u0d15\\u0d42\\u0d1f\\u0d3e\\u0d24\\u0d46".
+// Longer lists separate every element with a comma and are followed by the
+// word \\u0d0e\\u0d28\\u0d4d\\u0d28\\u0d3f\\u0d35.
+void ListFormatterTest::TestMalayalam() {
+    const UnicodeString pairJoiner =
+        UnicodeString(" \\u0d15\\u0d42\\u0d1f\\u0d3e\\u0d24\\u0d46 ", -1, US_INV).unescape();
+    const UnicodeString listSuffix =
+        UnicodeString(" \\u0d0e\\u0d28\\u0d4d\\u0d28\\u0d3f\\u0d35", -1, US_INV).unescape();
+
+    UnicodeString expected[4];
+    expected[0] = one;
+    expected[1] = one + pairJoiner + two;
+    expected[2] = one + ", " + two + ", " + three + listSuffix;
+    expected[3] = one + ", " + two + ", " + three + ", " + four + listSuffix;
+
+    CheckFourCases("ml", one, two, three, four, expected);
+}
+
+// Formatting in Zulu.
+// A pair is written "I-<first> ne-<second>". In longer lists the elements are
+// separated by commas and the last element carries the prefix "no-".
+void ListFormatterTest::TestZulu() {
+    UnicodeString expected[4];
+    expected[0] = one;
+    expected[1] = "I-" + one + " ne-" + two;
+    expected[2] = one + ", " + two + ", no-" + three;
+    expected[3] = one + ", " + two + ", " + three + ", no-" + four;
+
+    CheckFourCases("zu", one, two, three, four, expected);
+}
+
+// Maps a test index to its name and, when exec is set, runs that test.
+// An index outside the table yields the empty name.
+void ListFormatterTest::runIndexedTest(int32_t index, UBool exec,
+                                       const char* &name, char* /*par */) {
+    typedef void (ListFormatterTest::*TestMethod)();
+    static const struct {
+        const char* testName;
+        TestMethod method;
+    } kTests[] = {
+        { "TestRoot", &ListFormatterTest::TestRoot },
+        { "TestBogus", &ListFormatterTest::TestBogus },
+        { "TestEnglish", &ListFormatterTest::TestEnglish },
+        { "TestEnglishUS", &ListFormatterTest::TestEnglishUS },
+        { "TestRussian", &ListFormatterTest::TestRussian },
+        { "TestMalayalam", &ListFormatterTest::TestMalayalam },
+        { "TestZulu", &ListFormatterTest::TestZulu },
+        { "TestLocaleFallback", &ListFormatterTest::TestLocaleFallback }
+    };
+    const int32_t testCount = (int32_t)(sizeof(kTests) / sizeof(kTests[0]));
+
+    if (index < 0 || index >= testCount) {
+        name = "";
+        return;
+    }
+    name = kTests[index].testName;
+    if (exec) {
+        (this->*kTests[index].method)();
+    }
+}
diff --git a/icu4c/source/test/intltest/listformattertest.h b/icu4c/source/test/intltest/listformattertest.h
new file mode 100644
index 00000000000..4a012c21556
--- /dev/null
+++ b/icu4c/source/test/intltest/listformattertest.h
@@ -0,0 +1,58 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: listformattertest.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2012aug27
+* created by: Umesh P.
Nair
+*/
+
+#ifndef __LISTFORMATTERTEST_H__
+#define __LISTFORMATTERTEST_H__
+
+#include "unicode/listformatter.h"
+#include "intltest.h"
+
+// intltest suite for ListFormatter.
+class ListFormatterTest : public IntlTest {
+  public:
+    ListFormatterTest();
+    virtual ~ListFormatterTest() {}
+
+    // Test dispatcher.
+    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
+
+    // Individual test cases.
+    void TestLocaleFallback();
+    void TestRoot();
+    void TestBogus();
+    void TestEnglish();
+    void TestEnglishUS();
+    void TestRussian();
+    void TestMalayalam();
+    void TestZulu();
+
+  private:
+    // Formats data[0..data_size) and compares the result with expected_result.
+    void CheckFormatting(const ListFormatter* formatter, UnicodeString data[], int32_t data_size,
+                         const UnicodeString& expected_result);
+
+    // Checks lists of one to four items for a locale against results[0..3].
+    void CheckFourCases(const char* locale_string,
+                        UnicodeString one, UnicodeString two,
+                        UnicodeString three, UnicodeString four,
+                        UnicodeString results[4]);
+
+    // Reused test data.
+    const UnicodeString prefix;
+    const UnicodeString one;
+    const UnicodeString two;
+    const UnicodeString three;
+    const UnicodeString four;
+};
+
+#endif