ICU-7168 Implementation of ListFormatter, port from Java

X-SVN-Rev: 32247
2025-04-05 21:45:37 +00:00 · 2012-08-27 20:14:25 +00:00 · 2012-08-27 20:14:25 +00:00 · 484c465ddf
commit 484c465ddf
parent 8131a32512
8 changed files with 707 additions and 5 deletions
--- a/icu4c/source/common/Makefile.in
+++ b/icu4c/source/common/Makefile.in
@ -104,7 +104,7 @@ rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.
 serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
 uidna.o usprep.o uts46.o punycode.o \
 util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o mutex.o dtintrv.o ucnvsel.o propsvec.o \
-ulist.o uloc_tag.o icudataver.o icuplug.o
+ulist.o uloc_tag.o icudataver.o icuplug.o listformatter.o

 ## Header files to install
 HEADERS = $(srcdir)/unicode/*.h
--- a/icu4c/source/common/listformatter.cpp
+++ b/icu4c/source/common/listformatter.cpp
@ -0,0 +1,329 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  listformatter.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2012aug27
+*   created by: Umesh P. Nair
+*/
+
+#include "unicode/listformatter.h"
+#include "mutex.h"
+#include "hash.h"
+#include "cstring.h"
+#include "ulocimp.h"
+#include "charstr.h"
+#include "ucln_cmn.h"
+
+U_NAMESPACE_BEGIN
+
+// The four list patterns stored per locale in listPatternHash.
+// addNewString() requires every pattern to contain both "{0}" and "{1}".
+struct ListFormatData : public UMemory {
+    UnicodeString twoPattern;     // Used by format() when there are exactly two items.
+    UnicodeString startPattern;   // Joins the first two items of a list of three or more.
+    UnicodeString middlePattern;  // Joins each interior item onto the text built so far.
+    UnicodeString endPattern;     // Joins the last item of a list of three or more.
+};
+
+// Table from locale name to ListFormatData; NULL until getListFormatData()
+// triggers initializeHash(), and reset to NULL by the cleanup function.
+static Hashtable* listPatternHash = NULL;
+// Held by getListFormatData() while it checks for and builds the table.
+static UMTX listFormatterMutex = NULL;
+// Placeholder tokens searched for in patterns (three code units each).
+static UChar FIRST_PARAMETER[] = { 0x7b, 0x30, 0x7d };  // "{0}"
+static UChar SECOND_PARAMETER[] = { 0x7b, 0x31, 0x7d };  // "{1}"
+
+U_CDECL_BEGIN
+// Cleanup hook registered through ucln_common_registerCleanup(): discards the
+// pattern table and clears the global pointer so a later lookup rebuilds it.
+static UBool U_CALLCONV uprv_listformatter_cleanup() {
+    Hashtable* doomed = listPatternHash;
+    listPatternHash = NULL;
+    delete doomed;
+    return TRUE;
+}
+
+// Value deleter installed on listPatternHash: the table hands back its values
+// as void*, which are really ListFormatData objects created by addDataToHash().
+static void U_CALLCONV
+uprv_deleteListFormatData(void *obj) {
+    ListFormatData *entry = static_cast<ListFormatData *>(obj);
+    delete entry;
+}
+
+U_CDECL_END
+
+/**
+ * Builds the global locale-name -> ListFormatData table (listPatternHash) from
+ * the hard-coded pattern data below and registers the cleanup function.
+ *
+ * The only caller in this file, getListFormatData(), invokes this while
+ * holding listFormatterMutex.
+ *
+ * If any entry cannot be added, the partially filled table is discarded and
+ * listPatternHash is reset to NULL, so a later call retries the whole
+ * initialization instead of silently working with incomplete data.
+ *
+ * @param errorCode In/out ICU error code. Nothing is done if it already
+ *                  indicates failure; set to U_MEMORY_ALLOCATION_ERROR if the
+ *                  table cannot be allocated, or to whatever error
+ *                  addDataToHash() reports.
+ */
+void ListFormatter::initializeHash(UErrorCode& errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+
+    listPatternHash = new Hashtable();
+    if (listPatternHash == NULL) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    // The table owns its values; they are released through this deleter.
+    listPatternHash->setValueDeleter(uprv_deleteListFormatData);
+    ucln_common_registerCleanup(UCLN_COMMON_LIST_FORMATTER, uprv_listformatter_cleanup);
+
+    // Arguments: locale, two-item pattern, start, middle, end.
+    // addDataToHash() is a no-op once errorCode indicates failure.
+    addDataToHash("af", "{0} en {1}", "{0}, {1}", "{0}, {1}", "{0} en {1}", errorCode);
+    addDataToHash("am", "{0} \\u12a5\\u1293 {1}", "{0}, {1}", "{0}, {1}", "{0}, \\u12a5\\u1293 {1}", errorCode);
+    addDataToHash("ar", "{0} \\u0648 {1}", "{0}\\u060c {1}", "{0}\\u060c {1}", "{0}\\u060c \\u0648 {1}", errorCode);
+    addDataToHash("bg", "{0} \\u0438 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0438 {1}", errorCode);
+    addDataToHash("bn", "{0} \\u098f\\u09ac\\u0982 {1}", "{0}, {1}", "{0}, {1}", "{0}, \\u098f\\u09ac\\u0982 {1}", errorCode);
+    addDataToHash("bs", "{0} i {1}", "{0}, {1}", "{0}, {1}", "{0} i {1}", errorCode);
+    addDataToHash("ca", "{0} i {1}", "{0}, {1}", "{0}, {1}", "{0} i {1}", errorCode);
+    addDataToHash("cs", "{0} a {1}", "{0}, {1}", "{0}, {1}", "{0} a {1}", errorCode);
+    addDataToHash("da", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
+    addDataToHash("de", "{0} und {1}", "{0}, {1}", "{0}, {1}", "{0} und {1}", errorCode);
+    addDataToHash("ee", "{0} kple {1}", "{0}, {1}", "{0}, {1}", "{0}, kple {1}", errorCode);
+    addDataToHash("el", "{0} \\u03ba\\u03b1\\u03b9 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u03ba\\u03b1\\u03b9 {1}", errorCode);
+    addDataToHash("en", "{0} and {1}", "{0}, {1}", "{0}, {1}", "{0}, and {1}", errorCode);
+    addDataToHash("es", "{0} y {1}", "{0}, {1}", "{0}, {1}", "{0} y {1}", errorCode);
+    addDataToHash("et", "{0} ja {1}", "{0}, {1}", "{0}, {1}", "{0} ja {1}", errorCode);
+    addDataToHash("eu", "{0} eta {1}", "{0}, {1}", "{0}, {1}", "{0} eta {1}", errorCode);
+    addDataToHash("fa", "{0} \\u0648 {1}", "{0}\\u060c\\u200f {1}", "{0}\\u060c\\u200f {1}", "{0}\\u060c \\u0648 {1}", errorCode);
+    addDataToHash("fi", "{0} ja {1}", "{0}, {1}", "{0}, {1}", "{0} ja {1}", errorCode);
+    addDataToHash("fil", "{0} at {1}", "{0}, {1}", "{0}, {1}", "{0} at {1}", errorCode);
+    addDataToHash("fo", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
+    addDataToHash("fr", "{0} et {1}", "{0}, {1}", "{0}, {1}", "{0} et {1}", errorCode);
+    addDataToHash("fur", "{0} e {1}", "{0}, {1}", "{0}, {1}", "{0} e {1}", errorCode);
+    addDataToHash("gd", "{0} agus {1}", "{0}, {1}", "{0}, {1}", "{0}, agus {1}", errorCode);
+    addDataToHash("gl", "{0} e {1}", "{0}, {1}", "{0}, {1}", "{0} e {1}", errorCode);
+    addDataToHash("gsw", "{0} und {1}", "{0}, {1}", "{0}, {1}", "{0} und {1}", errorCode);
+    addDataToHash("gu", "{0} \\u0a85\\u0aa8\\u0ac7 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0a85\\u0aa8\\u0ac7 {1}", errorCode);
+    addDataToHash("he", "{0} \\u05d5-{1}", "{0}, {1}", "{0}, {1}", "{0} \\u05d5-{1}", errorCode);
+    addDataToHash("hi", "{0} \\u0914\\u0930 {1}", "{0}, {1}", "{0}, {1}", "{0}, \\u0914\\u0930 {1}", errorCode);
+    addDataToHash("hr", "{0} i {1}", "{0}, {1}", "{0}, {1}", "{0} i {1}", errorCode);
+    addDataToHash("hu", "{0} \\u00e9s {1}", "{0}, {1}", "{0}, {1}", "{0} \\u00e9s {1}", errorCode);
+    addDataToHash("id", "{0} dan {1}", "{0}, {1}", "{0}, {1}", "{0}, dan {1}", errorCode);
+    addDataToHash("is", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
+    addDataToHash("it", "{0} e {1}", "{0}, {1}", "{0}, {1}", "{0}, e {1}", errorCode);
+    addDataToHash("ja", "{0}\\u3001{1}", "{0}\\u3001{1}", "{0}\\u3001{1}", "{0}\\u3001{1}", errorCode);
+    addDataToHash("ka", "{0} \\u10d3\\u10d0 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u10d3\\u10d0 {1}", errorCode);
+    addDataToHash("kea", "{0} y {1}", "{0}, {1}", "{0}, {1}", "{0} y {1}", errorCode);
+    addDataToHash("kl", "{0} aamma {1}", "{0} aamma {1}", "{0}, {1}", "{0}, {1}", errorCode);
+    addDataToHash("kn", "{0} \\u0cae\\u0ca4\\u0ccd\\u0ca4\\u0cc1 {1}", "{0}, {1}", "{0}, {1}",
+                  "{0}, \\u0cae\\u0ca4\\u0ccd\\u0ca4\\u0cc1 {1}", errorCode);
+    addDataToHash("ko", "{0} \\ubc0f {1}", "{0}, {1}", "{0}, {1}", "{0} \\ubc0f {1}", errorCode);
+    addDataToHash("ksh", "{0} un {1}", "{0}, {1}", "{0}, {1}", "{0} un {1}", errorCode);
+    addDataToHash("lt", "{0} ir {1}", "{0}, {1}", "{0}, {1}", "{0} ir {1}", errorCode);
+    addDataToHash("lv", "{0} un {1}", "{0}, {1}", "{0}, {1}", "{0} un {1}", errorCode);
+    addDataToHash("ml", "{0} \\u0d15\\u0d42\\u0d1f\\u0d3e\\u0d24\\u0d46 {1}", "{0}, {1}", "{0}, {1}",
+                  "{0}, {1} \\u0d0e\\u0d28\\u0d4d\\u0d28\\u0d3f\\u0d35", errorCode);
+    addDataToHash("mr", "{0} \\u0906\\u0923\\u093f {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0906\\u0923\\u093f {1}", errorCode);
+    addDataToHash("ms", "{0} dan {1}", "{0}, {1}", "{0}, {1}", "{0}, dan {1}", errorCode);
+    addDataToHash("nb", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
+    addDataToHash("nl", "{0} en {1}", "{0}, {1}", "{0}, {1}", "{0} en {1}", errorCode);
+    addDataToHash("nn", "{0} og {1}", "{0}, {1}", "{0}, {1}", "{0} og {1}", errorCode);
+    addDataToHash("pl", "{0} i {1}", "{0}; {1}", "{0}; {1}", "{0} i {1}", errorCode);
+    addDataToHash("pt", "{0} e {1}", "{0}, {1}", "{0}, {1}", "{0} e {1}", errorCode);
+    addDataToHash("ro", "{0} \\u015fi {1}", "{0}, {1}", "{0}, {1}", "{0} \\u015fi {1}", errorCode);
+    addDataToHash("", "{0}, {1}", "{0}, {1}", "{0}, {1}", "{0}, {1}", errorCode); // root
+    addDataToHash("ru", "{0} \\u0438 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0438 {1}", errorCode);
+    addDataToHash("se", "{0} ja {1}", "{0}, {1}", "{0}, {1}", "{0} ja {1}", errorCode);
+    addDataToHash("sk", "{0} a {1}", "{0}, {1}", "{0}, {1}", "{0} a {1}", errorCode);
+    addDataToHash("sl", "{0} in {1}", "{0}, {1}", "{0}, {1}", "{0} in {1}", errorCode);
+    addDataToHash("sr", "{0} \\u0438 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0438 {1}", errorCode);
+    addDataToHash("sr_Cyrl", "{0} \\u0438 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0438 {1}", errorCode);
+    addDataToHash("sr_Latn", "{0} i {1}", "{0}, {1}", "{0}, {1}", "{0} i {1}", errorCode);
+    addDataToHash("sv", "{0} och {1}", "{0}, {1}", "{0}, {1}", "{0} och {1}", errorCode);
+    addDataToHash("sw", "{0} na {1}", "{0}, {1}", "{0}, {1}", "{0}, na {1}", errorCode);
+    addDataToHash("ta", "{0} \\u0bae\\u0bb1\\u0bcd\\u0bb1\\u0bc1\\u0bae\\u0bcd {1}", "{0}, {1}", "{0}, {1}",
+                  "{0} \\u0bae\\u0bb1\\u0bcd\\u0bb1\\u0bc1\\u0bae\\u0bcd {1}", errorCode);
+    addDataToHash("te", "{0} \\u0c2e\\u0c30\\u0c3f\\u0c2f\\u0c41 {1}", "{0}, {1}", "{0}, {1}",
+                  "{0} \\u0c2e\\u0c30\\u0c3f\\u0c2f\\u0c41 {1}", errorCode);
+    addDataToHash("th", "{0}\\u0e41\\u0e25\\u0e30{1}", "{0} {1}", "{0} {1}", "{0} \\u0e41\\u0e25\\u0e30{1}", errorCode);
+    addDataToHash("tr", "{0} ve {1}", "{0}, {1}", "{0}, {1}", "{0} ve {1}", errorCode);
+    addDataToHash("uk", "{0} \\u0442\\u0430 {1}", "{0}, {1}", "{0}, {1}", "{0} \\u0442\\u0430 {1}", errorCode);
+    addDataToHash("ur", "{0} \\u0627\\u0648\\u0631 {1}", "{0}\\u060c {1}", "{0}\\u060c {1}",
+                  "{0}\\u060c \\u0627\\u0648\\u0631 {1}", errorCode);
+    addDataToHash("vi", "{0} v\\u00e0 {1}", "{0}, {1}", "{0}, {1}", "{0} v\\u00e0 {1}", errorCode);
+    addDataToHash("wae", "{0} und {1}", "{0}, {1}", "{0}, {1}", "{0} und {1}", errorCode);
+    addDataToHash("zh", "{0}\\u548c{1}", "{0}\\u3001{1}", "{0}\\u3001{1}", "{0}\\u548c{1}", errorCode);
+    addDataToHash("zu", "I-{0} ne-{1}", "{0}, {1}", "{0}, {1}", "{0}, no-{1}", errorCode);
+
+    // Do not publish a half-built table: callers treat a non-NULL
+    // listPatternHash as fully initialized and would never retry.
+    if (U_FAILURE(errorCode)) {
+        delete listPatternHash;
+        listPatternHash = NULL;
+    }
+}
+
+/**
+ * Adds one locale's patterns to listPatternHash.
+ *
+ * The pattern arguments are invariant-character strings that may contain
+ * backslash-u escapes; each is unescaped before being stored.
+ *
+ * @param locale    Locale name used as the table key ("" for root).
+ * @param two       Pattern for a list of exactly two items.
+ * @param start     Pattern joining the first two items of a longer list.
+ * @param middle    Pattern joining each interior item.
+ * @param end       Pattern joining the last item.
+ * @param errorCode In/out ICU error code. Nothing is done if it already
+ *                  indicates failure; set to U_MEMORY_ALLOCATION_ERROR if the
+ *                  entry cannot be allocated, or by Hashtable::put().
+ */
+void ListFormatter::addDataToHash(
+    const char* locale,
+    const char* two,
+    const char* start,
+    const char* middle,
+    const char* end,
+    UErrorCode& errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+
+    ListFormatData* entry = new ListFormatData();
+    if (entry == NULL) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    entry->twoPattern = UnicodeString(two, -1, US_INV).unescape();
+    entry->startPattern = UnicodeString(start, -1, US_INV).unescape();
+    entry->middlePattern = UnicodeString(middle, -1, US_INV).unescape();
+    entry->endPattern = UnicodeString(end, -1, US_INV).unescape();
+
+    listPatternHash->put(UnicodeString(locale, -1, US_INV), entry, errorCode);
+}
+
+/**
+ * Looks up the pattern data stored under the exact name of a locale, building
+ * the table first if it does not exist yet.
+ *
+ * The existence check and the initialization run under listFormatterMutex;
+ * the lookup itself is performed after the mutex has been released.
+ *
+ * @param locale    Locale whose getName() is used as the lookup key.
+ * @param errorCode In/out ICU error code passed to initializeHash().
+ * @return The stored data, or NULL if there is no entry for that name or the
+ *         table had to be built and errorCode indicates failure afterwards.
+ */
+const ListFormatData* ListFormatter::getListFormatData(
+        const Locale& locale, UErrorCode& errorCode) {
+    UBool initFailed = FALSE;
+    {
+        Mutex lock(&listFormatterMutex);
+        if (listPatternHash == NULL) {
+            initializeHash(errorCode);
+            initFailed = U_FAILURE(errorCode);
+        }
+    }
+    if (initFailed) {
+        return NULL;
+    }
+
+    const UnicodeString lookupKey(locale.getName(), -1, US_INV);
+    const void* entry = listPatternHash->get(lookupKey);
+    return static_cast<const ListFormatData*>(entry);
+}
+
+// Convenience overload: forwards a default-constructed Locale (the default
+// locale) to createInstance(const Locale&, UErrorCode&).
+ListFormatter* ListFormatter::createInstance(UErrorCode& errorCode) {
+    return createInstance(Locale(), errorCode);
+}
+
+/**
+ * Creates a formatter for the closest locale that has list pattern data.
+ *
+ * Starting from the requested locale, looks up pattern data by locale name
+ * and, when none is found, retries with the locale produced by
+ * getFallbackLocale(). The search jumps straight to the root locale as soon as
+ * a fallback step yields a bogus locale or fails to change the locale name
+ * (for example a name consisting only of keywords, such as
+ * "@collation=phonebook", falls back to itself), so the loop always ends.
+ *
+ * @param locale    The requested locale.
+ * @param errorCode In/out ICU error code. Set to a failure code if the data
+ *                  table cannot be built, the fallback cannot be computed,
+ *                  the formatter cannot be allocated
+ *                  (U_MEMORY_ALLOCATION_ERROR), or no data exists even for
+ *                  the root locale (U_MISSING_RESOURCE_ERROR).
+ * @return A newly allocated ListFormatter that the caller must delete, or
+ *         NULL on failure.
+ */
+ListFormatter* ListFormatter::createInstance(const Locale& locale, UErrorCode& errorCode) {
+    Locale tempLocale = locale;
+    for (;;) {
+        const ListFormatData* listFormatData = getListFormatData(tempLocale, errorCode);
+        if (U_FAILURE(errorCode)) {
+            return NULL;
+        }
+        if (listFormatData != NULL) {
+            ListFormatter* p = new ListFormatter(tempLocale, listFormatData);
+            if (p == NULL) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+                return NULL;
+            }
+            return p;
+        }
+
+        // An empty name is the root key; if even that has no data there is
+        // nothing left to fall back to.
+        if (*tempLocale.getName() == 0) {
+            errorCode = U_MISSING_RESOURCE_ERROR;
+            return NULL;
+        }
+
+        // errorCode is not a failure here (checked above); this reset only
+        // discards any non-failure status left by the lookup.
+        errorCode = U_ZERO_ERROR;
+        Locale correctLocale;
+        getFallbackLocale(tempLocale, correctLocale, errorCode);
+        if (U_FAILURE(errorCode)) {
+            return NULL;
+        }
+        if (correctLocale.isBogus() ||
+                uprv_strcmp(correctLocale.getName(), tempLocale.getName()) == 0) {
+            // No usable or no further fallback: go directly to root rather
+            // than looping on the same name forever.
+            tempLocale = Locale::getRoot();
+        } else {
+            tempLocale = correctLocale;
+        }
+    }
+}
+
+// Stores a copy of the locale and the pointer to its pattern data. The data
+// itself is not copied; createInstance() passes a pointer obtained from
+// listPatternHash.
+ListFormatter::ListFormatter(const Locale& listFormatterLocale, const ListFormatData* listFormatterData)
+        : locale(listFormatterLocale), data(listFormatterData) {
+}
+
+// Empty body: the object pointed to by `data` is not deleted here.
+ListFormatter::~ListFormatter() {}
+
+/**
+ * Computes the fallback ("parent") of a locale.
+ *
+ * "zh_TW" is special-cased to Locale::getTraditionalChinese(). For every other
+ * name, the last '_'-separated segment before the keyword section is removed,
+ * together with any underscores left trailing, and the keyword section (if
+ * any) is re-attached unchanged.
+ *
+ * @param in        The input locale.
+ * @param out       Receives the fallback locale; left untouched if errorCode
+ *                  indicates failure after the name has been assembled.
+ * @param errorCode In/out ICU error code used by the CharString appends.
+ */
+void ListFormatter::getFallbackLocale(const Locale& in, Locale& out, UErrorCode& errorCode) {
+    const char* name = in.getName();
+    if (uprv_strcmp(name, "zh_TW") == 0) {
+        out = Locale::getTraditionalChinese();
+        return;
+    }
+
+    // Start of the "@keyword=value" section, or the terminating NUL if the
+    // name has no keywords.
+    const char* keywords = locale_getKeywordsStart(name);
+    if (keywords == NULL) {
+        keywords = name + uprv_strlen(name);
+    }
+
+    // TODO: Check whether uloc_getParent() will work here.
+    // Step back over the last segment, stopping just after its '_'.
+    const char* cut = keywords;
+    for (; cut > name && cut[-1] != '_'; --cut) {
+    }
+    // Step back over the separator(s) so no empty segment remains.
+    for (; cut > name && cut[-1] == '_'; --cut) {
+    }
+
+    CharString fallbackName;
+    fallbackName.append(name, static_cast<int32_t>(cut - name), errorCode);
+    fallbackName.append(keywords, errorCode);
+    if (U_SUCCESS(errorCode)) {
+        out = Locale(fallbackName.data());
+    }
+}
+
+/**
+ * Joins the items with this formatter's patterns and appends the result.
+ *
+ * One item is appended as is; two items use twoPattern; three or more use
+ * startPattern for the first pair, middlePattern for each interior item and
+ * endPattern for the last one. Nothing is appended when nItems is not
+ * positive or when an error occurs.
+ *
+ * @param items     Array of strings to combine.
+ * @param nItems    Number of entries in items.
+ * @param appendTo  String that receives the formatted list.
+ * @param errorCode In/out ICU error code; nothing is done if it already
+ *                  indicates failure.
+ * @return appendTo.
+ */
+UnicodeString& ListFormatter::format(const UnicodeString items[], int32_t nItems,
+                      UnicodeString& appendTo, UErrorCode& errorCode) const {
+    if (U_FAILURE(errorCode) || nItems <= 0) {
+        return appendTo;
+    }
+
+    UnicodeString joined = items[0];
+    if (nItems == 2) {
+        addNewString(data->twoPattern, joined, items[1], errorCode);
+    } else if (nItems > 2) {
+        const int32_t lastIndex = nItems - 1;
+        addNewString(data->startPattern, joined, items[1], errorCode);
+        for (int32_t idx = 2; idx < lastIndex; ++idx) {
+            addNewString(data->middlePattern, joined, items[idx], errorCode);
+        }
+        addNewString(data->endPattern, joined, items[lastIndex], errorCode);
+    }
+
+    if (U_SUCCESS(errorCode)) {
+        appendTo += joined;
+    }
+    return appendTo;
+}
+
+/**
+ * Substitutes originalString for "{0}" and nextString for "{1}" in pat and
+ * stores the outcome back into originalString.
+ *
+ * Only the first occurrence of each placeholder is used, and the placeholders
+ * may appear in either order. If either one is missing, errorCode is set to
+ * U_ILLEGAL_ARGUMENT_ERROR and originalString is left unchanged.
+ */
+void ListFormatter::addNewString(const UnicodeString& pat, UnicodeString& originalString,
+    const UnicodeString& nextString, UErrorCode& errorCode) const {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+
+    const int32_t kParamLength = 3;  // Length of "{0}" and of "{1}".
+
+    const int32_t pos0 = pat.indexOf(FIRST_PARAMETER, kParamLength, 0);
+    const int32_t pos1 = (pos0 < 0) ? -1 : pat.indexOf(SECOND_PARAMETER, kParamLength, 0);
+    if (pos0 < 0 || pos1 < 0) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    // Decide which argument comes first in the pattern text.
+    const UBool originalLeads = pos0 < pos1;
+    const int32_t lowPos = originalLeads ? pos0 : pos1;
+    const int32_t highPos = originalLeads ? pos1 : pos0;
+    const UnicodeString* leading = originalLeads ? &originalString : &nextString;
+    const UnicodeString* trailing = originalLeads ? &nextString : &originalString;
+
+    // literal prefix + leading + literal middle + trailing + literal suffix
+    const int32_t middleStart = lowPos + kParamLength;
+    const int32_t suffixStart = highPos + kParamLength;
+    UnicodeString combined(pat, 0, lowPos);
+    combined.append(*leading);
+    combined.append(pat, middleStart, highPos - middleStart);
+    combined.append(*trailing);
+    combined.append(pat, suffixStart, pat.length() - suffixStart);
+
+    originalString = combined;
+}
+
+UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(ListFormatter)
+
+U_NAMESPACE_END
--- a/icu4c/source/common/ucln_cmn.h
+++ b/icu4c/source/common/ucln_cmn.h
@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *                                                                            *
-* Copyright (C) 2001-2010, International Business Machines                   *
+* Copyright (C) 2001-2012, International Business Machines                   *
 *                Corporation and others. All Rights Reserved.                *
 *                                                                            *
 ******************************************************************************
@ -49,6 +49,7 @@ typedef enum ECleanupCommonType {
    UCLN_COMMON_UCNV_IO,
    UCLN_COMMON_UDATA,
    UCLN_COMMON_PUTIL,
+    UCLN_COMMON_LIST_FORMATTER,
    UCLN_COMMON_COUNT /* This must be last */
 } ECleanupCommonType;

--- a/icu4c/source/common/unicode/listformatter.h
+++ b/icu4c/source/common/unicode/listformatter.h
@ -0,0 +1,122 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  listformatter.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 20120426
+*   created by: Umesh P. Nair
+*/
+
+#ifndef __LISTFORMATTER_H__
+#define __LISTFORMATTER_H__
+
+#include "unicode/unistr.h"
+#include "unicode/locid.h"
+
+
+U_NAMESPACE_BEGIN
+
+/** @internal */
+class Hashtable;
+
+/**
+ * Per-locale list patterns; defined as a struct in listformatter.cpp, so it
+ * is forward-declared with the same class-key here.
+ * @internal
+ */
+struct ListFormatData;
+
+/**
+ * \file
+ * \brief C++ API: API for formatting a list.
+ */
+
+
+/**
+ * An immutable class for formatting a list, using data from CLDR (or supplied
+ * separately).
+ *
+ * Example: Input data ["Alice", "Bob", "Charlie", "Delta"] will be formatted
+ * as "Alice, Bob, Charlie, and Delta" in English.
+ *
+ * The ListFormatter class is not intended for public subclassing.
+ */
+class U_COMMON_API ListFormatter : public UObject{
+
+  public:
+    /**
+     * Creates a ListFormatter appropriate for the default locale.
+     *
+     * @param errorCode ICU error code, set if no data available for default locale.
+     * @return Pointer to a ListFormatter object for the default locale,
+     *     created from internal data derived from CLDR data.
+     * @draft ICU 50
+     */
+    static ListFormatter* createInstance(UErrorCode& errorCode);
+
+    /**
+     * Creates a ListFormatter appropriate for a locale.
+     *
+     * @param locale The locale.
+     * @param errorCode ICU error code, set if no data available for the given locale.
+     * @return A ListFormatter object created from internal data derived from
+     *     CLDR data.
+     * @draft ICU 50
+     */
+    static ListFormatter* createInstance(const Locale& locale, UErrorCode& errorCode);
+
+
+    /**
+     * Destructor.
+     *
+     * @draft ICU 50
+     */
+    virtual ~ListFormatter();
+
+
+    /**
+     * Formats a list of strings.
+     *
+     * @param items An array of strings to be combined and formatted.
+     * @param n_items Length of the array items.
+     * @param appendTo The string to which the result should be appended.
+     * @param errorCode ICU error code, set if there is an error.
+     * @return Formatted string combining the elements of items, appended to appendTo.
+     * @draft ICU 50
+     */
+    UnicodeString& format(const UnicodeString items[], int32_t n_items,
+        UnicodeString& appendTo, UErrorCode& errorCode) const;
+
+    /**
+     * Gets the fallback locale for a given locale.
+     * TODO: Consider moving this to the Locale class.
+     * @param in The input locale.
+     * @param out The output locale after fallback.
+     * @param errorCode ICU error code, set if the fallback locale name cannot be built.
+     * @internal For testing.
+     */
+    static void getFallbackLocale(const Locale& in, Locale& out, UErrorCode& errorCode);
+
+  private:
+    // Builds the shared table of per-locale patterns.
+    static void initializeHash(UErrorCode& errorCode);
+    // Adds the four patterns of one locale to the shared table.
+    static void addDataToHash(const char* locale, const char* two, const char* start, const char* middle, const char* end, UErrorCode& errorCode);
+    // Returns the patterns stored under the locale's name, or NULL if there are none.
+    static const ListFormatData* getListFormatData(const Locale& locale, UErrorCode& errorCode);
+
+    // Private, and no definition appears in listformatter.cpp: instances are
+    // obtained through createInstance().
+    ListFormatter();
+    ListFormatter(const Locale& listFormatterLocale, const ListFormatData* listFormatterData);
+    // Copy construction and assignment are private, and no definition appears
+    // in listformatter.cpp.
+    ListFormatter(const ListFormatter&);
+
+    ListFormatter& operator = (const ListFormatter&);
+    // Substitutes originalString for {0} and newString for {1} in pattern and
+    // stores the result in originalString.
+    void addNewString(const UnicodeString& pattern, UnicodeString& originalString,
+                      const UnicodeString& newString, UErrorCode& errorCode) const;
+    virtual UClassID getDynamicClassID() const;
+
+    // Locale for which pattern data was found by createInstance().
+    Locale locale;
+    // Patterns used by format(); not deleted by the destructor.
+    const ListFormatData* data;
+};
+
+U_NAMESPACE_END
+
+#endif
--- a/icu4c/source/test/intltest/Makefile.in
+++ b/icu4c/source/test/intltest/Makefile.in
@ -55,7 +55,7 @@ itrbnf.o itrbnfrt.o itrbnfp.o ucaconf.o icusvtst.o \
 uobjtest.o idnaref.o idnaconf.o nptrans.o punyref.o testidn.o testidna.o uts46test.o \
 incaltst.o calcasts.o v32test.o uvectest.o textfile.o tokiter.o utxttest.o \
 windttst.o winnmtst.o winutil.o csdetest.o tzrulets.o tzoffloc.o tzfmttst.o ssearch.o dtifmtts.o \
-tufmtts.o itspoof.o simplethread.o bidiconf.o locnmtst.o dcfmtest.o alphaindextst.o
+tufmtts.o itspoof.o simplethread.o bidiconf.o locnmtst.o dcfmtest.o alphaindextst.o listformattertest.o

 DEPS = $(OBJECTS:.o=.d)

--- a/icu4c/source/test/intltest/itformat.cpp
+++ b/icu4c/source/test/intltest/itformat.cpp
@ -1,6 +1,6 @@
 /********************************************************************
- * COPYRIGHT: 
- * Copyright (c) 1997-2010, International Business Machines
+ * COPYRIGHT:
+ * Copyright (c) 1997-2012, International Business Machines
 * Corporation and others. All Rights Reserved.
 ********************************************************************/

@ -55,6 +55,7 @@
 #include "tufmtts.h"        // TimeUnitTest
 #include "locnmtst.h"       // LocaleDisplayNamesTest
 #include "dcfmtest.h"       // DecimalFormatTest
+#include "listformattertest.h"  // ListFormatterTest

 #define TESTCLASS(id, TestClass)          \
    case id:                              \
@ -131,6 +132,7 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam
 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
        TESTCLASS(41,DecimalFormatTest);
 #endif
+        TESTCLASS(42,ListFormatterTest);

        default: name = ""; break; //needed to end loop
    }
--- a/icu4c/source/test/intltest/listformattertest.cpp
+++ b/icu4c/source/test/intltest/listformattertest.cpp
@ -0,0 +1,190 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  listformattertest.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2012aug27
+*   created by: Umesh P. Nair
+*/
+
+#include "listformattertest.h"
+#include <string.h>
+
+// Sets up the shared fixture strings: the text placed in the output string
+// before formatting ("Prefix: ") and the four list items.
+ListFormatterTest::ListFormatterTest() :
+        prefix("Prefix: ", -1, US_INV),
+        one("Alice", -1, US_INV), two("Bob", -1, US_INV),
+        three("Charlie", -1, US_INV), four("Delta", -1, US_INV) {
+}
+
+// Formats data[0..dataSize) into a string that already holds the prefix and
+// reports an error unless the outcome equals prefix + expected_result. This
+// also checks that format() appends instead of overwriting.
+void ListFormatterTest::CheckFormatting(const ListFormatter* formatter, UnicodeString data[], int32_t dataSize,
+                                        const UnicodeString& expected_result) {
+    const UnicodeString wanted = prefix + expected_result;
+    UnicodeString got(prefix);
+    UErrorCode status = U_ZERO_ERROR;
+    formatter->format(data, dataSize, got, status);
+    if (wanted == got) {
+        return;
+    }
+    errln(UnicodeString("Expected: |") + wanted +  "|, Actual: |" + got + "|");
+}
+
+// Creates a formatter for locale_string and checks the formatting of lists of
+// one, two, three and four items against results[0..3]. The formatter is
+// deleted before returning on every path.
+void ListFormatterTest::CheckFourCases(const char* locale_string, UnicodeString one, UnicodeString two,
+        UnicodeString three, UnicodeString four, UnicodeString results[4]) {
+    UErrorCode errorCode = U_ZERO_ERROR;
+    ListFormatter* formatter = ListFormatter::createInstance(Locale(locale_string), errorCode);
+    if (formatter == NULL ||  U_FAILURE(errorCode)) {
+        errln("Allocation problem\n");
+        delete formatter;  // Deleting NULL is harmless.
+        return;
+    }
+    UnicodeString input1[] = {one};
+    CheckFormatting(formatter, input1, 1, results[0]);
+
+    UnicodeString input2[] = {one, two};
+    CheckFormatting(formatter, input2, 2, results[1]);
+
+    UnicodeString input3[] = {one, two, three};
+    CheckFormatting(formatter, input3, 3, results[2]);
+
+    UnicodeString input4[] = {one, two, three, four};
+    CheckFormatting(formatter, input4, 4, results[3]);
+
+    // createInstance() returns a heap-allocated object that is ours to release.
+    delete formatter;
+}
+
+
+// Checks ListFormatter::getFallbackLocale() along fallback chains: in each
+// row, every entry is expected to fall back to the entry that follows it.
+void ListFormatterTest::TestLocaleFallback() {
+    const char* testData[][4] = {
+        {"en_US", "en", "", ""},    // ULocale.getFallback("") should return ""
+        {"EN_us_Var", "en_US", "en", ""},   // Case is always normalized
+        {"de_DE@collation=phonebook", "de@collation=phonebook", "@collation=phonebook", "@collation=phonebook"},    // Keyword is preserved
+        {"en__POSIX", "en", "", ""},    // Trailing empty segment should be truncated
+        {"_US_POSIX", "_US", "", ""},   // Same as above
+        {"root", "", "", ""},               // No canonicalization
+    };
+    const int rowCount = static_cast<int>(sizeof(testData) / sizeof(testData[0]));
+    for (int i = 0; i < rowCount; ++i) {
+        for(int j = 1; j < 4; ++j) {
+            Locale in(testData[i][j-1]);
+            Locale out;
+            // Must start out as success: the callee and U_FAILURE() below
+            // both read this value.
+            UErrorCode errorCode = U_ZERO_ERROR;
+            ListFormatter::getFallbackLocale(in, out, errorCode);
+            if (U_FAILURE(errorCode)) {
+                errln("Error in getLocaleFallback: %s", u_errorName(errorCode));
+            }
+
+            if (::strcmp(testData[i][j], out.getName())) {
+                errln("Expected: |%s|, Actual: |%s|\n", testData[i][j], out.getName());
+            }
+        }
+    }
+}
+
+// Formatting with the root locale (""): every item is separated by ", ".
+void ListFormatterTest::TestRoot() {
+    const UnicodeString comma(", ");
+    UnicodeString results[4];
+    results[0] = one;
+    results[1] = results[0] + comma + two;
+    results[2] = results[1] + comma + three;
+    results[3] = results[2] + comma + four;
+
+    CheckFourCases("", one, two, three, four, results);
+}
+
+// A locale without pattern data ("ex_PY") should fall back to root, so the
+// expected output is the root formatting: items separated by ", ".
+void ListFormatterTest::TestBogus() {
+    const UnicodeString comma(", ");
+    UnicodeString results[4];
+    results[0] = one;
+    results[1] = results[0] + comma + two;
+    results[2] = results[1] + comma + three;
+    results[3] = results[2] + comma + four;
+
+    CheckFourCases("ex_PY", one, two, three, four, results);
+}
+
+// Formatting in English ("en").
+// Two items are joined by " and "; in longer lists every item but the last is
+// followed by a comma and "and" precedes the last item.
+void ListFormatterTest::TestEnglish() {
+    const UnicodeString firstTwo = one + ", " + two;
+    UnicodeString results[4] = {
+        one,
+        one + " and " + two,
+        firstTwo + ", and " + three,
+        firstTwo + ", " + three + ", and " + four
+    };
+
+    CheckFourCases("en", one, two, three, four, results);
+}
+
+// Formatting for "en_US": the expected output is the same as for "en".
+void ListFormatterTest::TestEnglishUS() {
+    const UnicodeString firstTwo = one + ", " + two;
+    UnicodeString results[4] = {
+        one,
+        one + " and " + two,
+        firstTwo + ", and " + three,
+        firstTwo + ", " + three + ", and " + four
+    };
+
+    CheckFourCases("en_US", one, two, three, four, results);
+}
+
+// Formatting in Russian ("ru").
+// The word "\\u0438" joins the last two items without a comma; all earlier
+// items are followed by a comma.
+void ListFormatterTest::TestRussian() {
+    const UnicodeString conjunction = UnicodeString(" \\u0438 ", -1, US_INV).unescape();
+    const UnicodeString firstTwo = one + ", " + two;
+    UnicodeString results[4] = {
+        one,
+        one + conjunction + two,
+        firstTwo + conjunction + three,
+        firstTwo + ", " + three + conjunction + four
+    };
+
+    CheckFourCases("ru", one, two, three, four, results);
+}
+
+// Formatting in Malayalam ("ml").
+// Two items are joined by "\\u0d15\\u0d42\\u0d1f\\u0d3e\\u0d24\\u0d46".
+// In longer lists all items are separated by commas and the word
+// \\u0d0e\\u0d28\\u0d4d\\u0d28\\u0d3f\\u0d35 follows the last item.
+void ListFormatterTest::TestMalayalam() {
+    const UnicodeString pairJoiner =
+        UnicodeString(" \\u0d15\\u0d42\\u0d1f\\u0d3e\\u0d24\\u0d46 ", -1, US_INV).unescape();
+    const UnicodeString listSuffix =
+        UnicodeString(" \\u0d0e\\u0d28\\u0d4d\\u0d28\\u0d3f\\u0d35", -1, US_INV).unescape();
+    const UnicodeString firstThree = one + ", " + two + ", " + three;
+    UnicodeString results[4] = {
+        one,
+        one + pairJoiner + two,
+        firstThree + listSuffix,
+        firstThree + ", " + four + listSuffix
+    };
+
+    CheckFourCases("ml", one, two, three, four, results);
+}
+
+// Formatting in Zulu ("zu").
+// For two items the expected form is "I-<first> ne-<second>". For longer
+// lists, items are separated by commas and the last item carries the prefix
+// "no-" after a comma.
+void ListFormatterTest::TestZulu() {
+    UnicodeString results[4] = {
+        one,
+        "I-" + one + " ne-" + two,
+        one + ", " + two + ", no-" + three,
+        one + ", " + two + ", " + three + ", no-" + four
+    };
+
+    CheckFourCases("zu", one, two, three, four, results);
+}
+
+// Test dispatcher: reports the name of the test at `index` through `name`
+// and, if `exec` is set, runs it. An out-of-range index yields an empty name.
+void ListFormatterTest::runIndexedTest(int32_t index, UBool exec,
+                                       const char* &name, char* /*par */) {
+    typedef void (ListFormatterTest::*TestMethod)();
+    static const struct {
+        const char* testName;
+        TestMethod method;
+    } kTests[] = {
+        { "TestRoot", &ListFormatterTest::TestRoot },
+        { "TestBogus", &ListFormatterTest::TestBogus },
+        { "TestEnglish", &ListFormatterTest::TestEnglish },
+        { "TestEnglishUS", &ListFormatterTest::TestEnglishUS },
+        { "TestRussian", &ListFormatterTest::TestRussian },
+        { "TestMalayalam", &ListFormatterTest::TestMalayalam },
+        { "TestZulu", &ListFormatterTest::TestZulu },
+        { "TestLocaleFallback", &ListFormatterTest::TestLocaleFallback }
+    };
+    const int32_t kTestCount = static_cast<int32_t>(sizeof(kTests) / sizeof(kTests[0]));
+
+    if (index < 0 || index >= kTestCount) {
+        name = "";
+        return;
+    }
+
+    name = kTests[index].testName;
+    if (exec) {
+        (this->*kTests[index].method)();
+    }
+}
--- a/icu4c/source/test/intltest/listformattertest.h
+++ b/icu4c/source/test/intltest/listformattertest.h
@ -0,0 +1,58 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  listformattertest.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2012aug27
+*   created by: Umesh P. Nair
+*/
+
+#ifndef __LISTFORMATTERTEST_H__
+#define __LISTFORMATTERTEST_H__
+
+#include "unicode/listformatter.h"
+#include "intltest.h"
+
+// intltest test class for ListFormatter: locale fallback plus formatting of
+// one to four items in several locales.
+class ListFormatterTest : public IntlTest {
+  public:
+    ListFormatterTest();
+    virtual ~ListFormatterTest() {}
+
+    // Maps a test index to its name and optionally runs it.
+    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=0);
+
+    void TestLocaleFallback();
+    void TestRoot();
+    void TestBogus();
+    void TestEnglish();
+    void TestEnglishUS();
+    void TestRussian();
+    void TestMalayalam();
+    void TestZulu();
+
+  private:
+    // Formats `data` and compares the outcome with `expected_result`.
+    void CheckFormatting(const ListFormatter* formatter, UnicodeString data[], int32_t data_size, const UnicodeString& expected_result);
+    // Checks lists of one to four items for one locale against results[0..3].
+    void CheckFourCases(
+        const char* locale_string,
+        UnicodeString one,
+        UnicodeString two,
+        UnicodeString three,
+        UnicodeString four,
+        UnicodeString results[4]);
+
+  private:
+    // Reused test data.
+    const UnicodeString prefix;  // Placed in the output string before formatting.
+    const UnicodeString one;     // The four list items, in order.
+    const UnicodeString two;
+    const UnicodeString three;
+    const UnicodeString four;
+};
+
+#endif