From f5cd9984c6e6bb0ed4ac7423b2e7ca46411310bd Mon Sep 17 00:00:00 2001
From: Markus Scherer <markus.icu@gmail.com>
Date: Fri, 15 Feb 2013 22:11:33 +0000
Subject: [PATCH] ICU-9880 add ImmutableIndex, replace Chinese hacks and
 support zhuyin, handle index characters with multiple primary weights,
 lazy-create rarely-used objects, bug fixes, port other improvements from Java

X-SVN-Rev: 33245
---
 icu4c/source/common/uvector.cpp              |    8 +-
 icu4c/source/common/uvector.h                |    6 +-
 icu4c/source/i18n/alphaindex.cpp             | 1739 +++++++++---------
 icu4c/source/i18n/ucln_in.h                  |    1 -
 icu4c/source/i18n/unicode/alphaindex.h       |  405 ++--
 icu4c/source/test/intltest/alphaindextst.cpp |  302 ++-
 icu4c/source/test/intltest/alphaindextst.h   |   23 +-
 icu4c/source/test/intltest/intltest.cpp      |   32 +
 icu4c/source/test/intltest/intltest.h        |    6 +
 9 files changed, 1396 insertions(+), 1126 deletions(-)

diff --git a/icu4c/source/common/uvector.cpp b/icu4c/source/common/uvector.cpp
index a5adaa058ee..d8a4283dd1e 100644
--- a/icu4c/source/common/uvector.cpp
+++ b/icu4c/source/common/uvector.cpp
@@ -1,7 +1,7 @@
 /*
 ******************************************************************************
-* Copyright (C) 1999-2011, International Business Machines Corporation and   *
-* others. All Rights Reserved.                                               *
+* Copyright (C) 1999-2013, International Business Machines Corporation and
+* others. All Rights Reserved.
 ******************************************************************************
 *   Date        Name        Description
 *   10/22/99    alan        Creation.
@@ -552,12 +552,12 @@ void UVector::sort(UElementComparator *compare, UErrorCode &ec) {
 
 
 /**
- *  Sort with a user supplied comparator of type UComparator.
+ *  Stable sort with a user supplied comparator of type UComparator.
  */
 void UVector::sortWithUComparator(UComparator *compare, const void *context, UErrorCode &ec) {
     if (U_SUCCESS(ec)) {
         uprv_sortArray(elements, count, sizeof(UElement),
-                       compare, context, FALSE, &ec);
+                       compare, context, TRUE, &ec);
     }
 }
 
diff --git a/icu4c/source/common/uvector.h b/icu4c/source/common/uvector.h
index a04176266f4..29cda39f8f2 100644
--- a/icu4c/source/common/uvector.h
+++ b/icu4c/source/common/uvector.h
@@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 1999-2011, International Business Machines
+*   Copyright (C) 1999-2013, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   Date        Name        Description
@@ -243,9 +243,9 @@ public:
     void sort(UElementComparator *compare, UErrorCode &ec);
 
     /**
-     * Sort the contents of this vector using a caller-supplied function
+     * Stable sort the contents of this vector using a caller-supplied function
      * of type UComparator to do the comparison.  Provides more flexibility
-     * than uvector::sort() because an additional user-parameter can be passed to
+     * than UVector::sort() because an additional user parameter can be passed to
      * the comparison function.
      */
     void sortWithUComparator(UComparator *compare, const void *context, UErrorCode &ec);
diff --git a/icu4c/source/i18n/alphaindex.cpp b/icu4c/source/i18n/alphaindex.cpp
index 1d493745749..1b02e050880 100644
--- a/icu4c/source/i18n/alphaindex.cpp
+++ b/icu4c/source/i18n/alphaindex.cpp
@@ -1,140 +1,228 @@
 /*
 *******************************************************************************
-* Copyright (C) 2009-2013, International Business Machines Corporation and    *
-* others. All Rights Reserved.                                                *
+* Copyright (C) 2009-2013, International Business Machines Corporation and
+* others. All Rights Reserved.
 *******************************************************************************
 */
 
-/**
- * \file
- * \brief C API: AlphabeticIndex class
- */
-
 #include "unicode/utypes.h"
 
 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
 
 #include "unicode/alphaindex.h"
+#include "unicode/coleitr.h"
 #include "unicode/coll.h"
+#include "unicode/localpointer.h"
 #include "unicode/normalizer2.h"
-#include "unicode/strenum.h"
 #include "unicode/tblcoll.h"
 #include "unicode/ulocdata.h"
 #include "unicode/uniset.h"
 #include "unicode/uobject.h"
-#include "unicode/uscript.h"
 #include "unicode/usetiter.h"
-#include "unicode/ustring.h"
 #include "unicode/utf16.h"
 
+#include "cmemory.h"
 #include "cstring.h"
-#include "mutex.h"
 #include "uassert.h"
-#include "ucln_in.h"
-#include "uhash.h"
 #include "uvector.h"
 
 //#include <string>
 //#include <iostream>
+
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+
 U_NAMESPACE_BEGIN
 
-// Forward Declarations
-static int32_t U_CALLCONV
-PreferenceComparator(const void *context, const void *left, const void *right);
+namespace {
+
+/**
+ * Prefix string for Chinese index buckets.
+ * See http://unicode.org/repos/cldr/trunk/specs/ldml/tr35-collation.html#Collation_Indexes
+ */
+const UChar BASE[1] = { 0xFDD0 };
+const int32_t BASE_LENGTH = 1;
+
+UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
+                                const UnicodeString &one, const UnicodeString &other);
+
+}  // namespace
 
 static int32_t U_CALLCONV
-sortCollateComparator(const void *context, const void *left, const void *right);
+collatorComparator(const void *context, const void *left, const void *right);
 
 static int32_t U_CALLCONV
 recordCompareFn(const void *context, const void *left, const void *right);
 
-//  UVector<Bucket *> support function, delete a Bucket.
-static void U_CALLCONV
-alphaIndex_deleteBucket(void *obj) {
-    delete static_cast<AlphabeticIndex::Bucket *>(obj);
-}
-
 //  UVector<Record *> support function, delete a Record.
 static void U_CALLCONV
 alphaIndex_deleteRecord(void *obj) {
     delete static_cast<AlphabeticIndex::Record *>(obj);
 }
 
+namespace {
 
+UnicodeString *ownedString(const UnicodeString &s, LocalPointer<UnicodeString> &owned,
+                           UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return NULL; }
+    if (owned.isValid()) {
+        return owned.orphan();
+    }
+    UnicodeString *p = new UnicodeString(s);
+    if (p == NULL) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return p;
+}
 
-static const Normalizer2 *nfkdNormalizer;
+inline UnicodeString *getString(const UVector &list, int32_t i) {
+    return static_cast<UnicodeString *>(list[i]);
+}
 
-//
-//  Append the contents of a UnicodeSet to a UVector of UnicodeStrings.
-//  Append everything - individual characters are handled as strings of length 1.
-//  The destination vector owns the appended strings.
+inline AlphabeticIndex::Bucket *getBucket(const UVector &list, int32_t i) {
+    return static_cast<AlphabeticIndex::Bucket *>(list[i]);
+}
 
-static void appendUnicodeSetToUVector(UVector &dest, const UnicodeSet &source, UErrorCode &status) {
-    UnicodeSetIterator setIter(source);
-    while (setIter.next()) {
-        const UnicodeString &str = setIter.getString();
-        dest.addElement(str.clone(), status);
+inline AlphabeticIndex::Record *getRecord(const UVector &list, int32_t i) {
+    return static_cast<AlphabeticIndex::Record *>(list[i]);
+}
+
+/**
+ * Like Java Collections.binarySearch(List, String, Comparator).
+ *
+ * @return the index>=0 where the item was found,
+ *         or the index<0 for inserting the string at ~index in sorted order
+ */
+int32_t binarySearch(const UVector &list, const UnicodeString &s, const Collator &coll) {
+    if (list.size() == 0) { return ~0; }
+    int32_t start = 0;
+    int32_t limit = list.size();
+    for (;;) {
+        int32_t i = (start + limit) / 2;
+        const UnicodeString *si = static_cast<UnicodeString *>(list.elementAt(i));
+        UErrorCode errorCode = U_ZERO_ERROR;
+        UCollationResult cmp = coll.compare(s, *si, errorCode);
+        if (cmp == UCOL_EQUAL) {
+            return i;
+        } else if (cmp < 0) {
+            if (i == start) {
+                return ~start;  // insert s before *si
+            }
+            limit = i;
+        } else {
+            if (i == start) {
+                return ~(start + 1);  // insert s after *si
+            }
+            start = i;
+        }
     }
 }
 
+class BucketList : public UObject {
+public:
+    BucketList(UVector *bucketList, UVector *publicBucketList)
+            : bucketList_(bucketList), immutableVisibleList_(publicBucketList) {
+        int32_t displayIndex = 0;
+        for (int32_t i = 0; i < publicBucketList->size(); ++i) {
+            getBucket(*publicBucketList, i)->displayIndex_ = displayIndex++;
+        }
+    }
 
-AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status) {
-    init(status);
-    if (U_FAILURE(status)) {
-        return;
+    virtual ~BucketList() {
+        delete bucketList_;
+        if (immutableVisibleList_ != bucketList_) {
+            delete immutableVisibleList_;
+        }
     }
-    locale_ = locale;
-    langType_ = langTypeFromLocale(locale_);
 
-    collator_ = Collator::createInstance(locale, status);
-    if (collator_ != NULL) {
-        collatorPrimaryOnly_ = collator_->clone();
+    int32_t getBucketCount() const {
+        return immutableVisibleList_->size();
     }
-    if (collatorPrimaryOnly_ != NULL) {
-        collatorPrimaryOnly_->setStrength(Collator::PRIMARY);
+
+    int32_t getBucketIndex(const UnicodeString &name, const Collator &collatorPrimaryOnly,
+                           UErrorCode &errorCode) {
+        // binary search
+        int32_t start = 0;
+        int32_t limit = bucketList_->size();
+        while ((start + 1) < limit) {
+            int32_t i = (start + limit) / 2;
+            const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, i);
+            UCollationResult nameVsBucket =
+                collatorPrimaryOnly.compare(name, bucket->lowerBoundary_, errorCode);
+            if (nameVsBucket < 0) {
+                limit = i;
+            } else {
+                start = i;
+            }
+        }
+        const AlphabeticIndex::Bucket *bucket = getBucket(*bucketList_, start);
+        if (bucket->displayBucket_ != NULL) {
+            bucket = bucket->displayBucket_;
+        }
+        return bucket->displayIndex_;
     }
-    getIndexExemplars(*initialLabels_, locale, status);
-    indexBuildRequired_ = TRUE;
-    if ((collator_ == NULL || collatorPrimaryOnly_ == NULL) && U_SUCCESS(status)) {
-        status = U_MEMORY_ALLOCATION_ERROR;
+
+    /** All of the buckets, visible and invisible. */
+    UVector *bucketList_;
+    /** Just the visible buckets. */
+    UVector *immutableVisibleList_;
+};
+
+}  // namespace
+
+AlphabeticIndex::ImmutableIndex::~ImmutableIndex() {
+    delete buckets_;
+    delete collatorPrimaryOnly_;
+}
+
+int32_t
+AlphabeticIndex::ImmutableIndex::getBucketCount() const {
+    return buckets_->getBucketCount();
+}
+
+int32_t
+AlphabeticIndex::ImmutableIndex::getBucketIndex(
+        const UnicodeString &name, UErrorCode &errorCode) const {
+    return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, errorCode);
+}
+
+const AlphabeticIndex::Bucket *
+AlphabeticIndex::ImmutableIndex::getBucket(int32_t index) const {
+    if (0 <= index && index < buckets_->getBucketCount()) {
+        return icu::getBucket(*buckets_->immutableVisibleList_, index);
+    } else {
+        return NULL;
     }
-    firstScriptCharacters_ = firstStringsInScript(status);
+}
+
+AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status)
+        : inputList_(NULL),
+          labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
+          maxLabelCount_(99),
+          initialLabels_(NULL), firstCharsInScripts_(NULL),
+          collator_(NULL), collatorPrimaryOnly_(NULL),
+          buckets_(NULL) {
+    init(&locale, status);
 }
 
 
-AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status) {
-    init(status);
-    if (U_FAILURE(status)) {
-        return;
-    }
-    if (collator == NULL) {
-        status = U_ILLEGAL_ARGUMENT_ERROR;
-        return;
-    }
-    collator_ = collator;
-    collatorPrimaryOnly_ = collator_->clone();
-    if (collatorPrimaryOnly_ == NULL) {
-        status = U_MEMORY_ALLOCATION_ERROR;
-        return;
-    }
-    collatorPrimaryOnly_->setStrength(Collator::PRIMARY);
-    // Note: initialLabels_ is set to an empty UnicodeSet by init().
-    indexBuildRequired_ = TRUE;
-    firstScriptCharacters_ = firstStringsInScript(status);
+AlphabeticIndex::AlphabeticIndex(RuleBasedCollator *collator, UErrorCode &status)
+        : inputList_(NULL),
+          labelsIterIndex_(-1), itemsIterIndex_(0), currentBucket_(NULL),
+          maxLabelCount_(99),
+          initialLabels_(NULL), firstCharsInScripts_(NULL),
+          collator_(collator), collatorPrimaryOnly_(NULL),
+          buckets_(NULL) {
+    init(NULL, status);
 }
 
 
 
 AlphabeticIndex::~AlphabeticIndex() {
-    uhash_close(alreadyIn_);
-    delete bucketList_;
     delete collator_;
     delete collatorPrimaryOnly_;
-    delete firstScriptCharacters_;
-    delete labels_;
-    delete inputRecords_;
-    delete noDistinctSorting_;
-    delete notAlphabetic_;
+    delete firstCharsInScripts_;
+    delete buckets_;
+    delete inputList_;
     delete initialLabels_;
 }
 
@@ -144,319 +232,602 @@ AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorC
         return *this;
     }
     initialLabels_->addAll(additions);
+    clearBuckets();
     return *this;
 }
 
 
 AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
-    if (U_FAILURE(status)) {
-        return *this;
-    }
-    UnicodeSet additions;
-    getIndexExemplars(additions, locale, status);
-    initialLabels_->addAll(additions);
+    addIndexExemplars(locale, status);
+    clearBuckets();
     return *this;
 }
 
 
+AlphabeticIndex::ImmutableIndex *AlphabeticIndex::buildImmutableIndex(UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return NULL; }
+    // In C++, the ImmutableIndex must own its copy of the BucketList,
+    // even if it contains no records, for proper memory management.
+    // We could clone the buckets_ if they are not NULL,
+    // but that would be worth it only if this method is called multiple times,
+    // or called after using the old-style bucket iterator API.
+    LocalPointer<BucketList> immutableBucketList(createBucketList(errorCode));
+    LocalPointer<RuleBasedCollator> coll(
+        static_cast<RuleBasedCollator *>(collatorPrimaryOnly_->clone()));
+    if (immutableBucketList.isNull() || coll.isNull()) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    ImmutableIndex *immIndex = new ImmutableIndex(immutableBucketList.getAlias(), coll.getAlias());
+    if (immIndex == NULL) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    // The ImmutableIndex adopted its parameter objects.
+    immutableBucketList.orphan();
+    coll.orphan();
+    return immIndex;
+}
+
 int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
-    buildIndex(status);
+    initBuckets(status);
     if (U_FAILURE(status)) {
         return 0;
     }
-    return bucketList_->size();
+    return buckets_->getBucketCount();
 }
 
 
 int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
-    if (U_FAILURE(status)) {
+    if (U_FAILURE(status) || inputList_ == NULL) {
         return 0;
     }
-    return inputRecords_->size();
+    return inputList_->size();
 }
 
+void AlphabeticIndex::initLabels(UVector &indexCharacters, UErrorCode &errorCode) const {
+    const Normalizer2 *nfkdNormalizer = Normalizer2::getNFKDInstance(errorCode);
+    if (U_FAILURE(errorCode)) { return; }
 
-void AlphabeticIndex::buildIndex(UErrorCode &status) {
-    if (U_FAILURE(status)) {
-        return;
+    const UnicodeString &firstScriptBoundary = *getString(*firstCharsInScripts_, 0);
+    const UnicodeString &overflowBoundary =
+        *getString(*firstCharsInScripts_, firstCharsInScripts_->size() - 1);
+
+    // We make a sorted array of elements.
+    // Some of the input may be redundant.
+    // That is, we might have c, ch, d, where "ch" sorts just like "c", "h".
+    // We filter out those cases.
+    UnicodeSetIterator iter(*initialLabels_);
+    while (iter.next()) {
+        const UnicodeString *item = &iter.getString();
+        LocalPointer<UnicodeString> ownedItem;
+        UBool checkDistinct;
+        int32_t itemLength = item->length();
+        if (!item->hasMoreChar32Than(0, itemLength, 1)) {
+            checkDistinct = FALSE;
+        } else if(item->charAt(itemLength - 1) == 0x2a &&  // '*'
+                item->charAt(itemLength - 2) != 0x2a) {
+            // Use a label if it is marked with one trailing star,
+            // even if the label string sorts the same when all contractions are suppressed.
+            ownedItem.adoptInstead(new UnicodeString(*item, 0, itemLength - 1));
+            item = ownedItem.getAlias();
+            if (item == NULL) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            checkDistinct = FALSE;
+        } else {
+            checkDistinct = TRUE;
+        }
+        if (collatorPrimaryOnly_->compare(*item, firstScriptBoundary, errorCode) < 0) {
+            // Ignore a primary-ignorable or non-alphabetic index character.
+        } else if (collatorPrimaryOnly_->compare(*item, overflowBoundary, errorCode) >= 0) {
+            // Ignore an index characters that will land in the overflow bucket.
+        } else if (checkDistinct &&
+                collatorPrimaryOnly_->compare(*item, separated(*item), errorCode) == 0) {
+            // Ignore a multi-code point index character that does not sort distinctly
+            // from the sequence of its separate characters.
+        } else {
+            int32_t insertionPoint = binarySearch(indexCharacters, *item, *collatorPrimaryOnly_);
+            if (insertionPoint < 0) {
+                indexCharacters.insertElementAt(
+                    ownedString(*item, ownedItem, errorCode), ~insertionPoint, errorCode);
+            } else {
+                const UnicodeString &itemAlreadyIn = *getString(indexCharacters, insertionPoint);
+                if (isOneLabelBetterThanOther(*nfkdNormalizer, *item, itemAlreadyIn)) {
+                    indexCharacters.setElementAt(
+                        ownedString(*item, ownedItem, errorCode), insertionPoint);
+                }
+            }
+        }
     }
-    if (!indexBuildRequired_) {
-        return;
+    if (U_FAILURE(errorCode)) { return; }
+
+    // if the result is still too large, cut down to maxCount elements, by removing every nth element
+
+    int32_t size = indexCharacters.size() - 1;
+    if (size > maxLabelCount_) {
+        int32_t count = 0;
+        int32_t old = -1;
+        for (int32_t i = 0; i < indexCharacters.size();) {
+            ++count;
+            int32_t bump = count * maxLabelCount_ / size;
+            if (bump == old) {
+                indexCharacters.removeElementAt(i);
+            } else {
+                old = bump;
+                ++i;
+            }
+        }
     }
+}
 
-    // Discard any already-built data.
-    // This is important when the user builds and uses an index, then subsequently modifies it,
-    // necessitating a rebuild.
+namespace {
 
-    bucketList_->removeAllElements();
-    labels_->removeAllElements();
-    uhash_removeAll(alreadyIn_);
-    noDistinctSorting_->clear();
-    notAlphabetic_->clear();
+const UnicodeString &fixLabel(const UnicodeString &current, UnicodeString &temp) {
+    if (!current.startsWith(BASE, BASE_LENGTH)) {
+        return current;
+    }
+    UChar rest = current.charAt(BASE_LENGTH);
+    if (0x2800 < rest && rest <= 0x28FF) { // stroke count
+        int32_t count = rest-0x2800;
+        temp.setTo((UChar)(0x30 + count % 10));
+        if (count >= 10) {
+            count /= 10;
+            temp.insert(0, (UChar)(0x30 + count % 10));
+            if (count >= 10) {
+                count /= 10;
+                temp.insert(0, (UChar)(0x30 + count));
+            }
+        }
+        return temp.append((UChar)0x5283);
+    }
+    return temp.setTo(current, BASE_LENGTH);
+}
 
-    // first sort the incoming Labels, with a "best" ordering among items
-    // that are the same according to the collator
+UBool hasMultiplePrimaryWeights(
+        CollationElementIterator &cei, int32_t variableTop,
+        const UnicodeString &s, UErrorCode &errorCode) {
+    cei.setText(s, errorCode);
+    UBool seenPrimary = FALSE;
+    for (;;) {
+        int32_t ce32 = cei.next(errorCode);
+        if (ce32 == CollationElementIterator::NULLORDER) {
+            break;
+        }
+        int32_t p = CollationElementIterator::primaryOrder(ce32);
+        if (p > variableTop && (ce32 & 0xc0) != 0xc0) {
+            // not primary ignorable, and not a continuation CE
+            if (seenPrimary) {
+                return TRUE;
+            }
+            seenPrimary = TRUE;
+        }
+    }
+    return FALSE;
+}
 
-    UVector preferenceSorting(status);   // Vector of UnicodeStrings; owned by the vector.
-    preferenceSorting.setDeleter(uprv_deleteUObject);
-    appendUnicodeSetToUVector(preferenceSorting, *initialLabels_, status);
-    preferenceSorting.sortWithUComparator(PreferenceComparator, &status, status);
+}  // namespace
 
-    // We now make a set of Labels.
-    // Some of the input may, however, be redundant.
-    // That is, we might have c, ch, d, where "ch" sorts just like "c", "h"
-    // So we make a pass through, filtering out those cases.
-    // TODO: filtering these out would seem to be at odds with the eventual goal
-    //       of being able to split buckets that contain too many items.
+BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const {
+    // Initialize indexCharacters.
+    UVector indexCharacters(errorCode);
+    indexCharacters.setDeleter(uprv_deleteUObject);
+    initLabels(indexCharacters, errorCode);
+    if (U_FAILURE(errorCode)) { return NULL; }
 
-    UnicodeSet labelSet;
-    for (int32_t psIndex=0; psIndex<preferenceSorting.size(); psIndex++) {
-        UnicodeString item = *static_cast<const UnicodeString *>(preferenceSorting.elementAt(psIndex));
-        // TODO:  Since preferenceSorting was originally populated from the contents of a UnicodeSet,
-        //        is it even possible for duplicates to show up in this check?
-        if (labelSet.contains(item)) {
-            UnicodeSetIterator itemAlreadyInIter(labelSet);
-            while (itemAlreadyInIter.next()) {
-                const UnicodeString &itemAlreadyIn = itemAlreadyInIter.getString();
-                if (collatorPrimaryOnly_->compare(item, itemAlreadyIn) == 0) {
-                    UnicodeSet *targets = static_cast<UnicodeSet *>(uhash_get(alreadyIn_, &itemAlreadyIn));
-                    if (targets == NULL) {
-                        // alreadyIn.put(itemAlreadyIn, targets = new LinkedHashSet<String>());
-                        targets = new UnicodeSet();
-                        uhash_put(alreadyIn_, itemAlreadyIn.clone(), targets, &status);
+    // Variables for hasMultiplePrimaryWeights().
+    LocalPointer<CollationElementIterator> cei(
+        collatorPrimaryOnly_->createCollationElementIterator(emptyString_));
+    if (cei.isNull()) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    int32_t variableTop;
+    if (collatorPrimaryOnly_->getAttribute(UCOL_ALTERNATE_HANDLING, errorCode) == UCOL_SHIFTED) {
+        variableTop = CollationElementIterator::primaryOrder(
+            (int32_t)collatorPrimaryOnly_->getVariableTop(errorCode));
+    } else {
+        variableTop = 0;
+    }
+    UBool hasInvisibleBuckets = FALSE;
+
+    // Helper arrays for Chinese Pinyin collation.
+    Bucket *asciiBuckets[26] = {
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
+    };
+    Bucket *pinyinBuckets[26] = {
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+        NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
+    };
+    UBool hasPinyin = FALSE;
+
+    LocalPointer<UVector> bucketList(new UVector(errorCode));
+    if (bucketList.isNull()) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    bucketList->setDeleter(uprv_deleteUObject);
+
+    // underflow bucket
+    Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW);
+    if (bucket == NULL) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    bucketList->addElement(bucket, errorCode);
+    if (U_FAILURE(errorCode)) { return NULL; }
+
+    UnicodeString temp;
+
+    // fix up the list, adding underflow, additions, overflow
+    // Insert inflow labels as needed.
+    int32_t scriptIndex = -1;
+    const UnicodeString *scriptUpperBoundary = &emptyString_;
+    for (int32_t i = 0; i < indexCharacters.size(); ++i) {
+        UnicodeString &current = *getString(indexCharacters, i);
+        if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) >= 0) {
+            // We crossed the script boundary into a new script.
+            const UnicodeString &inflowBoundary = *scriptUpperBoundary;
+            UBool skippedScript = FALSE;
+            for (;;) {
+                scriptUpperBoundary = getString(*firstCharsInScripts_, ++scriptIndex);
+                if (collatorPrimaryOnly_->compare(current, *scriptUpperBoundary, errorCode) < 0) {
+                    break;
+                }
+                skippedScript = TRUE;
+            }
+            if (skippedScript && bucketList->size() > 1) {
+                // We are skipping one or more scripts,
+                // and we are not just getting out of the underflow label.
+                bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW);
+                if (bucket == NULL) {
+                    errorCode = U_MEMORY_ALLOCATION_ERROR;
+                    return NULL;
+                }
+                bucketList->addElement(bucket, errorCode);
+            }
+        }
+        // Add a bucket with the current label.
+        bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL);
+        if (bucket == NULL) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return NULL;
+        }
+        bucketList->addElement(bucket, errorCode);
+        // Remember ASCII and Pinyin buckets for Pinyin redirects.
+        UChar c;
+        if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) {  // A-Z
+            asciiBuckets[c - 0x41] = bucket;
+        } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) &&
+                0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) {
+            pinyinBuckets[c - 0x41] = bucket;
+            hasPinyin = TRUE;
+        }
+        // Check for multiple primary weights.
+        if (!current.startsWith(BASE, BASE_LENGTH) &&
+                hasMultiplePrimaryWeights(*cei, variableTop, current, errorCode) &&
+                current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) {
+            // "AE-ligature" or "Sch" etc.
+            for (int32_t i = bucketList->size() - 2;; --i) {
+                Bucket *singleBucket = getBucket(*bucketList, i);
+                if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
+                    // There is no single-character bucket since the last
+                    // underflow or inflow label.
+                    break;
+                }
+                if (singleBucket->displayBucket_ == NULL &&
+                        !hasMultiplePrimaryWeights(
+                            *cei, variableTop, singleBucket->lowerBoundary_, errorCode)) {
+                    // Add an invisible bucket that redirects strings greater than the expansion
+                    // to the previous single-character bucket.
+                    // For example, after ... Q R S Sch we add Sch\uFFFF->S
+                    // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S.
+                    bucket = new Bucket(emptyString_,
+                        UnicodeString(current).append((UChar)0xFFFF),
+                        U_ALPHAINDEX_NORMAL);
+                    if (bucket == NULL) {
+                        errorCode = U_MEMORY_ALLOCATION_ERROR;
+                        return NULL;
                     }
-                    targets->add(item);
+                    bucket->displayBucket_ = singleBucket;
+                    bucketList->addElement(bucket, errorCode);
+                    hasInvisibleBuckets = TRUE;
                     break;
                 }
             }
-        } else if (item.moveIndex32(0, 1) < item.length() &&  // Label contains more than one code point.
-                   collatorPrimaryOnly_->compare(item, separated(item)) == 0) {
-            noDistinctSorting_->add(item);
-        } else if (!ALPHABETIC->containsSome(item)) {
-            notAlphabetic_->add(item);
-        } else {
-            labelSet.add(item);
         }
     }
-
-    // If we have no labels, hard-code a fallback default set of [A-Z]
-    // This case can occur with locales that don't have exemplar character data, including root.
-    // A no-labels situation will cause other problems; it needs to be avoided.
-    //
-    // TODO: This case should be handled by having an underflow label only.
-    if (labelSet.isEmpty()) {
-        labelSet.add((UChar32)0x41, (UChar32)0x5A);
+    if (U_FAILURE(errorCode)) { return NULL; }
+    if (bucketList->size() == 1) {
+        // No real labels, show only the underflow label.
+        BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
+        if (bl == NULL) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return NULL;
+        }
+        bucketList.orphan();
+        return bl;
     }
+    // overflow bucket
+    bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW);
+    if (bucket == NULL) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    bucketList->addElement(bucket, errorCode); // final
 
-    // Move the set of Labels from the set into a vector, and sort
-    // according to the collator.
-
-    appendUnicodeSetToUVector(*labels_, labelSet, status);
-    labels_->sortWithUComparator(sortCollateComparator, collatorPrimaryOnly_, status);
-
-    // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
-    //    Implemented by copying the elements to be retained to a new UVector.
-
-    const int32_t size = labelSet.size() - 1;
-    if (size > maxLabelCount_) {
-        UVector *newLabels = new UVector(status);
-        newLabels->setDeleter(uprv_deleteUObject);
-        int32_t count = 0;
-        int32_t old = -1;
-        for (int32_t srcIndex=0; srcIndex<labels_->size(); srcIndex++) {
-            const UnicodeString *str = static_cast<const UnicodeString *>(labels_->elementAt(srcIndex));
-            ++count;
-            const int32_t bump = count * maxLabelCount_ / size;
-            if (bump == old) {
-                // it.remove();
-            } else {
-                newLabels->addElement(str->clone(), status);
-                old = bump;
+    if (hasPinyin) {
+        // Redirect Pinyin buckets.
+        Bucket *asciiBucket = NULL;
+        for (int32_t i = 0; i < 26; ++i) {
+            if (asciiBuckets[i] != NULL) {
+                asciiBucket = asciiBuckets[i];
+            }
+            if (pinyinBuckets[i] != NULL && asciiBucket != NULL) {
+                pinyinBuckets[i]->displayBucket_ = asciiBucket;
+                hasInvisibleBuckets = TRUE;
             }
         }
-        delete labels_;
-        labels_ = newLabels;
     }
 
-    // We now know the list of labels.
-    // Create a corresponding list of buckets, one per label.
-    
-    buildBucketList(status);    // Corresponds to Java BucketList constructor.
-
-    // Bin the Records into the Buckets.
-    bucketRecords(status);
-
-    indexBuildRequired_ = FALSE;
-    resetBucketIterator(status);
-}
-
-//
-//  buildBucketList()    Corresponds to the BucketList constructor in the Java version.
-
-void AlphabeticIndex::buildBucketList(UErrorCode &status) {
-    UnicodeString labelStr = getUnderflowLabel();
-    Bucket *b = new Bucket(labelStr, *EMPTY_STRING, U_ALPHAINDEX_UNDERFLOW, status);
-    bucketList_->addElement(b, status);
-
-    // Build up the list, adding underflow, additions, overflow
-    // insert infix labels as needed, using \uFFFF.
-    const UnicodeString *last = static_cast<UnicodeString *>(labels_->elementAt(0));
-    b = new Bucket(*last, *last, U_ALPHAINDEX_NORMAL, status);
-    bucketList_->addElement(b, status);
-
-    UnicodeSet lastSet;
-    UnicodeSet set;
-    AlphabeticIndex::getScriptSet(lastSet, *last, status);
-    lastSet.removeAll(*IGNORE_SCRIPTS);
-
-    for (int i = 1; i < labels_->size(); ++i) {
-        UnicodeString *current = static_cast<UnicodeString *>(labels_->elementAt(i));
-        getScriptSet(set, *current, status);
-        set.removeAll(*IGNORE_SCRIPTS);
-        if (lastSet.containsNone(set)) {
-            // check for adjacent
-            const UnicodeString &overflowComparisonString = getOverflowComparisonString(*last, status);
-            if (collatorPrimaryOnly_->compare(overflowComparisonString, *current) < 0) {
-                labelStr = getInflowLabel();
-                b = new Bucket(labelStr, overflowComparisonString, U_ALPHAINDEX_INFLOW, status);
-                bucketList_->addElement(b, status);
-                i++;
-                lastSet = set;
+    if (U_FAILURE(errorCode)) { return NULL; }
+    if (!hasInvisibleBuckets) {
+        BucketList *bl = new BucketList(bucketList.getAlias(), bucketList.getAlias());
+        if (bl == NULL) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return NULL;
+        }
+        bucketList.orphan();
+        return bl;
+    }
+    // Merge inflow buckets that are visually adjacent.
+    // Iterate backwards: Merge inflow into overflow rather than the other way around.
+    int32_t i = bucketList->size() - 1;
+    Bucket *nextBucket = getBucket(*bucketList, i);
+    while (--i > 0) {
+        bucket = getBucket(*bucketList, i);
+        if (bucket->displayBucket_ != NULL) {
+            continue;  // skip invisible buckets
+        }
+        if (bucket->labelType_ == U_ALPHAINDEX_INFLOW) {
+            if (nextBucket->labelType_ != U_ALPHAINDEX_NORMAL) {
+                bucket->displayBucket_ = nextBucket;
+                continue;
             }
         }
-        b = new Bucket(*current, *current, U_ALPHAINDEX_NORMAL, status);
-        bucketList_->addElement(b, status);
-        last = current;
-        lastSet = set;
+        nextBucket = bucket;
     }
-    const UnicodeString &limitString = getOverflowComparisonString(*last, status);
-    b = new Bucket(getOverflowLabel(), limitString, U_ALPHAINDEX_OVERFLOW, status);
-    bucketList_->addElement(b, status);
-    // final overflow bucket
+
+    LocalPointer<UVector> publicBucketList(new UVector(errorCode));
+    if (bucketList.isNull()) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    // Do not call publicBucketList->setDeleter():
+    // This vector shares its objects with the bucketList.
+    for (int32_t i = 0; i < bucketList->size(); ++i) {
+        bucket = getBucket(*bucketList, i);
+        if (bucket->displayBucket_ == NULL) {
+            publicBucketList->addElement(bucket, errorCode);
+        }
+    }
+    if (U_FAILURE(errorCode)) { return NULL; }
+    BucketList *bl = new BucketList(bucketList.getAlias(), publicBucketList.getAlias());
+    if (bl == NULL) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    bucketList.orphan();
+    publicBucketList.orphan();
+    return bl;
 }
 
-
-//
-//   Place all of the raw input records into the correct bucket.
-//
-//       Begin by sorting the input records; this lets us bin them in a single pass.
-//
-//       Note on storage management:  The input records are owned by the
-//       inputRecords_ vector, and will (eventually) be auto-deleted by it.
-//       The Bucket objects have pointers to the Record objects, but do not own them.
-//
-void AlphabeticIndex::bucketRecords(UErrorCode &status) {
-    if (U_FAILURE(status)) {
+/**
+ * Creates an index, and buckets and sorts the list of records into the index.
+ */
+void AlphabeticIndex::initBuckets(UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode) || buckets_ != NULL) {
+        return;
+    }
+    buckets_ = createBucketList(errorCode);
+    if (U_FAILURE(errorCode) || inputList_ == NULL || inputList_->isEmpty()) {
         return;
     }
 
-    inputRecords_->sortWithUComparator(recordCompareFn, collator_, status);
-    U_ASSERT(bucketList_->size() > 0);   // Should always have at least an overflow
-                                         //   bucket, even if no user labels.
-    int32_t bucketIndex = 0;
-    Bucket *destBucket = static_cast<Bucket *>(bucketList_->elementAt(bucketIndex));
-    Bucket *nextBucket = NULL;
-    if (bucketIndex+1 < bucketList_->size()) {
-        nextBucket = static_cast<Bucket *>(bucketList_->elementAt(bucketIndex+1));
-    }
-    int32_t recordIndex = 0;
-    Record *r = static_cast<Record *>(inputRecords_->elementAt(recordIndex));
-    while (recordIndex < inputRecords_->size()) {
-        if (nextBucket == NULL ||
-            collatorPrimaryOnly_->compare(r->sortingName_, nextBucket->lowerBoundary_) < 0) {
-                // Record goes in current bucket.  Advance to next record,
-                // stay on current bucket.
-                destBucket->records_->addElement(r, status);
-                ++recordIndex;
-                r = static_cast<Record *>(inputRecords_->elementAt(recordIndex));
-        } else {
-            // Advance to the next bucket, stay on current record.
-            bucketIndex++;
-            destBucket = nextBucket;
-            if (bucketIndex+1 < bucketList_->size()) {
-                nextBucket = static_cast<Bucket *>(bucketList_->elementAt(bucketIndex+1));
-            } else {
-                nextBucket = NULL;
-            }
-            U_ASSERT(destBucket != NULL);
-        }
-    }
+    // Sort the records by name.
+    // Stable sort preserves input order of collation duplicates.
+    inputList_->sortWithUComparator(recordCompareFn, collator_, errorCode);
 
+    // Now, we traverse all of the input, which is now sorted.
+    // If the item doesn't go in the current bucket, we find the next bucket that contains it.
+    // This makes the process order n*log(n), since we just sort the list and then do a linear process.
+    // However, if the user adds an item at a time and then gets the buckets, this isn't efficient, so
+    // we need to improve it for that case.
+
+    Bucket *currentBucket = getBucket(*buckets_->bucketList_, 0);
+    int32_t bucketIndex = 1;
+    Bucket *nextBucket;
+    const UnicodeString *upperBoundary;
+    if (bucketIndex < buckets_->bucketList_->size()) {
+        nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
+        upperBoundary = &nextBucket->lowerBoundary_;
+    } else {
+        nextBucket = NULL;
+        upperBoundary = NULL;
+    }
+    for (int32_t i = 0; i < inputList_->size(); ++i) {
+        Record *r = getRecord(*inputList_, i);
+        // if the current bucket isn't the right one, find the one that is
+        // We have a special flag for the last bucket so that we don't look any further
+        while (upperBoundary != NULL &&
+                collatorPrimaryOnly_->compare(r->name_, *upperBoundary, errorCode) >= 0) {
+            currentBucket = nextBucket;
+            // now reset the boundary that we compare against
+            if (bucketIndex < buckets_->bucketList_->size()) {
+                nextBucket = getBucket(*buckets_->bucketList_, bucketIndex++);
+                upperBoundary = &nextBucket->lowerBoundary_;
+            } else {
+                upperBoundary = NULL;
+            }
+        }
+        // now put the record into the bucket.
+        Bucket *bucket = currentBucket;
+        if (bucket->displayBucket_ != NULL) {
+            bucket = bucket->displayBucket_;
+        }
+        if (bucket->records_ == NULL) {
+            bucket->records_ = new UVector(errorCode);
+            if (bucket->records_ == NULL) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+        }
+        bucket->records_->addElement(r, errorCode);
+    }
+}
+
+void AlphabeticIndex::clearBuckets() {
+    if (buckets_ != NULL) {
+        delete buckets_;
+        buckets_ = NULL;
+        internalResetBucketIterator();
+    }
+}
+
+void AlphabeticIndex::internalResetBucketIterator() {
+    labelsIterIndex_ = -1;
+    currentBucket_ = NULL;
 }
 
 
-void AlphabeticIndex::getIndexExemplars(UnicodeSet  &dest, const Locale &locale, UErrorCode &status) {
-    if (U_FAILURE(status)) {
-        return;
+void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
+    if (U_FAILURE(status)) { return; }
+    // Chinese index characters, which are specific to each of the several Chinese tailorings,
+    // take precedence over the single locale data exemplar set per language.
+    const char *language = locale.getLanguage();
+    if (uprv_strcmp(language, "zh") == 0 || uprv_strcmp(language, "ja") == 0 ||
+            uprv_strcmp(language, "ko") == 0) {
+        // TODO: This should be done regardless of the language, but it's expensive.
+        // We should add a Collator function (can be @internal)
+        // to enumerate just the contractions that start with a given code point or string.
+        if (addChineseIndexCharacters(status) || U_FAILURE(status)) {
+            return;
+        }
     }
 
     LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
+    if (U_FAILURE(status)) {
+        return;
+    }
+
     UnicodeSet exemplars;
     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
     if (U_SUCCESS(status)) {
-        dest.addAll(exemplars);
+        initialLabels_->addAll(exemplars);
         return;
     }
     status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR
 
-    // Locale data did not include explicit Index characters.
+    // The locale data did not include explicit Index characters.
     // Synthesize a set of them from the locale's standard exemplar characters.
-
     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
     if (U_FAILURE(status)) {
         return;
     }
 
-    // Upper-case any that aren't already so.
-    //   (We only do this for synthesized index characters.)
-
-    UnicodeSetIterator it(exemplars);
-    UnicodeString upperC;
-    UnicodeSet  lowersToRemove;
-    UnicodeSet  uppersToAdd;
-    while (it.next()) {
-        const UnicodeString &exemplarC = it.getString();
-        upperC = exemplarC;
-        upperC.toUpper(locale);
-        if (exemplarC != upperC) {
-            lowersToRemove.add(exemplarC);
-            uppersToAdd.add(upperC);
-        }
-    }
-    exemplars.removeAll(lowersToRemove);
-    exemplars.addAll(uppersToAdd);
-
-    // get the exemplars, and handle special cases
-
     // question: should we add auxiliary exemplars?
-    if (exemplars.containsSome(*CORE_LATIN)) {
-        exemplars.addAll(*CORE_LATIN);
+    if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
+        exemplars.add(0x61, 0x7A);
     }
-    if (exemplars.containsSome(*HANGUL)) {
+    if (exemplars.containsSome(0xAC00, 0xD7A3)) {  // Hangul syllables
         // cut down to small list
-        UnicodeSet BLOCK_HANGUL_SYLLABLES(UNICODE_STRING_SIMPLE("[:block=hangul_syllables:]"), status);
-        exemplars.removeAll(BLOCK_HANGUL_SYLLABLES);
-        exemplars.addAll(*HANGUL);
+        exemplars.remove(0xAC00, 0xD7A3).
+            add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
+            add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
+            add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
+            add(0xD30C).add(0xD558);
     }
-    if (exemplars.containsSome(*ETHIOPIC)) {
+    if (exemplars.containsSome(0x1200, 0x137F)) {  // Ethiopic block
         // cut down to small list
         // make use of the fact that Ethiopic is allocated in 8's, where
         // the base is 0 mod 8.
-        UnicodeSetIterator  it(*ETHIOPIC);
+        UnicodeSet ethiopic(
+            UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
+        UnicodeSetIterator it(ethiopic);
         while (it.next() && !it.isString()) {
             if ((it.getCodepoint() & 0x7) != 0) {
                 exemplars.remove(it.getCodepoint());
             }
         }
     }
-    dest.addAll(exemplars);
+
+    // Upper-case any that aren't already so.
+    //   (We only do this for synthesized index characters.)
+    UnicodeSetIterator it(exemplars);
+    UnicodeString upperC;
+    while (it.next()) {
+        const UnicodeString &exemplarC = it.getString();
+        upperC = exemplarC;
+        upperC.toUpper(locale);
+        initialLabels_->add(upperC);
+    }
+}
+
+UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
+    UnicodeSet contractions;
+    ucol_getContractionsAndExpansions(collatorPrimaryOnly_->getUCollator(),
+                                      contractions.toUSet(), NULL, FALSE, &errorCode);
+    if (U_FAILURE(errorCode)) { return FALSE; }
+    UnicodeString firstHanBoundary;
+    UBool hasPinyin = FALSE;
+    UnicodeSetIterator iter(contractions);
+    while (iter.next()) {
+        const UnicodeString &s = iter.getString();
+        if (s.startsWith(BASE, BASE_LENGTH)) {
+            initialLabels_->add(s);
+            if (firstHanBoundary.isEmpty() ||
+                    collatorPrimaryOnly_->compare(s, firstHanBoundary, errorCode) < 0) {
+                firstHanBoundary = s;
+            }
+            UChar c = s.charAt(s.length() - 1);
+            if (0x41 <= c && c <= 0x5A) {  // A-Z
+                hasPinyin = TRUE;
+            }
+        }
+    }
+    if (hasPinyin) {
+        initialLabels_->add(0x41, 0x5A);  // A-Z
+    }
+    if (!firstHanBoundary.isEmpty()) {
+        // The hardcoded list of script boundaries includes U+4E00
+        // which is tailored to not be the first primary
+        // in all Chinese tailorings except "unihan".
+        // Replace U+4E00 with the first boundary string from the tailoring.
+        // TODO: This becomes obsolete when the root collator gets
+        // reliable script-first-primary mappings.
+        int32_t hanIndex = binarySearch(
+                *firstCharsInScripts_, UnicodeString((UChar)0x4E00), *collatorPrimaryOnly_);
+        if (hanIndex >= 0) {
+            UnicodeString *fh = new UnicodeString(firstHanBoundary);
+            if (fh == NULL) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+                return FALSE;
+            }
+            firstCharsInScripts_->setElementAt(fh, hanIndex);
+        }
+        return TRUE;
+    } else {
+        return FALSE;
+    }
 }
 
 
 /*
  * Return the string with interspersed CGJs. Input must have more than 2 codepoints.
  */
-static const UChar32 CGJ = (UChar)0x034F;
+static const UChar CGJ = 0x034F;
 UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
     UnicodeString result;
     if (item.length() == 0) {
@@ -509,21 +880,21 @@ const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
 
 AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
     inflowLabel_ = label;
-    indexBuildRequired_ = TRUE;
+    clearBuckets();
     return *this;
 }
 
 
 AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
     overflowLabel_ = label;
-    indexBuildRequired_ = TRUE;
+    clearBuckets();
     return *this;
 }
 
 
 AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
     underflowLabel_ = label;
-    indexBuildRequired_ = TRUE;
+    clearBuckets();
     return *this;
 }
 
@@ -542,213 +913,86 @@ AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UError
         return *this;
     }
     maxLabelCount_ = maxLabelCount;
-    if (maxLabelCount < bucketList_->size()) {
-        indexBuildRequired_ = TRUE;
-    }
+    clearBuckets();
     return *this;
 }
 
 
-const UnicodeString &AlphabeticIndex::getOverflowComparisonString(const UnicodeString &lowerLimit, UErrorCode &/*status*/) {
-    for (int32_t i=0; i<firstScriptCharacters_->size(); i++) {
-        const UnicodeString *s =
-                static_cast<const UnicodeString *>(firstScriptCharacters_->elementAt(i));
-        if (collator_->compare(*s, lowerLimit) > 0) {
-            return *s;
-        }
-    }
-    return *EMPTY_STRING;
-}
-
-UnicodeSet *AlphabeticIndex::getScriptSet(UnicodeSet &dest, const UnicodeString &codePoint, UErrorCode &status) {
-    if (U_FAILURE(status)) {
-        return &dest;
-    }
-    UChar32 cp = codePoint.char32At(0);
-    UScriptCode scriptCode = uscript_getScript(cp, &status);
-    dest.applyIntPropertyValue(UCHAR_SCRIPT, scriptCode, status);
-    return &dest;
-}
-
 //
 //  init() - Common code for constructors.
 //
 
-void AlphabeticIndex::init(UErrorCode &status) {
-    // Initialize statics if needed.
-    AlphabeticIndex::staticInit(status);
-
-    // Put the object into a known state so that the destructor will function.
-
-    alreadyIn_             = NULL;
-    bucketList_            = NULL;
-    collator_              = NULL;
-    collatorPrimaryOnly_   = NULL;
-    currentBucket_         = NULL;
-    firstScriptCharacters_ = NULL;
-    initialLabels_         = NULL;
-    indexBuildRequired_    = TRUE;
-    inputRecords_          = NULL;
-    itemsIterIndex_        = 0;
-    labels_                = NULL;
-    labelsIterIndex_       = 0;
-    maxLabelCount_         = 99;
-    noDistinctSorting_     = NULL;
-    notAlphabetic_         = NULL;
-    recordCounter_         = 0;
-
-    if (U_FAILURE(status)) {
+void AlphabeticIndex::init(const Locale *locale, UErrorCode &status) {
+    if (U_FAILURE(status)) { return; }
+    if (locale == NULL && collator_ == NULL) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
         return;
     }
-    alreadyIn_             = uhash_open(uhash_hashUnicodeString,    // Key Hash,
-                                        uhash_compareUnicodeString, // key Comparator,
-                                        NULL,                       // value Comparator
-                                        &status);
-    uhash_setKeyDeleter(alreadyIn_, uprv_deleteUObject);
-    uhash_setValueDeleter(alreadyIn_, uprv_deleteUObject);
 
-    bucketList_            = new UVector(status);
-    bucketList_->setDeleter(alphaIndex_deleteBucket);
-    labels_                = new UVector(status);
-    labels_->setDeleter(uprv_deleteUObject);
-    labels_->setComparer(uhash_compareUnicodeString);
-    inputRecords_          = new UVector(status);
-    inputRecords_->setDeleter(alphaIndex_deleteRecord);
-
-    noDistinctSorting_     = new UnicodeSet();
-    notAlphabetic_         = new UnicodeSet();
     initialLabels_         = new UnicodeSet();
+    if (initialLabels_ == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
 
-    inflowLabel_.remove();
-    inflowLabel_.append((UChar)0x2026);    // Ellipsis
+    inflowLabel_.setTo((UChar)0x2026);    // Ellipsis
     overflowLabel_ = inflowLabel_;
     underflowLabel_ = inflowLabel_;
 
-    // TODO:  check for memory allocation failures.
-}
-
-
-static  UBool  indexCharactersAreInitialized = FALSE;
-
-//  Index Characters Clean up function.  Delete statically allocated constant stuff.
-U_CDECL_BEGIN
-static UBool U_CALLCONV indexCharacters_cleanup(void) {
-    AlphabeticIndex::staticCleanup();
-    return TRUE;
-}
-U_CDECL_END
-
-void AlphabeticIndex::staticCleanup() {
-    delete ALPHABETIC;
-    ALPHABETIC = NULL;
-    delete HANGUL;
-    HANGUL = NULL;
-    delete ETHIOPIC;
-    ETHIOPIC = NULL;
-    delete CORE_LATIN;
-    CORE_LATIN = NULL;
-    delete IGNORE_SCRIPTS;
-    IGNORE_SCRIPTS = NULL;
-    delete TO_TRY;
-    TO_TRY = NULL;
-    delete UNIHAN;
-    UNIHAN = NULL;
-    delete EMPTY_STRING;
-    EMPTY_STRING = NULL;
-    nfkdNormalizer = NULL;  // ref to a singleton.  Do not delete.
-    indexCharactersAreInitialized = FALSE;
-}
-
-
-UnicodeSet *AlphabeticIndex::ALPHABETIC;
-UnicodeSet *AlphabeticIndex::HANGUL;
-UnicodeSet *AlphabeticIndex::ETHIOPIC;
-UnicodeSet *AlphabeticIndex::CORE_LATIN;
-UnicodeSet *AlphabeticIndex::IGNORE_SCRIPTS;
-UnicodeSet *AlphabeticIndex::TO_TRY;
-UnicodeSet *AlphabeticIndex::UNIHAN;
-const UnicodeString *AlphabeticIndex::EMPTY_STRING;
-
-//
-//  staticInit()    One-time initialization of constants.
-//                  Thread safe.  Called from constructors.
-//                  Mutex overhead is not a concern.  AlphabeticIndex constructors are
-//                  sufficiently heavy that the cost of the mutex check is not significant.
-
-void AlphabeticIndex::staticInit(UErrorCode &status) {
-    static UMutex IndexCharsInitMutex = U_MUTEX_INITIALIZER;
-
-    Mutex mutex(&IndexCharsInitMutex);
-    if (indexCharactersAreInitialized || U_FAILURE(status)) {
-        return;
-    }
-    UBool finishedInit = FALSE;
-
-    {
-        UnicodeString alphaString = UNICODE_STRING_SIMPLE("[[:alphabetic:]-[:mark:]]");
-        ALPHABETIC = new UnicodeSet(alphaString, status);
-        if (ALPHABETIC == NULL) {
-            goto err;
-        }
-
-        HANGUL = new UnicodeSet();
-        HANGUL->add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).add(0xB9C8).add(0xBC14).add(0xC0AC).
-                add(0xC544).add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).add(0xD30C).add(0xD558);
-        if (HANGUL== NULL) {
-            goto err;
-        }
-
-
-        UnicodeString EthiopicStr = UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]");
-        ETHIOPIC = new UnicodeSet(EthiopicStr, status);
-        if (ETHIOPIC == NULL) {
-            goto err;
-        }
-
-        CORE_LATIN = new UnicodeSet((UChar32)0x61, (UChar32)0x7a);  // ('a', 'z');
-        if (CORE_LATIN == NULL) {
-            goto err;
-        }
-
-        UnicodeString IgnoreStr= UNICODE_STRING_SIMPLE(
-                "[[:sc=Common:][:sc=inherited:][:script=Unknown:][:script=braille:]]");
-        IGNORE_SCRIPTS = new UnicodeSet(IgnoreStr, status);
-        IGNORE_SCRIPTS->freeze();
-        if (IGNORE_SCRIPTS == NULL) {
-            goto err;
-        }
-
-        UnicodeString nfcqcStr = UNICODE_STRING_SIMPLE("[:^nfcqc=no:]");
-        TO_TRY = new UnicodeSet(nfcqcStr, status);
-        if (TO_TRY == NULL) {
-            goto err;
-        }
-
-        UnicodeString unihanStr = UNICODE_STRING_SIMPLE("[:script=Hani:]");
-        UNIHAN = new UnicodeSet(unihanStr, status);
-        if (UNIHAN == NULL) {
-            goto err;
-        }
-
-        EMPTY_STRING = new UnicodeString();
-
-        nfkdNormalizer = Normalizer2::getNFKDInstance(status);
-        if (nfkdNormalizer == NULL) {
-            goto err;
+    if (collator_ == NULL) {
+        collator_ = static_cast<RuleBasedCollator *>(Collator::createInstance(*locale, status));
+        if (U_FAILURE(status)) { return; }
+        if (collator_ == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
         }
     }
-    finishedInit = TRUE;
-
-  err:
-    if (!finishedInit && U_SUCCESS(status)) {
+    collatorPrimaryOnly_ = static_cast<RuleBasedCollator *>(collator_->clone());
+    if (collatorPrimaryOnly_ == NULL) {
         status = U_MEMORY_ALLOCATION_ERROR;
-    }
-    if (U_FAILURE(status)) {
-        indexCharacters_cleanup();
         return;
     }
-    ucln_i18n_registerCleanup(UCLN_I18N_INDEX_CHARACTERS, indexCharacters_cleanup);
-    indexCharactersAreInitialized = TRUE;
+    collatorPrimaryOnly_->setAttribute(UCOL_STRENGTH, UCOL_PRIMARY, status);
+    firstCharsInScripts_ = firstStringsInScript(status);
+    if (U_FAILURE(status)) { return; }
+    firstCharsInScripts_->sortWithUComparator(collatorComparator, collatorPrimaryOnly_, status);
+    UnicodeString _4E00((UChar)0x4E00);
+    UnicodeString _1100((UChar)0x1100);
+    UnicodeString _1112((UChar)0x1112);
+    if (collatorPrimaryOnly_->compare(_4E00, _1112, status) <= 0 &&
+            collatorPrimaryOnly_->compare(_1100, _4E00, status) <= 0) {
+        // The standard Korean tailoring sorts Hanja (Han characters)
+        // as secondary differences from Hangul syllables.
+        // This makes U+4E00 not useful as a Han-script boundary.
+        // TODO: This becomes obsolete when the root collator gets
+        // reliable script-first-primary mappings.
+        int32_t hanIndex = binarySearch(
+                *firstCharsInScripts_, _4E00, *collatorPrimaryOnly_);
+        if (hanIndex >= 0) {
+            firstCharsInScripts_->removeElementAt(hanIndex);
+        }
+    }
+    // Guard against a degenerate collator where
+    // some script boundary strings are primary ignorable.
+    for (;;) {
+        if (U_FAILURE(status)) { return; }
+        if (firstCharsInScripts_->isEmpty()) {
+            // AlphabeticIndex requires some non-ignorable script boundary strings.
+            status = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+        if (collatorPrimaryOnly_->compare(
+                *static_cast<UnicodeString *>(firstCharsInScripts_->elementAt(0)),
+                emptyString_, status) == UCOL_EQUAL) {
+            firstCharsInScripts_->removeElementAt(0);
+        } else {
+            break;
+        }
+    }
+
+    if (locale != NULL) {
+        addIndexExemplars(*locale, status);
+    }
 }
 
 
@@ -756,12 +1000,11 @@ void AlphabeticIndex::staticInit(UErrorCode &status) {
 //  Comparison function for UVector<UnicodeString *> sorting with a collator.
 //
 static int32_t U_CALLCONV
-sortCollateComparator(const void *context, const void *left, const void *right) {
+collatorComparator(const void *context, const void *left, const void *right) {
     const UElement *leftElement = static_cast<const UElement *>(left);
     const UElement *rightElement = static_cast<const UElement *>(right);
     const UnicodeString *leftString  = static_cast<const UnicodeString *>(leftElement->pointer);
     const UnicodeString *rightString = static_cast<const UnicodeString *>(rightElement->pointer);
-    const Collator *col = static_cast<const Collator *>(context);
 
     if (leftString == rightString) {
         // Catches case where both are NULL
@@ -773,8 +1016,9 @@ sortCollateComparator(const void *context, const void *left, const void *right)
     if (rightString == NULL) {
         return -1;
     }
-    Collator::EComparisonResult r = col->compare(*leftString, *rightString);
-    return (int32_t) r;
+    const Collator *col = static_cast<const Collator *>(context);
+    UErrorCode errorCode = U_ZERO_ERROR;
+    return col->compare(*leftString, *rightString, errorCode);
 }
 
 //
@@ -787,130 +1031,68 @@ recordCompareFn(const void *context, const void *left, const void *right) {
     const AlphabeticIndex::Record *leftRec  = static_cast<const AlphabeticIndex::Record *>(leftElement->pointer);
     const AlphabeticIndex::Record *rightRec = static_cast<const AlphabeticIndex::Record *>(rightElement->pointer);
     const Collator *col = static_cast<const Collator *>(context);
-
-    Collator::EComparisonResult r = col->compare(leftRec->sortingName_, rightRec->sortingName_);
-    if (r == Collator::EQUAL) {
-        if (leftRec->serialNumber_ < rightRec->serialNumber_) {
-            r = Collator::LESS;
-        } else if (leftRec->serialNumber_ > rightRec->serialNumber_) {
-            r = Collator::GREATER;
-        }
-    }
-    return (int32_t) r;
+    UErrorCode errorCode = U_ZERO_ERROR;
+    return col->compare(leftRec->name_, rightRec->name_, errorCode);
 }
 
 
-#if 0
-//
-//  First characters in scripts.
-//  Create a UVector whose contents are pointers to UnicodeStrings for the First Characters in each script.
-//  The vector is sorted according to this index's collation.
-//
-//  This code is too slow to use, so for now hard code the data.
-//    Hard coded implementation is follows.
-//
-UVector *AlphabeticIndex::firstStringsInScript(Collator *ruleBasedCollator, UErrorCode &status) {
-
-    if (U_FAILURE(status)) {
-        return NULL;
-    }
-
-    UnicodeString results[USCRIPT_CODE_LIMIT];
-    UnicodeString LOWER_A = UNICODE_STRING_SIMPLE("a");
-
-    UnicodeSetIterator siter(*TO_TRY);
-    while (siter.next()) {
-        const UnicodeString &current = siter.getString();
-        Collator::EComparisonResult r = ruleBasedCollator->compare(current, LOWER_A);
-        if (r < 0) {  // TODO fix; we only want "real" script characters, not
-                      // symbols.
-            continue;
-        }
-
-        int script = uscript_getScript(current.char32At(0), &status);
-        if (results[script].length() == 0) {
-            results[script] = current;
-        }
-        else if (ruleBasedCollator->compare(current, results[script]) < 0) {
-            results[script] = current;
-        }
-    }
-
-    UnicodeSet extras;
-    UnicodeSet expansions;
-    RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(ruleBasedCollator);
-    const UCollator *uRuleBasedCollator = rbc->getUCollator();
-    ucol_getContractionsAndExpansions(uRuleBasedCollator, extras.toUSet(), expansions.toUSet(), true, &status);
-    extras.addAll(expansions).removeAll(*TO_TRY);
-    if (extras.size() != 0) {
-        const Normalizer2 *normalizer = Normalizer2::getNFKCInstance(status);
-        UnicodeSetIterator extrasIter(extras);
-        while (extrasIter.next()) {
-            const UnicodeString &current = extrasIter.next();
-            if (!TO_TRY->containsAll(current))
-                continue;
-            if (!normalizer->isNormalized(current, status) ||
-                ruleBasedCollator->compare(current, LOWER_A) < 0) {
-                continue;
-            }
-            int script = uscript_getScript(current.char32At(0), &status);
-            if (results[script].length() == 0) {
-                results[script] = current;
-            } else if (ruleBasedCollator->compare(current, results[script]) < 0) {
-                results[script] = current;
-            }
-        }
-    }
-
-    UVector *dest = new UVector(status);
-    dest->setDeleter(uprv_deleteUObject);
-    for (uint32_t i = 0; i < sizeof(results) / sizeof(results[0]); ++i) {
-        if (results[i].length() > 0) {
-            dest->addElement(results[i].clone(), status);
-        }
-    }
-    dest->sortWithUComparator(sortCollateComparator, ruleBasedCollator, status);
-    return dest;
-}
-#endif
-
-
-//
-//  First characters in scripts.
-//  Create a UVector whose contents are pointers to UnicodeStrings for the First Characters in each script.
-//  The vector is sorted according to this index's collation.
-//
-//  It takes too much time to compute this from character properties, so hard code it for now.
-//  Character constants copied from corresponding declaration in ICU4J.
-//  See main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java
-
-static const UChar HACK_FIRST_CHARS_IN_SCRIPTS[] =  { 0x61, 0, 0x03B1, 0,
-            0x2C81, 0, 0x0430, 0, 0x2C30, 0, 0x10D0, 0, 0x0561, 0, 0x05D0, 0, 0xD802, 0xDD00, 0, 0x0800, 0, 0x0621, 0, 0x0710, 0,
-            0x0780, 0, 0x07CA, 0, 0x2D30, 0, 0x1200, 0, 0x0950, 0, 0x0985, 0, 0x0A74, 0, 0x0AD0, 0, 0x0B05, 0, 0x0BD0, 0,
-            0x0C05, 0, 0x0C85, 0, 0x0D05, 0, 0x0D85, 0,
-            0xAAF2, 0,  // Meetei Mayek
-            0xA800, 0, 0xA882, 0, 0xD804, 0xDC83, 0,
-            U16_LEAD(0x111C4), U16_TRAIL(0x111C4), 0,  // Sharada
-            U16_LEAD(0x11680), U16_TRAIL(0x11680), 0,  // Takri
-            0x1B83, 0,
-            0xD802, 0xDE00, 0, 0x0E01, 0,
-            0x0EDE, 0,  // Lao
-            0xAA80, 0, 0x0F40, 0, 0x1C00, 0, 0xA840, 0, 0x1900, 0, 0x1700, 0, 0x1720, 0,
-            0x1740, 0, 0x1760, 0, 0x1A00, 0, 0xA930, 0, 0xA90A, 0, 0x1000, 0,
-            U16_LEAD(0x11103), U16_TRAIL(0x11103), 0,  // Chakma
-            0x1780, 0, 0x1950, 0, 0x1980, 0, 0x1A20, 0,
-            0xAA00, 0, 0x1B05, 0, 0xA984, 0, 0x1880, 0, 0x1C5A, 0, 0x13A0, 0, 0x1401, 0, 0x1681, 0, 0x16A0, 0, 0xD803, 0xDC00, 0,
-            0xA500, 0, 0xA6A0, 0, 0x1100, 0, 0x3041, 0, 0x30A1, 0, 0x3105, 0, 0xA000, 0, 0xA4F8, 0,
-            U16_LEAD(0x16F00), U16_TRAIL(0x16F00), 0,  // Miao
-            0xD800, 0xDE80, 0,
-            0xD800, 0xDEA0, 0, 0xD802, 0xDD20, 0, 0xD800, 0xDF00, 0, 0xD800, 0xDF30, 0, 0xD801, 0xDC28, 0, 0xD801, 0xDC50, 0,
-            0xD801, 0xDC80, 0,
-            U16_LEAD(0x110D0), U16_TRAIL(0x110D0), 0,  // Sora Sompeng
-            0xD800, 0xDC00, 0, 0xD802, 0xDC00, 0, 0xD802, 0xDE60, 0, 0xD802, 0xDF00, 0, 0xD802, 0xDC40, 0,
-            0xD802, 0xDF40, 0, 0xD802, 0xDF60, 0, 0xD800, 0xDF80, 0, 0xD800, 0xDFA0, 0, 0xD808, 0xDC00, 0, 0xD80C, 0xDC00, 0,
-            U16_LEAD(0x109A0), U16_TRAIL(0x109A0), 0,  // Meroitic Cursive
-            U16_LEAD(0x10980), U16_TRAIL(0x10980), 0,  // Meroitic Hieroglyphs
-            0x4E00, 0 };
+/**
+ * This list contains one character per script that has the
+ * lowest primary weight for that script in the root collator.
+ * This list will be copied and sorted to account for script reordering.
+ *
+ * <p>TODO: This is fragile. If the first character of a script is tailored
+ * so that it does not map to the script's lowest primary weight any more,
+ * then the buckets will be off.
+ * There are hacks in the code to handle the known CJK tailorings of U+4E00.
+ *
+ * <p>We use "A" not "a" because the en_US_POSIX tailoring sorts A primary-before a.
+ *
+ * Keep this in sync with HACK_FIRST_CHARS_IN_SCRIPTS in
+ * ICU4J main/classes/collate/src/com/ibm/icu/text/AlphabeticIndex.java
+ */
+static const UChar HACK_FIRST_CHARS_IN_SCRIPTS[] =  {
+    0x41, 0, 0x03B1, 0,
+    0x2C81, 0, 0x0430, 0, 0x2C30, 0, 0x10D0, 0, 0x0561, 0, 0x05D0, 0, 0xD802, 0xDD00, 0, 0x0800, 0, 0x0621, 0, 0x0710, 0,
+    0x0780, 0, 0x07CA, 0, 0x2D30, 0, 0x1200, 0, 0x0950, 0, 0x0985, 0, 0x0A74, 0, 0x0AD0, 0, 0x0B05, 0, 0x0BD0, 0,
+    0x0C05, 0, 0x0C85, 0, 0x0D05, 0, 0x0D85, 0,
+    0xAAF2, 0,  // Meetei Mayek
+    0xA800, 0, 0xA882, 0, 0xD804, 0xDC83, 0,
+    U16_LEAD(0x111C4), U16_TRAIL(0x111C4), 0,  // Sharada
+    U16_LEAD(0x11680), U16_TRAIL(0x11680), 0,  // Takri
+    0x1B83, 0,
+    0xD802, 0xDE00, 0, 0x0E01, 0,
+    0x0EDE, 0,  // Lao
+    0xAA80, 0, 0x0F40, 0, 0x1C00, 0, 0xA840, 0, 0x1900, 0, 0x1700, 0, 0x1720, 0,
+    0x1740, 0, 0x1760, 0, 0x1A00, 0, 0xA930, 0, 0xA90A, 0, 0x1000, 0,
+    U16_LEAD(0x11103), U16_TRAIL(0x11103), 0,  // Chakma
+    0x1780, 0, 0x1950, 0, 0x1980, 0, 0x1A20, 0,
+    0xAA00, 0, 0x1B05, 0, 0xA984, 0, 0x1880, 0, 0x1C5A, 0, 0x13A0, 0, 0x1401, 0, 0x1681, 0, 0x16A0, 0, 0xD803, 0xDC00, 0,
+    0xA500, 0, 0xA6A0, 0, 0x1100, 0, 0x3041, 0, 0x30A1, 0, 0x3105, 0, 0xA000, 0, 0xA4F8, 0,
+    U16_LEAD(0x16F00), U16_TRAIL(0x16F00), 0,  // Miao
+    0xD800, 0xDE80, 0,
+    0xD800, 0xDEA0, 0, 0xD802, 0xDD20, 0, 0xD800, 0xDF00, 0, 0xD800, 0xDF30, 0, 0xD801, 0xDC28, 0, 0xD801, 0xDC50, 0,
+    0xD801, 0xDC80, 0,
+    U16_LEAD(0x110D0), U16_TRAIL(0x110D0), 0,  // Sora Sompeng
+    0xD800, 0xDC00, 0, 0xD802, 0xDC00, 0, 0xD802, 0xDE60, 0, 0xD802, 0xDF00, 0, 0xD802, 0xDC40, 0,
+    0xD802, 0xDF40, 0, 0xD802, 0xDF60, 0, 0xD800, 0xDF80, 0, 0xD800, 0xDFA0, 0, 0xD808, 0xDC00, 0, 0xD80C, 0xDC00, 0,
+    U16_LEAD(0x109A0), U16_TRAIL(0x109A0), 0,  // Meroitic Cursive
+    U16_LEAD(0x10980), U16_TRAIL(0x10980), 0,  // Meroitic Hieroglyphs
+    0x4E00, 0,
+    // TODO: The overflow bucket's lowerBoundary string should be the
+    // first item after the last reordering group in the collator's script order.
+    // This should normally be the first Unicode code point
+    // that is unassigned (U+0378 in Unicode 6.3) and untailored.
+    // However, at least up to ICU 51 the Hani reordering group includes
+    // unassigned code points,
+    // and there is no stable string for the start of the trailing-weights range.
+    // The only known string that sorts "high" is U+FFFF.
+    // When ICU separates Hani vs. unassigned reordering groups, we need to fix this,
+    // and fix relevant test code.
+    // Ideally, FractionalUCA.txt will have a "script first primary"
+    // for unassigned code points.
+    0xFFFF, 0
+};
 
 UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
     if (U_FAILURE(status)) {
@@ -918,14 +1100,12 @@ UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
     }
     UVector *dest = new UVector(status);
     if (dest == NULL) {
-        if (U_SUCCESS(status)) {
-            status = U_MEMORY_ALLOCATION_ERROR;
-        }
+        status = U_MEMORY_ALLOCATION_ERROR;
         return NULL;
     }
     dest->setDeleter(uprv_deleteUObject);
     const UChar *src  = HACK_FIRST_CHARS_IN_SCRIPTS;
-    const UChar *limit = src + sizeof(HACK_FIRST_CHARS_IN_SCRIPTS) / sizeof(HACK_FIRST_CHARS_IN_SCRIPTS[0]);
+    const UChar *limit = src + LENGTHOF(HACK_FIRST_CHARS_IN_SCRIPTS);
     do {
         if (U_FAILURE(status)) {
             return dest;
@@ -933,225 +1113,41 @@ UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
         UnicodeString *str = new UnicodeString(src, -1);
         if (str == NULL) {
             status = U_MEMORY_ALLOCATION_ERROR;
-        } else {
-            dest->addElement(str, status);
-            src += str->length() + 1;
+            return dest;
         }
+        dest->addElement(str, status);
+        src += str->length() + 1;
     } while (src < limit);
-    dest->sortWithUComparator(sortCollateComparator, collator_, status);
     return dest;
 }
 
 
-AlphabeticIndex::ELangType AlphabeticIndex::langTypeFromLocale(const Locale &loc) {
-    const char *lang = loc.getLanguage();
-    if (uprv_strcmp(lang, "zh") != 0) {
-        return kNormal;
-    }
-    const char *script = loc.getScript();
-    if (uprv_strcmp(script, "Hant") == 0) {
-        return kTraditional;
-    }
-    const char *country = loc.getCountry();
-    if (uprv_strcmp(country, "TW") == 0) {
-        return kTraditional;
-    }
-    return kSimplified;
-}
-
-
-//
-// Pinyin Hacks.  Direct port from Java.
-//
-
-static const UChar32  probeCharInLong = 0x28EAD;
-
-
-static const UChar PINYIN_LOWER_BOUNDS_SHORT[] = {      // "\u0101bcd\u0113fghjkl\u1E3F\u0144\u014Dpqrstwxyz"
-            0x0101, 0x62, 0x63, 0x64, 0x0113, 0x66, 0x67, 0x68, 0x6A, 0x6B, /*l*/0x6C, 0x1E3F, 0x0144, 0x014D,
-            /*p*/0x70, 0x71, 0x72, 0x73, 0x74, /*w*/0x77, 0x78, 0x79, 0x7A};
-
-
-// Pinyin lookup tables copied, pasted (and reformatted) from the ICU4J code.
-
-AlphabeticIndex::PinyinLookup AlphabeticIndex::HACK_PINYIN_LOOKUP_SHORT = {
-        {(UChar)0,      (UChar)0, (UChar)0}, // A 
-        {(UChar)0x516B, (UChar)0, (UChar)0}, // B 
-        {(UChar)0x5693, (UChar)0, (UChar)0}, // C 
-        {(UChar)0x5491, (UChar)0, (UChar)0}, // D 
-        {(UChar)0x59B8, (UChar)0, (UChar)0}, // E 
-        {(UChar)0x53D1, (UChar)0, (UChar)0}, // F 
-        {(UChar)0x65EE, (UChar)0, (UChar)0}, // G 
-        {(UChar)0x54C8, (UChar)0, (UChar)0}, // H 
-        {(UChar)0x4E0C, (UChar)0, (UChar)0}, // J 
-        {(UChar)0x5494, (UChar)0, (UChar)0}, // K 
-        {(UChar)0x5783, (UChar)0, (UChar)0}, // L 
-        {(UChar)0x5452, (UChar)0, (UChar)0}, // M 
-        {(UChar)0x5514, (UChar)0, (UChar)0}, // N 
-        {(UChar)0x5594, (UChar)0, (UChar)0}, // O 
-        {(UChar)0x5991, (UChar)0, (UChar)0}, // P 
-        {(UChar)0x4E03, (UChar)0, (UChar)0}, // Q 
-        {(UChar)0x513F, (UChar)0, (UChar)0}, // R 
-        {(UChar)0x4EE8, (UChar)0, (UChar)0}, // S 
-        {(UChar)0x4ED6, (UChar)0, (UChar)0}, // T 
-        {(UChar)0x7A75, (UChar)0, (UChar)0}, // W 
-        {(UChar)0x5915, (UChar)0, (UChar)0}, // X 
-        {(UChar)0x4E2B, (UChar)0, (UChar)0}, // Y 
-        {(UChar)0x5E00, (UChar)0, (UChar)0}, // Z 
-        {(UChar)0xFFFF, (UChar)0, (UChar)0}, // mark end of array 
-    };
-
-static const UChar PINYIN_LOWER_BOUNDS_LONG[] = {   // "\u0101bcd\u0113fghjkl\u1E3F\u0144\u014Dpqrstwxyz";
-            0x0101, 0x62, 0x63, 0x64, 0x0113, 0x66, 0x67, 0x68, 0x6A, 0x6B, /*l*/0x6C, 0x1E3F, 0x0144, 0x014D,
-            /*p*/0x70, 0x71, 0x72, 0x73, 0x74, /*w*/0x77, 0x78, 0x79, 0x7A};
-
-AlphabeticIndex::PinyinLookup AlphabeticIndex::HACK_PINYIN_LOOKUP_LONG = {
-        {(UChar)0,      (UChar)0,      (UChar)0}, // A
-        {(UChar)0x516B, (UChar)0,      (UChar)0}, // b 
-        {(UChar)0xD863, (UChar)0xDEAD, (UChar)0}, // c 
-        {(UChar)0xD844, (UChar)0xDE51, (UChar)0}, // d 
-        {(UChar)0x59B8, (UChar)0,      (UChar)0}, // e 
-        {(UChar)0x53D1, (UChar)0,      (UChar)0}, // f 
-        {(UChar)0xD844, (UChar)0xDE45, (UChar)0}, // g 
-        {(UChar)0x54C8, (UChar)0,      (UChar)0}, // h 
-        {(UChar)0x4E0C, (UChar)0,      (UChar)0}, // j 
-        {(UChar)0x5494, (UChar)0,      (UChar)0}, // k 
-        {(UChar)0x3547, (UChar)0,      (UChar)0}, // l 
-        {(UChar)0x5452, (UChar)0,      (UChar)0}, // m 
-        {(UChar)0x5514, (UChar)0,      (UChar)0}, // n 
-        {(UChar)0x5594, (UChar)0,      (UChar)0}, // o 
-        {(UChar)0xD84F, (UChar)0xDC7A, (UChar)0}, // p 
-        {(UChar)0x4E03, (UChar)0,      (UChar)0}, // q 
-        {(UChar)0x513F, (UChar)0,      (UChar)0}, // r 
-        {(UChar)0x4EE8, (UChar)0,      (UChar)0}, // s 
-        {(UChar)0x4ED6, (UChar)0,      (UChar)0}, // t 
-        {(UChar)0x7A75, (UChar)0,      (UChar)0}, // w 
-        {(UChar)0x5915, (UChar)0,      (UChar)0}, // x 
-        {(UChar)0x4E2B, (UChar)0,      (UChar)0}, // y 
-        {(UChar)0x5E00, (UChar)0,      (UChar)0}, // z 
-        {(UChar)0xFFFF, (UChar)0,      (UChar)0}, // mark end of array 
-    };
-
-
-//
-//  Probe the collation data, and decide which Pinyin tables should be used
-//
-//  ICU can be built with a choice between two Chinese collations.
-//  The hack Pinyin tables to use depend on which one is in use.
-//  We can assume that any given copy of ICU will have only one of the collations available,
-//  and that there is no way, in a given process, to create two alphabetic indexes using
-//  different Chinese collations.  Which means the probe can be done once
-//  and the results cached.
-//
-//  This whole arrangement is temporary.
-//
-AlphabeticIndex::PinyinLookup *AlphabeticIndex::HACK_PINYIN_LOOKUP  = NULL;
-const UChar  *AlphabeticIndex::PINYIN_LOWER_BOUNDS = NULL;
-
-void AlphabeticIndex::initPinyinBounds(const Collator *col, UErrorCode &status) {
-    {
-        Mutex m;
-        if (PINYIN_LOWER_BOUNDS != NULL) {
-            return;
-        }
-    }
-    UnicodeSet *colSet = col->getTailoredSet(status);
-    if (U_FAILURE(status) || colSet == NULL) {
-        delete colSet;
-        if (U_SUCCESS(status))  {
-            status = U_MEMORY_ALLOCATION_ERROR;
-        }
-        return;
-    }
-    UBool useLongTables = colSet->contains(probeCharInLong);
-    delete colSet;
-    {
-        Mutex m;
-        if (useLongTables) {
-            PINYIN_LOWER_BOUNDS = PINYIN_LOWER_BOUNDS_LONG;
-            HACK_PINYIN_LOOKUP  = &HACK_PINYIN_LOOKUP_LONG;
-        } else {
-            PINYIN_LOWER_BOUNDS = PINYIN_LOWER_BOUNDS_SHORT;
-            HACK_PINYIN_LOOKUP  = &HACK_PINYIN_LOOKUP_SHORT;
-        }
-    }
-}
-
-// Pinyin Hack:
-//    Modify a Chinese name by prepending a Latin letter.  The modified name is used
-//      when putting records (names) into buckets, to put the name under a Latin index heading.
-
-void AlphabeticIndex::hackName(UnicodeString &dest, const UnicodeString &name, const Collator *col) {
-
-    if (langType_ != kSimplified || !UNIHAN->contains(name.char32At(0))) {
-        dest = name;
-        return;
-    }
-
-    UErrorCode status = U_ZERO_ERROR;
-    initPinyinBounds(col, status);
-    if (U_FAILURE(status)) {
-        dest = name;
-        return;
-    }
-    // TODO:  use binary search
-    int index;
-    for (index=0; ; index++) {
-        if ((*HACK_PINYIN_LOOKUP)[index][0] == (UChar)0xffff) {
-            index--;
-            break;
-        }
-        int32_t compareResult = col->compare(name, UnicodeString(TRUE, (*HACK_PINYIN_LOOKUP)[index], -1));
-        if (compareResult < 0) {
-            index--;
-        }
-        if (compareResult <= 0) {
-            break;
-        }
-    }
-    UChar c = PINYIN_LOWER_BOUNDS[index];
-    dest.setTo(c);
-    dest.append(name);
-    return;
-}
-
-
+namespace {
 
 /**
- * Comparator that returns "better" items first, where shorter NFKD is better, and otherwise NFKD binary order is
- * better, and otherwise binary order is better.
- *
- * For use with array sort or UVector.
- * @param context  A UErrorCode pointer.
- * @param left     A UElement pointer, which must refer to a UnicodeString *
- * @param right    A UElement pointer, which must refer to a UnicodeString *
+ * Returns true if one index character string is "better" than the other.
+ * Shorter NFKD is better, and otherwise NFKD-binary-less-than is
+ * better, and otherwise binary-less-than is better.
  */
-
-static int32_t U_CALLCONV
-PreferenceComparator(const void *context, const void *left, const void *right) {
-    const UElement *leftElement  = static_cast<const UElement *>(left);
-    const UElement *rightElement = static_cast<const UElement *>(right);
-    const UnicodeString *s1  = static_cast<const UnicodeString *>(leftElement->pointer);
-    const UnicodeString *s2  = static_cast<const UnicodeString *>(rightElement->pointer);
-    UErrorCode &status       = *(UErrorCode *)(context);   // Cast off both static and const.
-    if (s1 == s2) {
-        return 0;
-    }
-
-    UnicodeString n1 = nfkdNormalizer->normalize(*s1, status);
-    UnicodeString n2 = nfkdNormalizer->normalize(*s2, status);
-    int32_t result = n1.length() - n2.length();
+UBool isOneLabelBetterThanOther(const Normalizer2 &nfkdNormalizer,
+                                const UnicodeString &one, const UnicodeString &other) {
+    // This is called with primary-equal strings, but never with one.equals(other).
+    UErrorCode status = U_ZERO_ERROR;
+    UnicodeString n1 = nfkdNormalizer.normalize(one, status);
+    UnicodeString n2 = nfkdNormalizer.normalize(other, status);
+    if (U_FAILURE(status)) { return FALSE; }
+    int32_t result = n1.countChar32() - n2.countChar32();
     if (result != 0) {
-        return result;
+        return result < 0;
     }
-
     result = n1.compareCodePointOrder(n2);
     if (result != 0) {
-        return result;
+        return result < 0;
     }
-    return s1->compareCodePointOrder(*s2);
+    return one.compareCodePointOrder(other) < 0;
 }
 
+}  // namespace
 
 //
 //  Constructor & Destructor for AlphabeticIndex::Record
@@ -1159,14 +1155,9 @@ PreferenceComparator(const void *context, const void *left, const void *right) {
 //     Records are internal only, instances are not directly surfaced in the public API.
 //     This class is mostly struct-like, with all public fields.
 
-AlphabeticIndex::Record::Record(AlphabeticIndex *alphaIndex, const UnicodeString &name, const void *data):
-    alphaIndex_(alphaIndex), name_(name), data_(data) 
-{
-    UnicodeString prefixedName;
-    alphaIndex->hackName(sortingName_, name_, alphaIndex->collatorPrimaryOnly_);
-    serialNumber_ = ++alphaIndex->recordCounter_;
-}
-    
+AlphabeticIndex::Record::Record(const UnicodeString &name, const void *data)
+        : name_(name), data_(data) {}
+
 AlphabeticIndex::Record::~Record() {
 }
 
@@ -1175,9 +1166,21 @@ AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const vo
     if (U_FAILURE(status)) {
         return *this;
     }
-    Record *r = new Record(this, name, data);
-    inputRecords_->addElement(r, status);
-    indexBuildRequired_ = TRUE;
+    if (inputList_ == NULL) {
+        inputList_ = new UVector(status);
+        if (inputList_ == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+        inputList_->setDeleter(alphaIndex_deleteRecord);
+    }
+    Record *r = new Record(name, data);
+    if (r == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return *this;
+    }
+    inputList_->addElement(r, status);
+    clearBuckets();
     //std::string ss;
     //std::string ss2;
     //std::cout << "added record: name = \"" << r->name_.toUTF8String(ss) << "\"" << 
@@ -1187,40 +1190,19 @@ AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const vo
 
 
 AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
-    if (U_FAILURE(status)) {
-        return *this;
+    if (U_SUCCESS(status) && inputList_ != NULL && !inputList_->isEmpty()) {
+        inputList_->removeAllElements();
+        clearBuckets();
     }
-    inputRecords_->removeAllElements();
-    indexBuildRequired_ = TRUE;
     return *this;
 }
 
-
 int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
-    buildIndex(status);
+    initBuckets(status);
     if (U_FAILURE(status)) {
         return 0;
     }
-
-    // For simplified Chinese prepend a prefix to the name.
-    //   For non-Chinese locales or non-Chinese names, the name is not modified.
-
-    UnicodeString prefixedName;
-    hackName(prefixedName, name, collatorPrimaryOnly_);
-
-    // TODO:  use a binary search.
-    for (int32_t i = 0; i < bucketList_->size(); ++i) {
-        Bucket *bucket = static_cast<Bucket *>(bucketList_->elementAt(i));
-        Collator::EComparisonResult comp = collatorPrimaryOnly_->compare(prefixedName, bucket->lowerBoundary_);
-        if (comp < 0) {
-            return i - 1;
-        }
-    }
-    // Loop runs until we find the bucket following the one that would hold prefixedName.
-    // If the prefixedName belongs in the last bucket the loop will drop out the bottom rather
-    //  than returning from the middle.
-
-    return bucketList_->size() - 1;
+    return buckets_->getBucketIndex(name, *collatorPrimaryOnly_, status);
 }
 
 
@@ -1233,20 +1215,20 @@ UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
     if (U_FAILURE(status)) {
         return FALSE;
     }
-    if (indexBuildRequired_ && currentBucket_ != NULL) {
+    if (buckets_ == NULL && currentBucket_ != NULL) {
         status = U_ENUM_OUT_OF_SYNC_ERROR;
         return FALSE;
     }
-    buildIndex(status);
+    initBuckets(status);
     if (U_FAILURE(status)) {
         return FALSE;
     }
     ++labelsIterIndex_;
-    if (labelsIterIndex_ >= bucketList_->size()) {
-        labelsIterIndex_ = bucketList_->size();
+    if (labelsIterIndex_ >= buckets_->getBucketCount()) {
+        labelsIterIndex_ = buckets_->getBucketCount();
         return FALSE;
     }
-    currentBucket_ = static_cast<Bucket *>(bucketList_->elementAt(labelsIterIndex_));
+    currentBucket_ = getBucket(*buckets_->immutableVisibleList_, labelsIterIndex_);
     resetRecordIterator();
     return TRUE;
 }
@@ -1255,7 +1237,7 @@ const UnicodeString &AlphabeticIndex::getBucketLabel() const {
     if (currentBucket_ != NULL) {
         return currentBucket_->label_;
     } else {
-        return *EMPTY_STRING;
+        return emptyString_;
     }
 }
 
@@ -1270,7 +1252,7 @@ UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
 
 
 int32_t AlphabeticIndex::getBucketRecordCount() const {
-    if (currentBucket_ != NULL) {
+    if (currentBucket_ != NULL && currentBucket_->records_ != NULL) {
         return currentBucket_->records_->size();
     } else {
         return 0;
@@ -1282,9 +1264,7 @@ AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
     if (U_FAILURE(status)) {
         return *this;
     }
-    buildIndex(status);
-    labelsIterIndex_ = -1;
-    currentBucket_ = NULL;
+    internalResetBucketIterator();
     return *this;
 }
 
@@ -1299,10 +1279,13 @@ UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
         status = U_INVALID_STATE_ERROR;
         return FALSE;
     }
-    if (indexBuildRequired_) {
+    if (buckets_ == NULL) {
         status = U_ENUM_OUT_OF_SYNC_ERROR;
         return FALSE;
     }
+    if (currentBucket_->records_ == NULL) {
+        return FALSE;
+    }
     ++itemsIterIndex_;
     if (itemsIterIndex_ >= currentBucket_->records_->size()) {
         itemsIterIndex_  = currentBucket_->records_->size();
@@ -1313,8 +1296,8 @@ UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
 
 
 const UnicodeString &AlphabeticIndex::getRecordName() const {
-    const UnicodeString *retStr = EMPTY_STRING;
-    if (currentBucket_ != NULL &&
+    const UnicodeString *retStr = &emptyString_;
+    if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
         itemsIterIndex_ >= 0 &&
         itemsIterIndex_ < currentBucket_->records_->size()) {
             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
@@ -1325,7 +1308,7 @@ const UnicodeString &AlphabeticIndex::getRecordName() const {
 
 const void *AlphabeticIndex::getRecordData() const {
     const void *retPtr = NULL;
-    if (currentBucket_ != NULL &&
+    if (currentBucket_ != NULL && currentBucket_->records_ != NULL &&
         itemsIterIndex_ >= 0 &&
         itemsIterIndex_ < currentBucket_->records_->size()) {
             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
@@ -1344,16 +1327,10 @@ AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
 
 AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
                                 const UnicodeString &lowerBoundary,
-                                UAlphabeticIndexLabelType type,
-                                UErrorCode &status):
-         label_(label), lowerBoundary_(lowerBoundary), labelType_(type), records_(NULL) {
-    if (U_FAILURE(status)) {
-        return;
-    }
-    records_ = new UVector(status);
-    if (records_ == NULL && U_SUCCESS(status)) {
-        status = U_MEMORY_ALLOCATION_ERROR;
-    }
+                                UAlphabeticIndexLabelType type)
+        : label_(label), lowerBoundary_(lowerBoundary), labelType_(type),
+          displayBucket_(NULL), displayIndex_(-1),
+          records_(NULL) {
 }
 
 
diff --git a/icu4c/source/i18n/ucln_in.h b/icu4c/source/i18n/ucln_in.h
index ee0846702e9..2e94a83f229 100644
--- a/icu4c/source/i18n/ucln_in.h
+++ b/icu4c/source/i18n/ucln_in.h
@@ -51,7 +51,6 @@ typedef enum ECleanupI18NType {
     UCLN_I18N_UCOL_RES,
     UCLN_I18N_UCOL_BLD,
     UCLN_I18N_CSDET,
-    UCLN_I18N_INDEX_CHARACTERS,
     UCLN_I18N_GENDERINFO,
     UCLN_I18N_CDFINFO,
     UCLN_I18N_REGION,
diff --git a/icu4c/source/i18n/unicode/alphaindex.h b/icu4c/source/i18n/unicode/alphaindex.h
index 232e103495d..0ce1b031fe0 100644
--- a/icu4c/source/i18n/unicode/alphaindex.h
+++ b/icu4c/source/i18n/unicode/alphaindex.h
@@ -71,19 +71,25 @@ U_NAMESPACE_BEGIN
 
 // Forward Declarations
 
+namespace {
+
+class BucketList;
+
+}  // namespace;
+
 class Collator;
 class RuleBasedCollator;
 class StringEnumeration;
 class UnicodeSet;
 class UVector;
 
-
-
 /**
- * class AlphabeticIndex supports the creation of a UI index appropriate for a given language, such as:
- *
+ * AlphabeticIndex supports the creation of a UI index appropriate for a given language.
+ * It can support either direct use, or use with a client that doesn't support localized collation.
+ * The following is an example of what an index might look like in a UI:
+ * 
  * <pre>
- *  <b>... A B C D E F G H I J K L M N O P Q R S T U V W X Y Z \\u00C6 \\u00D8 \\u00C5 ...</b>
+ *  <b>... A B C D E F G H I J K L M N O P Q R S T U V W X Y Z  ...</b>
  *
  *  <b>A</b>
  *     Addison
@@ -107,10 +113,14 @@ class UVector;
  * into an inflow bucket between the other two scripts.
  * <p>
  * The AlphabeticIndex class is not intended for public subclassing.
- * <p>
- * <i>Example</i>
- * <p>
- * The "show..." methods below are just to illustrate usage.
+ *
+ * <p><em>Note:</em> If you expect to have a lot of ASCII or Latin characters
+ * as well as characters from the user's language,
+ * then it is a good idea to call addLabels(Locale::getEnglish(), status).</p>
+ *
+ * <h2>Direct Use</h2>
+ * <p>The following shows an example of building an index directly.
+ *  The "show..." methods below are just to illustrate usage.
  *
  * <pre>
  * // Create a simple index.  "Item" is assumed to be an application
@@ -150,21 +160,138 @@ class UVector;
  * <b>... A-F G-N O-Z ...</b>
  * </pre>
  *
- * <p>
- * <b>Notes:</b>
+ * <h2>Client Support</h2>
+ * <p>Callers can also use the AlphabeticIndex::ImmutableIndex, or the AlphabeticIndex itself,
+ * to support sorting on a client that doesn't support AlphabeticIndex functionality.
+ *
+ * <p>The ImmutableIndex is both immutable and thread-safe.
+ * The corresponding AlphabeticIndex methods are not thread-safe because
+ * they "lazily" build the index buckets.
  * <ul>
- * <li>Additional collation parameters can be passed in as part of the locale name.
- *     For example, German plus numeric
- *     sorting would be "de@kn-true".
+ * <li>ImmutableIndex.getBucket(index) provides random access to all
+ *     buckets and their labels and label types.
+ * <li>The AlphabeticIndex bucket iterator or ImmutableIndex.getBucket(0..getBucketCount-1)
+ *     can be used to get a list of the labels,
+ *     such as "...", "A", "B",..., and send that list to the client.
+ * <li>When the client has a new name, it sends that name to the server.
+ * The server needs to call the following methods,
+ * and communicate the bucketIndex and collationKey back to the client.
+ *
+ * <pre>
+ * int32_t bucketIndex = index.getBucketIndex(name, status);
+ * const UnicodeString &label = immutableIndex.getBucket(bucketIndex)->getLabel();  // optional
+ * int32_t skLength = collator.getSortKey(name, sk, skCapacity);
+ * </pre>
+ *
+ * <li>The client would put the name (and associated information) into its bucket for bucketIndex. The sort key sk is a
+ * sequence of bytes that can be compared with a binary compare, and produce the right localized result.</li>
  * </ul>
  *
  * @stable ICU 4.8
  */
-
-
 class U_I18N_API AlphabeticIndex: public UObject {
+public:
+     /**
+      * An index "bucket" with a label string and type.
+      * It is referenced by getBucketIndex(),
+      * and returned by ImmutableIndex.getBucket().
+      *
+      * The Bucket class is not intended for public subclassing.
+      * @draft ICU 51
+      */
+     class U_I18N_API Bucket : public UObject {
+     public:
+        /**
+         * Destructor.
+         * @draft ICU 51
+         */
+        virtual ~Bucket();
 
-  public:
+        /**
+         * Returns the label string.
+         *
+         * @return the label string for the bucket
+         * @draft ICU 51
+         */
+        const UnicodeString &getLabel() const { return label_; }
+        /**
+         * Returns whether this bucket is a normal, underflow, overflow, or inflow bucket.
+         *
+         * @return the bucket label type
+         * @draft ICU 51
+         */
+        UAlphabeticIndexLabelType getLabelType() const { return labelType_; }
+
+     private:
+        friend class AlphabeticIndex;
+        friend class BucketList;
+
+        UnicodeString label_;
+        UnicodeString lowerBoundary_;
+        UAlphabeticIndexLabelType labelType_;
+        Bucket *displayBucket_;
+        int32_t displayIndex_;
+        UVector *records_;  // Records are owned by the inputList_ vector.
+
+        Bucket(const UnicodeString &label,   // Parameter strings are copied.
+               const UnicodeString &lowerBoundary,
+               UAlphabeticIndexLabelType type);
+     };
+
+    /**
+     * Immutable, thread-safe version of AlphabeticIndex.
+     * This class provides thread-safe methods for bucketing,
+     * and random access to buckets and their properties,
+     * but does not offer adding records to the index.
+     *
+     * The ImmutableIndex class is not intended for public subclassing.
+     *
+     * @draft ICU 51
+     */
+    class U_I18N_API ImmutableIndex : public UObject {
+    public:
+        /**
+         * Destructor.
+         * @draft ICU 51
+         */
+        virtual ~ImmutableIndex();
+
+        /**
+         * Returns the number of index buckets and labels, including underflow/inflow/overflow.
+         *
+         * @return the number of index buckets
+         * @draft ICU 51
+         */
+        int32_t getBucketCount() const;
+
+        /**
+         * Finds the index bucket for the given name and returns the number of that bucket.
+         * Use getBucket() to get the bucket's properties.
+         *
+         * @param name the string to be sorted into an index bucket
+         * @return the bucket number for the name
+         * @draft ICU 51
+         */
+        int32_t getBucketIndex(const UnicodeString &name, UErrorCode &errorCode) const;
+
+        /**
+         * Returns the index-th bucket. Returns NULL if the index is out of range.
+         *
+         * @param index bucket number
+         * @return the index-th bucket
+         * @draft ICU 51
+         */
+        const Bucket *getBucket(int32_t index) const;
+
+    private:
+        friend class AlphabeticIndex;
+
+        ImmutableIndex(BucketList *bucketList, Collator *collatorPrimaryOnly)
+                : buckets_(bucketList), collatorPrimaryOnly_(collatorPrimaryOnly) {}
+
+        BucketList *buckets_;
+        Collator *collatorPrimaryOnly_;
+    };
 
     /**
      * Construct an AlphabeticIndex object for the specified locale.  If the locale's
@@ -230,6 +357,14 @@ class U_I18N_API AlphabeticIndex: public UObject {
      virtual ~AlphabeticIndex();
 
 
+    /**
+     * Builds an immutable, thread-safe version of this instance, without data records.
+     *
+     * @return an immutable index instance
+     * @draft ICU 51
+     */
+    ImmutableIndex *buildImmutableIndex(UErrorCode &errorCode);
+
     /**
      * Get the Collator that establishes the ordering of the items in this index.
      * Ownership of the collator remains with the AlphabeticIndex instance.
@@ -269,7 +404,6 @@ class U_I18N_API AlphabeticIndex: public UObject {
     virtual AlphabeticIndex &setInflowLabel(const UnicodeString &inflowLabel, UErrorCode &status);
 
 
-
    /**
      * Get the special label used for items that sort after the last normal label,
      * and that would not otherwise have an appropriate label.
@@ -336,22 +470,6 @@ class U_I18N_API AlphabeticIndex: public UObject {
     virtual AlphabeticIndex &setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status);
 
 
-    /**
-     * Get the Unicode character (or tailored string) that defines an overflow bucket;
-     * that is anything greater than or equal to that string should go in that bucket,
-     * instead of with the last character. Normally that is the first character of the script
-     * after lowerLimit. Thus in X Y Z ... <i>Devanagari-ka</i>, the overflow character for Z
-     * would be the <i>Greek-alpha</i>.
-     *
-     * @param lowerLimit   The character below the overflow (or inflow) bucket
-     * @param status error code
-     * @return string that defines top of the overflow buck for lowerLimit, or an empty string if there is none
-     * @internal
-     */
-    virtual const UnicodeString &getOverflowComparisonString(const UnicodeString &lowerLimit,
-                                                             UErrorCode &status);
-
-
     /**
      * Add a record to the index.  Each record will be associated with an index Bucket
      *  based on the record's name.  The list of records for each bucket will be sorted
@@ -549,187 +667,90 @@ private:
      virtual UBool operator!=(const AlphabeticIndex& other) const;
 
      // Common initialization, for use from all constructors.
-     void init(UErrorCode &status);
+     void init(const Locale *locale, UErrorCode &status);
 
-     // Initialize & destruct static constants used by this class.
-     static void staticInit(UErrorCode &status);
+    /**
+     * This method is called to get the index exemplars. Normally these come from the locale directly,
+     * but if they aren't available, we have to synthesize them.
+     */
+    void addIndexExemplars(const Locale &locale, UErrorCode &status);
+    /**
+     * Add Chinese index characters from the tailoring.
+     */
+    UBool addChineseIndexCharacters(UErrorCode &errorCode);
 
-     // Pinyin stuff.  If the input name is Chinese, add the Pinyin prefix to the dest string.
-     void hackName(UnicodeString &dest, const UnicodeString &name, const Collator *coll);
-     void initPinyinBounds(const Collator *coll, UErrorCode &status);
+    UVector *firstStringsInScript(UErrorCode &status);
 
-   public:
-#ifndef U_HIDE_INTERNAL_API
-     /**
-      *   Delete all shared (static) data associated with an AlphabeticIndex.
-      *   Internal function, not intended for direct use.
-      *   @internal.
-      */
-     static void staticCleanup();
-#endif  /* U_HIDE_INTERNAL_API */
-   private:
+    static UnicodeString separated(const UnicodeString &item);
 
-     // Add index characters from the specified locale to the dest set.
-     // Does not remove any previous contents from dest.
-     static void getIndexExemplars(UnicodeSet &dest, const Locale &locale, UErrorCode &status);
+    /**
+     * Determine the best labels to use.
+     * This is based on the exemplars, but we also process to make sure that they are unique,
+     * and sort differently, and that the overall list is small enough.
+     */
+    void initLabels(UVector &indexCharacters, UErrorCode &errorCode) const;
+    BucketList *createBucketList(UErrorCode &errorCode) const;
+    void initBuckets(UErrorCode &errorCode);
+    void clearBuckets();
+    void internalResetBucketIterator();
 
-     UVector *firstStringsInScript(UErrorCode &status);
+public:
 
-     static UnicodeString separated(const UnicodeString &item);
-
-     static UnicodeSet *getScriptSet(UnicodeSet &dest, const UnicodeString &codePoint, UErrorCode &status);
-
-     void buildIndex(UErrorCode &status);
-     void buildBucketList(UErrorCode &status);
-     void bucketRecords(UErrorCode &status);
-
-
-  public:
-
-    //  The following internal items are declared public only to allow access from
-    //  implementation code written in plain C.  They are not intended for
-    //  public use.
+    //  The Record is declared public only to allow access from
+    //  implementation code written in plain C.
+    //  It is not intended for public use.
 
 #ifndef U_HIDE_INTERNAL_API
     /**
-     * A record, or item, in the index.
+     * A (name, data) pair, to be sorted by name into one of the index buckets.
+     * The user data is not used by the index implementation.
      * @internal
      */
-     struct Record: public UMemory {
-         AlphabeticIndex     *alphaIndex_;
-         const UnicodeString  name_;
-         UnicodeString        sortingName_;  // Usually the same as name_; different for Pinyin.
-         const void           *data_;
-         int32_t              serialNumber_;  // Defines sorting order for names that compare equal.
-         Record(AlphabeticIndex *alphaIndex, const UnicodeString &name, const void *data);
-         ~Record();
-     };
+    struct Record: public UMemory {
+        const UnicodeString  name_;
+        const void           *data_;
+        Record(const UnicodeString &name, const void *data);
+        ~Record();
+    };
 #endif  /* U_HIDE_INTERNAL_API */
 
-     /**
-       * Holds all user records before they are distributed into buckets.
-       * Type of contents is (Record *)
-       * @internal
-       */
-     UVector  *inputRecords_;
-
-     /**
-      * A Bucket holds an index label and references to everything belonging to that label.
-      * For implementation use only.  Declared public because pure C implementation code needs access.
-      * @internal
-      */
-     struct Bucket: public UMemory {
-         UnicodeString     label_;
-         UnicodeString     lowerBoundary_;
-         UAlphabeticIndexLabelType labelType_;
-         UVector           *records_; // Records are owned by inputRecords_ vector.
-
-         Bucket(const UnicodeString &label,   // Parameter strings are copied.
-                const UnicodeString &lowerBoundary,
-                UAlphabeticIndexLabelType type, UErrorCode &status);
-         ~Bucket();
-     };
-
-  public:
-
-    /** 
-      * Language Types.  For internal ICU use only.
-      * @internal (but not hidden with U_HIDE_INTERNAL_API because it is used in public API)
-      */
-    enum ELangType {
-        /** @internal */
-        kNormal,
-        /** @internal */
-        kSimplified,
-        /** @internal */
-        kTraditional
-    };
+private:
 
     /**
-      * Get the Language Type for this Index.  Based on the locale.
-      * @internal
-      */
-    static ELangType  langTypeFromLocale(const Locale &loc);
+     * Holds all user records before they are distributed into buckets.
+     * Type of contents is (Record *)
+     * @internal
+     */
+    UVector  *inputList_;
 
+    int32_t  labelsIterIndex_;        // Index of next item to return.
+    int32_t  itemsIterIndex_;
+    Bucket   *currentBucket_;         // While an iteration of the index in underway,
+                                      //   point to the bucket for the current label.
+                                      // NULL when no iteration underway.
 
-   private:
+    int32_t    maxLabelCount_;        // Limit on # of labels permitted in the index.
 
-     // Holds the contents of this index, buckets of user items.
-     // UVector elements are of type (Bucket *)
-     UVector *bucketList_;
+    UnicodeSet *initialLabels_;       // Initial (unprocessed) set of Labels.  Union
+                                      //   of those explicitly set by the user plus
+                                      //   those from locales.  Raw values, before
+                                      //   crunching into bucket labels.
 
-     int32_t  labelsIterIndex_;      // Index of next item to return.
-     int32_t  itemsIterIndex_;
-     Bucket   *currentBucket_;       // While an iteration of the index in underway,
-                                     //   point to the bucket for the current label.
-                                     // NULL when no iteration underway.
+    UVector *firstCharsInScripts_;    // The first character from each script,
+                                      //   in collation order.
 
-     UBool    indexBuildRequired_;   //  Caller has made changes to the index that
-                                     //  require rebuilding & bucketing before the
-                                     //  contents can be iterated.
+    RuleBasedCollator *collator_;
+    RuleBasedCollator *collatorPrimaryOnly_;
 
-     int32_t    maxLabelCount_;      // Limit on # of labels permitted in the index.
+    // Lazy evaluated: null means that we have not built yet.
+    BucketList *buckets_;
 
-     UHashtable *alreadyIn_;         // Key=UnicodeString, value=UnicodeSet
-
-     UnicodeSet *initialLabels_;     // Initial (unprocessed) set of Labels.  Union
-                                     //   of those explicitly set by the user plus
-                                     //   those from locales.  Raw values, before
-                                     //   crunching into bucket labels.
-
-     UVector    *labels_;            // List of Labels, after processing, sorting.
-                                     //   Contents are (UnicodeString *)
-
-     UnicodeSet *noDistinctSorting_; // As the set of labels is built, strings may 
-                                     // be discarded from the exemplars. This contains 
-                                     // some of the discards, and is
-                                     // intended for debugging.
-
-     UnicodeSet *notAlphabetic_;     // As the set of labels is built, strings may 
-                                     // be discarded from the exemplars. This contains 
-                                     // some of the discards, and is
-                                     // intended for debugging.
-
-
-     UVector    *firstScriptCharacters_;  // The first character from each script,
-                                          //   in collation order.
-
-     Locale    locale_;
-     Collator  *collator_;
-     Collator  *collatorPrimaryOnly_;
-
-     UnicodeString  inflowLabel_;
-     UnicodeString  overflowLabel_;
-     UnicodeString  underflowLabel_;
-     UnicodeString  overflowComparisonString_;
-
-     ELangType      langType_;        // The language type, simplified Chinese, Traditional Chinese,
-                                      //  or not Chinese (Normal).  Part of the Pinyin support
-
-     typedef const UChar PinyinLookup[24][3];
-     static PinyinLookup   HACK_PINYIN_LOOKUP_SHORT;
-     static PinyinLookup   HACK_PINYIN_LOOKUP_LONG;
-     
-     // These will be lazily set to the short or long tables based on which
-     //   Chinese collation has been configured into the ICU library.
-     static PinyinLookup   *HACK_PINYIN_LOOKUP;
-     static const UChar    *PINYIN_LOWER_BOUNDS;
-
-
-
-     int32_t    recordCounter_;         // Counts Records created.  For minting record serial numbers.
-
-// Constants.  Lazily initialized the first time an AlphabeticIndex object is created.
-
-     static UnicodeSet *ALPHABETIC;
-     static UnicodeSet *CORE_LATIN;
-     static UnicodeSet *ETHIOPIC;
-     static UnicodeSet *HANGUL;
-     static UnicodeSet *IGNORE_SCRIPTS;
-     static UnicodeSet *TO_TRY;
-     static UnicodeSet *UNIHAN;
-     static const UnicodeString *EMPTY_STRING;
+    UnicodeString  inflowLabel_;
+    UnicodeString  overflowLabel_;
+    UnicodeString  underflowLabel_;
+    UnicodeString  overflowComparisonString_;
 
+    UnicodeString emptyString_;
 };
 
 U_NAMESPACE_END
diff --git a/icu4c/source/test/intltest/alphaindextst.cpp b/icu4c/source/test/intltest/alphaindextst.cpp
index adb9b0fe957..8e70dcfa18c 100644
--- a/icu4c/source/test/intltest/alphaindextst.cpp
+++ b/icu4c/source/test/intltest/alphaindextst.cpp
@@ -12,6 +12,7 @@
 
 #include "unicode/alphaindex.h"
 #include "unicode/coll.h"
+#include "unicode/localpointer.h"
 #include "unicode/tblcoll.h"
 #include "unicode/uniset.h"
 
@@ -20,6 +21,24 @@
 // #include <string>
 // #include <iostream>
 
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+
+namespace {
+
+UnicodeString joinLabelsAndAppend(AlphabeticIndex::ImmutableIndex &index, UnicodeString &dest) {
+    int32_t oldLength = dest.length();
+    const AlphabeticIndex::Bucket *bucket;
+    for (int32_t i = 0; (bucket = index.getBucket(i)) != NULL; ++i) {
+        if (dest.length() > oldLength) {
+            dest.append((UChar)0x3A);  // ':'
+        }
+        dest.append(bucket->getLabel());
+    }
+    return dest;
+}
+
+}  // namespace
+
 AlphabeticIndexTest::AlphabeticIndexTest() {
 }
 
@@ -29,27 +48,18 @@ AlphabeticIndexTest::~AlphabeticIndexTest() {
 void AlphabeticIndexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
 {
     if (exec) logln("TestSuite AlphabeticIndex: ");
-    switch (index) {
-
-        case 0: name = "APITest";
-            if (exec) APITest();
-            break;
-
-        case 1: name = "ManyLocales";
-            if (exec) ManyLocalesTest();
-            break;
-
-        case 2: name = "HackPinyinTest";
-            if (exec) HackPinyinTest();
-            break;
-
-        case 3: name = "TestBug9009";
-            if (exec) TestBug9009();
-            break;
-
-        default: name = "";
-            break; //needed to end loop
-    }
+    TESTCASE_AUTO_BEGIN;
+    TESTCASE_AUTO(APITest);
+    TESTCASE_AUTO(ManyLocalesTest);
+    TESTCASE_AUTO(HackPinyinTest);
+    TESTCASE_AUTO(TestBug9009);
+    TESTCASE_AUTO(TestIndexCharactersList);
+    TESTCASE_AUTO(TestHaniFirst);
+    TESTCASE_AUTO(TestPinyinFirst);
+    TESTCASE_AUTO(TestSchSt);
+    TESTCASE_AUTO(TestNoLabels);
+    TESTCASE_AUTO(TestChineseZhuyin);
+    TESTCASE_AUTO_END;
 }
 
 #define TEST_CHECK_STATUS {if (U_FAILURE(status)) {dataerrln("%s:%d: Test failure.  status=%s", \
@@ -85,10 +95,8 @@ void AlphabeticIndexTest::APITest() {
     index = new AlphabeticIndex(coll, status);
     TEST_CHECK_STATUS;
     TEST_ASSERT(coll == &index->getCollator());
-    // TODO: The bucket count for an index built from a collator should be one, the underflow label.
-    //       The current implementation adds A-Z if the index is otherwise empty.
-    // TEST_ASSERT(1 == index->getBucketCount(status));
-    TEST_ASSERT(28 == index->getBucketCount(status));
+    assertEquals("only the underflow label in an index built from a collator",
+                 1, index->getBucketCount(status));
     TEST_CHECK_STATUS;
     delete index;
     
@@ -104,12 +112,8 @@ void AlphabeticIndexTest::APITest() {
     TEST_CHECK_STATUS;
     lc = index->getBucketCount(status);
     TEST_CHECK_STATUS;
-    // TODO:  should get 31.  Java also gives 30.  Needs fixing
-    TEST_ASSERT(30 == lc);    // 26 Latin letters plus
-    // TEST_ASSERT(31 == lc);    // 26 Latin letters plus
-                              //  2 Cyrillic letters plus
-                              //  1 inflow label plus
-                              //  two under/overflow labels.
+    assertEquals("underflow, A-Z, inflow, 2 Cyrillic, overflow",
+                 31, index->getBucketCount(status));
     // std::cout << lc << std::endl;
     delete index;
 
@@ -298,17 +302,26 @@ void AlphabeticIndexTest::APITest() {
 
     delete index;
     index = new AlphabeticIndex(Locale::createFromName("ru"), status);
-        //Locale loc = Locale::createFromName(localeName);
     TEST_CHECK_STATUS;
+    assertEquals("Russian index.getBucketCount()", 32, index->getBucketCount(status));
+    // Latin-script names should go into the underflow label (0)
+    // if the Russian collation does not use script reordering,
+    // but into the overflow label (getBucketCount()-1)
+    // if Russian sorts Cyrillic first.
+    int32_t reorderCodes[20];
+    int32_t expectedLatinIndex = 0;
+    if (index->getCollator().getReorderCodes(reorderCodes, LENGTHOF(reorderCodes), status) > 0) {
+        expectedLatinIndex = index->getBucketCount(status) - 1;
+    }
     n = index->getBucketIndex(adam, status);
     TEST_CHECK_STATUS;
-    TEST_ASSERT(n == 32);   // Now Latin is in overflow label for Russian collation
+    assertEquals("Russian index.getBucketIndex(adam)", expectedLatinIndex, n);
     n = index->getBucketIndex(baker, status);
-    TEST_ASSERT(n == 32);
+    assertEquals("Russian index.getBucketIndex(baker)", expectedLatinIndex, n);
     n = index->getBucketIndex(Cyrillic, status);
-    TEST_ASSERT(n == 1);    // First label
+    assertEquals("Russian index.getBucketIndex(Cyrillic)", 1, n);
     n = index->getBucketIndex(zed, status);
-    TEST_ASSERT(n == 32);
+    assertEquals("Russian index.getBucketIndex(zed)", expectedLatinIndex, n);
 
     delete index;
 
@@ -327,7 +340,6 @@ static const char * KEY_LOCALES[] = {
 void AlphabeticIndexTest::ManyLocalesTest() {
     UErrorCode status = U_ZERO_ERROR;
     int32_t  lc = 0;
-    AlphabeticIndex *index = NULL;
 
     for (int i=0; ; ++i) {
         status = U_ZERO_ERROR;
@@ -337,23 +349,40 @@ void AlphabeticIndexTest::ManyLocalesTest() {
         }
         // std::cout <<  localeName << "  ";
         Locale loc = Locale::createFromName(localeName);
-        index = new AlphabeticIndex(loc, status);
+        AlphabeticIndex index(loc, status);
         TEST_CHECK_STATUS;
-        lc = index->getBucketCount(status);
+        lc = index.getBucketCount(status);
         TEST_CHECK_STATUS;
         // std::cout << "getBucketCount() == " << lc << std::endl;
 
-        while (index->nextBucket(status)) {
+        LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status));
+        TEST_CHECK_STATUS;
+        TEST_ASSERT(lc == immIndex->getBucketCount());
+
+        assertEquals("initial bucket index", -1, index.getBucketIndex());
+        int32_t bucketIndex = 0;
+        while (index.nextBucket(status)) {
             TEST_CHECK_STATUS;
-            const UnicodeString &label = index->getBucketLabel();
+            assertEquals("bucket index", bucketIndex, index.getBucketIndex());
+            const UnicodeString &label = index.getBucketLabel();
             TEST_ASSERT(label.length()>0);
             // std::string ss;
             // std::cout << ":" << label.toUTF8String(ss);
+            const AlphabeticIndex::Bucket *bucket = immIndex->getBucket(bucketIndex);
+            TEST_ASSERT(bucket != NULL);
+            assertEquals("bucket label vs. immutable: locale=" + UnicodeString(localeName) +
+                         " index=" + bucketIndex,
+                         label, bucket->getLabel());
+            TEST_ASSERT(&label != &bucket->getLabel());  // not the same pointers
+            UAlphabeticIndexLabelType labelType = index.getBucketLabelType();
+            TEST_ASSERT(labelType == bucket->getLabelType());
+            ++bucketIndex;
         }
         // std::cout << ":" << std::endl;
 
-
-        delete index;
+        TEST_ASSERT(immIndex->getBucketCount() == bucketIndex);
+        TEST_ASSERT(immIndex->getBucket(-1) == NULL);
+        TEST_ASSERT(immIndex->getBucket(bucketIndex) == NULL);
     }
 }
 
@@ -447,6 +476,191 @@ void AlphabeticIndexTest::TestBug9009() {
     aindex.nextBucket(status);  // Crash here before bug was fixed.
     TEST_CHECK_STATUS; 
 }
-    
+
+static const char *localeAndIndexCharactersLists[][2] = {
+    /* Arabic*/ {"ar", "\\u0627:\\u0628:\\u062A:\\u062B:\\u062C:\\u062D:\\u062E:\\u062F:\\u0630:\\u0631:\\u0632:\\u0633:\\u0634:\\u0635:\\u0636:\\u0637:\\u0638:\\u0639:\\u063A:\\u0641:\\u0642:\\u0643:\\u0644:\\u0645:\\u0646:\\u0647:\\u0648:\\u064A"},
+    /* Bulgarian*/  {"bg", "\\u0410:\\u0411:\\u0412:\\u0413:\\u0414:\\u0415:\\u0416:\\u0417:\\u0418:\\u0419:\\u041A:\\u041B:\\u041C:\\u041D:\\u041E:\\u041F:\\u0420:\\u0421:\\u0422:\\u0423:\\u0424:\\u0425:\\u0426:\\u0427:\\u0428:\\u0429:\\u042E:\\u042F"},
+    /* Catalan*/    {"ca", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
+    /* Czech*/  {"cs", "A:B:C:\\u010C:D:E:F:G:H:CH:I:J:K:L:M:N:O:P:Q:R:\\u0158:S:\\u0160:T:U:V:W:X:Y:Z:\\u017D"},
+    /* Danish*/ {"da", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\\u00C6:\\u00D8:\\u00C5"},
+    /* German*/ {"de", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:Sch:St:T:U:V:W:X:Y:Z"},
+    /* Greek*/  {"el", "\\u0391:\\u0392:\\u0393:\\u0394:\\u0395:\\u0396:\\u0397:\\u0398:\\u0399:\\u039A:\\u039B:\\u039C:\\u039D:\\u039E:\\u039F:\\u03A0:\\u03A1:\\u03A3:\\u03A4:\\u03A5:\\u03A6:\\u03A7:\\u03A8:\\u03A9"},
+    /* English*/    {"en", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
+    /* Spanish*/    {"es", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\\u00D1:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
+    /* Estonian*/   {"et", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:\\u0160:Z:\\u017D:T:U:V:\\u00D5:\\u00C4:\\u00D6:\\u00DC:X:Y"},
+    /* Basque*/ {"eu", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
+    /* Finnish*/    {"fi", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\\u00C5:\\u00C4:\\u00D6"},
+    /* Filipino*/   {"fil", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
+    /* French*/ {"fr", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
+    /* Hebrew*/ {"he", "\\u05D0:\\u05D1:\\u05D2:\\u05D3:\\u05D4:\\u05D5:\\u05D6:\\u05D7:\\u05D8:\\u05D9:\\u05DB:\\u05DC:\\u05DE:\\u05E0:\\u05E1:\\u05E2:\\u05E4:\\u05E6:\\u05E7:\\u05E8:\\u05E9:\\u05EA"},
+    /* Icelandic*/  {"is", "A:\\u00C1:B:C:D:\\u00D0:E:\\u00C9:F:G:H:I:\\u00CD:J:K:L:M:N:O:\\u00D3:P:Q:R:S:T:U:\\u00DA:V:W:X:Y:\\u00DD:Z:\\u00DE:\\u00C6:\\u00D6"},
+    /* Italian*/    {"it", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
+    /* Japanese*/   {"ja", "\\u3042:\\u304B:\\u3055:\\u305F:\\u306A:\\u306F:\\u307E:\\u3084:\\u3089:\\u308F"},
+    /* Korean*/ {"ko", "\\u3131:\\u3134:\\u3137:\\u3139:\\u3141:\\u3142:\\u3145:\\u3147:\\u3148:\\u314A:\\u314B:\\u314C:\\u314D:\\u314E"},
+    /* Lithuanian*/ {"lt", "A:B:C:\\u010C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:\\u0160:T:U:V:Z:\\u017D"},
+    // This should be the correct data.  Commented till it is fixed in CLDR collation data.
+    // {"lv", "A:B:C:\\u010C:D:E:F:G:\\u0122:H:I:Y:J:K:\\u0136:L:\\u013B:M:N:\\u0145:O:P:Q:R:S:\\u0160:T:U:V:W:X:Z:\\u017D"},
+    /* Latvian*/    {"lv", "A:B:C:\\u010C:D:E:F:G:\\u0122:H:I:J:K:\\u0136:L:\\u013B:M:N:\\u0145:O:P:Q:R:S:\\u0160:T:U:V:W:X:Y:Z:\\u017D"},
+    /* Norwegian Bokm\\u00E5l*/  {"nb", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\\u00C6:\\u00D8:\\u00C5"},
+    /* Dutch*/  {"nl", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
+    /* Polish*/ {"pl", "A:\\u0104:B:C:\\u0106:D:E:\\u0118:F:G:H:I:J:K:L:\\u0141:M:N:\\u0143:O:\\u00D3:P:Q:R:S:\\u015A:T:U:V:W:X:Y:Z:\\u0179:\\u017B"},
+    /* Portuguese*/ {"pt", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
+    /* Romanian*/   {"ro", "A:\\u0102:\\u00C2:B:C:D:E:F:G:H:I:\\u00CE:J:K:L:M:N:O:P:Q:R:S:\\u0218:T:\\u021A:U:V:W:X:Y:Z"},
+    /* Russian*/    {"ru", "\\u0410:\\u0411:\\u0412:\\u0413:\\u0414:\\u0415:\\u0416:\\u0417:\\u0418:\\u0419:\\u041A:\\u041B:\\u041C:\\u041D:\\u041E:\\u041F:\\u0420:\\u0421:\\u0422:\\u0423:\\u0424:\\u0425:\\u0426:\\u0427:\\u0428:\\u0429:\\u042B:\\u042D:\\u042E:\\u042F"},
+    /* Slovak*/ {"sk", "A:\\u00C4:B:C:\\u010C:D:E:F:G:H:CH:I:J:K:L:M:N:O:\\u00D4:P:Q:R:S:\\u0160:T:U:V:W:X:Y:Z:\\u017D"},
+    /* Slovenian*/  {"sl", "A:B:C:\\u010C:\\u0106:D:\\u0110:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:\\u0160:T:U:V:W:X:Y:Z:\\u017D"},
+    /* Serbian*/    {"sr", "\\u0410:\\u0411:\\u0412:\\u0413:\\u0414:\\u0402:\\u0415:\\u0416:\\u0417:\\u0418:\\u0408:\\u041A:\\u041B:\\u0409:\\u041C:\\u041D:\\u040A:\\u041E:\\u041F:\\u0420:\\u0421:\\u0422:\\u040B:\\u0423:\\u0424:\\u0425:\\u0426:\\u0427:\\u040F:\\u0428"},
+    /* Swedish*/    {"sv", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\\u00C5:\\u00C4:\\u00D6"},
+    /* Turkish*/    {"tr", "A:B:C:\\u00C7:D:E:F:G:H:I:\\u0130:J:K:L:M:N:O:\\u00D6:P:Q:R:S:\\u015E:T:U:\\u00DC:V:W:X:Y:Z"},
+    /* Ukrainian*/  {"uk", "\\u0410:\\u0411:\\u0412:\\u0413:\\u0490:\\u0414:\\u0415:\\u0404:\\u0416:\\u0417:\\u0418:\\u0406:\\u0407:\\u0419:\\u041A:\\u041B:\\u041C:\\u041D:\\u041E:\\u041F:\\u0420:\\u0421:\\u0422:\\u0423:\\u0424:\\u0425:\\u0426:\\u0427:\\u0428:\\u0429:\\u042E:\\u042F"},
+    /* Vietnamese*/ {"vi", "A:\\u0102:\\u00C2:B:C:D:\\u0110:E:\\u00CA:F:G:H:I:J:K:L:M:N:O:\\u00D4:\\u01A0:P:Q:R:S:T:U:\\u01AF:V:W:X:Y:Z"},
+    /* Chinese*/    {"zh", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
+    /* Chinese (Traditional Han)*/  {"zh_Hant", "1\\u5283:2\\u5283:3\\u5283:4\\u5283:5\\u5283:6\\u5283:7\\u5283:8\\u5283:9\\u5283:10\\u5283:11\\u5283:12\\u5283:13\\u5283:14\\u5283:15\\u5283:16\\u5283:17\\u5283:18\\u5283:19\\u5283:20\\u5283:21\\u5283:22\\u5283:23\\u5283:24\\u5283:25\\u5283:26\\u5283:27\\u5283:28\\u5283:29\\u5283:30\\u5283:31\\u5283:32\\u5283:33\\u5283:35\\u5283:36\\u5283:39\\u5283:48\\u5283"},
+};
+
+void AlphabeticIndexTest::TestIndexCharactersList() {
+    UErrorCode status = U_ZERO_ERROR;
+    for (int32_t i = 0; i < LENGTHOF(localeAndIndexCharactersLists); ++i) {
+        const char *(&localeAndIndexCharacters)[2] = localeAndIndexCharactersLists[i];
+        const char *locale = localeAndIndexCharacters[0];
+        UnicodeString expectedIndexCharacters
+            = (UnicodeString("\\u2026:") + localeAndIndexCharacters[1] + ":\\u2026").unescape();
+        AlphabeticIndex index(locale, status);
+        TEST_CHECK_STATUS;
+        LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status));
+        TEST_CHECK_STATUS;
+
+        // Join the elements of the list to a string with delimiter ":"
+        UnicodeString actualIndexCharacters;
+        assertEquals(locale,
+                     expectedIndexCharacters,
+                     joinLabelsAndAppend(*immIndex, actualIndexCharacters));
+        logln(locale + UnicodeString(": ") + actualIndexCharacters);
+    }
+}
+
+void AlphabeticIndexTest::TestHaniFirst() {
+    UErrorCode status = U_ZERO_ERROR;
+    LocalPointer<RuleBasedCollator> coll(
+        static_cast<RuleBasedCollator *>(Collator::createInstance(Locale::getRoot(), status)));
+    int32_t reorderCodes[] = { USCRIPT_HAN };
+    coll->setReorderCodes(reorderCodes, LENGTHOF(reorderCodes), status);
+    TEST_CHECK_STATUS; 
+    AlphabeticIndex index(coll.orphan(), status);
+    TEST_CHECK_STATUS; 
+    assertEquals("getBucketCount()", 1, index.getBucketCount(status));   // ... (underflow only)
+    index.addLabels(Locale::getEnglish(), status);
+    assertEquals("getBucketCount()", 28, index.getBucketCount(status));  // ... A-Z ...
+    int32_t bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x897f), status);
+    assertEquals("getBucketIndex(U+897F)", 0, bucketIndex);  // underflow bucket
+    bucketIndex = index.getBucketIndex("i", status);
+    assertEquals("getBucketIndex(i)", 9, bucketIndex);
+    bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x03B1), status);
+    assertEquals("getBucketIndex(Greek alpha)", 27, bucketIndex);
+    // TODO: Test with an unassigned code point (not just U+FFFF)
+    // when unassigned code points are not in the Hani reordering group any more.
+    // String unassigned = UTF16.valueOf(0x50005);
+    bucketIndex = index.getBucketIndex(UnicodeString((UChar)0xFFFF), status);
+    assertEquals("getBucketIndex(U+FFFF)", 27, bucketIndex);
+}
+
+void AlphabeticIndexTest::TestPinyinFirst() {
+    UErrorCode status = U_ZERO_ERROR;
+    LocalPointer<RuleBasedCollator> coll(
+        static_cast<RuleBasedCollator *>(Collator::createInstance(Locale::getChinese(), status)));
+    int32_t reorderCodes[] = { USCRIPT_HAN };
+    coll->setReorderCodes(reorderCodes, LENGTHOF(reorderCodes), status);
+    TEST_CHECK_STATUS; 
+    AlphabeticIndex index(coll.orphan(), status);
+    TEST_CHECK_STATUS; 
+    assertEquals("getBucketCount()", 1, index.getBucketCount(status));   // ... (underflow only)
+    index.addLabels(Locale::getChinese(), status);
+    assertEquals("getBucketCount()", 28, index.getBucketCount(status));  // ... A-Z ...
+    int bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x897f), status);
+    assertEquals("getBucketIndex(U+897F)", 'X' - 'A' + 1, bucketIndex);
+    bucketIndex = index.getBucketIndex("i", status);
+    assertEquals("getBucketIndex(i)", 9, bucketIndex);
+    bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x03B1), status);
+    assertEquals("getBucketIndex(Greek alpha)", 27, bucketIndex);
+    // TODO: Test with an unassigned code point (not just U+FFFF)
+    // when unassigned code points are not in the Hani reordering group any more.
+    // String unassigned = UTF16.valueOf(0x50005);
+    bucketIndex = index.getBucketIndex(UnicodeString((UChar)0xFFFF), status);
+    assertEquals("getBucketIndex(U+FFFF)", 27, bucketIndex);
+}
+
+void AlphabeticIndexTest::TestSchSt() {
+    UErrorCode status = U_ZERO_ERROR;
+    AlphabeticIndex index(Locale::getGerman(), status);
+    index.addLabels(UnicodeSet("[\\u00C6{Sch*}{St*}]", status), status);
+    TEST_CHECK_STATUS; 
+    // ... A AE-ligature B-R S Sch St T-Z ...
+    LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status));
+    TEST_CHECK_STATUS; 
+    assertEquals("getBucketCount()", 31, index.getBucketCount(status));
+    assertEquals("immutable getBucketCount()", 31, immIndex->getBucketCount());
+    static const struct TestCase {
+        const char *name;
+        int32_t bucketIndex;
+        const char *bucketLabel;
+    } testCases[] = {
+        // name, bucket index, bucket label
+        { "Adelbert", 1, "A" },
+        { "Afrika", 1, "A" },
+        { "\\u00C6sculap", 2, "\\u00C6" },
+        { "Aesthet", 2, "\\u00C6" },
+        { "Berlin", 3, "B" },
+        { "Rilke", 19, "R" },
+        { "Sacher", 20, "S" },
+        { "Seiler", 20, "S" },
+        { "Sultan", 20, "S" },
+        { "Schiller", 21, "Sch" },
+        { "Steiff", 22, "St" },
+        { "Thomas", 23, "T" }
+    };
+    for (int32_t i = 0; i < LENGTHOF(testCases); ++i) {
+        const TestCase &testCase = testCases[i];
+        UnicodeString name = UnicodeString(testCase.name).unescape();
+        UnicodeString label = UnicodeString(testCase.bucketLabel).unescape();
+        char msg[100];
+        sprintf(msg, "getBucketIndex(%s)", testCase.name);
+        assertEquals(msg, testCase.bucketIndex, index.getBucketIndex(name, status));
+        sprintf(msg, "immutable getBucketIndex(%s)", testCase.name);
+        assertEquals(msg, testCase.bucketIndex, immIndex->getBucketIndex(name, status));
+        sprintf(msg, "immutable bucket label (%s)", testCase.name);
+        assertEquals(msg, label, immIndex->getBucket(testCase.bucketIndex)->getLabel());
+    }
+}
+
+void AlphabeticIndexTest::TestNoLabels() {
+    UErrorCode status = U_ZERO_ERROR;
+    LocalPointer<RuleBasedCollator> coll(
+        static_cast<RuleBasedCollator *>(Collator::createInstance(Locale::getRoot(), status)));
+    TEST_CHECK_STATUS; 
+    AlphabeticIndex index(coll.orphan(), status);
+    TEST_CHECK_STATUS; 
+    index.addRecord(UnicodeString((UChar)0x897f), NULL, status);
+    index.addRecord("i", NULL, status);
+    index.addRecord(UnicodeString((UChar)0x03B1), NULL, status);
+    assertEquals("getBucketCount()", 1, index.getBucketCount(status));  // ...
+    TEST_ASSERT(index.nextBucket(status));
+    assertEquals("underflow label type", U_ALPHAINDEX_UNDERFLOW, index.getBucketLabelType());
+    assertEquals("all records in the underflow bucket", 3, index.getBucketRecordCount());
+}
+
+void AlphabeticIndexTest::TestChineseZhuyin() {
+    UErrorCode status = U_ZERO_ERROR;
+    char loc[100];
+    uloc_forLanguageTag("zh-u-co-zhuyin", loc, LENGTHOF(loc), NULL, &status);
+    AlphabeticIndex index(loc, status);
+    LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status));
+    TEST_CHECK_STATUS; 
+    assertEquals("getBucketCount()", 38, immIndex->getBucketCount());
+    assertEquals("label 1", UnicodeString((UChar)0x3105), immIndex->getBucket(1)->getLabel());
+    assertEquals("label 2", UnicodeString((UChar)0x3106), immIndex->getBucket(2)->getLabel());
+    assertEquals("label 3", UnicodeString((UChar)0x3107), immIndex->getBucket(3)->getLabel());
+    assertEquals("label 4", UnicodeString((UChar)0x3108), immIndex->getBucket(4)->getLabel());
+    assertEquals("label 5", UnicodeString((UChar)0x3109), immIndex->getBucket(5)->getLabel());
+}
 
 #endif
diff --git a/icu4c/source/test/intltest/alphaindextst.h b/icu4c/source/test/intltest/alphaindextst.h
index 8c8392dd287..2f864712d05 100644
--- a/icu4c/source/test/intltest/alphaindextst.h
+++ b/icu4c/source/test/intltest/alphaindextst.h
@@ -1,6 +1,6 @@
 /********************************************************************
  * COPYRIGHT:
- * Copyright (c) 2012, International Business Machines Corporation and
+ * Copyright (c) 2012-2013, International Business Machines Corporation and
  * others. All Rights Reserved.
  ********************************************************************/
 //
@@ -24,6 +24,27 @@ public:
     virtual void ManyLocalesTest();
     virtual void HackPinyinTest();
     virtual void TestBug9009();
+    void TestIndexCharactersList();
+    /**
+     * Test AlphabeticIndex vs. root with script reordering.
+     */
+    void TestHaniFirst();
+    /**
+     * Test AlphabeticIndex vs. Pinyin with script reordering.
+     */
+    void TestPinyinFirst();
+    /**
+     * Test labels with multiple primary weights.
+     */
+    void TestSchSt();
+    /**
+     * With no real labels, there should be only the underflow label.
+     */
+    void TestNoLabels();
+    /**
+     * Test with the Bopomofo-phonetic tailoring.
+     */
+    void TestChineseZhuyin();
 };
 
 #endif
diff --git a/icu4c/source/test/intltest/intltest.cpp b/icu4c/source/test/intltest/intltest.cpp
index 92d339f9866..6d08af153d8 100644
--- a/icu4c/source/test/intltest/intltest.cpp
+++ b/icu4c/source/test/intltest/intltest.cpp
@@ -233,6 +233,14 @@ IntlTest::appendHex(uint32_t number,
         0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0
     }; /* "0123456789ABCDEF" */
 
+    if (digits < 0) {  // auto-digits
+        digits = 2;
+        uint32_t max = 0xff;
+        while (number > max) {
+            digits += 2;
+            max = (max << 8) | 0xff;
+        }
+    }
     switch (digits)
     {
     case 8:
@@ -258,6 +266,13 @@ IntlTest::appendHex(uint32_t number,
     return target;
 }
 
+UnicodeString
+IntlTest::toHex(uint32_t number, int32_t digits) {
+    UnicodeString result;
+    appendHex(number, digits, result);
+    return result;
+}
+
 static inline UBool isPrintable(UChar32 c) {
     return c <= 0x7E && (c >= 0x20 || c == 9 || c == 0xA || c == 0xD);
 }
@@ -1728,6 +1743,23 @@ UBool IntlTest::assertEquals(const char* message,
     return TRUE;
 }
 
+UBool IntlTest::assertEquals(const char* message,
+                             int32_t expected,
+                             int32_t actual) {
+    if (expected != actual) {
+        errln((UnicodeString)"FAIL: " + message + "; got " +
+              actual + "=0x" + toHex(actual) +
+              "; expected " + expected + "=0x" + toHex(expected));
+        return FALSE;
+    }
+#ifdef VERBOSE_ASSERTIONS
+    else {
+        logln((UnicodeString)"Ok: " + message + "; got " + actual + "=0x" + toHex(actual));
+    }
+#endif
+    return TRUE;
+}
+
 #if !UCONFIG_NO_FORMATTING
 UBool IntlTest::assertEquals(const char* message,
                              const Formattable& expected,
diff --git a/icu4c/source/test/intltest/intltest.h b/icu4c/source/test/intltest/intltest.h
index 395fd3329ce..e71576d259b 100644
--- a/icu4c/source/test/intltest/intltest.h
+++ b/icu4c/source/test/intltest/intltest.h
@@ -241,6 +241,7 @@ protected:
                        const UnicodeString& actual, UBool possibleDataError=FALSE);
     UBool assertEquals(const char* message, const char* expected,
                        const char* actual);
+    UBool assertEquals(const char* message, int32_t expected, int32_t actual);
 #if !UCONFIG_NO_FORMATTING
     UBool assertEquals(const char* message, const Formattable& expected,
                        const Formattable& actual);
@@ -299,7 +300,12 @@ protected:
 
     static UnicodeString &prettify(const UnicodeString &source, UnicodeString &target);
     static UnicodeString prettify(const UnicodeString &source, UBool parseBackslash=FALSE);
+    // digits=-1 determines the number of digits automatically
     static UnicodeString &appendHex(uint32_t number, int32_t digits, UnicodeString &target);
+    static UnicodeString toHex(uint32_t number, int32_t digits=-1);
+    static inline UnicodeString toHex(int32_t number, int32_t digits=-1) {
+        return toHex((uint32_t)number, digits);
+    }
 
 public:
     static void setICU_DATA();       // Set up ICU_DATA if necessary.