ICU-12507 rbbi, switch impl from UTrie to UTrie2.

X-SVN-Rev: 40270
2025-04-20 20:19:32 +00:00 · 2017-07-19 22:31:12 +00:00 · 2017-07-19 22:31:12 +00:00 · 1292197198
commit 1292197198
parent b6cd1cddeb 4a5b4e4b44
12 changed files with 138 additions and 277 deletions
--- a/icu4c/source/common/rbbi.cpp
+++ b/icu4c/source/common/rbbi.cpp
@ -1078,7 +1078,7 @@ int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) {
            // Note:  the 16 in UTRIE_GET16 refers to the size of the data being returned,
            //        not the size of the character going in, which is a UChar32.
            //
-            UTRIE_GET16(&fData->fTrie, c, category);
+            category = UTRIE2_GET16(fData->fTrie, c);

            // Check the dictionary bit in the character's category.
            //    Counter is only used by dictionary based iterators (subclasses).
@ -1275,7 +1275,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
            // Note:  the 16 in UTRIE_GET16 refers to the size of the data being returned,
            //        not the size of the character going in, which is a UChar32.
            //
-            UTRIE_GET16(&fData->fTrie, c, category);
+            category = UTRIE2_GET16(fData->fTrie, c);

            // Check the dictionary bit in the character's category.
            //    Counter is only used by dictionary based iterators (subclasses).
@ -1510,26 +1510,6 @@ BreakIterator *  RuleBasedBreakIterator::createBufferClone(void * /*stackBuffer*
 }


-//-------------------------------------------------------------------------------
-//
-//  isDictionaryChar      Return true if the category lookup for this char
-//                        indicates that it is in the set of dictionary lookup
-//                        chars.
-//
-//                        This function is intended for use by dictionary based
-//                        break iterators.
-//
-//-------------------------------------------------------------------------------
-/*UBool RuleBasedBreakIterator::isDictionaryChar(UChar32   c) {
-    if (fData == NULL) {
-        return FALSE;
-    }
-    uint16_t category;
-    UTRIE_GET16(&fData->fTrie, c, category);
-    return (category & 0x4000) != 0;
-}*/
-
-
 //-------------------------------------------------------------------------------
 //
 //  checkDictionary       This function handles all processing of characters in
@ -1569,7 +1549,7 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
    int32_t     foundBreakCount = 0;
    UChar32     c = utext_current32(fText);

-    UTRIE_GET16(&fData->fTrie, c, category);
+    category = UTRIE2_GET16(fData->fTrie, c);

    // Is the character we're starting on a dictionary character? If so, we
    // need to back up to include the entire run; otherwise the results of
@ -1581,7 +1561,7 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
            do {
                utext_next32(fText);          // TODO:  recast to work directly with postincrement.
                c = utext_current32(fText);
-                UTRIE_GET16(&fData->fTrie, c, category);
+                category = UTRIE2_GET16(fData->fTrie, c);
            } while (c != U_SENTINEL && (category & 0x4000));
            // Back up to the last dictionary character
            rangeEnd = (int32_t)UTEXT_GETNATIVEINDEX(fText);
@ -1597,7 +1577,7 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
        else {
            do {
                c = UTEXT_PREVIOUS32(fText);
-                UTRIE_GET16(&fData->fTrie, c, category);
+                category = UTRIE2_GET16(fData->fTrie, c);
            }
            while (c != U_SENTINEL && (category & 0x4000));
            // Back up to the last dictionary character
@ -1611,7 +1591,7 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
            }
            rangeStart = (int32_t)UTEXT_GETNATIVEINDEX(fText);;
        }
-        UTRIE_GET16(&fData->fTrie, c, category);
+        category = UTRIE2_GET16(fData->fTrie, c);
    }

    // Loop through the text, looking for ranges of dictionary characters.
@ -1622,13 +1602,13 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
    if (reverse) {
        utext_setNativeIndex(fText, rangeStart);
        c = utext_current32(fText);
-        UTRIE_GET16(&fData->fTrie, c, category);
+        category = UTRIE2_GET16(fData->fTrie, c);
    }
    while(U_SUCCESS(status)) {
        while((current = (int32_t)UTEXT_GETNATIVEINDEX(fText)) < rangeEnd && (category & 0x4000) == 0) {
            utext_next32(fText);           // TODO:  tweak for post-increment operation
            c = utext_current32(fText);
-            UTRIE_GET16(&fData->fTrie, c, category);
+            category = UTRIE2_GET16(fData->fTrie, c);
        }
        if (current >= rangeEnd) {
            break;
@ -1646,7 +1626,7 @@ int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,

        // Reload the loop variables for the next go-round
        c = utext_current32(fText);
-        UTRIE_GET16(&fData->fTrie, c, category);
+        category = UTRIE2_GET16(fData->fTrie, c);
    }

    // If we found breaks, build a new break cache. The first and last entries must
--- a/icu4c/source/common/rbbidata.cpp
+++ b/icu4c/source/common/rbbidata.cpp
@ -23,23 +23,6 @@
 #include "uassert.h"


-//-----------------------------------------------------------------------------------
-//
-//   Trie access folding function.  Copied as-is from properties code in uchar.c
-//
-//-----------------------------------------------------------------------------------
-U_CDECL_BEGIN
-static int32_t U_CALLCONV
-getFoldingOffset(uint32_t data) {
-    /* if bit 15 is set, then the folding offset is in bits 14..0 of the 16-bit trie result */
-    if(data&0x8000) {
-        return (int32_t)(data&0x7fff);
-    } else {
-        return 0;
-    }
-}
-U_CDECL_END
-
 U_NAMESPACE_BEGIN

 //-----------------------------------------------------------------------------
@ -71,9 +54,8 @@ RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
            dh->info.dataFormat[0] == 0x42 &&  // dataFormat="Brk "
            dh->info.dataFormat[1] == 0x72 &&
            dh->info.dataFormat[2] == 0x6b &&
-            dh->info.dataFormat[3] == 0x20)
-            // Note: info.fFormatVersion is duplicated in the RBBIDataHeader, and is
-            //       validated when checking that.
+            dh->info.dataFormat[3] == 0x20 &&
+            isDataVersionAcceptable(dh->info.formatVersion))
        ) {
        status = U_INVALID_FORMAT_ERROR;
        return;
@ -84,6 +66,11 @@ RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) {
    fUDataMem = udm;
 }

+UBool RBBIDataWrapper::isDataVersionAcceptable(const UVersionInfo version) {
+    return RBBI_DATA_FORMAT_VERSION[0] == version[0];
+}
+
+
 //-----------------------------------------------------------------------------
 //
 //    init().   Does most of the work of construction, shared between the
@ -98,6 +85,7 @@ void RBBIDataWrapper::init0() {
    fSafeRevTable = NULL;
    fRuleSource = NULL;
    fRuleStatusTable = NULL;
+    fTrie = NULL;
    fUDataMem = NULL;
    fRefCount = 0;
    fDontFreeData = TRUE;
@ -108,8 +96,7 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
        return;
    }
    fHeader = data;
-    if (fHeader->fMagic != 0xb1a0 || fHeader->fFormatVersion[0] != 3) 
-    {
+    if (fHeader->fMagic != 0xb1a0 || !isDataVersionAcceptable(fHeader->fFormatVersion)) {
        status = U_INVALID_FORMAT_ERROR;
        return;
    }
@ -132,15 +119,14 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
    }


-    utrie_unserialize(&fTrie,
-                       (uint8_t *)data + fHeader->fTrie,
-                       fHeader->fTrieLen,
-                       &status);
+    fTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
+                                      (uint8_t *)data + fHeader->fTrie,
+                                      fHeader->fTrieLen,
+                                      NULL,           // *actual length
+                                      &status);
    if (U_FAILURE(status)) {
        return;
    }
-    fTrie.getFoldingOffset=getFoldingOffset;
-

    fRuleSource   = (UChar *)((char *)data + fHeader->fRuleSource);
    fRuleString.setTo(TRUE, fRuleSource, -1);
@ -165,6 +151,8 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
 //-----------------------------------------------------------------------------
 RBBIDataWrapper::~RBBIDataWrapper() {
    U_ASSERT(fRefCount == 0);
+    utrie2_close(fTrie);
+    fTrie = NULL;
    if (fUDataMem) {
        udata_close(fUDataMem);
    } else if (!fDontFreeData) {
@ -323,7 +311,7 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
           pInfo->dataFormat[1]==0x72 &&
           pInfo->dataFormat[2]==0x6b &&
           pInfo->dataFormat[3]==0x20 &&
-           pInfo->formatVersion[0]==3  )) {
+           RBBIDataWrapper::isDataVersionAcceptable(pInfo->formatVersion) )) {
        udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
@ -344,17 +332,11 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
    //
    // Get the RRBI Data Header, and check that it appears to be OK.
    //
-    //    Note:  ICU 3.2 and earlier, RBBIDataHeader::fDataFormat was actually 
-    //           an int32_t with a value of 1.  Starting with ICU 3.4,
-    //           RBBI's fDataFormat matches the dataFormat field from the
-    //           UDataInfo header, four int8_t bytes.  The value is {3,1,0,0}
-    //
    const uint8_t  *inBytes =(const uint8_t *)inData+headerSize;
    RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes;
    if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 || 
-        rbbiDH->fFormatVersion[0] != 3 ||
-        ds->readUInt32(rbbiDH->fLength)  <  sizeof(RBBIDataHeader)) 
-    {
+            !RBBIDataWrapper::isDataVersionAcceptable(rbbiDH->fFormatVersion) ||
+            ds->readUInt32(rbbiDH->fLength)  <  sizeof(RBBIDataHeader)) {
        udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n");
        *status=U_UNSUPPORTED_ERROR;
        return 0;
@ -451,8 +433,8 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
    }

    // Trie table for character categories
-    utrie_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
-                            outBytes+ds->readUInt32(rbbiDH->fTrie), status);
+    utrie2_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen),
+                    outBytes+ds->readUInt32(rbbiDH->fTrie), status);

    // Source Rules Text.  It's UChar data
    ds->swapArray16(ds, inBytes+ds->readUInt32(rbbiDH->fRuleSource), ds->readUInt32(rbbiDH->fRuleSourceLen),
--- a/icu4c/source/common/rbbidata.h
+++ b/icu4c/source/common/rbbidata.h
@ -51,22 +51,23 @@ ubrk_swap(const UDataSwapper *ds,

 #include "unicode/uobject.h"
 #include "unicode/unistr.h"
+#include "unicode/uversion.h"
 #include "umutex.h"
-#include "utrie.h"
+#include "utrie2.h"

 U_NAMESPACE_BEGIN

+// The current RBBI data format version.
+static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {4, 0, 0, 0};
+
 /*  
 *   The following structs map exactly onto the raw data from ICU common data file. 
 */
 struct RBBIDataHeader {
    uint32_t         fMagic;           /*  == 0xbla0                                               */
-    uint8_t          fFormatVersion[4]; /* Data Format.  Same as the value in struct UDataInfo      */
+    UVersionInfo     fFormatVersion;   /* Data Format.  Same as the value in struct UDataInfo      */
                                       /*   if there is one associated with this data.             */
                                       /*     (version originates in rbbi, is copied to UDataInfo) */
-                                       /*   For ICU 3.2 and earlier, this field was                */
-                                       /*       uint32_t  fVersion                                 */
-                                       /*   with a value of 1.                                     */
    uint32_t         fLength;          /*  Total length in bytes of this RBBI Data,                */
                                       /*      including all sections, not just the header.        */
    uint32_t         fCatCount;        /*  Number of character categories.                         */
@ -152,6 +153,8 @@ public:
    RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
    ~RBBIDataWrapper();

+    static UBool          isDataVersionAcceptable(const UVersionInfo version);
+
    void                  init0();
    void                  init(const RBBIDataHeader *data, UErrorCode &status);
    RBBIDataWrapper      *addReference();
@ -181,7 +184,7 @@ public:
    /* number of int32_t values in the rule status table.   Used to sanity check indexing */
    int32_t             fStatusMaxIdx;

-    UTrie               fTrie;
+    UTrie2              *fTrie;

 private:
    u_atomic_int32_t    fRefCount;
--- a/icu4c/source/common/rbbirb.cpp
+++ b/icu4c/source/common/rbbirb.cpp
@ -177,10 +177,10 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {


    data->fMagic            = 0xb1a0;
-    data->fFormatVersion[0] = 3;
-    data->fFormatVersion[1] = 1;
-    data->fFormatVersion[2] = 0;
-    data->fFormatVersion[3] = 0;
+    data->fFormatVersion[0] = RBBI_DATA_FORMAT_VERSION[0];
+    data->fFormatVersion[1] = RBBI_DATA_FORMAT_VERSION[1];
+    data->fFormatVersion[2] = RBBI_DATA_FORMAT_VERSION[2];
+    data->fFormatVersion[3] = RBBI_DATA_FORMAT_VERSION[3];
    data->fLength           = totalSize;
    data->fCatCount         = fSetBuilder->getNumCharCategories();

--- a/icu4c/source/common/rbbisetb.cpp
+++ b/icu4c/source/common/rbbisetb.cpp
@ -35,7 +35,7 @@
 #if !UCONFIG_NO_BREAK_ITERATION

 #include "unicode/uniset.h"
-#include "utrie.h"
+#include "utrie2.h"
 #include "uvector.h"
 #include "uassert.h"
 #include "cmemory.h"
@ -44,43 +44,6 @@
 #include "rbbisetb.h"
 #include "rbbinode.h"

-
-//------------------------------------------------------------------------
-//
-//   getFoldedRBBIValue        Call-back function used during building of Trie table.
-//                             Folding value: just store the offset (16 bits)
-//                             if there is any non-0 entry.
-//                             (It'd really be nice if the Trie builder would provide a
-//                             simple default, so this function could go away from here.)
-//
-//------------------------------------------------------------------------
-/* folding value: just store the offset (16 bits) if there is any non-0 entry */
-U_CDECL_BEGIN
-static uint32_t U_CALLCONV
-getFoldedRBBIValue(UNewTrie *trie, UChar32 start, int32_t offset) {
-    uint32_t value;
-    UChar32 limit;
-    UBool inBlockZero;
-
-    limit=start+0x400;
-    while(start<limit) {
-        value=utrie_get32(trie, start, &inBlockZero);
-        if(inBlockZero) {
-            start+=UTRIE_DATA_BLOCK_LENGTH;
-        } else if(value!=0) {
-            return (uint32_t)(offset|0x8000);
-        } else {
-            ++start;
-        }
-    }
-    return 0;
-}
-
-
-U_CDECL_END
-
-
-
 U_NAMESPACE_BEGIN

 //------------------------------------------------------------------------
@ -116,7 +79,7 @@ RBBISetBuilder::~RBBISetBuilder()
        delete r;
    }

-    utrie_close(fTrie);
+    utrie2_close(fTrie);
 }


@ -287,33 +250,30 @@ void RBBISetBuilder::build() {
    // Build the Trie table for mapping UChar32 values to the corresponding
    //   range group number
    //
-    fTrie = utrie_open(NULL,    //  Pre-existing trie to be filled in
-                      NULL,    //  Data array  (utrie will allocate one)
-                      100000,  //  Max Data Length
-                      0,       //  Initial value for all code points
-                      0,       //  Lead surrogate unit value
-                      TRUE);   //  Keep Latin 1 in separately
-
+    fTrie = utrie2_open(0,       //  Initial value for all code points
+                        0,       //  errorValue
+                        fStatus);

    for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
-        utrie_setRange32(fTrie, rlRange->fStartChar, rlRange->fEndChar+1, rlRange->fNum, TRUE);
+        utrie2_setRange32(fTrie, rlRange->fStartChar, rlRange->fEndChar, rlRange->fNum, TRUE, fStatus);
    }
 }


-
 //-----------------------------------------------------------------------------------
 //
 //  getTrieSize()    Return the size that will be required to serialize the Trie.
 //
 //-----------------------------------------------------------------------------------
 int32_t RBBISetBuilder::getTrieSize() /*const*/ {
-    fTrieSize  = utrie_serialize(fTrie,
-                                    NULL,                // Buffer
-                                    0,                   // Capacity
-                                    getFoldedRBBIValue,
-                                    TRUE,                // Reduce to 16 bits
-                                    fStatus);
+    utrie2_freeze(fTrie, UTRIE2_16_VALUE_BITS, fStatus);
+    fTrieSize  = utrie2_serialize(fTrie,
+                                  NULL,                // Buffer
+                                  0,                   // Capacity
+                                  fStatus);
+    if (*fStatus == U_BUFFER_OVERFLOW_ERROR) {
+        *fStatus = U_ZERO_ERROR;
+    }
    // RBBIDebugPrintf("Trie table size is %d\n", trieSize);
    return fTrieSize;
 }
@ -327,12 +287,10 @@ int32_t RBBISetBuilder::getTrieSize() /*const*/ {
 //
 //-----------------------------------------------------------------------------------
 void RBBISetBuilder::serializeTrie(uint8_t *where) {
-    utrie_serialize(fTrie,
-                    where,                   // Buffer
-                    fTrieSize,               // Capacity
-                    getFoldedRBBIValue,
-                    TRUE,                    // Reduce to 16 bits
-                    fStatus);
+    utrie2_serialize(fTrie,
+                     where,                   // Buffer
+                     fTrieSize,               // Capacity
+                     fStatus);
 }

 //------------------------------------------------------------------------
--- a/icu4c/source/common/rbbisetb.h
+++ b/icu4c/source/common/rbbisetb.h
@ -15,10 +15,9 @@
 #include "unicode/utypes.h"
 #include "unicode/uobject.h"
 #include "rbbirb.h"
+#include "utrie2.h"
 #include "uvector.h"

-struct  UNewTrie;
-
 U_NAMESPACE_BEGIN

 //
@ -109,7 +108,7 @@ private:

    RangeDescriptor       *fRangeList;      // Head of the linked list of RangeDescriptors

-    UNewTrie              *fTrie;           // The mapping TRIE that is the end result of processing
+    UTrie2               *fTrie;            // The mapping TRIE that is the end result of processing
    uint32_t              fTrieSize;        //  the Unicode Sets.

    // Groups correspond to character categories -
--- a/icu4c/source/common/unicode/rbbi.h
+++ b/icu4c/source/common/unicode/rbbi.h
@ -32,8 +32,6 @@
 #include "unicode/uchriter.h"


-struct UTrie;
-
 U_NAMESPACE_BEGIN

 /** @internal */
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java
@ -13,10 +13,9 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;

-import com.ibm.icu.impl.CharTrie;
 import com.ibm.icu.impl.ICUBinary;
 import com.ibm.icu.impl.ICUBinary.Authenticate;
-import com.ibm.icu.impl.Trie;
+import com.ibm.icu.impl.Trie2;

 /**
 * <p>Internal class used for Rule Based Break Iterators</p>
@ -33,20 +32,20 @@ final class RBBIDataWrapper {
    short          fRTable[];
    short          fSFTable[];
    short          fSRTable[];
-    CharTrie       fTrie;
+    Trie2          fTrie;
    String         fRuleSource;
    int            fStatusTable[];

    private boolean isBigEndian;

-    static final int DATA_FORMAT = 0x42726b20;  // "Brk "
-    static final int FORMAT_VERSION = 0x03010000;  // 3.1
+    static final int DATA_FORMAT = 0x42726b20;     // "Brk "
+    static final int FORMAT_VERSION = 0x04000000;  // 4.0.0.0

    private static final class IsAcceptable implements Authenticate {
-        // @Override when we switch to Java 6
        @Override
        public boolean isDataVersionAcceptable(byte version[]) {
-            return version[0] == (FORMAT_VERSION >>> 24);
+            // Accept only an exact 4-byte match. Mask each byte to 0..255 first: Java bytes are signed and would sign-extend.
+            return (((version[0] & 0xff) << 24) | ((version[1] & 0xff) << 16) | ((version[2] & 0xff) << 8) | (version[3] & 0xff)) == FORMAT_VERSION;
        }
    }
    private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
@ -105,7 +104,6 @@ final class RBBIDataWrapper {
     */
    final static class RBBIDataHeader {
        int         fMagic;         //  == 0xbla0
-        int         fVersion;       //  == 1 (for ICU 3.2 and earlier.
        byte[]      fFormatVersion; //  For ICU 3.4 and later.
        int         fLength;        //  Total length in bytes of this RBBI Data,
                                       //      including all sections, not just the header.
@ -147,19 +145,6 @@ final class RBBIDataWrapper {
        return ROW_DATA + state * (fHeader.fCatCount + 4);
    }

-    static class TrieFoldingFunc implements  Trie.DataManipulate {
-        @Override
-        public int getFoldingOffset(int data) {
-            if ((data & 0x8000) != 0) {
-                return data & 0x7fff;
-            } else {
-                return 0;
-            }
-        }
-    }
-    static TrieFoldingFunc  fTrieFoldingFunc = new TrieFoldingFunc();
-
-
    RBBIDataWrapper() {
    }

@ -176,10 +161,6 @@ final class RBBIDataWrapper {
        // Read in the RBBI data header...
        This.fHeader = new  RBBIDataHeader();
        This.fHeader.fMagic          = bytes.getInt();
-        // Read the same 4 bytes as an int and as a byte array: The data format could be
-        // the old fVersion=1 (TODO: probably not with a real ICU data header?)
-        // or the new fFormatVersion=3.x.
-        This.fHeader.fVersion        = bytes.getInt(bytes.position());
        This.fHeader.fFormatVersion[0] = bytes.get();
        This.fHeader.fFormatVersion[1] = bytes.get();
        This.fHeader.fFormatVersion[2] = bytes.get();
@ -203,10 +184,7 @@ final class RBBIDataWrapper {
        ICUBinary.skipBytes(bytes, 6 * 4);    // uint32_t  fReserved[6];


-        if (This.fHeader.fMagic != 0xb1a0 ||
-                ! (This.fHeader.fVersion == 1  ||         // ICU 3.2 and earlier
-                   This.fHeader.fFormatVersion[0] == 3)   // ICU 3.4
-            ) {
+        if (This.fHeader.fMagic != 0xb1a0 || !IS_ACCEPTABLE.isDataVersionAcceptable(This.fHeader.fFormatVersion)) {
            throw new IOException("Break Iterator Rule Data Magic Number Incorrect, or unsupported data version.");
        }

@ -286,7 +264,7 @@ final class RBBIDataWrapper {
                                                //  as we don't go more than 100 bytes past the
                                                //  past the end of the TRIE.

-        This.fTrie = new CharTrie(bytes, fTrieFoldingFunc);  // Deserialize the TRIE, leaving buffer
+        This.fTrie = Trie2.createFromSerialized(bytes);  // Deserialize the TRIE, leaving buffer
                                                //  at an unknown position, preceding the
                                                //  padding between TRIE and following section.

@ -461,7 +439,7 @@ final class RBBIDataWrapper {
        out.println("\nCharacter Categories");
        out.println("--------------------");
        for (char32 = 0; char32<=0x10ffff; char32++) {
-            category = fTrie.getCodePointValue(char32);
+            category = fTrie.get(char32);
            category &= ~0x4000;            // Mask off dictionary bit.
            if (category < 0 || category > fHeader.fCatCount) {
                out.println("Error, bad category " + Integer.toHexString(category) +
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java
@ -25,17 +25,17 @@ class RBBIRuleBuilder {
    //   This is the main class for building (compiling) break rules into the tables
    //    required by the runtime RBBI engine.
    //
-    
+
    String fDebugEnv;              // controls debug trace output
    String fRules;                 // The rule string that we are compiling
    RBBIRuleScanner fScanner;      // The scanner.

-    
+
    //
    //  There are four separate parse trees generated, one for each of the
    //    forward rules, reverse rules, safe forward rules and safe reverse rules.
    //  This array references the root of each of the trees.
-    //  
+    //
    RBBINode[]         fTreeRoots = new RBBINode[4];
    static final int   fForwardTree = 0;  // Indexes into the above fTreeRoots array
    static final int   fReverseTree = 1;  //   for each of the trees.
@ -69,7 +69,7 @@ class RBBIRuleBuilder {
                                                                                   //  Map Value is the runtime array index.

    List<Integer> fRuleStatusVals;        // List of Integer objects.  Has same layout as the
-                                          //   runtime array of status (tag) values - 
+                                          //   runtime array of status (tag) values -
                                          //     number of values in group 1
                                          //        first status value in group 1
                                          //        2nd status value in group 1
@ -84,50 +84,50 @@ class RBBIRuleBuilder {
    //
    static final int U_BRK_ERROR_START = 0x10200;
    /**< Start of codes indicating Break Iterator failures */
-    
+
    static final int U_BRK_INTERNAL_ERROR = 0x10201;
    /**< An internal error (bug) was detected.             */
-    
+
    static final int U_BRK_HEX_DIGITS_EXPECTED = 0x10202;
    /**< Hex digits expected as part of a escaped char in a rule. */
-    
+
    static final int U_BRK_SEMICOLON_EXPECTED = 0x10203;
    /**< Missing ';' at the end of a RBBI rule.            */
-    
+
    static final int U_BRK_RULE_SYNTAX = 0x10204;
    /**< Syntax error in RBBI rule.                        */
-    
+
    static final int U_BRK_UNCLOSED_SET = 0x10205;
    /**< UnicodeSet witing an RBBI rule missing a closing ']'.  */
-    
+
    static final int U_BRK_ASSIGN_ERROR = 0x10206;
    /**< Syntax error in RBBI rule assignment statement.   */
-    
+
    static final int U_BRK_VARIABLE_REDFINITION = 0x10207;
    /**< RBBI rule $Variable redefined.                    */
-    
+
    static final int U_BRK_MISMATCHED_PAREN = 0x10208;
    /**< Mis-matched parentheses in an RBBI rule.          */
-    
+
    static final int U_BRK_NEW_LINE_IN_QUOTED_STRING = 0x10209;
    /**< Missing closing quote in an RBBI rule.            */
-    
+
    static final int U_BRK_UNDEFINED_VARIABLE = 0x1020a;
    /**< Use of an undefined $Variable in an RBBI rule.    */
-    
+
    static final int U_BRK_INIT_ERROR = 0x1020b;
    /**< Initialization failure.  Probable missing ICU Data. */
-    
+
    static final int U_BRK_RULE_EMPTY_SET = 0x1020c;
    /**< Rule contains an empty Unicode Set.               */
-    
+
    static final int U_BRK_UNRECOGNIZED_OPTION = 0x1020d;
    /**< !!option in RBBI rules not recognized.            */
-    
+
    static final int U_BRK_MALFORMED_RULE_TAG = 0x1020e;
    /**< The {nnn} tag on a rule is mal formed             */
    static final int U_BRK_MALFORMED_SET = 0x1020f;
-    
+
    static final int U_BRK_ERROR_LIMIT = 0x10210;
    /**< This must always be the last value to indicate the limit for Break Iterator failures */

@ -196,7 +196,7 @@ class RBBIRuleBuilder {
        //
        int[] header = new int[RBBIDataWrapper.DH_SIZE];                 // sizeof struct RBBIDataHeader
        header[RBBIDataWrapper.DH_MAGIC]         = 0xb1a0;
-        header[RBBIDataWrapper.DH_FORMATVERSION] = 0x03010000;           // uint8_t fFormatVersion[4];
+        header[RBBIDataWrapper.DH_FORMATVERSION] = RBBIDataWrapper.FORMAT_VERSION;
        header[RBBIDataWrapper.DH_LENGTH]        = totalSize;            // fLength, the total size of all rule sections.
        header[RBBIDataWrapper.DH_CATCOUNT]      = fSetBuilder.getNumCharCategories(); // fCatCount.
        header[RBBIDataWrapper.DH_FTABLE]        = headerSize;           // fFTable
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBISetBuilder.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBISetBuilder.java
@ -14,7 +14,8 @@ import java.util.ArrayList;
 import java.util.List;

 import com.ibm.icu.impl.Assert;
-import com.ibm.icu.impl.IntTrieBuilder;
+import com.ibm.icu.impl.Trie2Writable;
+import com.ibm.icu.impl.Trie2_16;

 //
 //  RBBISetBuilder   Handles processing of Unicode Sets from RBBI rules
@ -49,14 +50,14 @@ class RBBISetBuilder {
            RangeDescriptor() {
                fIncludesSets = new ArrayList<RBBINode>();
            }
-            
+
            RangeDescriptor(RangeDescriptor other) {
                fStartChar = other.fStartChar;
                fEndChar   = other.fEndChar;
                fNum       = other.fNum;
                fIncludesSets = new ArrayList<RBBINode>(other.fIncludesSets);
            }
- 
+
            //-------------------------------------------------------------------------------------
            //
            //          RangeDesriptor::split()
@ -65,20 +66,20 @@ class RBBISetBuilder {
            void split(int where) {
                Assert.assrt(where>fStartChar && where<=fEndChar);
                RangeDescriptor nr = new RangeDescriptor(this);
- 
+
                //  RangeDescriptor copy constructor copies all fields.
                //  Only need to update those that are different after the split.
                nr.fStartChar = where;
                this.fEndChar = where-1;
                nr.fNext      = this.fNext;
                this.fNext    = nr;
-                
+
                // TODO:  fIncludesSets is not updated.  Check it out.
-                //         Probably because they haven't been populated yet, 
+                //         Probably because they haven't been populated yet,
                //         but still sloppy.
            }

-            
+
            //-------------------------------------------------------------------------------------
            //
            //          RangeDescriptor::setDictionaryFlag
@ -95,11 +96,11 @@ class RBBISetBuilder {
            //          TODO:  a faster way would be to find the set node for
            //          "dictionary" just once, rather than looking it
            //          up by name every time.
-            //            
+            //
            // -------------------------------------------------------------------------------------
            void setDictionaryFlag() {
                int i;
-                
+
                for (i=0; i<this.fIncludesSets.size(); i++) {
                    RBBINode        usetNode    = fIncludesSets.get(i);
                    String          setName = "";
@ -119,12 +120,13 @@ class RBBISetBuilder {
        }
    }

-    
+
    RBBIRuleBuilder       fRB;             // The RBBI Rule Compiler that owns us.
    RangeDescriptor       fRangeList;      // Head of the linked list of RangeDescriptors

-    IntTrieBuilder        fTrie;           // The mapping TRIE that is the end result of processing
+    Trie2Writable         fTrie;           // The mapping TRIE that is the end result of processing
                                           //  the Unicode Sets.
+    Trie2_16              fFrozenTrie;

    // Groups correspond to character categories -
    //       groups of ranges that are in the same original UnicodeSets.
@ -135,8 +137,8 @@ class RBBISetBuilder {
    int                fGroupCount;

    boolean             fSawBOF;
-    
-    
+
+
    //------------------------------------------------------------------------
    //
    //       RBBISetBuilder Constructor
@ -162,7 +164,7 @@ class RBBISetBuilder {
        //  Initialize the process by creating a single range encompassing all characters
        //  that is in no sets.
        //
-        fRangeList               = new RangeDescriptor(); 
+        fRangeList               = new RangeDescriptor();
        fRangeList.fStartChar    = 0;
        fRangeList.fEndChar      = 0x10ffff;

@ -245,7 +247,7 @@ class RBBISetBuilder {
            }
            if (rlRange.fNum == 0) {
                fGroupCount ++;
-                rlRange.fNum = fGroupCount+2; 
+                rlRange.fNum = fGroupCount+2;
                rlRange.setDictionaryFlag();
                addValToSets(rlRange.fIncludesSets, fGroupCount+2);
            }
@ -260,7 +262,7 @@ class RBBISetBuilder {
        //     subtree for each UnicodeSet that contains the string {eof}
        //   Because {bof} and {eof} are not a characters in the normal sense,
        //   they doesn't affect the computation of ranges or TRIE.
-        
+
        String eofString = "eof";
        String bofString = "bof";

@ -279,67 +281,26 @@ class RBBISetBuilder {
        if (fRB.fDebugEnv!=null  && fRB.fDebugEnv.indexOf("rgroup")>=0) {printRangeGroups();}
        if (fRB.fDebugEnv!=null  && fRB.fDebugEnv.indexOf("esets")>=0) {printSets();}

+        fTrie = new Trie2Writable(0,       //   Initial value for all code points
+                                  0);      //   Error value.

-        //IntTrieBuilder(int aliasdata[], int maxdatalength, 
-        //        int initialvalue, int leadunitvalue, 
-        //        boolean latin1linear)
-        
-        fTrie = new IntTrieBuilder(null,   //   Data array  (utrie will allocate one)
-                                   100000,  //   Max Data Length
-                                   0,       //   Initial value for all code points
-                                   0,       //   Lead Surrogate unit value,
-                                   true);   //   Keep Latin 1 in separately.
-        
        for (rlRange = fRangeList; rlRange!=null; rlRange=rlRange.fNext) {
-            fTrie.setRange(rlRange.fStartChar, rlRange.fEndChar+1, rlRange.fNum, true);
+            fTrie.setRange(rlRange.fStartChar, rlRange.fEndChar, rlRange.fNum, true);
        }
    }


-
-    //-----------------------------------------------------------------------------------
-    //
-    //   RBBIDataManipulate  A little internal class needed only to wrap of the 
-    //                       getFoldedValue() function needed for Trie table creation.
-    //
-    //-----------------------------------------------------------------------------------
-   class RBBIDataManipulate implements IntTrieBuilder.DataManipulate {
-        public int getFoldedValue(int start, int offset) {
-            int  value;
-            int  limit;
-            boolean [] inBlockZero = new boolean[1];
-            
-            limit = start + 0x400;
-            while(start<limit) {
-                value = fTrie.getValue(start, inBlockZero);
-                if (inBlockZero[0]) {
-                    start += IntTrieBuilder.DATA_BLOCK_LENGTH;  
-                } else if (value != 0) {
-                    return offset | 0x08000;
-                } else {
-                    ++start;
-                }
-            }
-            return 0;
-         }
-    }
-    RBBIDataManipulate dm = new RBBIDataManipulate();
-    
    //-----------------------------------------------------------------------------------
    //
    //          getTrieSize()    Return the size that will be required to serialize the Trie.
    //
    //-----------------------------------------------------------------------------------
    int getTrieSize()  {
-        int size = 0;
-        try {
-            // The trie serialize function returns the size of the data written.
-            //    null output stream says give size only, don't actually write anything.
-            size = fTrie.serialize(null, true, dm );
-        } catch (IOException e) {
-            Assert.assrt (false);
+        if (fFrozenTrie == null) {   // Not frozen yet: convert the writable trie on first request.
+            fFrozenTrie = fTrie.toTrie2_16();
+            fTrie = null;   // Writable trie is dropped here. NOTE(review): later fTrie use would see null - confirm none exists.
        }
-        return size;
+        return fFrozenTrie.getSerializedLength();   // Length reported by the frozen trie; nothing is written.
    }


@ -349,7 +310,11 @@ class RBBISetBuilder {
    //
    //-----------------------------------------------------------------------------------
    void serializeTrie(OutputStream os) throws IOException {
-        fTrie.serialize(os, true, dm );
+        if (fFrozenTrie == null) {   // Same lazy freeze as getTrieSize(); whichever method runs first does the conversion.
+            fFrozenTrie = fTrie.toTrie2_16();
+            fTrie = null;   // Writable trie is dropped once the frozen copy exists.
+        }
+        fFrozenTrie.serialize(os);   // Writes the frozen trie to os; an IOException propagates to the caller.
   }

    //------------------------------------------------------------------------
@ -416,7 +381,7 @@ class RBBISetBuilder {
    //------------------------------------------------------------------------
    //
    //           getFirstChar      Given a runtime RBBI character category, find
-    //                             the first UChar32 that is in the set of chars 
+    //                             the first UChar32 that is in the set of chars
    //                             in the category.
    //------------------------------------------------------------------------
    int  getFirstChar(int category)  {
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java
@ -24,10 +24,10 @@ import java.util.ArrayList;
 import java.util.List;

 import com.ibm.icu.impl.Assert;
-import com.ibm.icu.impl.CharTrie;
 import com.ibm.icu.impl.CharacterIteration;
 import com.ibm.icu.impl.ICUBinary;
 import com.ibm.icu.impl.ICUDebug;
+import com.ibm.icu.impl.Trie2;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.lang.UScript;
@ -495,7 +495,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
        DictionaryBreakEngine.DequeI breaks = new DictionaryBreakEngine.DequeI();
        int     foundBreakCount = 0;
        int     c = CharacterIteration.current32(fText);
-        category = (short)fRData.fTrie.getCodePointValue(c);
+        category = (short)fRData.fTrie.get(c);

        // Is the character we're starting on a dictionary character? If so, we
        // need to back up to include the entire run; otherwise the results of
@ -507,7 +507,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
                do {
                    CharacterIteration.next32(fText);
                    c = CharacterIteration.current32(fText);
-                    category = (short)fRData.fTrie.getCodePointValue(c);
+                    category = (short)fRData.fTrie.get(c);
                } while (c != CharacterIteration.DONE32 && ((category & 0x4000)) != 0);

                // Back up to the last dictionary character
@ -524,7 +524,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
            else {
                do {
                    c = CharacterIteration.previous32(fText);
-                    category = (short)fRData.fTrie.getCodePointValue(c);
+                    category = (short)fRData.fTrie.get(c);
                }
                while (c != CharacterIteration.DONE32 && ((category & 0x4000) != 0));
                // Back up to the last dictionary character
@ -538,7 +538,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
                }
                rangeStart = fText.getIndex();
            }
-            category = (short)fRData.fTrie.getCodePointValue(c);
+            category = (short)fRData.fTrie.get(c);
        }


@ -550,14 +550,14 @@ public class RuleBasedBreakIterator extends BreakIterator {
        if (reverse) {
            fText.setIndex(rangeStart);
            c = CharacterIteration.current32(fText);
-            category = (short)fRData.fTrie.getCodePointValue(c);
+            category = (short)fRData.fTrie.get(c);
        }
        LanguageBreakEngine lbe = null;
        while(true) {
            while((current = fText.getIndex()) < rangeEnd && (category & 0x4000) == 0) {
                CharacterIteration.next32(fText);
                c = CharacterIteration.current32(fText);
-                category = (short)fRData.fTrie.getCodePointValue(c);
+                category = (short)fRData.fTrie.get(c);
            }
            if (current >= rangeEnd) {
                break;
@ -577,7 +577,7 @@ public class RuleBasedBreakIterator extends BreakIterator {

            // Reload the loop variables for the next go-round
            c = CharacterIteration.current32(fText);
-            category = (short)fRData.fTrie.getCodePointValue(c);
+            category = (short)fRData.fTrie.get(c);
        }

        // If we found breaks, build a new break cache. The first and last entries must
@ -1285,7 +1285,7 @@ public class RuleBasedBreakIterator extends BreakIterator {

        // caches for quicker access
        CharacterIterator text = fText;
-        CharTrie trie = fRData.fTrie;
+        Trie2 trie = fRData.fTrie;

        // Set up the starting char
        int c               = text.current();
@ -1338,7 +1338,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
                // look up the current character's character category, which tells us
                // which column in the state table to look at.
                //
-                category = (short) trie.getCodePointValue(c);
+                category = (short) trie.get(c);

                // Check the dictionary bit in the character's category.
                //    Counter is only used by dictionary based iterators (subclasses).
@ -1483,10 +1483,8 @@ public class RuleBasedBreakIterator extends BreakIterator {
        mainLoop: for (;;) {
            if (c == DONE32) {
                // Reached end of input string.
-                if (mode == RBBI_END || fRData.fHeader.fVersion == 1) {
-                    // Either this is the old (ICU 3.2 and earlier) format data which
-                    // does not support explicit support for matching {eof}, or
-                    // we have already done the {eof} iteration.  Now is the time
+                if (mode == RBBI_END) {
+                    // We have already done the {eof} iteration.  Now is the time
                    // to unconditionally bail out.
                    if (result == initialPosition) {
                        // Ran off start, no match found.
@ -1504,7 +1502,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
                // look up the current character's category, which tells us
                // which column in the state table to look at.
                //
-                category = (short) fRData.fTrie.getCodePointValue(c);
+                category = (short) fRData.fTrie.get(c);

                // Check the dictionary bit in the character's category.
                //    Counter is only used by dictionary based iterators (subclasses).
--- a/icu4j/main/shared/data/icudata.jar
+++ b/icu4j/main/shared/data/icudata.jar
@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4b1866a85ceb079d912a3283e5ec6a7d6988df8c0e56e98fd67def82c35dcf3
-size 12225515
+oid sha256:f0d65ed59329e1eaae1813db0fa8e1236a3b58ddfa5e7e1ff33d4bea7eef3c31
+size 12226292