ICU-8105 hardcode Unicode property names data (formatVersion 2); includes new dictionary-type tries (ByteTrie & UCharTrie see ticket #8167); merge branches/markus/tries -r 29040:29249

X-SVN-Rev: 29252
2025-04-06 14:05:32 +00:00 · 2010-12-31 18:21:36 +00:00 · 2010-12-31 18:21:36 +00:00 · c04082d93c
commit c04082d93c
parent 3e29cb9f1f
53 changed files with 9209 additions and 1067 deletions
--- a/icu4c/source/common/Makefile.in
+++ b/icu4c/source/common/Makefile.in
@ -85,7 +85,7 @@ ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
 ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o ucnv_ct.o \
 uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
 ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o loclikely.o locresdata.o \
-bytestream.o stringpiece.o \
+bytestream.o stringpiece.o bytetrie.o \
 ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
 utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
 normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o unorm_it.o \
--- a/icu4c/source/common/bytetrie.cpp
+++ b/icu4c/source/common/bytetrie.cpp
@ -0,0 +1,431 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  bytetrie.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010sep25
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/bytestream.h"
+#include "unicode/uobject.h"
+#include "uassert.h"
+#include "bytetrie.h"
+
+U_NAMESPACE_BEGIN
+
+// Decodes a compact integer value.
+// pos points just past the lead byte; leadByte has already been shifted right by 1
+// (that is, the "final value" bit has been removed).
+int32_t
+ByteTrie::readValue(const uint8_t *pos, int32_t leadByte) {
+    if(leadByte<kMinTwoByteValueLead) {
+        // The whole value is stored in the lead byte.
+        return leadByte-kMinOneByteValueLead;
+    }
+    if(leadByte<kMinThreeByteValueLead) {
+        // Top bits in the lead byte, one trail byte.
+        return ((leadByte-kMinTwoByteValueLead)<<8)|pos[0];
+    }
+    if(leadByte<kFourByteValueLead) {
+        // Top bits in the lead byte, two trail bytes.
+        return ((leadByte-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
+    }
+    if(leadByte==kFourByteValueLead) {
+        // No value bits in the lead byte, three trail bytes.
+        return (pos[0]<<16)|(pos[1]<<8)|pos[2];
+    }
+    // Remaining lead byte: four trail bytes.
+    return (pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
+}
+
+// Reads the compact jump delta that starts at pos and returns the position
+// which lies that many bytes past the end of the delta's encoding.
+const uint8_t *
+ByteTrie::jumpByDelta(const uint8_t *pos) {
+    const int32_t lead=*pos++;
+    int32_t trailBytes;  // Number of bytes that follow the delta's lead byte.
+    int32_t delta;
+    if(lead<kMinTwoByteDeltaLead) {
+        // The lead byte is the delta itself.
+        trailBytes=0;
+        delta=lead;
+    } else if(lead<kMinThreeByteDeltaLead) {
+        trailBytes=1;
+        delta=((lead-kMinTwoByteDeltaLead)<<8)|pos[0];
+    } else if(lead<kFourByteDeltaLead) {
+        trailBytes=2;
+        delta=((lead-kMinThreeByteDeltaLead)<<16)|(pos[0]<<8)|pos[1];
+    } else if(lead==kFourByteDeltaLead) {
+        trailBytes=3;
+        delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
+    } else {
+        trailBytes=4;
+        delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
+    }
+    return pos+trailBytes+delta;
+}
+
+// Reports the match/value result for the current state without consuming input.
+UDictTrieResult
+ByteTrie::current() const {
+    if(pos_==NULL) {
+        // The trie has been stopped.
+        return UDICTTRIE_NO_MATCH;
+    }
+    if(remainingMatchLength_>=0) {
+        // Still inside a linear-match node: there cannot be a value here.
+        return UDICTTRIE_NO_VALUE;
+    }
+    const int32_t lead=*pos_;
+    if(lead<kMinValueLead) {
+        return UDICTTRIE_NO_VALUE;
+    }
+    return valueResult(lead);
+}
+
+// Handles a branch node: selects the branch entry for inByte.
+// pos points just after the branch node's lead byte.
+// length is the lead byte value (branch length minus 1); 0 means that
+// the length-minus-1 is stored in the next byte instead.
+// On a match, stores the new position in pos_ and returns the value result
+// for that position; otherwise calls stop() and returns UDICTTRIE_NO_MATCH.
+// remainingMatchLength_ is not modified here.
+UDictTrieResult
+ByteTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
+    // Branch according to the current byte.
+    if(length==0) {
+        length=*pos++;
+    }
+    ++length;
+    // The length of the branch is the number of bytes to select from.
+    // The data structure encodes a binary search.
+    while(length>kMaxBranchLinearSubNodeLength) {
+        if(inByte<*pos++) {
+            // Lower half: follow the jump delta to a sub-node with length/2 entries.
+            length>>=1;
+            pos=jumpByDelta(pos);
+        } else {
+            // Upper half: the sub-node with the remaining entries follows the delta.
+            length=length-(length>>1);
+            pos=skipDelta(pos);
+        }
+    }
+    // Drop down to linear search for the last few bytes.
+    // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
+    // and divides length by 2.
+    do {
+        // Each of the first length-1 entries is a (comparison byte, value) pair.
+        if(inByte==*pos++) {
+            UDictTrieResult result;
+            int32_t node=*pos;
+            U_ASSERT(node>=kMinValueLead);
+            if(node&kValueIsFinal) {
+                // Leave the final value for getValue() to read.
+                result=UDICTTRIE_HAS_FINAL_VALUE;
+            } else {
+                // Use the non-final value as the jump delta.
+                ++pos;
+                // The following is an inlined copy of readValue() which also
+                // advances pos past the value bytes.
+                // int32_t delta=readValue(pos, node>>1);
+                node>>=1;
+                int32_t delta;
+                if(node<kMinTwoByteValueLead) {
+                    delta=node-kMinOneByteValueLead;
+                } else if(node<kMinThreeByteValueLead) {
+                    delta=((node-kMinTwoByteValueLead)<<8)|*pos++;
+                } else if(node<kFourByteValueLead) {
+                    delta=((node-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
+                    pos+=2;
+                } else if(node==kFourByteValueLead) {
+                    delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
+                    pos+=3;
+                } else {
+                    delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
+                    pos+=4;
+                }
+                // end readValue()
+                // The delta is relative to the end of the value.
+                pos+=delta;
+                node=*pos;
+                result= node>=kMinValueLead ? valueResult(node) : UDICTTRIE_NO_VALUE;
+            }
+            pos_=pos;
+            return result;
+        }
+        --length;
+        pos=skipValue(pos);
+    } while(length>1);
+    // The last entry is only a comparison byte, without a value:
+    // If it matches, then matching continues with the node right after it.
+    if(inByte==*pos++) {
+        pos_=pos;
+        int32_t node=*pos;
+        return node>=kMinValueLead ? valueResult(node) : UDICTTRIE_NO_VALUE;
+    } else {
+        stop();
+        return UDICTTRIE_NO_MATCH;
+    }
+}
+
+// Matches inByte against the node at pos, skipping an intermediate value node if present.
+// Must only be used when not inside a linear-match node.
+UDictTrieResult
+ByteTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
+    for(;;) {
+        const int32_t lead=*pos++;
+        if(lead<kMinLinearMatch) {
+            // Branch node.
+            return branchNext(pos, lead, inByte);
+        }
+        if(lead>=kMinValueLead) {
+            if(lead&kValueIsFinal) {
+                // A final value has no further matching bytes.
+                stop();
+                return UDICTTRIE_NO_MATCH;
+            }
+            // Step over the intermediate value and look at the node behind it.
+            pos=skipValue(pos, lead);
+            // The next node must not also be a value node.
+            U_ASSERT(*pos<kMinValueLead);
+            continue;
+        }
+        // Linear-match node: compare the first of its bytes.
+        if(inByte!=*pos) {
+            stop();
+            return UDICTTRIE_NO_MATCH;
+        }
+        ++pos;
+        // Node match length minus 1, minus the one byte just matched.
+        const int32_t remaining=lead-kMinLinearMatch-1;
+        remainingMatchLength_=remaining;
+        pos_=pos;
+        if(remaining<0) {
+            // The linear match is complete; a value node may follow directly.
+            const int32_t following=*pos;
+            if(following>=kMinValueLead) {
+                return valueResult(following);
+            }
+        }
+        return UDICTTRIE_NO_VALUE;
+    }
+}
+
+// Advances the trie from its current state by one input byte.
+UDictTrieResult
+ByteTrie::next(int32_t inByte) {
+    const uint8_t *pos=pos_;
+    if(pos==NULL) {
+        // Already stopped.
+        return UDICTTRIE_NO_MATCH;
+    }
+    int32_t remaining=remainingMatchLength_;  // Actual remaining match length minus 1.
+    if(remaining<0) {
+        // Not inside a linear-match node: look at the next node.
+        return nextImpl(pos, inByte);
+    }
+    // Remaining part of a linear-match node.
+    if(inByte!=*pos) {
+        stop();
+        return UDICTTRIE_NO_MATCH;
+    }
+    ++pos;
+    --remaining;
+    remainingMatchLength_=remaining;
+    pos_=pos;
+    if(remaining>=0) {
+        return UDICTTRIE_NO_VALUE;
+    }
+    // The linear match is complete; a value node may follow directly.
+    const int32_t lead=*pos;
+    return lead>=kMinValueLead ? valueResult(lead) : UDICTTRIE_NO_VALUE;
+}
+
+// Advances the trie from its current state by a whole byte sequence.
+// s points to the input bytes. If sLength<0 then s is NUL-terminated,
+// otherwise sLength is the number of input bytes.
+// Returns the result for the state after the last input byte,
+// or UDICTTRIE_NO_MATCH (after stop()) as soon as a byte does not match.
+// The outer loop alternates between two phases:
+// 1. consuming input inside a linear-match node (length>=0), and
+// 2. dispatching one input byte on branch/linear-match/value nodes.
+UDictTrieResult
+ByteTrie::next(const char *s, int32_t sLength) {
+    if(sLength<0 ? *s==0 : sLength==0) {
+        // Empty input.
+        return current();
+    }
+    const uint8_t *pos=pos_;
+    if(pos==NULL) {
+        return UDICTTRIE_NO_MATCH;
+    }
+    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
+    for(;;) {
+        // Fetch the next input byte, if there is one.
+        // Continue a linear-match node without rechecking sLength<0.
+        int32_t inByte;
+        if(sLength<0) {
+            // NUL-terminated input.
+            for(;;) {
+                if((inByte=*s++)==0) {
+                    // End of input: save the state and report the result for it.
+                    remainingMatchLength_=length;
+                    pos_=pos;
+                    int32_t node;
+                    return (length<0 && (node=*pos)>=kMinValueLead) ?
+                            valueResult(node) : UDICTTRIE_NO_VALUE;
+                }
+                if(length<0) {
+                    // Not (or no longer) in a linear-match node:
+                    // inByte is handled by the node loop below.
+                    remainingMatchLength_=length;
+                    break;
+                }
+                if(inByte!=*pos) {
+                    stop();
+                    return UDICTTRIE_NO_MATCH;
+                }
+                ++pos;
+                --length;
+            }
+        } else {
+            // Input with explicit length; same logic as above.
+            for(;;) {
+                if(sLength==0) {
+                    // End of input: save the state and report the result for it.
+                    remainingMatchLength_=length;
+                    pos_=pos;
+                    int32_t node;
+                    return (length<0 && (node=*pos)>=kMinValueLead) ?
+                            valueResult(node) : UDICTTRIE_NO_VALUE;
+                }
+                inByte=*s++;
+                --sLength;
+                if(length<0) {
+                    // Not (or no longer) in a linear-match node:
+                    // inByte is handled by the node loop below.
+                    remainingMatchLength_=length;
+                    break;
+                }
+                if(inByte!=*pos) {
+                    stop();
+                    return UDICTTRIE_NO_MATCH;
+                }
+                ++pos;
+                --length;
+            }
+        }
+        // Node loop: dispatch inByte on the node at pos.
+        for(;;) {
+            int32_t node=*pos++;
+            if(node<kMinLinearMatch) {
+                // Branch node. branchNext() either stops the trie or writes pos_.
+                UDictTrieResult result=branchNext(pos, node, inByte);
+                if(result==UDICTTRIE_NO_MATCH) {
+                    return UDICTTRIE_NO_MATCH;
+                }
+                // Fetch the next input byte, if there is one.
+                if(sLength<0) {
+                    if((inByte=*s++)==0) {
+                        return result;
+                    }
+                } else {
+                    if(sLength==0) {
+                        return result;
+                    }
+                    inByte=*s++;
+                    --sLength;
+                }
+                // There is more input, but a final value cannot be continued.
+                if(result==UDICTTRIE_HAS_FINAL_VALUE) {
+                    // No further matching bytes.
+                    stop();
+                    return UDICTTRIE_NO_MATCH;
+                }
+                pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
+            } else if(node<kMinValueLead) {
+                // Match length+1 bytes.
+                length=node-kMinLinearMatch;  // Actual match length minus 1.
+                if(inByte!=*pos) {
+                    stop();
+                    return UDICTTRIE_NO_MATCH;
+                }
+                ++pos;
+                --length;
+                // Continue in the outer loop, which matches the rest of this node.
+                break;
+            } else if(node&kValueIsFinal) {
+                // No further matching bytes.
+                stop();
+                return UDICTTRIE_NO_MATCH;
+            } else {
+                // Skip intermediate value.
+                pos=skipValue(pos, node);
+                // The next node must not also be a value node.
+                U_ASSERT(*pos<kMinValueLead);
+            }
+        }
+    }
+}
+
+// Helper for findUniqueValue(): checks all values reachable from a branch.
+// pos points to the first comparison byte of the branch (sub-)node,
+// length is the number of branch entries.
+// haveUniqueValue tells whether uniqueValue already holds a value that
+// every further value must be equal to; otherwise uniqueValue receives
+// the first value found.
+// Returns NULL as soon as two different values are found. Otherwise returns
+// the position just after the last comparison byte, which is where the
+// node for the last branch entry starts.
+const uint8_t *
+ByteTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
+                                    UBool haveUniqueValue, int32_t &uniqueValue) {
+    while(length>kMaxBranchLinearSubNodeLength) {
+        ++pos;  // ignore the comparison byte
+        if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
+            return NULL;
+        }
+        // The lower half was traversed successfully, which means that it stored
+        // or confirmed at least one value in uniqueValue.
+        // haveUniqueValue is passed by value, so record that here; otherwise
+        // the upper half would overwrite uniqueValue instead of comparing with it.
+        haveUniqueValue=TRUE;
+        length=length-(length>>1);
+        pos=skipDelta(pos);
+    }
+    do {
+        ++pos;  // ignore a comparison byte
+        // handle its value
+        int32_t node=*pos++;
+        UBool isFinal=(UBool)(node&kValueIsFinal);
+        int32_t value=readValue(pos, node>>1);
+        pos=skipValue(pos, node);
+        if(isFinal) {
+            if(haveUniqueValue) {
+                if(value!=uniqueValue) {
+                    return NULL;
+                }
+            } else {
+                uniqueValue=value;
+                haveUniqueValue=TRUE;
+            }
+        } else {
+            // A non-final value is a jump delta to the sub-trie for this entry.
+            if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
+                return NULL;
+            }
+            haveUniqueValue=TRUE;
+        }
+    } while(--length>1);
+    return pos+1;  // ignore the last comparison byte
+}
+
+// Walks the nodes starting at pos (which is on a node lead byte) and checks
+// whether all values reachable from there are the same.
+// Returns TRUE with that value in uniqueValue, or FALSE if two values differ.
+UBool
+ByteTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
+    for(;;) {
+        const int32_t lead=*pos++;
+        if(lead>=kMinValueLead) {
+            // Value node: compare with, or remember, the value.
+            const int32_t value=readValue(pos, lead>>1);
+            if(!haveUniqueValue) {
+                uniqueValue=value;
+                haveUniqueValue=TRUE;
+            } else if(value!=uniqueValue) {
+                return FALSE;
+            }
+            if(lead&kValueIsFinal) {
+                // Nothing follows a final value.
+                return TRUE;
+            }
+            pos=skipValue(pos, lead);
+        } else if(lead>=kMinLinearMatch) {
+            // Linear-match node: step over its match bytes.
+            pos+=lead-kMinLinearMatch+1;
+        } else {
+            // Branch node: a lead byte of 0 means that the length-minus-1 is in the next byte.
+            const int32_t lengthMinus1= lead!=0 ? lead : *pos++;
+            pos=findUniqueValueFromBranch(pos, lengthMinus1+1, haveUniqueValue, uniqueValue);
+            if(pos==NULL) {
+                return FALSE;
+            }
+            haveUniqueValue=TRUE;
+        }
+    }
+}
+
+// Appends to out each byte that can continue the byte sequence from the
+// current state, and returns how many such bytes there are.
+int32_t
+ByteTrie::getNextBytes(ByteSink &out) const {
+    const uint8_t *pos=pos_;
+    if(pos==NULL) {
+        // Stopped: nothing can follow.
+        return 0;
+    }
+    if(remainingMatchLength_>=0) {
+        // Next byte of a pending linear-match node.
+        append(out, *pos);
+        return 1;
+    }
+    int32_t lead=*pos++;
+    if(lead>=kMinValueLead) {
+        if(lead&kValueIsFinal) {
+            // Nothing follows a final value.
+            return 0;
+        }
+        // Step over the intermediate value to the node behind it.
+        pos=skipValue(pos, lead);
+        lead=*pos++;
+        U_ASSERT(lead<kMinValueLead);
+    }
+    if(lead>=kMinLinearMatch) {
+        // First byte of the linear-match node.
+        append(out, *pos);
+        return 1;
+    }
+    // Branch node: a lead byte of 0 means that the length-minus-1 is in the next byte.
+    if(lead==0) {
+        lead=*pos++;
+    }
+    const int32_t branchLength=lead+1;
+    getNextBranchBytes(pos, branchLength, out);
+    return branchLength;
+}
+
+// Appends all comparison bytes of the branch (sub-)node at pos to out.
+// length is the number of entries of this (sub-)node.
+void
+ByteTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) {
+    while(length>kMaxBranchLinearSubNodeLength) {
+        // Binary-search sub-node: one comparison byte (ignored), then a jump delta.
+        const uint8_t *deltaPos=pos+1;
+        // The lower half is reached via the delta ...
+        getNextBranchBytes(jumpByDelta(deltaPos), length>>1, out);
+        // ... and the upper half follows right after the delta.
+        length-=length>>1;
+        pos=skipDelta(deltaPos);
+    }
+    // Linear sub-node: (byte, value) pairs, followed by one last byte without a value.
+    for(;;) {
+        append(out, *pos++);
+        pos=skipValue(pos);
+        if(--length<=1) {
+            break;
+        }
+    }
+    append(out, *pos);
+}
+
+// Appends the single byte c to out.
+void
+ByteTrie::append(ByteSink &out, int c) {
+    const char byte=static_cast<char>(c);
+    out.Append(&byte, 1);
+}
+
+U_NAMESPACE_END
--- a/icu4c/source/common/bytetrie.h
+++ b/icu4c/source/common/bytetrie.h
@ -0,0 +1,331 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  bytetrie.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010sep25
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __BYTETRIE_H__
+#define __BYTETRIE_H__
+
+/**
+ * \file
+ * \brief C++ API: Dictionary trie for mapping arbitrary byte sequences
+ *                 to integer values.
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "uassert.h"
+#include "udicttrie.h"
+
+U_NAMESPACE_BEGIN
+
+class ByteSink;
+class ByteTrieBuilder;
+class ByteTrieIterator;
+
+/**
+ * Light-weight, non-const reader class for a ByteTrie.
+ * Traverses a byte-serialized data structure with minimal state,
+ * for mapping byte sequences to non-negative integer values.
+ */
+class U_COMMON_API ByteTrie : public UMemory {
+public:
+    /**
+     * Constructs a reader for the serialized trie at trieBytes,
+     * positioned at the root node.
+     * Only the pointer is stored; the bytes are not copied here.
+     */
+    ByteTrie(const void *trieBytes)
+            : bytes_(reinterpret_cast<const uint8_t *>(trieBytes)),
+              pos_(bytes_), remainingMatchLength_(-1) {}
+
+    /**
+     * Resets this trie to its initial state.
+     */
+    ByteTrie &reset() {
+        pos_=bytes_;
+        remainingMatchLength_=-1;
+        return *this;
+    }
+
+    /**
+     * ByteTrie state object, for saving a trie's current state
+     * and resetting the trie back to this state later.
+     */
+    class State : public UMemory {
+    public:
+        // bytes==NULL marks a State which does not contain a saved state;
+        // resetToState() ignores such an object.
+        // pos and remainingMatchLength are only set by saveState().
+        State() { bytes=NULL; }
+    private:
+        friend class ByteTrie;
+
+        const uint8_t *bytes;
+        const uint8_t *pos;
+        int32_t remainingMatchLength;
+    };
+
+    /**
+     * Saves the state of this trie.
+     * @see resetToState
+     */
+    const ByteTrie &saveState(State &state) const {
+        state.bytes=bytes_;
+        state.pos=pos_;
+        state.remainingMatchLength=remainingMatchLength_;
+        return *this;
+    }
+
+    /**
+     * Resets this trie to the saved state.
+     * If the state object contains no state, or the state of a different trie,
+     * then this trie remains unchanged.
+     * @see saveState
+     * @see reset
+     */
+    ByteTrie &resetToState(const State &state) {
+        if(bytes_==state.bytes && bytes_!=NULL) {
+            pos_=state.pos;
+            remainingMatchLength_=state.remainingMatchLength;
+        }
+        return *this;
+    }
+
+    /**
+     * Determines whether the byte sequence so far matches, whether it has a value,
+     * and whether another input byte can continue a matching byte sequence.
+     * @return The match/value Result.
+     */
+    UDictTrieResult current() const;
+
+    /**
+     * Traverses the trie from the initial state for this input byte.
+     * Equivalent to reset().next(inByte).
+     * @return The match/value Result.
+     */
+    inline UDictTrieResult first(int32_t inByte) {
+        remainingMatchLength_=-1;
+        return nextImpl(bytes_, inByte);
+    }
+
+    /**
+     * Traverses the trie from the current state for this input byte.
+     * @return The match/value Result.
+     */
+    UDictTrieResult next(int32_t inByte);
+
+    /**
+     * Traverses the trie from the current state for this byte sequence.
+     * Equivalent to
+     * \code
+     * Result result=current();
+     * for(each c in s)
+     *   if((result=next(c))==UDICTTRIE_NO_MATCH) return UDICTTRIE_NO_MATCH;
+     * return result;
+     * \endcode
+     * @return The match/value Result.
+     */
+    UDictTrieResult next(const char *s, int32_t length);
+
+    /**
+     * Returns a matching byte sequence's value if called immediately after
+     * current()/first()/next() returned UDICTTRIE_HAS_VALUE or UDICTTRIE_HAS_FINAL_VALUE.
+     * getValue() can be called multiple times.
+     *
+     * Do not call getValue() after UDICTTRIE_NO_MATCH or UDICTTRIE_NO_VALUE!
+     */
+    inline int32_t getValue() const {
+        const uint8_t *pos=pos_;
+        int32_t leadByte=*pos++;
+        U_ASSERT(leadByte>=kMinValueLead);
+        // readValue() expects the lead byte without its "final value" bit.
+        return readValue(pos, leadByte>>1);
+    }
+
+    /**
+     * Determines whether all byte sequences reachable from the current state
+     * map to the same value.
+     * @param uniqueValue Receives the unique value, if this function returns TRUE.
+     *                    (output-only)
+     * @return TRUE if all byte sequences reachable from the current state
+     *         map to the same value.
+     */
+    inline UBool hasUniqueValue(int32_t &uniqueValue) const {
+        const uint8_t *pos=pos_;
+        // Skip the rest of a pending linear-match node.
+        return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, FALSE, uniqueValue);
+    }
+
+    /**
+     * Finds each byte which continues the byte sequence from the current state.
+     * That is, each byte b for which it would be next(b)!=UDICTTRIE_NO_MATCH now.
+     * @param out Each next byte is appended to this object.
+     *            (Only uses the out.Append(s, length) method.)
+     * @return the number of bytes which continue the byte sequence from here
+     */
+    int32_t getNextBytes(ByteSink &out) const;
+
+private:
+    friend class ByteTrieBuilder;
+    friend class ByteTrieIterator;
+
+    // Sets pos_ to NULL, which means that there are no more matches.
+    inline void stop() {
+        pos_=NULL;
+    }
+
+    // Reads a compact 32-bit integer.
+    // pos is already after the leadByte, and the lead byte is already shifted right by 1.
+    static int32_t readValue(const uint8_t *pos, int32_t leadByte);
+    // Skips the trail bytes of a compact value and returns the position after them.
+    // pos is already after the leadByte. Unlike for readValue(), the leadByte
+    // is NOT shifted right here; the thresholds are shifted left by 1 instead.
+    static inline const uint8_t *skipValue(const uint8_t *pos, int32_t leadByte) {
+        U_ASSERT(leadByte>=kMinValueLead);
+        if(leadByte>=(kMinTwoByteValueLead<<1)) {
+            if(leadByte<(kMinThreeByteValueLead<<1)) {
+                ++pos;
+            } else if(leadByte<(kFourByteValueLead<<1)) {
+                pos+=2;
+            } else {
+                // 3 trail bytes for kFourByteValueLead, 4 for kFiveByteValueLead.
+                pos+=3+((leadByte>>1)&1);
+            }
+        }
+        return pos;
+    }
+    // Skips a whole compact value, including its lead byte at pos.
+    static inline const uint8_t *skipValue(const uint8_t *pos) {
+        int32_t leadByte=*pos++;
+        return skipValue(pos, leadByte);
+    }
+
+    // Reads a jump delta and jumps.
+    static const uint8_t *jumpByDelta(const uint8_t *pos);
+
+    // Skips a jump delta (lead byte at pos, plus trail bytes) without jumping.
+    static inline const uint8_t *skipDelta(const uint8_t *pos) {
+        int32_t delta=*pos++;
+        if(delta>=kMinTwoByteDeltaLead) {
+            if(delta<kMinThreeByteDeltaLead) {
+                ++pos;
+            } else if(delta<kFourByteDeltaLead) {
+                pos+=2;
+            } else {
+                // 3 trail bytes for kFourByteDeltaLead, 4 for kFiveByteDeltaLead.
+                pos+=3+(delta&1);
+            }
+        }
+        return pos;
+    }
+
+    // Maps a value node lead byte to a result:
+    // UDICTTRIE_HAS_VALUE minus the node's "final value" bit.
+    // NOTE(review): presumably relies on
+    // UDICTTRIE_HAS_FINAL_VALUE==UDICTTRIE_HAS_VALUE-1; confirm in udicttrie.h.
+    static inline UDictTrieResult valueResult(int32_t node) {
+        return (UDictTrieResult)(UDICTTRIE_HAS_VALUE-(node&kValueIsFinal));
+    }
+
+    // Handles a branch node for both next(byte) and next(string).
+    UDictTrieResult branchNext(const uint8_t *pos, int32_t length, int32_t inByte);
+
+    // Requires remainingMatchLength_<0.
+    UDictTrieResult nextImpl(const uint8_t *pos, int32_t inByte);
+
+    // Helper functions for hasUniqueValue().
+    // Recursively finds a unique value (or whether there is not a unique one)
+    // from a branch.
+    static const uint8_t *findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
+                                                    UBool haveUniqueValue, int32_t &uniqueValue);
+    // Recursively finds a unique value (or whether there is not a unique one)
+    // starting from a position on a node lead byte.
+    static UBool findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue);
+
+    // Helper functions for getNextBytes().
+    // getNextBytes() when pos is on a branch node.
+    static void getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out);
+    static void append(ByteSink &out, int c);
+
+    // ByteTrie data structure
+    //
+    // The trie consists of a series of byte-serialized nodes for incremental
+    // string/byte sequence matching. The root node is at the beginning of the trie data.
+    //
+    // Types of nodes are distinguished by their node lead byte ranges.
+    // After each node, except a final-value node, another node follows to
+    // encode match values or continue matching further bytes.
+    //
+    // Node types:
+    //  - Value node: Stores a 32-bit integer in a compact, variable-length format.
+    //    The value is for the string/byte sequence so far.
+    //    One node bit indicates whether the value is final or whether
+    //    matching continues with the next node.
+    //  - Linear-match node: Matches a number of bytes.
+    //  - Branch node: Branches to other nodes according to the current input byte.
+    //    The node byte is the length of the branch (number of bytes to select from)
+    //    minus 1. It is followed by a sub-node:
+    //    - If the length is at most kMaxBranchLinearSubNodeLength, then
+    //      there are length-1 (key, value) pairs and then one more comparison byte.
+    //      If one of the key bytes matches, then the value is either a final value for
+    //      the string/byte sequence so far, or a "jump" delta to the next node.
+    //      If the last byte matches, then matching continues with the next node.
+    //      (Values have the same encoding as value nodes.)
+    //    - If the length is greater than kMaxBranchLinearSubNodeLength, then
+    //      there is one byte and one "jump" delta.
+    //      If the input byte is less than the sub-node byte, then "jump" by delta to
+    //      the next sub-node which will have a length of length/2.
+    //      (The delta has its own compact encoding.)
+    //      Otherwise, skip the "jump" delta to the next sub-node
+    //      which will have a length of length-length/2.
+
+    // Node lead byte values.
+
+    // 00..0f: Branch node. If node!=0 then the length is node+1, otherwise
+    // the length is one more than the next byte.
+
+    // For a branch sub-node with at most this many entries, we drop down
+    // to a linear search.
+    static const int32_t kMaxBranchLinearSubNodeLength=5;
+
+    // 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node.
+    static const int32_t kMinLinearMatch=0x10;
+    static const int32_t kMaxLinearMatchLength=0x10;
+
+    // 20..ff: Variable-length value node.
+    // If odd, the value is final. (Otherwise, intermediate value or jump delta.)
+    // Then shift-right by 1 bit.
+    // The remaining lead byte value indicates the number of following bytes (0..4)
+    // and contains the value's top bits.
+    static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;  // 0x20
+    // It is a final value if bit 0 is set.
+    static const int32_t kValueIsFinal=1;
+
+    // Compact value: After testing bit 0, shift right by 1 and then use the following thresholds.
+    static const int32_t kMinOneByteValueLead=kMinValueLead/2;  // 0x10
+    static const int32_t kMaxOneByteValue=0x40;  // At least 6 bits in the first byte.
+
+    static const int32_t kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1;  // 0x51
+    static const int32_t kMaxTwoByteValue=0x1aff;
+
+    static const int32_t kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1;  // 0x6c
+    static const int32_t kFourByteValueLead=0x7e;
+
+    // A little more than Unicode code points. (0x11ffff)
+    static const int32_t kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1;
+
+    static const int32_t kFiveByteValueLead=0x7f;
+
+    // Compact delta integers.
+    static const int32_t kMaxOneByteDelta=0xbf;
+    static const int32_t kMinTwoByteDeltaLead=kMaxOneByteDelta+1;  // 0xc0
+    static const int32_t kMinThreeByteDeltaLead=0xf0;
+    static const int32_t kFourByteDeltaLead=0xfe;
+    static const int32_t kFiveByteDeltaLead=0xff;
+
+    static const int32_t kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1;  // 0x2fff
+    static const int32_t kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1;  // 0xdffff
+
+    // Fixed value referencing the ByteTrie bytes.
+    const uint8_t *bytes_;
+
+    // Iterator variables.
+
+    // Pointer to next trie byte to read. NULL if no more matches.
+    const uint8_t *pos_;
+    // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
+    int32_t remainingMatchLength_;
+};
+
+U_NAMESPACE_END
+
+#endif  // __BYTETRIE_H__
--- a/icu4c/source/common/common.vcxproj
+++ b/icu4c/source/common/common.vcxproj
@ -400,6 +400,7 @@
    <ClCompile Include="servslkf.cpp" />
    <ClCompile Include="usprep.cpp" />
    <ClCompile Include="bytestream.cpp" />
+    <ClCompile Include="bytetrie.cpp" />
    <ClCompile Include="chariter.cpp" />
    <ClCompile Include="charstr.cpp" />
    <ClCompile Include="cstring.c" />
@ -1365,6 +1366,7 @@
 </Command>
      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
    </CustomBuild>
+    <ClInclude Include="bytetrie.h" />
    <CustomBuild Include="unicode\chariter.h">
      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
 </Command>
@ -1608,4 +1610,4 @@
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
-</Project>
+</Project>
--- a/icu4c/source/common/propname.cpp
+++ b/icu4c/source/common/propname.cpp
@ -1,11 +1,12 @@
 /*
 **********************************************************************
-* Copyright (c) 2002-2009, International Business Machines
+* Copyright (c) 2002-2010, International Business Machines
 * Corporation and others.  All Rights Reserved.
 **********************************************************************
 * Author: Alan Liu
 * Created: October 30 2002
 * Since: ICU 2.4
+* 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
 **********************************************************************
 */
 #include "propname.h"
@ -16,6 +17,10 @@
 #include "cstring.h"
 #include "ucln_cmn.h"
 #include "uarrsort.h"
+#include "uinvchar.h"
+
+#define INCLUDED_FROM_PROPNAME_CPP
+#include "propname_data.h"

 U_CDECL_BEGIN

@ -94,7 +99,7 @@ uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
        if(((r1|r2)&0xff)==0) {
            return 0;
        }
-        
+
        /* Compare the lowercased characters */
        if(r1!=r2) {
            rc=(r1&0xff)-(r2&0xff);
@ -120,7 +125,7 @@ uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
        if(((r1|r2)&0xff)==0) {
            return 0;
        }
-        
+
        /* Compare the lowercased characters */
        if(r1!=r2) {
            rc=(r1&0xff)-(r2&0xff);
@ -138,615 +143,169 @@ U_CDECL_END

 U_NAMESPACE_BEGIN

-//----------------------------------------------------------------------
-// PropertyAliases implementation
+int32_t PropNameData::findProperty(int32_t property) {  // Returns the valueMaps index of the property's two-word entry, or 0 if no range contains it.
+    int32_t i=1;  // valueMaps index, initially after numRanges
+    for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
+        // Read and skip the start and limit of this range.
+        int32_t start=valueMaps[i];
+        int32_t limit=valueMaps[i+1];
+        i+=2;
+        if(property<start) {  // Gives up at the first range that starts above property (presumably ranges are stored ascending; confirm against the data generator).
+            break;
+        }
+        if(property<limit) {
+            return i+(property-start)*2;  // Each property in a range occupies two consecutive valueMaps words.
+        }
+        i+=(limit-start)*2;  // Skip all entries for this range.
+    }
+    return 0;
+}

-const char*
-PropertyAliases::chooseNameInGroup(Offset offset,
-                                   UPropertyNameChoice choice) const {
-    int32_t c = choice;
-    if (!offset || c < 0) {
+int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {  // Returns the name group offset stored for value in the valueMap at valueMapIndex, or 0 if none.
+    if(valueMapIndex==0) {
+        return 0;  // The property does not have named values.
+    }
+    ++valueMapIndex;  // Skip the ByteTrie offset.
+    int32_t numRanges=valueMaps[valueMapIndex++];
+    if(numRanges<0x10) {  // Below 0x10: a count of ranges. Otherwise: 0x10 plus the length of a value list.
+        // Ranges of values.
+        for(; numRanges>0; --numRanges) {
+            // Read and skip the start and limit of this range.
+            int32_t start=valueMaps[valueMapIndex];
+            int32_t limit=valueMaps[valueMapIndex+1];
+            valueMapIndex+=2;
+            if(value<start) {
+                break;
+            }
+            if(value<limit) {
+                return valueMaps[valueMapIndex+value-start];  // One word per value in the range.
+            }
+            valueMapIndex+=limit-start;  // Skip all entries for this range.
+        }
+    } else {
+        // List of values.
+        int32_t valuesStart=valueMapIndex;
+        int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;  // The offsets follow the numRanges-0x10 listed values.
+        do {  // NOTE(review): reads one entry even if the list were empty (numRanges==0x10); presumably never generated -- confirm.
+            int32_t v=valueMaps[valueMapIndex];
+            if(value<v) {
+                break;
+            }
+            if(value==v) {
+                return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];  // Offset stored at the same position as the matching value.
+            }
+        } while(++valueMapIndex<nameGroupOffsetsStart);
+    }
+    return 0;
+}
+
+const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {  // Returns the nameIndex-th NUL-terminated name of nameGroup, or NULL if out of range or empty.
+    int32_t numNames=*nameGroup++;  // The first byte of a name group is its name count.
+    if(nameIndex<0 || numNames<=nameIndex) {
        return NULL;
    }
-    const Offset* p = (const Offset*) getPointer(offset);
-    while (c-- > 0) {
-        if (*p++ < 0) return NULL;
+    // Skip nameIndex names.
+    for(; nameIndex>0; --nameIndex) {
+        nameGroup=uprv_strchr(nameGroup, 0)+1;  // Move just past the terminating NUL of the current name.
    }
-    Offset a = *p;
-    if (a < 0) a = -a;
-    return (const char*) getPointerNull(a);
-}
-
-const ValueMap*
-PropertyAliases::getValueMap(EnumValue prop) const {
-    NonContiguousEnumToOffset* e2o = (NonContiguousEnumToOffset*) getPointer(enumToValue_offset);
-    Offset a = e2o->getOffset(prop);
-    return (const ValueMap*) (a ? getPointerNull(a) : NULL);
-}
-
-inline const char*
-PropertyAliases::getPropertyName(EnumValue prop,
-                                 UPropertyNameChoice choice) const {
-    NonContiguousEnumToOffset* e2n = (NonContiguousEnumToOffset*) getPointer(enumToName_offset);
-    return chooseNameInGroup(e2n->getOffset(prop), choice);
-}
-
-inline EnumValue
-PropertyAliases::getPropertyEnum(const char* alias) const {
-    NameToEnum* n2e = (NameToEnum*) getPointer(nameToEnum_offset);
-    return n2e->getEnum(alias, *this);
-}
-
-inline const char*
-PropertyAliases::getPropertyValueName(EnumValue prop,
-                                      EnumValue value,
-                                      UPropertyNameChoice choice) const {
-    const ValueMap* vm = getValueMap(prop);
-    if (!vm) return NULL;
-    Offset a;
-    if (vm->enumToName_offset) {
-        a = ((EnumToOffset*) getPointer(vm->enumToName_offset))->
-            getOffset(value);
-    } else {
-        a = ((NonContiguousEnumToOffset*) getPointer(vm->ncEnumToName_offset))->
-            getOffset(value);
+    if(*nameGroup==0) {
+        return NULL;  // no name (Property[Value]Aliases.txt has "n/a")
    }
-    return chooseNameInGroup(a, choice);
+    return nameGroup;
 }

-inline EnumValue
-PropertyAliases::getPropertyValueEnum(EnumValue prop,
-                                      const char* alias) const {
-    const ValueMap* vm = getValueMap(prop);
-    if (!vm) return UCHAR_INVALID_CODE;
-    NameToEnum* n2e = (NameToEnum*) getPointer(vm->nameToEnum_offset);
-    return n2e->getEnum(alias, *this);
-}
-
-U_NAMESPACE_END
-U_NAMESPACE_USE
-
-//----------------------------------------------------------------------
-// UDataMemory structures
-
-static const PropertyAliases* PNAME = NULL;
-static UDataMemory* UDATA = NULL;
-
-//----------------------------------------------------------------------
-// UDataMemory loading/unloading
-
-/**
- * udata callback to verify the zone data.
- */
-U_CDECL_BEGIN
-static UBool U_CALLCONV
-isPNameAcceptable(void* /*context*/,
-             const char* /*type*/, const char* /*name*/,
-             const UDataInfo* info) {
-    return
-        info->size >= sizeof(UDataInfo) &&
-        info->isBigEndian == U_IS_BIG_ENDIAN &&
-        info->charsetFamily == U_CHARSET_FAMILY &&
-        info->dataFormat[0] == PNAME_SIG_0 &&
-        info->dataFormat[1] == PNAME_SIG_1 &&
-        info->dataFormat[2] == PNAME_SIG_2 &&
-        info->dataFormat[3] == PNAME_SIG_3 &&
-        info->formatVersion[0] == PNAME_FORMAT_VERSION;
-}
-
-static UBool U_CALLCONV pname_cleanup(void) {
-    if (UDATA) {
-        udata_close(UDATA);
-        UDATA = NULL;
+UBool PropNameData::containsName(ByteTrie &trie, const char *name) {  // Feeds the normalized name into trie; TRUE if UDICTTRIE_RESULT_HAS_VALUE holds after the last byte.
+    if(name==NULL) {
+        return FALSE;
    }
-    PNAME = NULL;
-    return TRUE;
-}
-U_CDECL_END
-
-/**
- * Load the property names data.  Caller should check that data is
- * not loaded BEFORE calling this function.  Returns TRUE if the load
- * succeeds.
- */
-static UBool _load() {
-    UErrorCode ec = U_ZERO_ERROR;
-    UDataMemory* data =
-        udata_openChoice(0, PNAME_DATA_TYPE, PNAME_DATA_NAME,
-                         isPNameAcceptable, 0, &ec);
-    if (U_SUCCESS(ec)) {
-        umtx_lock(NULL);
-        if (UDATA == NULL) {
-            UDATA = data;
-            PNAME = (const PropertyAliases*) udata_getMemory(UDATA);
-            ucln_common_registerCleanup(UCLN_COMMON_PNAME, pname_cleanup);
-            data = NULL;
+    UDictTrieResult result=UDICTTRIE_NO_VALUE;  // Start state; presumably passes the HAS_NEXT test below so the first byte is fed -- confirm in the trie header.
+    char c;
+    while((c=*name++)!=0) {
+        c=uprv_invCharToLowercaseAscii(c);
+        // Ignore delimiters '-', '_', and ASCII White_Space.
+        if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
+            continue;
        }
-        umtx_unlock(NULL);
+        if(!UDICTTRIE_RESULT_HAS_NEXT(result)) {  // The previous byte left nothing to continue with, but the name has more characters.
+            return FALSE;
+        }
+        result=trie.next((uint8_t)c);  // Advances the caller's trie object, which is passed by reference.
    }
-    if (data) {
-        udata_close(data);
-    }
-    return PNAME!=NULL;
+    return UDICTTRIE_RESULT_HAS_VALUE(result);
 }

-/**
- * Inline function that expands to code that does a lazy load of the
- * property names data.  If the data is already loaded, avoids an
- * unnecessary function call.  If the data is not loaded, call _load()
- * to load it, and return TRUE if the load succeeds.
- */
-static inline UBool load() {
-    UBool f;
-    UMTX_CHECK(NULL, (PNAME!=NULL), f);
-    return f || _load();
+const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {  // Returns the nameChoice-th name of property, or NULL if the property or that name is unavailable.
+    int32_t valueMapIndex=findProperty(property);
+    if(valueMapIndex==0) {
+        return NULL;  // Not a known property.
+    }
+    return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);  // The first word of the property's entry is its offset into nameGroups.
+}
+
+const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {  // Returns the nameChoice-th name of value for property, or NULL if any lookup step fails.
+    int32_t valueMapIndex=findProperty(property);
+    if(valueMapIndex==0) {
+        return NULL;  // Not a known property.
+    }
+    int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);  // The second word of the property's entry is the index of its valueMap.
+    if(nameGroupOffset==0) {
+        return NULL;  // No name group was found for this value.
+    }
+    return getName(nameGroups+nameGroupOffset, nameChoice);
+}
+
+int32_t PropNameData::getPropertyOrValueEnum(int32_t byteTrieOffset, const char *alias) {  // Looks up alias in the ByteTrie at byteTries+byteTrieOffset; returns its value or UCHAR_INVALID_CODE.
+    ByteTrie trie(byteTries+byteTrieOffset);
+    if(containsName(trie, alias)) {
+        return trie.getValue();  // containsName() advanced this same trie object through the alias.
+    } else {
+        return UCHAR_INVALID_CODE;
+    }
+}
+
+int32_t PropNameData::getPropertyEnum(const char *alias) {  // Returns the property enum named by alias, or UCHAR_INVALID_CODE.
+    return getPropertyOrValueEnum(0, alias);  // Property names are looked up in the trie at offset 0 of byteTries.
+}
+
+int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {  // Returns the value of property named by alias, or UCHAR_INVALID_CODE.
+    int32_t valueMapIndex=findProperty(property);
+    if(valueMapIndex==0) {
+        return UCHAR_INVALID_CODE;  // Not a known property.
+    }
+    valueMapIndex=valueMaps[valueMapIndex+1];  // The second word of the property's entry is the index of its valueMap.
+    if(valueMapIndex==0) {
+        return UCHAR_INVALID_CODE;  // The property does not have named values.
+    }
+    // valueMapIndex is the start of the property's valueMap,
+    // where the first word is the ByteTrie offset.
+    return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
 }

 //----------------------------------------------------------------------
 // Public API implementation

-// The C API is just a thin wrapper.  Each function obtains a pointer
-// to the singleton PropertyAliases, and calls the appropriate method
-// on it.  If it cannot obtain a pointer, because valid data is not
-// available, then it returns NULL or UCHAR_INVALID_CODE.
-
 U_CAPI const char* U_EXPORT2
 u_getPropertyName(UProperty property,
                  UPropertyNameChoice nameChoice) {
-    return load() ? PNAME->getPropertyName(property, nameChoice)
-                  : NULL;
+    return PropNameData::getPropertyName(property, nameChoice);  // Thin C wrapper over the static lookup; NULL if the property or name is unknown.
 }

 U_CAPI UProperty U_EXPORT2
 u_getPropertyEnum(const char* alias) {
-    UProperty p = load() ? (UProperty) PNAME->getPropertyEnum(alias)
-                         : UCHAR_INVALID_CODE;
-    return p;
+    return (UProperty)PropNameData::getPropertyEnum(alias);  // Thin C wrapper; the int32_t result (possibly UCHAR_INVALID_CODE) is cast to UProperty.
 }

 U_CAPI const char* U_EXPORT2
 u_getPropertyValueName(UProperty property,
                       int32_t value,
                       UPropertyNameChoice nameChoice) {
-    return load() ? PNAME->getPropertyValueName(property, value, nameChoice)
-                  : NULL;
+    return PropNameData::getPropertyValueName(property, value, nameChoice);  // Thin C wrapper over the static lookup; NULL if nothing matches.
 }

 U_CAPI int32_t U_EXPORT2
 u_getPropertyValueEnum(UProperty property,
                       const char* alias) {
-    return load() ? PNAME->getPropertyValueEnum(property, alias)
-                  : (int32_t)UCHAR_INVALID_CODE;
+    return PropNameData::getPropertyValueEnum(property, alias);  // Thin C wrapper over the static lookup; UCHAR_INVALID_CODE if nothing matches.
 }

-/* data swapping ------------------------------------------------------------ */
-
-/*
- * Sub-structure-swappers use the temp array (which is as large as the
- * actual data) for intermediate storage,
- * as well as to indicate if a particular structure has been swapped already.
- * The temp array is initially reset to all 0.
- * pos is the byte offset of the sub-structure in the inBytes/outBytes/temp arrays.
- */
-
-int32_t
-EnumToOffset::swap(const UDataSwapper *ds,
-                   const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
-                   uint8_t *temp, int32_t pos,
-                   UErrorCode *pErrorCode) {
-    const EnumToOffset *inMap;
-    EnumToOffset *outMap, *tempMap;
-    int32_t size;
-
-    tempMap=(EnumToOffset *)(temp+pos);
-    if(tempMap->enumStart!=0 || tempMap->enumLimit!=0) {
-        /* this map was swapped already */
-        size=tempMap->getSize();
-        return size;
-    }
-
-    inMap=(const EnumToOffset *)(inBytes+pos);
-    outMap=(EnumToOffset *)(outBytes+pos);
-
-    tempMap->enumStart=udata_readInt32(ds, inMap->enumStart);
-    tempMap->enumLimit=udata_readInt32(ds, inMap->enumLimit);
-    size=tempMap->getSize();
-
-    if(length>=0) {
-        if(length<(pos+size)) {
-            if(length<(int32_t)sizeof(PropertyAliases)) {
-                udata_printError(ds, "upname_swap(EnumToOffset): too few bytes (%d after header)\n"
-                                     "    for pnames.icu EnumToOffset{%d..%d} at %d\n",
-                                 length, tempMap->enumStart, tempMap->enumLimit, pos);
-                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-                return 0;
-            }
-        }
-
-        /* swap enumStart and enumLimit */
-        ds->swapArray32(ds, inMap, 2*sizeof(EnumValue), outMap, pErrorCode);
-
-        /* swap _offsetArray[] */
-        ds->swapArray16(ds, inMap->getOffsetArray(), (tempMap->enumLimit-tempMap->enumStart)*sizeof(Offset),
-                           outMap->getOffsetArray(), pErrorCode);
-    }
-
-    return size;
-}
-
-int32_t
-NonContiguousEnumToOffset::swap(const UDataSwapper *ds,
-                   const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
-                   uint8_t *temp, int32_t pos,
-                   UErrorCode *pErrorCode) {
-    const NonContiguousEnumToOffset *inMap;
-    NonContiguousEnumToOffset *outMap, *tempMap;
-    int32_t size;
-
-    tempMap=(NonContiguousEnumToOffset *)(temp+pos);
-    if(tempMap->count!=0) {
-        /* this map was swapped already */
-        size=tempMap->getSize();
-        return size;
-    }
-
-    inMap=(const NonContiguousEnumToOffset *)(inBytes+pos);
-    outMap=(NonContiguousEnumToOffset *)(outBytes+pos);
-
-    tempMap->count=udata_readInt32(ds, inMap->count);
-    size=tempMap->getSize();
-
-    if(length>=0) {
-        if(length<(pos+size)) {
-            if(length<(int32_t)sizeof(PropertyAliases)) {
-                udata_printError(ds, "upname_swap(NonContiguousEnumToOffset): too few bytes (%d after header)\n"
-                                     "    for pnames.icu NonContiguousEnumToOffset[%d] at %d\n",
-                                 length, tempMap->count, pos);
-                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-                return 0;
-            }
-        }
-
-        /* swap count and _enumArray[] */
-        length=(1+tempMap->count)*sizeof(EnumValue);
-        ds->swapArray32(ds, inMap, length,
-                           outMap, pErrorCode);
-
-        /* swap _offsetArray[] */
-        pos+=length;
-        ds->swapArray16(ds, inBytes+pos, tempMap->count*sizeof(Offset),
-                           outBytes+pos, pErrorCode);
-    }
-
-    return size;
-}
-
-struct NameAndIndex {
-    Offset name, index;
-};
-
-U_CDECL_BEGIN
-typedef int32_t U_CALLCONV PropNameCompareFn(const char *name1, const char *name2);
-
-struct CompareContext {
-    const char *chars;
-    PropNameCompareFn *propCompare;
-};
-
-static int32_t U_CALLCONV
-upname_compareRows(const void *context, const void *left, const void *right) {
-    CompareContext *cmp=(CompareContext *)context;
-    return cmp->propCompare(cmp->chars+((const NameAndIndex *)left)->name,
-                            cmp->chars+((const NameAndIndex *)right)->name);
-}
-U_CDECL_END
-
-int32_t
-NameToEnum::swap(const UDataSwapper *ds,
-                   const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
-                   uint8_t *temp, int32_t pos,
-                   UErrorCode *pErrorCode) {
-    const NameToEnum *inMap;
-    NameToEnum *outMap, *tempMap;
-
-    const EnumValue *inEnumArray;
-    EnumValue *outEnumArray;
-
-    const Offset *inNameArray;
-    Offset *outNameArray;
-
-    NameAndIndex *sortArray;
-    CompareContext cmp;
-
-    int32_t i, size, oldIndex;
-
-    tempMap=(NameToEnum *)(temp+pos);
-    if(tempMap->count!=0) {
-        /* this map was swapped already */
-        size=tempMap->getSize();
-        return size;
-    }
-
-    inMap=(const NameToEnum *)(inBytes+pos);
-    outMap=(NameToEnum *)(outBytes+pos);
-
-    tempMap->count=udata_readInt32(ds, inMap->count);
-    size=tempMap->getSize();
-
-    if(length>=0) {
-        if(length<(pos+size)) {
-            if(length<(int32_t)sizeof(PropertyAliases)) {
-                udata_printError(ds, "upname_swap(NameToEnum): too few bytes (%d after header)\n"
-                                     "    for pnames.icu NameToEnum[%d] at %d\n",
-                                 length, tempMap->count, pos);
-                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-                return 0;
-            }
-        }
-
-        /* swap count */
-        ds->swapArray32(ds, inMap, 4, outMap, pErrorCode);
-
-        inEnumArray=inMap->getEnumArray();
-        outEnumArray=outMap->getEnumArray();
-
-        inNameArray=(const Offset *)(inEnumArray+tempMap->count);
-        outNameArray=(Offset *)(outEnumArray+tempMap->count);
-
-        if(ds->inCharset==ds->outCharset) {
-            /* no need to sort, just swap the enum/name arrays */
-            ds->swapArray32(ds, inEnumArray, tempMap->count*4, outEnumArray, pErrorCode);
-            ds->swapArray16(ds, inNameArray, tempMap->count*2, outNameArray, pErrorCode);
-            return size;
-        }
-
-        /*
-         * The name and enum arrays are sorted by names and must be resorted
-         * if inCharset!=outCharset.
-         * We use the corresponding part of the temp array to sort an array
-         * of pairs of name offsets and sorting indexes.
-         * Then the sorting indexes are used to permutate-swap the name and enum arrays.
-         *
-         * The outBytes must already contain the swapped strings.
-         */
-        sortArray=(NameAndIndex *)tempMap->getEnumArray();
-        for(i=0; i<tempMap->count; ++i) {
-            sortArray[i].name=udata_readInt16(ds, inNameArray[i]);
-            sortArray[i].index=(Offset)i;
-        }
-
-        /*
-         * use a stable sort to avoid shuffling of equal strings,
-         * which makes testing harder
-         */
-        cmp.chars=(const char *)outBytes;
-        if (ds->outCharset==U_ASCII_FAMILY) {
-            cmp.propCompare=uprv_compareASCIIPropertyNames;
-        }
-        else {
-            cmp.propCompare=uprv_compareEBCDICPropertyNames;
-        }
-        uprv_sortArray(sortArray, tempMap->count, sizeof(NameAndIndex),
-                       upname_compareRows, &cmp,
-                       TRUE, pErrorCode);
-        if(U_FAILURE(*pErrorCode)) {
-            udata_printError(ds, "upname_swap(NameToEnum).uprv_sortArray(%d items) failed\n",
-                             tempMap->count);
-            return 0;
-        }
-
-        /* copy/swap/permutate _enumArray[] and _nameArray[] */
-        if(inEnumArray!=outEnumArray) {
-            for(i=0; i<tempMap->count; ++i) {
-                oldIndex=sortArray[i].index;
-                ds->swapArray32(ds, inEnumArray+oldIndex, 4, outEnumArray+i, pErrorCode);
-                ds->swapArray16(ds, inNameArray+oldIndex, 2, outNameArray+i, pErrorCode);
-            }
-        } else {
-            /*
-             * in-place swapping: need to permutate into a temporary array
-             * and then copy back to not destroy the data
-             */
-            EnumValue *tempEnumArray;
-            Offset *oldIndexes;
-
-            /* write name offsets directly from sortArray */
-            for(i=0; i<tempMap->count; ++i) {
-                ds->writeUInt16((uint16_t *)outNameArray+i, (uint16_t)sortArray[i].name);
-            }
-
-            /*
-             * compress the oldIndexes into a separate array to make space for tempEnumArray
-             * the tempMap _nameArray becomes oldIndexes[], getting the index
-             *   values from the 2D sortArray[],
-             * while sortArray=tempMap _enumArray[] becomes tempEnumArray[]
-             * this saves us allocating more memory
-             *
-             * it works because sizeof(NameAndIndex)<=sizeof(EnumValue)
-             * and because the nameArray[] can be used for oldIndexes[]
-             */
-            tempEnumArray=(EnumValue *)sortArray;
-            oldIndexes=(Offset *)(sortArray+tempMap->count);
-
-            /* copy sortArray[].index values into oldIndexes[] */
-            for(i=0; i<tempMap->count; ++i) {
-                oldIndexes[i]=sortArray[i].index;
-            }
-
-            /* permutate inEnumArray[] into tempEnumArray[] */
-            for(i=0; i<tempMap->count; ++i) {
-                ds->swapArray32(ds, inEnumArray+oldIndexes[i], 4, tempEnumArray+i, pErrorCode);
-            }
-
-            /* copy tempEnumArray[] to outEnumArray[] */
-            uprv_memcpy(outEnumArray, tempEnumArray, tempMap->count*4);
-        }
-    }
-
-    return size;
-}
-
-int32_t
-PropertyAliases::swap(const UDataSwapper *ds,
-                      const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
-                      UErrorCode *pErrorCode) {
-    const PropertyAliases *inAliases;
-    PropertyAliases *outAliases;
-    PropertyAliases aliases;
-
-    const ValueMap *inValueMaps;
-    ValueMap *outValueMaps;
-    ValueMap valueMap;
-
-    int32_t i;
-
-    inAliases=(const PropertyAliases *)inBytes;
-    outAliases=(PropertyAliases *)outBytes;
-
-    /* read the input PropertyAliases - all 16-bit values */
-    for(i=0; i<(int32_t)sizeof(PropertyAliases)/2; ++i) {
-        ((uint16_t *)&aliases)[i]=ds->readUInt16(((const uint16_t *)inBytes)[i]);
-    }
-
-    if(length>=0) {
-        if(length<aliases.total_size) {
-            udata_printError(ds, "upname_swap(): too few bytes (%d after header) for all of pnames.icu\n",
-                             length);
-            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-            return 0;
-        }
-
-        /* copy the data for inaccessible bytes */
-        if(inBytes!=outBytes) {
-            uprv_memcpy(outBytes, inBytes, aliases.total_size);
-        }
-
-        /* swap the PropertyAliases class fields */
-        ds->swapArray16(ds, inAliases, sizeof(PropertyAliases), outAliases, pErrorCode);
-
-        /* swap the name groups */
-        ds->swapArray16(ds, inBytes+aliases.nameGroupPool_offset,
-                                aliases.stringPool_offset-aliases.nameGroupPool_offset,
-                           outBytes+aliases.nameGroupPool_offset, pErrorCode);
-
-        /* swap the strings */
-        udata_swapInvStringBlock(ds, inBytes+aliases.stringPool_offset,
-                                        aliases.total_size-aliases.stringPool_offset,
-                                    outBytes+aliases.stringPool_offset, pErrorCode);
-
-        /*
-         * alloc uint8_t temp[total_size] and reset it
-         * swap each top-level struct, put at least the count fields into temp
-         *   use subclass-specific swap() functions
-         * enumerate value maps, for each
-         *   if temp does not have count!=0 yet
-         *     read count, put it into temp
-         *     swap the array(s)
-         *     resort strings in name->enum maps
-         * swap value maps
-         */
-        LocalMemory<uint8_t> temp;
-        if(temp.allocateInsteadAndReset(aliases.total_size)==NULL) {
-            udata_printError(ds, "upname_swap(): unable to allocate temp memory (%d bytes)\n",
-                             aliases.total_size);
-            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
-            return 0;
-        }
-
-        /* swap properties->name groups map */
-        NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes,
-                                        temp.getAlias(), aliases.enumToName_offset, pErrorCode);
-
-        /* swap name->properties map */
-        NameToEnum::swap(ds, inBytes, length, outBytes,
-                         temp.getAlias(), aliases.nameToEnum_offset, pErrorCode);
-
-        /* swap properties->value maps map */
-        NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes,
-                                        temp.getAlias(), aliases.enumToValue_offset, pErrorCode);
-
-        /* enumerate all ValueMaps and swap them */
-        inValueMaps=(const ValueMap *)(inBytes+aliases.valueMap_offset);
-        outValueMaps=(ValueMap *)(outBytes+aliases.valueMap_offset);
-
-        for(i=0; i<aliases.valueMap_count; ++i) {
-            valueMap.enumToName_offset=udata_readInt16(ds, inValueMaps[i].enumToName_offset);
-            valueMap.ncEnumToName_offset=udata_readInt16(ds, inValueMaps[i].ncEnumToName_offset);
-            valueMap.nameToEnum_offset=udata_readInt16(ds, inValueMaps[i].nameToEnum_offset);
-
-            if(valueMap.enumToName_offset!=0) {
-                EnumToOffset::swap(ds, inBytes, length, outBytes,
-                                   temp.getAlias(), valueMap.enumToName_offset,
-                                   pErrorCode);
-            } else if(valueMap.ncEnumToName_offset!=0) {
-                NonContiguousEnumToOffset::swap(ds, inBytes, length, outBytes,
-                                                temp.getAlias(), valueMap.ncEnumToName_offset,
-                                                pErrorCode);
-            }
-            if(valueMap.nameToEnum_offset!=0) {
-                NameToEnum::swap(ds, inBytes, length, outBytes,
-                                 temp.getAlias(), valueMap.nameToEnum_offset,
-                                 pErrorCode);
-            }
-        }
-
-        /* swap the ValueMaps array itself */
-        ds->swapArray16(ds, inValueMaps, aliases.valueMap_count*sizeof(ValueMap),
-                           outValueMaps, pErrorCode);
-
-        /* name groups and strings were swapped above */
-    }
-
-    return aliases.total_size;
-}
-
-U_CAPI int32_t U_EXPORT2
-upname_swap(const UDataSwapper *ds,
-            const void *inData, int32_t length, void *outData,
-            UErrorCode *pErrorCode) {
-    const UDataInfo *pInfo;
-    int32_t headerSize;
-
-    const uint8_t *inBytes;
-    uint8_t *outBytes;
-
-    /* udata_swapDataHeader checks the arguments */
-    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
-        return 0;
-    }
-
-    /* check data format and format version */
-    pInfo=(const UDataInfo *)((const char *)inData+4);
-    if(!(
-        pInfo->dataFormat[0]==0x70 &&   /* dataFormat="pnam" */
-        pInfo->dataFormat[1]==0x6e &&
-        pInfo->dataFormat[2]==0x61 &&
-        pInfo->dataFormat[3]==0x6d &&
-        pInfo->formatVersion[0]==1
-    )) {
-        udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
-                         pInfo->dataFormat[0], pInfo->dataFormat[1],
-                         pInfo->dataFormat[2], pInfo->dataFormat[3],
-                         pInfo->formatVersion[0]);
-        *pErrorCode=U_UNSUPPORTED_ERROR;
-        return 0;
-    }
-
-    inBytes=(const uint8_t *)inData+headerSize;
-    outBytes=(uint8_t *)outData+headerSize;
-
-    if(length>=0) {
-        length-=headerSize;
-        if(length<(int32_t)sizeof(PropertyAliases)) {
-            udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
-                             length);
-            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-            return 0;
-        }
-    }
-
-    return headerSize+PropertyAliases::swap(ds, inBytes, length, outBytes, pErrorCode);
-}
-
-//eof
+U_NAMESPACE_END
--- a/icu4c/source/common/propname.h
+++ b/icu4c/source/common/propname.h
@ -1,11 +1,12 @@
 /*
 **********************************************************************
-* Copyright (c) 2002-2004, International Business Machines
+* Copyright (c) 2002-2010, International Business Machines
 * Corporation and others.  All Rights Reserved.
 **********************************************************************
 * Author: Alan Liu
 * Created: October 30 2002
 * Since: ICU 2.4
+* 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
 **********************************************************************
 */
 #ifndef PROPNAME_H
@ -13,6 +14,7 @@

 #include "unicode/utypes.h"
 #include "unicode/uchar.h"
+#include "bytetrie.h"
 #include "udataswp.h"
 #include "uprops.h"

@ -75,441 +77,134 @@ U_CDECL_END
 #define PNAME_SIG_2 ((uint8_t)0x61) /* a */
 #define PNAME_SIG_3 ((uint8_t)0x6D) /* m */

-#define PNAME_FORMAT_VERSION ((int8_t)1) /* formatVersion[0] */
-
-/**
- * Swap pnames.icu. See udataswp.h.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-upname_swap(const UDataSwapper *ds,
-            const void *inData, int32_t length, void *outData,
-            UErrorCode *pErrorCode);
-
-
-#ifdef XP_CPLUSPLUS
-
-class Builder;
-
 U_NAMESPACE_BEGIN

-/**
- * An offset from the start of the pnames data to a contained entity.
- * This must be a signed value, since negative offsets are used as an
- * end-of-list marker.  Offsets to actual objects are non-zero.  A
- * zero offset indicates an absent entry; this corresponds to aliases
- * marked "n/a" in the original Unicode data files.
- */
-typedef int16_t Offset; /*  must be signed */
+class PropNameData {
+public:
+    enum {
+        // Byte offsets from the start of the data, after the generic header.
+        IX_VALUE_MAPS_OFFSET,
+        IX_BYTE_TRIES_OFFSET,
+        IX_NAME_GROUPS_OFFSET,
+        IX_RESERVED3_OFFSET,
+        IX_RESERVED4_OFFSET,
+        IX_TOTAL_SIZE,

-#define MAX_OFFSET 0x7FFF
+        // Other values.
+        IX_MAX_NAME_LENGTH,
+        IX_RESERVED7,
+        IX_COUNT
+    };

-/**
- * A generic value for a property or property value.  Typically an
- * enum from uchar.h, but sometimes a non-enum value.  It must be
- * large enough to accomodate the largest enum value, which as of this
- * writing is the largest general category mask.  Need not be signed
- * but may be.  Typically it doesn't matter, since the caller will
- * cast it to the proper type before use.  Takes the special value
- * UCHAR_INVALID_CODE for invalid input.
- */
-typedef int32_t EnumValue;
+    static const char *getPropertyName(int32_t property, int32_t nameChoice);
+    static const char *getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice);

-/* ---------------------------------------------------------------------- */
-/*  ValueMap */
+    static int32_t getPropertyEnum(const char *alias);
+    static int32_t getPropertyValueEnum(int32_t property, const char *alias);

-/**
- * For any top-level property that has named values (binary and
- * enumerated properties), there is a ValueMap object.  This object
- * maps from enum values to two other maps.  One goes from value enums
- * to value names.  The other goes from value names to value enums.
- * 
- * The value enum values may be contiguous or disjoint.  If they are
- * contiguous then the enumToName_offset is nonzero, and the
- * ncEnumToName_offset is zero.  Vice versa if the value enums are
- * disjoint.
- *
- * There are n of these objects, where n is the number of binary
- * properties + the number of enumerated properties.
- */
-struct ValueMap {
+private:
+    static int32_t findProperty(int32_t property);
+    static int32_t findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value);
+    static const char *getName(const char *nameGroup, int32_t nameIndex);
+    static UBool containsName(ByteTrie &trie, const char *name);

-    /*  -- begin pnames data -- */
-    /*  Enum=>name EnumToOffset / NonContiguousEnumToOffset objects. */
-    /*  Exactly one of these will be nonzero. */
-    Offset enumToName_offset;
-    Offset ncEnumToName_offset;
+    static int32_t getPropertyOrValueEnum(int32_t byteTrieOffset, const char *alias);

-    Offset nameToEnum_offset; /*  Name=>enum data */
-    /*  -- end pnames data -- */
+    static const int32_t indexes[];
+    static const int32_t valueMaps[];
+    static const uint8_t byteTries[];
+    static const char nameGroups[];
 };

-/* ---------------------------------------------------------------------- */
-/*  PropertyAliases class */
-
-/**
- * A class encapsulating access to the memory-mapped data representing
- * property aliases and property value aliases (pnames).  The class
- * MUST have no v-table and declares certain methods inline -- small
- * methods and methods that are called from only one point.
+/*
+ * pnames.icu formatVersion 2
 *
- * The data members in this class correspond to the in-memory layout
- * of the header of the pnames data.
+ * formatVersion 2 is new in ICU 4.8.
+ * In ICU 4.8, the pnames.icu data file is used only in ICU4J.
+ * ICU4C 4.8 has the same data structures hardcoded in source/common/propname_data.h.
+ *
+ * For documentation of pnames.icu formatVersion 1 see ICU4C 4.6 (2010-dec-01)
+ * or earlier versions of this header file (source/common/propname.h).
+ *
+ * The pnames.icu begins with the standard ICU DataHeader/UDataInfo.
+ * After that:
+ *
+ * int32_t indexes[8];
+ *
+ *      (See the PropNameData::IX_... constants.)
+ *
+ *      The first 6 indexes are byte offsets from the beginning of the data
+ *      (beginning of indexes[]) to following structures.
+ *      The length of each structure is the difference between its offset
+ *      and the next one.
+ *      All offsets are filled in: Where there is no data between two offsets,
+ *      those two offsets are the same.
+ *      The last offset (indexes[PropNameData::IX_TOTAL_SIZE]) indicates the
+ *      total number of bytes in the file. (Not counting the standard headers.)
+ *
+ *      The seventh index (indexes[PropNameData::IX_MAX_NAME_LENGTH]) has the
+ *      maximum length of any Unicode property (or property value) alias.
+ *      (Without normalization, that is, including underscores etc.)
+ *
+ * int32_t valueMaps[];
+ *
+ *      The valueMaps[] begins with a map from UProperty enums to properties,
+ *      followed by the per-property value maps from property values to names,
+ *      for those properties that have named values.
+ *      (Binary & enumerated, plus General_Category_Mask.)
+ *
+ *      valueMaps[0] contains the number of UProperty enum ranges.
+ *      For each range:
+ *        int32_t start, limit -- first and last+1 UProperty enum of a dense range
+ *        Followed by (limit-start) pairs of
+ *          int32_t nameGroupOffset;
+ *            Offset into nameGroups[] for the property's names/aliases.
+ *          int32_t valueMapIndex;
+ *            Offset of the property's value map in the valueMaps[] array.
+ *            If the valueMapIndex is 0, then the property does not have named values.
+ *
+ *      For each property's value map:
+ *      int32_t byteTrieOffset; -- Offset into byteTries[] for name->value mapping.
+ *      int32_t numRanges;
+ *        If numRanges is in the range 1..15, then that many ranges of values follow.
+ *        Per range:
+ *          int32_t start, limit -- first and last+1 property value of a range
+ *          Followed by (limit-start) entries of
+ *            int32_t nameGroupOffset;
+ *              Offset into nameGroups[] for the property value's names/aliases.
+ *              If the nameGroupOffset is 0, then this is not a named value for this property.
+ *              (That is, the ranges need not be dense.)
+ *        If numRanges is >=0x10, then (numRanges-0x10) sorted values
+ *        and then (numRanges-0x10) corresponding nameGroupOffsets follow.
+ *        Values are sorted as signed integers.
+ *        In this case, the set of values is dense; no nameGroupOffset will be 0.
+ *
+ *      For both properties and property values, ranges are sorted by their start/limit values.
+ *
+ * uint8_t byteTries[];
+ *
+ *      This is a sequence of ByteTrie structures, byte-serialized tries for
+ *      mapping from names/aliases to values.
+ *      The first one maps from property names/aliases to UProperty enum constants.
+ *      The following ones are indexed by property value map byteTrieOffsets
+ *      for mapping each property's names/aliases to their property values.
+ *
+ * char nameGroups[];
+ *
+ *      This is a sequence of property name groups.
+ *      Each group is a list of names/aliases (invariant-character strings) for
+ *      one property or property value, in the order of UPropertyNameChoice.
+ *      The first byte of each group is the number of names in the group.
+ *      It is followed by that many NUL-terminated strings.
+ *      The first string is for the short name; if there is no short name,
+ *      then the first string is empty.
+ *      The second string is the long name. Further strings are additional aliases.
+ *
+ *      The first name group is for a property rather than a property value,
+ *      so that a nameGroupOffset of 0 can be used to indicate "no value"
+ *      in a property's sparse value ranges.
 */
-class PropertyAliases {

-    /*  -- begin pnames data -- */
-    /*  Enum=>name EnumToOffset object for binary and enumerated */
-    /*  properties */
-    Offset enumToName_offset;
-
-    /*  Name=>enum data for binary & enumerated properties */
-    Offset nameToEnum_offset;
-
-    /*  Enum=>offset EnumToOffset object mapping enumerated properties */
-    /*  to ValueMap objects */
-    Offset enumToValue_offset;
-
-    /*  The following are needed by external readers of this data. */
-    /*  We don't use them ourselves. */
-    int16_t total_size; /*  size in bytes excluding the udata header */
-    Offset valueMap_offset; /*  offset to start of array */
-    int16_t valueMap_count; /*  number of entries */
-    Offset nameGroupPool_offset; /*  offset to start of array */
-    int16_t nameGroupPool_count; /*  number of entries (not groups) */
-    Offset stringPool_offset; /*  offset to start of pool */
-    int16_t stringPool_count; /*  number of strings (not size in bytes) */
-
-    /*  -- end pnames data -- */
-
-    friend class ::Builder;
-
-    const ValueMap* getValueMap(EnumValue prop) const;
-
-    const char* chooseNameInGroup(Offset offset,
-                                  UPropertyNameChoice choice) const;
-
- public:
-
-    inline const int8_t* getPointer(Offset o) const {
-        return ((const int8_t*) this) + o;
-    }
-
-    inline const int8_t* getPointerNull(Offset o) const {
-        return o ? getPointer(o) : NULL;
-    }
-
-    inline const char* getPropertyName(EnumValue prop,
-                                       UPropertyNameChoice choice) const;
-    
-    inline EnumValue getPropertyEnum(const char* alias) const;
-
-    inline const char* getPropertyValueName(EnumValue prop, EnumValue value,
-                                            UPropertyNameChoice choice) const;
-    
-    inline EnumValue getPropertyValueEnum(EnumValue prop,
-                                          const char* alias) const;
-
-    static int32_t
-    swap(const UDataSwapper *ds,
-         const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
-         UErrorCode *pErrorCode);
-};
-
-/* ---------------------------------------------------------------------- */
-/*  EnumToOffset */
-
-/**
- * A generic map from enum values to Offsets.  The enum values must be
- * contiguous, from enumStart to enumLimit.  The Offset values may
- * point to anything.
- */
-class EnumToOffset {
-
-    /*  -- begin pnames data -- */
-    EnumValue enumStart;
-    EnumValue enumLimit;
-    Offset _offsetArray; /*  [array of enumLimit-enumStart] */
-    /*  -- end pnames data -- */
-
-    friend class ::Builder;
-
-    Offset* getOffsetArray() {
-        return &_offsetArray;
-    }
-
-    const Offset* getOffsetArray() const {
-        return &_offsetArray;
-    }
-
-    static int32_t getSize(int32_t n) {
-        return sizeof(EnumToOffset) + sizeof(Offset) * (n - 1);
-    }
-
-    int32_t getSize() {
-        return getSize(enumLimit - enumStart);
-    }
-
- public:
-
-    Offset getOffset(EnumValue enumProbe) const {
-        if (enumProbe < enumStart ||
-            enumProbe >= enumLimit) {
-            return 0; /*  not found */
-        }
-        const Offset* p = getOffsetArray();
-        return p[enumProbe - enumStart];
-    }
-
-    static int32_t
-    swap(const UDataSwapper *ds,
-         const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
-         uint8_t *temp, int32_t pos,
-         UErrorCode *pErrorCode);
-};
-
-/* ---------------------------------------------------------------------- */
-/*  NonContiguousEnumToOffset */
-
-/**
- * A generic map from enum values to Offsets.  The enum values may be
- * disjoint.  If they are contiguous, an EnumToOffset should be used
- * instead.  The Offset values may point to anything.
- */
-class NonContiguousEnumToOffset {
-
-    /*  -- begin pnames data -- */
-    int32_t count;
-    EnumValue _enumArray; /*  [array of count] */
-    /*  Offset _offsetArray; // [array of count] after enumValue[count-1] */
-    /*  -- end pnames data -- */
-
-    friend class ::Builder;
-
-    EnumValue* getEnumArray() {
-        return &_enumArray;
-    }
-
-    const EnumValue* getEnumArray() const {
-        return &_enumArray;
-    }
-    
-    Offset* getOffsetArray() {
-        return (Offset*) (getEnumArray() + count);
-    }
-
-    const Offset* getOffsetArray() const {
-        return (Offset*) (getEnumArray() + count);
-    }
-
-    static int32_t getSize(int32_t n) {
-        return sizeof(int32_t) + (sizeof(EnumValue) + sizeof(Offset)) * n;
-    }
-
-    int32_t getSize() {
-        return getSize(count);
-    }
-
- public:
-
-    Offset getOffset(EnumValue enumProbe) const {
-        const EnumValue* e = getEnumArray();
-        const Offset* p = getOffsetArray();
-        /*  linear search; binary later if warranted */
-        /*  (binary is not faster for short lists) */
-        for (int32_t i=0; i<count; ++i) {
-            if (e[i] < enumProbe) continue;
-            if (e[i] > enumProbe) break;
-            return p[i];
-        }
-        return 0; /*  not found */
-    }
-
-    static int32_t
-    swap(const UDataSwapper *ds,
-         const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
-         uint8_t *temp, int32_t pos,
-         UErrorCode *pErrorCode);
-};
-
-/* ---------------------------------------------------------------------- */
-/*  NameToEnum */
-
-/**
- * A map from names to enum values.
- */
-class NameToEnum {
-
-    /*  -- begin pnames data -- */
-    int32_t count;       /*  number of entries */
-    EnumValue _enumArray; /*  [array of count] EnumValues */
-    /*  Offset _nameArray; // [array of count] offsets to names */
-    /*  -- end pnames data -- */
-
-    friend class ::Builder;
-
-    EnumValue* getEnumArray() {
-        return &_enumArray;
-    }
-
-    const EnumValue* getEnumArray() const {
-        return &_enumArray;
-    }
-
-    Offset* getNameArray() {
-        return (Offset*) (getEnumArray() + count);
-    }
-
-    const Offset* getNameArray() const {
-        return (Offset*) (getEnumArray() + count);
-    }
-
-    static int32_t getSize(int32_t n) {
-        return sizeof(int32_t) + (sizeof(Offset) + sizeof(EnumValue)) * n;
-    }
-
-    int32_t getSize() {
-        return getSize(count);
-    }
-
- public:
-  
-    EnumValue getEnum(const char* alias, const PropertyAliases& data) const {
-
-        const Offset* n = getNameArray();
-        const EnumValue* e = getEnumArray();
-
-        /*  linear search; binary later if warranted */
-        /*  (binary is not faster for short lists) */
-        for (int32_t i=0; i<count; ++i) {
-            const char* name = (const char*) data.getPointer(n[i]);
-            int32_t c = uprv_comparePropertyNames(alias, name);
-            if (c > 0) continue;
-            if (c < 0) break;
-            return e[i];
-        }
-        
-        return UCHAR_INVALID_CODE;
-    }
-
-    static int32_t
-    swap(const UDataSwapper *ds,
-         const uint8_t *inBytes, int32_t length, uint8_t *outBytes,
-         uint8_t *temp, int32_t pos,
-         UErrorCode *pErrorCode);
-};
-
-/*----------------------------------------------------------------------
- * 
- * In-memory layout.  THIS IS NOT A STANDALONE DOCUMENT.  It goes
- * together with above C++ declarations and gives an overview.
- *
- * See above for definitions of Offset and EnumValue.  Also, refer to
- * above class declarations for the "bottom line" on data layout.
- *
- * Sizes:
- * '*_offset' is an Offset (see above)
- * 'count' members are typically int32_t (see above declarations)
- * 'enumArray' is an array of EnumValue (see above)
- * 'offsetArray' is an array of Offset (see above)
- * 'nameArray' is an array of Offset (see above)
- * 'enum*' is an EnumValue (see above)
- * '*Array [x n]' means that *Array has n elements
- *
- * References:
- * Instead of pointers, this flat data structure contains offsets.
- * All offsets are relative to the start of 'header'.  A notation
- * is used to indicate what structure each offset points to:
- * 'foo (>x)' the offset(s) in foo point to structure x
- * 
- * Structures:
- * Each structure is assigned a number, except for the header,
- * which is called 'header'.  The numbers are not contiguous
- * for historical reasons.  Some structures have sub-parts
- * that are denoted with a letter, e.g., "5a".
- * 
- * BEGIN LAYOUT
- * ============
- * header:
- *  enumToName_offset (>0)
- *  nameToEnum_offset (>2)
- *  enumToValue_offset (>3)
- *  (alignment padding build in to header)
- *
- * The header also contains the following, used by "external readers"
- * like ICU4J and icuswap.
- *
- *  // The following are needed by external readers of this data.
- *  // We don't use them ourselves.
- *  int16_t total_size; // size in bytes excluding the udata header
- *  Offset valueMap_offset; // offset to start of array
- *  int16_t valueMap_count; // number of entries
- *  Offset nameGroupPool_offset; // offset to start of array
- *  int16_t nameGroupPool_count; // number of entries (not groups)
- *  Offset stringPool_offset; // offset to start of pool
- *  int16_t stringPool_count; // number of strings (not size in bytes)
- *
- * 0: # NonContiguousEnumToOffset obj for props => name groups
- *  count
- *  enumArray [x count]
- *  offsetArray [x count] (>98)
- * 
- * => pad to next 4-byte boundary
- * 
- * (1: omitted -- no longer used)
- * 
- * 2: # NameToEnum obj for binary & enumerated props
- *  count
- *  enumArray [x count]
- *  nameArray [x count] (>99)
- * 
- * => pad to next 4-byte boundary
- * 
- * 3: # NonContiguousEnumToOffset obj for enumerated props => ValueMaps
- *  count
- *  enumArray [x count]
- *  offsetArray [x count] (>4)
- * 
- * => pad to next 4-byte boundary
- * 
- * 4: # ValueMap array [x one for each enumerated prop i]
- *  enumToName_offset (>5a +2*i)   one of these two is NULL, one is not
- *  ncEnumToName_offset (>5b +2*i)
- *  nameToEnums_offset (>6 +2*i)
- * 
- * => pad to next 4-byte boundary
- * 
- * for each enumerated prop (either 5a or 5b):
- * 
- *   5a: # EnumToOffset for enumerated prop's values => name groups
- *    enumStart
- *    enumLimit
- *    offsetArray [x enumLimit - enumStart] (>98) 
- * 
- *   => pad to next 4-byte boundary
- * 
- *   5b: # NonContiguousEnumToOffset for enumerated prop's values => name groups
- *    count
- *    enumArray [x count]
- *    offsetArray [x count] (>98)
- * 
- *   => pad to next 4-byte boundary
- * 
- *   6: # NameToEnum for enumerated prop's values
- *    count
- *    enumArray [x count]
- *    nameArray [x count] (>99)
- * 
- *   => pad to next 4-byte boundary
- * 
- * 98: # name group pool {NGP}
- *  [array of Offset values] (>99)
- * 
- * 99: # string pool {SP}
- *  [pool of nul-terminated char* strings]
- */
 U_NAMESPACE_END

-#endif /* C++ */
-
 #endif
--- a/icu4c/source/common/propname_data.h
+++ b/icu4c/source/common/propname_data.h
--- a/icu4c/source/common/ubidi_props.h
+++ b/icu4c/source/common/ubidi_props.h
@ -34,11 +34,6 @@ typedef struct UBiDiProps UBiDiProps;
 U_CFUNC const UBiDiProps *
 ubidi_getSingleton(void);

-U_CAPI int32_t
-ubidi_swap(const UDataSwapper *ds,
-           const void *inData, int32_t length, void *outData,
-           UErrorCode *pErrorCode);
-
 U_CFUNC void
 ubidi_addPropertyStarts(const UBiDiProps *bdp, const USetAdder *sa, UErrorCode *pErrorCode);

--- a/icu4c/source/common/ucase.h
+++ b/icu4c/source/common/ucase.h
@ -34,11 +34,6 @@ typedef struct UCaseProps UCaseProps;
 U_CAPI const UCaseProps * U_EXPORT2
 ucase_getSingleton(void);

-U_CAPI int32_t U_EXPORT2
-ucase_swap(const UDataSwapper *ds,
-           const void *inData, int32_t length, void *outData,
-           UErrorCode *pErrorCode);
-
 U_CFUNC void U_EXPORT2
 ucase_addPropertyStarts(const UCaseProps *csp, const USetAdder *sa, UErrorCode *pErrorCode);

--- a/icu4c/source/common/ucln_cmn.h
+++ b/icu4c/source/common/ucln_cmn.h
@ -44,7 +44,6 @@ typedef enum ECleanupCommonType {
    UCLN_COMMON_NORMALIZER2,
    UCLN_COMMON_USET,
    UCLN_COMMON_UNAMES,
-    UCLN_COMMON_PNAME,
    UCLN_COMMON_UPROPS,
    UCLN_COMMON_UCNV,
    UCLN_COMMON_UCNV_IO,
--- a/icu4c/source/common/udicttrie.h
+++ b/icu4c/source/common/udicttrie.h
@ -0,0 +1,83 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  udicttrie.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010dec17
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __UDICTTRIE_H__
+#define __UDICTTRIE_H__
+
+/**
+ * \file
+ * \brief C API: Helper definitions for dictionary trie APIs.
+ */
+
+#include "unicode/utypes.h"
+
+/**
+ * Return values for ByteTrie::next(), UCharTrie::next() and similar methods.
+ * @see UDICTTRIE_RESULT_MATCHES
+ * @see UDICTTRIE_RESULT_HAS_VALUE
+ * @see UDICTTRIE_RESULT_HAS_NEXT
+ */
+enum UDictTrieResult {
+    /**
+     * The input unit(s) did not continue a matching string.
+     */
+    UDICTTRIE_NO_MATCH,
+    /**
+     * The input unit(s) continued a matching string
+     * but there is no value for the string so far.
+     * (It is a prefix of a longer string.)
+     */
+    UDICTTRIE_NO_VALUE,
+    /**
+     * The input unit(s) continued a matching string
+     * and there is a value for the string so far.
+     * This value will be returned by getValue().
+     * No further input byte/unit can continue a matching string.
+     */
+    UDICTTRIE_HAS_FINAL_VALUE,
+    /**
+     * The input unit(s) continued a matching string
+     * and there is a value for the string so far.
+     * This value will be returned by getValue().
+     * Another input byte/unit can continue a matching string.
+     */
+    UDICTTRIE_HAS_VALUE
+};
+
+/**
+ * Same as (result!=UDICTTRIE_NO_MATCH).
+ * @param result A result from ByteTrie::first(), UCharTrie::next() etc.
+ * @return true if the input bytes/units so far are part of a matching string/byte sequence.
+ */
+#define UDICTTRIE_RESULT_MATCHES(result) ((result)!=UDICTTRIE_NO_MATCH)
+
+/**
+ * Equivalent to (result==UDICTTRIE_HAS_VALUE || result==UDICTTRIE_HAS_FINAL_VALUE) but
+ * this macro evaluates result exactly once.
+ * @param result A result from ByteTrie::first(), UCharTrie::next() etc.
+ * @return true if there is a value for the input bytes/units so far.
+ * @see ByteTrie::getValue
+ * @see UCharTrie::getValue
+ */
+#define UDICTTRIE_RESULT_HAS_VALUE(result) ((result)>=UDICTTRIE_HAS_FINAL_VALUE)
+
+/**
+ * Equivalent to (result==UDICTTRIE_NO_VALUE || result==UDICTTRIE_HAS_VALUE) but
+ * this macro evaluates result exactly once.
+ * @param result A result from ByteTrie::first(), UCharTrie::next() etc.
+ * @return true if another input byte/unit can continue a matching string.
+ */
+#define UDICTTRIE_RESULT_HAS_NEXT(result) ((result)&1)
+
+#endif  /* __UDICTTRIE_H__ */
--- a/icu4c/source/common/uhash.c
+++ b/icu4c/source/common/uhash.c
@ -1,6 +1,6 @@
 /*
 ******************************************************************************
-*   Copyright (C) 1997-2009, International Business Machines
+*   Copyright (C) 1997-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 ******************************************************************************
 *   Date        Name        Description
@ -866,6 +866,11 @@ uhash_hashUCharsN(const UChar *str, int32_t length) {
    STRING_HASH(UChar, str, length, *p);
 }

+U_CAPI int32_t U_EXPORT2
+uhash_hashCharsN(const char *str, int32_t length) {
+    STRING_HASH(char, str, length, *p);
+}
+
 U_CAPI int32_t U_EXPORT2
 uhash_hashChars(const UHashTok key) {
    STRING_HASH(uint8_t, key.pointer, uprv_strlen((char*)p), *p);
--- a/icu4c/source/common/uhash.h
+++ b/icu4c/source/common/uhash.h
@ -583,6 +583,9 @@ uhash_hashChars(const UHashTok key);
 U_CAPI int32_t U_EXPORT2 
 uhash_hashUCharsN(const UChar *key, int32_t length);

+U_CAPI int32_t U_EXPORT2 
+uhash_hashCharsN(const char *key, int32_t length);
+
 /**
 * Generate a case-insensitive hash code for a null-terminated char*
 * string.  If the string is not null-terminated do not use this
--- a/icu4c/source/common/uinvchar.c
+++ b/icu4c/source/common/uinvchar.c
@ -104,6 +104,29 @@ static const uint8_t ebcdicFromAscii[256]={
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };

+/* Same as asciiFromEbcdic[] except maps all letters to lowercase. */
+static const uint8_t lowercaseAsciiFromEbcdic[256]={
+    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
+    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
+
+    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
+    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
+    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
+
+    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
+    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
+
+    0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x7c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
 /*
 * Bit sets indicating which characters of the ASCII repertoire
 * (by ASCII/Unicode code) are "invariant".
@ -535,6 +558,10 @@ uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
    }
 }

+U_CAPI char U_EXPORT2
+uprv_ebcdicToLowercaseAscii(char c) {
+    return (char)lowercaseAsciiFromEbcdic[(uint8_t)c];
+}

 U_INTERNAL uint8_t* U_EXPORT2
 uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
--- a/icu4c/source/common/uinvchar.h
+++ b/icu4c/source/common/uinvchar.h
@ -83,6 +83,26 @@ uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2);
 #   error Unknown charset family!
 #endif

+/**
+ * Converts an EBCDIC invariant character to lowercase ASCII.
+ * @internal
+ */
+U_INTERNAL char U_EXPORT2
+uprv_ebcdicToLowercaseAscii(char c);
+
+/**
+ * \def uprv_invCharToLowercaseAscii
+ * Converts an invariant character to lowercase ASCII.
+ * @internal
+ */
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+#   define uprv_invCharToLowercaseAscii uprv_asciitolower
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+#   define uprv_invCharToLowercaseAscii uprv_ebcdicToLowercaseAscii
+#else
+#   error Unknown charset family!
+#endif
+
 /**
 * Copy EBCDIC to ASCII
 * @internal
--- a/icu4c/source/common/unicode/urename.h
+++ b/icu4c/source/common/unicode/urename.h
@ -1160,7 +1160,6 @@
 #define uplug_setPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_setPlugLevel)
 #define uplug_setPlugName U_ICU_ENTRY_POINT_RENAME(uplug_setPlugName)
 #define uplug_setPlugNoUnload U_ICU_ENTRY_POINT_RENAME(uplug_setPlugNoUnload)
-#define upname_swap U_ICU_ENTRY_POINT_RENAME(upname_swap)
 #define uprops_getSource U_ICU_ENTRY_POINT_RENAME(uprops_getSource)
 #define upropsvec_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(upropsvec_addPropertyStarts)
 #define uprv_aestrncpy U_ICU_ENTRY_POINT_RENAME(uprv_aestrncpy)
--- a/icu4c/source/common/unormimp.h
+++ b/icu4c/source/common/unormimp.h
@ -162,15 +162,6 @@ enum {
    UNORM_NX_CJK_COMPAT=2
 };

-/**
- * Swap unorm.icu. See udataswp.h.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-unorm_swap(const UDataSwapper *ds,
-           const void *inData, int32_t length, void *outData,
-           UErrorCode *pErrorCode);
-
 /**
 * Description of the format of unorm.icu version 2.3.
 *
--- a/icu4c/source/common/uprops.h
+++ b/icu4c/source/common/uprops.h
@ -397,15 +397,6 @@ upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
 uprv_getInclusions(const USetAdder *sa, UErrorCode *pErrorCode);
 */

-/**
- * Swap the ICU Unicode properties file. See uchar.c.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-uprops_swap(const UDataSwapper *ds,
-            const void *inData, int32_t length, void *outData,
-            UErrorCode *pErrorCode);
-
 /**
 * Swap the ICU Unicode character names file. See uchar.c.
 * @internal
--- a/icu4c/source/configure
+++ b/icu4c/source/configure
@ -7748,7 +7748,7 @@ then
 fi

 # output the Makefiles
-ac_config_files="$ac_config_files icudefs.mk Makefile data/pkgdataMakefile config/Makefile.inc config/icu.pc config/pkgdataMakefile data/Makefile stubdata/Makefile common/Makefile i18n/Makefile layout/Makefile layoutex/Makefile io/Makefile extra/Makefile extra/uconv/Makefile extra/uconv/pkgdataMakefile extra/scrptrun/Makefile tools/Makefile tools/ctestfw/Makefile tools/toolutil/Makefile tools/makeconv/Makefile tools/genrb/Makefile tools/genccode/Makefile tools/gencmn/Makefile tools/gencnval/Makefile tools/genctd/Makefile tools/gentest/Makefile tools/gennorm2/Makefile tools/genbrk/Makefile tools/gensprep/Makefile tools/icuinfo/Makefile tools/icupkg/Makefile tools/icuswap/Makefile tools/pkgdata/Makefile tools/tzcode/Makefile tools/gencfu/Makefile test/Makefile test/compat/Makefile test/testdata/Makefile test/testdata/pkgdataMakefile test/hdrtst/Makefile test/intltest/Makefile test/cintltst/Makefile test/iotest/Makefile test/letest/Makefile test/perf/Makefile test/perf/collationperf/Makefile test/perf/ubrkperf/Makefile test/perf/charperf/Makefile test/perf/convperf/Makefile test/perf/normperf/Makefile test/perf/DateFmtPerf/Makefile test/perf/strsrchperf/Makefile test/perf/unisetperf/Makefile test/perf/usetperf/Makefile test/perf/ustrperf/Makefile test/perf/utfperf/Makefile test/perf/utrie2perf/Makefile samples/Makefile samples/date/Makefile samples/cal/Makefile samples/layout/Makefile common/unicode/platform.h"
+ac_config_files="$ac_config_files icudefs.mk Makefile data/pkgdataMakefile config/Makefile.inc config/icu.pc config/pkgdataMakefile data/Makefile stubdata/Makefile common/Makefile i18n/Makefile layout/Makefile layoutex/Makefile io/Makefile extra/Makefile extra/uconv/Makefile extra/uconv/pkgdataMakefile extra/scrptrun/Makefile tools/Makefile tools/ctestfw/Makefile tools/toolutil/Makefile tools/makeconv/Makefile tools/genrb/Makefile tools/genccode/Makefile tools/gencmn/Makefile tools/gencnval/Makefile tools/genctd/Makefile tools/gentest/Makefile tools/gennorm2/Makefile tools/genbrk/Makefile tools/gensprep/Makefile tools/icuinfo/Makefile tools/icupkg/Makefile tools/icuswap/Makefile tools/pkgdata/Makefile tools/tzcode/Makefile tools/gencfu/Makefile test/Makefile test/compat/Makefile test/testdata/Makefile test/testdata/pkgdataMakefile test/hdrtst/Makefile test/intltest/Makefile test/cintltst/Makefile test/iotest/Makefile test/letest/Makefile test/perf/Makefile test/perf/collationperf/Makefile test/perf/dicttrieperf/Makefile test/perf/ubrkperf/Makefile test/perf/charperf/Makefile test/perf/convperf/Makefile test/perf/normperf/Makefile test/perf/DateFmtPerf/Makefile test/perf/strsrchperf/Makefile test/perf/unisetperf/Makefile test/perf/usetperf/Makefile test/perf/ustrperf/Makefile test/perf/utfperf/Makefile test/perf/utrie2perf/Makefile samples/Makefile samples/date/Makefile samples/cal/Makefile samples/layout/Makefile common/unicode/platform.h"

 cat >confcache <<\_ACEOF
 # This file is a shell script that caches the results of configure
@ -8489,6 +8489,7 @@ do
    "test/letest/Makefile") CONFIG_FILES="$CONFIG_FILES test/letest/Makefile" ;;
    "test/perf/Makefile") CONFIG_FILES="$CONFIG_FILES test/perf/Makefile" ;;
    "test/perf/collationperf/Makefile") CONFIG_FILES="$CONFIG_FILES test/perf/collationperf/Makefile" ;;
+    "test/perf/dicttrieperf/Makefile") CONFIG_FILES="$CONFIG_FILES test/perf/dicttrieperf/Makefile" ;;
    "test/perf/ubrkperf/Makefile") CONFIG_FILES="$CONFIG_FILES test/perf/ubrkperf/Makefile" ;;
    "test/perf/charperf/Makefile") CONFIG_FILES="$CONFIG_FILES test/perf/charperf/Makefile" ;;
    "test/perf/convperf/Makefile") CONFIG_FILES="$CONFIG_FILES test/perf/convperf/Makefile" ;;
--- a/icu4c/source/configure.in
+++ b/icu4c/source/configure.in
@ -1354,6 +1354,7 @@ AC_CONFIG_FILES([icudefs.mk \
 		test/letest/Makefile \
 		test/perf/Makefile \
 		test/perf/collationperf/Makefile \
+		test/perf/dicttrieperf/Makefile \
 		test/perf/ubrkperf/Makefile \
 		test/perf/charperf/Makefile \
 		test/perf/convperf/Makefile \
--- a/icu4c/source/data/Makefile.in
+++ b/icu4c/source/data/Makefile.in
@ -226,8 +226,10 @@ package390: $(OUTTMPDIR)/icudata390.lst $(PKGDATA_LIST) ./icupkg.inc packagedata
 ## DAT files - Misc. data files.
 #  2005-may-05 Removed Unicode properties files (unorm.icu, uprops.icu, ucase.icu, ubidi.icu)
 #  from data build. See Jitterbug 4497. (makedata.mak revision 1.117)
+#  2010-dec Removed pnames.icu.
+# These are now hardcoded in ICU4C and only loaded in ICU4J.
 #
-DAT_FILES_SHORT=pnames.icu unames.icu cnvalias.icu coll/ucadata.icu coll/invuca.icu nfc.nrm nfkc.nrm nfkc_cf.nrm uts46.nrm
+DAT_FILES_SHORT=unames.icu cnvalias.icu coll/ucadata.icu coll/invuca.icu nfc.nrm nfkc.nrm nfkc_cf.nrm uts46.nrm
 DAT_FILES=$(DAT_FILES_SHORT:%=$(BUILDDIR)/%)

 ## BRK files
@ -411,7 +413,7 @@ COLL_FILES_LIST=$(COLLATION_FILES_SHORT) $(COLLATION_INDEX_RES_SHORT)
 BRK_FILES_LIST=$(BRK_FILES_SHORT) $(CTD_FILES_SHORT) $(BRK_RES_FILES_SHORT) $(BRK_RES_INDEX_RES_SHORT) 
 LOCALE_FILES_LIST= $(RES_FILES_SHORT) $(LANG_FILES_SHORT) $(REGION_FILES_SHORT) $(ZONE_FILES_SHORT)
 MISC_FILES_LIST=$(DAT_FILES_SHORT) $(CNV_FILES_SHORT) $(CNV_FILES_SHORT_SPECIAL) $(CURR_FILES_SHORT) $(RBNF_FILES_SHORT) $(RBNF_INDEX_RES_SHORT) $(TRANSLIT_FILES_SHORT) $(SPREP_FILES_SHORT) $(CFU_FILES_SHORT)
-UNI_CORE_DATA=uprops.icu ucase.icu ubidi.icu
+UNI_CORE_DATA=pnames.icu uprops.icu ucase.icu ubidi.icu
 UNI_CORE_TARGET_DATA=$(UNI_CORE_DATA:%=$(BUILDDIR)/%)

 ifneq ($(INCLUDE_UNI_CORE_DATA),)
@ -494,7 +496,7 @@ $(BUILDDIR)/coll/%.icu: $(SRCDATADIR)/in/coll/%.icu
 ####################################################    SPP
 # SPP FILES

-$(BUILDDIR)/%.spp: $(SPREPSRCDIR)/%.txt $(TOOLBINDIR)/gensprep$(TOOLEXEEXT) $(BUILDDIR)/unames.icu $(BUILDDIR)/pnames.icu
+$(BUILDDIR)/%.spp: $(SPREPSRCDIR)/%.txt $(TOOLBINDIR)/gensprep$(TOOLEXEEXT) $(BUILDDIR)/unames.icu
 	$(INVOKE) $(TOOLBINDIR)/gensprep -d $(BUILDDIR) -i $(BUILDDIR) -s $(SPREPSRCDIR) -b $(@F:%.spp=%) -m $(UNICODEDATADIR) -u 3.2.0 $(<F)

 ####################################################    BRK
@ -753,11 +755,10 @@ clean-resindex:
 $(BUILDDIR)/$(INDEX_NAME).res: $(INDEX_FILE) $(TOOLBINDIR)/genrb$(TOOLEXEEXT)
 	$(INVOKE) $(TOOLBINDIR)/genrb $(GENRBOPTS) -i $(BUILDDIR) -d $(BUILDDIR) $(INDEX_FILE)

-# The core Unicode properties files (uprops.icu, ucase.icu, ubidi.icu)
+# The core Unicode properties files (pnames.icu, uprops.icu, ucase.icu, ubidi.icu)
 # are hardcoded in the common DLL and therefore not included in the data package any more.
 # They are not built by default but need to be built for ICU4J data and for getting the .c source files
 # when updating the Unicode data.
-# Changed in Makefile.in revision 1.147. See Jitterbug 4497.
 uni-core-data: build-dir $(UNI_CORE_TARGET_DATA)
 	@echo Unicode .icu files built to $(BUILDDIR)

@ -778,7 +779,7 @@ JAR=jar
 # - package them into the .jar file
 $(OUTDIR)/icu4j/icudata.jar: build-dir packagedata $(OUTTMPDIR)/$(ICUDATA_PLATFORM_NAME).dat uni-core-data
 	mkdir -p $(OUTDIR)/icu4j/com/ibm/icu/impl/data/$(ICUDATA_BASENAME_VERSION)b
-	echo ubidi.icu ucase.icu uprops.icu > $(OUTDIR)/icu4j/add.txt
+	echo pnames.icu ubidi.icu ucase.icu uprops.icu > $(OUTDIR)/icu4j/add.txt
 	$(INVOKE) $(TOOLBINDIR)/icupkg $(OUTTMPDIR)/$(ICUDATA_PLATFORM_NAME).dat $(OUTDIR)/icu4j/$(ICUDATA_BASENAME_VERSION)b.dat -a $(OUTDIR)/icu4j/add.txt -s $(BUILDDIR) -x '*' -tb -d $(OUTDIR)/icu4j/com/ibm/icu/impl/data/$(ICUDATA_BASENAME_VERSION)b
 	$(JAR) cf $(OUTDIR)/icu4j/icudata.jar -C $(OUTDIR)/icu4j com/ibm/icu/impl/data/$(ICUDATA_BASENAME_VERSION)b

@ -821,9 +822,9 @@ pkgdataMakefile:
 ###########
 ########### 390 (z/OS) support
 UCMFILES390=ebcdic-xml-us.ucm ibm-37_P100-1995.ucm ibm-1047_P100-1995.ucm ibm-4909_P100-1999.ucm
-# used to depend on uprops.icu ucase.icu ubidi.icu
-# see Jitterbug 4497
-ALLFILES390=pnames.icu cnvalias.icu $(UCMFILES390:.ucm=.cnv)
+# used to depend on pnames.icu uprops.icu ucase.icu ubidi.icu
+# These are now hardcoded in ICU4C and only loaded in ICU4J.
+ALLFILES390=cnvalias.icu $(UCMFILES390:.ucm=.cnv)

 $(OUTTMPDIR)/icudata390.lst: $(SRCLISTDEPS)
 	@echo "generating $@ (list of 390 data files)"
--- a/icu4c/source/data/in/pnames.icu
+++ b/icu4c/source/data/in/pnames.icu
--- a/icu4c/source/data/makedata.mak
+++ b/icu4c/source/data/makedata.mak
@ -486,9 +486,10 @@ ALL : GODATA "$(ICU_LIB_TARGET)" "$(TESTDATAOUT)\testdata.dat"
 # They are not built by default but need to be built for ICU4J data and for getting the .c source files
 # when updating the Unicode data.
 # Changed in makedata.mak revision 1.117. See Jitterbug 4497.
+# 2010-dec Removed pnames.icu.
 # Command line:
 #   C:\svn\icuproj\icu\trunk\source\data>nmake -f makedata.mak ICUMAKE=C:\svn\icuproj\icu\trunk\source\data\ CFG=x86\Debug uni-core-data
-uni-core-data: GODATA "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu"
+uni-core-data: GODATA "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu"
 	@echo Unicode .icu files built to "$(ICUBLD_PKG)"

 # Build the ICU4J icudata.jar and testdata.jar.
@ -501,7 +502,7 @@ uni-core-data: GODATA "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(IC
 # - package them into the .jar file
 "$(ICUOUT)\icu4j\icudata.jar": GODATA "$(ICUOUT)\$(ICUPKG).dat" uni-core-data
 	if not exist "$(ICUOUT)\icu4j\com\ibm\icu\impl\data\$(U_ICUDATA_NAME)b" mkdir "$(ICUOUT)\icu4j\com\ibm\icu\impl\data\$(U_ICUDATA_NAME)b"
-	echo ubidi.icu ucase.icu uprops.icu > "$(ICUOUT)\icu4j\add.txt"
+	echo pnames.icu ubidi.icu ucase.icu uprops.icu > "$(ICUOUT)\icu4j\add.txt"
 	"$(ICUPBIN)\icupkg" "$(ICUOUT)\$(ICUPKG).dat" "$(ICUOUT)\icu4j\$(U_ICUDATA_NAME)b.dat" -a "$(ICUOUT)\icu4j\add.txt" -s "$(ICUBLD_PKG)" -x * -tb -d "$(ICUOUT)\icu4j\com\ibm\icu\impl\data\$(U_ICUDATA_NAME)b"
 	"$(JAR)" cf "$(ICUOUT)\icu4j\icudata.jar" -C "$(ICUOUT)\icu4j" com\ibm\icu\impl\data\$(U_ICUDATA_NAME)b

@ -586,11 +587,10 @@ icu4j-data-install :
 	copy "$(ICUTMP)\$(ICUPKG).dat" "$(ICUOUT)\$(U_ICUDATA_NAME)$(U_ICUDATA_ENDIAN_SUFFIX).dat"
 	-@erase "$(ICUTMP)\$(ICUPKG).dat"
 !ELSE
-"$(ICU_LIB_TARGET)" : $(COMMON_ICUDATA_DEPENDENCIES) $(CNV_FILES) $(CNV_FILES_SPECIAL) "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\cnvalias.icu" "$(ICUBLD_PKG)\nfc.nrm" "$(ICUBLD_PKG)\nfkc.nrm" "$(ICUBLD_PKG)\nfkc_cf.nrm" "$(ICUBLD_PKG)\uts46.nrm" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu" "$(ICUBLD_PKG)\$(ICUCOL)\invuca.icu" $(CURR_RES_FILES) $(LANG_RES_FILES) $(REGION_RES_FILES) $(ZONE_RES_FILES) $(BRK_FILES) $(BRK_CTD_FILES) $(BRK_RES_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES) $(ALL_RES) $(SPREP_FILES) "$(ICUBLD_PKG)\confusables.cfu"
+"$(ICU_LIB_TARGET)" : $(COMMON_ICUDATA_DEPENDENCIES) $(CNV_FILES) $(CNV_FILES_SPECIAL) "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\cnvalias.icu" "$(ICUBLD_PKG)\nfc.nrm" "$(ICUBLD_PKG)\nfkc.nrm" "$(ICUBLD_PKG)\nfkc_cf.nrm" "$(ICUBLD_PKG)\uts46.nrm" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu" "$(ICUBLD_PKG)\$(ICUCOL)\invuca.icu" $(CURR_RES_FILES) $(LANG_RES_FILES) $(REGION_RES_FILES) $(ZONE_RES_FILES) $(BRK_FILES) $(BRK_CTD_FILES) $(BRK_RES_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(TRANSLIT_RES_FILES) $(ALL_RES) $(SPREP_FILES) "$(ICUBLD_PKG)\confusables.cfu"
 	@echo Building icu data
 	cd "$(ICUBLD_PKG)"
 	"$(ICUPBIN)\pkgdata" $(COMMON_ICUDATA_ARGUMENTS) <<"$(ICUTMP)\icudata.lst"
-pnames.icu
 unames.icu
 confusables.cfu
 $(ICUCOL)\ucadata.icu
@ -985,9 +985,8 @@ $(UCM_SOURCE_SPECIAL): {"$(ICUTOOLS)\makeconv\$(CFG)"}makeconv.exe
 # See Jitterbug 4497 for details.
 $(MISC_SOURCE) $(RB_FILES) $(CURR_FILES) $(LANG_FILES) $(REGION_FILES) $(ZONE_FILES) $(COL_COL_FILES) $(RBNF_RES_FILES) $(BRK_RES_FILES) $(TRANSLIT_RES_FILES): {"$(ICUTOOLS)\genrb\$(CFG)"}genrb.exe "$(ICUBLD_PKG)\nfc.nrm" "$(ICUBLD_PKG)\$(ICUCOL)\ucadata.icu"

-# This used to depend on "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu"
-# This data is now hard coded as a part of the library.
-# See Jitterbug 4497 for details.
-$(BRK_SOURCE) : "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\nfc.nrm"
+# This used to depend on "$(ICUBLD_PKG)\pnames.icu" "$(ICUBLD_PKG)\uprops.icu" "$(ICUBLD_PKG)\ucase.icu" "$(ICUBLD_PKG)\ubidi.icu"
+# These are now hardcoded in ICU4C and only loaded in ICU4J.
+$(BRK_SOURCE) : "$(ICUBLD_PKG)\unames.icu" "$(ICUBLD_PKG)\nfc.nrm"
 !ENDIF

--- a/icu4c/source/data/unidata/changes.txt
+++ b/icu4c/source/data/unidata/changes.txt
@ -13,6 +13,20 @@

 ---------------------------------------------------------------------------- ***

+Unicode 6.1 update
+
+(TODO: Copy and adjust most of the 6.0 update instructions,
+ except retain this following section in this new form.
+ So far, this just documents the new procedure for building the property names data.)
+
+* run genpname
+  (builds both pnames.icu and propname_data.h)
+- ~/svn.icu/tools/trunk/bld/unicode$ c/genpname/genpname -v -d ~/svn.icu/trunk/src/source/data/in
+- ~/svn.icu/tools/trunk/bld/unicode$ c/genpname/genpname -v -d ~/svn.icu/trunk/src/source/common --csource
+- rebuild ICU & tools
+
+---------------------------------------------------------------------------- ***
+
 Unicode 6.0 update

 *** related ICU Trac tickets
--- a/icu4c/source/test/cintltst/udatatst.c
+++ b/icu4c/source/test/cintltst/udatatst.c
@ -52,7 +52,6 @@
 #include "ucol_swp.h"
 #include "ucnv_bld.h"
 #include "sprpimpl.h"
-#include "propname.h"
 #include "rbbidata.h"

 /* swapping implementation in i18n */
@ -1310,10 +1309,16 @@ static const struct {
    {"thaidict",                 "ctd", triedict_swap},
 #endif

-    /* the last item should not be #if'ed so that it can reliably omit the last comma */
-
+#if 0
+    /*
+     * Starting with ICU 4.8, the Unicode property (value) aliases data
+     * is hardcoded in the ICU4C common library.
+     * The swapper was moved to the toolutil library for swapping for ICU4J.
+     */
    /* Unicode properties */
    {"pnames",                   "icu", upname_swap},
+#endif
+
 #if 0
    /*
     * Starting with ICU4C 3.4, the core Unicode properties files
@ -1336,6 +1341,7 @@ static const struct {
    {"confusables",              "cfu", uspoof_swap},
 #endif
    {"unames",                   "icu", uchar_swapNames}
+    /* the last item should not be #if'ed so that it can reliably omit the last comma */
 };

 /* Large enough for the largest swappable data item. */
@ -1673,6 +1679,7 @@ TestSwapData() {
        uprv_strcat(name, swapCases[i].type);

        pData=udata_open(pkg, swapCases[i].type, nm, &errorCode);
+
        if(U_SUCCESS(errorCode)) {
            TestSwapCase(pData, name, swapCases[i].swapFn, buffer, buffer+SWAP_BUFFER_SIZE);
            udata_close(pData);
--- a/icu4c/source/test/intltest/Makefile.in
+++ b/icu4c/source/test/intltest/Makefile.in
@ -50,6 +50,7 @@ sdtfmtts.o svccoll.o tchcfmt.o	selfmts.o \
 tfsmalls.o tmsgfmt.o trcoll.o tscoll.o tsdate.o tsdcfmsy.o tsdtfmsy.o	\
 tsmthred.o tsnmfmt.o tsputil.o tstnrapi.o tstnorm.o tzbdtest.o		\
 tzregts.o tztest.o ucdtest.o usettest.o ustrtest.o strcase.o transtst.o strtest.o thcoll.o \
+bytetrietest.o uchartrietest.o \
 itrbbi.o rbbiapts.o rbbitst.o ittrans.o transapi.o cpdtrtst.o \
 testutil.o transrt.o trnserr.o normconf.o sfwdchit.o \
 jamotest.o srchtest.o reptest.o regextst.o \
--- a/icu4c/source/test/intltest/bytetrietest.cpp
+++ b/icu4c/source/test/intltest/bytetrietest.cpp
@ -0,0 +1,843 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  bytetrietest.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010nov16
+*   created by: Markus W. Scherer
+*/
+
+#include <string.h>
+
+#include "unicode/utypes.h"
+#include "unicode/stringpiece.h"
+#include "bytetrie.h"
+#include "bytetriebuilder.h"
+#include "bytetrieiterator.h"
+#include "intltest.h"
+
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+
+// One test item: a NUL-terminated byte string and the value stored for it.
+// Arrays of these are added to a ByteTrieBuilder and are also used as the
+// expected results when checking lookups and iteration.
+struct StringAndValue {
+    // Key bytes; passed to ByteTrieBuilder::add() as the string.
+    const char *s;
+    // Value associated with s.
+    int32_t value;
+};
+
+// Test suite for ByteTrie, ByteTrieBuilder and ByteTrieIterator.
+class ByteTrieTest : public IntlTest {
+public:
+    ByteTrieTest() {}
+    virtual ~ByteTrieTest();
+
+    // Test dispatcher; lists all of the Test...() methods declared below.
+    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
+    // Test cases that build a trie from a small table and run checkData() on it
+    // (TestBuilder() instead checks builder error reporting).
+    void TestBuilder();
+    void TestEmpty();
+    void Test_a();
+    void Test_a_ab();
+    void TestShortestBranch();
+    void TestBranches();
+    void TestLongSequence();
+    void TestLongBranch();
+    void TestValuesForState();
+    void TestCompact();
+
+    // Builds the shared "months" trie used by the test cases that follow.
+    StringPiece buildMonthsTrie(ByteTrieBuilder &builder, UDictTrieBuildOption buildOption);
+    void TestHasUniqueValue();
+    void TestGetNextBytes();
+    void TestIteratorFromBranch();
+    void TestIteratorFromLinearMatch();
+    void TestTruncatingIteratorFromRoot();
+    void TestTruncatingIteratorFromLinearMatchShort();
+    void TestTruncatingIteratorFromLinearMatchLong();
+
+    // Helpers: build a trie from a table of items and verify it in several ways.
+    // The two-argument checkData() runs the three-argument one for both build options.
+    void checkData(const StringAndValue data[], int32_t dataLength);
+    void checkData(const StringAndValue data[], int32_t dataLength, UDictTrieBuildOption buildOption);
+    StringPiece buildTrie(const StringAndValue data[], int32_t dataLength,
+                          ByteTrieBuilder &builder, UDictTrieBuildOption buildOption);
+    void checkFirst(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
+    void checkNext(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
+    void checkNextWithState(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
+    void checkNextString(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
+    void checkIterator(const StringPiece &trieBytes, const StringAndValue data[], int32_t dataLength);
+    void checkIterator(ByteTrieIterator &iter, const StringAndValue data[], int32_t dataLength);
+};
+
+// Factory function: returns a new heap-allocated ByteTrieTest as an IntlTest pointer.
+// NOTE(review): presumably the caller (the intltest framework) takes ownership
+// and deletes the object -- confirm at the call site.
+extern IntlTest *createByteTrieTest() {
+    return new ByteTrieTest();
+}
+
+// Out-of-line definition of the virtual destructor; the class has no data
+// members of its own, so there is nothing to release here.
+ByteTrieTest::~ByteTrieTest() {
+}
+
+// Test dispatcher for this suite.
+// Logs the suite name when exec is true, then lists the test methods through
+// the TESTCASE_AUTO macros. The fourth parameter (par) is unused.
+// NOTE(review): presumably the macros map index to the matching method, set
+// name, and run the method only when exec is true -- confirm against intltest.h.
+void ByteTrieTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
+    if(exec) {
+        logln("TestSuite ByteTrieTest: ");
+    }
+    TESTCASE_AUTO_BEGIN;
+    TESTCASE_AUTO(TestBuilder);
+    TESTCASE_AUTO(TestEmpty);
+    TESTCASE_AUTO(Test_a);
+    TESTCASE_AUTO(Test_a_ab);
+    TESTCASE_AUTO(TestShortestBranch);
+    TESTCASE_AUTO(TestBranches);
+    TESTCASE_AUTO(TestLongSequence);
+    TESTCASE_AUTO(TestLongBranch);
+    TESTCASE_AUTO(TestValuesForState);
+    TESTCASE_AUTO(TestCompact);
+    TESTCASE_AUTO(TestHasUniqueValue);
+    TESTCASE_AUTO(TestGetNextBytes);
+    TESTCASE_AUTO(TestIteratorFromBranch);
+    TESTCASE_AUTO(TestIteratorFromLinearMatch);
+    TESTCASE_AUTO(TestTruncatingIteratorFromRoot);
+    TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchShort);
+    TESTCASE_AUTO(TestTruncatingIteratorFromLinearMatchLong);
+    TESTCASE_AUTO_END;
+}
+
+void ByteTrieTest::TestBuilder() {
+    IcuTestErrorCode errorCode(*this, "TestBuilder()");
+    ByteTrieBuilder builder;
+    builder.build(UDICTTRIE_BUILD_FAST, errorCode);
+    if(errorCode.reset()!=U_INDEX_OUTOFBOUNDS_ERROR) {
+        errln("ByteTrieBuilder().build() did not set U_INDEX_OUTOFBOUNDS_ERROR");
+        return;
+    }
+    builder.add("=", 0, errorCode).add("=", 1, errorCode).build(UDICTTRIE_BUILD_FAST, errorCode);
+    if(errorCode.reset()!=U_ILLEGAL_ARGUMENT_ERROR) {
+        errln("ByteTrieBuilder.build() did not detect duplicates");
+        return;
+    }
+}
+
+// Builds and checks a trie whose only string is the empty string.
+void ByteTrieTest::TestEmpty() {
+    static const StringAndValue items[]={
+        { "", 0 }
+    };
+    checkData(items, LENGTHOF(items));
+}
+
+// Builds and checks a trie with the single one-byte string "a".
+void ByteTrieTest::Test_a() {
+    static const StringAndValue items[]={
+        { "a", 1 }
+    };
+    checkData(items, LENGTHOF(items));
+}
+
+// Builds and checks a trie in which one string ("a") is a prefix of the other ("ab").
+void ByteTrieTest::Test_a_ab() {
+    static const StringAndValue items[]={
+        { "a", 1 },
+        { "ab", 100 }
+    };
+    checkData(items, LENGTHOF(items));
+}
+
+// Builds and checks a trie with two one-byte strings that differ in their
+// first byte, i.e. the smallest possible branch.
+void ByteTrieTest::TestShortestBranch() {
+    static const StringAndValue items[]={
+        { "a", 1000 },
+        { "b", 2000 }
+    };
+    checkData(items, LENGTHOF(items));
+}
+
+// Builds and checks tries from growing prefixes of one item list, so that the
+// root branch gets 2, 3, ... up to all of the listed first bytes.
+// The values cover a wide range of magnitudes, including both int32_t extremes.
+void ByteTrieTest::TestBranches() {
+    static const StringAndValue data[]={
+        { "a", 0x10 },
+        { "cc", 0x40 },
+        { "e", 0x100 },
+        { "ggg", 0x400 },
+        { "i", 0x1000 },
+        { "kkkk", 0x4000 },
+        { "n", 0x10000 },
+        { "ppppp", 0x40000 },
+        { "r", 0x100000 },
+        { "sss", 0x200000 },
+        { "t", 0x400000 },
+        { "uu", 0x800000 },
+        { "vv", 0x7fffffff },
+        // 0x80000000 is an unsigned literal. Cast it explicitly: an implicit
+        // narrowing conversion inside a braced initializer is ill-formed
+        // in C++11 and later.
+        { "zz", (int32_t)0x80000000 }
+    };
+    // Start at 2 items; one-item tries are covered by other tests.
+    for(int32_t length=2; length<=LENGTHOF(data); ++length) {
+        infoln("TestBranches length=%d", (int)length);
+        checkData(data, length);
+    }
+}
+
+// Builds and checks a trie whose strings share ever longer common prefixes,
+// so that long runs of bytes without branching have to be matched.
+void ByteTrieTest::TestLongSequence() {
+    static const StringAndValue items[]={
+        { "a", -1 },
+        // 52 bytes: needs a sequence of linear-match nodes
+        { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -2 },
+        // 6 x 52 = 312 bytes, i.e. more than 256 bytes
+        { "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+          "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", -3 }
+    };
+    checkData(items, LENGTHOF(items));
+}
+
+// Builds and checks a trie with many different first bytes (enough for a
+// split-branch node) and with values chosen to be interesting for the
+// compact-integer encoding: small negatives, zero, and values on either side
+// of several magnitude boundaries.
+void ByteTrieTest::TestLongBranch() {
+    static const StringAndValue data[]={
+        { "a", -2 },
+        { "b", -1 },
+        { "c", 0 },
+        { "d2", 1 },
+        { "f", 0x3f },
+        { "g", 0x40 },
+        { "h", 0x41 },
+        { "j23", 0x1900 },
+        { "j24", 0x19ff },
+        { "j25", 0x1a00 },
+        { "k2", 0x1a80 },
+        { "k3", 0x1aff },
+        { "l234567890", 0x1b00 },
+        { "l234567890123", 0x1b01 },
+        { "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn", 0x10ffff },
+        { "oooooooooooooooooooooooooooooooooooooooooooooooooooooo", 0x110000 },
+        { "pppppppppppppppppppppppppppppppppppppppppppppppppppppp", 0x120000 },
+        { "r", 0x333333 },
+        { "s2345", 0x4444444 },
+        { "t234567890", 0x77777777 },
+        // 0x80000001 is an unsigned literal. Cast it explicitly: an implicit
+        // narrowing conversion inside a braced initializer is ill-formed
+        // in C++11 and later.
+        { "z", (int32_t)0x80000001 }
+    };
+    checkData(data, LENGTHOF(data));
+}
+
+// Builds and checks a trie in which every prefix of "abcdef" has its own value.
+// The intent is to verify that saveState() and resetToState() interact
+// properly with next() and current().
+void ByteTrieTest::TestValuesForState() {
+    static const StringAndValue items[]={
+        { "a", -1 },
+        { "ab", -2 },
+        { "abc", -3 },
+        { "abcd", -4 },
+        { "abcde", -5 },
+        { "abcdef", -6 }
+    };
+    checkData(items, LENGTHOF(items));
+}
+
+// Builds and checks a trie with repeated string endings and repeated values,
+// which give the builder opportunities for compacting.
+void ByteTrieTest::TestCompact() {
+    static const StringAndValue items[]={
+        { "+", 0 },
+        { "+august", 8 },
+        { "+december", 12 },
+        { "+july", 7 },
+        { "+june", 6 },
+        { "+november", 11 },
+        { "+october", 10 },
+        { "+september", 9 },
+        { "-", 0 },
+        { "-august", 8 },
+        { "-december", 12 },
+        { "-july", 7 },
+        { "-june", 6 },
+        { "-november", 11 },
+        { "-october", 10 },
+        { "-september", 9 },
+        // The l+n branch (including its sub-nodes) occurs twice; it is expected
+        // to be written both times because it follows a different
+        // linear-match node each time.
+        { "xjuly", 7 },
+        { "xjune", 6 }
+    };
+    checkData(items, LENGTHOF(items));
+}
+
+// Builds the "months" trie shared by several tests and returns the result of
+// buildTrie() for it.
+// The table is meant to contain all types of nodes leading to the same value,
+// for code coverage of recursive functions. In particular, the many different
+// bytes following "jan" on a single level exercise a split-branch node.
+StringPiece ByteTrieTest::buildMonthsTrie(ByteTrieBuilder &builder, UDictTrieBuildOption buildOption) {
+    static const StringAndValue months[]={
+        { "august", 8 },
+        { "jan", 1 },
+        { "jan.", 1 },
+        { "jana", 1 },
+        { "janbb", 1 },
+        { "janc", 1 },
+        { "janddd", 1 },
+        { "janee", 1 },
+        { "janef", 1 },
+        { "janf", 1 },
+        { "jangg", 1 },
+        { "janh", 1 },
+        { "janiiii", 1 },
+        { "janj", 1 },
+        { "jankk", 1 },
+        { "jankl", 1 },
+        { "jankmm", 1 },
+        { "janl", 1 },
+        { "janm", 1 },
+        { "jannnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
+        { "jano", 1 },
+        { "janpp", 1 },
+        { "janqqq", 1 },
+        { "janr", 1 },
+        { "januar", 1 },
+        { "january", 1 },
+        { "july", 7 },
+        { "jun", 6 },
+        { "jun.", 6 },
+        { "june", 6 }
+    };
+    return buildTrie(months, LENGTHOF(months), builder, buildOption);
+}
+
+// Checks ByteTrie::hasUniqueValue() on the months trie from several positions:
+// at the root, directly after next(), after getValue(), and from within a
+// linear-match node.
+void ByteTrieTest::TestHasUniqueValue() {
+    ByteTrieBuilder builder;
+    StringPiece trieBytes=buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST);
+    if(trieBytes.empty()) {
+        return;  // buildTrie() reported an error
+    }
+    ByteTrie trie(trieBytes.data());
+    int32_t unique;
+    // The root leads to several different values.
+    if(trie.hasUniqueValue(unique)) {
+        errln("unique value at root");
+    }
+    // Directly after next(): everything starting with "jan" has the value 1.
+    trie.next('j');
+    trie.next('a');
+    trie.next('n');
+    if(!(trie.hasUniqueValue(unique) && unique==1)) {
+        errln("not unique value 1 after \"jan\"");
+    }
+    // "ju" leads to both 6 and 7.
+    trie.first('j');
+    trie.next('u');
+    if(trie.hasUniqueValue(unique)) {
+        errln("unique value after \"ju\"");
+    }
+    if(!(trie.next('n')==UDICTTRIE_HAS_VALUE && 6==trie.getValue())) {
+        errln("not normal value 6 after \"jun\"");
+    }
+    // After getValue(): everything starting with "jun" has the value 6.
+    if(!(trie.hasUniqueValue(unique) && unique==6)) {
+        errln("not unique value 6 after \"jun\"");
+    }
+    // From within a linear-match node: "au" can only continue to "august".
+    trie.first('a');
+    trie.next('u');
+    if(!(trie.hasUniqueValue(unique) && unique==8)) {
+        errln("not unique value 8 after \"au\"");
+    }
+}
+
+// Checks ByteTrie::getNextBytes() on the months trie from several positions:
+// at the root, directly after next(), after getValue(), from within a
+// linear-match node, and after a final match.
+// For each position both the returned count and the bytes appended to the
+// sink are verified.
+void ByteTrieTest::TestGetNextBytes() {
+    ByteTrieBuilder builder;
+    StringPiece sp=buildMonthsTrie(builder, UDICTTRIE_BUILD_SMALL);
+    if(sp.empty()) {
+        return;  // buildTrie() reported an error
+    }
+    ByteTrie trie(sp.data());
+    char buffer[40];
+    CheckedArrayByteSink sink(buffer, LENGTHOF(buffer));
+    int32_t count=trie.getNextBytes(sink);
+    // The short-circuit evaluation reads buffer[] only if two bytes were appended.
+    if(count!=2 || sink.NumberOfBytesAppended()!=2 || buffer[0]!='a' || buffer[1]!='j') {
+        errln("months getNextBytes()!=[aj] at root");
+    }
+    trie.next('j');
+    trie.next('a');
+    trie.next('n');
+    // getNextBytes() directly after next()
+    // Clear the buffer up front so that the result is NUL-terminated for strcmp().
+    // Do not write buffer[count]=0 afterwards: count comes from the code under
+    // test and could exceed the buffer size if getNextBytes() misbehaves.
+    memset(buffer, 0, sizeof(buffer));
+    count=trie.getNextBytes(sink.Reset());
+    if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
+        errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"");
+    }
+    // getNextBytes() after getValue()
+    trie.getValue();  // next() had returned UDICTTRIE_HAS_VALUE.
+    memset(buffer, 0, sizeof(buffer));
+    count=trie.getNextBytes(sink.Reset());
+    if(count!=20 || sink.NumberOfBytesAppended()!=20 || 0!=strcmp(buffer, ".abcdefghijklmnopqru")) {
+        errln("months getNextBytes()!=[.abcdefghijklmnopqru] after \"jan\"+getValue()");
+    }
+    // getNextBytes() from a linear-match node
+    trie.next('u');
+    memset(buffer, 0, sizeof(buffer));
+    count=trie.getNextBytes(sink.Reset());
+    if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='a') {
+        errln("months getNextBytes()!=[a] after \"janu\"");
+    }
+    trie.next('a');
+    memset(buffer, 0, sizeof(buffer));
+    count=trie.getNextBytes(sink.Reset());
+    if(count!=1 || sink.NumberOfBytesAppended()!=1 || buffer[0]!='r') {
+        errln("months getNextBytes()!=[r] after \"janua\"");
+    }
+    trie.next('r');
+    trie.next('y');
+    // getNextBytes() after a final match
+    count=trie.getNextBytes(sink.Reset());
+    if(count!=0 || sink.NumberOfBytesAppended()!=0) {
+        errln("months getNextBytes()!=[] after \"january\"");
+    }
+}
+
+// Checks a ByteTrieIterator that starts at a branch node of the months trie
+// (after "jan"), both on its first pass and again after reset().
+void ByteTrieTest::TestIteratorFromBranch() {
+    ByteTrieBuilder builder;
+    StringPiece trieBytes=buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST);
+    if(trieBytes.empty()) {
+        return;  // buildTrie() reported an error
+    }
+    ByteTrie trie(trieBytes.data());
+    // Move to the branch node that follows "jan".
+    trie.next('j');
+    trie.next('a');
+    trie.next('n');
+    IcuTestErrorCode errorCode(*this, "TestIteratorFromBranch()");
+    ByteTrieIterator iter(trie, 0, errorCode);
+    if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) {
+        return;
+    }
+    // The iterator should deliver the items from buildMonthsTrie() that start
+    // with "jan", reduced to the suffixes following "jan".
+    static const StringAndValue expected[]={
+        { "", 1 },
+        { ".", 1 },
+        { "a", 1 },
+        { "bb", 1 },
+        { "c", 1 },
+        { "ddd", 1 },
+        { "ee", 1 },
+        { "ef", 1 },
+        { "f", 1 },
+        { "gg", 1 },
+        { "h", 1 },
+        { "iiii", 1 },
+        { "j", 1 },
+        { "kk", 1 },
+        { "kl", 1 },
+        { "kmm", 1 },
+        { "l", 1 },
+        { "m", 1 },
+        { "nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1 },
+        { "o", 1 },
+        { "pp", 1 },
+        { "qqq", 1 },
+        { "r", 1 },
+        { "uar", 1 },
+        { "uary", 1 }
+    };
+    checkIterator(iter, expected, LENGTHOF(expected));
+    // A second pass after reset() must produce the same items.
+    logln("after iter.reset()");
+    checkIterator(iter.reset(), expected, LENGTHOF(expected));
+}
+
+// Checks a ByteTrieIterator that starts inside a linear-match node of the
+// months trie (after "janua"), both on its first pass and again after reset().
+void ByteTrieTest::TestIteratorFromLinearMatch() {
+    ByteTrieBuilder builder;
+    StringPiece trieBytes=buildMonthsTrie(builder, UDICTTRIE_BUILD_SMALL);
+    if(trieBytes.empty()) {
+        return;  // buildTrie() reported an error
+    }
+    ByteTrie trie(trieBytes.data());
+    // Step into the linear-match node: consume "janua".
+    trie.next('j');
+    trie.next('a');
+    trie.next('n');
+    trie.next('u');
+    trie.next('a');
+    IcuTestErrorCode errorCode(*this, "TestIteratorFromLinearMatch()");
+    ByteTrieIterator iter(trie, 0, errorCode);
+    if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) {
+        return;
+    }
+    // The iterator should deliver the items from buildMonthsTrie() that start
+    // with "janua", reduced to the suffixes following "janua".
+    static const StringAndValue expected[]={
+        { "r", 1 },
+        { "ry", 1 }
+    };
+    checkIterator(iter, expected, LENGTHOF(expected));
+    // A second pass after reset() must produce the same items.
+    logln("after iter.reset()");
+    checkIterator(iter.reset(), expected, LENGTHOF(expected));
+}
+
+// Checks a ByteTrieIterator over the whole months trie that is limited to
+// strings of at most 4 bytes, both on its first pass and again after reset().
+void ByteTrieTest::TestTruncatingIteratorFromRoot() {
+    ByteTrieBuilder builder;
+    StringPiece trieBytes=buildMonthsTrie(builder, UDICTTRIE_BUILD_FAST);
+    if(trieBytes.empty()) {
+        return;  // buildTrie() reported an error
+    }
+    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromRoot()");
+    ByteTrieIterator iter(trieBytes.data(), 4, errorCode);
+    if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) {
+        return;
+    }
+    // The iterator should deliver the items from buildMonthsTrie() cut off
+    // after 4 bytes, with each truncated string reported only once.
+    // Truncated strings are expected with the value -1.
+    static const StringAndValue expected[]={
+        { "augu", -1 },
+        { "jan", 1 },
+        { "jan.", 1 },
+        { "jana", 1 },
+        { "janb", -1 },
+        { "janc", 1 },
+        { "jand", -1 },
+        { "jane", -1 },
+        { "janf", 1 },
+        { "jang", -1 },
+        { "janh", 1 },
+        { "jani", -1 },
+        { "janj", 1 },
+        { "jank", -1 },
+        { "janl", 1 },
+        { "janm", 1 },
+        { "jann", -1 },
+        { "jano", 1 },
+        { "janp", -1 },
+        { "janq", -1 },
+        { "janr", 1 },
+        { "janu", -1 },
+        { "july", 7 },
+        { "jun", 6 },
+        { "jun.", 6 },
+        { "june", 6 }
+    };
+    checkIterator(iter, expected, LENGTHOF(expected));
+    // A second pass after reset() must produce the same items.
+    logln("after iter.reset()");
+    checkIterator(iter.reset(), expected, LENGTHOF(expected));
+}
+
+// Checks a truncating ByteTrieIterator that starts inside a linear-match node
+// (after "ab") and whose length limit of 2 ends within that same node.
+void ByteTrieTest::TestTruncatingIteratorFromLinearMatchShort() {
+    static const StringAndValue strings[]={
+        { "abcdef", 10 },
+        { "abcdepq", 200 },
+        { "abcdeyz", 3000 }
+    };
+    ByteTrieBuilder builder;
+    StringPiece trieBytes=buildTrie(strings, LENGTHOF(strings), builder, UDICTTRIE_BUILD_FAST);
+    if(trieBytes.empty()) {
+        return;  // buildTrie() reported an error
+    }
+    ByteTrie trie(trieBytes.data());
+    // Step into the linear-match node: consume "ab".
+    trie.next('a');
+    trie.next('b');
+    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchShort()");
+    // A limit of 2 bytes cuts off inside the linear-match node.
+    ByteTrieIterator iter(trie, 2, errorCode);
+    if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) {
+        return;
+    }
+    // Only the truncated "cd" is expected, with the value -1.
+    static const StringAndValue truncated[]={
+        { "cd", -1 }
+    };
+    checkIterator(iter, truncated, LENGTHOF(truncated));
+    // A second pass after reset() must produce the same items.
+    logln("after iter.reset()");
+    checkIterator(iter.reset(), truncated, LENGTHOF(truncated));
+}
+
+// Checks a truncating ByteTrieIterator that starts inside a linear-match node
+// (after "abc") and whose length limit of 3 reaches past the end of that node.
+void ByteTrieTest::TestTruncatingIteratorFromLinearMatchLong() {
+    static const StringAndValue strings[]={
+        { "abcdef", 10 },
+        { "abcdepq", 200 },
+        { "abcdeyz", 3000 }
+    };
+    ByteTrieBuilder builder;
+    StringPiece trieBytes=buildTrie(strings, LENGTHOF(strings), builder, UDICTTRIE_BUILD_FAST);
+    if(trieBytes.empty()) {
+        return;  // buildTrie() reported an error
+    }
+    ByteTrie trie(trieBytes.data());
+    // Step into the linear-match node: consume "abc".
+    trie.next('a');
+    trie.next('b');
+    trie.next('c');
+    IcuTestErrorCode errorCode(*this, "TestTruncatingIteratorFromLinearMatchLong()");
+    // A limit of 3 bytes cuts off after the linear-match node.
+    ByteTrieIterator iter(trie, 3, errorCode);
+    if(errorCode.logIfFailureAndReset("ByteTrieIterator(trie) constructor")) {
+        return;
+    }
+    // "def" is complete and keeps its value; the two longer strings are
+    // truncated and expected with the value -1.
+    static const StringAndValue truncated[]={
+        { "def", 10 },
+        { "dep", -1 },
+        { "dey", -1 }
+    };
+    checkIterator(iter, truncated, LENGTHOF(truncated));
+    // A second pass after reset() must produce the same items.
+    logln("after iter.reset()");
+    checkIterator(iter.reset(), truncated, LENGTHOF(truncated));
+}
+
+// Runs the full set of checks on the given items twice: first with a trie
+// built with UDICTTRIE_BUILD_FAST, then with one built with UDICTTRIE_BUILD_SMALL.
+void ByteTrieTest::checkData(const StringAndValue data[], int32_t dataLength) {
+    const int length=(int)dataLength;
+    logln("checkData(dataLength=%d, fast)", length);
+    checkData(data, dataLength, UDICTTRIE_BUILD_FAST);
+    logln("checkData(dataLength=%d, small)", length);
+    checkData(data, dataLength, UDICTTRIE_BUILD_SMALL);
+}
+
+// Builds a trie from the given items with the given build option and, if that
+// succeeded, runs all of the lookup and iteration checks on it.
+void ByteTrieTest::checkData(const StringAndValue data[], int32_t dataLength, UDictTrieBuildOption buildOption) {
+    ByteTrieBuilder builder;
+    StringPiece trieBytes=buildTrie(data, dataLength, builder, buildOption);
+    // An empty result means that buildTrie() reported an error.
+    if(!trieBytes.empty()) {
+        checkFirst(trieBytes, data, dataLength);
+        checkNext(trieBytes, data, dataLength);
+        checkNextWithState(trieBytes, data, dataLength);
+        checkNextString(trieBytes, data, dataLength);
+        checkIterator(trieBytes, data, dataLength);
+    }
+}
+
+// Adds all data[] items to the builder and builds the trie.
+// Returns the serialized trie bytes as returned by builder.build();
+// callers treat an empty StringPiece as "an error was already reported".
+// Also verifies that the builder rejects add() after build().
+StringPiece ByteTrieTest::buildTrie(const StringAndValue data[], int32_t dataLength,
+                                    ByteTrieBuilder &builder, UDictTrieBuildOption buildOption) {
+    IcuTestErrorCode errorCode(*this, "buildTrie()");
+    // Add the items to the trie builder in an interesting (not trivial, not random) order:
+    // start somewhere and keep stepping by a fixed stride, modulo dataLength.
+    int32_t start;
+    int32_t stride;
+    if((dataLength&1)!=0) {
+        // Odd number of items.
+        start=dataLength/2;
+        stride=2;
+    } else if((dataLength%3)==0) {
+        // Even multiple of 3: walk backwards from the last item.
+        start=dataLength-1;
+        stride=-1;
+    } else {
+        // Even, and not a multiple of 3.
+        start=dataLength/5;
+        stride=3;
+    }
+    builder.clear();
+    int32_t index=start;
+    for(int32_t added=0; added<dataLength; ++added) {
+        const StringAndValue &item=data[index];
+        builder.add(item.s, item.value, errorCode);
+        index=(index+stride)%dataLength;
+    }
+    StringPiece sp(builder.build(buildOption, errorCode));
+    const UBool failed=errorCode.logIfFailureAndReset("add()/build()");
+    if(!failed) {
+        // After a successful build(), further add() calls must be refused.
+        builder.add("zzz", 999, errorCode);
+        const UErrorCode addAfterBuild=errorCode.reset();
+        if(addAfterBuild!=U_NO_WRITE_PERMISSION) {
+            errln("builder.build().add(zzz) did not set U_NO_WRITE_PERMISSION");
+        }
+    }
+    logln("serialized trie size: %ld bytes\n", (long)sp.length());
+    return sp;
+}
+
+// Verifies that trie.first(c) behaves like trie.reset().next(c)
+// for the first byte c of each non-empty string in data[]:
+// same result, same current(), same value (if there is one),
+// and the same result for a following next() with the string's second byte.
+void ByteTrieTest::checkFirst(const StringPiece &trieBytes,
+                              const StringAndValue data[], int32_t dataLength) {
+    ByteTrie trie(trieBytes.data());
+    for(int32_t i=0; i<dataLength; ++i) {
+        int c=(uint8_t)*data[i].s;
+        if(c==0) {
+            continue;  // skip empty string
+        }
+        // Outcome of the first() shortcut.
+        UDictTrieResult firstResult=trie.first(c);
+        int32_t firstValue=UDICTTRIE_RESULT_HAS_VALUE(firstResult) ? trie.getValue() : -1;
+        // For a one-byte string, s[1] is the terminating NUL.
+        UDictTrieResult nextResult=trie.next((uint8_t)data[i].s[1]);
+        // Repeat the same steps via reset().next(c) and compare each outcome.
+        // The || chain is evaluated left to right and advances the trie as it goes.
+        if(firstResult!=trie.reset().next(c) ||
+           firstResult!=trie.current() ||
+           firstValue!=(UDICTTRIE_RESULT_HAS_VALUE(firstResult) ? trie.getValue() : -1) ||
+           nextResult!=trie.next((uint8_t)data[i].s[1])
+        ) {
+            errln("trie.first(%c)!=trie.reset().next(same) for %s",
+                  c, data[i].s);
+        }
+    }
+}
+
+// For each item in data[], checks trie lookups in two ways:
+// 1. next(string, length) for the whole string at once
+//    (odd-numbered items pass length -1 instead of the strlen() result),
+// 2. next(byte) one byte at a time, verifying current() after each step.
+// Finally compares the result at the end of the string with whether
+// next() can actually continue with some printable ASCII byte.
+void ByteTrieTest::checkNext(const StringPiece &trieBytes,
+                             const StringAndValue data[], int32_t dataLength) {
+    ByteTrie trie(trieBytes.data());
+    ByteTrie::State state;
+    for(int32_t i=0; i<dataLength; ++i) {
+        // Part 1: whole-string lookup.
+        int32_t stringLength= (i&1) ? -1 : strlen(data[i].s);
+        UDictTrieResult result;
+        if( !UDICTTRIE_RESULT_HAS_VALUE(result=trie.next(data[i].s, stringLength)) ||
+            result!=trie.current()
+        ) {
+            errln("trie does not seem to contain %s", data[i].s);
+        } else if(trie.getValue()!=data[i].value) {
+            errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
+                  data[i].s,
+                  (long)trie.getValue(), (long)trie.getValue(),
+                  (long)data[i].value, (long)data[i].value);
+        } else if(result!=trie.current() || trie.getValue()!=data[i].value) {
+            // Asking again must not change the answers.
+            errln("trie value for %s changes when repeating current()/getValue()", data[i].s);
+        }
+        // Part 2: byte-by-byte lookup from the root.
+        trie.reset();
+        stringLength=strlen(data[i].s);
+        result=trie.current();
+        for(int32_t j=0; j<stringLength; ++j) {
+            // Before the end of the string, the trie must be able to continue.
+            if(!UDICTTRIE_RESULT_HAS_NEXT(result)) {
+                errln("trie.current()!=hasNext before end of %s (at index %d)", data[i].s, j);
+                break;
+            }
+            if(result==UDICTTRIE_HAS_VALUE) {
+                // Fetching an intermediate value must not change the state.
+                trie.getValue();
+                if(trie.current()!=UDICTTRIE_HAS_VALUE) {
+                    errln("trie.getValue().current()!=UDICTTRIE_HAS_VALUE before end of %s (at index %d)", data[i].s, j);
+                    break;
+                }
+            }
+            result=trie.next(data[i].s[j]);
+            if(!UDICTTRIE_RESULT_MATCHES(result)) {
+                errln("trie.next()=UDICTTRIE_NO_MATCH before end of %s (at index %d)", data[i].s, j);
+                break;
+            }
+            if(result!=trie.current()) {
+                errln("trie.next()!=following current() before end of %s (at index %d)", data[i].s, j);
+                break;
+            }
+        }
+        if(!UDICTTRIE_RESULT_HAS_VALUE(result)) {
+            errln("trie.next()!=hasValue at the end of %s", data[i].s);
+            // Note: this skips the trie.reset() at the bottom of the loop.
+            continue;
+        }
+        trie.getValue();
+        if(result!=trie.current()) {
+            errln("trie.current() != current()+getValue()+current() after end of %s",
+                  data[i].s);
+        }
+        // Compare the final current() with whether next() can actually continue.
+        trie.saveState(state);
+        UBool nextContinues=FALSE;
+        // Probe the printable ASCII range 0x20..0x7e, each time from the saved state.
+        for(int32_t c=0x20; c<0x7f; ++c) {
+            if(trie.resetToState(state).next(c)) {
+                nextContinues=TRUE;
+                break;
+            }
+        }
+        if((result==UDICTTRIE_HAS_VALUE)!=nextContinues) {
+            errln("(trie.current()==UDICTTRIE_HAS_VALUE) contradicts "
+                  "(trie.next(some UChar)!=UDICTTRIE_NO_MATCH) after end of %s", data[i].s);
+        }
+        trie.reset();
+    }
+}
+
+// Tests saveState()/resetToState():
+// for each item, walks the first third of the string, saves the state,
+// forces a mismatch with next(0), and then verifies that resetToState()
+// restores current()/getValue() and that the rest of the string
+// can be matched from the restored state (twice).
+void ByteTrieTest::checkNextWithState(const StringPiece &trieBytes,
+                                      const StringAndValue data[], int32_t dataLength) {
+    ByteTrie trie(trieBytes.data());
+    ByteTrie::State noState, state;
+    for(int32_t i=0; i<dataLength; ++i) {
+        if((i&1)==0) {
+            // This should have no effect.
+            trie.resetToState(noState);
+        }
+        const char *expectedString=data[i].s;
+        int32_t stringLength=strlen(expectedString);
+        int32_t partialLength=stringLength/3;
+        for(int32_t j=0; j<partialLength; ++j) {
+            if(!UDICTTRIE_RESULT_MATCHES(trie.next(expectedString[j]))) {
+                errln("trie.next()=UDICTTRIE_NO_MATCH for a prefix of %s", data[i].s);
+                return;
+            }
+        }
+        trie.saveState(state);
+        UDictTrieResult resultAtState=trie.current();
+        UDictTrieResult result;
+        int32_t valueAtState=-99;
+        if(UDICTTRIE_RESULT_HAS_VALUE(resultAtState)) {
+            valueAtState=trie.getValue();
+        }
+        result=trie.next(0);  // mismatch
+        if(result!=UDICTTRIE_NO_MATCH || result!=trie.current()) {
+            errln("trie.next(0) matched after part of %s", data[i].s);
+        }
+        if( resultAtState!=trie.resetToState(state).current() ||
+            (UDICTTRIE_RESULT_HAS_VALUE(resultAtState) && valueAtState!=trie.getValue())
+        ) {
+            errln("trie.next(part of %s) changes current()/getValue() after "
+                  "saveState/next(0)/resetToState",
+                  data[i].s);
+        } else if(!UDICTTRIE_RESULT_HAS_VALUE(
+                      result=trie.next(expectedString+partialLength,
+                                       stringLength-partialLength)) ||
+                  result!=trie.current()) {
+            // The format string has two %s conversions, so it needs two arguments.
+            errln("trie.next(rest of %s) does not seem to contain %s after "
+                  "saveState/next(0)/resetToState",
+                  data[i].s, data[i].s);
+        } else if(!UDICTTRIE_RESULT_HAS_VALUE(
+                      result=trie.resetToState(state).
+                                  next(expectedString+partialLength,
+                                       stringLength-partialLength)) ||
+                  result!=trie.current()) {
+            errln("trie does not seem to contain %s after saveState/next(rest)/resetToState",
+                  data[i].s);
+        } else if(trie.getValue()!=data[i].value) {
+            errln("trie value for %s is %ld=0x%lx instead of expected %ld=0x%lx",
+                  data[i].s,
+                  (long)trie.getValue(), (long)trie.getValue(),
+                  (long)data[i].value, (long)data[i].value);
+        }
+        trie.reset();
+    }
+}
+
+// next(string) is exercised elsewhere as well;
+// this check first matches only the first half of each string and then
+// continues with the second half plus the terminating NUL,
+// which is expected not to match.
+void ByteTrieTest::checkNextString(const StringPiece &trieBytes,
+                                   const StringAndValue data[], int32_t dataLength) {
+    ByteTrie trie(trieBytes.data());
+    for(int32_t i=0; i<dataLength; ++i) {
+        const char *expectedString=data[i].s;
+        const int32_t stringLength=strlen(expectedString);
+        const int32_t half=stringLength/2;
+        if(!trie.next(expectedString, half)) {
+            errln("trie.next(up to middle of string)=UDICTTRIE_NO_MATCH for %s", data[i].s);
+            continue;
+        }
+        // Test that we stop properly at the end of the string:
+        // the length passed here includes the NUL terminator.
+        const char *secondHalf=expectedString+half;
+        if(trie.next(secondHalf, stringLength+1-half)) {
+            errln("trie.next(string+NUL)!=UDICTTRIE_NO_MATCH for %s", data[i].s);
+        }
+        trie.reset();
+    }
+}
+
+// Creates an iterator over the serialized trie (constructed with 0 as its
+// second argument) and checks that it yields exactly the data[] items in order.
+void ByteTrieTest::checkIterator(const StringPiece &trieBytes,
+                                 const StringAndValue data[], int32_t dataLength) {
+    IcuTestErrorCode errorCode(*this, "checkIterator()");
+    ByteTrieIterator iter(trieBytes.data(), 0, errorCode);
+    const UBool failed=errorCode.logIfFailureAndReset("ByteTrieIterator(trieBytes) constructor");
+    if(!failed) {
+        checkIterator(iter, data, dataLength);
+    }
+}
+
+// Checks that iter yields exactly the data[] items, in order,
+// with matching strings and values, and nothing after the last item.
+void ByteTrieTest::checkIterator(ByteTrieIterator &iter,
+                                 const StringAndValue data[], int32_t dataLength) {
+    IcuTestErrorCode errorCode(*this, "checkIterator()");
+    for(int32_t i=0; i<dataLength; ++i) {
+        const StringAndValue &item=data[i];
+        const int itemIndex=(int)i;
+        if(!iter.hasNext()) {
+            errln("trie iterator hasNext()=FALSE for item %d: %s", itemIndex, item.s);
+            break;
+        }
+        const UBool gotItem=iter.next(errorCode);
+        if(errorCode.logIfFailureAndReset("trie iterator next() for item %d: %s", itemIndex, item.s)) {
+            break;
+        }
+        if(!gotItem) {
+            errln("trie iterator next()=FALSE for item %d: %s", itemIndex, item.s);
+            break;
+        }
+        // A string mismatch and a value mismatch are reported independently.
+        if(iter.getString()!=StringPiece(item.s)) {
+            errln("trie iterator next().getString()=%s but expected %s for item %d",
+                  iter.getString().data(), item.s, itemIndex);
+        }
+        if(iter.getValue()!=item.value) {
+            errln("trie iterator next().getValue()=%ld=0x%lx but expected %ld=0x%lx for item %d: %s",
+                  (long)iter.getValue(), (long)iter.getValue(),
+                  (long)item.value, (long)item.value,
+                  itemIndex, item.s);
+        }
+    }
+    // The iterator must be exhausted now, according to both hasNext() and next().
+    if(iter.hasNext()) {
+        errln("trie iterator hasNext()=TRUE after all items");
+    }
+    const UBool gotExtraItem=iter.next(errorCode);
+    errorCode.logIfFailureAndReset("trie iterator next() after all items");
+    if(gotExtraItem) {
+        errln("trie iterator next()=TRUE after all items");
+    }
+}
--- a/icu4c/source/test/intltest/intltest.vcxproj
+++ b/icu4c/source/test/intltest/intltest.vcxproj
@ -223,6 +223,8 @@
    </Link>
  </ItemDefinitionGroup>
  <ItemGroup>
+    <ClCompile Include="bytetrietest.cpp" />
+    <ClCompile Include="uchartrietest.cpp" />
    <ClCompile Include="itrbbi.cpp" />
    <ClCompile Include="rbbiapts.cpp" />
    <ClCompile Include="rbbitst.cpp" />
@ -529,4 +531,4 @@
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
-</Project>
+</Project>
--- a/icu4c/source/test/intltest/itutil.cpp
+++ b/icu4c/source/test/intltest/itutil.cpp
@ -29,7 +29,9 @@
 #include "aliastst.h"
 #include "usettest.h"

+extern IntlTest *createByteTrieTest();
 static IntlTest *createLocalPointerTest();
+extern IntlTest *createUCharTrieTest();

 #define CASE(id, test) case id:                               \
                          name = #test;                       \
@ -68,6 +70,22 @@ void IntlTestUtilities::runIndexedTest( int32_t index, UBool exec, const char* &
                callTest(*test, par);
            }
            break;
+        case 17:
+            name = "ByteTrieTest";
+            if (exec) {
+                logln("TestSuite ByteTrieTest---"); logln();
+                LocalPointer<IntlTest> test(createByteTrieTest());
+                callTest(*test, par);
+            }
+            break;
+        case 18:
+            name = "UCharTrieTest";
+            if (exec) {
+                logln("TestSuite UCharTrieTest---"); logln();
+                LocalPointer<IntlTest> test(createUCharTrieTest());
+                callTest(*test, par);
+            }
+            break;
        default: name = ""; break; //needed to end loop
    }
 }
--- a/icu4c/source/test/intltest/uchartrietest.cpp
+++ b/icu4c/source/test/intltest/uchartrietest.cpp
--- a/icu4c/source/test/perf/Makefile.in
+++ b/icu4c/source/test/perf/Makefile.in
@ -18,7 +18,7 @@ subdir = test/perf
 ## Files to remove for 'make clean'
 CLEANFILES = *~

-SUBDIRS = collationperf charperf normperf ubrkperf unisetperf usetperf ustrperf utfperf utrie2perf DateFmtPerf
+SUBDIRS = collationperf charperf dicttrieperf normperf ubrkperf unisetperf usetperf ustrperf utfperf utrie2perf DateFmtPerf

 # Subdirs that support 'xperf'
 XSUBDIRS = DateFmtPerf
--- a/icu4c/source/test/perf/dicttrieperf/Makefile.in
+++ b/icu4c/source/test/perf/dicttrieperf/Makefile.in
@ -0,0 +1,79 @@
+## Makefile.in for ICU - test/perf/dicttrieperf
+## Copyright (c) 2001-2010, International Business Machines Corporation and
+## others. All Rights Reserved.
+
+## Source directory information
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+
+top_builddir = ../../..
+
+include $(top_builddir)/icudefs.mk
+
+## Build directory information
+subdir = test/perf/dicttrieperf
+
+## Extra files to remove for 'make clean'
+CLEANFILES = *~ $(DEPS)
+
+## Target information
+TARGET = dicttrieperf
+
+CPPFLAGS += -I$(top_builddir)/common -I$(top_srcdir)/common -I$(top_srcdir)/tools/toolutil -I$(top_srcdir)/tools/ctestfw
+LIBS = $(LIBCTESTFW) $(LIBICUI18N) $(LIBICUUC) $(LIBICUTOOLUTIL) $(DEFAULT_LIBS) $(LIB_M)
+
+OBJECTS = dicttrieperf.o
+
+DEPS = $(OBJECTS:.o=.d)
+
+## List of phony targets
+.PHONY : all all-local install install-local clean clean-local	\
+distclean distclean-local dist dist-local check check-local
+
+## Clear suffix list
+.SUFFIXES :
+
+## List of standard targets
+all: all-local
+install: install-local
+clean: clean-local
+distclean : distclean-local
+dist: dist-local
+check: all check-local
+
+all-local: $(TARGET)
+
+install-local:
+
+dist-local:
+
+clean-local:
+	test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
+	$(RMV) $(OBJECTS) $(TARGET)
+
+distclean-local: clean-local
+	$(RMV) Makefile
+
+check-local: all-local
+
+Makefile: $(srcdir)/Makefile.in  $(top_builddir)/config.status
+	cd $(top_builddir) \
+	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+## Link the test program from its object files, then run the post-build step.
+$(TARGET) : $(OBJECTS)
+	$(LINK.cc) -o $@ $^ $(LIBS)
+	$(POST_BUILD_STEP)
+
+## Run $(INVOCATION) via $(INVOKE), with ICU_DATA defaulting to the build
+## tree's data/ directory when it is not already set, and with TZ=PST8PDT.
+invoke:
+	ICU_DATA=$${ICU_DATA:-$(top_builddir)/data/} TZ=PST8PDT $(INVOKE) $(INVOCATION)
+
+ifeq (,$(MAKECMDGOALS))
+-include $(DEPS)
+else
+ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
+ifneq ($(patsubst %install,,$(MAKECMDGOALS)),)
+-include $(DEPS)
+endif
+endif
+endif
+
--- a/icu4c/source/test/perf/dicttrieperf/dicttrieperf.cpp
+++ b/icu4c/source/test/perf/dicttrieperf/dicttrieperf.cpp
@ -0,0 +1,766 @@
+/*  
+ **********************************************************************
+ *   Copyright (C) 2002-2010, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ *  file name:  dicttrieperf.cpp
+ *  encoding:   US-ASCII
+ *  tab size:   8 (not used)
+ *  indentation:4
+ *
+ *  created on: 2010dec09
+ *  created by: Markus W. Scherer
+ *
+ *  Performance test program for dictionary-type tries.
+ *
+ * Usage from within <ICU build tree>/test/perf/dicttrieperf/ :
+ * (Linux)
+ *  make
+ *  export LD_LIBRARY_PATH=../../../lib:../../../stubdata:../../../tools/ctestfw
+ *  ./dicttrieperf --sourcedir <ICU build tree>/data/out/tmp --passes 3 --iterations 1000
+ * or
+ *  ./dicttrieperf -f <ICU source tree>/source/data/brkitr/thaidict.txt --passes 3 --iterations 250
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "unicode/uperf.h"
+#include "unicode/utext.h"
+#include "bytetrie.h"
+#include "bytetriebuilder.h"
+#include "charstr.h"
+#include "package.h"
+#include "toolutil.h"
+#include "triedict.h"
+#include "ucbuf.h"  // struct ULine
+#include "uchartrie.h"
+#include "uchartriebuilder.h"
+#include "uoptions.h"
+#include "uvectr32.h"
+
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+
+// Test object.
+// If an input file was given, the constructor reads its lines,
+// counts the text (non-comment) lines and strips their trailing CR/LF.
+class DictionaryTriePerfTest : public UPerfTest {
+public:
+    DictionaryTriePerfTest(int32_t argc, const char *argv[], UErrorCode &status)
+            : UPerfTest(argc, argv, NULL, 0, "", status), numTextLines(0) {
+        if(!hasFile()) {
+            return;
+        }
+        getLines(status);
+        for(int32_t i=0; i<numLines; ++i) {
+            ULine &line=lines[i];
+            // Skip comment lines (start with a character below 'A').
+            if(line.name[0]<0x41) {
+                continue;
+            }
+            ++numTextLines;
+            // Remove trailing CR LF.
+            int32_t trimmedLength=line.len;
+            while(trimmedLength>0) {
+                const UChar last=line.name[trimmedLength-1];
+                if(last!=0xa && last!=0xd) {
+                    break;
+                }
+                --trimmedLength;
+            }
+            line.len=trimmedLength;
+        }
+    }
+
+    virtual UPerfFunction *runIndexedTest(int32_t index, UBool exec, const char *&name, char *par=NULL);
+
+    // Accessors for state kept by the UPerfTest base class.
+    const char *getSourceDir() const { return sourceDir; }
+
+    UBool hasFile() const { return ucharBuf!=NULL; }
+    const ULine *getCachedLines() const { return lines; }
+    int32_t getNumLines() const { return numLines; }
+    int32_t numTextLines;  // excluding comment lines
+};
+
+// Performance test function object.
+// Base class for the package lookups: reads the ICU data package
+// U_ICUDATA_NAME ".dat" (icudt46l.dat or whatever its current versioned filename)
+// from the -s or --sourcedir path.
+class PackageLookup : public UPerfFunction {
+protected:
+    PackageLookup(const DictionaryTriePerfTest &perf) {
+        IcuToolErrorCode errorCode("PackageLookup()");
+        CharString filename(perf.getSourceDir(), errorCode);
+        // Make sure that a non-empty directory path ends with a file separator.
+        const int32_t dirLength=filename.length();
+        if(dirLength>0) {
+            const char lastChar=filename[dirLength-1];
+            if(lastChar!=U_FILE_SEP_CHAR && lastChar!=U_FILE_ALT_SEP_CHAR) {
+                filename.append(U_FILE_SEP_CHAR, errorCode);
+            }
+        }
+        filename.append(U_ICUDATA_NAME, errorCode);
+        filename.append(".dat", errorCode);
+        pkg.readPackage(filename.data());
+    }
+
+public:
+    virtual ~PackageLookup() {}
+
+    // Subclasses implement the actual lookup:
+    // virtual void call(UErrorCode* pErrorCode) { ... }
+
+    // One operation per item in the package.
+    virtual long getOperationsPerIteration() {
+        const long itemCount=pkg.getItemCount();
+        return itemCount;
+    }
+
+    // virtual long getEventsPerIteration();
+
+protected:
+    Package pkg;
+};
+
+// One table-of-contents entry: two 32-bit offsets.
+struct TOCEntry {
+    int32_t nameOffset;  // offset of the item name in the names buffer
+    int32_t dataOffset;
+};
+
+// Similar to ICU 4.6 offsetTOCLookupFn() (in ucmndata.c).
+// Binary search for s among the count names at names+toc[i].nameOffset.
+// Returns the index of the matching TOC entry, or -1 if s is not found.
+static int32_t simpleBinarySearch(const char *s, const char *names, const TOCEntry *toc, int32_t count) {
+    int32_t start=0;
+    int32_t limit=count;
+    int32_t previous=limit;
+    while(TRUE) {
+        const int32_t middle=(start+limit)/2;
+        if(middle==previous) {
+            // The probe index did not move: s is not in the table.
+            return -1;
+        }
+        previous=middle;
+        const int32_t order=strcmp(s, names+toc[middle].nameOffset);
+        if(order==0) {
+            return middle;  // found s
+        }
+        if(order<0) {
+            limit=middle;
+        } else {
+            start=middle;
+        }
+    }
+}
+
+// Looks up every package item name with simpleBinarySearch()
+// in a table of contents that imitates the one in a .dat file.
+class BinarySearchPackageLookup : public PackageLookup {
+public:
+    BinarySearchPackageLookup(const DictionaryTriePerfTest &perf)
+            : PackageLookup(perf) {
+        IcuToolErrorCode errorCode("BinarySearchPackageLookup()");
+        const int32_t count=pkg.getItemCount();
+        toc=new TOCEntry[count];
+        for(int32_t i=0; i<count; ++i) {
+            TOCEntry &entry=toc[i];
+            entry.nameOffset=itemNames.length();
+            entry.dataOffset=i;  // arbitrary value, see toc comment below
+            // The Package class removes the "icudt46l/" prefix.
+            // We restore that here for a fair performance test.
+            const char *name=pkg.getItem(i)->name;
+            itemNames.append("icudt46l/", errorCode);
+            // +1: include the terminating NUL, so that the names are separated.
+            itemNames.append(name, strlen(name)+1, errorCode);
+        }
+        const int32_t namesSize=itemNames.length();
+        const int32_t tocSize=count*8;
+        printf("size of item names: %6ld\n", (long)namesSize);
+        printf("size of TOC:        %6ld\n", (long)tocSize);
+        printf("total index size:   %6ld\n", (long)(namesSize+tocSize));
+    }
+    virtual ~BinarySearchPackageLookup() {
+        delete[] toc;
+    }
+
+    // Searches for each of the NUL-separated names stored in itemNames.
+    virtual void call(UErrorCode * /*pErrorCode*/) {
+        const int32_t count=pkg.getItemCount();
+        const char *const namesStart=itemNames.data();
+        const char *name=namesStart;
+        for(int32_t remaining=count; remaining>0; --remaining) {
+            const int32_t found=simpleBinarySearch(name, namesStart, toc, count);
+            if(found<0) {
+                fprintf(stderr, "item not found: %s\n", name);
+            }
+            // Skip over this name and its NUL terminator.
+            name+=strlen(name)+1;
+        }
+    }
+
+protected:
+    CharString itemNames;
+    // toc imitates a .dat file's array of UDataOffsetTOCEntry
+    // with nameOffset and dataOffset.
+    // We don't need the dataOffsets, but we want to imitate the real
+    // memory density, to measure equivalent CPU cache usage.
+    TOCEntry *toc;
+};
+
+#ifndef MIN
+#define MIN(a,b) (((a)<(b)) ? (a) : (b))
+#endif
+
+// Compares two NUL-terminated strings that are known to share their first
+// prefixLength bytes, starting the comparison right after that prefix.
+// On return, prefixLength is the full length of the shared prefix.
+// Returns the difference of the first differing bytes (compared as unsigned),
+// or 0 if the strings are equal.
+static int32_t strcmpAfterPrefix(const char *s1, const char *s2, int32_t &prefixLength) {
+    int32_t shared=prefixLength;
+    int32_t diff;
+    for(;; ++shared) {
+        const int32_t b1=(uint8_t)s1[shared];
+        const int32_t b2=(uint8_t)s2[shared];
+        diff=b1-b2;
+        if(diff!=0 || b1==0) {
+            // Different, or both strings end here.
+            break;
+        }
+    }
+    prefixLength=shared;
+    return diff;
+}
+
+// Binary search for s among the count sorted names at names+toc[i].nameOffset,
+// skipping the prefix that s is already known to share with the names
+// at both ends of the current search range.
+// Returns the index of the matching TOC entry, or -1 if s is not found.
+static int32_t prefixBinarySearch(const char *s, const char *names, const TOCEntry *toc, int32_t count) {
+    if(count==0) {
+        return -1;
+    }
+    int32_t start=0;
+    int32_t limit=count;
+    // Remember the shared prefix between s, start and limit,
+    // and don't compare that shared prefix again.
+    // The shared prefix should get longer as we narrow the [start, limit[ range.
+    int32_t startPrefixLength=0;
+    int32_t limitPrefixLength=0;
+    // Prime the prefix lengths so that we don't keep prefixLength at 0 until
+    // both the start and limit indexes have moved.
+    // At the same time, we find if s is one of the start and (limit-1) names,
+    // and if not, exclude them from the actual binary search.
+    if(0==strcmpAfterPrefix(s, names+toc[0].nameOffset, startPrefixLength)) {
+        return 0;
+    }
+    ++start;
+    --limit;
+    if(0==strcmpAfterPrefix(s, names+toc[limit].nameOffset, limitPrefixLength)) {
+        return limit;
+    }
+    while(start<limit) {
+        int32_t i=(start+limit)/2;
+        int32_t prefixLength=MIN(startPrefixLength, limitPrefixLength);
+        int32_t cmp=strcmpAfterPrefix(s, names+toc[i].nameOffset, prefixLength);
+        if(cmp<0) {
+            limit=i;
+            limitPrefixLength=prefixLength;
+        } else if(cmp==0) {
+            return i;
+        } else {
+            // Exclude the name at i, which we just compared.
+            // With start=i the range would stop shrinking once limit==start+1,
+            // and a search for a missing string would never terminate.
+            start=i+1;
+            startPrefixLength=prefixLength;
+        }
+    }
+    return -1;
+}
+
+// Same data as BinarySearchPackageLookup,
+// but each name is looked up with prefixBinarySearch().
+class PrefixBinarySearchPackageLookup : public BinarySearchPackageLookup {
+public:
+    PrefixBinarySearchPackageLookup(const DictionaryTriePerfTest &perf)
+            : BinarySearchPackageLookup(perf) {}
+
+    // Searches for each of the NUL-separated names stored in itemNames.
+    virtual void call(UErrorCode * /*pErrorCode*/) {
+        const int32_t count=pkg.getItemCount();
+        const char *const namesStart=itemNames.data();
+        const char *name=namesStart;
+        for(int32_t remaining=count; remaining>0; --remaining) {
+            const int32_t found=prefixBinarySearch(name, namesStart, toc, count);
+            if(found<0) {
+                fprintf(stderr, "item not found: %s\n", name);
+            }
+            // Skip over this name and its NUL terminator.
+            name+=strlen(name)+1;
+        }
+    }
+};
+
+// Looks up the NUL-terminated string s in the serialized ByteTrie.
+// Returns the trie value for s, or -1 if s has no value in the trie.
+static int32_t byteTrieLookup(const char *s, const char *nameTrieBytes) {
+    ByteTrie trie(nameTrieBytes);
+    const UDictTrieResult result=trie.next(s, -1);
+    return UDICTTRIE_RESULT_HAS_VALUE(result) ? trie.getValue() : -1;
+}
+
+// Looks up every package item name in a ByteTrie
+// that maps the full item names to their item indexes.
+class ByteTriePackageLookup : public PackageLookup {
+public:
+    ByteTriePackageLookup(const DictionaryTriePerfTest &perf)
+            : PackageLookup(perf) {
+        // Label errors with this class's own name.
+        IcuToolErrorCode errorCode("ByteTriePackageLookup()");
+        int32_t count=pkg.getItemCount();
+        for(int32_t i=0; i<count; ++i) {
+            // The Package class removes the "icudt46l/" prefix.
+            // We restore that here for a fair performance test.
+            // We store all full names so that we do not have to reconstruct them
+            // in the call() function.
+            const char *name=pkg.getItem(i)->name;
+            int32_t offset=itemNames.length();
+            itemNames.append("icudt46l/", errorCode);
+            itemNames.append(name, -1, errorCode);
+            // As value, set the data item index.
+            // In a real implementation, we would use that to get the
+            // start and limit offset of the data item.
+            StringPiece fullName(itemNames.toStringPiece());
+            // Keep only the name that was just appended.
+            fullName.remove_prefix(offset);
+            builder.add(fullName, i, errorCode);
+            // NUL-terminate the name for call() to find the next one.
+            itemNames.append(0, errorCode);
+        }
+        int32_t length=builder.build(UDICTTRIE_BUILD_SMALL, errorCode).length();
+        printf("size of ByteTrie:   %6ld\n", (long)length);
+        // count+1: +1 for the last-item limit offset which we should have always had
+        printf("size of dataOffsets:%6ld\n", (long)((count+1)*4));
+        printf("total index size:   %6ld\n", (long)(length+(count+1)*4));
+    }
+    virtual ~ByteTriePackageLookup() {}
+
+    // Looks up each of the NUL-separated names stored in itemNames.
+    virtual void call(UErrorCode *pErrorCode) {
+        int32_t count=pkg.getItemCount();
+        const char *nameTrieBytes=builder.build(UDICTTRIE_BUILD_SMALL, *pErrorCode).data();
+        const char *name=itemNames.data();
+        for(int32_t i=0; i<count; ++i) {
+            if(byteTrieLookup(name, nameTrieBytes)<0) {
+                fprintf(stderr, "item not found: %s\n", name);
+            }
+            // Skip over this name and its NUL terminator.
+            name=strchr(name, 0)+1;
+        }
+    }
+
+protected:
+    ByteTrieBuilder builder;
+    CharString itemNames;
+};
+
+// Performance test function object.
+// Common base class for the dictionary lookups.
+// Each subclass loads a dictionary text file
+// from the -s or --sourcedir path plus -f or --file-name.
+// For example, <ICU source dir>/source/data/brkitr/thaidict.txt.
+class DictLookup : public UPerfFunction {
+public:
+    DictLookup(const DictionaryTriePerfTest &perfTest) : perf(perfTest) {}
+
+    // One operation per text (non-comment) line of the dictionary file.
+    virtual long getOperationsPerIteration() {
+        const long operations=perf.numTextLines;
+        return operations;
+    }
+
+protected:
+    const DictionaryTriePerfTest &perf;
+};
+
+// Builds a CompactTrieDictionary from the text lines of the dictionary file
+// and looks up each of those lines (words) in it.
+class CompactTrieDictLookup : public DictLookup {
+public:
+    CompactTrieDictLookup(const DictionaryTriePerfTest &perfTest)
+            : DictLookup(perfTest), ctd(NULL) {
+        // Label errors with this class's own name.
+        IcuToolErrorCode errorCode("CompactTrieDictLookup()");
+        // U+0E1C is the median code unit, from
+        // the UCharTrie root node (split-branch node) for thaidict.txt.
+        MutableTrieDictionary builder(0xe1c, errorCode);
+        const ULine *lines=perf.getCachedLines();
+        int32_t numLines=perf.getNumLines();
+        for(int32_t i=0; i<numLines; ++i) {
+            // Skip comment lines (start with a character below 'A').
+            if(lines[i].name[0]<0x41) {
+                continue;
+            }
+            builder.addWord(lines[i].name, lines[i].len, errorCode);
+        }
+        ctd=new CompactTrieDictionary(builder, errorCode);
+        int32_t length=(int32_t)ctd->dataSize();
+        printf("size of CompactTrieDict:    %6ld bytes\n", (long)length);
+    }
+
+    virtual ~CompactTrieDictLookup() {
+        delete ctd;
+    }
+
+    // Looks up every word and reports those for which the dictionary
+    // does not return a match of the full word length.
+    virtual void call(UErrorCode *pErrorCode) {
+        UText text=UTEXT_INITIALIZER;
+        int32_t lengths[20];
+        const ULine *lines=perf.getCachedLines();
+        int32_t numLines=perf.getNumLines();
+        for(int32_t i=0; i<numLines; ++i) {
+            // Skip comment lines (start with a character below 'A').
+            if(lines[i].name[0]<0x41) {
+                continue;
+            }
+            // Re-opens the same stack-allocated UText for each word.
+            utext_openUChars(&text, lines[i].name, lines[i].len, pErrorCode);
+            // Initialized so that the check below never reads an indeterminate value.
+            int32_t count=0;
+            ctd->matches(&text, lines[i].len,
+                         lengths, count, LENGTHOF(lengths));
+            if(count==0 || lengths[count-1]!=lines[i].len) {
+                fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
+            }
+        }
+        // Every opened UText must be closed, even a stack-allocated one.
+        utext_close(&text);
+    }
+
+protected:
+    CompactTrieDictionary *ctd;
+};
+
+// Closely imitate CompactTrieDictionary::matches().
+// Note: CompactTrieDictionary::matches() is part of its trie implementation,
+// and while it loops over the text, it knows the current state.
+// By contrast, this implementation uses UCharTrie API functions that have to
+// check the trie state each time and load/store state in the object.
+// (Whether it hasNext() and whether it is in the middle of a linear-match node.)
+//
+// Parameters:
+//   trie       the trie to search; it is restarted with first()
+//   text       the input, read with utext_next32() from its current position
+//   textLimit  stop after this many characters have been read
+//   lengths    receives the character count at each point where the trie has a value
+//   count      set to the number of entries written to lengths
+//              (not set if the text is already at its end)
+//   limit      capacity of lengths; further matches are not recorded
+// Returns the number of characters that were read from the text and passed
+// to the trie (0 if the text is already at its end).
+static int32_t
+ucharTrieMatches(UCharTrie &trie,
+                 UText *text, int32_t textLimit,
+                 int32_t *lengths, int &count, int limit ) {
+    UChar32 c=utext_next32(text);
+    // Notes:
+    // a) CompactTrieDictionary::matches() does not check for U_SENTINEL.
+    // b) It also ignores non-BMP code points by casting to UChar!
+    if(c<0) {
+        return 0;
+    }
+    // Should be firstForCodePoint() but CompactTrieDictionary
+    // handles only code units.
+    UDictTrieResult result=trie.first(c);
+    int32_t numChars=1;
+    count=0;
+    for(;;) {
+        if(UDICTTRIE_RESULT_HAS_VALUE(result)) {
+            if(count<limit) {
+                // lengths[count++]=(int32_t)utext_getNativeIndex(text);
+                lengths[count++]=numChars;  // CompactTrieDictionary just counts chars too.
+            }
+            if(result==UDICTTRIE_HAS_FINAL_VALUE) {
+                // No longer string can match.
+                break;
+            }
+        } else if(result==UDICTTRIE_NO_MATCH) {
+            break;
+        }
+        if(numChars>=textLimit) {
+            // Note: Why do we have both a text limit and a UText that knows its length?
+            break;
+        }
+        // Note: this c shadows the c declared at the top of the function.
+        UChar32 c=utext_next32(text);
+        // Notes:
+        // a) CompactTrieDictionary::matches() does not check for U_SENTINEL.
+        // b) It also ignores non-BMP code points by casting to UChar!
+        if(c<0) {
+            break;
+        }
+        ++numChars;
+        // Should be nextForCodePoint() but CompactTrieDictionary
+        // handles only code units.
+        result=trie.next(c);
+    }
+#if 0
+    // Note: CompactTrieDictionary::matches() comments say that it leaves the UText
+    // after the longest prefix match and returns the number of characters
+    // that were matched.
+    if(index!=lastMatch) {
+        utext_setNativeIndex(text, lastMatch);
+    }
+    return lastMatch-start;
+    // However, it does not do either of these, so I am not trying to
+    // imitate it (or its docs) 100%.
+#endif
+    return numChars;
+}
+
+// Common base for the UCharTrie perf tests: fills the builder with every
+// non-comment line of the test file and reports the size of the built trie.
+class UCharTrieDictLookup : public DictLookup {
+public:
+    UCharTrieDictLookup(const DictionaryTriePerfTest &perfTest)
+            : DictLookup(perfTest) {
+        IcuToolErrorCode errorCode("UCharTrieDictLookup()");
+        const ULine *lines=perf.getCachedLines();
+        int32_t numLines=perf.getNumLines();
+        for(int32_t i=0; i<numLines; ++i) {
+            const ULine &line=lines[i];
+            // Only real words are added; comment lines start with a character below 'A'.
+            if(line.name[0]>=0x41) {
+                builder.add(UnicodeString(FALSE, line.name, line.len), 0, errorCode);
+            }
+        }
+        // Build once here just to print the serialized size (UChars are 2 bytes each).
+        UnicodeString trieUChars;
+        int32_t length=builder.build(UDICTTRIE_BUILD_SMALL, trieUChars, errorCode).length();
+        printf("size of UCharTrie:          %6ld bytes\n", (long)length*2);
+    }
+
+    virtual ~UCharTrieDictLookup() {}
+
+protected:
+    UCharTrieBuilder builder;
+};
+
+// Perf test: for each dictionary word, collects all prefix matches via
+// ucharTrieMatches() and verifies that the longest one is the whole word.
+class UCharTrieDictMatches : public UCharTrieDictLookup {
+public:
+    UCharTrieDictMatches(const DictionaryTriePerfTest &perfTest)
+            : UCharTrieDictLookup(perfTest) {}
+
+    virtual void call(UErrorCode *pErrorCode) {
+        UnicodeString uchars;
+        UCharTrie trie(builder.build(UDICTTRIE_BUILD_SMALL, uchars, *pErrorCode).getBuffer());
+        if(U_FAILURE(*pErrorCode)) {
+            // Without a trie (or with an incoming failure) utext_openUChars()
+            // would not open the text and the lookups would run on garbage.
+            return;
+        }
+        UText text=UTEXT_INITIALIZER;
+        int32_t lengths[20];
+        const ULine *lines=perf.getCachedLines();
+        int32_t numLines=perf.getNumLines();
+        for(int32_t i=0; i<numLines; ++i) {
+            // Skip comment lines (start with a character below 'A').
+            if(lines[i].name[0]<0x41) {
+                continue;
+            }
+            // Re-opening the same stack UText reuses it for the next word.
+            utext_openUChars(&text, lines[i].name, lines[i].len, pErrorCode);
+            int32_t count=0;
+            ucharTrieMatches(trie, &text, lines[i].len,
+                             lengths, count, LENGTHOF(lengths));
+            if(count==0 || lengths[count-1]!=lines[i].len) {
+                fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
+            }
+        }
+        // A stack-allocated UText still has to be closed once we are done with it.
+        utext_close(&text);
+    }
+};
+
+// Perf test: checks each dictionary word with a single whole-string
+// trie.reset().next(s, length) lookup.
+class UCharTrieDictContains : public UCharTrieDictLookup {
+public:
+    UCharTrieDictContains(const DictionaryTriePerfTest &perfTest)
+            : UCharTrieDictLookup(perfTest) {}
+
+    virtual void call(UErrorCode *pErrorCode) {
+        UnicodeString uchars;
+        UCharTrie trie(builder.build(UDICTTRIE_BUILD_SMALL, uchars, *pErrorCode).getBuffer());
+        const ULine *lines=perf.getCachedLines();
+        int32_t numLines=perf.getNumLines();
+        for(int32_t i=0; i<numLines; ++i) {
+            const ULine &line=lines[i];
+            // Comment lines start with a character below 'A'; only words are looked up.
+            if(line.name[0]>=0x41 &&
+                    !UDICTTRIE_RESULT_HAS_VALUE(trie.reset().next(line.name, line.len))) {
+                fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
+            }
+        }
+    }
+};
+
+// Maps a code point to a single byte for the ByteTrie dictionary:
+// U+0E00..U+0EFE keep their low byte, '.' (U+002E) becomes 0xff,
+// and everything else is reported as unencodable with -1.
+static inline int32_t thaiCharToByte(UChar32 c) {
+    if(c==0x2e) {
+        return 0xff;
+    }
+    if(c<0xe00 || 0xefe<c) {
+        return -1;
+    }
+    return c&0xff;
+}
+
+// Appends the thaiCharToByte() encoding of s[0..length[ to str.
+// Stops at the first unit that cannot be encoded, reports it on stderr
+// and returns FALSE; returns TRUE when the whole word was converted.
+static UBool thaiWordToBytes(const UChar *s, int32_t length,
+                             CharString &str, UErrorCode &errorCode) {
+    int32_t i=0;
+    while(i<length) {
+        UChar c=s[i++];
+        int32_t b=thaiCharToByte(c);
+        if(b<0) {
+            fprintf(stderr, "thaiWordToBytes(): unable to encode U+%04X as a byte\n", c);
+            return FALSE;
+        }
+        str.append((char)b, errorCode);
+    }
+    return TRUE;
+}
+
+// Common base for the ByteTrie perf tests: converts every non-comment line
+// to bytes and adds it to the builder. If any word cannot be converted,
+// noDict is set and the subclasses skip their work.
+class ByteTrieDictLookup : public DictLookup {
+public:
+    ByteTrieDictLookup(const DictionaryTriePerfTest &perfTest)
+            : DictLookup(perfTest), noDict(FALSE) {
+        IcuToolErrorCode errorCode("ByteTrieDictLookup()");
+        CharString str;
+        const ULine *lines=perf.getCachedLines();
+        int32_t numLines=perf.getNumLines();
+        // The loop ends early as soon as one word fails to convert.
+        for(int32_t i=0; !noDict && i<numLines; ++i) {
+            const ULine &line=lines[i];
+            // Comment lines start with a character below 'A'; only words are added.
+            if(line.name[0]>=0x41) {
+                if(thaiWordToBytes(line.name, line.len, str.clear(), errorCode)) {
+                    builder.add(str.toStringPiece(), 0, errorCode);
+                } else {
+                    fprintf(stderr, "thaiWordToBytes(): failed for word %ld (0-based)\n", (long)i);
+                    noDict=TRUE;
+                }
+            }
+        }
+        if(noDict) {
+            return;
+        }
+        // Build once here just to print the serialized size.
+        int32_t length=builder.build(UDICTTRIE_BUILD_SMALL, errorCode).length();
+        printf("size of ByteTrie:           %6ld bytes\n", (long)length);
+    }
+
+    virtual ~ByteTrieDictLookup() {}
+
+protected:
+    ByteTrieBuilder builder;
+    UBool noDict;  // TRUE if the word list could not be converted to bytes.
+};
+
+// ByteTrie counterpart of ucharTrieMatches(): each character read from the
+// text is converted with thaiCharToByte() before it is fed to the trie.
+// Records in lengths[] (at most `limit` entries, their number in count) how
+// many characters had been read whenever the trie reports a value.
+// Returns the number of characters read, or 0 with count untouched when
+// the text yields nothing at all.
+static int32_t
+byteTrieMatches(ByteTrie &trie,
+                UText *text, int32_t textLimit,
+                int32_t *lengths, int &count, int limit ) {
+    UChar32 cp=utext_next32(text);
+    if(cp<0) {
+        return 0;
+    }
+    count=0;
+    int32_t charsRead=1;
+    UDictTrieResult result=trie.first(thaiCharToByte(cp));
+    for(;;) {
+        if(UDICTTRIE_RESULT_HAS_VALUE(result)) {
+            if(count<limit) {
+                // CompactTrieDictionary just counts chars too,
+                // rather than storing (int32_t)utext_getNativeIndex(text).
+                lengths[count]=charsRead;
+                ++count;
+            }
+            if(result==UDICTTRIE_HAS_FINAL_VALUE) {
+                break;
+            }
+        } else if(result==UDICTTRIE_NO_MATCH) {
+            break;
+        }
+        if(charsRead>=textLimit) {
+            break;
+        }
+        cp=utext_next32(text);
+        if(cp<0) {
+            break;
+        }
+        ++charsRead;
+        result=trie.next(thaiCharToByte(cp));
+    }
+    return charsRead;
+}
+
+// Perf test: for each dictionary word, collects all prefix matches via
+// byteTrieMatches() and verifies that the longest one is the whole word.
+class ByteTrieDictMatches : public ByteTrieDictLookup {
+public:
+    ByteTrieDictMatches(const DictionaryTriePerfTest &perfTest)
+            : ByteTrieDictLookup(perfTest) {}
+
+    virtual void call(UErrorCode *pErrorCode) {
+        if(noDict) {
+            return;
+        }
+        ByteTrie trie(builder.build(UDICTTRIE_BUILD_SMALL, *pErrorCode).data());
+        if(U_FAILURE(*pErrorCode)) {
+            // Without a trie (or with an incoming failure) utext_openUChars()
+            // would not open the text and the lookups would run on garbage.
+            return;
+        }
+        UText text=UTEXT_INITIALIZER;
+        int32_t lengths[20];
+        const ULine *lines=perf.getCachedLines();
+        int32_t numLines=perf.getNumLines();
+        for(int32_t i=0; i<numLines; ++i) {
+            // Skip comment lines (start with a character below 'A').
+            if(lines[i].name[0]<0x41) {
+                continue;
+            }
+            // Re-opening the same stack UText reuses it for the next word.
+            utext_openUChars(&text, lines[i].name, lines[i].len, pErrorCode);
+            int32_t count=0;
+            byteTrieMatches(trie, &text, lines[i].len,
+                            lengths, count, LENGTHOF(lengths));
+            if(count==0 || lengths[count-1]!=lines[i].len) {
+                fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
+            }
+        }
+        // A stack-allocated UText still has to be closed once we are done with it.
+        utext_close(&text);
+    }
+};
+
+// Perf test: looks up each dictionary word byte by byte and checks that the
+// trie has a value after the last byte.
+class ByteTrieDictContains : public ByteTrieDictLookup {
+public:
+    ByteTrieDictContains(const DictionaryTriePerfTest &perfTest)
+            : ByteTrieDictLookup(perfTest) {}
+
+    virtual void call(UErrorCode *pErrorCode) {
+        if(noDict) {
+            return;
+        }
+        ByteTrie trie(builder.build(UDICTTRIE_BUILD_SMALL, *pErrorCode).data());
+        const ULine *lines=perf.getCachedLines();
+        int32_t numLines=perf.getNumLines();
+        for(int32_t i=0; i<numLines; ++i) {
+            const UChar *line=lines[i].name;
+            // Skip comment lines (start with a character below 'A').
+            if(line[0]<0x41) {
+                continue;
+            }
+            UDictTrieResult result=trie.first(thaiCharToByte(line[0]));
+            int32_t lineLength=lines[i].len;
+            // Feed the remaining bytes for as long as the trie can continue.
+            int32_t j=1;
+            while(j<lineLength && UDICTTRIE_RESULT_HAS_NEXT(result)) {
+                result=trie.next(thaiCharToByte(line[j]));
+                ++j;
+            }
+            // The word is missing if the trie stopped before the end of the word
+            // or has no value at the end. Report each missing word exactly once.
+            if(j<lineLength || !UDICTTRIE_RESULT_HAS_VALUE(result)) {
+                fprintf(stderr, "word %ld (0-based) not found\n", (long)i);
+            }
+        }
+    }
+};
+
+// Maps a test index to its name and, when exec is set, to a newly allocated
+// test object. With an input file the dictionary tests are offered,
+// otherwise the .dat package lookup tests. An index past the last test
+// yields an empty name and NULL.
+UPerfFunction *DictionaryTriePerfTest::runIndexedTest(int32_t index, UBool exec,
+                                                      const char *&name, char * /*par*/) {
+    UPerfFunction *test=NULL;
+    if(!hasFile()) {
+        if(exec && index==0) {
+            puts("Running ByteTrie perf tests on the .dat package file from the --sourcedir.\n"
+                 "For UCharTrie perf tests on a dictionary text file, specify the -f or --file-name.\n");
+        }
+        switch(index) {
+        case 0:
+            name="simplebinarysearch";
+            if(exec) {
+                test=new BinarySearchPackageLookup(*this);
+            }
+            break;
+        case 1:
+            name="prefixbinarysearch";
+            if(exec) {
+                test=new PrefixBinarySearchPackageLookup(*this);
+            }
+            break;
+        case 2:
+            name="bytetrie";
+            if(exec) {
+                test=new ByteTriePackageLookup(*this);
+            }
+            break;
+        default:
+            name="";
+            break;
+        }
+        return test;
+    }
+    // Dictionary text file given: offer the dictionary lookup tests.
+    switch(index) {
+    case 0:
+        name="compacttriematches";
+        if(exec) {
+            test=new CompactTrieDictLookup(*this);
+        }
+        break;
+    case 1:
+        name="uchartriematches";
+        if(exec) {
+            test=new UCharTrieDictMatches(*this);
+        }
+        break;
+    case 2:
+        name="uchartriecontains";
+        if(exec) {
+            test=new UCharTrieDictContains(*this);
+        }
+        break;
+    case 3:
+        name="bytetriematches";
+        if(exec) {
+            test=new ByteTrieDictMatches(*this);
+        }
+        break;
+    case 4:
+        name="bytetriecontains";
+        if(exec) {
+            test=new ByteTrieDictContains(*this);
+        }
+        break;
+    default:
+        name="";
+        break;
+    }
+    return test;
+}
+
+// Entry point: sets up the perf test from the command line and runs it.
+// Exit status: 0 on success, -1 if the tests could not be run, or the
+// error code value if the test object could not be constructed.
+int main(int argc, const char *argv[]) {
+    IcuToolErrorCode errorCode("dicttrieperf main()");
+    DictionaryTriePerfTest test(argc, argv, errorCode);
+    if(!errorCode.isFailure()) {
+        if(test.run()) {
+            return 0;
+        }
+        fprintf(stderr, "FAILED: Tests could not be run, please check the arguments.\n");
+        return -1;
+    }
+    fprintf(stderr, "DictionaryTriePerfTest() failed: %s\n", errorCode.errorName());
+    test.usage();
+    // reset() hands back the failure code, which becomes the exit status.
+    return errorCode.reset();
+}
--- a/icu4c/source/tools/ctestfw/uperf.cpp
+++ b/icu4c/source/tools/ctestfw/uperf.cpp
@ -1,6 +1,6 @@
 /********************************************************************
 * COPYRIGHT:
- * Copyright (c) 2002-2009, International Business Machines Corporation and
+ * Copyright (c) 2002-2010, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/

@ -203,6 +203,12 @@ void UPerfTest::init(UOption addOptions[], int32_t addOptionsCount,
 }

 ULine* UPerfTest::getLines(UErrorCode& status){
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+    if (lines != NULL) {
+        return lines;  // don't do it again
+    }
    lines     = new ULine[MAXLINES];
    int maxLines = MAXLINES;
    numLines=0;
--- a/icu4c/source/tools/toolutil/Makefile.in
+++ b/icu4c/source/tools/toolutil/Makefile.in
@ -52,6 +52,9 @@ LDFLAGS += $(LDFLAGSICUTOOLUTIL)
 LIBS = $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS)

 OBJECTS = filestrm.o package.o pkgitems.o swapimpl.o toolutil.o unewdata.o \
+dicttriebuilder.o bytetriebuilder.o bytetrieiterator.o \
+uchartrie.o uchartriebuilder.o uchartrieiterator.o \
+denseranges.o \
 ucm.o ucmstate.o uoptions.o uparse.o \
 ucbuf.o xmlparser.o writesrc.o \
 pkg_icu.o pkg_genc.o pkg_gencmn.o flagparser.o filetools.o \
--- a/icu4c/source/tools/toolutil/bytetriebuilder.cpp
+++ b/icu4c/source/tools/toolutil/bytetriebuilder.cpp
@ -0,0 +1,755 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  bytetriebuilder.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010sep25
+*   created by: Markus W. Scherer
+*
+* Builder class for ByteTrie dictionary trie.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/stringpiece.h"
+#include "bytetrie.h"
+#include "bytetriebuilder.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "uarrsort.h"
+
+U_NAMESPACE_BEGIN
+
+/*
+ * Note: This builder implementation stores (bytes, value) pairs with full copies
+ * of the byte sequences, until the ByteTrie is built.
+ * It might(!) take less memory if we collected the data in a temporary, dynamic trie.
+ */
+
+// One (byte sequence, value) pair collected by the builder.
+// The bytes themselves live, length-prefixed, in a shared CharString;
+// this object only remembers where they start.
+class ByteTrieElement : public UMemory {
+public:
+    // Use compiler's default constructor, initializes nothing.
+
+    // Appends a length-prefixed copy of s to strings and remembers its offset and val.
+    void setTo(const StringPiece &s, int32_t val, CharString &strings, UErrorCode &errorCode);
+
+    // Returns a view of this element's bytes (without the length prefix).
+    StringPiece getString(const CharString &strings) const {
+        return StringPiece(data(strings), getStringLength(strings));
+    }
+    // Decodes the one- or two-byte length prefix.
+    int32_t getStringLength(const CharString &strings) const {
+        if(stringOffset>=0) {
+            return (uint8_t)strings[stringOffset];
+        }
+        int32_t prefix=~stringOffset;
+        int32_t high=(uint8_t)strings[prefix];
+        int32_t low=(uint8_t)strings[prefix+1];
+        return (high<<8)|low;
+    }
+
+    char charAt(int32_t index, const CharString &strings) const {
+        const char *bytes=data(strings);
+        return bytes[index];
+    }
+
+    int32_t getValue() const { return value; }
+
+    // Byte-wise comparison of the two strings; a proper prefix sorts first.
+    int32_t compareStringTo(const ByteTrieElement &o, const CharString &strings) const;
+
+private:
+    // Returns a pointer to the first string byte, just past the length prefix.
+    const char *data(const CharString &strings) const {
+        int32_t start= stringOffset>=0 ? stringOffset+1 : ~stringOffset+2;
+        return strings.data()+start;
+    }
+
+    // If the stringOffset is non-negative, then the first strings byte contains
+    // the string length.
+    // If the stringOffset is negative, then the first two strings bytes contain
+    // the string length (big-endian), and the offset needs to be bit-inverted.
+    // (Compared with a stringLength field here, this saves 3 bytes per string for most strings.)
+    int32_t stringOffset;
+    int32_t value;
+};
+
+// Stores a copy of s in strings, preceded by its length in one byte
+// (lengths up to 0xff) or two big-endian bytes (up to 0xffff), and records
+// the offset and val in this element. A two-byte prefix is flagged by
+// storing the bit-inverted offset.
+// Sets U_INDEX_OUTOFBOUNDS_ERROR for strings longer than 0xffff bytes.
+void
+ByteTrieElement::setTo(const StringPiece &s, int32_t val,
+                       CharString &strings, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return;
+    }
+    const int32_t length=s.length();
+    if(length>0xffff) {
+        // Too long: We store the length in 1 or 2 bytes.
+        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return;
+    }
+    const int32_t start=strings.length();
+    const UBool twoByteLength= length>0xff;
+    if(twoByteLength) {
+        strings.append((char)(length>>8), errorCode);  // high byte first
+    }
+    strings.append((char)length, errorCode);
+    strings.append(s, errorCode);
+    stringOffset= twoByteLength ? ~start : start;
+    value=val;
+}
+
+// Orders two elements by their byte strings: memcmp over the common length
+// decides first; if that part is equal, the length difference decides
+// (so the shorter string sorts first).
+int32_t
+ByteTrieElement::compareStringTo(const ByteTrieElement &other, const CharString &strings) const {
+    // TODO: add StringPiece::compare(), see ticket #8187
+    const StringPiece mine=getString(strings);
+    const StringPiece theirs=other.getString(strings);
+    const int32_t lengthDiff=mine.length()-theirs.length();
+    const int32_t commonLength= lengthDiff<=0 ? mine.length() : theirs.length();
+    const int32_t diff=uprv_memcmp(mine.data(), theirs.data(), commonLength);
+    if(diff!=0) {
+        return diff;
+    }
+    return lengthDiff;
+}
+
+// Releases the serialized-trie buffer (from uprv_malloc) and the elements
+// array (from new[]).
+ByteTrieBuilder::~ByteTrieBuilder() {
+    uprv_free(bytes);
+    delete[] elements;
+}
+
+// Adds one (byte sequence, value) pair to the builder.
+// Does nothing if errorCode already indicates a failure.
+// Sets U_NO_WRITE_PERMISSION if the trie has already been built, and
+// U_MEMORY_ALLOCATION_ERROR if the elements array cannot be grown; in
+// both cases the existing contents are left untouched.
+// Returns *this so that calls can be chained.
+ByteTrieBuilder &
+ByteTrieBuilder::add(const StringPiece &s, int32_t value, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return *this;
+    }
+    if(bytesLength>0) {
+        // Cannot add elements after building.
+        errorCode=U_NO_WRITE_PERMISSION;
+        return *this;
+    }
+    bytesCapacity+=s.length()+1;  // Crude bytes preallocation estimate.
+    if(elementsLength==elementsCapacity) {
+        // Grow the array: start with 1024 elements, then quadruple.
+        int32_t newCapacity;
+        if(elementsCapacity==0) {
+            newCapacity=1024;
+        } else {
+            newCapacity=4*elementsCapacity;
+        }
+        ByteTrieElement *newElements=new ByteTrieElement[newCapacity];
+        if(newElements==NULL) {
+            // Must stop here: continuing would copy into a NULL array
+            // and throw away the elements collected so far.
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+        if(elementsLength>0) {
+            uprv_memcpy(newElements, elements, elementsLength*sizeof(ByteTrieElement));
+        }
+        delete[] elements;
+        elements=newElements;
+        elementsCapacity=newCapacity;
+    }
+    elements[elementsLength++].setTo(s, value, strings, errorCode);
+    return *this;
+}
+
+U_CDECL_BEGIN
+
+// Comparison callback for uprv_sortArray(): context is the CharString that
+// holds the bytes, left and right point to ByteTrieElement objects.
+static int32_t U_CALLCONV
+compareElementStrings(const void *context, const void *left, const void *right) {
+    const CharString &strings=*static_cast<const CharString *>(context);
+    const ByteTrieElement &lhs=*static_cast<const ByteTrieElement *>(left);
+    const ByteTrieElement &rhs=*static_cast<const ByteTrieElement *>(right);
+    return lhs.compareStringTo(rhs, strings);
+}
+
+U_CDECL_END
+
+// Builds the serialized trie from the elements added so far and returns a
+// StringPiece that refers to it.
+// - If errorCode already indicates a failure, returns a default-constructed
+//   StringPiece without doing anything.
+// - If the trie has been built before (bytesLength>0), returns the same
+//   bytes again without rebuilding.
+// - Sets U_INDEX_OUTOFBOUNDS_ERROR if no elements were added,
+//   U_ILLEGAL_ARGUMENT_ERROR if two elements have the same byte string, and
+//   U_MEMORY_ALLOCATION_ERROR if the output buffer is unavailable.
+// buildOption selects between writing the trie directly
+// (UDICTTRIE_BUILD_FAST) and first creating a graph of registered nodes
+// (any other value, intended for UDICTTRIE_BUILD_SMALL).
+StringPiece
+ByteTrieBuilder::build(UDictTrieBuildOption buildOption, UErrorCode &errorCode) {
+    StringPiece result;
+    if(U_FAILURE(errorCode)) {
+        return result;
+    }
+    if(bytesLength>0) {
+        // Already built.
+        // The trie occupies the last bytesLength bytes of the buffer.
+        result.set(bytes+(bytesCapacity-bytesLength), bytesLength);
+        return result;
+    }
+    if(elementsLength==0) {
+        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return result;
+    }
+    // Sort by byte string so that elements with a common prefix are adjacent;
+    // writeNode()/makeNode() require sorted input.
+    uprv_sortArray(elements, elementsLength, (int32_t)sizeof(ByteTrieElement),
+                   compareElementStrings, &strings,
+                   FALSE,  // need not be a stable sort
+                   &errorCode);
+    if(U_FAILURE(errorCode)) {
+        return result;
+    }
+    // Duplicate strings are not allowed.
+    // After sorting, duplicates would be neighbors.
+    StringPiece prev=elements[0].getString(strings);
+    for(int32_t i=1; i<elementsLength; ++i) {
+        StringPiece current=elements[i].getString(strings);
+        if(prev==current) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return result;
+        }
+        prev=current;
+    }
+    // Create and byte-serialize the trie for the elements.
+    // bytesCapacity starts from the estimate accumulated in add(), with a minimum of 1024.
+    if(bytesCapacity<1024) {
+        bytesCapacity=1024;
+    }
+    bytes=reinterpret_cast<char *>(uprv_malloc(bytesCapacity));
+    if(bytes==NULL) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+        return result;
+    }
+    if(buildOption==UDICTTRIE_BUILD_FAST) {
+        writeNode(0, elementsLength, 0);
+    } else /* UDICTTRIE_BUILD_SMALL */ {
+        // Create the node graph first, then number and write its nodes.
+        createCompactBuilder(2*elementsLength, errorCode);
+        Node *root=makeNode(0, elementsLength, 0, errorCode);
+        if(U_SUCCESS(errorCode)) {
+            root->markRightEdgesFirst(-1);
+            root->write(*this);
+        }
+        deleteCompactBuilder();
+    }
+    // NOTE(review): bytes can apparently become NULL while writing, presumably
+    // when the write helpers fail to grow the buffer -- confirm in write().
+    if(bytes==NULL) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+    } else {
+        result.set(bytes+(bytesCapacity-bytesLength), bytesLength);
+    }
+    return result;
+}
+
+// Writes the trie node (and, recursively, everything below it) for the
+// elements [start..limit[ at string position byteIndex.
+// Requires start<limit,
+// and all strings of the [start..limit[ elements must be sorted and
+// have a common prefix of length byteIndex.
+// Note the order of the write calls: the sub-nodes are written first, then
+// this node's own bytes, and an intermediate value (if any) last.
+void
+ByteTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t byteIndex) {
+    UBool hasValue=FALSE;
+    int32_t value=0;
+    // Because the elements are sorted, only the first one can end exactly at byteIndex.
+    if(byteIndex==elements[start].getStringLength(strings)) {
+        // An intermediate or final value.
+        value=elements[start++].getValue();
+        if(start==limit) {
+            writeValueAndFinal(value, TRUE);  // final-value node
+            return;
+        }
+        hasValue=TRUE;
+    }
+    // Now all [start..limit[ strings are longer than byteIndex.
+    const ByteTrieElement &minElement=elements[start];
+    const ByteTrieElement &maxElement=elements[limit-1];
+    int32_t minByte=(uint8_t)minElement.charAt(byteIndex, strings);
+    int32_t maxByte=(uint8_t)maxElement.charAt(byteIndex, strings);
+    if(minByte==maxByte) {
+        // Linear-match node: All strings have the same character at byteIndex.
+        // Extend the match for as long as the first and last strings agree;
+        // in sorted order that means all strings in between agree as well.
+        int32_t minStringLength=minElement.getStringLength(strings);
+        int32_t lastByteIndex=byteIndex;
+        while(++lastByteIndex<minStringLength &&
+                minElement.charAt(lastByteIndex, strings)==
+                maxElement.charAt(lastByteIndex, strings)) {}
+        writeNode(start, limit, lastByteIndex);
+        // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
+        // The chunks are taken from the end of the sequence towards its start.
+        const char *s=minElement.getString(strings).data();
+        int32_t length=lastByteIndex-byteIndex;
+        while(length>ByteTrie::kMaxLinearMatchLength) {
+            lastByteIndex-=ByteTrie::kMaxLinearMatchLength;
+            length-=ByteTrie::kMaxLinearMatchLength;
+            write(s+lastByteIndex, ByteTrie::kMaxLinearMatchLength);
+            write(ByteTrie::kMinLinearMatch+ByteTrie::kMaxLinearMatchLength-1);
+        }
+        // The remaining 1..kMaxLinearMatchLength bytes and their lead byte.
+        write(s+byteIndex, length);
+        write(ByteTrie::kMinLinearMatch+length-1);
+    } else {
+        // Branch node.
+        int32_t length=0;  // Number of different bytes at byteIndex.
+        int32_t i=start;
+        do {
+            char byte=elements[i++].charAt(byteIndex, strings);
+            while(i<limit && byte==elements[i].charAt(byteIndex, strings)) {
+                ++i;
+            }
+            ++length;
+        } while(i<limit);
+        // length>=2 because minByte!=maxByte.
+        writeBranchSubNode(start, limit, byteIndex, length);
+        // The branch head stores length-1.
+        // NOTE(review): the extra 0 byte looks like an escape for counts that
+        // would otherwise fall into the linear-match lead byte range --
+        // confirm against the ByteTrie reader.
+        write(--length);
+        if(length>=ByteTrie::kMinLinearMatch) {
+            write(0);
+        }
+    }
+    if(hasValue) {
+        writeValueAndFinal(value, FALSE);
+    }
+}
+
+// Writes the body of a branch node for the elements [start..limit[, which
+// have `length` different bytes at byteIndex.
+// While there are more than kMaxBranchLinearSubNodeLength different bytes,
+// the range is split at its middle byte and the lower half is written
+// recursively; the remaining bytes are written as a list of
+// (byte, final value or jump delta) pairs.
+// start<limit && all strings longer than byteIndex &&
+// length different bytes at byteIndex
+void
+ByteTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t byteIndex, int32_t length) {
+    // One entry per split level.
+    // NOTE(review): 16 entries are presumably sufficient because every split
+    // halves length -- confirm against kMaxBranchLinearSubNodeLength.
+    char middleBytes[16];
+    int32_t lessThan[16];
+    int32_t ltLength=0;
+    while(length>ByteTrie::kMaxBranchLinearSubNodeLength) {
+        // Branch on the middle byte.
+        // First, find the middle byte.
+        // (Skips length/2 runs of elements that share the same byte at byteIndex.)
+        int32_t count=length/2;
+        int32_t i=start;
+        char byte;
+        do {
+            byte=elements[i++].charAt(byteIndex, strings);
+            while(byte==elements[i].charAt(byteIndex, strings)) {
+                ++i;
+            }
+        } while(--count>0);
+        // Encode the less-than branch first.
+        byte=middleBytes[ltLength]=elements[i].charAt(byteIndex, strings);  // middle byte
+        writeBranchSubNode(start, i, byteIndex, length/2);
+        // Remember where the less-than branch ended up, for the delta written at the end.
+        lessThan[ltLength]=bytesLength;
+        ++ltLength;
+        // Continue for the greater-or-equal branch.
+        start=i;
+        length=length-length/2;
+    }
+    // For each byte, find its elements array start and whether it has a final value.
+    // (The last byte is handled separately below, hence length-1 iterations.)
+    int32_t starts[ByteTrie::kMaxBranchLinearSubNodeLength];
+    UBool final[ByteTrie::kMaxBranchLinearSubNodeLength-1];
+    int32_t byteNumber=0;
+    do {
+        int32_t i=starts[byteNumber]=start;
+        char byte=elements[i++].charAt(byteIndex, strings);
+        while(byte==elements[i].charAt(byteIndex, strings)) {
+            ++i;
+        }
+        // Final: exactly one string has this byte, and it ends right after it.
+        final[byteNumber]= start==i-1 && byteIndex+1==elements[start].getStringLength(strings);
+        start=i;
+    } while(++byteNumber<length-1);
+    // byteNumber==length-1, and the maxByte elements range is [start..limit[
+    starts[byteNumber]=start;
+
+    // Write the sub-nodes in reverse order: The jump lengths are deltas from
+    // after their own positions, so if we wrote the minByte sub-node first,
+    // then its jump delta would be larger.
+    // Instead we write the minByte sub-node last, for a shorter delta.
+    int32_t jumpTargets[ByteTrie::kMaxBranchLinearSubNodeLength-1];
+    do {
+        --byteNumber;
+        if(!final[byteNumber]) {
+            writeNode(starts[byteNumber], starts[byteNumber+1], byteIndex+1);
+            jumpTargets[byteNumber]=bytesLength;
+        }
+    } while(byteNumber>0);
+    // The maxByte sub-node is written as the very last one because we do
+    // not jump for it at all.
+    byteNumber=length-1;
+    writeNode(start, limit, byteIndex+1);
+    write((uint8_t)elements[start].charAt(byteIndex, strings));
+    // Write the rest of this node's byte-value pairs.
+    while(--byteNumber>=0) {
+        start=starts[byteNumber];
+        int32_t value;
+        if(final[byteNumber]) {
+            // Write the final value for the one string ending with this byte.
+            value=elements[start].getValue();
+        } else {
+            // Write the delta to the start position of the sub-node.
+            value=bytesLength-jumpTargets[byteNumber];
+        }
+        writeValueAndFinal(value, final[byteNumber]);
+        write((uint8_t)elements[start].charAt(byteIndex, strings));
+    }
+    // Write the split-branch nodes.
+    // Each one consists of the delta to its less-than branch and the middle byte.
+    while(ltLength>0) {
+        --ltLength;
+        writeDelta(bytesLength-lessThan[ltLength]);  // less-than
+        write((uint8_t)middleBytes[ltLength]);
+    }
+}
+
+// Node-graph counterpart of writeNode(): creates and registers the Node
+// objects for the elements [start..limit[ at string position byteIndex,
+// instead of writing bytes directly. Returns the resulting node, or NULL
+// if errorCode indicates a failure on entry.
+// Requires start<limit,
+// and all strings of the [start..limit[ elements must be sorted and
+// have a common prefix of length byteIndex.
+DictTrieBuilder::Node *
+ByteTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t byteIndex, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    UBool hasValue=FALSE;
+    int32_t value=0;
+    // Because the elements are sorted, only the first one can end exactly at byteIndex.
+    if(byteIndex==elements[start].getStringLength(strings)) {
+        // An intermediate or final value.
+        value=elements[start++].getValue();
+        if(start==limit) {
+            return registerFinalValue(value, errorCode);
+        }
+        hasValue=TRUE;
+    }
+    Node *node;
+    // Now all [start..limit[ strings are longer than byteIndex.
+    const ByteTrieElement &minElement=elements[start];
+    const ByteTrieElement &maxElement=elements[limit-1];
+    int32_t minByte=(uint8_t)minElement.charAt(byteIndex, strings);
+    int32_t maxByte=(uint8_t)maxElement.charAt(byteIndex, strings);
+    if(minByte==maxByte) {
+        // Linear-match node: All strings have the same character at byteIndex.
+        // Extend the match for as long as the first and last strings agree;
+        // in sorted order that means all strings in between agree as well.
+        int32_t minStringLength=minElement.getStringLength(strings);
+        int32_t lastByteIndex=byteIndex;
+        while(++lastByteIndex<minStringLength &&
+                minElement.charAt(lastByteIndex, strings)==
+                maxElement.charAt(lastByteIndex, strings)) {}
+        Node *nextNode=makeNode(start, limit, lastByteIndex, errorCode);
+        // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
+        // The chunks are taken from the end of the sequence towards its start,
+        // each one pointing to the node that follows it.
+        const char *s=minElement.getString(strings).data();
+        int32_t length=lastByteIndex-byteIndex;
+        while(length>ByteTrie::kMaxLinearMatchLength) {
+            lastByteIndex-=ByteTrie::kMaxLinearMatchLength;
+            length-=ByteTrie::kMaxLinearMatchLength;
+            node=new BTLinearMatchNode(
+                s+lastByteIndex,
+                ByteTrie::kMaxLinearMatchLength,
+                nextNode);
+            node=registerNode(node, errorCode);
+            nextNode=node;
+        }
+        // The remaining 1..kMaxLinearMatchLength bytes; registered below.
+        node=new BTLinearMatchNode(s+byteIndex, length, nextNode);
+    } else {
+        // Branch node.
+        int32_t length=0;  // Number of different bytes at byteIndex.
+        int32_t i=start;
+        do {
+            char byte=elements[i++].charAt(byteIndex, strings);
+            while(i<limit && byte==elements[i].charAt(byteIndex, strings)) {
+                ++i;
+            }
+            ++length;
+        } while(i<limit);
+        // length>=2 because minByte!=maxByte.
+        Node *subNode=makeBranchSubNode(start, limit, byteIndex, length, errorCode);
+        node=new BTBranchHeadNode(length, subNode);
+    }
+    // NOTE(review): the results of new are not checked here; presumably
+    // registerNode() deals with a NULL node -- confirm in DictTrieBuilder.
+    node=registerNode(node, errorCode);
+    if(hasValue) {
+        // Put the intermediate value in front of the node that continues the strings.
+        node=registerNode(new BTValueNode(value, node), errorCode);
+    }
+    return node;
+}
+
+// Node-graph counterpart of writeBranchSubNode(): creates the nodes for the
+// body of a branch over the elements [start..limit[, which have `length`
+// different bytes at byteIndex.
+// While there are more than kMaxBranchLinearSubNodeLength different bytes,
+// the range is split at its middle byte; the remaining bytes go into one
+// BTListBranchNode. Returns the resulting node, or NULL on failure.
+// start<limit && all strings longer than byteIndex &&
+// length different bytes at byteIndex
+DictTrieBuilder::Node *
+ByteTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t byteIndex,
+                                   int32_t length, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    // One entry per split level.
+    // NOTE(review): 16 entries are presumably sufficient because every split
+    // halves length -- confirm against kMaxBranchLinearSubNodeLength.
+    char middleBytes[16];
+    Node *lessThan[16];
+    int32_t ltLength=0;
+    while(length>ByteTrie::kMaxBranchLinearSubNodeLength) {
+        // Branch on the middle byte.
+        // First, find the middle byte.
+        // (Skips length/2 runs of elements that share the same byte at byteIndex.)
+        int32_t count=length/2;
+        int32_t i=start;
+        char byte;
+        do {
+            byte=elements[i++].charAt(byteIndex, strings);
+            while(byte==elements[i].charAt(byteIndex, strings)) {
+                ++i;
+            }
+        } while(--count>0);
+        // Encode the less-than branch first.
+        byte=middleBytes[ltLength]=elements[i].charAt(byteIndex, strings);  // middle byte
+        lessThan[ltLength]=makeBranchSubNode(start, i, byteIndex, length/2, errorCode);
+        ++ltLength;
+        // Continue for the greater-or-equal branch.
+        start=i;
+        length=length-length/2;
+    }
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    BTListBranchNode *listNode=new BTListBranchNode();
+    if(listNode==NULL) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    // For each byte, find its elements array start and whether it has a final value.
+    // (The last byte is handled separately below, hence length-1 iterations.)
+    int32_t byteNumber=0;
+    do {
+        int32_t i=start;
+        char byte=elements[i++].charAt(byteIndex, strings);
+        while(byte==elements[i].charAt(byteIndex, strings)) {
+            ++i;
+        }
+        // Exactly one string with this byte that ends right after it: store its
+        // value directly. Otherwise create the sub-node for the [start..i[ range.
+        if(start==i-1 && byteIndex+1==elements[start].getStringLength(strings)) {
+            listNode->add((uint8_t)byte, elements[start].getValue());
+        } else {
+            listNode->add((uint8_t)byte, makeNode(start, i, byteIndex+1, errorCode));
+        }
+        start=i;
+    } while(++byteNumber<length-1);
+    // byteNumber==length-1, and the maxByte elements range is [start..limit[
+    char byte=elements[start].charAt(byteIndex, strings);
+    if(start==limit-1 && byteIndex+1==elements[start].getStringLength(strings)) {
+        listNode->add((uint8_t)byte, elements[start].getValue());
+    } else {
+        listNode->add((uint8_t)byte, makeNode(start, limit, byteIndex+1, errorCode));
+    }
+    Node *node=registerNode(listNode, errorCode);
+    // Create the split-branch nodes.
+    // They are created from the innermost split outwards, each wrapping the previous node.
+    while(ltLength>0) {
+        --ltLength;
+        node=registerNode(
+            new BTSplitBranchNode(middleBytes[ltLength], lessThan[ltLength], node), errorCode);
+    }
+    return node;
+}
+
+// Writes this node's value as a final value and stores what
+// writeValueAndFinal() returns as this node's offset.
+void
+ByteTrieBuilder::BTFinalValueNode::write(DictTrieBuilder &builder) {
+    offset=((ByteTrieBuilder &)builder).writeValueAndFinal(value, TRUE);
+}
+
+// Two value nodes are equal when the ValueNode comparison succeeds and
+// both continue with the very same next node (pointer comparison).
+UBool
+ByteTrieBuilder::BTValueNode::operator==(const Node &other) const {
+    if(this!=&other) {
+        if(!ValueNode::operator==(other)) {
+            return FALSE;
+        }
+        const BTValueNode &that=(const BTValueNode &)other;
+        return next==that.next;
+    }
+    // Same object.
+    return TRUE;
+}
+
+// Numbers this node via its next node, unless it already has an offset.
+// Returns the (possibly updated) edge number.
+int32_t
+ByteTrieBuilder::BTValueNode::markRightEdgesFirst(int32_t edgeNumber) {
+    if(offset!=0) {
+        // Already marked: pass the edge number through unchanged.
+        return edgeNumber;
+    }
+    edgeNumber=next->markRightEdgesFirst(edgeNumber);
+    offset=edgeNumber;
+    return edgeNumber;
+}
+
+// Writes the continuation node first and then this node's non-final value.
+// The recorded offset is the one returned for the value.
+void
+ByteTrieBuilder::BTValueNode::write(DictTrieBuilder &builder) {
+    next->write(builder);
+    ByteTrieBuilder &trieBuilder=(ByteTrieBuilder &)builder;
+    offset=trieBuilder.writeValueAndFinal(value, FALSE);
+}
+
+// Stores the pointer to the matched bytes (no copy is made) and folds
+// a hash of those bytes into the hash code set up by the base class.
+ByteTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
+        : LinearMatchNode(len, nextNode),
+          s(bytes) {
+    hash=hash*37+uhash_hashCharsN(s, len);
+}
+
+// Linear-match nodes are equal when the LinearMatchNode comparison succeeds
+// and both nodes match the same sequence of length bytes.
+UBool
+ByteTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
+    if(this!=&other) {
+        if(!LinearMatchNode::operator==(other)) {
+            return FALSE;
+        }
+        const BTLinearMatchNode &that=(const BTLinearMatchNode &)other;
+        return 0==uprv_memcmp(s, that.s, length);
+    }
+    // Same object.
+    return TRUE;
+}
+
+// Writes the next node, then the matched bytes, then the lead byte
+// that encodes the match length. The offset of the lead byte is recorded.
+void
+ByteTrieBuilder::BTLinearMatchNode::write(DictTrieBuilder &builder) {
+    next->write(builder);
+    ByteTrieBuilder &trieBuilder=(ByteTrieBuilder &)builder;
+    trieBuilder.write(s, length);
+    offset=trieBuilder.write(ByteTrie::kMinLinearMatch+length-1);
+}
+
+// Serializes a list-branch node: first the sub-nodes (except those already
+// covered by the right edge), then the last (byte, sub-node or final value)
+// entry, then the remaining (byte, value) pairs from last to first.
+// Each remaining pair carries either a final value or a jump delta
+// to its previously written sub-node.
+// NOTE(review): the do-while below decrements byteNumber before testing it,
+// so this presumably relies on length>=2 -- confirm against the callers.
+void
+ByteTrieBuilder::BTListBranchNode::write(DictTrieBuilder &builder) {
+    ByteTrieBuilder &b=(ByteTrieBuilder &)builder;
+    // Write the sub-nodes in reverse order: The jump lengths are deltas from
+    // after their own positions, so if we wrote the minByte sub-node first,
+    // then its jump delta would be larger.
+    // Instead we write the minByte sub-node last, for a shorter delta.
+    int32_t byteNumber=length-1;
+    Node *rightEdge=equal[byteNumber];
+    // Without a right-edge sub-node (the last entry is a final value),
+    // this node's own firstEdgeNumber is used as the right-edge number.
+    int32_t rightEdgeNumber= rightEdge==NULL ? firstEdgeNumber : rightEdge->getOffset();
+    do {
+        --byteNumber;
+        if(equal[byteNumber]!=NULL) {
+            equal[byteNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder);
+        }
+    } while(byteNumber>0);
+    // The maxByte sub-node is written as the very last one because we do
+    // not jump for it at all.
+    byteNumber=length-1;
+    if(rightEdge==NULL) {
+        b.writeValueAndFinal(values[byteNumber], TRUE);
+    } else {
+        rightEdge->write(builder);
+    }
+    b.write(units[byteNumber]);
+    // Write the rest of this node's byte-value pairs.
+    while(--byteNumber>=0) {
+        int32_t value;
+        UBool isFinal;
+        if(equal[byteNumber]==NULL) {
+            // Write the final value for the one string ending with this byte.
+            value=values[byteNumber];
+            isFinal=TRUE;
+        } else {
+            // Write the delta to the start position of the sub-node.
+            U_ASSERT(equal[byteNumber]->getOffset()>0);
+            value=b.bytesLength-equal[byteNumber]->getOffset();
+            isFinal=FALSE;
+        }
+        b.writeValueAndFinal(value, isFinal);
+        // The last assignment (for byteNumber==0) leaves the node's offset
+        // at the position of its first byte-value pair.
+        offset=b.write(units[byteNumber]);
+    }
+}
+
+// Serializes a split-branch node: the less-than sub-node (unless it lies
+// inside the right edge), then the greater-or-equal sub-node, then the
+// jump delta to the less-than sub-node, and finally the comparison byte.
+void
+ByteTrieBuilder::BTSplitBranchNode::write(DictTrieBuilder &builder) {
+    ByteTrieBuilder &trieBuilder=(ByteTrieBuilder &)builder;
+    // The less-than branch goes first.
+    const int32_t rightEdgeNumber=greaterOrEqual->getOffset();
+    lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder);
+    // The greater-or-equal branch goes last because there is no jump for it.
+    greaterOrEqual->write(builder);
+    // Now this node itself: delta to the less-than branch, then the byte.
+    U_ASSERT(lessThan->getOffset()>0);
+    const int32_t lessThanDelta=trieBuilder.bytesLength-lessThan->getOffset();
+    trieBuilder.writeDelta(lessThanDelta);
+    offset=trieBuilder.write(unit);
+}
+
+// Writes the branch sub-node structure and then the branch head:
+// the byte length-1, followed by an additional 0 byte when length
+// exceeds ByteTrie::kMinLinearMatch.
+void
+ByteTrieBuilder::BTBranchHeadNode::write(DictTrieBuilder &builder) {
+    next->write(builder);
+    ByteTrieBuilder &trieBuilder=(ByteTrieBuilder &)builder;
+    const int32_t lengthMinusOne=length-1;
+    offset=trieBuilder.write(lengthMinusOne);
+    if(length>ByteTrie::kMinLinearMatch) {
+        offset=trieBuilder.write(0);
+    }
+}
+
+// Makes sure that the bytes buffer can hold at least length bytes.
+// The buffer is filled from its end, so when it is reallocated the existing
+// bytesLength bytes are copied to the end of the new buffer.
+// Returns FALSE if the buffer is missing or cannot be grown; on allocation
+// failure the old buffer is released and bytes is set to NULL.
+UBool
+ByteTrieBuilder::ensureCapacity(int32_t length) {
+    if(bytes==NULL) {
+        return FALSE;  // an earlier allocation had failed
+    }
+    if(length<=bytesCapacity) {
+        return TRUE;  // enough room already
+    }
+    // Keep doubling until the capacity exceeds the requested length.
+    int32_t grownCapacity=bytesCapacity;
+    do {
+        grownCapacity*=2;
+    } while(grownCapacity<=length);
+    char *grownBytes=reinterpret_cast<char *>(uprv_malloc(grownCapacity));
+    if(grownBytes==NULL) {
+        // out of memory
+        uprv_free(bytes);
+        bytes=NULL;
+        return FALSE;
+    }
+    uprv_memcpy(grownBytes+(grownCapacity-bytesLength),
+                bytes+(bytesCapacity-bytesLength), bytesLength);
+    uprv_free(bytes);
+    bytes=grownBytes;
+    bytesCapacity=grownCapacity;
+    return TRUE;
+}
+
+// Prepends one byte to the serialized bytes, which grow from the end of the buffer.
+// Returns the new bytesLength, or the unchanged bytesLength if the buffer
+// could not be made large enough.
+int32_t
+ByteTrieBuilder::write(int32_t byte) {
+    const int32_t grownLength=bytesLength+1;
+    if(!ensureCapacity(grownLength)) {
+        return bytesLength;
+    }
+    bytesLength=grownLength;
+    bytes[bytesCapacity-grownLength]=(char)byte;
+    return grownLength;
+}
+
+// Prepends length bytes from b to the serialized bytes, which grow from
+// the end of the buffer.
+// Returns the new bytesLength, or the unchanged bytesLength if the buffer
+// could not be made large enough.
+int32_t
+ByteTrieBuilder::write(const char *b, int32_t length) {
+    const int32_t grownLength=bytesLength+length;
+    if(!ensureCapacity(grownLength)) {
+        return bytesLength;
+    }
+    bytesLength=grownLength;
+    uprv_memcpy(bytes+(bytesCapacity-grownLength), b, length);
+    return grownLength;
+}
+
+// Encodes the value i in 1..5 bytes (lead byte plus big-endian trail bytes),
+// shifts the lead byte left by one to make room for the "final" flag
+// in its lowest bit, and writes the result.
+// Returns what write() returns for the encoded bytes.
+int32_t
+ByteTrieBuilder::writeValueAndFinal(int32_t i, UBool final) {
+    char intBytes[5];
+    int32_t length;
+    if(i<0 || i>0xffffff) {
+        // Negative or larger than 24 bits: lead byte plus all four value bytes.
+        intBytes[0]=(char)ByteTrie::kFiveByteValueLead;
+        intBytes[1]=(char)(i>>24);
+        intBytes[2]=(char)(i>>16);
+        intBytes[3]=(char)(i>>8);
+        intBytes[4]=(char)i;
+        length=5;
+    } else if(i<=ByteTrie::kMaxOneByteValue) {
+        // The value fits into the lead byte.
+        intBytes[0]=(char)(ByteTrie::kMinOneByteValueLead+i);
+        length=1;
+    } else if(i<=ByteTrie::kMaxTwoByteValue) {
+        intBytes[0]=(char)(ByteTrie::kMinTwoByteValueLead+(i>>8));
+        intBytes[1]=(char)i;
+        length=2;
+    } else if(i<=ByteTrie::kMaxThreeByteValue) {
+        intBytes[0]=(char)(ByteTrie::kMinThreeByteValueLead+(i>>16));
+        intBytes[1]=(char)(i>>8);
+        intBytes[2]=(char)i;
+        length=3;
+    } else {
+        intBytes[0]=(char)ByteTrie::kFourByteValueLead;
+        intBytes[1]=(char)(i>>16);
+        intBytes[2]=(char)(i>>8);
+        intBytes[3]=(char)i;
+        length=4;
+    }
+    intBytes[0]=(char)((intBytes[0]<<1)|final);
+    return write(intBytes, length);
+}
+
+// Encodes the non-negative jump delta i in 1..5 bytes and writes it.
+// Small deltas are stored in the lead byte alone; larger ones use a lead byte
+// (which carries the top bits for the two- and three-byte forms) followed by
+// the remaining bytes in big-endian order.
+// Returns what write() returns for the encoded bytes.
+int32_t
+ByteTrieBuilder::writeDelta(int32_t i) {
+    char intBytes[5];
+    int32_t length;  // number of bytes stored so far, i.e. the next index to fill
+    U_ASSERT(i>=0);
+    if(i<=ByteTrie::kMaxOneByteDelta) {
+        length=0;
+    } else if(i<=ByteTrie::kMaxTwoByteDelta) {
+        intBytes[0]=(char)(ByteTrie::kMinTwoByteDeltaLead+(i>>8));
+        length=1;
+    } else {
+        if(i<=ByteTrie::kMaxThreeByteDelta) {
+            intBytes[0]=(char)(ByteTrie::kMinThreeByteDeltaLead+(i>>16));
+            length=1;
+        } else {
+            if(i<=0xffffff) {
+                intBytes[0]=(char)ByteTrie::kFourByteDeltaLead;
+                length=1;
+            } else {
+                intBytes[0]=(char)ByteTrie::kFiveByteDeltaLead;
+                intBytes[1]=(char)(i>>24);
+                length=2;
+            }
+            // Append at the running index: storing at a fixed index would
+            // overwrite the byte written just before and leave gaps.
+            intBytes[length++]=(char)(i>>16);
+        }
+        intBytes[length++]=(char)(i>>8);
+    }
+    intBytes[length++]=(char)i;
+    return write(intBytes, length);
+}
+
+U_NAMESPACE_END
--- a/icu4c/source/tools/toolutil/bytetriebuilder.h
+++ b/icu4c/source/tools/toolutil/bytetriebuilder.h
@ -0,0 +1,123 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  bytetriebuilder.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010sep25
+*   created by: Markus W. Scherer
+*
+* Builder class for ByteTrie dictionary trie.
+*/
+
+#ifndef __BYTETRIEBUILDER_H__
+#define __BYTETRIEBUILDER_H__
+
+#include "unicode/utypes.h"
+#include "unicode/stringpiece.h"
+#include "charstr.h"
+#include "dicttriebuilder.h"
+
+U_NAMESPACE_BEGIN
+
+class ByteTrieElement;
+
+/**
+ * Builder class for ByteTrie dictionary tries.
+ * Collects (byte sequence, value) pairs via add() and serializes them via build().
+ */
+class U_TOOLUTIL_API ByteTrieBuilder : public DictTrieBuilder {
+public:
+    /** Creates an empty builder; no buffers are allocated yet. */
+    ByteTrieBuilder()
+            : elements(NULL), elementsCapacity(0), elementsLength(0),
+              bytes(NULL), bytesCapacity(0), bytesLength(0) {}
+    ~ByteTrieBuilder();
+
+    /**
+     * Adds a byte sequence with its value.
+     * NOTE(review): presumably sets errorCode for invalid or duplicate input --
+     * confirm against the implementation.
+     * @return *this
+     */
+    ByteTrieBuilder &add(const StringPiece &s, int32_t value, UErrorCode &errorCode);
+
+    /**
+     * Builds the trie for the added pairs.
+     * NOTE(review): the returned StringPiece presumably points into this
+     * builder's bytes buffer -- confirm its lifetime against the implementation.
+     */
+    StringPiece build(UDictTrieBuildOption buildOption, UErrorCode &errorCode);
+
+    /**
+     * Removes all added strings and resets the element and byte counts.
+     * The elements and bytes buffers themselves are neither freed nor shrunk here.
+     * @return *this
+     */
+    ByteTrieBuilder &clear() {
+        strings.clear();
+        elementsLength=0;
+        bytesLength=0;
+        return *this;
+    }
+
+private:
+    // Direct serialization of the elements range [start..limit[ from byteIndex on.
+    void writeNode(int32_t start, int32_t limit, int32_t byteIndex);
+    void writeBranchSubNode(int32_t start, int32_t limit, int32_t byteIndex, int32_t length);
+
+    // Node-graph construction for the elements range [start..limit[ from byteIndex on.
+    Node *makeNode(int32_t start, int32_t limit, int32_t byteIndex, UErrorCode &errorCode);
+    Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t byteIndex,
+                            int32_t length, UErrorCode &errorCode);
+
+    // Low-level writers. The byte array is filled from the back,
+    // and the write functions return the bytesLength after writing.
+    UBool ensureCapacity(int32_t length);
+    int32_t write(int32_t byte);
+    int32_t write(const char *b, int32_t length);
+    int32_t writeValueAndFinal(int32_t i, UBool final);
+    int32_t writeDelta(int32_t i);
+
+    // Compacting builder.
+    // Node subclasses that know how to serialize themselves into a ByteTrieBuilder.
+
+    // A final value: the end of a byte sequence.
+    class BTFinalValueNode : public FinalValueNode {
+    public:
+        BTFinalValueNode(int32_t v) : FinalValueNode(v) {}
+        virtual void write(DictTrieBuilder &builder);
+    };
+
+    // A non-final value that is followed by another node.
+    class BTValueNode : public ValueNode {
+    public:
+        BTValueNode(int32_t v, Node *nextNode)
+                : ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); }
+        virtual UBool operator==(const Node &other) const;
+        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
+        virtual void write(DictTrieBuilder &builder);
+    private:
+        Node *next;
+    };
+
+    // A sequence of bytes that must match exactly before continuing with the next node.
+    class BTLinearMatchNode : public LinearMatchNode {
+    public:
+        BTLinearMatchNode(const char *units, int32_t len, Node *nextNode);
+        virtual UBool operator==(const Node &other) const;
+        virtual void write(DictTrieBuilder &builder);
+    private:
+        // Points to the matched bytes; the pointer is stored, the bytes are not copied.
+        const char *s;
+    };
+
+    // A list of (byte, sub-node or final value) entries.
+    class BTListBranchNode : public ListBranchNode {
+    public:
+        BTListBranchNode() : ListBranchNode() {}
+        virtual void write(DictTrieBuilder &builder);
+    };
+
+    // A binary split on a middle byte into less-than and greater-or-equal sub-nodes.
+    class BTSplitBranchNode : public SplitBranchNode {
+    public:
+        BTSplitBranchNode(char middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
+                : SplitBranchNode((uint8_t)middleUnit, lessThanNode, greaterOrEqualNode) {}
+        virtual void write(DictTrieBuilder &builder);
+    };
+
+    // The head of a branch: writes the branch length in front of the sub-node structure.
+    class BTBranchHeadNode : public BranchHeadNode {
+    public:
+        BTBranchHeadNode(int32_t len, Node *subNode) : BranchHeadNode(len, subNode) {}
+        virtual void write(DictTrieBuilder &builder);
+    };
+
+    // Factory override; returns NULL if the allocation fails.
+    virtual Node *createFinalValueNode(int32_t value) const { return new BTFinalValueNode(value); }
+
+    // Storage for the added strings and the elements that refer to them.
+    CharString strings;
+    ByteTrieElement *elements;
+    int32_t elementsCapacity;
+    int32_t elementsLength;
+
+    // Byte serialization of the trie.
+    // Grows from the back: bytesLength measures from the end of the buffer!
+    char *bytes;
+    int32_t bytesCapacity;
+    int32_t bytesLength;
+};
+
+U_NAMESPACE_END
+
+#endif  // __BYTETRIEBUILDER_H__
--- a/icu4c/source/tools/toolutil/bytetrieiterator.cpp
+++ b/icu4c/source/tools/toolutil/bytetrieiterator.cpp
@ -0,0 +1,167 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  bytetrieiterator.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010nov03
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/stringpiece.h"
+#include "bytetrie.h"
+#include "bytetrieiterator.h"
+#include "charstr.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+// Iterates from the root: the current and initial positions are the start
+// of the trie bytes, and there is no pending linear match (-1).
+ByteTrieIterator::ByteTrieIterator(const void *trieBytes, int32_t maxStringLength,
+                                   UErrorCode &errorCode)
+        : bytes_(reinterpret_cast<const uint8_t *>(trieBytes)),
+          pos_(bytes_),
+          initialPos_(bytes_),
+          remainingMatchLength_(-1),
+          initialRemainingMatchLength_(-1),
+          maxLength_(maxStringLength),
+          value_(0),
+          stack_(errorCode) {
+}
+
+// Iterates from the current state of the given trie. If the trie stopped
+// inside a linear-match node, the remaining bytes of that node
+// (at most maxStringLength of them, if a maximum is set) are consumed
+// into the string right away.
+ByteTrieIterator::ByteTrieIterator(const ByteTrie &trie, int32_t maxStringLength,
+                                   UErrorCode &errorCode)
+        : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
+          remainingMatchLength_(trie.remainingMatchLength_),
+          initialRemainingMatchLength_(trie.remainingMatchLength_),
+          maxLength_(maxStringLength), value_(0), stack_(errorCode) {
+    if(remainingMatchLength_<0) {
+        return;  // No pending linear-match node.
+    }
+    // remainingMatchLength_ is the actual remaining match length minus 1.
+    int32_t pendingLength=remainingMatchLength_+1;
+    if(maxLength_>0 && pendingLength>maxLength_) {
+        // Truncation leaves remainingMatchLength_>=0 as a signal for next().
+        pendingLength=maxLength_;
+    }
+    str_.append(reinterpret_cast<const char *>(pos_), pendingLength, errorCode);
+    pos_+=pendingLength;
+    remainingMatchLength_-=pendingLength;
+}
+
+// Restores the state from right after construction: empties the stack,
+// cuts the string back to the bytes of the initially pending linear match
+// (limited to the maximum length, if one is set), and restores the position
+// and remaining match length accordingly.
+ByteTrieIterator &ByteTrieIterator::reset() {
+    stack_.setSize(0);
+    // Remaining match length of the initially pending linear-match node, if any.
+    int32_t pendingLength=initialRemainingMatchLength_+1;
+    if(maxLength_>0 && pendingLength>maxLength_) {
+        pendingLength=maxLength_;
+    }
+    str_.truncate(pendingLength);
+    pos_=initialPos_+pendingLength;
+    remainingMatchLength_=initialRemainingMatchLength_-pendingLength;
+    return *this;
+}
+
+// Advances to the next (byte sequence, value) pair.
+// Returns FALSE if errorCode indicates a failure on input, or if the previous
+// call ended a path (pos_==NULL) and the backtracking stack is empty.
+// Otherwise returns TRUE with sp_ and value_ set; value_ is -1 when the
+// string was truncated to maxLength_ without reaching a real value.
+UBool
+ByteTrieIterator::next(UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return FALSE;
+    }
+    const uint8_t *pos=pos_;
+    if(pos==NULL) {
+        if(stack_.isEmpty()) {
+            return FALSE;
+        }
+        // Pop the state off the stack and continue with the next outbound edge of
+        // the branch node.
+        int32_t stackSize=stack_.size();
+        int32_t length=stack_.elementAti(stackSize-1);
+        pos=bytes_+stack_.elementAti(stackSize-2);
+        stack_.setSize(stackSize-2);
+        // Low 16 bits: string length from before the branch node.
+        str_.truncate(length&0xffff);
+        // Upper bits: remaining branch length.
+        length=(int32_t)((uint32_t)length>>16);
+        if(length>1) {
+            pos=branchNext(pos, length, errorCode);
+            if(pos==NULL) {
+                return TRUE;  // Reached a final value.
+            }
+        } else {
+            // Last edge of the branch: append its byte and continue
+            // with the node that follows it directly.
+            str_.append((char)*pos++, errorCode);
+        }
+    }
+    if(remainingMatchLength_>=0) {
+        // We only get here if we started in a pending linear-match node
+        // with more than maxLength remaining bytes.
+        return truncateAndStop();
+    }
+    for(;;) {
+        int32_t node=*pos++;
+        if(node>=ByteTrie::kMinValueLead) {
+            // Deliver value for the byte sequence so far.
+            UBool isFinal=(UBool)(node&ByteTrie::kValueIsFinal);
+            value_=ByteTrie::readValue(pos, node>>1);
+            if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
+                // End of this path; the next call backtracks via the stack.
+                pos_=NULL;
+            } else {
+                // Remember where to continue after this intermediate value.
+                pos_=ByteTrie::skipValue(pos, node);
+            }
+            sp_.set(str_.data(), str_.length());
+            return TRUE;
+        }
+        if(maxLength_>0 && str_.length()==maxLength_) {
+            return truncateAndStop();
+        }
+        if(node<ByteTrie::kMinLinearMatch) {
+            // Branch node: a lead byte of 0 means that the length-1 is in the next byte.
+            if(node==0) {
+                node=*pos++;
+            }
+            pos=branchNext(pos, node+1, errorCode);
+            if(pos==NULL) {
+                return TRUE;  // Reached a final value.
+            }
+        } else {
+            // Linear-match node, append length bytes to str_.
+            int32_t length=node-ByteTrie::kMinLinearMatch+1;
+            if(maxLength_>0 && str_.length()+length>maxLength_) {
+                // Append only as many bytes as still fit, then stop.
+                str_.append(reinterpret_cast<const char *>(pos),
+                            maxLength_-str_.length(), errorCode);
+                return truncateAndStop();
+            }
+            str_.append(reinterpret_cast<const char *>(pos), length, errorCode);
+            pos+=length;
+        }
+    }
+}
+
+// Handles a branch node: follows its first outbound edge and pushes
+// backtracking state (position, remaining branch length, string length)
+// for the other edges.
+// Returns the position to continue from, or NULL if the first edge
+// ended in a final value (then sp_ and value_ are set and pos_ is NULL).
+const uint8_t *
+ByteTrieIterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
+    // Descend through the binary-split levels, always taking the less-than edge.
+    for(; length>ByteTrie::kMaxBranchLinearSubNodeLength; length>>=1) {
+        ++pos;  // the comparison byte is not needed
+        // Remember the greater-or-equal edge for later.
+        const int32_t greaterOrEqualOffset=(int32_t)(ByteTrie::skipDelta(pos)-bytes_);
+        const int32_t greaterOrEqualState=((length-(length>>1))<<16)|str_.length();
+        stack_.addElement(greaterOrEqualOffset, errorCode);
+        stack_.addElement(greaterOrEqualState, errorCode);
+        pos=ByteTrie::jumpByDelta(pos);
+    }
+    // Now at a list of key-value pairs; each value is a final value or a jump delta.
+    // Take the first pair and push state for the remaining length-1 pairs.
+    const uint8_t trieByte=*pos++;
+    const int32_t node=*pos++;
+    const int32_t value=ByteTrie::readValue(pos, node>>1);
+    pos=ByteTrie::skipValue(pos, node);
+    stack_.addElement((int32_t)(pos-bytes_), errorCode);
+    stack_.addElement(((length-1)<<16)|str_.length(), errorCode);
+    str_.append((char)trieByte, errorCode);
+    if((UBool)(node&ByteTrie::kValueIsFinal)) {
+        value_=value;
+        sp_.set(str_.data(), str_.length());
+        pos_=NULL;
+        return NULL;
+    }
+    // Not final: the value is the jump delta to the sub-node.
+    return pos+value;
+}
+
+U_NAMESPACE_END
--- a/icu4c/source/tools/toolutil/bytetrieiterator.h
+++ b/icu4c/source/tools/toolutil/bytetrieiterator.h
@ -0,0 +1,126 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  bytetrieiterator.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010nov03
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __BYTETRIEITERATOR_H__
+#define __BYTETRIEITERATOR_H__
+
+/**
+ * \file
+ * \brief C++ API: ByteTrie iterator for all of its (byte sequence, value) pairs.
+ */
+
+// Needed if and when we change the .dat package index to a ByteTrie,
+// so that icupkg can work with an input package.
+
+#include "unicode/utypes.h"
+#include "unicode/stringpiece.h"
+#include "bytetrie.h"
+#include "charstr.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Iterator for all of the (byte sequence, value) pairs in a ByteTrie.
+ */
+class U_TOOLUTIL_API ByteTrieIterator : public UMemory {
+public:
+    /**
+     * Iterates from the root of a byte-serialized ByteTrie.
+     * @param trieBytes The trie bytes.
+     * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
+     *                        Otherwise, the iterator returns strings with this maximum length.
+     * @param errorCode Standard ICU error code. Its input value must
+     *                  pass the U_SUCCESS() test, or else the function returns
+     *                  immediately. Check for U_FAILURE() on output or use with
+     *                  function chaining. (See User Guide for details.)
+     */
+    ByteTrieIterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode);
+
+    /**
+     * Iterates from the current state of the specified ByteTrie.
+     * @param trie The trie whose state will be copied for iteration.
+     * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
+     *                        Otherwise, the iterator returns strings with this maximum length.
+     * @param errorCode Standard ICU error code. Its input value must
+     *                  pass the U_SUCCESS() test, or else the function returns
+     *                  immediately. Check for U_FAILURE() on output or use with
+     *                  function chaining. (See User Guide for details.)
+     */
+    ByteTrieIterator(const ByteTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
+
+    /**
+     * Resets this iterator to its initial state.
+     * @return *this
+     */
+    ByteTrieIterator &reset();
+
+    /**
+     * Finds the next (byte sequence, value) pair if there is one.
+     *
+     * If the byte sequence is truncated to the maximum length and does not
+     * have a real value, then the value is set to -1.
+     * In this case, this "not a real value" is indistinguishable from
+     * a real value of -1.
+     * @param errorCode Standard ICU error code. If its input value does not
+     *                  pass the U_SUCCESS() test, the function returns FALSE
+     *                  immediately.
+     * @return TRUE if there is another element.
+     */
+    UBool next(UErrorCode &errorCode);
+
+    /**
+     * @return TRUE if there are more elements.
+     */
+    UBool hasNext() const { return pos_!=NULL || !stack_.isEmpty(); }
+
+    /**
+     * @return the NUL-terminated byte sequence for the last successful next()
+     */
+    const StringPiece &getString() const { return sp_; }
+    /**
+     * @return the value for the last successful next()
+     */
+    int32_t getValue() const { return value_; }
+
+private:
+    // Ends the current path without a real value: the next call to next()
+    // backtracks via the stack, value_ becomes -1, and sp_ is set to
+    // the (truncated) string collected so far. Always returns TRUE.
+    UBool truncateAndStop() {
+        pos_=NULL;
+        value_=-1;  // no real value for str
+        sp_.set(str_.data(), str_.length());
+        return TRUE;
+    }
+
+    // Follows the first outbound edge of a branch node and pushes state
+    // for the other edges. Returns NULL if that edge led to a final value.
+    const uint8_t *branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode);
+
+    // Start of the trie bytes; stack offsets are relative to this pointer.
+    const uint8_t *bytes_;
+    // Current position; NULL after a path has ended.
+    const uint8_t *pos_;
+    // Position to which reset() returns.
+    const uint8_t *initialPos_;
+    // Remaining length minus 1 of a pending linear-match node, or negative if none.
+    int32_t remainingMatchLength_;
+    int32_t initialRemainingMatchLength_;
+
+    // Bytes of the current string, and the StringPiece view handed out by getString().
+    CharString str_;
+    StringPiece sp_;
+    // Maximum string length, or 0 for no limit.
+    int32_t maxLength_;
+    // Value for the last successful next().
+    int32_t value_;
+
+    // The stack stores pairs of integers for backtracking to another
+    // outbound edge of a branch node.
+    // The first integer is an offset from ByteTrie.bytes.
+    // The second integer has the str.length() from before the node in bits 15..0,
+    // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.)
+    // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24,
+    // but the code looks more confusing that way.)
+    UVector32 stack_;
+};
+
+U_NAMESPACE_END
+
+#endif  // __BYTETRIEITERATOR_H__
--- a/icu4c/source/tools/toolutil/denseranges.cpp
+++ b/icu4c/source/tools/toolutil/denseranges.cpp
@ -0,0 +1,158 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  denseranges.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010sep25
+*   created by: Markus W. Scherer
+*
+* Helper code for finding a small number of dense ranges.
+*/
+
+#include "unicode/utypes.h"
+#include "denseranges.h"
+
+// Definitions in the anonymous namespace are invisible outside this file.
+namespace {
+
+/**
+ * Keeps track of the largest range gaps seen so far: at most the requested
+ * number of them (and never more than 15), ordered by descending gap length,
+ * so that index 0 holds the largest gap.
+ */
+class LargestGaps {
+public:
+    LargestGaps(int32_t max) : maxLength(max), length(0) {
+        // Never track more gaps than the fixed-size arrays can hold.
+        if(maxLength>kCapacity) {
+            maxLength=kCapacity;
+        }
+    }
+
+    /** Offers a gap; it is stored only if it is among the maxLength largest so far. */
+    void add(int32_t gapStart, int64_t gapLength) {
+        // Walk back from the end past all stored gaps that are strictly smaller.
+        int32_t insertAt=length;
+        while(insertAt>0 && gapLengths[insertAt-1]<gapLength) {
+            --insertAt;
+        }
+        if(insertAt>=maxLength) {
+            return;  // Smaller than all of the maxLength gaps already stored.
+        }
+        // Grow the list if there is room; otherwise the smallest gap drops off the end.
+        int32_t last;
+        if(length<maxLength) {
+            last=length;
+            ++length;
+        } else {
+            last=maxLength-1;
+        }
+        // Shift the smaller gaps by one slot to open up the insertion slot.
+        for(int32_t k=last; k>insertAt; --k) {
+            gapStarts[k]=gapStarts[k-1];
+            gapLengths[k]=gapLengths[k-1];
+        }
+        gapStarts[insertAt]=gapStart;
+        gapLengths[insertAt]=gapLength;
+    }
+
+    /** Keeps only the first (largest) newLength gaps; never lengthens the list. */
+    void truncate(int32_t newLength) {
+        length= newLength<length ? newLength : length;
+    }
+
+    int32_t count() const { return length; }
+    int32_t gapStart(int32_t i) const { return gapStarts[i]; }
+    int64_t gapLength(int32_t i) const { return gapLengths[i]; }
+
+    /**
+     * Returns the index of the stored gap with the smallest start
+     * that is greater than value, or -1 if there is no such gap.
+     */
+    int32_t firstAfter(int32_t value) const {
+        int32_t bestIndex=-1;
+        int32_t bestStart=0;
+        for(int32_t k=0; k<length; ++k) {
+            const int32_t candidate=gapStarts[k];
+            if(candidate<=value) {
+                continue;  // Not after value.
+            }
+            if(bestIndex<0 || candidate<bestStart) {
+                bestStart=candidate;
+                bestIndex=k;
+            }
+        }
+        return bestIndex;
+    }
+
+private:
+    static const int32_t kCapacity=15;
+
+    int32_t maxLength;
+    int32_t length;
+    int32_t gapStarts[kCapacity];
+    int64_t gapLengths[kCapacity];
+};
+
+}  // namespace
+
+/**
+ * Does it make sense to write 1..capacity ranges?
+ * Returns 0 if not, otherwise the number of ranges.
+ * @param values Sorted array of signed-integer values.
+ * @param length Number of values.
+ * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.)
+ *                Should be 0x80..0x100, must be 1..0x100.
+ * @param ranges Output ranges array.
+ * @param capacity Maximum number of ranges.
+ * @return Minimum number of ranges (at most capacity) that have the desired density,
+ *         or 0 if that density cannot be achieved.
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_makeDenseRanges(const int32_t values[], int32_t length,
+                     int32_t density,
+                     int32_t ranges[][2], int32_t capacity) {
+    // Too few values for any range.
+    if(length<=2) {
+        return 0;
+    }
+    const int32_t firstValue=values[0];
+    const int32_t lastValue=values[length-1];  // firstValue<=lastValue is assumed.
+    // 64-bit arithmetic keeps lastValue-firstValue and the density products
+    // from overflowing a signed 32-bit integer.
+    int64_t coveredLength=(int64_t)lastValue-(int64_t)firstValue+1;
+    if(length>=(density*coveredLength)/0x100) {
+        // A single range is dense enough.
+        ranges[0][0]=firstValue;
+        ranges[0][1]=lastValue;
+        return 1;
+    }
+    // Too few values for splitting into several ranges.
+    if(length<=4) {
+        return 0;
+    }
+    // Try to cut [firstValue, lastValue] into 2..capacity ranges
+    // at the 1..(capacity-1) largest gaps between consecutive values.
+    LargestGaps gaps(capacity-1);
+    int32_t expected=firstValue;
+    for(int32_t k=1; k<length; ++k) {
+        ++expected;
+        const int32_t actual=values[k];
+        if(expected!=actual) {
+            gaps.add(expected, (int64_t)actual-(int64_t)expected);
+            expected=actual;
+        }
+    }
+    // There is at least one gap here: with none, the single range above
+    // would have been perfectly dense and already returned.
+    // Remove gaps from the covered length, largest first, until dense enough.
+    int32_t num=2;
+    for(int32_t g=0;; ++g, ++num) {
+        if(g>=gaps.count()) {
+            // Even capacity ranges would be too sparse for the requested density.
+            return 0;
+        }
+        coveredLength-=gaps.gapLength(g);
+        if(length>num*2 && length>=(density*coveredLength)/0x100) {
+            break;
+        }
+    }
+    // Emit num ranges, separated by the num-1 largest gaps in ascending start order.
+    gaps.truncate(num-1);
+    int32_t rangeStart=firstValue;
+    ranges[0][0]=rangeStart;
+    for(int32_t r=0; r<=num-2; ++r) {
+        const int32_t gapIndex=gaps.firstAfter(rangeStart);
+        const int32_t gapStart=gaps.gapStart(gapIndex);
+        ranges[r][1]=gapStart-1;
+        rangeStart=(int32_t)(gapStart+gaps.gapLength(gapIndex));
+        ranges[r+1][0]=rangeStart;
+    }
+    ranges[num-1][1]=lastValue;
+    return num;
+}
--- a/icu4c/source/tools/toolutil/denseranges.h
+++ b/icu4c/source/tools/toolutil/denseranges.h
@ -0,0 +1,39 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  denseranges.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010sep25
+*   created by: Markus W. Scherer
+*
+* Helper code for finding a small number of dense ranges.
+*/
+
+#ifndef __DENSERANGES_H__
+#define __DENSERANGES_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * Does it make sense to write 1..capacity ranges?
+ * Returns 0 if not, otherwise the number of ranges.
+ *
+ * The implementation returns 0 for fewer than 3 values, and uses more than
+ * one range only if there are more than 4 values and more than
+ * twice as many values as ranges.
+ * @param values Sorted array of signed-integer values.
+ * @param length Number of values.
+ * @param density Minimum average range density, in 256ths. (0x100=100%=perfectly dense.)
+ *                Should be 0x80..0x100, must be 1..0x100.
+ * @param ranges Output ranges array. For each returned range i,
+ *               ranges[i][0] is its first value and ranges[i][1] is its last value
+ *               (inclusive).
+ * @param capacity Maximum number of ranges.
+ * @return Minimum number of ranges (at most capacity) that have the desired density,
+ *         or 0 if that density cannot be achieved.
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_makeDenseRanges(const int32_t values[], int32_t length,
+                     int32_t density,
+                     int32_t ranges[][2], int32_t capacity);
+
+#endif  // __DENSERANGES_H__
--- a/icu4c/source/tools/toolutil/dicttriebuilder.cpp
+++ b/icu4c/source/tools/toolutil/dicttriebuilder.cpp
@ -0,0 +1,267 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  dicttriebuilder.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010dec24
+*   created by: Markus W. Scherer
+*
+* Base class for dictionary-trie builder classes.
+*/
+
+#include <typeinfo>  // for 'typeid' to work
+#include "unicode/utypes.h"
+#include "dicttriebuilder.h"
+#include "uassert.h"
+#include "uhash.h"
+
+U_CDECL_BEGIN
+
+// uhash hash callback: unwraps the key token and lets
+// DictTrieBuilder::hashNode() compute the hash of the node it points to.
+static int32_t U_CALLCONV
+hashDictTrieNode(const UHashTok key) {
+    const void *node=key.pointer;
+    return U_NAMESPACE_QUALIFIER DictTrieBuilder::hashNode(node);
+}
+
+// uhash key-comparison callback: unwraps both key tokens and lets
+// DictTrieBuilder::equalNodes() compare the nodes they point to.
+static UBool U_CALLCONV
+equalDictTrieNodes(const UHashTok key1, const UHashTok key2) {
+    const void *left=key1.pointer;
+    const void *right=key2.pointer;
+    return U_NAMESPACE_QUALIFIER DictTrieBuilder::equalNodes(left, right);
+}
+
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+// The builder starts without a node hash set;
+// createCompactBuilder() allocates one on demand.
+DictTrieBuilder::DictTrieBuilder()
+        : nodes(NULL) {
+}
+
+// Closes the node hash set via deleteCompactBuilder().
+DictTrieBuilder::~DictTrieBuilder() {
+    this->deleteCompactBuilder();
+}
+
+// Opens the hash set used to find equivalent nodes.
+// Does nothing if errorCode already indicates a failure.
+// Sets U_MEMORY_ALLOCATION_ERROR if uhash_openSize() reports success but
+// returns NULL. On success, the table is set up to delete its keys
+// (the registered nodes) via uhash_deleteUObject.
+void
+DictTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) {
+    if(U_SUCCESS(errorCode)) {
+        nodes=uhash_openSize(hashDictTrieNode, equalDictTrieNodes, NULL,
+                             sizeGuess, &errorCode);
+        if(U_SUCCESS(errorCode)) {
+            if(nodes==NULL) {
+                errorCode=U_MEMORY_ALLOCATION_ERROR;
+            } else {
+                uhash_setKeyDeleter(nodes, uhash_deleteUObject);
+            }
+        }
+    }
+}
+
+// Closes the node hash set and forgets the pointer, so that a second call
+// (for example from the destructor) does not close the same table again.
+void
+DictTrieBuilder::deleteCompactBuilder() {
+    UHashtable *table=nodes;
+    nodes=NULL;
+    uhash_close(table);
+}
+
+// Registers newNode in the node hash set, or finds an equivalent node there.
+// Takes ownership of newNode:
+// - errorCode already a failure: deletes newNode, returns NULL.
+// - newNode==NULL: sets U_MEMORY_ALLOCATION_ERROR, returns NULL.
+// - an equivalent node is already registered: deletes newNode, returns the registered one.
+// - otherwise: adds newNode to the set and returns it (NULL if adding failed).
+DictTrieBuilder::Node *
+DictTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) {
+    Node *result=NULL;
+    if(U_FAILURE(errorCode)) {
+        delete newNode;
+    } else if(newNode==NULL) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+    } else {
+        const UHashElement *existing=uhash_find(nodes, newNode);
+        if(existing!=NULL) {
+            // Duplicate: keep the node that was registered first.
+            delete newNode;
+            result=(Node *)existing->key.pointer;
+        } else {
+            // If uhash_puti() returns a non-zero value from an equivalent, previously
+            // registered node, then uhash_find() failed to find that and we will leak newNode.
+#if !U_RELEASE
+            int32_t oldValue=  // Only in debug mode to avoid a compiler warning about unused oldValue.
+#endif
+            uhash_puti(nodes, newNode, 1, &errorCode);
+            U_ASSERT(oldValue==0);
+            if(U_FAILURE(errorCode)) {
+                // NOTE(review): the table has a key deleter installed by
+                // createCompactBuilder(). If uhash_puti() already disposes of the key
+                // when it fails, this is a second delete -- confirm against uhash.
+                delete newNode;
+            } else {
+                result=newNode;
+            }
+        }
+    }
+    return result;
+}
+
+// Returns the registered FinalValueNode for this value, creating and
+// registering one via createFinalValueNode() if there is none yet.
+// The lookup uses a stack-allocated FinalValueNode so that no heap node is
+// created for a duplicate value.
+// Returns NULL if errorCode is already a failure, if the node cannot be
+// created (U_MEMORY_ALLOCATION_ERROR), or if adding it to the set fails.
+DictTrieBuilder::Node *
+DictTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) {
+    Node *result=NULL;
+    if(U_SUCCESS(errorCode)) {
+        FinalValueNode probe(value);
+        const UHashElement *existing=uhash_find(nodes, &probe);
+        if(existing!=NULL) {
+            result=(Node *)existing->key.pointer;
+        } else if((result=createFinalValueNode(value))==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+        } else {
+            // If uhash_puti() returns a non-zero value from an equivalent, previously
+            // registered node, then uhash_find() failed to find that and we will leak the new node.
+#if !U_RELEASE
+            int32_t oldValue=  // Only in debug mode to avoid a compiler warning about unused oldValue.
+#endif
+            uhash_puti(nodes, result, 1, &errorCode);
+            U_ASSERT(oldValue==0);
+            if(U_FAILURE(errorCode)) {
+                // NOTE(review): the table has a key deleter installed by
+                // createCompactBuilder(). If uhash_puti() already disposes of the key
+                // when it fails, this is a second delete -- confirm against uhash.
+                delete result;
+                result=NULL;
+            }
+        }
+    }
+    return result;
+}
+
+// Returns the precomputed hash code of the Node that node points to.
+// NOTE(review): the return type is UBool while Node::hashCode() returns int32_t,
+// so the hash is narrowed on return. Changing it to int32_t requires changing the
+// declaration in dicttriebuilder.h at the same time.
+UBool DictTrieBuilder::hashNode(const void *node) {
+    const Node *n=(const Node *)node;
+    return n->hashCode();
+}
+
+// Compares the two Nodes that left and right point to,
+// using the virtual Node::operator==() of the left node.
+UBool DictTrieBuilder::equalNodes(const void *left, const void *right) {
+    const Node &lhs=*(const Node *)left;
+    const Node &rhs=*(const Node *)right;
+    return lhs==rhs;
+}
+
+// Base-class equality: a node equals itself; otherwise both nodes must have
+// the same dynamic type and the same hash code.
+// Subclasses call this first and then compare their own fields.
+UBool DictTrieBuilder::Node::operator==(const Node &other) const {
+    if(this==&other) {
+        return TRUE;
+    }
+    if(!(typeid(*this)==typeid(other))) {
+        return FALSE;
+    }
+    return hash==other.hash;
+}
+
+// Default implementation for nodes without sub-nodes:
+// records edgeNumber in offset unless the node has been numbered already,
+// and returns edgeNumber unchanged.
+int32_t DictTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) {
+    if(offset!=0) {
+        return edgeNumber;  // Already visited.
+    }
+    offset=edgeNumber;
+    return edgeNumber;
+}
+
+UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(DictTrieBuilder::Node)
+
+// Two final-value nodes are equal if they have the same hash code and value.
+UBool DictTrieBuilder::FinalValueNode::operator==(const Node &other) const {
+    if(this==&other) {
+        return TRUE;
+    }
+    // This deliberately does not start with
+    //   if(!Node::operator==(other)) {
+    //       return FALSE;
+    //   }
+    // because registerFinalValue() compares a stack-allocated FinalValueNode
+    // (stack-allocated so that we don't unnecessarily create lots of duplicate nodes)
+    // with the specific builder's subclass of FinalValueNode.
+    // Node::operator==(other) compares the typeid's, so it would always be false
+    // for that pair and the lookup would never find a match.
+    // This workaround assumes that the subclass does not add fields that need to be compared.
+    if(hash==other.hashCode()) {
+        const FinalValueNode *o=dynamic_cast<const FinalValueNode *>(&other);
+        if(o!=NULL) {
+            return value==o->value;
+        }
+    }
+    return FALSE;
+}
+
+// Value nodes are equal if the base-class check passes and they agree on
+// whether they have a value and, if so, on the value itself.
+UBool DictTrieBuilder::ValueNode::operator==(const Node &other) const {
+    if(this==&other) {
+        return TRUE;
+    }
+    if(!Node::operator==(other)) {
+        return FALSE;
+    }
+    // Same dynamic type (checked by Node::operator==), so the cast is safe.
+    const ValueNode &that=(const ValueNode &)other;
+    if(hasValue!=that.hasValue) {
+        return FALSE;
+    }
+    // The value field only matters when there is a value.
+    return !hasValue || value==that.value;
+}
+
+// Linear-match nodes are equal if the ValueNode check passes and they have
+// the same match length and the same (identical pointer) next node.
+// The matched units themselves are not stored in this class and are not compared here.
+UBool DictTrieBuilder::LinearMatchNode::operator==(const Node &other) const {
+    if(this==&other) {
+        return TRUE;
+    }
+    if(!ValueNode::operator==(other)) {
+        return FALSE;
+    }
+    const LinearMatchNode &that=(const LinearMatchNode &)other;
+    if(length!=that.length) {
+        return FALSE;
+    }
+    return next==that.next;
+}
+
+// A linear-match node has a single continuation: number the next node and
+// adopt the resulting edge number as this node's own.
+int32_t DictTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) {
+    if(offset!=0) {
+        return edgeNumber;  // Already visited.
+    }
+    edgeNumber=next->markRightEdgesFirst(edgeNumber);
+    offset=edgeNumber;
+    return edgeNumber;
+}
+
+// List-branch nodes are equal if the base-class check passes, they have the
+// same number of entries, and every entry has the same unit, final value and
+// (identical pointer) sub-node.
+UBool DictTrieBuilder::ListBranchNode::operator==(const Node &other) const {
+    if(this==&other) {
+        return TRUE;
+    }
+    if(!Node::operator==(other)) {
+        return FALSE;
+    }
+    const ListBranchNode &o=(const ListBranchNode &)other;
+    // Equal hash codes do not guarantee equal lengths (hash collisions).
+    // Without this check a list could compare equal to a longer list that
+    // starts with the same entries, and the loop below could read array
+    // slots of the shorter list that add() never filled in.
+    if(length!=o.length) {
+        return FALSE;
+    }
+    for(int32_t i=0; i<length; ++i) {
+        if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+// Numbers the edges of this list branch from right to left.
+// The rightmost edge continues with the incoming edge number; every edge
+// further left gets the next lower number. Entries with a NULL sub-node
+// (final values) have no edge to mark.
+// Remembers the first edge number and stores the last one in offset.
+int32_t DictTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
+    if(offset!=0) {
+        return edgeNumber;  // Already visited.
+    }
+    firstEdgeNumber=edgeNumber;
+    int32_t index=length-1;
+    // Rightmost edge: no decrement.
+    Node *edge=equal[index];
+    if(edge!=NULL) {
+        edgeNumber=edge->markRightEdgesFirst(edgeNumber);
+    }
+    // All other edges, right to left: decrement the edge number first.
+    while(index>0) {
+        edge=equal[--index];
+        if(edge!=NULL) {
+            edgeNumber=edge->markRightEdgesFirst(edgeNumber-1);
+        }
+    }
+    offset=edgeNumber;
+    return edgeNumber;
+}
+
+// Split-branch nodes are equal if the base-class check passes and they have
+// the same middle unit and the same (identical pointer) sub-nodes on both sides.
+UBool DictTrieBuilder::SplitBranchNode::operator==(const Node &other) const {
+    if(this==&other) {
+        return TRUE;
+    }
+    if(!Node::operator==(other)) {
+        return FALSE;
+    }
+    const SplitBranchNode &that=(const SplitBranchNode &)other;
+    if(unit!=that.unit) {
+        return FALSE;
+    }
+    if(lessThan!=that.lessThan) {
+        return FALSE;
+    }
+    return greaterOrEqual==that.greaterOrEqual;
+}
+
+// Numbers the right (greater-or-equal) side first with the incoming edge
+// number, then the left (less-than) side with the next lower number.
+// Remembers the first edge number and stores the last one in offset.
+int32_t DictTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) {
+    if(offset!=0) {
+        return edgeNumber;  // Already visited.
+    }
+    firstEdgeNumber=edgeNumber;
+    int32_t afterRight=greaterOrEqual->markRightEdgesFirst(edgeNumber);
+    edgeNumber=lessThan->markRightEdgesFirst(afterRight-1);
+    offset=edgeNumber;
+    return edgeNumber;
+}
+
+// Branch-head nodes are equal if the ValueNode check passes and they have
+// the same branch length and the same (identical pointer) branch sub-node.
+UBool DictTrieBuilder::BranchHeadNode::operator==(const Node &other) const {
+    if(this==&other) {
+        return TRUE;
+    }
+    if(!ValueNode::operator==(other)) {
+        return FALSE;
+    }
+    const BranchHeadNode &that=(const BranchHeadNode &)other;
+    if(length!=that.length) {
+        return FALSE;
+    }
+    return next==that.next;
+}
+
+// A branch head just forwards to its branch sub-node and adopts the
+// resulting edge number as its own.
+int32_t DictTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) {
+    if(offset!=0) {
+        return edgeNumber;  // Already visited.
+    }
+    edgeNumber=next->markRightEdgesFirst(edgeNumber);
+    offset=edgeNumber;
+    return edgeNumber;
+}
+
+U_NAMESPACE_END
--- a/icu4c/source/tools/toolutil/dicttriebuilder.h
+++ b/icu4c/source/tools/toolutil/dicttriebuilder.h
@ -0,0 +1,251 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  dicttriebuilder.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010dec24
+*   created by: Markus W. Scherer
+*
+* Base class for dictionary-trie builder classes.
+*/
+
+#ifndef __DICTTRIEBUILDER_H__
+#define __DICTTRIEBUILDER_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "uhash.h"
+
+enum UDictTrieBuildOption {  // Build option for the dictionary-trie builder classes.
+    UDICTTRIE_BUILD_FAST,  // NOTE(review): presumably favors build speed over output size -- confirm in the builder subclasses.
+    UDICTTRIE_BUILD_SMALL  // NOTE(review): presumably favors a smaller trie (node sharing) -- confirm in the builder subclasses.
+};
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Base class for dictionary-trie builder classes.
+ * Owns an optional hash set of unique nodes (see createCompactBuilder())
+ * and defines the Node class hierarchy that the builders assemble and write.
+ */
+class U_TOOLUTIL_API DictTrieBuilder : public UMemory {
+public:
+    /**
+     * Hash callback for the node hash set; returns the hash code of the Node.
+     * NOTE(review): declared as returning UBool although Node::hashCode() returns
+     * int32_t, so the hash is narrowed. Changing this requires changing the
+     * definition in dicttriebuilder.cpp at the same time.
+     * @internal
+     */
+    static UBool hashNode(const void *node);
+    /**
+     * Equality callback for the node hash set; compares the two Nodes with operator==().
+     * @internal
+     */
+    static UBool equalNodes(const void *left, const void *right);
+
+protected:
+    DictTrieBuilder();
+    ~DictTrieBuilder();
+
+    class Node;
+
+    /** Opens the node hash set. Does nothing if errorCode is already a failure. */
+    void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
+    /** Closes the node hash set and resets the pointer to NULL. */
+    void deleteCompactBuilder();
+
+    /**
+     * Makes sure that there is only one unique node registered that is
+     * equivalent to newNode.
+     * @param newNode Input node. The builder takes ownership.
+     * @param errorCode ICU in/out UErrorCode.
+     *                  Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
+     * @return newNode if it is the first of its kind, or
+     *         an equivalent node if newNode is a duplicate.
+     */
+    Node *registerNode(Node *newNode, UErrorCode &errorCode);
+    /**
+     * Makes sure that there is only one unique FinalValueNode registered
+     * with this value.
+     * Avoids creating a node if the value is a duplicate.
+     * @param value A final value.
+     * @param errorCode ICU in/out UErrorCode.
+     *                  Set to U_MEMORY_ALLOCATION_ERROR if it was success but
+     *                  createFinalValueNode() returned NULL.
+     * @return A FinalValueNode with the given value.
+     */
+    Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
+
+    /*
+     * C++ note:
+     * registerNode() takes ownership of its input node;
+     * registerNode() and registerFinalValue() only return owned nodes.
+     * If registerNode() sees a failure UErrorCode, it will delete the input node.
+     * If it gets a NULL pointer, it will record a U_MEMORY_ALLOCATION_ERROR.
+     * If there is a failure, they return NULL.
+     *
+     * NULL Node pointers can be safely passed into other Nodes because
+     * they call the static Node::hashCode() which checks for a NULL pointer first.
+     *
+     * Therefore, as long as builder functions register a new node,
+     * they need to check for failures only before explicitly dereferencing
+     * a Node pointer, or before setting a new UErrorCode.
+     */
+
+    /** Creates the builder-specific FinalValueNode subclass instance for this value. */
+    virtual Node *createFinalValueNode(int32_t value) const = 0;
+
+    // Hash set of nodes, maps from nodes to integer 1.
+    UHashtable *nodes;
+
+    /**
+     * Base class of all builder nodes.
+     * Carries a hash code that is computed during construction
+     * and an offset that doubles as the edge number (see markRightEdgesFirst()).
+     */
+    class Node : public UObject {
+    public:
+        Node(int32_t initialHash) : hash(initialHash), offset(0) {}
+        inline int32_t hashCode() const { return hash; }
+        // Handles node==NULL.
+        static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); }
+        // Base class operator==() compares the actual class types.
+        virtual UBool operator==(const Node &other) const;
+        inline UBool operator!=(const Node &other) const { return !operator==(other); }
+        /**
+         * Traverses the Node graph and numbers branch edges, with rightmost edges first.
+         * This is to avoid writing a duplicate node twice.
+         *
+         * Branch nodes in this trie data structure are not symmetric.
+         * Most branch edges "jump" to other nodes but the rightmost branch edges
+         * just continue without a jump.
+         * Therefore, write() must write the rightmost branch edge last
+         * (trie units are written backwards), and must write it at that point even if
+         * it is a duplicate of a node previously written elsewhere.
+         *
+         * This function visits and marks right branch edges first.
+         * Edges are numbered with increasingly negative values because we share the
+         * offset field which gets positive values when nodes are written.
+         * A branch edge also remembers the first number for any of its edges.
+         *
+         * When a further-left branch edge has a number in the range of the rightmost
+         * edge's numbers, then it will be written as part of the required right edge
+         * and we can avoid writing it first.
+         *
+         * After root.markRightEdgesFirst(-1) the offsets of all nodes are negative
+         * edge numbers.
+         *
+         * @param edgeNumber The first edge number for this node and its sub-nodes.
+         * @return An edge number that is at least the maximum-negative
+         *         of the input edge number and the numbers of this node and all of its sub-nodes.
+         */
+        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
+        // write() must set the offset to a positive value.
+        virtual void write(DictTrieBuilder &builder) = 0;
+        // See markRightEdgesFirst.
+        inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
+                                               DictTrieBuilder &builder) {
+            // Note: Edge numbers are negative, lastRight<=firstRight.
+            // If offset>0 then this node and its sub-nodes have been written already
+            // and we need not write them again.
+            // If this node is part of the unwritten right branch edge,
+            // then we wait until that is written.
+            if(offset<0 && (offset<lastRight || firstRight<offset)) {
+                write(builder);
+            }
+        }
+        inline int32_t getOffset() const { return offset; }
+    protected:
+        int32_t hash;
+        int32_t offset;
+    private:
+        // No ICU "poor man's RTTI" for this class nor its subclasses.
+        virtual UClassID getDynamicClassID() const;
+    };
+
+    /** A node that only holds a final value. Its hash code is derived from the value. */
+    class FinalValueNode : public Node {
+    public:
+        FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {}
+        virtual UBool operator==(const Node &other) const;
+        // Dummy default implementation, must be overridden for real writing.
+        virtual void write(DictTrieBuilder & /*builder*/) {}
+    protected:
+        int32_t value;
+    };
+
+    /** Base class for nodes that can optionally carry a value. */
+    class ValueNode : public Node {
+    public:
+        ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {}
+        virtual UBool operator==(const Node &other) const;
+        // Sets the value and folds it into the hash code.
+        void setValue(int32_t v) {
+            hasValue=TRUE;
+            value=v;
+            hash=hash*37+v;
+        }
+    protected:
+        UBool hasValue;
+        int32_t value;
+    };
+
+    /** A node that matches a sequence of len units and then continues with nextNode. */
+    class LinearMatchNode : public ValueNode {
+    public:
+        LinearMatchNode(int32_t len, Node *nextNode)
+                : ValueNode((0x333333*37+len)*37+hashCode(nextNode)),
+                  length(len), next(nextNode) {}
+        virtual UBool operator==(const Node &other) const;
+        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
+    protected:
+        int32_t length;
+        Node *next;
+    };
+
+    /** Base class for branch nodes; remembers the first edge number of the branch. */
+    class BranchNode : public Node {
+    public:
+        // firstEdgeNumber is initialized so that it never holds an indeterminate
+        // value; markRightEdgesFirst() of the subclasses sets the real number.
+        BranchNode(int32_t initialHash) : Node(initialHash), firstEdgeNumber(0) {}
+    protected:
+        int32_t firstEdgeNumber;
+    };
+
+    /** A branch node with a short list of units, each with a final value or a sub-node. */
+    class ListBranchNode : public BranchNode {
+    public:
+        ListBranchNode() : BranchNode(0x444444), length(0) {}
+        virtual UBool operator==(const Node &other) const;
+        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
+        // NOTE(review): neither add() checks length against the array capacity (10);
+        // callers must not add more entries than that.
+        // Adds a unit with a final value.
+        void add(int32_t c, int32_t value) {
+            units[length]=(UChar)c;
+            equal[length]=NULL;
+            values[length]=value;
+            ++length;
+            hash=(hash*37+c)*37+value;
+        }
+        // Adds a unit which leads to another match node.
+        void add(int32_t c, Node *node) {
+            units[length]=(UChar)c;
+            equal[length]=node;
+            values[length]=0;
+            ++length;
+            hash=(hash*37+c)*37+hashCode(node);
+        }
+    protected:
+        // TODO: 10 -> max(BT/UCT max list lengths)
+        Node *equal[10];  // NULL means "has final value".
+        int32_t length;
+        int32_t values[10];
+        UChar units[10];
+    };
+
+    /** A branch node that splits on a middle unit into a less-than and a greater-or-equal side. */
+    class SplitBranchNode : public BranchNode {
+    public:
+        SplitBranchNode(UChar middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
+                : BranchNode(((0x555555*37+middleUnit)*37+
+                              hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)),
+                  unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
+        virtual UBool operator==(const Node &other) const;
+        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
+    protected:
+        UChar unit;
+        Node *lessThan;
+        Node *greaterOrEqual;
+    };
+
+    // Branch head node, for writing the actual node lead unit.
+    class BranchHeadNode : public ValueNode {
+    public:
+        BranchHeadNode(int32_t len, Node *subNode)
+                : ValueNode((0x666666*37+len)*37+hashCode(subNode)),
+                  length(len), next(subNode) {}
+        virtual UBool operator==(const Node &other) const;
+        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
+    protected:
+        int32_t length;
+        Node *next;  // A branch sub-node.
+    };
+};
+
+U_NAMESPACE_END
+
+#endif  // __DICTTRIEBUILDER_H__
--- a/icu4c/source/tools/toolutil/swapimpl.cpp
+++ b/icu4c/source/tools/toolutil/swapimpl.cpp
@ -68,9 +68,94 @@

 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

+/* Unicode property (value) aliases data swapping --------------------------- */
+
+/**
+ * Swaps pnames.icu (Unicode property names) data, formatVersion 2.
+ *
+ * Only the leading int32_t section (indexes[] and valueMaps[], up to the
+ * byte-tries offset) is byte-swapped; the rest is copied unchanged.
+ *
+ * @param ds The data swapper.
+ * @param inData Input data including the data header.
+ * @param length Number of input bytes, or negative for preflighting
+ *               (only the size is computed, nothing is written).
+ * @param outData Output buffer; may be the same as inData.
+ * @param pErrorCode ICU in/out error code.
+ *        U_UNSUPPORTED_ERROR for an unrecognized format,
+ *        U_INDEX_OUTOFBOUNDS_ERROR for too-short or inconsistent data.
+ * @return Header size plus total data size, or 0 on failure.
+ */
+static int32_t
+upname_swap(const UDataSwapper *ds,
+            const void *inData, int32_t length, void *outData,
+            UErrorCode *pErrorCode) {
+    /* udata_swapDataHeader checks the arguments */
+    int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* check data format and format version */
+    const UDataInfo *pInfo=
+        reinterpret_cast<const UDataInfo *>(
+            reinterpret_cast<const char *>(inData)+4);
+    if(!(
+        pInfo->dataFormat[0]==0x70 &&   /* dataFormat="pnam" */
+        pInfo->dataFormat[1]==0x6e &&
+        pInfo->dataFormat[2]==0x61 &&
+        pInfo->dataFormat[3]==0x6d &&
+        pInfo->formatVersion[0]==2
+    )) {
+        udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
+                         pInfo->dataFormat[0], pInfo->dataFormat[1],
+                         pInfo->dataFormat[2], pInfo->dataFormat[3],
+                         pInfo->formatVersion[0]);
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    const uint8_t *inBytes=reinterpret_cast<const uint8_t *>(inData)+headerSize;
+    uint8_t *outBytes=reinterpret_cast<uint8_t *>(outData)+headerSize;
+
+    if(length>=0) {
+        length-=headerSize;
+        // formatVersion 2 initially has indexes[8], 32 bytes.
+        if(length<32) {
+            udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
+                             (int)length);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+    }
+
+    const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes);
+    int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]);
+    if(length>=0) {
+        if(length<totalSize) {
+            udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) "
+                             "for pnames.icu\n",
+                             (int)length, (int)totalSize);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+
+        int32_t numBytesIndexesAndValueMaps=
+            udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]);
+
+        // The offset comes from the data file. It must cover at least the
+        // 32 bytes of indexes[] and must not exceed the total size;
+        // otherwise the copy length below would be negative or the swap
+        // would run past the end of the data.
+        if(numBytesIndexesAndValueMaps<32 || totalSize<numBytesIndexesAndValueMaps) {
+            udata_printError(ds, "upname_swap(): invalid byte-tries offset %d "
+                             "(should be 32..%d) for pnames.icu\n",
+                             (int)numBytesIndexesAndValueMaps, (int)totalSize);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+
+        // Swap the indexes[] and the valueMaps[].
+        ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode);
+
+        // Copy the rest of the data.
+        if(inBytes!=outBytes) {
+            uprv_memcpy(outBytes+numBytesIndexesAndValueMaps,
+                        inBytes+numBytesIndexesAndValueMaps,
+                        totalSize-numBytesIndexesAndValueMaps);
+        }
+
+        // We need not swap anything else:
+        //
+        // The ByteTries are already byte-serialized, and are fixed on ASCII.
+        // (On an EBCDIC machine, the input string is converted to lowercase ASCII
+        // while matching.)
+        //
+        // The name groups are mostly invariant characters, but since we only
+        // generate, and keep in subversion, ASCII versions of pnames.icu,
+        // and since only ICU4J uses the pnames.icu data file
+        // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files,
+        // we just copy those bytes too.
+    }
+
+    return headerSize+totalSize;
+}
+
 /* Unicode properties data swapping ----------------------------------------- */

-U_CAPI int32_t U_EXPORT2
+static int32_t
 uprops_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode) {
@ -218,7 +303,7 @@ uprops_swap(const UDataSwapper *ds,

 /* Unicode case mapping data swapping --------------------------------------- */

-U_CAPI int32_t U_EXPORT2
+static int32_t
 ucase_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode) {
@ -320,7 +405,7 @@ ucase_swap(const UDataSwapper *ds,

 /* Unicode bidi/shaping data swapping --------------------------------------- */

-U_CAPI int32_t U_EXPORT2
+static int32_t
 ubidi_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode) {
@ -428,7 +513,7 @@ ubidi_swap(const UDataSwapper *ds,

 #if !UCONFIG_NO_NORMALIZATION

-U_CAPI int32_t U_EXPORT2
+static int32_t
 unorm_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode) {
@ -552,7 +637,7 @@ unorm_swap(const UDataSwapper *ds,
 #endif

 /* Swap 'Test' data from gentest */
-U_CAPI int32_t U_EXPORT2
+static int32_t
 test_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode) {
--- a/icu4c/source/tools/toolutil/toolutil.vcxproj
+++ b/icu4c/source/tools/toolutil/toolutil.vcxproj
@ -246,6 +246,10 @@
    </Link>
  </ItemDefinitionGroup>
  <ItemGroup>
+    <ClCompile Include="bytetriebuilder.cpp" />
+    <ClCompile Include="bytetrieiterator.cpp" />
+    <ClCompile Include="denseranges.cpp" />
+    <ClCompile Include="dicttriebuilder.cpp" />
    <ClCompile Include="filestrm.c" />
    <ClCompile Include="filetools.cpp" />
    <ClCompile Include="flagparser.c" />
@ -272,6 +276,9 @@
      <DisableLanguageExtensions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</DisableLanguageExtensions>
    </ClCompile>
    <ClCompile Include="ucbuf.c" />
+    <ClCompile Include="uchartrie.cpp" />
+    <ClCompile Include="uchartriebuilder.cpp" />
+    <ClCompile Include="uchartrieiterator.cpp" />
    <ClCompile Include="ucm.c" />
    <ClCompile Include="ucmstate.c" />
    <ClCompile Include="unewdata.c" />
@ -289,6 +296,10 @@
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
+    <ClInclude Include="bytetriebuilder.h" />
+    <ClInclude Include="bytetrieiterator.h" />
+    <ClInclude Include="denseranges.h" />
+    <ClInclude Include="dicttriebuilder.h" />
    <ClInclude Include="filestrm.h" />
    <ClInclude Include="filetools.h" />
    <ClInclude Include="flagparser.h" />
@ -301,6 +312,9 @@
    <ClInclude Include="swapimpl.h" />
    <ClInclude Include="toolutil.h" />
    <ClInclude Include="ucbuf.h" />
+    <ClInclude Include="uchartrie.h" />
+    <ClInclude Include="uchartriebuilder.h" />
+    <ClInclude Include="uchartrieiterator.h" />
    <ClInclude Include="ucm.h" />
    <ClInclude Include="unewdata.h" />
    <ClInclude Include="uoptions.h" />
@ -323,4 +337,4 @@
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
-</Project>
+</Project>
--- a/icu4c/source/tools/toolutil/uchartrie.cpp
+++ b/icu4c/source/tools/toolutil/uchartrie.cpp
@ -0,0 +1,414 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  uchartrie.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010nov14
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "uassert.h"
+#include "uchartrie.h"
+
+U_NAMESPACE_BEGIN
+
+// Appends a code point: a single unit for c<=0xffff,
+// otherwise the lead and trail surrogates for c.
+Appendable &
+Appendable::appendCodePoint(UChar32 c) {
+    if(c>0xffff) {
+        return append(U16_LEAD(c)).append(U16_TRAIL(c));
+    }
+    return append((UChar)c);
+}
+
+// Appends a string one unit at a time via append(UChar).
+// length<0 means that s is NUL-terminated; the terminator is not appended.
+// Does nothing if s is NULL or length is 0.
+Appendable &
+Appendable::append(const UChar *s, int32_t length) {
+    if(s==NULL || length==0) {
+        return *this;
+    }
+    if(length>0) {
+        for(int32_t i=0; i<length; ++i) {
+            append(s[i]);
+        }
+    } else {
+        for(; *s!=0; ++s) {
+            append(*s);
+        }
+    }
+    return *this;
+}
+
+UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Appendable)
+
+// Reports the result for the current state without consuming input:
+// no match if the trie is stopped (pos_==NULL); a value result if we are
+// not inside a linear-match node and the unit at pos_ is a value lead unit;
+// otherwise "no value".
+UDictTrieResult
+UCharTrie::current() const {
+    if(pos_==NULL) {
+        return UDICTTRIE_NO_MATCH;
+    }
+    if(remainingMatchLength_<0) {
+        int32_t node=*pos_;
+        if(node>=kMinValueLead) {
+            return valueResult(node);
+        }
+    }
+    return UDICTTRIE_NO_VALUE;
+}
+
+UDictTrieResult
+UCharTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
+    /*
+     * Branch according to the current unit.
+     *
+     * pos points just behind the branch node's lead unit, length is the
+     * length field from that lead unit, and uchar is the input unit to match.
+     * On a match, pos_ is updated and the result tells whether a value
+     * (final or intermediate) is available at the new position.
+     * If no edge matches, stop() is called and UDICTTRIE_NO_MATCH is returned.
+     *
+     * A length field of 0 means that the real length is stored in the next unit.
+     * The stored length is one less than the number of units in the branch.
+     */
+    if(length==0) {
+        length=*pos++;
+    }
+    ++length;
+    // The length of the branch is the number of units to select from.
+    // The data structure encodes a binary search.
+    while(length>kMaxBranchLinearSubNodeLength) {
+        if(uchar<*pos++) {  // Compare with the split unit and step over it.
+            length>>=1;  // Continue in the less-than half, which is reached via a jump.
+            pos=jumpByDelta(pos);
+        } else {
+            length=length-(length>>1);  // The greater-or-equal half follows directly behind the delta.
+            pos=skipDelta(pos);
+        }
+    }
+    // Drop down to linear search for the last few units.
+    // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
+    // and divides length by 2.
+    do {  // All but the last unit: each is followed by a value (final value or jump delta).
+        if(uchar==*pos++) {
+            UDictTrieResult result;
+            int32_t node=*pos;
+            if(node&kValueIsFinal) {
+                // Leave the final value for getValue() to read.
+                result=UDICTTRIE_HAS_FINAL_VALUE;
+            } else {
+                // Use the non-final value as the jump delta.
+                ++pos;
+                // int32_t delta=readValue(pos, node>>1);
+                int32_t delta;
+                if(node<kMinTwoUnitValueLead) {  // Delta fits into the lead unit.
+                    delta=node;
+                } else if(node<kThreeUnitValueLead) {  // Lead unit carries the high bits, one more unit follows.
+                    delta=((node-kMinTwoUnitValueLead)<<16)|*pos++;
+                } else {  // Two more units carry the whole delta.
+                    delta=(pos[0]<<16)|pos[1];
+                    pos+=2;
+                }
+                // end readValue()
+                pos+=delta;
+                node=*pos;
+                result= node>=kMinValueLead ? valueResult(node) : UDICTTRIE_NO_VALUE;
+            }
+            pos_=pos;
+            return result;
+        }
+        --length;
+        pos=skipValue(pos);  // No match: step over this unit's value to the next unit.
+    } while(length>1);
+    if(uchar==*pos++) {  // Last unit: no value or jump, the next node follows directly.
+        pos_=pos;
+        int32_t node=*pos;
+        return node>=kMinValueLead ? valueResult(node) : UDICTTRIE_NO_VALUE;
+    } else {
+        stop();
+        return UDICTTRIE_NO_MATCH;
+    }
+}
+
+// Matches uchar against the node at pos (not inside a linear-match node).
+// Intermediate values in front of the node are skipped; a final value means
+// that there are no further units to match.
+UDictTrieResult
+UCharTrie::nextImpl(const UChar *pos, int32_t uchar) {
+    int32_t node=*pos++;
+    while(node>=kMinValueLead) {
+        if(node&kValueIsFinal) {
+            // No further matching units.
+            stop();
+            return UDICTTRIE_NO_MATCH;
+        }
+        // Skip intermediate value.
+        pos=skipNodeValue(pos, node);
+        node&=kNodeTypeMask;
+    }
+    if(node<kMinLinearMatch) {
+        return branchNext(pos, node, uchar);
+    }
+    // Linear-match node: match the first of length+1 units.
+    int32_t length=node-kMinLinearMatch;  // Actual match length minus 1.
+    if(uchar!=*pos++) {
+        // No match.
+        stop();
+        return UDICTTRIE_NO_MATCH;
+    }
+    remainingMatchLength_=--length;
+    pos_=pos;
+    if(length<0) {
+        // The whole linear match is consumed; look at the following node.
+        node=*pos;
+        if(node>=kMinValueLead) {
+            return valueResult(node);
+        }
+    }
+    return UDICTTRIE_NO_VALUE;
+}
+
+// Advances the trie state by one input unit.
+// Returns UDICTTRIE_NO_MATCH if the trie is already stopped or the unit does
+// not match; otherwise the result tells whether a value is available.
+UDictTrieResult
+UCharTrie::next(int32_t uchar) {
+    const UChar *pos=pos_;
+    if(pos==NULL) {
+        return UDICTTRIE_NO_MATCH;
+    }
+    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
+    if(length<0) {
+        // Not inside a linear-match node.
+        return nextImpl(pos, uchar);
+    }
+    // Remaining part of a linear-match node.
+    if(uchar!=*pos++) {
+        stop();
+        return UDICTTRIE_NO_MATCH;
+    }
+    remainingMatchLength_=--length;
+    pos_=pos;
+    if(length<0) {
+        // The linear match is complete; look at the following node.
+        int32_t node=*pos;
+        if(node>=kMinValueLead) {
+            return valueResult(node);
+        }
+    }
+    return UDICTTRIE_NO_VALUE;
+}
+
+// Traverses the trie from the current state for the units of the string s.
+// sLength<0 means that s is NUL-terminated; otherwise exactly sLength units are read.
+// Returns current() for empty input, and UDICTTRIE_NO_MATCH if the trie was
+// already stopped (pos_==NULL) or as soon as an input unit does not match.
+// When the input ends inside a linear-match node, pos_ and remainingMatchLength_
+// are stored so that a later call can continue from there.
+UDictTrieResult
+UCharTrie::next(const UChar *s, int32_t sLength) {
+    if(sLength<0 ? *s==0 : sLength==0) {
+        // Empty input.
+        return current();
+    }
+    const UChar *pos=pos_;
+    if(pos==NULL) {
+        // The trie was stopped earlier; nothing can match any more.
+        return UDICTTRIE_NO_MATCH;
+    }
+    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
+    for(;;) {
+        // Fetch the next input unit, if there is one.
+        // Continue a linear-match node without rechecking sLength<0.
+        int32_t uchar;
+        if(sLength<0) {
+            // NUL-terminated input.
+            for(;;) {
+                if((uchar=*s++)==0) {
+                    // End of input: save the state and report whether a value is here.
+                    remainingMatchLength_=length;
+                    pos_=pos;
+                    int32_t node;
+                    // A value can only be present when no linear match is pending.
+                    return (length<0 && (node=*pos)>=kMinValueLead) ?
+                            valueResult(node) : UDICTTRIE_NO_VALUE;
+                }
+                if(length<0) {
+                    // Not inside a linear-match node: handle uchar with the node loop below.
+                    remainingMatchLength_=length;
+                    break;
+                }
+                if(uchar!=*pos) {
+                    stop();
+                    return UDICTTRIE_NO_MATCH;
+                }
+                ++pos;
+                --length;
+            }
+        } else {
+            // Input with an explicit length; same logic as above.
+            for(;;) {
+                if(sLength==0) {
+                    // End of input: save the state and report whether a value is here.
+                    remainingMatchLength_=length;
+                    pos_=pos;
+                    int32_t node;
+                    return (length<0 && (node=*pos)>=kMinValueLead) ?
+                            valueResult(node) : UDICTTRIE_NO_VALUE;
+                }
+                uchar=*s++;
+                --sLength;
+                if(length<0) {
+                    // Not inside a linear-match node: handle uchar with the node loop below.
+                    remainingMatchLength_=length;
+                    break;
+                }
+                if(uchar!=*pos) {
+                    stop();
+                    return UDICTTRIE_NO_MATCH;
+                }
+                ++pos;
+                --length;
+            }
+        }
+        // uchar holds an input unit that still has to be matched against the node at pos.
+        int32_t node=*pos++;
+        for(;;) {
+            if(node<kMinLinearMatch) {
+                // Branch node.
+                UDictTrieResult result=branchNext(pos, node, uchar);
+                if(result==UDICTTRIE_NO_MATCH) {
+                    return UDICTTRIE_NO_MATCH;
+                }
+                // Fetch the next input unit, if there is one.
+                if(sLength<0) {
+                    if((uchar=*s++)==0) {
+                        return result;
+                    }
+                } else {
+                    if(sLength==0) {
+                        return result;
+                    }
+                    uchar=*s++;
+                    --sLength;
+                }
+                if(result==UDICTTRIE_HAS_FINAL_VALUE) {
+                    // No further matching units.
+                    stop();
+                    return UDICTTRIE_NO_MATCH;
+                }
+                pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
+                node=*pos++;
+            } else if(node<kMinValueLead) {
+                // Match length+1 units.
+                length=node-kMinLinearMatch;  // Actual match length minus 1.
+                if(uchar!=*pos) {
+                    stop();
+                    return UDICTTRIE_NO_MATCH;
+                }
+                ++pos;
+                --length;
+                // Let the outer loop match the rest of this linear-match node.
+                break;
+            } else if(node&kValueIsFinal) {
+                // No further matching units.
+                stop();
+                return UDICTTRIE_NO_MATCH;
+            } else {
+                // Skip intermediate value.
+                pos=skipNodeValue(pos, node);
+                // Keep only the node-type bits and handle them in the next iteration.
+                node&=kNodeTypeMask;
+            }
+        }
+    }
+}
+
+// Helper for findUniqueValue(): checks the values reachable from a branch sub-node.
+// @param pos             Position of the first unit of the branch sub-node.
+// @param length          Number of units in this (sub-)branch.
+// @param haveUniqueValue TRUE if uniqueValue already holds a candidate value.
+// @param uniqueValue     In/out: the candidate value; set when the first value is found.
+// @return NULL if two different values were found; otherwise the position just
+//         after the last comparison unit of the linear list, where the node for
+//         that last unit begins. The caller must continue checking from there.
+const UChar *
+UCharTrie::findUniqueValueFromBranch(const UChar *pos, int32_t length,
+                                    UBool haveUniqueValue, int32_t &uniqueValue) {
+    while(length>kMaxBranchLinearSubNodeLength) {
+        ++pos;  // ignore the comparison unit
+        // The less-than half of the branch is reached via the jump delta.
+        const UChar *lessThanEnd=
+            findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue);
+        if(lessThanEnd==NULL) {
+            return NULL;
+        }
+        // A successful call has stored or verified at least one value in uniqueValue.
+        // haveUniqueValue is passed by value, so record that here; otherwise the
+        // greater-or-equal half could overwrite uniqueValue without comparing.
+        haveUniqueValue=TRUE;
+        // The last unit of the less-than half has no value of its own: matching
+        // continues with the node that follows it, so that node must be checked too.
+        if(!findUniqueValue(lessThanEnd, haveUniqueValue, uniqueValue)) {
+            return NULL;
+        }
+        length=length-(length>>1);
+        pos=skipDelta(pos);
+    }
+    do {
+        ++pos;  // ignore a comparison unit
+        // handle its value
+        int32_t node=*pos++;
+        UBool isFinal=(UBool)(node>>15);  // bit 15 marks a final value
+        node&=0x7fff;
+        int32_t value=readValue(pos, node);
+        pos=skipValue(pos, node);
+        if(isFinal) {
+            if(haveUniqueValue) {
+                if(value!=uniqueValue) {
+                    return NULL;
+                }
+            } else {
+                uniqueValue=value;
+                haveUniqueValue=TRUE;
+            }
+        } else {
+            // A non-final value is a jump delta to the node for this unit.
+            if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
+                return NULL;
+            }
+            haveUniqueValue=TRUE;
+        }
+    } while(--length>1);
+    return pos+1;  // ignore the last comparison unit
+}
+
+// Helper for hasUniqueValue(): walks the nodes starting at pos (which must be on
+// a node lead unit) and checks whether every value found is the same.
+// @param pos             Position of a node lead unit.
+// @param haveUniqueValue TRUE if uniqueValue already holds a candidate value.
+// @param uniqueValue     In/out: the candidate value; set when the first value is found.
+// @return TRUE once a final value is reached and all values seen were equal;
+//         FALSE as soon as a different value is found.
+UBool
+UCharTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
+    int32_t node=*pos++;
+    for(;;) {
+        if(node<kMinLinearMatch) {
+            // Branch node: a lead unit of 0 means that the next unit holds length-1.
+            if(node==0) {
+                node=*pos++;
+            }
+            pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
+            if(pos==NULL) {
+                return FALSE;
+            }
+            haveUniqueValue=TRUE;
+            // Continue with the node that follows the branch's last comparison unit.
+            node=*pos++;
+        } else if(node<kMinValueLead) {
+            // linear-match node
+            pos+=node-kMinLinearMatch+1;  // Ignore the match units.
+            node=*pos++;
+        } else {
+            // Node with a value: final if bit 15 is set, otherwise intermediate.
+            UBool isFinal=(UBool)(node>>15);
+            int32_t value;
+            if(isFinal) {
+                value=readValue(pos, node&0x7fff);
+            } else {
+                value=readNodeValue(pos, node);
+            }
+            if(haveUniqueValue) {
+                if(value!=uniqueValue) {
+                    return FALSE;
+                }
+            } else {
+                uniqueValue=value;
+                haveUniqueValue=TRUE;
+            }
+            if(isFinal) {
+                return TRUE;
+            }
+            // Skip the intermediate value and handle the match node sharing its lead unit.
+            pos=skipNodeValue(pos, node);
+            node&=kNodeTypeMask;
+        }
+    }
+}
+
+// Appends to out each unit that can continue the string from the current state
+// and returns how many such units there are (0 if the trie is stopped or
+// the current node is a final value).
+int32_t
+UCharTrie::getNextUChars(Appendable &out) const {
+    if(pos_==NULL) {
+        return 0;
+    }
+    const UChar *cursor=pos_;
+    if(remainingMatchLength_>=0) {
+        // Inside a linear-match node: only its next unit can follow.
+        out.append(*cursor);
+        return 1;
+    }
+    int32_t lead=*cursor++;
+    if(lead>=kMinValueLead) {
+        if(lead&kValueIsFinal) {
+            return 0;
+        }
+        // Step over the intermediate value and keep the node-type bits.
+        cursor=skipNodeValue(cursor, lead);
+        lead&=kNodeTypeMask;
+    }
+    if(lead>=kMinLinearMatch) {
+        // Linear-match node: its first unit is the only continuation.
+        out.append(*cursor);
+        return 1;
+    }
+    // Branch node: a lead of 0 means that the next unit stores length-1.
+    if(lead==0) {
+        lead=*cursor++;
+    }
+    ++lead;
+    getNextBranchUChars(cursor, lead, out);
+    return lead;
+}
+
+// Appends to out every comparison unit of the branch sub-node at pos,
+// which selects among length units.
+void
+UCharTrie::getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out) {
+    while(length>kMaxBranchLinearSubNodeLength) {
+        const int32_t lessThanLength=length>>1;
+        const UChar *delta=pos+1;  // step over the comparison unit
+        // First collect the units of the less-than half, then continue with the rest.
+        getNextBranchUChars(jumpByDelta(delta), lessThanLength, out);
+        pos=skipDelta(delta);
+        length-=lessThanLength;
+    }
+    // Linear list: (unit, value) pairs followed by one last unit without a value.
+    for(;;) {
+        out.append(*pos++);
+        pos=skipValue(pos);
+        if(--length<=1) {
+            break;
+        }
+    }
+    out.append(*pos);
+}
+
+U_NAMESPACE_END
--- a/icu4c/source/tools/toolutil/uchartrie.h
+++ b/icu4c/source/tools/toolutil/uchartrie.h
@ -0,0 +1,433 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  uchartrie.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010nov14
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __UCHARTRIE_H__
+#define __UCHARTRIE_H__
+
+/**
+ * \file
+ * \brief C++ API: Dictionary trie for mapping Unicode strings (or 16-bit-unit sequences)
+ *                 to integer values.
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "uassert.h"
+#include "udicttrie.h"
+
+U_NAMESPACE_BEGIN
+
+class UCharTrieBuilder;
+class UCharTrieIterator;
+
+/**
+ * Base class for objects to which Unicode characters and strings can be appended.
+ * Combines elements of Java Appendable and ICU4C ByteSink.
+ * Subclasses must implement append(UChar); the other append functions
+ * are declared with default implementations.
+ * TODO: Should live in separate files, could be public API.
+ */
+class U_TOOLUTIL_API Appendable : public UObject {
+public:
+    /**
+     * Appends a 16-bit code unit.
+     * This is the only pure virtual function of this class.
+     * @param c code unit
+     * @return *this
+     */
+    virtual Appendable &append(UChar c) = 0;
+    /**
+     * Appends a code point; has a default implementation.
+     * @param c code point
+     * @return *this
+     */
+    virtual Appendable &appendCodePoint(UChar32 c);
+    /**
+     * Appends a string; has a default implementation.
+     * @param s string
+     * @param length string length, or -1 if NUL-terminated
+     * @return *this
+     */
+    virtual Appendable &append(const UChar *s, int32_t length);
+
+    // TODO: getAppendBuffer(), see ByteSink
+    // TODO: flush() (?) see ByteSink
+
+private:
+    // No ICU "poor man's RTTI" for this class nor its subclasses.
+    // Declared private so that callers cannot use it.
+    virtual UClassID getDynamicClassID() const;
+};
+
+/**
+ * Light-weight, non-const reader class for a UCharTrie.
+ * Traverses a UChar-serialized data structure with minimal state,
+ * for mapping strings (16-bit-unit sequences) to non-negative integer values.
+ * The reader stores only a pointer to the serialized data; it does not copy it.
+ */
+class U_TOOLUTIL_API UCharTrie : public UMemory {
+public:
+    /**
+     * Constructs a reader positioned at the root of the trie.
+     * @param trieUChars Pointer to the serialized trie; only the pointer is stored.
+     */
+    UCharTrie(const UChar *trieUChars)
+            : uchars_(trieUChars),
+              pos_(uchars_), remainingMatchLength_(-1) {}
+
+    /**
+     * Resets this trie to its initial state.
+     * @return *this
+     */
+    UCharTrie &reset() {
+        pos_=uchars_;
+        remainingMatchLength_=-1;
+        return *this;
+    }
+
+    /**
+     * UCharTrie state object, for saving a trie's current state
+     * and resetting the trie back to this state later.
+     */
+    class State : public UMemory {
+    public:
+        // A NULL uchars pointer marks a State object that contains no state yet.
+        State() { uchars=NULL; }
+    private:
+        friend class UCharTrie;
+
+        const UChar *uchars;
+        const UChar *pos;
+        int32_t remainingMatchLength;
+    };
+
+    /**
+     * Saves the state of this trie.
+     * @param state The State object to hold the trie's state.
+     * @return *this
+     * @see resetToState
+     */
+    const UCharTrie &saveState(State &state) const {
+        state.uchars=uchars_;
+        state.pos=pos_;
+        state.remainingMatchLength=remainingMatchLength_;
+        return *this;
+    }
+
+    /**
+     * Resets this trie to the saved state.
+     * If the state object contains no state, or the state of a different trie,
+     * then this trie remains unchanged.
+     * @param state The State object which holds a saved trie state.
+     * @return *this
+     * @see saveState
+     * @see reset
+     */
+    UCharTrie &resetToState(const State &state) {
+        if(uchars_==state.uchars && uchars_!=NULL) {
+            pos_=state.pos;
+            remainingMatchLength_=state.remainingMatchLength;
+        }
+        return *this;
+    }
+
+    /**
+     * Determines whether the string so far matches, whether it has a value,
+     * and whether another input UChar can continue a matching string.
+     * @return The match/value Result.
+     */
+    UDictTrieResult current() const;
+
+    /**
+     * Traverses the trie from the initial state for this input UChar.
+     * Equivalent to reset().next(uchar).
+     * @param uchar Input unit value.
+     * @return The match/value Result.
+     */
+    inline UDictTrieResult first(int32_t uchar) {
+        remainingMatchLength_=-1;
+        return nextImpl(uchars_, uchar);
+    }
+
+    /**
+     * Traverses the trie from the initial state for the
+     * one or two UTF-16 code units for this input code point.
+     * Equivalent to reset().nextForCodePoint(cp).
+     * @param cp Input code point.
+     * @return The match/value Result.
+     */
+    inline UDictTrieResult firstForCodePoint(UChar32 cp) {
+        return cp<=0xffff ?
+            first(cp) :
+            (first(U16_LEAD(cp))!=UDICTTRIE_NO_MATCH ?
+                next(U16_TRAIL(cp)) :
+                UDICTTRIE_NO_MATCH);
+    }
+
+    /**
+     * Traverses the trie from the current state for this input UChar.
+     * @param uchar Input unit value.
+     * @return The match/value Result.
+     */
+    UDictTrieResult next(int32_t uchar);
+
+    /**
+     * Traverses the trie from the current state for the
+     * one or two UTF-16 code units for this input code point.
+     * @param cp Input code point.
+     * @return The match/value Result.
+     */
+    inline UDictTrieResult nextForCodePoint(UChar32 cp) {
+        return cp<=0xffff ?
+            next(cp) :
+            (next(U16_LEAD(cp))!=UDICTTRIE_NO_MATCH ?
+                next(U16_TRAIL(cp)) :
+                UDICTTRIE_NO_MATCH);
+    }
+
+    /**
+     * Traverses the trie from the current state for this string.
+     * Equivalent to
+     * \code
+     * Result result=current();
+     * for(each c in s)
+     *   if((result=next(c))==UDICTTRIE_NO_MATCH) return UDICTTRIE_NO_MATCH;
+     * return result;
+     * \endcode
+     * @param s A string.
+     * @param length The length of the string, or a negative value if s is NUL-terminated.
+     * @return The match/value Result.
+     */
+    UDictTrieResult next(const UChar *s, int32_t length);
+
+    /**
+     * Returns a matching string's value if called immediately after
+     * current()/first()/next() returned UDICTTRIE_HAS_VALUE or UDICTTRIE_HAS_FINAL_VALUE.
+     * getValue() can be called multiple times.
+     *
+     * Do not call getValue() after UDICTTRIE_NO_MATCH or UDICTTRIE_NO_VALUE!
+     * (After UDICTTRIE_NO_MATCH pos_ is NULL, and this function dereferences it.)
+     * @return The value for the string so far.
+     */
+    inline int32_t getValue() const {
+        const UChar *pos=pos_;
+        int32_t leadUnit=*pos++;
+        U_ASSERT(leadUnit>=kMinValueLead);
+        // Final values and intermediate (node) values use different compact encodings.
+        return leadUnit&kValueIsFinal ?
+            readValue(pos, leadUnit&0x7fff) : readNodeValue(pos, leadUnit);
+    }
+
+    /**
+     * Determines whether all strings reachable from the current state
+     * map to the same value.
+     * @param uniqueValue Receives the unique value, if this function returns TRUE.
+     *                    (output-only)
+     * @return TRUE if all strings reachable from the current state
+     *         map to the same value.
+     */
+    inline UBool hasUniqueValue(int32_t &uniqueValue) const {
+        const UChar *pos=pos_;
+        // Skip the rest of a pending linear-match node.
+        return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, FALSE, uniqueValue);
+    }
+
+    /**
+     * Finds each UChar which continues the string from the current state.
+     * That is, each UChar c for which it would be next(c)!=UDICTTRIE_NO_MATCH now.
+     * @param out Each next UChar is appended to this object.
+     *            (Only uses the out.append(c) method.)
+     * @return the number of UChars which continue the string from here
+     */
+    int32_t getNextUChars(Appendable &out) const;
+
+private:
+    friend class UCharTrieBuilder;
+    friend class UCharTrieIterator;
+
+    // Marks the trie as having no further matches (pos_==NULL).
+    inline void stop() {
+        pos_=NULL;
+    }
+
+    // Reads a compact 32-bit integer.
+    // pos is already after the leadUnit, and the lead unit has bit 15 reset.
+    static inline int32_t readValue(const UChar *pos, int32_t leadUnit) {
+        int32_t value;
+        if(leadUnit<kMinTwoUnitValueLead) {
+            // One unit: the lead unit is the value.
+            value=leadUnit;
+        } else if(leadUnit<kThreeUnitValueLead) {
+            // Two units: high bits from the lead unit, low 16 bits from the next unit.
+            value=((leadUnit-kMinTwoUnitValueLead)<<16)|*pos;
+        } else {
+            // Three units: the two units after the lead unit hold the 32-bit value.
+            value=(pos[0]<<16)|pos[1];
+        }
+        return value;
+    }
+    // Skips the trail units of a compact value.
+    // pos is already after the leadUnit, and the lead unit has bit 15 reset.
+    static inline const UChar *skipValue(const UChar *pos, int32_t leadUnit) {
+        if(leadUnit>=kMinTwoUnitValueLead) {
+            if(leadUnit<kThreeUnitValueLead) {
+                ++pos;
+            } else {
+                pos+=2;
+            }
+        }
+        return pos;
+    }
+    // Skips a whole compact value, starting with pos on its lead unit.
+    static inline const UChar *skipValue(const UChar *pos) {
+        int32_t leadUnit=*pos++;
+        return skipValue(pos, leadUnit&0x7fff);
+    }
+
+    // Reads a compact intermediate value whose lead unit is shared with a match node.
+    // pos is already after the leadUnit.
+    static inline int32_t readNodeValue(const UChar *pos, int32_t leadUnit) {
+        U_ASSERT(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal);
+        int32_t value;
+        if(leadUnit<kMinTwoUnitNodeValueLead) {
+            // One unit: value+1 is stored in the bits above the 6 node-type bits.
+            value=(leadUnit>>6)-1;
+        } else if(leadUnit<kThreeUnitNodeValueLead) {
+            // Two units: high bits from the lead unit (node-type bits masked off),
+            // low 16 bits from the next unit.
+            value=(((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|*pos;
+        } else {
+            // Three units: the two units after the lead unit hold the 32-bit value.
+            value=(pos[0]<<16)|pos[1];
+        }
+        return value;
+    }
+    // Skips the trail units of a compact intermediate value.
+    // pos is already after the leadUnit.
+    static inline const UChar *skipNodeValue(const UChar *pos, int32_t leadUnit) {
+        U_ASSERT(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal);
+        if(leadUnit>=kMinTwoUnitNodeValueLead) {
+            if(leadUnit<kThreeUnitNodeValueLead) {
+                ++pos;
+            } else {
+                pos+=2;
+            }
+        }
+        return pos;
+    }
+
+    // Reads the compact delta at pos and returns the position
+    // that is delta units after the end of the delta.
+    static inline const UChar *jumpByDelta(const UChar *pos) {
+        int32_t delta=*pos++;
+        if(delta>=kMinTwoUnitDeltaLead) {
+            if(delta==kThreeUnitDeltaLead) {
+                delta=(pos[0]<<16)|pos[1];
+                pos+=2;
+            } else {
+                delta=((delta-kMinTwoUnitDeltaLead)<<16)|*pos++;
+            }
+        }
+        return pos+delta;
+    }
+
+    // Returns the position just after the compact delta at pos, without jumping.
+    static const UChar *skipDelta(const UChar *pos) {
+        int32_t delta=*pos++;
+        if(delta>=kMinTwoUnitDeltaLead) {
+            if(delta==kThreeUnitDeltaLead) {
+                pos+=2;
+            } else {
+                ++pos;
+            }
+        }
+        return pos;
+    }
+
+    // Maps a value node's lead unit to a result: subtracts bit 15 (the final-value
+    // flag) from UDICTTRIE_HAS_VALUE.
+    // NOTE(review): presumably relies on UDICTTRIE_HAS_FINAL_VALUE==UDICTTRIE_HAS_VALUE-1;
+    // confirm against udicttrie.h.
+    static inline UDictTrieResult valueResult(int32_t node) {
+        return (UDictTrieResult)(UDICTTRIE_HAS_VALUE-(node>>15));
+    }
+
+    // Handles a branch node for both next(uchar) and next(string).
+    UDictTrieResult branchNext(const UChar *pos, int32_t length, int32_t uchar);
+
+    // Requires remainingMatchLength_<0.
+    UDictTrieResult nextImpl(const UChar *pos, int32_t uchar);
+
+    // Helper functions for hasUniqueValue().
+    // Recursively finds a unique value (or whether there is not a unique one)
+    // from a branch.
+    static const UChar *findUniqueValueFromBranch(const UChar *pos, int32_t length,
+                                                  UBool haveUniqueValue, int32_t &uniqueValue);
+    // Recursively finds a unique value (or whether there is not a unique one)
+    // starting from a position on a node lead unit.
+    static UBool findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue);
+
+    // Helper functions for getNextUChars().
+    // getNextUChars() when pos is on a branch node.
+    static void getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out);
+
+    // UCharTrie data structure
+    //
+    // The trie consists of a series of UChar-serialized nodes for incremental
+    // Unicode string/UChar sequence matching. (UChar=16-bit unsigned integer)
+    // The root node is at the beginning of the trie data.
+    //
+    // Types of nodes are distinguished by their node lead unit ranges.
+    // After each node, except a final-value node, another node follows to
+    // encode match values or continue matching further units.
+    //
+    // Node types:
+    //  - Final-value node: Stores a 32-bit integer in a compact, variable-length format.
+    //    The value is for the string/UChar sequence so far.
+    //  - Match node, optionally with an intermediate value in a different compact format.
+    //    The value, if present, is for the string/UChar sequence so far.
+    //
+    //  Aside from the value, which uses the node lead unit's high bits:
+    //
+    //  - Linear-match node: Matches a number of units.
+    //  - Branch node: Branches to other nodes according to the current input unit.
+    //    The node unit is the length of the branch (number of units to select from)
+    //    minus 1. It is followed by a sub-node:
+    //    - If the length is at most kMaxBranchLinearSubNodeLength, then
+    //      there are length-1 (key, value) pairs and then one more comparison unit.
+    //      If one of the key units matches, then the value is either a final value for
+    //      the string so far, or a "jump" delta to the next node.
+    //      If the last unit matches, then matching continues with the next node.
+    //      (Values have the same encoding as final-value nodes.)
+    //    - If the length is greater than kMaxBranchLinearSubNodeLength, then
+    //      there is one unit and one "jump" delta.
+    //      If the input unit is less than the sub-node unit, then "jump" by delta to
+    //      the next sub-node which will have a length of length/2.
+    //      (The delta has its own compact encoding.)
+    //      Otherwise, skip the "jump" delta to the next sub-node
+    //      which will have a length of length-length/2.
+
+    // Match-node lead unit values, after masking off intermediate-value bits:
+
+    // 0000..002f: Branch node. If node!=0 then the length is node+1, otherwise
+    // the length is one more than the next unit.
+
+    // For a branch sub-node with at most this many entries, we drop down
+    // to a linear search.
+    static const int32_t kMaxBranchLinearSubNodeLength=5;
+
+    // 0030..003f: Linear-match node, match 1..16 units and continue reading the next node.
+    static const int32_t kMinLinearMatch=0x30;
+    static const int32_t kMaxLinearMatchLength=0x10;
+
+    // Match-node lead unit bits 14..6 for the optional intermediate value.
+    // If these bits are 0, then there is no intermediate value.
+    // Otherwise, see the *NodeValue* constants below.
+    static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;  // 0x0040
+    static const int32_t kNodeTypeMask=kMinValueLead-1;  // 0x003f
+
+    // A final-value node has bit 15 set.
+    static const int32_t kValueIsFinal=0x8000;
+
+    // Compact value: After testing and masking off bit 15, use the following thresholds.
+    static const int32_t kMaxOneUnitValue=0x3fff;
+
+    static const int32_t kMinTwoUnitValueLead=kMaxOneUnitValue+1;  // 0x4000
+    static const int32_t kThreeUnitValueLead=0x7fff;
+
+    static const int32_t kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1;  // 0x3ffeffff
+
+    // Compact intermediate-value integer, lead unit shared with a branch or linear-match node.
+    static const int32_t kMaxOneUnitNodeValue=0xff;
+    static const int32_t kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6);  // 0x4040
+    static const int32_t kThreeUnitNodeValueLead=0x7fc0;
+
+    static const int32_t kMaxTwoUnitNodeValue=
+        ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1;  // 0xfdffff
+
+    // Compact delta integers.
+    static const int32_t kMaxOneUnitDelta=0xfbff;
+    static const int32_t kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1;  // 0xfc00
+    static const int32_t kThreeUnitDeltaLead=0xffff;
+
+    static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1;  // 0x03feffff
+
+    // Fixed value referencing the UCharTrie words.
+    const UChar *uchars_;
+
+    // Iterator variables.
+
+    // Pointer to next trie unit to read. NULL if no more matches.
+    const UChar *pos_;
+    // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
+    int32_t remainingMatchLength_;
+};
+
+U_NAMESPACE_END
+
+#endif  // __UCHARTRIE_H__
--- a/icu4c/source/tools/toolutil/uchartriebuilder.cpp
+++ b/icu4c/source/tools/toolutil/uchartriebuilder.cpp
@ -0,0 +1,696 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  uchartriebuilder.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010nov14
+*   created by: Markus W. Scherer
+*
+* Builder class for UCharTrie dictionary trie.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/ustring.h"
+#include "cmemory.h"
+#include "uarrsort.h"
+#include "uchartrie.h"
+#include "uchartriebuilder.h"
+
+U_NAMESPACE_BEGIN
+
+/*
+ * Note: This builder implementation stores (string, value) pairs with full copies
+ * of the 16-bit-unit sequences, until the UCharTrie is built.
+ * It might(!) take less memory if we collected the data in a temporary, dynamic trie.
+ */
+
+// One (string, value) pair collected by the builder.
+// The string itself is not stored here: it lives in a shared UnicodeString
+// ("strings") as one length unit followed by the string's units,
+// and this element only remembers the offset of that length unit.
+class UCharTrieElement : public UMemory {
+public:
+    // Use compiler's default constructor, initializes nothing.
+
+    // Appends s (prefixed with its length) to strings and records its offset and val.
+    void setTo(const UnicodeString &s, int32_t val, UnicodeString &strings, UErrorCode &errorCode);
+
+    // Returns this element's string as a temporary substring of strings.
+    UnicodeString getString(const UnicodeString &strings) const {
+        int32_t length=strings[stringOffset];
+        return strings.tempSubString(stringOffset+1, length);
+    }
+    // Returns the length of this element's string, read from its length unit.
+    int32_t getStringLength(const UnicodeString &strings) const {
+        return strings[stringOffset];
+    }
+
+    // Returns the unit at the given index of this element's string.
+    // The +1 skips the length unit.
+    UChar charAt(int32_t index, const UnicodeString &strings) const {
+        return strings[stringOffset+1+index];
+    }
+
+    int32_t getValue() const { return value; }
+
+    // Compares this element's string with the other element's string.
+    int32_t compareStringTo(const UCharTrieElement &o, const UnicodeString &strings) const;
+
+private:
+    // The first strings unit contains the string length.
+    // (Compared with a stringLength field here, this saves 2 bytes per string.)
+    int32_t stringOffset;
+    int32_t value;
+};
+
+// Stores s in the shared strings pool as one length unit followed by the
+// string's units, and records the pool offset and val in this element.
+// Does nothing if errorCode already indicates a failure.
+// Sets U_INDEX_OUTOFBOUNDS_ERROR if s is longer than 0xffff units.
+void
+UCharTrieElement::setTo(const UnicodeString &s, int32_t val,
+                        UnicodeString &strings, UErrorCode &errorCode) {
+    if(U_SUCCESS(errorCode)) {
+        const int32_t unitCount=s.length();
+        if(unitCount<=0xffff) {
+            // The length must fit into the single unit which precedes the string.
+            stringOffset=strings.length();
+            value=val;
+            strings.append((UChar)unitCount).append(s);
+        } else {
+            // Too long: We store the length in 1 unit.
+            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        }
+    }
+}
+
+// Compares the strings of this element and of other, both stored in strings.
+// Returns the result of UnicodeString::compare().
+int32_t
+UCharTrieElement::compareStringTo(const UCharTrieElement &other, const UnicodeString &strings) const {
+    const UnicodeString &mine=getString(strings);
+    const UnicodeString &theirs=other.getString(strings);
+    return mine.compare(theirs);
+}
+
+// Releases the serialized-trie buffer and the array of collected elements.
+UCharTrieBuilder::~UCharTrieBuilder() {
+    uprv_free(uchars);  // allocated with uprv_malloc() in build()
+    delete[] elements;  // allocated with new[] in add()
+}
+
+// Adds a (string, value) pair to the builder.
+// @param s         The input string.
+// @param value     The value associated with s.
+// @param errorCode In/out error code. Nothing is done if it already indicates a failure.
+//                  Set to U_NO_WRITE_PERMISSION if the trie has already been built,
+//                  to U_MEMORY_ALLOCATION_ERROR if memory could not be allocated,
+//                  or to the error reported by UCharTrieElement::setTo().
+// @return *this
+UCharTrieBuilder &
+UCharTrieBuilder::add(const UnicodeString &s, int32_t value, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return *this;
+    }
+    if(ucharsLength>0) {
+        // Cannot add elements after building.
+        errorCode=U_NO_WRITE_PERMISSION;
+        return *this;
+    }
+    ucharsCapacity+=s.length()+1;  // Crude uchars preallocation estimate.
+    if(elementsLength==elementsCapacity) {
+        // Grow the elements array: start with 1024 slots, then quadruple.
+        int32_t newCapacity;
+        if(elementsCapacity==0) {
+            newCapacity=1024;
+        } else {
+            newCapacity=4*elementsCapacity;
+        }
+        UCharTrieElement *newElements=new UCharTrieElement[newCapacity];
+        if(newElements==NULL) {
+            // Keep the old array intact and do not touch the NULL pointer below.
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
+        if(elementsLength>0) {
+            uprv_memcpy(newElements, elements, elementsLength*sizeof(UCharTrieElement));
+        }
+        delete[] elements;
+        elements=newElements;
+        elementsCapacity=newCapacity;
+    }
+    elements[elementsLength].setTo(s, value, strings, errorCode);
+    if(U_FAILURE(errorCode)) {
+        // setTo() did not initialize the slot; do not count it as an element.
+        return *this;
+    }
+    ++elementsLength;
+    if(strings.isBogus()) {
+        // Appending to the shared strings pool ran out of memory.
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+    }
+    return *this;
+}
+
+U_CDECL_BEGIN
+
+// Comparison callback for uprv_sortArray(): orders two UCharTrieElement objects
+// by their strings. context points to the UnicodeString which holds all strings.
+static int32_t U_CALLCONV
+compareElementStrings(const void *context, const void *left, const void *right) {
+    const UCharTrieElement &lhs=*static_cast<const UCharTrieElement *>(left);
+    const UCharTrieElement &rhs=*static_cast<const UCharTrieElement *>(right);
+    const UnicodeString &pool=*static_cast<const UnicodeString *>(context);
+    return lhs.compareStringTo(rhs, pool);
+}
+
+U_CDECL_END
+
+// Builds the serialized trie for the elements added so far.
+// @param buildOption UDICTTRIE_BUILD_FAST writes the nodes directly;
+//                    otherwise (UDICTTRIE_BUILD_SMALL) the compact builder is used.
+// @param result      Set to a read-only alias of the builder's internal buffer
+//                    (setTo(FALSE, ...)), not a copy.
+// @param errorCode   In/out error code. Nothing is done if it already indicates a failure.
+//                    Set to U_INDEX_OUTOFBOUNDS_ERROR if no elements were added,
+//                    U_ILLEGAL_ARGUMENT_ERROR if a string was added more than once,
+//                    or U_MEMORY_ALLOCATION_ERROR if memory could not be allocated.
+// @return result
+UnicodeString &
+UCharTrieBuilder::build(UDictTrieBuildOption buildOption, UnicodeString &result, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return result;
+    }
+    if(ucharsLength>0) {
+        // Already built.
+        // The serialized trie occupies the last ucharsLength units of the buffer.
+        result.setTo(FALSE, uchars+(ucharsCapacity-ucharsLength), ucharsLength);
+        return result;
+    }
+    if(elementsLength==0) {
+        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return result;
+    }
+    if(strings.isBogus()) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+        return result;
+    }
+    // Sort the elements by their strings; the node writers require sorted input.
+    uprv_sortArray(elements, elementsLength, (int32_t)sizeof(UCharTrieElement),
+                   compareElementStrings, &strings,
+                   FALSE,  // need not be a stable sort
+                   &errorCode);
+    if(U_FAILURE(errorCode)) {
+        return result;
+    }
+    // Duplicate strings are not allowed.
+    // After sorting, duplicates are adjacent, so comparing neighbors suffices.
+    UnicodeString prev=elements[0].getString(strings);
+    for(int32_t i=1; i<elementsLength; ++i) {
+        UnicodeString current=elements[i].getString(strings);
+        if(prev==current) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return result;
+        }
+        prev.fastCopyFrom(current);
+    }
+    // Create and UChar-serialize the trie for the elements.
+    if(ucharsCapacity<1024) {
+        ucharsCapacity=1024;
+    }
+    // ucharsCapacity counts 16-bit units, hence *2 for the size in bytes.
+    // NOTE(review): a previous uchars buffer is not freed here; presumably uchars is
+    // still NULL whenever this line is reached - confirm for a failed earlier build.
+    uchars=reinterpret_cast<UChar *>(uprv_malloc(ucharsCapacity*2));
+    if(uchars==NULL) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+        return result;
+    }
+    if(buildOption==UDICTTRIE_BUILD_FAST) {
+        writeNode(0, elementsLength, 0);
+    } else /* UDICTTRIE_BUILD_SMALL */ {
+        createCompactBuilder(2*elementsLength, errorCode);
+        Node *root=makeNode(0, elementsLength, 0, errorCode);
+        if(U_SUCCESS(errorCode)) {
+            root->markRightEdgesFirst(-1);
+            root->write(*this);
+        }
+        deleteCompactBuilder();
+    }
+    // NOTE(review): uchars==NULL here presumably means that growing the buffer
+    // failed while writing - confirm against the write() functions.
+    if(uchars==NULL) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+    } else {
+        result.setTo(FALSE, uchars+(ucharsCapacity-ucharsLength), ucharsLength);
+    }
+    return result;
+}
+
+// Writes the trie node (and, recursively, all nodes below it) for the
+// [start..limit[ elements, starting at string index unitIndex.
+// Requires start<limit,
+// and all strings of the [start..limit[ elements must be sorted and
+// have a common prefix of length unitIndex.
+// Following nodes are written before the node's own units and lead unit;
+// apparently the data is serialized from the end of the buffer toward its start
+// (build() returns uchars+(ucharsCapacity-ucharsLength)).
+void
+UCharTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) {
+    UBool hasValue=FALSE;
+    int32_t value=0;
+    int32_t type;
+    // Because the elements are sorted, a string that ends at unitIndex
+    // can only be the first one of the range.
+    if(unitIndex==elements[start].getStringLength(strings)) {
+        // An intermediate or final value.
+        value=elements[start++].getValue();
+        if(start==limit) {
+            writeValueAndFinal(value, TRUE);  // final-value node
+            return;
+        }
+        hasValue=TRUE;
+    }
+    // Now all [start..limit[ strings are longer than unitIndex.
+    const UCharTrieElement &minElement=elements[start];
+    const UCharTrieElement &maxElement=elements[limit-1];
+    int32_t minUnit=minElement.charAt(unitIndex, strings);
+    int32_t maxUnit=maxElement.charAt(unitIndex, strings);
+    if(minUnit==maxUnit) {
+        // Linear-match node: All strings have the same character at unitIndex.
+        // Find how far the first and the last string agree; since the range is
+        // sorted, all strings in between share that prefix as well.
+        int32_t minStringLength=minElement.getStringLength(strings);
+        int32_t lastUnitIndex=unitIndex;
+        while(++lastUnitIndex<minStringLength &&
+                minElement.charAt(lastUnitIndex, strings)==
+                maxElement.charAt(lastUnitIndex, strings)) {}
+        // Write the node which follows the common units first.
+        writeNode(start, limit, lastUnitIndex);
+        // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
+        const UChar *s=minElement.getString(strings).getBuffer();
+        int32_t length=lastUnitIndex-unitIndex;
+        while(length>UCharTrie::kMaxLinearMatchLength) {
+            lastUnitIndex-=UCharTrie::kMaxLinearMatchLength;
+            length-=UCharTrie::kMaxLinearMatchLength;
+            write(s+lastUnitIndex, UCharTrie::kMaxLinearMatchLength);
+            // Lead unit for a full-length linear-match chunk without a value.
+            write(UCharTrie::kMinLinearMatch+UCharTrie::kMaxLinearMatchLength-1);
+        }
+        write(s+unitIndex, length);
+        type=UCharTrie::kMinLinearMatch+length-1;
+    } else {
+        // Branch node.
+        int32_t length=0;  // Number of different units at unitIndex.
+        int32_t i=start;
+        do {
+            UChar unit=elements[i++].charAt(unitIndex, strings);
+            // Skip all further strings with the same unit at unitIndex.
+            while(i<limit && unit==elements[i].charAt(unitIndex, strings)) {
+                ++i;
+            }
+            ++length;
+        } while(i<limit);
+        // length>=2 because minUnit!=maxUnit.
+        writeBranchSubNode(start, limit, unitIndex, length);
+        // The node stores length-1: in the lead unit if it fits below
+        // kMinLinearMatch, otherwise in a separate unit after a lead unit of 0.
+        if(--length<UCharTrie::kMinLinearMatch) {
+            type=length;
+        } else {
+            write(length);
+            type=0;
+        }
+    }
+    // Write the lead unit, combined with the intermediate value if there is one.
+    writeValueAndType(hasValue, value, type);
+}
+
+// Writes the branch sub-node for the [start..limit[ elements.
+// Requires start<limit && all strings longer than unitIndex &&
+// length different units at unitIndex.
+//
+// While there are more than UCharTrie::kMaxBranchLinearSubNodeLength units,
+// the range is split at its middle unit: the less-than half is written
+// recursively and this function continues with the greater-or-equal half.
+// The remaining units are then written as a list of units with
+// final values or jump deltas.
+// Output is produced back to front (ucharsLength counts from the end of the
+// buffer), so sub-nodes are written before the units and deltas that refer to them.
+void
+UCharTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) {
+    // One entry per split performed in the loop below:
+    // the middle unit and the write position (ucharsLength) of the less-than branch.
+    // NOTE(review): capacity 16 presumably suffices because each split halves
+    // the number of units -- confirm against kMaxBranchLinearSubNodeLength.
+    UChar middleUnits[16];
+    int32_t lessThan[16];
+    int32_t ltLength=0;
+    while(length>UCharTrie::kMaxBranchLinearSubNodeLength) {
+        // Branch on the middle unit.
+        // First, find the middle unit.
+        int32_t count=length/2;
+        int32_t i=start;
+        UChar unit;
+        // Skip over the elements of the first length/2 different units.
+        // No i<limit check: more units follow because length/2<length.
+        do {
+            unit=elements[i++].charAt(unitIndex, strings);
+            while(unit==elements[i].charAt(unitIndex, strings)) {
+                ++i;
+            }
+        } while(--count>0);
+        // Encode the less-than branch first.
+        unit=middleUnits[ltLength]=elements[i].charAt(unitIndex, strings);  // middle unit
+        writeBranchSubNode(start, i, unitIndex, length/2);
+        // Remember where the less-than branch starts, for the delta written at the end.
+        lessThan[ltLength]=ucharsLength;
+        ++ltLength;
+        // Continue for the greater-or-equal branch.
+        start=i;
+        length=length-length/2;
+    }
+    // For each unit, find its elements array start and whether it has a final value.
+    int32_t starts[UCharTrie::kMaxBranchLinearSubNodeLength];
+    UBool final[UCharTrie::kMaxBranchLinearSubNodeLength-1];
+    int32_t unitNumber=0;
+    do {
+        int32_t i=starts[unitNumber]=start;
+        UChar unit=elements[i++].charAt(unitIndex, strings);
+        while(unit==elements[i].charAt(unitIndex, strings)) {
+            ++i;
+        }
+        // Final: exactly one string has this unit, and that string ends right after it.
+        final[unitNumber]= start==i-1 && unitIndex+1==elements[start].getStringLength(strings);
+        start=i;
+    } while(++unitNumber<length-1);
+    // unitNumber==length-1, and the maxUnit elements range is [start..limit[
+    starts[unitNumber]=start;
+
+    // Write the sub-nodes in reverse order: The jump lengths are deltas from
+    // after their own positions, so if we wrote the minUnit sub-node first,
+    // then its jump delta would be larger.
+    // Instead we write the minUnit sub-node last, for a shorter delta.
+    // jumpTargets[] is only set (and later only read) for units without a final value.
+    int32_t jumpTargets[UCharTrie::kMaxBranchLinearSubNodeLength-1];
+    do {
+        --unitNumber;
+        if(!final[unitNumber]) {
+            writeNode(starts[unitNumber], starts[unitNumber+1], unitIndex+1);
+            jumpTargets[unitNumber]=ucharsLength;
+        }
+    } while(unitNumber>0);
+    // The maxUnit sub-node is written as the very last one because we do
+    // not jump for it at all.
+    unitNumber=length-1;
+    writeNode(start, limit, unitIndex+1);
+    write(elements[start].charAt(unitIndex, strings));
+    // Write the rest of this node's unit-value pairs.
+    while(--unitNumber>=0) {
+        start=starts[unitNumber];
+        int32_t value;
+        if(final[unitNumber]) {
+            // Write the final value for the one string ending with this unit.
+            value=elements[start].getValue();
+        } else {
+            // Write the delta to the start position of the sub-node.
+            value=ucharsLength-jumpTargets[unitNumber];
+        }
+        writeValueAndFinal(value, final[unitNumber]);
+        write(elements[start].charAt(unitIndex, strings));
+    }
+    // Write the split-branch nodes.
+    // Most recent split first, so that the first split ends up in front.
+    while(ltLength>0) {
+        --ltLength;
+        writeDelta(ucharsLength-lessThan[ltLength]);  // less-than
+        write(middleUnits[ltLength]);
+    }
+}
+
+// Builds the node object for the [start..limit[ elements, starting at unitIndex.
+// Requires start<limit,
+// and all strings of the [start..limit[ elements must be sorted and
+// have a common prefix of length unitIndex.
+// Returns the result of registerFinalValue() or registerNode(),
+// or NULL if errorCode already indicates a failure on entry.
+DictTrieBuilder::Node *
+UCharTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    UBool hasValue=FALSE;
+    int32_t value=0;
+    // The first string ends exactly at unitIndex: it contributes a value here.
+    if(unitIndex==elements[start].getStringLength(strings)) {
+        // An intermediate or final value.
+        value=elements[start++].getValue();
+        if(start==limit) {
+            // No longer strings follow: the value is final.
+            return registerFinalValue(value, errorCode);
+        }
+        hasValue=TRUE;
+    }
+    ValueNode *node;
+    // Now all [start..limit[ strings are longer than unitIndex.
+    const UCharTrieElement &minElement=elements[start];
+    const UCharTrieElement &maxElement=elements[limit-1];
+    int32_t minUnit=minElement.charAt(unitIndex, strings);
+    int32_t maxUnit=maxElement.charAt(unitIndex, strings);
+    if(minUnit==maxUnit) {
+        // Linear-match node: All strings have the same character at unitIndex.
+        // Extend the match for as long as the first and last strings agree
+        // (the strings are sorted, so all strings in between agree as well).
+        int32_t minStringLength=minElement.getStringLength(strings);
+        int32_t lastUnitIndex=unitIndex;
+        while(++lastUnitIndex<minStringLength &&
+                minElement.charAt(lastUnitIndex, strings)==
+                maxElement.charAt(lastUnitIndex, strings)) {}
+        // Build what follows the common sequence first; the match nodes point to it.
+        Node *nextNode=makeNode(start, limit, lastUnitIndex, errorCode);
+        // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength.
+        // NOTE(review): s is assumed to remain valid after this statement,
+        // that is, getString() is assumed to alias the strings buffer -- confirm.
+        const UChar *s=minElement.getString(strings).getBuffer();
+        int32_t length=lastUnitIndex-unitIndex;
+        // Chunks are created from the end of the sequence toward its start,
+        // each chunk linking to the one created before it.
+        while(length>UCharTrie::kMaxLinearMatchLength) {
+            lastUnitIndex-=UCharTrie::kMaxLinearMatchLength;
+            length-=UCharTrie::kMaxLinearMatchLength;
+            node=new UCTLinearMatchNode(
+                s+lastUnitIndex,
+                UCharTrie::kMaxLinearMatchLength,
+                nextNode);
+            // NOTE(review): no NULL check here; presumably registerNode()
+            // handles a failed allocation -- confirm.
+            node=(ValueNode *)registerNode(node, errorCode);
+            nextNode=node;
+        }
+        // The first chunk; it is the one that carries the intermediate value, if any.
+        node=new UCTLinearMatchNode(s+unitIndex, length, nextNode);
+    } else {
+        // Branch node.
+        int32_t length=0;  // Number of different units at unitIndex.
+        int32_t i=start;
+        do {
+            UChar unit=elements[i++].charAt(unitIndex, strings);
+            while(i<limit && unit==elements[i].charAt(unitIndex, strings)) {
+                ++i;
+            }
+            ++length;
+        } while(i<limit);
+        // length>=2 because minUnit!=maxUnit.
+        Node *subNode=makeBranchSubNode(start, limit, unitIndex, length, errorCode);
+        node=new UCTBranchHeadNode(length, subNode);
+    }
+    // Attach the intermediate value found above, unless the allocation failed.
+    if(hasValue && node!=NULL) {
+        node->setValue(value);
+    }
+    return registerNode(node, errorCode);
+}
+
+// Builds the node objects for a branch over the [start..limit[ elements.
+// Requires start<limit && all strings longer than unitIndex &&
+// length different units at unitIndex.
+// Mirrors writeBranchSubNode(): the range is halved at its middle unit while
+// it has more than UCharTrie::kMaxBranchLinearSubNodeLength units, and the
+// remaining units go into one list-branch node.
+// Returns the outermost node (split-branch or list-branch) as returned by
+// registerNode(), or NULL on failure.
+DictTrieBuilder::Node *
+UCharTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
+                                    int32_t length, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    // One entry per split performed in the loop below:
+    // the middle unit and the node for the less-than half.
+    UChar middleUnits[16];
+    Node *lessThan[16];
+    int32_t ltLength=0;
+    while(length>UCharTrie::kMaxBranchLinearSubNodeLength) {
+        // Branch on the middle unit.
+        // First, find the middle unit.
+        int32_t count=length/2;
+        int32_t i=start;
+        UChar unit;
+        // Skip over the elements of the first length/2 different units.
+        // No i<limit check: more units follow because length/2<length.
+        do {
+            unit=elements[i++].charAt(unitIndex, strings);
+            while(unit==elements[i].charAt(unitIndex, strings)) {
+                ++i;
+            }
+        } while(--count>0);
+        // Create the less-than branch.
+        unit=middleUnits[ltLength]=elements[i].charAt(unitIndex, strings);  // middle unit
+        lessThan[ltLength]=makeBranchSubNode(start, i, unitIndex, length/2, errorCode);
+        ++ltLength;
+        // Continue for the greater-or-equal branch.
+        start=i;
+        length=length-length/2;
+    }
+    // A recursive call above may have failed.
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+    UCTListBranchNode *listNode=new UCTListBranchNode();
+    if(listNode==NULL) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    // For each unit, find its elements array start and whether it has a final value.
+    int32_t unitNumber=0;
+    do {
+        int32_t i=start;
+        UChar unit=elements[i++].charAt(unitIndex, strings);
+        while(unit==elements[i].charAt(unitIndex, strings)) {
+            ++i;
+        }
+        if(start==i-1 && unitIndex+1==elements[start].getStringLength(strings)) {
+            // Exactly one string has this unit and ends right after it: final value.
+            listNode->add(unit, elements[start].getValue());
+        } else {
+            // Otherwise the unit leads to a sub-node for the [start..i[ elements.
+            listNode->add(unit, makeNode(start, i, unitIndex+1, errorCode));
+        }
+        start=i;
+    } while(++unitNumber<length-1);
+    // unitNumber==length-1, and the maxUnit elements range is [start..limit[
+    UChar unit=elements[start].charAt(unitIndex, strings);
+    if(start==limit-1 && unitIndex+1==elements[start].getStringLength(strings)) {
+        listNode->add(unit, elements[start].getValue());
+    } else {
+        listNode->add(unit, makeNode(start, limit, unitIndex+1, errorCode));
+    }
+    Node *node=registerNode(listNode, errorCode);
+    // Create the split-branch nodes.
+    // Most recent split first, so that the first split becomes the outermost node.
+    while(ltLength>0) {
+        --ltLength;
+        node=registerNode(
+            new UCTSplitBranchNode(middleUnits[ltLength], lessThan[ltLength], node), errorCode);
+    }
+    return node;
+}
+
+void
+UCharTrieBuilder::UCTFinalValueNode::write(DictTrieBuilder &builder) {
+    // A final-value node is just its value, written with the "final" flag set.
+    // The position returned by the builder becomes this node's offset.
+    offset=static_cast<UCharTrieBuilder &>(builder).writeValueAndFinal(value, TRUE);
+}
+
+UCharTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode)
+        : LinearMatchNode(len, nextNode), s(units) {
+    // Fold the hash of the len match units into the hash set up by the base class.
+    hash*=37;
+    hash+=uhash_hashUCharsN(units, len);
+}
+
+UBool
+UCharTrieBuilder::UCTLinearMatchNode::operator==(const Node &other) const {
+    // Two linear-match nodes are equal if the base class considers them equal
+    // and they match the same sequence of units.
+    if(this!=&other) {
+        if(!LinearMatchNode::operator==(other)) {
+            return FALSE;
+        }
+        const UCTLinearMatchNode &that=(const UCTLinearMatchNode &)other;
+        return u_memcmp(s, that.s, length)==0;
+    }
+    return TRUE;  // Same object.
+}
+
+void
+UCharTrieBuilder::UCTLinearMatchNode::write(DictTrieBuilder &builder) {
+    // Written back to front: first the node that follows the match sequence,
+    // then the units to match, then the lead unit with the optional value.
+    next->write(builder);
+    UCharTrieBuilder &trieBuilder=static_cast<UCharTrieBuilder &>(builder);
+    trieBuilder.write(s, length);
+    const int32_t nodeType=UCharTrie::kMinLinearMatch+length-1;
+    offset=trieBuilder.writeValueAndType(hasValue, value, nodeType);
+}
+
+// Writes this list-branch node: the sub-nodes of its units first, then the
+// units with their final values or jump deltas.
+// Output is produced back to front, so the unit written last (the first unit)
+// determines this node's offset.
+void
+UCharTrieBuilder::UCTListBranchNode::write(DictTrieBuilder &builder) {
+    UCharTrieBuilder &b=(UCharTrieBuilder &)builder;
+    // Write the sub-nodes in reverse order: The jump lengths are deltas from
+    // after their own positions, so if we wrote the minUnit sub-node first,
+    // then its jump delta would be larger.
+    // Instead we write the minUnit sub-node last, for a shorter delta.
+    int32_t unitNumber=length-1;
+    // equal[] holds the sub-node per unit; NULL means that the unit has a final value.
+    Node *rightEdge=equal[unitNumber];
+    // NOTE(review): firstEdgeNumber/rightEdgeNumber presumably delimit the
+    // range of nodes that rightEdge->write() will write further below,
+    // so that they are not written twice -- confirm in DictTrieBuilder.
+    int32_t rightEdgeNumber= rightEdge==NULL ? firstEdgeNumber : rightEdge->getOffset();
+    do {
+        --unitNumber;
+        if(equal[unitNumber]!=NULL) {
+            equal[unitNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder);
+        }
+    } while(unitNumber>0);
+    // The maxUnit sub-node is written as the very last one because we do
+    // not jump for it at all.
+    unitNumber=length-1;
+    if(rightEdge==NULL) {
+        b.writeValueAndFinal(values[unitNumber], TRUE);
+    } else {
+        rightEdge->write(builder);
+    }
+    b.write(units[unitNumber]);
+    // Write the rest of this node's unit-value pairs.
+    while(--unitNumber>=0) {
+        int32_t value;
+        UBool isFinal;
+        if(equal[unitNumber]==NULL) {
+            // Write the final value for the one string ending with this unit.
+            value=values[unitNumber];
+            isFinal=TRUE;
+        } else {
+            // Write the delta to the start position of the sub-node.
+            U_ASSERT(equal[unitNumber]->getOffset()>0);
+            value=b.ucharsLength-equal[unitNumber]->getOffset();
+            isFinal=FALSE;
+        }
+        b.writeValueAndFinal(value, isFinal);
+        // Overwritten on each iteration; the final value is the position
+        // after writing the first unit.
+        offset=b.write(units[unitNumber]);
+    }
+}
+
+void
+UCharTrieBuilder::UCTSplitBranchNode::write(DictTrieBuilder &builder) {
+    // The less-than branch goes first (farther back in the output) ...
+    lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder);
+    // ... and the greater-or-equal branch last because we do not jump for it at all.
+    greaterOrEqual->write(builder);
+    // Finally this node itself: the delta to the less-than branch, then the comparison unit.
+    UCharTrieBuilder &trieBuilder=static_cast<UCharTrieBuilder &>(builder);
+    U_ASSERT(lessThan->getOffset()>0);
+    const int32_t lessThanDelta=trieBuilder.ucharsLength-lessThan->getOffset();
+    trieBuilder.writeDelta(lessThanDelta);
+    offset=trieBuilder.write(unit);
+}
+
+void
+UCharTrieBuilder::UCTBranchHeadNode::write(DictTrieBuilder &builder) {
+    // The branch sub-node structure goes first, then the head in front of it.
+    next->write(builder);
+    UCharTrieBuilder &trieBuilder=static_cast<UCharTrieBuilder &>(builder);
+    if(length>UCharTrie::kMinLinearMatch) {
+        // length-1 does not fit into the node type:
+        // write it as a separate unit, and use node type 0.
+        trieBuilder.write(length-1);
+        offset=trieBuilder.writeValueAndType(hasValue, value, 0);
+    } else {
+        // Small branch: length-1 is the node type itself.
+        offset=trieBuilder.writeValueAndType(hasValue, value, length-1);
+    }
+}
+
+// Makes sure that the uchars buffer can hold at least length units.
+// The serialized units sit at the end of the buffer, so after growing they
+// are copied to the end of the new buffer.
+// Returns FALSE if this or an earlier memory allocation failed.
+UBool
+UCharTrieBuilder::ensureCapacity(int32_t length) {
+    if(uchars==NULL) {
+        return FALSE;  // previous memory allocation had failed
+    }
+    if(length<=ucharsCapacity) {
+        return TRUE;  // enough room already
+    }
+    // Keep doubling until the capacity exceeds the requested length.
+    int32_t grownCapacity=ucharsCapacity;
+    do {
+        grownCapacity*=2;
+    } while(grownCapacity<=length);
+    UChar *grownUChars=reinterpret_cast<UChar *>(uprv_malloc(grownCapacity*2));
+    if(grownUChars==NULL) {
+        // unable to allocate memory
+        uprv_free(uchars);
+        uchars=NULL;
+        return FALSE;
+    }
+    const UChar *oldStart=uchars+(ucharsCapacity-ucharsLength);
+    UChar *newStart=grownUChars+(grownCapacity-ucharsLength);
+    u_memcpy(newStart, oldStart, ucharsLength);
+    uprv_free(uchars);
+    uchars=grownUChars;
+    ucharsCapacity=grownCapacity;
+    return TRUE;
+}
+
+// Writes one unit in front of the units written so far.
+// Returns the resulting ucharsLength, which is unchanged if memory could not be allocated.
+int32_t
+UCharTrieBuilder::write(int32_t unit) {
+    const int32_t lengthAfter=ucharsLength+1;
+    if(!ensureCapacity(lengthAfter)) {
+        return ucharsLength;
+    }
+    ucharsLength=lengthAfter;
+    uchars[ucharsCapacity-lengthAfter]=(UChar)unit;
+    return lengthAfter;
+}
+
+// Writes length units from s, in their original order, in front of the units written so far.
+// Returns the resulting ucharsLength, which is unchanged if memory could not be allocated.
+int32_t
+UCharTrieBuilder::write(const UChar *s, int32_t length) {
+    const int32_t lengthAfter=ucharsLength+length;
+    if(!ensureCapacity(lengthAfter)) {
+        return ucharsLength;
+    }
+    ucharsLength=lengthAfter;
+    u_memcpy(uchars+(ucharsCapacity-lengthAfter), s, length);
+    return lengthAfter;
+}
+
+// Writes the value i in one, two or three units,
+// with the final flag stored in bit 15 of the lead unit.
+// Returns the ucharsLength after writing.
+int32_t
+UCharTrieBuilder::writeValueAndFinal(int32_t i, UBool final) {
+    UChar units[3];
+    int32_t count;
+    const UBool needsThreeUnits= i<0 || i>UCharTrie::kMaxTwoUnitValue;
+    if(needsThreeUnits) {
+        // Fixed lead unit followed by the full 32-bit value.
+        units[0]=(UChar)(UCharTrie::kThreeUnitValueLead);
+        units[1]=(UChar)(i>>16);
+        units[2]=(UChar)i;
+        count=3;
+    } else if(i>UCharTrie::kMaxOneUnitValue) {
+        // The upper bits go into the lead unit, the lower 16 bits follow.
+        units[0]=(UChar)(UCharTrie::kMinTwoUnitValueLead+(i>>16));
+        units[1]=(UChar)i;
+        count=2;
+    } else {
+        // Small value: stored directly in the single unit.
+        units[0]=(UChar)(i);
+        count=1;
+    }
+    units[0]=(UChar)(units[0]|(final<<15));
+    return write(units, count);
+}
+
+// Writes a node lead unit that combines the node type (the node parameter)
+// with an optional intermediate value in one, two or three units.
+// Without a value, only the node type is written.
+// Returns the ucharsLength after writing.
+int32_t
+UCharTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
+    if(!hasValue) {
+        return write(node);
+    }
+    UChar units[3];
+    int32_t count;
+    const UBool needsThreeUnits= value<0 || value>UCharTrie::kMaxTwoUnitNodeValue;
+    if(needsThreeUnits) {
+        // Fixed lead unit followed by the full 32-bit value.
+        units[0]=(UChar)(UCharTrie::kThreeUnitNodeValueLead);
+        units[1]=(UChar)(value>>16);
+        units[2]=(UChar)value;
+        count=3;
+    } else if(value>UCharTrie::kMaxOneUnitNodeValue) {
+        // The upper value bits go into the lead unit above the node type bits.
+        units[0]=(UChar)(UCharTrie::kMinTwoUnitNodeValueLead+((value>>10)&0x7fc0));
+        units[1]=(UChar)value;
+        count=2;
+    } else {
+        // Small value: value+1 is stored above the low 6 bits of the lead unit.
+        units[0]=(UChar)((value+1)<<6);
+        count=1;
+    }
+    units[0]|=(UChar)node;
+    return write(units, count);
+}
+
+// Writes the non-negative jump delta i in one, two or three units.
+// The last unit always holds the low 16 bits of i.
+// Returns the ucharsLength after writing.
+int32_t
+UCharTrieBuilder::writeDelta(int32_t i) {
+    U_ASSERT(i>=0);
+    UChar units[3];
+    int32_t leadCount=0;  // Number of units in front of the low 16 bits.
+    if(i>UCharTrie::kMaxOneUnitDelta) {
+        if(i<=UCharTrie::kMaxTwoUnitDelta) {
+            units[0]=(UChar)(UCharTrie::kMinTwoUnitDeltaLead+(i>>16));
+            leadCount=1;
+        } else {
+            units[0]=(UChar)(UCharTrie::kThreeUnitDeltaLead);
+            units[1]=(UChar)(i>>16);
+            leadCount=2;
+        }
+    }
+    units[leadCount]=(UChar)i;
+    return write(units, leadCount+1);
+}
+
+U_NAMESPACE_END
--- a/icu4c/source/tools/toolutil/uchartriebuilder.h
+++ b/icu4c/source/tools/toolutil/uchartriebuilder.h
@ -0,0 +1,112 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  uchartriebuilder.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010nov14
+*   created by: Markus W. Scherer
+*
+* Builder class for UCharTrie dictionary trie.
+*/
+
+#ifndef __UCHARTRIEBUILDER_H__
+#define __UCHARTRIEBUILDER_H__
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "dicttriebuilder.h"
+
+U_NAMESPACE_BEGIN
+
+class UCharTrieElement;
+
+/**
+ * Builder class for the UChar-serialized form of a UCharTrie.
+ * (string, value) pairs are collected with add() and serialized with build().
+ *
+ * The builder owns heap-allocated buffers (elements, uchars) and must not be
+ * copied: the copy constructor and the assignment operator are declared
+ * private and are not implemented.
+ */
+class U_TOOLUTIL_API UCharTrieBuilder : public DictTrieBuilder {
+public:
+    /** Constructs an empty builder; no buffers are allocated yet. */
+    UCharTrieBuilder()
+            : elements(NULL), elementsCapacity(0), elementsLength(0),
+              uchars(NULL), ucharsCapacity(0), ucharsLength(0) {}
+    ~UCharTrieBuilder();
+
+    /**
+     * Adds a (string, value) pair.
+     * @param s The input string.
+     * @param value The value associated with this string.
+     * @param errorCode Standard ICU error code.
+     * @return *this
+     */
+    UCharTrieBuilder &add(const UnicodeString &s, int32_t value, UErrorCode &errorCode);
+
+    /**
+     * Builds the trie for the pairs added so far.
+     * NOTE(review): presumably the serialized trie is returned via result -- confirm
+     * against the implementation.
+     * @param buildOption Build option, see UDictTrieBuildOption.
+     * @param result Receives the output.
+     * @param errorCode Standard ICU error code.
+     * @return result
+     */
+    UnicodeString &build(UDictTrieBuildOption buildOption, UnicodeString &result, UErrorCode &errorCode);
+
+    /**
+     * Removes all collected strings and resets the element and output lengths.
+     * The allocated buffers are kept for reuse.
+     * @return *this
+     */
+    UCharTrieBuilder &clear() {
+        strings.remove();
+        elementsLength=0;
+        ucharsLength=0;
+        return *this;
+    }
+
+private:
+    // No copying: The builder owns the elements and uchars buffers,
+    // and a member-wise copy would free them twice.
+    UCharTrieBuilder(const UCharTrieBuilder &other);  // not implemented
+    UCharTrieBuilder &operator=(const UCharTrieBuilder &other);  // not implemented
+
+    // Direct serialization of the [start..limit[ elements, without node objects.
+    void writeNode(int32_t start, int32_t limit, int32_t unitIndex);
+    void writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);
+
+    // Construction of node objects for the [start..limit[ elements.
+    Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);
+    Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
+                            int32_t length, UErrorCode &errorCode);
+
+    // Low-level output functions.
+    // The write functions return the ucharsLength after writing.
+    UBool ensureCapacity(int32_t length);
+    int32_t write(int32_t unit);
+    int32_t write(const UChar *s, int32_t length);
+    int32_t writeValueAndFinal(int32_t i, UBool final);
+    int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node);
+    int32_t writeDelta(int32_t i);
+
+    // Compacting builder.
+    // Node subclasses which know how to write themselves in the UCharTrie format.
+    class UCTFinalValueNode : public FinalValueNode {
+    public:
+        UCTFinalValueNode(int32_t v) : FinalValueNode(v) {}
+        virtual void write(DictTrieBuilder &builder);
+    };
+
+    class UCTLinearMatchNode : public LinearMatchNode {
+    public:
+        UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode);
+        virtual UBool operator==(const Node &other) const;
+        virtual void write(DictTrieBuilder &builder);
+    private:
+        const UChar *s;  // The units to match; not owned by this node.
+    };
+
+    class UCTListBranchNode : public ListBranchNode {
+    public:
+        UCTListBranchNode() : ListBranchNode() {}
+        virtual void write(DictTrieBuilder &builder);
+    };
+
+    class UCTSplitBranchNode : public SplitBranchNode {
+    public:
+        UCTSplitBranchNode(UChar middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
+                : SplitBranchNode(middleUnit, lessThanNode, greaterOrEqualNode) {}
+        virtual void write(DictTrieBuilder &builder);
+    };
+
+    class UCTBranchHeadNode : public BranchHeadNode {
+    public:
+        UCTBranchHeadNode(int32_t len, Node *subNode) : BranchHeadNode(len, subNode) {}
+        virtual void write(DictTrieBuilder &builder);
+    };
+
+    virtual Node *createFinalValueNode(int32_t value) const { return new UCTFinalValueNode(value); }
+
+    // Storage for the added strings, and the array of elements that refer to them.
+    UnicodeString strings;
+    UCharTrieElement *elements;
+    int32_t elementsCapacity;
+    int32_t elementsLength;
+
+    // UChar serialization of the trie.
+    // Grows from the back: ucharsLength measures from the end of the buffer!
+    UChar *uchars;
+    int32_t ucharsCapacity;
+    int32_t ucharsLength;
+};
+
+U_NAMESPACE_END
+
+#endif  // __UCHARTRIEBUILDER_H__
--- a/icu4c/source/tools/toolutil/uchartrieiterator.cpp
+++ b/icu4c/source/tools/toolutil/uchartrieiterator.cpp
@ -0,0 +1,181 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  uchartrieiterator.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010nov15
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "uchartrie.h"
+#include "uchartrieiterator.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+// Iterates from the root of the trie: the current and initial positions are
+// the start of trieUChars, and there is no pending linear-match node
+// (remaining match length -1).
+UCharTrieIterator::UCharTrieIterator(const UChar *trieUChars, int32_t maxStringLength,
+                                     UErrorCode &errorCode)
+        : uchars_(trieUChars),
+          pos_(uchars_), initialPos_(uchars_),
+          remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
+          skipValue_(FALSE),
+          maxLength_(maxStringLength), value_(0), stack_(errorCode) {}
+
+// Iterates from the current state of the trie, which is copied.
+// If the trie is in the middle of a linear-match node, then the remaining
+// units of that node (limited to maxStringLength) become the start of the string.
+UCharTrieIterator::UCharTrieIterator(const UCharTrie &trie, int32_t maxStringLength,
+                                     UErrorCode &errorCode)
+        : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_),
+          remainingMatchLength_(trie.remainingMatchLength_),
+          initialRemainingMatchLength_(trie.remainingMatchLength_),
+          skipValue_(FALSE),
+          maxLength_(maxStringLength), value_(0), stack_(errorCode) {
+    if(remainingMatchLength_>=0) {
+        // Pending linear-match node, append remaining UChars to str.
+        // remainingMatchLength_ is the actual remaining match length minus 1.
+        int32_t pendingLength=remainingMatchLength_+1;
+        if(maxLength_>0 && pendingLength>maxLength_) {
+            pendingLength=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
+        }
+        str_.append(pos_, pendingLength);
+        pos_+=pendingLength;
+        remainingMatchLength_-=pendingLength;
+    }
+}
+
+UCharTrieIterator &UCharTrieIterator::reset() {
+    // Forget all backtracking state and any pending intermediate value.
+    stack_.setSize(0);
+    skipValue_=FALSE;
+    // Go back to the initial position and linear-match state.
+    pos_=initialPos_;
+    remainingMatchLength_=initialRemainingMatchLength_;
+    // Keep only the units of the initially pending linear-match node, if any,
+    // limited to the maximum string length.
+    int32_t pendingLength=remainingMatchLength_+1;  // Remaining match length.
+    if(maxLength_>0 && pendingLength>maxLength_) {
+        pendingLength=maxLength_;
+    }
+    str_.truncate(pendingLength);
+    pos_+=pendingLength;
+    remainingMatchLength_-=pendingLength;
+    return *this;
+}
+
+// Finds the next (string, value) pair.
+// Returns TRUE if one was found; it is then available via str_ and value_.
+// Returns FALSE if errorCode indicates a failure on entry,
+// or if the iteration is finished (pos_==NULL and the stack is empty).
+UBool
+UCharTrieIterator::next(UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
+        return FALSE;
+    }
+    const UChar *pos=pos_;
+    if(pos==NULL) {
+        // The previous call ended a path (final value or truncation): backtrack.
+        if(stack_.isEmpty()) {
+            return FALSE;
+        }
+        // Pop the state off the stack and continue with the next outbound edge of
+        // the branch node.
+        int32_t stackSize=stack_.size();
+        int32_t length=stack_.elementAti(stackSize-1);
+        pos=uchars_+stack_.elementAti(stackSize-2);
+        stack_.setSize(stackSize-2);
+        // Bits 15..0: string length before the branch; bits 31..16: remaining branch length.
+        str_.truncate(length&0xffff);
+        length=(int32_t)((uint32_t)length>>16);
+        if(length>1) {
+            pos=branchNext(pos, length, errorCode);
+            if(pos==NULL) {
+                return TRUE;  // Reached a final value.
+            }
+        } else {
+            // Last edge of the branch: its unit is followed directly by its node.
+            str_.append(*pos++);
+        }
+    }
+    if(remainingMatchLength_>=0) {
+        // We only get here if we started in a pending linear-match node
+        // with more than maxLength remaining units.
+        return truncateAndStop();
+    }
+    for(;;) {
+        int32_t node=*pos++;
+        if(node>=UCharTrie::kMinValueLead) {
+            if(skipValue_) {
+                // This value was delivered by the previous call:
+                // skip it and continue with the node type.
+                pos=UCharTrie::skipNodeValue(pos, node);
+                node&=UCharTrie::kNodeTypeMask;
+                skipValue_=FALSE;
+            } else {
+                // Deliver value for the string so far.
+                UBool isFinal=(UBool)(node>>15);
+                if(isFinal) {
+                    value_=UCharTrie::readValue(pos, node&0x7fff);
+                } else {
+                    value_=UCharTrie::readNodeValue(pos, node);
+                }
+                if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
+                    // Nothing more to deliver on this path; backtrack next time.
+                    pos_=NULL;
+                } else {
+                    // We cannot skip the value right here because it shares its
+                    // lead unit with a match node which we have to evaluate
+                    // next time.
+                    // Instead, keep pos_ on the node lead unit itself.
+                    pos_=pos-1;
+                    skipValue_=TRUE;
+                }
+                return TRUE;
+            }
+        }
+        if(maxLength_>0 && str_.length()==maxLength_) {
+            return truncateAndStop();
+        }
+        if(node<UCharTrie::kMinLinearMatch) {
+            // Branch node: node is the number of edges minus 1,
+            // or 0 if that number is stored in the next unit.
+            if(node==0) {
+                node=*pos++;
+            }
+            pos=branchNext(pos, node+1, errorCode);
+            if(pos==NULL) {
+                return TRUE;  // Reached a final value.
+            }
+        } else {
+            // Linear-match node, append length units to str_.
+            int32_t length=node-UCharTrie::kMinLinearMatch+1;
+            if(maxLength_>0 && str_.length()+length>maxLength_) {
+                // Append only as many units as still fit, then stop.
+                str_.append(pos, maxLength_-str_.length());
+                return truncateAndStop();
+            }
+            str_.append(pos, length);
+            pos+=length;
+        }
+    }
+}
+
+// Branch node, needs to take the first outbound edge and push state for the rest.
+// pos points to the branch data, length is the number of remaining edges.
+// Appends the unit of the edge taken to str_.
+// Returns the position of the node that the edge leads to,
+// or NULL if the edge has a final value; in that case value_ is set and pos_ is NULL.
+const UChar *
+UCharTrieIterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) {
+    // Descend through split-branch nodes, always into the less-than half.
+    while(length>UCharTrie::kMaxBranchLinearSubNodeLength) {
+        ++pos;  // ignore the comparison unit
+        // Push state for the greater-or-equal edge.
+        stack_.addElement((int32_t)(UCharTrie::skipDelta(pos)-uchars_), errorCode);
+        stack_.addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
+        // Follow the less-than edge.
+        length>>=1;
+        pos=UCharTrie::jumpByDelta(pos);
+    }
+    // List of key-value pairs where values are either final values or jump deltas.
+    // Read the first (key, value) pair.
+    UChar trieUnit=*pos++;
+    int32_t node=*pos++;
+    UBool isFinal=(UBool)(node>>15);
+    int32_t value=UCharTrie::readValue(pos, node&=0x7fff);
+    pos=UCharTrie::skipValue(pos, node);
+    // Push state for the remaining length-1 edges, which start at pos.
+    stack_.addElement((int32_t)(pos-uchars_), errorCode);
+    stack_.addElement(((length-1)<<16)|str_.length(), errorCode);
+    str_.append(trieUnit);
+    if(isFinal) {
+        pos_=NULL;
+        value_=value;
+        return NULL;
+    } else {
+        // The value is a jump delta relative to the position after it.
+        return pos+value;
+    }
+}
+
+U_NAMESPACE_END
--- a/icu4c/source/tools/toolutil/uchartrieiterator.h
+++ b/icu4c/source/tools/toolutil/uchartrieiterator.h
@ -0,0 +1,121 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  uchartrieiterator.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010nov15
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __UCHARTRIEITERATOR_H__
+#define __UCHARTRIEITERATOR_H__
+
+/**
+ * \file
+ * \brief C++ API: UCharTrie iterator for all of its (string, value) pairs.
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "uchartrie.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Iterator for all of the (string, value) pairs in a UCharTrie.
+ * Call next() and, after it returns TRUE, read the current pair
+ * with getString() and getValue().
+ */
+class U_TOOLUTIL_API UCharTrieIterator : public UMemory {
+public:
+    /**
+     * Iterates from the root of a UChar-serialized UCharTrie.
+     * @param trieUChars The trie UChars.
+     * @param maxStringLength If 0, the iterator returns full strings.
+     *                        Otherwise, the iterator returns strings with this maximum length.
+     * @param errorCode Standard ICU error code. Its input value must
+     *                  pass the U_SUCCESS() test, or else the function returns
+     *                  immediately. Check for U_FAILURE() on output or use with
+     *                  function chaining. (See User Guide for details.)
+     */
+    UCharTrieIterator(const UChar *trieUChars, int32_t maxStringLength, UErrorCode &errorCode);
+
+    /**
+     * Iterates from the current state of the specified UCharTrie.
+     * @param trie The trie whose state will be copied for iteration.
+     * @param maxStringLength If 0, the iterator returns full strings.
+     *                        Otherwise, the iterator returns strings with this maximum length.
+     * @param errorCode Standard ICU error code. Its input value must
+     *                  pass the U_SUCCESS() test, or else the function returns
+     *                  immediately. Check for U_FAILURE() on output or use with
+     *                  function chaining. (See User Guide for details.)
+     */
+    UCharTrieIterator(const UCharTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
+
+    /**
+     * Resets this iterator to its initial state.
+     * @return *this
+     */
+    UCharTrieIterator &reset();
+
+    /**
+     * Finds the next (string, value) pair if there is one.
+     *
+     * If the string is truncated to the maximum length and does not
+     * have a real value, then the value is set to -1.
+     * In this case, this "not a real value" is indistinguishable from
+     * a real value of -1.
+     * @param errorCode Standard ICU error code. If its input value does not
+     *                  pass the U_SUCCESS() test, then the function returns FALSE
+     *                  immediately.
+     * @return TRUE if there is another element.
+     */
+    UBool next(UErrorCode &errorCode);
+
+    /**
+     * @return TRUE if there are more elements.
+     */
+    UBool hasNext() const { return pos_!=NULL || !stack_.isEmpty(); }
+
+    /**
+     * @return the string for the last successful next()
+     */
+    const UnicodeString &getString() const { return str_; }
+    /**
+     * @return the value for the last successful next()
+     */
+    int32_t getValue() const { return value_; }
+
+private:
+    // Ends the current path without a real value:
+    // the next call to next() will backtrack via the stack.
+    UBool truncateAndStop() {
+        pos_=NULL;
+        value_=-1;  // no real value for str
+        return TRUE;
+    }
+
+    const UChar *branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode);
+
+    const UChar *uchars_;  // Start of the trie UChars; stack offsets are relative to this.
+    const UChar *pos_;  // Current position; NULL when next() must backtrack.
+    const UChar *initialPos_;  // Position to which reset() returns.
+    int32_t remainingMatchLength_;  // Remaining linear-match length minus 1; negative if none.
+    int32_t initialRemainingMatchLength_;  // Value to which reset() returns.
+    UBool skipValue_;  // Skip intermediate value which was already delivered.
+
+    UnicodeString str_;  // The string for the current path.
+    int32_t maxLength_;  // Maximum string length; 0 for no limit.
+    int32_t value_;  // The value for the last successful next().
+
+    // The stack stores pairs of integers for backtracking to another
+    // outbound edge of a branch node.
+    // The first integer is an offset from uchars_.
+    // The second integer has the str_.length() from before the node in bits 15..0,
+    // and the remaining branch length in bits 31..16.
+    // (We could store the remaining branch length minus 1 in bits 30..16 and not use the sign bit,
+    // but the code looks more confusing that way.)
+    UVector32 stack_;
+};
+
+U_NAMESPACE_END
+
+#endif  // __UCHARTRIEITERATOR_H__
--- a/icu4c/source/tools/toolutil/writesrc.c
+++ b/icu4c/source/tools/toolutil/writesrc.c
@ -216,3 +216,42 @@ usrc_writeUTrie2Struct(FILE *f,
        fputs(postfix, f);
    }
 }
+
+/*
+ * Writes the length bytes at p as a comma-separated list of C initializers.
+ * Bytes 0..0x1f are written as decimal numbers, all others as character
+ * constants in single quotes. The single quote and the backslash are
+ * escaped with a backslash so that the output is valid C source.
+ * prefix and postfix are optional (can be NULL); prefix is used as a printf
+ * format string which may contain one %ld-style field for the array length.
+ */
+U_CAPI void U_EXPORT2
+usrc_writeArrayOfMostlyInvChars(FILE *f,
+                                const char *prefix,
+                                const char *p, int32_t length,
+                                const char *postfix) {
+    int32_t i, col;
+    int prev2, prev, c;
+
+    if(prefix!=NULL) {
+        fprintf(f, prefix, (long)length);
+    }
+    prev2=prev=-1;
+    for(i=col=0; i<length; ++i, ++col) {
+        c=(uint8_t)p[i];
+        if(i>0) {
+            /* Break long lines. Try to break at interesting places, to minimize revision diffs. */
+            if(
+                /* Very long line. */
+                col>=32 ||
+                /* Long line, break after terminating NUL. */
+                (col>=24 && prev2>=0x20 && prev==0) ||
+                /* Medium-long line, break before non-NUL, non-character byte. */
+                (col>=16 && (prev==0 || prev>=0x20) && 0<c && c<0x20)
+            ) {
+                fputs(",\n", f);
+                col=0;
+            } else {
+                fputc(',', f);
+            }
+        }
+        if(c<0x20) {
+            /* Control codes are written as numbers. */
+            fprintf(f, "%d", c);
+        } else if(c=='\'' || c=='\\') {
+            /* These two must be escaped inside a character constant. */
+            fprintf(f, "'\\%c'", c);
+        } else {
+            fprintf(f, "'%c'", c);
+        }
+        prev2=prev;
+        prev=c;
+    }
+    if(postfix!=NULL) {
+        fputs(postfix, f);
+    }
+}
--- a/icu4c/source/tools/toolutil/writesrc.h
+++ b/icu4c/source/tools/toolutil/writesrc.h
@ -24,21 +24,21 @@
 #include "utrie2.h"

 /**
- * Create a source text file and write a header comment with the ICU copyright.
+ * Creates a source text file and writes a header comment with the ICU copyright.
 * Writes a C/Java-style comment.
 */
 U_CAPI FILE * U_EXPORT2
 usrc_create(const char *path, const char *filename);

 /**
- * Create a source text file and write a header comment with the ICU copyright.
+ * Creates a source text file and writes a header comment with the ICU copyright.
 * Writes the comment with # lines, as used in scripts and text data.
 */
 U_CAPI FILE * U_EXPORT2
 usrc_createTextData(const char *path, const char *filename);

 /**
- * Write the contents of an array of 8/16/32-bit words.
+ * Writes the contents of an array of 8/16/32-bit words.
 * The prefix and postfix are optional (can be NULL) and are written first/last.
 * The prefix may contain a %ld or similar field for the array length.
 * The {} and declaration etc. need to be included in prefix/postfix or
@ -73,4 +73,20 @@ usrc_writeUTrie2Struct(FILE *f,
                       const char *indexName, const char *dataName,
                       const char *postfix);

+/**
+ * Writes the contents of an array of mostly invariant characters.
+ * Characters 0..0x1f are printed as numbers,
+ * others as characters with single quotes: '%c'.
+ *
+ * The prefix and postfix are optional (can be NULL) and are written first/last.
+ * The prefix may contain a %ld or similar field for the array length.
+ * The {} and declaration etc. need to be included in prefix/postfix or
+ * printed before and after the array contents.
+ */
+U_CAPI void U_EXPORT2
+usrc_writeArrayOfMostlyInvChars(FILE *f,
+                                const char *prefix,
+                                const char *p, int32_t length,
+                                const char *postfix);
+
 #endif