ICU-11449 reorder single scripts rather than groups; scripts/groups can start on top-16-bit boundaries; data formatVersion 5 for the new scripts data, with optional reorderRanges appended to reorderCodes

X-SVN-Rev: 36925
2025-04-10 07:39:16 +00:00 · 2015-01-07 03:49:20 +00:00 · 2015-01-07 03:49:20 +00:00 · e65a679a26
commit e65a679a26
parent a9d7c3e4bd
16 changed files with 684 additions and 377 deletions
--- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/Collation.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/Collation.java
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-* Copyright (C) 2010-2014, International Business Machines
+* Copyright (C) 2010-2015, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 * Collation.java, ported from collation.h/.cpp
@ -587,9 +587,5 @@ public final class Collation {
        return makeCE(unassignedPrimaryFromCodePoint(c));
    }

-    static long reorder(byte[] reorderTable, long primary) {
-        return ((reorderTable[(int)primary >>> 24] & 0xffL) << 24) | (primary & 0xffffff);
-    }
-
    // private Collation()  // No instantiation.
 }
--- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationCompare.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationCompare.java
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
- * Copyright (C) 1996-2014, International Business Machines
+ * Copyright (C) 1996-2015, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 * CollationCompare.java, ported from collationcompare.h/.cpp
@ -79,10 +79,9 @@ public final class CollationCompare /* all static */ {

            if (leftPrimary != rightPrimary) {
                // Return the primary difference, with script reordering.
-                byte[] reorderTable = settings.reorderTable;
-                if (reorderTable != null) {
-                    leftPrimary = Collation.reorder(reorderTable, leftPrimary);
-                    rightPrimary = Collation.reorder(reorderTable, rightPrimary);
+                if (settings.hasReordering()) {
+                    leftPrimary = settings.reorder(leftPrimary);
+                    rightPrimary = settings.reorder(rightPrimary);
                }
                return (leftPrimary < rightPrimary) ? Collation.LESS : Collation.GREATER;
            }
@ -335,10 +334,9 @@ public final class CollationCompare /* all static */ {

            if (leftQuaternary != rightQuaternary) {
                // Return the difference, with script reordering.
-                byte[] reorderTable = settings.reorderTable;
-                if (reorderTable != null) {
-                    leftQuaternary = Collation.reorder(reorderTable, leftQuaternary);
-                    rightQuaternary = Collation.reorder(reorderTable, rightQuaternary);
+                if (settings.hasReordering()) {
+                    leftQuaternary = settings.reorder(leftQuaternary);
+                    rightQuaternary = settings.reorder(rightQuaternary);
                }
                return (leftQuaternary < rightQuaternary) ? Collation.LESS : Collation.GREATER;
            }
--- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationData.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationData.java
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-* Copyright (C) 2010-2014, International Business Machines
+* Copyright (C) 2010-2015, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 * CollationData.java, ported from collationdata.h/.cpp
@ -16,6 +16,7 @@ import com.ibm.icu.impl.Trie2_32;
 import com.ibm.icu.lang.UScript;
 import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.util.ICUException;

 /**
 * Collation data container.
@ -25,6 +26,14 @@ import com.ibm.icu.text.UnicodeSet;
 * Includes data for the collation base (root/default), aliased if this is not the base.
 */
 public final class CollationData {
+    // Note: The ucadata.icu loader could discover the reserved ranges by setting an array
+    // parallel with the ranges, and resetting ranges that are indexed.
+    // The reordering builder code could clone the resulting template array.
+    static final int REORDER_RESERVED_BEFORE_LATIN = Collator.ReorderCodes.FIRST + 14;
+    static final int REORDER_RESERVED_AFTER_LATIN = Collator.ReorderCodes.FIRST + 15;
+
+    static final int MAX_NUM_SPECIAL_REORDER_CODES = 8;
+
    CollationData(Normalizer2Impl nfc) {
        nfcImpl = nfc;
    }
@ -182,12 +191,8 @@ public final class CollationData {
     *         or 0 if the script is unknown
     */
    long getFirstPrimaryForGroup(int script) {
-        int index = findScript(script);
-        if(index < 0) {
-            return 0;
-        }
-        long head = scripts[index];
-        return (head & 0xff00) << 16;
+        int index = getScriptIndex(script);
+        return index == 0 ? 0 : (long)scriptStarts[index] << 16;
    }

    /**
@ -197,13 +202,12 @@ public final class CollationData {
     *         or 0 if the script is unknown
     */
    public long getLastPrimaryForGroup(int script) {
-        int index = findScript(script);
-        if(index < 0) {
+        int index = getScriptIndex(script);
+        if(index == 0) {
            return 0;
        }
-        int head = scripts[index];
-        long lastByte = head & 0xff;
-        return ((lastByte + 1) << 24) - 1;
+        long limit = scriptStarts[index + 1];
+        return (limit << 16) - 1;
    }

    /**
@ -211,108 +215,154 @@ public final class CollationData {
     * @return the first script of the group, or -1 if the weight is beyond the last group
     */
    public int getGroupForPrimary(long p) {
-        p >>= 24;  // Reordering groups are distinguished by primary lead bytes.
-        for(int i = 0; i < scripts.length; i = i + 2 + scripts[i + 1]) {
-            int lastByte = scripts[i] & 0xff;
-            if(p <= lastByte) {
-                return scripts[i + 2];
+        p >>= 16;
+        if(p < scriptStarts[1] || scriptStarts[scriptStarts.length - 1] <= p) {
+            return -1;
+        }
+        int index = 1;
+        while(p >= scriptStarts[index + 1]) { ++index; }
+        for(int i = 0; i < numScripts; ++i) {
+            if(scriptsIndex[i] == index) {
+                return i;
+            }
+        }
+        for(int i = 0; i < MAX_NUM_SPECIAL_REORDER_CODES; ++i) {
+            if(scriptsIndex[numScripts + i] == index) {
+                return Collator.ReorderCodes.FIRST + i;
            }
        }
        return -1;
    }

-    private int findScript(int script) {
-        if(script < 0 || 0xffff < script) { return -1; }
-        for(int i = 0; i < scripts.length;) {
-            int limit = i + 2 + scripts[i + 1];
-            for(int j = i + 2; j < limit; ++j) {
-                if(script == scripts[j]) { return i; }
+    private int getScriptIndex(int script) {
+        if(script < 0) {
+            return 0;
+        } else if(script < numScripts) {
+            return scriptsIndex[script];
+        } else if(script < Collator.ReorderCodes.FIRST) {
+            return 0;
+        } else {
+            script -= Collator.ReorderCodes.FIRST;
+            if(script < MAX_NUM_SPECIAL_REORDER_CODES) {
+                return scriptsIndex[numScripts + script];
+            } else {
+                return 0;
            }
-            i = limit;
        }
-        return -1;
    }

    public int[] getEquivalentScripts(int script) {
-        int i = findScript(script);
-        if(i < 0) { return EMPTY_INT_ARRAY; }
-        int length = scripts[i + 1];
-        assert(length != 0);
-        int dest[] = new int[length];
-        i += 2;
-        dest[0] = scripts[i++];
-        for(int j = 1; j < length; ++j) {
-            script = scripts[i++];
-            // Sorted insertion.
-            for(int k = j;; --k) {
-                // Invariant: dest[k] is free to receive either script or dest[k - 1].
-                if(k > 0 && script < dest[k - 1]) {
-                    dest[k] = dest[k - 1];
-                } else {
-                    dest[k] = script;
-                    break;
-                }
+        int index = getScriptIndex(script);
+        if(index == 0) { return EMPTY_INT_ARRAY; }
+        if(script >= Collator.ReorderCodes.FIRST) {
+            // Special groups have no aliases.
+            return new int[] { script };
+        }
+
+        int length = 0;
+        for(int i = 0; i < numScripts; ++i) {
+            if(scriptsIndex[i] == index) {
+                ++length;
+            }
+        }
+        int[] dest = new int[length];
+        if(length == 1) {
+            dest[0] = script;
+            return dest;
+        }
+        length = 0;
+        for(int i = 0; i < numScripts; ++i) {
+            if(scriptsIndex[i] == index) {
+                dest[length++] = i;
            }
        }
        return dest;
    }

    /**
-     * Writes the permutation table for the given reordering of scripts and groups,
-     * mapping from default-order primary-weight lead bytes to reordered lead bytes.
+     * Writes the permutation of primary-weight ranges
+     * for the given reordering of scripts and groups.
     * The caller checks for illegal arguments and
     * takes care of [DEFAULT] and memory allocation.
+     *
+     * <p>Each list element will be a (limit, offset) pair as described
+     * for the CollationSettings.reorderRanges.
+     * The list will be empty if no ranges are reordered.
     */
-    public void makeReorderTable(int[] reorder, byte[] table) {
+    void makeReorderRanges(int[] reorder, UVector32 ranges) {
+        makeReorderRanges(reorder, false, ranges);
+    }
+
+    private void makeReorderRanges(int[] reorder, boolean latinMustMove, UVector32 ranges) {
+        ranges.removeAllElements();
        int length = reorder.length;
-        // Initialize the table.
+        if(length == 0 || (length == 1 && reorder[0] == UScript.UNKNOWN)) {
+            return;
+        }
+
+        // Maps each script-or-group range to a new lead byte.
+        short[] table = new short[scriptStarts.length - 1];  // C++: uint8_t[]
+
+        {
+            // Set "don't care" values for reserved ranges.
+            int index = scriptsIndex[
+                    numScripts + REORDER_RESERVED_BEFORE_LATIN - Collator.ReorderCodes.FIRST];
+            if(index != 0) {
+                table[index] = 0xff;
+            }
+            index = scriptsIndex[
+                    numScripts + REORDER_RESERVED_AFTER_LATIN - Collator.ReorderCodes.FIRST];
+            if(index != 0) {
+                table[index] = 0xff;
+            }
+        }
+
        // Never reorder special low and high primary lead bytes.
-        int lowByte;
-        for(lowByte = 0; lowByte <= Collation.MERGE_SEPARATOR_BYTE; ++lowByte) {
-            table[lowByte] = (byte)lowByte;
-        }
-        // lowByte == 03
-
-        int highByte;
-        for(highByte = 0xff; highByte >= Collation.TRAIL_WEIGHT_BYTE; --highByte) {
-            table[highByte] = (byte)highByte;
-        }
-        // highByte == FE
-
-        // Set intermediate bytes to 0 to indicate that they have not been set yet.
-        for(int i = lowByte; i <= highByte; ++i) {
-            table[i] = 0;
-        }
+        assert(scriptStarts.length >= 2);
+        assert(scriptStarts[0] == 0);
+        int lowStart = scriptStarts[1];
+        assert(lowStart == ((Collation.MERGE_SEPARATOR_BYTE + 1) << 8));
+        int highLimit = scriptStarts[scriptStarts.length - 1];
+        assert(highLimit == (Collation.TRAIL_WEIGHT_BYTE << 8));

        // Get the set of special reorder codes in the input list.
-        // This supports up to 32 special reorder codes;
+        // This supports a fixed number of special reorder codes;
        // it works for data with codes beyond Collator.ReorderCodes.LIMIT.
        int specials = 0;
        for(int i = 0; i < length; ++i) {
            int reorderCode = reorder[i] - Collator.ReorderCodes.FIRST;
-            if(0 <= reorderCode && reorderCode <= 31) {
+            if(0 <= reorderCode && reorderCode < MAX_NUM_SPECIAL_REORDER_CODES) {
                specials |= 1 << reorderCode;
            }
        }

        // Start the reordering with the special low reorder codes that do not occur in the input.
-        for(int i = 0;; i += 3) {
-            if(scripts[i + 1] != 1) { break; }  // Went beyond special single-code reorder codes.
-            int reorderCode = scripts[i + 2] - Collator.ReorderCodes.FIRST;
-            if(reorderCode < 0) { break; }  // Went beyond special reorder codes.
-            if((specials & (1 << reorderCode)) == 0) {
-                int head = scripts[i];
-                int firstByte = head >> 8;
-                int lastByte = head & 0xff;
-                do { table[firstByte++] = (byte)lowByte++; } while(firstByte <= lastByte);
+        for(int i = 0; i < MAX_NUM_SPECIAL_REORDER_CODES; ++i) {
+            int index = scriptsIndex[numScripts + i];
+            if(index != 0 && (specials & (1 << i)) == 0) {
+                lowStart = addLowScriptRange(table, index, lowStart);
            }
        }

-        // Reorder according to the input scripts, continuing from the bottom of the bytes range.
+        // Skip the reserved range before Latin if Latin is the first script,
+        // so that we do not move it unnecessarily.
+        int skippedReserved = 0;
+        if(specials == 0 && reorder[0] == UScript.LATIN && !latinMustMove) {
+            int index = scriptsIndex[UScript.LATIN];
+            assert(index != 0);
+            int start = scriptStarts[index];
+            assert(lowStart <= start);
+            skippedReserved = start - lowStart;
+            lowStart = start;
+        }
+
+        // Reorder according to the input scripts, continuing from the bottom of the primary range.
+        boolean hasReorderToEnd = false;
        for(int i = 0; i < length;) {
            int script = reorder[i++];
            if(script == UScript.UNKNOWN) {
                // Put the remaining scripts at the top.
+                hasReorderToEnd = true;
                while(i < length) {
                    script = reorder[--length];
                    if(script == UScript.UNKNOWN) {  // Must occur at most once.
@ -323,17 +373,14 @@ public final class CollationData {
                        throw new IllegalArgumentException(
                                "setReorderCodes(): UScript.DEFAULT together with other scripts");
                    }
-                    int index = findScript(script);
-                    if(index < 0) { continue; }
-                    int head = scripts[index];
-                    int firstByte = head >> 8;
-                    int lastByte = head & 0xff;
-                    if(table[firstByte] != 0) {  // Duplicate or equivalent script.
+                    int index = getScriptIndex(script);
+                    if(index == 0) { continue; }
+                    if(table[index] != 0) {  // Duplicate or equivalent script.
                        throw new IllegalArgumentException(
                                "setReorderCodes(): duplicate or equivalent script " +
                                scriptCodeString(script));
                    }
-                    do { table[lastByte--] = (byte)highByte--; } while(firstByte <= lastByte);
+                    highLimit = addHighScriptRange(table, index, highLimit);
                }
                break;
            }
@ -343,25 +390,82 @@ public final class CollationData {
                throw new IllegalArgumentException(
                        "setReorderCodes(): UScript.DEFAULT together with other scripts");
            }
-            int index = findScript(script);
-            if(index < 0) { continue; }
-            int head = scripts[index];
-            int firstByte = head >> 8;
-            int lastByte = head & 0xff;
-            if(table[firstByte] != 0) {  // Duplicate or equivalent script.
+            int index = getScriptIndex(script);
+            if(index == 0) { continue; }
+            if(table[index] != 0) {  // Duplicate or equivalent script.
                throw new IllegalArgumentException(
                        "setReorderCodes(): duplicate or equivalent script " +
                        scriptCodeString(script));
            }
-            do { table[firstByte++] = (byte)lowByte++; } while(firstByte <= lastByte);
+            lowStart = addLowScriptRange(table, index, lowStart);
        }

        // Put all remaining scripts into the middle.
-        // Avoid table[0] which must remain 0.
-        for(int i = 1; i <= 0xff; ++i) {
-            if(table[i] == 0) { table[i] = (byte)lowByte++; }
+        for(int i = 1; i < scriptStarts.length - 1; ++i) {
+            int leadByte = table[i];
+            if(leadByte != 0) { continue; }
+            int start = scriptStarts[i];
+            if(!hasReorderToEnd && start > lowStart) {
+                // No need to move this script.
+                lowStart = start;
+            }
+            lowStart = addLowScriptRange(table, i, lowStart);
        }
-        assert(lowByte == highByte + 1);
+        if(lowStart > highLimit) {
+            if((lowStart - (skippedReserved & 0xff00)) <= highLimit) {
+                // Try not skipping the before-Latin reserved range.
+                makeReorderRanges(reorder, true, ranges);
+                return;
+            }
+            // We need more primary lead bytes than available, despite the reserved ranges.
+            throw new ICUException(
+                    "setReorderCodes(): reordering too many partial-primary-lead-byte scripts");
+        }
+
+        // Turn lead bytes into a list of (limit, offset) pairs.
+        // Encode each pair in one list element:
+        // Upper 16 bits = limit, lower 16 = signed lead byte offset.
+        int offset = 0;
+        for(int i = 1;; ++i) {
+            int nextOffset = offset;
+            while(i < scriptStarts.length - 1) {
+                int newLeadByte = table[i];
+                if(newLeadByte == 0xff) {
+                    // "Don't care" lead byte for reserved range, continue with current offset.
+                } else {
+                    nextOffset = newLeadByte - (scriptStarts[i] >> 8);
+                    if(nextOffset != offset) { break; }
+                }
+                ++i;
+            }
+            if(offset != 0 || i < scriptStarts.length - 1) {
+                ranges.addElement(((int)scriptStarts[i] << 16) | (offset & 0xffff));
+            }
+            if(i == scriptStarts.length - 1) { break; }
+            offset = nextOffset;
+        }
+    }
+
+    private int addLowScriptRange(short[] table, int index, int lowStart) {
+        int start = scriptStarts[index];
+        if((start & 0xff) < (lowStart & 0xff)) {
+            lowStart += 0x100;
+        }
+        table[index] = (short)(lowStart >> 8);
+        int limit = scriptStarts[index + 1];
+        lowStart = ((lowStart & 0xff00) + ((limit & 0xff00) - (start & 0xff00))) | (limit & 0xff);
+        return lowStart;
+    }
+
+    private int addHighScriptRange(short[] table, int index, int highLimit) {
+        int limit = scriptStarts[index + 1];
+        if((limit & 0xff) > (highLimit & 0xff)) {
+            highLimit -= 0x100;
+        }
+        int start = scriptStarts[index];
+        highLimit = ((highLimit & 0xff00) - ((limit & 0xff00) - (start & 0xff00))) | (start & 0xff);
+        table[index] = (short)(highLimit >> 8);
+        return highLimit;
    }

    private static String scriptCodeString(int script) {
@ -423,21 +527,25 @@ public final class CollationData {
     * Data for scripts and reordering groups.
     * Uses include building a reordering permutation table and
     * providing script boundaries to AlphabeticIndex.
-     *
-     * This data is a sorted list of primary-weight lead byte ranges (reordering groups),
-     * each with a list of pairs sorted in base collation order;
-     * each pair contains a script/reorder code and the lowest primary weight for that script.
-     *
-     * Data structure:
-     * - Each reordering group is encoded in n+2 16-bit integers.
-     *   - First integer:
-     *     Bits 15..8: First byte of the reordering group's range.
-     *     Bits  7..0: Last byte of the reordering group's range.
-     *   - Second integer:
-     *     Length n of the list of script/reordering codes.
-     *   - Each further integer is a script or reordering code.
     */
-    char[] scripts;
+    int numScripts;
+    /**
+     * The length of scriptsIndex is numScripts+16.
+     * It maps from a UScriptCode or a special reorder code to an entry in scriptStarts.
+     * 16 special reorder codes (not all used) are mapped starting at numScripts.
+     * Up to MAX_NUM_SPECIAL_REORDER_CODES are codes for special groups like space/punct/digit.
+     * There are special codes at the end for reorder-reserved primary ranges.
+     *
+     * <p>Multiple scripts may share a range and index, for example Hira &amp; Kana.
+     */
+    char[] scriptsIndex;
+    /**
+     * Start primary weight (top 16 bits only) for a group/script/reserved range
+     * indexed by scriptsIndex.
+     * The first range (separators &amp; terminators) and the last range (trailing weights)
+     * are not reorderable, and no scriptsIndex entry points to them.
+     */
+    char[] scriptStarts;

    /**
     * Collation elements in the root collator.
--- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataBuilder.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataBuilder.java
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-* Copyright (C) 2012-2014, International Business Machines
+* Copyright (C) 2012-2015, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 * CollationDataBuilder.java, ported from collationdatabuilder.h/.cpp
@ -310,7 +310,9 @@ final class CollationDataBuilder {  // not final in C++
        if(base != null) {
            data.numericPrimary = base.numericPrimary;
            data.compressibleBytes = base.compressibleBytes;
-            data.scripts = base.scripts;
+            data.numScripts = base.numScripts;
+            data.scriptsIndex = base.scriptsIndex;
+            data.scriptStarts = base.scriptStarts;
        }
        buildFastLatinTable(data);
    }
--- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationDataReader.java
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-* Copyright (C) 2013-2014, International Business Machines
+* Copyright (C) 2013-2015, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 * CollationDataReader.java, ported from collationdatareader.h/.cpp
@ -13,6 +13,7 @@ package com.ibm.icu.impl.coll;

 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
 import java.util.Arrays;

 import com.ibm.icu.impl.ICUBinary;
@ -143,6 +144,7 @@ final class CollationDataReader /* all static */ {

        CollationData baseData = base == null ? null : base.data;
        int[] reorderCodes;
+        int reorderCodesLength;
        index = IX_REORDER_CODES_OFFSET;
        offset = inIndexes[index];
        length = inIndexes[index + 1] - offset;
@ -152,13 +154,27 @@ final class CollationDataReader /* all static */ {
                // the base data does not have a reordering.
                throw new ICUException("Collation base data must not reorder scripts");
            }
-            reorderCodes = new int[length / 4];
-            for(int i = 0; i < length / 4; ++i) {
+            reorderCodesLength = length / 4;
+            reorderCodes = new int[reorderCodesLength];
+            for(int i = 0; i < reorderCodesLength; ++i) {
                reorderCodes[i] = inBytes.getInt();
            }
            length &= 3;
+
+            // The reorderRanges (if any) are the trailing reorderCodes entries.
+            // Split the array at the boundary.
+            // Script or reorder codes do not exceed 16-bit values.
+            // Range limits are stored in the upper 16 bits, and are never 0.
+            int reorderRangesLength = 0;
+            while(reorderRangesLength < reorderCodesLength &&
+                    (reorderCodes[reorderCodesLength - reorderRangesLength - 1] & 0xffff0000) != 0) {
+                ++reorderRangesLength;
+            }
+            assert(reorderRangesLength < reorderCodesLength);
+            reorderCodesLength -= reorderRangesLength;
        } else {
            reorderCodes = new int[0];
+            reorderCodesLength = 0;
        }
        ICUBinary.skipBytes(inBytes, length);

@ -170,7 +186,7 @@ final class CollationDataReader /* all static */ {
        offset = inIndexes[index];
        length = inIndexes[index + 1] - offset;
        if(length >= 256) {
-            if(reorderCodes.length == 0) {
+            if(reorderCodesLength == 0) {
                throw new ICUException("Reordering table without reordering codes");
            }
            reorderTable = new byte[256];
@ -410,15 +426,28 @@ final class CollationDataReader /* all static */ {
            if(data == null) {
                throw new ICUException("Script order data but no mappings");
            }
-            data.scripts = new char[length / 2];
-            for(int i = 0; i < length / 2; ++i) {
-                data.scripts[i] = inBytes.getChar();
+            int scriptsLength = length / 2;
+            CharBuffer inChars = inBytes.asCharBuffer();
+            data.numScripts = inChars.get();
+            // There must be enough entries for both arrays, including more than two range starts.
+            int scriptStartsLength = scriptsLength - (1 + data.numScripts + 16);
+            if(scriptStartsLength <= 2) {
+                throw new ICUException("Script order data too short");
+            }
+            inChars.get(data.scriptsIndex = new char[data.numScripts + 16]);
+            inChars.get(data.scriptStarts = new char[scriptStartsLength]);
+            if(!(data.scriptStarts[0] == 0 &&
+                    data.scriptStarts[1] == ((Collation.MERGE_SEPARATOR_BYTE + 1) << 8) &&
+                    data.scriptStarts[scriptStartsLength - 1] ==
+                            (Collation.TRAIL_WEIGHT_BYTE << 8))) {
+                throw new ICUException("Script order data not valid");
            }
-            length &= 1;
        } else if(data == null) {
            // Nothing to do.
        } else if(baseData != null) {
-            data.scripts = baseData.scripts;
+            data.numScripts = baseData.numScripts;
+            data.scriptsIndex = baseData.scriptsIndex;
+            data.scriptStarts = baseData.scriptStarts;
        }
        ICUBinary.skipBytes(inBytes, length);

@ -470,12 +499,8 @@ final class CollationDataReader /* all static */ {
            throw new ICUException("The maxVariable could not be mapped to a variableTop");
        }

-        if(reorderCodes.length == 0 || reorderTable != null) {
-            settings.setReordering(reorderCodes, reorderTable);
-        } else {
-            byte[] table = new byte[256];
-            baseData.makeReorderTable(reorderCodes, table);
-            settings.setReordering(reorderCodes, table);
+        if(reorderCodesLength != 0) {
+            settings.aliasReordering(baseData, reorderCodes, reorderCodesLength, reorderTable);
        }

        settings.fastLatinOptions = CollationFastLatin.getOptions(
@ -486,7 +511,7 @@ final class CollationDataReader /* all static */ {
    private static final class IsAcceptable implements ICUBinary.Authenticate {
        // @Override when we switch to Java 6
        public boolean isDataVersionAcceptable(byte version[]) {
-            return version[0] == 4;
+            return version[0] == 5;
        }
    }
    private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
--- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationFastLatin.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationFastLatin.java
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-* Copyright (C) 2013-2014, International Business Machines
+* Copyright (C) 2013-2015, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 * CollationFastLatin.java, ported from collationfastlatin.h/.cpp
@ -23,7 +23,7 @@ public final class CollationFastLatin /* all static */ {
     * When the major version number of the main data format changes,
     * we can reset this fast Latin version to 1.
     */
-    public static final int VERSION = 1;
+    public static final int VERSION = 2;

    public static final int LATIN_MAX = 0x17f;
    public static final int LATIN_LIMIT = LATIN_MAX + 1;
@ -211,33 +211,50 @@ public final class CollationFastLatin /* all static */ {
            // lowest long mini primary.
            miniVarTop = MIN_LONG - 1;
        } else {
-            int v1 = (int)(settings.variableTop >> 24);
            int headerLength = header[0] & 0xff;
-            int i = headerLength - 1;
-            if(i <= 0 || v1 > (header[i] & 0x7f)) {
+            int i = 1 + settings.getMaxVariable();
+            if(i >= headerLength) {
                return -1;  // variableTop >= digits, should not occur
            }
-            while(i > 1 && v1 <= (header[i - 1] & 0x7f)) { --i; }
-            // In the table header, the miniVarTop is in bits 15..7, with 4 zero bits 19..16 implied.
-            // Shift right to make it comparable with long mini primaries in bits 15..3.
-            miniVarTop = (header[i] & 0xff80) >> 4;
+            miniVarTop = header[i];
        }

-        byte[] reorderTable = settings.reorderTable;
-        if(reorderTable != null) {
-            char[] scripts = data.scripts;
-            int length = data.scripts.length;
-            int prevLastByte = 0;
-            for(int i = 0; i < length;) {
-                // reordered last byte of the group
-                int lastByte = reorderTable[scripts[i] & 0xff] & 0xff;
-                if(lastByte < prevLastByte) {
-                    // The permutation affects the groups up to Latin.
-                    return -1;
+        boolean digitsAreReordered = false;
+        if(settings.hasReordering()) {
+            long prevStart = 0;
+            long beforeDigitStart = 0;
+            long digitStart = 0;
+            long afterDigitStart = 0;
+            for(int group = Collator.ReorderCodes.FIRST;
+                    group < Collator.ReorderCodes.FIRST + CollationData.MAX_NUM_SPECIAL_REORDER_CODES;
+                    ++group) {
+                long start = data.getFirstPrimaryForGroup(group);
+                start = settings.reorder(start);
+                if(group == Collator.ReorderCodes.DIGIT) {
+                    beforeDigitStart = prevStart;
+                    digitStart = start;
+                } else if(start != 0) {
+                    if(start < prevStart) {
+                        // The permutation affects the groups up to Latin.
+                        return -1;
+                    }
+                    // In the future, there might be a special group between digits & Latin.
+                    if(digitStart != 0 && afterDigitStart == 0 && prevStart == beforeDigitStart) {
+                        afterDigitStart = start;
+                    }
+                    prevStart = start;
                }
-                if(scripts[i + 2] == UScript.LATIN) { break; }
-                i = i + 2 + scripts[i + 1];
-                prevLastByte = lastByte;
+            }
+            long latinStart = data.getFirstPrimaryForGroup(UScript.LATIN);
+            latinStart = settings.reorder(latinStart);
+            if(latinStart < prevStart) {
+                return -1;
+            }
+            if(afterDigitStart == 0) {
+                afterDigitStart = latinStart;
+            }
+            if(!(beforeDigitStart < digitStart && digitStart < afterDigitStart)) {
+                digitsAreReordered = true;
            }
        }

@ -253,7 +270,7 @@ public final class CollationFastLatin /* all static */ {
            }
            primaries[c] = (char)p;
        }
-        if((settings.options & CollationSettings.NUMERIC) != 0) {
+        if(digitsAreReordered || (settings.options & CollationSettings.NUMERIC) != 0) {
            // Bail out for digits.
            for(int c = 0x30; c <= 0x39; ++c) { primaries[c] = 0; }
        }
--- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationFastLatinBuilder.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationFastLatinBuilder.java
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-* Copyright (C) 2013-2014, International Business Machines
+* Copyright (C) 2013-2015, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 * CollationFastLatinBuilder.java, ported from collationfastlatinbuilder.h/.cpp
@ -127,38 +127,26 @@ final class CollationFastLatinBuilder {
    }

    private boolean loadGroups(CollationData data) {
-        result.append(0);  // reserved for version & headerLength
+        headerLength = 1 + NUM_SPECIAL_GROUPS;
+        int r0 = (CollationFastLatin.VERSION << 8) | headerLength;
+        result.append((char)r0);
        // The first few reordering groups should be special groups
        // (space, punct, ..., digit) followed by Latn, then Grek and other scripts.
-        for(int i = 0;;) {
-            if(i >= data.scripts.length) {
-                throw new AssertionError("no Latn script");
+        for(int i = 0; i < NUM_SPECIAL_GROUPS; ++i) {
+            lastSpecialPrimaries[i] = data.getLastPrimaryForGroup(Collator.ReorderCodes.FIRST + i);
+            if(lastSpecialPrimaries[i] == 0) {
+                // missing data
+                return false;
            }
-            int head = data.scripts[i];
-            int lastByte = head & 0xff;  // last primary byte in the group
-            int group = data.scripts[i + 2];
-            if(group == Collator.ReorderCodes.DIGIT) {
-                firstDigitPrimary = (long)(head & 0xff00) << 16;
-                headerLength = result.length();
-                int r0 = (CollationFastLatin.VERSION << 8) | headerLength;
-                result.setCharAt(0, (char)r0);
-            } else if(group == UScript.LATIN) {
-                if(firstDigitPrimary == 0) {
-                    throw new AssertionError("no digit group");
-                }
-                firstLatinPrimary = (long)(head & 0xff00) << 16;
-                lastLatinPrimary = ((long)lastByte << 24) | 0xffffff;
-                break;
-            } else if(firstDigitPrimary == 0) {
-                // a group below digits
-                if(lastByte > 0x7f) {
-                    // We only use 7 bits for the last byte of a below-digits group.
-                    // This does not warrant an errorCode, but we do not build a fast Latin table.
-                    return false;
-                }
-                result.append((char)lastByte);
-            }
-            i = i + 2 + data.scripts[i + 1];
+            result.append(0);  // reserve a slot for this group
+        }
+
+        firstDigitPrimary = data.getFirstPrimaryForGroup(Collator.ReorderCodes.DIGIT);
+        firstLatinPrimary = data.getFirstPrimaryForGroup(UScript.LATIN);
+        lastLatinPrimary = data.getLastPrimaryForGroup(UScript.LATIN);
+        if(firstDigitPrimary == 0 || firstLatinPrimary == 0) {
+            // missing data
+            return false;
        }
        return true;
    }
@ -173,23 +161,21 @@ final class CollationFastLatinBuilder {
        }
        // Both or neither must be potentially-variable,
        // so that we can test only one and determine if both are variable.
-        if(p >= firstDigitPrimary) {
-            return q >= firstDigitPrimary;
-        } else if(q >= firstDigitPrimary) {
+        long lastVariablePrimary = lastSpecialPrimaries[NUM_SPECIAL_GROUPS - 1];
+        if(p > lastVariablePrimary) {
+            return q > lastVariablePrimary;
+        } else if(q > lastVariablePrimary) {
            return false;
        }
        // Both will be encoded with long mini primaries.
        // They must be in the same special reordering group,
        // so that we can test only one and determine if both are variable.
-        p >>= 24;  // first primary byte
-        q >>= 24;
        assert(p != 0 && q != 0);
-        assert(p <= result.charAt(headerLength - 1));  // the loop will terminate
-        for(int i = 1;; ++i) {
-            long lastByte = result.charAt(i);
-            if(p <= lastByte) {
-                return q <= lastByte;
-            } else if(q <= lastByte) {
+        for(int i = 0;; ++i) {  // will terminate
+            long lastPrimary = lastSpecialPrimaries[i];
+            if(p <= lastPrimary) {
+                return q <= lastPrimary;
+            } else if(q <= lastPrimary) {
                return false;
            }
        }
@ -416,8 +402,8 @@ final class CollationFastLatinBuilder {

    private void encodeUniqueCEs() {
        miniCEs = new char[uniqueCEs.size()];
-        int group = 1;
-        long lastGroupByte = result.charAt(group);
+        int group = 0;
+        long lastGroupPrimary = lastSpecialPrimaries[group];
        // The lowest unique CE must be at least a secondary CE.
        assert(((int)uniqueCEs.elementAti(0) >>> 16) != 0);
        long prevPrimary = 0;
@ -431,16 +417,15 @@ final class CollationFastLatinBuilder {
            // (uniqueCEs does not store case bits.)
            long p = ce >>> 32;
            if(p != prevPrimary) {
-                int p1 = (int)(p >> 24);
-                while(p1 > lastGroupByte) {
+                while(p > lastGroupPrimary) {
                    assert(pri <= CollationFastLatin.MAX_LONG);
-                    // Add the last "long primary" in or before the group
-                    // into the upper 9 bits of the group entry.
-                    result.setCharAt(group, (char)((pri << 4) | lastGroupByte));
-                    if(++group < headerLength) {  // group is 1-based
-                        lastGroupByte = result.charAt(group);
+                    // Set the group's header entry to the
+                    // last "long primary" in or before the group.
+                    result.setCharAt(1 + group, (char)pri);
+                    if(++group < NUM_SPECIAL_GROUPS) {
+                        lastGroupPrimary = lastSpecialPrimaries[group];
                    } else {
-                        lastGroupByte = 0xff;
+                        lastGroupPrimary = 0xffffffffL;
                        break;
                    }
                }
@ -686,6 +671,10 @@ final class CollationFastLatinBuilder {
        return (ce >>> 32) == Collation.NO_CE_PRIMARY && ce != Collation.NO_CE;
    }

+    // space, punct, symbol, currency (not digit)
+    private static final int NUM_SPECIAL_GROUPS =
+            Collator.ReorderCodes.CURRENCY - Collator.ReorderCodes.FIRST + 1;
+
    private static final long CONTRACTION_FLAG = 0x80000000L;

    // temporary "buffer"
@ -699,7 +688,8 @@ final class CollationFastLatinBuilder {
    /** One 16-bit mini CE per unique CE. */
    private char[] miniCEs;

-    // These are constant for a given list of CollationData.scripts.
+    // These are constant for a given root collator.
+    long[] lastSpecialPrimaries = new long[NUM_SPECIAL_GROUPS];
    private long firstDigitPrimary;
    private long firstLatinPrimary;
    private long lastLatinPrimary;
--- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationKeys.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationKeys.java
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
- * Copyright (C) 2012-2014, International Business Machines
+ * Copyright (C) 2012-2015, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 * CollationKeys.java, ported from collationkeys.h/.cpp
@ -348,7 +348,6 @@ public final class CollationKeys /* all methods are static */ {
            // +1 so that we can use "<" and primary ignorables test out early.
            variableTop = settings.variableTop + 1;
        }
-        byte[] reorderTable = settings.reorderTable;

        int tertiaryMask = CollationSettings.getTertiaryMask(options);

@ -358,7 +357,7 @@ public final class CollationKeys /* all methods are static */ {
        SortKeyLevel tertiaries = getSortKeyLevel(levels, Collation.TERTIARY_LEVEL_FLAG);
        SortKeyLevel quaternaries = getSortKeyLevel(levels, Collation.QUATERNARY_LEVEL_FLAG);

-        int compressedP1 = 0; // 0==no compression; otherwise reordered compressible lead byte
+        long prevReorderedPrimary = 0;  // 0==no compression
        int commonCases = 0;
        int commonSecondaries = 0;
        int commonTertiaries = 0;
@ -387,16 +386,15 @@ public final class CollationKeys /* all methods are static */ {
                }
                do {
                    if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) {
-                        int p1 = (int) p >>> 24;
-                        if (reorderTable != null) {
-                            p1 = reorderTable[p1] & 0xff;
+                        if (settings.hasReordering()) {
+                            p = settings.reorder(p);
                        }
-                        if (p1 >= QUAT_SHIFTED_LIMIT_BYTE) {
+                        if (((int) p >>> 24) >= QUAT_SHIFTED_LIMIT_BYTE) {
                            // Prevent shifted primary lead bytes from
                            // overlapping with the common compression range.
                            quaternaries.appendByte(QUAT_SHIFTED_LIMIT_BYTE);
                        }
-                        quaternaries.appendWeight32((p1 << 24) | (p & 0xffffff));
+                        quaternaries.appendWeight32(p);
                    }
                    do {
                        ce = iter.nextCE();
@ -409,13 +407,15 @@ public final class CollationKeys /* all methods are static */ {
            // If ce==NO_CE, then write nothing for the primary level but
            // terminate compression on all levels and then exit the loop.
            if (p > Collation.NO_CE_PRIMARY && (levels & Collation.PRIMARY_LEVEL_FLAG) != 0) {
-                int p1 = (int) p >>> 24;
-                if (reorderTable != null) {
-                    p1 = reorderTable[p1] & 0xff;
+                // Test the un-reordered primary for compressibility.
+                boolean isCompressible = compressibleBytes[(int) p >>> 24];
+                if(settings.hasReordering()) {
+                    p = settings.reorder(p);
                }
-                if (p1 != compressedP1) {
-                    if (compressedP1 != 0) {
-                        if (p1 < compressedP1) {
+                int p1 = (int) p >>> 24;
+                if (!isCompressible || p1 != ((int) prevReorderedPrimary >>> 24)) {
+                    if (prevReorderedPrimary != 0) {
+                        if (p < prevReorderedPrimary) {
                            // No primary compression terminator
                            // at the end of the level or merged segment.
                            if (p1 > Collation.MERGE_SEPARATOR_BYTE) {
@ -426,12 +426,10 @@ public final class CollationKeys /* all methods are static */ {
                        }
                    }
                    sink.Append(p1);
-                    // Test the un-reordered lead byte for compressibility but
-                    // remember the reordered lead byte.
-                    if (compressibleBytes[(int) p >>> 24]) {
-                        compressedP1 = p1;
+                    if(isCompressible) {
+                        prevReorderedPrimary = p;
                    } else {
-                        compressedP1 = 0;
+                        prevReorderedPrimary = 0;
                    }
                }
                byte p2 = (byte) (p >>> 16);
--- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRuleParser.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationRuleParser.java
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-* Copyright (C) 2013-2014, International Business Machines
+* Copyright (C) 2013-2015, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 * CollationRuleParser.java, ported from collationruleparser.h/.cpp
@ -718,17 +718,14 @@ public final class CollationRuleParser {
            reorderCodes.add(code);
            i = limit;
        }
-        int length = reorderCodes.size();
-        if(length == 1 && reorderCodes.get(0) == Collator.ReorderCodes.NONE) {
+        if(reorderCodes.isEmpty()) {
            settings.resetReordering();
-            return;
+        } else {
+            int[] codes = new int[reorderCodes.size()];
+            int j = 0;
+            for(Integer code : reorderCodes) { codes[j++] = code; }
+            settings.setReordering(baseData, codes);
        }
-        int[] codes = new int[reorderCodes.size()];
-        int j = 0;
-        for(Integer code : reorderCodes) { codes[j++] = code; }
-        byte[] table = new byte[256];
-        baseData.makeReorderTable(codes, table);
-        settings.setReordering(codes, table);
    }

    private static final String[] gSpecialReorderCodes = {
--- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationSettings.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationSettings.java
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-* Copyright (C) 2013-2014, International Business Machines
+* Copyright (C) 2013-2015, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 * CollationSettings.java, ported from collationsettings.h/.cpp
@ -93,7 +93,7 @@ public final class CollationSettings extends SharedObject {
    @Override
    public CollationSettings clone() {
        CollationSettings newSettings = (CollationSettings)super.clone();
-        // Note: The reorderTable and reorderCodes need not be cloned
+        // Note: The reorderTable, reorderRanges, and reorderCodes need not be cloned
        // because, in Java, they only get replaced but not modified.
        newSettings.fastLatinPrimaries = fastLatinPrimaries.clone();
        return newSettings;
@ -125,16 +125,180 @@ public final class CollationSettings extends SharedObject {
        // When we turn off reordering, we want to set a null permutation
        // rather than a no-op permutation.
        reorderTable = null;
+        minHighNoReorder = 0;
+        reorderRanges = null;
        reorderCodes = EMPTY_INT_ARRAY;
    }
-    // No aliasReordering() in Java. Use setReordering(). See comments near reorderCodes.
-    public void setReordering(int[] codes, byte[] table) {
+
+    void aliasReordering(CollationData data, int[] codesAndRanges, int codesLength, byte[] table) {
+        int[] codes;
+        if(codesLength == codesAndRanges.length) {
+            codes = codesAndRanges;
+        } else {
+            // TODO: Java 6: Arrays.copyOf(codesAndRanges, codesLength);
+            codes = new int[codesLength];
+            System.arraycopy(codesAndRanges, 0, codes, 0, codesLength);
+        }
+        int rangesStart = codesLength;
+        int rangesLimit = codesAndRanges.length;
+        int rangesLength = rangesLimit - rangesStart;
+        if(table != null &&
+                (rangesLength == 0 ?
+                        !reorderTableHasSplitBytes(table) :
+                        rangesLength >= 2 &&
+                        // The first offset must be 0. The last offset must not be 0.
+                        (codesAndRanges[rangesStart] & 0xffff) == 0 &&
+                        (codesAndRanges[rangesLimit - 1] & 0xffff) != 0)) {
+            reorderTable = table;
+            reorderCodes = codes;
+            // Drop ranges before the first split byte. They are reordered by the table.
+            // This then speeds up reordering of the remaining ranges.
+            int firstSplitByteRangeIndex = rangesStart;
+            while(firstSplitByteRangeIndex < rangesLimit &&
+                    (codesAndRanges[firstSplitByteRangeIndex] & 0xff0000) == 0) {
+                // The second byte of the primary limit is 0.
+                ++firstSplitByteRangeIndex;
+            }
+            if(firstSplitByteRangeIndex == rangesLimit) {
+                assert(!reorderTableHasSplitBytes(table));
+                minHighNoReorder = 0;
+                reorderRanges = null;
+            } else {
+                assert(table[codesAndRanges[firstSplitByteRangeIndex] >>> 24] == 0);
+                minHighNoReorder = codesAndRanges[rangesLimit - 1] & 0xffff0000L;
+                setReorderRanges(codesAndRanges, firstSplitByteRangeIndex,
+                        rangesLimit - firstSplitByteRangeIndex);
+            }
+            return;
+        }
+        // Regenerate missing data.
+        setReordering(data, codes);
+    }
+
+    public void setReordering(CollationData data, int[] codes) {
+        if(codes.length == 0 || (codes.length == 1 && codes[0] == Collator.ReorderCodes.NONE)) {
+            resetReordering();
+            return;
+        }
+        UVector32 rangesList = new UVector32();
+        data.makeReorderRanges(codes, rangesList);
+        int rangesLength = rangesList.size();
+        if(rangesLength == 0) {
+            resetReordering();
+            return;
+        }
+        int[] ranges = rangesList.getBuffer();
+        // ranges[] contains at least two (limit, offset) pairs.
+        // The first offset must be 0. The last offset must not be 0.
+        // Separators (at the low end) and trailing weights (at the high end)
+        // are never reordered.
+        assert(rangesLength >= 2);
+        assert((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
+        minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000L;
+
+        // Write the lead byte permutation table.
+        // Set a 0 for each lead byte that has a range boundary in the middle.
+        byte[] table = new byte[256];
+        int b = 0;
+        int firstSplitByteRangeIndex = -1;
+        for(int i = 0; i < rangesLength; ++i) {
+            int pair = ranges[i];
+            int limit1 = pair >>> 24;
+            while(b < limit1) {
+                table[b] = (byte)(b + pair);
+                ++b;
+            }
+            // Check the second byte of the limit.
+            if((pair & 0xff0000) != 0) {
+                table[limit1] = 0;
+                b = limit1 + 1;
+                if(firstSplitByteRangeIndex < 0) {
+                    firstSplitByteRangeIndex = i;
+                }
+            }
+        }
+        while(b <= 0xff) {
+            table[b] = (byte)b;
+            ++b;
+        }
+        int rangesStart;
+        if(firstSplitByteRangeIndex < 0) {
+            // The lead byte permutation table alone suffices for reordering.
+            rangesStart = rangesLength = 0;
+        } else {
+            // Remove the ranges below the first split byte.
+            rangesStart = firstSplitByteRangeIndex;
+            rangesLength -= firstSplitByteRangeIndex;
+        }
+        setReorderArrays(codes, ranges, rangesStart, rangesLength, table);
+    }
+
+    private void setReorderArrays(int[] codes,
+            int[] ranges, int rangesStart, int rangesLength, byte[] table) {
+        // Very different from C++. See the comments after the reorderCodes declaration.
        if(codes == null) {
            codes = EMPTY_INT_ARRAY;
        }
        assert (codes.length == 0) == (table == null);
        reorderTable = table;
        reorderCodes = codes;
+        setReorderRanges(ranges, rangesStart, rangesLength);
+    }
+
+    private void setReorderRanges(int[] ranges, int rangesStart, int rangesLength) {
+        if(rangesLength == 0) {
+            reorderRanges = null;
+        } else {
+            reorderRanges = new long[rangesLength];
+            int i = 0;
+            do {
+                reorderRanges[i++] = ranges[rangesStart++] & 0xffffffffL;
+            } while(i < rangesLength);
+        }
+    }
+
+    public void copyReorderingFrom(CollationSettings other) {
+        if(!other.hasReordering()) {
+            resetReordering();
+            return;
+        }
+        minHighNoReorder = other.minHighNoReorder;
+        reorderTable = other.reorderTable;
+        reorderRanges = other.reorderRanges;
+        reorderCodes = other.reorderCodes;
+    }
+
+    public boolean hasReordering() { return reorderTable != null; }
+
+    private static boolean reorderTableHasSplitBytes(byte[] table) {
+        assert(table[0] == 0);
+        for(int i = 1; i < 256; ++i) {
+            if(table[i] == 0) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    public long reorder(long p) {
+        byte b = reorderTable[(int)p >>> 24];
+        if(b != 0 || p <= Collation.NO_CE_PRIMARY) {
+            return ((b & 0xffL) << 24) | (p & 0xffffff);
+        } else {
+            return reorderEx(p);
+        }
+    }
+
+    private long reorderEx(long p) {
+        assert minHighNoReorder > 0;
+        if(p >= minHighNoReorder) { return p; }
+        // Round up p so that its lower 16 bits are >= any offset bits.
+        // Then compare q directly with (limit, offset) pairs.
+        long q = p | 0xffff;
+        long r;
+        int i = 0;
+        while(q >= (r = reorderRanges[i])) { ++i; }
+        return p + ((long)(short)r << 24);
    }

    // In C++, we use enums for attributes and their values, with a special value for the default.
@ -276,11 +440,39 @@ public final class CollationSettings extends SharedObject {
            (MAX_VAR_PUNCT << MAX_VARIABLE_SHIFT);
    /** Variable-top primary weight. */
    public long variableTop;
-    /** 256-byte table for reordering permutation of primary lead bytes; null if no reordering. */
+    /**
+     * 256-byte table for reordering permutation of primary lead bytes; null if no reordering.
+     * A 0 entry at a non-zero index means that the primary lead byte is "split"
+     * (there are different offsets for primaries that share that lead byte)
+     * and the reordering offset must be determined via the reorderRanges.
+     */
    public byte[] reorderTable;
+    /** Limit of last reordered range. 0 if no reordering or no split bytes. */
+    long minHighNoReorder;
+    /**
+     * Primary-weight ranges for script reordering,
+     * to be used by reorder(p) for split-reordered primary lead bytes.
+     *
+     * <p>Each entry is a (limit, offset) pair.
+     * The upper 16 bits of the entry are the upper 16 bits of the
+     * exclusive primary limit of a range.
+     * Primaries between the previous limit and this one have their lead bytes
+     * modified by the signed offset (-0xff..+0xff) stored in the lower 16 bits.
+     *
+     * <p>CollationData.makeReorderRanges() writes a full list where the first range
+     * (at least for terminators and separators) has a 0 offset.
+     * The last range has a non-zero offset.
+     * minHighNoReorder is set to the limit of that last range.
+     *
+     * <p>In the settings object, the initial ranges before the first split lead byte
+     * are omitted for efficiency; they are handled by reorder(p) via the reorderTable.
+     * If there are no split-reordered lead bytes, then no ranges are needed.
+     */
+    long[] reorderRanges;
    /** Array of reorder codes; ignored if length == 0. */
    public int[] reorderCodes = EMPTY_INT_ARRAY;
-    // Note: In C++, we keep a memory block around for the reorder codes and the permutation table,
+    // Note: In C++, we keep a memory block around for the reorder codes,
+    // the ranges, and the permutation table,
    // and modify them for new codes.
    // In Java, we simply copy references and then never modify the array contents.
    // The caller must abandon the arrays.
--- a/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationTailoring.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/impl/coll/CollationTailoring.java
@ -1,6 +1,6 @@
 /*
 *******************************************************************************
-* Copyright (C) 2013-2014, International Business Machines
+* Copyright (C) 2013-2015, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 * CollationTailoring.java, ported from collationtailoring.h/.cpp
@ -33,6 +33,7 @@ public final class CollationTailoring {
        if(baseSettings != null) {
            assert(baseSettings.readOnly().reorderCodes.length == 0);
            assert(baseSettings.readOnly().reorderTable == null);
+            assert(baseSettings.readOnly().minHighNoReorder == 0);
            settings = baseSettings.clone();
        } else {
            settings = new SharedObject.Reference<CollationSettings>(new CollationSettings());
--- a/icu4j/main/classes/collate/src/com/ibm/icu/text/Collator.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/text/Collator.java
@ -1,6 +1,6 @@
 /**
 *******************************************************************************
-* Copyright (C) 1996-2014, International Business Machines Corporation and
+* Copyright (C) 1996-2015, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
@ -399,27 +399,35 @@ public abstract class Collator implements Comparator<Object>, Freezable<Collator

    /** 
     * Sets the reordering codes for this collator.
-     * <p>Collation reordering allows scripts and some other defined blocks of characters 
-     * to be moved relative to each other as a block. This reordering is done on top of 
+     * Collation reordering allows scripts and some other groups of characters
+     * to be moved relative to each other. This reordering is done on top of
     * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed 
     * at the start and/or the end of the collation order. These groups are specified using
-     * UScript codes and UColReorderCode entries.
+     * UScript codes and {@link Collator.ReorderCodes} entries.
+     *
     * <p>By default, reordering codes specified for the start of the order are placed in the 
-     * order given after a group of "special" non-script blocks. These special groups of characters 
+     * order given after several special non-script groups. These special groups of characters
     * are space, punctuation, symbol, currency, and digit. These special groups are represented with
-     * UColReorderCode entries. Script groups can be intermingled with 
-     * these special non-script blocks if those special blocks are explicitly specified in the reordering.
-     * <p>The special code OTHERS stands for any script that is not explicitly 
+     * {@link Collator.ReorderCodes} entries. Script groups can be intermingled with
+     * these special non-script groups if those special groups are explicitly specified in the reordering.
+     *
+     * <p>The special code {@link Collator.ReorderCodes#OTHERS OTHERS}
+     * stands for any script that is not explicitly
     * mentioned in the list of reordering codes given. Anything that is after OTHERS
     * will go at the very end of the reordering in the order given.
-     * <p>The special reorder code DEFAULT will reset the reordering for this collator
+     *
+     * <p>The special reorder code {@link Collator.ReorderCodes#DEFAULT DEFAULT}
+     * will reset the reordering for this collator
     * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
     * was specified when this collator was created from resource data or from rules. The 
-     * DEFAULT code <b>must</b> be the sole code supplied when it used. If not
-     * that will result in an U_ILLEGAL_ARGUMENT_ERROR being set.
-     * <p>The special reorder code NONE will remove any reordering for this collator.
+     * DEFAULT code <b>must</b> be the sole code supplied when it is used.
+     * If not, then an {@link IllegalArgumentException} will be thrown.
+     *
+     * <p>The special reorder code {@link Collator.ReorderCodes#NONE NONE}
+     * will remove any reordering for this collator.
     * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The 
-     * NONE code <b>must</b> be the sole code supplied when it used.
+     * NONE code <b>must</b> be the sole code supplied when it is used.
+     *
     * @param order the reordering codes to apply to this collator; if this is null or an empty array
     * then this clears any existing reordering
     * @see #getReorderCodes
@ -1401,7 +1409,9 @@ public abstract class Collator implements Comparator<Object>, Freezable<Collator
    /**
     * Retrieves all the reorder codes that are grouped with the given reorder code. Some reorder
     * codes are grouped and must reorder together.
-     * 
+     * Beginning with ICU 55, scripts only reorder together if they are primary-equal,
+     * for example Hiragana and Katakana.
+     *
     * @param reorderCode The reorder code to determine equivalence for. 
     * @return the set of all reorder codes in the same group as the given reorder code.
     * @see #setReorderCodes
--- a/icu4j/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java
+++ b/icu4j/main/classes/collate/src/com/ibm/icu/text/RuleBasedCollator.java
@ -1,6 +1,6 @@
 /**
 *******************************************************************************
- * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * Copyright (C) 1996-2015, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
@ -34,6 +34,7 @@ import com.ibm.icu.impl.coll.FCDUTF16CollationIterator;
 import com.ibm.icu.impl.coll.SharedObject;
 import com.ibm.icu.impl.coll.TailoredSet;
 import com.ibm.icu.impl.coll.UTF16CollationIterator;
+import com.ibm.icu.lang.UScript;
 import com.ibm.icu.util.ULocale;
 import com.ibm.icu.util.VersionInfo;

@ -909,35 +910,18 @@ public final class RuleBasedCollator extends Collator {
        setFastLatinOptions(ownedSettings);
    }

-    /** 
-     * Sets the reordering codes for this collator.
-     * Collation reordering allows scripts and some other defined blocks of characters 
-     * to be moved relative to each other as a block. This reordering is done on top of 
-     * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed 
-     * at the start and/or the end of the collation order.
-     * <p>By default, reordering codes specified for the start of the order are placed in the 
-     * order given after a group of “special” non-script blocks. These special groups of characters 
-     * are space, punctuation, symbol, currency, and digit. These special groups are represented with
-     * {@link Collator.ReorderCodes}. Script groups can be intermingled with 
-     * these special non-script blocks if those special blocks are explicitly specified in the reordering.
-     * <p>The special code {@link Collator.ReorderCodes#OTHERS OTHERS} stands for any script that is not explicitly 
-     * mentioned in the list of reordering codes given. Anything that is after {@link Collator.ReorderCodes#OTHERS OTHERS}
-     * will go at the very end of the reordering in the order given.
-     * <p>The special reorder code {@link Collator.ReorderCodes#DEFAULT DEFAULT} will reset the reordering for this collator
-     * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
-     * was specified when this collator was created from resource data or from rules. The 
-     * {@link Collator.ReorderCodes#DEFAULT DEFAULT} code <b>must</b> be the sole code supplied when it used. If not
-     * that will result in an {@link IllegalArgumentException} being thrown.
-     * <p>The special reorder code {@link Collator.ReorderCodes#NONE NONE} will remove any reordering for this collator.
-     * The result of setting no reordering will be to have the DUCET/CLDR reordering used. The 
-     * {@link Collator.ReorderCodes#NONE NONE} code <b>must</b> be the sole code supplied when it used.
+    /**
+     * {@inheritDoc}
+     *
     * @param order the reordering codes to apply to this collator; if this is null or an empty array
     * then this clears any existing reordering
     * @throws IllegalArgumentException if the reordering codes are malformed in any way (e.g. duplicates, multiple reset codes, overlapping equivalent scripts)
     * @see #getReorderCodes
     * @see Collator#getEquivalentReorderCodes
+     * @see Collator.ReorderCodes
+     * @see UScript
     * @stable ICU 4.8
-     */ 
+     */
    @Override
    public void setReorderCodes(int... order) {
        checkNotFrozen();
@ -954,8 +938,7 @@ public final class RuleBasedCollator extends Collator {
        if(length == 1 && order[0] == Collator.ReorderCodes.DEFAULT) {
            if(settings.readOnly() != defaultSettings) {
                CollationSettings ownedSettings = getOwnedSettings();
-                ownedSettings.setReordering(defaultSettings.reorderCodes,
-                                            defaultSettings.reorderTable);
+                ownedSettings.copyReorderingFrom(defaultSettings);
                setFastLatinOptions(ownedSettings);
            }
            return;
@ -964,9 +947,7 @@ public final class RuleBasedCollator extends Collator {
        if(length == 0) {
            ownedSettings.resetReordering();
        } else {
-            byte[] reorderTable = new byte[256];
-            data.makeReorderTable(order, reorderTable);
-            ownedSettings.setReordering(order.clone(), reorderTable);
+            ownedSettings.setReordering(data, order.clone());
        }
        setFastLatinOptions(ownedSettings);
    }
--- a/icu4j/main/shared/data/icudata.jar
+++ b/icu4j/main/shared/data/icudata.jar
@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7bf72e445a207052fe2e2de0d70a989b69bc55da3df272f8e3096d6d9cb2ad0
-size 11801973
+oid sha256:49983175d1f04593f311dab35e6db8ad4b802d8c5de99a03d0e7333bd6ffcfc0
+size 11802910
--- a/icu4j/main/tests/collate/src/com/ibm/icu/dev/data/collationtest.txt
+++ b/icu4j/main/tests/collate/src/com/ibm/icu/dev/data/collationtest.txt
@ -1,4 +1,4 @@
-# Copyright (c) 2012-2014 International Business Machines
+# Copyright (c) 2012-2015 International Business Machines
 # Corporation and others. All Rights Reserved.
 #
 # This file should be in UTF-8 with a signature byte sequence ("BOM").
@ -2526,3 +2526,15 @@
 <3 あ
 <3 ァ
 <1 い
+
+** test: reorder single scripts not groups, ICU ticket 11449
+@ root
+% reorder Goth Latn
+* compare
+<1 4
+<1 𐌰  # Gothic
+<1 L
+<1 Ω
+# Before ICU 55, the following reordered together with Gothic.
+<1 𐌈  # Old Italic
+<1 𐑐  # Shavian
--- a/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationMiscTest.java
+++ b/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationMiscTest.java
@ -1,7 +1,6 @@
-
- /*
+/*
 *******************************************************************************
- * Copyright (C) 2002-2014, International Business Machines Corporation and
+ * Copyright (C) 2002-2015, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */
@ -3153,7 +3152,7 @@ public class CollationMiscTest extends TestFmwk {
    {
        Collator myCollation;
        int[] reorderCodes = {UScript.GREEK, UScript.HAN, ReorderCodes.PUNCTUATION};
-        int[] duplicateReorderCodes = {UScript.CUNEIFORM, UScript.GREEK, ReorderCodes.CURRENCY, UScript.EGYPTIAN_HIEROGLYPHS};
+        int[] duplicateReorderCodes = {UScript.HIRAGANA, UScript.GREEK, ReorderCodes.CURRENCY, UScript.KATAKANA};
        int[] reorderCodesStartingWithDefault = {ReorderCodes.DEFAULT, UScript.GREEK, UScript.HAN, ReorderCodes.PUNCTUATION};
        int[] retrievedReorderCodes;
        String greekString = "\u03b1";
@ -3283,47 +3282,7 @@ public class CollationMiscTest extends TestFmwk {
            errln("ERROR: retrieved reorder codes do not match set reorder codes.");
        }
    }
-    
-    public void TestSameLeadBytScriptReorder(){
-        String[] testSourceCases = {
-                "\ud800\udf31", // Gothic
-                "\ud801\udc50", // Shavian
-        };

-        String[] testTargetCases = {
-                "\u0100",   // Latin Extended-A
-                "\u2c74",   // Latin Extended-C
-        };
-
-        int[] results = {
-                -1,
-                -1,
-        };
-
-        Collator  myCollation;
-        String rules = "[reorder Goth Latn]";
-        try {
-            myCollation = new RuleBasedCollator(rules);
-        } catch (Exception e) {
-            warnln("ERROR: in creation of rule based collator");
-            return;
-        }
-        myCollation.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
-        myCollation.setStrength(Collator.TERTIARY);
-        for (int i = 0; i < testSourceCases.length ; i++)
-        {
-            CollationTest.doTest(this, (RuleBasedCollator)myCollation, 
-                    testSourceCases[i], testTargetCases[i], 
-                    results[i]);
-        }
-
-        // ensure that the non-reordered and reordered collation is the same
-        Collator nonReorderdCollator = RuleBasedCollator.getInstance();
-        int nonReorderedResults = nonReorderdCollator.compare(testSourceCases[0], testSourceCases[1]);
-        CollationTest.doTest(this, (RuleBasedCollator)myCollation, 
-                testSourceCases[0], testSourceCases[1], nonReorderedResults);   
-     }
-    
    static boolean containsExpectedScript(int[] scripts, int expectedScript) {
        for (int i = 0; i < scripts.length; ++i) {
            if (expectedScript == scripts[i]) { return true; }
@ -3332,66 +3291,87 @@ public class CollationMiscTest extends TestFmwk {
    }

    public void TestEquivalentReorderingScripts() {
+        // Beginning with ICU 55, collation reordering moves single scripts
+        // rather than groups of scripts,
+        // except where scripts share a range and sort primary-equal.
        final int[] expectedScripts = {
-                UScript.BOPOMOFO,               //Bopo
-                UScript.LISU,                   //Lisu
-                UScript.LYCIAN,                 //Lyci
-                UScript.CARIAN,                 //Cari
-                UScript.LYDIAN,                 //Lydi
-                UScript.YI,                     //Yiii
-                UScript.OLD_ITALIC,             //Ital
-                UScript.GOTHIC,                 //Goth
-                UScript.DESERET,                //Dsrt
-                UScript.SHAVIAN,                //Shaw
-                UScript.OSMANYA,                //Osma
-                UScript.LINEAR_B,               //Linb
-                UScript.CYPRIOT,                //Cprt
-                UScript.OLD_SOUTH_ARABIAN,      //Sarb
-                UScript.AVESTAN,                //Avst
-                UScript.IMPERIAL_ARAMAIC,       //Armi
-                UScript.INSCRIPTIONAL_PARTHIAN, //Prti
-                UScript.INSCRIPTIONAL_PAHLAVI,  //Phli
-                UScript.UGARITIC,               //Ugar
-                UScript.OLD_PERSIAN,            //Xpeo
-                UScript.CUNEIFORM,              //Xsux
-                UScript.EGYPTIAN_HIEROGLYPHS,   //Egyp
-                UScript.PHONETIC_POLLARD,       //Plrd
-                UScript.SORA_SOMPENG,           //Sora
-                UScript.MEROITIC_CURSIVE,       //Merc
-                UScript.MEROITIC_HIEROGLYPHS    //Mero
+                UScript.HIRAGANA,
+                UScript.KATAKANA,
+                UScript.KATAKANA_OR_HIRAGANA
        };

        int[] equivalentScripts = RuleBasedCollator.getEquivalentReorderCodes(UScript.GOTHIC);
-        if (equivalentScripts.length < expectedScripts.length) {
-            errln(String.format("ERROR/Gothic: retrieved equivalent script length wrong: " +
-                    "expected at least %d, was = %d",
+        if (equivalentScripts.length != 1 || equivalentScripts[0] != UScript.GOTHIC) {
+            errln(String.format("ERROR/Gothic: retrieved equivalent scripts wrong: " +
+                    "length expected 1, was = %d; expected [%d] was %s", equivalentScripts.length,
+                    UScript.GOTHIC, java.util.Arrays.toString(equivalentScripts)));
+        }
+
+        equivalentScripts = RuleBasedCollator.getEquivalentReorderCodes(UScript.HIRAGANA);
+        if (equivalentScripts.length != expectedScripts.length) {
+            errln(String.format("ERROR/Hiragana: retrieved equivalent script length wrong: " +
+                    "expected %d, was = %d",
                    expectedScripts.length, equivalentScripts.length));
        }
        int prevScript = -1;
        for (int i = 0; i < equivalentScripts.length; ++i) {
            int script = equivalentScripts[i];
            if (script <= prevScript) {
-                errln("ERROR/Gothic: equivalent scripts out of order at index " + i);
+                errln("ERROR/Hiragana: equivalent scripts out of order at index " + i);
            }
            prevScript = script;
        }
        for (int code : expectedScripts) {
            if (!containsExpectedScript(equivalentScripts, code)) {
-                errln("ERROR/Gothic: equivalent scripts do not contain " + code);
+                errln("ERROR/Hiragana: equivalent scripts do not contain " + code);
            }
        }

-        equivalentScripts = RuleBasedCollator.getEquivalentReorderCodes(UScript.SHAVIAN);
-        if (equivalentScripts.length < expectedScripts.length) {
-            errln(String.format("ERROR/Shavian: retrieved equivalent script length wrong: " +
-                    "expected at least %d, was = %d",
+        equivalentScripts = RuleBasedCollator.getEquivalentReorderCodes(UScript.KATAKANA);
+        if (equivalentScripts.length != expectedScripts.length) {
+            errln(String.format("ERROR/Katakana: retrieved equivalent script length wrong: " +
+                    "expected %d, was = %d",
                    expectedScripts.length, equivalentScripts.length));
        }
        for (int code : expectedScripts) {
            if (!containsExpectedScript(equivalentScripts, code)) {
-                errln("ERROR/Shavian: equivalent scripts do not contain " + code);
+                errln("ERROR/Katakana: equivalent scripts do not contain " + code);
            }
        }
+
+        equivalentScripts = RuleBasedCollator.getEquivalentReorderCodes(UScript.KATAKANA_OR_HIRAGANA);
+        if (equivalentScripts.length != expectedScripts.length) {
+            errln(String.format("ERROR/Hrkt: retrieved equivalent script length wrong: " +
+                    "expected %d, was = %d",
+                    expectedScripts.length, equivalentScripts.length));
+        }
+
+        equivalentScripts = RuleBasedCollator.getEquivalentReorderCodes(UScript.HAN);
+        if (equivalentScripts.length != 3) {
+            errln("ERROR/Hani: retrieved equivalent script length wrong: " +
+                    "expected 3, was = " + equivalentScripts.length);
+        }
+        equivalentScripts = RuleBasedCollator.getEquivalentReorderCodes(UScript.SIMPLIFIED_HAN);
+        if (equivalentScripts.length != 3) {
+            errln("ERROR/Hans: retrieved equivalent script length wrong: " +
+                    "expected 3, was = " + equivalentScripts.length);
+        }
+        equivalentScripts = RuleBasedCollator.getEquivalentReorderCodes(UScript.TRADITIONAL_HAN);
+        if (equivalentScripts.length != 3) {
+            errln("ERROR/Hant: retrieved equivalent script length wrong: " +
+                    "expected 3, was = " + equivalentScripts.length);
+        }
+
+        equivalentScripts = RuleBasedCollator.getEquivalentReorderCodes(UScript.MEROITIC_CURSIVE);
+        if (equivalentScripts.length != 2) {
+            errln("ERROR/Merc: retrieved equivalent script length wrong: " +
+                    "expected 2, was = " + equivalentScripts.length);
+        }
+        equivalentScripts = RuleBasedCollator.getEquivalentReorderCodes(UScript.MEROITIC_HIEROGLYPHS);
+        if (equivalentScripts.length != 2) {
+            errln("ERROR/Mero: retrieved equivalent script length wrong: " +
+                    "expected 2, was = " + equivalentScripts.length);
+        }
    }
    
    public void TestGreekFirstReorderCloning() {