ICU-13569 RBBI state table optimizations, ICU4J, work in progress, plus branch refresh.

X-SVN-Rev: 40914
2025-04-08 06:53:45 +00:00 · 2018-02-14 01:31:35 +00:00 · 2018-02-14 01:31:35 +00:00 · ff3ebb8c32
commit ff3ebb8c32
parent acae049ee1
32 changed files with 626 additions and 227 deletions
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java
@ -91,8 +91,7 @@ public class LocaleDisplayNamesImpl extends LocaleDisplayNames {
            CaseMap.toTitle().wholeString().noLowercase();

    private static String toTitleWholeStringNoLowercase(ULocale locale, String s) {
-        return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(
-                locale.toLocale(), null, s, new StringBuilder(), null).toString();
+        return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(locale.toLocale(), null, s);
    }

    public static LocaleDisplayNames getInstance(ULocale locale, DialectHandling dialectHandling) {
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/Grouper.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/Grouper.java
@ -37,7 +37,7 @@ public class Grouper {
            return GROUPER_AUTO;
        case ON_ALIGNED:
            return GROUPER_ON_ALIGNED;
-        case WESTERN:
+        case THOUSANDS:
            return GROUPER_WESTERN;
        default:
            throw new AssertionError();
@ -63,9 +63,9 @@ public class Grouper {
            return GROUPER_WESTERN;
        } else if (grouping1 == 3 && grouping2 == 2 && minGrouping == 1) {
            return GROUPER_INDIC;
-        } else if (grouping1 == 3 && grouping2 == 3 && minGrouping == 1) {
+        } else if (grouping1 == 3 && grouping2 == 3 && minGrouping == 2) {
            return GROUPER_WESTERN_MIN2;
-        } else if (grouping1 == 3 && grouping2 == 2 && minGrouping == 1) {
+        } else if (grouping1 == 3 && grouping2 == 2 && minGrouping == 2) {
            return GROUPER_INDIC_MIN2;
        } else {
            return new Grouper(grouping1, grouping2, minGrouping);
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/RoundingUtils.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/RoundingUtils.java
@ -22,7 +22,7 @@ public class RoundingUtils {
     * The maximum number of fraction places, integer numerals, or significant digits. TODO: This does
     * not feel like the best home for this value.
     */
-    public static final int MAX_INT_FRAC_SIG = 100;
+    public static final int MAX_INT_FRAC_SIG = 999;

    /**
     * Converts a rounding mode and metadata about the quantity being rounded to a boolean determining
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java
@ -35,7 +35,6 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok
            return null;
        }

-        affixPattern = ParsingUtils.maybeFold(affixPattern, parseFlags);
        AffixPatternMatcher series = new AffixPatternMatcher(affixPattern);
        series.factory = factory;
        series.ignorables = (0 != (parseFlags & ParsingUtils.PARSE_FLAG_EXACT_AFFIX)) ? null
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CodePointMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CodePointMatcher.java
@ -24,8 +24,8 @@ public class CodePointMatcher implements NumberParseMatcher {

    @Override
    public boolean match(StringSegment segment, ParsedNumber result) {
-        if (segment.getCodePoint() == cp) {
-            segment.adjustOffset(Character.charCount(cp));
+        if (segment.matches(cp)) {
+            segment.adjustOffsetByCodePoint();
            result.setCharsConsumed(segment);
        }
        return false;
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/CurrencyMatcher.java
@ -15,10 +15,10 @@ public class CurrencyMatcher implements NumberParseMatcher {
    private final String currency1;
    private final String currency2;

-    public static CurrencyMatcher getInstance(Currency currency, ULocale loc, int setupFlags) {
+    public static CurrencyMatcher getInstance(Currency currency, ULocale loc) {
        return new CurrencyMatcher(currency.getSubtype(),
-                ParsingUtils.maybeFold(currency.getSymbol(loc), setupFlags),
-                ParsingUtils.maybeFold(currency.getCurrencyCode(), setupFlags));
+                currency.getSymbol(loc),
+                currency.getCurrencyCode());
    }

    private CurrencyMatcher(String isoCode, String currency1, String currency2) {
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MatcherFactory.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/MatcherFactory.java
@ -15,7 +15,6 @@ public class MatcherFactory {
    DecimalFormatSymbols symbols;
    IgnorablesMatcher ignorables;
    ULocale locale;
-    int parseFlags;

    public MinusSignMatcher minusSign(boolean allowTrailing) {
        return MinusSignMatcher.getInstance(symbols, allowTrailing);
@ -35,7 +34,7 @@ public class MatcherFactory {

    public AnyMatcher currency() {
        AnyMatcher any = new AnyMatcher();
-        any.addMatcher(CurrencyMatcher.getInstance(currency, locale, parseFlags));
+        any.addMatcher(CurrencyMatcher.getInstance(currency, locale));
        any.addMatcher(CurrencyTrieMatcher.getInstance(locale));
        any.freeze();
        return any;
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NanMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NanMatcher.java
@ -2,7 +2,6 @@
 // License & terms of use: http://www.unicode.org/copyright.html#License
 package com.ibm.icu.impl.number.parse;

-import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.text.DecimalFormatSymbols;
 import com.ibm.icu.text.UnicodeSet;

@ -13,14 +12,11 @@ import com.ibm.icu.text.UnicodeSet;
 public class NanMatcher extends SymbolMatcher {

    private static final NanMatcher DEFAULT = new NanMatcher("NaN");
-    private static final NanMatcher DEFAULT_FOLDED = new NanMatcher(UCharacter.foldCase("NaN", true));

    public static NanMatcher getInstance(DecimalFormatSymbols symbols, int parseFlags) {
-        String symbolString = ParsingUtils.maybeFold(symbols.getNaN(), parseFlags);
+        String symbolString = symbols.getNaN();
        if (DEFAULT.string.equals(symbolString)) {
            return DEFAULT;
-        } else if (DEFAULT_FOLDED.string.equals(symbolString)) {
-            return DEFAULT_FOLDED;
        } else {
            return new NanMatcher(symbolString);
        }
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java
@ -31,6 +31,30 @@ import com.ibm.icu.util.ULocale;
 */
 public class NumberParserImpl {

+    @Deprecated
+    public static NumberParserImpl removeMeWhenMerged(ULocale locale, String pattern, int parseFlags) {
+        NumberParserImpl parser = new NumberParserImpl(parseFlags);
+        DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
+        IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT;
+
+        MatcherFactory factory = new MatcherFactory();
+        factory.currency = Currency.getInstance("USD");
+        factory.symbols = symbols;
+        factory.ignorables = ignorables;
+        factory.locale = locale;
+
+        ParsedPatternInfo patternInfo = PatternStringParser.parseToPatternInfo(pattern);
+        AffixMatcher.newGenerate(patternInfo, parser, factory, ignorables, parseFlags);
+
+        Grouper grouper = Grouper.forStrategy(GroupingStrategy.AUTO).withLocaleData(locale, patternInfo);
+        parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
+        parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
+        parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
+
+        parser.freeze();
+        return parser;
+    }
+
    // TODO: Find a better place for this enum.
    /** Controls the set of rules for parsing a string. */
    public static enum ParseMode {
@ -74,12 +98,13 @@ public class NumberParserImpl {
        // Temporary frontend for testing.

        int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE
-                | ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
+                | ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES
+                | ParsingUtils.PARSE_FLAG_OPTIMIZE;
        if (strictGrouping) {
            parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE;
        }

-        NumberParserImpl parser = new NumberParserImpl(parseFlags, true);
+        NumberParserImpl parser = new NumberParserImpl(parseFlags);
        DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
        IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT;

@ -88,7 +113,6 @@ public class NumberParserImpl {
        factory.symbols = symbols;
        factory.ignorables = ignorables;
        factory.locale = locale;
-        factory.parseFlags = parseFlags;

        ParsedPatternInfo patternInfo = PatternStringParser.parseToPatternInfo(pattern);
        AffixMatcher.newGenerate(patternInfo, parser, factory, ignorables, parseFlags);
@ -99,7 +123,7 @@ public class NumberParserImpl {
        parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
        parser.addMatcher(MinusSignMatcher.getInstance(symbols, false));
        parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
-        parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
+        parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
        parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
        parser.addMatcher(new RequireNumberMatcher());

@ -193,16 +217,18 @@ public class NumberParserImpl {
        if (parseCurrency || patternInfo.hasCurrencySign()) {
            parseFlags |= ParsingUtils.PARSE_FLAG_MONETARY_SEPARATORS;
        }
+        if (optimize) {
+            parseFlags |= ParsingUtils.PARSE_FLAG_OPTIMIZE;
+        }
        IgnorablesMatcher ignorables = isStrict ? IgnorablesMatcher.STRICT : IgnorablesMatcher.DEFAULT;

-        NumberParserImpl parser = new NumberParserImpl(parseFlags, optimize);
+        NumberParserImpl parser = new NumberParserImpl(parseFlags);

        MatcherFactory factory = new MatcherFactory();
        factory.currency = currency;
        factory.symbols = symbols;
        factory.ignorables = ignorables;
        factory.locale = locale;
-        factory.parseFlags = parseFlags;

        //////////////////////
        /// AFFIX MATCHERS ///
@ -216,7 +242,7 @@ public class NumberParserImpl {
        ////////////////////////

        if (parseCurrency || patternInfo.hasCurrencySign()) {
-            parser.addMatcher(CurrencyMatcher.getInstance(currency, locale, parseFlags));
+            parser.addMatcher(CurrencyMatcher.getInstance(currency, locale));
            parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
        }

@ -239,7 +265,7 @@ public class NumberParserImpl {
        parser.addMatcher(ignorables);
        parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
        if (!properties.getParseNoExponent()) {
-            parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
+            parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
        }

        //////////////////
@ -281,18 +307,12 @@ public class NumberParserImpl {
    /**
     * Creates a new, empty parser.
     *
-     * @param ignoreCase
-     *            If true, perform case-folding. This parameter needs to go into the constructor because
-     *            its value is used during the construction of the matcher chain.
-     * @param optimize
-     *            If true, compute "lead chars" UnicodeSets for the matchers. This reduces parsing
-     *            runtime but increases construction runtime. If the parser is going to be used only once
-     *            or twice, set this to false; if it is going to be used hundreds of times, set it to
-     *            true.
+     * @param parseFlags
+     *            The parser settings defined in the PARSE_FLAG_* fields.
     */
-    public NumberParserImpl(int parseFlags, boolean optimize) {
+    public NumberParserImpl(int parseFlags) {
        matchers = new ArrayList<NumberParseMatcher>();
-        if (optimize) {
+        if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_OPTIMIZE)) {
            leadCodePointses = new ArrayList<UnicodeSet>();
        } else {
            leadCodePointses = null;
@ -306,9 +326,7 @@ public class NumberParserImpl {
        assert !frozen;
        this.matchers.add(matcher);
        if (leadCodePointses != null) {
-            UnicodeSet leadCodePoints = matcher.getLeadCodePoints();
-            assert leadCodePoints.isFrozen();
-            this.leadCodePointses.add(leadCodePoints);
+            addLeadCodePointsForMatcher(matcher);
        }
    }

@ -317,13 +335,22 @@ public class NumberParserImpl {
        this.matchers.addAll(matchers);
        if (leadCodePointses != null) {
            for (NumberParseMatcher matcher : matchers) {
-                UnicodeSet leadCodePoints = matcher.getLeadCodePoints();
-                assert leadCodePoints.isFrozen();
-                this.leadCodePointses.add(leadCodePoints);
+                addLeadCodePointsForMatcher(matcher);
            }
        }
    }

+    private void addLeadCodePointsForMatcher(NumberParseMatcher matcher) {
+        UnicodeSet leadCodePoints = matcher.getLeadCodePoints();
+        assert leadCodePoints.isFrozen();
+        // TODO: Avoid the clone operation here.
+        if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE)) {
+            leadCodePoints = leadCodePoints.cloneAsThawed().closeOver(UnicodeSet.ADD_CASE_MAPPINGS)
+                    .freeze();
+        }
+        this.leadCodePointses.add(leadCodePoints);
+    }
+
    public void setComparator(Comparator<ParsedNumber> comparator) {
        assert !frozen;
        this.comparator = comparator;
@ -353,7 +380,7 @@ public class NumberParserImpl {
    public void parse(String input, int start, boolean greedy, ParsedNumber result) {
        assert frozen;
        assert start >= 0 && start < input.length();
-        StringSegment segment = new StringSegment(ParsingUtils.maybeFold(input, parseFlags));
+        StringSegment segment = new StringSegment(input, parseFlags);
        segment.adjustOffset(start);
        if (greedy) {
            parseGreedyRecursive(segment, result);
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java
@ -2,7 +2,6 @@
 // License & terms of use: http://www.unicode.org/copyright.html#License
 package com.ibm.icu.impl.number.parse;

-import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UnicodeSet.EntryRange;

@ -23,6 +22,7 @@ public class ParsingUtils {
    public static final int PARSE_FLAG_EXACT_AFFIX = 0x0200;
    public static final int PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400;
    public static final int PARSE_FLAG_FRACTION_GROUPING_DISABLED = 0x0800;
+    public static final int PARSE_FLAG_OPTIMIZE = 0x1000;

    public static void putLeadCodePoints(UnicodeSet input, UnicodeSet output) {
        for (EntryRange range : input.ranges()) {
@ -39,16 +39,4 @@ public class ParsingUtils {
        }
    }

-    /**
-     * Case-folds the string if IGNORE_CASE flag is set; otherwise, returns the same string.
-     */
-    public static String maybeFold(String input, int parseFlags) {
-        UnicodeSet cwcf = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.CWCF);
-        if (0 != (parseFlags & PARSE_FLAG_IGNORE_CASE) && cwcf.containsSome(input)) {
-            return UCharacter.foldCase(input, true);
-        } else {
-            return input;
-        }
-    }
-
 }
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java
@ -15,16 +15,13 @@ public class ScientificMatcher implements NumberParseMatcher {
    private final String exponentSeparatorString;
    private final DecimalMatcher exponentMatcher;

-    public static ScientificMatcher getInstance(
-            DecimalFormatSymbols symbols,
-            Grouper grouper,
-            int parseFlags) {
+    public static ScientificMatcher getInstance(DecimalFormatSymbols symbols, Grouper grouper) {
        // TODO: Static-initialize most common instances?
-        return new ScientificMatcher(symbols, grouper, parseFlags);
+        return new ScientificMatcher(symbols, grouper);
    }

-    private ScientificMatcher(DecimalFormatSymbols symbols, Grouper grouper, int parseFlags) {
-        exponentSeparatorString = ParsingUtils.maybeFold(symbols.getExponentSeparator(), parseFlags);
+    private ScientificMatcher(DecimalFormatSymbols symbols, Grouper grouper) {
+        exponentSeparatorString = symbols.getExponentSeparator();
        exponentMatcher = DecimalMatcher.getInstance(symbols,
                grouper,
                ParsingUtils.PARSE_FLAG_DECIMAL_SCIENTIFIC | ParsingUtils.PARSE_FLAG_INTEGER_ONLY);
@ -47,19 +44,14 @@ public class ScientificMatcher implements NumberParseMatcher {
            if (segment.length() == 0) {
                return true;
            }
-            int leadCp = segment.getCodePoint();
-            if (leadCp == -1) {
-                // Partial code point match
-                return true;
-            }

            // Allow a sign, and then try to match digits.
            boolean minusSign = false;
-            if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN).contains(leadCp)) {
+            if (segment.matches(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN))) {
                minusSign = true;
-                segment.adjustOffset(Character.charCount(leadCp));
-            } else if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN).contains(leadCp)) {
-                segment.adjustOffset(Character.charCount(leadCp));
+                segment.adjustOffsetByCodePoint();
+            } else if (segment.matches(UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN))) {
+                segment.adjustOffsetByCodePoint();
            }

            int digitsOffset = segment.getOffset();
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/StringSegment.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/StringSegment.java
@ -2,6 +2,9 @@
 // License & terms of use: http://www.unicode.org/copyright.html#License
 package com.ibm.icu.impl.number.parse;

+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UnicodeSet;
+
 /**
 * A mutable class allowing for a String with a variable offset and length. The charAt, length, and
 * subSequence methods all operate relative to the fixed offset into the String.
@ -12,11 +15,13 @@ public class StringSegment implements CharSequence {
    private final String str;
    private int start;
    private int end;
+    private boolean foldCase;

-    public StringSegment(String str) {
+    public StringSegment(String str, int parseFlags) {
        this.str = str;
        this.start = 0;
        this.end = str.length();
+        this.foldCase = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE);
    }

    public int getOffset() {
@ -42,6 +47,13 @@ public class StringSegment implements CharSequence {
        start += delta;
    }

+    /**
+     * Adjusts the offset by the width of the current code point, either 1 or 2 chars.
+     */
+    public void adjustOffsetByCodePoint() {
+        start += Character.charCount(getCodePoint());
+    }
+
    public void setLength(int length) {
        assert length >= 0;
        assert start + length <= str.length();
@ -72,28 +84,73 @@ public class StringSegment implements CharSequence {
    /**
     * Returns the first code point in the string segment, or -1 if the string starts with an invalid
     * code point.
+     *
+     * <p>
+     * <strong>Important:</strong> Most of the time, you should use {@link #matches}, which handles case
+     * folding logic, instead of this method.
     */
    public int getCodePoint() {
        assert start < end;
        char lead = str.charAt(start);
-        if (Character.isHighSurrogate(lead) && start + 1 < end) {
-            return Character.toCodePoint(lead, str.charAt(start + 1));
-        } else if (Character.isSurrogate(lead)) {
-            return -1;
-        } else {
-            return lead;
+        char trail;
+        if (Character.isHighSurrogate(lead)
+                && start + 1 < end
+                && Character.isLowSurrogate(trail = str.charAt(start + 1))) {
+            return Character.toCodePoint(lead, trail);
        }
+        return lead;
+    }
+
+    /**
+     * Returns true if the first code point of this StringSegment equals the given code point.
+     *
+     * <p>
+     * This method will perform case folding if case folding is enabled for the parser.
+     */
+    public boolean matches(int otherCp) {
+        return codePointsEqual(getCodePoint(), otherCp, foldCase);
+    }
+
+    /**
+     * Returns true if this segment's first code point is in the given UnicodeSet (no case folding).
+     */
+    public boolean matches(UnicodeSet uniset) {
+        // TODO: Move UnicodeSet case-folding logic here.
+        // TODO: Handle string matches here instead of separately.
+        int cp = getCodePoint();
+        if (cp == -1) {
+            return false;
+        }
+        return uniset.contains(cp);
    }

    /**
     * Returns the length of the prefix shared by this StringSegment and the given CharSequence. For
     * example, if this string segment is "aab", and the char sequence is "aac", this method returns 2,
     * since the first 2 characters are the same.
+     *
+     * <p>
+     * This method will perform case folding if case folding is enabled for the parser.
     */
    public int getCommonPrefixLength(CharSequence other) {
+        return getPrefixLengthInternal(other, foldCase);
+    }
+
+    /**
+     * Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding is
+     * enabled for the parser.
+     */
+    public int getCaseSensitivePrefixLength(CharSequence other) {
+        return getPrefixLengthInternal(other, false);
+    }
+
+    private int getPrefixLengthInternal(CharSequence other, boolean foldCase) {
        int offset = 0;
        for (; offset < Math.min(length(), other.length());) {
-            if (charAt(offset) != other.charAt(offset)) {
+            // TODO: case-fold code points, not chars
+            char c1 = charAt(offset);
+            char c2 = other.charAt(offset);
+            if (!codePointsEqual(c1, c2, foldCase)) {
                break;
            }
            offset++;
@ -101,6 +158,30 @@ public class StringSegment implements CharSequence {
        return offset;
    }

+    // /**
+    // * Case-folds the string if IGNORE_CASE flag is set; otherwise, returns the same string.
+    // */
+    // public static String maybeFold(String input, int parseFlags) {
+    // UnicodeSet cwcf = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.CWCF);
+    // if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_IGNORE_CASE) && cwcf.containsSome(input)) {
+    // return UCharacter.foldCase(input, true);
+    // } else {
+    // return input;
+    // }
+    // }
+
+    private static final boolean codePointsEqual(int cp1, int cp2, boolean foldCase) {
+        if (cp1 == cp2) {
+            return true;
+        }
+        if (!foldCase) {
+            return false;
+        }
+        cp1 = UCharacter.foldCase(cp1, true);
+        cp2 = UCharacter.foldCase(cp2, true);
+        return cp1 == cp2;
+    }
+
    @Override
    public String toString() {
        return str.substring(0, start) + "[" + str.substring(start, end) + "]" + str.substring(end);
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/SymbolMatcher.java
@ -47,9 +47,8 @@ public abstract class SymbolMatcher implements NumberParseMatcher {
            }
        }

-        int cp = segment.getCodePoint();
-        if (cp != -1 && uniSet.contains(cp)) {
-            segment.adjustOffset(Character.charCount(cp));
+        if (segment.matches(uniSet)) {
+            segment.adjustOffsetByCodePoint();
            accept(segment, result);
            return false;
        }
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/UnicodeSetStaticCache.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/UnicodeSetStaticCache.java
@ -51,7 +51,7 @@ public class UnicodeSetStaticCache {
        DIGITS,
        NAN_LEAD,
        SCIENTIFIC_LEAD,
-        CWCF,
+        CWCF, // TODO: Check if this is being used and remove it if not.

        // Combined Separators with Digits (for lead code points)
        DIGITS_OR_ALL_SEPARATORS,
--- a/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java
@ -5123,37 +5123,6 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
        return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
    }

-    /**
-     * Return a string with just the first word titlecased, for menus and UI, etc. This does not affect most of the string,
-     * and sometimes has no effect at all; the original string is returned whenever casing
-     * would not be appropriate for the first word (such as for CJK characters or initial numbers).
-     * Initial non-letters are skipped in order to find the character to change.
-     * Characters past the first affected are left untouched: see also TITLECASE_NO_LOWERCASE.
-     * <p>Examples:
-     * <table border='1'><tr><th>Source</th><th>Result</th><th>Locale</th></tr>
-     * <tr><td>anglo-American locale</td><td>Anglo-American locale</td></tr>
-     * <tr><td>“contact us”</td><td>“Contact us”</td></tr>
-     * <tr><td>49ers win!</td><td>49ers win!</td></tr>
-     * <tr><td>丰(abc)</td><td>丰(abc)</td></tr>
-     * <tr><td>«ijs»</td><td>«Ijs»</td></tr>
-     * <tr><td>«ijs»</td><td>«IJs»</td><td>nl-BE</td></tr>
-     * <tr><td>«ijs»</td><td>«İjs»</td><td>tr-DE</td></tr>
-     * </table>
-     * @param locale the locale for accessing exceptional behavior (eg for tr).
-     * @param str the source string to change
-     * @return the modified string, or the original if no modifications were necessary.
-     * @internal
-     * @deprecated ICU internal only
-     */
-    @Deprecated
-    public static String toTitleFirst(ULocale locale, String str) {
-        // TODO: Remove this function. Inline it where it is called in CLDR.
-        return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(locale.toLocale(), null, str);
-    }
-
-    private static final com.ibm.icu.text.CaseMap.Title TO_TITLE_WHOLE_STRING_NO_LOWERCASE =
-            com.ibm.icu.text.CaseMap.toTitle().wholeString().noLowercase();
-
    /**
     * {@icu} <p>Returns the titlecase version of the argument string.
     * <p>Position for titlecasing is determined by the argument break
--- a/icu4j/main/classes/core/src/com/ibm/icu/number/FormattedNumber.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/number/FormattedNumber.java
@ -15,8 +15,8 @@ import com.ibm.icu.text.PluralRules.IFixedDecimal;
 import com.ibm.icu.util.ICUUncheckedIOException;

 /**
- * The result of a number formatting operation. This class allows the result to be exported in several data types,
- * including a String, an AttributedCharacterIterator, and a BigDecimal.
+ * The result of a number formatting operation. This class allows the result to be exported in several
+ * data types, including a String, an AttributedCharacterIterator, and a BigDecimal.
 *
 * @draft ICU 60
 * @provisional This API might change or be removed in a future release.
@ -47,12 +47,12 @@ public class FormattedNumber {
    }

    /**
-     * Append the formatted number to an Appendable, such as a StringBuilder. This may be slightly more efficient than
-     * creating a String.
+     * Append the formatted number to an Appendable, such as a StringBuilder. This may be slightly more
+     * efficient than creating a String.
     *
     * <p>
-     * If an IOException occurs when appending to the Appendable, an unchecked {@link ICUUncheckedIOException} is thrown
-     * instead.
+     * If an IOException occurs when appending to the Appendable, an unchecked
+     * {@link ICUUncheckedIOException} is thrown instead.
     *
     * @param appendable
     *            The Appendable to which to append the formatted number string.
@ -73,16 +73,18 @@ public class FormattedNumber {
    }

    /**
-     * Determine the start and end indices of the first occurrence of the given <em>field</em> in the output string.
-     * This allows you to determine the locations of the integer part, fraction part, and sign.
+     * Determine the start and end indices of the first occurrence of the given <em>field</em> in the
+     * output string. This allows you to determine the locations of the integer part, fraction part, and
+     * sign.
     *
     * <p>
-     * If multiple different field attributes are needed, this method can be called repeatedly, or if <em>all</em> field
-     * attributes are needed, consider using getFieldIterator().
+     * If multiple different field attributes are needed, this method can be called repeatedly, or if
+     * <em>all</em> field attributes are needed, consider using getFieldIterator().
     *
     * <p>
-     * If a field occurs multiple times in an output string, such as a grouping separator, this method will only ever
-     * return the first occurrence. Use getFieldIterator() to access all occurrences of an attribute.
+     * If a field occurs multiple times in an output string, such as a grouping separator, this method
+     * will only ever return the first occurrence. Use getFieldIterator() to access all occurrences of an
+     * attribute.
     *
     * @param fieldPosition
     *            The FieldPosition to populate with the start and end indices of the desired field.
@ -106,13 +108,15 @@ public class FormattedNumber {
    }

    /**
-     * Export the formatted number as an AttributedCharacterIterator. This allows you to determine which characters in
-     * the output string correspond to which <em>fields</em>, such as the integer part, fraction part, and sign.
+     * Export the formatted number as an AttributedCharacterIterator. This allows you to determine which
+     * characters in the output string correspond to which <em>fields</em>, such as the integer part,
+     * fraction part, and sign.
     *
     * <p>
     * If information on only one field is needed, consider using populateFieldPosition() instead.
     *
-     * @return An AttributedCharacterIterator, containing information on the field attributes of the number string.
+     * @return An AttributedCharacterIterator, containing information on the field attributes of the
+     *         number string.
     * @draft ICU 60
     * @provisional This API might change or be removed in a future release.
     * @see com.ibm.icu.text.NumberFormat.Field
@ -124,8 +128,9 @@ public class FormattedNumber {
    }

    /**
-     * Export the formatted number as a BigDecimal. This endpoint is useful for obtaining the exact number being printed
-     * after scaling and rounding have been applied by the number formatting pipeline.
+     * Export the formatted number as a BigDecimal. This endpoint is useful for obtaining the exact
+     * number being printed after scaling and rounding have been applied by the number formatting
+     * pipeline.
     *
     * @return A BigDecimal representation of the formatted number.
     * @draft ICU 60
@ -138,31 +143,29 @@ public class FormattedNumber {

    /**
     * @internal
-     * @deprecated This API is ICU internal only.
+     * @deprecated This API is ICU internal only. Use {@link #populateFieldPosition} or
+     *             {@link #getFieldIterator} for similar functionality.
     */
    @Deprecated
    public String getPrefix() {
        NumberStringBuilder temp = new NumberStringBuilder();
-        int length = micros.modOuter.apply(temp, 0, 0);
-        length += micros.modMiddle.apply(temp, 0, length);
-        /* length += */ micros.modInner.apply(temp, 0, length);
-        int prefixLength = micros.modOuter.getPrefixLength() + micros.modMiddle.getPrefixLength()
-                + micros.modInner.getPrefixLength();
+        // #13453: DecimalFormat wants the affixes from the pattern only (modMiddle).
+        micros.modMiddle.apply(temp, 0, 0);
+        int prefixLength = micros.modMiddle.getPrefixLength();
        return temp.subSequence(0, prefixLength).toString();
    }

    /**
     * @internal
-     * @deprecated This API is ICU internal only.
+     * @deprecated This API is ICU internal only. Use {@link #populateFieldPosition} or
+     *             {@link #getFieldIterator} for similar functionality.
     */
    @Deprecated
    public String getSuffix() {
        NumberStringBuilder temp = new NumberStringBuilder();
-        int length = micros.modOuter.apply(temp, 0, 0);
-        length += micros.modMiddle.apply(temp, 0, length);
-        length += micros.modInner.apply(temp, 0, length);
-        int prefixLength = micros.modOuter.getPrefixLength() + micros.modMiddle.getPrefixLength()
-                + micros.modInner.getPrefixLength();
+        // #13453: DecimalFormat wants the affixes from the pattern only (modMiddle).
+        int length = micros.modMiddle.apply(temp, 0, 0);
+        int prefixLength = micros.modMiddle.getPrefixLength();
        return temp.subSequence(prefixLength, length).toString();
    }

@ -185,7 +188,9 @@ public class FormattedNumber {
    public int hashCode() {
        // NumberStringBuilder and BigDecimal are mutable, so we can't call
        // #equals() or #hashCode() on them directly.
-        return Arrays.hashCode(nsb.toCharArray()) ^ Arrays.hashCode(nsb.toFieldArray()) ^ fq.toBigDecimal().hashCode();
+        return Arrays.hashCode(nsb.toCharArray())
+                ^ Arrays.hashCode(nsb.toFieldArray())
+                ^ fq.toBigDecimal().hashCode();
    }

    /**
@ -206,7 +211,7 @@ public class FormattedNumber {
        // #equals() or #hashCode() on them directly.
        FormattedNumber _other = (FormattedNumber) other;
        return Arrays.equals(nsb.toCharArray(), _other.nsb.toCharArray())
-                ^ Arrays.equals(nsb.toFieldArray(), _other.nsb.toFieldArray())
-                ^ fq.toBigDecimal().equals(_other.fq.toBigDecimal());
+                && Arrays.equals(nsb.toFieldArray(), _other.nsb.toFieldArray())
+                && fq.toBigDecimal().equals(_other.fq.toBigDecimal());
    }
 }
--- a/icu4j/main/classes/core/src/com/ibm/icu/number/NumberFormatter.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/number/NumberFormatter.java
@ -171,7 +171,7 @@ public final class NumberFormatter {
     * <li>MIN2: 1234 and 12,34,567
     * <li>AUTO: 1,234 and 12,34,567
     * <li>ON_ALIGNED: 1,234 and 12,34,567
-     * <li>WESTERN: 1,234 and 1,234,567
+     * <li>THOUSANDS: 1,234 and 1,234,567
     * </ul>
     *
     * <p>
@ -259,7 +259,7 @@ public final class NumberFormatter {
         * @provisional This API might change or be removed in a future release.
         * @see NumberFormatter
         */
-        WESTERN
+        THOUSANDS
    }

    /**
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/MeasureFormat.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/MeasureFormat.java
@ -1044,7 +1044,7 @@ public class MeasureFormat extends UFormat {
            case TIME_UNIT_FORMAT:
                return createTimeUnitFormat();
            case CURRENCY_FORMAT:
-                return new CurrencyFormat(locale);
+                return MeasureFormat.getCurrencyFormat(locale);
            default:
                throw new InvalidObjectException("Unknown subclass: " + subClass);
            }
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIDataWrapper.java
@ -18,17 +18,19 @@ import com.ibm.icu.impl.ICUBinary.Authenticate;
 import com.ibm.icu.impl.Trie2;

 /**
-* <p>Internal class used for Rule Based Break Iterators</p>
+* <p>Internal class used for Rule Based Break Iterators.</p>
 * <p>This class provides access to the compiled break rule data, as
 * it is stored in a .brk file.
+* Not intended for public use; declared public for testing purposes only.
+* @internal
 */
-final class RBBIDataWrapper {
+public final class RBBIDataWrapper {
    //
    // These fields are the ready-to-use compiled rule data, as
    //   read from the file.
    //
-    RBBIDataHeader fHeader;
-    short          fFTable[];
+    public RBBIDataHeader fHeader;
+    public short          fFTable[];
    short          fRTable[];
    short          fSFTable[];
    short          fSRTable[];
@ -78,11 +80,16 @@ final class RBBIDataWrapper {
    // Index offsets to the fields in a state table row.
    //    Corresponds to struct RBBIStateTableRow in the C version.
    //
-    final static int      ACCEPTING  = 0;
-    final static int      LOOKAHEAD  = 1;
-    final static int      TAGIDX     = 2;
-    final static int      RESERVED   = 3;
-    final static int      NEXTSTATES = 4;
+    /** @internal */
+    public final static int      ACCEPTING  = 0;
+    /** @internal */
+    public final static int      LOOKAHEAD  = 1;
+    /** @internal */
+    public final static int      TAGIDX     = 2;
+    /** @internal */
+    public final static int      RESERVED   = 3;
+    /** @internal */
+    public final static int      NEXTSTATES = 4;

    // Index offsets to header fields of a state table
    //     struct RBBIStateTable {...   in the C version.
@ -101,13 +108,15 @@ final class RBBIDataWrapper {

    /**
     * Data Header.  A struct-like class with the fields from the RBBI data file header.
+     * Not intended for public use, declared public for testing purposes only.
+     * @internal
     */
-    final static class RBBIDataHeader {
+    public final static class RBBIDataHeader {
        int         fMagic;         //  == 0xbla0
        byte[]      fFormatVersion; //  For ICU 3.4 and later.
        int         fLength;        //  Total length in bytes of this RBBI Data,
                                       //      including all sections, not just the header.
-        int         fCatCount;      //  Number of character categories.
+        public int  fCatCount;      //  Number of character categories.

        //
        //  Offsets and sizes of each of the subsections within the RBBI data.
@ -139,9 +148,9 @@ final class RBBIDataWrapper {
    /**
     * RBBI State Table Indexing Function.  Given a state number, return the
     * array index of the start of the state table row for that state.
-     *
+     * @internal
     */
-    int getRowIndex(int state){
+    public int getRowIndex(int state){
        return ROW_DATA + state * (fHeader.fCatCount + 4);
    }

@ -311,17 +320,17 @@ final class RBBIDataWrapper {
        return This;
    }

-    ///CLOVER:OFF
-    //  Getters for fields from the state table header
-    //
-    private int getStateTableNumStates(short table[]) {
+    /**
+     *  Getters for fields from the state table header
+     *  @internal
+     */
+    public int getStateTableNumStates(short table[]) {
        if (isBigEndian) {
            return (table[NUMSTATES] << 16) | (table[NUMSTATES+1] & 0xffff);
        } else {
            return (table[NUMSTATES+1] << 16) | (table[NUMSTATES] & 0xffff);
        }
    }
-    ///CLOVER:ON

    int getStateTableFlags(short table[]) {
        // This works for up to 15 flags bits.
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java
@ -342,10 +342,10 @@ class RBBIRuleBuilder {
        //
        // UnicodeSet processing.
        //    Munge the Unicode Sets to create a set of character categories.
-        //    Generate the mapping tables (TRIE) from input 32-bit characters to
+        //    Generate the mapping tables (TRIE) from input code points to
        //    the character categories.
        //
-        builder.fSetBuilder.build();
+        builder.fSetBuilder.buildRanges();

        //
        //   Generate the DFA state transition table.
@ -363,10 +363,34 @@ class RBBIRuleBuilder {
            builder.fForwardTables.printRuleStatusTable();
        }

+        builder.optimizeTables();
+        builder.fSetBuilder.buildTrie();
        //
        //   Package up the compiled data, writing it to an output stream
        //      in the serialization format.  This is the same as the ICU4C runtime format.
        //
        builder.flattenData(os);
    }
+
+    /**
+     * Mutable holder for a pair of character class (category) numbers.
+     * optimizeTables() hands one instance to findDuplCharClassFrom() on every pass and
+     * then removes the table column numbered {@code right}.
+     */
+    static class ClassPair {
+        // NOTE(review): the starting value 3 presumably skips reserved low-numbered
+        // classes that must never be merged - confirm against the set builder.
+        int left = 3;
+        int right = 0;
+    }
+
+    /**
+     * Shrink the generated state tables. First, repeatedly merge pairs of character
+     * classes that the forward table reports as duplicates, dropping the duplicate
+     * column from all four tables. Then remove duplicate states from each table.
+     */
+    void optimizeTables() {
+        ClassPair duplPair = new ClassPair();
+
+        // NOTE(review): duplicates are detected in the forward table only, yet the column
+        // is removed from all four tables - confirm the other tables always agree.
+        while (fForwardTables.findDuplCharClassFrom(duplPair)) {
+            // mergeCategories() takes the two class numbers, not the pair object.
+            fSetBuilder.mergeCategories(duplPair.left, duplPair.right);
+            fForwardTables.removeColumn(duplPair.right);
+            fReverseTables.removeColumn(duplPair.right);
+            fSafeFwdTables.removeColumn(duplPair.right);
+            fSafeRevTables.removeColumn(duplPair.right);
+        }
+
+        fForwardTables.removeDuplicateStates();
+        fReverseTables.removeDuplicateStates();
+        fSafeFwdTables.removeDuplicateStates();
+        fSafeRevTables.removeDuplicateStates();
+    }
 }
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBISetBuilder.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBISetBuilder.java
@ -112,7 +112,7 @@ class RBBISetBuilder {
                        }
                    }
                    if (setName.equals("dictionary")) {
-                        this.fNum |= 0x4000;
+                        this.fNum |= DICT_BIT;
                        break;
                    }
                }
@ -138,6 +138,8 @@ class RBBISetBuilder {

    boolean             fSawBOF;

+    static final int    DICT_BIT = 0x4000;
+

    //------------------------------------------------------------------------
    //
@ -156,7 +158,7 @@ class RBBISetBuilder {
    //                          from the Unicode Sets.
    //
    //------------------------------------------------------------------------
-    void build() {
+    void buildRanges() {
        RangeDescriptor rlRange;

        if (fRB.fDebugEnv!=null  && fRB.fDebugEnv.indexOf("usets")>=0) {printSets();}
@ -280,6 +282,15 @@ class RBBISetBuilder {

        if (fRB.fDebugEnv!=null  && fRB.fDebugEnv.indexOf("rgroup")>=0) {printRangeGroups();}
        if (fRB.fDebugEnv!=null  && fRB.fDebugEnv.indexOf("esets")>=0) {printSets();}
+    }
+
+
+    /**
+     * Build the Trie table for mapping UChar32 values to the corresponding
+     * range group number.
+     */
+    void buildTrie() {
+        RangeDescriptor rlRange;

        fTrie = new Trie2Writable(0,       //   Initial value for all code points.
                                  0);      //   Error value for out-of-range input.
@ -294,7 +305,20 @@ class RBBISetBuilder {
        }
    }

-
+    /**
+     * Merge character category {@code right} into category {@code left}.
+     * Every range numbered {@code right} is renumbered to {@code left}, every range with
+     * a higher number moves down by one, and the group count is reduced by one.
+     * The DICT_BIT flag carried in a range's fNum is kept when the range is renumbered.
+     *
+     * @param left   the category that is kept; must be at least 1.
+     * @param right  the category that is removed; must be greater than {@code left}.
+     */
+    void mergeCategories(int left, int right) {
+        assert(left >= 1);
+        assert(right > left);
+        for (RangeDescriptor rd = fRangeList; rd != null; rd = rd.fNext) {
+            // Split fNum into the plain category number and the dictionary flag.
+            int rangeNum = rd.fNum & ~DICT_BIT;
+            int rangeDict = rd.fNum & DICT_BIT;
+            if (rangeNum == right) {
+                rd.fNum = left | rangeDict;
+            } else if (rangeNum > right) {
+                // Close the gap left by the removed category.
+                rd.fNum--;
+            }
+        }
+        --fGroupCount;
+    }
    //-----------------------------------------------------------------------------------
    //
    //          getTrieSize()    Return the size that will be required to serialize the Trie.
@ -457,7 +481,7 @@ class RBBISetBuilder {
                if (groupNum<10) {System.out.print(" ");}
                System.out.print(groupNum + " ");

-                if ((rlRange.fNum & 0x4000) != 0) { System.out.print(" <DICT> ");}
+                if ((rlRange.fNum & DICT_BIT) != 0) { System.out.print(" <DICT> ");}

                for (i=0; i<rlRange.fIncludesSets.size(); i++) {
                    RBBINode       usetNode    = rlRange.fIncludesSets.get(i);
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java
@ -655,7 +655,7 @@ class RBBITableBuilder {
                        // if sd.fAccepting already had a value other than 0 or -1, leave it be.

                       // If the end marker node is from a look-ahead rule, set
-                       //   the fLookAhead field or this state also.
+                       //   the fLookAhead field for this state also.
                       if (endMarker.fLookAheadEnd) {
                        // TODO:  don't change value if already set?
                        // TODO:  allow for more than one active look-ahead rule in engine.
@ -832,6 +832,129 @@ class RBBITableBuilder {



+//
+//    findDuplCharClassFrom()
+//
+/**
+ * Find the next pair of character classes whose state table columns are identical
+ * in every state of this table.
+ *
+ * The search resumes at {@code classPair.left}; {@code classPair.right} is restarted at
+ * {@code left + 1} for each base class, so its incoming value is ignored.
+ *
+ * @param classPair in/out. On a true return, {@code left} is the class to keep and
+ *                  {@code right} the duplicate class.
+ * @return true if a duplicate pair was found, false when the search is exhausted.
+ */
+boolean findDuplCharClassFrom(RBBIRuleBuilder.ClassPair classPair) {
+    int numStates = fDStates.size();
+    int numCols = fRB.fSetBuilder.getNumCharCategories();
+
+    // NOTE(review): assumes fDStates is a List<RBBIStateDescriptor> and that
+    // RBBIStateDescriptor.fDtran is an int[] - confirm against the class declarations.
+    // Java requires definite assignment, hence the explicit zero initializers.
+    int table_base = 0;
+    int table_dupl = 0;
+    for (; classPair.left < numCols-1; ++classPair.left) {
+        for (classPair.right=classPair.left+1; classPair.right < numCols; ++classPair.right) {
+             for (int state=0; state<numStates; state++) {
+                 RBBIStateDescriptor sd = fDStates.get(state);
+                 table_base = sd.fDtran[classPair.left];
+                 table_dupl = sd.fDtran[classPair.right];
+                 if (table_base != table_dupl) {
+                     break;
+                 }
+             }
+             // Still equal after the state loop: no state told the two columns apart.
+             if (table_base == table_dupl) {
+                 return true;
+             }
+        }
+    }
+    return false;
+}
+
+
+//
+//    removeColumn()
+//
+/**
+ * Remove one character class column from the transition row of every state.
+ *
+ * @param column index of the column to delete; must be a valid index into each row.
+ */
+void removeColumn(int column) {
+    int numStates = fDStates.size();
+    for (int state=0; state<numStates; state++) {
+        // NOTE(review): assumes fDtran is a non-final int[] field - confirm.
+        RBBIStateDescriptor sd = fDStates.get(state);
+        assert(column < sd.fDtran.length);
+        // Java arrays cannot shrink in place: copy everything except the removed column.
+        int[] newArray = new int[sd.fDtran.length - 1];
+        System.arraycopy(sd.fDtran, 0, newArray, 0, column);
+        System.arraycopy(sd.fDtran, column+1, newArray, column, newArray.length - column);
+        sd.fDtran = newArray;
+    }
+}
+
+/*
+ * findDuplicateState
+ */
+/**
+ * Find the next pair of states that are duplicates of each other.
+ * Two states match when their fAccepting, fLookAhead and fTagsIdx values are equal and,
+ * in every column, their transitions are either equal or both point at one of the two
+ * states being compared.
+ *
+ * Java has no reference parameters, so the pair of state numbers is passed in a mutable
+ * holder. The search resumes at {@code states.left}; {@code states.right} is restarted
+ * at {@code left + 1} for each first state.
+ *
+ * @param states in/out. On a true return, {@code left} is the state to keep and
+ *               {@code right} its duplicate.
+ * @return true if a duplicate pair was found, false when the search is exhausted.
+ */
+boolean findDuplicateState(RBBIRuleBuilder.ClassPair states) {
+    int numStates = fDStates.size();
+    int numCols = fRB.fSetBuilder.getNumCharCategories();
+
+    // NOTE(review): assumes fDStates is a List<RBBIStateDescriptor> and fDtran an int[] - confirm.
+    for (; states.left<numStates-1; ++states.left) {
+        RBBIStateDescriptor firstSD = fDStates.get(states.left);
+        for (states.right=states.left+1; states.right<numStates; ++states.right) {
+            RBBIStateDescriptor duplSD = fDStates.get(states.right);
+            if (firstSD.fAccepting != duplSD.fAccepting ||
+                firstSD.fLookAhead != duplSD.fLookAhead ||
+                firstSD.fTagsIdx   != duplSD.fTagsIdx) {
+                continue;
+            }
+            boolean rowsMatch = true;
+            for (int col=0; col < numCols; ++col) {
+                int firstVal = firstSD.fDtran[col];
+                int duplVal = duplSD.fDtran[col];
+                if (!((firstVal == duplVal) ||
+                        ((firstVal == states.left || firstVal == states.right) &&
+                        (duplVal  == states.left || duplVal  == states.right)))) {
+                    rowsMatch = false;
+                    break;
+                }
+            }
+            if (rowsMatch) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+/**
+ * Remove state {@code duplState} from the table and renumber all references to states:
+ * references to {@code duplState} become {@code keepState}, and references to any
+ * higher-numbered state are decremented by one.
+ *
+ * @param keepState the surviving state; must be less than {@code duplState}.
+ * @param duplState the state to delete; must be a valid state number.
+ */
+void removeState(int keepState, int duplState) {
+    assert(keepState < duplState);
+    assert(duplState < fDStates.size());
+
+    // duplState is a primitive int, so this is removal by index.
+    // The removed descriptor needs no explicit deletion in Java.
+    fDStates.remove(duplState);
+
+    int numStates = fDStates.size();
+    int numCols = fRB.fSetBuilder.getNumCharCategories();
+    for (int state=0; state<numStates; ++state) {
+        RBBIStateDescriptor sd = fDStates.get(state);
+        for (int col=0; col<numCols; col++) {
+            int existingVal = sd.fDtran[col];
+            int newVal = existingVal;
+            if (existingVal == duplState) {
+                newVal = keepState;
+            } else if (existingVal > duplState) {
+                newVal = existingVal - 1;
+            }
+            sd.fDtran[col] = newVal;
+        }
+        // NOTE(review): fAccepting and fLookAhead are renumbered as if they held state
+        // numbers - confirm that is what these fields contain at this point.
+        if (sd.fAccepting == duplState) {
+            sd.fAccepting = keepState;
+        } else if (sd.fAccepting > duplState) {
+            sd.fAccepting--;
+        }
+        if (sd.fLookAhead == duplState) {
+            sd.fLookAhead = keepState;
+        } else if (sd.fLookAhead > duplState) {
+            sd.fLookAhead--;
+        }
+    }
+}
+
+
+/**
+ * Repeatedly find and remove duplicate states until none remain.
+ * The search starts with state 3 as the first candidate to keep.
+ */
+void removeDuplicateStates() {
+    RBBIRuleBuilder.ClassPair dupls = new RBBIRuleBuilder.ClassPair();
+    // Set explicitly so this does not silently depend on the holder's defaults.
+    dupls.left = 3;
+    dupls.right = 0;
+    while (findDuplicateState(dupls)) {
+        // System.out.printf("Removing duplicate states (%d, %d)\n", dupls.left, dupls.right);
+        removeState(dupls.left, dupls.right);
+    }
+}
+

       //-----------------------------------------------------------------------------
       //
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java
@ -222,9 +222,11 @@ public class RuleBasedBreakIterator extends BreakIterator {
    private CharacterIterator   fText = new java.text.StringCharacterIterator("");

    /**
-     * The rule data for this BreakIterator instance. Package private.
+     * The rule data for this BreakIterator instance.
+     * Not intended for public use. Declared public for testing purposes only.
+     * @internal
     */
-    RBBIDataWrapper             fRData;
+    public RBBIDataWrapper    fRData;

    /**
     *  The iteration state - current position, rule status for the current position,
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/TimeUnitFormat.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/TimeUnitFormat.java
@ -84,19 +84,12 @@ public class TimeUnitFormat extends MeasureFormat {

    private static final long serialVersionUID = -3707773153184971529L;

-    // These fields are supposed to be the same as the fields in mf. They
-    // are here for serialization backward compatibility and to support parsing.
+    // Unlike MeasureFormat, this class is mutable and allows a new NumberFormat to be set after
+    // initialization. Keep a second copy of NumberFormat and use it instead of the one from the parent.
    private NumberFormat format;
    private ULocale locale;
    private int style;

-    // We use this field in lieu of the super class because the super class
-    // is immutable while this class is mutable. The contents of the super class
-    // is an empty shell. Every public method of the super class is overridden to
-    // delegate to this field. Each time this object mutates, it replaces this field with
-    // a new immutable instance.
-//    private transient MeasureFormat mf;
-
    private transient Map<TimeUnit, Map<String, Object[]>> timeUnitToCountToPatterns;
    private transient PluralRules pluralRules;
    private transient boolean isReady;
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/CalendarRegressionTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/CalendarRegressionTest.java
@ -2521,5 +2521,25 @@ public class CalendarRegressionTest extends com.ibm.icu.dev.test.TestFmwk {
            }
        }
    }
- }
+
+    /**
+     * Regression test for overflow in the islamic-civil calendar: for Julian days
+     * 73530872 through 73530876, the computed month and day of month must not exceed
+     * the calendar's reported maximum values.
+     */
+    @Test
+    public void TestIslamicCalOverflow() {
+        String localeID = "ar@calendar=islamic-civil";
+        Calendar cal = Calendar.getInstance(new ULocale(localeID));
+        int maxMonth = cal.getMaximum(Calendar.MONTH);
+        int maxDayOfMonth = cal.getMaximum(Calendar.DATE);
+        int jd, year, month, dayOfMonth;
+        for (jd = 73530872; jd <= 73530876; jd++) { // year 202002, int32_t overflow if jd >= 73530874
+            // Start from a cleared calendar so only the Julian day determines the fields.
+            cal.clear();
+            cal.set(Calendar.JULIAN_DAY, jd);
+            year = cal.get(Calendar.YEAR);
+            month = cal.get(Calendar.MONTH);
+            dayOfMonth = cal.get(Calendar.DATE);
+            if (month > maxMonth || dayOfMonth > maxDayOfMonth) {
+                errln("Error: localeID " + localeID + ", julianDay " + jd + "; got year " + year + "; maxMonth " + maxMonth +
+                        ", got month " + month + "; maxDayOfMonth " + maxDayOfMonth + ", got dayOfMonth " + dayOfMonth);
+            }
+        }
+    }
+}
 //eof
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDecimalFormatSymbols.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDecimalFormatSymbols.java
@ -311,6 +311,36 @@ public class IntlTestDecimalFormatSymbols extends TestFmwk
            errln("ERROR: Char digits should be Latin digits");
        }

+        // Check on copy
+        DecimalFormatSymbols copy = (DecimalFormatSymbols) symbols.clone();
+        if (!Arrays.equals(copy.getDigitStrings(), osmanyaDigitStrings)) {
+            errln("ERROR: Osmanya digits (supplementary) should be set");
+        }
+        if (Character.codePointAt(osmanyaDigitStrings[0], 0) != copy.getCodePointZero()) {
+            errln("ERROR: Code point zero be Osmanya code point zero");
+        }
+        if (defZero != copy.getZeroDigit()) {
+            errln("ERROR: Zero digit should be 0");
+        }
+        if (!Arrays.equals(copy.getDigits(), defDigits)) {
+            errln("ERROR: Char digits should be Latin digits");
+        }
+
+        // Check on resource bundle
+        DecimalFormatSymbols fromData = DecimalFormatSymbols.getInstance(new ULocale("en@numbers=osma"));
+        if (!Arrays.equals(fromData.getDigitStrings(), osmanyaDigitStrings)) {
+            errln("ERROR: Osmanya digits (supplementary) should be set");
+        }
+        if (Character.codePointAt(osmanyaDigitStrings[0], 0) != fromData.getCodePointZero()) {
+            errln("ERROR: Code point zero be Osmanya code point zero");
+        }
+        if (defZero != fromData.getZeroDigit()) {
+            errln("ERROR: Zero digit should be 0");
+        }
+        if (!Arrays.equals(fromData.getDigits(), defDigits)) {
+            errln("ERROR: Char digits should be Latin digits");
+        }
+
        symbols.setDigitStrings(differentDigitStrings);
        if (!Arrays.equals(symbols.getDigitStrings(), differentDigitStrings)) {
            errln("ERROR: Different digits should be set");
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/MeasureUnitTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/MeasureUnitTest.java
@ -16,6 +16,7 @@ import java.io.ObjectOutputStream;
 import java.io.Serializable;
 import java.lang.reflect.Field;
 import java.text.FieldPosition;
+import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@ -42,6 +43,7 @@ import com.ibm.icu.text.MeasureFormat;
 import com.ibm.icu.text.MeasureFormat.FormatWidth;
 import com.ibm.icu.text.NumberFormat;
 import com.ibm.icu.util.Currency;
+import com.ibm.icu.util.CurrencyAmount;
 import com.ibm.icu.util.Measure;
 import com.ibm.icu.util.MeasureUnit;
 import com.ibm.icu.util.NoUnit;
@ -1925,6 +1927,15 @@ public class MeasureUnitTest extends TestFmwk {
        assertEquals("getCurrencyFormat ULocale/Locale", mfu, mfj);
    }

+    /**
+     * Parsing with the English currency MeasureFormat must accept an explicit ISO
+     * currency code in the input and report that currency in the result.
+     */
+    @Test
+    public void testCurrencyFormatParseIsoCode() throws ParseException {
+        MeasureFormat mf = MeasureFormat.getCurrencyFormat(ULocale.ENGLISH);
+        CurrencyAmount result = (CurrencyAmount) mf.parseObject("GTQ 34.56");
+        // Expected value first, then actual, matching the assertion below.
+        assertEquals("Parse should succeed", 34.56, result.getNumber().doubleValue(), 0.0);
+        assertEquals("Should parse ISO code GTQ even though the currency is USD",
+                "GTQ", result.getCurrency().getCurrencyCode());
+    }
+
    @Test
    public void testDoubleZero() {
        ULocale en = new ULocale("en");
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatTest.java
@ -868,7 +868,7 @@ public class NumberFormatTest extends TestFmwk {
                new ParseCurrencyItem( "en_GB", "euros4",   "4,00\u00A0\u20AC", 6,400,  "EUR" ),
                new ParseCurrencyItem( "en_GB", "euros6",   "6\u00A0\u20AC",    3,  6,  "EUR" ),
                new ParseCurrencyItem( "en_GB", "euros8",   "\u20AC8",          2,  8,  "EUR" ),
-                new ParseCurrencyItem( "en_GB", "dollars4", "US$4",             0,  0,  "USD" ),
+                new ParseCurrencyItem( "en_GB", "dollars4", "US$4",             4,  4,  "USD" ),

                new ParseCurrencyItem( "fr_FR", "euros4",   "4,00\u00A0\u20AC", 6,  4,  "EUR" ),
                new ParseCurrencyItem( "fr_FR", "euros6",   "6\u00A0\u20AC",    3,  6,  "EUR" ),
@ -2018,7 +2018,6 @@ public class NumberFormatTest extends TestFmwk {
    };

    @SuppressWarnings("resource")  // InputStream is will be closed by the ResourceReader.
-    @Ignore("TODO: http://bugs.icu-project.org/trac/ticket/13571")
    @Test
    public void TestCases() {
        String caseFileName = "NumberFormatTestCases.txt";
@ -5331,6 +5330,23 @@ public class NumberFormatTest extends TestFmwk {
        assertEquals("Grouping should be off", false, df.isGroupingUsed());
    }

+    /**
+     * Checks what DecimalFormat reports as its positive prefix and suffix:
+     * scientific and compact notation are expected to contribute nothing to the suffix,
+     * while the ISO currency code and the plural currency name are expected to appear.
+     */
+    @Test
+    public void Test13453_AffixContent() {
+        DecimalFormat df = (DecimalFormat) DecimalFormat.getScientificInstance();
+        assertEquals("Scientific should NOT be included", "", df.getPositiveSuffix());
+
+        df = CompactDecimalFormat.getInstance(ULocale.ENGLISH, CompactDecimalFormat.CompactStyle.SHORT);
+        assertEquals("Compact should NOT be included", "", df.getPositiveSuffix());
+
+        df = (DecimalFormat) DecimalFormat.getInstance(NumberFormat.ISOCURRENCYSTYLE);
+        df.setCurrency(Currency.getInstance("GBP"));
+        assertEquals("ISO currency SHOULD be included", "GBP", df.getPositivePrefix());
+
+        df = (DecimalFormat) DecimalFormat.getInstance(NumberFormat.PLURALCURRENCYSTYLE);
+        df.setCurrency(Currency.getInstance("GBP"));
+        assertEquals("Plural name SHOULD be included", " British pounds", df.getPositiveSuffix());
+    }
+
    @Test
    public void Test11035_FormatCurrencyAmount() {
        double amount = 12345.67;
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java
@ -25,7 +25,6 @@ import com.ibm.icu.impl.number.Padder;
 import com.ibm.icu.impl.number.Padder.PadPosition;
 import com.ibm.icu.impl.number.PatternStringParser;
 import com.ibm.icu.number.CompactNotation;
-import com.ibm.icu.number.FormattedNumber;
 import com.ibm.icu.number.FractionRounder;
 import com.ibm.icu.number.IntegerWidth;
 import com.ibm.icu.number.LocalizedNumberFormatter;
@ -1176,6 +1175,21 @@ public class NumberFormatterApiTest {
                "8.765",
                "0");

+        assertFormatDescendingBig(
+                "Indic locale with THOUSANDS grouping",
+                "",
+                NumberFormatter.with().grouping(GroupingStrategy.THOUSANDS),
+                new ULocale("en-IN"),
+                "87,650,000",
+                "8,765,000",
+                "876,500",
+                "87,650",
+                "8,765",
+                "876.5",
+                "87.65",
+                "8.765",
+                "0");
+
        // NOTE: Hungarian is interesting because it has minimumGroupingDigits=4 in locale data
        // If this test breaks due to data changes, find another locale that has minimumGroupingDigits.
        assertFormatDescendingBig(
@ -1860,29 +1874,6 @@ public class NumberFormatterApiTest {
        assertNotEquals(NumberFormatter.with().locale(ULocale.ENGLISH), NumberFormatter.with().locale(Locale.FRENCH));
    }

-    @Test
-    public void getPrefixSuffix() {
-        Object[][] cases = {
-                { NumberFormatter.withLocale(ULocale.ENGLISH).unit(GBP).unitWidth(UnitWidth.ISO_CODE), "GBP", "",
-                        "-GBP", "" },
-                { NumberFormatter.withLocale(ULocale.ENGLISH).unit(GBP).unitWidth(UnitWidth.FULL_NAME), "",
-                        " British pounds", "-", " British pounds" } };
-
-        for (Object[] cas : cases) {
-            LocalizedNumberFormatter f = (LocalizedNumberFormatter) cas[0];
-            String posPrefix = (String) cas[1];
-            String posSuffix = (String) cas[2];
-            String negPrefix = (String) cas[3];
-            String negSuffix = (String) cas[4];
-            FormattedNumber positive = f.format(1);
-            FormattedNumber negative = f.format(-1);
-            assertEquals(posPrefix, positive.getPrefix());
-            assertEquals(posSuffix, positive.getSuffix());
-            assertEquals(negPrefix, negative.getPrefix());
-            assertEquals(negSuffix, negative.getSuffix());
-        }
-    }
-
    @Test
    public void plurals() {
        // TODO: Expand this test.
@ -1921,12 +1912,12 @@ public class NumberFormatterApiTest {
                Rounder.class.getDeclaredMethod("minMaxFraction", Integer.TYPE, Integer.TYPE),
                Rounder.class.getDeclaredMethod("minMaxDigits", Integer.TYPE, Integer.TYPE), };

-        final int EXPECTED_MAX_INT_FRAC_SIG = 100;
-        final String expectedSubstring0 = "between 0 and 100 (inclusive)";
-        final String expectedSubstring1 = "between 1 and 100 (inclusive)";
-        final String expectedSubstringN1 = "between -1 and 100 (inclusive)";
+        final int EXPECTED_MAX_INT_FRAC_SIG = 999;
+        final String expectedSubstring0 = "between 0 and 999 (inclusive)";
+        final String expectedSubstring1 = "between 1 and 999 (inclusive)";
+        final String expectedSubstringN1 = "between -1 and 999 (inclusive)";

-        // We require that the upper bounds all be 100 inclusive.
+        // We require that the upper bounds all be 999 inclusive.
        // The lower bound may be either -1, 0, or 1.
        Set<String> methodsWithLowerBound1 = new HashSet();
        methodsWithLowerBound1.add("fixedDigits");
@ -1936,6 +1927,12 @@ public class NumberFormatterApiTest {
        methodsWithLowerBound1.add("withMinDigits");
        methodsWithLowerBound1.add("withMaxDigits");
        methodsWithLowerBound1.add("withMinExponentDigits");
+        // Methods with lower bound 0:
+        // fixedFraction
+        // minFraction
+        // maxFraction
+        // minMaxFraction
+        // zeroFillTo
        Set<String> methodsWithLowerBoundN1 = new HashSet();
        methodsWithLowerBoundN1.add("truncateAt");

--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java
@ -13,6 +13,7 @@ import com.ibm.icu.impl.number.parse.IgnorablesMatcher;
 import com.ibm.icu.impl.number.parse.MinusSignMatcher;
 import com.ibm.icu.impl.number.parse.NumberParserImpl;
 import com.ibm.icu.impl.number.parse.ParsedNumber;
+import com.ibm.icu.impl.number.parse.ParsingUtils;
 import com.ibm.icu.impl.number.parse.PercentMatcher;
 import com.ibm.icu.impl.number.parse.PlusSignMatcher;
 import com.ibm.icu.impl.number.parse.SeriesMatcher;
@ -191,7 +192,7 @@ public class NumberParserTest {
            int expectedOffset = (Integer) cas[1];
            boolean expectedMaybeMore = (Boolean) cas[2];

-            StringSegment segment = new StringSegment(input);
+            StringSegment segment = new StringSegment(input, 0);
            ParsedNumber result = new ParsedNumber();
            boolean actualMaybeMore = series.match(segment, result);
            int actualOffset = segment.getOffset();
@ -215,4 +216,39 @@ public class NumberParserTest {
                result.getNumber().doubleValue(),
                0.0);
    }
+
+    /**
+     * Parses each sample input twice with an English-locale parser built from the given pattern:
+     * once with only PARSE_FLAG_OPTIMIZE, and once with PARSE_FLAG_IGNORE_CASE added. In both runs
+     * the resulting {@code charEnd} is compared against the expected value from the case table.
+     */
+    @Test
+    public void testCaseFolding() {
+        Object[][] cases = new Object[][] {
+                // pattern, input string, case sensitive chars, case insensitive chars
+                // (the two numbers are the expected result.charEnd for each parser;
+                // presumably the count of input chars consumed -- confirm against ParsedNumber)
+                { "0", "JP¥3456", 7, 7 },
+                { "0", "jp¥3456", 0, 0 }, // not to be accepted, even in case insensitive mode
+                { "A0", "A5", 2, 2 },
+                { "A0", "a5", 0, 2 },
+                { "0", "NaN", 3, 3 },
+                { "0", "nan", 0, 3 } };
+        for (Object[] cas : cases) {
+            String patternString = (String) cas[0];
+            String inputString = (String) cas[1];
+            int expectedCaseSensitiveChars = (Integer) cas[2];
+            int expectedCaseFoldingChars = (Integer) cas[3];
+
+            // First pass: parser without the ignore-case flag.
+            NumberParserImpl caseSensitiveParser = NumberParserImpl
+                    .removeMeWhenMerged(ULocale.ENGLISH, patternString, ParsingUtils.PARSE_FLAG_OPTIMIZE);
+            ParsedNumber result = new ParsedNumber();
+            caseSensitiveParser.parse(inputString, true, result);
+            assertEquals("Case-Sensitive: " + inputString + " on " + patternString,
+                    expectedCaseSensitiveChars,
+                    result.charEnd);
+
+            // Second pass: same pattern and input, with PARSE_FLAG_IGNORE_CASE added.
+            // A fresh ParsedNumber is used so nothing carries over from the first pass.
+            NumberParserImpl caseFoldingParser = NumberParserImpl.removeMeWhenMerged(ULocale.ENGLISH,
+                    patternString,
+                    ParsingUtils.PARSE_FLAG_IGNORE_CASE | ParsingUtils.PARSE_FLAG_OPTIMIZE);
+            result = new ParsedNumber();
+            caseFoldingParser.parse(inputString, true, result);
+            assertEquals("Folded: " + inputString + " on " + patternString,
+                    expectedCaseFoldingChars,
+                    result.charEnd);
+        }
+    }
 }
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/StringSegmentTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/StringSegmentTest.java
@ -17,7 +17,7 @@ public class StringSegmentTest {

    @Test
    public void testOffset() {
-        StringSegment segment = new StringSegment(SAMPLE_STRING);
+        StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
        assertEquals(0, segment.getOffset());
        segment.adjustOffset(3);
        assertEquals(3, segment.getOffset());
@ -29,7 +29,7 @@ public class StringSegmentTest {

    @Test
    public void testLength() {
-        StringSegment segment = new StringSegment(SAMPLE_STRING);
+        StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
        assertEquals(11, segment.length());
        segment.adjustOffset(3);
        assertEquals(8, segment.length());
@ -43,7 +43,7 @@ public class StringSegmentTest {

    @Test
    public void testCharAt() {
-        StringSegment segment = new StringSegment(SAMPLE_STRING);
+        StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
        assertCharSequenceEquals(SAMPLE_STRING, segment);
        segment.adjustOffset(3);
        assertCharSequenceEquals("radio 📻", segment);
@ -53,20 +53,20 @@ public class StringSegmentTest {

    @Test
    public void testGetCodePoint() {
-        StringSegment segment = new StringSegment(SAMPLE_STRING);
+        StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
        assertEquals(0x1F4FB, segment.getCodePoint());
        segment.setLength(1);
-        assertEquals(-1, segment.getCodePoint());
+        // U+1F4FB is D83D DCFB in UTF-16; with the length set to 1 the expectation is now
+        // the lone lead surrogate rather than -1.
+        assertEquals(0xD83D, segment.getCodePoint());
        segment.resetLength();
        segment.adjustOffset(1);
-        assertEquals(-1, segment.getCodePoint());
+        // After advancing by one code unit the expectation is now the lone trail surrogate
+        // rather than -1.
+        assertEquals(0xDCFB, segment.getCodePoint());
        segment.adjustOffset(1);
        assertEquals(0x20, segment.getCodePoint());
    }

    @Test
    public void testCommonPrefixLength() {
-        StringSegment segment = new StringSegment(SAMPLE_STRING);
+        StringSegment segment = new StringSegment(SAMPLE_STRING, 0);
        assertEquals(11, segment.getCommonPrefixLength(SAMPLE_STRING));
        assertEquals(4, segment.getCommonPrefixLength("📻 r"));
        assertEquals(3, segment.getCommonPrefixLength("📻 x"));
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITest.java
@ -19,6 +19,7 @@ package com.ibm.icu.dev.test.rbbi;
 import java.text.CharacterIterator;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Locale;

 import org.junit.Test;
 import org.junit.runner.RunWith;
@ -26,6 +27,7 @@ import org.junit.runners.JUnit4;

 import com.ibm.icu.dev.test.TestFmwk;
 import com.ibm.icu.text.BreakIterator;
+import com.ibm.icu.text.RBBIDataWrapper;
 import com.ibm.icu.text.RuleBasedBreakIterator;
 import com.ibm.icu.util.ULocale;

@ -562,4 +564,62 @@ public class RBBITest extends TestFmwk {
        String rtRules = bi.toString();        // getRules() in C++
        assertEquals("Break Iterator rule stripping test", "!!forward; $x = [ab#]; '#' '?'; ",  rtRules);
    }
+
+    /**
+     * Rebuilds the English line break iterator from its rule source and scans the resulting
+     * forward state table for identical columns (character categories) and identical rows
+     * (states). Duplicates are currently only printed to standard output; the corresponding
+     * assertions are commented out. The only hard check is that each row's ACCEPTING value
+     * is at least -1.
+     */
+    @Test
+    public void TestTableRedundancies() {
+        RuleBasedBreakIterator bi = (RuleBasedBreakIterator)BreakIterator.getLineInstance(Locale.ENGLISH);
+        String rules = bi.toString();
+        bi = new RuleBasedBreakIterator(rules);
+        // Build a break iterator from source rules.
+        // Want to check the rule builder in Java, not the pre-built rules that are imported from ICU4C.
+        RBBIDataWrapper dw = bi.fRData;
+        short[] fwtbl = dw.fFTable;
+        int numCharClasses = dw.fHeader.fCatCount;
+
+        // Check for duplicate columns (character categories)
+        // Each column is flattened into a String (one char per state) so that columns can be
+        // compared with String.equals(). Note the state loop starts at 1, so state 0 is not
+        // part of the column key.
+        List<String> columns = new ArrayList<String>();
+        for (int column=0; column<numCharClasses; column++) {
+            StringBuilder s = new StringBuilder();
+            for (int r = 1; r < dw.getStateTableNumStates(fwtbl); r++) {
+                int row = dw.getRowIndex(r);
+                short tableVal = fwtbl[row + RBBIDataWrapper.NEXTSTATES + column];
+                s.append((char)tableVal);
+            }
+            columns.add(s.toString());
+        }
+        // Ignore column (char class) 0 while checking; it's special, and may have duplicates.
+        for (int c1=1; c1<numCharClasses; c1++) {
+            for (int c2 = c1+1; c2 < numCharClasses; c2++) {
+                // assertFalse(String.format("Duplicate columns (%d, %d)", c1, c2), columns.get(c1).equals(columns.get(c2)));
+                if (columns.get(c1).equals(columns.get(c2))) {
+                    System.out.printf("Duplicate columns (%d, %d)\n", c1, c2);
+                }
+            }
+        }
+
+        // Check for duplicate states.
+        // Each row is flattened into a String key: the ACCEPTING, LOOKAHEAD and TAGIDX fields
+        // followed by one char per character category. Unlike the column scan, state 0 is included.
+        List<String> rows = new ArrayList<String>();
+        for (int r=0; r<dw.getStateTableNumStates(fwtbl); r++) {
+            StringBuilder s = new StringBuilder();
+            int row = dw.getRowIndex(r);
+            assertTrue("Accepting < -1", fwtbl[row + RBBIDataWrapper.ACCEPTING] >= -1);
+            // NOTE(review): these three shorts are appended as decimal text with no delimiter,
+            // so different field values could yield the same key prefix (e.g. 1,23 vs 12,3)
+            // and be reported as duplicate states -- confirm whether that matters here.
+            s.append(fwtbl[row + RBBIDataWrapper.ACCEPTING]);
+            s.append(fwtbl[row + RBBIDataWrapper.LOOKAHEAD]);
+            s.append(fwtbl[row + RBBIDataWrapper.TAGIDX]);
+            for (int column=0; column<numCharClasses; column++) {
+                short tableVal = fwtbl[row + RBBIDataWrapper.NEXTSTATES + column];
+                s.append((char)tableVal);
+            }
+            rows.add(s.toString());
+        }
+
+        // Pairwise comparison of the row keys; matches are printed rather than asserted.
+        for (int r1=0; r1 < dw.getStateTableNumStates(fwtbl); r1++) {
+            for (int r2= r1+1; r2 < dw.getStateTableNumStates(fwtbl); r2++) {
+                // assertFalse(String.format("Duplicate states (%d, %d)", r1, r2), rows.get(r1).equals(rows.get(r2)));
+                if (rows.get(r1).equals(rows.get(r2))) {
+                    System.out.printf("Duplicate states (%d, %d)\n", r1, r2);
+                }
+            }
+        }
+    }
 }