From b6a2b3fddbb863df50f7f574484fe7a52ef53efa Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Fri, 6 Apr 2018 00:00:08 +0000 Subject: [PATCH] ICU-13194 RBBI safe tables, improve code consistency between C++ and Java. X-SVN-Rev: 41202 --- .../src/com/ibm/icu/text/RBBIRuleBuilder.java | 5 ++- .../src/com/ibm/icu/text/RBBISetBuilder.java | 16 +++++----- .../com/ibm/icu/text/RBBITableBuilder.java | 31 +++++++++++-------- 3 files changed, 31 insertions(+), 21 deletions(-) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java index 65bd1f77374..34e082f5163 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleBuilder.java @@ -330,9 +330,12 @@ class RBBIRuleBuilder { } void optimizeTables() { + // Begin looking for duplicates with char class 3. + // Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively, + // and should not have other categories merged into them. 
IntPair duplPair = new IntPair(3, 0); while (fForwardTable.findDuplCharClassFrom(duplPair)) { - fSetBuilder.mergeCategories(duplPair.first, duplPair.second); + fSetBuilder.mergeCategories(duplPair); fForwardTable.removeColumn(duplPair.second); } fForwardTable.removeDuplicateStates(); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBISetBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBISetBuilder.java index ada22580101..17b0b10fc85 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBISetBuilder.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBISetBuilder.java @@ -16,6 +16,7 @@ import java.util.List; import com.ibm.icu.impl.Assert; import com.ibm.icu.impl.Trie2Writable; import com.ibm.icu.impl.Trie2_16; +import com.ibm.icu.text.RBBIRuleBuilder.IntPair; // // RBBISetBuilder Handles processing of Unicode Sets from RBBI rules @@ -307,17 +308,18 @@ class RBBISetBuilder { /** * Merge two character categories that have been identified as having equivalent behavior. - * The ranges belonging to the right category (table column) will be added to the left. + * The ranges belonging to the second category (table column) will be added to the first. + * @param categories the pair of categories to be merged. 
*/ - void mergeCategories(int left, int right) { - assert(left >= 1); - assert(right > left); + void mergeCategories(IntPair categories) { + assert(categories.first >= 1); + assert(categories.second > categories.first); for (RangeDescriptor rd = fRangeList; rd != null; rd = rd.fNext) { int rangeNum = rd.fNum & ~DICT_BIT; int rangeDict = rd.fNum & DICT_BIT; - if (rangeNum == right) { - rd.fNum = left | rangeDict; - } else if (rangeNum > right) { + if (rangeNum == categories.second) { + rd.fNum = categories.first | rangeDict; + } else if (rangeNum > categories.second) { rd.fNum--; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java index 748c04eb2dd..2b8fcb8b7ed 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java @@ -833,9 +833,10 @@ class RBBITableBuilder { /** - * Find duplicate (redundant) character classes, beginning at the specified - * pair, within this state table. This is an iterator-like function, used to - * identify character classes (state table columns) that can be eliminated. + * Find duplicate (redundant) character classes. Begin looking with categories.first. + * Duplicates, if found, are returned in the categories parameter. + * This is an iterator-like function, used to identify character classes + * (state table columns) that can be eliminated. * @param categories in/out parameter, specifies where to start looking for duplicates, * and returns the first pair of duplicates found, if any. * @return true if duplicate char classes were found, false otherwise. @@ -957,13 +958,14 @@ class RBBITableBuilder { } /** - * Remove a duplicate state (row) from the state table. All references to the deleted state are - * redirected to "keepState", the first encountered of the duplicated pair of states. 
- * @param keepState The first of the duplicate pair of states, the one to be kept. - * @param duplState The second of the duplicate pair, the one to be removed. + * Remove a duplicate state (row) from the state table. All references to the deleted (second) state + * are redirected to the first state. + * @param duplStates The duplicate pair of states. * @internal */ - void removeState(int keepState, int duplState) { + void removeState(IntPair duplStates) { + final int keepState = duplStates.first; + final int duplState = duplStates.second; assert(keepState < duplState); assert(duplState < fDStates.size()); @@ -998,11 +1000,14 @@ class RBBITableBuilder { /** * Remove a duplicate state from the safe table. - * @param keepState The first of the duplicate pair of states, the one to be kept. - * @param duplState The second of the duplicate pair, the one to be removed. + * @param duplStates The duplicate pair of states. The first is kept, the second is removed. + * All references to the second in the state table are retargeted + * to the first. * @internal */ - void removeSafeState(int keepState, int duplState) { + void removeSafeState(IntPair duplStates) { + final int keepState = duplStates.first; + final int duplState = duplStates.second; assert(keepState < duplState); assert(duplState < fSafeTable.size()); @@ -1032,7 +1037,7 @@ class RBBITableBuilder { IntPair dupls = new IntPair(3, 0); while (findDuplicateState(dupls)) { // System.out.printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second); - removeState(dupls.first, dupls.second); + removeState(dupls); } } @@ -1188,7 +1193,7 @@ class RBBITableBuilder { RBBIRuleBuilder.IntPair states = new RBBIRuleBuilder.IntPair(1, 0); while (findDuplicateSafeState(states)) { // System.out.printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second); - removeSafeState(states.first, states.second); + removeSafeState(states); } }