ICU-13194 RBBI safe tables, improve code consistency between C++ and Java.

X-SVN-Rev: 41202
This commit is contained in:
Andy Heninger 2018-04-06 00:00:08 +00:00
parent 440e1e31c3
commit b6a2b3fddb
3 changed files with 31 additions and 21 deletions

View file

@ -330,9 +330,12 @@ class RBBIRuleBuilder {
}
void optimizeTables() {
// Begin looking for duplicates with char class 3.
// Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively,
// and should not have other categories merged into them.
IntPair duplPair = new IntPair(3, 0);
while (fForwardTable.findDuplCharClassFrom(duplPair)) {
fSetBuilder.mergeCategories(duplPair.first, duplPair.second);
fSetBuilder.mergeCategories(duplPair);
fForwardTable.removeColumn(duplPair.second);
}
fForwardTable.removeDuplicateStates();

View file

@ -16,6 +16,7 @@ import java.util.List;
import com.ibm.icu.impl.Assert;
import com.ibm.icu.impl.Trie2Writable;
import com.ibm.icu.impl.Trie2_16;
import com.ibm.icu.text.RBBIRuleBuilder.IntPair;
//
// RBBISetBuilder Handles processing of Unicode Sets from RBBI rules
@ -307,17 +308,18 @@ class RBBISetBuilder {
/**
* Merge two character categories that have been identified as having equivalent behavior.
* The ranges belonging to the right category (table column) will be added to the left.
* The ranges belonging to the second category (table column) will be added to the first.
* @param categories the pair of categories to be merged.
*/
void mergeCategories(int left, int right) {
assert(left >= 1);
assert(right > left);
void mergeCategories(IntPair categories) {
assert(categories.first >= 1);
assert(categories.second > categories.first);
for (RangeDescriptor rd = fRangeList; rd != null; rd = rd.fNext) {
int rangeNum = rd.fNum & ~DICT_BIT;
int rangeDict = rd.fNum & DICT_BIT;
if (rangeNum == right) {
rd.fNum = left | rangeDict;
} else if (rangeNum > right) {
if (rangeNum == categories.second) {
rd.fNum = categories.first | rangeDict;
} else if (rangeNum > categories.second) {
rd.fNum--;
}
}

View file

@ -833,9 +833,10 @@ class RBBITableBuilder {
/**
* Find duplicate (redundant) character classes, beginning at the specified
* pair, within this state table. This is an iterator-like function, used to
* identify character classes (state table columns) that can be eliminated.
* Find duplicate (redundant) character classes. Begin looking with categories.first.
* Duplicates, if found, are returned in the categories parameter.
* This is an iterator-like function, used to identify character classes
* (state table columns) that can be eliminated.
* @param categories in/out parameter, specifies where to start looking for duplicates,
* and returns the first pair of duplicates found, if any.
* @return true if duplicate char classes were found, false otherwise.
@ -957,13 +958,14 @@ class RBBITableBuilder {
}
/**
* Remove a duplicate state (row) from the state table. All references to the deleted state are
* redirected to "keepState", the first encountered of the duplicated pair of states.
* @param keepState The first of the duplicate pair of states, the one to be kept.
* @param duplState The second of the duplicate pair, the one to be removed.
* Remove a duplicate state (row) from the state table. All references to the deleted (second) state
* are redirected to the first state.
* @param duplStates The duplicate pair of states.
* @internal
*/
void removeState(int keepState, int duplState) {
void removeState(IntPair duplStates) {
final int keepState = duplStates.first;
final int duplState = duplStates.second;
assert(keepState < duplState);
assert(duplState < fDStates.size());
@ -998,11 +1000,14 @@ class RBBITableBuilder {
/**
* Remove a duplicate state from the safe table.
* @param keepState The first of the duplicate pair of states, the one to be kept.
* @param duplState The second of the duplicate pair, the one to be removed.
* @param duplStates The duplicate pair of states. The first is kept, the second is removed.
* All references to the second in the state table are retargeted
* to the first.
* @internal
*/
void removeSafeState(int keepState, int duplState) {
void removeSafeState(IntPair duplStates) {
final int keepState = duplStates.first;
final int duplState = duplStates.second;
assert(keepState < duplState);
assert(duplState < fSafeTable.size());
@ -1032,7 +1037,7 @@ class RBBITableBuilder {
IntPair dupls = new IntPair(3, 0);
while (findDuplicateState(dupls)) {
// System.out.printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
removeState(dupls.first, dupls.second);
removeState(dupls);
}
}
@ -1188,7 +1193,7 @@ class RBBITableBuilder {
RBBIRuleBuilder.IntPair states = new RBBIRuleBuilder.IntPair(1, 0);
while (findDuplicateSafeState(states)) {
// System.out.printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second);
removeSafeState(states.first, states.second);
removeSafeState(states);
}
}