diff --git a/icu4c/source/common/rbbirb.cpp b/icu4c/source/common/rbbirb.cpp index 8d624f4a925..a4b9a718682 100644 --- a/icu4c/source/common/rbbirb.cpp +++ b/icu4c/source/common/rbbirb.cpp @@ -303,16 +303,14 @@ RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) { } void RBBIRuleBuilder::optimizeTables() { - int32_t leftClass; - int32_t rightClass; - - leftClass = 3; - rightClass = 0; + // Begin looking for duplicates with char class 3. + // Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively, + // and should not have other categories merged into them. IntPair duplPair = {3, 0}; while (fForwardTable->findDuplCharClassFrom(&duplPair)) { - fSetBuilder->mergeCategories(duplPair.first, duplPair.second); + fSetBuilder->mergeCategories(duplPair); fForwardTable->removeColumn(duplPair.second); } fForwardTable->removeDuplicateStates(); diff --git a/icu4c/source/common/rbbisetb.cpp b/icu4c/source/common/rbbisetb.cpp index 108d127d45f..36e2e07e9c6 100644 --- a/icu4c/source/common/rbbisetb.cpp +++ b/icu4c/source/common/rbbisetb.cpp @@ -270,15 +270,15 @@ void RBBISetBuilder::buildTrie() { } -void RBBISetBuilder::mergeCategories(int32_t left, int32_t right) { - U_ASSERT(left >= 1); - U_ASSERT(right > left); +void RBBISetBuilder::mergeCategories(IntPair categories) { + U_ASSERT(categories.first >= 1); + U_ASSERT(categories.second > categories.first); for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) { int32_t rangeNum = rd->fNum & ~DICT_BIT; int32_t rangeDict = rd->fNum & DICT_BIT; - if (rangeNum == right) { - rd->fNum = left | rangeDict; - } else if (rangeNum > right) { + if (rangeNum == categories.second) { + rd->fNum = categories.first | rangeDict; + } else if (rangeNum > categories.second) { rd->fNum--; } } diff --git a/icu4c/source/common/rbbisetb.h b/icu4c/source/common/rbbisetb.h index a7a91b3b375..ed6a76b1214 100644 --- a/icu4c/source/common/rbbisetb.h +++ b/icu4c/source/common/rbbisetb.h @@ -94,10 +94,12 @@ public: 
UChar32 getFirstChar(int32_t val) const; UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo // character were encountered. - /** merge two character categories that have been identified as having equivalent behavior. - * The ranges belonging to the right category (table column) will be added to the left. + /** + * Merge two character categories that have been identified as having equivalent behavior. + * The ranges belonging to the second category (table column) will be added to the first. + * @param categories the pair of categories to be merged. */ - void mergeCategories(int32_t left, int32_t right); + void mergeCategories(IntPair categories); static constexpr int32_t DICT_BIT = 0x4000; diff --git a/icu4c/source/common/rbbitblb.cpp b/icu4c/source/common/rbbitblb.cpp index 30e475c063d..8a6f7c792f3 100644 --- a/icu4c/source/common/rbbitblb.cpp +++ b/icu4c/source/common/rbbitblb.cpp @@ -1179,7 +1179,9 @@ bool RBBITableBuilder::findDuplicateSafeState(IntPair *states) { } -void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) { +void RBBITableBuilder::removeState(IntPair duplStates) { + const int32_t keepState = duplStates.first; + const int32_t duplState = duplStates.second; U_ASSERT(keepState < duplState); U_ASSERT(duplState < fDStates->size()); @@ -1214,7 +1216,9 @@ void RBBITableBuilder::removeState(int32_t keepState, int32_t duplState) { } } -void RBBITableBuilder::removeSafeState(int32_t keepState, int32_t duplState) { +void RBBITableBuilder::removeSafeState(IntPair duplStates) { + const int32_t keepState = duplStates.first; + const int32_t duplState = duplStates.second; U_ASSERT(keepState < duplState); U_ASSERT(duplState < fSafeTable->size()); @@ -1245,7 +1249,7 @@ void RBBITableBuilder::removeDuplicateStates() { IntPair dupls = {3, 0}; while (findDuplicateState(&dupls)) { // printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second); - removeState(dupls.first, dupls.second); + removeState(dupls); } } 
@@ -1430,7 +1434,7 @@ void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) { IntPair states = {1, 0}; while (findDuplicateSafeState(&states)) { // printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second); - removeSafeState(states.first, states.second); + removeSafeState(states); } } diff --git a/icu4c/source/common/rbbitblb.h b/icu4c/source/common/rbbitblb.h index 238ae463e43..eea243e4cdd 100644 --- a/icu4c/source/common/rbbitblb.h +++ b/icu4c/source/common/rbbitblb.h @@ -51,14 +51,15 @@ public: void exportTable(void *where); /** - * Find duplicate (redundant) character classes, beginning at the specified - * pair, within this state table. This is an iterator-like function, used to - * identify character classes (state table columns) that can be eliminated. + * Find duplicate (redundant) character classes. Begin looking with categories.first. + * Duplicates, if found, are returned in the categories parameter. + * This is an iterator-like function, used to identify character classes + * (state table columns) that can be eliminated. * @param categories in/out parameter, specifies where to start looking for duplicates, * and returns the first pair of duplicates found, if any. * @return true if duplicate char classes were found, false otherwise. */ - bool findDuplCharClassFrom(IntPair *statePair); + bool findDuplCharClassFrom(IntPair *categories); /** Remove a column from the state table. Used when two character categories * have been found equivalent, and merged together, to eliminate the uneeded table column. @@ -110,12 +111,12 @@ private: */ bool findDuplicateState(IntPair *states); - /** Remove a duplicate state/ - * @param keepState First of the duplicate pair. Keep it. - * @param duplState Duplicate state. Remove it. Redirect all references to the duplicate state - * to refer to keepState instead. + /** Remove a duplicate state. + * @param duplStates The duplicate states. The first is kept, the second is removed. 
+ * All references to the second in the state table are retargeted + * to the first. */ - void removeState(int32_t keepState, int32_t duplState); + void removeState(IntPair duplStates); /** Find the next duplicate state in the safe reverse table. An iterator function. * @param states in/out parameter, specifies where to start looking for duplicates, @@ -125,11 +126,11 @@ private: bool findDuplicateSafeState(IntPair *states); /** Remove a duplicate state from the safe table. - * @param keepState First of the duplicate pair. Keep it. - * @param duplState Duplicate state. Remove it. Redirect all table references to the duplicate state - * to refer to keepState instead. + * @param duplStates The duplicate states. The first is kept, the second is removed. + * All references to the second in the state table are retargeted + * to the first. */ - void removeSafeState(int32_t keepState, int32_t duplState); + void removeSafeState(IntPair duplStates); // Set functions for UVector. // TODO: make a USet subclass of UVector diff --git a/icu4c/source/common/unicode/rbbi.h b/icu4c/source/common/unicode/rbbi.h index 843d841c253..e43dafc603d 100644 --- a/icu4c/source/common/unicode/rbbi.h +++ b/icu4c/source/common/unicode/rbbi.h @@ -70,13 +70,6 @@ public: RBBIDataWrapper *fData; private: - /** - * The iteration state - current position, rule status for the current position, - * and whether the iterator ran off the end, yielding UBRK_DONE. - * Current position is pinned to be 0 < position <= text.length. - * Current position is always set to a boundary. - * @internal - */ /** * The current position of the iterator. Pinned, 0 < fPosition <= text.length. * Never has the value UBRK_DONE (-1).