ICU-13194 RBBI safe tables, remove unnecessary Java/C++ differences.

X-SVN-Rev: 41192
This commit is contained in:
Andy Heninger 2018-04-03 23:41:28 +00:00
parent 15d9f3a9ce
commit 440e1e31c3
4 changed files with 61 additions and 48 deletions

View file

@ -308,9 +308,12 @@ void RBBIRuleBuilder::optimizeTables() {
leftClass = 3;
rightClass = 0;
while (fForwardTable->findDuplCharClassFrom(leftClass, rightClass)) {
fSetBuilder->mergeCategories(leftClass, rightClass);
fForwardTable->removeColumn(rightClass);
IntPair duplPair = {3, 0};
while (fForwardTable->findDuplCharClassFrom(&duplPair)) {
fSetBuilder->mergeCategories(duplPair.first, duplPair.second);
fForwardTable->removeColumn(duplPair.second);
}
fForwardTable->removeDuplicateStates();
}

View file

@ -18,6 +18,8 @@
#if !UCONFIG_NO_BREAK_ITERATION
#include <utility>
#include "unicode/uobject.h"
#include "unicode/rbbi.h"
#include "unicode/uniset.h"
@ -25,8 +27,7 @@
#include "uhash.h"
#include "uvector.h"
#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
// looks up references to $variables within a set.
// looks up references to $variables within a set.
U_NAMESPACE_BEGIN
@ -203,6 +204,11 @@ struct RBBISetTableEl {
RBBINode *val;
};
/**
* A pair of ints, used to bundle pairs of states or pairs of character classes.
*/
typedef std::pair<int32_t, int32_t> IntPair;
//----------------------------------------------------------------------------
//

View file

@ -1078,18 +1078,18 @@ void RBBITableBuilder::printPosSets(RBBINode *n) {
//
// findDuplCharClassFrom()
//
bool RBBITableBuilder::findDuplCharClassFrom(int32_t &baseCategory, int32_t &duplCategory) {
bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) {
int32_t numStates = fDStates->size();
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
uint16_t table_base;
uint16_t table_dupl;
for (; baseCategory < numCols-1; ++baseCategory) {
for (duplCategory=baseCategory+1; duplCategory < numCols; ++duplCategory) {
for (; categories->first < numCols-1; categories->first++) {
for (categories->second=categories->first+1; categories->second < numCols; categories->second++) {
for (int32_t state=0; state<numStates; state++) {
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
table_base = (uint16_t)sd->fDtran->elementAti(baseCategory);
table_dupl = (uint16_t)sd->fDtran->elementAti(duplCategory);
table_base = (uint16_t)sd->fDtran->elementAti(categories->first);
table_dupl = (uint16_t)sd->fDtran->elementAti(categories->second);
if (table_base != table_dupl) {
break;
}
@ -1118,14 +1118,14 @@ void RBBITableBuilder::removeColumn(int32_t column) {
/*
* findDuplicateState
*/
bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplState) {
bool RBBITableBuilder::findDuplicateState(IntPair *states) {
int32_t numStates = fDStates->size();
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
for (; firstState<numStates-1; ++firstState) {
RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(firstState);
for (duplState=firstState+1; duplState<numStates; ++duplState) {
RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
for (; states->first<numStates-1; states->first++) {
RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(states->first);
for (states->second=states->first+1; states->second<numStates; states->second++) {
RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(states->second);
if (firstSD->fAccepting != duplSD->fAccepting ||
firstSD->fLookAhead != duplSD->fLookAhead ||
firstSD->fTagsIdx != duplSD->fTagsIdx) {
@ -1136,8 +1136,8 @@ bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplStat
int32_t firstVal = firstSD->fDtran->elementAti(col);
int32_t duplVal = duplSD->fDtran->elementAti(col);
if (!((firstVal == duplVal) ||
((firstVal == firstState || firstVal == duplState) &&
(duplVal == firstState || duplVal == duplState)))) {
((firstVal == states->first || firstVal == states->second) &&
(duplVal == states->first || duplVal == states->second)))) {
rowsMatch = false;
break;
}
@ -1151,21 +1151,21 @@ bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplStat
}
bool RBBITableBuilder::findDuplicateSafeState(int32_t *firstState, int32_t *duplState) {
bool RBBITableBuilder::findDuplicateSafeState(IntPair *states) {
int32_t numStates = fSafeTable->size();
for (; *firstState<numStates-1; ++(*firstState)) {
UnicodeString *firstRow = static_cast<UnicodeString *>(fSafeTable->elementAt(*firstState));
for (*duplState=*firstState+1; *duplState<numStates; ++(*duplState)) {
UnicodeString *duplRow = static_cast<UnicodeString *>(fSafeTable->elementAt(*duplState));
for (; states->first<numStates-1; states->first++) {
UnicodeString *firstRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->first));
for (states->second=states->first+1; states->second<numStates; states->second++) {
UnicodeString *duplRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->second));
bool rowsMatch = true;
int32_t numCols = firstRow->length();
for (int32_t col=0; col < numCols; ++col) {
int32_t firstVal = firstRow->charAt(col);
int32_t duplVal = duplRow->charAt(col);
if (!((firstVal == duplVal) ||
((firstVal == *firstState || firstVal == *duplState) &&
(duplVal == *firstState || duplVal == *duplState)))) {
((firstVal == states->first || firstVal == states->second) &&
(duplVal == states->first || duplVal == states->second)))) {
rowsMatch = false;
break;
}
@ -1242,11 +1242,10 @@ void RBBITableBuilder::removeSafeState(int32_t keepState, int32_t duplState) {
* RemoveDuplicateStates
*/
void RBBITableBuilder::removeDuplicateStates() {
int32_t firstState = 3;
int32_t duplicateState = 0;
while (findDuplicateState(firstState, duplicateState)) {
// printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
removeState(firstState, duplicateState);
IntPair dupls = {3, 0};
while (findDuplicateState(&dupls)) {
// printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
removeState(dupls.first, dupls.second);
}
}
@ -1428,11 +1427,10 @@ void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
}
// Remove duplicate or redundant rows from the table.
int32_t firstState = 1;
int32_t duplicateState = 0; // initial value is not used; set by findDuplicateSafeState().
while (findDuplicateSafeState(&firstState, &duplicateState)) {
// printf("Removing duplicate safe states (%d, %d)\n", firstState, duplicateState);
removeSafeState(firstState, duplicateState);
IntPair states = {1, 0};
while (findDuplicateSafeState(&states)) {
// printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second);
removeSafeState(states.first, states.second);
}
}

View file

@ -17,6 +17,7 @@
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/rbbi.h"
#include "rbbirb.h"
#include "rbbinode.h"
@ -49,11 +50,15 @@ public:
*/
void exportTable(void *where);
/** Find duplicate (redundant) character classes, beginning after the specified
/**
* Find duplicate (redundant) character classes, beginning at the specified
* pair, within this state table. This is an iterator-like function, used to
* identify char classes (state table columns) that can be eliminated.
* identify character classes (state table columns) that can be eliminated.
* @param categories in/out parameter, specifies where to start looking for duplicates,
* and returns the first pair of duplicates found, if any.
* @return true if duplicate char classes were found, false otherwise.
*/
bool findDuplCharClassFrom(int &baseClass, int &duplClass);
bool findDuplCharClassFrom(IntPair *statePair);
/** Remove a column from the state table. Used when two character categories
* have been found equivalent, and merged together, to eliminate the unneeded table column.
@ -95,13 +100,15 @@ private:
void addRuleRootNodes(UVector *dest, RBBINode *node);
/** Find the next duplicate state. An iterator function.
* @param firstState (in/out) begin looking at this state, return the first of the
* pair of duplicates.
* @param duplicateState returns the duplicate state of firstState
* @return true if a duplicate pair of states was found.
/**
* Find duplicate (redundant) states, beginning at the specified pair,
* within this state table. This is an iterator-like function, used to
* identify states (state table rows) that can be eliminated.
* @param states in/out parameter, specifies where to start looking for duplicates,
* and returns the first pair of duplicates found, if any.
* @return true if duplicate states were found, false otherwise.
*/
bool findDuplicateState(int32_t &firstState, int32_t &duplicateState);
bool findDuplicateState(IntPair *states);
/** Remove a duplicate state.
* @param keepState First of the duplicate pair. Keep it.
@ -111,12 +118,11 @@ private:
void removeState(int32_t keepState, int32_t duplState);
/** Find the next duplicate state in the safe reverse table. An iterator function.
* @param firstState ptr to state variable. Begin looking at this state, set to the first of the
* pair of duplicates on return.
* @param duplicateState ptr to where to return the duplicate state of firstState. Output only.
* @return true if a duplicate pair of states was found.
* @param states in/out parameter, specifies where to start looking for duplicates,
* and returns the first pair of duplicates found, if any.
* @return true if a duplicate pair of states was found.
*/
bool findDuplicateSafeState(int32_t *firstState, int32_t *duplicateState);
bool findDuplicateSafeState(IntPair *states);
/** Remove a duplicate state from the safe table.
* @param keepState First of the duplicate pair. Keep it.