mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 14:05:32 +00:00
ICU-13194 RBBI safe tables, remove unnecessary Java/C++ differences.
X-SVN-Rev: 41192
This commit is contained in:
parent
15d9f3a9ce
commit
440e1e31c3
4 changed files with 61 additions and 48 deletions
|
@ -308,9 +308,12 @@ void RBBIRuleBuilder::optimizeTables() {
|
|||
|
||||
leftClass = 3;
|
||||
rightClass = 0;
|
||||
while (fForwardTable->findDuplCharClassFrom(leftClass, rightClass)) {
|
||||
fSetBuilder->mergeCategories(leftClass, rightClass);
|
||||
fForwardTable->removeColumn(rightClass);
|
||||
|
||||
IntPair duplPair = {3, 0};
|
||||
|
||||
while (fForwardTable->findDuplCharClassFrom(&duplPair)) {
|
||||
fSetBuilder->mergeCategories(duplPair.first, duplPair.second);
|
||||
fForwardTable->removeColumn(duplPair.second);
|
||||
}
|
||||
fForwardTable->removeDuplicateStates();
|
||||
}
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
|
||||
#include <utility>
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/rbbi.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
@ -25,8 +27,7 @@
|
|||
#include "uhash.h"
|
||||
#include "uvector.h"
|
||||
#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
|
||||
// looks up references to $variables within a set.
|
||||
|
||||
// looks up references to $variables within a set.
|
||||
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
@ -203,6 +204,11 @@ struct RBBISetTableEl {
|
|||
RBBINode *val;
|
||||
};
|
||||
|
||||
/**
|
||||
* A pair of ints, used to bundle pairs of states or pairs of character classes.
|
||||
*/
|
||||
typedef std::pair<int32_t, int32_t> IntPair;
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//
|
||||
|
|
|
@ -1078,18 +1078,18 @@ void RBBITableBuilder::printPosSets(RBBINode *n) {
|
|||
//
|
||||
// findDuplCharClassFrom()
|
||||
//
|
||||
bool RBBITableBuilder::findDuplCharClassFrom(int32_t &baseCategory, int32_t &duplCategory) {
|
||||
bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) {
|
||||
int32_t numStates = fDStates->size();
|
||||
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
|
||||
|
||||
uint16_t table_base;
|
||||
uint16_t table_dupl;
|
||||
for (; baseCategory < numCols-1; ++baseCategory) {
|
||||
for (duplCategory=baseCategory+1; duplCategory < numCols; ++duplCategory) {
|
||||
for (; categories->first < numCols-1; categories->first++) {
|
||||
for (categories->second=categories->first+1; categories->second < numCols; categories->second++) {
|
||||
for (int32_t state=0; state<numStates; state++) {
|
||||
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
|
||||
table_base = (uint16_t)sd->fDtran->elementAti(baseCategory);
|
||||
table_dupl = (uint16_t)sd->fDtran->elementAti(duplCategory);
|
||||
table_base = (uint16_t)sd->fDtran->elementAti(categories->first);
|
||||
table_dupl = (uint16_t)sd->fDtran->elementAti(categories->second);
|
||||
if (table_base != table_dupl) {
|
||||
break;
|
||||
}
|
||||
|
@ -1118,14 +1118,14 @@ void RBBITableBuilder::removeColumn(int32_t column) {
|
|||
/*
|
||||
* findDuplicateState
|
||||
*/
|
||||
bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplState) {
|
||||
bool RBBITableBuilder::findDuplicateState(IntPair *states) {
|
||||
int32_t numStates = fDStates->size();
|
||||
int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
|
||||
|
||||
for (; firstState<numStates-1; ++firstState) {
|
||||
RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(firstState);
|
||||
for (duplState=firstState+1; duplState<numStates; ++duplState) {
|
||||
RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
|
||||
for (; states->first<numStates-1; states->first++) {
|
||||
RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(states->first);
|
||||
for (states->second=states->first+1; states->second<numStates; states->second++) {
|
||||
RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(states->second);
|
||||
if (firstSD->fAccepting != duplSD->fAccepting ||
|
||||
firstSD->fLookAhead != duplSD->fLookAhead ||
|
||||
firstSD->fTagsIdx != duplSD->fTagsIdx) {
|
||||
|
@ -1136,8 +1136,8 @@ bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplStat
|
|||
int32_t firstVal = firstSD->fDtran->elementAti(col);
|
||||
int32_t duplVal = duplSD->fDtran->elementAti(col);
|
||||
if (!((firstVal == duplVal) ||
|
||||
((firstVal == firstState || firstVal == duplState) &&
|
||||
(duplVal == firstState || duplVal == duplState)))) {
|
||||
((firstVal == states->first || firstVal == states->second) &&
|
||||
(duplVal == states->first || duplVal == states->second)))) {
|
||||
rowsMatch = false;
|
||||
break;
|
||||
}
|
||||
|
@ -1151,21 +1151,21 @@ bool RBBITableBuilder::findDuplicateState(int32_t &firstState, int32_t &duplStat
|
|||
}
|
||||
|
||||
|
||||
bool RBBITableBuilder::findDuplicateSafeState(int32_t *firstState, int32_t *duplState) {
|
||||
bool RBBITableBuilder::findDuplicateSafeState(IntPair *states) {
|
||||
int32_t numStates = fSafeTable->size();
|
||||
|
||||
for (; *firstState<numStates-1; ++(*firstState)) {
|
||||
UnicodeString *firstRow = static_cast<UnicodeString *>(fSafeTable->elementAt(*firstState));
|
||||
for (*duplState=*firstState+1; *duplState<numStates; ++(*duplState)) {
|
||||
UnicodeString *duplRow = static_cast<UnicodeString *>(fSafeTable->elementAt(*duplState));
|
||||
for (; states->first<numStates-1; states->first++) {
|
||||
UnicodeString *firstRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->first));
|
||||
for (states->second=states->first+1; states->second<numStates; states->second++) {
|
||||
UnicodeString *duplRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->second));
|
||||
bool rowsMatch = true;
|
||||
int32_t numCols = firstRow->length();
|
||||
for (int32_t col=0; col < numCols; ++col) {
|
||||
int32_t firstVal = firstRow->charAt(col);
|
||||
int32_t duplVal = duplRow->charAt(col);
|
||||
if (!((firstVal == duplVal) ||
|
||||
((firstVal == *firstState || firstVal == *duplState) &&
|
||||
(duplVal == *firstState || duplVal == *duplState)))) {
|
||||
((firstVal == states->first || firstVal == states->second) &&
|
||||
(duplVal == states->first || duplVal == states->second)))) {
|
||||
rowsMatch = false;
|
||||
break;
|
||||
}
|
||||
|
@ -1242,11 +1242,10 @@ void RBBITableBuilder::removeSafeState(int32_t keepState, int32_t duplState) {
|
|||
* RemoveDuplicateStates
|
||||
*/
|
||||
void RBBITableBuilder::removeDuplicateStates() {
|
||||
int32_t firstState = 3;
|
||||
int32_t duplicateState = 0;
|
||||
while (findDuplicateState(firstState, duplicateState)) {
|
||||
// printf("Removing duplicate states (%d, %d)\n", firstState, duplicateState);
|
||||
removeState(firstState, duplicateState);
|
||||
IntPair dupls = {3, 0};
|
||||
while (findDuplicateState(&dupls)) {
|
||||
// printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
|
||||
removeState(dupls.first, dupls.second);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1428,11 +1427,10 @@ void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
|
|||
}
|
||||
|
||||
// Remove duplicate or redundant rows from the table.
|
||||
int32_t firstState = 1;
|
||||
int32_t duplicateState = 0; // initial value is not used; set by findDuplicateSafeState().
|
||||
while (findDuplicateSafeState(&firstState, &duplicateState)) {
|
||||
// printf("Removing duplicate safe states (%d, %d)\n", firstState, duplicateState);
|
||||
removeSafeState(firstState, duplicateState);
|
||||
IntPair states = {1, 0};
|
||||
while (findDuplicateSafeState(&states)) {
|
||||
// printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second);
|
||||
removeSafeState(states.first, states.second);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/rbbi.h"
|
||||
#include "rbbirb.h"
|
||||
#include "rbbinode.h"
|
||||
|
||||
|
||||
|
@ -49,11 +50,15 @@ public:
|
|||
*/
|
||||
void exportTable(void *where);
|
||||
|
||||
/** Find duplicate (redundant) character classes, beginning after the specifed
|
||||
/**
|
||||
* Find duplicate (redundant) character classes, beginning at the specified
|
||||
* pair, within this state table. This is an iterator-like function, used to
|
||||
* identify char classes (state table columns) that can be eliminated.
|
||||
* identify character classes (state table columns) that can be eliminated.
|
||||
* @param categories in/out parameter, specifies where to start looking for duplicates,
|
||||
* and returns the first pair of duplicates found, if any.
|
||||
* @return true if duplicate char classes were found, false otherwise.
|
||||
*/
|
||||
bool findDuplCharClassFrom(int &baseClass, int &duplClass);
|
||||
bool findDuplCharClassFrom(IntPair *statePair);
|
||||
|
||||
/** Remove a column from the state table. Used when two character categories
|
||||
* have been found equivalent, and merged together, to eliminate the unneeded table column.
|
||||
|
@ -95,13 +100,15 @@ private:
|
|||
|
||||
void addRuleRootNodes(UVector *dest, RBBINode *node);
|
||||
|
||||
/** Find the next duplicate state. An iterator function.
|
||||
* @param firstState (in/out) begin looking at this state, return the first of the
|
||||
* pair of duplicates.
|
||||
* @param duplicateState returns the duplicate state of fistState
|
||||
* @return true if a duplicate pair of states was found.
|
||||
/**
|
||||
* Find duplicate (redundant) states, beginning at the specified pair,
|
||||
* within this state table. This is an iterator-like function, used to
|
||||
* identify states (state table rows) that can be eliminated.
|
||||
* @param states in/out parameter, specifies where to start looking for duplicates,
|
||||
* and returns the first pair of duplicates found, if any.
|
||||
* @return true if duplicate states were found, false otherwise.
|
||||
*/
|
||||
bool findDuplicateState(int32_t &firstState, int32_t &duplicateState);
|
||||
bool findDuplicateState(IntPair *states);
|
||||
|
||||
/** Remove a duplicate state.
|
||||
* @param keepState First of the duplicate pair. Keep it.
|
||||
|
@ -111,12 +118,11 @@ private:
|
|||
void removeState(int32_t keepState, int32_t duplState);
|
||||
|
||||
/** Find the next duplicate state in the safe reverse table. An iterator function.
|
||||
* @param firstState ptr to state variable. Begin looking at this state, set to the first of the
|
||||
* pair of duplicates on return.
|
||||
* @param duplicateState ptr to where to return the duplicate state of fistState. Output only.
|
||||
* @return true if a duplicate pair of states was found.
|
||||
* @param states in/out parameter, specifies where to start looking for duplicates,
|
||||
* and returns the first pair of duplicates found, if any.
|
||||
* @return true if a duplicate pair of states was found.
|
||||
*/
|
||||
bool findDuplicateSafeState(int32_t *firstState, int32_t *duplicateState);
|
||||
bool findDuplicateSafeState(IntPair *states);
|
||||
|
||||
/** Remove a duplicate state from the safe table.
|
||||
* @param keepState First of the duplicate pair. Keep it.
|
||||
|
|
Loading…
Add table
Reference in a new issue