mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-3043 multiple status values implementation
X-SVN-Rev: 14640
This commit is contained in:
parent
eef53a9191
commit
fdb386fb81
10 changed files with 414 additions and 115 deletions
|
@ -173,11 +173,11 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
|
|||
UBool RuleBasedBreakIterator::fTrace = FALSE;
|
||||
void RuleBasedBreakIterator::init() {
|
||||
|
||||
fText = NULL;
|
||||
fData = NULL;
|
||||
fLastBreakTag = 0;
|
||||
fLastBreakTagValid = TRUE;
|
||||
fDictionaryCharCount = 0;
|
||||
fText = NULL;
|
||||
fData = NULL;
|
||||
fLastRuleStatusIndex = 0;
|
||||
fLastStatusIndexValid = TRUE;
|
||||
fDictionaryCharCount = 0;
|
||||
|
||||
#ifdef RBBI_DEBUG
|
||||
static UBool debugInitDone = FALSE;
|
||||
|
@ -326,8 +326,8 @@ RuleBasedBreakIterator::setText(const UnicodeString& newText) {
|
|||
*/
|
||||
int32_t RuleBasedBreakIterator::first(void) {
|
||||
reset();
|
||||
fLastBreakTag = 0;
|
||||
fLastBreakTagValid = TRUE;
|
||||
fLastRuleStatusIndex = 0;
|
||||
fLastStatusIndexValid = TRUE;
|
||||
if (fText == NULL)
|
||||
return BreakIterator::DONE;
|
||||
|
||||
|
@ -343,8 +343,8 @@ int32_t RuleBasedBreakIterator::first(void) {
|
|||
int32_t RuleBasedBreakIterator::last(void) {
|
||||
reset();
|
||||
if (fText == NULL) {
|
||||
fLastBreakTag = 0;
|
||||
fLastBreakTagValid = TRUE;
|
||||
fLastRuleStatusIndex = 0;
|
||||
fLastStatusIndexValid = TRUE;
|
||||
return BreakIterator::DONE;
|
||||
}
|
||||
|
||||
|
@ -355,7 +355,7 @@ int32_t RuleBasedBreakIterator::last(void) {
|
|||
// will work correctly.)
|
||||
|
||||
|
||||
fLastBreakTagValid = FALSE;
|
||||
fLastStatusIndexValid = FALSE;
|
||||
int32_t pos = fText->endIndex();
|
||||
fText->setIndex(pos);
|
||||
|
||||
|
@ -399,8 +399,8 @@ int32_t RuleBasedBreakIterator::next(void) {
|
|||
int32_t RuleBasedBreakIterator::previous(void) {
|
||||
// if we're already sitting at the beginning of the text, return DONE
|
||||
if (fText == NULL || current() == fText->startIndex()) {
|
||||
fLastBreakTag = 0;
|
||||
fLastBreakTagValid = TRUE;
|
||||
fLastRuleStatusIndex = 0;
|
||||
fLastStatusIndexValid = TRUE;
|
||||
return BreakIterator::DONE;
|
||||
}
|
||||
|
||||
|
@ -433,7 +433,7 @@ int32_t RuleBasedBreakIterator::previous(void) {
|
|||
break;
|
||||
}
|
||||
lastResult = result;
|
||||
lastTag = fLastBreakTag;
|
||||
lastTag = fLastRuleStatusIndex;
|
||||
breakTagValid = TRUE;
|
||||
}
|
||||
|
||||
|
@ -447,8 +447,8 @@ int32_t RuleBasedBreakIterator::previous(void) {
|
|||
// set the current iteration position to be the last break position
|
||||
// before where we started, and then return that value
|
||||
fText->setIndex(lastResult);
|
||||
fLastBreakTag = lastTag; // for use by getRuleStatus()
|
||||
fLastBreakTagValid = breakTagValid;
|
||||
fLastRuleStatusIndex = lastTag; // for use by getRuleStatus()
|
||||
fLastStatusIndexValid = breakTagValid;
|
||||
return lastResult;
|
||||
}
|
||||
|
||||
|
@ -462,17 +462,13 @@ int32_t RuleBasedBreakIterator::following(int32_t offset) {
|
|||
// if the offset passed in is already past the end of the text,
|
||||
// just return DONE; if it's before the beginning, return the
|
||||
// text's starting offset
|
||||
fLastBreakTag = 0;
|
||||
fLastBreakTagValid = TRUE;
|
||||
fLastRuleStatusIndex = 0;
|
||||
fLastStatusIndexValid = TRUE;
|
||||
if (fText == NULL || offset >= fText->endIndex()) {
|
||||
// fText->setToEnd();
|
||||
// return BreakIterator::DONE;
|
||||
last();
|
||||
return next();
|
||||
}
|
||||
else if (offset < fText->startIndex()) {
|
||||
// fText->setToStart();
|
||||
// return fText->startIndex();
|
||||
return first();
|
||||
}
|
||||
|
||||
|
@ -552,6 +548,7 @@ int32_t RuleBasedBreakIterator::following(int32_t offset) {
|
|||
int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
|
||||
// if the offset passed in is already past the end of the text,
|
||||
// just return DONE; if it's before the beginning, return the
|
||||
|
||||
// text's starting offset
|
||||
if (fText == NULL || offset > fText->endIndex()) {
|
||||
// return BreakIterator::DONE;
|
||||
|
@ -679,11 +676,11 @@ int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) {
|
|||
}
|
||||
|
||||
// No matter what, handleNext always correctly sets the break tag value.
|
||||
fLastBreakTagValid = TRUE;
|
||||
fLastStatusIndexValid = TRUE;
|
||||
|
||||
// if we're already at the end of the text, return DONE.
|
||||
if (fText == NULL || fData == NULL || fText->hasNext() == FALSE) {
|
||||
fLastBreakTag = 0;
|
||||
fLastRuleStatusIndex = 0;
|
||||
return BreakIterator::DONE;
|
||||
}
|
||||
|
||||
|
@ -697,9 +694,9 @@ int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) {
|
|||
UChar32 c = fText->current32();
|
||||
RBBIStateTableRow *row;
|
||||
int32_t lookaheadStatus = 0;
|
||||
int32_t lookaheadTag = 0;
|
||||
int32_t lookaheadTagIdx = 0;
|
||||
|
||||
fLastBreakTag = 0;
|
||||
fLastRuleStatusIndex = 0;
|
||||
|
||||
row = (RBBIStateTableRow *) // Point to starting row of state table.
|
||||
(statetable->fTableData + (statetable->fRowLen * state));
|
||||
|
@ -724,8 +721,8 @@ int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) {
|
|||
// We ran off the end of the string with a pending look-ahead match.
|
||||
// Treat this as if the look-ahead condition had been met, and return
|
||||
// the match at the / position from the look-ahead rule.
|
||||
result = lookaheadResult;
|
||||
fLastBreakTag = lookaheadTag;
|
||||
result = lookaheadResult;
|
||||
fLastRuleStatusIndex = lookaheadTagIdx;
|
||||
lookaheadStatus = 0;
|
||||
} else if (result == initialPosition) {
|
||||
// Ran off end, no match found.
|
||||
|
@ -778,7 +775,7 @@ int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) {
|
|||
// Match found, common case, could have lookahead so we move on to check it
|
||||
result = fText->getIndex();
|
||||
/// added
|
||||
fLastBreakTag = row->fTag; // Remember the break status (tag) value.
|
||||
fLastRuleStatusIndex = row->fTagIdx; // Remember the break status (tag) values.
|
||||
}
|
||||
|
||||
if (row->fLookAhead != 0) {
|
||||
|
@ -786,9 +783,9 @@ int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) {
|
|||
&& row->fAccepting == lookaheadStatus) {
|
||||
// Lookahead match is completed. Set the result accordingly, but only
|
||||
// if no other rule has matched further in the mean time.
|
||||
result = lookaheadResult;
|
||||
fLastBreakTag = lookaheadTag;
|
||||
lookaheadStatus = 0;
|
||||
result = lookaheadResult;
|
||||
fLastRuleStatusIndex = lookaheadTagIdx;
|
||||
lookaheadStatus = 0;
|
||||
/// i think we have to back up to read the lookahead character again
|
||||
/// fText->setIndex(lookaheadResult);
|
||||
/// TODO: this is a simple hack since reverse rules only have simple
|
||||
|
@ -802,7 +799,7 @@ int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) {
|
|||
int32_t r = fText->getIndex();
|
||||
lookaheadResult = r;
|
||||
lookaheadStatus = row->fLookAhead;
|
||||
lookaheadTag = row->fTag;
|
||||
lookaheadTagIdx = row->fTagIdx;
|
||||
goto continueOn;
|
||||
}
|
||||
|
||||
|
@ -856,7 +853,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(void) {
|
|||
int32_t result = fText->getIndex();
|
||||
int32_t lookaheadStatus = 0;
|
||||
int32_t lookaheadResult = 0;
|
||||
int32_t lookaheadTag = 0;
|
||||
int32_t lookaheadTagIdx = 0;
|
||||
UChar32 c = fText->current32();
|
||||
RBBIStateTableRow *row;
|
||||
|
||||
|
@ -927,7 +924,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(void) {
|
|||
if (r > result) {
|
||||
lookaheadResult = r;
|
||||
lookaheadStatus = row->fLookAhead;
|
||||
lookaheadTag = row->fTag;
|
||||
lookaheadTagIdx = row->fTagIdx;
|
||||
}
|
||||
goto continueOn;
|
||||
}
|
||||
|
@ -938,8 +935,8 @@ int32_t RuleBasedBreakIterator::handlePrevious(void) {
|
|||
if (lookaheadResult > result) {
|
||||
U_ASSERT(row->fAccepting == lookaheadStatus); // TODO: handle this case
|
||||
// of overlapping lookahead matches.
|
||||
result = lookaheadResult;
|
||||
fLastBreakTag = lookaheadTag;
|
||||
result = lookaheadResult;
|
||||
fLastRuleStatusIndex = lookaheadTagIdx;
|
||||
lookaheadStatus = 0;
|
||||
}
|
||||
goto continueOn;
|
||||
|
@ -986,7 +983,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
|
|||
}
|
||||
// break tag is no longer valid after icu switched to exact backwards
|
||||
// positioning.
|
||||
fLastBreakTagValid = FALSE;
|
||||
fLastStatusIndexValid = FALSE;
|
||||
if (statetable == NULL) {
|
||||
return fText->setToStart();
|
||||
}
|
||||
|
@ -1000,7 +997,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
|
|||
int32_t result = fText->getIndex();
|
||||
int32_t lookaheadStatus = 0;
|
||||
int32_t lookaheadResult = 0;
|
||||
int32_t lookaheadTag = 0;
|
||||
int32_t lookaheadTagIdx = 0;
|
||||
UBool lookAheadHardBreak = (statetable->fFlags & RBBI_LOOKAHEAD_HARD_BREAK) != 0;
|
||||
|
||||
RBBIStateTableRow *row;
|
||||
|
@ -1060,7 +1057,7 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
|
|||
// Match found, common case, could have lookahead so we move on to check it
|
||||
result = fText->getIndex();
|
||||
/// added
|
||||
fLastBreakTag = row->fTag; // Remember the break status (tag) value.
|
||||
fLastRuleStatusIndex = row->fTagIdx; // Remember the break status (tag) value.
|
||||
}
|
||||
|
||||
if (row->fLookAhead != 0) {
|
||||
|
@ -1068,9 +1065,9 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
|
|||
&& row->fAccepting == lookaheadStatus) {
|
||||
// Lookahead match is completed. Set the result accordingly, but only
|
||||
// if no other rule has matched further in the mean time.
|
||||
result = lookaheadResult;
|
||||
fLastBreakTag = lookaheadTag;
|
||||
lookaheadStatus = 0;
|
||||
result = lookaheadResult;
|
||||
fLastRuleStatusIndex = lookaheadTagIdx;
|
||||
lookaheadStatus = 0;
|
||||
/// i think we have to back up to read the lookahead character again
|
||||
/// fText->setIndex(lookaheadResult);
|
||||
/// TODO: this is a simple hack since reverse rules only have simple
|
||||
|
@ -1092,10 +1089,10 @@ int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable)
|
|||
goto continueOn;
|
||||
}
|
||||
|
||||
int32_t r = fText->getIndex();
|
||||
lookaheadResult = r;
|
||||
lookaheadStatus = row->fLookAhead;
|
||||
lookaheadTag = row->fTag;
|
||||
int32_t r = fText->getIndex();
|
||||
lookaheadResult = r;
|
||||
lookaheadStatus = row->fLookAhead;
|
||||
fLastRuleStatusIndex = row->fTagIdx;
|
||||
goto continueOn;
|
||||
}
|
||||
|
||||
|
@ -1148,19 +1145,18 @@ RuleBasedBreakIterator::reset()
|
|||
// status while doing the next().
|
||||
//
|
||||
//-------------------------------------------------------------------------------
|
||||
int32_t RuleBasedBreakIterator::getRuleStatus() const {
|
||||
RuleBasedBreakIterator *nonConstThis = (RuleBasedBreakIterator *)this;
|
||||
if (fLastBreakTagValid == FALSE) {
|
||||
void RuleBasedBreakIterator::makeRuleStatusValid() {
|
||||
if (fLastStatusIndexValid == FALSE) {
|
||||
// No cached status is available.
|
||||
if (fText == NULL || current() == fText->startIndex()) {
|
||||
// At start of text, or there is no text. Status is always zero.
|
||||
nonConstThis->fLastBreakTag = 0;
|
||||
nonConstThis->fLastBreakTagValid = TRUE;
|
||||
fLastRuleStatusIndex = 0;
|
||||
fLastStatusIndexValid = TRUE;
|
||||
} else {
|
||||
// Not at start of text. Find status the tedious way.
|
||||
int32_t pa = current();
|
||||
nonConstThis->previous();
|
||||
int32_t pb = nonConstThis->next();
|
||||
previous();
|
||||
int32_t pb = next();
|
||||
if (pa != pb) {
|
||||
// note: the if (pa != pb) test is here only to eliminate warnings for
|
||||
// unused local variables on gcc. Logically, it isn't needed.
|
||||
|
@ -1168,10 +1164,51 @@ int32_t RuleBasedBreakIterator::getRuleStatus() const {
|
|||
}
|
||||
}
|
||||
}
|
||||
return nonConstThis->fLastBreakTag;
|
||||
U_ASSERT(fLastStatusIndexValid == TRUE);
|
||||
U_ASSERT(fLastRuleStatusIndex >= 0 && fLastRuleStatusIndex < fData->fStatusMaxIdx);
|
||||
}
|
||||
|
||||
|
||||
//
//  getRuleStatus   Return the status value for the most recent boundary.
//
//                  Each record in the rule status table is a count followed by
//                  that many status values.  fLastRuleStatusIndex is the index
//                  of the count, so (index + count) is the position of the
//                  final value of the record, which is the one returned here.
//
int32_t RuleBasedBreakIterator::getRuleStatus() const {
    // makeRuleStatusValid() is non-const because it may update the cached
    // status index, so constness has to be cast away to call it from here.
    ((RuleBasedBreakIterator *)this)->makeRuleStatusValid();

    const int32_t *statusTable = fData->fRuleStatusTable;
    const int32_t  valueCount  = statusTable[fLastRuleStatusIndex];

    // Last (largest) entry of this status record.
    return statusTable[fLastRuleStatusIndex + valueCount];
}
|
||||
|
||||
|
||||
|
||||
|
||||
//
//  getRuleStatusVec   Copy the status values for the most recent boundary
//                     into a caller supplied array.
//
//     fillInVec   Destination array for the status values.
//     capacity    Number of int32_t slots available in fillInVec.
//     status      In/out error code.  Nothing is done if it already indicates
//                 a failure.  Set to U_BUFFER_OVERFLOW_ERROR when capacity is
//                 smaller than the number of available status values.
//
//     returns     The total number of status values available for the
//                 boundary, even when fewer than that were actually copied.
//                 Zero if status indicated a failure on entry.
//
int32_t RuleBasedBreakIterator::getRuleStatusVec(
             int32_t *fillInVec, int32_t capacity, UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return 0;
    }

    // This function is not const, so the cached status index can be
    // refreshed directly.
    makeRuleStatusValid();

    // A status record is a count followed by that many values.
    const int32_t *record  = fData->fRuleStatusTable + fLastRuleStatusIndex;
    const int32_t  numVals = record[0];

    int32_t numValsToCopy = numVals;
    if (numVals > capacity) {
        // The destination is too small for all of the values.  Report the
        // truncation as a buffer overflow; the full count is still returned
        // so that the caller can retry with a big enough array.
        status        = U_BUFFER_OVERFLOW_ERROR;
        numValsToCopy = capacity;
    }

    for (int32_t i = 0; i < numValsToCopy; i++) {
        fillInVec[i] = record[i + 1];
    }
    return numVals;
}
|
||||
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------
|
||||
//
|
||||
// getBinaryRules Access to the compiled form of the rules,
|
||||
|
|
|
@ -104,6 +104,10 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
|
|||
|
||||
fRuleSource = (UChar *)((char *)data + fHeader->fRuleSource);
|
||||
fRuleString.setTo(TRUE, fRuleSource, -1);
|
||||
U_ASSERT(data->fRuleSourceLen > 0);
|
||||
|
||||
fRuleStatusTable = (int32_t *)((char *)data + fHeader->fStatusTable);
|
||||
fStatusMaxIdx = data->fStatusTableLen / sizeof(int32_t);
|
||||
|
||||
fRefCount = 1;
|
||||
|
||||
|
@ -116,7 +120,7 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) {
|
|||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// Destructor. Don't call this - use removeReferenc() instead.
|
||||
// Destructor. Don't call this - use removeReference() instead.
|
||||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
RBBIDataWrapper::~RBBIDataWrapper() {
|
||||
|
@ -202,7 +206,7 @@ void RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *tab
|
|||
|
||||
RBBIDebugPrintf(" %s\n", heading);
|
||||
|
||||
RBBIDebugPrintf("State | Acc LA Tag");
|
||||
RBBIDebugPrintf("State | Acc LA TagIx");
|
||||
for (c=0; c<fHeader->fCatCount; c++) {RBBIDebugPrintf("%3d ", c);}
|
||||
RBBIDebugPrintf("\n------|---------------"); for (c=0;c<fHeader->fCatCount; c++) {
|
||||
RBBIDebugPrintf("----");
|
||||
|
@ -216,7 +220,7 @@ void RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *tab
|
|||
for (s=0; s<table->fNumStates; s++) {
|
||||
RBBIStateTableRow *row = (RBBIStateTableRow *)
|
||||
(table->fTableData + (table->fRowLen * s));
|
||||
RBBIDebugPrintf("%4d | %3d %3d %3d ", s, row->fAccepting, row->fLookAhead, row->fTag);
|
||||
RBBIDebugPrintf("%4d | %3d %3d %3d ", s, row->fAccepting, row->fLookAhead, row->fTagIdx);
|
||||
for (c=0; c<fHeader->fCatCount; c++) {
|
||||
RBBIDebugPrintf("%3d ", row->fNextState[c]);
|
||||
}
|
||||
|
@ -247,6 +251,7 @@ void RBBIDataWrapper::printData() {
|
|||
}
|
||||
#endif
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
@ -403,6 +408,10 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD
|
|||
ds->swapArray16(ds, inBytes+ds->readUInt32(rbbiDH->fRuleSource), ds->readUInt32(rbbiDH->fRuleSourceLen),
|
||||
outBytes+ds->readUInt32(rbbiDH->fRuleSource), status);
|
||||
|
||||
// Table of rule status values. They are all int32_t values.
|
||||
ds->swapArray32(ds, inBytes+ds->readUInt32(rbbiDH->fStatusTable), ds->readUInt32(rbbiDH->fStatusTableLen),
|
||||
outBytes+ds->readUInt32(rbbiDH->fStatusTable), status);
|
||||
|
||||
// And, last, the header. All 32 bit values.
|
||||
ds->swapArray32(ds, inBytes, sizeof(RBBIDataHeader), outBytes, status);
|
||||
|
||||
|
|
|
@ -68,42 +68,50 @@ struct RBBIDataHeader {
|
|||
/* All offsets are bytes from the start of the RBBIDataHeader. */
|
||||
/* All sizes are in bytes. */
|
||||
/* */
|
||||
uint32_t fFTable; /* forward state transition table. */
|
||||
uint32_t fFTable; /* forward state transition table. */
|
||||
uint32_t fFTableLen;
|
||||
uint32_t fRTable; /* Offset to the reverse state transition table. */
|
||||
uint32_t fRTable; /* Offset to the reverse state transition table. */
|
||||
uint32_t fRTableLen;
|
||||
uint32_t fSFTable; /* safe point forward transition table */
|
||||
uint32_t fSFTable; /* safe point forward transition table */
|
||||
uint32_t fSFTableLen;
|
||||
uint32_t fSRTable; /* safe point reverse transition table */
|
||||
uint32_t fSRTable; /* safe point reverse transition table */
|
||||
uint32_t fSRTableLen;
|
||||
uint32_t fTrie; /* Offset to Trie data for character categories */
|
||||
uint32_t fTrie; /* Offset to Trie data for character categories */
|
||||
uint32_t fTrieLen;
|
||||
uint32_t fRuleSource; /* Offset to the source for for the break */
|
||||
uint32_t fRuleSource; /* Offset to the source for for the break */
|
||||
uint32_t fRuleSourceLen; /* rules. Stored UChar *. */
|
||||
uint32_t fStatusTable; /* Offset to the table of rule status values */
|
||||
uint32_t fStatusTableLen;
|
||||
|
||||
uint32_t fReserved[8]; /* Reserved for expansion */
|
||||
uint32_t fReserved[6]; /* Reserved for expansion */
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
struct RBBIStateTableRow {
|
||||
int16_t fAccepting; /* Non-zero if this row is for an accepting state. */
|
||||
/* Value is the {nnn} value to return to calling */
|
||||
/* application. */
|
||||
int16_t fLookAhead; /* Non-zero if this row is for a state that */
|
||||
/* corresponds to a '/' in the rule source. */
|
||||
/* Value is the same as the fAccepting */
|
||||
/* value for the rule (which will appear */
|
||||
/* in a different state. */
|
||||
int16_t fTag; /* Non-zero if this row covers a {tagged} position */
|
||||
/* from a rule. value is the tag number. */
|
||||
int16_t fAccepting; /* Non-zero if this row is for an accepting state. */
|
||||
/* Value 0: not an accepting state. */
|
||||
/* -1: Unconditional Accepting state. */
|
||||
/* positive: Look-ahead match has completed. */
|
||||
/* Actual boundary position happened earlier */
|
||||
/* Value here == fLookAhead in earlier */
|
||||
/* state, at actual boundary pos. */
|
||||
int16_t fLookAhead; /* Non-zero if this row is for a state that */
|
||||
/* corresponds to a '/' in the rule source. */
|
||||
/* Value is the same as the fAccepting */
|
||||
/* value for the rule (which will appear */
|
||||
/* in a different state. */
|
||||
int16_t fTagIdx; /* Non-zero if this row covers a {tagged} position */
|
||||
/* from a rule. Value is the index in the */
|
||||
/* StatusTable of the set of matching */
|
||||
/* tags (rule status values) */
|
||||
int16_t fReserved;
|
||||
uint16_t fNextState[2]; /* Next State, indexed by char category. */
|
||||
/* Array Size is fNumCols from the */
|
||||
/* state table header. */
|
||||
/* CAUTION: see RBBITableBuilder::getTableSize() */
|
||||
/* before changing anything here. */
|
||||
uint16_t fNextState[2]; /* Next State, indexed by char category. */
|
||||
/* Array Size is fNumCols from the */
|
||||
/* state table header. */
|
||||
/* CAUTION: see RBBITableBuilder::getTableSize() */
|
||||
/* before changing anything here. */
|
||||
};
|
||||
|
||||
|
||||
|
@ -122,9 +130,9 @@ typedef enum {
|
|||
} RBBIStateTableFlags;
|
||||
|
||||
|
||||
/* */
|
||||
/* */
|
||||
/* The reference counting wrapper class */
|
||||
/* */
|
||||
/* */
|
||||
class RBBIDataWrapper : public UMemory {
|
||||
public:
|
||||
RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
|
||||
|
@ -145,15 +153,19 @@ public:
|
|||
#define printTable(heading, table)
|
||||
#endif
|
||||
|
||||
/* */
|
||||
/* */
|
||||
/* Pointers to items within the data */
|
||||
/* */
|
||||
/* */
|
||||
const RBBIDataHeader *fHeader;
|
||||
const RBBIStateTable *fForwardTable;
|
||||
const RBBIStateTable *fReverseTable;
|
||||
const RBBIStateTable *fSafeFwdTable;
|
||||
const RBBIStateTable *fSafeRevTable;
|
||||
const UChar *fRuleSource;
|
||||
const int32_t *fRuleStatusTable;
|
||||
|
||||
/* number of int32_t values in the rule status table. Used to sanity check indexing */
|
||||
int32_t fStatusMaxIdx;
|
||||
|
||||
UTrie fTrie;
|
||||
|
||||
|
|
|
@ -64,22 +64,27 @@ RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules,
|
|||
fReverseTables = NULL;
|
||||
fSafeFwdTables = NULL;
|
||||
fSafeRevTables = NULL;
|
||||
fRuleStatusVals = NULL;
|
||||
fChainRules = FALSE;
|
||||
fLBCMNoChain = FALSE;
|
||||
fLookAheadHardBreak = FALSE;
|
||||
fUSetNodes = NULL;
|
||||
fRuleStatusVals = NULL;
|
||||
fScanner = NULL;
|
||||
fSetBuilder = NULL;
|
||||
|
||||
UErrorCode oldstatus = status;
|
||||
|
||||
fUSetNodes = new UVector(status); // bcos status gets overwritten here
|
||||
fScanner = new RBBIRuleScanner(this);
|
||||
fSetBuilder = new RBBISetBuilder(this);
|
||||
if (U_FAILURE(oldstatus)) {
|
||||
status = oldstatus;
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
if(fSetBuilder == 0 || fScanner == 0 || fUSetNodes == 0) {
|
||||
|
||||
fUSetNodes = new UVector(status); // bcos status gets overwritten here
|
||||
fRuleStatusVals = new UVector(status);
|
||||
fScanner = new RBBIRuleScanner(this);
|
||||
fSetBuilder = new RBBISetBuilder(this);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
if(fSetBuilder == 0 || fScanner == 0 || fUSetNodes == 0 || fRuleStatusVals == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
|
@ -114,6 +119,7 @@ RBBIRuleBuilder::~RBBIRuleBuilder() {
|
|||
delete fSafeFwdTree;
|
||||
delete fSafeRevTree;
|
||||
delete fScanner;
|
||||
delete fRuleStatusVals;
|
||||
}
|
||||
|
||||
|
||||
|
@ -130,6 +136,8 @@ RBBIRuleBuilder::~RBBIRuleBuilder() {
|
|||
static int32_t align8(int32_t i) {return (i+7) & 0xfffffff8;}
|
||||
|
||||
RBBIDataHeader *RBBIRuleBuilder::flattenData() {
|
||||
int32_t i;
|
||||
|
||||
if (U_FAILURE(*fStatus)) {
|
||||
return NULL;
|
||||
}
|
||||
|
@ -148,10 +156,13 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
|
|||
int32_t safeFwdTableSize = align8(fSafeFwdTables->getTableSize());
|
||||
int32_t safeRevTableSize = align8(fSafeRevTables->getTableSize());
|
||||
int32_t trieSize = align8(fSetBuilder->getTrieSize());
|
||||
int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t));
|
||||
int32_t rulesSize = align8((strippedRules.length()+1) * sizeof(UChar));
|
||||
|
||||
int32_t totalSize = headerSize + forwardTableSize + reverseTableSize
|
||||
+ safeFwdTableSize + safeRevTableSize + trieSize + rulesSize;
|
||||
+ safeFwdTableSize + safeRevTableSize
|
||||
+ statusTableSize + trieSize + rulesSize;
|
||||
|
||||
RBBIDataHeader *data = (RBBIDataHeader *)uprv_malloc(totalSize);
|
||||
if (data == NULL) {
|
||||
*fStatus = U_MEMORY_ALLOCATION_ERROR;
|
||||
|
@ -176,7 +187,9 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
|
|||
|
||||
data->fTrie = data->fSRTable + safeRevTableSize;
|
||||
data->fTrieLen = fSetBuilder->getTrieSize();
|
||||
data->fRuleSource = data->fTrie + trieSize;
|
||||
data->fStatusTable = data->fTrie + trieSize;
|
||||
data->fStatusTableLen= statusTableSize;
|
||||
data->fRuleSource = data->fStatusTable + statusTableSize;
|
||||
data->fRuleSourceLen = strippedRules.length() * sizeof(UChar);
|
||||
|
||||
uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
|
||||
|
@ -186,6 +199,12 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
|
|||
fSafeFwdTables->exportTable((uint8_t *)data + data->fSFTable);
|
||||
fSafeRevTables->exportTable((uint8_t *)data + data->fSRTable);
|
||||
fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie);
|
||||
|
||||
int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);
|
||||
for (i=0; i<fRuleStatusVals->size(); i++) {
|
||||
ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
|
||||
}
|
||||
|
||||
strippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus);
|
||||
|
||||
return data;
|
||||
|
@ -251,6 +270,10 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (builder.fDebugEnv && uprv_strstr(builder.fDebugEnv, "states")) {
|
||||
builder.fForwardTables->printRuleStatusTable();
|
||||
}
|
||||
|
||||
//
|
||||
// Package up the compiled data into a memory image
|
||||
// in the run-time format.
|
||||
|
|
|
@ -153,6 +153,9 @@ public:
|
|||
RBBITableBuilder *fSafeFwdTables;
|
||||
RBBITableBuilder *fSafeRevTables;
|
||||
|
||||
UVector *fRuleStatusVals; // The values that can be returned
|
||||
// from getRuleStatus().
|
||||
|
||||
RBBIDataHeader *flattenData(); // Create the flattened (runtime format)
|
||||
// data tables..
|
||||
private:
|
||||
|
|
|
@ -133,8 +133,15 @@ void RBBITableBuilder::build() {
|
|||
flagAcceptingStates();
|
||||
flagLookAheadStates();
|
||||
flagTaggedStates();
|
||||
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "states")) {printStates();};
|
||||
|
||||
//
|
||||
// Update the global table of rule status {tag} values
|
||||
// The rule builder has a global vector of status values that are common
|
||||
// for all tables. Merge the ones from this table into the global set.
|
||||
//
|
||||
mergeRuleStatusVals();
|
||||
|
||||
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "states")) {printStates();};
|
||||
}
|
||||
|
||||
|
||||
|
@ -637,18 +644,142 @@ void RBBITableBuilder::flagTaggedStates() {
|
|||
}
|
||||
for (i=0; i<tagNodes.size(); i++) { // For each tag node t (all of 'em)
|
||||
tagNode = (RBBINode *)tagNodes.elementAt(i);
|
||||
|
||||
|
||||
for (n=0; n<fDStates->size(); n++) { // For each state s (row in the state table)
|
||||
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
|
||||
if (sd->fPositions->indexOf(tagNode) >= 0) { // if s include the tag node t
|
||||
if (sd->fTagVal < tagNode->fVal) {
|
||||
// If more than one rule tag applies to this state, the larger
|
||||
// tag takes precedence.
|
||||
sd->fTagVal = tagNode->fVal;
|
||||
sortedAdd(&sd->fTagVals, tagNode->fVal);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// mergeRuleStatusVals
|
||||
//
|
||||
// Update the global table of rule status {tag} values
|
||||
// The rule builder has a global vector of status values that are common
|
||||
// for all tables. Merge the ones from this table into the global set.
|
||||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
void RBBITableBuilder::mergeRuleStatusVals() {
|
||||
//
|
||||
// The basic outline of what happens here is this...
|
||||
//
|
||||
// for each state in this state table
|
||||
// if the status tag list for this state is in the global statuses list
|
||||
// record where and
|
||||
// continue with the next state
|
||||
// else
|
||||
// add the tag list for this state to the global list.
|
||||
//
|
||||
int i;
|
||||
int n;
|
||||
|
||||
// Pre-set a single tag of {0} into the table.
|
||||
// We will need this as a default, for rule sets with no explicit tagging.
|
||||
if (fRB->fRuleStatusVals->size() == 0) {
|
||||
fRB->fRuleStatusVals->addElement(1, *fStatus); // Num of statuses in group
|
||||
fRB->fRuleStatusVals->addElement((int32_t)0, *fStatus); // and our single status of zero
|
||||
}
|
||||
|
||||
// For each state
|
||||
for (n=0; n<fDStates->size(); n++) {
|
||||
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
|
||||
UVector *thisStatesTagValues = sd->fTagVals;
|
||||
if (thisStatesTagValues == NULL) {
|
||||
// No tag values are explicitly associated with this state.
|
||||
// Set the default tag value.
|
||||
sd->fTagsIdx = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
// There are tag(s) associated with this state.
|
||||
// fTagsIdx will be the index into the global tag list for this state's tag values.
|
||||
// Initial value of -1 flags that we haven't got it set yet.
|
||||
sd->fTagsIdx = -1;
|
||||
int32_t thisTagGroupStart = 0; // indexes into the global rule status vals list
|
||||
int32_t nextTagGroupStart = 0;
|
||||
|
||||
// Loop runs once per group of tags in the global list
|
||||
while (nextTagGroupStart < fRB->fRuleStatusVals->size()) {
|
||||
thisTagGroupStart = nextTagGroupStart;
|
||||
nextTagGroupStart += fRB->fRuleStatusVals->elementAti(thisTagGroupStart) + 1;
|
||||
if (thisStatesTagValues->size() != fRB->fRuleStatusVals->elementAti(thisTagGroupStart)) {
|
||||
// The number of tags for this state is different from
|
||||
// the number of tags in this group from the global list.
|
||||
// Continue with the next group from the global list.
|
||||
continue;
|
||||
}
|
||||
// The lengths match, go ahead and compare the actual tag values
|
||||
// between this state and the group from the global list.
|
||||
for (i=0; i<thisStatesTagValues->size(); i++) {
|
||||
if (thisStatesTagValues->elementAti(i) !=
|
||||
fRB->fRuleStatusVals->elementAti(thisTagGroupStart + 1 + i) ) {
|
||||
// Mismatch.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (i == thisStatesTagValues->size()) {
|
||||
// We found a set of tag values in the global list that match
|
||||
// those for this state. Use them.
|
||||
sd->fTagsIdx = thisTagGroupStart;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (sd->fTagsIdx == -1) {
|
||||
// No suitable entry in the global tag list already. Add one
|
||||
sd->fTagsIdx = fRB->fRuleStatusVals->size();
|
||||
fRB->fRuleStatusVals->addElement(thisStatesTagValues->size(), *fStatus);
|
||||
for (i=0; i<thisStatesTagValues->size(); i++) {
|
||||
fRB->fRuleStatusVals->addElement(thisStatesTagValues->elementAti(i), *fStatus);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// sortedAdd Add a value to a vector of sorted values (ints).
|
||||
// Do not replicate entries; if the value is already there, do not
|
||||
// add a second one.
|
||||
// Lazily create the vector if it does not already exist.
|
||||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
//
//  sortedAdd    vector   in/out: address of the pointer to the sorted vector.
//                        The vector is created here if the pointer is NULL.
//               val      the value to insert.
//
//               Errors are reported through *fStatus.  A failure to allocate
//               the vector sets U_MEMORY_ALLOCATION_ERROR.  Nothing is added
//               once *fStatus indicates a failure.
//
void RBBITableBuilder::sortedAdd(UVector **vector, int32_t val) {
    if (*vector == NULL) {
        *vector = new UVector(*fStatus);
        if (*vector == NULL && U_SUCCESS(*fStatus)) {
            // Allocation failed.  Record it instead of returning silently, so
            // that the caller does not mistake the missing vector for a state
            // that simply has no tags.
            *fStatus = U_MEMORY_ALLOCATION_ERROR;
        }
    }
    if (*vector == NULL || U_FAILURE(*fStatus)) {
        return;
    }

    UVector       *sorted = *vector;
    const int32_t  count  = sorted->size();

    // Find the first element that is not smaller than val.  That is where
    // val belongs, unless the element found there is val itself.
    int32_t insertAt = 0;
    while (insertAt < count) {
        const int32_t existing = sorted->elementAti(insertAt);
        if (existing == val) {
            // Already present.  Duplicates are not stored.
            return;
        }
        if (existing > val) {
            break;
        }
        insertAt++;
    }
    sorted->insertElementAt(val, insertAt, *fStatus);
}
|
||||
|
||||
|
||||
|
@ -678,6 +809,7 @@ void RBBITableBuilder::setAdd(UVector *dest, UVector *source) {
|
|||
}
|
||||
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// setEqual Set operation on UVector.
|
||||
|
@ -818,7 +950,7 @@ void RBBITableBuilder::exportTable(void *where) {
|
|||
U_ASSERT (-32768 < sd->fLookAhead && sd->fLookAhead <= 32767);
|
||||
row->fAccepting = (int16_t)sd->fAccepting;
|
||||
row->fLookAhead = (int16_t)sd->fLookAhead;
|
||||
row->fTag = (int16_t)sd->fTagVal;
|
||||
row->fTagIdx = (int16_t)sd->fTagsIdx;
|
||||
for (col=0; col<fRB->fSetBuilder->getNumCharCategories(); col++) {
|
||||
row->fNextState[col] = (uint16_t)sd->fDtran->elementAti(col);
|
||||
}
|
||||
|
@ -856,16 +988,20 @@ void RBBITableBuilder::printStates() {
|
|||
|
||||
RBBIDebugPrintf("state | i n p u t s y m b o l s \n");
|
||||
RBBIDebugPrintf(" | Acc LA Tag");
|
||||
for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {RBBIDebugPrintf(" %2d", c);};
|
||||
for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
|
||||
RBBIDebugPrintf(" %2d", c);
|
||||
}
|
||||
RBBIDebugPrintf("\n");
|
||||
RBBIDebugPrintf(" |---------------");
|
||||
for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {RBBIDebugPrintf("---");};
|
||||
for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
|
||||
RBBIDebugPrintf("---");
|
||||
}
|
||||
RBBIDebugPrintf("\n");
|
||||
|
||||
for (n=0; n<fDStates->size(); n++) {
|
||||
RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
|
||||
RBBIDebugPrintf(" %3d | " , n);
|
||||
RBBIDebugPrintf("%3d %3d %5d ", sd->fAccepting, sd->fLookAhead, sd->fTagVal);
|
||||
RBBIDebugPrintf("%3d %3d %5d ", sd->fAccepting, sd->fLookAhead, sd->fTagsIdx);
|
||||
for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
|
||||
RBBIDebugPrintf(" %2d", sd->fDtran->elementAti(c));
|
||||
}
|
||||
|
@ -877,6 +1013,33 @@ void RBBITableBuilder::printStates() {
|
|||
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// printRuleStatusTable Debug Function. Dump the common rule status table
|
||||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
#ifdef RBBI_DEBUG
|
||||
void RBBITableBuilder::printRuleStatusTable() {
|
||||
int32_t thisRecord = 0;
|
||||
int32_t nextRecord = 0;
|
||||
int i;
|
||||
UVector *tbl = fRB->fRuleStatusVals;
|
||||
|
||||
RBBIDebugPrintf("index | tags \n");
|
||||
RBBIDebugPrintf("-------------------\n");
|
||||
|
||||
while (nextRecord < tbl->size()) {
|
||||
thisRecord = nextRecord;
|
||||
nextRecord = thisRecord + tbl->elementAti(thisRecord) + 1;
|
||||
RBBIDebugPrintf("%4d ", thisRecord);
|
||||
for (i=thisRecord+1; i<nextRecord; i++) {
|
||||
RBBIDebugPrintf(" %5d", tbl->elementAti(i));
|
||||
}
|
||||
RBBIDebugPrintf("\n");
|
||||
}
|
||||
RBBIDebugPrintf("\n\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
@ -890,19 +1053,15 @@ RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu
|
|||
fMarked = FALSE;
|
||||
fAccepting = 0;
|
||||
fLookAhead = 0;
|
||||
fTagVal = 0;
|
||||
fTagsIdx = 0;
|
||||
fTagVals = NULL;
|
||||
fPositions = NULL;
|
||||
fDtran = NULL;
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
fDtran = new UVector(lastInputSymbol+1, status);
|
||||
fDtran = new UVector(lastInputSymbol+1, *fStatus);
|
||||
if (U_FAILURE(*fStatus)) {
|
||||
return;
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
*fStatus = status;
|
||||
return;
|
||||
}
|
||||
if (fDtran == NULL) {
|
||||
*fStatus = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
|
@ -917,8 +1076,10 @@ RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatu
|
|||
RBBIStateDescriptor::~RBBIStateDescriptor() {
    // Delete each vector held by this descriptor and clear its pointer
    // immediately afterwards.
    delete fPositions;
    fPositions = NULL;

    delete fDtran;
    fDtran = NULL;

    delete fTagVals;
    fTagVals = NULL;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -55,6 +55,7 @@ private:
|
|||
void flagAcceptingStates();
|
||||
void flagLookAheadStates();
|
||||
void flagTaggedStates();
|
||||
void mergeRuleStatusVals();
|
||||
|
||||
// Set functions for UVector.
|
||||
// TODO: make a USet subclass of UVector
|
||||
|
@ -62,14 +63,19 @@ private:
|
|||
void setAdd(UVector *dest, UVector *source);
|
||||
UBool setEquals(UVector *a, UVector *b);
|
||||
|
||||
void sortedAdd(UVector **dest, int32_t val);
|
||||
|
||||
public:
|
||||
#ifdef RBBI_DEBUG
|
||||
void printSet(UVector *s);
|
||||
void printPosSets(RBBINode *n /* = NULL*/);
|
||||
void printStates();
|
||||
void printRuleStatusTable();
|
||||
#else
|
||||
#define printSet(s)
|
||||
#define printPosSets(n)
|
||||
#define printStates()
|
||||
#define printRuleStatusTable()
|
||||
#endif
|
||||
|
||||
private:
|
||||
|
@ -95,7 +101,8 @@ public:
|
|||
UBool fMarked;
|
||||
int32_t fAccepting;
|
||||
int32_t fLookAhead;
|
||||
int32_t fTagVal;
|
||||
UVector *fTagVals;
|
||||
int32_t fTagsIdx;
|
||||
UVector *fPositions; // Set of parse tree positions associated
|
||||
// with this state. Unordered (it's a set).
|
||||
// UVector contents are RBBINode *
|
||||
|
|
|
@ -63,10 +63,10 @@ protected:
|
|||
*/
|
||||
RBBIDataWrapper *fData;
|
||||
|
||||
/** Rule {tag} value for the most recent match.
|
||||
/** Index of the Rule {tag} values for the most recent match.
|
||||
* @internal
|
||||
*/
|
||||
int32_t fLastBreakTag;
|
||||
int32_t fLastRuleStatusIndex;
|
||||
|
||||
/**
|
||||
* Rule tag value valid flag.
|
||||
|
@ -74,7 +74,7 @@ protected:
|
|||
* This flag lets us lazily compute the value if we are ever asked for it.
|
||||
* @internal
|
||||
*/
|
||||
UBool fLastBreakTagValid;
|
||||
UBool fLastStatusIndexValid;
|
||||
|
||||
/**
|
||||
* Counter for the number of characters encountered with the "dictionary"
|
||||
|
@ -349,6 +349,24 @@ public:
|
|||
*/
|
||||
virtual int32_t getRuleStatus() const;
|
||||
|
||||
/**
|
||||
* Get the statuses from the break rules that determined the most recently
|
||||
* returned break position. The values appear in the rule source
|
||||
* within brackets, {123}, for example. The default status value for rules
|
||||
* that do not explicitly provide one is zero.
|
||||
* <p>
|
||||
* For word break iterators, the possible values are defined in enum UWordBreak.
|
||||
* @param fillInVec an array to be filled in with the status values.
|
||||
* @param capacity the length of the supplied vector. A length of zero causes
|
||||
* the function to return the number of status values, in the
|
||||
* normal way, without attempting to store any values.
|
||||
* @param status receives error codes.
|
||||
* @return The number of rule status values from rules that determined
|
||||
* the most recent boundary returned by the break iterator.
|
||||
* @draft ICU 3.0
|
||||
*/
|
||||
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Returns a unique class ID POLYMORPHICALLY. Pure virtual override.
|
||||
* This method is to implement a simple version of RTTI, since not all
|
||||
|
@ -496,6 +514,12 @@ private:
|
|||
* @internal
|
||||
*/
|
||||
int32_t handleNext(const RBBIStateTable *statetable);
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
void makeRuleStatusValid();
|
||||
|
||||
};
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
|
|
@ -521,6 +521,26 @@ ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
|
|||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_getRuleStatus(UBreakIterator *bi);
|
||||
|
||||
/**
|
||||
* Get the statuses from the break rules that determined the most recently
|
||||
* returned break position. The values appear in the rule source
|
||||
* within brackets, {123}, for example. The default status value for rules
|
||||
* that do not explicitly provide one is zero.
|
||||
* <p>
|
||||
* For word break iterators, the possible values are defined in enum UWordBreak.
|
||||
* @param bi The break iterator to use
|
||||
* @param fillInVec an array to be filled in with the status values.
|
||||
* @param capacity the length of the supplied vector. A length of zero causes
|
||||
* the function to return the number of status values, in the
|
||||
* normal way, without attempting to store any values.
|
||||
* @param status receives error codes.
|
||||
* @return The number of rule status values from rules that determined
|
||||
* the most recent boundary returned by the break iterator.
|
||||
* @draft ICU 3.0
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Return the locale of the break iterator. You can choose between the valid and
|
||||
* the actual locale.
|
||||
|
|
|
@ -66,6 +66,9 @@ UVector::UVector(UObjectDeleter *d, UKeyComparator *c, int32_t initialCapacity,
|
|||
}
|
||||
|
||||
void UVector::_init(int32_t initialCapacity, UErrorCode &status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
// Fix bogus initialCapacity values; avoid malloc(0)
|
||||
if (initialCapacity < 1) {
|
||||
initialCapacity = DEFUALT_CAPACITY;
|
||||
|
|
Loading…
Add table
Reference in a new issue