mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-5170 move RBBI from CharacterIterator to UText
X-SVN-Rev: 19579
This commit is contained in:
parent
aca85b53cf
commit
9f85d5dd08
11 changed files with 588 additions and 595 deletions
|
@ -71,22 +71,23 @@ UnhandledEngine::handles(UChar32 c, int32_t breakType) const {
|
|||
}
|
||||
|
||||
int32_t
|
||||
UnhandledEngine::findBreaks( CharacterIterator *text,
|
||||
UnhandledEngine::findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UBool reverse,
|
||||
int32_t breakType,
|
||||
UStack &/*foundBreaks*/ ) const {
|
||||
if (breakType >= 0 && breakType < (int32_t)(sizeof(fHandled)/sizeof(fHandled[0]))) {
|
||||
UChar32 c = text->current32();
|
||||
UChar32 c = utext_current32(text);
|
||||
if (reverse) {
|
||||
while(text->getIndex() > startPos && fHandled[breakType]->contains(c)) {
|
||||
c = text->previous32();
|
||||
while((int32_t)utext_getNativeIndex(text) > startPos && fHandled[breakType]->contains(c)) {
|
||||
c = utext_previous32(text);
|
||||
}
|
||||
}
|
||||
else {
|
||||
while(text->getIndex() < endPos && fHandled[breakType]->contains(c)) {
|
||||
c = text->next32();
|
||||
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled[breakType]->contains(c)) {
|
||||
utext_next32(text); // TODO: recast loop to work with post-increment operations.
|
||||
c = utext_current32(text);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -164,7 +165,6 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, int32_t breakType) {
|
|||
dictnlength = 0;
|
||||
status = U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
|
||||
if (U_SUCCESS(status) && dictfname) {
|
||||
UChar* extStart=u_strchr(dictfname, 0x002e);
|
||||
int len = 0;
|
||||
|
|
|
@ -10,10 +10,10 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/utext.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class CharacterIterator;
|
||||
class UnicodeSet;
|
||||
class UStack;
|
||||
|
||||
|
@ -58,7 +58,7 @@ class LanguageBreakEngine : public UMemory {
|
|||
/**
|
||||
* <p>Find any breaks within a run in the supplied text.</p>
|
||||
*
|
||||
* @param text A CharacterIterator representing the text (TODO: UText). The
|
||||
* @param text A UText representing the text. The
|
||||
* iterator is left at the end of the run of characters which the engine
|
||||
* is capable of handling.
|
||||
* @param startPos The start of the run within the supplied text.
|
||||
|
@ -69,7 +69,7 @@ class LanguageBreakEngine : public UMemory {
|
|||
* @param foundBreaks An allocated C array of the breaks found, if any
|
||||
* @return The number of breaks found.
|
||||
*/
|
||||
virtual int32_t findBreaks( CharacterIterator *text,
|
||||
virtual int32_t findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UBool reverse,
|
||||
|
@ -183,7 +183,7 @@ class UnhandledEngine : public LanguageBreakEngine {
|
|||
/**
|
||||
* <p>Find any breaks within a run in the supplied text.</p>
|
||||
*
|
||||
* @param text A CharacterIterator representing the text (TODO: UText). The
|
||||
* @param text A UText representing the text. The
|
||||
* iterator is left at the end of the run of characters which the engine
|
||||
* is capable of handling.
|
||||
* @param startPos The start of the run within the supplied text.
|
||||
|
@ -194,7 +194,7 @@ class UnhandledEngine : public LanguageBreakEngine {
|
|||
* @param foundBreaks An allocated C array of the breaks found, if any
|
||||
* @return The number of breaks found.
|
||||
*/
|
||||
virtual int32_t findBreaks( CharacterIterator *text,
|
||||
virtual int32_t findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UBool reverse,
|
||||
|
|
|
@ -41,7 +41,7 @@ DictionaryBreakEngine::handles(UChar32 c, int32_t breakType) const {
|
|||
}
|
||||
|
||||
int32_t
|
||||
DictionaryBreakEngine::findBreaks( CharacterIterator *text,
|
||||
DictionaryBreakEngine::findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UBool reverse,
|
||||
|
@ -50,30 +50,31 @@ DictionaryBreakEngine::findBreaks( CharacterIterator *text,
|
|||
int32_t result = 0;
|
||||
|
||||
// Find the span of characters included in the set.
|
||||
int32_t start = text->getIndex();
|
||||
int32_t start = (int32_t)utext_getNativeIndex(text);
|
||||
int32_t current;
|
||||
int32_t rangeStart;
|
||||
int32_t rangeEnd;
|
||||
UChar32 c = text->current32();
|
||||
UChar32 c = utext_current32(text);
|
||||
if (reverse) {
|
||||
UBool isDict = fSet.contains(c);
|
||||
while((current = text->getIndex()) > startPos && isDict) {
|
||||
c = text->previous32();
|
||||
while((current = (int32_t)utext_getNativeIndex(text)) > startPos && isDict) {
|
||||
c = utext_previous32(text);
|
||||
isDict = fSet.contains(c);
|
||||
}
|
||||
rangeStart = (current < startPos) ? startPos : current+(isDict ? 0 : 1);
|
||||
rangeEnd = start + 1;
|
||||
}
|
||||
else {
|
||||
while((current = text->getIndex()) < endPos && fSet.contains(c)) {
|
||||
c = text->next32();
|
||||
while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) {
|
||||
utext_next32(text); // TODO: recast loop for postincrement
|
||||
c = utext_current32(text);
|
||||
}
|
||||
rangeStart = start;
|
||||
rangeEnd = current;
|
||||
}
|
||||
if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) {
|
||||
result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
|
||||
text->setIndex(current);
|
||||
utext_setNativeIndex(text, current);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
@ -116,14 +117,14 @@ class PossibleWord {
|
|||
~PossibleWord();
|
||||
|
||||
// Fill the list of candidates if needed, select the longest, and return the number found
|
||||
int candidates( CharacterIterator *text, const TrieWordDictionary *dict, int32_t rangeEnd );
|
||||
int candidates( UText *text, const TrieWordDictionary *dict, int32_t rangeEnd );
|
||||
|
||||
// Select the currently marked candidate, point after it in the text, and invalidate self
|
||||
int32_t acceptMarked( CharacterIterator *text );
|
||||
int32_t acceptMarked( UText *text );
|
||||
|
||||
// Back up from the current candidate to the next shorter one; return TRUE if that exists
|
||||
// and point the text after it
|
||||
UBool backUp( CharacterIterator *text );
|
||||
UBool backUp( UText *text );
|
||||
|
||||
// Return the longest prefix this candidate location shares with a dictionary word
|
||||
int32_t longestPrefix();
|
||||
|
@ -142,19 +143,19 @@ PossibleWord::~PossibleWord() {
|
|||
}
|
||||
|
||||
inline int
|
||||
PossibleWord::candidates( CharacterIterator *text, const TrieWordDictionary *dict, int32_t rangeEnd ) {
|
||||
PossibleWord::candidates( UText *text, const TrieWordDictionary *dict, int32_t rangeEnd ) {
|
||||
// TODO: If getIndex is too slow, use offset < 0 and add discardAll()
|
||||
int32_t start = text->getIndex();
|
||||
int32_t start = (int32_t)utext_getNativeIndex(text);
|
||||
if (start != offset) {
|
||||
offset = start;
|
||||
prefix = dict->matches(text, rangeEnd-start, lengths, count, sizeof(lengths)/sizeof(lengths[0]));
|
||||
// Dictionary leaves text after longest prefix, not longest word. Back up.
|
||||
if (count <= 0) {
|
||||
text->setIndex(start);
|
||||
utext_setNativeIndex(text, start);
|
||||
}
|
||||
}
|
||||
if (count > 0) {
|
||||
text->setIndex(start+lengths[count-1]);
|
||||
utext_setNativeIndex(text, start+lengths[count-1]);
|
||||
}
|
||||
current = count-1;
|
||||
mark = current;
|
||||
|
@ -162,15 +163,15 @@ PossibleWord::candidates( CharacterIterator *text, const TrieWordDictionary *dic
|
|||
}
|
||||
|
||||
inline int32_t
|
||||
PossibleWord::acceptMarked( CharacterIterator *text ) {
|
||||
text->setIndex(offset + lengths[mark]);
|
||||
PossibleWord::acceptMarked( UText *text ) {
|
||||
utext_setNativeIndex(text, offset + lengths[mark]);
|
||||
return lengths[mark];
|
||||
}
|
||||
|
||||
inline UBool
|
||||
PossibleWord::backUp( CharacterIterator *text ) {
|
||||
PossibleWord::backUp( UText *text ) {
|
||||
if (current > 0) {
|
||||
text->setIndex(offset + lengths[--current]);
|
||||
utext_setNativeIndex(text, offset + lengths[--current]);
|
||||
return TRUE;
|
||||
}
|
||||
return FALSE;
|
||||
|
@ -231,7 +232,7 @@ ThaiBreakEngine::~ThaiBreakEngine() {
|
|||
}
|
||||
|
||||
int32_t
|
||||
ThaiBreakEngine::divideUpDictionaryRange( CharacterIterator *text,
|
||||
ThaiBreakEngine::divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UStack &foundBreaks ) const {
|
||||
|
@ -246,9 +247,9 @@ ThaiBreakEngine::divideUpDictionaryRange( CharacterIterator *text,
|
|||
PossibleWord words[THAI_LOOKAHEAD];
|
||||
UChar32 uc;
|
||||
|
||||
text->setIndex(rangeStart);
|
||||
utext_setNativeIndex(text, rangeStart);
|
||||
|
||||
while (U_SUCCESS(status) && (current = text->getIndex()) < rangeEnd) {
|
||||
while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
|
||||
wordLength = 0;
|
||||
|
||||
// Look for candidate words at the current position
|
||||
|
@ -263,7 +264,7 @@ ThaiBreakEngine::divideUpDictionaryRange( CharacterIterator *text,
|
|||
// If there was more than one, see which one can take us forward the most words
|
||||
else if (candidates > 1) {
|
||||
// If we're already at the end of the range, we're done
|
||||
if (text->getIndex() >= rangeEnd) {
|
||||
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
|
||||
goto foundBest;
|
||||
}
|
||||
do {
|
||||
|
@ -276,7 +277,7 @@ ThaiBreakEngine::divideUpDictionaryRange( CharacterIterator *text,
|
|||
}
|
||||
|
||||
// If we're already at the end of the range, we're done
|
||||
if (text->getIndex() >= rangeEnd) {
|
||||
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
|
||||
goto foundBest;
|
||||
}
|
||||
|
||||
|
@ -302,7 +303,7 @@ foundBest:
|
|||
// just found (if there is one), but only if the preceding word does not exceed
|
||||
// the threshold.
|
||||
// The text iterator should now be positioned at the end of the word we found.
|
||||
if (text->getIndex() < rangeEnd && wordLength < THAI_ROOT_COMBINE_THRESHOLD) {
|
||||
if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength < THAI_ROOT_COMBINE_THRESHOLD) {
|
||||
// if it is a dictionary word, do nothing. If it isn't, then if there is
|
||||
// no preceding word, or the non-word shares less than the minimum threshold
|
||||
// of characters with a dictionary word, then scan to resynchronize
|
||||
|
@ -312,10 +313,11 @@ foundBest:
|
|||
// Look for a plausible word boundary
|
||||
//TODO: This section will need a rework for UText.
|
||||
int32_t remaining = rangeEnd - (current+wordLength);
|
||||
UChar32 pc = text->current32();
|
||||
UChar32 pc = utext_current32(text);
|
||||
int32_t chars = 0;
|
||||
while (TRUE) {
|
||||
uc = text->next32();
|
||||
utext_next32(text);
|
||||
uc = utext_current32(text);
|
||||
// TODO: Here we're counting on the fact that the SA languages are all
|
||||
// in the BMP. This should get fixed with the UText rework.
|
||||
chars += 1;
|
||||
|
@ -329,7 +331,7 @@ foundBest:
|
|||
// checking the dictionary. That is just a performance filter,
|
||||
// but it's not clear it's faster than checking the trie.
|
||||
int candidates = words[(wordsFound+1)%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
|
||||
text->setIndex(current+wordLength+chars);
|
||||
utext_setNativeIndex(text, current+wordLength+chars);
|
||||
if (candidates > 0) {
|
||||
break;
|
||||
}
|
||||
|
@ -347,49 +349,52 @@ foundBest:
|
|||
}
|
||||
else {
|
||||
// Back up to where we were for next iteration
|
||||
text->setIndex(current+wordLength);
|
||||
utext_setNativeIndex(text, current+wordLength);
|
||||
}
|
||||
}
|
||||
|
||||
// Never stop before a combining mark.
|
||||
int32_t currPos;
|
||||
while ((currPos = text->getIndex()) < rangeEnd && fMarkSet.contains(text->current32())) {
|
||||
wordLength += text->move32(1, CharacterIterator::kCurrent) - currPos;
|
||||
while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
|
||||
utext_next32(text);
|
||||
wordLength += (int32_t)utext_getNativeIndex(text) - currPos;
|
||||
}
|
||||
|
||||
// Look ahead for possible suffixes if a dictionary word does not follow.
|
||||
// We do this in code rather than using a rule so that the heuristic
|
||||
// resynch continues to function. For example, one of the suffix characters
|
||||
// could be a typo in the middle of a word.
|
||||
if (text->getIndex() < rangeEnd && wordLength > 0) {
|
||||
if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength > 0) {
|
||||
if (words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
|
||||
&& fSuffixSet.contains(uc = text->current32())) {
|
||||
&& fSuffixSet.contains(uc = utext_current32(text))) {
|
||||
if (uc == THAI_PAIYANNOI) {
|
||||
if (!fSuffixSet.contains(text->previous32())) {
|
||||
if (!fSuffixSet.contains(utext_previous32(text))) {
|
||||
// Skip over previous end and PAIYANNOI
|
||||
text->move32(2, CharacterIterator::kCurrent);
|
||||
utext_next32(text);
|
||||
utext_next32(text);
|
||||
wordLength += 1; // Add PAIYANNOI to word
|
||||
uc = text->current32(); // Fetch next character
|
||||
uc = utext_current32(text); // Fetch next character
|
||||
}
|
||||
else {
|
||||
// Restore prior position
|
||||
text->move32(1, CharacterIterator::kCurrent);
|
||||
utext_next32(text);
|
||||
}
|
||||
}
|
||||
if (uc == THAI_MAIYAMOK) {
|
||||
if (text->previous32() != THAI_MAIYAMOK) {
|
||||
if (utext_previous32(text) != THAI_MAIYAMOK) {
|
||||
// Skip over previous end and MAIYAMOK
|
||||
text->move32(2, CharacterIterator::kCurrent);
|
||||
utext_next32(text);
|
||||
utext_next32(text);
|
||||
wordLength += 1; // Add MAIYAMOK to word
|
||||
}
|
||||
else {
|
||||
// Restore prior position
|
||||
text->move32(1, CharacterIterator::kCurrent);
|
||||
utext_next32(text);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
text->setIndex(current+wordLength);
|
||||
utext_setNativeIndex(text, current+wordLength);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -10,11 +10,12 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/utext.h"
|
||||
|
||||
#include "brkeng.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class CharacterIterator;
|
||||
class TrieWordDictionary;
|
||||
|
||||
/*******************************************************************
|
||||
|
@ -78,7 +79,7 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
|
|||
/**
|
||||
* <p>Find any breaks within a run in the supplied text.</p>
|
||||
*
|
||||
* @param text A CharacterIterator representing the text (TODO: UText). The
|
||||
* @param text A UText representing the text. The
|
||||
* iterator is left at the end of the run of characters which the engine
|
||||
* is capable of handling.
|
||||
* @param startPos The start of the run within the supplied text.
|
||||
|
@ -89,7 +90,7 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
|
|||
* @param foundBreaks An allocated C array of the breaks found, if any
|
||||
* @return The number of breaks found.
|
||||
*/
|
||||
virtual int32_t findBreaks( CharacterIterator *text,
|
||||
virtual int32_t findBreaks( UText *text,
|
||||
int32_t startPos,
|
||||
int32_t endPos,
|
||||
UBool reverse,
|
||||
|
@ -115,13 +116,13 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
|
|||
/**
|
||||
* <p>Divide up a range of known dictionary characters.</p>
|
||||
*
|
||||
* @param text A CharacterIterator representing the text
|
||||
* @param text A UText representing the text
|
||||
* @param rangeStart The start of the range of dictionary characters
|
||||
* @param rangeEnd The end of the range of dictionary characters
|
||||
* @param foundBreaks Output of C array of int32_t break positions, or 0
|
||||
* @return The number of breaks found
|
||||
*/
|
||||
virtual int32_t divideUpDictionaryRange( CharacterIterator *text,
|
||||
virtual int32_t divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UStack &foundBreaks ) const = 0;
|
||||
|
@ -172,13 +173,13 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
|
|||
/**
|
||||
* <p>Divide up a range of known dictionary characters.</p>
|
||||
*
|
||||
* @param text A CharacterIterator representing the text
|
||||
* @param text A UText representing the text
|
||||
* @param rangeStart The start of the range of dictionary characters
|
||||
* @param rangeEnd The end of the range of dictionary characters
|
||||
* @param foundBreaks Output of C array of int32_t break positions, or 0
|
||||
* @return The number of breaks found
|
||||
*/
|
||||
virtual int32_t divideUpDictionaryRange( CharacterIterator *text,
|
||||
virtual int32_t divideUpDictionaryRange( UText *text,
|
||||
int32_t rangeStart,
|
||||
int32_t rangeEnd,
|
||||
UStack &foundBreaks ) const;
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -87,27 +87,27 @@ MutableTrieDictionary::MutableTrieDictionary( UChar median, UErrorCode &status )
|
|||
if (fTrie == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
fIter = new UCharCharacterIterator(0, 0);
|
||||
if (fIter == NULL) {
|
||||
fIter = utext_openUChars(NULL, NULL, 0, &status);
|
||||
if (U_SUCCESS(status) && fIter == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
MutableTrieDictionary::MutableTrieDictionary( UErrorCode &status ) {
|
||||
fTrie = NULL;
|
||||
fIter = new UCharCharacterIterator(NULL, 0);
|
||||
if (fIter == NULL) {
|
||||
fIter = utext_openUChars(NULL, NULL, 0, &status);
|
||||
if (U_SUCCESS(status) && fIter == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
MutableTrieDictionary::~MutableTrieDictionary() {
|
||||
delete fTrie;
|
||||
delete fIter;
|
||||
utext_close(fIter);
|
||||
}
|
||||
|
||||
int32_t
|
||||
MutableTrieDictionary::search( CharacterIterator *text,
|
||||
MutableTrieDictionary::search( UText *text,
|
||||
int32_t maxLength,
|
||||
int32_t *lengths,
|
||||
int &count,
|
||||
|
@ -121,7 +121,7 @@ MutableTrieDictionary::search( CharacterIterator *text,
|
|||
pMatched = TRUE;
|
||||
int i;
|
||||
|
||||
UChar uc = text->current();
|
||||
UChar uc = utext_current32(text);
|
||||
for (i = 0; i < maxLength && p != NULL; ++i) {
|
||||
while (p != NULL) {
|
||||
if (uc < p->ch) {
|
||||
|
@ -147,7 +147,8 @@ MutableTrieDictionary::search( CharacterIterator *text,
|
|||
}
|
||||
up = p;
|
||||
p = p->equal;
|
||||
uc = text->next();
|
||||
uc = utext_next32(text);
|
||||
uc = utext_current32(text);
|
||||
}
|
||||
|
||||
// Note that there is no way to reach here with up == 0 unless
|
||||
|
@ -170,13 +171,14 @@ MutableTrieDictionary::addWord( const UChar *word,
|
|||
TernaryNode *parent;
|
||||
UBool pMatched;
|
||||
int count;
|
||||
fIter->setText(word, length);
|
||||
fIter = utext_openUChars(fIter, word, length, &status);
|
||||
|
||||
int matched;
|
||||
matched = search(fIter, length, NULL, count, 0, parent, pMatched);
|
||||
|
||||
while (matched++ < length) {
|
||||
UChar uc = fIter->nextPostInc();
|
||||
UChar uc = utext_next32(fIter); // TODO: supplemetary support?
|
||||
U_ASSERT(uc != U_SENTINEL);
|
||||
TernaryNode *newNode = new TernaryNode(uc);
|
||||
if (newNode == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
|
@ -211,7 +213,7 @@ MutableTrieDictionary::addWords( UEnumeration *words,
|
|||
}
|
||||
|
||||
int32_t
|
||||
MutableTrieDictionary::matches( CharacterIterator *text,
|
||||
MutableTrieDictionary::matches( UText *text,
|
||||
int32_t maxLength,
|
||||
int32_t *lengths,
|
||||
int &count,
|
||||
|
@ -413,8 +415,7 @@ CompactTrieDictionary::CompactTrieDictionary(UDataMemory *dataObj,
|
|||
fData = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
CompactTrieDictionary::CompactTrieDictionary(const void *data,
|
||||
CompactTrieDictionary::CompactTrieDictionary( const void *data,
|
||||
UErrorCode &status )
|
||||
: fUData(NULL)
|
||||
{
|
||||
|
@ -460,7 +461,7 @@ getCompactNode(const CompactTrieHeader *header, uint16_t node) {
|
|||
}
|
||||
|
||||
int32_t
|
||||
CompactTrieDictionary::matches( CharacterIterator *text,
|
||||
CompactTrieDictionary::matches( UText *text,
|
||||
int32_t maxLength,
|
||||
int32_t *lengths,
|
||||
int &count,
|
||||
|
@ -469,7 +470,7 @@ CompactTrieDictionary::matches( CharacterIterator *text,
|
|||
const CompactTrieNode *node = getCompactNode(fData, fData->root);
|
||||
int mycount = 0;
|
||||
|
||||
UChar uc = text->current();
|
||||
UChar uc = utext_current32(text);
|
||||
int i = 0;
|
||||
|
||||
while (node != NULL) {
|
||||
|
@ -498,7 +499,8 @@ CompactTrieDictionary::matches( CharacterIterator *text,
|
|||
// We hit a non-equal character; return
|
||||
goto exit;
|
||||
}
|
||||
uc = text->next();
|
||||
utext_next32(text);
|
||||
uc = utext_current32(text);
|
||||
++i;
|
||||
}
|
||||
// To get here we must have come through the whole list successfully;
|
||||
|
@ -518,7 +520,8 @@ CompactTrieDictionary::matches( CharacterIterator *text,
|
|||
if (uc == hnode->entries[middle].ch) {
|
||||
// We hit a match; get the next node and next character
|
||||
node = getCompactNode(fData, hnode->entries[middle].equal);
|
||||
uc = text->next();
|
||||
utext_next32(text);
|
||||
uc = utext_current32(text);
|
||||
++i;
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/utext.h"
|
||||
|
||||
struct UEnumeration;
|
||||
struct UDataSwapper;
|
||||
|
@ -45,8 +46,6 @@ triedict_swap(const UDataSwapper *ds,
|
|||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class CharacterIterator;
|
||||
class UCharCharacterIterator;
|
||||
class StringEnumeration;
|
||||
struct CompactTrieHeader;
|
||||
|
||||
|
@ -76,7 +75,7 @@ class U_COMMON_API TrieWordDictionary : public UMemory {
|
|||
/**
|
||||
* <p>Find dictionary words that match the text.</p>
|
||||
*
|
||||
* @param text A CharacterIterator representing the text (TODO: UText). The
|
||||
* @param text A UText representing the text. The
|
||||
* iterator is left after the longest prefix match in the dictionary.
|
||||
* @param start The current position in text.
|
||||
* @param maxLength The maximum number of code units to match.
|
||||
|
@ -85,7 +84,7 @@ class U_COMMON_API TrieWordDictionary : public UMemory {
|
|||
* @param limit The size of the lengths array; this limits the number of words output.
|
||||
* @return The number of characters in text that were matched.
|
||||
*/
|
||||
virtual int32_t matches( CharacterIterator *text,
|
||||
virtual int32_t matches( UText *text,
|
||||
int32_t maxLength,
|
||||
int32_t *lengths,
|
||||
int &count,
|
||||
|
@ -123,11 +122,11 @@ class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary {
|
|||
TernaryNode *fTrie;
|
||||
|
||||
/**
|
||||
* A UCharCharacterIterator for internal use
|
||||
* A UText for internal use
|
||||
* @internal
|
||||
*/
|
||||
|
||||
UCharCharacterIterator *fIter;
|
||||
UText *fIter;
|
||||
|
||||
friend class CompactTrieDictionary; // For fast conversion
|
||||
|
||||
|
@ -150,7 +149,7 @@ class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary {
|
|||
/**
|
||||
* <p>Find dictionary words that match the text.</p>
|
||||
*
|
||||
* @param text A CharacterIterator representing the text (TODO: UText). The
|
||||
* @param text A UText representing the text. The
|
||||
* iterator is left after the longest prefix match in the dictionary.
|
||||
* @param maxLength The maximum number of code units to match.
|
||||
* @param lengths An array that is filled with the lengths of words that matched.
|
||||
|
@ -158,7 +157,7 @@ class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary {
|
|||
* @param limit The size of the lengths array; this limits the number of words output.
|
||||
* @return The number of characters in text that were matched.
|
||||
*/
|
||||
virtual int32_t matches( CharacterIterator *text,
|
||||
virtual int32_t matches( UText *text,
|
||||
int32_t maxLength,
|
||||
int32_t *lengths,
|
||||
int &count,
|
||||
|
@ -196,7 +195,7 @@ protected:
|
|||
/**
|
||||
* <p>Search the dictionary for matches.</p>
|
||||
*
|
||||
* @param text A CharacterIterator representing the text (TODO: UText). The
|
||||
* @param text A UText representing the text. The
|
||||
* iterator is left after the longest prefix match in the dictionary.
|
||||
* @param maxLength The maximum number of code units to match.
|
||||
* @param lengths An array that is filled with the lengths of words that matched.
|
||||
|
@ -206,7 +205,7 @@ protected:
|
|||
* @param pMatched The returned parent node matched the input
|
||||
* @return The number of characters in text that were matched.
|
||||
*/
|
||||
virtual int32_t search( CharacterIterator *text,
|
||||
virtual int32_t search( UText *text,
|
||||
int32_t maxLength,
|
||||
int32_t *lengths,
|
||||
int &count,
|
||||
|
@ -232,21 +231,21 @@ private:
|
|||
* to save space.</p>
|
||||
*/
|
||||
class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary {
|
||||
private:
|
||||
private:
|
||||
/**
|
||||
* The root node of the trie
|
||||
*/
|
||||
const CompactTrieHeader *fData;
|
||||
|
||||
const CompactTrieHeader *fData;
|
||||
|
||||
/**
|
||||
* A UBool indicating whether or not we own the fData.
|
||||
*/
|
||||
UBool fOwnData;
|
||||
|
||||
UBool fOwnData;
|
||||
|
||||
UDataMemory *fUData;
|
||||
public:
|
||||
|
||||
public:
|
||||
/**
|
||||
* <p>Construct a dictionary from a UDataMemory.</p>
|
||||
*
|
||||
|
@ -279,7 +278,7 @@ public:
|
|||
/**
|
||||
* <p>Find dictionary words that match the text.</p>
|
||||
*
|
||||
* @param text A CharacterIterator representing the text (TODO: UText). The
|
||||
* @param text A UText representing the text. The
|
||||
* iterator is left after the longest prefix match in the dictionary.
|
||||
* @param maxLength The maximum number of code units to match.
|
||||
* @param lengths An array that is filled with the lengths of words that matched.
|
||||
|
@ -287,7 +286,7 @@ public:
|
|||
* @param limit The size of the lengths array; this limits the number of words output.
|
||||
* @return The number of characters in text that were matched.
|
||||
*/
|
||||
virtual int32_t matches( CharacterIterator *text,
|
||||
virtual int32_t matches( UText *text,
|
||||
int32_t rangeEnd,
|
||||
int32_t *lengths,
|
||||
int &count,
|
||||
|
|
|
@ -144,8 +144,6 @@ public:
|
|||
|
||||
/**
|
||||
* Return a CharacterIterator over the text being analyzed.
|
||||
* Changing the state of the returned iterator can have undefined consequences
|
||||
* on the operation of the break iterator. If you need to change it, clone it first.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual const CharacterIterator& getText(void) const = 0;
|
||||
|
@ -193,6 +191,8 @@ public:
|
|||
/**
|
||||
* Change the text over which this operates. The text boundary is
|
||||
* reset to the start.
|
||||
* Note that setText(UText *) provides similar functionality to this function,
|
||||
* and is more efficient.
|
||||
* @param it The CharacterIterator used to change the text.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
|
|
@ -63,10 +63,17 @@ class U_COMMON_API RuleBasedBreakIterator : public BreakIterator {
|
|||
|
||||
protected:
|
||||
/**
|
||||
* The character iterator through which this BreakIterator accesses the text
|
||||
* The UText through which this BreakIterator accesses the text
|
||||
* @internal
|
||||
*/
|
||||
CharacterIterator* fText;
|
||||
UText *fText;
|
||||
|
||||
/**
|
||||
* A character iterator that refers to the same text as the UText, above.
|
||||
* Lazily created when requested by a caller.
|
||||
* Only included for compatibility with old API, which was based on CharacterIterators.
|
||||
*/
|
||||
CharacterIterator *fCharIter;
|
||||
|
||||
/**
|
||||
* The rule data for this BreakIterator instance
|
||||
|
@ -280,14 +287,27 @@ public:
|
|||
//=======================================================================
|
||||
|
||||
/**
|
||||
* Return a CharacterIterator over the text being analyzed. This version
|
||||
* of this method returns the actual CharacterIterator we're using internally.
|
||||
* Changing the state of this iterator can have undefined consequences. If
|
||||
* you need to change it, clone it first.
|
||||
* Return a CharacterIterator over the text being analyzed.
|
||||
* The returned character iterator is owned by the break iterator, and must
|
||||
* not be deleted by the caller. Repeated calls to this function may
|
||||
* return the same CharacterIterator.
|
||||
* <p/>
|
||||
* The returned character iterator must not be used concurrently with
|
||||
* the break iterator. If concurrent operation is needed, clone the
|
||||
* returned character iterator first and operate on the clone.
|
||||
* <p/>
|
||||
* This function is not thread safe. Two threads must not make concurrent
|
||||
* calls to BreakIterator::getText(). This is an exception to the general
|
||||
* rules for thread safety in ICU, which are that const functions are
|
||||
* thread safe.
|
||||
* <p/>
|
||||
* The function getUText() provides similar functionality, and is more efficient.
|
||||
* TODO: deprecate this function?
|
||||
*
|
||||
* @return An iterator over the text being analyzed.
|
||||
* @stable ICU 2.0
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual const CharacterIterator& getText(void) const;
|
||||
virtual CharacterIterator& getText(void) const;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -340,7 +360,6 @@ public:
|
|||
|
||||
/**
|
||||
* Sets the current iteration position to the beginning of the text.
|
||||
* (i.e., the CharacterIterator's starting offset).
|
||||
* @return The offset of the beginning of the text.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
@ -348,7 +367,6 @@ public:
|
|||
|
||||
/**
|
||||
* Sets the current iteration position to the end of the text.
|
||||
* (i.e., the CharacterIterator's ending offset).
|
||||
* @return The text's past-the-end offset.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
|
|
@ -260,8 +260,10 @@ void RBBIAPITest::TestGetSetAdoptText()
|
|||
CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str"
|
||||
|
||||
wordIter1->setText(str1);
|
||||
if(wordIter1->getText() != *text1)
|
||||
errln((UnicodeString)"ERROR:1 error in setText or getText ");
|
||||
CharacterIterator *tci = &wordIter1->getText();
|
||||
UnicodeString tstr;
|
||||
tci->getText(tstr);
|
||||
TEST_ASSERT(tstr == str1);
|
||||
if(wordIter1->current() != 0)
|
||||
errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
|
||||
|
||||
|
@ -273,9 +275,14 @@ void RBBIAPITest::TestGetSetAdoptText()
|
|||
|
||||
|
||||
charIter1->adoptText(text1Clone);
|
||||
if( wordIter1->getText() == charIter1->getText() ||
|
||||
wordIter1->getText() != *text2 || charIter1->getText() != *text1 )
|
||||
errln((UnicodeString)"ERROR:2 error is getText or setText()");
|
||||
TEST_ASSERT(wordIter1->getText() != charIter1->getText());
|
||||
tci = &wordIter1->getText();
|
||||
tci->getText(tstr);
|
||||
TEST_ASSERT(tstr == str2);
|
||||
tci = &charIter1->getText();
|
||||
tci->getText(tstr);
|
||||
TEST_ASSERT(tstr == str1);
|
||||
|
||||
|
||||
RuleBasedBreakIterator* rb=(RuleBasedBreakIterator*)wordIter1->clone();
|
||||
rb->adoptText(text1);
|
||||
|
@ -286,13 +293,17 @@ void RBBIAPITest::TestGetSetAdoptText()
|
|||
errln((UnicodeString)"ERROR:2 error in adoptText ");
|
||||
|
||||
// Adopt where iterator range is less than the entire original source string.
|
||||
// (With the change of the break engine to working with UText internally,
|
||||
// CharacterIterators starting at positions other than zero are not supported)
|
||||
rb->adoptText(text3);
|
||||
if(rb->preceding(2) != 3) {
|
||||
errln((UnicodeString)"ERROR:3 error in adoptText ");
|
||||
}
|
||||
if(rb->following(11) != BreakIterator::DONE) {
|
||||
errln((UnicodeString)"ERROR:4 error in adoptText ");
|
||||
}
|
||||
TEST_ASSERT(rb->preceding(2) == 0);
|
||||
TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
|
||||
//if(rb->preceding(2) != 3) {
|
||||
// errln((UnicodeString)"ERROR:3 error in adoptText ");
|
||||
//}
|
||||
//if(rb->following(11) != BreakIterator::DONE) {
|
||||
// errln((UnicodeString)"ERROR:4 error in adoptText ");
|
||||
//}
|
||||
|
||||
// UText API
|
||||
//
|
||||
|
@ -344,7 +355,8 @@ void RBBIAPITest::TestGetSetAdoptText()
|
|||
TEST_ASSERT(pos==UBRK_DONE);
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
UText *gut2 = utext_openUnicodeString(NULL,NULL,&status);
|
||||
UnicodeString sEmpty;
|
||||
UText *gut2 = utext_openUnicodeString(NULL, &sEmpty, &status);
|
||||
wordIter1->getUText(gut2, status);
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
utext_close(gut2);
|
||||
|
|
|
@ -412,64 +412,7 @@ void RBBITest::TestMixedThaiLineBreak()
|
|||
|
||||
// @suwit - end of changes
|
||||
|
||||
// Arabic numerals should always be separated from surrounding Thai text
|
||||
/*
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e04\\u0e48\\u0e32", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e07\\u0e34\\u0e19", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e1a\\u0e32\\u0e17", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e41\\u0e15\\u0e30", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e23\\u0e30\\u0e14\\u0e31\\u0e1a", 0, status);
|
||||
thaiLineSelection->addElement("39");
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e1a\\u0e32\\u0e17 ", 0, status);
|
||||
|
||||
// words in non-Thai scripts should always be separated from surrounding Thai text
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e17\\u0e14", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e2a\\u0e2d\\u0e1a", 0, status);
|
||||
thaiLineSelection->addElement("Java");
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e1a\\u0e19", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e04\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e44\\u0e2d\\u0e1a\\u0e35\\u0e40\\u0e2d\\u0e47\\u0e21 ", 0, status);
|
||||
|
||||
// Thai numerals should always be separated from the text surrounding them
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e04\\u0e48\\u0e32", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e07\\u0e34\\u0e19", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e1a\\u0e32\\u0e17", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e41\\u0e15\\u0e30", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e23\\u0e30\\u0e14\\u0e31\\u0e1a", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e53\\u0e59", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e1a\\u0e32\\u0e17 ", 0, status);
|
||||
|
||||
// Thai text should interact correctly with punctuation and symbols
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e44\\u0e2d\\u0e1a\\u0e35\\u0e40\\u0e2d\\u0e47\\u0e21", 0, status);
|
||||
// ADD_DATACHUNK(thaiLineSelection, "(\\u0e1b\\u0e23\\u0e30\\u0e40\\u0e17\\u0e28", 0, status);
|
||||
// ADD_DATACHUNK(thaiLineSelection, "\\u0e44\\u0e17\\u0e22)", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "(\\u0e1b\\u0e23\\u0e30\\u0e40\\u0e17\\u0e28\\u0e44\\u0e17\\u0e22)", 0, status);
|
||||
// I believe the commented-out reading above to be the correct one, but this is what passes with our current dictionary
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e08\\u0e33\\u0e01\\u0e31\\u0e14", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e1b\\u0e34\\u0e14", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e15\\u0e31\\u0e27\"", 0, status);
|
||||
*/
|
||||
|
||||
/* remove the old data sample.
|
||||
// The Unicode Linebreak TR says do not break before or after quotes.
|
||||
// So this test is changed to not break around the quote.
|
||||
// TODO: should Thai break around the quotes, like the original behavior here?
|
||||
// ADD_DATACHUNK(thaiLineSelection, "\\u0e2e\\u0e32\\u0e23\\u0e4c\\u0e14\\u0e14\\u0e34\\u0e2a\\u0e01\\u0e4c\"", 0, status);
|
||||
// ADD_DATACHUNK(thaiLineSelection, "\\u0e23\\u0e38\\u0e48\\u0e19", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e2e\\u0e32\\u0e23\\u0e4c\\u0e14\\u0e14\\u0e34\\u0e2a\\u0e01\\u0e4c\""
|
||||
"\\u0e23\\u0e38\\u0e48\\u0e19", 0, status);
|
||||
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e43\\u0e2b\\u0e21\\u0e48", 0, status);
|
||||
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e14\\u0e37\\u0e2d\\u0e19\\u0e21\\u0e34.", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e22.", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e19\\u0e35\\u0e49", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e23\\u0e32\\u0e04\\u0e32", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "$200", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e40\\u0e17\\u0e48\\u0e32", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "\\u0e19\\u0e31\\u0e49\\u0e19 ", 0, status);
|
||||
ADD_DATACHUNK(thaiLineSelection, "(\"\\u0e2e\\u0e32\\u0e23\\u0e4c\\u0e14\\u0e14\\u0e34\\u0e2a\\u0e01\\u0e4c\").", 0, status);
|
||||
*/
|
||||
RuleBasedBreakIterator* e = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale("th"), status);
|
||||
if (U_FAILURE(status))
|
||||
{
|
||||
|
@ -788,14 +731,18 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
|
|||
if(exec) TestJapaneseLineBreak(); break;
|
||||
case 2: name = "TestStatusReturn";
|
||||
if(exec) TestStatusReturn(); break;
|
||||
|
||||
case 3: name = "TestLineBreakData";
|
||||
if(exec) TestLineBreakData(); break;
|
||||
case 4: name = "TestEmptyString";
|
||||
if(exec) TestEmptyString(); break;
|
||||
|
||||
case 5: name = "TestGetAvailableLocales";
|
||||
if(exec) TestGetAvailableLocales(); break;
|
||||
|
||||
case 6: name = "TestGetDisplayName";
|
||||
if(exec) TestGetDisplayName(); break;
|
||||
|
||||
case 7: name = "TestEndBehaviour";
|
||||
if(exec) TestEndBehaviour(); break;
|
||||
case 8: name = "TestMixedThaiLineBreak";
|
||||
|
@ -1176,15 +1123,19 @@ void RBBITest::TestBug4153072() {
|
|||
UnicodeString str("...Hello, World!...");
|
||||
int32_t begin = 3;
|
||||
int32_t end = str.length() - 3;
|
||||
UBool dummy;
|
||||
UBool onBoundary;
|
||||
|
||||
StringCharacterIterator* textIterator = new StringCharacterIterator(str, begin, end, begin);
|
||||
iter->adoptText(textIterator);
|
||||
int index;
|
||||
// Note: with the switch to UText, there is no way to restrict the
|
||||
// iteration range to begin at an index other than zero.
|
||||
// String character iterators created with a non-zero bound are
|
||||
// treated by RBBI as being empty.
|
||||
for (index = -1; index < begin + 1; ++index) {
|
||||
dummy = iter->isBoundary(index);
|
||||
if (index < begin && dummy == TRUE) {
|
||||
errln((UnicodeString)"Didn't handle preceeding correctly with offset = " + index +
|
||||
onBoundary = iter->isBoundary(index);
|
||||
if (index == 0? !onBoundary : onBoundary) {
|
||||
errln((UnicodeString)"Didn't handle isBoundary correctly with offset = " + index +
|
||||
" and begin index = " + begin);
|
||||
}
|
||||
}
|
||||
|
@ -1323,11 +1274,12 @@ void RBBITest::executeTest(TestParams *t) {
|
|||
if (expectedTagVal == -1) {
|
||||
expectedTagVal = 0;
|
||||
}
|
||||
int32_t line = t->srcLine->elementAti(bp);
|
||||
int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus();
|
||||
if (rs != expectedTagVal) {
|
||||
errln("Incorrect status for forward break. Pos=%4d File line,col= %4d,%4d.\n"
|
||||
" Actual, Expected status = %4d, %4d",
|
||||
bp, t->srcLine->elementAti(bp), t->srcCol->elementAti(bp), rs, expectedTagVal);
|
||||
bp, line, t->srcCol->elementAti(bp), rs, expectedTagVal);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1375,6 +1327,7 @@ void RBBITest::executeTest(TestParams *t) {
|
|||
if (expectedTagVal == -1) {
|
||||
expectedTagVal = 0;
|
||||
}
|
||||
int line = t->srcLine->elementAti(bp);
|
||||
int32_t rs = ((RuleBasedBreakIterator *)t->bi)->getRuleStatus();
|
||||
if (rs != expectedTagVal) {
|
||||
errln("Incorrect status for reverse break. Pos=%4d File line,col= %4d,%4d.\n"
|
||||
|
|
Loading…
Add table
Reference in a new issue