ICU-2292 added safe forward and backwards rules

X-SVN-Rev: 13648
This commit is contained in:
Syn Wee Quek 2003-11-09 06:52:44 +00:00
parent d0370e2786
commit 41ac2f557b
7 changed files with 301 additions and 169 deletions

View file

@ -405,8 +405,8 @@ int32_t RuleBasedBreakIterator::previous(void) {
return BreakIterator::DONE;
}
if (fData->fSafeRevTable != NULL) {
return handleNewPrevious();
if (fData->fSafeRevTable != NULL || fData->fSafeFwdTable != NULL) {
return handlePrevious(fData->fReverseTable);
}
// old rule syntax
@ -486,27 +486,56 @@ int32_t RuleBasedBreakIterator::following(int32_t offset) {
if (fData->fSafeRevTable != NULL) {
// new rule syntax
/// todo synwee
/// fText->setIndex(offset);
fText->setIndex(fText->startIndex());
result = fText->startIndex();
}
else {
// otherwise, we have to sync up first. Use handlePrevious() to back
// us up to a known break position before the specified position (if
// we can determine that the specified position is a break position,
// we don't back up at all). This may or may not be the last break
// position at or before our starting position. Advance forward
// from here until we've passed the starting position. The position
// we stop on will be the first break position after the specified one.
// old rule syntax
fText->setIndex(offset);
if (offset == fText->startIndex()) {
return handleNext();
// move forward one codepoint to prepare for moving back to a
// safe point.
// this handles the case where offset falls in the middle of a supplementary character
fText->next32();
// handlePrevious will move most of the time to < 1 boundary away
handlePrevious(fData->fSafeRevTable);
int32_t result = next();
while (result <= offset) {
result = next();
}
result = previous();
return result;
}
if (fData->fSafeFwdTable != NULL) {
// backup plan if reverse safe table is not available
fText->setIndex(offset);
fText->previous32();
// handle next will give result >= offset
handleNext(fData->fSafeFwdTable);
// previous will give result 0 or 1 boundary away from offset,
// most of the time
// so we may have to keep moving backwards until we are at or before offset
int32_t oldresult = previous();
while (oldresult > offset) {
int32_t result = previous();
if (result <= offset) {
return oldresult;
}
oldresult = result;
}
int32_t result = next();
if (result <= offset) {
return next();
}
return result;
}
// otherwise, we have to sync up first. Use handlePrevious() to back
// us up to a known break position before the specified position (if
// we can determine that the specified position is a break position,
// we don't back up at all). This may or may not be the last break
// position at or before our starting position. Advance forward
// from here until we've passed the starting position. The position
// we stop on will be the first break position after the specified one.
// old rule syntax
fText->setIndex(offset);
if (offset == fText->startIndex()) {
return handleNext();
}
result = previous();
while (result != BreakIterator::DONE && result <= offset) {
result = next();
@ -537,15 +566,43 @@ int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
// position specified by the caller, we can just use previous()
// to carry out this operation
if (fData->fSafeRevTable != NULL) {
if (fData->fSafeFwdTable != NULL) {
/// todo synwee
// new rule syntax
int32_t result = fText->endIndex();
fText->setIndex(result);
while (result != BreakIterator::DONE && result >= offset) {
fText->setIndex(offset);
// move backwards one codepoint to prepare for moving forwards to a
// safe point.
// this handles the case where offset falls in the middle of a supplementary character
fText->previous32();
handleNext(fData->fSafeFwdTable);
int32_t result = previous();
while (result >= offset) {
result = previous();
}
return result;
}
if (fData->fSafeRevTable != NULL) {
// backup plan if forward safe table is not available
fText->setIndex(offset);
fText->next32();
// handle previous will give result <= offset
handlePrevious(fData->fSafeRevTable);
// next will give result 0 or 1 boundary away from offset,
// most of the time
// so we may have to keep moving forwards until we are at or past offset
int32_t oldresult = next();
while (oldresult < offset) {
int32_t result = next();
if (result >= offset) {
return oldresult;
}
oldresult = result;
}
int32_t result = previous();
if (result >= offset) {
return previous();
}
return result;
}
@ -568,6 +625,11 @@ UBool RuleBasedBreakIterator::isBoundary(int32_t offset) {
return TRUE;
}
if (offset == fText->endIndex()) {
last(); // For side effects on current position, tag values.
return TRUE;
}
// out-of-range indexes are never boundary positions
if (offset < fText->startIndex()) {
first(); // For side effects on current position, tag values.
@ -608,7 +670,11 @@ int32_t RuleBasedBreakIterator::current(void) const {
// value every time the state machine passes through an accepting state.
//
//-----------------------------------------------------------------------------------
int32_t RuleBasedBreakIterator::handleNext(void) {
// No-argument overload: forwards to handleNext(const RBBIStateTable *)
// with the forward state table held in fData (fData->fForwardTable) and
// returns whatever that call returns.
int32_t RuleBasedBreakIterator::handleNext() {
return handleNext(fData->fForwardTable);
}
int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) {
if (fTrace) {
RBBIDebugPrintf("Handle Next pos char state category \n");
}
@ -637,7 +703,7 @@ int32_t RuleBasedBreakIterator::handleNext(void) {
fLastBreakTag = 0;
row = (RBBIStateTableRow *) // Point to starting row of state table.
(fData->fForwardTable->fTableData + (fData->fForwardTable->fRowLen * state));
(statetable->fTableData + (statetable->fRowLen * state));
// Character Category fetch for starting character.
// See comments on character category code within loop, below.
@ -700,7 +766,7 @@ int32_t RuleBasedBreakIterator::handleNext(void) {
// look up a state transition in the state table
state = row->fNextState[category];
row = (RBBIStateTableRow *)
(fData->fForwardTable->fTableData + (fData->fForwardTable->fRowLen * state));
(statetable->fTableData + (statetable->fRowLen * state));
// Get the next character. Doing it here positions the iterator
// to the correct position for recording matches in the code that
@ -913,14 +979,14 @@ continueOn:
// The logic of this function is very similar to handleNext(), above.
//
//-----------------------------------------------------------------------------------
int32_t RuleBasedBreakIterator::handleNewPrevious(void) {
if (fText == NULL || fData == NULL) {
int32_t RuleBasedBreakIterator::handlePrevious(const RBBIStateTable *statetable) {
if (fText == NULL || statetable == NULL) {
return 0;
}
// break tag is no longer valid after icu switched to exact backwards
// positioning.
fLastBreakTagValid = FALSE;
if (fData->fReverseTable == NULL) {
if (statetable == NULL) {
return fText->setToStart();
}
@ -938,7 +1004,7 @@ int32_t RuleBasedBreakIterator::handleNewPrevious(void) {
RBBIStateTableRow *row;
row = (RBBIStateTableRow *)
(this->fData->fReverseTable->fTableData + (state * fData->fReverseTable->fRowLen));
(statetable->fTableData + (state * statetable->fRowLen));
UTRIE_GET16(&fData->fTrie, c, category);
if ((category & 0x4000) != 0) {
fDictionaryCharCount++;
@ -954,8 +1020,7 @@ int32_t RuleBasedBreakIterator::handleNewPrevious(void) {
// if (c == CharacterIterator::DONE && fText->hasPrevious()==FALSE) {
if (hasPassedStartText) {
// if we have already considered the start of the text
if (fData->fLookAheadHardBreak == TRUE
&& row->fLookAhead != 0) {
if (row->fLookAhead != 0 && lookaheadResult == 0) {
result = 0;
}
break;
@ -987,7 +1052,7 @@ int32_t RuleBasedBreakIterator::handleNewPrevious(void) {
// look up a state transition in the backwards state table
state = row->fNextState[category];
row = (RBBIStateTableRow *)
(this->fData->fReverseTable->fTableData + (state * fData->fReverseTable->fRowLen));
(statetable->fTableData + (state * statetable->fRowLen));
if (row->fAccepting == -1) {
// Match found, common case, could have lookahead so we move on to check it

View file

@ -30,6 +30,7 @@ struct RBBIDataHeader;
class RuleBasedBreakIteratorTables;
class BreakIterator;
class RBBIDataWrapper;
struct RBBIStateTable;
@ -480,9 +481,21 @@ private:
* The various calling methods then iterate forward from this safe position to
* the appropriate position to return. (For more information, see the description
* of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
* @param statetable state table used for moving backwards
* @internal
*/
int32_t handleNewPrevious(void);
int32_t handlePrevious(const RBBIStateTable *statetable);
/**
* This method is the actual implementation of the next() method. All iteration
* vectors through here. This method initializes the state machine to state 1
* and advances through the text character by character until we reach the end
* of the text or the state machine transitions to state 0. We update our return
* value every time the state machine passes through a possible end state.
* @param statetable state table used for moving forwards
* @internal
*/
int32_t handleNext(const RBBIStateTable *statetable);
};
//------------------------------------------------------------------------------

View file

@ -50,4 +50,12 @@ $BackOneCluster;
!!safe_reverse;
$BackOneCluster;
# rule 6, 7, 8
$V+ $L;
## -------------------------------------------------
!!safe_forward;
# rule 6, 7, 8
$V+ $T;

View file

@ -341,7 +341,7 @@ $CM* $ALPlus $CM+ / $LB5Breaks;
!!safe_reverse;
# LB 7
$CM* [^$CM $BK $CR $LF $NL $ZW $SP];
$CM+ [^$CM $BK $CR $LF $NL $ZW $SP];
$CM+ $SP / .;
# LB 9
@ -362,17 +362,17 @@ $CL $CM* ($NU | $IS);
!!safe_forward;
# LB 7
[^$BK $CR $LF $NL $ZW $SP] $CM*;
$SP $CM+ / .;
[^$BK $CR $LF $NL $ZW $SP] $CM+;
$SP $CM+ / [^$CM];
# LB 9
$OP $CM* $SP*;
$OP $CM* $SP+;
# LB 10
$QU $CM* $SP*;
$QU $CM* $SP+;
# LB 11
$CL $CM* $SP*;
$CL $CM* $SP+;
# LB 18
$HY $CM* $NU;

View file

@ -9,7 +9,6 @@
# These rules are based on TR 29 version 4.0.0
#
!!chain;
#
# Character categories as defined in TR 29
@ -31,85 +30,79 @@ $Term = [\u0021 \u003F \u0589 \u061F \u06D4 \u0700 \u0701 \u0702 \u0964 \u1362
$Close = [[:Open_Punctuation:] [:Close_Punctuation:] [:Linebreak = Quotation:] -
[[:name = HEBREW PUNCTUATION GERESH:] $ATerm $Term]];
$Extend = [[:Grapheme_Extend = TRUE:]];
$ATermEx = $ATerm $Extend*;
$NumericEx = $Numeric $Extend*;
$UpperEx = $Upper $Extend*;
$CloseEx = $Close $Extend*;
$SpEx = $Sp $Extend*;
$LowerEx = $Lower $Extend*;
$TermEx = $Term $Extend*;
# Define extended forms of the character classes,
# incorporate grapheme cluster + format chars.
# rule 6
$Extend = [[:Grapheme_Extend = TRUE:]];
$ATermEx = $ATerm $Extend* $Format*;
$NumericEx = $Numeric $Extend* $Format*;
$UpperEx = $Upper $Extend* $Format*;
$TermEx = $Term $Extend* $Format*;
$ATermEx $Format* $NumericEx;
#
# $SepSeq keeps together CRLF as a separator. (CRLF is a grapheme cluster)
#
$SepSeq = $Sep | \u000d\u000a;
# rule 7
# $InteriorChars are those that never trigger a following break.
$InteriorChars = [^$Term $ATerm $Sep]; #Note: includes Extend and Format chars
$UpperEx $ATermEx $Format* $UpperEx;
## -------------------------------------------------
# rule 8
!!forward;
$ATermEx $Format* $CloseEx* $Format* $SpEx $Format*
[^$OLetter $Upper $Lower $Sep]* $Extend* $Format* $LowerEx;
# rule 9 forced to exit by / [^$Close $Sp]
($TermEx | $ATermEx) $Format* ($CloseEx $Format*)* $Sep;
($TermEx | $ATermEx) $Format* ($CloseEx $Format*)* ($CloseEx | $SpEx) / [^$Close $Sp];
# rule 10 forced to exit by / [^$Sp];
# Rule 6. Match an ATerm (.) that does not cause a break because a number immediately follows it.
$NumberFollows = $InteriorChars* $ATermEx $NumericEx;
($TermEx | $ATermEx) $Format* ($CloseEx $Format*)* ($SpEx $Format*)* $Sep;
($TermEx | $ATermEx) $Format* ($CloseEx $Format*)* ($SpEx $Format*)* $SpEx / [^$Sp];
# Rule 7. $UppersSurround Match a no-break sentence fragment containing a . surrounded by Uppers
$UppersSurround = $InteriorChars* $UpperEx $ATermEx $UpperEx;
# Rule 8 Matches a sentence fragment containing "." that should not cause a sentence break,
# because a lower case word follows the period.
$LowerWordFollows = $InteriorChars* $ATermEx $Close* $Sp* [^$OLetter $Upper $Lower $Sep]* $Lower;
# rule 11 partly included in rule 9 and 10
$TermEx;
$ATermEx;
# Rules 3, 9, 10, 11
# Matches a simple sentence, or the trailing part of a complex sentence,
# where a simple sentence contains no interior "."s.
$TermEndSequence = $InteriorChars* ($TermEx | $ATermEx) $Close* $Sp* $SepSeq?;
$EndSequence = $InteriorChars* $SepSeq?;
# rule 12
([^$Term $ATerm $Sep] $Extend*)+;
([^$Term $ATerm $Sep] $Extend* $Format*)+ ($Term | $ATerm | $Sep);
# Put them all together.
($NumberFollows | $UppersSurround | $LowerWordFollows)* $TermEndSequence{0}; # status = UBRK_SENTENCE_TERM
($NumberFollows | $UppersSurround | $LowerWordFollows)* $EndSequence{100}; # status = UBRK_SENTENCE_SEP
## -------------------------------------------------
!!reverse;
#
# Reverse Rules
#
$EndGorp = ($Term | $ATerm | $Sep | $Close | $Extend | $Format | $Sp);
$RevEndSequence = $EndGorp* $InteriorChars* $EndGorp* | $Sep [^$ATerm $Term]*;
$ReverseLowerWordFollows = $Lower [^$OLetter $Upper $Lower $Sep]* $ATerm $InteriorChars*;
$ReverseUpperSurround = $Upper $Format* $Extend* $ATerm $Format* $Extend* $Upper $InteriorChars*;
$ReverseNumberFollows = $Numeric $Format* $Extend* $ATerm $InteriorChars*;
$BackATermEx = $Extend* $ATerm;
$BackNumericEx = $Extend* $Numeric;
$BackUpperEx = $Extend* $Upper;
$BackCloseEx = $Extend* $Close;
$BackSpEx = $Extend* $Sp;
$BackLowerEx = $Extend* $Lower;
$BackTermEx = $Extend* $Term;
$RevEndSequence ($ReverseLowerWordFollows | $ReverseUpperSurround | $ReverseNumberFollows)* .?;
# rule 3
! $Sep .;
# rule 6
! $BackNumericEx $Format* $BackATermEx;
## -------------------------------------------------
## !!safe_reverse;
# rule 7
## $Extend* $ATerm $Format* $Extend* $Upper;
! $BackUpperEx $Format* $BackATermEx $BackUpperEx;
# rule 11
## ($Extend* $Sp $Format*)* ($Extend* $Close $Format*)* $Extend* ($Term | $ATerm);
## -------------------------------------------------
!!safe_forward;
# rule 8
! $BackLowerEx $Format* $Extend* [^$OLetter $Upper $Lower $Sep]* $Format*
$BackSpEx $Format* $BackCloseEx* $Format* $BackATermEx;
# rules 9, 10, 11, 12
$Any = [^$Term $ATerm $Sep];
$Safe = [^$Term $ATerm $Sep $Sp $Close];
$BackEnd = ($BackSpEx $Format*)* ($BackCloseEx $Format*)* ($BackTermEx | $BackATermEx);
! $BackEnd;
! $BackEnd? $Any* $Safe;
! $BackEnd? $Any* $Close / ($BackSpEx $Format*)+ ($BackTermEx | $BackATermEx);
! $BackEnd? $Any* $Sp / $Sep;
## $Lower .;

View file

@ -173,5 +173,42 @@ $BackKatakanaEx $Format* $BackKatakanaEx;
!!safe_reverse;
$Extend* [^$Extend];
$BackACMLetterEx / $Format;
# rule 3
$Extend+ [^$Extend];
# rule 4
$Format+ $BackABaseLetterEx;
$Format+ $BackACMLetterEx / $Format;
$Format+ $BackNumericEx;
$Format+ $BackMidLetterEx;
$Format+ $BackMidNumLetEx;
$Format+ $BackMidNumEx;
$Format+ $BackKatakanaEx;
# rule 6
($MidLetter | $MidNumLet) $Format* $BackABaseLetterEx;
($MidLetter | $MidNumLet) $Format* $BackACMLetterEx / $Format;
# rule 11
($MidNum | $MidNumLet) $Format* $BackNumericEx;
## -------------------------------------------------
!!safe_forward;
# rule 3
$Extend+;
# rule 4
$Format+ $ALetterEx;
$Format+ $NumericEx;
$Format+ $MidLetterEx;
$Format+ $MidNumLetEx;
$Format+ $MidNumEx;
$Format+ $KatakanaEx;
# rule 6
($MidLetter | $MidNumLet) $Format* $ALetterEx;
# rule 11
($MidNum | $MidNumLet) $Format* $NumericEx;

View file

@ -589,7 +589,6 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
if(exec) TestWordBoundary(); break;
case 14: name = "TestLineBreaks";
if(exec) TestLineBreaks(); break;
/***
case 15: name = "TestSentBreaks";
if(exec) TestSentBreaks(); break;
case 16: name = "TestExtended";
@ -603,7 +602,6 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
#endif
}
break;
***/
default: name = ""; break; //needed to end loop
}
}
@ -3005,6 +3003,74 @@ static int32_t getIntParam(UnicodeString name, UnicodeString &params, int32_t d
}
#endif
/**
 * Cross-checks a break iterator against a precomputed list of break positions.
 *
 * Four passes are made over ustr. Failures are reported through
 * test->errln(); most of them end the check immediately:
 *   1. first()/next() must yield exactly expected[0 .. expectedcount-1];
 *   2. isBoundary() must be true at every expected position except the last
 *      one (which this pass does not probe) and false at every offset that
 *      lies strictly between two consecutive expected positions;
 *   3. last()/previous() must yield the same positions in reverse order;
 *   4. preceding(j) must return expected[i] for every j in
 *      (expected[i], expected[i + 1]].
 *
 * @param test          test object used to report failures
 * @param ustr          text to iterate over
 * @param bi            break iterator under test; its text is set to ustr
 * @param expected      expected break positions, in the order next() should
 *                      return them
 * @param expectedcount number of entries in expected
 */
static void testBreakBoundPreceding(RBBITest *test, UnicodeString ustr,
                                    BreakIterator *bi,
                                    int expected[],
                                    int expectedcount)
{
    int count = 0;
    int i     = 0;
    bi->setText(ustr);

    // Pass 1: forward iteration.
    // No private copy of the positions is kept: once this pass succeeds the
    // iterator has produced exactly expected[0 .. expectedcount-1], so later
    // passes compare against expected[] directly. This avoids writing into a
    // fixed-size scratch array when the iterator returns more boundaries than
    // anticipated.
    for (i = bi->first(); i != BreakIterator::DONE; i = bi->next()) {
        if (count < expectedcount && expected[count] != i) {
            test->errln("break forward test failed: expected %d but got %d",
                        expected[count], i);
            break;
        }
        count ++;
    }
    if (count != expectedcount) {
        printStringBreaks(ustr, expected, expectedcount);
        test->errln("break test failed: missed %d match",
                    expectedcount - count);
        return;
    }

    // Pass 2: testing boundaries
    for (i = 1; i < expectedcount; i ++) {
        int j = expected[i - 1];
        if (!bi->isBoundary(j)) {
            printStringBreaks(ustr, expected, expectedcount);
            test->errln("Expected boundary at position %d", j);
            return;
        }
        // every offset strictly inside the segment must not be a boundary
        for (j = expected[i - 1] + 1; j < expected[i]; j ++) {
            if (bi->isBoundary(j)) {
                printStringBreaks(ustr, expected, expectedcount);
                test->errln("Not expecting boundary at position %d", j);
                return;
            }
        }
    }

    // Pass 3: reverse iteration; count walks back down from expectedcount.
    for (i = bi->last(); i != BreakIterator::DONE; i = bi->previous()) {
        if (count == 0) {
            // previous() produced more boundaries than next() did; stop
            // before indexing below the start of expected[].
            printStringBreaks(ustr, expected, expectedcount);
            test->errln("happy break test reverse failed: unexpected extra boundary %d",
                        i);
            return;
        }
        count --;
        if (expected[count] != i) {
            test->errln("happy break test reverse failed: expected %d but got %d",
                        expected[count], i);
            break;
        }
    }
    if (count != 0) {
        printStringBreaks(ustr, expected, expectedcount);
        test->errln("happy break test failed: missed a match");
        return;
    }

    // Pass 4: testing preceding
    for (i = 0; i < expectedcount - 1; i ++) {
        int j = expected[i] + 1;
        for (; j <= expected[i + 1]; j ++) {
            if (bi->preceding(j) != expected[i]) {
                printStringBreaks(ustr, expected, expectedcount);
                test->errln("Not expecting backwards boundary at position %d", j);
                return;
            }
        }
    }
}
void RBBITest::TestWordBreaks(void)
{
// <data><>\u1d4a\u206e<?>\u0603\U0001d7ff<>\u2019<></data>
@ -3015,6 +3081,7 @@ void RBBITest::TestWordBreaks(void)
UChar str[25];
char *strlist[] =
{
"\\u200e\\U000e0072\\u0a4b\\U000e003f\\ufd2b\\u2027\\u002e\\u002e",
"\\u0602\\u2019\\ua191\\U000e0063\\u0a4c\\u003a\\ub4b5\\u003a\\u827f\\u002e",
"\\u7f1f\\uc634\\u65f8\\u0944\\u04f2\\uacdf\\u1f9c\\u05f4\\u002e",
"\\U000e0042\\u002e\\u0fb8\\u09ef\\u0ed1\\u2044",
@ -3051,13 +3118,13 @@ void RBBITest::TestWordBreaks(void)
};
int loop;
for (loop = 0; loop < (sizeof(strlist) / sizeof(char *)); loop ++) {
// printf("looping %d\n", loop);
u_unescape(strlist[loop], str, 25);
UnicodeString ustr(str);
// RBBICharMonkey monkey;
RBBIWordMonkey monkey;
int expected[20];
int forward[20];
int expectedcount = 0;
monkey.setText(ustr);
@ -3066,33 +3133,7 @@ void RBBITest::TestWordBreaks(void)
expected[expectedcount ++] = i;
}
int count = 0;
bi->setText(ustr);
for (i = bi->first(); i != BreakIterator::DONE; i = bi->next()) {
forward[count] = i;
if (count > 20 || expected[count] != i) {
errln("happy break forward test failed: expected %d but got %d",
expected[count], i);
}
count ++;
}
if (count != expectedcount) {
printStringBreaks(ustr, expected, expectedcount);
errln("happy break test failed: missed a match");
break;
}
for (i = bi->last(); i != BreakIterator::DONE; i = bi->previous()) {
count --;
if (forward[count] != i) {
printStringBreaks(ustr, expected, expectedcount);
errln("happy break test reverse failed: expected %d but got %d",
forward[count], i);
break;
}
}
if (count != 0) {
errln("happy break test failed: missed a match");
}
testBreakBoundPreceding(this, ustr, bi, expected, expectedcount);
}
}
@ -3105,7 +3146,9 @@ void RBBITest::TestWordBoundary(void)
BreakIterator *bi = BreakIterator::createWordInstance(locale, status);
UChar str[20];
char *strlist[] =
{"\\U000e0042\\u002e\\u0fb8\\u09ef\\u0ed1\\u2044",
{
"\\u200e\\U000e0072\\u0a4b\\U000e003f\\ufd2b\\u2027\\u002e\\u002e",
"\\U000e0042\\u002e\\u0fb8\\u09ef\\u0ed1\\u2044",
"\\u003b\\u024a\\u102e\\U000e0071\\u0600",
"\\u2027\\U000e0067\\u0a47\\u00b7",
"\\u1fcd\\u002c\\u07aa\\u0027\\u11b0",
@ -3136,6 +3179,7 @@ void RBBITest::TestWordBoundary(void)
};
int loop;
for (loop = 0; loop < (sizeof(strlist) / sizeof(char *)); loop ++) {
// printf("looping %d\n", loop);
u_unescape(strlist[loop], str, 20);
UnicodeString ustr(str);
int forward[20];
@ -3153,7 +3197,7 @@ void RBBITest::TestWordBoundary(void)
printStringBreaks(ustr, forward, count);
errln("happy boundary test failed: expected %d not a boundary",
j);
break;
return;
}
}
}
@ -3161,7 +3205,7 @@ void RBBITest::TestWordBoundary(void)
printStringBreaks(ustr, forward, count);
errln("happy boundary test failed: expected %d a boundary",
i);
break;
return;
}
prev = i;
}
@ -3176,6 +3220,9 @@ void RBBITest::TestLineBreaks(void)
UChar str[20];
char *strlist[] =
{
"\\u169b\\U000e0130\\u002d\\u1041\\u0f3d\\u0abf\\u00b0\\u31fb\\u00a0\\u002d\\u02c8\\u003b",
"\\u2762\\u1680\\u002d\\u2028\\u0027\\u01dc\\ufe56\\u003a\\u000a\\uffe6\\u29fd\\u0020\\u30ee\\u007c\\U0001d178\\u0af1\\u0085",
"\\u3010\\u200b\\u2029\\ufeff\\ufe6a\\u275b\\U000e013b\\ufe37\\u24d4\\u002d\\u1806\\u256a\\u1806\\u247c\\u0085\\u17ac",
"\\u99ab\\u0027\\u003b\\u2026\\ueaf0\\u0020\\u0020\\u0313\\u0020\\u3099\\uff09\\u208e\\u2011\\u2007\\u2060\\u000a\\u0020\\u0020\\u300b\\u0bf9",
"\\u1806\\u060d\\u30f5\\u00b4\\u17e9\\u2544\\u2028\\u2024\\u2011\\u20a3\\u002d\\u09cc\\u1782\\u000d\\uff6f\\u0025",
"\\u002f\\uf22e\\u1944\\ufe3d\\u0020\\u206f\\u31b3\\u2014\\u002d\\u2025\\u0f0c\\u0085\\u2763",
@ -3207,7 +3254,6 @@ void RBBITest::TestLineBreaks(void)
RBBILineMonkey monkey;
int expected[20];
int forward[20];
int expectedcount = 0;
monkey.setText(ustr);
@ -3216,35 +3262,7 @@ void RBBITest::TestLineBreaks(void)
expected[expectedcount ++] = i;
}
int count = 0;
bi->setText(ustr);
for (i = bi->first(); i != BreakIterator::DONE; i = bi->next()) {
forward[count] = i;
if (count < expectedcount && expected[count] != i) {
errln("happy break forward test failed: expected %d but got %d",
expected[count], i);
}
count ++;
}
if (count != expectedcount) {
printStringBreaks(ustr, expected, expectedcount);
errln("happy break test failed: missed %d match",
expectedcount - count);
break;
}
for (i = bi->last(); i != BreakIterator::DONE; i = bi->previous()) {
count --;
if (forward[count] != i) {
printStringBreaks(ustr, expected, expectedcount);
errln("happy break test reverse failed: expected %d but got %d",
forward[count], i);
break;
}
}
if (count != 0) {
errln("happy break test failed: missed a match");
break;
}
testBreakBoundPreceding(this, ustr, bi, expected, expectedcount);
}
}
@ -3266,12 +3284,10 @@ void RBBITest::TestSentBreaks(void)
"Don't rock the boat.\\u2029Because I am the daddy, that is why. Not on my time (el timo.)!",
};
int loop;
int forward[100];
for (loop = 0; loop < (sizeof(strlist) / sizeof(char *)); loop ++) {
printf("looping %d\n", loop);
u_unescape(strlist[loop], str, 100);
UnicodeString ustr(str);
int forward[20];
int count = 0;
bi->setText(ustr);