mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-7130 infinite loop in dictionary break iterators
X-SVN-Rev: 26618
This commit is contained in:
parent
85f0ecb659
commit
4d90dc692d
3 changed files with 44 additions and 3 deletions
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
***************************************************************************
|
||||
* Copyright (C) 1999-2008 International Business Machines Corporation *
|
||||
* Copyright (C) 1999-2009 International Business Machines Corporation *
|
||||
* and others. All rights reserved. *
|
||||
***************************************************************************
|
||||
*/
|
||||
|
@ -247,7 +247,10 @@ void RuleBasedBreakIterator::init() {
|
|||
fLastRuleStatusIndex = 0;
|
||||
fLastStatusIndexValid = TRUE;
|
||||
fDictionaryCharCount = 0;
|
||||
fBreakType = -1;
|
||||
fBreakType = UBRK_WORD; // Defaulting BreakType to word gives reasonable
|
||||
// dictionary behavior for Break Iterators that are
|
||||
// built from rules. Even better would be the ability to
|
||||
// declare the type in the rules.
|
||||
|
||||
fCachedBreakPositions = NULL;
|
||||
fLanguageBreakEngines = NULL;
|
||||
|
@ -1019,7 +1022,7 @@ int32_t RuleBasedBreakIterator::handleNext(const RBBIStateTable *statetable) {
|
|||
|
||||
#ifdef RBBI_DEBUG
|
||||
if (fTrace) {
|
||||
RBBIDebugPrintf(" %4d ", utext_getNativeIndex(fText));
|
||||
RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(fText));
|
||||
if (0x20<=c && c<0x7f) {
|
||||
RBBIDebugPrintf("\"%c\" ", c);
|
||||
} else {
|
||||
|
|
|
@ -140,6 +140,8 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
|
|||
case 21: case 22: case 23: name = "skip";
|
||||
break;
|
||||
#endif
|
||||
case 24: name = "TestDictRules";
|
||||
if (exec) TestDictRules(); break;
|
||||
|
||||
default: name = ""; break; //needed to end loop
|
||||
}
|
||||
|
@ -1991,6 +1993,41 @@ void RBBITest::TestTailoredBreaks() {
|
|||
}
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------
|
||||
//
|
||||
// TestDictRules create a break iterator from source rules that includes a
|
||||
// dictionary range. Regression for bug #7130. Source rules
|
||||
// do not declare a break iterator type (word, line, sentence, etc.),
|
||||
// but the dictionary code, without a type, would loop.
|
||||
//
|
||||
//-------------------------------------------------------------------------------
|
||||
void RBBITest::TestDictRules() {
|
||||
const char *rules = "$dictionary = [a-z]; \n" // every lowercase ASCII letter is placed in the $dictionary set
|
||||
"!!forward; \n"
|
||||
"$dictionary $dictionary; \n"
|
||||
"!!reverse; \n"
|
||||
"$dictionary $dictionary; \n";
|
||||
const char *text = "aa"; // two characters, both members of [a-z] as defined in the rules above
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UParseError parseError;
|
||||

||||
RuleBasedBreakIterator bi(rules, parseError, status); // iterator is built directly from the rule source text
|
||||
TEST_ASSERT_SUCCESS(status);
|
||||
UnicodeString utext = text;
|
||||
bi.setText(utext);
|
||||
int32_t position;
|
||||
int32_t loops;
|
||||
for (loops = 0; loops<10; loops++) { // bounded at 10 calls; presumably so a regression cannot hang the test
|
||||
position = bi.next();
|
||||
if (position == RuleBasedBreakIterator::DONE) {
|
||||
break; // iteration finished; loops holds the number of next() calls that returned a boundary
|
||||
|
||||
}
|
||||
}
|
||||
TEST_ASSERT(loops == 1); // passes only when the first next() returned a boundary and the second returned DONE
|
||||
}
|
||||
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------
|
||||
//
|
||||
// ReadAndConvertFile Read a text data file, convert it to UChars, and
|
||||
|
|
|
@ -70,6 +70,7 @@ public:
|
|||
void TestBug5775();
|
||||
void TestThaiBreaks();
|
||||
void TestTailoredBreaks();
|
||||
void TestDictRules();
|
||||
|
||||
void TestDebug();
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue