mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 14:05:32 +00:00
ICU-22585 Fix infinity loop while unicode set contains single surrogate
This commit is contained in:
parent
7d3cd7cba5
commit
8b14c05791
4 changed files with 58 additions and 18 deletions
|
@ -770,6 +770,7 @@ void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, Unicode
|
|||
RBBINode *usetNode = new RBBINode(RBBINode::uset);
|
||||
if (usetNode == nullptr) {
|
||||
error(U_MEMORY_ALLOCATION_ERROR);
|
||||
delete setToAdopt;
|
||||
return;
|
||||
}
|
||||
usetNode->fInputSet = setToAdopt;
|
||||
|
@ -1210,7 +1211,6 @@ RBBINode *RBBIRuleScanner::pushNewNode(RBBINode::NodeType t) {
|
|||
//
|
||||
//------------------------------------------------------------------------------
|
||||
void RBBIRuleScanner::scanSet() {
|
||||
UnicodeSet *uset;
|
||||
ParsePosition pos;
|
||||
int startPos;
|
||||
int i;
|
||||
|
@ -1222,12 +1222,11 @@ void RBBIRuleScanner::scanSet() {
|
|||
pos.setIndex(fScanIndex);
|
||||
startPos = fScanIndex;
|
||||
UErrorCode localStatus = U_ZERO_ERROR;
|
||||
uset = new UnicodeSet();
|
||||
if (uset == nullptr) {
|
||||
localStatus = U_MEMORY_ALLOCATION_ERROR;
|
||||
} else {
|
||||
uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus);
|
||||
LocalPointer<UnicodeSet> uset(new UnicodeSet(), localStatus);
|
||||
if (U_FAILURE(localStatus)) {
|
||||
return;
|
||||
}
|
||||
uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus);
|
||||
if (U_FAILURE(localStatus)) {
|
||||
// TODO: Get more accurate position of the error from UnicodeSet's return info.
|
||||
// UnicodeSet appears to not be reporting correctly at this time.
|
||||
|
@ -1235,20 +1234,18 @@ void RBBIRuleScanner::scanSet() {
|
|||
RBBIDebugPrintf("UnicodeSet parse position.ErrorIndex = %d\n", pos.getIndex());
|
||||
#endif
|
||||
error(localStatus);
|
||||
delete uset;
|
||||
return;
|
||||
}
|
||||
|
||||
// Verify that the set contains at least one code point.
|
||||
//
|
||||
U_ASSERT(uset!=nullptr);
|
||||
U_ASSERT(uset.isValid());
|
||||
if (uset->isEmpty()) {
|
||||
// This set is empty.
|
||||
// Make it an error, because it almost certainly is not what the user wanted.
|
||||
// Also, avoids having to think about corner cases in the tree manipulation code
|
||||
// that occurs later on.
|
||||
error(U_BRK_RULE_EMPTY_SET);
|
||||
delete uset;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1257,7 +1254,7 @@ void RBBIRuleScanner::scanSet() {
|
|||
// Don't just set fScanIndex because the line/char positions maintained
|
||||
// for error reporting would be thrown off.
|
||||
i = pos.getIndex();
|
||||
for (;;) {
|
||||
for (;U_SUCCESS(*fRB->fStatus);) {
|
||||
if (fNextIndex >= i) {
|
||||
break;
|
||||
}
|
||||
|
@ -1280,7 +1277,7 @@ void RBBIRuleScanner::scanSet() {
|
|||
// character categories for run time engine.
|
||||
// - Eliminates mulitiple instances of the same set.
|
||||
// - Creates a new uset node if necessary (if this isn't a duplicate.)
|
||||
findSetFor(n->fText, n, uset);
|
||||
findSetFor(n->fText, n, uset.orphan());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -146,6 +146,7 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
|
|||
TESTCASE_AUTO(TestExternalBreakEngineWithFakeYue);
|
||||
TESTCASE_AUTO(TestBug22581);
|
||||
TESTCASE_AUTO(TestBug22584);
|
||||
TESTCASE_AUTO(TestBug22585);
|
||||
|
||||
#if U_ENABLE_TRACING
|
||||
TESTCASE_AUTO(TestTraceCreateCharacter);
|
||||
|
@ -5860,6 +5861,23 @@ void RBBITest::TestExternalBreakEngineWithFakeTaiLe() {
|
|||
expected2 == actual2);
|
||||
}
|
||||
|
||||
// Test a single unpaired unpaired char (either surrogate low or high) in
|
||||
// an Unicode set will not cause infinity loop.
|
||||
void RBBITest::TestBug22585() {
|
||||
UnicodeString rule = u"$a=[";
|
||||
rule.append(0xdecb) // an unpaired surrogate high
|
||||
.append("];");
|
||||
UParseError pe {};
|
||||
UErrorCode ec {U_ZERO_ERROR};
|
||||
RuleBasedBreakIterator bi(rule, pe, ec);
|
||||
|
||||
rule = u"$a=[";
|
||||
rule.append(0xd94e) // an unpaired surrogate low
|
||||
.append("];");
|
||||
ec = U_ZERO_ERROR;
|
||||
RuleBasedBreakIterator bi2(rule, pe, ec);
|
||||
}
|
||||
|
||||
void RBBITest::TestBug22584() {
|
||||
// Creating a break iterator from a rule consisting of a very long
|
||||
// literal input string caused a stack overflow when deleting the
|
||||
|
|
|
@ -100,6 +100,7 @@ public:
|
|||
void TestExternalBreakEngineWithFakeYue();
|
||||
void TestBug22581();
|
||||
void TestBug22584();
|
||||
void TestBug22585();
|
||||
|
||||
#if U_ENABLE_TRACING
|
||||
void TestTraceCreateCharacter();
|
||||
|
|
|
@ -908,7 +908,7 @@ public class RBBITest extends CoreTestFmwk {
|
|||
assertEquals("Wrong number of breaks found", 2, breaksFound);
|
||||
}
|
||||
|
||||
/* Test handling of unpair surrogate.
|
||||
/* Test handling of unpaired surrogate.
|
||||
*/
|
||||
@Test
|
||||
public void TestUnpairedSurrogate() {
|
||||
|
@ -919,24 +919,24 @@ public class RBBITest extends CoreTestFmwk {
|
|||
|
||||
try {
|
||||
new RuleBasedBreakIterator("a\ud800b;");
|
||||
fail("TestUnpairedSurrogate: RuleBasedBreakIterator() failed to throw an exception with unpair low surrogate.");
|
||||
fail("TestUnpairedSurrogate: RuleBasedBreakIterator() failed to throw an exception with unpaired low surrogate.");
|
||||
}
|
||||
catch (IllegalArgumentException e) {
|
||||
// expected exception with unpair surrogate.
|
||||
// expected exception with unpaired surrogate.
|
||||
}
|
||||
catch (Exception e) {
|
||||
fail("TestUnpairedSurrogate: Unexpected exception while new RuleBasedBreakIterator() with unpair low surrogate: " + e);
|
||||
fail("TestUnpairedSurrogate: Unexpected exception while new RuleBasedBreakIterator() with unpaired low surrogate: " + e);
|
||||
}
|
||||
|
||||
try {
|
||||
new RuleBasedBreakIterator("a\ude00b;");
|
||||
fail("TestUnpairedSurrogate: RuleBasedBreakIterator() failed to throw an exception with unpair high surrogate.");
|
||||
fail("TestUnpairedSurrogate: RuleBasedBreakIterator() failed to throw an exception with unpaired high surrogate.");
|
||||
}
|
||||
catch (IllegalArgumentException e) {
|
||||
// expected exception with unpair surrogate.
|
||||
// expected exception with unpaired surrogate.
|
||||
}
|
||||
catch (Exception e) {
|
||||
fail("TestUnpairedSurrogate: Unexpected exception while new RuleBasedBreakIterator() with unpair high surrogate: " + e);
|
||||
fail("TestUnpairedSurrogate: Unexpected exception while new RuleBasedBreakIterator() with unpaired high surrogate: " + e);
|
||||
}
|
||||
|
||||
|
||||
|
@ -946,6 +946,30 @@ public class RBBITest extends CoreTestFmwk {
|
|||
assertEquals("Rules does not match", rules, bi.toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestBug22585() {
|
||||
try {
|
||||
new RuleBasedBreakIterator("$a=[\udecb];");
|
||||
fail("TestBug22585: RuleBasedBreakIterator() failed to throw an exception with unpaired high surrogate.");
|
||||
}
|
||||
catch (IllegalArgumentException e) {
|
||||
// expected exception with unpaired surrogate.
|
||||
}
|
||||
catch (Exception e) {
|
||||
fail("TestBug22585: Unexpected exception while new RuleBasedBreakIterator() with unpaired high surrogate: " + e);
|
||||
}
|
||||
|
||||
try {
|
||||
new RuleBasedBreakIterator("$a=[\ud94e];");
|
||||
fail("TestBug22585: RuleBasedBreakIterator() failed to throw an exception with unpaired low surrogate.");
|
||||
}
|
||||
catch (IllegalArgumentException e) {
|
||||
// expected exception with unpaired surrogate.
|
||||
}
|
||||
catch (Exception e) {
|
||||
fail("TestBug22585: Unexpected exception while new RuleBasedBreakIterator() with unpaired low surrogate: " + e);
|
||||
}
|
||||
}
|
||||
/* Test preceding(index) and following(index), with semi-random indexes.
|
||||
* The random indexes are produced in clusters that are relatively closely spaced,
|
||||
* to increase the occurrences of hits to the internal break cache.
|
||||
|
|
Loading…
Add table
Reference in a new issue