ICU-22579 Fix Null deref while Unicode Set only has string

This commit is contained in:
Frank Tang 2023-12-11 14:19:28 -08:00 committed by Frank Yung-Fong Tang
parent 8b14c05791
commit 4a7d61d261
5 changed files with 35 additions and 2 deletions

View file

@ -1224,6 +1224,7 @@ void RBBIRuleScanner::scanSet() {
UErrorCode localStatus = U_ZERO_ERROR;
LocalPointer<UnicodeSet> uset(new UnicodeSet(), localStatus);
if (U_FAILURE(localStatus)) {
error(localStatus);
return;
}
uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus);
@ -1240,7 +1241,11 @@ void RBBIRuleScanner::scanSet() {
// Verify that the set contains at least one code point.
//
U_ASSERT(uset.isValid());
if (uset->isEmpty()) {
UnicodeSet tempSet(*uset);
// Use tempSet to handle the case that the UnicodeSet contains
// only string element, such as [{ab}] and treat it as empty set.
tempSet.removeAllStrings();
if (tempSet.isEmpty()) {
// This set is empty.
// Make it an error, because it almost certainly is not what the user wanted.
// Also, avoids having to think about corner cases in the tree manipulation code

View file

@ -144,6 +144,7 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
TESTCASE_AUTO(TestRandomAccess);
TESTCASE_AUTO(TestExternalBreakEngineWithFakeTaiLe);
TESTCASE_AUTO(TestExternalBreakEngineWithFakeYue);
TESTCASE_AUTO(TestBug22579);
TESTCASE_AUTO(TestBug22581);
TESTCASE_AUTO(TestBug22584);
TESTCASE_AUTO(TestBug22585);
@ -5895,6 +5896,14 @@ void RBBITest::TestBug22584() {
RuleBasedBreakIterator bi2(ruleStr, pe, ec);
}
void RBBITest::TestBug22579() {
// Test not causing null deref in cloneTree
UnicodeString ruleStr = u"[{ab}];";
UParseError pe {};
UErrorCode ec {U_ZERO_ERROR};
RuleBasedBreakIterator bi(ruleStr, pe, ec);
}
void RBBITest::TestBug22581() {
// Test duplicate variable setting will not leak the rule compilation
UnicodeString ruleStr = u"$foo=[abc]; $foo=[xyz]; $foo;";

View file

@ -98,6 +98,7 @@ public:
void TestRandomAccess();
void TestExternalBreakEngineWithFakeTaiLe();
void TestExternalBreakEngineWithFakeYue();
void TestBug22579();
void TestBug22581();
void TestBug22584();
void TestBug22585();

View file

@ -1062,7 +1062,11 @@ class RBBIRuleScanner {
// Verify that the set contains at least one code point.
//
if (uset.isEmpty()) {
// Use tempSet to handle the case that the UnicodeSet contains
// only string element, such as [{ab}] and treat it as empty set.
UnicodeSet tempSet = new UnicodeSet(uset);
tempSet.removeAllStrings();
if (tempSet.isEmpty()) {
// This set is empty.
// Make it an error, because it almost certainly is not what the user wanted.
// Also, avoids having to think about corner cases in the tree manipulation code

View file

@ -946,6 +946,20 @@ public class RBBITest extends CoreTestFmwk {
assertEquals("Rules does not match", rules, bi.toString());
}
@Test
public void TestBug22579() {
try {
new RuleBasedBreakIterator("[{ab}];");
fail("TestBug22579: RuleBasedBreakIterator() failed to throw an exception with only string in an Unicode set.");
}
catch (IllegalArgumentException e) {
// expected exception with only string inside an Unicode set.
}
catch (Exception e) {
fail("TestBug22579: Unexpected exception while new RuleBasedBreakIterator() with only string in an Unicode Set: " + e);
}
}
@Test
public void TestBug22585() {
try {