ICU-22636 Return U_BRK_RULE_SYNTAX when status number is too large

See #2793
This commit is contained in:
Frank Tang 2024-01-19 07:11:33 +00:00 committed by Frank Yung-Fong Tang
parent 7cec4a9308
commit 9832f48e22
5 changed files with 45 additions and 2 deletions

View file

@ -530,7 +530,13 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
n = fNodeStack[fNodeStackPtr];
uint32_t v = u_charDigitValue(fC.fChar);
U_ASSERT(v < 10);
n->fVal = n->fVal*10 + v;
int64_t updated = static_cast<int64_t>(n->fVal)*10 + v;
// Avoid overflow n->fVal
if (updated > INT32_MAX) {
error(U_BRK_RULE_SYNTAX);
break;
}
n->fVal = static_cast<int32_t>(updated);
break;
}

View file

@ -149,6 +149,7 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha
TESTCASE_AUTO(TestBug22584);
TESTCASE_AUTO(TestBug22585);
TESTCASE_AUTO(TestBug22602);
TESTCASE_AUTO(TestBug22636);
#if U_ENABLE_TRACING
TESTCASE_AUTO(TestTraceCreateCharacter);
@ -5889,6 +5890,19 @@ void RBBITest::TestBug22602() {
RuleBasedBreakIterator bi(rule, pe, ec);
}
// Regression test for ICU-22636: builds break iterators from rules whose
// {status} value is above, just above, and exactly at the largest 32-bit
// signed integer, and checks the resulting error code for each.
void RBBITest::TestBug22636() {
UParseError pe {};
UErrorCode ec {U_ZERO_ERROR};
// Status value far larger than 2147483647: expected to be a rule syntax error.
RuleBasedBreakIterator bi(u"A{77777777777777};", pe, ec);
assertEquals(WHERE, ec, U_BRK_RULE_SYNTAX);
// Reset the error code so the next construction is checked on its own.
ec = U_ZERO_ERROR;
// Boundary case: 2147483648 is one more than 2147483647 and must also fail.
RuleBasedBreakIterator bi2(u"A{2147483648};", pe, ec);
assertEquals(WHERE, ec, U_BRK_RULE_SYNTAX);
ec = U_ZERO_ERROR;
// Boundary case: 2147483647 itself must still be accepted without error.
RuleBasedBreakIterator bi3(u"A{2147483647};", pe, ec);
assertEquals(WHERE, ec, U_ZERO_ERROR);
}
void RBBITest::TestBug22584() {
// Creating a break iterator from a rule consisting of a very long
// literal input string caused a stack overflow when deleting the

View file

@ -103,6 +103,7 @@ public:
void TestBug22584();
void TestBug22585();
void TestBug22602();
void TestBug22636();
#if U_ENABLE_TRACING
void TestTraceCreateCharacter();

View file

@ -412,7 +412,11 @@ class RBBIRuleScanner {
{
n = fNodeStack[fNodeStackPtr];
int v = UCharacter.digit((char) fC.fChar, 10);
n.fVal = n.fVal * 10 + v;
long update = (long)(n.fVal) * 10 + v;
if (update > Integer.MAX_VALUE) {
error(RBBIRuleBuilder.U_BRK_RULE_SYNTAX);
}
n.fVal = (int)(update);
break;
}

View file

@ -1002,6 +1002,24 @@ public class RBBITest extends CoreTestFmwk {
fail("TestBug22602: Unexpected exception while new RuleBasedBreakIterator() with a long string followed by a ';': " + e);
}
}
/**
 * Regression test for ICU-22636: constructs break iterators from rules whose
 * {status} value is above, just above, and exactly at the largest 32-bit
 * signed integer. The first two constructions are expected to throw
 * IllegalArgumentException; the last is expected to succeed.
 */
@Test
public void TestBug22636() {
try {
// Status value far larger than 2147483647.
RuleBasedBreakIterator bi = new RuleBasedBreakIterator("A{77777777777777};");
// Reached only if the constructor did not throw.
fail("TestBug22636: new RuleBasedBreakIterator() with a large status value inside {}: should throw IllegalArgumentException");
}
catch (IllegalArgumentException e) {
// expected exception with a large status value inside {}.
}
try {
// Boundary case: 2147483648 is one more than 2147483647.
RuleBasedBreakIterator bi2 = new RuleBasedBreakIterator("A{2147483648};");
// Reached only if the constructor did not throw.
fail("TestBug22636: new RuleBasedBreakIterator() with a large status value inside {}: should throw IllegalArgumentException");
}
catch (IllegalArgumentException e) {
// expected exception with a large status value inside {}.
}
// Boundary case: 2147483647 must be accepted. There is no try/catch here,
// so any exception from this constructor propagates out of the test method.
RuleBasedBreakIterator bi3 = new RuleBasedBreakIterator("A{2147483647};");
}
/* Test preceding(index) and following(index), with semi-random indexes.
* The random indexes are produced in clusters that are relatively closely spaced,
* to increase the occurrences of hits to the internal break cache.