mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-20359 Fix stack overflow in Regex Pattern Compile.
This commit is contained in:
parent
3166381f88
commit
e559b30309
3 changed files with 36 additions and 4 deletions
|
@ -4010,7 +4010,7 @@ UChar32 RegexCompile::peekCharLL() {
|
|||
//
|
||||
//------------------------------------------------------------------------------
|
||||
void RegexCompile::nextChar(RegexPatternChar &c) {
|
||||
|
||||
tailRecursion:
|
||||
fScanIndex = UTEXT_GETNATIVEINDEX(fRXPat->fPattern);
|
||||
c.fChar = nextCharLL();
|
||||
c.fQuoted = FALSE;
|
||||
|
@ -4021,7 +4021,9 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
|
|||
c.fChar == (UChar32)-1) {
|
||||
fQuoteMode = FALSE; // Exit quote mode,
|
||||
nextCharLL(); // discard the E
|
||||
nextChar(c); // recurse to get the real next char
|
||||
// nextChar(c); // recurse to get the real next char
|
||||
goto tailRecursion; // Note: fuzz testing produced testcases that
|
||||
// resulted in stack overflow here.
|
||||
}
|
||||
}
|
||||
else if (fInBackslashQuote) {
|
||||
|
@ -4139,8 +4141,10 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
|
|||
else if (peekCharLL() == chQ) {
|
||||
// "\Q" enter quote mode, which will continue until "\E"
|
||||
fQuoteMode = TRUE;
|
||||
nextCharLL(); // discard the 'Q'.
|
||||
nextChar(c); // recurse to get the real next char.
|
||||
nextCharLL(); // discard the 'Q'.
|
||||
// nextChar(c); // recurse to get the real next char.
|
||||
goto tailRecursion; // Note: fuzz testing produced test cases that
|
||||
// resulted in stack overflow here.
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -104,6 +104,7 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch
|
|||
TESTCASE_AUTO(TestBug12884);
|
||||
TESTCASE_AUTO(TestBug13631);
|
||||
TESTCASE_AUTO(TestBug13632);
|
||||
TESTCASE_AUTO(TestBug20359);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
@ -5851,4 +5852,30 @@ void RegexTest::TestBug13632() {
|
|||
uregex_close(re);
|
||||
}
|
||||
|
||||
// Regression test for ICU-20359 (stack overflow in regex pattern compile).
// Compiles a pattern made of many adjacent \Q\E pairs and then checks that the
// compiled pattern still matches as expected.
void RegexTest::TestBug20359() {
|
||||
// The bug was a stack overflow while parsing a pattern with a huge number of adjacent \Q\E
|
||||
// pairs (each pair enters and then exits pattern literal quote mode). The logic was correct;
|
||||
// the implementation was changed to loop instead of recursing.
|
||||
|
||||
// Build the pattern: 50000 adjacent \Q\E pairs followed by the single pattern character x.
UnicodeString pattern;
|
||||
for (int i=0; i<50000; ++i) {
|
||||
pattern += u"\\Q\\E";
|
||||
}
|
||||
pattern += u"x";
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
// Compile the pattern. This is the step that crashed before the fix (see the note below).
LocalURegularExpressionPointer re(uregex_open(pattern.getBuffer(), pattern.length(),
|
||||
0, nullptr, &status));
|
||||
assertSuccess(WHERE, status);
|
||||
|
||||
// We have passed the point where the bug crashed. The following is a small sanity
|
||||
// check that the pattern works, that all the \Q\E\Q\E... didn't cause other problems.
|
||||
|
||||
// NOTE(review): the -1 length presumably means the text is NUL-terminated; confirm against uregex.h.
uregex_setText(re.getAlias(), u"abcxyz", -1, &status);
|
||||
assertSuccess(WHERE, status);
|
||||
assertTrue(WHERE, uregex_find(re.getAlias(), 0, &status));
|
||||
// The match is expected to start at index 3, the position of 'x' in "abcxyz".
assertEquals(WHERE, 3, uregex_start(re.getAlias(), 0, &status));
|
||||
assertSuccess(WHERE, status);
|
||||
}
|
||||
|
||||
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
|
||||
|
|
|
@ -59,6 +59,7 @@ public:
|
|||
virtual void TestBug12884();
|
||||
virtual void TestBug13631();
|
||||
virtual void TestBug13632();
|
||||
virtual void TestBug20359();
|
||||
|
||||
// The following functions are internal to the regexp tests.
|
||||
virtual void assertUText(const char *expected, UText *actual, const char *file, int line);
|
||||
|
|
Loading…
Add table
Reference in a new issue