ICU-20359 Fix stack overflow in Regex Pattern Compile.

This commit is contained in:
Andy Heninger 2019-03-01 16:49:21 -08:00
parent 3166381f88
commit e559b30309
3 changed files with 36 additions and 4 deletions

View file

@ -4010,7 +4010,7 @@ UChar32 RegexCompile::peekCharLL() {
//
//------------------------------------------------------------------------------
void RegexCompile::nextChar(RegexPatternChar &c) {
tailRecursion:
fScanIndex = UTEXT_GETNATIVEINDEX(fRXPat->fPattern);
c.fChar = nextCharLL();
c.fQuoted = FALSE;
@ -4021,7 +4021,9 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
c.fChar == (UChar32)-1) {
fQuoteMode = FALSE; // Exit quote mode,
nextCharLL(); // discard the E
nextChar(c); // recurse to get the real next char
// nextChar(c); // recurse to get the real next char
goto tailRecursion; // Note: fuzz testing produced testcases that
// resulted in stack overflow here.
}
}
else if (fInBackslashQuote) {
@ -4139,8 +4141,10 @@ void RegexCompile::nextChar(RegexPatternChar &c) {
else if (peekCharLL() == chQ) {
// "\Q" enter quote mode, which will continue until "\E"
fQuoteMode = TRUE;
nextCharLL(); // discard the 'Q'.
nextChar(c); // recurse to get the real next char.
nextCharLL(); // discard the 'Q'.
// nextChar(c); // recurse to get the real next char.
goto tailRecursion; // Note: fuzz testing produced test cases that
// resulted in stack overflow here.
}
else
{

View file

@ -104,6 +104,7 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch
TESTCASE_AUTO(TestBug12884);
TESTCASE_AUTO(TestBug13631);
TESTCASE_AUTO(TestBug13632);
TESTCASE_AUTO(TestBug20359);
TESTCASE_AUTO_END;
}
@ -5851,4 +5852,30 @@ void RegexTest::TestBug13632() {
uregex_close(re);
}
void RegexTest::TestBug20359() {
// The bug was stack overflow while parsing a pattern with a huge number of adjacent \Q\E
// pairs. (Enter and exit pattern literal quote mode). Logic was correct.
// Changed implementation to loop instead of recursing.
UnicodeString pattern;
for (int i=0; i<50000; ++i) {
pattern += u"\\Q\\E";
}
pattern += u"x";
UErrorCode status = U_ZERO_ERROR;
LocalURegularExpressionPointer re(uregex_open(pattern.getBuffer(), pattern.length(),
0, nullptr, &status));
assertSuccess(WHERE, status);
// We have passed the point where the bug crashed. The following is a small sanity
// check that the pattern works, that all the \Q\E\Q\E... didn't cause other problems.
uregex_setText(re.getAlias(), u"abcxyz", -1, &status);
assertSuccess(WHERE, status);
assertTrue(WHERE, uregex_find(re.getAlias(), 0, &status));
assertEquals(WHERE, 3, uregex_start(re.getAlias(), 0, &status));
assertSuccess(WHERE, status);
}
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */

View file

@ -59,6 +59,7 @@ public:
virtual void TestBug12884();
virtual void TestBug13631();
virtual void TestBug13632();
virtual void TestBug20359();
// The following functions are internal to the regexp tests.
virtual void assertUText(const char *expected, UText *actual, const char *file, int line);