ICU-20391 Fix regexp crash with nested look-behinds, from fuzz testing.

This commit is contained in:
Andy Heninger 2019-02-04 10:50:08 -08:00
parent 14eb026570
commit d685cacd9b
2 changed files with 19 additions and 14 deletions

View file

@ -3463,7 +3463,6 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) {
U_ASSERT(start <= end);
U_ASSERT(end < fRXPat->fCompiledPat->size());
int32_t loc;
int32_t op;
int32_t opType;
@ -3672,7 +3671,7 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) {
case URX_CTR_LOOP:
case URX_CTR_LOOP_NG:
// These opcodes will be skipped over by code for URX_CRT_INIT.
// These opcodes will be skipped over by code for URX_CTR_INIT.
// We shouldn't encounter them here.
UPRV_UNREACHABLE;
@ -3700,21 +3699,15 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) {
{
// Look-behind. Scan forward until the matching look-around end,
// without processing the look-behind block.
int32_t depth = 0;
for (;;) {
loc++;
int32_t dataLoc = URX_VAL(op);
for (loc = loc + 1; loc < end; ++loc) {
op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
if (URX_TYPE(op) == URX_LA_START || URX_TYPE(op) == URX_LB_START) {
depth++;
int32_t opType = URX_TYPE(op);
if ((opType == URX_LA_END || opType == URX_LBN_END) && (URX_VAL(op) == dataLoc)) {
break;
}
if (URX_TYPE(op) == URX_LA_END || URX_TYPE(op)==URX_LBN_END) {
if (depth == 0) {
break;
}
depth--;
}
U_ASSERT(loc < end);
}
U_ASSERT(loc < end);
}
break;

View file

@ -1439,6 +1439,8 @@
"[^\u0000-\U0010ffff]" "a"
"[^[^\u0000-\U0010ffff]]" "<0>a</0>"
"This is a string with (?:one |two |three )endings" "<0>This is a string with two endings</0>"
# Bug ICU-20544. Similar to 20385, above. Assertion failure with a negative look-behind assertion containing
# a set with no contents. Look-behind pattern includes more than just the empty set.
@ -1446,9 +1448,19 @@
"(?<![^\u0000-\U0010ffff]c)" "<0></0>abc"
"(?<=[^[^]]†)" "abc" # Problem also exists with positive look-behind
# Bug ICU-20391. Crash in computation of maximum match length with nested look-around patterns.
#
"(?<=(?<=((?=)){0}+)" E "aaa"
"(?<=(?<=((?=)){0}+))" "<0></0>"
"(?<=c(?<=b((?=a)){1}+))" "aaa"
"abc(?=de(?=f))...g" "<0>abcdefg</0>"
"abc(?=de(?=f))...g" "abcdxfg"
# Random debugging, Temporary
#
#
# Regexps from http://www.regexlib.com
#