mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-20391 Fix regexp crash with nested look-behinds, from fuzz testing.
This commit is contained in:
parent
14eb026570
commit
d685cacd9b
2 changed files with 19 additions and 14 deletions
|
@ -3463,7 +3463,6 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) {
|
|||
U_ASSERT(start <= end);
|
||||
U_ASSERT(end < fRXPat->fCompiledPat->size());
|
||||
|
||||
|
||||
int32_t loc;
|
||||
int32_t op;
|
||||
int32_t opType;
|
||||
|
@ -3672,7 +3671,7 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) {
|
|||
|
||||
case URX_CTR_LOOP:
|
||||
case URX_CTR_LOOP_NG:
|
||||
// These opcodes will be skipped over by code for URX_CRT_INIT.
|
||||
// These opcodes will be skipped over by code for URX_CTR_INIT.
|
||||
// We shouldn't encounter them here.
|
||||
UPRV_UNREACHABLE;
|
||||
|
||||
|
@ -3700,21 +3699,15 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) {
|
|||
{
|
||||
// Look-behind. Scan forward until the matching look-around end,
|
||||
// without processing the look-behind block.
|
||||
int32_t depth = 0;
|
||||
for (;;) {
|
||||
loc++;
|
||||
int32_t dataLoc = URX_VAL(op);
|
||||
for (loc = loc + 1; loc < end; ++loc) {
|
||||
op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
|
||||
if (URX_TYPE(op) == URX_LA_START || URX_TYPE(op) == URX_LB_START) {
|
||||
depth++;
|
||||
int32_t opType = URX_TYPE(op);
|
||||
if ((opType == URX_LA_END || opType == URX_LBN_END) && (URX_VAL(op) == dataLoc)) {
|
||||
break;
|
||||
}
|
||||
if (URX_TYPE(op) == URX_LA_END || URX_TYPE(op)==URX_LBN_END) {
|
||||
if (depth == 0) {
|
||||
break;
|
||||
}
|
||||
depth--;
|
||||
}
|
||||
U_ASSERT(loc < end);
|
||||
}
|
||||
U_ASSERT(loc < end);
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
12
icu4c/source/test/testdata/regextst.txt
vendored
12
icu4c/source/test/testdata/regextst.txt
vendored
|
@ -1439,6 +1439,8 @@
|
|||
"[^\u0000-\U0010ffff]" "a"
|
||||
"[^[^\u0000-\U0010ffff]]" "<0>a</0>"
|
||||
|
||||
"This is a string with (?:one |two |three )endings" "<0>This is a string with two endings</0>"
|
||||
|
||||
# Bug ICU-20544. Similar to 20385, above. Assertion failure with a negative look-behind assertion containing
|
||||
# a set with no contents. Look-behind pattern includes more than just the empty set.
|
||||
|
||||
|
@ -1446,9 +1448,19 @@
|
|||
"(?<![^\u0000-\U0010ffff]c)" "<0></0>abc"
|
||||
"(?<=[^[^]]†)" "abc" # Problem also exists w positive look-behind
|
||||
|
||||
# Bug ICU-20391. Crash in computation of maximum match length with nested look-around patterns.
|
||||
#
|
||||
"(?<=(?<=((?=)){0}+)" E "aaa"
|
||||
"(?<=(?<=((?=)){0}+))" "<0></0>"
|
||||
"(?<=c(?<=b((?=a)){1}+))" "aaa"
|
||||
"abc(?=de(?=f))...g" "<0>abcdefg</0>"
|
||||
"abc(?=de(?=f))...g" "abcdxfg"
|
||||
|
||||
|
||||
# Random debugging, Temporary
|
||||
#
|
||||
|
||||
|
||||
#
|
||||
# Regexps from http://www.regexlib.com
|
||||
#
|
||||
|
|
Loading…
Add table
Reference in a new issue