mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 14:05:32 +00:00
ICU-21492 Fix regex compile assertion failure.
A regex pattern containing nested look-behind blocks could trigger an assertion failure during pattern compilation. The cause was an off-by-one error in the code that computes an upper bound on the match length; that bound is needed because look-behind expressions must have a bounded maximum match length. Nested look-behind blocks come into play because, when computing the maximum match length of an outer block, any inner look-behind blocks are skipped over, since they do not directly contribute to the length matched by the outer block. The error was in the loop that skips over these nested look-behind blocks: it treated `end` as exclusive (`loc < end`) even though `end` is inclusive, so it could stop one position short of the matching look-behind end op.
This commit is contained in:
parent
352b481146
commit
f062244cdb
2 changed files with 10 additions and 2 deletions
|
@ -3475,6 +3475,9 @@ int32_t RegexCompile::minMatchLength(int32_t start, int32_t end) {
|
|||
// value may be longer than the actual maximum; it must
|
||||
// never be shorter.
|
||||
//
|
||||
// start, end: the range of the pattern to check.
|
||||
// end is inclusive.
|
||||
//
|
||||
//------------------------------------------------------------------------------
|
||||
int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) {
|
||||
if (U_FAILURE(*fStatus)) {
|
||||
|
@ -3720,14 +3723,14 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) {
|
|||
// Look-behind. Scan forward until the matching look-around end,
|
||||
// without processing the look-behind block.
|
||||
int32_t dataLoc = URX_VAL(op);
|
||||
for (loc = loc + 1; loc < end; ++loc) {
|
||||
for (loc = loc + 1; loc <= end; ++loc) {
|
||||
op = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
|
||||
int32_t opType = URX_TYPE(op);
|
||||
if ((opType == URX_LA_END || opType == URX_LBN_END) && (URX_VAL(op) == dataLoc)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
U_ASSERT(loc < end);
|
||||
U_ASSERT(loc <= end);
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
5
icu4c/source/test/testdata/regextst.txt
vendored
5
icu4c/source/test/testdata/regextst.txt
vendored
|
@ -1497,6 +1497,11 @@
|
|||
#
|
||||
"(?w)\b" v2 "äää<0></0> äää"
|
||||
|
||||
# Bug ICU-21492 Assertion failure with nested look-around expressions.
|
||||
#
|
||||
"(?<=(?:(?<=(?:(?<=(?:(?<=)){2})){3})){4}" E "<0></0>" # orig failure from bug report, w mismatched parens.
|
||||
"(?:(?<=(?:(?<=)){2}))" "<0></0>" # Simplified case, with a valid pattern.
|
||||
|
||||
# Random debugging, Temporary
|
||||
#
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue