diff --git a/icu4c/source/i18n/regexcmp.cpp b/icu4c/source/i18n/regexcmp.cpp index e27c3ca597f..707e212edee 100644 --- a/icu4c/source/i18n/regexcmp.cpp +++ b/icu4c/source/i18n/regexcmp.cpp @@ -3463,7 +3463,6 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) { U_ASSERT(start <= end); U_ASSERT(end < fRXPat->fCompiledPat->size()); - int32_t loc; int32_t op; int32_t opType; @@ -3672,7 +3671,7 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) { case URX_CTR_LOOP: case URX_CTR_LOOP_NG: - // These opcodes will be skipped over by code for URX_CRT_INIT. + // These opcodes will be skipped over by code for URX_CTR_INIT. // We shouldn't encounter them here. UPRV_UNREACHABLE; @@ -3700,21 +3699,15 @@ int32_t RegexCompile::maxMatchLength(int32_t start, int32_t end) { { // Look-behind. Scan forward until the matching look-around end, // without processing the look-behind block. - int32_t depth = 0; - for (;;) { - loc++; + int32_t dataLoc = URX_VAL(op); + for (loc = loc + 1; loc < end; ++loc) { op = (int32_t)fRXPat->fCompiledPat->elementAti(loc); - if (URX_TYPE(op) == URX_LA_START || URX_TYPE(op) == URX_LB_START) { - depth++; + int32_t opType = URX_TYPE(op); + if ((opType == URX_LA_END || opType == URX_LBN_END) && (URX_VAL(op) == dataLoc)) { + break; } - if (URX_TYPE(op) == URX_LA_END || URX_TYPE(op)==URX_LBN_END) { - if (depth == 0) { - break; - } - depth--; - } - U_ASSERT(loc < end); } + U_ASSERT(loc < end); } break; diff --git a/icu4c/source/test/testdata/regextst.txt b/icu4c/source/test/testdata/regextst.txt index 677f4822177..759a1963a37 100644 --- a/icu4c/source/test/testdata/regextst.txt +++ b/icu4c/source/test/testdata/regextst.txt @@ -1439,6 +1439,8 @@ "[^\u0000-\U0010ffff]" "a" "[^[^\u0000-\U0010ffff]]" "<0>a" +"This is a string with (?:one |two |three )endings" "<0>This is a string with two endings" + # Bug ICU-20544. Similar to 20385, above. Assertion failure with a negative look-behind assertion containing # a set with no contents. Look-behind pattern includes more than just the empty set. @@ -1446,9 +1448,19 @@ "(?abc" "(?<=[^[^]]†)" "abc" # Problem also exists w positive look-behind +# Bug ICU-20391. Crash in computation of minimum match length with nested look-around patterns. +# +"(?<=(?<=((?=)){0}+)" E "aaa" +"(?<=(?<=((?=)){0}+))" "<0>" +"(?<=c(?<=b((?=a)){1}+))" "aaa" +"abc(?=de(?=f))...g" "<0>abcdefg" +"abc(?=de(?=f))...g" "abcdxfg" + + # Random debugging, Temporary # + # # Regexps from http://www.regexlib.com #