ICU-3320 Regex: incorrect optimization of patterns beginning with an X{0,*} loop

X-SVN-Rev: 13481
This commit is contained in:
Andy Heninger 2003-10-24 01:01:45 +00:00
parent f1e3bc6923
commit 3c223acda5
4 changed files with 33 additions and 13 deletions

View file

@ -1902,10 +1902,6 @@ SOURCE=.\rematch.cpp
# End Source File
# Begin Source File
SOURCE=.\remtrans.cpp
# End Source File
# Begin Source File
SOURCE=.\repattrn.cpp
# End Source File
# End Group
@ -2026,6 +2022,10 @@ SOURCE=.\rbt_set.h
# End Source File
# Begin Source File
SOURCE=.\remtrans.cpp
# End Source File
# Begin Source File
SOURCE=.\remtrans.h
# End Source File
# Begin Source File

View file

@ -2392,10 +2392,15 @@ void RegexCompile::matchStartType() {
loopEndLoc = URX_VAL(loopEndLoc);
int32_t minLoopCount = fRXPat->fCompiledPat->elementAti(loc+2);
if (minLoopCount == 0) {
loc = loopEndLoc;
} else {
loc+=3; // Skips over operands of CTR_INIT
}
// Min Loop Count of 0, treat like a forward branch and
// move the current minimum length up to the target
// (end of loop) location.
U_ASSERT(loopEndLoc <= end+1);
if (forwardedLength.elementAti(loopEndLoc) > currentLen) {
forwardedLength.setElementAt(currentLen, loopEndLoc);
}
}
loc+=3; // Skips over operands of CTR_INIT
}
atStart = FALSE;
break;

View file

@ -337,9 +337,9 @@ void RegexTest::regex_find(const UnicodeString &pattern,
cleanupAndReturn:
if (failed) {
errln("\"%s\" %s \"%s\"", (const char *)CharString(pattern),
(const char *)CharString(flags),
(const char *)CharString(inputString));
errln("\"%s\" %s \"%s\"", (const char *)CharString(pattern, 0),
(const char *)CharString(flags, 0),
(const char *)CharString(inputString, 0));
// callerPattern->dump();
}
delete parseMatcher;
@ -1816,8 +1816,8 @@ void RegexTest::PerlTests() {
if (expectedS.compare(resultString) != 0) {
errln("Line %d: Incorrect perl expression results. Expected \"%s\"; got \"%s\"",
lineNum, (const char *)CharString(expectedS),
(const char *)CharString(resultString));
lineNum, (const char *)CharString(expectedS, 0),
(const char *)CharString(resultString, 0));
}
delete testMat;

View file

@ -349,7 +349,22 @@
"\ud800\ud800\udc00" "<0>\ud800\U00010000</0>\U00010000\U00010000\U00010001"
"(\ud800)(\udc00)" "\U00010000"
#
# Bug 3225
"1|9" "<0>1</0>"
"1|9" "<0>9</0>"
"1*|9" "<0>1</0>"
"1*|9" "<0></0>9"
"(?:a|ac)d" "<0>acd</0>"
"a|ac" "<0>a</0>c"
#
# Bug 3320
#
"(a([^ ]+)){0,} (c)" "<0><1>a<2>b</2></1> <3>c</3></0> "
"(a([^ ]+))* (c)" "<0><1>a<2>b</2></1> <3>c</3></0> "
#
# Random debugging, Temporary