ICU-2422 regex test reorg, improved debug options

X-SVN-Rev: 10979
This commit is contained in:
Andy Heninger 2003-02-06 01:55:17 +00:00
parent fddb6a5b0d
commit 5522a8521c
6 changed files with 78 additions and 234 deletions

View file

@ -19,7 +19,7 @@ U_NAMESPACE_BEGIN
//
//#define REGEX_SCAN_DEBUG
#define REGEX_DUMP_DEBUG
//#define REGEX_RUN_DEBUG
#define REGEX_RUN_DEBUG
// End of #defines intended to be directly set.
#ifdef REGEX_SCAN_DEBUG

View file

@ -36,6 +36,7 @@ RegexMatcher::RegexMatcher(const RegexPattern *pat) {
fPattern = pat;
fPatternOwned = FALSE;
fInput = NULL;
fTraceDebug = FALSE;
UErrorCode status = U_ZERO_ERROR;
fStack = new UVector32(status); // TODO: do something with status.
fData = fSmallData;
@ -51,8 +52,9 @@ RegexMatcher::RegexMatcher(const RegexPattern *pat) {
RegexMatcher::RegexMatcher(const UnicodeString &regexp, const UnicodeString &input,
uint32_t flags, UErrorCode &status) {
UParseError pe;
fPattern = RegexPattern::compile(regexp, flags, pe, status);
fPatternOwned = TRUE;
fPattern = RegexPattern::compile(regexp, flags, pe, status);
fPatternOwned = TRUE;
fTraceDebug = FALSE;
fStack = new UVector32(status);
fData = fSmallData;
if (fPattern->fDataSize > sizeof(fSmallData)/sizeof(int32_t)) {
@ -67,6 +69,7 @@ RegexMatcher::RegexMatcher(const UnicodeString &regexp,
UParseError pe;
fPattern = RegexPattern::compile(regexp, flags, pe, status);
fPatternOwned = TRUE;
fTraceDebug = FALSE;
fStack = new UVector32(status);
fData = fSmallData;
if (fPattern->fDataSize > sizeof(fSmallData)/sizeof(int32_t)) {
@ -478,6 +481,19 @@ REStackFrame *RegexMatcher::resetStack() {
return (REStackFrame *)iFrame;
}
//--------------------------------------------------------------------------------
//
// setTrace
//
//--------------------------------------------------------------------------------
void RegexMatcher::setTrace(UBool state) {
    // Store the requested trace setting on this matcher.  When the engine is
    // built with REGEX_RUN_DEBUG, MatchAt() tests fTraceDebug before it
    // prints its per-operation trace output.
    this->fTraceDebug = state;
}
//--------------------------------------------------------------------------------
//
// start
@ -614,6 +630,7 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
int32_t opValue; // and the operand value.
#ifdef REGEX_RUN_DEBUG
if (fTraceDebug)
{
printf("MatchAt(startIdx=%d)\n", startIdx);
printf("Original Pattern: ");
@ -670,9 +687,11 @@ void RegexMatcher::MatchAt(int32_t startIdx, UErrorCode &status) {
opType = URX_TYPE(op);
opValue = URX_VAL(op);
#ifdef REGEX_RUN_DEBUG
if (fTraceDebug) {
printf("inputIdx=%d inputChar=%c sp=%3d ", fp->fInputIdx,
fInput->char32At(fp->fInputIdx), (int32_t *)fp-fStack->getBuffer());
fPattern->dumpOp(fp->fPatIdx);
}
#endif
fp->fPatIdx++;
@ -1227,11 +1246,15 @@ breakFromLoop:
fLastMatchEnd = fMatchEnd;
fMatchStart = startIdx;
fMatchEnd = fp->fInputIdx;
REGEX_RUN_DEBUG_PRINTF("Match. start=%d end=%d\n\n", fMatchStart, fMatchEnd);
if (fTraceDebug) {
REGEX_RUN_DEBUG_PRINTF("Match. start=%d end=%d\n\n", fMatchStart, fMatchEnd);
}
}
else
{
REGEX_RUN_DEBUG_PRINTF("No match\n\n");
if (fTraceDebug) {
REGEX_RUN_DEBUG_PRINTF("No match\n\n");
}
}
fFrame = fp; // The active stack frame when the engine stopped.

View file

@ -665,6 +665,14 @@ public:
virtual UnicodeString &appendTail(UnicodeString &dest);
/**
* setTrace Debug function, enable/disable tracing of the matching engine.
* @internal
*/
void setTrace(UBool state);
/**
* ICU "poor man's RTTI", returns a UClassID for the actual class.
*
@ -718,6 +726,8 @@ private:
int32_t *fData; // Data area for use by the compiled pattern.
int32_t fSmallData[8]; // Use this for data if it's enough.
UBool fTraceDebug; // Set true for debug tracing of match engine.
/**
* The address of this static class variable serves as this class's ID
* for ICU "poor man's RTTI".

View file

@ -172,12 +172,9 @@ UBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking,
//---------------------------------------------------------------------------
//
// REGEX_FIND Macro + invocation function to simplify writing tests
// regex tests.
// regex_find(pattern, flags, inputString, lineNumber)
//
// usage:
// REGEX_FIND("pattern", "input text");
// REGEX_ERR("pattern", expected status);
// function to simplify writing regex tests.
//
// The input text is unescaped. The pattern is not.
// The input text is marked with the expected match positions
@ -188,15 +185,11 @@ UBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking,
//
//---------------------------------------------------------------------------
// REGEX_FIND is invoked via a macro, which allows capturing the source file line
// number for use in error messages.
#define REGEX_FIND(pat, text) regex_find(pat, text, U_ZERO_ERROR, __LINE__);
// Set a value into a UVector at position specified by a decimal number in
// a UnicodeString. This is a utility function needed by the actual test function,
// which follows.
void set(UVector &vec, int val, UnicodeString index) {
static void set(UVector &vec, int val, UnicodeString index) {
UErrorCode status=U_ZERO_ERROR;
int idx = 0;
for (int i=0; i<index.length(); i++) {
@ -208,9 +201,10 @@ void set(UVector &vec, int val, UnicodeString index) {
vec.setElementAt(val, idx);
}
void RegexTest::regex_find(const char *pat, const char *input, UErrorCode expectedStatus, int line) {
UnicodeString pattern(pat);
UnicodeString inputString(input);
void RegexTest::regex_find(const UnicodeString &pattern,
const UnicodeString &flags,
const UnicodeString &inputString,
int line) {
UnicodeString unEscapedInput;
UnicodeString deTaggedInput;
@ -228,13 +222,15 @@ void RegexTest::regex_find(const char *pat, const char *input, UErrorCode expect
//
// Compile the caller's pattern
//
UnicodeString patString(pat);
callerPattern = RegexPattern::compile(patString, 0, pe, status);
if (status != expectedStatus) {
callerPattern = RegexPattern::compile(pattern, 0, pe, status);
if (status != U_ZERO_ERROR) {
errln("Line %d: error %x compiling pattern.", line, status);
goto cleanupAndReturn;
}
// callerPattern->dump();
if (flags.indexOf((UChar)'d') >= 0) {
callerPattern->dump();
}
//
// Find the tags in the input data, remove them, and record the group boundary
@ -266,7 +262,12 @@ void RegexTest::regex_find(const char *pat, const char *input, UErrorCode expect
//
matcher = callerPattern->matcher(deTaggedInput, status);
REGEX_CHECK_STATUS_L(line);
if (flags.indexOf((UChar)'t') >= 0) {
matcher->setTrace(TRUE);
}
isMatch = matcher->find();
matcher->setTrace(FALSE);
//
// Match up the groups from the find() with the groups from the tags
@ -1068,6 +1069,9 @@ void RegexTest::API_Pattern() {
//---------------------------------------------------------------------------
//
// Extended A more thorough check for features of regex patterns
// The test cases are in a separate data file,
// source/tests/testdata/regextst.txt
// A description of the test data format is included in that file.
//
//---------------------------------------------------------------------------
void RegexTest::Extended() {
@ -1090,9 +1094,9 @@ void RegexTest::Extended() {
//
UnicodeString testString(FALSE, testData, len);
RegexMatcher quotedStuffMat("\\s*?([\\'\\\"/])(.+?)\\1", 0, status);
RegexMatcher commentMat ("\\s*?(#.*)?$", 0, status);
RegexMatcher flagsMat ("\\s*?([ixsmdt]*)([:letter:]*)", 0, status);
RegexMatcher quotedStuffMat("\\s*([\\'\\\"/])(.+?)\\1", 0, status);
RegexMatcher commentMat ("\\s*(#.*)?$", 0, status);
RegexMatcher flagsMat ("\\s*([ixsmdt]*)([:letter:]*)", 0, status);
RegexMatcher lineMat("(.*?)\\r?\\n", testString, 0, status);
UnicodeString testPattern; // The pattern for test from the test file.
@ -1128,7 +1132,7 @@ void RegexTest::Extended() {
}
//
// Pull out the pattern field, remove it from the input line.
// Pull out the pattern field, remove it from the test file line.
//
quotedStuffMat.reset(testLine);
if (quotedStuffMat.lookingAt(status)) {
@ -1141,7 +1145,7 @@ void RegexTest::Extended() {
//
// Pull out the flags from the input line.
// Pull out the flags from the test file line.
//
flagsMat.reset(testLine);
flagsMat.lookingAt(status); // Will always match, possibly an empty string.
@ -1172,216 +1176,19 @@ void RegexTest::Extended() {
commentMat.reset(testLine);
if (commentMat.lookingAt(status) == FALSE) {
errln("Line %d: unexpected characters at end of test line.", lineNum);
continue;
}
//
// Run the test
//
regex_find(testPattern, testFlags, matchString, lineNum);
}
}
#if 0
//---------------------------------------------------------------------------
//
// Extended A more thorough check for features of regex patterns
//
//---------------------------------------------------------------------------
void RegexTest::Extended() {
// Capturing parens
REGEX_FIND(".(..).", "<0>a<1>bc</1>d</0>");
REGEX_FIND(".*\\A( +hello)", "<0><1> hello</1></0>");
REGEX_FIND("(hello)|(goodbye)", "<0><1>hello</1></0>");
REGEX_FIND("(hello)|(goodbye)", "<0><2>goodbye</2></0>");
REGEX_FIND("abc( +( inner(X?) +) xyz)", "leading cruft <0>abc<1> <2> inner<3></3> </2> xyz</1></0> cruft");
// Non-capturing parens (?: stuff). Groups, but does not capture.
REGEX_FIND("(?:abc)*(tail)", "<0>abcabcabc<1>tail</1></0>");
// Non-greedy *? quantifier
REGEX_FIND(".*?(abc)", "<0> abx <1>abc</1></0> abc abc abc");
REGEX_FIND(".*(abc)", "<0> abx abc abc abc <1>abc</1></0>");
REGEX_FIND( "((?:abc |xyz )*?)abc ", "<0><1>xyz </1>abc </0>abc abc ");
REGEX_FIND( "((?:abc |xyz )*)abc ", "<0><1>xyz abc abc </1>abc </0>");
// Non-greedy +? quantifier
REGEX_FIND( "(a+?)(a*)", "<0><1>a</1><2>aaaaaaaaaaaa</2></0>");
REGEX_FIND( "(a+)(a*)", "<0><1>aaaaaaaaaaaaa</1><2></2></0>");
REGEX_FIND( "((ab)+?)((ab)*)", "<0><1><2>ab</2></1><3>ababababab<4>ab</4></3></0>");
REGEX_FIND( "((ab)+)((ab)*)", "<0><1>abababababab<2>ab</2></1><3></3></0>");
// Non-greedy ?? quantifier
REGEX_FIND( "(ab)(ab)\?\?(ab)\?\?(ab)\?\?(ab)\?\?c",
"<0><1>ab</1><4>ab</4><5>ab</5>c</0>");
// Unicode Properties as naked elements in a pattern
REGEX_FIND( "\\p{Lu}+", "here we go ... <0>ABC</0> and no more.");
REGEX_FIND( "(\\p{L}+)(\\P{L}*?) (\\p{Zs}*)", "7999<0><1>letters</1><2>4949%^&*(</2> <3> </3></0>");
// \w and \W
REGEX_FIND( "\\w+", " $%^&*( <0>hello123</0>%^&*(");
REGEX_FIND( "\\W+", "<0> $%^&*( </0>hello123%^&*(");
// \A match at beginning of input only.
REGEX_FIND (".*\\Ahello", "<0>hello</0> hello");
REGEX_FIND (".*hello", "<0>hello hello</0>");
REGEX_FIND(".*\\Ahello", "stuff\nhello"); // don't match after embedded new-line.
// \b \B
REGEX_FIND( ".*?\\b(.).*", "<0> $%^&*( <1>h</1>ello123%^&*()gxx</0>");
REGEX_FIND( "\\ba\\b", "-<0>a</0>");
REGEX_FIND("\\by\\b", "xy");
// Finds first chars of up to 5 words
REGEX_FIND( "(?:.*?\\b(\\w))?(?:.*?\\b(\\w))?(?:.*?\\b(\\w))?(?:.*?\\b(\\w))?(?:.*?\\b(\\w))?",
"<0><1>T</1>the <2>q</2>ick <3>b</3>rown <4>f</4></0>ox");
REGEX_FIND( "H.*?((?:\\B.)+)", "<0>H<1>ello</1></0> ");
REGEX_FIND( ".*?((?:\\B.)+).*?((?:\\B.)+).*?((?:\\B.)+)",
"<0>H<1>ello</1> <2> </2>g<3>oodbye</3></0> ");
REGEX_FIND("(?:.*?\\b(.))?(?:.*?\\b(.))?(?:.*?\\b(.))?(?:.*?\\b(.))?(?:.*?\\b(.))?.*",
"<0> \\u0301 \\u0301<1>A</1>\\u0302BC\\u0303\\u0304<2> </2>\\u0305 \\u0306"
"<3>X</3>\\u0307Y\\u0308</0>");
// . does not match new-lines
REGEX_FIND(".", "\\u000a\\u000d\\u0085\\u000c\\u2028\\u2029<0>X</0>\\u000aY");
REGEX_FIND("A.", "A\\u000a "); // no match
// \d for decimal digits
REGEX_FIND("\\d*", "<0>0123456789\\u0660\\u06F9\\u0969\\u0A66\\u1369"
"\\u17E2\\uFF10\\U0001D7CE\\U0001D7FF</0>non-digits");
REGEX_FIND("\\D+", "<0>non digits</0>");
REGEX_FIND("\\D*(\\d*)(\\D*)", "<0>non-digits<1>3456666</1><2>more non digits</2></0>");
// \Q...\E quote mode
REGEX_FIND("hel\\Qlo, worl\\Ed", "<0>hello, world</0>");
REGEX_FIND("\\Q$*^^(*)?\\A\\E(a*)", "<0>$*^^(*)?\\\\A<1>aaaaaaaaaaaaaaa</1></0>");
// \S and \s space characters
REGEX_FIND("\\s+", "not_space<0> \\t \\r \\n \\u3000 \\u2004 \\u2028 \\u2029</0>xyz");
REGEX_FIND("(\\S+).*?(\\S+).*", "<0><1>Not-spaces</1> <2>more-non-spaces</2> </0>");
// \X consume one combining char sequence.
REGEX_FIND("(\\X)?(\\X)?(\\X)?(\\X)?(\\X)?(\\X)?(\\X)?(\\X)?(\\X)?(\\X)?(\\X)?(\\X)?",
"<0><1>A</1><2>B</2><3> </3><4>\\r\\n</4></0>");
REGEX_FIND("(\\X)?(\\X)?(\\X)?(\\X)?(\\X)?(\\X)?(\\X)?(\\X)?(\\X)?(\\X)?(\\X)?(\\X)?",
"<0><1>A\\u0301</1><2>\n</2><3>\\u0305</3><4>a\\u0302\\u0303\\u0304</4></0>");
// ^ matches only at beginning of line
REGEX_FIND(".*^(Hello)", "<0><1>Hello</1></0> Hello Hello Hello Goodbye");
REGEX_FIND(".*(Hello)", "<0>Hello Hello Hello <1>Hello</1></0> Goodbye");
REGEX_FIND(".*^(Hello)", " Hello Hello Hello Hello Goodbye"); // No Match
// $ matches only at end of line, or before a newline preceding the end of line
REGEX_FIND(".*?(Goodbye)$", "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>");
REGEX_FIND(".*?(Goodbye)", "<0>Hello <1>Goodbye</1></0> Goodbye Goodbye");
REGEX_FIND(".*?(Goodbye)$", "Hello Goodbye> Goodbye Goodbye "); // No Match
REGEX_FIND(".*?(Goodbye)$", "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\\n");
REGEX_FIND(".*?(Goodbye)$", "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\\n");
REGEX_FIND(".*?(Goodbye)$", "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\\r\\n");
REGEX_FIND(".*?(Goodbye)$", "Hello Goodbye Goodbye Goodbye\\n\\n"); // No Match
// \Z matches at end of input, like $ with default flags.
REGEX_FIND(".*?(Goodbye)\\Z", "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>");
REGEX_FIND(".*?(Goodbye)", "<0>Hello <1>Goodbye</1></0> Goodbye Goodbye");
REGEX_FIND(".*?(Goodbye)\\Z", "Hello Goodbye> Goodbye Goodbye "); // No Match
REGEX_FIND("here$", "here\\nthe end"); // No Match
REGEX_FIND(".*?(Goodbye)\\Z", "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\\n");
REGEX_FIND(".*?(Goodbye)\\Z", "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\\n");
REGEX_FIND(".*?(Goodbye)\\Z", "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>\\r\\n");
REGEX_FIND(".*?(Goodbye)\\Z", "Hello Goodbye Goodbye Goodbye\\n\\n"); // No Match
// \z matches only at the end of string.
// no special treatment of new lines.
// no dependencies on flag settings.
REGEX_FIND(".*?(Goodbye)\\z", "<0>Hello Goodbye Goodbye <1>Goodbye</1></0>");
REGEX_FIND(".*?(Goodbye)\\z", "Hello Goodbye Goodbye Goodbye "); // No Match
REGEX_FIND("here$", "here\\nthe end"); // No Match
REGEX_FIND(".*?(Goodbye)\\z", "Hello Goodbye Goodbye Goodbye\\n"); // No Match
REGEX_FIND(".*?(Goodbye)\\n\\z", "<0>Hello Goodbye Goodbye <1>Goodbye</1>\\n</0>");
// (?# comment) doesn't muck up pattern
REGEX_FIND("Hello (?# this is a comment) world", " <0>Hello world</0>...");
// Check some implementation corner cases based on the way literal strings are compiled.
REGEX_FIND("A", "<0>A</0>");
REGEX_FIND("AB", "<0>AB</0>ABABAB");
REGEX_FIND("AB+", "<0>ABBB</0>A");
REGEX_FIND("AB+", "<0>AB</0>ABAB");
REGEX_FIND("ABC+", "<0>ABC</0>ABC");
REGEX_FIND("ABC+", "<0>ABCCCC</0>ABC");
REGEX_FIND("(?:ABC)+", "<0>ABCABCABC</0>D");
REGEX_FIND("(?:ABC)DEF+", "<0>ABCDEFFF</0>D");
REGEX_FIND("AB\\.C\\eD\\u0666E", "<0>AB.C\\u001BD\\u0666E</0>F");
// {min,max} iteration qualifier
REGEX_TESTLM("A{3}BC", "AAABC", TRUE, TRUE);
REGEX_FIND("(ABC){2,3}AB", "no matchAB");
REGEX_FIND("(ABC){2,3}AB", "ABCAB");
REGEX_FIND("(ABC){2,3}AB", "<0>ABC<1>ABC</1>AB</0>");
REGEX_FIND("(ABC){2,3}AB", "<0>ABCABC<1>ABC</1>AB</0>");
REGEX_FIND("(ABC){2,3}AB", "<0>ABCABC<1>ABC</1>AB</0>CAB");
REGEX_FIND("(ABC){2}AB", "ABCAB");
REGEX_FIND("(ABC){2}AB", "<0>ABC<1>ABC</1>AB</0>");
REGEX_FIND("(ABC){2}AB", "<0>ABC<1>ABC</1>AB</0>CAB");
REGEX_FIND("(ABC){2}AB", "<0>ABC<1>ABC</1>AB</0>CABCAB");
REGEX_FIND("(ABC){2,}AB", "ABCAB");
REGEX_FIND("(ABC){2,}AB", "<0>ABC<1>ABC</1>AB</0>");
REGEX_FIND("(ABC){2,}AB", "<0>ABCABC<1>ABC</1>AB</0>");
REGEX_FIND("(ABC){2,}AB", "<0>ABCABCABC<1>ABC</1>AB</0>");
REGEX_FIND("X{0,0}ABC", "<0>ABC</0>");
REGEX_FIND("X{0,1}ABC", "<0>ABC</0>");
REGEX_FIND("(?:Hello(!{1,3}) there){1}", "Hello there");
REGEX_FIND("(?:Hello(!{1,3}) there){1}", "<0>Hello<1>!</1> there</0>");
REGEX_FIND("(?:Hello(!{1,3}) there){1}", "<0>Hello<1>!!</1> there</0>");
REGEX_FIND("(?:Hello(!{1,3}) there){1}", "<0>Hello<1>!!!</1> there</0>");
REGEX_FIND("(?:Hello(!{1,3}) there){1}", "Hello!!!! there");
// Nongreedy {min,max}? intervals
REGEX_FIND("(ABC){2,3}?AB", "no matchAB");
REGEX_FIND("(ABC){2,3}?AB", "ABCAB");
REGEX_FIND("(ABC){2,3}?AB", "<0>ABC<1>ABC</1>AB</0>");
REGEX_FIND("(ABC){2,3}?AB", "<0>ABC<1>ABC</1>AB</0>CAB");
REGEX_FIND("(ABC){2,3}?AB", "<0>ABC<1>ABC</1>AB</0>CABCAB");
REGEX_FIND("(ABC){2,3}?AX", "<0>ABCABC<1>ABC</1>AX</0>");
REGEX_FIND("(ABC){2,3}?AX", "ABC<0>ABCABC<1>ABC</1>AX</0>");
// Atomic Grouping
REGEX_FIND("(?>.*)abc", "abcabcabc"); // no match. .* consumed entire string.
REGEX_FIND("(?>(abc{2,4}?))(c*)", "<0><1>abcc</1><2>ccc</2></0>ddd");
REGEX_FIND("(\\.\\d\\d(?>[1-9]?))\\d+", "1.625");
REGEX_FIND("(\\.\\d\\d(?>[1-9]?))\\d+", "1<0><1>.625</1>0</0>");
// Possessive *+
REGEX_FIND("(abc)*+a", "abcabcabc");
REGEX_FIND("(abc)*+a", "<0>abc<1>abc</1>a</0>b");
REGEX_FIND("(a*b)*+a", "<0><1>aaaab</1>a</0>aaa");
// Possessive ?+
REGEX_FIND("c?+ddd", "<0>cddd</0>");
REGEX_FIND("c?+cddd", "cddd");
REGEX_FIND("c?cddd", "<0>cddd</0>");
// Back Reference
REGEX_FIND("(?:ab(..)cd\\1)*", "<0>ab23cd23ab<1>ww</1>cdww</0>abxxcdyy");
REGEX_FIND("ab(?:c|(d?))(\\1)", "<0>ab<1><2></2></1></0>c");
REGEX_FIND("ab(?:c|(d?))(\\1)", "<0>ab<1>d</1><2>d</2></0>");
REGEX_FIND("ab(?:c|(d?))(\\1)", "<0>ab<1></1><2></2></0>e");
REGEX_FIND("ab(?:c|(d?))(\\1)", "<0>ab<1></1><2></2></0>");
}
#endif
//---------------------------------------------------------------------------
//

View file

@ -34,7 +34,8 @@ public:
// The following functions are internal to the regexp tests.
virtual UBool doRegexLMTest(const char *pat, const char *text, UBool looking, UBool match, int line);
virtual void regex_find(const char *pat, const char *input, UErrorCode expectedStatus, int line);
virtual void regex_find(const UnicodeString &pat, const UnicodeString &flags,
const UnicodeString &input, int line);
virtual void regex_err(const char *pat, int32_t errline, int32_t errcol,
UErrorCode expectedStatus, int line);
virtual UChar *ReadAndConvertFile(const char *fileName, int &len, UErrorCode &status);

View file

@ -7,9 +7,10 @@
# <test case> = <pattern> <flags> <match string> [# comment]
# <pattern> = "<regular expression pattern>"
# <match string> = "<tagged string>"
# the quotes on the pattern and match string can be " or ' or /
# <tagged string> = text, with the start and end of each
# capture group tagged with <n>...</n>. The overall match,
# if any is group 0, as in <0>matched text</0>
# if any, is group 0, as in <0>matched text</0>
# <flags> = any combination of
# i case insensitive match
# x free spacing and comments
@ -26,6 +27,7 @@
"(hello)|(goodbye)" "<0><1>hello</1></0>"
"(hello)|(goodbye)" "<0><2>goodbye</2></0>"
"abc( +( inner(X?) +) xyz)" "leading cruft <0>abc<1> <2> inner<3></3> </2> xyz</1></0> cruft"
"\s*([ixsmdt]*)([:letter:]*)" "<0> <1>d</1><2></2></0> "
# Non-capturing parens (?: stuff). Groups, but does not capture.
"(?:abc)*(tail)" "<0>abcabcabc<1>tail</1></0>"
@ -45,7 +47,7 @@
"((ab)+)((ab)*)" "<0><1>abababababab<2>ab</2></1><3></3></0>"
# Non-greedy ?? quantifier
"(ab)(ab)\?\?(ab)\?\?(ab)\?\?(ab)\?\?c" "<0><1>ab</1><4>ab</4><5>ab</5>c</0>"
"(ab)(ab)??(ab)??(ab)??(ab)??c" "<0><1>ab</1><4>ab</4><5>ab</5>c</0>"
# Unicode Properties as naked elements in a pattern
"\p{Lu}+" "here we go ... <0>ABC</0> and no more."
@ -65,8 +67,9 @@
"\ba\b" "-<0>a</0>"
"\by\b" "xy"
# Finds first chars of up to 5 words
# Finds first chars of up to 5 words
"(?:.*?\b(\w))?(?:.*?\b(\w))?(?:.*?\b(\w))?(?:.*?\b(\w))?(?:.*?\b(\w))?" "<0><1>T</1>the <2>q</2>ick <3>b</3>rown <4>f</4></0>ox"
"H.*?((?:\B.)+)" "<0>H<1>ello</1></0> "
".*?((?:\B.)+).*?((?:\B.)+).*?((?:\B.)+)" "<0>H<1>ello</1> <2> </2>g<3>oodbye</3></0> "
@ -83,7 +86,7 @@
# \Q...\E quote mode
"hel\Qlo, worl\Ed" "<0>hello, world</0>"
"\Q$*^^(*)?\A\E(a*)" "<0>$*^^(*)?\A<1>aaaaaaaaaaaaaaa</1></0>"
"\Q$*^^(*)?\A\E(a*)" "<0>$*^^(*)?\\A<1>aaaaaaaaaaaaaaa</1></0>"
# \S and \s space characters
"\s+" "not_space<0> \t \r \n \u3000 \u2004 \u2028 \u2029</0>xyz"