mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-2411 Regexp: added support for \x hex escapes in patterns.
X-SVN-Rev: 11862
This commit is contained in:
parent
800f4020cb
commit
efc59e36a9
7 changed files with 30 additions and 31 deletions
|
@ -1131,12 +1131,6 @@ UBool RegexCompile::doParseActions(EParseAction action)
|
|||
fRXPat->fCompiledPat->addElement(URX_BUILD(URX_BACKSLASH_X, 0), *fStatus);
|
||||
break;
|
||||
|
||||
case doBackslashx: // \x{abcd} alternate hex format
|
||||
// TODO: this is waiting for a decision on adding \x to unescape.
|
||||
error(U_REGEX_UNIMPLEMENTED);
|
||||
break;
|
||||
|
||||
|
||||
|
||||
case doBackslashZ:
|
||||
fRXPat->fCompiledPat->addElement(URX_BUILD(URX_DOLLAR, 0), *fStatus);
|
||||
|
|
|
@ -37,7 +37,6 @@ enum Regex_PatternParseAction {
|
|||
doPossesiveStar,
|
||||
doMismatchedParenErr,
|
||||
doOpenLookBehind,
|
||||
doBackslashx,
|
||||
doBackslashz,
|
||||
doIntervalError,
|
||||
doStar,
|
||||
|
@ -117,7 +116,7 @@ static const struct RegexTableEl gRuleParseStateTable[] = {
|
|||
, {doOrOperator, 124 /* | */, 2,0, TRUE} // 10
|
||||
, {doCloseParen, 41 /* ) */, 255,0, TRUE} // 11
|
||||
, {doPatFinish, 253, 2,0, FALSE} // 12
|
||||
, {doRuleError, 255, 101,0, FALSE} // 13
|
||||
, {doRuleError, 255, 100,0, FALSE} // 13
|
||||
, {doNOP, 42 /* * */, 57,0, TRUE} // 14 expr-quant
|
||||
, {doNOP, 43 /* + */, 60,0, TRUE} // 15
|
||||
, {doNOP, 63 /* ? */, 63,0, TRUE} // 16
|
||||
|
@ -144,14 +143,14 @@ static const struct RegexTableEl gRuleParseStateTable[] = {
|
|||
, {doBeginMatchMode, 115 /* s */, 49,0, FALSE} // 37
|
||||
, {doBeginMatchMode, 120 /* x */, 49,0, FALSE} // 38
|
||||
, {doBeginMatchMode, 45 /* - */, 49,0, FALSE} // 39
|
||||
, {doConditionalExpr, 40 /* ( */, 101,0, TRUE} // 40
|
||||
, {doPerlInline, 123 /* { */, 101,0, TRUE} // 41
|
||||
, {doBadOpenParenType, 255, 101,0, FALSE} // 42
|
||||
, {doConditionalExpr, 40 /* ( */, 100,0, TRUE} // 40
|
||||
, {doPerlInline, 123 /* { */, 100,0, TRUE} // 41
|
||||
, {doBadOpenParenType, 255, 100,0, FALSE} // 42
|
||||
, {doOpenLookBehind, 61 /* = */, 2, 20, TRUE} // 43 open-paren-lookbehind
|
||||
, {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE} // 44
|
||||
, {doBadOpenParenType, 255, 101,0, FALSE} // 45
|
||||
, {doBadOpenParenType, 255, 100,0, FALSE} // 45
|
||||
, {doNOP, 41 /* ) */, 255,0, TRUE} // 46 paren-comment
|
||||
, {doMismatchedParenErr, 253, 101,0, FALSE} // 47
|
||||
, {doMismatchedParenErr, 253, 100,0, FALSE} // 47
|
||||
, {doNOP, 255, 46,0, TRUE} // 48
|
||||
, {doMatchMode, 105 /* i */, 49,0, TRUE} // 49 paren-flag
|
||||
, {doMatchMode, 109 /* m */, 49,0, TRUE} // 50
|
||||
|
@ -160,7 +159,7 @@ static const struct RegexTableEl gRuleParseStateTable[] = {
|
|||
, {doMatchMode, 45 /* - */, 49,0, TRUE} // 53
|
||||
, {doSetMatchMode, 41 /* ) */, 2,0, TRUE} // 54
|
||||
, {doMatchModeParen, 58 /* : */, 2, 14, TRUE} // 55
|
||||
, {doNOP, 255, 101,0, FALSE} // 56
|
||||
, {doNOP, 255, 100,0, FALSE} // 56
|
||||
, {doNGStar, 63 /* ? */, 20,0, TRUE} // 57 quant-star
|
||||
, {doPossesiveStar, 43 /* + */, 20,0, TRUE} // 58
|
||||
, {doStar, 255, 20,0, FALSE} // 59
|
||||
|
@ -172,14 +171,14 @@ static const struct RegexTableEl gRuleParseStateTable[] = {
|
|||
, {doOpt, 255, 20,0, FALSE} // 65
|
||||
, {doNOP, 129, 66,0, TRUE} // 66 interval-open
|
||||
, {doNOP, 128, 69,0, FALSE} // 67
|
||||
, {doIntervalError, 255, 101,0, FALSE} // 68
|
||||
, {doIntervalError, 255, 100,0, FALSE} // 68
|
||||
, {doIntevalLowerDigit, 128, 69,0, TRUE} // 69 interval-lower
|
||||
, {doNOP, 44 /* , */, 73,0, TRUE} // 70
|
||||
, {doIntervalSame, 125 /* } */, 76,0, TRUE} // 71
|
||||
, {doIntervalError, 255, 101,0, FALSE} // 72
|
||||
, {doIntervalError, 255, 100,0, FALSE} // 72
|
||||
, {doIntervalUpperDigit, 128, 73,0, TRUE} // 73 interval-upper
|
||||
, {doNOP, 125 /* } */, 76,0, TRUE} // 74
|
||||
, {doIntervalError, 255, 101,0, FALSE} // 75
|
||||
, {doIntervalError, 255, 100,0, FALSE} // 75
|
||||
, {doNGInterval, 63 /* ? */, 20,0, TRUE} // 76 interval-type
|
||||
, {doPossesiveInterval, 43 /* + */, 20,0, TRUE} // 77
|
||||
, {doInterval, 255, 20,0, FALSE} // 78
|
||||
|
@ -198,14 +197,13 @@ static const struct RegexTableEl gRuleParseStateTable[] = {
|
|||
, {doBackslashW, 87 /* W */, 14,0, TRUE} // 91
|
||||
, {doBackslashw, 119 /* w */, 14,0, TRUE} // 92
|
||||
, {doBackslashX, 88 /* X */, 14,0, TRUE} // 93
|
||||
, {doBackslashx, 120 /* x */, 14,0, TRUE} // 94
|
||||
, {doBackslashZ, 90 /* Z */, 2,0, TRUE} // 95
|
||||
, {doBackslashz, 122 /* z */, 2,0, TRUE} // 96
|
||||
, {doOctal, 48 /* 0 */, 14,0, TRUE} // 97
|
||||
, {doBackRef, 128, 14,0, TRUE} // 98
|
||||
, {doEscapeError, 253, 101,0, FALSE} // 99
|
||||
, {doLiteralChar, 255, 14,0, TRUE} // 100
|
||||
, {doExit, 255, 101,0, TRUE} // 101 errorDeath
|
||||
, {doBackslashZ, 90 /* Z */, 2,0, TRUE} // 94
|
||||
, {doBackslashz, 122 /* z */, 2,0, TRUE} // 95
|
||||
, {doOctal, 48 /* 0 */, 14,0, TRUE} // 96
|
||||
, {doBackRef, 128, 14,0, TRUE} // 97
|
||||
, {doEscapeError, 253, 100,0, FALSE} // 98
|
||||
, {doLiteralChar, 255, 14,0, TRUE} // 99
|
||||
, {doExit, 255, 100,0, TRUE} // 100 errorDeath
|
||||
};
|
||||
static const char * const RegexStateNames[] = { 0,
|
||||
"start",
|
||||
|
@ -306,7 +304,6 @@ static const char * const RegexStateNames[] = { 0,
|
|||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
"errorDeath",
|
||||
0};
|
||||
|
|
|
@ -246,7 +246,6 @@ backslash:
|
|||
'W' n expr-quant doBackslashW
|
||||
'w' n expr-quant doBackslashw
|
||||
'X' n expr-quant doBackslashX
|
||||
'x' n expr-quant doBackslashx
|
||||
'Z' n term doBackslashZ
|
||||
'z' n term doBackslashz
|
||||
'0' n expr-quant doOctal
|
||||
|
|
|
@ -68,8 +68,8 @@ static const UnicodeSet *gRuleDigits = NULL;
|
|||
// will handle.
|
||||
//
|
||||
static const UChar gUnescapeCharPattern[] = {
|
||||
// [ a c e f n r t u U ]
|
||||
0x5b, 0x61, 0x63, 0x65, 0x66, 0x6e, 0x72, 0x74, 0x75, 0x55, 0x5d, 0};
|
||||
// [ a c e f n r t u U x ]
|
||||
0x5b, 0x61, 0x63, 0x65, 0x66, 0x6e, 0x72, 0x74, 0x75, 0x55, 0x78, 0x5d, 0};
|
||||
|
||||
|
||||
//
|
||||
|
|
|
@ -768,7 +768,7 @@ public:
|
|||
* field delimiters, is placed in the last destination string.
|
||||
* @param status A reference to a UErrorCode to receive any errors.
|
||||
* @return The number of fields into which the input string was split.
|
||||
* @draft ICU 2.4
|
||||
* @draft ICU 2.6
|
||||
*/
|
||||
virtual int32_t split(const UnicodeString &input,
|
||||
UnicodeString dest[],
|
||||
|
|
|
@ -1232,7 +1232,7 @@ void RegexTest::Extended() {
|
|||
//---------------------------------------------------------------------------
|
||||
void RegexTest::Errors() {
|
||||
// \escape sequences that aren't implemented yet.
|
||||
REGEX_ERR("hex format \\x{abcd} not implemented", 1, 13, U_REGEX_UNIMPLEMENTED);
|
||||
//REGEX_ERR("hex format \\x{abcd} not implemented", 1, 13, U_REGEX_UNIMPLEMENTED);
|
||||
|
||||
// Missing close parentheses
|
||||
REGEX_ERR("Comment (?# with no close", 1, 25, U_REGEX_MISMATCHED_PAREN);
|
||||
|
|
9
icu4c/source/test/testdata/regextst.txt
vendored
9
icu4c/source/test/testdata/regextst.txt
vendored
|
@ -328,6 +328,15 @@
|
|||
"ab([0-9]+|[A-Z]+)yz" "---<0>ab<1>AB</1>yz</0>+++"
|
||||
"ab([0-9]+|[A-Z]+)yz" "---<0>ab<1>ABCDE</1>yz</0>+++"
|
||||
|
||||
#
|
||||
# Hex format \x escaping
|
||||
#
|
||||
"ab\x63" "<0>abc</0>"
|
||||
"ab\x09w" "<0>ab\u0009w</0>"
|
||||
"ab\xabcdc" "<0>ab\u00abcdc</0>"
|
||||
"ab\x{abcd}c" "<0>ab\uabcdc</0>"
|
||||
"ab\x{101234}c" "<0>ab\U00101234c</0>"
|
||||
"abα" "<0>abα</0>"
|
||||
|
||||
#
|
||||
# Random debugging, Temporary
|
||||
|
|
Loading…
Add table
Reference in a new issue