mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-2325 make unescape recognize \e \cX
X-SVN-Rev: 10138
This commit is contained in:
parent
10bc681d20
commit
69ede390df
4 changed files with 31 additions and 13 deletions
|
@ -2785,11 +2785,12 @@ public:
|
|||
* \Uhhhhhhhh 8 hex digits
|
||||
* \xhh 1-2 hex digits
|
||||
* \ooo 1-3 octal digits; o in [0-7]
|
||||
* \cX control-X; X is masked with 0x1F
|
||||
*
|
||||
* as well as the standard ANSI C escapes:
|
||||
*
|
||||
* \a => U+0007, \b => U+0008, \t => U+0009, \n => U+000A,
|
||||
* \v => U+000B, \f => U+000C, \r => U+000D,
|
||||
* \v => U+000B, \f => U+000C, \r => U+000D, \e => U+001B,
|
||||
* \" => U+0022, \' => U+0027, \? => U+003F, \\ => U+005C
|
||||
*
|
||||
* Anything else following a backslash is generically escaped. For
|
||||
|
|
|
@ -884,11 +884,12 @@ u_memrchr32(const UChar *s, UChar32 c, int32_t count);
|
|||
* \Uhhhhhhhh 8 hex digits
|
||||
* \xhh 1-2 hex digits
|
||||
* \ooo 1-3 octal digits; o in [0-7]
|
||||
* \cX control-X; X is masked with 0x1F
|
||||
*
|
||||
* as well as the standard ANSI C escapes:
|
||||
*
|
||||
* \a => U+0007, \b => U+0008, \t => U+0009, \n => U+000A,
|
||||
* \v => U+000B, \f => U+000C, \r => U+000D,
|
||||
* \v => U+000B, \f => U+000C, \r => U+000D, \e => U+001B,
|
||||
* \" => U+0022, \' => U+0027, \? => U+003F, \\ => U+005C
|
||||
*
|
||||
* Anything else following a backslash is generically escaped. For
|
||||
|
|
|
@ -1273,6 +1273,7 @@ static const UChar UNESCAPE_MAP[] = {
|
|||
/*\ 0x5C, 0x5C */
|
||||
/*a*/ 0x61, 0x07,
|
||||
/*b*/ 0x62, 0x08,
|
||||
/*e*/ 0x65, 0x1b,
|
||||
/*f*/ 0x66, 0x0c,
|
||||
/*n*/ 0x6E, 0x0a,
|
||||
/*r*/ 0x72, 0x0d,
|
||||
|
@ -1379,6 +1380,19 @@ u_unescapeAt(UNESCAPE_CHAR_AT charAt,
|
|||
}
|
||||
}
|
||||
|
||||
/* Map \cX to control-X: X & 0x1F */
|
||||
if (c == 0x0063 /*'c'*/ && *offset < length) {
|
||||
c = charAt((*offset)++, context);
|
||||
if (UTF_IS_FIRST_SURROGATE(c) && *offset < length) {
|
||||
UChar c2 = charAt(*offset, context);
|
||||
if (UTF_IS_SECOND_SURROGATE(c2)) {
|
||||
++(*offset);
|
||||
c = (UChar) UTF16_GET_PAIR_VALUE(c, c2); /* [sic] */
|
||||
}
|
||||
}
|
||||
return 0x1F & c;
|
||||
}
|
||||
|
||||
/* If no special forms are recognized, then consider
|
||||
* the backslash to generically escape the next character.
|
||||
* Deal with surrogate pairs. */
|
||||
|
|
|
@ -1067,28 +1067,30 @@ static void TestStringCopy()
|
|||
static void
|
||||
TestUnescape() {
|
||||
static UChar buffer[200];
|
||||
|
||||
static const char* input =
|
||||
"Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\cC\\n";
|
||||
|
||||
static const UChar expect[]={
|
||||
0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x3a, 0x20,
|
||||
0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c,
|
||||
0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20,
|
||||
0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x0a, 0
|
||||
0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0
|
||||
};
|
||||
static const int32_t explength = sizeof(expect)/sizeof(expect[0])-1;
|
||||
int32_t length;
|
||||
|
||||
/* test u_unescape() */
|
||||
length=u_unescape(
|
||||
"Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\n",
|
||||
buffer, sizeof(buffer)/sizeof(buffer[0]));
|
||||
if(length!=44 || u_strcmp(buffer, expect)!=0) {
|
||||
log_err("failure in u_unescape(): length %d!=45 and/or incorrect result string\n", length);
|
||||
length=u_unescape(input, buffer, sizeof(buffer)/sizeof(buffer[0]));
|
||||
if(length!=explength || u_strcmp(buffer, expect)!=0) {
|
||||
log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length,
|
||||
explength);
|
||||
}
|
||||
|
||||
/* try preflighting */
|
||||
length=u_unescape(
|
||||
"Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\n",
|
||||
NULL, sizeof(buffer)/sizeof(buffer[0]));
|
||||
if(length!=44 || u_strcmp(buffer, expect)!=0) {
|
||||
log_err("failure in u_unescape(preflighting): length %d!=45\n", length);
|
||||
length=u_unescape(input, NULL, sizeof(buffer)/sizeof(buffer[0]));
|
||||
if(length!=explength || u_strcmp(buffer, expect)!=0) {
|
||||
log_err("failure in u_unescape(preflighting): length %d!=%d\n", length, explength);
|
||||
}
|
||||
|
||||
/* ### TODO: test u_unescapeAt() */
|
||||
|
|
Loading…
Add table
Reference in a new issue