ICU-2325 make unescape recognize \e \cX

X-SVN-Rev: 10138
This commit is contained in:
Alan Liu 2002-11-01 14:36:38 +00:00
parent 10bc681d20
commit 69ede390df
4 changed files with 31 additions and 13 deletions

View file

@ -2785,11 +2785,12 @@ public:
* \Uhhhhhhhh 8 hex digits
* \xhh 1-2 hex digits
* \ooo 1-3 octal digits; o in [0-7]
* \cX control-X; X is masked with 0x1F
*
* as well as the standard ANSI C escapes (plus \e, a common non-standard extension):
*
* \a => U+0007, \b => U+0008, \t => U+0009, \n => U+000A,
* \v => U+000B, \f => U+000C, \r => U+000D,
* \v => U+000B, \f => U+000C, \r => U+000D, \e => U+001B,
* \" => U+0022, \' => U+0027, \? => U+003F, \\ => U+005C
*
* Anything else following a backslash is generically escaped. For

View file

@ -884,11 +884,12 @@ u_memrchr32(const UChar *s, UChar32 c, int32_t count);
* \Uhhhhhhhh 8 hex digits
* \xhh 1-2 hex digits
* \ooo 1-3 octal digits; o in [0-7]
* \cX control-X; X is masked with 0x1F
*
* as well as the standard ANSI C escapes (plus \e, a common non-standard extension):
*
* \a => U+0007, \b => U+0008, \t => U+0009, \n => U+000A,
* \v => U+000B, \f => U+000C, \r => U+000D,
* \v => U+000B, \f => U+000C, \r => U+000D, \e => U+001B,
* \" => U+0022, \' => U+0027, \? => U+003F, \\ => U+005C
*
* Anything else following a backslash is generically escaped. For

View file

@ -1273,6 +1273,7 @@ static const UChar UNESCAPE_MAP[] = {
/*\ 0x5C, 0x5C */
/*a*/ 0x61, 0x07,
/*b*/ 0x62, 0x08,
/*e*/ 0x65, 0x1b,
/*f*/ 0x66, 0x0c,
/*n*/ 0x6E, 0x0a,
/*r*/ 0x72, 0x0d,
@ -1379,6 +1380,19 @@ u_unescapeAt(UNESCAPE_CHAR_AT charAt,
}
}
/* Map \cX to control-X: X & 0x1F */
if (c == 0x0063 /*'c'*/ && *offset < length) {
c = charAt((*offset)++, context);
if (UTF_IS_FIRST_SURROGATE(c) && *offset < length) {
UChar c2 = charAt(*offset, context);
if (UTF_IS_SECOND_SURROGATE(c2)) {
++(*offset);
c = (UChar) UTF16_GET_PAIR_VALUE(c, c2); /* [sic] */
}
}
return 0x1F & c;
}
/* If no special forms are recognized, then consider
* the backslash to generically escape the next character.
* Deal with surrogate pairs. */

View file

@ -1067,28 +1067,30 @@ static void TestStringCopy()
static void
TestUnescape() {
static UChar buffer[200];
static const char* input =
"Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\cC\\n";
static const UChar expect[]={
0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x3a, 0x20,
0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c,
0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20,
0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x0a, 0
0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0
};
static const int32_t explength = sizeof(expect)/sizeof(expect[0])-1;
int32_t length;
/* test u_unescape() */
length=u_unescape(
"Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\n",
buffer, sizeof(buffer)/sizeof(buffer[0]));
if(length!=44 || u_strcmp(buffer, expect)!=0) {
log_err("failure in u_unescape(): length %d!=45 and/or incorrect result string\n", length);
length=u_unescape(input, buffer, sizeof(buffer)/sizeof(buffer[0]));
if(length!=explength || u_strcmp(buffer, expect)!=0) {
log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length,
explength);
}
/* try preflighting */
length=u_unescape(
"Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\n",
NULL, sizeof(buffer)/sizeof(buffer[0]));
if(length!=44 || u_strcmp(buffer, expect)!=0) {
log_err("failure in u_unescape(preflighting): length %d!=45\n", length);
length=u_unescape(input, NULL, sizeof(buffer)/sizeof(buffer[0]));
if(length!=explength || u_strcmp(buffer, expect)!=0) {
log_err("failure in u_unescape(preflighting): length %d!=%d\n", length, explength);
}
/* ### TODO: test u_unescapeAt() */