ICU-2325 make unescape recognize \e \cX

X-SVN-Rev: 10138
2025-04-10 07:39:16 +00:00 · 2002-11-01 14:36:38 +00:00 · 2002-11-01 14:36:38 +00:00 · 69ede390df
commit 69ede390df
parent 10bc681d20
4 changed files with 31 additions and 13 deletions
--- a/icu4c/source/common/unicode/unistr.h
+++ b/icu4c/source/common/unicode/unistr.h
@ -2785,11 +2785,12 @@ public:
   * \Uhhhhhhhh   8 hex digits
   * \xhh         1-2 hex digits
   * \ooo         1-3 octal digits; o in [0-7]
+   * \cX          control-X; X is masked with 0x1F
   *
   * as well as the standard ANSI C escapes:
   *
   * \a => U+0007, \b => U+0008, \t => U+0009, \n => U+000A,
-   * \v => U+000B, \f => U+000C, \r => U+000D,
+   * \v => U+000B, \f => U+000C, \r => U+000D, \e => U+001B (not ANSI C),
   * \" => U+0022, \' => U+0027, \? => U+003F, \\ => U+005C
   *
   * Anything else following a backslash is generically escaped.  For
--- a/icu4c/source/common/unicode/ustring.h
+++ b/icu4c/source/common/unicode/ustring.h
@ -884,11 +884,12 @@ u_memrchr32(const UChar *s, UChar32 c, int32_t count);
 * \Uhhhhhhhh   8 hex digits
 * \xhh         1-2 hex digits
 * \ooo         1-3 octal digits; o in [0-7]
+ * \cX          control-X; X is masked with 0x1F
 *
 * as well as the standard ANSI C escapes:
 *
 * \a => U+0007, \b => U+0008, \t => U+0009, \n => U+000A,
- * \v => U+000B, \f => U+000C, \r => U+000D,
+ * \v => U+000B, \f => U+000C, \r => U+000D, \e => U+001B (not ANSI C),
 * \" => U+0022, \' => U+0027, \? => U+003F, \\ => U+005C
 *
 * Anything else following a backslash is generically escaped.  For
--- a/icu4c/source/common/ustring.c
+++ b/icu4c/source/common/ustring.c
@ -1273,6 +1273,7 @@ static const UChar UNESCAPE_MAP[] = {
    /*\   0x5C, 0x5C */
    /*a*/ 0x61, 0x07,
    /*b*/ 0x62, 0x08,
+    /*e*/ 0x65, 0x1b,
    /*f*/ 0x66, 0x0c,
    /*n*/ 0x6E, 0x0a,
    /*r*/ 0x72, 0x0d,
@ -1379,6 +1380,19 @@ u_unescapeAt(UNESCAPE_CHAR_AT charAt,
        }
    }

+    /* Map \cX to control-X: X & 0x1F */
+    if (c == 0x0063 /*'c'*/ && *offset < length) {
+        c = charAt((*offset)++, context);
+        if (UTF_IS_FIRST_SURROGATE(c) && *offset < length) {
+            UChar c2 = charAt(*offset, context);
+            if (UTF_IS_SECOND_SURROGATE(c2)) {
+                ++(*offset);
+                c = (UChar) UTF16_GET_PAIR_VALUE(c, c2); /* UChar cast may truncate; harmless, only the low 5 bits are returned */
+            }
+        }
+        return 0x1F & c;
+    }
+
    /* If no special forms are recognized, then consider
     * the backslash to generically escape the next character.
     * Deal with surrogate pairs. */
--- a/icu4c/source/test/cintltst/custrtst.c
+++ b/icu4c/source/test/cintltst/custrtst.c
@ -1067,28 +1067,30 @@ static void TestStringCopy()
 static void
 TestUnescape() {
    static UChar buffer[200];
+    
+    static const char* input =
+        "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\cC\\n";
+
    static const UChar expect[]={
        0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x3a, 0x20,
        0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c,
        0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20,
-        0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x0a, 0
+        0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0
    };
+    static const int32_t explength = sizeof(expect)/sizeof(expect[0])-1;
    int32_t length;

    /* test u_unescape() */
-    length=u_unescape(
-        "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\n",
-        buffer, sizeof(buffer)/sizeof(buffer[0]));
-    if(length!=44 || u_strcmp(buffer, expect)!=0) {
-        log_err("failure in u_unescape(): length %d!=45 and/or incorrect result string\n", length);
+    length=u_unescape(input, buffer, sizeof(buffer)/sizeof(buffer[0]));
+    if(length!=explength || u_strcmp(buffer, expect)!=0) {
+        log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length,
+                explength);
    }

    /* try preflighting */
-    length=u_unescape(
-        "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\n",
-        NULL, sizeof(buffer)/sizeof(buffer[0]));
-    if(length!=44 || u_strcmp(buffer, expect)!=0) {
-        log_err("failure in u_unescape(preflighting): length %d!=45\n", length);
+    length=u_unescape(input, NULL, sizeof(buffer)/sizeof(buffer[0]));
+    if(length!=explength || u_strcmp(buffer, expect)!=0) {
+        log_err("failure in u_unescape(preflighting): length %d!=%d\n", length, explength);
    }

    /* ### TODO: test u_unescapeAt() */