diff --git a/icu4j/src/com/ibm/icu/impl/Utility.java b/icu4j/src/com/ibm/icu/impl/Utility.java index 6ee12d2024b..cce98a1847c 100755 --- a/icu4j/src/com/ibm/icu/impl/Utility.java +++ b/icu4j/src/com/ibm/icu/impl/Utility.java @@ -5,12 +5,14 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/Utility.java,v $ - * $Date: 2001/07/03 16:35:12 $ - * $Revision: 1.6 $ + * $Date: 2001/09/24 19:57:51 $ + * $Revision: 1.7 $ * ***************************************************************************************** */ package com.ibm.util; +import com.ibm.text.UCharacter; +import com.ibm.text.UTF16; public final class Utility { @@ -635,6 +637,140 @@ public final class Utility { return buf.toString(); } + /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */ + static private final char[] UNESCAPE_MAP = { + /*" 0x22, 0x22 */ + /*' 0x27, 0x27 */ + /*? 0x3F, 0x3F */ + /*\ 0x5C, 0x5C */ + /*a*/ 0x61, 0x07, + /*b*/ 0x62, 0x08, + /*f*/ 0x66, 0x0c, + /*n*/ 0x6E, 0x0a, + /*r*/ 0x72, 0x0d, + /*t*/ 0x74, 0x09, + /*v*/ 0x76, 0x0b + }; + + /** + * Convert an escape to a 32-bit code point value. We attempt + * to parallel the icu4c unescapeAt() function. + * @param offset16 an array containing offset to the character + * after the backslash. Upon return offset16[0] will + * be updated to point after the escape sequence. + * @return character value from 0 to 10FFFF, or -1 on error. 
+ */ + public static int unescapeAt(String s, int[] offset16) { + int c; + int result = 0; + int n = 0; + int minDig = 0; + int maxDig = 0; + int bitsPerDigit = 4; + int dig; + int i; + + /* Check that offset is in range */ + int offset = offset16[0]; + int length = s.length(); + if (offset < 0 || offset >= length) { + return -1; + } + + /* Fetch first UChar after '\\' */ + c = UTF16.charAt(s, offset); + offset += UTF16.getCharCount(c); + + /* Convert hexadecimal and octal escapes */ + switch (c) { + case 'u': + minDig = maxDig = 4; + break; + case 'U': + minDig = maxDig = 8; + break; + case 'x': + minDig = 1; + maxDig = 2; + break; + default: + dig = UCharacter.digit(c, 8); + if (dig >= 0) { + minDig = 1; + maxDig = 3; + n = 1; /* Already have first octal digit */ + bitsPerDigit = 3; + result = dig; + } + break; + } + if (minDig != 0) { + while (offset < length && n < maxDig) { + // TEMPORARY + // TODO: Restore the char32-based code when UCharacter.digit + // is working (Bug 66). + + //c = UTF16.charAt(s, offset); + //dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16); + c = s.charAt(offset); + dig = Character.digit((char)c, (bitsPerDigit == 3) ? 8 : 16); + if (dig < 0) { + break; + } + result = (result << bitsPerDigit) | dig; + //offset += UTF16.getCharCount(c); + ++offset; + ++n; + } + if (n < minDig) { + return -1; + } + offset16[0] = offset; + return result; + } + + /* Convert C-style escapes in table */ + for (i=0; i * "0041". @@ -689,6 +825,16 @@ public final class Utility { return output; } + /** + * Convert an integer to width (minimum) uppercase hex digits, zero-padded. + * E.g., hex('A', 4) => "0041". Intended: wider values use more digits. + * NOTE(review): substring() throws when the value needs more than width digits. + */ + public static String hex(int ch, int width) { + String foo = Integer.toString(ch, 16).toUpperCase(); + return "0000000".substring(foo.length() + 7 - width) + foo; + } + /** * Convert a string to comma-separated groups of 4 hex uppercase * digits. E.g., hex('ab') => "0041,0042". 
Append the output diff --git a/icu4j/src/com/ibm/util/Utility.java b/icu4j/src/com/ibm/util/Utility.java index f1bd6ac01e3..6f8f980b46c 100755 --- a/icu4j/src/com/ibm/util/Utility.java +++ b/icu4j/src/com/ibm/util/Utility.java @@ -5,12 +5,14 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/util/Attic/Utility.java,v $ - * $Date: 2001/07/03 16:35:12 $ - * $Revision: 1.6 $ + * $Date: 2001/09/24 19:57:51 $ + * $Revision: 1.7 $ * ***************************************************************************************** */ package com.ibm.util; +import com.ibm.text.UCharacter; +import com.ibm.text.UTF16; public final class Utility { @@ -635,6 +637,140 @@ public final class Utility { return buf.toString(); } + /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */ + static private final char[] UNESCAPE_MAP = { + /*" 0x22, 0x22 */ + /*' 0x27, 0x27 */ + /*? 0x3F, 0x3F */ + /*\ 0x5C, 0x5C */ + /*a*/ 0x61, 0x07, + /*b*/ 0x62, 0x08, + /*f*/ 0x66, 0x0c, + /*n*/ 0x6E, 0x0a, + /*r*/ 0x72, 0x0d, + /*t*/ 0x74, 0x09, + /*v*/ 0x76, 0x0b + }; + + /** + * Convert an escape to a 32-bit code point value. We attempt + * to parallel the icu4c unescapeAt() function. + * @param offset16 an array containing offset to the character + * after the backslash. Upon return offset16[0] will + * be updated to point after the escape sequence. + * @return character value from 0 to 10FFFF, or -1 on error. 
+ */ + public static int unescapeAt(String s, int[] offset16) { + int c; + int result = 0; + int n = 0; + int minDig = 0; + int maxDig = 0; + int bitsPerDigit = 4; + int dig; + int i; + + /* Check that offset is in range */ + int offset = offset16[0]; + int length = s.length(); + if (offset < 0 || offset >= length) { + return -1; + } + + /* Fetch first UChar after '\\' */ + c = UTF16.charAt(s, offset); + offset += UTF16.getCharCount(c); + + /* Convert hexadecimal and octal escapes */ + switch (c) { + case 'u': + minDig = maxDig = 4; + break; + case 'U': + minDig = maxDig = 8; + break; + case 'x': + minDig = 1; + maxDig = 2; + break; + default: + dig = UCharacter.digit(c, 8); + if (dig >= 0) { + minDig = 1; + maxDig = 3; + n = 1; /* Already have first octal digit */ + bitsPerDigit = 3; + result = dig; + } + break; + } + if (minDig != 0) { + while (offset < length && n < maxDig) { + // TEMPORARY + // TODO: Restore the char32-based code when UCharacter.digit + // is working (Bug 66). + + //c = UTF16.charAt(s, offset); + //dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16); + c = s.charAt(offset); + dig = Character.digit((char)c, (bitsPerDigit == 3) ? 8 : 16); + if (dig < 0) { + break; + } + result = (result << bitsPerDigit) | dig; + //offset += UTF16.getCharCount(c); + ++offset; + ++n; + } + if (n < minDig) { + return -1; + } + offset16[0] = offset; + return result; + } + + /* Convert C-style escapes in table */ + for (i=0; i * "0041". @@ -689,6 +825,16 @@ public final class Utility { return output; } + /** + * Convert an integer to width (minimum) uppercase hex digits, zero-padded. + * E.g., hex('A', 4) => "0041". Intended: wider values use more digits. + * NOTE(review): substring() throws when the value needs more than width digits. + */ + public static String hex(int ch, int width) { + String foo = Integer.toString(ch, 16).toUpperCase(); + return "0000000".substring(foo.length() + 7 - width) + foo; + } + /** * Convert a string to comma-separated groups of 4 hex uppercase * digits. E.g., hex('ab') => "0041,0042". 
Append the output