diff --git a/icu4j/src/com/ibm/icu/impl/Utility.java b/icu4j/src/com/ibm/icu/impl/Utility.java
index 6ee12d2024b..cce98a1847c 100755
--- a/icu4j/src/com/ibm/icu/impl/Utility.java
+++ b/icu4j/src/com/ibm/icu/impl/Utility.java
@@ -5,12 +5,14 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/Utility.java,v $
- * $Date: 2001/07/03 16:35:12 $
- * $Revision: 1.6 $
+ * $Date: 2001/09/24 19:57:51 $
+ * $Revision: 1.7 $
*
*****************************************************************************************
*/
package com.ibm.util;
+import com.ibm.text.UCharacter;
+import com.ibm.text.UTF16;
public final class Utility {
@@ -635,6 +637,140 @@ public final class Utility {
return buf.toString();
}
+ /*
+ * Flat table of (escape letter, replacement character) pairs for the
+ * single-letter C-style escapes: each row below holds the letter that
+ * follows the backslash, then the character value it stands for.
+ * This map must be in ASCENDING ORDER OF THE ESCAPE CODE.
+ * NOTE(review): the lookup loop that consumes this table is not visible
+ * in this view; presumably it relies on the ordering -- confirm.
+ */
+ static private final char[] UNESCAPE_MAP = {
+ /* The next four entries map a character to itself and are commented
+ * out; presumably such escapes are handled without the table -- confirm. */
+ /*" 0x22, 0x22 */
+ /*' 0x27, 0x27 */
+ /*? 0x3F, 0x3F */
+ /*\ 0x5C, 0x5C */
+ /*a*/ 0x61, 0x07, // U+0007 alert (BEL)
+ /*b*/ 0x62, 0x08, // U+0008 backspace
+ /*f*/ 0x66, 0x0c, // U+000C form feed
+ /*n*/ 0x6E, 0x0a, // U+000A line feed
+ /*r*/ 0x72, 0x0d, // U+000D carriage return
+ /*t*/ 0x74, 0x09, // U+0009 horizontal tab
+ /*v*/ 0x76, 0x0b // U+000B vertical tab
+ };
+
+ /**
+ * Convert an escape to a 32-bit code point value. We attempt
+ * to parallel the icu4c u_unescapeAt() function.
+ * @param offset16 an array containing offset to the character
+ * after the backslash. Upon return offset16[0] will
+ * be updated to point after the escape sequence.
+ * @return character value from 0 to 10FFFF, or -1 on error.
+ */
+ public static int unescapeAt(String s, int[] offset16) {
+ int c;
+ int result = 0;
+ int n = 0;
+ int minDig = 0;
+ int maxDig = 0;
+ int bitsPerDigit = 4;
+ int dig;
+ int i;
+
+ /* Check that offset is in range */
+ int offset = offset16[0];
+ int length = s.length();
+ if (offset < 0 || offset >= length) {
+ return -1;
+ }
+
+ /* Fetch first UChar after '\\' */
+ c = UTF16.charAt(s, offset);
+ offset += UTF16.getCharCount(c);
+
+ /* Convert hexadecimal and octal escapes */
+ switch (c) {
+ case 'u':
+ minDig = maxDig = 4;
+ break;
+ case 'U':
+ minDig = maxDig = 8;
+ break;
+ case 'x':
+ minDig = 1;
+ maxDig = 2;
+ break;
+ default:
+ dig = UCharacter.digit(c, 8);
+ if (dig >= 0) {
+ minDig = 1;
+ maxDig = 3;
+ n = 1; /* Already have first octal digit */
+ bitsPerDigit = 3;
+ result = dig;
+ }
+ break;
+ }
+ if (minDig != 0) {
+ while (offset < length && n < maxDig) {
+ // TEMPORARY
+ // TODO: Restore the char32-based code when UCharacter.digit
+ // is working (Bug 66).
+
+ //c = UTF16.charAt(s, offset);
+ //dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
+ c = s.charAt(offset);
+ dig = Character.digit((char)c, (bitsPerDigit == 3) ? 8 : 16);
+ if (dig < 0) {
+ break;
+ }
+ result = (result << bitsPerDigit) | dig;
+ //offset += UTF16.getCharCount(c);
+ ++offset;
+ ++n;
+ }
+ if (n < minDig) {
+ return -1;
+ }
+ offset16[0] = offset;
+ return result;
+ }
+
+ /* Convert C-style escapes in table */
+ for (i=0; i
* "0041".
@@ -689,6 +825,16 @@ public final class Utility {
return output;
}
+ /**
+ * Convert a integer to size width (minimum) hex uppercase digits.
+ * E.g., hex('a', 4, str) => "0041". If the integer requires more
+ * than width digits, more will be used.
+ */
+ public static String hex(int ch, int width) {
+ String foo = Integer.toString(ch, 16).toUpperCase();
+ return "0000000".substring(foo.length() + 7 - width) + foo;
+ }
+
/**
* Convert a string to comma-separated groups of 4 hex uppercase
* digits. E.g., hex('ab') => "0041,0042". Append the output
diff --git a/icu4j/src/com/ibm/util/Utility.java b/icu4j/src/com/ibm/util/Utility.java
index f1bd6ac01e3..6f8f980b46c 100755
--- a/icu4j/src/com/ibm/util/Utility.java
+++ b/icu4j/src/com/ibm/util/Utility.java
@@ -5,12 +5,14 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/util/Attic/Utility.java,v $
- * $Date: 2001/07/03 16:35:12 $
- * $Revision: 1.6 $
+ * $Date: 2001/09/24 19:57:51 $
+ * $Revision: 1.7 $
*
*****************************************************************************************
*/
package com.ibm.util;
+import com.ibm.text.UCharacter;
+import com.ibm.text.UTF16;
public final class Utility {
@@ -635,6 +637,140 @@ public final class Utility {
return buf.toString();
}
+ /*
+ * Flat table of (escape letter, replacement character) pairs for the
+ * single-letter C-style escapes: each row below holds the letter that
+ * follows the backslash, then the character value it stands for.
+ * This map must be in ASCENDING ORDER OF THE ESCAPE CODE.
+ * NOTE(review): the lookup loop that consumes this table is not visible
+ * in this view; presumably it relies on the ordering -- confirm.
+ */
+ static private final char[] UNESCAPE_MAP = {
+ /* The next four entries map a character to itself and are commented
+ * out; presumably such escapes are handled without the table -- confirm. */
+ /*" 0x22, 0x22 */
+ /*' 0x27, 0x27 */
+ /*? 0x3F, 0x3F */
+ /*\ 0x5C, 0x5C */
+ /*a*/ 0x61, 0x07, // U+0007 alert (BEL)
+ /*b*/ 0x62, 0x08, // U+0008 backspace
+ /*f*/ 0x66, 0x0c, // U+000C form feed
+ /*n*/ 0x6E, 0x0a, // U+000A line feed
+ /*r*/ 0x72, 0x0d, // U+000D carriage return
+ /*t*/ 0x74, 0x09, // U+0009 horizontal tab
+ /*v*/ 0x76, 0x0b // U+000B vertical tab
+ };
+
+ /**
+ * Convert an escape to a 32-bit code point value. We attempt
+ * to parallel the icu4c u_unescapeAt() function.
+ * @param offset16 an array containing offset to the character
+ * after the backslash. Upon return offset16[0] will
+ * be updated to point after the escape sequence.
+ * @return character value from 0 to 10FFFF, or -1 on error.
+ */
+ public static int unescapeAt(String s, int[] offset16) {
+ int c;
+ int result = 0;
+ int n = 0;
+ int minDig = 0;
+ int maxDig = 0;
+ int bitsPerDigit = 4;
+ int dig;
+ int i;
+
+ /* Check that offset is in range */
+ int offset = offset16[0];
+ int length = s.length();
+ if (offset < 0 || offset >= length) {
+ return -1;
+ }
+
+ /* Fetch first UChar after '\\' */
+ c = UTF16.charAt(s, offset);
+ offset += UTF16.getCharCount(c);
+
+ /* Convert hexadecimal and octal escapes */
+ switch (c) {
+ case 'u':
+ minDig = maxDig = 4;
+ break;
+ case 'U':
+ minDig = maxDig = 8;
+ break;
+ case 'x':
+ minDig = 1;
+ maxDig = 2;
+ break;
+ default:
+ dig = UCharacter.digit(c, 8);
+ if (dig >= 0) {
+ minDig = 1;
+ maxDig = 3;
+ n = 1; /* Already have first octal digit */
+ bitsPerDigit = 3;
+ result = dig;
+ }
+ break;
+ }
+ if (minDig != 0) {
+ while (offset < length && n < maxDig) {
+ // TEMPORARY
+ // TODO: Restore the char32-based code when UCharacter.digit
+ // is working (Bug 66).
+
+ //c = UTF16.charAt(s, offset);
+ //dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
+ c = s.charAt(offset);
+ dig = Character.digit((char)c, (bitsPerDigit == 3) ? 8 : 16);
+ if (dig < 0) {
+ break;
+ }
+ result = (result << bitsPerDigit) | dig;
+ //offset += UTF16.getCharCount(c);
+ ++offset;
+ ++n;
+ }
+ if (n < minDig) {
+ return -1;
+ }
+ offset16[0] = offset;
+ return result;
+ }
+
+ /* Convert C-style escapes in table */
+ for (i=0; i
* "0041".
@@ -689,6 +825,16 @@ public final class Utility {
return output;
}
+ /**
+ * Convert a integer to size width (minimum) hex uppercase digits.
+ * E.g., hex('a', 4, str) => "0041". If the integer requires more
+ * than width digits, more will be used.
+ */
+ public static String hex(int ch, int width) {
+ String foo = Integer.toString(ch, 16).toUpperCase();
+ return "0000000".substring(foo.length() + 7 - width) + foo;
+ }
+
/**
* Convert a string to comma-separated groups of 4 hex uppercase
* digits. E.g., hex('ab') => "0041,0042". Append the output