Port fix for ICU (4c) jitterbug 243; Hex-Unicode and Unicode-Hex support for prefixes, suffixes, and digit counts through a pattern syntax

X-SVN-Rev: 971
2025-04-13 08:53:20 +00:00 · 2000-03-22 02:00:08 +00:00 · 2000-03-22 02:00:08 +00:00 · f7a4bbd75b
commit f7a4bbd75b
parent 258dbe98d4
6 changed files with 1126 additions and 204 deletions
--- a/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $ 
- * $Date: 2000/03/10 03:47:47 $ 
- * $Revision: 1.13 $
+ * $Date: 2000/03/22 02:00:08 $ 
+ * $Revision: 1.14 $
 *
 *****************************************************************************************
 */
@ -392,6 +392,26 @@ public class TransliteratorTest extends TestFmwk {
        }
    }

+    /**
+     * Prefix, suffix support in hex transliterators
+     */
+    public void TestJ243() {
+        // Test default Hex-Unicode, which should handle
+        // \\u, \\U, u+, and U+
+        HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator();
+        expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz");
+
+        // Try a custom Hex-Unicode
+        // \\uXXXX and &#xXXXX;
+        HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;"); 
+        expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;",
+               "abcd5fx012&#x00033;");
+
+        // Try custom Unicode-Hex (default is tested elsewhere)
+        UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;");
+        expect(hex3, "012", "&#x30;&#x31;&#x32;");
+    }
+
    //======================================================================
    // Support methods
    //======================================================================
--- a/icu4j/src/com/ibm/icu/text/HexToUnicodeTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/HexToUnicodeTransliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Attic/HexToUnicodeTransliterator.java,v $ 
- * $Date: 2000/03/10 04:07:20 $ 
- * $Revision: 1.4 $
+ * $Date: 2000/03/22 01:59:55 $ 
+ * $Revision: 1.5 $
 *
 *****************************************************************************************
 */
@ -14,16 +14,16 @@ package com.ibm.text;
 import java.util.*;

 /**
- * A transliterator that converts from hexadecimal Unicode
- * escape sequences to the characters they represent.  For example, "U+0040"
- * and '\u0040'.  It recognizes the
+ * A transliterator that converts from hexadecimal Unicode escape
+ * sequences to the characters they represent.  For example, "U+0040"
+ * and '\u0040'.  A default HexToUnicodeTransliterator recognizes the
 * prefixes "U+", "u+", "&#92;U", and "&#92;u".  Hex values may be
- * upper- or lowercase.
- *
- * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ * upper- or lowercase.  By calling the applyPattern() method, one
+ * or more custom prefix/suffix pairs may be specified.  See
+ * applyPattern() for details.
 *
 * @author Alan Liu
- * @version $RCSfile: HexToUnicodeTransliterator.java,v $ $Revision: 1.4 $ $Date: 2000/03/10 04:07:20 $
+ * @version $RCSfile: HexToUnicodeTransliterator.java,v $ $Revision: 1.5 $ $Date: 2000/03/22 01:59:55 $
 */
 public class HexToUnicodeTransliterator extends Transliterator {
    private static final String COPYRIGHT =
@ -32,75 +32,345 @@ public class HexToUnicodeTransliterator extends Transliterator {
    /**
     * Package accessible ID for this transliterator.
     */
-    static String _ID = "Hex-Unicode";
+    static final String _ID = "Hex-Unicode";
+
+    /**
+     * This pattern encodes the following specs for the default constructor:
+     *   \\u0000
+     *   \\U0000
+     *   u+0000
+     *   U+0000
+     * The multiple backslashes resolve to a single backslash
+     * in the effective prefix.
+     */
+    private static final String DEFAULT_PATTERN = "\\\\u0000;\\\\U0000;u+0000;U+0000";
+
+    // Character constants for special pattern characters
+    private static final char SEMICOLON = ';';
+    private static final char ZERO      = '0';
+    private static final char POUND     = '#';
+    private static final char BACKSLASH = '\\';
+
+    /**
+     * The pattern for this transliterator
+     */
+    private String pattern;
+
+    /**
+     * The processed pattern specification.  See applyPattern() for
+     * details.
+     */
+    private char[] affixes;
+
+    /**
+     * The number of different affix sets in affixes.
+     */
+    private int affixCount;

    /**
     * Constructs a transliterator.
     */
    public HexToUnicodeTransliterator() {
        super(_ID, null);
+        applyPattern(DEFAULT_PATTERN);
+    }
+
+    /**
+     * Constructs a transliterator that recognizes the given pattern and has no filter.
+     */
+    public HexToUnicodeTransliterator(String thePattern) {
+        this(thePattern, null);
+    }
+    
+    /**
+     * Constructs a transliterator that recognizes the given pattern, with the given filter.
+     */
+    public HexToUnicodeTransliterator(String thePattern,
+                                      UnicodeFilter theFilter) {
+        super(_ID, theFilter);
+        applyPattern(thePattern);
+    }
+
+    /**
+     * Set the patterns recognized by this transliterator.  One or
+     * more patterns may be specified, separated by semicolons (';').
+     * Each pattern contains zero or more prefix characters, one or
+     * more digit characters, and zero or more suffix characters.  The
+     * digit characters indicate optional digits ('#') followed by
+     * required digits ('0').  The total number of digits cannot
+     * exceed 4, and there must be at least 1 required digit.  Use a
+     * backslash ('\\') to escape any of the special characters.  An
+     * empty pattern is allowed; it specifies a transliterator that
+     * does nothing.
+     *
+     * <p>Example: "U+0000;<###0>" specifies two patterns.  The first
+     * has a prefix of "U+", exactly four digits, and no suffix.  The
+     * second has a prefix of "<", between one and four digits, and a
+     * suffix of ">".
+     *
+     * <p><pre>
+     * pattern := spec | ( pattern ';' spec )
+     * spec := prefix-char* digit-spec suffix-char*
+     * digit-spec := '#'* '0'+
+     * prefix-char := [^special-char] | '\\' special-char
+     * suffix-char := [^special-char] | '\\' special-char
+     * special-char := ';' | '0' | '#' | '\\'
+     * </pre>
+     *
+     * @param pattern the pattern to parse, as described above
+     * @throws IllegalArgumentException if the pattern ends in an
+     * unescaped backslash, contains a misplaced unquoted '#' or '0',
+     * has an invalid digit count, or has a prefix or suffix longer
+     * than 0xFFFF characters.  In that case the pattern, affixes, and
+     * affix count of this transliterator are left unchanged.
+     */
+    public void applyPattern(String pattern) {
+
+        /* The pattern is processed and stored in affixes.  The pattern
+         * consists of zero or more affixes.  Each affix is parsed to
+         * determine the prefix, suffix, minimum digit count, and maximum
+         * digit count.  These values are then stored as a four character
+         * header.  That is, their numeric values are cast to chars and
+         * stored in the string.  Following these four characters, the prefix
+         * characters, then suffix characters are stored.  Each spec takes
+         * n+4 characters, where n is the total length of the prefix and
+         * suffix.
+         */
+
+        StringBuffer affixes = new StringBuffer();
+        // Count specs in a local so that a failed parse cannot leave the
+        // affixCount member out of step with the affixes member.
+        int count = 0;
+
+        /* The mode specifies where we are in each spec.
+         * mode 0 = in prefix
+         * mode 1 = in optional digits (#)
+         * mode 2 = in required digits (0)
+         * mode 3 = in suffix
+         */
+        int mode = 0;
+
+        int prefixLen = 0, suffixLen = 0, minDigits = 0, maxDigits = 0;
+        int start = 0;
+
+        /* To make parsing easier, we append a virtual ';' at the end of
+         * the pattern string, if there isn't one already.  When we get to
+         * the index pattern.length() (that is, one past the end), we
+         * create a virtual ';' if necessary.
+         */
+        char c = 0;                // These are outside the loop so we can
+        boolean isLiteral = false; // see the previous character...
+        for (int i=0; i<=pattern.length(); ++i) {
+            // Create the virtual trailing ';' if necessary
+            if (i == pattern.length()) {
+                // If the last character was not a non-literal ';'...
+                if (i > 0 && !(c == SEMICOLON && !isLiteral)) {
+                    c = SEMICOLON;
+                    isLiteral = false;
+                } else {
+                    break;
+                }
+            } else {
+                c = pattern.charAt(i);
+                isLiteral = false;
+            }
+
+            if (c == BACKSLASH) {
+                if ((i+1)<pattern.length()) {
+                    isLiteral = true;
+                    c = pattern.charAt(++i);
+                } else {
+                    // Trailing '\\'
+                    throw new IllegalArgumentException("Trailing '\\'");
+                }
+            }
+
+            if (!isLiteral) {
+                switch (c) {
+                case POUND:
+                    // Seeing a '#' moves us from mode 0 (prefix) to mode 1
+                    // (optional digits).
+                    if (mode == 0) {
+                        ++mode;
+                    } else if (mode != 1) {
+                        // Unquoted '#'
+                        throw new IllegalArgumentException("Unquoted '#'");
+                    }
+                    ++maxDigits;
+                    break;
+                case ZERO:
+                    // Seeing a '0' moves us to mode 2 (required digits)
+                    if (mode < 2) {
+                        mode = 2;
+                    } else if (mode != 2) {
+                        // Unquoted '0'
+                        throw new IllegalArgumentException("Unquoted '0'");
+                    }
+                    ++minDigits;
+                    ++maxDigits;
+                    break;
+                case SEMICOLON:
+                    // Report the two failure causes separately so that the
+                    // message names the actual problem.
+                    if (minDigits < 1 || maxDigits > 4) {
+                        throw new IllegalArgumentException("Invalid min/max digit count");
+                    }
+                    if (prefixLen > 0xFFFF || suffixLen > 0xFFFF) {
+                        throw new IllegalArgumentException("Suffix or prefix too long");
+                    }
+                    // If there was no prefix and no suffix, then the
+                    // header will not have been allocated yet.  We need
+                    // to allocate the header now.
+                    if (start == affixes.length()) {
+                        affixes.append("AAAA");
+                    }
+                    // Fill in 4-character header
+                    affixes.setCharAt(start++, (char) prefixLen);
+                    affixes.setCharAt(start++, (char) suffixLen);
+                    affixes.setCharAt(start++, (char) minDigits);
+                    affixes.setCharAt(start,   (char) maxDigits);
+                    start = affixes.length();
+                    ++count;
+                    prefixLen = suffixLen = minDigits = maxDigits = mode = 0;
+                    break;
+                default:
+                    isLiteral = true;
+                    break;
+                }
+            }
+
+            if (isLiteral) {
+                if (start == affixes.length()) {
+                    // Make space for the header.  Append any four
+                    // characters as place holders for the header values.
+                    // We fill these in when we parse the ';'.
+                    affixes.append("AAAA");
+                }
+                affixes.append(c);
+                if (mode == 0) {
+                    ++prefixLen;
+                } else {
+                    // Any literal outside the prefix moves us into mode 3
+                    // (suffix)
+                    mode = 3;
+                    ++suffixLen;
+                }
+            }
+        }
+
+        // We only modify the pattern, affixes, and affixCount member
+        // variables if we get to this point, that is, if the parse
+        // succeeds.
+        this.pattern = pattern;
+        int len = affixes.length();
+        this.affixes = new char[len];
+        affixes.getChars(0, len, this.affixes, 0);
+        this.affixCount = count;
+    }
+
+    /**
+     * Return this transliterator's pattern.
+     */
+    public String toPattern() {
+        return pattern;
    }

    /**
     * Implements {@link Transliterator#handleTransliterate}.
     */
    protected void handleTransliterate(Replaceable text,
-                                       Position offsets, boolean incremental) {
-        /**
-         * Performs transliteration changing Unicode hexadecimal
-         * escapes to characters.  For example, "U+0040" -> '@'.  A fixed
-         * set of prefixes is recognized: "&#92;u", "&#92;U", "u+", "U+". 
-         */
+                                       Position offsets, boolean isIncremental) {
        int cursor = offsets.cursor;
        int limit = offsets.limit;
+        int i, j, ipat;

-        int maxCursor = limit - 6;
-    loop:
-        while (cursor <= maxCursor) {
-            char c = filteredCharAt(text, cursor + 5);
-            int digit0 = Character.digit(c, 16);
-            if (digit0 < 0) {
-                if (c == '\\') {
-                    cursor += 5;
-                } else if (c == 'U' || c == 'u' || c == '+') {
-                    cursor += 4;
-                } else {
-                    cursor += 6;
-                }
-                continue;
-            }
+      loop:
+        while (cursor < limit) {
+            // Loop over the specs in affixes.  If affixCount is zero (an
+            // empty pattern), then we do nothing.  We exit this loop when
+            // we match one of the specs.  We exit this function (by
+            // jumping to exit: below) if a partial match is detected and
+            // isIncremental is true.
+            for (j=0, ipat=0; j<affixCount; ++j) {

-            int u = digit0;
+                // Read the header
+                int prefixLen = affixes[ipat++];
+                int suffixLen = affixes[ipat++];
+                int minDigits = affixes[ipat++];
+                int maxDigits = affixes[ipat++];

-            for (int i=4; i>=2; --i) {
-                c = filteredCharAt(text, cursor + i);
-                int digit = Character.digit(c, 16);
-                if (digit < 0) {
-                    if (c == 'U' || c == 'u' || c == '+') {
-                        cursor += i-1;
-                    } else {
-                        cursor += 6;
+                // curs is a copy of cursor that is advanced over the
+                // characters as we parse them.
+                int curs = cursor;
+                boolean match = true;
+
+                for (i=0; i<prefixLen; ++i) {
+                    if (curs >= limit) {
+                        if (i > 0) {
+                            // We've already matched a character.  This is
+                            // a partial match, so we return if in
+                            // incremental mode.  In non-incremental mode,
+                            // go to the next spec.
+                            if (isIncremental) {
+                                break loop;
+                            }
+                            match = false;
+                            break;
+                        }
+                    }
+                    char c = filteredCharAt(text, curs++);
+                    if (c != affixes[ipat + i]) {
+                        match = false;
+                        break;
                    }
-                    continue loop;
                }
-                u |= digit << (4 * (5-i));
+
+                if (match) {
+                    char u = 0;
+                    int digitCount = 0;
+                    for (;;) {
+                        if (curs >= limit) {
+                            // Check for partial match in incremental mode.
+                            if (curs > cursor && isIncremental) {
+                                break loop;
+                            }
+                            break;
+                        }
+                        int digit = Character.digit(filteredCharAt(text, curs), 16);
+                        if (digit < 0) {
+                            break;
+                        }
+                        ++curs;
+                        u <<= 4;
+                        u |= (char) digit;
+                        if (++digitCount == maxDigits) {
+                            break;
+                        }
+                    }
+
+                    match = (digitCount >= minDigits);
+
+                    if (match) {
+                        for (i=0; i<suffixLen; ++i) {
+                            if (curs >= limit) {
+                                // Check for partial match in incremental mode.
+                                if (curs > cursor && isIncremental) {
+                                    break loop;
+                                }
+                                match = false;
+                                break;
+                            }
+                            char c = filteredCharAt(text, curs++);
+                            if (c != affixes[ipat + prefixLen + i]) {
+                                match = false;
+                                break;
+                            }
+                        }
+
+                        if (match) {
+                            // At this point, we have a match
+                            text.replace(cursor, curs, String.valueOf(u));
+                            limit -= curs - cursor - 1;
+                            // The following break statement leaves the
+                            // loop that is traversing the specs in
+                            // affixes.  We then parse the next input
+                            // character.
+                            break;
+                        }
+                    }
+                }
+
+                ipat += prefixLen + suffixLen;
            }

-            c = filteredCharAt(text, cursor);
-            char d = filteredCharAt(text, cursor + 1);
-            if (((c == 'U' || c == 'u') && d == '+')
-                || (c == '\\' && (d == 'U' || d == 'u'))) {
-                
-                // At this point, we have a match; replace cursor..cursor+5
-                // with u.
-                text.replace(cursor, cursor+6, String.valueOf((char) u));
-                limit -= 5;
-                maxCursor -= 5;
-
-                ++cursor;
-            } else {
-                cursor += 6;
-            }
+            ++cursor;
        }

        offsets.limit = limit;
--- a/icu4j/src/com/ibm/icu/text/UnicodeToHexTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/UnicodeToHexTransliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Attic/UnicodeToHexTransliterator.java,v $ 
- * $Date: 2000/03/10 04:07:25 $ 
- * $Revision: 1.5 $
+ * $Date: 2000/03/22 01:59:55 $ 
+ * $Revision: 1.6 $
 *
 *****************************************************************************************
 */
@ -19,47 +19,226 @@ import java.util.*;
 * prefix specified in the constructor and optionally converts the hex
 * digits to uppercase.
 *
- * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ * <p>The format of the output is set by a pattern.  This pattern
+ * follows the same syntax as <code>HexToUnicodeTransliterator</code>,
+ * except it does not allow multiple specifications.  The pattern sets
+ * the prefix string, suffix string, and minimum digit count; the
+ * maximum digit count is validated but does not otherwise affect the
+ * output.  There are no setters or getters for these attributes; they
+ * are set only through the pattern.
+ *
+ * <p>The setUppercase() and isUppercase() methods control whether 'a'
+ * through 'f' or 'A' through 'F' are output as hex digits.  This is
+ * not controlled through the pattern; only through the methods.  The
+ * default is uppercase.
 *
 * @author Alan Liu
- * @version $RCSfile: UnicodeToHexTransliterator.java,v $ $Revision: 1.5 $ $Date: 2000/03/10 04:07:25 $
+ * @version $RCSfile: UnicodeToHexTransliterator.java,v $ $Revision: 1.6 $ $Date: 2000/03/22 01:59:55 $
 */
 public class UnicodeToHexTransliterator extends Transliterator {

-    /**
-     * Package accessible ID for this transliterator.
-     */
-    static String _ID = "Unicode-Hex";
-
-    private String prefix;
-
-    private boolean uppercase;
-
    private static final String COPYRIGHT =
        "\u00A9 IBM Corporation 1999. All rights reserved.";

+    /**
+     * Package accessible ID for this transliterator.
+     */
+    static final String _ID = "Unicode-Hex";
+
+    private static final char[] HEX_DIGITS = {
+        '0', '1', '2', '3', '4', '5', '6', '7',
+        '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
+        '0', '1', '2', '3', '4', '5', '6', '7',
+        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+    };
+
+    // Character constants for special pattern chars
+    private static final char ZERO      = '0';
+    private static final char POUND     = '#';
+    private static final char BACKSLASH = '\\';
+
+    /**
+     * The pattern set by applyPattern() and returned by toPattern().
+     */
+    private String pattern;
+
+    /**
+     * The string preceding the hex digits, parsed from the pattern.
+     */
+    private String prefix;
+
+    /**
+     * The string following the hex digits, parsed from the pattern.
+     */
+    private String suffix;
+
+    /**
+     * The minimum number of hex digits to output, between 1 and 4,
+     * inclusive.  Parsed from the pattern.
+     */
+    private int minDigits;
+
+    /**
+     * If true, output uppercase hex digits; otherwise output
+     * lowercase.  Set by setUppercase() and returned by isUppercase().
+     */
+    private boolean uppercase;
+
    /**
     * Constructs a transliterator.
-     * @param prefix the string that will precede the four hex
-     * digits for UNICODE_HEX transliterators.  Ignored
-     * if direction is HEX_UNICODE.
+     * @param pattern The pattern for this transliterator.  See
+     * applyPattern() for pattern syntax.
     * @param uppercase if true, the four hex digits will be
     * converted to uppercase; otherwise they will be lowercase.
     * Ignored if direction is HEX_UNICODE.
+     * @param filter the filter for this transliterator, or
+     * null if none.
     */
-    public UnicodeToHexTransliterator(String prefix, boolean uppercase,
+    public UnicodeToHexTransliterator(String pattern, boolean uppercase,
                                      UnicodeFilter filter) {
        super(_ID, filter);
-        this.prefix = prefix;
        this.uppercase = uppercase;
+        applyPattern(pattern);
+    }
+
+    /**
+     * Constructs an uppercase transliterator with no filter.
+     * @param pattern The pattern for this transliterator.  See
+     * applyPattern() for pattern syntax.
+     */
+    public UnicodeToHexTransliterator(String pattern) {
+        this(pattern, true, null);
    }

    /**
     * Constructs a transliterator with the default prefix "&#092;u"
-     * that outputs uppercase hex digits.
+     * that outputs four uppercase hex digits.
     */
    public UnicodeToHexTransliterator() {
-        this("\\u", true, null);
+        super(_ID, null);
+        pattern = "\\\\u0000";
+        prefix = "\\u";
+        suffix = "";
+        minDigits = 4;
+        uppercase = true;
+    }
+
+    /**
+     * Set the pattern recognized by this transliterator.  The pattern
+     * must contain zero or more prefix characters, one or more digit
+     * characters, and zero or more suffix characters.  The digit
+     * characters indicate optional digits ('#') followed by required
+     * digits ('0').  The total number of digits cannot exceed 4, and there
+     * must be at least 1 required digit.  Use a backslash ('\\') to
+     * escape any of the special characters.  An empty pattern is not
+     * allowed.
+     *
+     * <p>Example: "U+0000" specifies a prefix of "U+", exactly four
+     * digits, and no suffix.  "<###0>" has a prefix of "<", between
+     * one and four digits, and a suffix of ">".
+     *
+     * <p><pre>
+     * pattern := prefix-char* digit-spec suffix-char*
+     * digit-spec := '#'* '0'+
+     * prefix-char := [^special-char] | '\\' special-char
+     * suffix-char := [^special-char] | '\\' special-char
+     * special-char := ';' | '0' | '#' | '\\'
+     * </pre>
+     *
+     * <p>Limitations: There is no way to set the uppercase attribute
+     * in the pattern.  (applyPattern() does not alter the uppercase
+     * attribute.)
+     */
+    public void applyPattern(String thePattern) {
+        StringBuffer prefixBuf = null;
+        StringBuffer suffixBuf = null;
+        int minDigits = 0;
+        int maxDigits = 0;
+
+        /* The mode specifies where we are in each spec.
+         * mode 0 = in prefix
+         * mode 1 = in optional digits (#)
+         * mode 2 = in required digits (0)
+         * mode 3 = in suffix
+         */
+        int mode = 0;
+
+        for (int i=0; i<thePattern.length(); ++i) {
+            char c = thePattern.charAt(i);
+            boolean isLiteral = false;
+            if (c == BACKSLASH) {
+                if ((i+1)<thePattern.length()) {
+                    isLiteral = true;
+                    c = thePattern.charAt(++i);
+                } else {
+                    // Trailing '\\'
+                    throw new IllegalArgumentException("Trailing '\\'");
+                }
+            }
+
+            if (!isLiteral) {
+                switch (c) {
+                case POUND:
+                    // Seeing a '#' moves us from mode 0 (prefix) to mode 1
+                    // (optional digits).
+                    if (mode == 0) {
+                        ++mode;
+                    } else if (mode != 1) {
+                        // Unquoted '#'
+                        throw new IllegalArgumentException("Unquoted '#'");
+                    }
+                    ++maxDigits;
+                    break;
+                case ZERO:
+                    // Seeing a '0' moves us to mode 2 (required digits)
+                    if (mode < 2) {
+                        mode = 2;
+                    } else if (mode != 2) {
+                        // Unquoted '0'
+                        throw new IllegalArgumentException("Unquoted '0'");
+                    }
+                    ++minDigits;
+                    ++maxDigits;
+                    break;
+                default:
+                    isLiteral = true;
+                    break;
+                }
+            }
+
+            if (isLiteral) {
+                if (mode == 0) {
+                    if (prefixBuf == null) {
+                        prefixBuf = new StringBuffer();
+                    }
+                    prefixBuf.append(c);
+                } else {
+                    // Any literal outside the prefix moves us into mode 3
+                    // (suffix)
+                    mode = 3;
+                    if (suffixBuf == null) {
+                        suffixBuf = new StringBuffer();
+                    }
+                    suffixBuf.append(c);
+                }
+            }
+        }
+
+        if (minDigits < 1 || maxDigits > 4) {
+            // Invalid min/max digit count
+            throw new IllegalArgumentException("Invalid min/max digit count");
+        }
+
+        pattern = thePattern;
+        prefix = (prefixBuf == null) ? "" : prefixBuf.toString();
+        suffix = (suffixBuf == null) ? "" : suffixBuf.toString();
+        this.minDigits = minDigits;
+    }
+
+    /**
+     * Return this transliterator's pattern.
+     */
+    public String toPattern() {
+        return pattern;
    }

    /**
@ -116,16 +295,28 @@ public class UnicodeToHexTransliterator extends Transliterator {
        int limit = offsets.limit;

        UnicodeFilter filter = getFilter();
+        StringBuffer hex = new StringBuffer(prefix);
+        int prefixLen = prefix.length();

-    loop:
        while (cursor < limit) {
            char c = text.charAt(cursor);
            if (filter != null && !filter.contains(c)) {
                ++cursor;
                continue;
            }
-            String hex = hex(c);
-            text.replace(cursor, cursor+1, hex);
+
+            hex.setLength(prefixLen);
+            boolean showRest = false;
+            for (int i=3; i>=0; --i) {
+                int d = (c >> (i*4)) & 0xF;
+                if (showRest || (d != 0) || minDigits > i) {
+                    hex.append(HEX_DIGITS[uppercase ? (d|16) : d]);
+                    showRest = true;
+                }
+            }
+            hex.append(suffix);
+
+            text.replace(cursor, cursor+1, hex.toString());
            int len = hex.length();
            cursor += len; // Advance cursor by 1 and adjust for new text
            --len;
@ -135,24 +326,4 @@ public class UnicodeToHexTransliterator extends Transliterator {
        offsets.limit = limit;
        offsets.cursor = cursor;
    }
-
-    /**
-     * Form escape sequence.
-     */
-    private final String hex(char c) {
-        StringBuffer buf = new StringBuffer();
-        buf.append(prefix);
-        if (c < 0x1000) {
-            buf.append('0');
-            if (c < 0x100) {
-                buf.append('0');
-                if (c < 0x10) {
-                    buf.append('0');
-                }
-            }
-        } 
-        String h = Integer.toHexString(c);
-        buf.append(uppercase ? h.toUpperCase() : h);
-        return buf.toString();
-    }
 }
--- a/icu4j/src/com/ibm/test/translit/TransliteratorTest.java
+++ b/icu4j/src/com/ibm/test/translit/TransliteratorTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $ 
- * $Date: 2000/03/10 03:47:47 $ 
- * $Revision: 1.13 $
+ * $Date: 2000/03/22 02:00:08 $ 
+ * $Revision: 1.14 $
 *
 *****************************************************************************************
 */
@ -392,6 +392,26 @@ public class TransliteratorTest extends TestFmwk {
        }
    }

+    /**
+     * Prefix, suffix support in hex transliterators
+     */
+    public void TestJ243() {
+        // Test default Hex-Unicode, which should handle
+        // \\u, \\U, u+, and U+
+        HexToUnicodeTransliterator hex = new HexToUnicodeTransliterator();
+        expect(hex, "\\u0041+\\U0042,u+0043uu+0044z", "A+B,CuDz");
+
+        // Try a custom Hex-Unicode
+        // \\uXXXX and &#xXXXX;
+        HexToUnicodeTransliterator hex2 = new HexToUnicodeTransliterator("\\\\u###0;&\\#x###0\\;"); 
+        expect(hex2, "\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;",
+               "abcd5fx012&#x00033;");
+
+        // Try custom Unicode-Hex (default is tested elsewhere)
+        UnicodeToHexTransliterator hex3 = new UnicodeToHexTransliterator("&\\#x###0;");
+        expect(hex3, "012", "&#x30;&#x31;&#x32;");
+    }
+
    //======================================================================
    // Support methods
    //======================================================================
--- a/icu4j/src/com/ibm/text/HexToUnicodeTransliterator.java
+++ b/icu4j/src/com/ibm/text/HexToUnicodeTransliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/HexToUnicodeTransliterator.java,v $ 
- * $Date: 2000/03/10 04:07:20 $ 
- * $Revision: 1.4 $
+ * $Date: 2000/03/22 01:59:55 $ 
+ * $Revision: 1.5 $
 *
 *****************************************************************************************
 */
@ -14,16 +14,16 @@ package com.ibm.text;
 import java.util.*;

 /**
- * A transliterator that converts from hexadecimal Unicode
- * escape sequences to the characters they represent.  For example, "U+0040"
- * and '\u0040'.  It recognizes the
+ * A transliterator that converts from hexadecimal Unicode escape
+ * sequences to the characters they represent.  For example, "U+0040"
+ * and '\u0040'.  A default HexToUnicodeTransliterator recognizes the
 * prefixes "U+", "u+", "&#92;U", and "&#92;u".  Hex values may be
- * upper- or lowercase.
- *
- * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ * upper- or lowercase.  By calling the applyPattern() method, one
+ * or more custom prefix/suffix pairs may be specified.  See
+ * applyPattern() for details.
 *
 * @author Alan Liu
- * @version $RCSfile: HexToUnicodeTransliterator.java,v $ $Revision: 1.4 $ $Date: 2000/03/10 04:07:20 $
+ * @version $RCSfile: HexToUnicodeTransliterator.java,v $ $Revision: 1.5 $ $Date: 2000/03/22 01:59:55 $
 */
 public class HexToUnicodeTransliterator extends Transliterator {
    private static final String COPYRIGHT =
@ -32,75 +32,345 @@ public class HexToUnicodeTransliterator extends Transliterator {
    /**
     * Package accessible ID for this transliterator.
     */
-    static String _ID = "Hex-Unicode";
+    static final String _ID = "Hex-Unicode";
+
+    /**
+     * This pattern encodes the following specs for the default constructor:
+     *   \\u0000
+     *   \\U0000
+     *   u+0000
+     *   U+0000
+     * The multiple backslashes resolve to a single backslash
+     * in the effective prefix.
+     */
+    private static final String DEFAULT_PATTERN = "\\\\u0000;\\\\U0000;u+0000;U+0000";
+
+    // Character constants for special pattern characters
+    private static final char SEMICOLON = ';';
+    private static final char ZERO      = '0';
+    private static final char POUND     = '#';
+    private static final char BACKSLASH = '\\';
+
+    /**
+     * The pattern for this transliterator
+     */
+    private String pattern;
+
+    /**
+     * The processed pattern specification.  See applyPattern() for
+     * details.
+     */
+    private char[] affixes;
+
+    /**
+     * The number of different affix sets in affixes.
+     */
+    private int affixCount;

    /**
     * Constructs a transliterator.
     */
    public HexToUnicodeTransliterator() {
        super(_ID, null);
+        applyPattern(DEFAULT_PATTERN);
+    }
+
+    /**
+     * Constructs a transliterator that recognizes the given pattern.
+     */
+    public HexToUnicodeTransliterator(String thePattern) {
+        this(thePattern, null);
+    }
+    
+    /**
+     * Constructs a transliterator with the given pattern and filter.
+     */
+    public HexToUnicodeTransliterator(String thePattern,
+                                      UnicodeFilter theFilter) {
+        super(_ID, theFilter);
+        applyPattern(thePattern);
+    }
+
+    /**
+     * Set the patterns recognized by this transliterator.  One or
+     * more patterns may be specified, separated by semicolons (';').
+     * Each pattern contains zero or more prefix characters, one or
+     * more digit characters, and zero or more suffix characters.  The
+     * digit characters indicate optional digits ('#') followed by
+     * required digits ('0').  The total number of digits cannot
+     * exceed 4, and there must be at least 1 required digit.  Use a
+     * backslash ('\\') to escape any of the special characters.  An
+     * empty pattern is allowed; it specifies a transliterator that
+     * does nothing.
+     *
+     * <p>Example: "U+0000;<###0>" specifies two patterns.  The first
+     * has a prefix of "U+", exactly four digits, and no suffix.  The
+     * second has a prefix of "<", between one and four digits, and a
+     * suffix of ">".
+     *
+     * <p><pre>
+     * pattern := spec | ( pattern ';' spec )
+     * spec := prefix-char* digit-spec suffix-char*
+     * digit-spec := '#'* '0'+
+     * prefix-char := [^special-char] | '\\' special-char
+     * suffix-char := [^special-char] | '\\' special-char
+     * special-char := ';' | '0' | '#' | '\\'
+     * </pre>
+     */
+    public void applyPattern(String pattern) {
+
+        /* The pattern is parsed into zero or more specs, which are packed
+         * into one char array.  Each spec is parsed to determine the
+         * prefix length, suffix length, minimum digit count, and maximum
+         * digit count.  These values are then stored as a four character
+         * header.  That is, their numeric values are cast to chars and
+         * stored in the buffer.  Following these four characters, the prefix
+         * characters, then suffix characters are stored.  Each spec takes
+         * n+4 characters, where n is the total length of the prefix and
+         * suffix.
+         */
+
+        StringBuffer affixBuf = new StringBuffer();
+        int count = 0; // committed to affixCount only if the parse succeeds
+
+        /* The mode specifies where we are in each spec.
+         * mode 0 = in prefix
+         * mode 1 = in optional digits (#)
+         * mode 2 = in required digits (0)
+         * mode 3 = in suffix
+         */
+        int mode = 0;
+
+        int prefixLen = 0, suffixLen = 0, minDigits = 0, maxDigits = 0;
+        int start = 0; // index in affixBuf of the current spec's header
+
+        /* To make parsing easier, we append a virtual ';' at the end of
+         * the pattern string, if there isn't one already.  When we get to
+         * the index pattern.length() (that is, one past the end), we
+         * create a virtual ';' if necessary.
+         */
+        char c = 0;                // These are outside the loop so we can
+        boolean isLiteral = false; // see the previous character...
+        for (int i=0; i<=pattern.length(); ++i) {
+            // Create the virtual trailing ';' if necessary
+            if (i == pattern.length()) {
+                // If the last character was not a non-literal ';'...
+                if (i > 0 && !(c == SEMICOLON && !isLiteral)) {
+                    c = SEMICOLON;
+                    isLiteral = false;
+                } else {
+                    break;
+                }
+            } else {
+                c = pattern.charAt(i);
+                isLiteral = false;
+            }
+
+            if (c == BACKSLASH) {
+                if ((i+1)<pattern.length()) {
+                    isLiteral = true;
+                    c = pattern.charAt(++i);
+                } else {
+                    // A lone backslash at the end has nothing to escape
+                    throw new IllegalArgumentException("Trailing '\\'");
+                }
+            }
+
+            if (!isLiteral) {
+                switch (c) {
+                case POUND:
+                    // Seeing a '#' moves us from mode 0 (prefix) to mode 1
+                    // (optional digits).
+                    if (mode == 0) {
+                        ++mode;
+                    } else if (mode != 1) {
+                        // '#' after a '0' or in the suffix must be escaped
+                        throw new IllegalArgumentException("Unquoted '#'");
+                    }
+                    ++maxDigits;
+                    break;
+                case ZERO:
+                    // Seeing a '0' moves us to mode 2 (required digits)
+                    if (mode < 2) {
+                        mode = 2;
+                    } else if (mode != 2) {
+                        // '0' in the suffix must be escaped
+                        throw new IllegalArgumentException("Unquoted '0'");
+                    }
+                    ++minDigits;
+                    ++maxDigits;
+                    break;
+                case SEMICOLON:
+                    if (minDigits < 1 || maxDigits > 4) {
+                        throw new IllegalArgumentException("Invalid min/max digit count");
+                    }
+                    if (prefixLen > 0xFFFF || suffixLen > 0xFFFF) {
+                        throw new IllegalArgumentException("Suffix or prefix too long");
+                    }
+                    // A spec with no prefix and no suffix has no header
+                    // yet, so we need to allocate the header now.
+                    if (start == affixBuf.length()) {
+                        affixBuf.append("AAAA");
+                    }
+                    // Fill in 4-character header (each value must fit in a char)
+                    affixBuf.setCharAt(start++, (char) prefixLen);
+                    affixBuf.setCharAt(start++, (char) suffixLen);
+                    affixBuf.setCharAt(start++, (char) minDigits);
+                    affixBuf.setCharAt(start,   (char) maxDigits);
+                    start = affixBuf.length();
+                    ++count;
+                    prefixLen = suffixLen = minDigits = maxDigits = mode = 0;
+                    break;
+                default:
+                    isLiteral = true;
+                    break;
+                }
+            }
+
+            if (isLiteral) {
+                if (start == affixBuf.length()) {
+                    // Make space for the header.  Append any four
+                    // characters as place holders for the header values.
+                    // We fill these in when we parse the ';'.
+                    affixBuf.append("AAAA");
+                }
+                affixBuf.append(c);
+                if (mode == 0) {
+                    ++prefixLen;
+                } else {
+                    // Any literal outside the prefix moves us into mode 3
+                    // (suffix)
+                    mode = 3;
+                    ++suffixLen;
+                }
+            }
+        }
+
+        // We only modify the pattern, affixes, and affixCount members if
+        // we get to this point, that is, if the parse succeeds.
+        this.pattern = pattern;
+        int len = affixBuf.length();
+        this.affixes = new char[len];
+        affixBuf.getChars(0, len, this.affixes, 0);
+        this.affixCount = count;
+    }
+
+    /**
+     * Return this transliterator's pattern.
+     */
+    public String toPattern() {
+        return pattern;
    }

    /**
     * Implements {@link Transliterator#handleTransliterate}.
     */
    protected void handleTransliterate(Replaceable text,
-                                       Position offsets, boolean incremental) {
-        /**
-         * Performs transliteration changing Unicode hexadecimal
-         * escapes to characters.  For example, "U+0040" -> '@'.  A fixed
-         * set of prefixes is recognized: "&#92;u", "&#92;U", "u+", "U+". 
-         */
+                                       Position offsets, boolean isIncremental) {
        int cursor = offsets.cursor;
        int limit = offsets.limit;
+        int i, j, ipat;

-        int maxCursor = limit - 6;
-    loop:
-        while (cursor <= maxCursor) {
-            char c = filteredCharAt(text, cursor + 5);
-            int digit0 = Character.digit(c, 16);
-            if (digit0 < 0) {
-                if (c == '\\') {
-                    cursor += 5;
-                } else if (c == 'U' || c == 'u' || c == '+') {
-                    cursor += 4;
-                } else {
-                    cursor += 6;
-                }
-                continue;
-            }
+      loop:
+        while (cursor < limit) {
+            // Loop over the specs in affixes.  If affixCount is zero (an
+            // empty pattern), then we do nothing.  We exit this loop when
+            // we match one of the specs.  We leave the outer loop (via
+            // "break loop") if a partial match is detected and
+            // isIncremental is true.
+            for (j=0, ipat=0; j<affixCount; ++j) {

-            int u = digit0;
+                // Read the header
+                int prefixLen = affixes[ipat++];
+                int suffixLen = affixes[ipat++];
+                int minDigits = affixes[ipat++];
+                int maxDigits = affixes[ipat++];

-            for (int i=4; i>=2; --i) {
-                c = filteredCharAt(text, cursor + i);
-                int digit = Character.digit(c, 16);
-                if (digit < 0) {
-                    if (c == 'U' || c == 'u' || c == '+') {
-                        cursor += i-1;
-                    } else {
-                        cursor += 6;
+                // curs is a copy of cursor that is advanced over the
+                // characters as we parse them.
+                int curs = cursor;
+                boolean match = true;
+
+                for (i=0; i<prefixLen; ++i) {
+                    if (curs >= limit) {
+                        if (i > 0) {
+                            // We've already matched a character.  This is
+                            // a partial match, so we return if in
+                            // incremental mode.  In non-incremental mode,
+                            // go to the next spec.
+                            if (isIncremental) {
+                                break loop;
+                            }
+                            match = false;
+                            break;
+                        }
+                    }
+                    char c = filteredCharAt(text, curs++);
+                    if (c != affixes[ipat + i]) {
+                        match = false;
+                        break;
                    }
-                    continue loop;
                }
-                u |= digit << (4 * (5-i));
+
+                if (match) {
+                    char u = 0;
+                    int digitCount = 0;
+                    for (;;) {
+                        if (curs >= limit) {
+                            // Check for partial match in incremental mode.
+                            if (curs > cursor && isIncremental) {
+                                break loop;
+                            }
+                            break;
+                        }
+                        int digit = Character.digit(filteredCharAt(text, curs), 16);
+                        if (digit < 0) {
+                            break;
+                        }
+                        ++curs;
+                        u <<= 4;
+                        u |= (char) digit;
+                        if (++digitCount == maxDigits) {
+                            break;
+                        }
+                    }
+
+                    match = (digitCount >= minDigits);
+
+                    if (match) {
+                        for (i=0; i<suffixLen; ++i) {
+                            if (curs >= limit) {
+                                // Check for partial match in incremental mode.
+                                if (curs > cursor && isIncremental) {
+                                    break loop;
+                                }
+                                match = false;
+                                break;
+                            }
+                            char c = filteredCharAt(text, curs++);
+                            if (c != affixes[ipat + prefixLen + i]) {
+                                match = false;
+                                break;
+                            }
+                        }
+
+                        if (match) {
+                            // At this point, we have a match
+                            text.replace(cursor, curs, String.valueOf(u));
+                            limit -= curs - cursor - 1;
+                            // The following break statement leaves the
+                            // loop that is traversing the specs in
+                            // affixes.  We then parse the next input
+                            // character.
+                            break;
+                        }
+                    }
+                }
+
+                ipat += prefixLen + suffixLen;
            }

-            c = filteredCharAt(text, cursor);
-            char d = filteredCharAt(text, cursor + 1);
-            if (((c == 'U' || c == 'u') && d == '+')
-                || (c == '\\' && (d == 'U' || d == 'u'))) {
-                
-                // At this point, we have a match; replace cursor..cursor+5
-                // with u.
-                text.replace(cursor, cursor+6, String.valueOf((char) u));
-                limit -= 5;
-                maxCursor -= 5;
-
-                ++cursor;
-            } else {
-                cursor += 6;
-            }
+            ++cursor;
        }

        offsets.limit = limit;
--- a/icu4j/src/com/ibm/text/UnicodeToHexTransliterator.java
+++ b/icu4j/src/com/ibm/text/UnicodeToHexTransliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UnicodeToHexTransliterator.java,v $ 
- * $Date: 2000/03/10 04:07:25 $ 
- * $Revision: 1.5 $
+ * $Date: 2000/03/22 01:59:55 $ 
+ * $Revision: 1.6 $
 *
 *****************************************************************************************
 */
@ -19,47 +19,226 @@ import java.util.*;
 * prefix specified in the constructor and optionally converts the hex
 * digits to uppercase.
 *
- * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ * <p>The format of the output is set by a pattern.  This pattern
+ * follows the same syntax as <code>HexToUnicodeTransliterator</code>,
+ * except it does not allow multiple specifications.  The pattern sets
+ * the prefix string, suffix string, and minimum and maximum digit
+ * count.  There are no setters or getters for these attributes; they
+ * are set only through the pattern.
+ *
+ * <p>The setUppercase() and isUppercase() methods control whether 'a'
+ * through 'f' or 'A' through 'F' are output as hex digits.  This is
+ * not controlled through the pattern; only through the methods.  The
+ * default is uppercase.
 *
 * @author Alan Liu
- * @version $RCSfile: UnicodeToHexTransliterator.java,v $ $Revision: 1.5 $ $Date: 2000/03/10 04:07:25 $
+ * @version $RCSfile: UnicodeToHexTransliterator.java,v $ $Revision: 1.6 $ $Date: 2000/03/22 01:59:55 $
 */
 public class UnicodeToHexTransliterator extends Transliterator {

-    /**
-     * Package accessible ID for this transliterator.
-     */
-    static String _ID = "Unicode-Hex";
-
-    private String prefix;
-
-    private boolean uppercase;
-
    private static final String COPYRIGHT =
        "\u00A9 IBM Corporation 1999. All rights reserved.";

+    /**
+     * Package accessible ID for this transliterator.
+     */
+    static final String _ID = "Unicode-Hex";
+
+    private static final char[] HEX_DIGITS = {
+        '0', '1', '2', '3', '4', '5', '6', '7',
+        '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
+        '0', '1', '2', '3', '4', '5', '6', '7',
+        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+    };
+
+    // Character constants for special pattern chars
+    private static final char ZERO      = '0';
+    private static final char POUND     = '#';
+    private static final char BACKSLASH = '\\';
+
+    /**
+     * The pattern set by applyPattern() and returned by toPattern().
+     */
+    private String pattern;
+
+    /**
+     * The string preceding the hex digits, parsed from the pattern.
+     */
+    private String prefix;
+
+    /**
+     * The string following the hex digits, parsed from the pattern.
+     */
+    private String suffix;
+
+    /**
+     * The minimum number of hex digits to output, between 1 and 4,
+     * inclusive.  Parsed from the pattern.
+     */
+    private int minDigits;
+
+    /**
+     * If true, output uppercase hex digits; otherwise output
+     * lowercase.  Set by setUppercase() and returned by isUppercase().
+     */
+    private boolean uppercase;
+
    /**
     * Constructs a transliterator.
-     * @param prefix the string that will precede the four hex
-     * digits for UNICODE_HEX transliterators.  Ignored
-     * if direction is HEX_UNICODE.
+     * @param pattern The pattern for this transliterator.  See
+     * applyPattern() for pattern syntax.
     * @param uppercase if true, the four hex digits will be
     * converted to uppercase; otherwise they will be lowercase.
     * Ignored if direction is HEX_UNICODE.
+     * @param filter the filter for this transliterator, or
+     * null if none.
     */
-    public UnicodeToHexTransliterator(String prefix, boolean uppercase,
+    public UnicodeToHexTransliterator(String pattern, boolean uppercase,
                                      UnicodeFilter filter) {
        super(_ID, filter);
-        this.prefix = prefix;
        this.uppercase = uppercase;
+        applyPattern(pattern);
+    }
+
+    /**
+     * Constructs an uppercase transliterator with no filter.
+     * @param pattern The pattern for this transliterator.  See
+     * applyPattern() for pattern syntax.
+     */
+    public UnicodeToHexTransliterator(String pattern) {
+        this(pattern, true, null);
    }

    /**
     * Constructs a transliterator with the default prefix "&#092;u"
-     * that outputs uppercase hex digits.
+     * that outputs four uppercase hex digits.
     */
    public UnicodeToHexTransliterator() {
-        this("\\u", true, null);
+        super(_ID, null);
+        pattern = "\\\\u0000";
+        prefix = "\\u";
+        suffix = "";
+        minDigits = 4;
+        uppercase = true;
+    }
+
+    /**
+     * Set the pattern recognized by this transliterator.  The pattern
+     * must contain zero or more prefix characters, one or more digit
+     * characters, and zero or more suffix characters.  The digit
+     * characters indicates optional digits ('#') followed by required
+     * digits ('0').  The total number of digits cannot exceed 4, and
+     * must be at least 1 required digit.  Use a backslash ('\\') to
+     * escape any of the special characters.  An empty pattern is not
+     * allowed.
+     *
+     * <p>Example: "U+0000" specifies a prefix of "U+", exactly four
+     * digits, and no suffix.  "<###0>" has a prefix of "<", between
+     * one and four digits, and a suffix of ">".
+     *
+     * <p><pre>
+     * pattern := prefix-char* digit-spec suffix-char*
+     * digit-spec := '#'* '0'+
+     * prefix-char := [^special-char] | '\\' special-char
+     * suffix-char := [^special-char] | '\\' special-char
+     * special-char := ';' | '0' | '#' | '\\'
+     * </pre>
+     *
+     * <p>Limitations: There is no way to set the uppercase attribute
+     * in the pattern.  (applyPattern() does not alter the uppercase
+     * attribute.)
+     */
+    public void applyPattern(String thePattern) {
+        StringBuffer prefixBuf = null;
+        StringBuffer suffixBuf = null;
+        int minDigits = 0;
+        int maxDigits = 0;
+
+        /* The mode specifies where we are in the pattern.
+         * mode 0 = in prefix
+         * mode 1 = in optional digits (#)
+         * mode 2 = in required digits (0)
+         * mode 3 = in suffix
+         */
+        int mode = 0;
+
+        for (int i=0; i<thePattern.length(); ++i) {
+            char c = thePattern.charAt(i);
+            boolean isLiteral = false;
+            if (c == BACKSLASH) {
+                if ((i+1)<thePattern.length()) {
+                    isLiteral = true;
+                    c = thePattern.charAt(++i);
+                } else {
+                    // A lone backslash at the end has nothing to escape
+                    throw new IllegalArgumentException("Trailing '\\'");
+                }
+            }
+
+            if (!isLiteral) {
+                switch (c) {
+                case POUND:
+                    // Seeing a '#' moves us from mode 0 (prefix) to mode 1
+                    // (optional digits).
+                    if (mode == 0) {
+                        ++mode;
+                    } else if (mode != 1) {
+                        // '#' after a '0' or in the suffix must be escaped
+                        throw new IllegalArgumentException("Unquoted '#'");
+                    }
+                    ++maxDigits;
+                    break;
+                case ZERO:
+                    // Seeing a '0' moves us to mode 2 (required digits)
+                    if (mode < 2) {
+                        mode = 2;
+                    } else if (mode != 2) {
+                        // '0' in the suffix must be escaped
+                        throw new IllegalArgumentException("Unquoted '0'");
+                    }
+                    ++minDigits;
+                    ++maxDigits;
+                    break;
+                default: // anything else, including an unescaped ';', is a literal
+                    isLiteral = true;
+                    break;
+                }
+            }
+
+            if (isLiteral) {
+                if (mode == 0) {
+                    if (prefixBuf == null) {
+                        prefixBuf = new StringBuffer();
+                    }
+                    prefixBuf.append(c);
+                } else {
+                    // Any literal outside the prefix moves us into mode 3
+                    // (suffix)
+                    mode = 3;
+                    if (suffixBuf == null) {
+                        suffixBuf = new StringBuffer();
+                    }
+                    suffixBuf.append(c);
+                }
+            }
+        }
+
+        if (minDigits < 1 || maxDigits > 4) {
+            // Need at least one '0' and at most 4 digits; maxDigits is only checked here
+            throw new IllegalArgumentException("Invalid min/max digit count");
+        }
+
+        pattern = thePattern;
+        prefix = (prefixBuf == null) ? "" : prefixBuf.toString();
+        suffix = (suffixBuf == null) ? "" : suffixBuf.toString();
+        this.minDigits = minDigits;
+    }
+
+    /**
+     * Return this transliterator's pattern.
+     */
+    public String toPattern() {
+        return pattern;
    }

    /**
@ -116,16 +295,28 @@ public class UnicodeToHexTransliterator extends Transliterator {
        int limit = offsets.limit;

        UnicodeFilter filter = getFilter();
+        StringBuffer hex = new StringBuffer(prefix);
+        int prefixLen = prefix.length();

-    loop:
        while (cursor < limit) {
            char c = text.charAt(cursor);
            if (filter != null && !filter.contains(c)) {
                ++cursor;
                continue;
            }
-            String hex = hex(c);
-            text.replace(cursor, cursor+1, hex);
+
+            hex.setLength(prefixLen);
+            boolean showRest = false;
+            for (int i=3; i>=0; --i) {
+                int d = (c >> (i*4)) & 0xF;
+                if (showRest || (d != 0) || minDigits > i) {
+                    hex.append(HEX_DIGITS[uppercase ? (d|16) : d]);
+                    showRest = true;
+                }
+            }
+            hex.append(suffix);
+
+            text.replace(cursor, cursor+1, hex.toString());
            int len = hex.length();
            cursor += len; // Advance cursor by 1 and adjust for new text
            --len;
@ -135,24 +326,4 @@ public class UnicodeToHexTransliterator extends Transliterator {
        offsets.limit = limit;
        offsets.cursor = cursor;
    }
-
-    /**
-     * Form escape sequence.
-     */
-    private final String hex(char c) {
-        StringBuffer buf = new StringBuffer();
-        buf.append(prefix);
-        if (c < 0x1000) {
-            buf.append('0');
-            if (c < 0x100) {
-                buf.append('0');
-                if (c < 0x10) {
-                    buf.append('0');
-                }
-            }
-        } 
-        String h = Integer.toHexString(c);
-        buf.append(uppercase ? h.toUpperCase() : h);
-        return buf.toString();
-    }
 }