ICU-1225 add new Escape and Unescape transliterators to replace UnicodeToHex and HexToUnicode; register several Any-Hex and Hex-Any variants

X-SVN-Rev: 7049
2025-04-09 15:27:38 +00:00 · 2001-11-21 07:02:15 +00:00 · 2001-11-21 07:02:15 +00:00 · 76b369219e
commit 76b369219e
parent 8986e13b6a
7 changed files with 697 additions and 5 deletions
--- a/icu4c/source/i18n/Makefile.in
+++ b/icu4c/source/i18n/Makefile.in
@ -71,7 +71,7 @@ cpdtrans.o hextouni.o rbt.o rbt_data.o rbt_pars.o rbt_rule.o rbt_set.o \
 dbbi.o dbbi_tbl.o rbbi.o rbbi_tbl.o nultrans.o \
 remtrans.o titletrn.o tolowtrn.o toupptrn.o xformtrn.o \
 name2uni.o uni2name.o unitohex.o nortrans.o unifilt.o quant.o transreg.o \
-llong.o nfrs.o nfrule.o nfsubs.o rbnf.o upropset.o util.o
+llong.o nfrs.o nfrule.o nfsubs.o rbnf.o upropset.o util.o esctrn.o unesctrn.o



--- a/icu4c/source/i18n/esctrn.cpp
+++ b/icu4c/source/i18n/esctrn.cpp
@ -0,0 +1,167 @@
+/*
+**********************************************************************
+*   Copyright (c) 2001, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/19/2001  aliu        Creation.
+**********************************************************************
+*/
+
+#include "esctrn.h"
+#include "util.h"
+
+U_NAMESPACE_BEGIN
+
+static const UChar UNIPRE[] = {85,43,0}; // "U+"
+static const UChar BS_u[] = {92,117,0}; // "\\u"
+static const UChar BS_U[] = {92,85,0}; // "\\U"
+static const UChar XMLPRE[] = {38,35,120,0}; // "&#x"
+static const UChar XML10PRE[] = {38,35,0}; // "&#"
+static const UChar PERLPRE[] = {92,120,123,0}; // "\\x{"
+static const UChar SEMI[] = {59,0}; // ";"
+static const UChar RBRACE[] = {125,0}; // "}"
+static const UChar EMPTY[] = {0}; // ""
+
+/**
+ * Factory methods, one per variant registered by registerIDs().
+ * Each returns a newly allocated EscapeTransliterator that carries
+ * the given ID; the context token is not used.  The constructor
+ * arguments after the ID are: prefix, suffix, radix, minDigits,
+ * grokSupplementals, and the adopted supplemental handler.
+ */
+Transliterator* EscapeTransliterator::_createUnicode(const UnicodeString& ID, Token context) {
+    // Unicode: "U+10FFFF" hex, min=4, max=6
+    return new EscapeTransliterator(ID, UNIPRE, EMPTY, 16, 4, TRUE, NULL);
+}
+Transliterator* EscapeTransliterator::_createJava(const UnicodeString& ID, Token context) {
+    // Java: "\\uFFFF" hex, min=4, max=4
+    // grokSupplementals is FALSE, so text is escaped one 16-bit code
+    // unit at a time.
+    return new EscapeTransliterator(ID, BS_u, EMPTY, 16, 4, FALSE, NULL);
+}
+Transliterator* EscapeTransliterator::_createC(const UnicodeString& ID, Token context) {
+    // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
+    // The nested transliterator (empty ID) only supplies the form
+    // used for supplementals.
+    return new EscapeTransliterator(ID, BS_u, EMPTY, 16, 4, TRUE,
+             new EscapeTransliterator(EMPTY, BS_U, EMPTY, 16, 8, TRUE, NULL));
+}
+Transliterator* EscapeTransliterator::_createXML(const UnicodeString& ID, Token context) {
+    // XML: "&#x10FFFF;" hex, min=1, max=6
+    return new EscapeTransliterator(ID, XMLPRE, SEMI, 16, 1, TRUE, NULL);
+}
+Transliterator* EscapeTransliterator::_createXML10(const UnicodeString& ID, Token context) {
+    // XML10: "&#1114111;" dec, min=1, max=7 (not really "Any-Hex")
+    return new EscapeTransliterator(ID, XML10PRE, SEMI, 10, 1, TRUE, NULL);
+}
+Transliterator* EscapeTransliterator::_createPerl(const UnicodeString& ID, Token context) {
+    // Perl: "\\x{263A}" hex, min=1, max=6
+    return new EscapeTransliterator(ID, PERLPRE, RBRACE, 16, 1, TRUE, NULL);
+}
+
+/**
+ * Installs the factory for every built-in "Any-Hex" variant.
+ * Transliterator invokes this while it is initializing.
+ */
+void EscapeTransliterator::registerIDs() {
+    // Variant IDs paired with their factories, listed in the order in
+    // which they are registered.  Plain "Any-Hex" uses the Java form.
+    static const struct {
+        const char* id;
+        Transliterator* (*create)(const UnicodeString& ID, Token context);
+    } VARIANTS[] = {
+        { "Any-Hex/Unicode", _createUnicode },
+        { "Any-Hex/Java",    _createJava },
+        { "Any-Hex/C",       _createC },
+        { "Any-Hex/XML",     _createXML },
+        { "Any-Hex/XML10",   _createXML10 },
+        { "Any-Hex/Perl",    _createPerl },
+        { "Any-Hex",         _createJava }
+    };
+    const int32_t variantCount = (int32_t)(sizeof(VARIANTS) / sizeof(VARIANTS[0]));
+
+    Token t = integerToken(0);
+    for (int32_t k = 0; k < variantCount; ++k) {
+        Transliterator::_registerFactory(VARIANTS[k].id, VARIANTS[k].create, t);
+    }
+}
+
+/**
+ * Builds an escape transliterator from an ID and the pieces of its
+ * escape form.  Each argument is stored in the member of the same
+ * name.  The supplemental handler may be NULL; it is adopted and is
+ * deleted by the destructor.
+ */
+EscapeTransliterator::EscapeTransliterator(const UnicodeString& ID,
+                         const UnicodeString& prefix, const UnicodeString& suffix,
+                         int32_t radix, int32_t minDigits,
+                         UBool grokSupplementals,
+                         EscapeTransliterator* adoptedSupplementalHandler) :
+    Transliterator(ID, NULL),
+    prefix(prefix),
+    suffix(suffix),
+    radix(radix),
+    minDigits(minDigits),
+    grokSupplementals(grokSupplementals),
+    supplementalHandler(adoptedSupplementalHandler) {
+}
+
+/**
+ * Copy constructor.  The supplemental handler, when present, is
+ * duplicated so that each object owns its own handler.
+ */
+EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) :
+    Transliterator(o),
+    prefix(o.prefix),
+    suffix(o.suffix),
+    radix(o.radix),
+    minDigits(o.minDigits),
+    grokSupplementals(o.grokSupplementals),
+    supplementalHandler(NULL) {
+    if (o.supplementalHandler != NULL) {
+        supplementalHandler = new EscapeTransliterator(*o.supplementalHandler);
+    }
+}
+
+/**
+ * Destructor.  Releases the owned supplemental handler, if any
+ * (deleting a NULL pointer does nothing).
+ */
+EscapeTransliterator::~EscapeTransliterator() {
+    delete supplementalHandler;
+}
+
+/**
+ * Transliterator API.  Returns a newly allocated copy of this object
+ * made with the copy constructor.
+ */
+Transliterator* EscapeTransliterator::clone() const {
+    return new EscapeTransliterator(*this);
+}
+
+/**
+ * Implements {@link Transliterator#handleTransliterate}.  Replaces
+ * every character in [pos.start, pos.limit) with its escape form and
+ * then updates pos.start, pos.limit and pos.contextLimit to reflect
+ * the change in length.  isIncremental is not consulted.
+ */
+void EscapeTransliterator::handleTransliterate(Replaceable& text, UTransPosition& pos,
+                                               UBool isIncremental) const {
+    int32_t cursor = pos.start;
+    int32_t end = pos.limit;
+
+    // Scratch buffer for one escape.  While it still begins with our
+    // own prefix it only needs to be cut back to that prefix before
+    // the next number is appended.
+    UnicodeString escaped(prefix);
+    const int32_t ownPrefixLen = prefix.length();
+    UBool startsWithOwnPrefix = TRUE;
+
+    while (cursor < end) {
+        // Read one code point, or one code unit when supplementals
+        // are not to be combined.
+        int32_t cp;
+        int32_t width;
+        if (grokSupplementals) {
+            cp = text.char32At(cursor);
+            width = UTF_CHAR_LENGTH(cp);
+        } else {
+            cp = text.charAt(cursor);
+            width = 1;
+        }
+
+        // Values that do not fit in 16 bits use the supplemental
+        // form, provided one was supplied.
+        const EscapeTransliterator* form = this;
+        if (supplementalHandler != NULL && (cp & 0xFFFF0000) != 0) {
+            form = supplementalHandler;
+        }
+
+        // Get the buffer to hold exactly the chosen form's prefix.
+        if (form != this) {
+            escaped.truncate(0);
+            escaped.append(form->prefix);
+            startsWithOwnPrefix = FALSE;
+        } else if (startsWithOwnPrefix) {
+            escaped.truncate(ownPrefixLen);
+        } else {
+            escaped.truncate(0);
+            escaped.append(prefix);
+            startsWithOwnPrefix = TRUE;
+        }
+        Utility::appendNumber(escaped, cp, form->radix, form->minDigits);
+        escaped.append(form->suffix);
+
+        text.handleReplaceBetween(cursor, cursor + width, escaped);
+        const int32_t escapedLen = escaped.length();
+        cursor += escapedLen;
+        end += escapedLen - width;
+    }
+
+    pos.contextLimit += end - pos.limit;
+    pos.limit = end;
+    pos.start = cursor;
+}
+
+U_NAMESPACE_END
+
+//eof
--- a/icu4c/source/i18n/esctrn.h
+++ b/icu4c/source/i18n/esctrn.h
@ -0,0 +1,138 @@
+/*
+**********************************************************************
+*   Copyright (c) 2001, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/20/2001  aliu        Creation.
+**********************************************************************
+*/
+#ifndef ESCTRN_H
+#define ESCTRN_H
+
+#include "unicode/translit.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A transliterator that converts Unicode characters to an escape
+ * form.  Examples of escape forms are "U+4E01" and "&#x10FFFF;".
+ * Escape forms have a prefix and suffix, either of which may be
+ * empty, a radix, typically 16 or 10, a minimum digit count,
+ * typically 1, 4, or 8, and a boolean that specifies whether
+ * supplemental characters are handled as 32-bit code points or as two
+ * 16-bit code units.  Most escape forms handle 32-bit code points,
+ * but some, such as the Java form, intentionally break them into two
+ * surrogate code units, for backward compatibility.
+ *
+ * <p>Some escape forms actually have two different patterns, one for
+ * BMP characters (0..FFFF) and one for supplements (>FFFF).  To
+ * handle this, a second EscapeTransliterator may be defined that
+ * specifies the pattern to be produced for supplementals.  An example
+ * of a form that requires this is the C form, which uses "\\uFFFF"
+ * for BMP characters and "\\U0010FFFF" for supplementals.
+ *
+ * <p>This class is package private.  It registers several standard
+ * variants with the system which are then accessed via their IDs.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: esctrn.h,v $ $Revision: 1.1 $ $Date: 2001/11/21 07:02:14 $
+ */
+class U_I18N_API EscapeTransliterator : public Transliterator {
+
+ private:
+
+    /**
+     * The prefix of the escape form; may be empty, but usually isn't.
+     */
+    UnicodeString prefix;
+
+    /**
+     * The suffix of the escape form; often empty.
+     */
+    UnicodeString suffix;
+
+    /**
+     * The radix to display the number in.  Typically 16 or 10.  Must
+     * be in the range 2 to 36.
+     */
+    int32_t radix;
+
+    /**
+     * The minimum number of digits.  Typically 1, 4, or 8.  Values
+     * less than 1 are equivalent to 1.
+     */
+    int32_t minDigits;
+
+    /**
+     * If true, supplementals are handled as 32-bit code points.  If
+     * false, they are handled as two 16-bit code units.
+     */
+    UBool grokSupplementals;
+
+    /**
+     * The form to be used for supplementals.  If this is null then
+     * the same form is used for BMP characters and supplementals.  If
+     * this is not null and if grokSupplementals is true then the
+     * prefix, suffix, radix, and minDigits of this object are used
+     * for supplementals.  This pointer is owned.
+     */
+    EscapeTransliterator* supplementalHandler;
+
+ public:
+
+    /**
+     * Registers standard variants with the system.  Called by
+     * Transliterator during initialization.
+     */
+    static void registerIDs();
+
+    /**
+     * Constructs an escape transliterator with the given ID and
+     * parameters.  See the class member documentation for details.
+     * The supplemental handler, which may be null, is adopted.
+     */
+    EscapeTransliterator(const UnicodeString& ID,
+                         const UnicodeString& prefix, const UnicodeString& suffix,
+                         int32_t radix, int32_t minDigits,
+                         UBool grokSupplementals,
+                         EscapeTransliterator* adoptedSupplementalHandler);
+
+    /**
+     * Copy constructor.
+     * NOTE(review): no assignment operator is declared here even
+     * though supplementalHandler is an owned pointer -- confirm that
+     * assignment is blocked elsewhere or is never used.
+     */
+    EscapeTransliterator(const EscapeTransliterator&);
+
+    /**
+     * Destructor.
+     */
+    virtual ~EscapeTransliterator();
+
+    /**
+     * Transliterator API.
+     */
+    virtual Transliterator* clone() const;
+
+ protected:
+
+    /**
+     * Implements {@link Transliterator#handleTransliterate}.
+     */
+    void handleTransliterate(Replaceable& text, UTransPosition& offset,
+                             UBool isIncremental) const;
+
+ private:
+
+    /**
+     * Factory methods
+     */
+    static Transliterator* _createUnicode(const UnicodeString& ID, Token context);
+    static Transliterator* _createJava(const UnicodeString& ID, Token context);
+    static Transliterator* _createC(const UnicodeString& ID, Token context);
+    static Transliterator* _createXML(const UnicodeString& ID, Token context);
+    static Transliterator* _createXML10(const UnicodeString& ID, Token context);
+    static Transliterator* _createPerl(const UnicodeString& ID, Token context);
+};
+
+U_NAMESPACE_END
+
+#endif
--- a/icu4c/source/i18n/i18n.dsp
+++ b/icu4c/source/i18n/i18n.dsp
@ -150,6 +150,10 @@ SOURCE=.\dtfmtsym.cpp
 # End Source File
 # Begin Source File

+SOURCE=.\esctrn.cpp
+# End Source File
+# Begin Source File
+
 SOURCE=.\fmtable.cpp
 # End Source File
 # Begin Source File
@ -342,6 +346,10 @@ SOURCE=.\umsg.cpp
 # End Source File
 # Begin Source File

+SOURCE=.\unesctrn.cpp
+# End Source File
+# Begin Source File
+
 SOURCE=.\uni2name.cpp
 # End Source File
 # Begin Source File
--- a/icu4c/source/i18n/translit.cpp
+++ b/icu4c/source/i18n/translit.cpp
@ -16,7 +16,6 @@
 #include "transreg.h"
 #include "ucln_in.h"
 #include "unicode/cpdtrans.h"
-#include "unicode/hextouni.h"
 #include "unicode/locid.h"
 #include "unicode/msgfmt.h"
 #include "name2uni.h"
@ -35,8 +34,9 @@
 #include "unicode/unifilt.h"
 #include "unicode/unifltlg.h"
 #include "unicode/uniset.h"
-#include "unicode/unitohex.h"
 #include "unicode/uscript.h"
+#include "esctrn.h"
+#include "unesctrn.h"


 // keep in sync with CompoundTransliterator
@ -1690,8 +1690,6 @@ void Transliterator::initializeRegistry(void) {
    // cache.  This is how new non-rule-based transliterators are
    // added to the system.

-    registry->put(new HexToUnicodeTransliterator(), TRUE);
-    registry->put(new UnicodeToHexTransliterator(), TRUE);
    registry->put(new NullTransliterator(), TRUE);
    registry->put(new RemoveTransliterator(), TRUE);
    registry->put(new LowercaseTransliterator(), TRUE);
@ -1701,6 +1699,8 @@ void Transliterator::initializeRegistry(void) {
    _registerSpecialInverse("Title", "Lower", FALSE);
    registry->put(new UnicodeNameTransliterator(), TRUE);
    registry->put(new NameUnicodeTransliterator(), TRUE);
+    EscapeTransliterator::registerIDs();
+    UnescapeTransliterator::registerIDs();
    NormalizationTransliterator::registerIDs();
    ucln_i18n_registerCleanup();
 }
--- a/icu4c/source/i18n/unesctrn.cpp
+++ b/icu4c/source/i18n/unesctrn.cpp
@ -0,0 +1,278 @@
+/*
+**********************************************************************
+*   Copyright (c) 2001, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/19/2001  aliu        Creation.
+**********************************************************************
+*/
+
+#include "unesctrn.h"
+#include "util.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Special character marking the end of the spec[] array.
+ */
+static const UChar END = 0xFFFF;
+
+// Layout of each form in the SPEC_* arrays below, as read back by
+// handleTransliterate():
+//   prefixLen, suffixLen, radix, minDigits, maxDigits,
+//   <prefixLen prefix characters>, <suffixLen suffix characters>
+// A single END follows the last form of each array.
+
+// Unicode: "U+10FFFF" hex, min=4, max=6
+static const UChar SPEC_Unicode[] = {
+    2, 0, 16, 4, 6, 85/*U*/, 43/*+*/,
+    END
+};
+
+// Java: "\\uFFFF" hex, min=4, max=4
+static const UChar SPEC_Java[] = {
+    2, 0, 16, 4, 4, 92/*\*/, 117/*u*/,
+    END
+};
+
+// C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
+static const UChar SPEC_C[] = {
+    2, 0, 16, 4, 4, 92/*\*/, 117/*u*/,
+    2, 0, 16, 8, 8, 92/*\*/, 85/*U*/,
+    END
+};
+
+// XML: "&#x10FFFF;" hex, min=1, max=6
+static const UChar SPEC_XML[] = {
+    3, 1, 16, 1, 6, 38/*&*/, 35/*#*/, 120/*x*/, 59/*;*/,
+    END
+};
+
+// XML10: "&#1114111;" dec, min=1, max=7 (not really "Hex-Any")
+static const UChar SPEC_XML10[] = {
+    2, 1, 10, 1, 7, 38/*&*/, 35/*#*/, 59/*;*/,
+    END
+};
+
+// Perl: "\\x{263A}" hex, min=1, max=6
+static const UChar SPEC_Perl[] = {
+    3, 1, 16, 1, 6, 92/*\*/, 120/*x*/, 123/*{*/, 125/*}*/,
+    END
+};
+
+// All: Unicode, Java, C, XML, XML10, Perl (the forms are tried in
+// this order)
+static const UChar SPEC_Any[] = {
+    2, 0, 16, 4, 6, 85/*U*/, 43/*+*/,                      // Unicode
+    2, 0, 16, 4, 4, 92/*\*/, 117/*u*/,                     // Java
+    2, 0, 16, 8, 8, 92/*\*/, 85/*U*/,                      // C (surrogates)
+    3, 1, 16, 1, 6, 38/*&*/, 35/*#*/, 120/*x*/, 59/*;*/,   // XML
+    2, 1, 10, 1, 7, 38/*&*/, 35/*#*/, 59/*;*/,             // XML10
+    3, 1, 16, 1, 6, 92/*\*/, 120/*x*/, 123/*{*/, 125/*}*/, // Perl
+    END
+};
+
+/**
+ * Factory methods, one per variant registered by registerIDs().
+ * Each returns a newly allocated UnescapeTransliterator built from
+ * the given ID and the matching SPEC_* table; the context token is
+ * not used.  _createAny uses SPEC_Any, which lists every form.
+ */
+Transliterator* UnescapeTransliterator::_createUnicode(const UnicodeString& ID, Token context) {
+    return new UnescapeTransliterator(ID, SPEC_Unicode);
+}
+Transliterator* UnescapeTransliterator::_createJava(const UnicodeString& ID, Token context) {
+    return new UnescapeTransliterator(ID, SPEC_Java);
+}
+Transliterator* UnescapeTransliterator::_createC(const UnicodeString& ID, Token context) {
+    return new UnescapeTransliterator(ID, SPEC_C);
+}
+Transliterator* UnescapeTransliterator::_createXML(const UnicodeString& ID, Token context) {
+    return new UnescapeTransliterator(ID, SPEC_XML);
+}
+Transliterator* UnescapeTransliterator::_createXML10(const UnicodeString& ID, Token context) {
+    return new UnescapeTransliterator(ID, SPEC_XML10);
+}
+Transliterator* UnescapeTransliterator::_createPerl(const UnicodeString& ID, Token context) {
+    return new UnescapeTransliterator(ID, SPEC_Perl);
+}
+Transliterator* UnescapeTransliterator::_createAny(const UnicodeString& ID, Token context) {
+    return new UnescapeTransliterator(ID, SPEC_Any);
+}
+
+/**
+ * Installs the factory for every built-in "Hex-Any" variant.
+ * Transliterator invokes this while it is initializing.
+ */
+void UnescapeTransliterator::registerIDs() {
+    // Variant IDs paired with their factories, listed in the order in
+    // which they are registered.  Plain "Hex-Any" accepts all forms.
+    static const struct {
+        const char* id;
+        Transliterator* (*create)(const UnicodeString& ID, Token context);
+    } VARIANTS[] = {
+        { "Hex-Any/Unicode", _createUnicode },
+        { "Hex-Any/Java",    _createJava },
+        { "Hex-Any/C",       _createC },
+        { "Hex-Any/XML",     _createXML },
+        { "Hex-Any/XML10",   _createXML10 },
+        { "Hex-Any/Perl",    _createPerl },
+        { "Hex-Any",         _createAny }
+    };
+    const int32_t variantCount = (int32_t)(sizeof(VARIANTS) / sizeof(VARIANTS[0]));
+
+    Token t = integerToken(0);
+    for (int32_t k = 0; k < variantCount; ++k) {
+        Transliterator::_registerFactory(VARIANTS[k].id, VARIANTS[k].create, t);
+    }
+}
+
+/**
+ * Constructor.  Keeps a private copy of the encoded spec array, made
+ * with copySpec(); the caller's array is not adopted.
+ */
+UnescapeTransliterator::UnescapeTransliterator(const UnicodeString& ID,
+                                               const UChar *spec) :
+    Transliterator(ID, NULL),
+    spec(copySpec(spec)) {
+}
+
+/**
+ * Copy constructor.  Duplicates the other object's spec array so
+ * that each object owns its own copy.
+ */
+UnescapeTransliterator::UnescapeTransliterator(const UnescapeTransliterator& o) :
+    Transliterator(o),
+    spec(copySpec(o.spec)) {
+}
+
+/**
+ * Destructor.  spec is always produced by copySpec(), which allocates
+ * it with new[], so it has to be released with delete[].
+ */
+UnescapeTransliterator::~UnescapeTransliterator() {
+    delete[] spec;
+}
+
+/**
+ * Transliterator API.  Returns a newly allocated copy of this object
+ * made with the copy constructor.
+ */
+Transliterator* UnescapeTransliterator::clone() const {
+    return new UnescapeTransliterator(*this);
+}
+
+/**
+ * Returns a newly allocated (new[]) duplicate of an encoded spec
+ * array.  The copy covers everything up to and including the first
+ * END value.
+ */
+UChar* UnescapeTransliterator::copySpec(const UChar* spec) {
+    // Walk forward to the terminator.
+    const UChar* stop = spec;
+    while (*stop != END) {
+        ++stop;
+    }
+    // Number of elements to copy, terminator included.
+    const int32_t count = (int32_t)(stop - spec) + 1;
+    UChar* duplicate = new UChar[count];
+    uprv_memcpy(duplicate, spec, count * sizeof(UChar));
+    return duplicate;
+}
+
+/**
+ * Implements {@link Transliterator#handleTransliterate}.  Scans
+ * [pos.start, pos.limit) and, at each position, tries the forms in
+ * spec[] in order.  The first form whose prefix, digits and suffix
+ * all match is replaced by the single code point it denotes.  When
+ * isIncremental is true and the text runs out part-way through a
+ * possible escape, scanning stops with pos.start left at the start
+ * of that partial escape.  On return pos.start, pos.limit and
+ * pos.contextLimit reflect the text consumed and the change in
+ * length.
+ */
+void UnescapeTransliterator::handleTransliterate(Replaceable& text, UTransPosition& pos,
+                                                 UBool isIncremental) const {
+    int32_t start = pos.start;
+    int32_t limit = pos.limit;
+    // NOTE(review): j counts the forms visited but is never read.
+    int32_t i, j, ipat;
+    UnicodeString str;
+
+    while (start < limit) {
+        // Loop over the forms in spec[].  Exit this loop when we
+        // match one of the specs.  Exit the outer loop if a
+        // partial match is detected and isIncremental is true.
+        for (j=0, ipat=0; spec[ipat] != END; ++j) {
+
+            // Read the header
+            int32_t prefixLen = spec[ipat++];
+            int32_t suffixLen = spec[ipat++];
+            int8_t  radix     = (int8_t) spec[ipat++];
+            int32_t minDigits = spec[ipat++];
+            int32_t maxDigits = spec[ipat++];
+            // ipat now indexes the first prefix character; the suffix
+            // characters follow the prefix.
+
+            // s is a copy of start that is advanced over the
+            // characters as we parse them.
+            int32_t s = start;
+            UBool match = TRUE;
+
+            for (i=0; i<prefixLen; ++i) {
+                if (s >= limit) {
+                    if (i > 0) {
+                        // We've already matched a character.  This is
+                        // a partial match, so we return if in
+                        // incremental mode.  In non-incremental mode,
+                        // go to the next spec.
+                        if (isIncremental) {
+                            goto exit;
+                        }
+                        match = FALSE;
+                        break;
+                    }
+                }
+                UChar c = text.charAt(s++);
+                if (c != spec[ipat + i]) {
+                    match = FALSE;
+                    break;
+                }
+            }
+
+            if (match) {
+                // Accumulate up to maxDigits digits in the given radix.
+                // NOTE(review): u is not range-checked before it is
+                // appended below; 8 hex digits or 7 decimal digits
+                // can exceed 0x10FFFF -- confirm how append() treats
+                // such values.
+                UChar32 u = 0;
+                int32_t digitCount = 0;
+                for (;;) {
+                    if (s >= limit) {
+                        // Check for partial match in incremental mode.
+                        if (s > start && isIncremental) {
+                            goto exit;
+                        }
+                        break;
+                    }
+                    UChar32 ch = text.char32At(s);
+                    int32_t digit = u_digit(ch, radix);
+                    if (digit < 0) {
+                        break;
+                    }
+                    s += UTF_CHAR_LENGTH(ch);
+                    u = (u * radix) + digit;
+                    if (++digitCount == maxDigits) {
+                        break;
+                    }
+                }
+
+                match = (digitCount >= minDigits);
+
+                if (match) {
+                    for (i=0; i<suffixLen; ++i) {
+                        if (s >= limit) {
+                            // Check for partial match in incremental mode.
+                            if (s > start && isIncremental) {
+                                goto exit;
+                            }
+                            match = FALSE;
+                            break;
+                        }
+                        UChar c = text.charAt(s++);
+                        if (c != spec[ipat + prefixLen + i]) {
+                            match = FALSE;
+                            break;
+                        }
+                    }
+
+                    if (match) {
+                        // At this point, we have a match
+                        str.truncate(0);
+                        str.append(u);
+                        text.handleReplaceBetween(start, s, str);
+                        // Shrink limit by the number of code units
+                        // the replacement removed.
+                        limit -= s - start - str.length();
+                        // The following break statement leaves the
+                        // loop that is traversing the forms in
+                        // spec[].  We then parse the next input
+                        // character.
+                        break;
+                    }
+                }
+            }
+
+            // No match for this form: skip its prefix and suffix
+            // characters to reach the next header (or END).
+            ipat += prefixLen + suffixLen;
+        }
+
+        // Step over one code point: either the replacement that was
+        // just written or a character that matched no form.
+        if (start < limit) {
+            start += UTF_CHAR_LENGTH(text.char32At(start));
+        }
+    }
+
+  exit:
+    pos.contextLimit += limit - pos.limit;
+    pos.limit = limit;
+    pos.start = start;
+}
+
+U_NAMESPACE_END
+
+//eof
--- a/icu4c/source/i18n/unesctrn.h
+++ b/icu4c/source/i18n/unesctrn.h
@ -0,0 +1,101 @@
+/*
+**********************************************************************
+*   Copyright (c) 2001, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/20/2001  aliu        Creation.
+**********************************************************************
+*/
+#ifndef UNESCTRN_H
+#define UNESCTRN_H
+
+#include "unicode/translit.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A transliterator that converts Unicode escape forms to the
+ * characters they represent.  Escape forms have a prefix, a suffix, a
+ * radix, and minimum and maximum digit counts.
+ *
+ * <p>This class is package private.  It registers several standard
+ * variants with the system which are then accessed via their IDs.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: unesctrn.h,v $ $Revision: 1.1 $ $Date: 2001/11/21 07:02:15 $
+ */
+class U_I18N_API UnescapeTransliterator : public Transliterator {
+
+ private:
+
+    /**
+     * The encoded pattern specification.  The pattern consists of
+     * zero or more forms.  Each form consists of a prefix, suffix,
+     * radix, minimum digit count, and maximum digit count.  These
+     * values are stored as a five character header.  That is, their
+     * numeric values are cast to 16-bit characters and stored in the
+     * string.  Following these five characters, the prefix
+     * characters, then suffix characters are stored.  Each form thus
+     * takes n+5 characters, where n is the total length of the prefix
+     * and suffix.  The end is marked by a header of length one
+     * consisting of the character END.
+     */
+    UChar* spec; // owned; allocated by copySpec(); may not be NULL
+
+ public:
+
+    /**
+     * Registers standard variants with the system.  Called by
+     * Transliterator during initialization.
+     */
+    static void registerIDs();
+
+    /**
+     * Constructor.  Takes the encoded spec array (does not adopt it).
+     */
+    UnescapeTransliterator(const UnicodeString& ID,
+                           const UChar *spec);
+
+    /**
+     * Copy constructor.
+     * NOTE(review): no assignment operator is declared here even
+     * though spec is an owned pointer -- confirm that assignment is
+     * blocked elsewhere or is never used.
+     */
+    UnescapeTransliterator(const UnescapeTransliterator&);
+
+    /**
+     * Destructor.
+     */
+    virtual ~UnescapeTransliterator();
+
+    /**
+     * Transliterator API.
+     */
+    virtual Transliterator* clone() const;
+
+ protected:
+
+    /**
+     * Implements {@link Transliterator#handleTransliterate}.
+     */
+    void handleTransliterate(Replaceable& text, UTransPosition& offset,
+                             UBool isIncremental) const;
+
+ private:
+
+    /**
+     * Factory methods
+     */
+    static Transliterator* _createUnicode(const UnicodeString& ID, Token context);
+    static Transliterator* _createJava(const UnicodeString& ID, Token context);
+    static Transliterator* _createC(const UnicodeString& ID, Token context);
+    static Transliterator* _createXML(const UnicodeString& ID, Token context);
+    static Transliterator* _createXML10(const UnicodeString& ID, Token context);
+    static Transliterator* _createPerl(const UnicodeString& ID, Token context);
+    static Transliterator* _createAny(const UnicodeString& ID, Token context);
+
+    /**
+     * Returns a newly allocated copy of the given spec array, up to
+     * and including its END marker.
+     */
+    static UChar* copySpec(const UChar* spec);
+};
+
+U_NAMESPACE_END
+
+#endif