ICU-74 finish port of Source-Target/Variant code incl. TransliteratorRegistry and tests

X-SVN-Rev: 6084
2025-04-19 11:45:45 +00:00 · 2001-10-05 18:16:59 +00:00 · 2001-10-05 18:16:59 +00:00 · 5746e4c2fc
commit 5746e4c2fc
parent 50408eca3a
8 changed files with 2030 additions and 2244 deletions
--- a/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
- * $Date: 2001/10/05 06:30:38 $
- * $Revision: 1.49 $
+ * $Date: 2001/10/05 18:16:59 $
+ * $Revision: 1.50 $
 *
 *****************************************************************************************
 */
@ -1204,7 +1204,77 @@ public class TransliteratorTest extends TestFmwk {
               "bb x xb");
    }

-    public void TestSTV_TODO() {
+    static class TestFact implements Transliterator.Factory {
+        static class NameableNullTrans extends NullTransliterator {
+            public NameableNullTrans(String id) {
+                setID(id);
+            }
+        };
+        String id;
+        public TestFact(String theID) {
+            id = theID;
+        }
+        public Transliterator getInstance() {
+            return new NameableNullTrans(id);
+        }
+    };
+
+    public void TestSTV() {
+        Enumeration es = Transliterator.getAvailableSources();
+        for (int i=0; es.hasMoreElements(); ++i) {
+            String source = (String) es.nextElement();
+            logln("" + i + ": " + source);
+            if (source.length() == 0) {
+                errln("FAIL: empty source");
+                continue;
+            }
+            Enumeration et = Transliterator.getAvailableTargets(source);
+            for (int j=0; et.hasMoreElements(); ++j) {
+                String target = (String) et.nextElement();
+                logln(" " + j + ": " + target);
+                if (target.length() == 0) {
+                    errln("FAIL: empty target");
+                    continue;
+                }
+                Enumeration ev = Transliterator.getAvailableVariants(source, target);
+                for (int k=0; ev.hasMoreElements(); ++k) {
+                    String variant = (String) ev.nextElement();
+                    if (variant.length() == 0) { 
+                        logln("  " + k + ": <empty>");
+                    } else {
+                        logln("  " + k + ": " + variant);
+                    }
+                }
+            }
+        }
+
+        // Test registration
+        String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
+        for (int i=0; i<3; ++i) {
+            Transliterator.registerFactory(IDS[i], new TestFact(IDS[i]));
+            try {
+                Transliterator t = Transliterator.getInstance(IDS[i]);
+                if (t.getID().equals(IDS[i])) {
+                    logln("Ok: Registration/creation succeeded for ID " +
+                          IDS[i]);
+                } else {
+                    errln("FAIL: Registration of ID " +
+                          IDS[i] + " creates ID " + t.getID());
+                }
+                Transliterator.unregister(IDS[i]);
+                try {
+                    t = Transliterator.getInstance(IDS[i]);
+                    errln("FAIL: Unregistration failed for ID " +
+                          IDS[i] + "; still receiving ID " + t.getID());
+                } catch (IllegalArgumentException e2) {
+                    // Good; this is what we expect
+                    logln("Ok; Unregistered " + IDS[i]);
+                }
+            } catch (IllegalArgumentException e) {
+                errln("FAIL: Registration/creation failed for ID " +
+                      IDS[i]);
+            }
+        }
    }

    /**
@ -1227,6 +1297,16 @@ public class TransliteratorTest extends TestFmwk {
        }
    }

+    /**
+     * Test NFD chaining with RBT
+     */
+    public void TestNFDChainRBT() {
+        Transliterator t = Transliterator.createFromRules(
+                               "TEST", "::NFD; aa > Q; a > q;",
+                               Transliterator.FORWARD);
+        expect(t, "aa", "Q");
+    }
+
    //======================================================================
    // icu4j only
    //======================================================================
--- a/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java,v $
- * $Date: 2001/10/03 00:14:22 $
- * $Revision: 1.47 $
+ * $Date: 2001/10/05 18:15:54 $
+ * $Revision: 1.48 $
 *
 *****************************************************************************************
 */
@ -279,7 +279,7 @@ import com.ibm.text.resources.ResourceReader;
 * <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
 *
 * @author Alan Liu
- * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.47 $ $Date: 2001/10/03 00:14:22 $
+ * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.48 $ $Date: 2001/10/05 18:15:54 $
 */
 public class RuleBasedTransliterator extends Transliterator {

@ -352,7 +352,35 @@ public class RuleBasedTransliterator extends Transliterator {
                      int direction,
                      StringBuffer idBlockResult,
                      int[] idSplitPointResult) {
-        TransliteratorParser parser = new TransliteratorParser(new String[] { rules }, direction);
+        return _parse(new TransliteratorParser(new String[] { rules }, direction),
+                      idBlockResult, idSplitPointResult);
+    }
+
+    /**
+     * Parse a given set of rules read from a resource.  The parsed
+     * ::id block and its split point are handed back through the
+     * two result parameters; the rule data is the return value.
+     * If the rules have no ::id block, the ::id block result is
+     * returned as the empty string.
+     * @param rules reader supplying the rule text to be parsed
+     * @param direction direction given to the parser, FORWARD or
+     * REVERSE
+     * @param idBlockResult cleared, then filled with the parser's
+     * ::id block.  May end up empty.
+     * @param idSplitPointResult element 0 receives the parser's
+     * ::id block split point.
+     */
+    static Data parse(ResourceReader rules,
+                      int direction,
+                      StringBuffer idBlockResult,
+                      int[] idSplitPointResult) {
+        return _parse(new TransliteratorParser(rules, direction),
+                      idBlockResult, idSplitPointResult);
+    }
+
+    static Data _parse(TransliteratorParser parser,
+                       StringBuffer idBlockResult,
+                       int[] idSplitPointResult) {
        idBlockResult.setLength(0);
        idBlockResult.append(parser.idBlock);
        idSplitPointResult[0] = parser.idSplitPoint;
@ -498,6 +526,9 @@ public class RuleBasedTransliterator extends Transliterator {

 /**
 * $Log: RuleBasedTransliterator.java,v $
+ * Revision 1.48  2001/10/05 18:15:54  alan
+ * jitterbug 74: finish port of Source-Target/Variant code incl. TransliteratorRegistry and tests
+ *
 * Revision 1.47  2001/10/03 00:14:22  alan
 * jitterbug 73: finish quantifier and supplemental char support
 *
--- a/icu4j/src/com/ibm/icu/text/Transliterator.java
+++ b/icu4j/src/com/ibm/icu/text/Transliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Transliterator.java,v $
- * $Date: 2001/10/03 16:26:50 $
- * $Revision: 1.45 $
+ * $Date: 2001/10/05 18:15:54 $
+ * $Revision: 1.46 $
 *
 *****************************************************************************************
 */
@ -241,7 +241,7 @@ import com.ibm.util.CaseInsensitiveString;
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: Transliterator.java,v $ $Revision: 1.45 $ $Date: 2001/10/03 16:26:50 $
+ * @version $RCSfile: Transliterator.java,v $ $Revision: 1.46 $ $Date: 2001/10/05 18:15:54 $
 */
 public abstract class Transliterator {
    /**
@ -348,44 +348,12 @@ public abstract class Transliterator {
    private int maximumContextLength = 0;

    /**
-     * Cache of system transliterators.  Keys are <code>String</code>
-     * names, values are one of the following:
-     *
-     * <ul><li><code>String</code> objects.  These represent
-     * RuleBasedTransliterators that have not been loaded yet, or
-     * aliases.  The first character determines the type: 'f'
-     * indicates a FORWARD RBT, with the rest of the string giving the
-     * resource name and encoding, separated by a colon.  'r' is
-     * similar, but indicates a REVERSE RBT.  'a' indicates an alias,
-     * with the rest of the string giving the ID to create.
-     *
-     * <li><code>Class</code> objects.  Such objects must represent
-     * subclasses of <code>Transliterator</code>, and must satisfy the
-     * constraints described in <code>registerClass()</code>.
-     *
-     * <li><code>RuleBasedTransliterator.Data</code> objects.  These
-     * are built in-memory transliterator data cores that are wrapped
-     * thinly to create RuleBasedTransliterator objects.  When an RBT
-     * is created, its Data core is cached and shared among future
-     * instances of the same ID.
-     * </ul>
+     * System transliterator registry.
     */
-    private static Hashtable cache;
-
-    /**
-     * Identical to 'cache' but contains internal transliterators.
-     * These are not enumerated by getAvailableIDs().
-     */
-    private static Hashtable internalCache;
+    private static TransliteratorRegistry registry;

    private static Hashtable displayNameCache;

-    // TODO Add documentation
-    // TODO Add documentation
-    // TODO Add documentation
-    // TODO Add documentation
-    private static TransliteratorRegistry registry;
-
    /**
     * Prefix for resource bundle key for the display name for a
     * transliterator.  The ID is appended to this to form the key.
@ -981,9 +949,8 @@ public abstract class Transliterator {
                // idBlock and data -- this is a compound
                // RBT
                Transliterator t = new RuleBasedTransliterator("_", data, null);
-                t = new CompoundTransliterator(ID, idBlock.toString(), idSplitPoint[0],
-                                               t);
-                return t;
+                return new CompoundTransliterator(ID, idBlock.toString(), idSplitPoint[0],
+                                                  t);
            }
        }        
    }
@ -1040,14 +1007,7 @@ public abstract class Transliterator {
                parseID(id, regenID, p, sawDelimiter, dir, true);

            if (p[0] == pos || (p[0] < id.length() && !sawDelimiter[0])) {
-                // TODO
-                //throw new IllegalArgumentException("Invalid ID " + id);
-                throw new IllegalArgumentException("Invalid ID " + id +
-                                                   " p[0]=" + p[0] +
-                                                   " pos=" + pos +
-                                                   " id.length()=" + id.length() +
-                                                   " sawDelimite[0]=" + sawDelimiter[0] +
-                                                   "");
+                throw new IllegalArgumentException("Invalid ID " + id);
            }
            pos = p[0];
            // The return value may be NULL when, for instance, creating a
@ -1394,26 +1354,6 @@ public abstract class Transliterator {
        return pos;
    }

-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    static Transliterator tempGet(String id, StringBuffer aliasReturn) {
-        aliasReturn.setLength(0);
-        if (id.equalsIgnoreCase(NullTransliterator.SHORT_ID)) {
-            id = NullTransliterator._ID;
-            // Temporary hack to make this work
-        }
-        return internalGetInstance(id);
-    }
-
    /**
     * Returns this transliterator's inverse.  See the class
     * documentation for details.  This implementation simply inverts
@ -1438,116 +1378,6 @@ public abstract class Transliterator {
        return getInstance(ID, REVERSE);
    }

-    /**
-     * Returns a transliterator object given its ID.  Unlike getInstance(),
-     * this method returns null if it cannot make use of the given ID.
-     */
-    private static Transliterator internalGetInstance(String ID) {
-        RuleBasedTransliterator.Data data = null;
-        Hashtable sourceCache = cache;
-        CaseInsensitiveString ciID = new CaseInsensitiveString(ID);
-        Object obj = cache.get(ciID);
-        if (obj == null) {
-            obj = internalCache.get(ciID);
-            sourceCache = internalCache;
-        }
-
-        if (obj != null) {
-            if (obj instanceof RuleBasedTransliterator.Data) {
-                data = (RuleBasedTransliterator.Data) obj;
-                // Fall through to construct transliterator from cached Data object.
-            } else if (obj instanceof Class) {
-                try {
-                    return (Transliterator) ((Class) obj).newInstance();
-                } catch (InstantiationException e) {
-                } catch (IllegalAccessException e2) {}
-            } else if (obj instanceof Factory) {
-                return ((Factory) obj).getInstance();
-            } else if (obj instanceof String) {
-                String spec = (String) obj;
-                if (spec.charAt(0) == 'a') {
-                    // alias
-                    Transliterator t = getInstance(spec.substring(1));
-                    t.ID = ID;
-                    return t;
-                } else {
-                    synchronized (cache) {
-                        // file, either forward or reverse
-                        int dir = (spec.charAt(0) == 'f') ? FORWARD:REVERSE;
-                        int colon = spec.indexOf(':', 1);
-                        String resourceName = spec.substring(1, colon);
-                        String encoding = spec.substring(colon+1);
-                        ResourceReader r = null;
-                        try {
-                            r = new ResourceReader(resourceName, encoding);
-                        } catch (UnsupportedEncodingException e) {
-                            // This should never happen; UTF8 is always supported
-                        } catch (IllegalArgumentException e2) {
-                            // Can't load UTF8 file
-                        }
-
-                        if (r != null) {
-                            data = RuleBasedTransliterator.parse(r, dir);
-                            sourceCache.put(ciID, data);
-                            // Fall through to construct transliterator from Data object.
-                        }
-                    }
-                }
-            } else {
-                throw new RuntimeException("Bogus cache object");
-            }
-
-            if (data != null) {
-                return new RuleBasedTransliterator(ID, data, null);
-            }
-        }
-
-        return null;
-    }
-
-// Currently unused, but may be of use in the future.
-//    /**
-//     * Find a path through the composed transliterator graph.  This
-//     * will not necessarily be the only path, or the shortest path.
-//     * This is a simple recursive algorithm.
-//     *
-//     * <p><code>composedGraph</code> is the links table.
-//     * composedGraph.get(x) should return a String[] array, each of
-//     * which is a node that x is connected to.
-//     * @param start the starting node
-//     * @param end the ending node
-//     * @param path the result vector; should be empty on entry.  Upon
-//     * success, it will contain successive nodes on the path from
-//     * start to end, including start and end.  If false is returned,
-//     * then path is unchanged.
-//     * @return true if a path from start to end is found
-//     */
-//    private static boolean findComposedPath(String start, String end,
-//                                            Vector path) {
-//        path.addElement(start);
-//        // composedGraph lists all links emanating from a node
-//        String[] links = (String[]) composedGraph.get(start);
-//        if (links != null) {
-//            for (int i=0; i<links.length; ++i) {
-//                if (links[i].equals(end)) {
-//                    path.addElement(end);
-//                    return true;
-//                }
-//            }
-//            for (int i=0; i<links.length; ++i) {
-//                // Avoid cycles: ignore links already on our path
-//                if (path.indexOf(links[i]) >= 0) {
-//                    continue;
-//                }
-//                if (findComposedPath(links[i], end, path)) {
-//                    return true;
-//                }
-//            }
-//        }
-//        path.removeElementAt(path.size() - 1);
-//        return false;
-//    }
-
    /**
     * Registers a subclass of <code>Transliterator</code> with the
     * system.  This subclass must have a public constructor taking no
@ -1561,7 +1391,7 @@ public abstract class Transliterator {
     * @see #unregister
     */
    public static void registerClass(String ID, Class transClass, String displayName) {
-        cache.put(new CaseInsensitiveString(ID), transClass);
+        registry.put(ID, transClass, true);
        if (displayName != null) {
            displayNameCache.put(new CaseInsensitiveString(ID), displayName);
        }
@ -1574,7 +1404,7 @@ public abstract class Transliterator {
     * @param factory the factory object
     */
    public static void registerFactory(String ID, Factory factory) {
-        cache.put(new CaseInsensitiveString(ID), factory);
+        registry.put(ID, factory, true);
    }

    /**
@ -1582,36 +1412,13 @@ public abstract class Transliterator {
     * a system transliterator or a user transliterator or class.
     *
     * @param ID the ID of the transliterator or class
-     * @return the <code>Object</code> that was registered with
-     * <code>ID</code>, or <code>null</code> if none was
     * @see #registerClass
     */
-    public static Object unregister(String ID) {
-        CaseInsensitiveString ciID = new CaseInsensitiveString(ID);
-        displayNameCache.remove(ciID);
-        return cache.remove(ciID);
+    public static void unregister(String ID) {
+        displayNameCache.remove(new CaseInsensitiveString(ID));
+        registry.remove(ID);
    }

-    /**
-     * An internal class that adapts an enumeration over
-     * CaseInsensitiveStrings to an enumeration over Strings.
-     */
-    private static class IDEnumeration implements Enumeration {
-        Enumeration enum;
-
-        public IDEnumeration(Enumeration e) {
-            enum = e;
-        }
-
-        public boolean hasMoreElements() {
-            return enum.hasMoreElements();
-        }
-
-        public Object nextElement() {
-            return ((CaseInsensitiveString) enum.nextElement()).getString();
-        }
-    };
-
    /**
     * Returns an enumeration over the programmatic names of registered
     * <code>Transliterator</code> objects.  This includes both system
@ -1624,9 +1431,20 @@ public abstract class Transliterator {
     * @see #registerClass
     */
    public static final Enumeration getAvailableIDs() {
-        // Since the cache contains CaseInsensitiveString objects, but
-        // the caller expects Strings, we have to use an intermediary.
-        return new IDEnumeration(cache.keys());
+        return registry.getAvailableIDs();
+    }
+
+    public static final Enumeration getAvailableSources() {
+        return registry.getAvailableSources();
+    }
+
+    public static final Enumeration getAvailableTargets(String source) {
+        return registry.getAvailableTargets(source);
+    }
+
+    public static final Enumeration getAvailableVariants(String source,
+                                                         String target) {
+        return registry.getAvailableVariants(source, target);
    }

    /**
@ -1651,21 +1469,18 @@ public abstract class Transliterator {
    }

    static {
-        // TODO FINISH
        registry = new TransliteratorRegistry();

        // The display name cache starts out empty
        displayNameCache = new Hashtable();

-        // Read the index file and construct the cache/internalCache.
+        // Read the index file and populate the registry.
        // Each line of the index file is either blank, a '#' comment,
        // or a colon-delimited line.  In the latter case the first
        // field is the ID being defined.  The second field is one of
        // three strings: "file", "internal", or "alias".  Remaining
        // fields vary according to the value of the second field.  See
        // the index file itself for further documentation.
-        cache = new Hashtable();
-        internalCache = new Hashtable();
        ResourceReader r = new ResourceReader("Transliterator_index.txt");
        for (;;) {
            String line = null;
@ -1693,19 +1508,21 @@ public abstract class Transliterator {
            String type = line.substring(pos, colon);
            pos = colon+1;

-            CaseInsensitiveString ciID = new CaseInsensitiveString(ID);
            if (type.equals("file") || type.equals("internal")) {
                // Rest of line is <resource>:<encoding>:<direction>
+                //                pos       colon      c2
                colon = line.indexOf(':', pos);
-                colon = line.indexOf(':', colon+1); // skip over 1 colon
-                String fileNameAndEncoding = line.substring(pos, colon);
-                pos = colon+1;
-                boolean isForward = line.substring(pos).equals("FORWARD");
-                Hashtable h = type.equals("internal") ? internalCache:cache;
-                h.put(ciID, (isForward ? "f" : "r") + fileNameAndEncoding);
+                int c2 = line.indexOf(':', colon+1);
+                int dir = line.substring(c2+1).equals("FORWARD") ?
+                    FORWARD :  REVERSE;
+                registry.put(ID,
+                             line.substring(pos, colon), // resource
+                             line.substring(colon+1, c2), // encoding
+                             dir,
+                             !type.equals("internal"));
            } else if (type.equals("alias")) {
                // Rest of line is the <getInstanceArg>
-                cache.put(ciID, "a" + line.substring(pos));
+                registry.put(ID, line.substring(pos), true);
            } else {
                // Unknown type
                throw new RuntimeException("Can't parse line: " + line);
--- a/icu4j/src/com/ibm/icu/text/TransliteratorRegistry.java
+++ b/icu4j/src/com/ibm/icu/text/TransliteratorRegistry.java
--- a/icu4j/src/com/ibm/test/translit/TransliteratorTest.java
+++ b/icu4j/src/com/ibm/test/translit/TransliteratorTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $
- * $Date: 2001/10/05 06:30:38 $
- * $Revision: 1.49 $
+ * $Date: 2001/10/05 18:16:59 $
+ * $Revision: 1.50 $
 *
 *****************************************************************************************
 */
@ -1204,7 +1204,77 @@ public class TransliteratorTest extends TestFmwk {
               "bb x xb");
    }

-    public void TestSTV_TODO() {
+    static class TestFact implements Transliterator.Factory {
+        static class NameableNullTrans extends NullTransliterator {
+            public NameableNullTrans(String id) {
+                setID(id);
+            }
+        };
+        String id;
+        public TestFact(String theID) {
+            id = theID;
+        }
+        public Transliterator getInstance() {
+            return new NameableNullTrans(id);
+        }
+    };
+
+    public void TestSTV() {
+        Enumeration es = Transliterator.getAvailableSources();
+        for (int i=0; es.hasMoreElements(); ++i) {
+            String source = (String) es.nextElement();
+            logln("" + i + ": " + source);
+            if (source.length() == 0) {
+                errln("FAIL: empty source");
+                continue;
+            }
+            Enumeration et = Transliterator.getAvailableTargets(source);
+            for (int j=0; et.hasMoreElements(); ++j) {
+                String target = (String) et.nextElement();
+                logln(" " + j + ": " + target);
+                if (target.length() == 0) {
+                    errln("FAIL: empty target");
+                    continue;
+                }
+                Enumeration ev = Transliterator.getAvailableVariants(source, target);
+                for (int k=0; ev.hasMoreElements(); ++k) {
+                    String variant = (String) ev.nextElement();
+                    if (variant.length() == 0) { 
+                        logln("  " + k + ": <empty>");
+                    } else {
+                        logln("  " + k + ": " + variant);
+                    }
+                }
+            }
+        }
+
+        // Test registration
+        String[] IDS = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
+        for (int i=0; i<3; ++i) {
+            Transliterator.registerFactory(IDS[i], new TestFact(IDS[i]));
+            try {
+                Transliterator t = Transliterator.getInstance(IDS[i]);
+                if (t.getID().equals(IDS[i])) {
+                    logln("Ok: Registration/creation succeeded for ID " +
+                          IDS[i]);
+                } else {
+                    errln("FAIL: Registration of ID " +
+                          IDS[i] + " creates ID " + t.getID());
+                }
+                Transliterator.unregister(IDS[i]);
+                try {
+                    t = Transliterator.getInstance(IDS[i]);
+                    errln("FAIL: Unregistration failed for ID " +
+                          IDS[i] + "; still receiving ID " + t.getID());
+                } catch (IllegalArgumentException e2) {
+                    // Good; this is what we expect
+                    logln("Ok; Unregistered " + IDS[i]);
+                }
+            } catch (IllegalArgumentException e) {
+                errln("FAIL: Registration/creation failed for ID " +
+                      IDS[i]);
+            }
+        }
    }

    /**
@ -1227,6 +1297,16 @@ public class TransliteratorTest extends TestFmwk {
        }
    }

+    /**
+     * Test NFD chaining with RBT
+     */
+    public void TestNFDChainRBT() {
+        Transliterator t = Transliterator.createFromRules(
+                               "TEST", "::NFD; aa > Q; a > q;",
+                               Transliterator.FORWARD);
+        expect(t, "aa", "Q");
+    }
+
    //======================================================================
    // icu4j only
    //======================================================================
--- a/icu4j/src/com/ibm/text/RuleBasedTransliterator.java
+++ b/icu4j/src/com/ibm/text/RuleBasedTransliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/RuleBasedTransliterator.java,v $
- * $Date: 2001/10/03 00:14:22 $
- * $Revision: 1.47 $
+ * $Date: 2001/10/05 18:15:54 $
+ * $Revision: 1.48 $
 *
 *****************************************************************************************
 */
@ -279,7 +279,7 @@ import com.ibm.text.resources.ResourceReader;
 * <p>Copyright (c) IBM Corporation 1999-2000. All rights reserved.</p>
 *
 * @author Alan Liu
- * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.47 $ $Date: 2001/10/03 00:14:22 $
+ * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.48 $ $Date: 2001/10/05 18:15:54 $
 */
 public class RuleBasedTransliterator extends Transliterator {

@ -352,7 +352,35 @@ public class RuleBasedTransliterator extends Transliterator {
                      int direction,
                      StringBuffer idBlockResult,
                      int[] idSplitPointResult) {
-        TransliteratorParser parser = new TransliteratorParser(new String[] { rules }, direction);
+        return _parse(new TransliteratorParser(new String[] { rules }, direction),
+                      idBlockResult, idSplitPointResult);
+    }
+
+    /**
+     * Parse a given set of rules read from a resource.  The parsed
+     * ::id block and its split point are handed back through the
+     * two result parameters; the rule data is the return value.
+     * If the rules have no ::id block, the ::id block result is
+     * returned as the empty string.
+     * @param rules reader supplying the rule text to be parsed
+     * @param direction direction given to the parser, FORWARD or
+     * REVERSE
+     * @param idBlockResult cleared, then filled with the parser's
+     * ::id block.  May end up empty.
+     * @param idSplitPointResult element 0 receives the parser's
+     * ::id block split point.
+     */
+    static Data parse(ResourceReader rules,
+                      int direction,
+                      StringBuffer idBlockResult,
+                      int[] idSplitPointResult) {
+        return _parse(new TransliteratorParser(rules, direction),
+                      idBlockResult, idSplitPointResult);
+    }
+
+    static Data _parse(TransliteratorParser parser,
+                       StringBuffer idBlockResult,
+                       int[] idSplitPointResult) {
        idBlockResult.setLength(0);
        idBlockResult.append(parser.idBlock);
        idSplitPointResult[0] = parser.idSplitPoint;
@ -498,6 +526,9 @@ public class RuleBasedTransliterator extends Transliterator {

 /**
 * $Log: RuleBasedTransliterator.java,v $
+ * Revision 1.48  2001/10/05 18:15:54  alan
+ * jitterbug 74: finish port of Source-Target/Variant code incl. TransliteratorRegistry and tests
+ *
 * Revision 1.47  2001/10/03 00:14:22  alan
 * jitterbug 73: finish quantifier and supplemental char support
 *
--- a/icu4j/src/com/ibm/text/Transliterator.java
+++ b/icu4j/src/com/ibm/text/Transliterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/Transliterator.java,v $
- * $Date: 2001/10/03 16:26:50 $
- * $Revision: 1.45 $
+ * $Date: 2001/10/05 18:15:54 $
+ * $Revision: 1.46 $
 *
 *****************************************************************************************
 */
@ -241,7 +241,7 @@ import com.ibm.util.CaseInsensitiveString;
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: Transliterator.java,v $ $Revision: 1.45 $ $Date: 2001/10/03 16:26:50 $
+ * @version $RCSfile: Transliterator.java,v $ $Revision: 1.46 $ $Date: 2001/10/05 18:15:54 $
 */
 public abstract class Transliterator {
    /**
@ -348,44 +348,12 @@ public abstract class Transliterator {
    private int maximumContextLength = 0;

    /**
-     * Cache of system transliterators.  Keys are <code>String</code>
-     * names, values are one of the following:
-     *
-     * <ul><li><code>String</code> objects.  These represent
-     * RuleBasedTransliterators that have not been loaded yet, or
-     * aliases.  The first character determines the type: 'f'
-     * indicates a FORWARD RBT, with the rest of the string giving the
-     * resource name and encoding, separated by a colon.  'r' is
-     * similar, but indicates a REVERSE RBT.  'a' indicates an alias,
-     * with the rest of the string giving the ID to create.
-     *
-     * <li><code>Class</code> objects.  Such objects must represent
-     * subclasses of <code>Transliterator</code>, and must satisfy the
-     * constraints described in <code>registerClass()</code>.
-     *
-     * <li><code>RuleBasedTransliterator.Data</code> objects.  These
-     * are built in-memory transliterator data cores that are wrapped
-     * thinly to create RuleBasedTransliterator objects.  When an RBT
-     * is created, its Data core is cached and shared among future
-     * instances of the same ID.
-     * </ul>
+     * System transliterator registry.
     */
-    private static Hashtable cache;
-
-    /**
-     * Identical to 'cache' but contains internal transliterators.
-     * These are not enumerated by getAvailableIDs().
-     */
-    private static Hashtable internalCache;
+    private static TransliteratorRegistry registry;

    private static Hashtable displayNameCache;

-    // TODO Add documentation
-    // TODO Add documentation
-    // TODO Add documentation
-    // TODO Add documentation
-    private static TransliteratorRegistry registry;
-
    /**
     * Prefix for resource bundle key for the display name for a
     * transliterator.  The ID is appended to this to form the key.
@ -981,9 +949,8 @@ public abstract class Transliterator {
                // idBlock and data -- this is a compound
                // RBT
                Transliterator t = new RuleBasedTransliterator("_", data, null);
-                t = new CompoundTransliterator(ID, idBlock.toString(), idSplitPoint[0],
-                                               t);
-                return t;
+                return new CompoundTransliterator(ID, idBlock.toString(), idSplitPoint[0],
+                                                  t);
            }
        }        
    }
@ -1040,14 +1007,7 @@ public abstract class Transliterator {
                parseID(id, regenID, p, sawDelimiter, dir, true);

            if (p[0] == pos || (p[0] < id.length() && !sawDelimiter[0])) {
-                // TODO
-                //throw new IllegalArgumentException("Invalid ID " + id);
-                throw new IllegalArgumentException("Invalid ID " + id +
-                                                   " p[0]=" + p[0] +
-                                                   " pos=" + pos +
-                                                   " id.length()=" + id.length() +
-                                                   " sawDelimite[0]=" + sawDelimiter[0] +
-                                                   "");
+                throw new IllegalArgumentException("Invalid ID " + id);
            }
            pos = p[0];
            // The return value may be NULL when, for instance, creating a
@ -1394,26 +1354,6 @@ public abstract class Transliterator {
        return pos;
    }

-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    // TODO Remove remove remove
-    static Transliterator tempGet(String id, StringBuffer aliasReturn) {
-        aliasReturn.setLength(0);
-        if (id.equalsIgnoreCase(NullTransliterator.SHORT_ID)) {
-            id = NullTransliterator._ID;
-            // Temporary hack to make this work
-        }
-        return internalGetInstance(id);
-    }
-
    /**
     * Returns this transliterator's inverse.  See the class
     * documentation for details.  This implementation simply inverts
@ -1438,116 +1378,6 @@ public abstract class Transliterator {
        return getInstance(ID, REVERSE);
    }

-    /**
-     * Returns a transliterator object given its ID.  Unlike getInstance(),
-     * this method returns null if it cannot make use of the given ID.
-     */
-    private static Transliterator internalGetInstance(String ID) {
-        RuleBasedTransliterator.Data data = null;
-        Hashtable sourceCache = cache;
-        CaseInsensitiveString ciID = new CaseInsensitiveString(ID);
-        Object obj = cache.get(ciID);
-        if (obj == null) {
-            obj = internalCache.get(ciID);
-            sourceCache = internalCache;
-        }
-
-        if (obj != null) {
-            if (obj instanceof RuleBasedTransliterator.Data) {
-                data = (RuleBasedTransliterator.Data) obj;
-                // Fall through to construct transliterator from cached Data object.
-            } else if (obj instanceof Class) {
-                try {
-                    return (Transliterator) ((Class) obj).newInstance();
-                } catch (InstantiationException e) {
-                } catch (IllegalAccessException e2) {}
-            } else if (obj instanceof Factory) {
-                return ((Factory) obj).getInstance();
-            } else if (obj instanceof String) {
-                String spec = (String) obj;
-                if (spec.charAt(0) == 'a') {
-                    // alias
-                    Transliterator t = getInstance(spec.substring(1));
-                    t.ID = ID;
-                    return t;
-                } else {
-                    synchronized (cache) {
-                        // file, either forward or reverse
-                        int dir = (spec.charAt(0) == 'f') ? FORWARD:REVERSE;
-                        int colon = spec.indexOf(':', 1);
-                        String resourceName = spec.substring(1, colon);
-                        String encoding = spec.substring(colon+1);
-                        ResourceReader r = null;
-                        try {
-                            r = new ResourceReader(resourceName, encoding);
-                        } catch (UnsupportedEncodingException e) {
-                            // This should never happen; UTF8 is always supported
-                        } catch (IllegalArgumentException e2) {
-                            // Can't load UTF8 file
-                        }
-
-                        if (r != null) {
-                            data = RuleBasedTransliterator.parse(r, dir);
-                            sourceCache.put(ciID, data);
-                            // Fall through to construct transliterator from Data object.
-                        }
-                    }
-                }
-            } else {
-                throw new RuntimeException("Bogus cache object");
-            }
-
-            if (data != null) {
-                return new RuleBasedTransliterator(ID, data, null);
-            }
-        }
-
-        return null;
-    }
-
-// Currently unused, but may be of use in the future.
-//    /**
-//     * Find a path through the composed transliterator graph.  This
-//     * will not necessarily be the only path, or the shortest path.
-//     * This is a simple recursive algorithm.
-//     *
-//     * <p><code>composedGraph</code> is the links table.
-//     * composedGraph.get(x) should return a String[] array, each of
-//     * which is a node that x is connected to.
-//     * @param start the starting node
-//     * @param end the ending node
-//     * @param path the result vector; should be empty on entry.  Upon
-//     * success, it will contain successive nodes on the path from
-//     * start to end, including start and end.  If false is returned,
-//     * then path is unchanged.
-//     * @return true if a path from start to end is found
-//     */
-//    private static boolean findComposedPath(String start, String end,
-//                                            Vector path) {
-//        path.addElement(start);
-//        // composedGraph lists all links emanating from a node
-//        String[] links = (String[]) composedGraph.get(start);
-//        if (links != null) {
-//            for (int i=0; i<links.length; ++i) {
-//                if (links[i].equals(end)) {
-//                    path.addElement(end);
-//                    return true;
-//                }
-//            }
-//            for (int i=0; i<links.length; ++i) {
-//                // Avoid cycles: ignore links already on our path
-//                if (path.indexOf(links[i]) >= 0) {
-//                    continue;
-//                }
-//                if (findComposedPath(links[i], end, path)) {
-//                    return true;
-//                }
-//            }
-//        }
-//        path.removeElementAt(path.size() - 1);
-//        return false;
-//    }
-
    /**
     * Registers a subclass of <code>Transliterator</code> with the
     * system.  This subclass must have a public constructor taking no
@ -1561,7 +1391,7 @@ public abstract class Transliterator {
     * @see #unregister
     */
    public static void registerClass(String ID, Class transClass, String displayName) {
-        cache.put(new CaseInsensitiveString(ID), transClass);
+        registry.put(ID, transClass, true);
        if (displayName != null) {
            displayNameCache.put(new CaseInsensitiveString(ID), displayName);
        }
@ -1574,7 +1404,7 @@ public abstract class Transliterator {
     * @param factory the factory object
     */
    public static void registerFactory(String ID, Factory factory) {
-        cache.put(new CaseInsensitiveString(ID), factory);
+        registry.put(ID, factory, true);
    }

    /**
@ -1582,36 +1412,13 @@ public abstract class Transliterator {
     * a system transliterator or a user transliterator or class.
     *
     * @param ID the ID of the transliterator or class
-     * @return the <code>Object</code> that was registered with
-     * <code>ID</code>, or <code>null</code> if none was
     * @see #registerClass
     */
-    public static Object unregister(String ID) {
-        CaseInsensitiveString ciID = new CaseInsensitiveString(ID);
-        displayNameCache.remove(ciID);
-        return cache.remove(ciID);
+    public static void unregister(String ID) {
+        displayNameCache.remove(new CaseInsensitiveString(ID));
+        registry.remove(ID);
    }

-    /**
-     * An internal class that adapts an enumeration over
-     * CaseInsensitiveStrings to an enumeration over Strings.
-     */
-    private static class IDEnumeration implements Enumeration {
-        Enumeration enum;
-
-        public IDEnumeration(Enumeration e) {
-            enum = e;
-        }
-
-        public boolean hasMoreElements() {
-            return enum.hasMoreElements();
-        }
-
-        public Object nextElement() {
-            return ((CaseInsensitiveString) enum.nextElement()).getString();
-        }
-    };
-
    /**
     * Returns an enumeration over the programmatic names of registered
     * <code>Transliterator</code> objects.  This includes both system
@ -1624,9 +1431,20 @@ public abstract class Transliterator {
     * @see #registerClass
     */
    public static final Enumeration getAvailableIDs() {
-        // Since the cache contains CaseInsensitiveString objects, but
-        // the caller expects Strings, we have to use an intermediary.
-        return new IDEnumeration(cache.keys());
+        return registry.getAvailableIDs();
+    }
+
+    public static final Enumeration getAvailableSources() {
+        return registry.getAvailableSources();
+    }
+
+    public static final Enumeration getAvailableTargets(String source) {
+        return registry.getAvailableTargets(source);
+    }
+
+    public static final Enumeration getAvailableVariants(String source,
+                                                         String target) {
+        return registry.getAvailableVariants(source, target);
    }

    /**
@ -1651,21 +1469,18 @@ public abstract class Transliterator {
    }

    static {
-        // TODO FINISH
        registry = new TransliteratorRegistry();

        // The display name cache starts out empty
        displayNameCache = new Hashtable();

-        // Read the index file and construct the cache/internalCache.
+        // Read the index file and populate the registry.
        // Each line of the index file is either blank, a '#' comment,
        // or a colon-delimited line.  In the latter case the first
        // field is the ID being defined.  The second field is one of
        // three strings: "file", "internal", or "alias".  Remaining
        // fields vary according to the value of the second field.  See
        // the index file itself for further documentation.
-        cache = new Hashtable();
-        internalCache = new Hashtable();
        ResourceReader r = new ResourceReader("Transliterator_index.txt");
        for (;;) {
            String line = null;
@ -1693,19 +1508,21 @@ public abstract class Transliterator {
            String type = line.substring(pos, colon);
            pos = colon+1;

-            CaseInsensitiveString ciID = new CaseInsensitiveString(ID);
            if (type.equals("file") || type.equals("internal")) {
                // Rest of line is <resource>:<encoding>:<direction>
+                //                pos       colon      c2
                colon = line.indexOf(':', pos);
-                colon = line.indexOf(':', colon+1); // skip over 1 colon
-                String fileNameAndEncoding = line.substring(pos, colon);
-                pos = colon+1;
-                boolean isForward = line.substring(pos).equals("FORWARD");
-                Hashtable h = type.equals("internal") ? internalCache:cache;
-                h.put(ciID, (isForward ? "f" : "r") + fileNameAndEncoding);
+                int c2 = line.indexOf(':', colon+1);
+                int dir = line.substring(c2+1).equals("FORWARD") ?
+                    FORWARD :  REVERSE;
+                registry.put(ID,
+                             line.substring(pos, colon), // resource
+                             line.substring(colon+1, c2), // encoding
+                             dir,
+                             !type.equals("internal"));
            } else if (type.equals("alias")) {
                // Rest of line is the <getInstanceArg>
-                cache.put(ciID, "a" + line.substring(pos));
+                registry.put(ID, line.substring(pos), true);
            } else {
                // Unknown type
                throw new RuntimeException("Can't parse line: " + line);
--- a/icu4j/src/com/ibm/text/TransliteratorRegistry.java
+++ b/icu4j/src/com/ibm/text/TransliteratorRegistry.java