diff --git a/icu4j/src/com/ibm/demo/translit/Demo.java b/icu4j/src/com/ibm/demo/translit/Demo.java
new file mode 100755
index 00000000000..d02953d5036
--- /dev/null
+++ b/icu4j/src/com/ibm/demo/translit/Demo.java
@@ -0,0 +1,253 @@
+import java.applet.*;
+import java.awt.*;
+import java.awt.event.*;
+import java.util.*;
+import com.ibm.text.components.*;
+import com.ibm.text.*;
+
+/**
+ * A frame that allows the user to experiment with keyboard
+ * transliteration.  This class has a main() method so it can be run
+ * as an application.  The frame contains an editable text component
+ * and uses keyboard transliteration to process keyboard events.
+ * Menus select one transliterator, or a compound of several, and offer a batch command.
+ * <p>Copyright (c) IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: Demo.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class Demo extends Frame {
+
+    static final boolean DEBUG = false; // when true, handleBatchTransliterate() logs to System.out
+
+    Transliterator translit = null; // transliterator currently in effect; null means none
+
+    boolean compound = false; // true while the "Compound" menu item is the current choice
+    Transliterator[] compoundTranslit = new Transliterator[MAX_COMPOUND]; // components picked in compound mode
+    static final int MAX_COMPOUND = 128; // capacity of compoundTranslit; extra picks are ignored
+    int compoundCount = 0; // number of filled slots in compoundTranslit
+
+    TransliteratingTextComponent text = null; // text component that is handed the current transliterator
+
+    Menu translitMenu; // the "Transliterator" menu
+    CheckboxMenuItem translitItem; // the currently selected entry of translitMenu
+    CheckboxMenuItem noTranslitItem; // the "None" entry of translitMenu
+
+    static final String NO_TRANSLITERATOR = "None"; // menu label that decodes to a null transliterator
+
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    public static void main(String[] args) { // application entry point: show a 600x200 demo frame
+        Frame f = new Demo(600, 200);
+        f.addWindowListener(new WindowAdapter() {
+            public void windowClosing(WindowEvent e) {
+                System.exit(0); // standalone run: closing the window ends the JVM
+            }
+        });
+        f.setVisible(true);
+    }
+
+	public Demo(int width, int height) { // width/height size both the frame and the text component
+        super("Transliteration Demo");
+
+        initMenus();
+
+        addWindowListener(new WindowAdapter() {
+            public void windowClosing(WindowEvent e) {
+                handleClose();
+            }
+        });
+        
+        text = new TransliteratingTextComponent();
+        Font font = new Font("serif", Font.PLAIN, 48);
+        text.setFont(font);
+        text.setSize(width, height);
+        text.setVisible(true);
+        text.setText("\u03B1\u05D0\u3042\u4E80"); // initial sample: Greek, Hebrew, Hiragana and Han characters
+        add(text);
+
+        setSize(width, height);
+    }
+
+    private void initMenus() { // builds the File, Transliterator and Batch menus
+        MenuBar mbar;
+        Menu menu;
+        MenuItem mitem;
+        CheckboxMenuItem citem;
+        
+        setMenuBar(mbar = new MenuBar());
+        mbar.add(menu = new Menu("File"));
+        menu.add(mitem = new MenuItem("Quit"));
+        mitem.addActionListener(new ActionListener() {
+            public void actionPerformed(ActionEvent e) {
+                handleClose();
+            }
+        });
+
+        final ItemListener setTransliteratorListener = new ItemListener() { // attached to every system transliterator item
+            public void itemStateChanged(ItemEvent e) {
+                CheckboxMenuItem item = (CheckboxMenuItem) e.getSource();
+                if (e.getStateChange() == ItemEvent.DESELECTED) {
+                    // Don't let the current transliterator be deselected.
+                    // Just reselect it.
+                    item.setState(true);
+                } else if (compound) {
+                    // Adding an item to a compound transliterator
+                    handleAddToCompound(item.getLabel());
+                } else if (item != translitItem) {
+                    // Deselect previous choice.  Don't need to call
+                    // setState(true) on new choice.
+                    translitItem.setState(false);
+                    translitItem = item;
+                    handleSetTransliterator(item.getLabel());
+                }
+            }
+        };
+
+        translit = null;
+        mbar.add(translitMenu = new Menu("Transliterator"));
+        translitMenu.add(translitItem = noTranslitItem =
+                         new CheckboxMenuItem(NO_TRANSLITERATOR, true));
+        noTranslitItem.addItemListener(new ItemListener() {
+            public void itemStateChanged(ItemEvent e) {
+                // Can't uncheck None -- any action here sets None to true
+                setNoTransliterator();
+            }
+        });
+
+        translitMenu.addSeparator();
+
+        translitMenu.add(citem = new CheckboxMenuItem("Compound"));
+        citem.addItemListener(new ItemListener() {
+            public void itemStateChanged(ItemEvent e) {
+                CheckboxMenuItem item = (CheckboxMenuItem) e.getSource();
+                if (e.getStateChange() == ItemEvent.DESELECTED) {
+                    // If compound gets deselected, then select NONE
+                    setNoTransliterator();
+                } else if (!compound) {
+                    // Switching from non-compound to compound
+                    translitItem.setState(false);
+                    translitItem = item;
+                    translit = null;
+                    compound = true;
+                    compoundCount = 0;
+                    for (int i=0; i<MAX_COMPOUND; ++i) { // clear components left from an earlier compound
+                        compoundTranslit[i] = null;
+                    }
+                }
+            }
+        });
+      
+        translitMenu.addSeparator();
+
+        for (Enumeration e=getSystemTransliteratorNames().elements(); // one checkbox item per sorted ID
+             e.hasMoreElements(); ) {
+            String s = (String) e.nextElement();
+            translitMenu.add(citem = new CheckboxMenuItem(s));
+            citem.addItemListener(setTransliteratorListener);
+        }
+
+        mbar.add(menu = new Menu("Batch"));
+        menu.add(mitem = new MenuItem("Transliterate Selection"));
+        mitem.addActionListener(new ActionListener() {
+            public void actionPerformed(ActionEvent e) {
+                handleBatchTransliterate();
+            }
+        });
+    }
+
+    /**
+     * Get the system transliterator IDs, sorted with String.compareTo.
+     */
+    private static Vector getSystemTransliteratorNames() {
+        Vector v = new Vector();
+        for (Enumeration e=Transliterator.getAvailableIDs();
+             e.hasMoreElements(); ) {
+            v.addElement(e.nextElement());
+        }
+        // Exchange sort (not an insertion sort), O(n^2) acceptable for small n
+        for (int i=0; i<(v.size()-1); ++i) {
+            String a = (String) v.elementAt(i);
+            for (int j=i+1; j<v.size(); ++j) {
+                String b = (String) v.elementAt(j);
+                if (a.compareTo(b) > 0) {
+                    v.setElementAt(b, i);
+                    v.setElementAt(a, j);
+                    a = b; // a keeps tracking the element now stored at index i
+                }
+            }
+        }
+        return v;
+    }
+
+    private void setNoTransliterator() { // revert to "None": no transliterator, compound mode off
+        translitItem = noTranslitItem;
+        noTranslitItem.setState(true);
+        handleSetTransliterator(noTranslitItem.getLabel());
+        compound = false;
+        for (int i=0; i<translitMenu.getItemCount(); ++i) { // uncheck every other checkbox item in the menu
+            MenuItem it = translitMenu.getItem(i);
+            if (it != noTranslitItem && it instanceof CheckboxMenuItem) {
+                ((CheckboxMenuItem) it).setState(false);
+            }
+        }
+    }
+
+    private void handleAddToCompound(String name) { // append the named transliterator and install the new compound
+        if (compoundCount < MAX_COMPOUND) { // a pick is silently ignored once the array is full
+            compoundTranslit[compoundCount] = decodeTranslitItem(name);
+            ++compoundCount;
+            Transliterator t[] = new Transliterator[compoundCount]; // exact-length copy of the filled slots
+            System.arraycopy(compoundTranslit, 0, t, 0, compoundCount);
+            translit = new CompoundTransliterator("Compound", t);
+            text.setTransliterator(translit);
+        }
+    }
+
+    private void handleSetTransliterator(String name) { // install the named transliterator (or none) on the text component
+        translit = decodeTranslitItem(name);
+        text.setTransliterator(translit);
+    }
+
+    /**
+     * Decode a menu item label: "None" gives null, any other label goes to Transliterator.getInstance().
+     */
+    private static Transliterator decodeTranslitItem(String name) {
+        return (name.equals(NO_TRANSLITERATOR))
+            ? null : Transliterator.getInstance(name);
+    }
+
+    private void handleBatchTransliterate() { // transliterate the selected text in place and reselect the result
+        if (translit == null) { // no transliterator in effect: nothing to do
+            return;
+        }
+
+        int start = text.getSelectionStart();
+        int end = text.getSelectionEnd();
+        ReplaceableString s =
+            new ReplaceableString(text.getText().substring(start, end));
+
+        StringBuffer log = null; // only allocated when DEBUG is on
+        if (DEBUG) {
+            log = new StringBuffer();
+            log.append('"' + s.toString() + "\" (start " + start +
+                       ", end " + end + ") -> \"");
+        }
+
+        translit.transliterate(s);
+        String str = s.toString();
+
+        if (DEBUG) {
+            log.append(str + "\"");
+            System.out.println("Batch " + translit.getID() + ": " + log.toString());
+        }
+
+        text.replaceRange(str, start, end);
+        text.select(start, start + str.length()); // result may differ in length from the original selection
+    }
+
+    private void handleClose() { // closing the frame just disposes it
+        dispose();
+    }
+}
diff --git a/icu4j/src/com/ibm/demo/translit/DemoApplet.java b/icu4j/src/com/ibm/demo/translit/DemoApplet.java
new file mode 100755
index 00000000000..21b256ebc26
--- /dev/null
+++ b/icu4j/src/com/ibm/demo/translit/DemoApplet.java
@@ -0,0 +1,62 @@
+
+import java.awt.*;
+import java.awt.event.*;
+import java.applet.*;
+import com.ibm.text.components.AppletFrame;
+
+/**
+ * A simple Applet that shows a button.  When pressed, the button
+ * shows the Demo frame.  This Applet is meant to be embedded
+ * in a web page.
+ *
+ * <p>Copyright (c) IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: DemoApplet.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class DemoApplet extends Applet {
+
+    Demo frame = null; // the demo window; null until first shown and again after it is closed
+    
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    public static void main(String args[]) { // wraps a new applet in a 640x480 AppletFrame (defined elsewhere)
+        final DemoApplet applet = new DemoApplet();
+        new AppletFrame("Transliteration Demo", applet, 640, 480);
+    }
+
+	public void init() { // adds the launch button and sizes the applet to fit it
+
+		Button button = new Button("Transliteration Demo");
+		button.addActionListener(new ActionListener() {
+		    public void actionPerformed(ActionEvent e) {
+		        if (frame == null) { // create the frame lazily, at most one at a time
+                    frame = new Demo(600, 200);
+                    frame.addWindowListener(new WindowAdapter() {
+                        public void windowClosing(WindowEvent we) {
+                            frame = null; // forget the closed window so the next click builds a new one
+                        }
+                    });
+                }
+                frame.setVisible(true);
+                frame.toFront();
+		    }
+		});
+
+		add(button);
+
+        Dimension size = button.getPreferredSize();
+        size.width += 10; // pad the button's preferred size by 10 in each dimension
+        size.height += 10;
+
+		resize(size);
+	}
+	
+    public void stop() { // applet stopped: dispose the demo window if one is open
+        if (frame != null) {
+            frame.dispose();
+        }
+        frame = null;
+    }
+}
diff --git a/icu4j/src/com/ibm/demo/translit/demo.bat b/icu4j/src/com/ibm/demo/translit/demo.bat
new file mode 100755
index 00000000000..88f63e3446f
--- /dev/null
+++ b/icu4j/src/com/ibm/demo/translit/demo.bat
@@ -0,0 +1,7 @@
+REM For best results, run the demo as an applet inside of Netscape
+REM with Bitstream Cyberbit installed.
+
+REM Set up your JDK 1.1.x path and classpath here:
+call JDK11
+set CLASSPATH=../translit.jar;%CLASSPATH%
+javaw Demo
diff --git a/icu4j/src/com/ibm/demo/translit/demo.html b/icu4j/src/com/ibm/demo/translit/demo.html
new file mode 100755
index 00000000000..6327daf6504
--- /dev/null
+++ b/icu4j/src/com/ibm/demo/translit/demo.html
@@ -0,0 +1,8 @@
+<HTML>
+<HEAD>
+<TITLE>Transliteration Demo</TITLE>
+</HEAD>
+<BODY>
+<APPLET CODE="DemoApplet.class" WIDTH=140 HEIGHT=33></APPLET>
+</BODY>
+</HTML>
diff --git a/icu4j/src/com/ibm/icu/dev/demo/translit/Demo.java b/icu4j/src/com/ibm/icu/dev/demo/translit/Demo.java
new file mode 100755
index 00000000000..d02953d5036
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/dev/demo/translit/Demo.java
@@ -0,0 +1,253 @@
+import java.applet.*;
+import java.awt.*;
+import java.awt.event.*;
+import java.util.*;
+import com.ibm.text.components.*;
+import com.ibm.text.*;
+
+/**
+ * A frame that allows the user to experiment with keyboard
+ * transliteration.  This class has a main() method so it can be run
+ * as an application.  The frame contains an editable text component
+ * and uses keyboard transliteration to process keyboard events.
+ * Menus select one transliterator, or a compound of several, and offer a batch command.
+ * <p>Copyright (c) IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: Demo.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class Demo extends Frame {
+
+    static final boolean DEBUG = false; // when true, handleBatchTransliterate() logs to System.out
+
+    Transliterator translit = null; // transliterator currently in effect; null means none
+
+    boolean compound = false; // true while the "Compound" menu item is the current choice
+    Transliterator[] compoundTranslit = new Transliterator[MAX_COMPOUND]; // components picked in compound mode
+    static final int MAX_COMPOUND = 128; // capacity of compoundTranslit; extra picks are ignored
+    int compoundCount = 0; // number of filled slots in compoundTranslit
+
+    TransliteratingTextComponent text = null; // text component that is handed the current transliterator
+
+    Menu translitMenu; // the "Transliterator" menu
+    CheckboxMenuItem translitItem; // the currently selected entry of translitMenu
+    CheckboxMenuItem noTranslitItem; // the "None" entry of translitMenu
+
+    static final String NO_TRANSLITERATOR = "None"; // menu label that decodes to a null transliterator
+
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    public static void main(String[] args) { // application entry point: show a 600x200 demo frame
+        Frame f = new Demo(600, 200);
+        f.addWindowListener(new WindowAdapter() {
+            public void windowClosing(WindowEvent e) {
+                System.exit(0); // standalone run: closing the window ends the JVM
+            }
+        });
+        f.setVisible(true);
+    }
+
+	public Demo(int width, int height) { // width/height size both the frame and the text component
+        super("Transliteration Demo");
+
+        initMenus();
+
+        addWindowListener(new WindowAdapter() {
+            public void windowClosing(WindowEvent e) {
+                handleClose();
+            }
+        });
+        
+        text = new TransliteratingTextComponent();
+        Font font = new Font("serif", Font.PLAIN, 48);
+        text.setFont(font);
+        text.setSize(width, height);
+        text.setVisible(true);
+        text.setText("\u03B1\u05D0\u3042\u4E80"); // initial sample: Greek, Hebrew, Hiragana and Han characters
+        add(text);
+
+        setSize(width, height);
+    }
+
+    private void initMenus() { // builds the File, Transliterator and Batch menus
+        MenuBar mbar;
+        Menu menu;
+        MenuItem mitem;
+        CheckboxMenuItem citem;
+        
+        setMenuBar(mbar = new MenuBar());
+        mbar.add(menu = new Menu("File"));
+        menu.add(mitem = new MenuItem("Quit"));
+        mitem.addActionListener(new ActionListener() {
+            public void actionPerformed(ActionEvent e) {
+                handleClose();
+            }
+        });
+
+        final ItemListener setTransliteratorListener = new ItemListener() { // attached to every system transliterator item
+            public void itemStateChanged(ItemEvent e) {
+                CheckboxMenuItem item = (CheckboxMenuItem) e.getSource();
+                if (e.getStateChange() == ItemEvent.DESELECTED) {
+                    // Don't let the current transliterator be deselected.
+                    // Just reselect it.
+                    item.setState(true);
+                } else if (compound) {
+                    // Adding an item to a compound transliterator
+                    handleAddToCompound(item.getLabel());
+                } else if (item != translitItem) {
+                    // Deselect previous choice.  Don't need to call
+                    // setState(true) on new choice.
+                    translitItem.setState(false);
+                    translitItem = item;
+                    handleSetTransliterator(item.getLabel());
+                }
+            }
+        };
+
+        translit = null;
+        mbar.add(translitMenu = new Menu("Transliterator"));
+        translitMenu.add(translitItem = noTranslitItem =
+                         new CheckboxMenuItem(NO_TRANSLITERATOR, true));
+        noTranslitItem.addItemListener(new ItemListener() {
+            public void itemStateChanged(ItemEvent e) {
+                // Can't uncheck None -- any action here sets None to true
+                setNoTransliterator();
+            }
+        });
+
+        translitMenu.addSeparator();
+
+        translitMenu.add(citem = new CheckboxMenuItem("Compound"));
+        citem.addItemListener(new ItemListener() {
+            public void itemStateChanged(ItemEvent e) {
+                CheckboxMenuItem item = (CheckboxMenuItem) e.getSource();
+                if (e.getStateChange() == ItemEvent.DESELECTED) {
+                    // If compound gets deselected, then select NONE
+                    setNoTransliterator();
+                } else if (!compound) {
+                    // Switching from non-compound to compound
+                    translitItem.setState(false);
+                    translitItem = item;
+                    translit = null;
+                    compound = true;
+                    compoundCount = 0;
+                    for (int i=0; i<MAX_COMPOUND; ++i) { // clear components left from an earlier compound
+                        compoundTranslit[i] = null;
+                    }
+                }
+            }
+        });
+      
+        translitMenu.addSeparator();
+
+        for (Enumeration e=getSystemTransliteratorNames().elements(); // one checkbox item per sorted ID
+             e.hasMoreElements(); ) {
+            String s = (String) e.nextElement();
+            translitMenu.add(citem = new CheckboxMenuItem(s));
+            citem.addItemListener(setTransliteratorListener);
+        }
+
+        mbar.add(menu = new Menu("Batch"));
+        menu.add(mitem = new MenuItem("Transliterate Selection"));
+        mitem.addActionListener(new ActionListener() {
+            public void actionPerformed(ActionEvent e) {
+                handleBatchTransliterate();
+            }
+        });
+    }
+
+    /**
+     * Get the system transliterator IDs, sorted with String.compareTo.
+     */
+    private static Vector getSystemTransliteratorNames() {
+        Vector v = new Vector();
+        for (Enumeration e=Transliterator.getAvailableIDs();
+             e.hasMoreElements(); ) {
+            v.addElement(e.nextElement());
+        }
+        // Exchange sort (not an insertion sort), O(n^2) acceptable for small n
+        for (int i=0; i<(v.size()-1); ++i) {
+            String a = (String) v.elementAt(i);
+            for (int j=i+1; j<v.size(); ++j) {
+                String b = (String) v.elementAt(j);
+                if (a.compareTo(b) > 0) {
+                    v.setElementAt(b, i);
+                    v.setElementAt(a, j);
+                    a = b; // a keeps tracking the element now stored at index i
+                }
+            }
+        }
+        return v;
+    }
+
+    private void setNoTransliterator() { // revert to "None": no transliterator, compound mode off
+        translitItem = noTranslitItem;
+        noTranslitItem.setState(true);
+        handleSetTransliterator(noTranslitItem.getLabel());
+        compound = false;
+        for (int i=0; i<translitMenu.getItemCount(); ++i) { // uncheck every other checkbox item in the menu
+            MenuItem it = translitMenu.getItem(i);
+            if (it != noTranslitItem && it instanceof CheckboxMenuItem) {
+                ((CheckboxMenuItem) it).setState(false);
+            }
+        }
+    }
+
+    private void handleAddToCompound(String name) { // append the named transliterator and install the new compound
+        if (compoundCount < MAX_COMPOUND) { // a pick is silently ignored once the array is full
+            compoundTranslit[compoundCount] = decodeTranslitItem(name);
+            ++compoundCount;
+            Transliterator t[] = new Transliterator[compoundCount]; // exact-length copy of the filled slots
+            System.arraycopy(compoundTranslit, 0, t, 0, compoundCount);
+            translit = new CompoundTransliterator("Compound", t);
+            text.setTransliterator(translit);
+        }
+    }
+
+    private void handleSetTransliterator(String name) { // install the named transliterator (or none) on the text component
+        translit = decodeTranslitItem(name);
+        text.setTransliterator(translit);
+    }
+
+    /**
+     * Decode a menu item label: "None" gives null, any other label goes to Transliterator.getInstance().
+     */
+    private static Transliterator decodeTranslitItem(String name) {
+        return (name.equals(NO_TRANSLITERATOR))
+            ? null : Transliterator.getInstance(name);
+    }
+
+    private void handleBatchTransliterate() { // transliterate the selected text in place and reselect the result
+        if (translit == null) { // no transliterator in effect: nothing to do
+            return;
+        }
+
+        int start = text.getSelectionStart();
+        int end = text.getSelectionEnd();
+        ReplaceableString s =
+            new ReplaceableString(text.getText().substring(start, end));
+
+        StringBuffer log = null; // only allocated when DEBUG is on
+        if (DEBUG) {
+            log = new StringBuffer();
+            log.append('"' + s.toString() + "\" (start " + start +
+                       ", end " + end + ") -> \"");
+        }
+
+        translit.transliterate(s);
+        String str = s.toString();
+
+        if (DEBUG) {
+            log.append(str + "\"");
+            System.out.println("Batch " + translit.getID() + ": " + log.toString());
+        }
+
+        text.replaceRange(str, start, end);
+        text.select(start, start + str.length()); // result may differ in length from the original selection
+    }
+
+    private void handleClose() { // closing the frame just disposes it
+        dispose();
+    }
+}
diff --git a/icu4j/src/com/ibm/icu/dev/demo/translit/DemoApplet.java b/icu4j/src/com/ibm/icu/dev/demo/translit/DemoApplet.java
new file mode 100755
index 00000000000..21b256ebc26
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/dev/demo/translit/DemoApplet.java
@@ -0,0 +1,62 @@
+
+import java.awt.*;
+import java.awt.event.*;
+import java.applet.*;
+import com.ibm.text.components.AppletFrame;
+
+/**
+ * A simple Applet that shows a button.  When pressed, the button
+ * shows the Demo frame.  This Applet is meant to be embedded
+ * in a web page.
+ *
+ * <p>Copyright (c) IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: DemoApplet.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class DemoApplet extends Applet {
+
+    Demo frame = null; // the demo window; null until first shown and again after it is closed
+    
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    public static void main(String args[]) { // wraps a new applet in a 640x480 AppletFrame (defined elsewhere)
+        final DemoApplet applet = new DemoApplet();
+        new AppletFrame("Transliteration Demo", applet, 640, 480);
+    }
+
+	public void init() { // adds the launch button and sizes the applet to fit it
+
+		Button button = new Button("Transliteration Demo");
+		button.addActionListener(new ActionListener() {
+		    public void actionPerformed(ActionEvent e) {
+		        if (frame == null) { // create the frame lazily, at most one at a time
+                    frame = new Demo(600, 200);
+                    frame.addWindowListener(new WindowAdapter() {
+                        public void windowClosing(WindowEvent we) {
+                            frame = null; // forget the closed window so the next click builds a new one
+                        }
+                    });
+                }
+                frame.setVisible(true);
+                frame.toFront();
+		    }
+		});
+
+		add(button);
+
+        Dimension size = button.getPreferredSize();
+        size.width += 10; // pad the button's preferred size by 10 in each dimension
+        size.height += 10;
+
+		resize(size);
+	}
+	
+    public void stop() { // applet stopped: dispose the demo window if one is open
+        if (frame != null) {
+            frame.dispose();
+        }
+        frame = null;
+    }
+}
diff --git a/icu4j/src/com/ibm/icu/dev/demo/translit/demo.bat b/icu4j/src/com/ibm/icu/dev/demo/translit/demo.bat
new file mode 100755
index 00000000000..88f63e3446f
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/dev/demo/translit/demo.bat
@@ -0,0 +1,7 @@
+REM For best results, run the demo as an applet inside of Netscape
+REM with Bitstream Cyberbit installed.
+
+REM Set up your JDK 1.1.x path and classpath here:
+call JDK11
+set CLASSPATH=../translit.jar;%CLASSPATH%
+javaw Demo
diff --git a/icu4j/src/com/ibm/icu/dev/demo/translit/demo.html b/icu4j/src/com/ibm/icu/dev/demo/translit/demo.html
new file mode 100755
index 00000000000..6327daf6504
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/dev/demo/translit/demo.html
@@ -0,0 +1,8 @@
+<HTML>
+<HEAD>
+<TITLE>Transliteration Demo</TITLE>
+</HEAD>
+<BODY>
+<APPLET CODE="DemoApplet.class" WIDTH=140 HEIGHT=33></APPLET>
+</BODY>
+</HTML>
diff --git a/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java b/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
new file mode 100755
index 00000000000..96433f64a26
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
@@ -0,0 +1,763 @@
+import com.ibm.text.*;
+import java.text.*;
+import java.util.*;
+
+/**
+ * @test
+ * @summary General test of Transliterator
+ */
+public class TransliteratorTest extends IntlTest {
+
+    public static void main(String[] args) throws Exception { // command-line entry point; hands args to run()
+        new TransliteratorTest().run(args);
+    }
+
+    /**
+     * A CommonPoint legacy round-trip test for the Kana transliterator.
+     */
+//    public void TestKanaRoundTrip() {
+//        Transliterator t = Transliterator.getInstance("Kana");
+//        StringTokenizer tok = new StringTokenizer(KANA_RT_DATA);
+//        while (tok.hasMoreTokens()) {
+//            String str = tok.nextToken();
+//            ReplaceableString tmp = new ReplaceableString(str);
+//            t.transliterate(tmp, Transliterator.FORWARD);
+//
+//            str = tmp.toString();
+//            tmp = new ReplaceableString(str);
+//            t.transliterate(tmp, Transliterator.REVERSE);
+//            t.transliterate(tmp, Transliterator.FORWARD);
+//            if (!tmp.toString().equals(str)) {
+//                tmp = new ReplaceableString(str);
+//                t.transliterate(tmp, Transliterator.REVERSE);
+//                String a = tmp.toString();
+//                t.transliterate(tmp, Transliterator.FORWARD);
+//                errln("FAIL: " + escape(str) + " -> " +
+//                      escape(a) + " -> " + escape(tmp.toString()));
+//            }
+//        }
+//    }
+
+    public void TestInstantiation() { // every available ID must instantiate; a bogus ID must be rejected
+        long ms = System.currentTimeMillis(); // start time, turned into elapsed time at the end
+        String ID;
+        for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
+            ID = (String) e.nextElement();
+            try {
+                Transliterator t = Transliterator.getInstance(ID);
+                // We should get a new instance if we try again
+                Transliterator t2 = Transliterator.getInstance(ID);
+                if (t != t2) {
+                    logln(ID + ":" + t);
+                } else {
+                    errln("FAIL: " + ID + " returned identical instances");
+                }
+            } catch (IllegalArgumentException ex) {
+                errln("FAIL: " + ID);
+                throw ex; // report the failing ID, then let the exception propagate
+            }
+        }
+
+        // Now test the failure path
+        try {
+            ID = "<Not a valid Transliterator ID>";
+            Transliterator t = Transliterator.getInstance(ID);
+            errln("FAIL: " + ID + " returned " + t); // reached only if no exception was thrown
+        } catch (IllegalArgumentException ex) {
+            logln("OK: Bogus ID handled properly");
+        }
+        
+        ms = System.currentTimeMillis() - ms;
+        logln("Elapsed time: " + ms + " ms");
+    }
+
+    public void TestSimpleRules() { // small rule sets, including ones that reposition the cursor with '|'
+        /* Example: rules 1. ab>x|y
+         *                2. yc>z
+         *
+         * []|eabcd  start - no match, copy e to transliterated buffer
+         * [e]|abcd  match rule 1 - copy output & adjust cursor
+         * [ex|y]cd  match rule 2 - copy output & adjust cursor
+         * [exz]|d   no match, copy d to transliterated buffer
+         * [exzd]|   done
+         */
+        expect("ab>x|y\n" +
+               "yc>z",
+               "eabcd", "exzd");
+
+        /* Another set of rules:
+         *    1. ab>x|yzacw
+         *    2. za>q
+         *    3. qc>r
+         *    4. cw>n
+         *
+         * []|ab       Rule 1
+         * [x|yzacw]   No match
+         * [xy|zacw]   Rule 2
+         * [xyq|cw]    Rule 4
+         * [xyqn]|     Done
+         */
+        expect("ab>x|yzacw\n" +
+               "za>q\n" +
+               "qc>r\n" +
+               "cw>n",
+               "ab", "xyqn");
+
+        /* Test categories
+         */
+        Transliterator t = new RuleBasedTransliterator("<ID>",
+                                                       "dummy=\uE100\n" +
+                                                       "vowel=[aeiouAEIOU]\n" +
+                                                       "lu=[:Lu:]\n" +
+                                                       "{vowel}[{lu}>!\n" +
+                                                       "{vowel}>&\n" +
+                                                       "!]{lu}>^\n" +
+                                                       "{lu}>*\n" +
+                                                       "a>ERROR"); // expected output below contains no "ERROR"
+        expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
+    }
+
+    // Restore this test if/when it's been deciphered.  In general,
+    // tests that depend on a specific transliterator are subject
+    // to the same fragility as tests that depend on resource data.
+
+//    public void TestKana() {
+//        String DATA[] = {
+//            "a", "\u3042",
+//            "A", "\u30A2",
+//            "aA", "\u3042\u30A2",
+//            "aaaa", "\u3042\u3042\u3042\u3042",
+//            "akasata", "\u3042\u304B\u3055\u305F",
+//        };
+//
+//        Transliterator t = Transliterator.getInstance("Latin-Kana");
+//        Transliterator rt = Transliterator.getInstance("Kana-Latin");
+//        for (int i=0; i<DATA.length; i+=2) {
+//            expect(t, DATA[i], DATA[i+1], rt);
+//        }
+//    }
+
+
+    /**
+     * Create some inverses and confirm that they work.  We have to be
+     * careful how we do this, since the inverses will not be true
+     * inverses -- we can't throw any random string at the composition
+     * of the transliterators and expect the identity function.  F x
+     * F' != I.  However, if we are careful about the input, we will
+     * get the expected results.
+     */
+    public void TestRuleBasedInverse() {
+        String RULES = // one rule text shared by the forward and the reverse transliterator
+            "abc>zyx\n" +
+            "ab>yz\n" +
+            "bc>zx\n" +
+            "ca>xy\n" +
+            "a>x\n" +
+            "b>y\n" +
+            "c>z\n" + // last of the '>' rules
+
+            "abc<zyx\n" +
+            "ab<yz\n" +
+            "bc<zx\n" +
+            "ca<xy\n" +
+            "a<x\n" +
+            "b<y\n" +
+            "c<z\n" + // last of the '<' rules
+
+            "";
+
+        String[] DATA = {
+            // Careful here -- random strings will not work.  If we keep
+            // the left side to the domain and the right side to the range
+            // we will be okay though (left, abc; right xyz).
+            "a", "x",
+            "abcacab", "zyxxxyy",
+            "caccb", "xyzzy",
+        };
+
+        Transliterator fwd = new RuleBasedTransliterator("<ID>", RULES);
+        Transliterator rev = new RuleBasedTransliterator("<ID>", RULES,
+                                     RuleBasedTransliterator.REVERSE, null); // same rules, REVERSE direction argument
+        for (int i=0; i<DATA.length; i+=2) { // DATA is a flat list of (left, right) pairs
+            expect(fwd, DATA[i], DATA[i+1]);
+            expect(rev, DATA[i+1], DATA[i]);
+        }
+    }
+
+    /**
+     * Basic test of keyboard transliteration; the insertions are replayed by keyboardAux().
+     */
+    public void TestKeyboard() {
+        Transliterator t = new RuleBasedTransliterator("<ID>", 
+                                                       "psch>Y\n"
+                                                       +"ps>y\n"
+                                                       +"ch>x\n"
+                                                       +"a>A\n");
+        String DATA[] = { // flat list of (text to insert, expected buffer afterwards) pairs
+            // insertion, buffer
+            "a", "A",
+            "p", "Ap",
+            "s", "Aps",
+            "c", "Apsc",
+            "a", "AycA",
+            "psch", "AycAY",
+            null, "AycAY", // null means finishKeyboardTransliteration
+        };
+
+        keyboardAux(t, DATA);
+    }
+
+    /**
+     * Basic test of keyboard with cursor: the rule ps>|y uses the '|' cursor marker.
+     */
+    public void TestKeyboard2() {
+        Transliterator t = new RuleBasedTransliterator("<ID>", 
+                                                       "ych>Y\n"
+                                                       +"ps>|y\n"
+                                                       +"ch>x\n"
+                                                       +"a>A\n");
+        String DATA[] = { // flat list of (text to insert, expected buffer afterwards) pairs
+            // insertion, buffer
+            "a", "A",
+            "p", "Ap",
+            "s", "Ay",
+            "c", "Ayc",
+            "a", "AycA",
+            "p", "AycAp",
+            "s", "AycAy",
+            "c", "AycAyc",
+            "h", "AycAY",
+            null, "AycAY", // null means finishKeyboardTransliteration
+        };
+
+        keyboardAux(t, DATA);
+    }
+
+    /**
+     * Keyboard transliteration in which a later keystroke rewrites
+     * text produced by an earlier rule (back-replacement).
+     */
+    public void TestKeyboard3() {
+        // Goal: "th" becomes z while a lone "t" becomes y.  When typed
+        // one key at a time, t first turns into y and a following h
+        // then turns that y into z.
+        String rules =
+            "t>|y\n" +
+            "yh>z\n";
+        Transliterator backReplacing = new RuleBasedTransliterator("<ID>", rules);
+
+        String[] steps = {
+            // Column 1: text appended to the buffer (as if typed)
+            // Column 2: buffer contents expected after keyboard
+            //           transliteration
+            "a", "a",
+            "b", "ab",
+            "t", "aby",
+            "c", "abyc",
+            "t", "abycy",
+            "h", "abycz",
+            null, "abycz", // null: call finishKeyboardTransliteration
+        };
+
+        keyboardAux(backReplacing, steps);
+    }
+
+    private void keyboardAux(Transliterator t, String[] DATA) {
+        ReplaceableString buffer = new ReplaceableString();
+        int[] offsets = new int[3];
+        for (int row = 0; row < DATA.length; row += 2) {
+            String typed = DATA[row];
+            StringBuffer log = new StringBuffer(buffer.toString());
+            if (typed == null) {
+                // A null entry finishes the session instead of typing text
+                log.append(" => ");
+                t.finishKeyboardTransliteration(buffer, offsets);
+            } else {
+                log.append(" + ").append(typed).append(" -> ");
+                t.keyboardTransliterate(buffer, offsets, typed);
+            }
+            String after = buffer.toString();
+            int start = offsets[Transliterator.START];
+            int cursor = offsets[Transliterator.CURSOR];
+            // Mark the start index with '{' and the cursor with '|'
+            log.append(after.substring(0, start)).append('{');
+            log.append(after.substring(start, cursor)).append('|');
+            log.append(after.substring(cursor));
+            String expected = DATA[row + 1];
+            if (!after.equals(expected)) {
+                errln("FAIL: " + log + ", expected " + expected);
+            } else {
+                logln(log.toString());
+            }
+        }
+    }
+
+    public void TestArabic() {
+        String DATA[] = {
+            "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
+                      "\u0627\u0644\u0644\u063a\u0629\u0020"+
+                      "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
+                      "\u0628\u0628\u0646\u0638\u0645\u0020"+
+                      "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
+                      "\u062c\u0645\u064a\u0644\u0629",
+        };
+
+        Transliterator t = Transliterator.getInstance("Latin-Arabic");
+        for (int i=0; i<DATA.length; i+=2) {
+            expect(t, DATA[i], DATA[i+1]);
+        }
+    }
+
+    /**
+     * Chain Latin-Kana with Kana-Latin into one compound transliterator
+     * and check that a sample string passes through unchanged.
+     */
+    public void TestCompoundKana() {
+        Transliterator[] chain = {
+            Transliterator.getInstance("Latin-Kana"),
+            Transliterator.getInstance("Kana-Latin"),
+        };
+        String unchanged = "aaaaa";
+        expect(new CompoundTransliterator("<ID>", chain), unchanged, unchanged);
+    }
+
+    /**
+     * Round-trips text through both orderings of the two hex transliterators.
+     */
+    public void TestCompoundHex() {
+        Transliterator toHex = Transliterator.getInstance("Unicode-Hex");
+        Transliterator fromHex = Transliterator.getInstance("Hex-Unicode");
+        String plain = "abcde";
+        Transliterator[] forward = { toHex, fromHex };
+        expect(new CompoundTransliterator("ab", forward), plain, plain);
+
+        // Input for the reversed chain is first converted with Unicode-Hex
+        Transliterator[] backward = { fromHex, toHex };
+        Transliterator ba = new CompoundTransliterator("ba", backward);
+        ReplaceableString hexText = new ReplaceableString(plain);
+        toHex.transliterate(hexText);
+        expect(ba, hexText.toString(), hexText.toString());
+    }
+
+    /**
+     * Checks that a character rejected by the filter is left untouched.
+     */
+    public void TestFiltering() {
+        Transliterator hex = Transliterator.getInstance("Unicode-Hex");
+        hex.setFilter(new UnicodeFilter() {
+            public boolean isIn(char c) {
+                return c != 'c'; // reject only 'c'
+            }
+        });
+        String s = "abcde";
+        String out = hex.transliterate(s);
+        String exp = "\\u0061\\u0062c\\u0064\\u0065"; // 'c' stays literal
+        if (out.equals(exp)) {
+            logln("Ok:   \"" + exp + "\"");
+        } else {
+            errln("FAIL: \"" + out + "\", wanted \"" + exp + "\""); // errln, so a mismatch fails the test
+        }
+    }
+
+    //======================================================================
+    // Support methods
+    //======================================================================
+
+    void expect(String rules, String source, String expectedResult) { // convenience overload taking rule text
+        expect(new RuleBasedTransliterator("<ID>", rules), source, expectedResult); // builds a transliterator from rules, then delegates
+    }
+
+    void expect(Transliterator t, String source, String expectedResult, Transliterator reverseTransliterator) {
+        expect(t, source, expectedResult);
+        if (reverseTransliterator == null) {
+            return; // no inverse supplied: forward check only
+        }
+        expect(reverseTransliterator, expectedResult, source);
+    }
+
+    void expect(Transliterator t, String source, String expectedResult) {
+        // Mode 1: transliterate a plain String
+        String fromString = t.transliterate(source);
+        expectAux(t.getID() + ":String", source, fromString, expectedResult);
+
+        // Mode 2: transliterate a ReplaceableString in place
+        ReplaceableString buffer = new ReplaceableString(source);
+        t.transliterate(buffer);
+        String inPlace = buffer.toString();
+        expectAux(t.getID() + ":Replaceable", source, inPlace, expectedResult);
+
+        // Mode 3: keyboard (incremental) transliteration, one char per
+        // keystroke.  Once finished, the buffer must equal the expected
+        // result just like the two modes above.
+        buffer.getStringBuffer().setLength(0);
+        int[] offsets = new int[3];
+        StringBuffer trace = new StringBuffer();
+        int length = source.length();
+        for (int pos = 0; pos < length; ++pos) {
+            char typed = source.charAt(pos);
+            if (pos > 0) {
+                trace.append(" + ");
+            }
+            trace.append(typed).append(" -> ");
+            t.keyboardTransliterate(buffer, offsets, String.valueOf(typed));
+
+            // Log the buffer with a vertical bar '|' at the cursor index
+            String snapshot = buffer.toString();
+            int cursor = offsets[Transliterator.CURSOR];
+            trace.append(snapshot.substring(0, cursor)).append('|');
+            trace.append(snapshot.substring(cursor));
+        }
+
+        // Finishing handles any pending partial matches that were
+        // still waiting for more input.
+        t.finishKeyboardTransliteration(buffer, offsets);
+        String typedResult = buffer.toString();
+        trace.append(" => ").append(typedResult);
+        expectAux(t.getID() + ":Keyboard", trace.toString(),
+                  typedResult.equals(expectedResult), expectedResult);
+    }
+
+    void expectAux(String tag, String source,
+                   String result, String expectedResult) {
+        String summary = source + " -> " + result;
+        boolean matched = result.equals(expectedResult);
+        expectAux(tag, summary, matched, expectedResult);
+    }
+    
+    void expectAux(String tag, String summary, boolean pass,
+                   String expectedResult) {
+        String line = "(" + tag + ") " + escape(summary);
+        if (!pass) {
+            // Failures also report the expected text
+            errln("FAIL: " + line + ", expected " + escape(expectedResult));
+        } else {
+            logln(line);
+        }
+    }
+    
+    /**
+     * Returns a copy of <code>s</code> in which every char outside the
+     * range U+0020..U+007F is written as a backslash, 'u', and four hex digits.
+     */
+    public static final String escape(String s) {
+        StringBuffer out = new StringBuffer();
+        int len = s.length();
+        for (int pos = 0; pos < len; ++pos) {
+            char ch = s.charAt(pos);
+            if (ch < ' ' || ch > 0x007F) {
+                String hex = Integer.toHexString(ch);
+                out.append("\\u");
+                // Left-pad with zeros so that exactly four hex digits
+                // follow the prefix; a char never needs more than
+                // four.
+                for (int pad = hex.length(); pad < 4; ++pad) {
+                    out.append('0');
+                }
+                out.append(hex);
+            } else {
+                out.append(ch);
+            }
+        }
+        return out.toString();
+    }
+
+    /*
+    static final String KANA_RT_DATA =
+"a "+
+
+"ba bi bu be bo "+
+"bya byi byu bye byo "+
+"bba "+
+
+"da di du de do "+
+"dya dyi dyu dye dyo "+
+"dha dhi dhu dhe dho "+
+"dda "+
+
+"e "+
+
+"fa fi fe fo "+
+"fya fyu fyo "+
+"ffa "+
+
+"ga gi gu ge go "+
+"gya gyi gyu gye gyo "+
+"gwa gwi gwu gwe gwo "+
+"gga "+
+
+"ha hi hu he ho "+
+"hya hyi hyu hye hyo "+
+"hha "+
+
+"i "+
+
+"ka ki ku ke ko "+
+"kwa kwi kwu kwe kwo "+
+"kya kyi kyu kye kyo "+
+"kka "+
+
+"ma mi mu me mo "+
+"mya myi myu mye myo "+
+"mba mfa mma mpa mva "+
+"m'' "+
+
+"na ni nu ne no "+
+"nya nyi nyu nye nyo "+
+"nn n'' n "+
+
+"o "+
+
+"pa pi pu pe po "+
+"pya pyi pyu pye pyo "+
+"ppa "+
+
+"qa qi qu qe qo "+
+"qya qyi qyu qye qyo "+
+"qqa "+
+
+"ra ri ru re ro "+
+"rya ryi ryu rye ryo "+
+"rra "+
+
+"sa si su se so "+
+"sya syi syu sye syo "+
+"ssya ssa "+
+
+"ta ti tu te to "+
+"tha thi thu the tho "+
+"tsa tsi tse tso "+
+"tya tyi tyu tye tyo "+
+"ttsa "+
+"tta "+
+
+"u "+
+
+"va vi vu ve vo "+
+"vya vyi vyu vye vyo "+
+"vva "+
+
+"wa wi we wo "+
+"wwa "+
+
+"ya yu ye yo "+
+"yya "+
+
+"za zi zu ze zo "+
+"zya zyi zyu zye zyo "+
+"zza "+
+
+"xa xi xu xe xo "+
+"xka xke "+
+"xtu "+
+"xwa "+
+"xya xyu xyo "+
+
+        "akka akki akku akke akko "+
+        "akkya akkyu akkyo "+
+
+        "atta atti attu atte atto "+
+        "attya attyu attyo "+
+        "adda addi addu adde addo "+
+
+        "atcha atchi atchu atche atcho "+
+
+        "assa assi assu asse asso "+
+        "assya assyu assyo "+
+
+        "ahha ahhi ahhu ahhe ahho "+
+        "appa appi appu appe appo "+
+
+        "an "+
+        "ana ani anu ane ano "+
+        "anna anni annu anne anno "+
+        "an'a an'i an'u an'e an'o "+
+
+        "annna annni annnu annne annno "+
+        "an'na an'ni an'nu an'ne an'no "+
+
+        "anka anki anku anke anko "+
+        "anga angi angu ange ango "+
+
+        "ansa ansi ansu anse anso "+
+        "anza anzi anzu anze anzo "+
+        "anzya anzyu anzyo "+
+
+        "anta anti antu ante anto "+
+        "antya antyu antyo "+
+        "anda andi andu ande ando "+
+
+        "ancha anchi anchu anche ancho "+
+        "anja anji anju anje anjo "+
+        "antsa antsu antso "+
+
+        "anpa anpi anpu anpe anpo "+
+        "ampa ampi ampu ampe ampo "+
+
+        "anba anbi anbu anbe anbo "+
+        "amba ambi ambu ambe ambo "+
+
+        "anma anmi anmu anme anmo "+
+        "amma ammi ammu amme ammo "+
+
+        "anwa anwi anwu anwe anwo "+
+
+        "anha anhi anhu anhe anho "+
+
+        "anya anyi anyu anye anyo "+
+        "annya annyi annyu annye annyo "+
+        "an'ya an'yi an'yu an'ye an'yo "+
+
+        "kkk "+
+        "ggg "+
+        "sss "+
+        "zzz "+
+        "ttt "+
+        "ddd "+
+        "nnn "+
+        "hhh "+
+        "bbb "+
+        "ppp "+
+        "mmm "+
+        "yyy "+
+        "rrr "+
+        "www ";
+*/
+
+        /*+
+
+        "A I U E O "+
+        "XA XI XU XE XO "+
+
+        "KA KI KU KE KO "+
+        "KYA KYI KYU KYE KYO "+
+        "KWA KWI KWU KWE KWO "+
+        "QA QI QU QE QO "+
+        "QYA QYI QYU QYE QYO "+
+        "XKA XKE "+
+
+        "GA GI GU GE GO "+
+        "GYA GYI GYU GYE GYO "+
+        "GWA GWI GWU GWE GWO "+
+
+        "SA SI SU SE SO  "+
+        "SHA SHI SHU SHE SHO "+
+        "SYA SYI SYU SYE SYO "+
+
+        "ZA ZI ZU ZE ZO "+
+        "ZYA ZYI ZYU ZYE ZYO "+
+        "JA JI JU JE JO "+
+        "JYA JYU JYO "+
+
+        "TA TI TU TE TO "+
+        "XTU XTSU "+
+        "TYA TYU TYO "+
+        "CYA CYU CYO "+
+        "CHA CHI CHU CHE CHO "+
+        "TSA TSI TSU TSE TSO "+
+        "DA DI DU DE DO "+
+        "DYA DYU DYO "+
+        "THA THI THU THE THO "+
+        "DHA DHI DHU DHE DHO "+
+
+        "NA NI NU NE NO "+
+        "NYA NYU NYO "+
+
+        "HA HI HU HE HO "+
+        "HYA HYU HYO "+
+        "FA FI FU FE FO "+
+        "FYA FYU FYO "+
+        "BA BI BU BE BO "+
+        "BYA BYU BYO "+
+        "PA PI PU PE PO "+
+        "PYA PYU PYO "+
+
+        "MA MI MU ME MO "+
+        "MYA MYU MYO "+
+        "YA YI YU YE YO "+
+        "XYA XYI XYU XYE XYO "+
+
+        "RA RI RU RE RO "+
+        "LA LI LU LE LO "+
+        "RYA RYI RYU RYE RYO "+
+        "LYA LYI LYU LYE LYO "+
+
+        "WA WI WU WE WO "+
+        "VA VI VU VE VO "+
+        "VYA VYU VYO "+
+
+        "CYA CYI CYU CYE CYO "+
+
+        "NN "+
+        "N' "+
+        "N "+
+
+        "AKKA AKKI AKKU AKKE AKKO "+
+        "AKKYA AKKYU AKKYO "+
+
+        "ATTA ATTI ATTU ATTE ATTO "+
+        "ATTYA ATTYU ATTYO "+
+        "ADDA ADDI ADDU ADDE ADDO "+
+
+        "ATCHA ATCHI ATCHU ATCHE ATCHO "+
+
+        "ASSA ASSI ASSU ASSE ASSO "+
+        "ASSYA ASSYU ASSYO "+
+
+        "AHHA AHHI AHHU AHHE AHHO "+
+        "APPA APPI APPU APPE APPO "+
+
+        "AN "+
+        "ANA ANI ANU ANE ANO "+
+        "ANNA ANNI ANNU ANNE ANNO "+
+        "AN'A AN'I AN'U AN'E AN'O "+
+
+        "ANNNA ANNNI ANNNU ANNNE ANNNO "+
+        "AN'NA AN'NI AN'NU AN'NE AN'NO "+
+
+        "ANKA ANKI ANKU ANKE ANKO "+
+        "ANGA ANGI ANGU ANGE ANGO "+
+
+        "ANSA ANSI ANSU ANSE ANSO "+
+        "ANZA ANZI ANZU ANZE ANZO "+
+        "ANZYA ANZYU ANZYO "+
+
+        "ANTA ANTI ANTU ANTE ANTO "+
+        "ANTYA ANTYU ANTYO "+
+        "ANDA ANDI ANDU ANDE ANDO "+
+
+        "ANCHA ANCHI ANCHU ANCHE ANCHO "+
+        "ANJA ANJI ANJU ANJE ANJO "+
+        "ANTSA ANTSU ANTSO "+
+
+        "ANPA ANPI ANPU ANPE ANPO "+
+        "AMPA AMPI AMPU AMPE AMPO "+
+
+        "ANBA ANBI ANBU ANBE ANBO "+
+        "AMBA AMBI AMBU AMBE AMBO "+
+
+        "ANMA ANMI ANMU ANME ANMO "+
+        "AMMA AMMI AMMU AMME AMMO "+
+
+        "ANWA ANWI ANWU ANWE ANWO "+
+
+        "ANHA ANHI ANHU ANHE ANHO "+
+
+        "ANYA ANYI ANYU ANYE ANYO "+
+        "ANNYA ANNYI ANNYU ANNYE ANNYO "+
+        "AN'YA AN'YI AN'YU AN'YE AN'YO "+
+
+        "KKK "+
+        "GGG "+
+        "SSS "+
+        "ZZZ "+
+        "TTT "+
+        "DDD "+
+        "NNN "+
+        "HHH "+
+        "BBB "+
+        "PPP "+
+        "MMM "+
+        "YYY "+
+        "RRR "+
+        "WWW";*/
+}
diff --git a/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java b/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java
new file mode 100755
index 00000000000..8417faf4b44
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/UnicodeSetTest.java
@@ -0,0 +1,118 @@
+import com.ibm.text.*;
+import java.text.*;
+import java.util.*;
+
+/**
+ * @test
+ * @summary General test of UnicodeSet
+ */
+public class UnicodeSetTest extends IntlTest {
+
+    public static void main(String[] args) throws Exception {
+        new UnicodeSetTest().run(args);
+    }
+
+    public void TestPatterns() {
+        UnicodeSet letters = new UnicodeSet();
+        expectPattern(letters, "[[a-m]&[d-z]&[k-y]]", "km");
+        expectPattern(letters, "[[a-z]-[m-y]-[d-r]]", "aczz");
+        expectPattern(letters, "[a\\-z]", "--aazz");
+        expectPattern(letters, "[-az]", "--aazz");
+        expectPattern(letters, "[az-]", "--aazz");
+        expectPattern(letters, "[[[a-z]-[aeiou]i]]", "bdfnptvz");
+
+        // Also complement the last set and check the resulting pairs
+        letters.complement();
+        String complemented = "\u0000" + "aeeoouu" + '{' + "\uFFFF";
+        expectPairs(letters, complemented);
+    }
+
+    public void TestAddRemove() {
+        UnicodeSet target = new UnicodeSet();
+        target.add('a', 'z');
+        expectPairs(target, "az");
+        target.remove('m', 'p');
+        expectPairs(target, "alqz");
+        target.remove('e', 'g');
+        expectPairs(target, "adhlqz");
+        target.remove('d', 'i');
+        expectPairs(target, "acjlqz");
+        target.remove('c', 'r');
+        expectPairs(target, "absz");
+        target.add('f', 'q');
+        expectPairs(target, "abfqsz");
+        target.remove('a', 'g');
+        expectPairs(target, "hqsz");
+        target.remove('a', 'z');
+        expectPairs(target, "");
+
+        // Subtract one whole set from another
+        expectPattern(target, "[c-x]", "cx");
+        UnicodeSet operand = new UnicodeSet();
+        expectPattern(operand, "[f-ky-za-bc[vw]]", "acfkvwyz");
+        target.removeAll(operand);
+        expectPairs(target, "deluxx");
+
+        // Union of two whole sets
+        expectPattern(target, "[jackiemclean]", "aacceein");
+        expectPattern(operand, "[hitoshinamekatajamesanderson]", "aadehkmort");
+        target.addAll(operand);
+        expectPairs(target, "aacehort");
+
+        // Same union with the operands swapped must give the same pairs
+        expectPattern(target, "[hitoshinamekatajamesanderson]", "aadehkmort");
+        expectPattern(operand, "[jackiemclean]", "aacceein");
+        target.addAll(operand);
+        expectPairs(target, "aacehort");
+    }
+
+    /**
+     * Applies <code>pattern</code> to <code>set</code> and verifies the
+     * resulting pair list, logging success and reporting a mismatch.
+     */
+    void expectPattern(UnicodeSet set, String pattern, String expectedPairs) {
+        set.applyPattern(pattern);
+        // Both outcomes share the same leading description
+        String what = "applyPattern(\"" + pattern + "\") => pairs \"";
+        if (set.getPairs().equals(expectedPairs)) {
+            logln("Ok:   " + what + escape(set.getPairs()) + "\"");
+            return;
+        }
+        errln("FAIL: " + what + escape(set.getPairs()) + "\", expected \""
+              + escape(expectedPairs) + "\"");
+    }
+
+    void expectPairs(UnicodeSet set, String expectedPairs) {
+        if (set.getPairs().equals(expectedPairs)) {
+            return; // a match is not logged
+        }
+        errln("FAIL: Expected pair list \"" + escape(expectedPairs)
+              + "\", got \"" + escape(set.getPairs()) + "\"");
+    }
+
+    /**
+     * Returns a copy of <code>s</code> in which every char outside the
+     * range U+0020..U+007F is written as a backslash, 'u', and four hex digits.
+     */
+    static final String escape(String s) {
+        StringBuffer out = new StringBuffer();
+        int len = s.length();
+        for (int pos = 0; pos < len; ++pos) {
+            char ch = s.charAt(pos);
+            if (ch < ' ' || ch > 0x007F) {
+                String hex = Integer.toHexString(ch);
+                out.append("\\u");
+                // Left-pad with zeros so that exactly four hex digits
+                // follow the prefix; a char never needs more than
+                // four.
+                for (int pad = hex.length(); pad < 4; ++pad) {
+                    out.append('0');
+                }
+                out.append(hex);
+            } else {
+                out.append(ch);
+            }
+        }
+        return out.toString();
+    }
+}
diff --git a/icu4j/src/com/ibm/icu/text/CompoundTransliterator.java b/icu4j/src/com/ibm/icu/text/CompoundTransliterator.java
new file mode 100755
index 00000000000..c3582237d42
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/text/CompoundTransliterator.java
@@ -0,0 +1,285 @@
+package com.ibm.text;
+
+import java.util.Enumeration;
+import java.util.Vector;
+
+/**
+ * A transliterator that is composed of two or more other
+ * transliterator objects linked together.  For example, if one
+ * transliterator transliterates from script A to script B, and
+ * another transliterates from script B to script C, the two may be
+ * combined to form a new transliterator from A to C.
+ *
+ * <p>Composed transliterators may not behave as expected.  For
+ * example, inverses may not combine to form the identity
+ * transliterator.  See the class documentation for {@link
+ * Transliterator} for details.
+ *
+ * <p>If a non-<tt>null</tt> <tt>UnicodeFilter</tt> is applied to a
+ * <tt>CompoundTransliterator</tt>, it has the effect of being
+ * logically <b>and</b>ed with the filter of each transliterator in
+ * the chain.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class CompoundTransliterator extends Transliterator {
+
+    private static final boolean DEBUG = false; // true: trace each step to System.out
+
+    private Transliterator[] trans; // the chain, applied in array order
+
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Constructs a new compound transliterator given an array of
+     * transliterators.  The array of transliterators may be of any
+     * length, including zero or one; however, useful compound
+     * transliterators have at least two components.
+     * @param ID the identifier handed to the superclass constructor
+     * @param transliterators array of <code>Transliterator</code> objects; the array is copied
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     */
+    public CompoundTransliterator(String ID, Transliterator[] transliterators,
+                                  UnicodeFilter filter) {
+        super(ID, filter);
+        trans = new Transliterator[transliterators.length];
+        System.arraycopy(transliterators, 0, trans, 0, trans.length);
+    }
+
+    /**
+     * Constructs a new compound transliterator with no filter, given an
+     * array of transliterators.  The array may be of any length,
+     * including zero or one; however, useful compound transliterators
+     * have at least two components.
+     * @param ID the identifier handed to the superclass constructor
+     * @param transliterators array of <code>Transliterator</code> objects; the array is copied
+     */
+    public CompoundTransliterator(String ID, Transliterator[] transliterators) {
+        this(ID, transliterators, null);
+    }
+
+    /**
+     * Returns the number of transliterators in this chain.
+     * @return number of transliterators in this chain.
+     */
+    public int getCount() {
+        return trans.length;
+    }
+
+    /**
+     * Returns the transliterator at the given index in this chain.
+     * @param index index into chain, from 0 to <code>getCount() - 1</code>
+     * @return transliterator at the given index
+     */
+    public Transliterator getTransliterator(int index) {
+        return trans[index];
+    }
+
+    /**
+     * Transliterates a segment of a string by running every transliterator
+     * in the chain over it, in order.  <code>Transliterator</code> API.
+     * NOTE(review): this method never reads getFilter(); confirm whether
+     * the compound filter is meant to apply here as well.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 <= start <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit <= text.length()</code>.
+     * @return the new limit index
+     */
+    public int transliterate(Replaceable text, int start, int limit) {
+        for (int i=0; i<trans.length; ++i) {
+            limit = trans[i].transliterate(text, start, limit);
+        }
+        return limit;
+    }
+
+    /**
+     * Implements {@link Transliterator#handleKeyboardTransliterate}.
+     */
+    protected void handleKeyboardTransliterate(Replaceable text,
+                                               int[] index) {
+        /* Call each transliterator with the same start value and
+         * initial cursor index, but with the limit index as modified
+         * by preceding transliterators.  The cursor index must be
+         * reset for each transliterator to give each a chance to
+         * transliterate the text.  The initial cursor index is known
+         * to still point to the same place after each transliterator
+         * is called because each transliterator will not change the
+         * text between start and the initial value of cursor.
+         *
+         * IMPORTANT: After the first transliterator, each subsequent
+         * transliterator only gets to transliterate text committed by
+         * preceding transliterators; that is, the cursor (output
+         * value) of transliterator i becomes the limit (input value)
+         * of transliterator i+1.  Finally, the overall limit is fixed
+         * up before we return.
+         *
+         * Assumptions we make here:
+         * (1) start <= cursor <= limit    ;cursor valid on entry
+         * (2) cursor <= cursor' <= limit' ;cursor doesn't move back
+         * (3) cursor <= limit'            ;text before cursor unchanged
+         * - cursor' is the value of cursor after calling handleKT
+         * - limit' is the value of limit after calling handleKT
+         */
+
+        /*
+         * Example: 3 transliterators.  This example illustrates the
+         * mechanics we need to implement.  S, C, and L are the start,
+         * cursor, and limit.  gl is the globalLimit.
+         *
+         * 1. h-u, changes hex to Unicode
+         *
+         *    4  7  a  d  0      4  7  a
+         *    abc/u0061/u    =>  abca/u
+         *    S  C       L       S   C L   gl=f->a
+         *
+         * 2. upup, changes "a" to "AA"
+         *
+         *    4  7  a       4  7  a
+         *    abca/u    =>  abcAA/u
+         *    S  CL         S    C
+         *                       L    gl=a->b
+         * 3. u-h, changes Unicode to hex
+         *
+         *    4  7  a        4  7  a  d  0  3
+         *    abcAA/u    =>  abc/u0041/u0041/u
+         *    S  C L         S              C
+         *                                  L   gl=b->15
+         * 4. return
+         *
+         *    4  7  a  d  0  3
+         *    abc/u0041/u0041/u
+         *    S C L
+         */
+
+        /*
+         * One more wrinkle.  If there is a filter F for the compound
+         * transliterator as a whole, then every filter f in the chain
+         * is temporarily replaced by f' = UnicodeFilterLogic.and(F, f).
+         * When we're done, the finally clause restores the originals.
+         *
+         * A possible future optimization is to change f to f' at
+         * construction time, but then if anyone else is using the
+         * transliterators in the chain outside of this context, they
+         * will get unexpected results.
+         */
+        UnicodeFilter F = getFilter();
+        UnicodeFilter[] f = null;
+        if (F != null) {
+            f = new UnicodeFilter[trans.length];
+            for (int i=0; i<f.length; ++i) {
+                f[i] = trans[i].getFilter();
+                trans[i].setFilter(UnicodeFilterLogic.and(F, f[i]));
+            }
+        }
+
+        try {
+            int cursor = index[CURSOR];
+            int limit = index[LIMIT];
+            int globalLimit = limit;
+            /* globalLimit is the overall limit.  We keep track of this
+             * since we overwrite index[LIMIT] with the previous
+             * index[CURSOR].  After each transliteration, we update
+             * globalLimit for insertions or deletions that have happened.
+             */
+
+            for (int i=0; i<trans.length; ++i) {
+                index[CURSOR] = cursor; // Reset cursor
+                index[LIMIT] = limit;
+
+                if (DEBUG) {
+                    System.out.print(escape(i + ": \"" +
+                        substring(text, index[START], index[CURSOR]) + '|' +
+                        substring(text, index[CURSOR], index[LIMIT]) +
+                        "\" -> \""));
+                }
+
+                trans[i].handleKeyboardTransliterate(text, index);
+
+                if (DEBUG) {
+                    System.out.println(escape(
+                        substring(text, index[START], index[CURSOR]) + '|' +
+                        substring(text, index[CURSOR], index[LIMIT]) +
+                        '"'));
+                }
+            
+                // Adjust overall limit by the change in this step's limit
+                globalLimit += index[LIMIT] - limit;
+                limit = index[CURSOR]; // Move limit to end of committed text
+            }
+            // Cursor is good where it is -- where the last
+            // transliterator left it.  Limit needs to be put back
+            // where it was, modulo adjustments for deletions/insertions.
+            index[LIMIT] = globalLimit;
+
+        } finally {
+            // Restore the original transliterator filters, if we had to modify them.
+            if (f != null) {
+                for (int i=0; i<f.length; ++i) {
+                    trans[i].setFilter(f[i]);
+                }
+            }
+        }
+    }
+
+    /**
+     * Returns the length of the longest context required by this transliterator,
+     * i.e. the largest value reported by any transliterator in the chain.
+     * This is <em>preceding</em> context.
+     * @return maximum number of preceding context characters this transliterator needs to examine
+     */
+    protected int getMaximumContextLength() {
+        int max = 0;
+        for (int i=0; i<trans.length; ++i) {
+            int len = trans[i].getMaximumContextLength();
+            if (len > max) {
+                max = len;
+            }
+        }
+        return max;
+    }
+
+    /**
+     * DEBUG
+     * Returns the characters of a Replaceable from start (inclusive) to limit (exclusive).
+     */
+    private static final String substring(Replaceable str, int start, int limit) {
+        StringBuffer buf = new StringBuffer();
+        while (start < limit) {
+            buf.append(str.charAt(start++));
+        }
+        return buf.toString();
+    }
+
+    /**
+     * DEBUG
+     * Escapes chars outside U+0020..U+007F as a backslash, 'u', and four hex digits.
+     */
+    private static final String escape(String s) {
+        StringBuffer buf = new StringBuffer();
+        for (int i=0; i<s.length(); ++i) {
+            char c = s.charAt(i);
+            if (c >= ' ' && c <= 0x007F) {
+                buf.append(c);
+            } else {
+                buf.append("\\u");
+                if (c < 0x1000) {
+                    buf.append('0');
+                    if (c < 0x100) {
+                        buf.append('0');
+                        if (c < 0x10) {
+                            buf.append('0');
+                        }
+                    }
+                }
+                buf.append(Integer.toHexString(c));
+            }
+        }
+        return buf.toString();
+    }
+}
diff --git a/icu4j/src/com/ibm/icu/text/HexToUnicodeTransliterator.java b/icu4j/src/com/ibm/icu/text/HexToUnicodeTransliterator.java
new file mode 100755
index 00000000000..18673e15fe7
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/text/HexToUnicodeTransliterator.java
@@ -0,0 +1,130 @@
+package com.ibm.text;
+import java.util.*;
+
+/**
+ * A transliterator that converts from hexadecimal Unicode
+ * escape sequences to the characters they represent.  For example, "U+0040"
+ * and '\u0040'.  It recognizes the
+ * prefixes "U+", "u+", "&#92;U", and "&#92;u".  Hex values may be
+ * upper- or lowercase.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: HexToUnicodeTransliterator.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class HexToUnicodeTransliterator extends Transliterator {
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Package accessible ID for this transliterator.
+     */
+    static String _ID = "Hex-Unicode";
+
+    /**
+     * Constructs a transliterator.
+     */
+    public HexToUnicodeTransliterator() {
+        super(_ID, null);
+    }
+
+    /**
+     * Transliterates a segment of a string.  <code>Transliterator</code> API.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @return the new limit index
+     */
+    public int transliterate(Replaceable text, int start, int limit) {
+        int[] offsets = { start, limit, start };
+        handleKeyboardTransliterate(text, offsets);
+        return offsets[LIMIT];
+    }
+
+    /**
+     * Implements {@link Transliterator#handleKeyboardTransliterate}.
+     */
+    protected void handleKeyboardTransliterate(Replaceable text,
+                                               int[] offsets) {
+        /**
+         * Performs transliteration changing Unicode hexadecimal
+         * escapes to characters.  For example, "U+0040" -> '@'.  A fixed
+         * set of prefixes is recognized: "&#92;u", "&#92;U", "u+", "U+". 
+         */
+        int cursor = offsets[CURSOR];
+        int limit = offsets[LIMIT];
+
+        int maxCursor = limit - 6;
+    loop:
+        while (cursor <= maxCursor) {
+            char c = filteredCharAt(text, cursor + 5);
+            int digit0 = Character.digit(c, 16);
+            if (digit0 < 0) {
+                if (c == '\\') {
+                    cursor += 5;
+                } else if (c == 'U' || c == 'u' || c == '+') {
+                    cursor += 4;
+                } else {
+                    cursor += 6;
+                }
+                continue;
+            }
+
+            int u = digit0;
+
+            for (int i=4; i>=2; --i) {
+                c = filteredCharAt(text, cursor + i);
+                int digit = Character.digit(c, 16);
+                if (digit < 0) {
+                    if (c == 'U' || c == 'u' || c == '+') {
+                        cursor += i-1;
+                    } else {
+                        cursor += 6;
+                    }
+                    continue loop;
+                }
+                u |= digit << (4 * (5-i));
+            }
+
+            c = filteredCharAt(text, cursor);
+            char d = filteredCharAt(text, cursor + 1);
+            if (((c == 'U' || c == 'u') && d == '+')
+                || (c == '\\' && (d == 'U' || d == 'u'))) {
+                
+                // At this point, we have a match; replace cursor..cursor+5
+                // with u.
+                text.replace(cursor, cursor+6, String.valueOf((char) u));
+                limit -= 5;
+                maxCursor -= 5;
+
+                ++cursor;
+            } else {
+                cursor += 6;
+            }
+        }
+
+        offsets[LIMIT] = limit;
+        offsets[CURSOR] = cursor;
+    }
+    
+    private char filteredCharAt(Replaceable text, int i) {
+        char c;
+        UnicodeFilter filter = getFilter();
+        return (filter == null) ? text.charAt(i) :
+            (filter.isIn(c = text.charAt(i)) ? c : '\uFFFF');
+    }
+
+    /**
+     * Return the length of the longest context required by this transliterator.
+     * This is <em>preceding</em> context.
+     * @param direction either <code>FORWARD</code> or <code>REVERSE</code>
+     * @return maximum number of preceding context characters this
+     * transliterator needs to examine
+     */
+    protected int getMaximumContextLength() {
+        return 0;
+    }
+}
diff --git a/icu4j/src/com/ibm/icu/text/Replaceable.java b/icu4j/src/com/ibm/icu/text/Replaceable.java
new file mode 100755
index 00000000000..b4c8519689c
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/text/Replaceable.java
@@ -0,0 +1,77 @@
+package com.ibm.text;
+
+/**
+ * <code>Replaceable</code> is an interface that supports the
+ * operation of replacing a substring with another piece of text.
+ * <code>Replaceable</code> is needed in order to change a piece of
+ * text while retaining style attributes.  For example, if the string
+ * "the <b>bold</b> font" has range (4, 8) replaced with "strong",
+ * then it becomes "the <b>strong</b> font".
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: Replaceable.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public interface Replaceable {
+    /**
+     * Return the number of characters in the text.
+     * @return number of characters in text
+     */ 
+    int length();
+
+    /**
+     * Return the character at the given offset into the text.
+     * @param offset an integer between 0 and <code>length()</code>-1
+     * inclusive
+     * @return character of text at given offset
+     */
+    char charAt(int offset);
+
+    /**
+     * Copies characters from this object into the destination
+     * character array.  The first character to be copied is at index
+     * <code>srcStart</code>; the last character to be copied is at
+     * index <code>srcLimit-1</code> (thus the total number of
+     * characters to be copied is <code>srcLimit-srcStart</code>). The
+     * characters are copied into the subarray of <code>dst</code>
+     * starting at index <code>dstStart</code> and ending at index
+     * <code>dstStart + (srcLimit-srcStart) - 1</code>.
+     *
+     * @param srcStart the beginning index to copy, inclusive; <code>0
+     * &lt;= srcStart &lt;= srcLimit</code>.
+     * @param srcLimit the ending index to copy, exclusive;
+     * <code>srcStart &lt;= srcLimit &lt;= length()</code>.
+     * @param dst the destination array.
+     * @param dstStart the start offset in the destination array.
+     */
+    void getChars(int srcStart, int srcLimit, char dst[], int dstStart);
+
+    /**
+     * Replace a substring of this object with the given text.  The
+     * replacement need not have the same length as the range it
+     * replaces.
+     * @param start the beginning index, inclusive; <code>0 &lt;= start
+     * &lt;= limit</code>.
+     * @param limit the ending index, exclusive; <code>start &lt;= limit
+     * &lt;= length()</code>.
+     * @param text the text to replace characters <code>start</code>
+     * to <code>limit - 1</code>
+     */
+    void replace(int start, int limit, String text);
+
+    /**
+     * Replace a substring of this object with characters taken from
+     * an array.
+     * @param start the beginning index, inclusive; <code>0 &lt;= start
+     * &lt;= limit</code>.
+     * @param limit the ending index, exclusive; <code>start &lt;= limit
+     * &lt;= length()</code>.
+     * @param chars the array holding the text to replace characters
+     * <code>start</code> to <code>limit - 1</code>
+     * @param charsStart the index into <code>chars</code> of the first
+     * replacement character.
+     * @param charsLen the number of characters of <code>chars</code>
+     * to use, beginning at <code>charsStart</code>.
+     */
+    void replace(int start, int limit, char[] chars,
+                 int charsStart, int charsLen);
+    // Note: We use length rather than limit to conform to StringBuffer
+    // and System.arraycopy.
+}
diff --git a/icu4j/src/com/ibm/icu/text/ReplaceableString.java b/icu4j/src/com/ibm/icu/text/ReplaceableString.java
new file mode 100755
index 00000000000..d6a7df06db5
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/text/ReplaceableString.java
@@ -0,0 +1,159 @@
+package com.ibm.text;
+
+/**
+ * <code>ReplaceableString</code> is an adapter class that implements the
+ * <code>Replaceable</code> API around an ordinary <code>StringBuffer</code>.
+ *
+ * <p><em>Note:</em> This class does not support attributes and is not
+ * intended for general use.  Most clients will need to implement
+ * {@link Replaceable} in their text representation class.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @see Replaceable
+ * @author Alan Liu
+ * @version $RCSfile: ReplaceableString.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class ReplaceableString implements Replaceable {
+    private StringBuffer buf;
+
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Construct a new object with the given initial contents.
+     * @param str initial contents
+     */
+    public ReplaceableString(String str) {
+        buf = new StringBuffer(str);
+    }
+
+    /**
+     * Construct a new object using <code>buf</code> for internal
+     * storage.  The contents of <code>buf</code> at the time of
+     * construction are used as the initial contents.  <em>Note!
+     * Modifications to <code>buf</code> will modify this object, and
+     * vice versa.</em>
+     * @param buf object to be used as internal storage
+     */
+    public ReplaceableString(StringBuffer buf) {
+        this.buf = buf;
+    }
+
+    /**
+     * Construct a new empty object.
+     */
+    public ReplaceableString() {
+        buf = new StringBuffer();
+    }
+
+    /**
+     * Return the contents of this object as a <code>String</code>.
+     * @return string contents of this object
+     */
+    public String toString() {
+        return buf.toString();
+    }
+
+    /**
+     * Return the internal storage of this object.  <em>Note!  Any
+     * changes made to the returned object affect this object's
+     * contents, and vice versa.</em>
+     * @return internal buffer used by this object
+     */
+    public StringBuffer getStringBuffer() {
+        return buf;
+    }
+
+    /**
+     * Return the number of characters contained in this object.
+     * <code>Replaceable</code> API.
+     * @return number of characters in the internal buffer
+     */
+    public int length() {
+        return buf.length();
+    }
+
+    /**
+     * Return the character at the given position in this object.
+     * <code>Replaceable</code> API.
+     * @param offset offset into the contents, from 0 to
+     * <code>length()</code> - 1
+     * @return the character at <code>offset</code>
+     */
+    public char charAt(int offset) {
+        return buf.charAt(offset);
+    }
+
+    /**
+     * Copies characters from this object into the destination
+     * character array.  The first character to be copied is at index
+     * <code>srcStart</code>; the last character to be copied is at
+     * index <code>srcLimit-1</code> (thus the total number of
+     * characters to be copied is <code>srcLimit-srcStart</code>). The
+     * characters are copied into the subarray of <code>dst</code>
+     * starting at index <code>dstStart</code> and ending at index
+     * <code>dstStart + (srcLimit-srcStart) - 1</code>.
+     *
+     * @param srcStart the beginning index to copy, inclusive; <code>0
+     * &lt;= srcStart &lt;= srcLimit</code>.
+     * @param srcLimit the ending index to copy, exclusive;
+     * <code>srcStart &lt;= srcLimit &lt;= length()</code>.
+     * @param dst the destination array.
+     * @param dstStart the start offset in the destination array.
+     */
+    public void getChars(int srcStart, int srcLimit, char dst[], int dstStart) {
+        buf.getChars(srcStart, srcLimit, dst, dstStart);
+    }
+
+    /**
+     * Replace zero or more characters with new characters.
+     * <code>Replaceable</code> API.
+     *
+     * <p>The range is validated before the buffer is modified, so an
+     * invalid range leaves the contents untouched.  A <code>limit</code>
+     * greater than <code>length()</code> is treated as
+     * <code>length()</code>.
+     * @param start the beginning index, inclusive; <code>0 &lt;= start
+     * &lt;= limit</code>.
+     * @param limit the ending index, exclusive; <code>start &lt;= limit
+     * &lt;= length()</code>.
+     * @param text new text to replace characters <code>start</code> to
+     * <code>limit - 1</code>; must not be null
+     * @exception StringIndexOutOfBoundsException if <code>start</code>
+     * is negative, greater than <code>length()</code>, or greater than
+     * <code>limit</code>
+     * @exception NullPointerException if <code>text</code> is null
+     */
+    public void replace(int start, int limit, String text) {
+        // StringBuffer.replace handles insertion (start == limit) and the
+        // shifting of the tail, and rejects bad ranges up front.
+        buf.replace(start, limit, text);
+    }
+
+    /**
+     * Replace a substring of this object with characters taken from
+     * an array.  <code>Replaceable</code> API.
+     *
+     * <p>Both the text range and the array range are validated before
+     * the buffer is modified, so a failed call leaves the contents
+     * untouched.
+     * @param start the beginning index, inclusive; <code>0 &lt;= start
+     * &lt;= limit</code>.
+     * @param limit the ending index, exclusive; <code>start &lt;= limit
+     * &lt;= length()</code>.
+     * @param chars the array holding the text to replace characters
+     * <code>start</code> to <code>limit - 1</code>
+     * @param charsStart the index into <code>chars</code> of the first
+     * replacement character.
+     * @param charsLen the number of characters of <code>chars</code>
+     * to use, beginning at <code>charsStart</code>.
+     * @exception IndexOutOfBoundsException if either range is invalid
+     */
+    public void replace(int start, int limit, char[] chars,
+                        int charsStart, int charsLen) {
+        // Building the String first checks charsStart/charsLen against the
+        // array before anything in the buffer changes.
+        String replacement = new String(chars, charsStart, charsLen);
+        buf.replace(start, limit, replacement);
+    }
+}
diff --git a/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java b/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java
new file mode 100755
index 00000000000..4a433e9479d
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/text/RuleBasedTransliterator.java
@@ -0,0 +1,1187 @@
+package com.ibm.text;
+
+import java.util.Hashtable;
+import java.util.Vector;
+
+/**
+ * A transliterator that reads a set of rules in order to determine how to
+ * perform translations.  Rules are stored in resource bundles indexed by name.
+ * Rules are separated by newline characters ('\n'); to include a literal
+ * newline, prefix it with a backslash ('\\\n').  Whitespace is significant.  If
+ * the first character on a line is '#', the entire line is ignored as a
+ * comment.
+ *
+ * <p>Each set of rules consists of two groups, one forward, and one reverse.
+ * This is a convention that is not enforced; rules for one direction may be
+ * omitted, with the result that translations in that direction will not modify
+ * the source text.
+ *
+ * <p><b>Rule syntax</b>
+ *
+ * <p>Rule statements take one of the following forms:
+ * <dl>
+ *   <dt><code>alefmadda=&#092;u0622</code></dt>
+ *
+ *   <dd><strong>Variable definition.</strong> The name on the left is
+ *   assigned the character or expression on the right. Names may not
+ *   contain any special characters (see list below). Duplicate names
+ *   (including duplicates of simple variables or category names)
+ *   cause an exception to be thrown.  If the right hand side consists
+ *   of one character, then the variable stands for that character.
+ *   In this example, after this statement, instances of the left hand
+ *   name surrounded by braces, &quot;<code>{alefmadda}</code>&quot;,
+ *   will be replaced by the Unicode character U+0622.  If the
+ *   right hand side is longer than one character, then it is
+ *   interpreted as a character category expression; see below for
+ *   details.</dd>
+ *
+ *   <dt><code>softvowel=[eiyEIY]</code></dt>
+ *
+ *   <dd><strong>Category definition.</strong> The name on the left is assigned
+ *   to stand for a set of characters.  The same rules for names of simple
+ *   variables apply. After this statement, the left hand variable will be
+ *   interpreted as indicating a set of characters in appropriate contexts. The
+ *   pattern syntax defining sets of characters is defined by {@link UnicodeSet}.
+ *   Examples of valid patterns are:<table>
+ *
+ *       <tr valign=top>
+ *         <td nowrap><code>[abc]</code></td>
+ *         <td>The set containing the characters 'a', 'b', and 'c'.</td>
+ *       </tr>
+ *       <tr valign=top>
+ *         <td nowrap><code>[^abc]</code></td>
+ *         <td>The set of all characters <em>except</em> 'a', 'b', and 'c'.</td>
+ *       </tr>
+ *       <tr valign=top>
+ *         <td nowrap><code>[A-Z]</code></td>
+ *         <td>The set of all characters from 'A' to 'Z' in Unicode order.</td>
+ *       </tr>
+ *       <tr valign=top>
+ *         <td nowrap><code>[:Lu:]</code></td>
+ *         <td>The set of Unicode uppercase letters. See
+ *         <a href="http://www.unicode.org">www.unicode.org</a>
+ *         for a complete list of categories and their two-letter codes.</td>
+ *       </tr>
+ *       <tr valign=top>
+ *         <td nowrap><code>[^a-z[:Lu:][:Ll:]]</code></td>
+ *         <td>The set of all characters <em>except</em> 'a' through 'z' and
+ *         uppercase or lowercase letters.</td>
+ *       </tr>
+ *     </table>
+ *
+ *   See {@link UnicodeSet} for more documentation and examples.
+ *   </dd>
+ *
+ *   <dt><code>ai&gt;{alefmadda}</code></dt>
+ *
+ *   <dd><strong>Forward translation rule.</strong> This rule states that the
+ *   string on the left will be changed to the string on the right when
+ *   performing forward transliteration.</dd>
+ *
+ *   <dt><code>ai&lt;{alefmadda}</code></dt>
+ *
+ *   <dd><strong>Reverse translation rule.</strong> This rule states that the
+ *   string on the right will be changed to the string on the left when
+ *   performing reverse transliteration.</dd>
+ *
+ * </dl>
+ *
+ * <p>Forward and reverse translation rules consist of a <em>match
+ * pattern</em> and an <em>output string</em>.  The match pattern consists
+ * of literal characters, optionally preceded by context, and optionally
+ * followed by context.  Context characters, like literal pattern characters,
+ * must be matched in the text being transliterated.  However, unlike literal
+ * pattern characters, they are not replaced by the output text.  For example,
+ * the pattern "<code>[abc]def</code>" indicates the characters
+ * "<code>def</code>" must be preceded by "<code>abc</code>" for a successful
+ * match.  If there is a successful match, "<code>def</code>" will be replaced,
+ * but not "<code>abc</code>".  The initial '<code>[</code>' is optional, so
+ * "<code>abc]def</code>" is equivalent to "<code>[abc]def</code>".  Another
+ * example is "<code>123[456]</code>" (or "<code>123[456</code>") in which the
+ * literal pattern "<code>123</code>" must be followed by "<code>456</code>".
+ *
+ * <p>The output string of a forward or reverse rule consists of characters to
+ * replace the literal pattern characters.  If the output string contains the
+ * character '<code>|</code>', this is taken to indicate the location of the
+ * <em>cursor</em> after replacement.  The cursor is the point in the text
+ * at which the next replacement, if any, will be applied.
+ *
+ * <p><b>Example</b>
+ *
+ * <p>The following example rules illustrate many of the features of the rule
+ * language.
+ * <table cellpadding="4">
+ * <tr valign=top><td>Rule 1.</td>
+ *     <td nowrap><code>abc]def&gt;x|y</code></td></tr>
+ * <tr valign=top><td>Rule 2.</td>
+ *     <td nowrap><code>xyz&gt;r</code></td></tr>
+ * <tr valign=top><td>Rule 3.</td>
+ *     <td nowrap><code>yz&gt;q</code></td></tr>
+ * </table>
+ *
+ * <p>Applying these rules to the string "<code>adefabcdefz</code>" yields the
+ * following results:
+ *
+ * <table cellpadding="4">
+ * <tr valign=top><td nowrap><code>|adefabcdefz</code></td>
+ *     <td>Initial state, no rules match.  Advance cursor.</td></tr>
+ * <tr valign=top><td nowrap><code>a|defabcdefz</code></td>
+ *     <td>Still no match.  Rule 1 does not match because the preceding
+ *     context is not present.</td></tr>
+ * <tr valign=top><td nowrap><code>ad|efabcdefz</code></td>
+ *     <td>Still no match.  Keep advancing until there is a match...</td></tr>
+ * <tr valign=top><td nowrap><code>ade|fabcdefz</code></td>
+ *     <td>...</td></tr>
+ * <tr valign=top><td nowrap><code>adef|abcdefz</code></td>
+ *     <td>...</td></tr>
+ * <tr valign=top><td nowrap><code>adefa|bcdefz</code></td>
+ *     <td>...</td></tr>
+ * <tr valign=top><td nowrap><code>adefab|cdefz</code></td>
+ *     <td>...</td></tr>
+ * <tr valign=top><td nowrap><code>adefabc|defz</code></td>
+ *     <td>Rule 1 matches; replace "<code>def</code>" with "<code>xy</code>"
+ *     and back up the cursor to before the '<code>y</code>'.</td></tr>
+ * <tr valign=top><td nowrap><code>adefabcx|yz</code></td>
+ *     <td>Although "<code>xyz</code>" is present, rule 2 does not match
+ *     because the cursor is before the '<code>y</code>', not before the
+ *     '<code>x</code>'.  Rule 3 does match.  Replace "<code>yz</code>" with
+ *     "<code>q</code>".</td></tr>
+ * <tr valign=top><td nowrap><code>adefabcxq|</code></td>
+ *     <td>The cursor is at the end; transliteration is complete.</td></tr>
+ * </table>
+ *
+ * <p>The order of rules is significant.  If multiple rules may match at some
+ * point, the first matching rule is applied.
+ *
+ * <p>Forward and reverse rules may have an empty output string.  Otherwise, an
+ * empty left or right hand side of any statement is a syntax error.
+ *
+ * <p>Single quotes are used to quote the special characters
+ * <code>=&gt;&lt;{}[]|</code>.  To specify a single quote itself, inside or
+ * outside of quotes, use two single quotes in a row.  For example, the rule
+ * "<code>'&gt;'&gt;o''clock</code>" changes the string "<code>&gt;</code>" to
+ * the string "<code>o'clock</code>".
+ *
+ * <p><b>Notes</b>
+ *
+ * <p>While a RuleBasedTransliterator is being built, it checks that the rules
+ * are added in proper order.  For example, if the rule "a>x" is followed by the
+ * rule "ab>y", then the second rule will throw an exception.  The reason is
+ * that the second rule can never be triggered, since the first rule always
+ * matches anything it matches.  In other words, the first rule <em>masks</em>
+ * the second rule.  There is a cost of O(n^2) to make this check; in real-world
+ * tests it appears to approximately double build time.
+ *
+ * <p>One optimization that can be made is to add a pragma to the rule language,
+ * "#pragma order", that turns off ordering checking.  This pragma can then be
+ * added to all of our resource-based rules (after we build these once and
+ * determine that there are no ordering errors).  I haven't made this change yet
+ * in the interests of keeping the code from getting too byzantine.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class RuleBasedTransliterator extends Transliterator {
+    /**
+     * Direction constant passed to constructor to create a transliterator
+     * using the forward rules.
+     */
+    public static final int FORWARD = 0;
+
+    /**
+     * Direction constant passed to constructor to create a transliterator
+     * using the reverse rules.
+     */
+    public static final int REVERSE = 1;    
+
+    /**
+     * The rule data this transliterator operates on.  The transliteration
+     * methods read its <code>ruleSet</code> and <code>setVariables</code>
+     * members.
+     */
+    private Data data;
+
+    /**
+     * When true, the transliteration methods print a trace of their
+     * progress to <code>System.err</code> or <code>System.out</code>.
+     */
+    static final boolean DEBUG = false;
+
+    /** Copyright notice. */
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Builds a transliterator by parsing <code>rules</code> for the
+     * requested direction.
+     * @param ID identifier handed to the superclass constructor
+     * @param rules rules, separated by '\n'
+     * @param direction either FORWARD or REVERSE.
+     * @param filter filter handed to the superclass constructor
+     * @exception IllegalArgumentException if rules are malformed
+     * or direction is invalid.
+     */
+    public RuleBasedTransliterator(String ID, String rules, int direction,
+                                   UnicodeFilter filter) {
+        super(ID, filter);
+        boolean knownDirection = (direction == FORWARD || direction == REVERSE);
+        if (!knownDirection) {
+            throw new IllegalArgumentException("Invalid direction");
+        }
+        this.data = parse(rules, direction);
+    }
+
+    /**
+     * Constructs a new transliterator from the given rules in the
+     * <code>FORWARD</code> direction.  Delegates to the four-argument
+     * constructor, passing <code>FORWARD</code> and a <code>null</code>
+     * filter.
+     * @param ID identifier forwarded to the four-argument constructor
+     * @param rules rules, separated by '\n'
+     * @exception IllegalArgumentException if rules are malformed
+     */
+    public RuleBasedTransliterator(String ID, String rules) {
+        this(ID, rules, FORWARD, null);
+    }
+
+    /**
+     * Package-private constructor that adopts already-built rule data
+     * instead of parsing a rule string.
+     * @param ID identifier handed to the superclass constructor
+     * @param data rule data to use; stored by reference, not copied
+     * @param filter filter handed to the superclass constructor
+     */
+    RuleBasedTransliterator(String ID, Data data, UnicodeFilter filter) {
+        super(ID, filter);
+        this.data = data;
+    }
+
+    /**
+     * Parses a rule string into rule data by running a <code>Parser</code>
+     * over it and returning the parser's result.
+     * @param rules the rules to parse
+     * @param direction the direction passed through to the parser; the
+     * public constructor passes FORWARD or REVERSE
+     * @return the data produced by the parser
+     */
+    static Data parse(String rules, int direction) {
+        return new Parser(rules, direction).getData();
+    }
+
+    /**
+     * Transliterates a segment of a string.  <code>Transliterator</code> API.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 &lt;= start
+     * &lt;= limit</code>.
+     * @param limit the ending index, exclusive; <code>start &lt;= limit
+     * &lt;= text.length()</code>.
+     * @param result buffer to receive the transliterated text; previous
+     * contents are discarded
+     */
+    public void transliterate(String text, int start, int limit,
+                              StringBuffer result) {
+        /* The loop below works on a virtual buffer: the text transliterated
+         * so far (held in result) followed by the source text that has not
+         * been consumed yet.  A cursor marks where the next match is tried;
+         * it may sit inside the transliterated part or right at its end.
+         *
+         * Example: rules 1. ab>x|y
+         *                2. yc>z
+         *
+         * []|eabcd  start - no match, copy e to translated buffer
+         * [e]|abcd  match rule 1 - copy output & adjust cursor
+         * [ex|y]cd  match rule 2 - copy output & adjust cursor
+         * [exz]|d   no match, copy d to transliterated buffer
+         * [exzd]|   done
+         *
+         * pos: the cursor, an index into the virtual buffer in the range
+         * 0..result.length().  Matches are attempted at pos.  When nothing
+         * matches, pos advances by one; if it was at the end of result, one
+         * source character is first moved into result.
+         *
+         * start, limit: the unprocessed part of the source text, offsets
+         * start..limit-1.  At any moment the virtual buffer is result +
+         * text.substring(start, limit).
+         */
+        int pos = 0;
+        result.setLength(0);
+        while (start < limit || pos < result.length()) {
+            TransliterationRule rule = data.ruleSet.findMatch(
+                    text, start, limit, result, pos, data.setVariables, getFilter());
+            if (DEBUG) {
+                StringBuffer trace = new StringBuffer(
+                        result.toString() + '#' + text.substring(start, limit));
+                int mark = (pos <= result.length()) ? pos : (pos + 1);
+                trace.insert(mark, '|');
+                String label = (rule == null) ? "nomatch:" : ("match:" + rule + ", ");
+                System.err.print(label + trace);
+            }
+
+            if (rule != null) {
+                int keyLen = rule.getKeyLength();
+                // Number of already-transliterated characters right of pos; >= 0
+                int pending = result.length() - pos;
+                char[] kept = null;
+                if (keyLen > pending) {
+                    // The key reaches into the source text; consume that part.
+                    start += keyLen - pending;
+                } else if (keyLen < pending) {
+                    // Characters beyond the key must survive the truncation.
+                    kept = new char[pending - keyLen];
+                    result.getChars(pos + keyLen, result.length(), kept, 0);
+                }
+                result.setLength(pos);
+                result.append(rule.getOutput());
+                if (kept != null) {
+                    result.append(kept);
+                }
+                pos += rule.getCursorPos();
+            } else {
+                if (pos == result.length()) {
+                    result.append(text.charAt(start++));
+                }
+                ++pos;
+            }
+
+            if (DEBUG) {
+                StringBuffer trace = new StringBuffer(
+                        result.toString() + '#' + text.substring(start, limit));
+                int mark = (pos <= result.length()) ? pos : (pos + 1);
+                trace.insert(mark, '|');
+                System.err.println(" => " + trace);
+            }
+        }
+    }
+
+    /**
+     * Transliterates a segment of a string.  <code>Transliterator</code> API.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 &lt;= start
+     * &lt;= limit</code>.
+     * @param limit the ending index, exclusive; <code>start &lt;= limit
+     * &lt;= text.length()</code>.
+     * @return The new limit index
+     */
+    public int transliterate(Replaceable text, int start, int limit) {
+        /* With a Replaceable there is only one buffer, so the algorithm is
+         * simpler than the String version.  The start stays fixed; the
+         * limit stays fixed relative to the text but changes numerically
+         * when a replacement is longer or shorter than its key.  The cursor
+         * walks from start to limit and replacements happen under it.
+         *
+         * Example: rules 1. ab>x|y
+         *                2. yc>z
+         *
+         * |eabcd   start - no match, advance cursor
+         * e|abcd   match rule 1 - change text & adjust cursor
+         * ex|ycd   match rule 2 - change text & adjust cursor
+         * exz|d    no match, advance cursor
+         * exzd|    done
+         */
+        for (int pos = start; pos < limit; ) {
+            TransliterationRule rule = data.ruleSet.findMatch(
+                    text, start, limit, pos, data.setVariables, getFilter());
+            if (rule == null) {
+                ++pos;
+                continue;
+            }
+            int keyLen = rule.getKeyLength();
+            String output = rule.getOutput();
+            text.replace(pos, pos + keyLen, output);
+            limit += output.length() - keyLen;
+            pos += rule.getCursorPos();
+        }
+        return limit;
+    }
+
+    /**
+     * Implements {@link Transliterator#handleKeyboardTransliterate}.
+     *
+     * <p>Applies rules from <code>index[CURSOR]</code> towards
+     * <code>index[LIMIT]</code> and stops early when only a partial match
+     * is available.  On return <code>index[LIMIT]</code> and
+     * <code>index[CURSOR]</code> hold the updated values;
+     * <code>index[START]</code> is read but not written.
+     */
+    protected void handleKeyboardTransliterate(Replaceable text,
+                                               int[] index) {
+        int start = index[START];
+        int limit = index[LIMIT];
+        int pos = index[CURSOR];
+
+        if (DEBUG) {
+            String before = escape(rsubstring(text, start, pos));
+            String after = escape(rsubstring(text, pos, limit));
+            System.out.print("\"" + before + '|' + after + "\"");
+        }
+
+        // Single-element array used as an out-parameter by the rule set.
+        boolean[] partial = new boolean[1];
+
+        while (pos < limit) {
+            TransliterationRule rule = data.ruleSet.findIncrementalMatch(
+                    text, start, limit, pos, data.setVariables, partial, getFilter());
+            if (rule != null) {
+                // Full match: substitute the output for the key and move
+                // the cursor to where the rule says it belongs.
+                int keyLen = rule.getKeyLength();
+                String output = rule.getOutput();
+                text.replace(pos, pos + keyLen, output);
+                limit += output.length() - keyLen;
+                pos += rule.getCursorPos();
+                continue;
+            }
+            if (partial[0]) {
+                // Partial match: nothing can be decided without more text,
+                // so leave the cursor here and return.
+                break;
+            }
+            // No match at this position; try the next one.
+            ++pos;
+        }
+
+        if (DEBUG) {
+            String done = escape(rsubstring(text, start, pos));
+            String empty = escape(rsubstring(text, pos, pos));
+            String rest = escape(rsubstring(text, pos, limit));
+            System.out.println(" -> \"" + done + '|' + empty + '|' + rest + "\"");
+        }
+
+        index[LIMIT] = limit;
+        index[CURSOR] = pos;
+    }
+
+    /**
+     * Returns the length of the longest context required by this transliterator.
+     * This is <em>preceding</em> context.
+     * @return Maximum number of preceding context characters this
+     * transliterator needs to examine
+     */
+    protected int getMaximumContextLength() {
+        return data.ruleSet.getMaximumContextLength();
+    }
+
+
+    /**
+     * FOR DEBUGGING: Copy the characters of a Replaceable in the range
+     * <code>start</code> (inclusive) to <code>limit</code> (exclusive)
+     * into a new String.  An empty string results if
+     * <code>start >= limit</code>.
+     */
+    private static String rsubstring(Replaceable r, int start, int limit) {
+        StringBuffer copy = new StringBuffer();
+        for (int pos = start; pos < limit; ++pos) {
+            copy.append(r.charAt(pos));
+        }
+        return copy.toString();
+    }
+
+    /**
+     * FOR DEBUGGING: Escape non-ASCII characters as Unicode.
+     * Characters from space through 0x007F are copied unchanged, except
+     * that a backslash is doubled.  Every other character is written as
+     * a backslash, the letter u, and four lower-case hex digits.
+     */
+    private static final String escape(String s) {
+        StringBuffer out = new StringBuffer();
+        final int len = s.length();
+        for (int pos = 0; pos < len; ++pos) {
+            char ch = s.charAt(pos);
+            boolean plain = (ch >= ' ' && ch <= 0x007F);
+            if (!plain) {
+                out.append("\\u");
+                // Left-pad the hex form with zeros to exactly four digits.
+                String hex = Integer.toHexString(ch);
+                for (int digits = hex.length(); digits < 4; ++digits) {
+                    out.append('0');
+                }
+                out.append(hex);
+            } else if (ch == '\\') {
+                out.append("\\\\"); // a literal backslash becomes two
+            } else {
+                out.append(ch);
+            }
+        }
+        return out.toString();
+    }
+
+
+
+
+
+    static class Data {
+        /**
+         * Rule table.  May be empty.
+         */
+        public TransliterationRuleSet ruleSet = new TransliterationRuleSet();
+
+        /**
+         * Variable name (String) to variable (Character).  When a name
+         * stands for a single literal character, that character is the
+         * value stored here.  When a name stands for a UnicodeSet, the
+         * value stored here is a stand-in character instead: it is the key
+         * under which the set is found in <code>setVariables</code>, and it
+         * also represents the set in the stored rules.
+         */
+        public Hashtable variableNames = new Hashtable();
+
+        /**
+         * Stand-in character (Character) to set (UnicodeSet).  The
+         * stand-in for a set-valued variable is recorded in
+         * <code>variableNames</code> under the variable's name; that
+         * stand-in is then the key used here to look up the actual
+         * UnicodeSet object.  The same stand-in appears in the rule text
+         * to represent the set of characters.
+         */
+        public Hashtable setVariables = new Hashtable();
+
+        /**
+         * Creates a Data object with an empty rule set and empty
+         * variable tables.
+         */
+        public Data() {
+        }
+    }
+
+
+
+
+
+
+    private static class Parser {
+        private String rules;
+
+        private int direction;
+
+        private Data data;
+
+        /**
+         * The next available stand-in for variables.  This starts at some point in
+         * the private use area (discovered dynamically) and increments up toward
+         * <code>variableLimit</code>.  At any point during parsing, available
+         * variables are <code>variableNext..variableLimit-1</code>.
+         */
+        private char variableNext;
+
+        /**
+         * The limit (exclusive) of the stand-ins for variables.  This is discovered
+         * dynamically.  At any point during parsing, available variables are
+         * <code>variableNext..variableLimit-1</code>.
+         */
+        private char variableLimit;
+
+        // Operators
+        private static final char VARIABLE_DEF_OP   = '=';
+        private static final char FORWARD_RULE_OP   = '>';
+        private static final char REVERSE_RULE_OP   = '<';
+
+        private static final String OPERATORS = "=><";
+
+        // Other special characters
+        private static final char QUOTE               = '\'';
+        private static final char VARIABLE_REF_OPEN   = '{';
+        private static final char VARIABLE_REF_CLOSE  = '}';
+        private static final char CONTEXT_OPEN        = '[';
+        private static final char CONTEXT_CLOSE       = ']';
+        private static final char CURSOR_POS          = '|';
+        private static final char RULE_COMMENT_CHAR   = '#';
+
+        /**
+         * Specials must be quoted in rules to be used as literals.
+         * Specials may not occur in variable names.
+         */
+        private static final String SPECIALS = "'{}[]|#" + OPERATORS;
+
+        /**
+         * Specials that must be quoted in variable definitions.
+         */
+        private static final String DEF_SPECIALS = "'{}";
+
+        /**
+         * Creates a parser and immediately parses the given rules into a
+         * fresh {@link Data} object, available afterwards via
+         * {@link #getData}.
+         * @param rules list of rules, separated by newline characters
+         * @param direction the rule direction to keep; it is compared
+         * against FORWARD and REVERSE when rules are applied
+         * @exception IllegalArgumentException if there is a syntax error in the
+         * rules
+         */
+        public Parser(String rules, int direction) {
+            this.data = new Data();
+            this.direction = direction;
+            this.rules = rules;
+            parseRules();
+        }
+
+        /**
+         * Returns the Data object populated by this parser.
+         */
+        public Data getData() {
+            return this.data;
+        }
+
+        /**
+         * Parse the given string as a sequence of rules, separated by newline
+         * characters ('\n'), and cause this object to implement those rules.  Any
+         * previous rules are discarded.  Typically this method is called exactly
+         * once, during construction.
+         * @exception IllegalArgumentException if there is a syntax error in the
+         * rules
+         */
+        private void parseRules() {
+            determineVariableRange();
+
+            int n = rules.length();
+            int i = 0;
+            while (i<n) {
+                int limit = rules.indexOf('\n', i);
+
+                // Recognize "\\\n" as an escaped "\n"
+                while (limit>0 && rules.charAt(limit-1) == '\\') {
+                    limit = rules.indexOf('\n', limit+1);
+                }
+
+                if (limit == -1) {
+                    limit = n;
+                }
+                // Skip over empty lines and line starting with #
+                if (limit > i && rules.charAt(i) != RULE_COMMENT_CHAR) {
+                    applyRule(i, limit);
+                }
+                i = limit + 1;
+            }
+
+            data.ruleSet.freeze();
+        }
+
+        /**
+         * Parse the given substring as a rule, and append it to the rules currently
+         * represented in this object.  A variable definition is recorded in
+         * the variable tables; a forward or reverse rule is added to the rule
+         * set only when it matches this parser's direction.
+         * @param start the beginning index, inclusive; <code>0 <= start
+         * <= limit</code>.
+         * @param limit the ending index, exclusive; <code>start <= limit
+         * <= rules.length()</code>.
+         * @exception IllegalArgumentException if there is a syntax error in the
+         * rules
+         */
+        private void applyRule(int start, int limit) {
+            /* Overview of parsing.  Raw rule text contains two kinds of
+             * quoted matter: variable references such as "{alpha}", and
+             * quotes such as "'<'" or "''".  Both are resolved early.
+             * Quotes are stripped, leaving literal text; each variable
+             * reference is replaced by a single character, which either
+             * represents itself or stands in for a set of characters
+             * defined earlier by a variable-definition statement (the '='
+             * operator).
+             *
+             * Each rule is also split into a left-hand side, an operator,
+             * and a right-hand side.  Neither side may be empty, except
+             * for the output pattern of a forward or reverse rule.  The
+             * match pattern of a forward or reverse rule is divided
+             * further into optional preceding context, the key, and
+             * optional following context.  The output pattern has its
+             * cursor indicator '|' detected and removed, and the cursor
+             * position recorded.
+             *
+             * Because quoting lets special characters appear anywhere in
+             * the final text, quote removal, variable resolution and
+             * splitting have to be done together.  (Changing the rule
+             * language therefore means revisiting the parsing methods as a
+             * whole.)
+             *
+             * What comes out is plain unquoted text plus an integer cursor
+             * position; classes such as UnicodeSet and TransliterationRule
+             * need know nothing of quoting or variables.
+             */
+            StringBuffer lhs = new StringBuffer();
+            StringBuffer rhs = new StringBuffer();
+            StringBuffer before = new StringBuffer();
+            StringBuffer after = new StringBuffer();
+            int[] cursor = new int[1];
+
+            char op = parseRule(start, limit, lhs, rhs, before, after, cursor);
+
+            if (op == VARIABLE_DEF_OP) {
+                applyVariableDef(lhs.toString(), rhs.toString());
+            } else if (op == FORWARD_RULE_OP) {
+                // Rules for the other direction are silently ignored.
+                if (direction == FORWARD) {
+                    data.ruleSet.addRule(new TransliterationRule(
+                                             lhs.toString(), rhs.toString(),
+                                             before.toString(), after.toString(),
+                                             cursor[0]));
+                }
+            } else if (op == REVERSE_RULE_OP) {
+                // For a reverse rule the right side is the match pattern
+                // and the left side is the output.
+                if (direction == REVERSE) {
+                    data.ruleSet.addRule(new TransliterationRule(
+                                             rhs.toString(), lhs.toString(),
+                                             before.toString(), after.toString(),
+                                             cursor[0]));
+                }
+            }
+        }
+
+        /**
+         * Add a variable definition.
+         * @param name the name of the variable.  It must not already be defined.
+         * @param pattern the value of the variable.  It may be a single character
+         * or a pattern describing a character set.
+         * @exception IllegalArgumentException if there is a syntax error
+         */
+        private final void applyVariableDef(String name, String pattern) {
+            validateVariableName(name);
+            if (data.variableNames.get(name) != null) {
+                throw new IllegalArgumentException("Duplicate variable definition: "
+                                                   + name + '=' + pattern);
+            }
+//!         if (UnicodeSet.getCategoryID(name) >= 0) {
+//!             throw new IllegalArgumentException("Reserved variable name: "
+//!                                                + name);
+//!         }
+            if (pattern.length() < 1) {
+                throw new IllegalArgumentException("Variable definition missing: "
+                                                   + name);
+            }
+            if (pattern.length() == 1) {
+                // Got a single character variable definition
+                data.variableNames.put(name, new Character(pattern.charAt(0)));
+            } else {
+                // Got more than one character; parse it as a category
+                if (variableNext >= variableLimit) {
+                    throw new RuntimeException("Private use variables exhausted");
+                }
+                Character c = new Character(variableNext++);
+                data.variableNames.put(name, c);
+                data.setVariables.put(c, new UnicodeSet(pattern));
+            }
+        }
+
+        /**
+         * Given a rule, parses it into three pieces: The left side, the right side,
+         * and the operator.  Returns the operator.  Quotes and variable references
+         * are resolved; the otuput text in all <code>StringBuffer</code> parameters
+         * is literal text.  This method delegates to other parsing methods to
+         * handle the match pattern, output pattern, and other sub-patterns in the
+         * rule.
+         * @param start the beginning index, inclusive; <code>0 <= start
+         * <= limit</code>.
+         * @param limit the ending index, exclusive; <code>start <= limit
+         * <= rules.length()</code>.
+         * @param left left side of rule is appended to this buffer
+         * with the quotes removed and variables resolved
+         * @param right right side of rule is appended to this buffer
+         * with the quotes removed and variables resolved
+         * @param anteContext the preceding context of the match pattern,
+         * if there is one, is appended to this buffer
+         * @param postContext the following context of the match pattern,
+         * if there is one, is appended to this buffer
+         * @param cursorPos if there is a cursor in the output pattern, its
+         * offset is stored in <code>cursorPos[0]</code>
+         * @return The operator character, one of the characters in OPERATORS.
+         */
+        private char parseRule(int start, int limit,
+                               StringBuffer left, StringBuffer right,
+                               StringBuffer anteContext,
+                               StringBuffer postContext,
+                               int[] cursorPos) {
+            if (false) {
+                System.err.println("Parsing " + rules.substring(start, limit));
+            }
+            /* Parse the rule into three pieces -- left, operator, and right,
+             * parsing out quotes.  The result is that left and right will have
+             * unquoted text.  E.g., "gt<'>'" will have right = ">".  Unquoted
+             * operators throw an exception.  Two quotes inside or outside
+             * quotes indicates a quote literal.  E.g., "o''clock" -> "o'clock".
+             */
+            int i = quotedIndexOf(rules, start, limit, OPERATORS);
+            if (i < 0) {
+                throw new IllegalArgumentException(
+                              "Syntax error: "
+                              + rules.substring(start, limit));
+            }
+            char c = rules.charAt(i);
+            switch (c) {
+            case FORWARD_RULE_OP:
+                if (i == start) {
+                    throw new IllegalArgumentException(
+                                  "Empty left side: "
+                                  + rules.substring(start, limit));
+                }
+                parseMatchPattern(start, i, left, anteContext, postContext);
+                if (i != (limit-1)) {
+                    parseOutputPattern(i+1, limit, right, cursorPos);
+                }
+                break;
+            case REVERSE_RULE_OP:
+                if (i == (limit-1)) {
+                    throw new IllegalArgumentException(
+                                  "Empty right side: "
+                                  + rules.substring(start, limit));
+                }
+                if (i != start) {
+                    parseOutputPattern(start, i, left, cursorPos);
+                }
+                parseMatchPattern(i+1, limit, right, anteContext, postContext);
+                break;
+            default:
+                if (i == start || i == (limit-1)) {
+                    throw new IllegalArgumentException(
+                                  "Empty left or right side: "
+                                  + rules.substring(start, limit));
+                }
+                parseSubPattern(start, i, left);
+                parseDefPattern(i+1, limit, right);
+                break;
+            }
+            return c;
+        }
+
+        /**
+         * Parses the match pattern of a forward or reverse rule.  Given the raw
+         * match pattern, return the match text and the context on both sides, if
+         * any.  Resolves all quotes and variables.
+         * @param start the beginning index, inclusive; <code>0 <= start
+         * <= limit</code>.
+         * @param limit the ending index, exclusive; <code>start <= limit
+         * <= rules.length()</code>.
+         * @param text the key to be matched will be appended to this buffer
+         * @param anteContext the preceding context, if any, will be appended
+         * to this buffer.  If null, no context processing is done at all.
+         * @param postContext the following context, if any, will be appended
+         * to this buffer.
+         * @exception IllegalArgumentException if the pattern is empty or the
+         * context delimiters are out of order
+         */
+        private void parseMatchPattern(int start, int limit,
+                                       StringBuffer text,
+                                       StringBuffer anteContext,
+                                       StringBuffer postContext) {
+            if (start >= limit) {
+                throw new IllegalArgumentException(
+                              "Empty expression in rule: "
+                              + rules.substring(start, limit));
+            }
+
+            // Bounds of the region still to be parsed as the key.
+            int from = start;
+            int to = limit;
+
+            if (anteContext != null) {
+                // A leading '[' and a trailing ']' are optional and ignored.
+                if (rules.charAt(from) == CONTEXT_OPEN) {
+                    from++;
+                }
+                if (rules.charAt(to-1) == CONTEXT_CLOSE) {
+                    to--;
+                }
+
+                // What remains has one of these shapes:
+                //             key
+                // anteContext]key
+                // anteContext]key[postContext
+                //             key[postContext
+                int anteEnd = quotedIndexOf(rules, from, to, String.valueOf(CONTEXT_CLOSE));
+                int postStart = quotedIndexOf(rules, from, to, String.valueOf(CONTEXT_OPEN));
+
+                // The ']' ending the ante context must precede the '['
+                // that begins the post context.
+                if (postStart >= 0 && anteEnd > postStart) {
+                    throw new IllegalArgumentException(
+                                  "Syntax error in context specifier: "
+                                  + rules.substring(from, to));
+                }
+                if (anteEnd >= 0) {
+                    parseSubPattern(from, anteEnd, anteContext);
+                    from = anteEnd+1;
+                }
+                if (postStart >= 0) {
+                    parseSubPattern(postStart+1, to, postContext);
+                    to = postStart;
+                }
+            }
+
+            parseSubPattern(from, to, text);
+        }
+
+        /**
+         * Parses a sub-pattern in which cursors are disallowed and the
+         * characters of SPECIALS must be quoted.
+         */
+        private final void parseSubPattern(int start, int limit,
+                                           StringBuffer text) {
+            final int[] noCursor = null;
+            parseSubPattern(start, limit, text, noCursor, SPECIALS);
+        }
+
+        /**
+         * Parse the value side of a variable definition.  Only the
+         * characters of DEF_SPECIALS need quoting here; in particular '['
+         * and ']' are ordinary characters, since these are used in
+         * UnicodeSet patterns.  Cursors are not recognized.
+         */
+        private final void parseDefPattern(int start, int limit,
+                                           StringBuffer text) {
+            final int[] noCursor = null;
+            parseSubPattern(start, limit, text, noCursor, DEF_SPECIALS);
+        }
+
+        /**
+         * Parses the output pattern of a forward or reverse rule, which is
+         * the given range of the rules string.  Produces the output text and
+         * the position of the cursor, if any.  Resolves all quotes and
+         * variables.
+         * @param start the beginning index, inclusive; <code>0 <= start
+         * <= limit</code>.
+         * @param limit the ending index, exclusive; <code>start <= limit
+         * <= rules.length()</code>.
+         * @param text the output text will be appended to this buffer
+         * @param cursorPos if this parameter is not null, then cursorPos[0]
+         * will be set to the cursor position, or -1 if there is none.  If this
+         * parameter is null, then cursors will be disallowed.
+         */
+        private final void parseOutputPattern(int start, int limit,
+                                              StringBuffer text,
+                                              int[] cursorPos) {
+            final String mustBeQuoted = SPECIALS;
+            parseSubPattern(start, limit, text, cursorPos, mustBeQuoted);
+        }
+
+        /**
+         * Parses a sub-pattern of a rule, which is the given range of the
+         * rules string.  Produces the text and the position of the cursor,
+         * if any.  Resolves all quotes and variables.
+         * @param start the beginning index, inclusive; <code>0 <= start
+         * <= limit</code>.
+         * @param limit the ending index, exclusive; <code>start <= limit
+         * <= rules.length()</code>.
+         * @param text the output text will be appended to this buffer
+         * @param cursorPos if this parameter is not null, then cursorPos[0]
+         * will be set to the cursor position, or -1 if there is none.  If this
+         * parameter is null, then cursors will be disallowed.
+         * @param specials characters that must be quoted; typically either
+         * SPECIALS or DEF_SPECIALS.
+         * @exception IllegalArgumentException if the range is empty, a
+         * variable reference is empty, unterminated or undefined, more than
+         * one cursor is present, or a special character appears unquoted
+         */
+        private void parseSubPattern(int start, int limit,
+                                     StringBuffer text,
+                                     int[] cursorPos,
+                                     String specials) {
+            boolean inQuote = false;
+
+            if (start >= limit) {
+                throw new IllegalArgumentException("Empty expression in rule");
+            }
+            if (cursorPos != null) {
+                cursorPos[0] = -1;
+            }
+            for (int i=start; i<limit; ++i) {
+                char c = rules.charAt(i);
+                if (c == QUOTE) {
+                    // Check for double quote
+                    if ((i+1) < limit
+                        && rules.charAt(i+1) == QUOTE) {
+                        text.append(QUOTE);
+                        ++i; // Skip over both quotes
+                    } else {
+                        inQuote = !inQuote;
+                    }
+                } else if (inQuote) {
+                    // Everything inside quotes is literal
+                    text.append(c);
+                } else if (c == VARIABLE_REF_OPEN) {
+                    ++i;
+                    int j = rules.indexOf(VARIABLE_REF_CLOSE, i);
+                    // indexOf searches to the end of the entire rules
+                    // string, so a closing brace found at or beyond limit
+                    // lies outside this sub-pattern and must not be
+                    // accepted as the end of this reference.
+                    if (i == j || j < 0 || j >= limit) { // empty or unterminated
+                        throw new IllegalArgumentException("Illegal variable reference: "
+                                                           + rules.substring(start, limit));
+                    }
+                    String name = rules.substring(i, j);
+                    validateVariableName(name);
+                    text.append(getVariableDef(name).charValue());
+                    i = j; // continue after the closing brace
+                } else if (c == CURSOR_POS && cursorPos != null) {
+                    if (cursorPos[0] >= 0) {
+                        throw new IllegalArgumentException("Multiple cursors: "
+                                                           + rules.substring(start, limit));
+                    }
+                    // The cursor sits after the text emitted so far
+                    cursorPos[0] = text.length();
+                } else if (specials.indexOf(c) >= 0) {
+                    throw new IllegalArgumentException("Unquoted special character: "
+                                                       + rules.substring(start, limit));
+                } else {
+                    text.append(c);
+                }
+            }
+        }
+
+        private static void validateVariableName(String name) {
+            if (indexOf(name, SPECIALS) >= 0) {
+                throw new IllegalArgumentException(
+                              "Special character in variable name: "
+                              + name);
+            }
+        }
+
+        /**
+         * Returns the single character value of the given variable name,
+         * as recorded in the variable-name table.  Only names that have
+         * been defined are recognized.
+         *
+         * NO LONGER SUPPORTED:
+         * Resolving a Unicode category name to a standard character
+         * variable derived from the category number.
+         * @exception IllegalArgumentException if the name is unknown.
+         */
+        private Character getVariableDef(String name) {
+            Object found = data.variableNames.get(name);
+            if (found == null) {
+                throw new IllegalArgumentException("Undefined variable: "
+                                                   + name);
+            }
+            return (Character) found;
+        }
+
+        /**
+         * Determines what part of the private use region of Unicode we can use for
+         * variable stand-ins.  The correct way to do this is as follows: Parse each
+         * rule, and for forward and reverse rules, take the FROM expression, and
+         * make a hash of all characters used.  The TO expression should be ignored.
+         * When done, everything not in the hash is available for use.  This
+         * implementation instead takes the largest stretch of the range
+         * starting at U+E000 with length 0x1900 that contains no character
+         * occurring anywhere in the rules string.
+         * @exception RuntimeException if no usable stretch is found
+         */
+        private final void determineVariableRange() {
+            Range privateUse = new Range('\uE000', 0x1900); // Private use area
+            Range free = privateUse.largestUnusedSubrange(rules);
+
+            if (free == null) {
+                throw new RuntimeException(
+                    "No private use characters available for variables");
+            }
+
+            variableNext = free.start;
+            variableLimit = (char) (free.start + free.length);
+
+            // An empty stretch leaves nothing to allocate from.
+            if (variableNext >= variableLimit) {
+                throw new RuntimeException(
+                        "Too few private use characters available for variables");
+            }
+        }
+
+        /**
+         * Returns the index of the first character in a set, ignoring quoted text.
+         * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
+         * found by a search for "h".  Unlike String.indexOf(), this method searches
+         * not for a single character, but for any character of the string
+         * <code>setOfChars</code>.  A quote that is never closed hides
+         * everything up to <code>limit</code>.
+         * @param text text to be searched
+         * @param start the beginning index, inclusive; <code>0 <= start
+         * <= limit</code>.
+         * @param limit the ending index, exclusive; <code>start <= limit
+         * <= text.length()</code>.
+         * @param setOfChars string with one or more distinct characters
+         * @return Offset of the first character in <code>setOfChars</code>
+         * found, or -1 if not found.
+         * @see #indexOf
+         */
+        private static int quotedIndexOf(String text, int start, int limit,
+                                         String setOfChars) {
+            int pos = start;
+            while (pos < limit) {
+                char ch = text.charAt(pos);
+                if (ch == QUOTE) {
+                    // Move to the matching close quote, or to limit if
+                    // there is none.
+                    do {
+                        ++pos;
+                    } while (pos < limit && text.charAt(pos) != QUOTE);
+                } else if (setOfChars.indexOf(ch) >= 0) {
+                    return pos;
+                }
+                ++pos;
+            }
+            return -1;
+        }
+
+        /**
+         * Returns the index of the first character in a set.  Unlike
+         * String.indexOf(), this method searches not for a single character, but
+         * for any character of the string <code>setOfChars</code>.  Quotes
+         * receive no special treatment.
+         * @param text text to be searched
+         * @param start the beginning index, inclusive; <code>0 <= start
+         * <= limit</code>.
+         * @param limit the ending index, exclusive; <code>start <= limit
+         * <= text.length()</code>.
+         * @param setOfChars string with one or more distinct characters
+         * @return Offset of the first character in <code>setOfChars</code>
+         * found, or -1 if not found.
+         * @see #quotedIndexOf
+         */
+        private static int indexOf(String text, int start, int limit,
+                                   String setOfChars) {
+            int pos = start;
+            while (pos < limit) {
+                char ch = text.charAt(pos);
+                boolean wanted = (setOfChars.indexOf(ch) >= 0);
+                if (wanted) {
+                    return pos;
+                }
+                pos++;
+            }
+            return -1;
+        }
+
+        /**
+         * Returns the index of the first character in a set.  Unlike
+         * String.indexOf(), this method searches not for a single character, but
+         * for any character of the string <code>setOfChars</code>.
+         * @param text text to be searched
+         * @param setOfChars string with one or more distinct characters
+         * @return Offset of the first character in <code>setOfChars</code>
+         * found, or -1 if not found.
+         * @see #quotedIndexOf
+         */
+        private static int indexOf(String text, String setOfChars) {
+            return indexOf(text, 0, text.length(), setOfChars);
+        }
+
+
+
+        /**
+         * A range of Unicode characters.  Support the operations of testing for
+         * inclusion (does this range contain this character?) and splitting.
+         * Splitting involves breaking a range into two smaller ranges around a
+         * character inside the original range.  The split character is not included
+         * in either range.  If the split character is at either extreme end of the
+         * range, one of the split products is an empty range.
+         *
+         * This class is used internally to determine the largest available private
+         * use character range for variable stand-ins.
+         */
+        private static class Range implements Cloneable {
+            char start;
+            int length;
+
+            Range(char start, int length) {
+                this.start = start;
+                this.length = length;
+            }
+
+            public Object clone() {
+                return new Range(start, length);
+            }
+
+            boolean contains(char c) {
+                return c >= start && (c - start) < length;
+            }
+
+            /**
+             * Assume that contains(c) is true.  Split this range into two new
+             * ranges around the character c.  Make this range one of the new ranges
+             * (modify it in place) and return the other new range.  The character
+             * itself is not included in either range.  If the split results in an
+             * empty range (that is, if c == start or c == start + length - 1) then
+             * return null.
+             */
+            Range split(char c) {
+                if (c == start) {
+                    ++start;
+                    --length;
+                    return null;
+                } else if (c - start == length - 1) {
+                    --length;
+                    return null;
+                } else {
+                    ++c;
+                    Range r = new Range(c, start + length - c);
+                    length = --c - start;
+                    return r;
+                }
+            }
+
+            /**
+             * Finds the largest unused subrange by the given string.  A
+             * subrange is unused by a string if the string contains no
+             * characters in that range.  If the given string contains no
+             * characters in this range, then this range itself is
+             * returned.
+             */
+            Range largestUnusedSubrange(String str) {
+                int n = str.length();
+
+                Vector v = new Vector(1);
+                v.addElement(clone());
+                for (int i=0; i<n; ++i) {
+                    char c = str.charAt(i);
+                    if (contains(c)) {
+                        for (int j=0; j<v.size(); ++j) {
+                            Range r = (Range) v.elementAt(j);
+                            if (r.contains(c)) {
+                                r = r.split(c);
+                                if (r != null) {
+                                    v.addElement(r);
+                                }
+                                break;
+                            }
+                        }
+                    }
+                }
+
+                Range bestRange = null;
+                for (int j=0; j<v.size(); ++j) {
+                    Range r = (Range) v.elementAt(j);
+                    if (bestRange == null || r.length > bestRange.length) {
+                        bestRange = r;
+                    }
+                }
+
+                return bestRange;
+            }
+        }
+    }
+}
diff --git a/icu4j/src/com/ibm/icu/text/TransliterationRule.java b/icu4j/src/com/ibm/icu/text/TransliterationRule.java
new file mode 100755
index 00000000000..383c77ed340
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/text/TransliterationRule.java
@@ -0,0 +1,530 @@
+package com.ibm.text;
+
+import java.util.Dictionary;
+
+/**
+ * A transliteration rule used by
+ * <code>RuleBasedTransliterator</code>.
+ * <code>TransliterationRule</code> is an immutable object.
+ *
+ * <p>A rule consists of an input pattern and an output string.  When
+ * the input pattern is matched, the output string is emitted.  The
+ * input pattern consists of zero or more characters which are matched
+ * exactly (the key) and optional context.  Context must match if it
+ * is specified.  Context may be specified before the key, after the
+ * key, or both.  The key, preceding context, and following context
+ * may contain variables.  Variables represent a set of Unicode
+ * characters, such as the letters <i>a</i> through <i>z</i>.
+ * Variables are detected by looking up each character in a supplied
+ * variable list to see if it has been so defined. 
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+class TransliterationRule {
+    /**
+     * Constant returned by <code>getMatchDegree()</code> indicating a mismatch
+     * between the text and this rule.  One or more characters of the context or
+     * key do not match the text.
+     * @see #getMatchDegree
+     */
+    public static final int MISMATCH      = 0;
+
+    /**
+     * Constant returned by <code>getMatchDegree()</code> indicating a partial
+     * match between the text and this rule.  All characters of the text match
+     * the corresponding context or key, but more characters are required for a
+     * complete match.  There are some key or context characters at the end of
+     * the pattern that remain unmatched because the text isn't long enough.
+     * @see #getMatchDegree
+     */
+    public static final int PARTIAL_MATCH = 1;
+
+    /**
+     * Constant returned by <code>getMatchDegree()</code> indicating a complete
+     * match between the text and this rule.  The text matches all context and
+     * key characters.
+     * @see #getMatchDegree
+     */
+    public static final int FULL_MATCH    = 2;
+
+    /**
+     * The string that must be matched.  Assigned only by the constructor.
+     */
+    private String key;
+
+    /**
+     * The string that is emitted if the key, anteContext, and postContext
+     * are matched.  Assigned only by the constructor.
+     */
+    private String output;
+
+    /**
+     * The string that must match before the key.  Must not be the empty string;
+     * the constructor stores null when it is given an empty string.
+     * May be null; if null, then there is no matching requirement before the
+     * key.
+     */
+    private String anteContext;
+
+    /**
+     * The string that must match after the key.  Must not be the empty string;
+     * the constructor stores null when it is given an empty string.
+     * May be null; if null, then there is no matching requirement after the
+     * key.
+     */
+    private String postContext;
+
+    /**
+     * The position of the cursor after emitting the output string, from 0 to
+     * output.length().  For most rules with no special cursor specification,
+     * the cursorPos is output.length(); the constructor substitutes that value
+     * for any negative argument.
+     */
+    private int cursorPos;
+
+    /**
+     * A string used to implement masks(): the key followed by the
+     * postContext, when one was supplied.  Set to null by freeze(), after
+     * which masks() can no longer be used.
+     */
+    private String maskKey;
+
+    // Copyright notice; not referenced by any code in this class.
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Build a rule from its key, its output text, up to two optional context
+     * strings, and an optional cursor position inside the output.
+     * @param key the string to match
+     * @param output the string to produce when the <code>key</code> is seen
+     * @param anteContext text that must precede the <code>key</code>; null
+     * and the empty string both mean "no preceding context"
+     * @param postContext text that must follow the <code>key</code>; null
+     * and the empty string both mean "no following context"
+     * @param cursorPos where the cursor is left inside <code>output</code>
+     * once it has been emitted.  Any negative value stands for
+     * <code>output.length()</code>, i.e. just after the output.
+     * @exception IllegalArgumentException if <code>cursorPos</code> is
+     * greater than <code>output.length()</code>.
+     */
+    public TransliterationRule(String key, String output,
+                               String anteContext, String postContext,
+                               int cursorPos) {
+        this.key = key;
+        this.output = output;
+
+        // Empty context strings are normalized to null.
+        if (anteContext == null || anteContext.length() == 0) {
+            this.anteContext = null;
+        } else {
+            this.anteContext = anteContext;
+        }
+        if (postContext == null || postContext.length() == 0) {
+            this.postContext = null;
+        } else {
+            this.postContext = postContext;
+        }
+
+        int outputLength = output.length();
+        if (cursorPos < 0) {
+            this.cursorPos = outputLength;
+        } else {
+            this.cursorPos = cursorPos;
+        }
+        if (this.cursorPos > outputLength) {
+            throw new IllegalArgumentException("Illegal cursor position");
+        }
+
+        /* The mask key (key followed by the following context) makes masks()
+         * cheap while rules are being added to a rule set.  Measured build
+         * times: 13.0 seconds without it, 6.2 seconds with it.  It is only
+         * needed during that phase; freeze() discards it to save space, and
+         * masks() must NOT be called afterwards.
+         */
+        maskKey = (postContext == null) ? key : key + postContext;
+    }
+
+    /**
+     * Return the length of the key.  Equivalent to <code>getKey().length()</code>.
+     * The context strings are not counted.
+     * @return the length of the match key.
+     */
+    public int getKeyLength() {
+        return key.length();
+    }
+
+    /**
+     * Return the key, exactly as it was passed to the constructor.
+     * @return the match key.
+     */
+    public String getKey() {
+        return key;
+    }
+
+    /**
+     * Return the output string, exactly as it was passed to the constructor.
+     * @return the output string.
+     */
+    public String getOutput() {
+        return output;
+    }
+
+    /**
+     * Return the position of the cursor within the output string.  A negative
+     * position given to the constructor is reported here as
+     * <code>getOutput().length()</code>.
+     * @return a value from 0 to <code>getOutput().length()</code>, inclusive.
+     */
+    public int getCursorPos() {
+        return cursorPos;
+    }
+
+    /**
+     * Return the number of characters of preceding context this rule
+     * requires.  This method is needed to support the
+     * <code>Transliterator</code> method
+     * <code>getMaximumContextLength()</code>.
+     * @return the length of the preceding context, or 0 if this rule has none.
+     */
+    public int getAnteContextLength() {
+        if (anteContext == null) {
+            return 0;
+        }
+        return anteContext.length();
+    }
+
+    /**
+     * Return true if this rule masks another rule.  If r1 masks r2 then
+     * r1 matches any input string that r2 matches.  If r1 masks r2 and r2 masks
+     * r1 then r1 == r2.  Examples: "a>x" masks "ab>y".  "a>x" masks "a[b]>y".
+     * "[c]a>x" masks "[dc]a>y".
+     *
+     * <p>This method must not be called after freeze() is called, on either
+     * this rule or <code>r2</code>: it reads the mask key of both.
+     * @param r2 the rule that may be masked by this one
+     * @return true if this rule is found to mask <code>r2</code>
+     */
+    public boolean masks(TransliterationRule r2) {
+        /* There are three cases of masking.  In each instance, rule1
+         * masks rule2.
+         *
+         * 1. KEY mask: len(key1) < len(key2), key2 starts with key1.
+         *
+         * 2. PREFIX mask: key1 == key2, len(prefix1) < len(prefix2),
+         * prefix2 ends with prefix1, suffix2 starts with suffix1.
+         *
+         * 3. SUFFIX mask: key1 == key2, len(suffix1) < len(suffix2),
+         * prefix2 ends with prefix1, suffix2 starts with suffix1.
+         *
+         * The comparison below works on the mask key, which is the key
+         * followed by the suffix (following context).
+         */
+
+        /* LIMITATION of the current mask algorithm: Some rule
+         * maskings are currently not detected.  For example,
+         * "{Lu}]a>x" masks "A]a>y".  To detect these sorts of masking,
+         * we need a subset operator on UnicodeSet objects, which we
+         * currently do not have.  This can be added later.
+         */
+
+        // This mask key is a proper prefix of the other rule's mask key.
+        if (maskKey.length() < r2.maskKey.length()
+            && r2.maskKey.startsWith(maskKey)) {
+            return true;
+        }
+
+        // Otherwise the mask keys must be identical and r2 must carry a
+        // preceding context.
+        if (r2.anteContext == null) {
+            return false;
+        }
+        if (!maskKey.equals(r2.maskKey)) {
+            return false;
+        }
+
+        // No preceding context here: this rule is the less restrictive one.
+        if (anteContext == null) {
+            return true;
+        }
+
+        // This preceding context is a proper suffix of r2's.
+        return anteContext.length() < r2.anteContext.length()
+            && r2.anteContext.endsWith(anteContext);
+    }
+
+    /**
+     * Free up space by discarding the mask key.  Once this method is called,
+     * masks() must NOT be called.  If it is called, an exception will be
+     * thrown, because masks() dereferences the discarded mask key.
+     */
+    public void freeze() {
+        maskKey = null;
+    }
+
+    /**
+     * Return a string representation of this object.
+     * @return string representation of this object
+     */
+    public String toString() {
+        return getClass().getName() + '['
+            + escape((anteContext != null ? ("[" + anteContext + ']') : "")
+            + key
+            + (postContext != null ? ("[" + postContext + ']') : "")
+            + " -> "
+            + (cursorPos < output.length()
+               ? (output.substring(0, cursorPos) + '|' + output.substring(cursorPos))
+               : output))
+            + ']';
+    }
+
+    /**
+     * Return true if this rule matches the given text: the preceding context
+     * (if any) just before the cursor, the key at the cursor, and the
+     * following context (if any) just after the key.  The text being matched
+     * occupies a virtual buffer consisting of the contents of
+     * <code>result</code> concatenated to a substring of <code>text</code>.
+     * The substring is specified by <code>start</code> and <code>limit</code>.
+     * The value of <code>cursor</code> is an index into this virtual buffer,
+     * from 0 to the length of the buffer.  In terms of the parameters,
+     * <code>cursor</code> must be between 0 and <code>result.length() + limit -
+     * start</code>.
+     * @param text the untranslated text
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param result translated text so far
+     * @param cursor position at which to translate next, an offset into result.
+     * If greater than or equal to result.length(), represents offset start +
+     * cursor - result.length() into text.
+     * @param variables a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return true if the contexts and the key all match
+     */
+    public boolean matches(String text, int start, int limit,
+                           StringBuffer result, int cursor,
+                           Dictionary variables,
+                           UnicodeFilter filter) {
+        // Preceding context, when present, ends where the key begins.
+        if (anteContext != null
+            && !regionMatches(text, start, limit, result,
+                              cursor - anteContext.length(),
+                              anteContext, variables, filter)) {
+            return false;
+        }
+        if (!regionMatches(text, start, limit, result, cursor,
+                           key, variables, filter)) {
+            return false;
+        }
+        if (postContext == null) {
+            return true;
+        }
+        // Following context begins right after the key.
+        return regionMatches(text, start, limit, result,
+                             cursor + key.length(),
+                             postContext, variables, filter);
+    }
+
+    /**
+     * Return true if this rule matches the given text: the preceding context
+     * (if any) just before the cursor, the key at the cursor, and the
+     * following context (if any) just after the key.
+     * @param text the text, both translated and untranslated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param cursor position at which to translate next, representing offset
+     * into text.  This value must be between <code>start</code> and
+     * <code>limit</code>.
+     * @param variables a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return true if the contexts and the key all match
+     */
+    public boolean matches(Replaceable text, int start, int limit,
+                           int cursor, Dictionary variables,
+                           UnicodeFilter filter) {
+        // Preceding context, when present, ends where the key begins.
+        if (anteContext != null
+            && !regionMatches(text, start, limit,
+                              cursor - anteContext.length(),
+                              anteContext, variables, filter)) {
+            return false;
+        }
+        if (!regionMatches(text, start, limit, cursor,
+                           key, variables, filter)) {
+            return false;
+        }
+        if (postContext == null) {
+            return true;
+        }
+        // Following context begins right after the key.
+        return regionMatches(text, start, limit, cursor + key.length(),
+                             postContext, variables, filter);
+    }
+
+    /**
+     * Return the degree of match between this rule and the given text.  The
+     * degree of match may be mismatch, a partial match, or a full match.  A
+     * mismatch means at least one character of the text does not match the
+     * context or key.  A partial match means the text runs out at
+     * <code>limit</code> before all key and following-context characters
+     * could be compared, with no mismatch among those that were.  A full
+     * match means all context and key characters match.  The preceding
+     * context is never matched partially: it either matches in full or the
+     * result is a mismatch.
+     * @param text the text, both translated and untranslated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param cursor position at which to translate next, representing offset
+     * into text.  This value must be between <code>start</code> and
+     * <code>limit</code>.
+     * @param variables a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return one of <code>MISMATCH</code>, <code>PARTIAL_MATCH</code>, or
+     * <code>FULL_MATCH</code>.
+     * @see #MISMATCH
+     * @see #PARTIAL_MATCH
+     * @see #FULL_MATCH
+     */
+    public int getMatchDegree(Replaceable text, int start, int limit,
+                              int cursor, Dictionary variables,
+                              UnicodeFilter filter) {
+        if (anteContext != null) {
+            boolean anteMatches =
+                regionMatches(text, start, limit,
+                              cursor - anteContext.length(),
+                              anteContext, variables, filter);
+            if (!anteMatches) {
+                return MISMATCH;
+            }
+        }
+
+        // Compare as much of the key as the text allows.
+        int matched = getRegionMatchLength(text, start, limit, cursor,
+                                           key, variables, filter);
+        if (matched < 0) {
+            return MISMATCH;
+        }
+        if (matched < key.length()) {
+            return PARTIAL_MATCH;
+        }
+        if (postContext == null) {
+            return FULL_MATCH;
+        }
+
+        // The whole key matched; do the same for the following context.
+        matched = getRegionMatchLength(text, start, limit,
+                                       cursor + key.length(),
+                                       postContext, variables, filter);
+        if (matched < 0) {
+            return MISMATCH;
+        }
+        if (matched == postContext.length()) {
+            return FULL_MATCH;
+        }
+        return PARTIAL_MATCH;
+    }
+
+    /**
+     * Return true if a template matches the text.  The entire length of the
+     * template is compared to the text at the cursor; if the template would
+     * extend past either end of the buffer, the result is false.  As in
+     * <code>matches()</code>, the text being matched occupies a virtual buffer
+     * consisting of the contents of <code>result</code> concatenated to a
+     * substring of <code>text</code>.  See <code>matches()</code> for details.
+     * @param text the untranslated text
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param result translated text so far
+     * @param cursor position at which to translate next, an offset into result.
+     * If greater than or equal to result.length(), represents offset start +
+     * cursor - result.length() into text.
+     * @param template the text to match against.  All characters must match.
+     * @param variables a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return true if there is a match
+     */
+    protected static boolean regionMatches(String text, int start, int limit,
+                                           StringBuffer result, int cursor,
+                                           String template,
+                                           Dictionary variables,
+                                           UnicodeFilter filter) {
+        int translatedLength = result.length();
+        if (cursor < 0) {
+            return false;
+        }
+        int templateLength = template.length();
+        if (cursor + templateLength > translatedLength + limit - start) {
+            return false;
+        }
+        for (int i = 0; i < templateLength; ++i) {
+            int pos = cursor + i;
+            // Positions below result.length() come from result, the rest
+            // from text, offset by start.
+            char textChar;
+            if (pos < translatedLength) {
+                textChar = result.charAt(pos);
+            } else {
+                textChar = text.charAt(pos - translatedLength + start);
+            }
+            if (!charMatches(template.charAt(i), textChar,
+                             variables, filter)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Return true if a template matches the text.  The entire length of the
+     * template is compared to the text at the cursor; if the template would
+     * begin before <code>start</code> or extend past <code>limit</code>, the
+     * result is false.
+     * @param text the text, both translated and untranslated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param cursor position at which to translate next, representing offset
+     * into text.  This value must be between <code>start</code> and
+     * <code>limit</code>.
+     * @param template the text to match against.  All characters must match.
+     * @param variables a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return true if there is a match
+     */
+    protected static boolean regionMatches(Replaceable text, int start, int limit,
+                                           int cursor,
+                                           String template, Dictionary variables,
+                                           UnicodeFilter filter) {
+        if (cursor < start) {
+            return false;
+        }
+        int templateLength = template.length();
+        if (cursor + templateLength > limit) {
+            return false;
+        }
+        for (int i = 0; i < templateLength; ++i) {
+            boolean same = charMatches(template.charAt(i),
+                                       text.charAt(cursor + i),
+                                       variables, filter);
+            if (!same) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Return the number of characters of the text that match the template,
+     * comparing from the cursor until either the template or the text (at
+     * <code>limit</code>) is exhausted.  If there is a mismatch, or the cursor
+     * lies before <code>start</code>, return -1.  If the text is not long
+     * enough to match any characters, return 0.
+     * @param text the text, both translated and untranslated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param cursor position at which to translate next, representing offset
+     * into text.  This value must be between <code>start</code> and
+     * <code>limit</code>.
+     * @param template the text to match against.  All characters must match.
+     * @param variables a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return -1 if there is a mismatch, 0 if the text is not long enough to
+     * match any characters, otherwise the number of characters of text that
+     * match the template.
+     */
+    protected static int getRegionMatchLength(Replaceable text, int start,
+                                              int limit, int cursor,
+                                              String template,
+                                              Dictionary variables,
+                                              UnicodeFilter filter) {
+        if (cursor < start) {
+            return -1;
+        }
+        int matched = 0;
+        while (matched < template.length() && cursor + matched < limit) {
+            boolean same = charMatches(template.charAt(matched),
+                                       text.charAt(cursor + matched),
+                                       variables, filter);
+            if (!same) {
+                return -1;
+            }
+            ++matched;
+        }
+        return matched;
+    }
+
+    /**
+     * Return true if the given key matches the given text.  This method
+     * accounts for the fact that the key character may represent a character
+     * set: if <code>variables</code> maps the key character to a
+     * <code>UnicodeSet</code>, the text character must be contained in that
+     * set; otherwise the two characters must be equal.  A text character
+     * rejected by the filter never matches.  Note that the key and text
+     * characters may not be interchanged without altering the results.
+     * @param keyChar a character in the match key
+     * @param textChar a character in the text being transliterated
+     * @param variables a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return true if <code>textChar</code> passes the filter and matches
+     * <code>keyChar</code>
+     */
+    protected static boolean charMatches(char keyChar, char textChar,
+                                         Dictionary variables, UnicodeFilter filter) {
+        // The filter test is a separate statement on purpose.  Written as
+        // "filterOk && (set = lookup) == null ? a : b", the conditional
+        // operator binds more loosely than &&, so a rejected character
+        // skipped the lookup and then dereferenced the null set.
+        if (filter != null && !filter.isIn(textChar)) {
+            return false;
+        }
+        UnicodeSet set = (UnicodeSet) variables.get(new Character(keyChar));
+        if (set == null) {
+            // Not a variable: the key character stands for itself.
+            return keyChar == textChar;
+        }
+        return set.contains(textChar);
+    }
+
+    /**
+     * Escape non-ASCII characters as Unicode.  Characters from U+0020 through
+     * U+007F are copied unchanged; every other character is written as a
+     * backslash, <code>u</code>, and four hexadecimal digits, zero-padded on
+     * the left.
+     * @param s the string to escape
+     * @return the escaped string
+     */
+    public static final String escape(String s) {
+        StringBuffer out = new StringBuffer();
+        int length = s.length();
+        for (int i = 0; i < length; ++i) {
+            char ch = s.charAt(i);
+            boolean copyAsIs = ch >= ' ' && ch <= 0x007F;
+            if (copyAsIs) {
+                out.append(ch);
+                continue;
+            }
+            String hex = Integer.toHexString(ch);
+            out.append("\\u");
+            // Left-pad to four digits.
+            for (int digits = hex.length(); digits < 4; ++digits) {
+                out.append('0');
+            }
+            out.append(hex);
+        }
+        return out.toString();
+    }
+}
diff --git a/icu4j/src/com/ibm/icu/text/TransliterationRuleSet.java b/icu4j/src/com/ibm/icu/text/TransliterationRuleSet.java
new file mode 100755
index 00000000000..d57bf75464a
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/text/TransliterationRuleSet.java
@@ -0,0 +1,218 @@
+package com.ibm.text;
+
+import java.util.*;
+
+/**
+ * A set of rules for a <code>RuleBasedTransliterator</code>.  This set encodes
+ * the transliteration in one direction from one set of characters or short
+ * strings to another.  A <code>RuleBasedTransliterator</code> consists of up to
+ * two such sets, one for the forward direction, and one for the reverse.
+ *
+ * <p>A <code>TransliterationRuleSet</code> has one important operation, that of
+ * finding a matching rule at a given point in the text.  This is accomplished
+ * by the <code>findMatch()</code> method.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+class TransliterationRuleSet {
+    /* Note: There was an old implementation that indexed by first letter of
+     * key.  Problem with this is that key may not have a meaningful first
+     * letter; e.g., {Lu}>*.  One solution is to keep a separate vector of all
+     * rules whose initial key letter is a category variable.  However, the
+     * problem is that they must be kept in order with respect to other rules.
+     * One solution -- add a sequence number to each rule.  Do the usual
+     * first-letter lookup, and also a lookup from the spare bin with rules like
+     * {Lu}>*.  Take the lower sequence number.  This seems complex and not
+     * worth the trouble, but we may revisit this later.  For documentation (or
+     * possible resurrection) the old code is included below, commented out
+     * with the remark "// OLD INDEXED IMPLEMENTATION".  Under the old
+     * implementation, <code>rules</code> is a Hashtable, not a Vector.
+     */
+
+    /**
+     * Vector of rules, in the order added.  Elements are
+     * <code>TransliterationRule</code> objects.
+     */
+    private Vector rules;
+
+    /**
+     * Length of the longest preceding context among the rules added so far.
+     * Kept up to date by <code>addRule()</code>.
+     */
+    private int maxContextLength;
+
+    // Copyright notice; not referenced by any code in this class.
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Construct a new empty rule set.  The maximum context length of an empty
+     * set is zero.
+     */
+    public TransliterationRuleSet() {
+        rules = new Vector();
+        maxContextLength = 0;
+    }
+
+    /**
+     * Return the maximum context length, taken over all rules added to this
+     * set so far.
+     * @return the length of the longest preceding context.
+     */
+    public int getMaximumContextLength() {
+        return maxContextLength;
+    }
+
+    /**
+     * Add a rule to this set.  Rules are added in order, and order is
+     * significant: a rule is rejected if a rule already in the set masks it,
+     * since it would then have to come first.
+     *
+     * <p>Once freeze() is called, this method must not be called.
+     * @param rule the rule to add
+     * @exception IllegalArgumentException if a rule already in this set masks
+     * <code>rule</code>.
+     */
+    public void addRule(TransliterationRule rule) {
+
+        // Build time, no checking  : 3562 ms
+        // Build time, with checking: 6234 ms
+
+        for (Enumeration e = rules.elements(); e.hasMoreElements(); ) {
+            TransliterationRule earlier = (TransliterationRule) e.nextElement();
+            if (earlier.masks(rule)) {
+                throw new IllegalArgumentException("Rule " + rule +
+                                                   " must precede " + earlier);
+            }
+        }
+
+        rules.addElement(rule);
+        maxContextLength = Math.max(maxContextLength,
+                                    rule.getAnteContextLength());
+    }
+
+    /**
+     * Free up space.  Once this method is called, addRule() must NOT
+     * be called again.
+     */
+    public void freeze() {
+        for (int i=0; i<rules.size(); ++i) {
+            ((TransliterationRule) rules.elementAt(i)).freeze();
+        }
+    }
+
+    /**
+     * Attempt to find a matching rule at the specified point in the text.
+     * Rules are tried in the order they were added and the first one that
+     * matches is returned.  The text being matched occupies a virtual buffer
+     * consisting of the contents of <code>result</code> concatenated to a
+     * substring of <code>text</code>.
+     * The substring is specified by <code>start</code> and <code>limit</code>.
+     * The value of <code>cursor</code> is an index into this virtual buffer,
+     * from 0 to the length of the buffer.  In terms of the parameters,
+     * <code>cursor</code> must be between 0 and <code>result.length() + limit -
+     * start</code>.
+     * @param text the untranslated text
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param result translated text
+     * @param cursor position at which to translate next, an offset into result.
+     * If greater than or equal to result.length(), represents offset start +
+     * cursor - result.length() into text.
+     * @param variables a dictionary mapping variables to the sets they
+     * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return the matching rule, or null if none found.
+     */
+    public TransliterationRule findMatch(String text, int start, int limit,
+                                         StringBuffer result, int cursor,
+                                         Dictionary variables,
+                                         UnicodeFilter filter) {
+        int count = rules.size();
+        for (int i = 0; i < count; ++i) {
+            TransliterationRule candidate =
+                (TransliterationRule) rules.elementAt(i);
+            boolean hit = candidate.matches(text, start, limit, result, cursor,
+                                            variables, filter);
+            if (hit) {
+                return candidate;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Attempt to find a matching rule at the specified point in the text.
+     * Rules are tried in the order they were added and the first one that
+     * matches is returned.
+     * @param text the text, both translated and untranslated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param cursor position at which to translate next, representing offset
+     * into text.  This value must be between <code>start</code> and
+     * <code>limit</code>.
+     * @param variables a dictionary mapping variables to the sets they
+     * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return the matching rule, or null if none found.
+     */
+    public TransliterationRule findMatch(Replaceable text, int start, int limit,
+                                         int cursor,
+                                         Dictionary variables,
+                                         UnicodeFilter filter) {
+        int count = rules.size();
+        for (int i = 0; i < count; ++i) {
+            TransliterationRule candidate =
+                (TransliterationRule) rules.elementAt(i);
+            boolean hit = candidate.matches(text, start, limit, cursor,
+                                            variables, filter);
+            if (hit) {
+                return candidate;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Attempt to find a matching rule at the specified point in the text.
+     * Unlike <code>findMatch()</code>, this method does an incremental match.
+     * An incremental match requires that there be no partial matches that might
+     * pre-empt the full match that is found.  Rules are examined in the order
+     * they were added; the search stops at the first rule that is either a
+     * full or a partial match.  If that rule is a partial match,
+     * then null is returned.  A non-null result indicates that a full match has
+     * been found, and that it cannot be pre-empted by a partial match
+     * regardless of what additional text is added to the translation buffer.
+     * @param text the text, both translated and untranslated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param cursor position at which to translate next, representing offset
+     * into text.  This value must be between <code>start</code> and
+     * <code>limit</code>.
+     * @param variables a dictionary mapping variables to the sets they
+     * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
+     * @param partial output parameter.  <code>partial[0]</code> is set to
+     * true if the search stopped on a partial match (and null is returned),
+     * and to false otherwise.
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return the matching rule, or null if none found, or if the text buffer
+     * does not have enough text yet to unambiguously match a rule.
+     */
+    public TransliterationRule findIncrementalMatch(Replaceable text, int start,
+                                                    int limit, int cursor,
+                                                    Dictionary variables,
+                                                    boolean partial[],
+                                                    UnicodeFilter filter) {
+        partial[0] = false;
+        int count = rules.size();
+        for (int i = 0; i < count; ++i) {
+            TransliterationRule candidate =
+                (TransliterationRule) rules.elementAt(i);
+            int degree = candidate.getMatchDegree(text, start, limit, cursor,
+                                                  variables, filter);
+            if (degree == TransliterationRule.FULL_MATCH) {
+                return candidate;
+            }
+            if (degree == TransliterationRule.PARTIAL_MATCH) {
+                // More text could still complete this earlier rule, so no
+                // later rule may be chosen yet.
+                partial[0] = true;
+                return null;
+            }
+        }
+        return null;
+    }
+}
diff --git a/icu4j/src/com/ibm/icu/text/Transliterator.java b/icu4j/src/com/ibm/icu/text/Transliterator.java
new file mode 100755
index 00000000000..83171a961e7
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/text/Transliterator.java
@@ -0,0 +1,860 @@
+package com.ibm.text;
+
+import java.util.*;
+import java.text.MessageFormat;
+
+/**
+ * <code>Transliterator</code> is an abstract class that
+ * transliterates text from one format to another.  The most common
+ * kind of transliterator is a script, or alphabet, transliterator.
+ * For example, a Russian to Latin transliterator changes Russian text
+ * written in Cyrillic characters to phonetically equivalent Latin
+ * characters.  It does not <em>translate</em> Russian to English!
+ * Transliteration, unlike translation, operates on characters, without
+ * reference to the meanings of words and sentences.
+ *
+ * <p>Although script conversion is its most common use, a
+ * transliterator can actually perform a more general class of tasks.
+ * In fact, <code>Transliterator</code> defines a very general API
+ * which specifies only that a segment of the input text is replaced
+ * by new text.  The particulars of this conversion are determined
+ * entirely by subclasses of <code>Transliterator</code>.
+ *
+ * <p><b>Transliterators are stateless</b>
+ *
+ * <p><code>Transliterator</code> objects are <em>stateless</em>; they
+ * retain no information between calls to
+ * <code>transliterate()</code>.  As a result, threads may share
+ * transliterators without synchronizing them.  This might seem to
+ * limit the complexity of the transliteration operation.  In
+ * practice, subclasses perform complex transliterations by delaying
+ * the replacement of text until it is known that no other
+ * replacements are possible.  In other words, although the
+ * <code>Transliterator</code> objects are stateless, the source text
+ * itself embodies all the needed information, and delayed operation
+ * allows arbitrary complexity.
+ *
+ * <p><b>Batch transliteration</b>
+ *
+ * <p>The simplest way to perform transliteration is all at once, on a
+ * string of existing text.  This is referred to as <em>batch</em>
+ * transliteration.  For example, given a string <code>input</code>
+ * and a transliterator <code>t</code>, the call
+ *
+ * <blockquote><code>String result = t.transliterate(input);
+ * </code></blockquote>
+ *
+ * will transliterate it and return the result.  Other methods allow
+ * the client to specify a substring to be transliterated and to use
+ * {@link Replaceable} objects instead of strings, in order to
+ * preserve out-of-band information (such as text styles).
+ *
+ * <p><b>Keyboard transliteration</b>
+ *
+ * <p>Somewhat more involved is <em>keyboard</em>, or incremental
+ * transliteration.  This is the transliteration of text that is
+ * arriving from some source (typically the user's keyboard) one
+ * character at a time, or in some other piecemeal fashion.
+ *
+ * <p>In keyboard transliteration, a <code>Replaceable</code> buffer
+ * stores the text.  As text is inserted, as much as possible is
+ * transliterated on the fly.  This means a GUI that displays the
+ * contents of the buffer may show text being modified as each new
+ * character arrives.
+ *
+ * <p>Consider the simple <code>RuleBasedTransliterator</code>:
+ *
+ * <blockquote><code>
+ * th&gt;{theta}<br>
+ * t&gt;{tau}
+ * </code></blockquote>
+ *
+ * When the user types 't', nothing will happen, since the
+ * transliterator is waiting to see if the next character is 'h'.  To
+ * remedy this, we introduce the notion of a cursor, marked by a '|'
+ * in the output string:
+ *
+ * <blockquote><code>
+ * t&gt;|{tau}<br>
+ * {tau}h&gt;{theta}
+ * </code></blockquote>
+ *
+ * Now when the user types 't', tau appears, and if the next character
+ * is 'h', the tau changes to a theta.  This is accomplished by
+ * maintaining a cursor position (independent of the insertion point,
+ * and invisible in the GUI) across calls to
+ * <code>keyboardTransliterate()</code>.  Typically, the cursor will
+ * be coincident with the insertion point, but in a case like the one
+ * above, it will precede the insertion point.
+ *
+ * <p>Keyboard transliteration methods maintain a set of three indices
+ * that are updated with each call to
+ * <code>keyboardTransliterate()</code>, including the cursor, start,
+ * and limit.  Since these indices are changed by the method, they are
+ * passed in an <code>int[]</code> array. The <code>START</code> index
+ * marks the beginning of the substring that the transliterator will
+ * look at.  It is advanced as text becomes committed (but it is not
+ * the committed index; that's the <code>CURSOR</code>).  The
+ * <code>CURSOR</code> index, described above, marks the point at
+ * which the transliterator last stopped, either because it reached
+ * the end, or because it required more characters to disambiguate
+ * between possible inputs.  The <code>CURSOR</code> can also be
+ * explicitly set by rules in a <code>RuleBasedTransliterator</code>.
+ * Any characters before the <code>CURSOR</code> index are frozen;
+ * future keyboard transliteration calls within this input sequence
+ * will not change them.  New text is inserted at the
+ * <code>LIMIT</code> index, which marks the end of the substring that
+ * the transliterator looks at.
+ *
+ * <p>Because keyboard transliteration assumes that more characters
+ * are to arrive, it is conservative in its operation.  It only
+ * transliterates when it can do so unambiguously.  Otherwise it waits
+ * for more characters to arrive.  When the client code knows that no
+ * more characters are forthcoming, perhaps because the user has
+ * performed some input termination operation, then it should call
+ * <code>finishKeyboardTransliteration()</code> to complete any
+ * pending transliterations.
+ *
+ * <p><b>Inverses</b>
+ *
+ * <p>Pairs of transliterators may be inverses of one another.  For
+ * example, if transliterator <b>A</b> transliterates characters by
+ * incrementing their Unicode value (so "abc" -> "def"), and
+ * transliterator <b>B</b> decrements character values, then <b>A</b>
+ * is an inverse of <b>B</b> and vice versa.  If we compose <b>A</b>
+ * with <b>B</b> in a compound transliterator, the result is the
+ * identity transliterator, that is, a transliterator that does not
+ * change its input text.
+ *
+ * <p>The <code>Transliterator</code> method <code>getInverse()</code>
+ * returns a transliterator's inverse, if one exists, or
+ * <code>null</code> otherwise.  However, the result of
+ * <code>getInverse()</code> usually will <em>not</em> be a true
+ * mathematical inverse.  This is because true inverse transliterators
+ * are difficult to formulate.  For example, consider two
+ * transliterators: <b>AB</b>, which transliterates the character 'A'
+ * to 'B', and <b>BA</b>, which transliterates 'B' to 'A'.  It might
+ * seem that these are exact inverses, since
+ *
+ * <blockquote>"A" x <b>AB</b> -> "B"<br>
+ * "B" x <b>BA</b> -> "A"</blockquote>
+ *
+ * where 'x' represents transliteration.  However,
+ *
+ * <blockquote>"ABCD" x <b>AB</b> -> "BBCD"<br>
+ * "BBCD" x <b>BA</b> -> "AACD"</blockquote>
+ *
+ * so <b>AB</b> composed with <b>BA</b> is not the
+ * identity. Nonetheless, <b>BA</b> may be usefully considered to be
+ * <b>AB</b>'s inverse, and it is on this basis that
+ * <b>AB</b><code>.getInverse()</code> could legitimately return
+ * <b>BA</b>.
+ *
+ * <p><b>IDs and display names</b>
+ *
+ * <p>A transliterator is designated by a short identifier string or
+ * <em>ID</em>.  IDs follow the format <em>source-destination</em>,
+ * where <em>source</em> describes the entity being replaced, and
+ * <em>destination</em> describes the entity replacing
+ * <em>source</em>.  The entities may be the names of scripts,
+ * particular sequences of characters, or whatever else it is that the
+ * transliterator converts to or from.  For example, a transliterator
+ * from Russian to Latin might be named "Russian-Latin".  A
+ * transliterator from keyboard escape sequences to Latin-1 characters
+ * might be named "KeyboardEscape-Latin1".  By convention, system
+ * entity names are in English, with the initial letters of words
+ * capitalized; user entity names may follow any format so long as
+ * they do not contain dashes.
+ *
+ * <p>In addition to programmatic IDs, transliterator objects have
+ * display names for presentation in user interfaces, returned by
+ * {@link #getDisplayName}.
+ *
+ * <p><b>Factory methods and registration</b>
+ *
+ * <p>In general, client code should use the factory method
+ * <code>getInstance()</code> to obtain an instance of a
+ * transliterator given its ID.  Valid IDs may be enumerated using
+ * <code>getAvailableIDs()</code>.  Since transliterators are
+ * stateless, multiple calls to <code>getInstance()</code> with the
+ * same ID will return the same object.
+ *
+ * <p>In addition to the system transliterators registered at startup,
+ * user transliterators may be registered by calling
+ * <code>registerInstance()</code> at run time.  To register a
+ * transliterator subclass without instantiating it (until it is
+ * needed), users may call <code>registerClass()</code>.
+ *
+ * <p><b>Subclassing</b>
+ *
+ * <p>Subclasses must implement the abstract
+ * <code>transliterate()</code> method.  They should also override the
+ * <code>transliterate()</code> method taking a <code>String</code>
+ * and <code>StringBuffer</code> if the performance of these methods
+ * can be improved over the performance obtained by the default
+ * implementations in this class.  Subclasses must also implement
+ * <code>handleKeyboardTransliterate()</code>.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: Transliterator.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public abstract class Transliterator {
+    /**
+     * In the <code>keyboardTransliterate()</code>
+     * <code>index[]</code> array, the beginning index, inclusive
+     * @see #keyboardTransliterate
+     */
+    public static final int START  = 0;
+
+    /**
+     * In the <code>keyboardTransliterate()</code>
+     * <code>index[]</code> array, the ending index, exclusive
+     * @see #keyboardTransliterate
+     */
+    public static final int LIMIT  = 1;
+
+    /**
+     * In the <code>keyboardTransliterate()</code>
+     * <code>index[]</code> array, the next character to be considered
+     * for transliteration
+     * @see #keyboardTransliterate
+     */
+    public static final int CURSOR = 2;
+
+    /**
+     * Programmatic name, e.g., "Latin-Arabic".
+     */
+    private String ID;
+
+    /** 
+     * This transliterator's filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     */
+    private UnicodeFilter filter;
+
+    /**
+     * Dictionary of known transliterators.  Keys are <code>String</code>
+     * names, values are one of the following:
+     *
+     * <ul><li><code>Transliterator</code> objects
+     *
+     * <li><code>Class</code> objects.  Such objects must represent
+     * subclasses of <code>Transliterator</code>, and must satisfy the
+     * constraints described in <code>registerClass()</code>
+     *
+     * <li><code>RuleBasedTransliterator.Data</code> objects, which
+     * <code>internalGetInstance()</code> recognizes as cached rule data
+     * from which a transliterator is constructed.
+     *
+     * <li><code>RULE_BASED_PLACEHOLDER</code>, in which case the ID
+     * will have its first '-' removed and be appended to
+     * RB_RULE_BASED_PREFIX to form a resource bundle name from which
+     * the RB_RULE key is looked up to obtain the rule.
+     *
+     * <li><code>REVERSE_RULE_BASED_PLACEHOLDER</code>.  Like
+     * <code>RULE_BASED_PLACEHOLDER</code>, except the entity names in
+     * the ID are reversed, and the argument
+     * RuleBasedTransliterator.REVERSE is passed to the
+     * RuleBasedTransliterator constructor.
+     * </ul>
+     */
+    private static Hashtable cache;
+
+    /**
+     * Internal object used to stand for instances of
+     * <code>RuleBasedTransliterator</code> that have not been
+     * constructed yet in the <code>cache</code>.  When a
+     * <code>getInstance()</code> call retrieves this object, it is
+     * replaced by the actual <code>RuleBasedTransliterator</code>.
+     * This allows <code>Transliterator</code> to delay instantiation
+     * of such transliterators until they are needed.
+     */
+    private static final Object RULE_BASED_PLACEHOLDER = new Object();
+
+    /**
+     * Internal object used to stand for instances of
+     * <code>RuleBasedTransliterator</code> that have not been
+     * constructed yet in the <code>cache</code>.  These instances are
+     * constructed with an argument
+     * <code>RuleBasedTransliterator.REVERSE</code>.
+     */
+    private static final Object REVERSE_RULE_BASED_PLACEHOLDER = new Object();
+
+    /**
+     * Prefix for resource bundle key for the display name for a
+     * transliterator.  The ID is appended to this to form the key.
+     * The resource bundle value should be a String.
+     */
+    private static final String RB_DISPLAY_NAME_PREFIX = "T:";
+
+    /**
+     * Resource bundle key for display name pattern.
+     * The resource bundle value should be a String forming a
+     * MessageFormat pattern, e.g.:
+     * "{0,choice,0#|1#{1} Transliterator|2#{1} to {2} Transliterator}".
+     */
+    private static final String RB_DISPLAY_NAME_PATTERN = "TransliteratorNamePattern";
+
+    /**
+     * Resource bundle key for the list of RuleBasedTransliterator IDs.
+     * The resource bundle value should be a String[] with each element
+     * being a valid ID.  The ID will be appended to RB_RULE_BASED_PREFIX
+     * to obtain the class name in which the RB_RULE key will be sought.
+     */
+    private static final String RB_RULE_BASED_IDS = "RuleBasedTransliteratorIDs";
+
+    /**
+     * Resource bundle containing display name keys and the
+     * RB_RULE_BASED_IDS array.
+     *
+     * <p>If we ever integrate this with the Sun JDK, the resource bundle
+     * root will change to java.text.resources.LocaleElements
+     */
+    private static final String RB_LOCALE_ELEMENTS =
+        "com.ibm.text.resources.LocaleElements";
+
+    /**
+     * Prefix for resource bundle containing RuleBasedTransliterator
+     * RB_RULE string.  The ID is munged to remove the first '-' then appended
+     * to this String to obtain the class name.
+     */
+    private static final String RB_RULE_BASED_PREFIX =
+        "com.ibm.text.resources.TransliterationRule";
+
+    /**
+     * Resource bundle key for the RuleBasedTransliterator rule.
+     */
+    private static final String RB_RULE = "Rule";
+
+    /**
+     * Copyright notice for this class.
+     */
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Constructs a transliterator with the given ID and filter.
+     * @param ID the string identifier for this transliterator; must
+     * not be <code>null</code>
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @exception NullPointerException if <code>ID</code> is
+     * <code>null</code>
+     */
+    protected Transliterator(String ID, UnicodeFilter filter) {
+        if (ID == null) {
+            // Fail fast, naming the offending argument so the cause is
+            // evident from the stack trace alone.
+            throw new NullPointerException("Transliterator ID must not be null");
+        }
+        this.ID = ID;
+        this.filter = filter;
+    }
+
+    /**
+     * Transliterates the segment of a string that begins at the
+     * character at offset <code>start</code> and extends to the
+     * character at offset <code>limit - 1</code>, with optional
+     * filtering.  A default implementaion is provided here;
+     * subclasses should provide a more efficient implementation if
+     * possible.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param result buffer to receive the transliterated text; previous
+     * contents are discarded
+     */
+    public void transliterate(String text, int start, int limit,
+                              StringBuffer result) {
+        /* This is a default implementation that should be replaced by
+         * a more efficient subclass implementation if possible.
+         */
+        result.setLength(0);
+        result.append(text.substring(start, limit));
+        transliterate(new ReplaceableString(result),
+                      0, result.length());
+    }
+
+    /**
+     * Transliterates a segment of a string, with optional filtering.
+     * Subclasses must override this abstract method.
+     *
+     * <p>This method takes no filter argument.  NOTE(review):
+     * implementations presumably honor the filter returned by
+     * {@link #getFilter}, for which any character where
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> is left
+     * unaltered -- confirm against the concrete subclasses.
+     *
+     * @param text the text buffer to be transliterated in place
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @return The new limit index.  The text previously occupying <code>[start,
+     * limit)</code> has been transliterated, possibly to a string of a different
+     * length, at <code>[start, </code><em>new-limit</em><code>)</code>, where
+     * <em>new-limit</em> is the return value.
+     */
+    public abstract int transliterate(Replaceable text, int start, int limit);
+
+    /**
+     * Transliterates an entire string. Convenience method.
+     * @param text the string to be transliterated
+     * @param result buffer to receive the transliterated text; previous
+     * contents are discarded
+     */
+    public final void transliterate(String text, StringBuffer result) {
+        transliterate(text, 0, text.length(), result);
+    }
+
+    /**
+     * Transliterate an entire string and returns the result. Convenience method.
+     *
+     * @param text the string to be transliterated
+     * @return The transliterated text
+     */
+    public final String transliterate(String text) {
+        // Collect the output in a scratch buffer via the whole-string
+        // overload, then hand it back as an immutable String.
+        StringBuffer scratch = new StringBuffer();
+        transliterate(text, scratch);
+        return scratch.toString();
+    }
+
+    /**
+     * Transliterates an entire string in place. Convenience method.
+     * @param text the string to be transliterated
+     */
+    public final void transliterate(Replaceable text) {
+        transliterate(text, 0, text.length());
+    }
+
+    /**
+     * Transliterates the portion of the text buffer that can be
+     * transliterated unambiguously after new text has been inserted,
+     * typically as a result of a keyboard event.  The new text in
+     * <code>insertion</code> will be inserted into <code>text</code>
+     * at <code>index[LIMIT]</code>, advancing
+     * <code>index[LIMIT]</code> by <code>insertion.length()</code>.
+     * Then the transliterator will try to transliterate characters of
+     * <code>text</code> between <code>index[CURSOR]</code> and
+     * <code>index[LIMIT]</code>.  Characters before
+     * <code>index[CURSOR]</code> will not be changed.
+     *
+     * <p>Upon return, values in <code>index[]</code> will be updated.
+     * <code>index[START]</code> will be advanced to the first
+     * character that future calls to this method will read.
+     * <code>index[CURSOR]</code> and <code>index[LIMIT]</code> will
+     * be adjusted to delimit the range of text that future calls to
+     * this method may change.
+     *
+     * <p>Typical usage of this method begins with an initial call
+     * with <code>index[START]</code> and <code>index[LIMIT]</code>
+     * set to indicate the portion of <code>text</code> to be
+     * transliterated, and <code>index[CURSOR] == index[START]</code>.
+     * Thereafter, <code>index[]</code> can be used without
+     * modification in future calls, provided that all changes to
+     * <code>text</code> are made via this method.
+     *
+     * <p>This method assumes that future calls may be made that will
+     * insert new text into the buffer.  As a result, it only performs
+     * unambiguous transliterations.  After the last call to this
+     * method, there may be untransliterated text that is waiting for
+     * more input to resolve an ambiguity.  In order to perform these
+     * pending transliterations, clients should call {@link
+     * #finishKeyboardTransliteration} after the last call to this
+     * method has been made.
+     * 
+     * @param text the buffer holding transliterated and untransliterated text
+     * @param index an array of three integers.
+     *
+     * <ul><li><code>index[START]</code>: the beginning index,
+     * inclusive; <code>0 <= index[START] <= index[LIMIT]</code>.
+     *
+     * <li><code>index[LIMIT]</code>: the ending index, exclusive;
+     * <code>index[START] <= index[LIMIT] <= text.length()</code>.
+     * <code>insertion</code> is inserted at
+     * <code>index[LIMIT]</code>.
+     *
+     * <li><code>index[CURSOR]</code>: the next character to be
+     * considered for transliteration; <code>index[START] <=
+     * index[CURSOR] <= index[LIMIT]</code>.  Characters before
+     * <code>index[CURSOR]</code> will not be changed by future calls
+     * to this method.</ul>
+     *
+     * @param insertion text to be inserted and possibly
+     * transliterated into the translation buffer at
+     * <code>index[LIMIT]</code>.  If <code>null</code> then no text
+     * is inserted.
+     * @see #START
+     * @see #LIMIT
+     * @see #CURSOR
+     * @see #handleKeyboardTransliterate
+     * @exception IllegalArgumentException if <code>index[]</code>
+     * is invalid
+     */
+    public final void keyboardTransliterate(Replaceable text, int[] index,
+                                            String insertion) {
+        // The array needs all three slots, and the indices must satisfy
+        // 0 <= START <= CURSOR <= LIMIT <= text.length().
+        boolean wellFormed = index.length >= 3
+            && index[START] >= 0
+            && index[LIMIT] <= text.length()
+            && index[CURSOR] >= index[START]
+            && index[CURSOR] <= index[LIMIT];
+        if (!wellFormed) {
+            throw new IllegalArgumentException("Invalid index array");
+        }
+
+        // Remember where the caller started; START may never move
+        // backwards past this point.
+        int originalStart = index[START];
+
+        // Splice the new text in at LIMIT and grow the range to cover it.
+        if (insertion != null) {
+            text.replace(index[LIMIT], index[LIMIT], insertion);
+            index[LIMIT] += insertion.length();
+        }
+
+        // Let the subclass transliterate what it can; it updates CURSOR
+        // and LIMIT.
+        handleKeyboardTransliterate(text, index);
+
+        // Keep just enough preceding context before CURSOR for future
+        // calls, without going earlier than the original START.
+        int contextStart = index[CURSOR] - getMaximumContextLength();
+        index[START] = (contextStart >= originalStart) ? contextStart
+                                                       : originalStart;
+    }
+
+    /**
+     * Inserts a single character and transliterates the portion of
+     * the text buffer that can be transliterated unambiguously,
+     * typically in response to a keyboard event.  This is a
+     * convenience method that converts the character to a
+     * <code>String</code> and delegates; see {@link
+     * #keyboardTransliterate(Replaceable, int[], String)} for details.
+     * @param text the buffer holding transliterated and
+     * untransliterated text
+     * @param index an array of three integers.  See {@link
+     * #keyboardTransliterate(Replaceable, int[], String)}.
+     * @param insertion character to be inserted and possibly
+     * transliterated into the translation buffer at
+     * <code>index[LIMIT]</code>.
+     * @see #keyboardTransliterate(Replaceable, int[], String)
+     */
+    public final void keyboardTransliterate(Replaceable text, int[] index,
+                                            char insertion) {
+        String asString = String.valueOf(insertion);
+        keyboardTransliterate(text, index, asString);
+    }
+
+    /**
+     * Transliterates the portion of the text buffer that can be
+     * transliterated unambiguosly.  This is a convenience method; see
+     * {@link #keyboardTransliterate(Replaceable, int[], String)} for
+     * details.
+     * @param text the buffer holding transliterated and
+     * untransliterated text
+     * @param index an array of three integers.  See {@link
+     * #keyboardTransliterate(Replaceable, int[], String)}.
+     * @see #keyboardTransliterate(Replaceable, int[], String)
+     */
+    public final void keyboardTransliterate(Replaceable text, int[] index) {
+        keyboardTransliterate(text, index, null);
+    }
+
+    /**
+     * Finishes any pending transliterations that were waiting for
+     * more characters.  Clients should call this method as the last
+     * call after a sequence of one or more calls to
+     * <code>keyboardTransliterate()</code>.
+     * @param text the buffer holding transliterated and
+     * untransliterated text.
+     * @param index the array of indices previously passed to {@link
+     * #keyboardTransliterate}
+     */
+    public final void finishKeyboardTransliteration(Replaceable text,
+                                                    int[] index) {
+        // Run an ordinary transliteration over the remaining
+        // [START, LIMIT) range; the index array itself is not updated.
+        int from = index[START];
+        int to = index[LIMIT];
+        transliterate(text, from, to);
+    }
+
+    /**
+     * Abstract method that concrete subclasses define to implement
+     * keyboard transliteration.  This method should transliterate all
+     * characters between <code>index[CURSOR]</code> and
+     * <code>index[LIMIT]</code> that can be unambiguously
+     * transliterated, regardless of future insertions of text at
+     * <code>index[LIMIT]</code>.  <code>index[CURSOR]</code> should
+     * be advanced past committed characters (those that will not
+     * change in future calls to this method).
+     * <code>index[LIMIT]</code> should be updated to reflect text
+     * replacements that shorten or lengthen the text between
+     * <code>index[CURSOR]</code> and <code>index[LIMIT]</code>.  Upon
+     * return, neither <code>index[CURSOR]</code> nor
+     * <code>index[LIMIT]</code> should be less than the initial value
+     * of <code>index[CURSOR]</code>.  <code>index[START]</code>
+     * should <em>not</em> be changed.
+     *
+     * <p>This method is invoked by {@link
+     * #keyboardTransliterate(Replaceable, int[], String)} after the
+     * index array has been validated and any insertion has been made
+     * at <code>index[LIMIT]</code>.  When this method returns, the
+     * caller recomputes <code>index[START]</code> from
+     * <code>index[CURSOR]</code> and
+     * <code>getMaximumContextLength()</code>.
+     *
+     * @param text the buffer holding transliterated and
+     * untransliterated text
+     * @param index an array of three integers.  See {@link
+     * #keyboardTransliterate(Replaceable, int[], String)}.
+     * @see #keyboardTransliterate
+     */
+    protected abstract void handleKeyboardTransliterate(Replaceable text,
+                                                        int[] index);
+
+    /**
+     * Returns the length of the longest context required by this transliterator.
+     * This is <em>preceding</em> context.  The default implementation supplied
+     * by <code>Transliterator</code> returns zero; subclasses
+     * that use preceding context should override this method to return the
+     * correct value.  For example, if a transliterator translates "ddd" (where
+     * d is any digit) to "555" when preceded by "(ddd)", then the preceding
+     * context length is 5, the length of "(ddd)".
+     *
+     * <p>{@link #keyboardTransliterate(Replaceable, int[], String)} uses
+     * this value to set <code>index[START]</code> to
+     * <code>index[CURSOR]</code> minus this length, but never to a
+     * position before the <code>index[START]</code> it was called with.
+     *
+     * @return The maximum number of preceding context characters this
+     * transliterator needs to examine
+     */
+    protected int getMaximumContextLength() {
+        return 0;
+    }
+
+    /**
+     * Returns a programmatic identifier for this transliterator.
+     * If this identifier is passed to <code>getInstance()</code>, it
+     * will return this object, if it has been registered.
+     * @return the ID given to the constructor; never <code>null</code>
+     * @see #registerInstance
+     * @see #registerClass
+     * @see #getAvailableIDs
+     */
+    public final String getID() {
+        return ID;
+    }
+
+    /**
+     * Returns a name for this transliterator that is appropriate for
+     * display to the user in the default locale.  See {@link
+     * #getDisplayName(Locale)} for details.
+     */
+    public final String getDisplayName() {
+        return getDisplayName(Locale.getDefault());
+    }
+
+    /**
+     * Returns a name for this transliterator that is appropriate for
+     * display to the user in the given locale.  This name is taken
+     * from the locale resource data in the standard manner of the
+     * <code>java.text</code> package.
+     *
+     * <p>If no localized names exist in the system resource bundles,
+     * a name is synthesized using a localized
+     * <code>MessageFormat</code> pattern from the resource data.  The
+     * arguments to this pattern are an integer followed by one or two
+     * strings.  The integer is the number of strings, either 1 or 2.
+     * The strings are formed by splitting the ID for this
+     * transliterator at the first '-'.  If there is no '-', then the
+     * entire ID forms the only string.
+     * @param inLocale the Locale in which the display name should be
+     * localized.
+     * @return the explicit display name from the resource bundle if
+     * one exists, otherwise a name synthesized from the ID
+     * @exception MissingResourceException if the locale elements
+     * resource bundle itself cannot be loaded
+     * @exception RuntimeException if the bundle contains neither an
+     * explicit name for this ID nor the display name pattern
+     * @see java.text.MessageFormat
+     */
+    public String getDisplayName(Locale inLocale) {
+        ResourceBundle bundle = ResourceBundle.getBundle(
+            RB_LOCALE_ELEMENTS, inLocale);
+
+        try {
+            return bundle.getString(RB_DISPLAY_NAME_PREFIX + ID);
+        } catch (MissingResourceException e) {
+            // Expected when no explicit name is listed for this ID;
+            // fall through and synthesize one from the pattern.
+        }
+
+        try {
+            // Construct the formatter first; if getString() fails
+            // we'll exit the try block
+            MessageFormat format = new MessageFormat(
+                    bundle.getString(RB_DISPLAY_NAME_PATTERN));
+            // Construct the argument array
+            int i = ID.indexOf('-');
+            Object[] args = (i < 0)
+                ? new Object[] { new Integer(1), ID }
+                : new Object[] { new Integer(2), ID.substring(0, i),
+                                 ID.substring(i+1) };
+            // Format it using the pattern in the resource
+            return format.format(args);
+        } catch (MissingResourceException e2) {
+            // The pattern is missing too; report it below with enough
+            // detail to diagnose the resource problem.
+        }
+
+        // We should not reach this point unless there is something
+        // wrong with the build or the RB_DISPLAY_NAME_PATTERN has
+        // been deleted from the root RB_LOCALE_ELEMENTS resource.
+        throw new RuntimeException(
+            "No display name for transliterator \"" + ID
+            + "\": resource key \"" + RB_DISPLAY_NAME_PATTERN
+            + "\" not found in bundle " + RB_LOCALE_ELEMENTS);
+    }
+
+    /**
+     * Returns the filter used by this transliterator, or <tt>null</tt>
+     * if this transliterator uses no filter.
+     * @return the current filter, as supplied to the constructor or
+     * to {@link #setFilter}; may be <tt>null</tt>
+     */
+    public UnicodeFilter getFilter() {
+        return filter;
+    }
+
+    /**
+     * Changes the filter used by this transliterator.  If the filter
+     * is set to <tt>null</tt> then no filtering will occur.
+     *
+     * <p>Callers must take care if a transliterator is in use by
+     * multiple threads.  The filter should not be changed by one
+     * thread while another thread may be transliterating.
+     * @param filter the new filter, or <tt>null</tt> to disable
+     * filtering
+     */
+    public void setFilter(UnicodeFilter filter) {
+        this.filter = filter;
+    }
+
+    /**
+     * Returns this transliterator's inverse.  See the class
+     * documentation for details.  This implementation simply inverts
+     * the two entities in the ID and attempts to retrieve the
+     * resulting transliterator.  That is, if <code>getID()</code>
+     * returns "A-B", then this method will return the result of
+     * <code>getInstance("B-A")</code>, or <code>null</code> if that
+     * call fails.
+     *
+     * <p>This method does not take filtering into account.  The
+     * returned transliterator will have no filter.
+     *
+     * <p>Subclasses with knowledge of their inverse may wish to
+     * override this method.
+     *
+     * @return a transliterator that is an inverse, not necessarily
+     * exact, of this transliterator, or <code>null</code> if no such
+     * transliterator is registered.
+     * @see #registerInstance
+     */
+    public Transliterator getInverse() {
+        int dash = ID.indexOf('-');
+        if (dash < 0) {
+            // An ID with no '-' has no source/destination pair to swap.
+            return null;
+        }
+        // Split "A-B" at the first dash and look up "B-A"; the lookup
+        // yields null when no such transliterator can be obtained.
+        String source = ID.substring(0, dash);
+        String destination = ID.substring(dash + 1);
+        return internalGetInstance(destination + '-' + source);
+    }
+
+    /**
+     * Returns a <code>Transliterator</code> object given its ID.
+     * The ID must be either a system transliterator ID or an ID
+     * registered using <code>registerInstance()</code>.  This is a
+     * throwing wrapper around the internal lookup, which reports
+     * failure by returning <code>null</code>.
+     *
+     * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
+     * @return A <code>Transliterator</code> object with the given ID
+     * @exception IllegalArgumentException if no transliterator can be
+     * obtained for the given ID.
+     * @see #registerInstance
+     * @see #getAvailableIDs
+     * @see #getID
+     */
+    public static Transliterator getInstance(String ID) {
+        final Transliterator found = internalGetInstance(ID);
+        if (found == null) {
+            throw new IllegalArgumentException("Unsupported transliterator: "
+                                               + ID);
+        }
+        return found;
+    }
+
+    /**
+     * Returns a transliterator object given its ID.  Unlike getInstance(),
+     * this method returns null if it cannot make use of the given ID.
+     *
+     * <p>The value found in the cache under <code>ID</code> decides
+     * what happens:
+     * <ul>
+     * <li>A <code>RuleBasedTransliterator.Data</code> object is used
+     * directly to construct a new <code>RuleBasedTransliterator</code>
+     * with no filter.
+     * <li>A <code>Class</code> is instantiated through its no-argument
+     * constructor.  If instantiation fails with
+     * <code>InstantiationException</code> or
+     * <code>IllegalAccessException</code>, the failure is swallowed and
+     * <code>null</code> is returned.
+     * <li>Anything else (including no entry at all) is treated as a
+     * rule-based ID whose rules have not been loaded yet.  While
+     * holding the lock on the cache, a resource bundle name is formed
+     * from <code>RB_RULE_BASED_PREFIX</code> followed by the ID with
+     * its first '-' removed; if the cached value is
+     * <code>REVERSE_RULE_BASED_PLACEHOLDER</code> the two halves of the
+     * ID are swapped and the rules are parsed in the reverse
+     * direction.  The parsed data replaces the cache entry so later
+     * calls take the first path.  A missing bundle or rule string is
+     * swallowed and yields <code>null</code>.
+     * </ul>
+     *
+     * @param ID the ID to look up
+     * @return a new transliterator for <code>ID</code>, or
+     * <code>null</code> if none can be created
+     */
+    private static Transliterator internalGetInstance(String ID) {
+        Object obj = cache.get(ID);
+        RuleBasedTransliterator.Data data = null;
+
+        if (obj instanceof RuleBasedTransliterator.Data) {
+            data = (RuleBasedTransliterator.Data) obj;
+            // Fall through to construct transliterator from cached Data object.
+        } else if (obj instanceof Class) {
+            try {
+                return (Transliterator) ((Class) obj).newInstance();
+            } catch (InstantiationException e) {
+            } catch (IllegalAccessException e2) {}
+        } else {
+            synchronized (cache) {
+                boolean isReverse = (obj == REVERSE_RULE_BASED_PLACEHOLDER);
+                String resourceName = RB_RULE_BASED_PREFIX;
+                int i = ID.indexOf('-');
+                if (i < 0) {
+                    resourceName += ID;
+                } else {
+                    String IDLeft  = ID.substring(0, i);
+                    String IDRight = ID.substring(i+1);
+                    resourceName += isReverse ? (IDRight + IDLeft)
+                                              : (IDLeft + IDRight);
+                }
+                try {
+                    ResourceBundle resource = ResourceBundle.getBundle(resourceName);
+
+                    data = RuleBasedTransliterator.parse(resource.getString(RB_RULE),
+                                                         isReverse
+                                                         ? RuleBasedTransliterator.REVERSE
+                                                         : RuleBasedTransliterator.FORWARD);
+
+                    cache.put(ID, data);
+                    // Fall through to construct transliterator from Data object.
+                } catch (MissingResourceException e) {}
+            }
+        }
+
+        if (data != null) {
+            return new RuleBasedTransliterator(ID, data, null);
+        }
+
+        return null;
+    }
+
+    /**
+     * Registers a subclass of <code>Transliterator</code> with the
+     * system under the given ID, replacing any entry already stored
+     * for that ID.  The subclass must have a public constructor taking
+     * no arguments, and an object created through that constructor
+     * must return the <code>ID</code> passed to this method from its
+     * <code>getID()</code> method.
+     *
+     * @param ID the result of <code>getID()</code> for this
+     * transliterator
+     * @param transClass a subclass of <code>Transliterator</code>
+     * @see #registerInstance
+     * @see #unregister
+     */
+    public static void registerClass(String ID, Class transClass) {
+        // The class itself is stored; instances are created on lookup.
+        cache.put(ID, transClass);
+    }
+
+    /**
+     * Unregisters a transliterator or class by removing its entry from
+     * the registry.  The entry may belong to either a system
+     * transliterator or a user transliterator or class.
+     *
+     * @param ID the ID of the transliterator or class
+     * @return the <code>Object</code> that was registered with
+     * <code>ID</code>, or <code>null</code> if none was
+     * @see #registerInstance
+     * @see #registerClass
+     */
+    public static Object unregister(String ID) {
+        final Object previous = cache.remove(ID);
+        return previous;
+    }
+
+    /**
+     * Returns an enumeration over the programmatic names (IDs) under
+     * which <code>Transliterator</code> objects are currently
+     * registered.  This includes both system transliterators and user
+     * transliterators registered using <code>registerInstance()</code>.
+     * The enumerated names may be passed to <code>getInstance()</code>.
+     *
+     * @return An <code>Enumeration</code> over <code>String</code> objects
+     * @see #getInstance
+     * @see #registerInstance
+     */
+    public static final Enumeration getAvailableIDs() {
+        final Enumeration ids = cache.keys();
+        return ids;
+    }
+
+    /*
+     * Populates the registry at class-load time.  Every rule-based ID
+     * listed in the locale elements bundle is entered with a
+     * placeholder (an ID prefixed with '*' is entered, without the
+     * prefix, as a reverse rule-based transliterator); the rules
+     * themselves are loaded lazily on first lookup.  The built-in
+     * hex/Unicode transliterator classes are then registered.
+     */
+    static {
+        // Create the registry before touching any resource.  It used to
+        // be created inside the try block, after getStringArray(); when
+        // that call threw MissingResourceException the exception was
+        // swallowed with the cache still null, and the cache.put() calls
+        // below failed with a NullPointerException during class
+        // initialization.
+        cache = new Hashtable();
+
+        ResourceBundle bundle = ResourceBundle.getBundle(RB_LOCALE_ELEMENTS);
+
+        try {
+            String[] ruleBasedIDs = bundle.getStringArray(RB_RULE_BASED_IDS);
+
+            for (int i=0; i<ruleBasedIDs.length; ++i) {
+                String ID = ruleBasedIDs[i];
+                boolean isReverse = (ID.charAt(0) == '*');
+                if (isReverse) {
+                    ID = ID.substring(1);
+                }
+                cache.put(ID, isReverse ? REVERSE_RULE_BASED_PLACEHOLDER
+                                        : RULE_BASED_PLACEHOLDER);
+            }
+        } catch (MissingResourceException e) {
+            // No rule-based IDs are listed; continue with only the
+            // class-based transliterators registered below.
+        }
+
+        cache.put(HexToUnicodeTransliterator._ID,
+                  HexToUnicodeTransliterator.class);
+        cache.put(UnicodeToHexTransliterator._ID,
+                  UnicodeToHexTransliterator.class);
+    }
+}
diff --git a/icu4j/src/com/ibm/icu/text/UnicodeFilter.java b/icu4j/src/com/ibm/icu/text/UnicodeFilter.java
new file mode 100755
index 00000000000..3753883a476
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/text/UnicodeFilter.java
@@ -0,0 +1,22 @@
+package com.ibm.text;
+
+/**
+ * <code>UnicodeFilter</code> defines a protocol for selecting a
+ * subset of the full range (U+0000 to U+FFFF) of Unicode characters.
+ * Currently, filters are used in conjunction with classes like {@link
+ * Transliterator} to only process selected characters through a
+ * transformation.
+ *
+ * <p>Filters can be combined with the logical operators provided by
+ * {@link UnicodeFilterLogic}.
+ *
+ * @see UnicodeFilterLogic
+ */
+
+public interface UnicodeFilter {
+
+    /**
+     * Returns <tt>true</tt> for characters that are in the selected
+     * subset.  In other words, if a character is <b>to be
+     * filtered</b>, then <tt>isIn()</tt> returns
+     * <b><tt>false</tt></b>.
+     *
+     * @param c the character to test
+     * @return <tt>true</tt> if <tt>c</tt> belongs to the selected
+     * subset, <tt>false</tt> if it is filtered out
+     */
+    public boolean isIn(char c);
+}
diff --git a/icu4j/src/com/ibm/icu/text/UnicodeFilterLogic.java b/icu4j/src/com/ibm/icu/text/UnicodeFilterLogic.java
new file mode 100755
index 00000000000..f9e6ec1c609
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/text/UnicodeFilterLogic.java
@@ -0,0 +1,112 @@
+package com.ibm.text;
+
+/**
+ * <code>UnicodeFilterLogic</code> provides logical operators on
+ * {@link UnicodeFilter} objects.  This class cannot be instantiated;
+ * it consists only of static methods.  The static methods return
+ * filter objects that perform logical inversion (<tt>not</tt>),
+ * intersection (<tt>and</tt>), or union (<tt>or</tt>) of the given
+ * filter objects.
+ */
+public final class UnicodeFilterLogic {
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that implements the inverse of
+     * the given filter.
+     */
+    public static UnicodeFilter not(final UnicodeFilter f) {
+        return new UnicodeFilter() {
+            public boolean isIn(char c) {
+                return !f.isIn(c);
+            }
+        };
+    }
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that implements a short
+     * circuit AND of the result of the two given filters.  That is,
+     * if <tt>f.isIn()</tt> is <tt>false</tt>, then <tt>g.isIn()</tt>
+     * is not called, and <tt>isIn()</tt> returns <tt>false</tt>.
+     *
+     * <p>Either <tt>f</tt> or <tt>g</tt> must be non-null.
+     */
+    public static UnicodeFilter and(final UnicodeFilter f,
+                                    final UnicodeFilter g) {
+        if (f == null) {
+            return g;
+        }
+        if (g == null) {
+            return f;
+        }
+        return new UnicodeFilter() {
+            public boolean isIn(char c) {
+                return f.isIn(c) && g.isIn(c);
+            }
+        };
+    }
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that implements a short
+     * circuit AND of the result of the given filters.  That is, if
+     * <tt>f[i].isIn()</tt> is <tt>false</tt>, then
+     * <tt>f[j].isIn()</tt> is not called, where <tt>j > i</tt>, and
+     * <tt>isIn()</tt> returns <tt>false</tt>.
+     */
+    public static UnicodeFilter and(final UnicodeFilter[] f) {
+        return new UnicodeFilter() {
+            public boolean isIn(char c) {
+                for (int i=0; i<f.length; ++i) {
+                    if (!f[i].isIn(c)) {
+                        return false;
+                    }
+                }
+                return true;
+            }
+        };
+    }
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that implements a short
+     * circuit OR of the result of the two given filters.  That is, if
+     * <tt>f.isIn()</tt> is <tt>true</tt>, then <tt>g.isIn()</tt> is
+     * not called, and <tt>isIn()</tt> returns <tt>true</tt>.
+     *
+     * <p>Either <tt>f</tt> or <tt>g</tt> must be non-null.
+     */
+    public static UnicodeFilter or(final UnicodeFilter f,
+                                   final UnicodeFilter g) {
+        if (f == null) {
+            return g;
+        }
+        if (g == null) {
+            return f;
+        }
+        return new UnicodeFilter() {
+            public boolean isIn(char c) {
+                return f.isIn(c) || g.isIn(c);
+            }
+        };
+    }
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that implements a short
+     * circuit OR of the result of the given filters.  That is, if
+     * <tt>f[i].isIn()</tt> is <tt>false</tt>, then
+     * <tt>f[j].isIn()</tt> is not called, where <tt>j > i</tt>, and
+     * <tt>isIn()</tt> returns <tt>true</tt>.
+     */
+    public static UnicodeFilter or(final UnicodeFilter[] f) {
+        return new UnicodeFilter() {
+            public boolean isIn(char c) {
+                for (int i=0; i<f.length; ++i) {
+                    if (f[i].isIn(c)) {
+                        return true;
+                    }
+                }
+                return false;
+            }
+        };
+    }
+
+    // TODO: Add nand() & nor() for convenience, if needed.
+}
diff --git a/icu4j/src/com/ibm/icu/text/UnicodeSet.java b/icu4j/src/com/ibm/icu/text/UnicodeSet.java
new file mode 100755
index 00000000000..0d8db3021bb
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/text/UnicodeSet.java
@@ -0,0 +1,1354 @@
+package com.ibm.text;
+
+import java.text.*;
+
+/**
+ * A mutable set of Unicode characters.  Objects of this class
+ * represent <em>character classes</em> used in regular expressions.
+ * Such classes specify a subset of the set of all Unicode characters,
+ * which in this implementation is the characters from U+0000 to
+ * U+FFFF, ignoring surrogates.
+ *
+ * <p>This class supports two APIs.  The first is modeled after Java 2's
+ * <code>java.util.Set</code> interface, although this class does not
+ * implement that interface.  All methods of <code>Set</code> are
+ * supported, with the modification that they take a character range
+ * or single character instead of an <code>Object</code>, and they
+ * take a <code>UnicodeSet</code> instead of a <code>Collection</code>.
+ *
+ * <p>The second API is the
+ * <code>applyPattern()</code>/<code>toPattern()</code> API from the
+ * <code>java.text.Format</code>-derived classes.  Unlike the
+ * methods that add characters, add categories, and control the logic
+ * of the set, the method <code>applyPattern()</code> sets all
+ * attributes of a <code>UnicodeSet</code> at once, based on a
+ * string pattern.
+ *
+ * <p>In addition, the set complement operation is supported through
+ * the <code>complement()</code> method.
+ *
+ * <p><b>Pattern syntax</b></p>
+ *
+ * Patterns are accepted by the constructors and the
+ * <code>applyPattern()</code> methods and returned by the
+ * <code>toPattern()</code> method.  These patterns follow a syntax
+ * similar to that employed by version 8 regular expression character
+ * classes:
+ *
+ * <blockquote>
+ *   <table>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>pattern :=&nbsp; </code></td>
+ *       <td valign="top"><code>('[' '^'? item* ']') |
+ *       ('[:' '^'? category ':]')</code></td>
+ *     </tr>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>item :=&nbsp; </code></td>
+ *       <td valign="top"><code>char | (char '-' char) | pattern-expr<br>
+ *       </code></td>
+ *     </tr>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>pattern-expr :=&nbsp; </code></td>
+ *       <td valign="top"><code>pattern | pattern-expr pattern |
+ *       pattern-expr op pattern<br>
+ *       </code></td>
+ *     </tr>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>op :=&nbsp; </code></td>
+ *       <td valign="top"><code>'&amp;' | '-'<br>
+ *       </code></td>
+ *     </tr>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>special :=&nbsp; </code></td>
+ *       <td valign="top"><code>'[' | ']' | '-'<br>
+ *       </code></td>
+ *     </tr>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>char :=&nbsp; </code></td>
+ *       <td valign="top"><em>any character that is not</em><code> special<br>
+ *       | ('\u005C' </code><em>any character</em><code>)<br>
+ *       | ('\u005Cu' hex hex hex hex)<br>
+ *       </code></td>
+ *     </tr>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>hex :=&nbsp; </code></td>
+ *       <td valign="top"><em>any character for which
+ *       </em><code>Character.digit(c, 16)</code><em>
+ *       returns a non-negative result</em></td>
+ *     </tr>
+ *     <tr>
+ *       <td nowrap valign="top" align="right"><code>category :=&nbsp; </code></td>
+ *       <td valign="top"><code>'M' | 'N' | 'Z' | 'C' | 'L' | 'P' |
+ *       'S' | 'Mn' | 'Mc' | 'Me' | 'Nd' | 'Nl' | 'No' | 'Zs' | 'Zl' |
+ *       'Zp' | 'Cc' | 'Cf' | 'Cs' | 'Co' | 'Cn' | 'Lu' | 'Ll' | 'Lt'
+ *       | 'Lm' | 'Lo' | 'Pc' | 'Pd' | 'Ps' | 'Pe' | 'Po' | 'Sm' |
+ *       'Sc' | 'Sk' | 'So'</code></td>
+ *     </tr>
+ *   </table>
+ *   <br>
+ *   <table border="1">
+ *     <tr>
+ *       <td>Legend: <table>
+ *         <tr>
+ *           <td nowrap valign="top"><code>a := b</code></td>
+ *           <td width="20" valign="top">&nbsp; </td>
+ *           <td valign="top"><code>a</code> may be replaced by <code>b</code> </td>
+ *         </tr>
+ *         <tr>
+ *           <td nowrap valign="top"><code>a?</code></td>
+ *           <td valign="top"></td>
+ *           <td valign="top">zero or one instance of <code>a</code><br>
+ *           </td>
+ *         </tr>
+ *         <tr>
+ *           <td nowrap valign="top"><code>a*</code></td>
+ *           <td valign="top"></td>
+ *           <td valign="top">zero or more instances of <code>a</code><br>
+ *           </td>
+ *         </tr>
+ *         <tr>
+ *           <td nowrap valign="top"><code>a | b</code></td>
+ *           <td valign="top"></td>
+ *           <td valign="top">either <code>a</code> or <code>b</code><br>
+ *           </td>
+ *         </tr>
+ *         <tr>
+ *           <td nowrap valign="top"><code>'a'</code></td>
+ *           <td valign="top"></td>
+ *           <td valign="top">the literal string between the quotes </td>
+ *         </tr>
+ *       </table>
+ *       </td>
+ *     </tr>
+ *   </table>
+ * </blockquote>
+ *
+ * Patterns specify individual characters, ranges of characters, and
+ * Unicode character categories.  When elements are concatenated, they
+ * specify their union.  To complement a set, place a '^' immediately
+ * after the opening '[' or '[:'.  In any other location, '^' has no
+ * special meaning.
+ *
+ * <p>Ranges are indicated by placing a '-' between two
+ * characters, as in "a-z".  This specifies the range of all
+ * characters from the left to the right, in Unicode order.  If the
+ * left and right characters are the same, then the range consists of
+ * just that character.  If the left character is greater than the
+ * right character it is a syntax error.  If a '-' occurs as the first
+ * character after the opening '[' or '[^', or if it occurs as the
+ * last character before the closing ']', then it is taken as a
+ * literal.  Thus "[a\u005C-b]", "[-ab]", and "[ab-]" all indicate the same
+ * set of three characters, 'a', 'b', and '-'.
+ *
+ * <p>Sets may be intersected using the '&' operator or the asymmetric
+ * set difference may be taken using the '-' operator, for example,
+ * "[[:L:]&[\u005Cu0000-\u005Cu0FFF]]" indicates the set of all Unicode letters
+ * with values less than 4096.  Operators ('&' and '-') have equal
+ * precedence and bind left-to-right.  Thus
+ * "[[:L:]-[a-z]-[\u005Cu0100-\u005Cu01FF]]" is equivalent to
+ * "[[[:L:]-[a-z]]-[\u005Cu0100-\u005Cu01FF]]".  This only really matters for
+ * difference; intersection is commutative.
+ *
+ * <table>
+ * <tr valign=top><td nowrap><code>[a]</code><td>The set containing 'a'
+ * <tr valign=top><td nowrap><code>[a-z]</code><td>The set containing 'a'
+ * through 'z' and all letters in between, in Unicode order
+ * <tr valign=top><td nowrap><code>[^a-z]</code><td>The set containing
+ * all characters but 'a' through 'z',
+ * that is, U+0000 through 'a'-1 and 'z'+1 through U+FFFF
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>][<em>pat2</em>]]</code>
+ * <td>The union of sets specified by <em>pat1</em> and <em>pat2</em>
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>]&[<em>pat2</em>]]</code>
+ * <td>The intersection of sets specified by <em>pat1</em> and <em>pat2</em>
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>]-[<em>pat2</em>]]</code>
+ * <td>The asymmetric difference of sets specified by <em>pat1</em> and
+ * <em>pat2</em>
+ * <tr valign=top><td nowrap><code>[:Lu:]</code>
+ * <td>The set of characters belonging to the given
+ * Unicode category, as defined by <code>Character.getType()</code>; in
+ * this case, Unicode uppercase letters
+ * <tr valign=top><td nowrap><code>[:L:]</code>
+ * <td>The set of characters belonging to all Unicode categories
+ * starting with 'L', that is, <code>[[:Lu:][:Ll:][:Lt:][:Lm:][:Lo:]]</code>.
+ * </table>
+ *
+ * <p><b>Character categories.</b>
+ *
+ * Character categories are specified using the POSIX-like syntax
+ * '[:Lu:]'.  The complement of a category is specified by inserting
+ * '^' after the opening '[:'.  The following category names are
+ * recognized.  Actual determination of category data uses
+ * <code>Character.getType()</code>, so it reflects the underlying
+ * implementation used by <code>Character</code>.  As of Java 2 and
+ * JDK 1.1.8, this is Unicode 2.1.2.
+ *
+ * <pre>
+ * Normative
+ *     Mn = Mark, Non-Spacing
+ *     Mc = Mark, Spacing Combining
+ *     Me = Mark, Enclosing
+ * 
+ *     Nd = Number, Decimal Digit
+ *     Nl = Number, Letter
+ *     No = Number, Other
+ * 
+ *     Zs = Separator, Space
+ *     Zl = Separator, Line
+ *     Zp = Separator, Paragraph
+ * 
+ *     Cc = Other, Control
+ *     Cf = Other, Format
+ *     Cs = Other, Surrogate
+ *     Co = Other, Private Use
+ *     Cn = Other, Not Assigned
+ * 
+ * Informative
+ *     Lu = Letter, Uppercase
+ *     Ll = Letter, Lowercase
+ *     Lt = Letter, Titlecase
+ *     Lm = Letter, Modifier
+ *     Lo = Letter, Other
+ * 
+ *     Pc = Punctuation, Connector
+ *     Pd = Punctuation, Dash
+ *     Ps = Punctuation, Open
+ *     Pe = Punctuation, Close
+ *    *Pi = Punctuation, Initial quote
+ *    *Pf = Punctuation, Final quote
+ *     Po = Punctuation, Other
+ * 
+ *     Sm = Symbol, Math
+ *     Sc = Symbol, Currency
+ *     Sk = Symbol, Modifier
+ *     So = Symbol, Other
+ * </pre>
+ * *Unsupported by Java (and hence unsupported by UnicodeSet).
+ *
+ * @author Alan Liu
+ * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $ */
+public class UnicodeSet {
+    /**
+     * The internal representation is a StringBuffer of even length.
+     * Each pair of characters represents a range that is included in
+     * the set.  A single character c is represented as cc.  Thus, the
+     * ranges in the set are (a,b), a and b inclusive, where a =
+     * pairs.charAt(i) and b = pairs.charAt(i+1) for all even i, 0 <=
+     * i <= pairs.length()-2.  Pairs are always stored in ascending
+     * Unicode order.  Pairs are always stored in shortest form.  For
+     * example, if the pair "hh", representing the single character
+     * 'h', is added to the pairs list "agik", representing the ranges
+     * 'a'-'g' and 'i'-'k', the result is "ak", not "aghhik".
+     *
+     * This representation format was originally used in Richard
+     * Gillam's CharSet class.
+     */
+    private StringBuffer pairs;
+
+    private static final String CATEGORY_NAMES =
+        //                    1 1 1 1 1 1 1   1 1 2 2 2 2 2 2 2 2 2
+        //0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6   8 9 0 1 2 3 4 5 6 7 8
+        "CnLuLlLtLmLoMnMeMcNdNlNoZsZlZpCcCf--CoCsPdPsPePcPoSmScSkSo";
+
+    private static final int UNSUPPORTED_CATEGORY = 17;
+
+    private static final int CATEGORY_COUNT = 29;
+
+    /**
+     * A cache mapping character category integers, as returned by
+     * Character.getType(), to pairs strings.  Entries are initially
+     * null and are created on demand.
+     */
+    private static final String[] CATEGORY_PAIRS_CACHE =
+        new String[CATEGORY_COUNT];
+
+    //----------------------------------------------------------------
+    // Debugging and testing
+    //----------------------------------------------------------------
+
+    /**
+     * Returns a snapshot of this set's internal representation: a
+     * string of range end points, two characters per range, with the
+     * ranges in ascending Unicode order.  For example, the set
+     * [a-zA-M3] is represented as "33AMaz".
+     *
+     * @return the pairs list of this set as a string
+     */
+    public String getPairs() {
+        final String snapshot = pairs.toString();
+        return snapshot;
+    }
+
+    //----------------------------------------------------------------
+    // Public API
+    //----------------------------------------------------------------
+
+    /**
+     * Constructs a set containing no characters.
+     */
+    public UnicodeSet() {
+        this.pairs = new StringBuffer();
+    }
+
+    /**
+     * Constructs a set from the given pattern, treating spaces in the
+     * pattern as significant.  See the class description for the
+     * syntax of the pattern language.
+     * @param pattern a string specifying what characters are in the set
+     * @exception IllegalArgumentException if the pattern contains
+     * a syntax error.
+     */
+    public UnicodeSet(String pattern) {
+        this(pattern, false);
+    }
+
+    /**
+     * Constructs a set from the given pattern, optionally ignoring
+     * white space.  See the class description for the syntax of the
+     * pattern language.
+     * @param pattern a string specifying what characters are in the set
+     * @param ignoreSpaces if <code>true</code>, all spaces in the
+     * pattern are ignored, except those preceded by '\u005C'.  Spaces are
+     * those characters for which <code>Character.isSpaceChar()</code>
+     * is <code>true</code>.
+     * @exception IllegalArgumentException if the pattern
+     * contains a syntax error.
+     */
+    public UnicodeSet(String pattern, boolean ignoreSpaces) {
+        // All the work, including creation of the pairs list, is done
+        // by the pattern parser.
+        this.applyPattern(pattern, ignoreSpaces);
+    }
+
+    /**
+     * Constructs a set from the given Unicode character category.
+     * @param category an integer indicating the character category as
+     * returned by <code>Character.getType()</code>.
+     * @exception <code>IllegalArgumentException</code> if the given
+     * category is invalid.
+     */
+    public UnicodeSet(int category) {
+        if (category < 0 || category >= CATEGORY_COUNT ||
+            category == UNSUPPORTED_CATEGORY) {
+            throw new IllegalArgumentException("Invalid category");
+        }
+        pairs = new StringBuffer(getCategoryPairs(category));
+    }
+
+    /**
+     * Modifies this set to represent the set specified by the given
+     * pattern, treating spaces in the pattern as significant.  See the
+     * class description for the syntax of the pattern language.
+     * @param pattern a string specifying what characters are in the set
+     * @exception IllegalArgumentException if the pattern
+     * contains a syntax error.
+     */
+    public final void applyPattern(String pattern) {
+        final boolean ignoreSpaces = false;
+        applyPattern(pattern, ignoreSpaces);
+    }
+
+    /**
+     * Modifies this set to represent the set specified by the given
+     * pattern, optionally ignoring white space.  See the class
+     * description for the syntax of the pattern language.
+     *
+     * <p>If the pattern is rejected because text remains after the
+     * parsed pattern, this set is left as it was before the call.
+     *
+     * @param pattern a string specifying what characters are in the set
+     * @param ignoreSpaces if <code>true</code>, all spaces in the
+     * pattern are ignored.  Spaces are those characters for which
+     * <code>Character.isSpaceChar()</code> is <code>true</code>.
+     * Characters preceded by '\u005C' are escaped, losing any special
+     * meaning they otherwise have.  Spaces may be included by
+     * escaping them.
+     * @exception IllegalArgumentException if the pattern
+     * contains a syntax error.
+     */
+    public void applyPattern(String pattern, boolean ignoreSpaces) {
+        ParsePosition pos = new ParsePosition(0);
+
+        // To ignore spaces, create a new pattern without spaces.  We
+        // have to process all '\' escapes.  If '\' is encountered,
+        // insert it and the following character (if any -- let parse
+        // deal with any syntax errors) in the pattern.  This allows
+        // escaped spaces.
+        if (ignoreSpaces) {
+            StringBuffer pat = new StringBuffer();
+            for (int i=0; i<pattern.length(); ++i) {
+                char c = pattern.charAt(i);
+                if (Character.isSpaceChar(c)) {
+                    continue;
+                }
+                if (c == '\\' && (i+1) < pattern.length()) {
+                    pat.append(c);
+                    c = pattern.charAt(++i);
+                    // Fall through and append the following char
+                }
+                pat.append(c);
+            }
+            pattern = pat.toString();
+        }
+
+        // Parse into a temporary and commit only after the whole
+        // pattern has been consumed.  Assigning to the field first
+        // would leave this set holding the result of a pattern that is
+        // then reported as invalid.
+        StringBuffer parsed = parse(pattern, pos);
+        if (pos.getIndex() != pattern.length()) {
+            throw new IllegalArgumentException("Parse of \"" + pattern +
+                                               "\" failed at " +
+                                               pos.getIndex());
+        }
+        pairs = parsed;
+    }
+
+    /**
+     * Returns a string representation of this set.  If the result of
+     * calling this function is passed to a UnicodeSet constructor, it
+     * will produce another set that is equal to this one.
+     */
+    public String toPattern() {
+        StringBuffer result = new StringBuffer();
+        result.append('[');
+        
+        // iterate through the ranges in the UnicodeSet
+        for (int i=0; i<pairs.length(); i+=2) {
+            // for a range with the same beginning and ending point,
+            // output that character, otherwise, output the start and
+            // end points of the range separated by a dash
+            result.append(pairs.charAt(i));
+            if (pairs.charAt(i) != pairs.charAt(i+1)) {
+                result.append('-').append(pairs.charAt(i+1));
+            }
+        }
+        
+        return result.append(']').toString();        
+    }
+
+    /**
+     * Returns the cardinality of this set, that is, the number of
+     * characters it contains, <em>n</em>, where
+     * <code>0 <= </code><em>n</em><code> <= 65536</code>.  The count
+     * is obtained by summing the lengths of the stored ranges.
+     *
+     * @return the number of elements in this set (its cardinality).
+     */
+    public int size() {
+        int total = 0;
+        int start = 0;
+        while (start < pairs.length()) {
+            final char lo = pairs.charAt(start);
+            final char hi = pairs.charAt(start + 1);
+            total += hi - lo + 1;
+            start += 2;
+        }
+        return total;
+    }
+
+    /**
+     * Reports whether this set has no members, which is the case
+     * exactly when no ranges are stored.
+     *
+     * @return <tt>true</tt> if this set contains no elements.
+     */
+    public boolean isEmpty() {
+        final int storedEndPoints = pairs.length();
+        return storedEndPoints == 0;
+    }
+
+    /**
+     * Returns <tt>true</tt> if this set contains the specified range
+     * of chars, that is, if a single stored range covers both
+     * <tt>first</tt> and <tt>last</tt>.
+     *
+     * @param first first character, inclusive, of the range to test
+     * @param last last character, inclusive, of the range to test
+     * @return <tt>true</tt> if this set contains the specified range
+     * of chars.
+     */
+    public boolean contains(char first, char last) {
+        final int len = pairs.length();
+
+        // Advance over the range end points (odd indices) until one
+        // is found that is not below 'last'.
+        int end = 1;
+        while (end < len && pairs.charAt(end) < last) {
+            end += 2;
+        }
+        if (end >= len) {
+            // Every stored range ends before 'last'.
+            return false;
+        }
+        // The candidate range must also begin at or before 'first'.
+        return pairs.charAt(end - 1) <= first;
+    }
+
+    /**
+     * Returns <tt>true</tt> if this set contains the specified char.
+     * The character is tested as a range of length one.
+     *
+     * @param c the character to test
+     * @return <tt>true</tt> if this set contains the specified char.
+     */
+    public boolean contains(char c) {
+        final boolean present = contains(c, c);
+        return present;
+    }
+
+    /**
+     * Adds the specified range to this set if it is not already
+     * present.  If this set already contains the specified range,
+     * the call leaves this set unchanged.  If
+     * <code>first > last</code> the range is empty and the call
+     * leaves this set unchanged.
+     *
+     * @param first first character, inclusive, of range to be added
+     * to this set.
+     * @param last last character, inclusive, of range to be added
+     * to this set.
+     */
+    public void add(char first, char last) {
+        if (first > last) {
+            // Empty range: nothing to add.
+            return;
+        }
+        addPair(pairs, first, last);
+    }
+
+    /**
+     * Adds the specified character to this set if it is not already
+     * present.  If this set already contains the specified character,
+     * the call leaves this set unchanged.
+     *
+     * @param c the character to add, treated as a range of length one
+     */
+    public final void add(char c) {
+        final char first = c;
+        final char last = c;
+        add(first, last);
+    }
+
+    /**
+     * Removes the specified range from this set if it is present.
+     * The set will not contain the specified range once the call
+     * returns.  If <code>first > last</code> the range is empty and
+     * the call leaves this set unchanged.
+     *
+     * @param first first character, inclusive, of range to be removed
+     * from this set.
+     * @param last last character, inclusive, of range to be removed
+     * from this set.
+     */
+    public void remove(char first, char last) {
+        if (first > last) {
+            // Empty range: nothing to remove.
+            return;
+        }
+        removePair(pairs, first, last);
+    }
+
+    /**
+     * Removes the specified character from this set if it is present.
+     * The set will not contain the specified character once the call
+     * returns.
+     *
+     * @param c the character to remove, treated as a range of length
+     * one
+     */
+    public final void remove(char c) {
+        final char first = c;
+        final char last = c;
+        remove(first, last);
+    }
+
+    /**
+     * Returns <tt>true</tt> if the specified set is a <i>subset</i>
+     * of this set, that is, if every range of the specified set lies
+     * within a single range of this set.
+     *
+     * @param c set to be checked for containment in this set.
+     * @return <tt>true</tt> if this set contains all of the elements of the
+     *         specified set.
+     */
+    public boolean containsAll(UnicodeSet c) {
+        final StringBuffer other = c.pairs;
+
+        // Index of a range end point in this set.  It only ever moves
+        // forward because both pairs lists are in ascending order.
+        int end = 1;
+
+        int j = 0;
+        while (j < other.length()) {
+            final char lo = other.charAt(j);
+            final char hi = other.charAt(j + 1);
+
+            // Find the first range of this set whose end point is not
+            // below the end of the range being tested.
+            while (end < pairs.length() && pairs.charAt(end) < hi) {
+                end += 2;
+            }
+            if (end > pairs.length()) {
+                // Ran off the end: no range of this set reaches 'hi'.
+                return false;
+            }
+            if (lo < pairs.charAt(end - 1)) {
+                // The candidate range starts after 'lo'.
+                return false;
+            }
+            j += 2;
+        }
+        return true;
+    }
+
+    /**
+     * Adds all of the elements in the specified set to this set if
+     * they're not already present.  This operation effectively
+     * modifies this set so that its value is the <i>union</i> of the
+     * two sets.  The specified set's ranges are copied to a string
+     * before the union is computed.
+     *
+     * @param c set whose elements are to be added to this set.
+     * @see #add(char, char)
+     */
+    public void addAll(UnicodeSet c) {
+        final String otherPairs = c.pairs.toString();
+        doUnion(pairs, otherPairs);
+    }
+
+    /**
+     * Retains only the elements in this set that are contained in the
+     * specified set.  In other words, removes from this set all of
+     * its elements that are not contained in the specified set.  This
+     * operation effectively modifies this set so that its value is
+     * the <i>intersection</i> of the two sets.
+     *
+     * @param c set that defines which elements this set will retain.
+     */
+    public void retainAll(UnicodeSet c) {
+        // doIntersection() takes its right-hand operand as a String, so pass
+        // a String copy of c's pairs list; this set's list is updated in place.
+        doIntersection(pairs, c.pairs.toString());
+    }
+
+    /**
+     * Removes from this set all of its elements that are contained in the
+     * specified set.  This operation effectively modifies this
+     * set so that its value is the <i>asymmetric set difference</i> of
+     * the two sets.
+     *
+     * @param c set that defines which elements will be removed from
+     *          this set.
+     */
+    public void removeAll(UnicodeSet c) {
+        // doDifference() takes its right-hand operand as a String, so pass
+        // a String copy of c's pairs list; this set's list is updated in place.
+        doDifference(pairs, c.pairs.toString());
+    }
+
+    /**
+     * Inverts this set.  This operation modifies this set so that
+     * its value is its complement.  This is equivalent to the pseudo code:
+     * <code>this = new UnicodeSet("[&#092;u0000-&#092;uFFFF]").removeAll(this)</code>.
+     */
+    public void complement() {
+        // The pairs list is rewritten in place by doComplement().
+        doComplement(pairs);
+    }
+
+    /**
+     * Removes all of the elements from this set.  This set will be
+     * empty after this call returns.
+     */
+    public void clear() {
+        // An empty pairs list represents the empty set.
+        pairs.setLength(0);
+    }
+
+    /**
+     * Compares the specified object with this set for equality.  Returns
+     * <tt>true</tt> if the specified object is also a
+     * <code>UnicodeSet</code> and its pairs list has exactly the same
+     * contents as this set's pairs list.  Provided both lists are
+     * normalized, that is the same as saying that every member of the
+     * specified set is contained in this set and vice versa.
+     *
+     * @param o Object to be compared for equality with this set.
+     * @return <tt>true</tt> if the specified Object is equal to this set.
+     */
+    public boolean equals(Object o) {
+        if (o == this) {
+            return true;
+        }
+        if (!(o instanceof UnicodeSet)) {
+            return false;
+        }
+        // StringBuffer inherits equals() from Object, so comparing the two
+        // buffers directly would only test reference identity.  Compare
+        // their character contents instead.
+        return pairs.toString().equals(((UnicodeSet) o).pairs.toString());
+    }
+
+    /**
+     * Returns the hash code value for this set.  The value is derived
+     * from the contents of the pairs list, so that two sets that are
+     * equal according to {@link #equals(Object)} have the same hash code.
+     *
+     * @return the hash code value for this set.
+     * @see Object#hashCode()
+     */
+    public int hashCode() {
+        // StringBuffer.hashCode() is the identity hash; hash the contents.
+        return pairs.toString().hashCode();
+    }
+
+    //----------------------------------------------------------------
+    // Implementation: Pattern parsing
+    //----------------------------------------------------------------
+
+    /**
+     * Parses the given pattern, starting at the given position.  The
+     * character at pattern.charAt(pos.getIndex()) must be '[', or the
+     * parse fails.  Parsing continues until the corresponding closing
+     * ']'.  If a syntax error is encountered between the opening and
+     * closing bracket, the parse fails.  Upon return from a successful
+     * parse, the ParsePosition is updated to point to the character
+     * following the closing ']', and a StringBuffer containing a
+     * pairs list for the parsed pattern is returned.  This method calls
+     * itself recursively to parse embedded subpatterns.
+     *
+     * @param pattern the string containing the pattern to be parsed.
+     * The portion of the string from pos.getIndex(), which must be a
+     * '[', to the corresponding closing ']', is parsed.
+     * @param pos upon entry, the position at which to begin parsing.
+     * The character at pattern.charAt(pos.getIndex()) must be a '['.
+     * Upon return from a successful parse, pos.getIndex() is either
+     * the character after the closing ']' of the parsed pattern, or
+     * pattern.length() if the closing ']' is the last character of
+     * the pattern string.
+     * @return a StringBuffer containing a pairs list for the parsed
+     * substring of <code>pattern</code>
+     * @exception IllegalArgumentException if the parse fails.
+     */
+    private static StringBuffer parse(String pattern, ParsePosition pos) {
+
+        boolean invert = false;
+        StringBuffer pairsBuf = new StringBuffer();
+
+        /**
+         * Nodes:  0 - idle, waiting for '['
+         *        10 - like 11, but immediately after "[" or "[^"
+         *        11 - awaiting x, "]", "[...]", or "[:...:]"
+         *        21 - after x
+         *        23 - after x-
+         *
+         * The parsing state machine moves from node 0 through zero or more
+         * other nodes back to node 0, in a successful parse.
+         */
+        int node = 0;
+        // Pending literal: the "x" of nodes 21 and 23, not yet added to
+        // pairsBuf because it may still turn out to start a range.
+        char first = 0;
+        int i;
+
+        /**
+         * This loop iterates over the characters in the pattern.  We
+         * start at the position specified by pos.  We exit the loop
+         * when either a matching closing ']' is seen, or we read all
+         * characters of the pattern.
+         */
+        for (i=pos.getIndex(); i<pattern.length(); ++i) {
+            char c = pattern.charAt(i);
+
+            /**
+             * Handle escapes here.  If a character is escaped, then
+             * it assumes its literal value.  This is true for all
+             * characters, both special characters and characters with
+             * no special meaning.  We also interpret '\\uxxxx' Unicode
+             * escapes here.
+             */
+            boolean isLiteral = false;
+            if (c == '\\') {
+                ++i;
+                if (i < pattern.length()) {
+                    c = pattern.charAt(i);
+                    isLiteral = true;
+                    if (c == 'u') {
+                        // i is at the 'u'; the four hex digits occupy
+                        // i+1 .. i+4, so i+4 must be a valid index.
+                        if ((i+4) >= pattern.length()) {
+                            throw new IllegalArgumentException("Invalid \\u escape");
+                        }
+                        c = '\u0000';
+                        // j is the index just past the fourth hex digit;
+                        // the outer index i itself walks over the digits.
+                        for (int j=(++i)+4; i<j; ++i) { // [sic]
+                            int digit = Character.digit(pattern.charAt(i), 16);
+                            if (digit<0) {
+                                throw new IllegalArgumentException("Invalid \\u escape");
+                            }
+                            c = (char) ((c << 4) | digit);
+                        }
+                        --i; // Move i back to last parsed character
+                    }
+                } else {
+                    throw new IllegalArgumentException("Trailing '\\'");
+                }
+            }
+
+            /**
+             * Within this loop, we handle each of the four
+             * conditions: '[', ']', '-', other.  The first three
+             * characters must not be escaped.
+             */
+
+            /**
+             * An opening bracket indicates either the first bracket
+             * of the entire subpattern we are parsing, in which case
+             * we are in node 0 and move into node 10.  We also check
+             * for an immediately following '^', indicating the
+             * complement of the following pattern.  ('^' in any other
+             * position has no special meaning.)  If we are not in
+             * node 0, '[' represents a nested subpattern that must be
+             * recursively parsed and checked for following operators
+             * ('-' or '&').  If two nested subpatterns follow one
+             * another with no operator, their union is formed, just
+             * as with any other elements that follow one another
+             * without intervening operator.  The other thing we
+             * handle here is the syntax "[:Xx:]" or "[:X:]" that
+             * indicates a Unicode category or supercategory.
+             */
+            if (!isLiteral && c == '[') {
+                boolean parseOp = false;
+                char d = charAfter(pattern, i);
+                // "[:...:]" represents a character category
+                if (d == ':') {
+                    if (node == 23) {
+                        throw new IllegalArgumentException("Unexpected \"[:\"");
+                    }
+                    if (node == 21) {
+                        // Flush the pending single character before
+                        // adding the category.
+                        addPair(pairsBuf, first, first);
+                        node = 11;
+                    }
+                    i += 2; // Skip "[:" to the first character of the name
+                    int j = pattern.indexOf(":]", i);
+                    if (j < 0) {
+                        throw new IllegalArgumentException("Missing \":]\"");
+                    }
+                    doUnion(pairsBuf,
+                            getCategoryPairs(pattern.substring(i, j)));
+                    i = j+1; // Point at the ']' of ":]"
+                    if (node == 10) {
+                        node = 11;
+                        parseOp = true;
+                    } else if (node == 0) {
+                        // A bare "[:...:]" is a complete pattern by itself.
+                        break;
+                    }
+                } else {
+                    if (node == 0) {
+                        node = 10;
+                        if (d == '^') {
+                            invert = true;
+                            ++i;
+                        }
+                    } else {
+                        // Nested '['
+                        pos.setIndex(i);
+                        doUnion(pairsBuf, parse(pattern, pos)
+                                .toString());
+                        i = pos.getIndex() - 1; // Subtract 1 to point at ']'
+                        parseOp = true;
+                    }
+                }
+                /**
+                 * parseOp is set after a nested "[...]", or after a
+                 * "[:...:]" that immediately follows the opening "["
+                 * or "[^" (node 10).  It is false otherwise.
+                 * If parseOp is true, we look past the closing
+                 * ']' to see if we have an operator character.  If
+                 * so, we parse the subsequent "[...]" recursively,
+                 * then perform the operation.  We do this in a loop
+                 * until there are no more operators.  Note that this
+                 * means the operators have equal precedence and are
+                 * bound left-to-right.
+                 */
+                if (parseOp) {
+                    for (;;) {
+                        // Is the next character an operator?
+                        char op = charAfter(pattern, i);
+                        if (op == '-' || op == '&') {
+                            pos.setIndex(i+2); // Add 2 to point AFTER op
+                            String rhs = parse(pattern, pos).toString();
+                            if (op == '-') {
+                                doDifference(pairsBuf, rhs);
+                            } else if (op == '&') {
+                                doIntersection(pairsBuf, rhs);
+                            }
+                            i = pos.getIndex() - 1; // - 1 to point at ']'
+                        } else {
+                            break;
+                        }
+                    }
+                }
+            }
+
+            /**
+             * A closing bracket can only be a closing bracket for
+             * "[...]", since the closing bracket for "[:...:]" is
+             * taken care of when the initial "[:" is seen.  When we
+             * see a closing bracket, we then know, if we were in node
+             * 21 (after x) or 23 (after x-) that nothing more is
+             * coming, and we add the last character(s) we saw to the
+             * set.  Note that a trailing '-' assumes its literal
+             * meaning, just as a leading '-' after "[" or "[^".
+             */
+            else if (!isLiteral && c == ']') {
+                if (node == 0) {
+                    throw new IllegalArgumentException("Unexpected ']'");
+                }
+                if (node == 21 || node == 23) {
+                    addPair(pairsBuf, first, first);
+                    if (node == 23) {
+                        addPair(pairsBuf, '-', '-');
+                    }
+                }
+                node = 0;
+                break;
+            }
+
+            /**
+             * '-' has the following interpretations: 1. Within
+             * "[...]", between two letters, it indicates a range.
+             * 2. Between two nested bracket patterns, "[[...]-[...]",
+             * it indicates asymmetric difference.  3. At the start of
+             * a bracket pattern, "[-...]", "[^-...]", it indicates
+             * the literal character '-'.  4. At the end of a bracket
+             * pattern, "[...-]", it indicates the literal character
+             * '-'.
+             *
+             * We handle cases 1 and 3 here.  Case 2 is handled in the
+             * operator loop of the '[' parsing code, and case 4 in the
+             * ']' parsing code.
+             */
+            else if (!isLiteral && c == '-') {
+                if (node == 10) {
+                    addPair(pairsBuf, c, c); // Handle "[-...]", "[^-...]"
+                } else if (node == 21) {
+                    node = 23;
+                } else {
+                    throw new IllegalArgumentException("Unexpected '-'");
+                }
+            }
+
+            /**
+             * If we fall through to this point, we have a literal
+             * character, either one that has been escaped with a
+             * backslash, escaped with a backslash u, or that isn't
+             * a special '[', ']', or '-'.
+             *
+             * Literals can either start a range "x-...", end a range,
+             * "...-x", or indicate a single character "x".
+             */
+            else {
+                if (node == 10 || node == 11) {
+                    first = c;
+                    node = 21;
+                } else if (node == 21) {
+                    // The previous literal was a lone character; add it
+                    // and hold on to the new one.
+                    addPair(pairsBuf, first, first);
+                    first = c;
+                    node = 21;
+                } else if (node == 23) {
+                    if (c < first) {
+                        throw new IllegalArgumentException("Bad range");
+                    }
+                    addPair(pairsBuf, first, c);
+                    node = 11;
+                } else {
+                    throw new IllegalArgumentException("Expected '[', got '" + c + '\'');
+                }
+            }
+        }
+
+        if (node != 0) {
+            throw new IllegalArgumentException("Missing ']'");
+        }
+
+        /**
+         * i indexes the last character we parsed or is
+         * pattern.length().  In the latter case, the node will not be
+         * zero, since we have run off the end without finding a
+         * closing ']'.  Therefore, the above statement will have
+         * thrown an exception, and we'll never get here.  If we get
+         * here, we know i < pattern.length(), and we set the
+         * ParsePosition to the next character to be parsed.
+         *
+         * NOTE(review): the reasoning above does not cover the case
+         * where pos.getIndex() >= pattern.length() on entry.  The loop
+         * then never runs, node is still 0, no exception is thrown,
+         * and an empty pairs list is returned with pos advanced by one.
+         */
+        pos.setIndex(i+1);
+
+        /**
+         * If we saw a '^' after the initial '[' of this pattern, then
+         * perform the complement.  (Inversion after '[:' is handled
+         * elsewhere, by getCategoryPairs(String).)
+         */
+        if (invert) {
+            doComplement(pairsBuf);
+        }
+
+        return pairsBuf;
+    }
+
+    //----------------------------------------------------------------
+    // Implementation: Efficient in-place union & difference
+    //----------------------------------------------------------------
+
+    /**
+     * Performs a union operation: adds the range 'c'-'d' to the given
+     * pairs list.  The pairs list is modified in place.  The result
+     * is normalized (in order and as short as possible), provided the
+     * list was normalized to begin with.  For example,
+     * addPair("am", 'l', 'q') => "aq".  addPair("ampz", 'n', 'o') =>
+     * "az".  The new range may overlap or touch any number of existing
+     * ranges; all of them are merged into one, e.g.
+     * addPair("abde", 'b', 'f') => "af".
+     *
+     * @param pairs the pairs list to be modified; each range is stored
+     * as two consecutive characters (inclusive start, inclusive end).
+     * @param c first character, inclusive, of the range to add
+     * @param d last character, inclusive, of the range to add.  If
+     * <code>c > d</code> the range is empty and the list is left
+     * unchanged.
+     */
+    private static void addPair(StringBuffer pairs, char c, char d) {
+        if (c > d) {
+            // Empty range; same convention as remove(char, char).
+            return;
+        }
+
+        // Work in int so that "+ 1" cannot wrap around at U+FFFF.
+        int lo = c;
+        int hi = d;
+        final int len = pairs.length();
+
+        // Skip every range that ends before lo and does not even touch
+        // it.  These ranges are unaffected and stay in front.
+        int i = 0;
+        while (i < len && pairs.charAt(i+1) + 1 < lo) {
+            i += 2;
+        }
+        final int start = i;
+
+        // Absorb every range that overlaps or touches lo-hi.  hi may
+        // grow while we do this, which can pull in further ranges.
+        while (i < len && pairs.charAt(i) <= hi + 1) {
+            lo = Math.min(lo, pairs.charAt(i));
+            hi = Math.max(hi, pairs.charAt(i+1));
+            i += 2;
+        }
+
+        // Replace the absorbed ranges [start, i) with the merged range.
+        // (Written with setLength/append only, since StringBuffer.delete
+        // and replace are not available in JDK 1.1.)
+        String tail = pairs.toString().substring(i);
+        pairs.setLength(start);
+        pairs.append((char) lo).append((char) hi).append(tail);
+    }
+
+    /**
+     * Performs an asymmetric difference: takes the range 'c'-'d' out
+     * of the pairs list, editing the list in place.  A normalized
+     * list stays normalized.  For example,
+     * removePair("am", 'l', 'q') => "ak".
+     * removePair("ampz", 'l', 'q') => "akrz".
+     *
+     * @param pairs the pairs list to be modified
+     * @param c first character, inclusive, of the range to remove
+     * @param d last character, inclusive, of the range to remove
+     */
+    private static void removePair(StringBuffer pairs, char c, char d) {
+        int idx = 0;
+        while (idx < pairs.length()) {
+            char lo = pairs.charAt(idx);
+            char hi = pairs.charAt(idx + 1);
+
+            if (hi < c) {
+                // lo-hi lies wholly before c-d; keep looking.
+                idx += 2;
+                continue;
+            }
+            if (d < lo) {
+                // lo-hi lies wholly after c-d.  The list is ordered, so
+                // no later range can overlap either.
+                return;
+            }
+
+            // Here lo-hi and c-d overlap.  Work out which pieces of
+            // lo-hi survive the removal.
+            boolean keepHead = c > lo;  // lo .. c-1 survives
+            boolean keepTail = d < hi;  // d+1 .. hi survives
+
+            if (keepHead && keepTail) {
+                // c-d punches a hole in the middle: split into two ranges.
+                pairs.setCharAt(idx + 1, (char)(c-1));
+                pairs.insert(idx + 2, new char[] { (char)(d+1), hi });
+                idx += 4;
+            } else if (keepHead) {
+                // Only the front part remains.
+                pairs.setCharAt(idx + 1, (char)(c-1));
+                idx += 2;
+            } else if (keepTail) {
+                // Only the back part remains.
+                pairs.setCharAt(idx, (char)(d+1));
+                idx += 2;
+            } else {
+                // lo-hi is entirely covered by c-d: drop it.  The next
+                // range slides into position idx, so do not advance.
+                stringBufferDelete(pairs, idx, idx + 2);
+            }
+        }
+    }
+
+    //----------------------------------------------------------------
+    // Implementation: Fundamental operators
+    //----------------------------------------------------------------
+
+    /**
+     * Replaces the contents of the pairs list with the complement of
+     * the set it currently represents.  If the list was normalized (in
+     * order and in shortest possible form) on entry, it is normalized
+     * on return.
+     *
+     * @param pairs the pairs list to be complemented in place
+     */
+    private static void doComplement(StringBuffer pairs) {
+        final int len = pairs.length();
+        if (len == 0) {
+            // The complement of the empty set is everything.
+            pairs.append('\u0000').append('\uffff');
+            return;
+        }
+
+        // Turn every range into the pair of gap boundaries around it:
+        // each start becomes the end of the gap before it (start - 1),
+        // each end becomes the start of the gap after it (end + 1).
+        // E.g. 3-7 9-12 becomes ?-2 8-8 13-?, and the two '?' are
+        // settled below.
+        int k = 0;
+        while (k < len) {
+            char start = pairs.charAt(k);
+            pairs.setCharAt(k, (char) (start - 1));
+            char end = pairs.charAt(k + 1);
+            pairs.setCharAt(k + 1, (char) (end + 1));
+            k += 2;
+        }
+
+        // Leading gap.  If the first range started at U+0000, the
+        // decrement wrapped to U+FFFF and there is no leading gap, so
+        // that boundary is dropped.  Otherwise the gap starts at U+0000.
+        boolean startedAtZero = (pairs.charAt(0) == '\uFFFF');
+        if (startedAtZero) {
+            stringBufferDelete(pairs, 0, 1);
+        } else {
+            pairs.insert(0, '\u0000');
+        }
+
+        // Trailing gap.  If the last range ended at U+FFFF, the
+        // increment wrapped to U+0000 and there is no trailing gap, so
+        // that boundary is dropped.  Otherwise the gap ends at U+FFFF.
+        boolean endedAtMax = (pairs.charAt(pairs.length() - 1) == '\u0000');
+        if (endedAtMax) {
+            pairs.setLength(pairs.length() - 1);
+        } else {
+            pairs.append('\uFFFF');
+        }
+    }
+
+    /**
+     * Given two pairs lists, changes the first in place to represent
+     * the union of the two sets.
+     *
+     * This implementation format was stolen from Richard Gillam's
+     * CharSet class.
+     *
+     * @param pairs the left operand; on return it holds the union
+     * @param c2 the right operand, as a pairs string; not modified
+     */
+    private static void doUnion(StringBuffer pairs, String c2) {
+        StringBuffer result = new StringBuffer();
+        String c1 = pairs.toString();
+
+        // i indexes c1 and j indexes c2.  In both strings an even index
+        // is the start of a range and an odd index is its end point.
+        int i = 0;
+        int j = 0;
+
+        // consider all the characters in both strings
+        while (i < c1.length() && j < c2.length()) {
+            char ub;
+
+            // the first character in the result is the lower of the
+            // starting characters of the two strings, and "ub" gets
+            // set to the upper bound of that range
+            if (c1.charAt(i) < c2.charAt(j)) {
+                result.append(c1.charAt(i));
+                ub = c1.charAt(++i);
+            }
+            else {
+                result.append(c2.charAt(j));
+                ub = c2.charAt(++j);
+            }
+
+            // for as long as one of our two pointers is pointing to a range's
+            // end point, or i is pointing to a character that is less than
+            // "ub" plus one (the "plus one" stitches touching ranges together)...
+            // ("ub + 1" is computed as an int, so it does not wrap at U+FFFF.)
+            while (i % 2 == 1 || j % 2 == 1 || (i < c1.length() && c1.charAt(i)
+                            <= ub + 1)) {
+                // advance i to the first character that is greater than
+                // "ub" plus one
+                while (i < c1.length() && c1.charAt(i) <= ub + 1)
+                    ++i;
+
+                // if i points to the endpoint of a range, update "ub"
+                // to that character, or if i points to the start of
+                // a range and the endpoint of the preceding range is
+                // greater than "ub", update "ub" to _that_ character
+                if (i % 2 == 1)
+                    ub = c1.charAt(i);
+                else if (i > 0 && c1.charAt(i - 1) > ub)
+                    ub = c1.charAt(i - 1);
+
+                // now advance j to the first character that is greater
+                // than "ub" plus one
+                while (j < c2.length() && c2.charAt(j) <= ub + 1)
+                    ++j;
+
+                // if j points to the endpoint of a range, update "ub"
+                // to that character, or if j points to the start of
+                // a range and the endpoint of the preceding range is
+                // greater than "ub", update "ub" to _that_ character
+                if (j % 2 == 1)
+                    ub = c2.charAt(j);
+                else if (j > 0 && c2.charAt(j - 1) > ub)
+                    ub = c2.charAt(j - 1);
+            }
+            // when we finally fall out of this loop, we will have stitched
+            // together a series of ranges that overlap or touch, i and j
+            // will both point to starting points of ranges, and "ub" will
+            // be the endpoint of the range we're working on.  Write "ub"
+            // to the result
+            result.append(ub);
+
+        // loop back around to create the next range in the result
+        }
+
+        // we fall out to here when we've exhausted all the characters in
+        // one of the operands.  We can append all of the remaining characters
+        // in the other operand without doing any extra work.
+        if (i < c1.length())
+            result.append(c1.substring(i));
+        if (j < c2.length())
+            result.append(c2.substring(j));
+
+        // Overwrite the caller's buffer with the merged list.
+        pairs.setLength(0);
+        pairs.append(result.toString());
+    }
+
+    /**
+     * Given two pairs lists, changes the first in place to represent
+     * the asymmetric difference of the two sets.
+     *
+     * @param pairs the left operand; on return it holds the characters
+     * of the original list that are not in <code>pairs2</code>
+     * @param pairs2 the right operand, as a pairs string; not modified
+     */
+    private static void doDifference(StringBuffer pairs, String pairs2) {
+        // A minus B is computed as A intersected with the complement of B.
+        // The complement is taken on a private copy of pairs2.
+        StringBuffer p2 = new StringBuffer(pairs2);
+        doComplement(p2);
+        doIntersection(pairs, p2.toString());
+    }
+
+    /**
+     * Given two pairs lists, changes the first in place to represent
+     * the intersection of the two sets.
+     *
+     * This implementation format was stolen from Richard Gillam's
+     * CharSet class.
+     *
+     * @param pairs the left operand; on return it holds the intersection
+     * @param c2 the right operand, as a pairs string; not modified
+     */
+    private static void doIntersection(StringBuffer pairs, String c2) {
+        StringBuffer result = new StringBuffer();
+        String c1 = pairs.toString();
+
+        // i indexes c1 and j indexes c2.  In both strings an even index
+        // is the start of a range and an odd index is its end point.
+        int i = 0;
+        int j = 0;
+        int oldI;
+        int oldJ;
+
+        // iterate until we've exhausted one of the operands
+        while (i < c1.length() && j < c2.length()) {
+
+            // advance j until it points to a character that is larger than
+            // the one i points to.  If this is the beginning of a one-
+            // character range, advance j to point to the end
+            if (i < c1.length() && i % 2 == 0) {
+                while (j < c2.length() && c2.charAt(j) < c1.charAt(i))
+                    ++j;
+                if (j < c2.length() && j % 2 == 0 && c2.charAt(j) == c1.charAt(i))
+                    ++j;
+            }
+
+            // if j points to the endpoint of a range, save the current
+            // value of i, then advance i until it reaches a character
+            // which is larger than the character pointed at
+            // by j.  All of the characters we've advanced over (except
+            // the one currently pointed to by i) are added to the result
+            oldI = i;
+            while (j % 2 == 1 && i < c1.length() && c1.charAt(i) <= c2.charAt(j))
+                ++i;
+            result.append(c1.substring(oldI, i));
+
+            // if i points to the endpoint of a range, save the current
+            // value of j, then advance j until it reaches a character
+            // which is larger than the character pointed at
+            // by i.  All of the characters we've advanced over (except
+            // the one currently pointed to by j) are added to the result
+            oldJ = j;
+            while (i % 2 == 1 && j < c2.length() && c2.charAt(j) <= c1.charAt(i))
+                ++j;
+            result.append(c2.substring(oldJ, j));
+
+            // advance i until it points to a character larger than j
+            // If it points at the beginning of a one-character range,
+            // advance it to the end of that range
+            if (j < c2.length() && j % 2 == 0) {
+                while (i < c1.length() && c1.charAt(i) < c2.charAt(j))
+                    ++i;
+                if (i < c1.length() && i % 2 == 0 && c2.charAt(j) == c1.charAt(i))
+                    ++i;
+            }
+        }
+
+        // Overwrite the caller's buffer with the intersection.
+        pairs.setLength(0);
+        pairs.append(result.toString());
+    }
+
+    //----------------------------------------------------------------
+    // Implementation: Generation of pairs for Unicode categories
+    //----------------------------------------------------------------
+    
+    /**
+     * Looks up the pairs string for a Unicode category by name.  The
+     * name is either two letters, such as "Lu", naming one category,
+     * or a single letter, such as "L", naming the union of all
+     * two-letter categories that begin with that letter.  Case is
+     * significant.  A leading '^' asks for the complement of the
+     * category that follows it.
+     *
+     * Pairs strings for individual categories such as "Lu" are cached
+     * by {@link #getCategoryPairs(int)}.  Unions for single-letter
+     * names and complements are recomputed on every call.
+     *
+     * @param catName the category name, optionally preceded by '^'
+     * @return the pairs string for the named category
+     * @exception IllegalArgumentException if the name does not match
+     * any supported category
+     */
+    private static String getCategoryPairs(String catName) {
+        // A '^' only counts as the inversion marker when something
+        // follows it.
+        final boolean invert =
+            catName.length() > 1 && catName.charAt(0) == '^';
+        final String name = invert ? catName.substring(1) : catName;
+
+        StringBuffer cat = null;
+
+        switch (name.length()) {
+        case 2: {
+            // CATEGORY_NAMES holds the two-letter codes back to back, so
+            // a real match must start at an even offset; offset / 2 is
+            // then the category index.
+            int offset = CATEGORY_NAMES.indexOf(name);
+            boolean aligned = offset >= 0 && (offset % 2) == 0;
+            if (aligned && (offset / 2) != UNSUPPORTED_CATEGORY) {
+                String pairs = getCategoryPairs(offset / 2);
+                if (!invert) {
+                    return pairs;
+                }
+                cat = new StringBuffer(pairs);
+            }
+            break;
+        }
+        case 1: {
+            // Union together every supported category whose code starts
+            // with the given letter.
+            char letter = name.charAt(0);
+            for (int type = 0; type < CATEGORY_COUNT; ++type) {
+                if (type == UNSUPPORTED_CATEGORY
+                    || CATEGORY_NAMES.charAt(2 * type) != letter) {
+                    continue;
+                }
+                String pairs = getCategoryPairs(type);
+                if (cat == null) {
+                    cat = new StringBuffer(pairs);
+                } else {
+                    doUnion(cat, pairs);
+                }
+            }
+            break;
+        }
+        default:
+            // Names of any other length never match.
+            break;
+        }
+
+        if (cat == null) {
+            throw new IllegalArgumentException("Bad category");
+        }
+        if (invert) {
+            doComplement(cat);
+        }
+        return cat.toString();
+    }
+
+    /**
+     * Returns the pairs string for the category with the given index.
+     * The string is built on first request, stored in
+     * CATEGORY_PAIRS_CACHE, and handed back from there on later calls
+     * with the same argument.
+     *
+     * @param cat the category value, as compared against
+     * <code>Character.getType()</code>
+     * @return the pairs string listing every run of characters from
+     * U+0000 to U+FFFF whose type equals <code>cat</code>
+     */
+    private static String getCategoryPairs(int cat) {
+        String cached = CATEGORY_PAIRS_CACHE[cat];
+        if (cached != null) {
+            return cached;
+        }
+
+        // Scan the whole 16-bit range in ascending order.  Each maximal
+        // run of characters of the wanted type becomes one range, and
+        // because the scan is ordered the ranges can simply be appended.
+        StringBuffer buf = new StringBuffer();
+        int ch = 0;
+        while (ch <= 0xFFFF) {
+            if (Character.getType((char) ch) != cat) {
+                ++ch;
+                continue;
+            }
+            int runStart = ch;
+            // Extend the run for as long as the next character matches.
+            while (ch < 0xFFFF && Character.getType((char) (ch + 1)) == cat) {
+                ++ch;
+            }
+            buf.append((char) runStart).append((char) ch);
+            ++ch;
+        }
+
+        cached = buf.toString();
+        CATEGORY_PAIRS_CACHE[cat] = cached;
+        return cached;
+    }
+
+    //----------------------------------------------------------------
+    // Implementation: Utility methods
+    //----------------------------------------------------------------
+
+    /**
+     * Peeks at the character that follows index <code>i</code> in the
+     * given string.
+     *
+     * @param str the string to look into
+     * @param i the index whose successor is wanted
+     * @return <code>str.charAt(i + 1)</code>, or U+FFFF if
+     * <code>i + 1</code> is at or past the end of the string
+     */
+    private static final char charAfter(String str, int i) {
+        int next = i + 1;
+        if (next < str.length()) {
+            return str.charAt(next);
+        }
+        return '\uFFFF';
+    }
+    
+    /**
+     * Removes the characters at indices start through limit-1 from a
+     * StringBuffer.  JDK 1.1's StringBuffer has no delete method
+     * (Java 2 does), so the same effect is obtained by truncating the
+     * buffer and re-appending whatever followed the deleted span.
+     *
+     * @param buf the buffer to edit in place
+     * @param start inclusive start of range
+     * @param limit exclusive end of range
+     */
+    private static void stringBufferDelete(StringBuffer buf,
+                                           int start, int limit) {
+        // In Java 2 just use:
+        //   buf.delete(start, limit);
+        final int total = buf.length();
+
+        // Save the characters after the deleted span, if there are any.
+        char[] tail = (total > limit) ? new char[total - limit] : null;
+        if (tail != null) {
+            buf.getChars(limit, total, tail, 0);
+        }
+
+        // Cut the buffer back to the start of the span, then restore
+        // the saved tail behind it.
+        buf.setLength(start);
+        if (tail != null) {
+            buf.append(tail);
+        }
+    }
+}
diff --git a/icu4j/src/com/ibm/icu/text/UnicodeToHexTransliterator.java b/icu4j/src/com/ibm/icu/text/UnicodeToHexTransliterator.java
new file mode 100755
index 00000000000..1e688f65fa9
--- /dev/null
+++ b/icu4j/src/com/ibm/icu/text/UnicodeToHexTransliterator.java
@@ -0,0 +1,172 @@
+package com.ibm.text;
+import java.util.*;
+
+/**
+ * A transliterator that converts from Unicode characters to 
+ * hexadecimal Unicode escape sequences.  It outputs a
+ * prefix specified in the constructor and optionally converts the hex
+ * digits to uppercase.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: UnicodeToHexTransliterator.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class UnicodeToHexTransliterator extends Transliterator {
+
+    /**
+     * Package accessible ID for this transliterator.
+     */
+    static String _ID = "Unicode-Hex";
+
+    /**
+     * String emitted in front of the hex digits of every escape.
+     */
+    private String prefix;
+
+    /**
+     * If true, the hex digits are emitted in uppercase.
+     */
+    private boolean uppercase;
+
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Constructs a transliterator.
+     * @param prefix the string that will precede the four hex
+     * digits of each escape sequence
+     * @param uppercase if true, the four hex digits will be
+     * converted to uppercase; otherwise they will be lowercase
+     * @param filter the filter handed to the superclass constructor.
+     * During transliteration, any character for which the current
+     * filter's <code>isIn()</code> returns false is left unchanged; a
+     * <code>null</code> filter means every character is converted.
+     */
+    public UnicodeToHexTransliterator(String prefix, boolean uppercase,
+                                      UnicodeFilter filter) {
+        super(_ID, filter);
+        this.prefix = prefix;
+        this.uppercase = uppercase;
+    }
+
+    /**
+     * Constructs a transliterator with the default prefix "&#092;u"
+     * that outputs uppercase hex digits and applies no filter.
+     */
+    public UnicodeToHexTransliterator() {
+        this("\\u", true, null);
+    }
+
+    /**
+     * Returns the string that precedes the four hex digits.
+     * @return prefix string
+     */
+    public String getPrefix() {
+        return prefix;
+    }
+
+    /**
+     * Sets the string that precedes the four hex digits.
+     *
+     * <p>Callers must take care if a transliterator is in use by
+     * multiple threads.  The prefix should not be changed by one
+     * thread while another thread may be transliterating.
+     * @param prefix prefix string
+     */
+    public void setPrefix(String prefix) {
+        this.prefix = prefix;
+    }
+
+    /**
+     * Returns true if this transliterator outputs uppercase hex digits.
+     * @return the current uppercase mode
+     */
+    public boolean isUppercase() {
+        return uppercase;
+    }
+
+    /**
+     * Sets if this transliterator outputs uppercase hex digits.
+     *
+     * <p>Callers must take care if a transliterator is in use by
+     * multiple threads.  The uppercase mode should not be changed by
+     * one thread while another thread may be transliterating.
+     * @param outputUppercase if true, then this transliterator
+     * outputs uppercase hex digits.
+     */
+    public void setUppercase(boolean outputUppercase) {
+        uppercase = outputUppercase;
+    }
+
+    /**
+     * Transliterates a segment of a string.  <code>Transliterator</code> API.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 &lt;= start
+     * &lt;= limit</code>.
+     * @param limit the ending index, exclusive; <code>start &lt;= limit
+     * &lt;= text.length()</code>.
+     * @return the new limit index
+     */
+    public int transliterate(Replaceable text, int start, int limit) {
+        // Reuse the keyboard implementation, with the cursor placed at start.
+        int[] offsets = { start, limit, start };
+        handleKeyboardTransliterate(text, offsets);
+        return offsets[LIMIT];
+    }
+
+    /**
+     * Implements {@link Transliterator#handleKeyboardTransliterate}.
+     * Changes every character between <code>offsets[CURSOR]</code> and
+     * <code>offsets[LIMIT]</code> that the filter accepts into a Unicode
+     * hexadecimal escape.  For example, '@' -> "U+0040", assuming the
+     * prefix is "U+".  On return, <code>offsets[LIMIT]</code> and
+     * <code>offsets[CURSOR]</code> have been adjusted for the inserted
+     * text.
+     * @param text the text to be modified in place
+     * @param offsets the index array; the CURSOR and LIMIT entries are
+     * read and updated
+     */
+    protected void handleKeyboardTransliterate(Replaceable text,
+                                               int[] offsets) {
+        int cursor = offsets[CURSOR];
+        int limit = offsets[LIMIT];
+
+        UnicodeFilter filter = getFilter();
+
+        while (cursor < limit) {
+            char c = text.charAt(cursor);
+            if (filter != null && !filter.isIn(c)) {
+                // Filtered out: leave the character as it is
+                ++cursor;
+                continue;
+            }
+            String hex = hex(c);
+            text.replace(cursor, cursor+1, hex);
+            int len = hex.length();
+            cursor += len; // Advance cursor by 1 and adjust for new text
+            --len;         // One character was replaced, so net growth is len-1
+            limit += len;
+        }
+
+        offsets[LIMIT] = limit;
+        offsets[CURSOR] = cursor;
+    }
+
+    /**
+     * Return the length of the longest context required by this transliterator.
+     * This is <em>preceding</em> context.  Each character is converted
+     * on its own, so no context is needed.
+     * @return maximum number of preceding context characters this
+     * transliterator needs to examine; always 0
+     */
+    protected int getMaximumContextLength() {
+        return 0;
+    }
+
+    /**
+     * Form escape sequence: the prefix followed by the value of
+     * <code>c</code> as exactly four hex digits, zero-padded on the left.
+     * @param c the character to escape
+     * @return the escape sequence for <code>c</code>
+     */
+    private final String hex(char c) {
+        StringBuffer buf = new StringBuffer();
+        buf.append(prefix);
+        // Integer.toHexString() emits no leading zeros, so pad to 4 digits
+        if (c < 0x1000) {
+            buf.append('0');
+            if (c < 0x100) {
+                buf.append('0');
+                if (c < 0x10) {
+                    buf.append('0');
+                }
+            }
+        }
+        String h = Integer.toHexString(c);
+        buf.append(uppercase ? h.toUpperCase() : h);
+        return buf.toString();
+    }
+}
diff --git a/icu4j/src/com/ibm/test/translit/TransliteratorTest.java b/icu4j/src/com/ibm/test/translit/TransliteratorTest.java
new file mode 100755
index 00000000000..96433f64a26
--- /dev/null
+++ b/icu4j/src/com/ibm/test/translit/TransliteratorTest.java
@@ -0,0 +1,763 @@
+import com.ibm.text.*;
+import java.text.*;
+import java.util.*;
+
+/**
+ * @test
+ * @summary General test of Transliterator
+ */
+public class TransliteratorTest extends IntlTest {
+
+    /**
+     * Command-line entry point: runs this test with the given arguments.
+     */
+    public static void main(String[] args) throws Exception {
+        TransliteratorTest test = new TransliteratorTest();
+        test.run(args);
+    }
+
+    /*
+     * Disabled: a CommonPoint legacy round-trip test for the Kana
+     * transliterator.
+     */
+//    public void TestKanaRoundTrip() {
+//        Transliterator t = Transliterator.getInstance("Kana");
+//        StringTokenizer tok = new StringTokenizer(KANA_RT_DATA);
+//        while (tok.hasMoreTokens()) {
+//            String str = tok.nextToken();
+//            ReplaceableString tmp = new ReplaceableString(str);
+//            t.transliterate(tmp, Transliterator.FORWARD);
+//
+//            str = tmp.toString();
+//            tmp = new ReplaceableString(str);
+//            t.transliterate(tmp, Transliterator.REVERSE);
+//            t.transliterate(tmp, Transliterator.FORWARD);
+//            if (!tmp.toString().equals(str)) {
+//                tmp = new ReplaceableString(str);
+//                t.transliterate(tmp, Transliterator.REVERSE);
+//                String a = tmp.toString();
+//                t.transliterate(tmp, Transliterator.FORWARD);
+//                errln("FAIL: " + escape(str) + " -> " +
+//                      escape(a) + " -> " + escape(tmp.toString()));
+//            }
+//        }
+//    }
+
+    /**
+     * Instantiates every available transliterator twice, checking that
+     * distinct objects come back, then checks that a bogus ID is rejected.
+     */
+    public void TestInstantiation() {
+        long startTime = System.currentTimeMillis();
+        Enumeration ids = Transliterator.getAvailableIDs();
+        while (ids.hasMoreElements()) {
+            String id = (String) ids.nextElement();
+            try {
+                Transliterator first = Transliterator.getInstance(id);
+                // A second request must hand back a different object
+                Transliterator second = Transliterator.getInstance(id);
+                if (first == second) {
+                    errln("FAIL: " + id + " returned identical instances");
+                } else {
+                    logln(id + ":" + first);
+                }
+            } catch (IllegalArgumentException ex) {
+                errln("FAIL: " + id);
+                throw ex;
+            }
+        }
+
+        // Failure path: an unknown ID must raise IllegalArgumentException
+        String bogusID = "<Not a valid Transliterator ID>";
+        try {
+            Transliterator t = Transliterator.getInstance(bogusID);
+            errln("FAIL: " + bogusID + " returned " + t);
+        } catch (IllegalArgumentException ex) {
+            logln("OK: Bogus ID handled properly");
+        }
+
+        long elapsed = System.currentTimeMillis() - startTime;
+        logln("Elapsed time: " + elapsed + " ms");
+    }
+
+    public void TestSimpleRules() {
+        // Each case below builds a RuleBasedTransliterator from a small
+        // rule set and checks one input against its expected output.
+
+        /* Example: rules 1. ab>x|y
+         *                2. yc>z
+         *
+         * []|eabcd  start - no match, copy e to translated buffer
+         * [e]|abcd  match rule 1 - copy output & adjust cursor
+         * [ex|y]cd  match rule 2 - copy output & adjust cursor
+         * [exz]|d   no match, copy d to transliterated buffer
+         * [exzd]|   done
+         */
+        expect("ab>x|y\n" +
+               "yc>z",
+               "eabcd", "exzd");
+
+        /* Another set of rules:
+         *    1. ab>x|yzacw
+         *    2. za>q
+         *    3. qc>r
+         *    4. cw>n
+         *
+         * []|ab       Rule 1
+         * [x|yzacw]   No match
+         * [xy|zacw]   Rule 2
+         * [xyq|cw]    Rule 4
+         * [xyqn]|     Done
+         */
+        expect("ab>x|yzacw\n" +
+               "za>q\n" +
+               "qc>r\n" +
+               "cw>n",
+               "ab", "xyqn");
+
+        /* Test categories
+         *
+         * NOTE(review): the rules define the variables "vowel" and "lu"
+         * and then use them in context; presumably '[' and ']' mark
+         * context boundaries -- confirm against the rule syntax docs.
+         */
+        Transliterator t = new RuleBasedTransliterator("<ID>",
+                                                       "dummy=\uE100\n" +
+                                                       "vowel=[aeiouAEIOU]\n" +
+                                                       "lu=[:Lu:]\n" +
+                                                       "{vowel}[{lu}>!\n" +
+                                                       "{vowel}>&\n" +
+                                                       "!]{lu}>^\n" +
+                                                       "{lu}>*\n" +
+                                                       "a>ERROR");
+        expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
+    }
+
+    // Restore this test if/when it's been deciphered.  In general,
+    // tests that depend on a specific transliterator are subject
+    // to the same fragility as tests that depend on resource data.
+
+//    public void TestKana() {
+//        String DATA[] = {
+//            "a", "\u3042",
+//            "A", "\u30A2",
+//            "aA", "\u3042\u30A2",
+//            "aaaa", "\u3042\u3042\u3042\u3042",
+//            "akasata", "\u3042\u304B\u3055\u305F",
+//        };
+//
+//        Transliterator t = Transliterator.getInstance("Latin-Kana");
+//        Transliterator rt = Transliterator.getInstance("Kana-Latin");
+//        for (int i=0; i<DATA.length; i+=2) {
+//            expect(t, DATA[i], DATA[i+1], rt);
+//        }
+//    }
+
+
+    /**
+     * Create some inverses and confirm that they work.  We have to be
+     * careful how we do this, since the inverses will not be true
+     * inverses -- we can't throw any random string at the composition
+     * of the transliterators and expect the identity function.  F x
+     * F' != I.  However, if we are careful about the input, we will
+     * get the expected results.
+     */
+    public void TestRuleBasedInverse() {
+        // One rule string holds both the '>' rules and the '<' rules;
+        // the same string is used to build both transliterators below.
+        String RULES =
+            "abc>zyx\n" +
+            "ab>yz\n" +
+            "bc>zx\n" +
+            "ca>xy\n" +
+            "a>x\n" +
+            "b>y\n" +
+            "c>z\n" +
+
+            "abc<zyx\n" +
+            "ab<yz\n" +
+            "bc<zx\n" +
+            "ca<xy\n" +
+            "a<x\n" +
+            "b<y\n" +
+            "c<z\n" +
+
+            "";
+
+        // Pairs of strings: DATA[i] is the forward input, DATA[i+1] the
+        // forward output (and vice versa for the reverse direction).
+        String[] DATA = {
+            // Careful here -- random strings will not work.  If we keep
+            // the left side to the domain and the right side to the range
+            // we will be okay though (left, abc; right xyz).
+            "a", "x",
+            "abcacab", "zyxxxyy",
+            "caccb", "xyzzy",
+        };
+
+        Transliterator fwd = new RuleBasedTransliterator("<ID>", RULES);
+        Transliterator rev = new RuleBasedTransliterator("<ID>", RULES,
+                                     RuleBasedTransliterator.REVERSE, null);
+        // Check each pair in both directions
+        for (int i=0; i<DATA.length; i+=2) {
+            expect(fwd, DATA[i], DATA[i+1]);
+            expect(rev, DATA[i+1], DATA[i]);
+        }
+    }
+
+    /**
+     * Basic test of keyboard.
+     */
+    public void TestKeyboard() {
+        Transliterator t = new RuleBasedTransliterator("<ID>", 
+                                                       "psch>Y\n"
+                                                       +"ps>y\n"
+                                                       +"ch>x\n"
+                                                       +"a>A\n");
+        // Pairs of (text inserted, expected buffer contents afterwards);
+        // keyboardAux() feeds them to the transliterator in order.
+        String DATA[] = {
+            // insertion, buffer
+            "a", "A",
+            "p", "Ap",
+            "s", "Aps",
+            "c", "Apsc",
+            "a", "AycA",
+            "psch", "AycAY",
+            null, "AycAY", // null means finishKeyboardTransliteration
+        };
+
+        keyboardAux(t, DATA);
+    }
+
+    /**
+     * Basic test of keyboard with cursor.
+     */
+    public void TestKeyboard2() {
+        // Same idea as TestKeyboard, but the "ps" rule carries a '|'
+        // cursor marker in its output.
+        Transliterator t = new RuleBasedTransliterator("<ID>", 
+                                                       "ych>Y\n"
+                                                       +"ps>|y\n"
+                                                       +"ch>x\n"
+                                                       +"a>A\n");
+        // Pairs of (text inserted, expected buffer contents afterwards)
+        String DATA[] = {
+            // insertion, buffer
+            "a", "A",
+            "p", "Ap",
+            "s", "Ay",
+            "c", "Ayc",
+            "a", "AycA",
+            "p", "AycAp",
+            "s", "AycAy",
+            "c", "AycAyc",
+            "h", "AycAY",
+            null, "AycAY", // null means finishKeyboardTransliteration
+        };
+
+        keyboardAux(t, DATA);
+    }
+
+    /**
+     * Test keyboard transliteration with back-replacement.
+     */
+    public void TestKeyboard3() {
+        // We want th>z but t>y.  Furthermore, during keyboard
+        // transliteration we want t>y then yh>z if t, then h are
+        // typed.
+        String RULES =
+            "t>|y\n" +
+            "yh>z\n" +
+            "";
+
+        String[] DATA = {
+            // Column 1: characters to add to buffer (as if typed)
+            // Column 2: expected appearance of buffer after
+            //           keyboard xliteration.
+            "a", "a",
+            "b", "ab",
+            "t", "aby",
+            "c", "abyc",
+            "t", "abycy",
+            "h", "abycz",
+            null, "abycz", // null means finishKeyboardTransliteration
+        };
+
+        // Feed the table to the transliterator one row at a time
+        Transliterator t = new RuleBasedTransliterator("<ID>", RULES);
+        keyboardAux(t, DATA);
+    }
+
+    /**
+     * Drives a keyboard transliteration session from a table.
+     * @param t the transliterator under test
+     * @param DATA pairs of (text to insert, expected buffer contents);
+     * a null insertion means finishKeyboardTransliteration is called
+     */
+    private void keyboardAux(Transliterator t, String[] DATA) {
+        ReplaceableString buffer = new ReplaceableString();
+        int[] offsets = {0, 0, 0};
+        for (int row = 0; row < DATA.length; row += 2) {
+            String typed = DATA[row];
+            StringBuffer log;
+            if (typed == null) {
+                log = new StringBuffer(buffer.toString() + " => ");
+                t.finishKeyboardTransliteration(buffer, offsets);
+            } else {
+                log = new StringBuffer(buffer.toString() + " + "
+                                       + typed
+                                       + " -> ");
+                t.keyboardTransliterate(buffer, offsets, typed);
+            }
+
+            // Render the buffer with '{' at the start index and '|' at
+            // the cursor
+            String current = buffer.toString();
+            int start = offsets[Transliterator.START];
+            int cursor = offsets[Transliterator.CURSOR];
+            log.append(current.substring(0, start));
+            log.append('{');
+            log.append(current.substring(start, cursor));
+            log.append('|');
+            log.append(current.substring(cursor));
+
+            if (!current.equals(DATA[row + 1])) {
+                errln("FAIL: " + log.toString() + ", expected " + DATA[row + 1]);
+            } else {
+                logln(log.toString());
+            }
+        }
+    }
+
+    /**
+     * Runs the "Latin-Arabic" transliterator over a table of
+     * (source, expected result) pairs.
+     */
+    public void TestArabic() {
+        // NOTE(review): the only source string is the Latin word
+        // "Arabic", while the expected text is much longer -- confirm
+        // that this pair is really what the transliterator should yield.
+        String DATA[] = {
+            "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
+                      "\u0627\u0644\u0644\u063a\u0629\u0020"+
+                      "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
+                      "\u0628\u0628\u0646\u0638\u0645\u0020"+
+                      "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
+                      "\u062c\u0645\u064a\u0644\u0629",
+        };
+
+        Transliterator t = Transliterator.getInstance("Latin-Arabic");
+        for (int i=0; i<DATA.length; i+=2) {
+            expect(t, DATA[i], DATA[i+1]);
+        }
+    }
+
+    /**
+     * Compose the Kana transliterator forward and reverse and try
+     * some strings that should come out unchanged.
+     */
+    public void TestCompoundKana() {
+        // Chain Latin-Kana with Kana-Latin
+        Transliterator toKana = Transliterator.getInstance("Latin-Kana");
+        Transliterator fromKana = Transliterator.getInstance("Kana-Latin");
+        Transliterator roundTrip = new CompoundTransliterator("<ID>",
+                new Transliterator[] { toKana, fromKana });
+
+        // This input is expected to come back unchanged
+        expect(roundTrip, "aaaaa", "aaaaa");
+    }
+
+    /**
+     * Compose the hex transliterators forward and reverse.
+     */
+    public void TestCompoundHex() {
+        Transliterator toHex = Transliterator.getInstance("Unicode-Hex");
+        Transliterator fromHex = Transliterator.getInstance("Hex-Unicode");
+        String plain = "abcde";
+
+        // Unicode-Hex followed by Hex-Unicode must give back the input
+        Transliterator roundTrip = new CompoundTransliterator("ab",
+                new Transliterator[] { toHex, fromHex });
+        expect(roundTrip, plain, plain);
+
+        // Hex-Unicode followed by Unicode-Hex must leave already
+        // escaped text unchanged
+        Transliterator reverseTrip = new CompoundTransliterator("ba",
+                new Transliterator[] { fromHex, toHex });
+        ReplaceableString escaped = new ReplaceableString(plain);
+        toHex.transliterate(escaped);
+        expect(reverseTrip, escaped.toString(), escaped.toString());
+    }
+
+    /**
+     * Do some basic tests of filtering.
+     */
+    public void TestFiltering() {
+        Transliterator hex = Transliterator.getInstance("Unicode-Hex");
+        // Filter out 'c' so that it must pass through unescaped
+        hex.setFilter(new UnicodeFilter() {
+            public boolean isIn(char c) {
+                return c != 'c';
+            }
+        });
+        String s = "abcde";
+        String out = hex.transliterate(s);
+        String exp = "\\u0061\\u0062c\\u0064\\u0065";
+        if (out.equals(exp)) {
+            logln("Ok:   \"" + exp + "\"");
+        } else {
+            // Report through errln, like every other failure in this
+            // class; with logln a mismatch could never fail the test.
+            errln("FAIL: \"" + out + "\", wanted \"" + exp + "\"");
+        }
+    }
+
+    //======================================================================
+    // Support methods
+    //======================================================================
+
+    /**
+     * Builds a rule-based transliterator from the given rules and checks
+     * that it turns source into expectedResult.
+     */
+    void expect(String rules, String source, String expectedResult) {
+        Transliterator fromRules = new RuleBasedTransliterator("<ID>", rules);
+        expect(fromRules, source, expectedResult);
+    }
+
+    /**
+     * Checks t on source and, when a reverse transliterator is supplied,
+     * also checks that it maps expectedResult back to source.
+     */
+    void expect(Transliterator t, String source, String expectedResult,
+                Transliterator reverseTransliterator) {
+        expect(t, source, expectedResult);
+        if (reverseTransliterator == null) {
+            return;
+        }
+        expect(reverseTransliterator, expectedResult, source);
+    }
+
+    /**
+     * Checks that t produces expectedResult from source through three
+     * routes: the String API, the Replaceable API, and keyboard
+     * (incremental) transliteration one character at a time.
+     */
+    void expect(Transliterator t, String source, String expectedResult) {
+        // Route 1: String in, String out
+        String fromString = t.transliterate(source);
+        expectAux(t.getID() + ":String", source, fromString, expectedResult);
+
+        // Route 2: in-place on a Replaceable
+        ReplaceableString buffer = new ReplaceableString(source);
+        t.transliterate(buffer);
+        String fromReplaceable = buffer.toString();
+        expectAux(t.getID() + ":Replaceable", source, fromReplaceable,
+                  expectedResult);
+
+        // Route 3: keyboard transliteration.  Start from an empty buffer
+        // and insert the source one character at a time; the final
+        // result must match the other two routes.
+        buffer.getStringBuffer().setLength(0);
+        int[] offsets = { 0, 0, 0 };
+        StringBuffer trace = new StringBuffer();
+        int length = source.length();
+        for (int pos = 0; pos < length; ++pos) {
+            char typed = source.charAt(pos);
+            if (pos > 0) {
+                trace.append(" + ");
+            }
+            trace.append(typed).append(" -> ");
+            t.keyboardTransliterate(buffer, offsets, String.valueOf(typed));
+            // Log the buffer with a '|' at the cursor index
+            String snapshot = buffer.toString();
+            int cursor = offsets[Transliterator.CURSOR];
+            trace.append(snapshot.substring(0, cursor));
+            trace.append('|');
+            trace.append(snapshot.substring(cursor));
+        }
+
+        // Flush any partial matches that were still waiting for input
+        t.finishKeyboardTransliteration(buffer, offsets);
+        String finished = buffer.toString();
+        trace.append(" => ").append(finished);
+
+        expectAux(t.getID() + ":Keyboard", trace.toString(),
+                  finished.equals(expectedResult),
+                  expectedResult);
+    }
+
+    /**
+     * Compares result with expectedResult and reports the outcome with a
+     * "source -> result" summary.
+     */
+    void expectAux(String tag, String source,
+                   String result, String expectedResult) {
+        String summary = source + " -> " + result;
+        boolean matches = result.equals(expectedResult);
+        expectAux(tag, summary, matches, expectedResult);
+    }
+    
+    /**
+     * Logs the summary when pass is true; otherwise reports an error
+     * that also names the expected result.
+     */
+    void expectAux(String tag, String summary, boolean pass,
+                   String expectedResult) {
+        if (!pass) {
+            errln("FAIL: ("+tag+") "
+                  + escape(summary)
+                  + ", expected " + escape(expectedResult));
+            return;
+        }
+        logln("("+tag+") " + escape(summary));
+    }
+    
+    /**
+     * Escape non-ASCII characters as Unicode.
+     */
+    public static final String escape(String s) {
+        StringBuffer out = new StringBuffer();
+        int length = s.length();
+        for (int pos = 0; pos < length; ++pos) {
+            char ch = s.charAt(pos);
+            boolean needsEscape = ch < ' ' || ch > 0x007F;
+            if (!needsEscape) {
+                out.append(ch);
+                continue;
+            }
+            // Backslash, 'u', then exactly four lowercase hex digits
+            out.append("\\u");
+            String digits = Integer.toHexString(ch);
+            for (int pad = digits.length(); pad < 4; ++pad) {
+                out.append('0');
+            }
+            out.append(digits);
+        }
+        return out.toString();
+    }
+
+    /*
+    static final String KANA_RT_DATA =
+"a "+
+
+"ba bi bu be bo "+
+"bya byi byu bye byo "+
+"bba "+
+
+"da di du de do "+
+"dya dyi dyu dye dyo "+
+"dha dhi dhu dhe dho "+
+"dda "+
+
+"e "+
+
+"fa fi fe fo "+
+"fya fyu fyo "+
+"ffa "+
+
+"ga gi gu ge go "+
+"gya gyi gyu gye gyo "+
+"gwa gwi gwu gwe gwo "+
+"gga "+
+
+"ha hi hu he ho "+
+"hya hyi hyu hye hyo "+
+"hha "+
+
+"i "+
+
+"ka ki ku ke ko "+
+"kwa kwi kwu kwe kwo "+
+"kya kyi kyu kye kyo "+
+"kka "+
+
+"ma mi mu me mo "+
+"mya myi myu mye myo "+
+"mba mfa mma mpa mva "+
+"m'' "+
+
+"na ni nu ne no "+
+"nya nyi nyu nye nyo "+
+"nn n'' n "+
+
+"o "+
+
+"pa pi pu pe po "+
+"pya pyi pyu pye pyo "+
+"ppa "+
+
+"qa qi qu qe qo "+
+"qya qyi qyu qye qyo "+
+"qqa "+
+
+"ra ri ru re ro "+
+"rya ryi ryu rye ryo "+
+"rra "+
+
+"sa si su se so "+
+"sya syi syu sye syo "+
+"ssya ssa "+
+
+"ta ti tu te to "+
+"tha thi thu the tho "+
+"tsa tsi tse tso "+
+"tya tyi tyu tye tyo "+
+"ttsa "+
+"tta "+
+
+"u "+
+
+"va vi vu ve vo "+
+"vya vyi vyu vye vyo "+
+"vva "+
+
+"wa wi we wo "+
+"wwa "+
+
+"ya yu ye yo "+
+"yya "+
+
+"za zi zu ze zo "+
+"zya zyi zyu zye zyo "+
+"zza "+
+
+"xa xi xu xe xo "+
+"xka xke "+
+"xtu "+
+"xwa "+
+"xya xyu xyo "+
+
+        "akka akki akku akke akko "+
+        "akkya akkyu akkyo "+
+
+        "atta atti attu atte atto "+
+        "attya attyu attyo "+
+        "adda addi addu adde addo "+
+
+        "atcha atchi atchu atche atcho "+
+
+        "assa assi assu asse asso "+
+        "assya assyu assyo "+
+
+        "ahha ahhi ahhu ahhe ahho "+
+        "appa appi appu appe appo "+
+
+        "an "+
+        "ana ani anu ane ano "+
+        "anna anni annu anne anno "+
+        "an'a an'i an'u an'e an'o "+
+
+        "annna annni annnu annne annno "+
+        "an'na an'ni an'nu an'ne an'no "+
+
+        "anka anki anku anke anko "+
+        "anga angi angu ange ango "+
+
+        "ansa ansi ansu anse anso "+
+        "anza anzi anzu anze anzo "+
+        "anzya anzyu anzyo "+
+
+        "anta anti antu ante anto "+
+        "antya antyu antyo "+
+        "anda andi andu ande ando "+
+
+        "ancha anchi anchu anche ancho "+
+        "anja anji anju anje anjo "+
+        "antsa antsu antso "+
+
+        "anpa anpi anpu anpe anpo "+
+        "ampa ampi ampu ampe ampo "+
+
+        "anba anbi anbu anbe anbo "+
+        "amba ambi ambu ambe ambo "+
+
+        "anma anmi anmu anme anmo "+
+        "amma ammi ammu amme ammo "+
+
+        "anwa anwi anwu anwe anwo "+
+
+        "anha anhi anhu anhe anho "+
+
+        "anya anyi anyu anye anyo "+
+        "annya annyi annyu annye annyo "+
+        "an'ya an'yi an'yu an'ye an'yo "+
+
+        "kkk "+
+        "ggg "+
+        "sss "+
+        "zzz "+
+        "ttt "+
+        "ddd "+
+        "nnn "+
+        "hhh "+
+        "bbb "+
+        "ppp "+
+        "mmm "+
+        "yyy "+
+        "rrr "+
+        "www ";
+*/
+
+        /*+
+
+        "A I U E O "+
+        "XA XI XU XE XO "+
+
+        "KA KI KU KE KO "+
+        "KYA KYI KYU KYE KYO "+
+        "KWA KWI KWU KWE KWO "+
+        "QA QI QU QE QO "+
+        "QYA QYI QYU QYE QYO "+
+        "XKA XKE "+
+
+        "GA GI GU GE GO "+
+        "GYA GYI GYU GYE GYO "+
+        "GWA GWI GWU GWE GWO "+
+
+        "SA SI SU SE SO  "+
+        "SHA SHI SHU SHE SHO "+
+        "SYA SYI SYU SYE SYO "+
+
+        "ZA ZI ZU ZE ZO "+
+        "ZYA ZYI ZYU ZYE ZYO "+
+        "JA JI JU JE JO "+
+        "JYA JYU JYO "+
+
+        "TA TI TU TE TO "+
+        "XTU XTSU "+
+        "TYA TYU TYO "+
+        "CYA CYU CYO "+
+        "CHA CHI CHU CHE CHO "+
+        "TSA TSI TSU TSE TSO "+
+        "DA DI DU DE DO "+
+        "DYA DYU DYO "+
+        "THA THI THU THE THO "+
+        "DHA DHI DHU DHE DHO "+
+
+        "NA NI NU NE NO "+
+        "NYA NYU NYO "+
+
+        "HA HI HU HE HO "+
+        "HYA HYU HYO "+
+        "FA FI FU FE FO "+
+        "FYA FYU FYO "+
+        "BA BI BU BE BO "+
+        "BYA BYU BYO "+
+        "PA PI PU PE PO "+
+        "PYA PYU PYO "+
+
+        "MA MI MU ME MO "+
+        "MYA MYU MYO "+
+        "YA YI YU YE YO "+
+        "XYA XYI XYU XYE XYO "+
+
+        "RA RI RU RE RO "+
+        "LA LI LU LE LO "+
+        "RYA RYI RYU RYE RYO "+
+        "LYA LYI LYU LYE LYO "+
+
+        "WA WI WU WE WO "+
+        "VA VI VU VE VO "+
+        "VYA VYU VYO "+
+
+        "CYA CYI CYU CYE CYO "+
+
+        "NN "+
+        "N' "+
+        "N "+
+
+        "AKKA AKKI AKKU AKKE AKKO "+
+        "AKKYA AKKYU AKKYO "+
+
+        "ATTA ATTI ATTU ATTE ATTO "+
+        "ATTYA ATTYU ATTYO "+
+        "ADDA ADDI ADDU ADDE ADDO "+
+
+        "ATCHA ATCHI ATCHU ATCHE ATCHO "+
+
+        "ASSA ASSI ASSU ASSE ASSO "+
+        "ASSYA ASSYU ASSYO "+
+
+        "AHHA AHHI AHHU AHHE AHHO "+
+        "APPA APPI APPU APPE APPO "+
+
+        "AN "+
+        "ANA ANI ANU ANE ANO "+
+        "ANNA ANNI ANNU ANNE ANNO "+
+        "AN'A AN'I AN'U AN'E AN'O "+
+
+        "ANNNA ANNNI ANNNU ANNNE ANNNO "+
+        "AN'NA AN'NI AN'NU AN'NE AN'NO "+
+
+        "ANKA ANKI ANKU ANKE ANKO "+
+        "ANGA ANGI ANGU ANGE ANGO "+
+
+        "ANSA ANSI ANSU ANSE ANSO "+
+        "ANZA ANZI ANZU ANZE ANZO "+
+        "ANZYA ANZYU ANZYO "+
+
+        "ANTA ANTI ANTU ANTE ANTO "+
+        "ANTYA ANTYU ANTYO "+
+        "ANDA ANDI ANDU ANDE ANDO "+
+
+        "ANCHA ANCHI ANCHU ANCHE ANCHO "+
+        "ANJA ANJI ANJU ANJE ANJO "+
+        "ANTSA ANTSU ANTSO "+
+
+        "ANPA ANPI ANPU ANPE ANPO "+
+        "AMPA AMPI AMPU AMPE AMPO "+
+
+        "ANBA ANBI ANBU ANBE ANBO "+
+        "AMBA AMBI AMBU AMBE AMBO "+
+
+        "ANMA ANMI ANMU ANME ANMO "+
+        "AMMA AMMI AMMU AMME AMMO "+
+
+        "ANWA ANWI ANWU ANWE ANWO "+
+
+        "ANHA ANHI ANHU ANHE ANHO "+
+
+        "ANYA ANYI ANYU ANYE ANYO "+
+        "ANNYA ANNYI ANNYU ANNYE ANNYO "+
+        "AN'YA AN'YI AN'YU AN'YE AN'YO "+
+
+        "KKK "+
+        "GGG "+
+        "SSS "+
+        "ZZZ "+
+        "TTT "+
+        "DDD "+
+        "NNN "+
+        "HHH "+
+        "BBB "+
+        "PPP "+
+        "MMM "+
+        "YYY "+
+        "RRR "+
+        "WWW";*/
+}
diff --git a/icu4j/src/com/ibm/test/translit/UnicodeSetTest.java b/icu4j/src/com/ibm/test/translit/UnicodeSetTest.java
new file mode 100755
index 00000000000..8417faf4b44
--- /dev/null
+++ b/icu4j/src/com/ibm/test/translit/UnicodeSetTest.java
@@ -0,0 +1,118 @@
+import com.ibm.text.*;
+import java.text.*;
+import java.util.*;
+
+/**
+ * @test
+ * @summary General test of UnicodeSet
+ */
+public class UnicodeSetTest extends IntlTest {
+
+    /**
+     * Command-line entry point: runs this test with the given arguments.
+     */
+    public static void main(String[] args) throws Exception {
+        UnicodeSetTest test = new UnicodeSetTest();
+        test.run(args);
+    }
+
+    /**
+     * Applies a series of patterns to one set and checks the resulting
+     * pair list after each, then checks complement().
+     */
+    public void TestPatterns() {
+        UnicodeSet set = new UnicodeSet();
+        expectPattern(set, "[[a-m]&[d-z]&[k-y]]", "km");
+        expectPattern(set, "[[a-z]-[m-y]-[d-r]]", "aczz");
+        expectPattern(set, "[a\\-z]", "--aazz");
+        expectPattern(set, "[-az]", "--aazz");
+        expectPattern(set, "[az-]", "--aazz");
+        expectPattern(set, "[[[a-z]-[aeiou]i]]", "bdfnptvz");
+
+        // Complement the last set and check the resulting pair list
+        set.complement();
+        StringBuffer complemented = new StringBuffer();
+        complemented.append('\u0000');
+        complemented.append("aeeoouu");
+        complemented.append((char)('z'+1));
+        complemented.append('\uFFFF');
+        expectPairs(set, complemented.toString());
+    }
+
+    /**
+     * Checks range-wise and set-wise add and remove operations.
+     */
+    public void TestAddRemove() {
+        UnicodeSet set = new UnicodeSet();
+
+        // Range-wise operations, checked after every step
+        set.add('a', 'z');
+        expectPairs(set, "az");
+        set.remove('m', 'p');
+        expectPairs(set, "alqz");
+        set.remove('e', 'g');
+        expectPairs(set, "adhlqz");
+        set.remove('d', 'i');
+        expectPairs(set, "acjlqz");
+        set.remove('c', 'r');
+        expectPairs(set, "absz");
+        set.add('f', 'q');
+        expectPairs(set, "abfqsz");
+        set.remove('a', 'g');
+        expectPairs(set, "hqsz");
+        set.remove('a', 'z');
+        expectPairs(set, "");
+
+        // removeAll: subtract one whole set from another
+        expectPattern(set, "[c-x]", "cx");
+        UnicodeSet set2 = new UnicodeSet();
+        expectPattern(set2, "[f-ky-za-bc[vw]]", "acfkvwyz");
+        set.removeAll(set2);
+        expectPairs(set, "deluxx");
+
+        // addAll: merge one whole set into another
+        expectPattern(set, "[jackiemclean]", "aacceein");
+        expectPattern(set2, "[hitoshinamekatajamesanderson]", "aadehkmort");
+        set.addAll(set2);
+        expectPairs(set, "aacehort");
+
+        // Same merge with the operands swapped must give the same result
+        expectPattern(set, "[hitoshinamekatajamesanderson]", "aadehkmort");
+        expectPattern(set2, "[jackiemclean]", "aacceein");
+        set.addAll(set2);
+        expectPairs(set, "aacehort");
+    }
+
+    /**
+     * Applies pattern to set and checks that the set's pair list equals
+     * expectedPairs, logging success or reporting failure.
+     */
+    void expectPattern(UnicodeSet set,
+                       String pattern,
+                       String expectedPairs) {
+        set.applyPattern(pattern);
+        if (set.getPairs().equals(expectedPairs)) {
+            logln("Ok:   applyPattern(\"" + pattern +
+                  "\") => pairs \"" +
+                  escape(set.getPairs()) + "\"");
+        } else {
+            errln("FAIL: applyPattern(\"" + pattern +
+                  "\") => pairs \"" +
+                  escape(set.getPairs()) + "\", expected \"" +
+                  escape(expectedPairs) + "\"");
+        }
+    }
+
+    /**
+     * Reports an error unless the set's pair list equals expectedPairs.
+     */
+    void expectPairs(UnicodeSet set, String expectedPairs) {
+        if (set.getPairs().equals(expectedPairs)) {
+            return;
+        }
+        errln("FAIL: Expected pair list \"" +
+              escape(expectedPairs) + "\", got \"" +
+              escape(set.getPairs()) + "\"");
+    }
+
+    /**
+     * Escape non-ASCII characters as Unicode.
+     */
+    static final String escape(String s) {
+        StringBuffer out = new StringBuffer();
+        int length = s.length();
+        for (int pos = 0; pos < length; ++pos) {
+            char ch = s.charAt(pos);
+            boolean needsEscape = ch < ' ' || ch > 0x007F;
+            if (!needsEscape) {
+                out.append(ch);
+                continue;
+            }
+            // Backslash, 'u', then exactly four lowercase hex digits
+            out.append("\\u");
+            String digits = Integer.toHexString(ch);
+            for (int pad = digits.length(); pad < 4; ++pad) {
+                out.append('0');
+            }
+            out.append(digits);
+        }
+        return out.toString();
+    }
+}
diff --git a/icu4j/src/com/ibm/text/CompoundTransliterator.java b/icu4j/src/com/ibm/text/CompoundTransliterator.java
new file mode 100755
index 00000000000..c3582237d42
--- /dev/null
+++ b/icu4j/src/com/ibm/text/CompoundTransliterator.java
@@ -0,0 +1,285 @@
+package com.ibm.text;
+
+import java.util.Enumeration;
+import java.util.Vector;
+
+/**
+ * A transliterator that is composed of two or more other
+ * transliterator objects linked together.  For example, if one
+ * transliterator transliterates from script A to script B, and
+ * another transliterates from script B to script C, the two may be
+ * combined to form a new transliterator from A to C.
+ *
+ * <p>Composed transliterators may not behave as expected.  For
+ * example, inverses may not combine to form the identity
+ * transliterator.  See the class documentation for {@link
+ * Transliterator} for details.
+ *
+ * <p>If a non-<tt>null</tt> <tt>UnicodeFilter</tt> is applied to a
+ * <tt>CompoundTransliterator</tt>, it has the effect of being
+ * logically <b>and</b>ed with the filter of each transliterator in
+ * the chain.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: CompoundTransliterator.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class CompoundTransliterator extends Transliterator {
+
+    private static final boolean DEBUG = false;
+
+    /** The component transliterators, applied in array order. */
+    private final Transliterator[] trans;
+
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Constructs a new compound transliterator given an array of
+     * transliterators.  The array of transliterators may be of any
+     * length, including zero or one, however, useful compound
+     * transliterators have at least two components.  The array is
+     * copied, so later changes to it do not affect this object.
+     * @param ID the identifier passed to the superclass constructor
+     * @param transliterators array of <code>Transliterator</code>
+     * objects
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     */
+    public CompoundTransliterator(String ID, Transliterator[] transliterators,
+                                  UnicodeFilter filter) {
+        super(ID, filter);
+        trans = new Transliterator[transliterators.length];
+        System.arraycopy(transliterators, 0, trans, 0, trans.length);
+    }
+
+    /**
+     * Constructs a new compound transliterator with no filter, given
+     * an array of transliterators.  The array of transliterators may
+     * be of any length, including zero or one, however, useful
+     * compound transliterators have at least two components.
+     * @param ID the identifier passed to the superclass constructor
+     * @param transliterators array of <code>Transliterator</code>
+     * objects
+     */
+    public CompoundTransliterator(String ID, Transliterator[] transliterators) {
+        this(ID, transliterators, null);
+    }
+
+    /**
+     * Returns the number of transliterators in this chain.
+     * @return number of transliterators in this chain.
+     */
+    public int getCount() {
+        return trans.length;
+    }
+
+    /**
+     * Returns the transliterator at the given index in this chain.
+     * @param index index into chain, from 0 to <code>getCount() - 1</code>
+     * @return transliterator at the given index
+     */
+    public Transliterator getTransliterator(int index) {
+        return trans[index];
+    }
+
+    /**
+     * Transliterates a segment of a string.  <code>Transliterator</code> API.
+     * Each component is run in turn over the same start index, with the
+     * limit returned by one component passed to the next.  If this
+     * compound transliterator has a filter, it is combined with each
+     * component's filter for the duration of the call, exactly as in
+     * {@link #handleKeyboardTransliterate}.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @return the new limit index
+     */
+    public int transliterate(Replaceable text, int start, int limit) {
+        UnicodeFilter[] saved = applyCompoundFilter();
+        try {
+            for (int i=0; i<trans.length; ++i) {
+                limit = trans[i].transliterate(text, start, limit);
+            }
+        } finally {
+            // Put the component filters back even if a component throws.
+            restoreFilters(saved);
+        }
+        return limit;
+    }
+
+    /**
+     * Implements {@link Transliterator#handleKeyboardTransliterate}.
+     */
+    protected void handleKeyboardTransliterate(Replaceable text,
+                                               int[] index) {
+        /* Call each transliterator with the same start value and
+         * initial cursor index, but with the limit index as modified
+         * by preceding transliterators.  The cursor index must be
+         * reset for each transliterator to give each a chance to
+         * transliterate the text.  The initial cursor index is known
+         * to still point to the same place after each transliterator
+         * is called because each transliterator will not change the
+         * text between start and the initial value of cursor.
+         *
+         * IMPORTANT: After the first transliterator, each subsequent
+         * transliterator only gets to transliterate text committed by
+         * preceding transliterators; that is, the cursor (output
+         * value) of transliterator i becomes the limit (input value)
+         * of transliterator i+1.  Finally, the overall limit is fixed
+         * up before we return.
+         *
+         * Assumptions we make here:
+         * (1) start <= cursor <= limit    ;cursor valid on entry
+         * (2) cursor <= cursor' <= limit' ;cursor doesn't move back
+         * (3) cursor <= limit'            ;text before cursor unchanged
+         * - cursor' is the value of cursor after calling handleKT
+         * - limit' is the value of limit after calling handleKT
+         */
+
+        /**
+         * Example: 3 transliterators.  This example illustrates the
+         * mechanics we need to implement.  S, C, and L are the start,
+         * cursor, and limit.  gl is the globalLimit.
+         *
+         * 1. h-u, changes hex to Unicode
+         *
+         *    4  7  a  d  0      4  7  a
+         *    abc/u0061/u    =>  abca/u
+         *    S  C       L       S   C L   gl=f->a
+         *
+         * 2. upup, changes "x" to "XX"
+         *
+         *    4  7  a       4  7  a
+         *    abca/u    =>  abcAA/u
+         *    S  CL         S    C
+         *                       L    gl=a->b
+         * 3. u-h, changes Unicode to hex
+         *
+         *    4  7  a        4  7  a  d  0  3
+         *    abcAA/u    =>  abc/u0041/u0041/u
+         *    S  C L         S              C
+         *                                  L   gl=b->15
+         * 4. return
+         *
+         *    4  7  a  d  0  3
+         *    abc/u0041/u0041/u
+         *    S C L
+         */
+
+        // One more wrinkle: if this compound transliterator has a
+        // filter F, every component filter f is temporarily replaced
+        // by F & f and restored in the finally clause below.
+        UnicodeFilter[] saved = applyCompoundFilter();
+
+        try {
+            int cursor = index[CURSOR];
+            int limit = index[LIMIT];
+            int globalLimit = limit;
+            /* globalLimit is the overall limit.  We keep track of this
+             * since we overwrite index[LIMIT] with the previous
+             * index[CURSOR].  After each transliteration, we update
+             * globalLimit for insertions or deletions that have happened.
+             */
+
+            for (int i=0; i<trans.length; ++i) {
+                index[CURSOR] = cursor; // Reset cursor
+                index[LIMIT] = limit;
+
+                if (DEBUG) {
+                    System.out.print(escape(i + ": \"" +
+                        substring(text, index[START], index[CURSOR]) + '|' +
+                        substring(text, index[CURSOR], index[LIMIT]) +
+                        "\" -> \""));
+                }
+
+                trans[i].handleKeyboardTransliterate(text, index);
+
+                if (DEBUG) {
+                    System.out.println(escape(
+                        substring(text, index[START], index[CURSOR]) + '|' +
+                        substring(text, index[CURSOR], index[LIMIT]) +
+                        '"'));
+                }
+
+                // Adjust overall limit for insertions/deletions
+                globalLimit += index[LIMIT] - limit;
+                limit = index[CURSOR]; // Move limit to end of committed text
+            }
+            // Cursor is good where it is -- where the last
+            // transliterator left it.  Limit needs to be put back
+            // where it was, modulo adjustments for deletions/insertions.
+            index[LIMIT] = globalLimit;
+
+        } finally {
+            // Fixup the transliterator filters, if we had to modify them.
+            restoreFilters(saved);
+        }
+    }
+
+    /**
+     * If this compound transliterator has a non-null filter F, replaces
+     * the filter f of every component with
+     * <code>UnicodeFilterLogic.and(F, f)</code> and returns the original
+     * component filters so they can be restored with
+     * {@link #restoreFilters}.  Returns <code>null</code>, and changes
+     * nothing, when there is no compound filter.
+     *
+     * <p>The components themselves are modified, so anyone using them
+     * outside of this object while the filters are swapped will see
+     * the combined filter.  A possible future optimization is to
+     * combine the filters once at construction time, but then outside
+     * users of the components would always get unexpected results.
+     * @return the original component filters, or <code>null</code> if
+     * no filters were changed
+     */
+    private UnicodeFilter[] applyCompoundFilter() {
+        UnicodeFilter F = getFilter();
+        if (F == null) {
+            return null;
+        }
+        UnicodeFilter[] saved = new UnicodeFilter[trans.length];
+        for (int i=0; i<saved.length; ++i) {
+            saved[i] = trans[i].getFilter();
+            trans[i].setFilter(UnicodeFilterLogic.and(F, saved[i]));
+        }
+        return saved;
+    }
+
+    /**
+     * Restores the component filters saved by
+     * {@link #applyCompoundFilter}.
+     * @param saved the array returned by applyCompoundFilter(); may be
+     * <code>null</code>, in which case nothing is done
+     */
+    private void restoreFilters(UnicodeFilter[] saved) {
+        if (saved == null) {
+            return;
+        }
+        for (int i=0; i<saved.length; ++i) {
+            trans[i].setFilter(saved[i]);
+        }
+    }
+
+    /**
+     * Returns the length of the longest context required by this transliterator.
+     * This is <em>preceding</em> context.  For a compound transliterator
+     * it is the largest value reported by any component, or zero if
+     * the chain is empty.
+     * @return maximum number of preceding context characters this
+     * transliterator needs to examine
+     */
+    protected int getMaximumContextLength() {
+        int max = 0;
+        for (int i=0; i<trans.length; ++i) {
+            int len = trans[i].getMaximumContextLength();
+            if (len > max) {
+                max = len;
+            }
+        }
+        return max;
+    }
+
+    /**
+     * DEBUG
+     * Returns a substring of a Replaceable, from <code>start</code>
+     * inclusive to <code>limit</code> exclusive.
+     */
+    private static final String substring(Replaceable str, int start, int limit) {
+        StringBuffer buf = new StringBuffer();
+        while (start < limit) {
+            buf.append(str.charAt(start++));
+        }
+        return buf.toString();
+    }
+
+    /**
+     * DEBUG
+     * Escapes characters outside U+0020..U+007F as a backslash, 'u',
+     * and four hexadecimal digits.
+     */
+    private static final String escape(String s) {
+        StringBuffer buf = new StringBuffer();
+        for (int i=0; i<s.length(); ++i) {
+            char c = s.charAt(i);
+            if (c >= ' ' && c <= 0x007F) {
+                buf.append(c);
+            } else {
+                buf.append("\\u");
+                // Zero-pad to four hex digits.
+                if (c < 0x1000) {
+                    buf.append('0');
+                    if (c < 0x100) {
+                        buf.append('0');
+                        if (c < 0x10) {
+                            buf.append('0');
+                        }
+                    }
+                }
+                buf.append(Integer.toHexString(c));
+            }
+        }
+        return buf.toString();
+    }
+}
diff --git a/icu4j/src/com/ibm/text/HexToUnicodeTransliterator.java b/icu4j/src/com/ibm/text/HexToUnicodeTransliterator.java
new file mode 100755
index 00000000000..18673e15fe7
--- /dev/null
+++ b/icu4j/src/com/ibm/text/HexToUnicodeTransliterator.java
@@ -0,0 +1,130 @@
+package com.ibm.text;
+import java.util.*;
+
+/**
+ * A transliterator that converts from hexadecimal Unicode
+ * escape sequences to the characters they represent.  For example, "U+0040"
+ * and '\u0040'.  It recognizes the
+ * prefixes "U+", "u+", "&#92;U", and "&#92;u".  Hex values may be
+ * upper- or lowercase.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: HexToUnicodeTransliterator.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class HexToUnicodeTransliterator extends Transliterator {
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Package accessible ID for this transliterator.  Final, so other
+     * classes in the package can read it but cannot reassign it.
+     */
+    static final String _ID = "Hex-Unicode";
+
+    /**
+     * Constructs a transliterator with the ID <code>_ID</code> and no
+     * filter.
+     */
+    public HexToUnicodeTransliterator() {
+        super(_ID, null);
+    }
+
+    /**
+     * Transliterates a segment of a string.  <code>Transliterator</code> API.
+     * Delegates to {@link #handleKeyboardTransliterate} with the cursor
+     * placed at <code>start</code>.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @return the new limit index
+     */
+    public int transliterate(Replaceable text, int start, int limit) {
+        // NOTE(review): the element order presumably matches the
+        // inherited START, LIMIT, CURSOR index constants -- confirm
+        // against Transliterator.
+        int[] offsets = { start, limit, start };
+        handleKeyboardTransliterate(text, offsets);
+        return offsets[LIMIT];
+    }
+
+    /**
+     * Implements {@link Transliterator#handleKeyboardTransliterate}.
+     * On return, <code>offsets[LIMIT]</code> has been reduced by five
+     * for every escape replaced and <code>offsets[CURSOR]</code> is the
+     * position at which scanning stopped.
+     */
+    protected void handleKeyboardTransliterate(Replaceable text,
+                                               int[] offsets) {
+        /**
+         * Performs transliteration changing Unicode hexadecimal
+         * escapes to characters.  For example, "U+0040" -> '@'.  A fixed
+         * set of prefixes is recognized: "&#92;u", "&#92;U", "u+", "U+".
+         *
+         * Every escape is exactly six characters: a two-character
+         * prefix followed by four hex digits.  The window
+         * cursor..cursor+5 is examined from right to left so that a
+         * mismatch lets the cursor skip ahead by more than one.
+         */
+        int cursor = offsets[CURSOR];
+        int limit = offsets[LIMIT];
+
+        // Last cursor position at which a whole escape still fits.
+        int maxCursor = limit - 6;
+    loop:
+        while (cursor <= maxCursor) {
+            // Rightmost character of the window: must be the low digit.
+            char c = filteredCharAt(text, cursor + 5);
+            int digit0 = Character.digit(c, 16);
+            if (digit0 < 0) {
+                if (c == '\\') {
+                    // A backslash can only start a prefix; align on it.
+                    cursor += 5;
+                } else if (c == 'U' || c == 'u' || c == '+') {
+                    // May be the second prefix character; step so that
+                    // it lands at cursor+1.
+                    cursor += 4;
+                } else {
+                    // No escape can contain this character.
+                    cursor += 6;
+                }
+                continue;
+            }
+
+            int u = digit0;
+
+            // Remaining three digits, still right to left.
+            for (int i=4; i>=2; --i) {
+                c = filteredCharAt(text, cursor + i);
+                int digit = Character.digit(c, 16);
+                if (digit < 0) {
+                    if (c == 'U' || c == 'u' || c == '+') {
+                        // It is followed by a hex digit, so it can only
+                        // be the second prefix character; the escape
+                        // would start one position before it.
+                        cursor += i-1;
+                    } else {
+                        // Everything to the right in this window is a
+                        // hex digit, so no escape starts inside it.
+                        cursor += 6;
+                    }
+                    continue loop;
+                }
+                u |= digit << (4 * (5-i));
+            }
+
+            c = filteredCharAt(text, cursor);
+            char d = filteredCharAt(text, cursor + 1);
+            if (((c == 'U' || c == 'u') && d == '+')
+                || (c == '\\' && (d == 'U' || d == 'u'))) {
+
+                // At this point, we have a match; replace cursor..cursor+5
+                // with u.
+                text.replace(cursor, cursor+6, String.valueOf((char) u));
+                // Six characters became one.
+                limit -= 5;
+                maxCursor -= 5;
+
+                ++cursor;
+            } else {
+                cursor += 6;
+            }
+        }
+
+        offsets[LIMIT] = limit;
+        offsets[CURSOR] = cursor;
+    }
+
+    /**
+     * Returns the character of <code>text</code> at index <code>i</code>,
+     * or U+FFFF if this transliterator has a filter and the filter's
+     * <code>isIn()</code> rejects that character.  U+FFFF is neither a
+     * hex digit nor a prefix character, so a filtered character can
+     * never take part in a match.
+     */
+    private char filteredCharAt(Replaceable text, int i) {
+        char c = text.charAt(i);
+        UnicodeFilter filter = getFilter();
+        return (filter == null || filter.isIn(c)) ? c : '\uFFFF';
+    }
+
+    /**
+     * Return the length of the longest context required by this transliterator.
+     * This is <em>preceding</em> context.
+     * @return maximum number of preceding context characters this
+     * transliterator needs to examine; always zero for this class
+     */
+    protected int getMaximumContextLength() {
+        return 0;
+    }
+}
diff --git a/icu4j/src/com/ibm/text/Replaceable.java b/icu4j/src/com/ibm/text/Replaceable.java
new file mode 100755
index 00000000000..b4c8519689c
--- /dev/null
+++ b/icu4j/src/com/ibm/text/Replaceable.java
@@ -0,0 +1,77 @@
+package com.ibm.text;
+
+/**
+ * <code>Replaceable</code> is an interface that supports the
+ * operation of replacing a substring with another piece of text.
+ * <code>Replaceable</code> is needed in order to change a piece of
+ * text while retaining style attributes.  For example, if the string
+ * "the <b>bold</b> font" has range (4, 8) replaced with "strong",
+ * then it becomes "the <b>strong</b> font".
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: Replaceable.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public interface Replaceable {
+    /**
+     * Return the number of characters in the text.
+     * @return number of characters in text
+     */
+    int length();
+
+    /**
+     * Return the character at the given offset into the text.
+     * @param offset an integer between 0 and <code>length()</code>-1
+     * inclusive
+     * @return character of text at given offset
+     */
+    char charAt(int offset);
+
+    /**
+     * Copies characters from this object into the destination
+     * character array.  The first character to be copied is at index
+     * <code>srcStart</code>; the last character to be copied is at
+     * index <code>srcLimit-1</code> (thus the total number of
+     * characters to be copied is <code>srcLimit-srcStart</code>). The
+     * characters are copied into the subarray of <code>dst</code>
+     * starting at index <code>dstStart</code> and ending at index
+     * <code>dstStart + (srcLimit-srcStart) - 1</code>.
+     *
+     * @param srcStart the beginning index to copy, inclusive; <code>0
+     * <= srcStart <= srcLimit</code>.
+     * @param srcLimit the ending index to copy, exclusive;
+     * <code>srcStart <= srcLimit <= length()</code>.
+     * @param dst the destination array.
+     * @param dstStart the start offset in the destination array.
+     */
+    void getChars(int srcStart, int srcLimit, char dst[], int dstStart);
+
+    /**
+     * Replace a substring of this object with the given text.  The
+     * replacement need not have the same length as the range it
+     * replaces.
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= length()</code>.
+     * @param text the text to replace characters <code>start</code>
+     * to <code>limit - 1</code>
+     */
+    void replace(int start, int limit, String text);
+
+    /**
+     * Replace a substring of this object with a range of characters
+     * taken from a character array.
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= length()</code>.
+     * @param chars the array holding the text to replace characters
+     * <code>start</code> to <code>limit - 1</code>
+     * @param charsStart the index into <code>chars</code> of the first
+     * replacement character, inclusive.
+     * @param charsLen the number of characters of <code>chars</code>
+     * to use, beginning at <code>charsStart</code>.
+     */
+    void replace(int start, int limit, char[] chars,
+                 int charsStart, int charsLen);
+    // Note: We use length rather than limit to conform to StringBuffer
+    // and System.arraycopy.
+}
diff --git a/icu4j/src/com/ibm/text/ReplaceableString.java b/icu4j/src/com/ibm/text/ReplaceableString.java
new file mode 100755
index 00000000000..d6a7df06db5
--- /dev/null
+++ b/icu4j/src/com/ibm/text/ReplaceableString.java
@@ -0,0 +1,159 @@
+package com.ibm.text;
+
+/**
+ * <code>ReplaceableString</code> is an adapter class that implements the
+ * <code>Replaceable</code> API around an ordinary <code>StringBuffer</code>.
+ *
+ * <p><em>Note:</em> This class does not support attributes and is not
+ * intended for general use.  Most clients will need to implement
+ * {@link Replaceable} in their text representation class.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @see Replaceable
+ * @author Alan Liu
+ * @version $RCSfile: ReplaceableString.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class ReplaceableString implements Replaceable {
+    /** Backing storage; may be shared with the caller (see constructors). */
+    private final StringBuffer buf;
+
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Construct a new object with the given initial contents.
+     * @param str initial contents
+     */
+    public ReplaceableString(String str) {
+        buf = new StringBuffer(str);
+    }
+
+    /**
+     * Construct a new object using <code>buf</code> for internal
+     * storage.  The contents of <code>buf</code> at the time of
+     * construction are used as the initial contents.  <em>Note!
+     * Modifications to <code>buf</code> will modify this object, and
+     * vice versa.</em>
+     * @param buf object to be used as internal storage
+     */
+    public ReplaceableString(StringBuffer buf) {
+        this.buf = buf;
+    }
+
+    /**
+     * Construct a new empty object.
+     */
+    public ReplaceableString() {
+        buf = new StringBuffer();
+    }
+
+    /**
+     * Return the contents of this object as a <code>String</code>.
+     * @return string contents of this object
+     */
+    public String toString() {
+        return buf.toString();
+    }
+
+    /**
+     * Return the internal storage of this object.  <em>Note!  Any
+     * changes made to the returned object affect this object's
+     * contents, and vice versa.</em>
+     * @return internal buffer used by this object
+     */
+    public StringBuffer getStringBuffer() {
+        return buf;
+    }
+
+    /**
+     * Return the number of characters contained in this object.
+     * <code>Replaceable</code> API.
+     * @return number of characters in this object
+     */
+    public int length() {
+        return buf.length();
+    }
+
+    /**
+     * Return the character at the given position in this object.
+     * <code>Replaceable</code> API.
+     * @param offset offset into the contents, from 0 to
+     * <code>length()</code> - 1
+     * @return the character at <code>offset</code>
+     */
+    public char charAt(int offset) {
+        return buf.charAt(offset);
+    }
+
+    /**
+     * Copies characters from this object into the destination
+     * character array.  The first character to be copied is at index
+     * <code>srcStart</code>; the last character to be copied is at
+     * index <code>srcLimit-1</code> (thus the total number of
+     * characters to be copied is <code>srcLimit-srcStart</code>). The
+     * characters are copied into the subarray of <code>dst</code>
+     * starting at index <code>dstStart</code> and ending at index
+     * <code>dstStart + (srcLimit-srcStart) - 1</code>.
+     *
+     * @param srcStart the beginning index to copy, inclusive; <code>0
+     * <= srcStart <= srcLimit</code>.
+     * @param srcLimit the ending index to copy, exclusive;
+     * <code>srcStart <= srcLimit <= length()</code>.
+     * @param dst the destination array.
+     * @param dstStart the start offset in the destination array.
+     */
+    public void getChars(int srcStart, int srcLimit, char dst[], int dstStart) {
+        buf.getChars(srcStart, srcLimit, dst, dstStart);
+    }
+
+    /**
+     * Replace zero or more characters with new characters.
+     * <code>Replaceable</code> API.  When <code>start == limit</code>
+     * the text is inserted at <code>start</code>.  A <code>limit</code>
+     * beyond the end of the contents is treated as the end of the
+     * contents.
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= length()</code>.
+     * @param text new text to replace characters <code>start</code> to
+     * <code>limit - 1</code>; must not be <code>null</code>
+     * @throws StringIndexOutOfBoundsException if <code>start</code> is
+     * negative, greater than <code>length()</code>, or greater than
+     * <code>limit</code>; the contents are left unchanged
+     */
+    public void replace(int start, int limit, String text) {
+        // StringBuffer.replace checks the range before it changes
+        // anything, so an invalid range can no longer corrupt the
+        // contents the way a truncate-and-append sequence could.
+        buf.replace(start, limit, text);
+    }
+
+    /**
+     * Replace a substring of this object with a range of characters
+     * taken from a character array.  <code>Replaceable</code> API.
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= length()</code>.
+     * @param chars the array holding the text to replace characters
+     * <code>start</code> to <code>limit - 1</code>
+     * @param charsStart the index into <code>chars</code> of the first
+     * replacement character, inclusive.
+     * @param charsLen the number of characters of <code>chars</code>
+     * to use, beginning at <code>charsStart</code>.
+     * @throws StringIndexOutOfBoundsException if either range is
+     * invalid; the contents are left unchanged
+     */
+    public void replace(int start, int limit, char[] chars,
+                        int charsStart, int charsLen) {
+        // Build the replacement first: a bad charsStart/charsLen then
+        // fails before the buffer has been touched.
+        String replacement = new String(chars, charsStart, charsLen);
+        buf.replace(start, limit, replacement);
+    }
+}
diff --git a/icu4j/src/com/ibm/text/RuleBasedTransliterator.java b/icu4j/src/com/ibm/text/RuleBasedTransliterator.java
new file mode 100755
index 00000000000..4a433e9479d
--- /dev/null
+++ b/icu4j/src/com/ibm/text/RuleBasedTransliterator.java
@@ -0,0 +1,1187 @@
+package com.ibm.text;
+
+import java.util.Hashtable;
+import java.util.Vector;
+
+/**
+ * A transliterator that reads a set of rules in order to determine how to
+ * perform translations.  Rules are stored in resource bundles indexed by name.
+ * Rules are separated by newline characters ('\n'); to include a literal
+ * newline, prefix it with a backslash ('\\\n').  Whitespace is significant.  If
+ * the first character on a line is '#', the entire line is ignored as a
+ * comment.
+ *
+ * <p>Each set of rules consists of two groups, one forward, and one reverse.
+ * This is a convention that is not enforced; rules for one direction may be
+ * omitted, with the result that translations in that direction will not modify
+ * the source text.
+ *
+ * <p><b>Rule syntax</b>
+ *
+ * <p>Rule statements take one of the following forms:
+ * <dl>
+ *   <dt><code>alefmadda=&#092;u0622</code></dt>
+ *
+ *   <dd><strong>Variable definition.</strong> The name on the left is
+ *   assigned the character or expression on the right. Names may not
+ *   contain any special characters (see list below). Duplicate names
+ *   (including duplicates of simple variables or category names)
+ *   cause an exception to be thrown.  If the right hand side consists
+ *   of one character, then the variable stands for that character.
+ *   In this example, after this statement, instances of the left hand
+ *   name surrounded by braces, &quot;<code>{alefmadda}</code>&quot;,
+ *   will be replaced by the Unicode character U+0622. If the
+ *   right hand side is longer than one character, then it is
+ *   interpreted as a character category expression; see below for
+ *   details.</dd>
+ *
+ *   <dt><code>softvowel=[eiyEIY]</code></dt>
+ *
+ *   <dd><strong>Category definition.</strong> The name on the left is assigned
+ *   to stand for a set of characters.  The same rules for names of simple
+ *   variables apply. After this statement, the left hand variable will be
+ *   interpreted as indicating a set of characters in appropriate contexts. The
+ *   pattern syntax defining sets of characters is defined by {@link UnicodeSet}.
+ *   Examples of valid patterns are:<table>
+ *
+ *       <tr valign=top>
+ *         <td nowrap><code>[abc]</code></td>
+ *         <td>The set containing the characters 'a', 'b', and 'c'.</td>
+ *       </tr>
+ *       <tr valign=top>
+ *         <td nowrap><code>[^abc]</code></td>
+ *         <td>The set of all characters <em>except</em> 'a', 'b', and 'c'.</td>
+ *       </tr>
+ *       <tr valign=top>
+ *         <td nowrap><code>[A-Z]</code></td>
+ *         <td>The set of all characters from 'A' to 'Z' in Unicode order.</td>
+ *       </tr>
+ *       <tr valign=top>
+ *         <td nowrap><code>[:Lu:]</code></td>
+ *         <td>The set of Unicode uppercase letters. See
+ *         <a href="http://www.unicode.org">www.unicode.org</a>
+ *         for a complete list of categories and their two-letter codes.</td>
+ *       </tr>
+ *       <tr valign=top>
+ *         <td nowrap><code>[^a-z[:Lu:][:Ll:]]</code></td>
+ *         <td>The set of all characters <em>except</em> 'a' through 'z' and
+ *         uppercase or lowercase letters.</td>
+ *       </tr>
+ *     </table>
+ *
+ *   See {@link UnicodeSet} for more documentation and examples.
+ *   </dd>
+ *
+ *   <dt><code>ai&gt;{alefmadda}</code></dt>
+ *
+ *   <dd><strong>Forward translation rule.</strong> This rule states that the
+ *   string on the left will be changed to the string on the right when
+ *   performing forward transliteration.</dd>
+ *
+ *   <dt><code>ai&lt;{alefmadda}</code></dt>
+ *
+ *   <dd><strong>Reverse translation rule.</strong> This rule states that the
+ *   string on the right will be changed to the string on the left when
+ *   performing reverse transliteration.</dd>
+ *
+ * </dl>
+ *
+ * <p>Forward and reverse translation rules consist of a <em>match
+ * pattern</em> and an <em>output string</em>.  The match pattern consists
+ * of literal characters, optionally preceded by context, and optionally
+ * followed by context.  Context characters, like literal pattern characters,
+ * must be matched in the text being transliterated.  However, unlike literal
+ * pattern characters, they are not replaced by the output text.  For example,
+ * the pattern "<code>[abc]def</code>" indicates the characters
+ * "<code>def</code>" must be preceded by "<code>abc</code>" for a successful
+ * match.  If there is a successful match, "<code>def</code>" will be replaced,
+ * but not "<code>abc</code>".  The initial '<code>[</code>' is optional, so
+ * "<code>abc]def</code>" is equivalent to "<code>[abc]def</code>".  Another
+ * example is "<code>123[456]</code>" (or "<code>123[456</code>") in which the
+ * literal pattern "<code>123</code>" must be followed by "<code>456</code>".
+ *
+ * <p>The output string of a forward or reverse rule consists of characters to
+ * replace the literal pattern characters.  If the output string contains the
+ * character '<code>|</code>', this is taken to indicate the location of the
+ * <em>cursor</em> after replacement.  The cursor is the point in the text
+ * at which the next replacement, if any, will be applied.
+ *
+ * <p><b>Example</b>
+ *
+ * <p>The following example rules illustrate many of the features of the rule
+ * language.
+ * <table cellpadding="4">
+ * <tr valign=top><td>Rule 1.</td>
+ *     <td nowrap><code>abc]def&gt;x|y</code></td></tr>
+ * <tr valign=top><td>Rule 2.</td>
+ *     <td nowrap><code>xyz&gt;r</code></td></tr>
+ * <tr valign=top><td>Rule 3.</td>
+ *     <td nowrap><code>yz&gt;q</code></td></tr>
+ * </table>
+ *
+ * <p>Applying these rules to the string "<code>adefabcdefz</code>" yields the
+ * following results:
+ *
+ * <table cellpadding="4">
+ * <tr valign=top><td nowrap><code>|adefabcdefz</code></td>
+ *     <td>Initial state, no rules match.  Advance cursor.</td></tr>
+ * <tr valign=top><td nowrap><code>a|defabcdefz</code></td>
+ *     <td>Still no match.  Rule 1 does not match because the preceding
+ *     context is not present.</td></tr>
+ * <tr valign=top><td nowrap><code>ad|efabcdefz</code></td>
+ *     <td>Still no match.  Keep advancing until there is a match...</td></tr>
+ * <tr valign=top><td nowrap><code>ade|fabcdefz</code></td>
+ *     <td>...</td></tr>
+ * <tr valign=top><td nowrap><code>adef|abcdefz</code></td>
+ *     <td>...</td></tr>
+ * <tr valign=top><td nowrap><code>adefa|bcdefz</code></td>
+ *     <td>...</td></tr>
+ * <tr valign=top><td nowrap><code>adefab|cdefz</code></td>
+ *     <td>...</td></tr>
+ * <tr valign=top><td nowrap><code>adefabc|defz</code></td>
+ *     <td>Rule 1 matches; replace "<code>def</code>" with "<code>xy</code>"
+ *     and back up the cursor to before the '<code>y</code>'.</td></tr>
+ * <tr valign=top><td nowrap><code>adefabcx|yz</code></td>
+ *     <td>Although "<code>xyz</code>" is present, rule 2 does not match
+ *     because the cursor is before the '<code>y</code>', not before the
+ *     '<code>x</code>'.  Rule 3 does match.  Replace "<code>yz</code>" with
+ *     "<code>q</code>".</td></tr>
+ * <tr valign=top><td nowrap><code>adefabcxq|</code></td>
+ *     <td>The cursor is at the end; transliteration is complete.</td></tr>
+ * </table>
+ *
+ * <p>The order of rules is significant.  If multiple rules may match at some
+ * point, the first matching rule is applied.
+ *
+ * <p>Forward and reverse rules may have an empty output string.  Otherwise, an
+ * empty left or right hand side of any statement is a syntax error.
+ *
+ * <p>Single quotes are used to quote the special characters
+ * <code>=&gt;&lt;{}[]|</code>.  To specify a single quote itself, inside or
+ * outside of quotes, use two single quotes in a row.  For example, the rule
+ * "<code>'&gt;'&gt;o''clock</code>" changes the string "<code>&gt;</code>" to
+ * the string "<code>o'clock</code>".
+ *
+ * <p><b>Notes</b>
+ *
+ * <p>While a RuleBasedTransliterator is being built, it checks that the rules
+ * are added in proper order.  For example, if the rule "<code>a&gt;x</code>" is
+ * followed by the rule "<code>ab&gt;y</code>", the second rule will throw an
+ * exception: it can never be triggered, since the first rule always matches
+ * anything the second matches.  In other words, the first rule <em>masks</em>
+ * the second rule.  There is a cost of O(n^2) to make this check; in real-world
+ * tests it appears to approximately double build time.
+ *
+ * <p>One optimization that can be made is to add a pragma to the rule language,
+ * "#pragma order", that turns off ordering checking.  This pragma can then be
+ * added to all of our resource-based rules (after we build these once and
+ * determine that there are no ordering errors).  I haven't made this change yet
+ * in the interests of keeping the code from getting too byzantine.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: RuleBasedTransliterator.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class RuleBasedTransliterator extends Transliterator {
+    /**
+     * Direction constant passed to constructor to create a transliterator
+     * using the forward rules.
+     */
+    public static final int FORWARD = 0;
+
+    /**
+     * Direction constant passed to constructor to create a transliterator
+     * using the reverse rules.
+     */
+    public static final int REVERSE = 1;
+
+    private Data data; // rule set and variable tables consulted when matching
+
+    static final boolean DEBUG = false; // true: print a trace of each matching step
+
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Builds a transliterator by parsing a rule string for one direction.
+     * @param ID identifier passed to the superclass constructor
+     * @param rules rules, separated by '\n'
+     * @param direction either FORWARD or REVERSE.
+     * @param filter filter passed to the superclass constructor
+     * @exception IllegalArgumentException if rules or direction are invalid
+     */
+    public RuleBasedTransliterator(String ID, String rules, int direction, UnicodeFilter filter) {
+        super(ID, filter);
+        if (!(direction == FORWARD || direction == REVERSE)) {
+            throw new IllegalArgumentException("Invalid direction");
+        }
+        this.data = parse(rules, direction);
+    }
+
+    /**
+     * Convenience constructor: parses the given rules in the
+     * <code>FORWARD</code> direction, passing a null filter.
+     * @param ID identifier passed on to the four-argument constructor
+     * @param rules rules, separated by '\n'
+     * @exception IllegalArgumentException if rules are malformed
+     */
+    public RuleBasedTransliterator(String ID, String rules) {
+        this(ID, rules, FORWARD, null);
+    }
+
+    RuleBasedTransliterator(String ID, Data data, UnicodeFilter filter) {
+        super(ID, filter);
+        this.data = data; // adopts already-built rule data; nothing is parsed here
+    }
+
+    static Data parse(String rules, int direction) {
+        return new Parser(rules, direction).getData(); // Parser's constructor does the parsing
+    }
+
+    /**
+     * Transliterates a segment of a string.  <code>Transliterator</code> API.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param result buffer to receive the transliterated text; previous
+     * contents are discarded
+     */
+    public void transliterate(String text, int start, int limit,
+                              StringBuffer result) {
+        /* In the following loop there is a virtual buffer consisting of the
+         * text transliterated so far followed by the untransliterated text.  There is
+         * also a cursor, which may be in the already transliterated buffer or just
+         * before the untransliterated text.
+         *
+         * Example: rules 1. ab>x|y
+         *                2. yc>z
+         *
+         * []|eabcd  start - no match, copy e to transliterated buffer
+         * [e]|abcd  match rule 1 - copy output & adjust cursor
+         * [ex|y]cd  match rule 2 - copy output & adjust cursor
+         * [exz]|d   no match, copy d to transliterated buffer
+         * [exzd]|   done
+         *
+         * cursor: an index into the virtual buffer, 0..result.length().
+         * Matches take place at the cursor.  If there is no match, the cursor
+         * is advanced; if it was at the end of result, one character is first
+         * moved from the source text to the result buffer.
+         *
+         * start, limit: these designate the substring of the source text which
+         * has not been processed yet.  The range of offsets is start..limit-1.
+         * At any moment the virtual buffer consists of result +
+         * text.substring(start, limit).
+         */
+        int cursor = 0;
+        result.setLength(0); // previous contents of result are discarded
+        while (start < limit || cursor < result.length()) {
+            TransliterationRule r = data.ruleSet.findMatch(text, start, limit, result,
+                                                      cursor, data.setVariables, getFilter());
+            if (DEBUG) { // trace the state before this step: result#remaining, '|' at cursor
+                StringBuffer buf = new StringBuffer(
+                        result.toString() + '#' + text.substring(start, limit));
+                buf.insert(cursor <= result.length()
+                           ? cursor : (cursor + 1),
+                           '|'); // a cursor beyond result is shifted past the '#' separator
+                System.err.print((r == null ? "nomatch:" : ("match:" + r + ", "))
+                                 + buf);
+            }
+
+            if (r == null) {
+                if (cursor == result.length()) { // at the boundary: consume one source char
+                    result.append(text.charAt(start++));
+                }
+                ++cursor;
+            } else {
+                // resultPad is length of result to right of cursor; >= 0
+                int resultPad = result.length() - cursor;
+                char[] tail = null;
+                if (r.getKeyLength() > resultPad) { // key reaches into the source text
+                    start += r.getKeyLength() - resultPad; // consume that part of the source
+                } else if (r.getKeyLength() < resultPad) { // key ends inside result
+                    tail = new char[resultPad - r.getKeyLength()]; // keep what follows the key
+                    result.getChars(cursor + r.getKeyLength(), result.length(),
+                                    tail, 0);
+                }
+                result.setLength(cursor); // cut result back to the cursor
+                result.append(r.getOutput());
+                if (tail != null) {
+                    result.append(tail);
+                }
+                cursor += r.getCursorPos();
+            }
+
+            if (DEBUG) { // trace the state after this step
+                StringBuffer buf = new StringBuffer(
+                        result.toString() + '#' + text.substring(start, limit));
+                buf.insert(cursor <= result.length()
+                           ? cursor : (cursor + 1),
+                           '|');
+                System.err.println(" => " + buf);
+            }
+        }
+    }
+
+    /**
+     * Transliterates a segment of a <code>Replaceable</code> in place.
+     * @param text the text to be transliterated; it is modified directly
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @return The new limit index
+     */
+    public int transliterate(Replaceable text, int start, int limit) {
+        /* Only one buffer is involved here, so the bookkeeping is simple:
+         * start never changes, limit shifts by the size difference of each
+         * replacement, and the cursor walks from start to limit while
+         * replacements happen under it.
+         *
+         * Example: rules 1. ab>x|y
+         *                2. yc>z
+         *
+         * |eabcd   start - no match, advance cursor
+         * e|abcd   match rule 1 - change text & adjust cursor
+         * ex|ycd   match rule 2 - change text & adjust cursor
+         * exz|d    no match, advance cursor
+         * exzd|    done
+         */
+        int cursor = start;
+        while (cursor < limit) {
+            TransliterationRule match = data.ruleSet.findMatch(
+                    text, start, limit, cursor, data.setVariables, getFilter());
+            if (match == null) {
+                ++cursor;
+                continue;
+            }
+            int keyLength = match.getKeyLength();
+            text.replace(cursor, cursor + keyLength, match.getOutput());
+            limit += match.getOutput().length() - keyLength;
+            cursor += match.getCursorPos();
+        }
+        return limit;
+    }
+
+    /**
+     * Implements {@link Transliterator#handleKeyboardTransliterate}.
+     * Applies rules from the cursor onward and stops early on a partial match.
+     */
+    protected void handleKeyboardTransliterate(Replaceable text,
+                                               int[] index) {
+        final int start = index[START];
+        int limit = index[LIMIT];
+        int cursor = index[CURSOR];
+
+        if (DEBUG) {
+            System.out.print("\"" +
+                escape(rsubstring(text, start, cursor)) + '|' +
+                escape(rsubstring(text, cursor, limit)) + "\"");
+        }
+
+        // findIncrementalMatch() is handed this one-element array so it can
+        // report a partial match.
+        final boolean[] partial = new boolean[1];
+
+        while (cursor < limit) {
+            TransliterationRule match = data.ruleSet.findIncrementalMatch(
+                    text, start, limit, cursor, data.setVariables, partial, getFilter());
+            if (match != null) {
+                // Full match: replace the key with the rule output and
+                // reposition the cursor as the rule directs.
+                int keyLength = match.getKeyLength();
+                text.replace(cursor, cursor + keyLength, match.getOutput());
+                limit += match.getOutput().length() - keyLength;
+                cursor += match.getCursorPos();
+            } else if (partial[0]) {
+                // Partial match: nothing can be done without more text, so
+                // return with the cursor at the current position.
+                break;
+            } else {
+                // No match at this position: advance the cursor.
+                ++cursor;
+            }
+        }
+
+        if (DEBUG) {
+            System.out.println(" -> \"" +
+                escape(rsubstring(text, start, cursor)) + '|' +
+                escape(rsubstring(text, cursor, cursor)) + '|' +
+                escape(rsubstring(text, cursor, limit)) + "\"");
+        }
+
+        // Hand the adjusted limit and the final cursor back to the caller.
+        index[LIMIT] = limit;
+        index[CURSOR] = cursor;
+    }
+
+    /**
+     * Reports how much <em>preceding</em> context the rule set may need.
+     * @return Maximum number of preceding context characters this
+     * transliterator needs to examine
+     */
+    protected int getMaximumContextLength() {
+        TransliterationRuleSet rules = data.ruleSet;
+        return rules.getMaximumContextLength();
+    }
+
+
+    /**
+     * FOR DEBUGGING: Copies characters start..limit-1 of a Replaceable to a String.
+     */
+    private static String rsubstring(Replaceable r, int start, int limit) {
+        StringBuffer copy = new StringBuffer();
+        for (int i = start; i < limit; ++i) {
+            copy.append(r.charAt(i));
+        }
+        return copy.toString();
+    }
+
+    /**
+     * FOR DEBUGGING: Escape non-ASCII characters as Unicode.  Characters
+     * below U+0020 are escaped as well, a backslash is doubled, and all
+     * other characters up to U+007F are copied unchanged.
+     * @param s the string to escape
+     * @return the escaped copy of <code>s</code>
+     */
+    private static final String escape(String s) {
+        StringBuffer out = new StringBuffer();
+        final int length = s.length();
+        for (int i = 0; i < length; ++i) {
+            char c = s.charAt(i);
+            if (c == '\\') {
+                out.append("\\\\"); // That is, "\\"
+            } else if (c >= ' ' && c <= 0x007F) {
+                out.append(c);
+            } else {
+                // Emit a backslash, 'u', and the code in lower-case hex,
+                // left-padded with zeros to four digits.
+                String hex = Integer.toHexString(c);
+                out.append("\\u");
+                for (int width = hex.length(); width < 4; ++width) {
+                    out.append('0');
+                }
+                out.append(hex);
+            }
+        }
+        return out.toString();
+    }
+
+
+
+
+
+    /** Holds the result of parsing a rule string: rules and variable tables. */
+    static class Data {
+        /**
+         * Rule table.  May be empty.
+         */
+        public TransliterationRuleSet ruleSet;
+
+        /**
+         * Maps a variable name (String) to a variable (Character).  For a
+         * variable defined as a single literal character, the value is that
+         * character.  For a variable defined as a UnicodeSet, the value is a
+         * stand-in character instead: the stand-in is the key under which
+         * the set is stored in <code>setVariables</code>, and it also
+         * represents the set in the stored rules.
+         */
+        public Hashtable variableNames;
+
+        /**
+         * Maps a stand-in character (Character) to a set (UnicodeSet).  Only
+         * variables that denote a set of characters have an entry here; the
+         * key is the stand-in recorded for the variable name in
+         * <code>variableNames</code>, which is also what appears in the rule
+         * text to represent the set.
+         */
+        public Hashtable setVariables;
+
+        /** Creates the two variable tables and the rule set. */
+        public Data() {
+            this.variableNames = new Hashtable();
+            this.setVariables = new Hashtable();
+            this.ruleSet = new TransliterationRuleSet();
+        }
+    }
+
+
+
+
+
+
+    private static class Parser {
+        private String rules;       // full rule text; parseRules() walks it line by line
+
+        private int direction;      // FORWARD or REVERSE; selects which rules are kept
+
+        private Data data;          // receives the parsed rules and variable tables
+
+        /**
+         * The next available stand-in for variables.  This starts at some point in
+         * the private use area (discovered dynamically) and increments up toward
+         * <code>variableLimit</code>.  At any point during parsing, available
+         * variables are <code>variableNext..variableLimit-1</code>.
+         */
+        private char variableNext;
+
+        /**
+         * The limit (exclusive) of the stand-ins for variables.  This is discovered
+         * dynamically.  At any point during parsing, available variables are
+         * <code>variableNext..variableLimit-1</code>.
+         */
+        private char variableLimit;
+
+        // Operators
+        private static final char VARIABLE_DEF_OP   = '=';
+        private static final char FORWARD_RULE_OP   = '>';
+        private static final char REVERSE_RULE_OP   = '<';
+
+        private static final String OPERATORS = "=><"; // the three operator characters above
+
+        // Other special characters
+        private static final char QUOTE               = '\'';
+        private static final char VARIABLE_REF_OPEN   = '{';
+        private static final char VARIABLE_REF_CLOSE  = '}';
+        private static final char CONTEXT_OPEN        = '[';
+        private static final char CONTEXT_CLOSE       = ']';
+        private static final char CURSOR_POS          = '|';
+        private static final char RULE_COMMENT_CHAR   = '#';
+
+        /**
+         * Specials must be quoted in rules to be used as literals.
+         * Specials may not occur in variable names.
+         */
+        private static final String SPECIALS = "'{}[]|#" + OPERATORS;
+
+        /**
+         * Specials that must be quoted in variable definitions.
+         */
+        private static final String DEF_SPECIALS = "'{}";
+
+        /**
+         * @param rules list of rules, separated by newline characters; parsed at once
+         * @param direction FORWARD or REVERSE; only rules of this direction are kept
+         * @exception IllegalArgumentException if there is a syntax error in the rules
+         */
+        public Parser(String rules, int direction) {
+            this.data = new Data();
+            this.rules = rules;
+            this.direction = direction;
+            parseRules();
+        }
+
+        public Data getData() {
+            return data; // created in the constructor, just before parseRules() runs
+        }
+
+        /**
+         * Splits the rule string into lines at newline characters ('\n'),
+         * applies each line as a rule, and finally freezes the rule set.  A
+         * newline preceded by a backslash does not end a line.  Empty lines
+         * and lines whose first character is '#' are skipped.  Typically this
+         * method is called exactly once, during construction.
+         * @exception IllegalArgumentException if there is a syntax error in the
+         * rules
+         */
+        private void parseRules() {
+            determineVariableRange();
+
+            final int length = rules.length();
+            int lineStart = 0;
+            while (lineStart < length) {
+                // Find the newline that ends this line, stepping over every
+                // newline that directly follows a backslash.
+                int lineEnd = rules.indexOf('\n', lineStart);
+                while (lineEnd > 0 && rules.charAt(lineEnd - 1) == '\\') {
+                    lineEnd = rules.indexOf('\n', lineEnd + 1);
+                }
+                if (lineEnd < 0) {
+                    lineEnd = length; // no further newline: line runs to the end
+                }
+                boolean blank = (lineEnd <= lineStart);
+                if (!blank && rules.charAt(lineStart) != RULE_COMMENT_CHAR) {
+                    applyRule(lineStart, lineEnd);
+                }
+                lineStart = lineEnd + 1;
+            }
+
+            data.ruleSet.freeze();
+        }
+
+        /**
+         * Parses one rule, given as a substring of <code>rules</code>, and
+         * records its effect.  A variable definition is handed to
+         * applyVariableDef().  A forward or reverse rule is added to the rule
+         * set if it belongs to the direction being built; otherwise it is
+         * ignored.
+         * @param start the beginning index, inclusive; <code>0 <= start
+         * <= limit</code>.
+         * @param limit the ending index, exclusive; <code>start <= limit
+         * <= rules.length()</code>.
+         * @exception IllegalArgumentException if there is a syntax error in the
+         * rules
+         */
+        private void applyRule(int start, int limit) {
+            /* How a rule is parsed: the raw rule text contains two kinds of
+             * quoted matter.  There are variable references, such as
+             * "{alpha}", and there are quotes, such as "'<'" or "''".  Both
+             * are resolved early.  Quotes are stripped, leaving unquoted
+             * literal text.  Each variable reference is replaced by a single
+             * character, which either represents itself or stands for a
+             * category of characters defined by a statement such as
+             * "vowels=[aeiouAEIOU]".
+             *
+             * Each rule is also split into a left-hand side, an operator, and
+             * a right-hand side.  Neither side may be empty, except for the
+             * output pattern of a forward or reverse rule.  The match pattern
+             * of a forward or reverse rule is split further into antecontext,
+             * literal pattern, and postcontext, where either context may be
+             * absent.  The cursor indicator '|' is detected in the output
+             * pattern and removed, and its position is recorded.
+             *
+             * Quote removal, variable resolution, and sub-pattern splitting
+             * must all happen at once, chiefly because quoting allows special
+             * characters to appear at arbitrary positions in the final
+             * unquoted text.  (For this reason, altering the rule language is
+             * somewhat clumsy; it entails revising the parsing methods as a
+             * whole.)
+             *
+             * The end products are unquoted pieces of text of various types
+             * and an integer cursor position, if one is specified.  Other
+             * classes such as UnicodeSet and TransliterationRule therefore
+             * need know nothing of quoting or variables.
+             */
+            StringBuffer left = new StringBuffer();
+            StringBuffer right = new StringBuffer();
+            StringBuffer anteContext = new StringBuffer();
+            StringBuffer postContext = new StringBuffer();
+            int[] cursorPos = new int[1];
+
+            char operator = parseRule(start, limit, left, right,
+                                      anteContext, postContext, cursorPos);
+
+            if (operator == VARIABLE_DEF_OP) {
+                applyVariableDef(left.toString(), right.toString());
+                return;
+            }
+
+            // A rule is kept only when its operator agrees with the direction
+            // being built; a rule for the other direction is ignored.
+            boolean forwardRule =
+                (operator == FORWARD_RULE_OP) && (direction == FORWARD);
+            boolean reverseRule =
+                (operator == REVERSE_RULE_OP) && (direction == REVERSE);
+            if (!forwardRule && !reverseRule) {
+                return;
+            }
+
+            // For a reverse rule the two sides trade places: the right side is
+            // passed where a forward rule passes the left side, and vice versa.
+            StringBuffer first = forwardRule ? left : right;
+            StringBuffer second = forwardRule ? right : left;
+            data.ruleSet.addRule(new TransliterationRule(
+                                     first.toString(), second.toString(),
+                                     anteContext.toString(), postContext.toString(),
+                                     cursorPos[0]));
+        }
+
+        /**
+         * Records a variable definition in the variable tables.
+         * @param name the name of the variable.  It must not already be defined.
+         * @param pattern the value of the variable.  It may be a single character
+         * or a pattern describing a character set.
+         * @exception IllegalArgumentException if there is a syntax error
+         */
+        private final void applyVariableDef(String name, String pattern) {
+            validateVariableName(name);
+            if (data.variableNames.containsKey(name)) {
+                throw new IllegalArgumentException("Duplicate variable definition: "
+                                                   + name + '=' + pattern);
+            }
+//!         if (UnicodeSet.getCategoryID(name) >= 0) {
+//!             throw new IllegalArgumentException("Reserved variable name: "
+//!                                                + name);
+//!         }
+            final int patternLength = pattern.length();
+            if (patternLength < 1) {
+                throw new IllegalArgumentException("Variable definition missing: " + name);
+            }
+            if (patternLength > 1) {
+                // More than one character: a category, keyed by a fresh stand-in.
+                if (variableNext >= variableLimit) {
+                    throw new RuntimeException("Private use variables exhausted");
+                }
+                Character standIn = new Character(variableNext++);
+                data.variableNames.put(name, standIn);
+                data.setVariables.put(standIn, new UnicodeSet(pattern));
+                return;
+            }
+            // Exactly one character: the variable stands for that character.
+            data.variableNames.put(name, new Character(pattern.charAt(0)));
+        }
+
+        /**
+         * Given a rule, parses it into three pieces: The left side, the right side,
+         * and the operator.  Returns the operator.  Quotes and variable references
+         * are resolved; the output text in all <code>StringBuffer</code> parameters
+         * is literal text.  This method delegates to other parsing methods to
+         * handle the match pattern, output pattern, and other sub-patterns in the
+         * rule.
+         * @param start the beginning index, inclusive; <code>0 <= start
+         * <= limit</code>.
+         * @param limit the ending index, exclusive; <code>start <= limit
+         * <= rules.length()</code>.
+         * @param left left side of rule is appended to this buffer
+         * with the quotes removed and variables resolved
+         * @param right right side of rule is appended to this buffer
+         * with the quotes removed and variables resolved
+         * @param anteContext the preceding context of the match pattern,
+         * if there is one, is appended to this buffer
+         * @param postContext the following context of the match pattern,
+         * if there is one, is appended to this buffer
+         * @param cursorPos if there is a cursor in the output pattern, its
+         * offset is stored in <code>cursorPos[0]</code>
+         * @return The operator character, one of the characters in OPERATORS.
+         */
+        private char parseRule(int start, int limit,
+                               StringBuffer left, StringBuffer right,
+                               StringBuffer anteContext,
+                               StringBuffer postContext,
+                               int[] cursorPos) {
+            if (false) {
+                System.err.println("Parsing " + rules.substring(start, limit));
+            }
+            /* Parse the rule into three pieces -- left, operator, and right,
+             * parsing out quotes.  The result is that left and right will have
+             * unquoted text.  E.g., "gt<'>'" will have right = ">".  Unquoted
+             * operators throw an exception.  Two quotes inside or outside
+             * quotes indicates a quote literal.  E.g., "o''clock" -> "o'clock".
+             */
+            int i = quotedIndexOf(rules, start, limit, OPERATORS);
+            if (i < 0) {
+                throw new IllegalArgumentException(
+                              "Syntax error: "
+                              + rules.substring(start, limit));
+            }
+            char c = rules.charAt(i);
+            switch (c) {
+            case FORWARD_RULE_OP:
+                if (i == start) {
+                    throw new IllegalArgumentException(
+                                  "Empty left side: "
+                                  + rules.substring(start, limit));
+                }
+                parseMatchPattern(start, i, left, anteContext, postContext);
+                if (i != (limit-1)) {
+                    parseOutputPattern(i+1, limit, right, cursorPos);
+                }
+                break;
+            case REVERSE_RULE_OP:
+                if (i == (limit-1)) {
+                    throw new IllegalArgumentException(
+                                  "Empty right side: "
+                                  + rules.substring(start, limit));
+                }
+                if (i != start) {
+                    parseOutputPattern(start, i, left, cursorPos);
+                }
+                parseMatchPattern(i+1, limit, right, anteContext, postContext);
+                break;
+            default:
+                if (i == start || i == (limit-1)) {
+                    throw new IllegalArgumentException(
+                                  "Empty left or right side: "
+                                  + rules.substring(start, limit));
+                }
+                parseSubPattern(start, i, left);
+                parseDefPattern(i+1, limit, right);
+                break;
+            }
+            return c;
+        }
+
+        /**
+         * Parses the match pattern of a forward or reverse rule.  Given the raw
+         * match pattern, return the match text and the context on both sides, if
+         * any.  Resolves all quotes and variables.
+         * @param start the beginning index, inclusive; <code>0 <= start
+         * <= limit</code>.
+         * @param limit the ending index, exclusive; <code>start <= limit
+         * <= rules.length()</code>.
+         * @param text the key to be matched will be appended to this buffer
+         * @param anteContext the preceding context, if any, will be appended
+         * to this buffer.
+         * @param postContext the following context, if any, will be appended
+         * to this buffer.
+         */
+        private void parseMatchPattern(int start, int limit,
+                                       StringBuffer text,
+                                       StringBuffer anteContext,
+                                       StringBuffer postContext) {
+            if (limit <= start) {
+                throw new IllegalArgumentException(
+                              "Empty expression in rule: "
+                              + rules.substring(start, limit));
+            }
+
+            // Bounds of the key; narrowed below as context pieces are peeled off.
+            int keyStart = start;
+            int keyLimit = limit;
+
+            if (anteContext != null) {
+                // A leading CONTEXT_OPEN and a trailing CONTEXT_CLOSE are optional.
+                if (rules.charAt(keyStart) == CONTEXT_OPEN) {
+                    keyStart++;
+                }
+                if (rules.charAt(keyLimit-1) == CONTEXT_CLOSE) {
+                    keyLimit--;
+                }
+
+                // Remaining layouts:
+                //             key
+                // anteContext]key
+                // anteContext]key[postContext
+                //             key[postContext
+                int closePos = quotedIndexOf(rules, keyStart, keyLimit,
+                                             String.valueOf(CONTEXT_CLOSE));
+                int openPos = quotedIndexOf(rules, keyStart, keyLimit,
+                                            String.valueOf(CONTEXT_OPEN));
+
+                // The close of the preceding context may not follow the open
+                // of the following context.
+                if (closePos >= 0 && openPos >= 0 && closePos > openPos) {
+                    throw new IllegalArgumentException(
+                                  "Syntax error in context specifier: "
+                                  + rules.substring(keyStart, keyLimit));
+                }
+                if (closePos >= 0) {
+                    parseSubPattern(keyStart, closePos, anteContext);
+                    keyStart = closePos+1;
+                }
+                if (openPos >= 0) {
+                    parseSubPattern(openPos+1, keyLimit, postContext);
+                    keyLimit = openPos;
+                }
+            }
+
+            parseSubPattern(keyStart, keyLimit, text);
+        }
+
+        /**
+         * Parses a sub-pattern with no cursor array, using
+         * <code>SPECIALS</code> as the set of characters that must be quoted.
+         * Delegates to the five-argument overload.
+         */
+        private final void parseSubPattern(int start, int limit,
+                                           StringBuffer text) {
+            final int[] noCursor = null;
+            parseSubPattern(start, limit, text, noCursor, SPECIALS);
+        }
+
+        /**
+         * Parse a variable definition sub pattern.  This kind of sub
+         * pattern differs in the set of characters that are considered
+         * special.  In particular, the '[' and ']' characters are not
+         * special, since these are used in UnicodeSet patterns.
+         */
+        private final void parseDefPattern(int start, int limit,
+                                           StringBuffer text) {
+            // Same as an ordinary sub-pattern without a cursor, except that
+            // DEF_SPECIALS is the set of characters that must be quoted.
+            final int[] noCursor = null;
+            parseSubPattern(start, limit, text, noCursor, DEF_SPECIALS);
+        }
+
+        /**
+         * Parses the output pattern of a forward or reverse rule.  Given the
+         * output pattern, return the output text and the position of the cursor,
+         * if any.  Resolves all quotes and variables.
+         * The text parsed is taken from <code>rules</code>; it is not a parameter.
+         * @param start the beginning index, inclusive; <code>0 <= start
+         * <= limit</code>.
+         * @param limit the ending index, exclusive; <code>start <= limit
+         * <= rules.length()</code>.
+         * @param text the output text will be appended to this buffer
+         * @param cursorPos if this parameter is not null, then cursorPos[0]
+         * will be set to the cursor position, or -1 if there is none.  If this
+         * parameter is null, then cursors will be disallowed.
+         */
+        private final void parseOutputPattern(int start, int limit,
+                                              StringBuffer text,
+                                              int[] cursorPos) {
+            // An output pattern is a sub-pattern parsed with the caller's
+            // cursor array and SPECIALS as the must-quote set.
+            final String mustBeQuoted = SPECIALS;
+            parseSubPattern(start, limit, text, cursorPos, mustBeQuoted);
+        }
+
+        /**
+         * Parses a sub-pattern of a rule.  Return the text and the position of the cursor,
+         * if any.  Resolves all quotes and variables.
+         * The text parsed is taken from <code>rules</code>; it is not a parameter.
+         * @param start the beginning index, inclusive; <code>0 <= start
+         * <= limit</code>.
+         * @param limit the ending index, exclusive; <code>start <= limit
+         * <= rules.length()</code>.
+         * @param text the output text will be appended to this buffer
+         * @param cursorPos if this parameter is not null, then cursorPos[0]
+         * will be set to the cursor position, or -1 if there is none.  If this
+         * parameter is null, then cursors will be disallowed.
+         * @param specials characters that must be quoted; typically either
+         * SPECIALS or DEF_SPECIALS.
+         */
+        private void parseSubPattern(int start, int limit,
+                                     StringBuffer text,
+                                     int[] cursorPos,
+                                     String specials) {
+            boolean inQuote = false;
+
+            if (start >= limit) {
+                throw new IllegalArgumentException("Empty expression in rule");
+            }
+            if (cursorPos != null) {
+                // -1 means "no cursor seen yet".
+                cursorPos[0] = -1;
+            }
+            for (int i=start; i<limit; ++i) {
+                char c = rules.charAt(i);
+                if (c == QUOTE) {
+                    // Two adjacent quotes, inside or outside a quoted run,
+                    // stand for one literal quote.
+                    if ((i+1) < limit
+                        && rules.charAt(i+1) == QUOTE) {
+                        text.append(QUOTE);
+                        ++i; // Skip over both quotes
+                    } else {
+                        inQuote = !inQuote;
+                    }
+                } else if (inQuote) {
+                    // Everything inside quotes is literal text.
+                    text.append(c);
+                } else if (c == VARIABLE_REF_OPEN) {
+                    ++i;
+                    int j = rules.indexOf(VARIABLE_REF_CLOSE, i);
+                    // The reference must be non-empty and must close inside
+                    // [start, limit).  A closing delimiter found at or past
+                    // limit lies outside this sub-pattern, so the reference
+                    // is unterminated as far as this sub-pattern goes.
+                    if (i == j || j < 0 || j >= limit) {
+                        throw new IllegalArgumentException("Illegal variable reference: "
+                                                           + rules.substring(start, limit));
+                    }
+                    String name = rules.substring(i, j);
+                    validateVariableName(name);
+                    // The variable's single stand-in character is appended.
+                    text.append(getVariableDef(name).charValue());
+                    i = j; // Resume after the closing delimiter
+                } else if (c == CURSOR_POS && cursorPos != null) {
+                    if (cursorPos[0] >= 0) {
+                        throw new IllegalArgumentException("Multiple cursors: "
+                                                           + rules.substring(start, limit));
+                    }
+                    // The cursor offset is measured in the output buffer.
+                    cursorPos[0] = text.length();
+                } else if (specials.indexOf(c) >= 0) {
+                    throw new IllegalArgumentException("Unquoted special character: "
+                                                       + rules.substring(start, limit));
+                } else {
+                    text.append(c);
+                }
+            }
+        }
+
+        private static void validateVariableName(String name) {
+            if (indexOf(name, SPECIALS) >= 0) {
+                throw new IllegalArgumentException(
+                              "Special character in variable name: "
+                              + name);
+            }
+        }
+
+        /**
+         * Returns the single character value of the given variable name.  Defined
+         * names are recognized.
+         *
+         * NO LONGER SUPPORTED:
+         * If a Unicode category name is given, a standard character variable
+         * in the range firstCategoryVariable to lastCategoryVariable is returned,
+         * with value firstCategoryVariable + n, where n is the category
+         * number.
+         * @exception IllegalArgumentException if the name is unknown.
+         */
+        private Character getVariableDef(String name) {
+            Character ch = (Character) data.variableNames.get(name);
+//!         if (ch == null) {
+//!             int id = UnicodeSet.getCategoryID(name);
+//!             if (id >= 0) {
+//!                 ch = new Character((char) (firstCategoryVariable + id));
+//!                 data.variableNames.put(name, ch);
+//!                 data.setVariables.put(ch, new UnicodeSet(id));
+//!             }
+//!         }
+            if (ch == null) {
+                throw new IllegalArgumentException("Undefined variable: "
+                                                   + name);
+            }
+            return ch;
+        }
+
+        /**
+         * Determines what part of the private use region of Unicode we can use for
+         * variable stand-ins.  The correct way to do this is as follows: Parse each
+         * rule, and for forward and reverse rules, take the FROM expression, and
+         * make a hash of all characters used.  The TO expression should be ignored.
+         * When done, everything not in the hash is available for use.  In practice,
+         * this method may employ some other algorithm for improved speed.
+         */
+        private final void determineVariableRange() {
+            // Start from 0x1900 code units beginning at U+E000 (private use
+            // area) and keep the largest stretch the rules do not mention.
+            final Range privateUse = new Range('\uE000', 0x1900);
+            final Range unused = privateUse.largestUnusedSubrange(rules);
+
+            if (unused == null) {
+                throw new RuntimeException(
+                    "No private use characters available for variables");
+            }
+
+            variableNext = unused.start;
+            variableLimit = (char) (unused.start + unused.length);
+
+            // An empty range leaves nothing to hand out.
+            if (variableLimit <= variableNext) {
+                throw new RuntimeException(
+                        "Too few private use characters available for variables");
+            }
+        }
+
+        /**
+         * Returns the index of the first character in a set, ignoring quoted text.
+         * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
+         * found by a search for "h".  Unlike String.indexOf(), this method searches
+         * not for a single character, but for any character of the string
+         * <code>setOfChars</code>.
+         * @param text text to be searched
+         * @param start the beginning index, inclusive; <code>0 <= start
+         * <= limit</code>.
+         * @param limit the ending index, exclusive; <code>start <= limit
+         * <= text.length()</code>.
+         * @param setOfChars string with one or more distinct characters
+         * @return Offset of the first character in <code>setOfChars</code>
+         * found, or -1 if not found.
+         * @see #indexOf
+         */
+        private static int quotedIndexOf(String text, int start, int limit,
+                                         String setOfChars) {
+            for (int i=start; i<limit; ++i) {
+                char c = text.charAt(i);
+                if (c == QUOTE) {
+                    while (++i < limit
+                           && text.charAt(i) != QUOTE) {}
+                } else if (setOfChars.indexOf(c) >= 0) {
+                    return i;
+                }
+            }
+            return -1;
+        }
+
+        /**
+         * Returns the index of the first character in a set.  Unlike
+         * String.indexOf(), this method searches not for a single character, but
+         * for any character of the string <code>setOfChars</code>.
+         * @param text text to be searched
+         * @param start the beginning index, inclusive; <code>0 <= start
+         * <= limit</code>.
+         * @param limit the ending index, exclusive; <code>start <= limit
+         * <= text.length()</code>.
+         * @param setOfChars string with one or more distinct characters
+         * @return Offset of the first character in <code>setOfChars</code>
+         * found, or -1 if not found.
+         * @see #quotedIndexOf
+         */
+        private static int indexOf(String text, int start, int limit,
+                                   String setOfChars) {
+            int pos = start;
+            while (pos < limit) {
+                // Stop at the first character that belongs to the set.
+                if (setOfChars.indexOf(text.charAt(pos)) != -1) {
+                    return pos;
+                }
+                pos++;
+            }
+            return -1;
+        }
+
+        /**
+         * Returns the index of the first character in a set.  Unlike
+         * String.indexOf(), this method searches not for a single character, but
+         * for any character of the string <code>setOfChars</code>.
+         * @param text text to be searched
+         * @param setOfChars string with one or more distinct characters
+         * @return Offset of the first character in <code>setOfChars</code>
+         * found, or -1 if not found.
+         * @see #quotedIndexOf
+         */
+        private static int indexOf(String text, String setOfChars) {
+            return indexOf(text, 0, text.length(), setOfChars);
+        }
+
+
+
+        /**
+         * A range of Unicode characters.  Support the operations of testing for
+         * inclusion (does this range contain this character?) and splitting.
+         * Splitting involves breaking a range into two smaller ranges around a
+         * character inside the original range.  The split character is not included
+         * in either range.  If the split character is at either extreme end of the
+         * range, one of the split products is an empty range.
+         *
+         * This class is used internally to determine the largest available private
+         * use character range for variable stand-ins.
+         */
+        private static class Range implements Cloneable {
+            /** First character of the range. */
+            char start;
+            /** Number of characters in the range. */
+            int length;
+
+            Range(char start, int length) {
+                this.start = start;
+                this.length = length;
+            }
+
+            /**
+             * Returns a new, independent range with the same start and length.
+             */
+            public Object clone() {
+                return new Range(this.start, this.length);
+            }
+
+            /**
+             * Returns true if c lies within this range.
+             */
+            boolean contains(char c) {
+                final int offset = c - start;
+                return offset >= 0 && offset < length;
+            }
+
+            /**
+             * Splits this range around c, which is assumed to satisfy
+             * contains(c).  This range is modified in place to become the
+             * part before c (or, when c is the first character, the part
+             * after it).  The part after c is returned as a new range.  The
+             * character c belongs to neither part.  Returns null when c is the
+             * first or last character of the range, since then only one
+             * non-trivial part remains and it is this range.
+             */
+            Range split(char c) {
+                final int offset = c - start;
+                if (offset == 0) {
+                    // Drop the first character.
+                    ++start;
+                    --length;
+                    return null;
+                }
+                if (offset == length - 1) {
+                    // Drop the last character.
+                    --length;
+                    return null;
+                }
+                // Interior character: the tail becomes a new range and this
+                // range keeps the head.
+                final Range tail = new Range((char) (c + 1), length - offset - 1);
+                length = offset;
+                return tail;
+            }
+
+            /**
+             * Finds the largest subrange of this range that the given string
+             * does not use, that is, that contains none of the string's
+             * characters.  This range itself is not modified; the work is done
+             * on a copy.  If the string uses no character of this range, the
+             * result covers the whole range.  Among equally long candidates
+             * the one produced earliest is returned.
+             */
+            Range largestUnusedSubrange(String str) {
+                final int strLength = str.length();
+
+                // Pieces of this range not yet hit by any character of str.
+                Vector pieces = new Vector(1);
+                pieces.addElement(clone());
+
+                for (int i = 0; i < strLength; ++i) {
+                    final char ch = str.charAt(i);
+                    if (!contains(ch)) {
+                        continue;
+                    }
+                    // Find the piece holding ch, if one remains, and split it.
+                    for (int k = 0; k < pieces.size(); ++k) {
+                        Range piece = (Range) pieces.elementAt(k);
+                        if (!piece.contains(ch)) {
+                            continue;
+                        }
+                        Range tail = piece.split(ch);
+                        if (tail != null) {
+                            pieces.addElement(tail);
+                        }
+                        break;
+                    }
+                }
+
+                // Pick the longest piece; the first one wins ties.
+                Range longest = null;
+                final int pieceCount = pieces.size();
+                for (int k = 0; k < pieceCount; ++k) {
+                    Range candidate = (Range) pieces.elementAt(k);
+                    if (longest == null || candidate.length > longest.length) {
+                        longest = candidate;
+                    }
+                }
+
+                return longest;
+            }
+        }
+    }
+}
diff --git a/icu4j/src/com/ibm/text/TransliterationRule.java b/icu4j/src/com/ibm/text/TransliterationRule.java
new file mode 100755
index 00000000000..383c77ed340
--- /dev/null
+++ b/icu4j/src/com/ibm/text/TransliterationRule.java
@@ -0,0 +1,530 @@
+package com.ibm.text;
+
+import java.util.Dictionary;
+
+/**
+ * A transliteration rule used by
+ * <code>RuleBasedTransliterator</code>.
+ * <code>TransliterationRule</code> is an immutable object.
+ *
+ * <p>A rule consists of an input pattern and an output string.  When
+ * the input pattern is matched, the output string is emitted.  The
+ * input pattern consists of zero or more characters which are matched
+ * exactly (the key) and optional context.  Context must match if it
+ * is specified.  Context may be specified before the key, after the
+ * key, or both.  The key, preceding context, and following context
+ * may contain variables.  Variables represent a set of Unicode
+ * characters, such as the letters <i>a</i> through <i>z</i>.
+ * Variables are detected by looking up each character in a supplied
+ * variable list to see if it has been so defined. 
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+class TransliterationRule {
+    /**
+     * Constant returned by <code>getMatchDegree()</code> indicating a mismatch
+     * between the text and this rule.  One or more characters of the context or
+     * key do not match the text.
+     * @see #getMatchDegree
+     */
+    public static final int MISMATCH      = 0;
+
+    /**
+     * Constant returned by <code>getMatchDegree()</code> indicating a partial
+     * match between the text and this rule.  All characters of the text match
+     * the corresponding context or key, but more characters are required for a
+     * complete match.  There are some key or context characters at the end of
+     * the pattern that remain unmatched because the text isn't long enough.
+     * @see #getMatchDegree
+     */
+    public static final int PARTIAL_MATCH = 1;
+
+    /**
+     * Constant returned by <code>getMatchDegree()</code> indicating a complete
+     * match between the text and this rule.  The text matches all context and
+     * key characters.
+     * @see #getMatchDegree
+     */
+    public static final int FULL_MATCH    = 2;
+
+    /**
+     * The string that must be matched.
+     */
+    private String key;
+
+    /**
+     * The string that is emitted if the key, anteContext, and postContext
+     * are matched.
+     */
+    private String output;
+
+    /**
+     * The string that must match before the key.  Must not be the empty string.
+     * May be null; if null, then there is no matching requirement before the
+     * key.
+     */
+    private String anteContext;
+
+    /**
+     * The string that must match after the key.  Must not be the empty string.
+     * May be null; if null, then there is no matching requirement after the
+     * key.
+     */
+    private String postContext;
+
+    /**
+     * The position of the cursor after emitting the output string, from 0 to
+     * output.length().  For most rules with no special cursor specification,
+     * the cursorPos is output.length().
+     */
+    private int cursorPos;
+
+    /**
+     * A string used to implement masks().
+     */
+    private String maskKey;
+
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Construct a new rule with the given key, output text, and other
+     * attributes.  Zero, one, or two context strings may be specified.  A
+     * cursor position may be specified for the output text.
+     * @param key the string to match
+     * @param output the string to produce when the <code>key</code> is seen
+     * @param anteContext if not null and not empty, then it must be matched
+     * before the <code>key</code>
+     * @param postContext if not null and not empty, then it must be matched
+     * after the <code>key</code>
+     * @param cursorPos a position for the cursor after the <code>output</code>
+     * is emitted.  If less than zero, then the cursor is placed after the
+     * <code>output</code>; that is, -1 is equivalent to
+     * <code>output.length()</code>.  If greater than
+     * <code>output.length()</code> then an exception is thrown.
+     * @exception IllegalArgumentException if the cursor position is out of
+     * range.
+     */
+    public TransliterationRule(String key, String output,
+                               String anteContext, String postContext,
+                               int cursorPos) {
+        this.key = key;
+        this.output = output;
+        this.anteContext = (anteContext != null && anteContext.length() > 0)
+            ? anteContext : null;
+        this.postContext = (postContext != null && postContext.length() > 0)
+            ? postContext : null;
+        this.cursorPos = cursorPos < 0 ? output.length() : cursorPos;
+        if (this.cursorPos > output.length()) {
+            throw new IllegalArgumentException("Illegal cursor position");
+        }
+
+        /* The mask key is needed when we are adding individual rules to a rule
+         * set, for performance.  Here are the numbers: Without mask key, 13.0
+         * seconds.  With mask key, 6.2 seconds.  However, once the rules have
+         * been added to the set, then they can be discarded to free up space.
+         * This is what the freeze() method does.  After freeze() has been
+         * called, the method masks() must NOT be called.
+         */
+        maskKey = key;
+        if (postContext != null) {
+            maskKey += postContext;
+        }
+    }
+
+    /**
+     * Return the length of the key.  Equivalent to <code>getKey().length()</code>.
+     * @return the length of the match key.
+     */
+    public int getKeyLength() {
+        final String matchKey = this.key;
+        return matchKey.length();
+    }
+
+    /**
+     * Return the key.
+     * @return the match key.
+     */
+    public String getKey() {
+        return this.key;
+    }
+
+    /**
+     * Return the output string.
+     * @return the output string.
+     */
+    public String getOutput() {
+        return this.output;
+    }
+
+    /**
+     * Return the position of the cursor within the output string.
+     * @return a value from 0 to <code>getOutput().length()</code>, inclusive.
+     */
+    public int getCursorPos() {
+        return this.cursorPos;
+    }
+
+    /**
+     * Return the preceding context length.  This method is needed to
+     * support the <code>Transliterator</code> method
+     * <code>getMaximumContextLength()</code>.
+     */
+    public int getAnteContextLength() {
+        // A null preceding context counts as length zero.
+        if (anteContext == null) {
+            return 0;
+        }
+        return anteContext.length();
+    }
+
+    /**
+     * Return true if this rule masks another rule.  If r1 masks r2 then
+     * r1 matches any input string that r2 matches.  If r1 masks r2 and r2 masks
+     * r1 then r1 == r2.  Examples: "a>x" masks "ab>y".  "a>x" masks "a[b]>y".
+     * "[c]a>x" masks "[dc]a>y".
+     *
+     * <p>This method must not be called after freeze() is called.
+     */
+    public boolean masks(TransliterationRule r2) {
+        /* There are three cases of masking.  In each instance, rule1
+         * masks rule2.
+         *
+         * 1. KEY mask: len(key1) < len(key2), key2 starts with key1.
+         *
+         * 2. PREFIX mask: key1 == key2, len(prefix1) < len(prefix2),
+         * prefix2 ends with prefix1, suffix2 starts with suffix1.
+         *
+         * 3. SUFFIX mask: key1 == key2, len(suffix1) < len(suffix2),
+         * prefix2 ends with prefix1, suffix2 starts with suffix1.
+         *
+         * LIMITATION of the current mask algorithm: Some rule
+         * maskings are currently not detected.  For example,
+         * "{Lu}]a>x" masks "A]a>y".  To detect these sorts of masking,
+         * we need a subset operator on UnicodeSet objects, which we
+         * currently do not have.  This can be added later.
+         */
+
+        // This mask key is a proper prefix of the other rule's mask key.
+        if (maskKey.length() < r2.maskKey.length()
+            && r2.maskKey.startsWith(maskKey)) {
+            return true;
+        }
+
+        // Otherwise the mask keys must be equal and the other rule must
+        // have a preceding context.
+        if (r2.anteContext == null || !maskKey.equals(r2.maskKey)) {
+            return false;
+        }
+
+        // No preceding context here, so any preceding context there is masked.
+        if (anteContext == null) {
+            return true;
+        }
+
+        // This preceding context is a proper suffix of the other one.
+        return anteContext.length() < r2.anteContext.length()
+            && r2.anteContext.endsWith(anteContext);
+    }
+
+    /**
+     * Free up space.  Once this method is called, masks() must NOT be called.
+     * If it is called, an exception will be thrown.
+     */
+    public void freeze() {
+        // Drop the mask key; masks() dereferences it, so masks() must not
+        // be called afterwards.
+        this.maskKey = null;
+    }
+
+    /**
+     * Return a string representation of this object.
+     * @return string representation of this object
+     */
+    public String toString() {
+        return getClass().getName() + '['
+            + escape((anteContext != null ? ("[" + anteContext + ']') : "")
+            + key
+            + (postContext != null ? ("[" + postContext + ']') : "")
+            + " -> "
+            + (cursorPos < output.length()
+               ? (output.substring(0, cursorPos) + '|' + output.substring(cursorPos))
+               : output))
+            + ']';
+    }
+
+    /**
+     * Return true if this rule matches the given text.  The text being matched
+     * occupies a virtual buffer consisting of the contents of
+     * <code>result</code> concatenated to a substring of <code>text</code>.
+     * The substring is specified by <code>start</code> and <code>limit</code>.
+     * The value of <code>cursor</code> is an index into this virtual buffer,
+     * from 0 to the length of the buffer.  In terms of the parameters,
+     * <code>cursor</code> must be between 0 and <code>result.length() + limit -
+     * start</code>.
+     * @param text the untranslated text
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param result translated text so far
+     * @param cursor position at which to translate next, an offset into result.
+     * If greater than or equal to result.length(), represents offset start +
+     * cursor - result.length() into text.
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     */
+    public boolean matches(String text, int start, int limit,
+                           StringBuffer result, int cursor,
+                           Dictionary variables,
+                           UnicodeFilter filter) {
+        // Preceding context, if any, must match just before the cursor.
+        if (anteContext != null
+            && !regionMatches(text, start, limit, result,
+                              cursor - anteContext.length(),
+                              anteContext, variables, filter)) {
+            return false;
+        }
+
+        // The key must match at the cursor.
+        if (!regionMatches(text, start, limit, result, cursor,
+                           key, variables, filter)) {
+            return false;
+        }
+
+        // Following context, if any, must match right after the key.
+        if (postContext == null) {
+            return true;
+        }
+        return regionMatches(text, start, limit, result,
+                             cursor + key.length(),
+                             postContext, variables, filter);
+    }
+
+    /**
+     * Test whether this rule applies to <code>text</code> at <code>cursor</code>:
+     * preceding context (if any), key, and following context (if any) all match.
+     * @param text the text, both translated and untranslated
+     * @param start the beginning index, inclusive; <code>0 <= start <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param cursor position at which to translate next, an offset into text
+     * between <code>start</code> and <code>limit</code>.
+     * @param variables maps <code>Character</code> to <code>UnicodeSet</code>
+     * @param filter the filter.  Characters for which <tt>filter.isIn()</tt>
+     * returns <tt>false</tt> will not be altered; <tt>null</tt> disables it.
+     * @return true if the contexts and key all match
+     */
+    public boolean matches(Replaceable text, int start, int limit,
+                           int cursor, Dictionary variables, UnicodeFilter filter) {
+        if (anteContext != null
+            && !regionMatches(text, start, limit, cursor - anteContext.length(),
+                              anteContext, variables, filter)) {
+            return false;
+        }
+        if (!regionMatches(text, start, limit, cursor, key, variables, filter)) {
+            return false;
+        }
+        return postContext == null
+            || regionMatches(text, start, limit, cursor + key.length(),
+                             postContext, variables, filter);
+    }
+
+    /**
+     * Classify how well this rule matches the text at <code>cursor</code>.
+     * The result is a mismatch when the preceding context does not match in
+     * full or when some text character contradicts the key or following
+     * context; a partial match when the text runs out at <code>limit</code>
+     * before the key or following context is exhausted; and a full match when
+     * the contexts and key match completely.
+     * @param text the text, both translated and untranslated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param cursor position at which to translate next, an offset into text
+     * between <code>start</code> and <code>limit</code>.
+     * @param variables a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter.  Characters for which <tt>filter.isIn()</tt>
+     * returns <tt>false</tt> will not be altered; <tt>null</tt> disables it.
+     * @return one of <code>MISMATCH</code>, <code>PARTIAL_MATCH</code>, or
+     * <code>FULL_MATCH</code>.
+     * @see #MISMATCH
+     * @see #PARTIAL_MATCH
+     * @see #FULL_MATCH
+     */
+    public int getMatchDegree(Replaceable text, int start, int limit,
+                              int cursor, Dictionary variables,
+                              UnicodeFilter filter) {
+        // The preceding context is all-or-nothing; it is never partially matched.
+        boolean anteMatches = anteContext == null
+            || regionMatches(text, start, limit, cursor - anteContext.length(),
+                             anteContext, variables, filter);
+        if (!anteMatches) {
+            return MISMATCH;
+        }
+        int keyLen = getRegionMatchLength(text, start, limit, cursor,
+                                          key, variables, filter);
+        if (keyLen < 0) {
+            return MISMATCH;
+        } else if (keyLen < key.length()) {
+            return PARTIAL_MATCH;
+        } else if (postContext == null) {
+            return FULL_MATCH;
+        }
+        int postLen = getRegionMatchLength(text, start, limit,
+                                           cursor + key.length(),
+                                           postContext, variables, filter);
+        if (postLen < 0) {
+            return MISMATCH;
+        }
+        return (postLen == postContext.length()) ? FULL_MATCH : PARTIAL_MATCH;
+    }
+
+    /**
+     * Return true if a template matches the text.  The entire length of the
+     * template is compared to the text at the cursor.  As in
+     * <code>matches()</code>, the text being matched occupies a virtual buffer
+     * consisting of the contents of <code>result</code> concatenated to a
+     * substring of <code>text</code>.  See <code>matches()</code> for details.
+     * @param text the untranslated text
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param result translated text so far
+     * @param cursor position at which to translate next, an offset into result.
+     * If greater than or equal to result.length(), represents offset start +
+     * cursor - result.length() into text.
+     * @param template the text to match against.  All characters must match.
+     * @param variables a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return true if there is a match
+     */
+    protected static boolean regionMatches(String text, int start, int limit,
+                                           StringBuffer result, int cursor,
+                                           String template,
+                                           Dictionary variables,
+                                           UnicodeFilter filter) {
+        int rlen = result.length();
+        if (cursor < 0
+            || (cursor + template.length()) > (rlen + limit - start)) {
+            return false;
+        }
+        for (int i=0; i<template.length(); ++i, ++cursor) {
+            if (!charMatches(template.charAt(i),
+                             cursor < rlen ? result.charAt(cursor)
+                                           : text.charAt(cursor - rlen + start),
+                             variables, filter)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Return true if the whole of <code>template</code> matches the text
+     * beginning at <code>cursor</code>.  A template that would begin before
+     * <code>start</code> or extend past <code>limit</code> does not match, so
+     * <code>cursor</code> may safely lie outside that range.
+     * @param text the text, both translated and untranslated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param cursor position at which to start matching, representing offset
+     * into text.
+     * @param template the text to match against.  All characters must match.
+     * @param variables a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be altered by
+     * this transliterator.  If <tt>null</tt> then no filtering is applied.
+     * @return true if there is a match
+     */
+    protected static boolean regionMatches(Replaceable text, int start, int limit,
+                                           int cursor, String template,
+                                           Dictionary variables, UnicodeFilter filter) {
+        // The whole template must fit between cursor and limit.
+        boolean inBounds = cursor >= start && cursor + template.length() <= limit;
+        if (!inBounds) {
+            return false;
+        }
+        for (int offset = 0; offset < template.length(); ++offset) {
+            char textChar = text.charAt(cursor + offset);
+            if (!charMatches(template.charAt(offset), textChar, variables, filter)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Return the number of characters of the text that match the template,
+     * comparing from <code>cursor</code> until the template or the text (at
+     * <code>limit</code>) runs out.  If there is a mismatch, return -1.  If
+     * the text is not long enough to match any characters, return 0.
+     * @param text the text, both translated and untranslated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param cursor position at which to start matching, representing offset
+     * into text.  A value less than <code>start</code> is a mismatch.
+     * @param template the text to match against.
+     * @param variables a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be altered by
+     * this transliterator.  If <tt>null</tt> then no filtering is applied.
+     * @return -1 if there is a mismatch, 0 if the text is not long enough to
+     * match any characters, otherwise the number of characters of text that
+     * match the template.
+     */
+    protected static int getRegionMatchLength(Replaceable text, int start,
+                                              int limit, int cursor,
+                                              String template, Dictionary variables,
+                                              UnicodeFilter filter) {
+        if (cursor < start) {
+            return -1;
+        }
+        int matched = 0;
+        while (matched < template.length() && cursor + matched < limit) {
+            if (!charMatches(template.charAt(matched),
+                             text.charAt(cursor + matched),
+                             variables, filter)) {
+                return -1;
+            }
+            ++matched;
+        }
+        return matched;
+    }
+
+    /**
+     * Return true if the given key character matches the given text
+     * character.  A key character mapped to a <code>UnicodeSet</code> by
+     * <code>variables</code> matches any member of that set; any other key
+     * character matches only itself, so the arguments are not interchangeable.
+     * @param keyChar a character in the match key
+     * @param textChar a character in the text being transliterated
+     * @param variables a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter, or <tt>null</tt> for none; a text character for
+     * which <tt>filter.isIn()</tt> returns <tt>false</tt> never matches.
+     * @return true if <code>textChar</code> passes the filter and matches
+     */
+    protected static boolean charMatches(char keyChar, char textChar,
+                                         Dictionary variables, UnicodeFilter filter) {
+        // Filtered-out characters never match; check first so the set lookup is skipped.
+        if (filter != null && !filter.isIn(textChar)) {
+            return false;
+        }
+        UnicodeSet set = (UnicodeSet) variables.get(new Character(keyChar));
+        return (set == null) ? (keyChar == textChar) : set.contains(textChar);
+    }
+
+    /**
+     * Escape non-ASCII characters as Unicode.  Characters from space through
+     * U+007F are copied unchanged; every other character is written as a
+     * backslash, 'u', and four lowercase hexadecimal digits.
+     */
+    public static final String escape(String s) {
+        StringBuffer out = new StringBuffer();
+        final int n = s.length();
+        for (int pos = 0; pos < n; ++pos) {
+            char ch = s.charAt(pos);
+            boolean plain = (ch >= ' ' && ch <= 0x007F);
+            if (plain) {
+                out.append(ch);
+                continue;
+            }
+            // Left-pad the hex digits with zeros to a fixed width of four.
+            String hex = Integer.toHexString(ch);
+            out.append("\\u");
+            for (int pad = hex.length(); pad < 4; ++pad) {
+                out.append('0');
+            }
+            out.append(hex);
+        }
+        return out.toString();
+    }
+}
diff --git a/icu4j/src/com/ibm/text/TransliterationRuleSet.java b/icu4j/src/com/ibm/text/TransliterationRuleSet.java
new file mode 100755
index 00000000000..d57bf75464a
--- /dev/null
+++ b/icu4j/src/com/ibm/text/TransliterationRuleSet.java
@@ -0,0 +1,218 @@
+package com.ibm.text;
+
+import java.util.*;
+
+/**
+ * A set of rules for a <code>RuleBasedTransliterator</code>.  This set encodes
+ * the transliteration in one direction from one set of characters or short
+ * strings to another.  A <code>RuleBasedTransliterator</code> consists of up to
+ * two such sets, one for the forward direction, and one for the reverse.
+ *
+ * <p>A <code>TransliterationRuleSet</code> has one important operation, that of
+ * finding a matching rule at a given point in the text.  This is accomplished
+ * by the <code>findMatch()</code> method.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: TransliterationRuleSet.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+class TransliterationRuleSet {
+    /* Note: An earlier implementation indexed rules by the first letter of
+     * the key.  The problem is that a key may not have a meaningful first
+     * letter; e.g., {Lu}>*.  One solution is to keep a separate vector of all
+     * rules whose initial key letter is a category variable.  However, those
+     * rules must still be kept in order with respect to the other rules.
+     * One solution -- add a sequence number to each rule.  Do the usual
+     * first-letter lookup, and also a lookup from the spare bin with rules like
+     * {Lu}>*.  Take the lower sequence number.  This seems complex and not
+     * worth the trouble, but we may revisit this later.  The old code was kept
+     * for documentation (or possible resurrection) under the remark
+     * "// OLD INDEXED IMPLEMENTATION" but does not appear in this class.  Under
+     * the old implementation, <code>rules</code> was a Hashtable, not a Vector.
+     */
+
+    /**
+     * The rules, in the order they were added.
+     */
+    private Vector rules;
+
+    /**
+     * Length of the longest preceding context among the rules added so far.
+     */
+    private int maxContextLength;
+
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Construct a rule set that contains no rules.
+     */
+    public TransliterationRuleSet() {
+        maxContextLength = 0;
+        rules = new Vector();
+    }
+
+    /**
+     * Return the maximum context length.
+     * @return the length of the longest preceding context of any rule added.
+     */
+    public int getMaximumContextLength() {
+        return maxContextLength;
+    }
+
+    /**
+     * Append a rule to this set.  Rules are consulted in the order added, so
+     * a rule that would be masked by an earlier one is rejected.
+     *
+     * <p>Once freeze() is called, this method must not be called.
+     * @param rule the rule to add
+     * @exception IllegalArgumentException if an earlier rule masks it
+     */
+    public void addRule(TransliterationRule rule) {
+        // The masking check below roughly doubles rule-set build time:
+        //   Build time, no checking  : 3562 ms
+        //   Build time, with checking: 6234 ms
+        for (Enumeration e = rules.elements(); e.hasMoreElements(); ) {
+            TransliterationRule earlier = (TransliterationRule) e.nextElement();
+            if (earlier.masks(rule)) {
+                throw new IllegalArgumentException("Rule " + rule +
+                                                   " must precede " + earlier);
+            }
+        }
+
+        rules.addElement(rule);
+        int anteLen = rule.getAnteContextLength();
+        if (anteLen > maxContextLength) {
+            maxContextLength = anteLen;
+        }
+    }
+
+    /**
+     * Free up space by freezing each rule in this set.  Once this method is
+     * called, addRule() must NOT be called again.
+     */
+    public void freeze() {
+        for (Enumeration e = rules.elements(); e.hasMoreElements(); ) {
+            ((TransliterationRule) e.nextElement()).freeze();
+        }
+    }
+
+    /**
+     * Attempt to find a matching rule at the specified point in the text.
+     * Rules are tried in the order they were added and the first match wins.
+     * The text being matched occupies a virtual buffer consisting of the
+     * contents of <code>result</code> concatenated to a substring of
+     * <code>text</code>, specified by <code>start</code> and
+     * <code>limit</code>.  The value of <code>cursor</code> is an index into
+     * this virtual buffer, between 0 and <code>result.length() + limit -
+     * start</code>.
+     * @param text the untranslated text
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param result translated text
+     * @param cursor position at which to translate next, an offset into result.
+     * If greater than or equal to result.length(), represents offset start +
+     * cursor - result.length() into text.
+     * @param variables a dictionary mapping variables to the sets they
+     * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be altered by
+     * this transliterator.  If <tt>null</tt> then no filtering is applied.
+     * @return the matching rule, or null if none found.
+     */
+    public TransliterationRule findMatch(String text, int start, int limit,
+                                         StringBuffer result, int cursor,
+                                         Dictionary variables,
+                                         UnicodeFilter filter) {
+        for (int i = 0; i < rules.size(); ++i) {
+            TransliterationRule candidate = (TransliterationRule) rules.elementAt(i);
+            if (candidate.matches(text, start, limit, result, cursor,
+                                  variables, filter)) {
+                return candidate;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Attempt to find a matching rule at the specified point in the text.
+     * Rules are tried in the order they were added and the first match wins.
+     * @param text the text, both translated and untranslated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param cursor position at which to translate next, representing offset
+     * into text.  This value must be between <code>start</code> and
+     * <code>limit</code>.
+     * @param variables a dictionary mapping variables to the sets they
+     * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return the matching rule, or null if none found.
+     */
+    public TransliterationRule findMatch(Replaceable text, int start, int limit,
+                                         int cursor, Dictionary variables,
+                                         UnicodeFilter filter) {
+        for (int i = 0; i < rules.size(); ++i) {
+            TransliterationRule candidate = (TransliterationRule) rules.elementAt(i);
+            if (candidate.matches(text, start, limit, cursor, variables, filter)) {
+                return candidate;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Attempt to find a matching rule at the specified point in the text.
+     * Unlike <code>findMatch()</code>, this method does an incremental match:
+     * rules are examined in order and the search ends at the first rule that
+     * matches fully or partially.  A partial match might pre-empt a later full
+     * match once more text arrives, so in that case null is returned.  A
+     * non-null result indicates a full match that cannot be pre-empted by a
+     * partial match regardless of what text is added to the translation buffer.
+     * @param text the text, both translated and untranslated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param cursor position at which to translate next, representing offset
+     * into text.  This value must be between <code>start</code> and
+     * <code>limit</code>.
+     * @param variables a dictionary mapping variables to the sets they
+     * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
+     * @param partial output parameter.  <code>partial[0]</code> is set to true
+     * if null is returned because of a partial match, and to false otherwise.
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be altered by
+     * this transliterator.  If <tt>null</tt> then no filtering is applied.
+     * @return the matching rule, or null if none found, or if the text buffer
+     * does not have enough text yet to unambiguously match a rule.
+     */
+    public TransliterationRule findIncrementalMatch(Replaceable text, int start,
+                                                    int limit, int cursor,
+                                                    Dictionary variables,
+                                                    boolean partial[],
+                                                    UnicodeFilter filter) {
+        partial[0] = false;
+        for (int i = 0; i < rules.size(); ++i) {
+            TransliterationRule candidate = (TransliterationRule) rules.elementAt(i);
+            int degree = candidate.getMatchDegree(text, start, limit, cursor,
+                                                  variables, filter);
+            if (degree == TransliterationRule.FULL_MATCH) {
+                return candidate;
+            }
+            if (degree == TransliterationRule.PARTIAL_MATCH) {
+                // An earlier rule could still complete; wait for more text.
+                partial[0] = true;
+                return null;
+            }
+        }
+        return null;
+    }
+}
diff --git a/icu4j/src/com/ibm/text/Transliterator.java b/icu4j/src/com/ibm/text/Transliterator.java
new file mode 100755
index 00000000000..83171a961e7
--- /dev/null
+++ b/icu4j/src/com/ibm/text/Transliterator.java
@@ -0,0 +1,860 @@
+package com.ibm.text;
+
+import java.util.*;
+import java.text.MessageFormat;
+
+/**
+ * <code>Transliterator</code> is an abstract class that
+ * transliterates text from one format to another.  The most common
+ * kind of transliterator is a script, or alphabet, transliterator.
+ * For example, a Russian to Latin transliterator changes Russian text
+ * written in Cyrillic characters to phonetically equivalent Latin
+ * characters.  It does not <em>translate</em> Russian to English!
+ * Transliteration, unlike translation, operates on characters, without
+ * reference to the meanings of words and sentences.
+ *
+ * <p>Although script conversion is its most common use, a
+ * transliterator can actually perform a more general class of tasks.
+ * In fact, <code>Transliterator</code> defines a very general API
+ * which specifies only that a segment of the input text is replaced
+ * by new text.  The particulars of this conversion are determined
+ * entirely by subclasses of <code>Transliterator</code>.
+ *
+ * <p><b>Transliterators are stateless</b>
+ *
+ * <p><code>Transliterator</code> objects are <em>stateless</em>; they
+ * retain no information between calls to
+ * <code>transliterate()</code>.  As a result, threads may share
+ * transliterators without synchronizing them.  This might seem to
+ * limit the complexity of the transliteration operation.  In
+ * practice, subclasses perform complex transliterations by delaying
+ * the replacement of text until it is known that no other
+ * replacements are possible.  In other words, although the
+ * <code>Transliterator</code> objects are stateless, the source text
+ * itself embodies all the needed information, and delayed operation
+ * allows arbitrary complexity.
+ *
+ * <p><b>Batch transliteration</b>
+ *
+ * <p>The simplest way to perform transliteration is all at once, on a
+ * string of existing text.  This is referred to as <em>batch</em>
+ * transliteration.  For example, given a string <code>input</code>
+ * and a transliterator <code>t</code>, the call
+ *
+ * <blockquote><code>String result = t.transliterate(input);
+ * </code></blockquote>
+ *
+ * will transliterate it and return the result.  Other methods allow
+ * the client to specify a substring to be transliterated and to use
+ * {@link Replaceable} objects instead of strings, in order to
+ * preserve out-of-band information (such as text styles).
+ *
+ * <p><b>Keyboard transliteration</b>
+ *
+ * <p>Somewhat more involved is <em>keyboard</em>, or incremental
+ * transliteration.  This is the transliteration of text that is
+ * arriving from some source (typically the user's keyboard) one
+ * character at a time, or in some other piecemeal fashion.
+ *
+ * <p>In keyboard transliteration, a <code>Replaceable</code> buffer
+ * stores the text.  As text is inserted, as much as possible is
+ * transliterated on the fly.  This means a GUI that displays the
+ * contents of the buffer may show text being modified as each new
+ * character arrives.
+ *
+ * <p>Consider the simple <code>RuleBasedTransliterator</code>:
+ *
+ * <blockquote><code>
+ * th&gt;{theta}<br>
+ * t&gt;{tau}
+ * </code></blockquote>
+ *
+ * When the user types 't', nothing will happen, since the
+ * transliterator is waiting to see if the next character is 'h'.  To
+ * remedy this, we introduce the notion of a cursor, marked by a '|'
+ * in the output string:
+ *
+ * <blockquote><code>
+ * t&gt;|{tau}<br>
+ * {tau}h&gt;{theta}
+ * </code></blockquote>
+ *
+ * Now when the user types 't', tau appears, and if the next character
+ * is 'h', the tau changes to a theta.  This is accomplished by
+ * maintaining a cursor position (independent of the insertion point,
+ * and invisible in the GUI) across calls to
+ * <code>keyboardTransliterate()</code>.  Typically, the cursor will
+ * be coincident with the insertion point, but in a case like the one
+ * above, it will precede the insertion point.
+ *
+ * <p>Keyboard transliteration methods maintain a set of three indices
+ * that are updated with each call to
+ * <code>keyboardTransliterate()</code>, including the cursor, start,
+ * and limit.  Since these indices are changed by the method, they are
+ * passed in an <code>int[]</code> array. The <code>START</code> index
+ * marks the beginning of the substring that the transliterator will
+ * look at.  It is advanced as text becomes committed (but it is not
+ * the committed index; that's the <code>CURSOR</code>).  The
+ * <code>CURSOR</code> index, described above, marks the point at
+ * which the transliterator last stopped, either because it reached
+ * the end, or because it required more characters to disambiguate
+ * between possible inputs.  The <code>CURSOR</code> can also be
+ * explicitly set by rules in a <code>RuleBasedTransliterator</code>.
+ * Any characters before the <code>CURSOR</code> index are frozen;
+ * future keyboard transliteration calls within this input sequence
+ * will not change them.  New text is inserted at the
+ * <code>LIMIT</code> index, which marks the end of the substring that
+ * the transliterator looks at.
+ *
+ * <p>Because keyboard transliteration assumes that more characters
+ * are to arrive, it is conservative in its operation.  It only
+ * transliterates when it can do so unambiguously.  Otherwise it waits
+ * for more characters to arrive.  When the client code knows that no
+ * more characters are forthcoming, perhaps because the user has
+ * performed some input termination operation, then it should call
+ * <code>finishKeyboardTransliteration()</code> to complete any
+ * pending transliterations.
+ *
+ * <p><b>Inverses</b>
+ *
+ * <p>Pairs of transliterators may be inverses of one another.  For
+ * example, if transliterator <b>A</b> transliterates characters by
+ * incrementing their Unicode value (so "abc" -> "def"), and
+ * transliterator <b>B</b> decrements character values, then <b>A</b>
+ * is an inverse of <b>B</b> and vice versa.  If we compose <b>A</b>
+ * with <b>B</b> in a compound transliterator, the result is the
+ * identity transliterator, that is, a transliterator that does not
+ * change its input text.
+ *
+ * The <code>Transliterator</code> method <code>getInverse()</code>
+ * returns a transliterator's inverse, if one exists, or
+ * <code>null</code> otherwise.  However, the result of
+ * <code>getInverse()</code> usually will <em>not</em> be a true
+ * mathematical inverse.  This is because true inverse transliterators
+ * are difficult to formulate.  For example, consider two
+ * transliterators: <b>AB</b>, which transliterates the character 'A'
+ * to 'B', and <b>BA</b>, which transliterates 'B' to 'A'.  It might
+ * seem that these are exact inverses, since
+ *
+ * <blockquote>"A" x <b>AB</b> -> "B"<br>
+ * "B" x <b>BA</b> -> "A"</blockquote>
+ *
+ * where 'x' represents transliteration.  However,
+ *
+ * <blockquote>"ABCD" x <b>AB</b> -> "BBCD"<br>
+ * "BBCD" x <b>BA</b> -> "AACD"</blockquote>
+ *
+ * so <b>AB</b> composed with <b>BA</b> is not the
+ * identity. Nonetheless, <b>BA</b> may be usefully considered to be
+ * <b>AB</b>'s inverse, and it is on this basis that
+ * <b>AB</b><code>.getInverse()</code> could legitimately return
+ * <b>BA</b>.
+ *
+ * <p><b>IDs and display names</b>
+ *
+ * <p>A transliterator is designated by a short identifier string or
+ * <em>ID</em>.  IDs follow the format <em>source-destination</em>,
+ * where <em>source</em> describes the entity being replaced, and
+ * <em>destination</em> describes the entity replacing
+ * <em>source</em>.  The entities may be the names of scripts,
+ * particular sequences of characters, or whatever else it is that the
+ * transliterator converts to or from.  For example, a transliterator
+ * from Russian to Latin might be named "Russian-Latin".  A
+ * transliterator from keyboard escape sequences to Latin-1 characters
+ * might be named "KeyboardEscape-Latin1".  By convention, system
+ * entity names are in English, with the initial letters of words
+ * capitalized; user entity names may follow any format so long as
+ * they do not contain dashes.
+ *
+ * <p>In addition to programmatic IDs, transliterator objects have
+ * display names for presentation in user interfaces, returned by
+ * {@link #getDisplayName}.
+ *
+ * <p><b>Factory methods and registration</b>
+ *
+ * <p>In general, client code should use the factory method
+ * <code>getInstance()</code> to obtain an instance of a
+ * transliterator given its ID.  Valid IDs may be enumerated using
+ * <code>getAvailableIDs()</code>.  Since transliterators are
+ * stateless, multiple calls to <code>getInstance()</code> with the
+ * same ID will return the same object.
+ *
+ * <p>In addition to the system transliterators registered at startup,
+ * user transliterators may be registered by calling
+ * <code>registerInstance()</code> at run time.  To register a
+ * transliterator subclass without instantiating it (until it is
+ * needed), users may call <code>registerClass()</code>.
+ *
+ * <p><b>Subclassing</b>
+ *
+ * <p>Subclasses must implement the abstract
+ * <code>transliterate()</code> method.  They should also override the
+ * <code>transliterate()</code> method taking a <code>String</code>
+ * and <code>StringBuffer</code> if the performance of these methods
+ * can be improved over the performance obtained by the default
+ * implementations in this class.  Subclasses must also implement
+ * <code>handleKeyboardTransliterate()</code>.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: Transliterator.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public abstract class Transliterator {
+    /**
+     * Slot in the <code>keyboardTransliterate()</code> <code>index[]</code>
+     * array that holds the beginning index, inclusive.
+     * @see #keyboardTransliterate
+     */
+    public static final int START  = 0;
+
+    /**
+     * Slot in the <code>keyboardTransliterate()</code> <code>index[]</code>
+     * array that holds the ending index, exclusive.
+     * @see #keyboardTransliterate
+     */
+    public static final int LIMIT  = 1;
+
+    /**
+     * Slot in the <code>keyboardTransliterate()</code> <code>index[]</code>
+     * array that holds the index of the next character to be considered
+     * for transliteration.
+     * @see #keyboardTransliterate
+     */
+    public static final int CURSOR = 2;
+
+    /**
+     * Programmatic name, e.g., "Latin-Arabic".  Never <tt>null</tt>.
+     */
+    private String ID;
+
+    /**
+     * This transliterator's filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     */
+    private UnicodeFilter filter;
+
+    /**
+     * Dictionary of known transliterators.  Keys are <code>String</code>
+     * IDs, values are one of the following:
+     *
+     * <ul><li><code>Transliterator</code> objects.  NOTE(review): no code
+     * in this class stores or handles such values -- confirm.
+     * <li><code>Class</code> objects.  Such objects must represent
+     * subclasses of <code>Transliterator</code>, and must satisfy the
+     * constraints described in <code>registerClass()</code>
+     * <li><code>RULE_BASED_PLACEHOLDER</code>, in which case the ID
+     * will have its first '-' removed and be appended to
+     * RB_RULE_BASED_PREFIX to form a resource bundle name from which
+     * the RB_RULE key is looked up to obtain the rule.
+     *
+     * <li><code>REVERSE_RULE_BASED_PLACEHOLDER</code>.  Like
+     * <code>RULE_BASED_PLACEHOLDER</code>, except the entity names in
+     * the ID are reversed, and RuleBasedTransliterator.REVERSE is
+     * passed to <code>RuleBasedTransliterator.parse()</code>.
+     *
+     * <li><code>RuleBasedTransliterator.Data</code> objects, stored by
+     * <code>internalGetInstance()</code> once a rule has been parsed.</ul>
+     */
+    private static Hashtable cache;
+
+    /**
+     * Internal object used in the <code>cache</code> to stand for a
+     * <code>RuleBasedTransliterator</code> whose rule has not been
+     * parsed yet.  When a <code>getInstance()</code> call retrieves
+     * this object, the rule is parsed and the cache entry is replaced
+     * by the resulting <code>RuleBasedTransliterator.Data</code>.
+     * This allows <code>Transliterator</code> to delay parsing of
+     * such rules until they are needed.
+     */
+    private static final Object RULE_BASED_PLACEHOLDER = new Object();
+
+    /**
+     * Internal object used in the <code>cache</code> to stand for a
+     * <code>RuleBasedTransliterator</code> whose rule has not been
+     * parsed yet.  The rule for such an entry is parsed with the
+     * argument <code>RuleBasedTransliterator.REVERSE</code> rather
+     * than <code>RuleBasedTransliterator.FORWARD</code>.
+     */
+    private static final Object REVERSE_RULE_BASED_PLACEHOLDER = new Object();
+
+    /**
+     * Prefix for resource bundle key for the display name for a
+     * transliterator.  The ID is appended to this to form the key.
+     * The resource bundle value should be a String.
+     */
+    private static final String RB_DISPLAY_NAME_PREFIX = "T:";
+
+    /**
+     * Resource bundle key for display name pattern.
+     * The resource bundle value should be a String forming a
+     * MessageFormat pattern, e.g.:
+     * "{0,choice,0#|1#{1} Transliterator|2#{1} to {2} Transliterator}".
+     */
+    private static final String RB_DISPLAY_NAME_PATTERN = "TransliteratorNamePattern";
+
+    /**
+     * Resource bundle key for the list of RuleBasedTransliterator IDs.
+     * The value should be a String[] of valid IDs; an element with a
+     * leading '*' is registered, minus the '*', as a reverse rule.  The
+     * ID is used with RB_RULE_BASED_PREFIX to locate the RB_RULE key.
+     */
+    private static final String RB_RULE_BASED_IDS = "RuleBasedTransliteratorIDs";
+
+    /**
+     * Resource bundle containing display name keys and the
+     * RB_RULE_BASED_IDS array.
+     *
+     * <p>If we ever integrate this with the Sun JDK, the resource bundle
+     * root will change to java.text.resources.LocaleElements
+     */
+    private static final String RB_LOCALE_ELEMENTS =
+        "com.ibm.text.resources.LocaleElements";
+
+    /**
+     * Prefix for resource bundle containing RuleBasedTransliterator RB_RULE
+     * string.  The ID is munged to remove the first '-' (and, for reverse
+     * entries, to swap the entity names) then appended to obtain the name.
+     */
+    private static final String RB_RULE_BASED_PREFIX =
+        "com.ibm.text.resources.TransliterationRule";
+
+    /**
+     * Resource bundle key for the RuleBasedTransliterator rule.
+     */
+    private static final String RB_RULE = "Rule";
+
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Constructs a transliterator with the given ID and filter.
+     * @param ID the string identifier for this transliterator; a
+     * <tt>null</tt> ID causes a <tt>NullPointerException</tt>
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be altered
+     * by this transliterator.  If <tt>null</tt>, no filtering is applied.
+     */
+    protected Transliterator(String ID, UnicodeFilter filter) {
+        if (null == ID) {
+            throw new NullPointerException();
+        }
+        this.filter = filter;
+        this.ID = ID;
+    }
+
+    /**
+     * Transliterates the segment of a string that begins at the
+     * character at offset <code>start</code> and extends to the
+     * character at offset <code>limit - 1</code>, with optional
+     * filtering.  A default implementaion is provided here;
+     * subclasses should provide a more efficient implementation if
+     * possible.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param result buffer to receive the transliterated text; previous
+     * contents are discarded
+     */
+    public void transliterate(String text, int start, int limit,
+                              StringBuffer result) {
+        /* This is a default implementation that should be replaced by
+         * a more efficient subclass implementation if possible.
+         */
+        result.setLength(0);
+        result.append(text.substring(start, limit));
+        transliterate(new ReplaceableString(result),
+                      0, result.length());
+    }
+
+    /**
+     * Transliterates a segment of a string in place, with optional
+     * filtering.  Subclasses must override this abstract method.  There
+     * is no filter parameter.  NOTE(review): implementations presumably
+     * apply the filter from <code>getFilter()</code> (characters for
+     * which <tt>isIn()</tt> is <tt>false</tt> are left alone; a
+     * <tt>null</tt> filter means no filtering) -- confirm in subclasses.
+     *
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @return The new limit index.  The text previously occupying <code>[start,
+     * limit)</code> has been transliterated, possibly to a string of a different
+     * length, at <code>[start, </code><em>new-limit</em><code>)</code>, where
+     * <em>new-limit</em> is the return value.
+     */
+    public abstract int transliterate(Replaceable text, int start, int limit);
+
+    /**
+     * Transliterates an entire string.  Convenience method.
+     * @param text the string to be transliterated
+     * @param result receives the output; previous contents are discarded
+     */
+    public final void transliterate(String text, StringBuffer result) {
+        final int end = text.length();
+        this.transliterate(text, 0, end, result);
+    }
+
+    /**
+     * Transliterates an entire string and returns the result.
+     * Convenience method.
+     * @param text the string to be transliterated
+     * @return The transliterated text
+     */
+    public final String transliterate(String text) {
+        StringBuffer buffer = new StringBuffer();
+        transliterate(text, buffer);
+        return buffer.toString();
+    }
+
+    /**
+     * Transliterates all of a <code>Replaceable</code> in place.  Convenience method.
+     * @param text the string to be transliterated
+     */
+    public final void transliterate(Replaceable text) {
+        this.transliterate(text, 0, text.length());
+    }
+
+    /**
+     * Transliterates the portion of the text buffer that can be
+     * transliterated unambiguously after new text has been inserted,
+     * typically as a result of a keyboard event.  The new text in
+     * <code>insertion</code> will be inserted into <code>text</code>
+     * at <code>index[LIMIT]</code>, advancing
+     * <code>index[LIMIT]</code> by <code>insertion.length()</code>.
+     * Then the transliterator will try to transliterate characters of
+     * <code>text</code> between <code>index[CURSOR]</code> and
+     * <code>index[LIMIT]</code>.  Characters before
+     * <code>index[CURSOR]</code> will not be changed.
+     *
+     * <p>Upon return, values in <code>index[]</code> will be updated.
+     * <code>index[START]</code> will be advanced to the first
+     * character that future calls to this method will read.
+     * <code>index[CURSOR]</code> and <code>index[LIMIT]</code> will
+     * be adjusted to delimit the range of text that future calls to
+     * this method may change.
+     *
+     * <p>Typical usage of this method begins with an initial call
+     * with <code>index[START]</code> and <code>index[LIMIT]</code>
+     * set to indicate the portion of <code>text</code> to be
+     * transliterated, and <code>index[CURSOR] == index[START]</code>.
+     * Thereafter, <code>index[]</code> can be used without
+     * modification in future calls, provided that all changes to
+     * <code>text</code> are made via this method.
+     *
+     * <p>This method assumes that future calls may be made that will
+     * insert new text into the buffer.  As a result, it only performs
+     * unambiguous transliterations.  After the last call to this
+     * method, there may be untransliterated text that is waiting for
+     * more input to resolve an ambiguity.  In order to perform these
+     * pending transliterations, clients should call {@link
+     * #finishKeyboardTransliteration} after the last call to this
+     * method has been made.
+     * 
+     * @param text the buffer holding transliterated and untransliterated text
+     * @param index an array of three integers.
+     *
+     * <ul><li><code>index[START]</code>: the beginning index,
+     * inclusive; <code>0 <= index[START] <= index[LIMIT]</code>.
+     *
+     * <li><code>index[LIMIT]</code>: the ending index, exclusive;
+     * <code>index[START] <= index[LIMIT] <= text.length()</code>.
+     * <code>insertion</code> is inserted at
+     * <code>index[LIMIT]</code>.
+     *
+     * <li><code>index[CURSOR]</code>: the next character to be
+     * considered for transliteration; <code>index[START] <=
+     * index[CURSOR] <= index[LIMIT]</code>.  Characters before
+     * <code>index[CURSOR]</code> will not be changed by future calls
+     * to this method.</ul>
+     *
+     * @param insertion text to be inserted and possibly
+     * transliterated into the translation buffer at
+     * <code>index[LIMIT]</code>.  If <code>null</code> then no text
+     * is inserted.
+     * @see #START
+     * @see #LIMIT
+     * @see #CURSOR
+     * @see #handleKeyboardTransliterate
+     * @exception IllegalArgumentException if <code>index[]</code>
+     * is invalid
+     */
+    public final void keyboardTransliterate(Replaceable text, int[] index,
+                                            String insertion) {
+        boolean valid = index.length >= 3
+            && index[START] >= 0
+            && index[LIMIT] <= text.length()
+            && index[START] <= index[CURSOR]
+            && index[CURSOR] <= index[LIMIT];
+        if (!valid) {
+            throw new IllegalArgumentException("Invalid index array");
+        }
+        final int initialStart = index[START];
+        if (insertion != null) {
+            final int at = index[LIMIT];
+            text.replace(at, at, insertion);
+            index[LIMIT] = at + insertion.length();
+        }
+        handleKeyboardTransliterate(text, index);
+        // START trails CURSOR by the context length but never moves back.
+        int contextStart = index[CURSOR] - getMaximumContextLength();
+        index[START] = Math.max(contextStart, initialStart);
+    }
+
+    /**
+     * Inserts a single character and then transliterates the portion of
+     * the text buffer that can be transliterated unambiguously,
+     * typically as a result of a keyboard event.  This is a convenience
+     * method; see {@link
+     * #keyboardTransliterate(Replaceable, int[], String)} for details.
+     * @param text the buffer holding transliterated and
+     * untransliterated text
+     * @param index an array of three integers.  See {@link
+     * #keyboardTransliterate(Replaceable, int[], String)}.
+     * @param insertion character to be inserted, and possibly
+     * transliterated, at <code>index[LIMIT]</code>.
+     * @see #keyboardTransliterate(Replaceable, int[], String)
+     */
+    public final void keyboardTransliterate(Replaceable text, int[] index,
+                                            char insertion) {
+        final String inserted = String.valueOf(insertion);
+        keyboardTransliterate(text, index, inserted);
+    }
+
+    /**
+     * Transliterates the portion of the text buffer that can be
+     * transliterated unambiguously, without inserting any new text.
+     * This is a convenience method; see {@link
+     * #keyboardTransliterate(Replaceable, int[], String)} for details.
+     * @param text the buffer holding transliterated and
+     * untransliterated text
+     * @param index an array of three integers.  See {@link
+     * #keyboardTransliterate(Replaceable, int[], String)}.
+     * @see #keyboardTransliterate(Replaceable, int[], String)
+     */
+    public final void keyboardTransliterate(Replaceable text, int[] index) {
+        keyboardTransliterate(text, index, (String) null);
+    }
+
+    /**
+     * Completes any transliterations left pending for want of more
+     * input by transliterating <code>[index[START], index[LIMIT])</code>.
+     * Call this last, after one or more calls to
+     * <code>keyboardTransliterate()</code>.  <code>index[]</code> is
+     * read but not updated.
+     * @param text the buffer holding transliterated and untransliterated text
+     * @param index the array previously passed to {@link #keyboardTransliterate}
+     */
+    public final void finishKeyboardTransliteration(Replaceable text,
+                                                    int[] index) {
+        final int from = index[START];
+        transliterate(text, from, index[LIMIT]);
+    }
+
+    /**
+     * Abstract method that concrete subclasses define to implement
+     * keyboard transliteration.  This method should transliterate all
+     * characters between <code>index[CURSOR]</code> and
+     * <code>index[LIMIT]</code> that can be unambiguously
+     * transliterated, regardless of future insertions of text at
+     * <code>index[LIMIT]</code>.  <code>index[CURSOR]</code> should
+     * be advanced past committed characters (those that will not
+     * change in future calls to this method).
+     * <code>index[LIMIT]</code> should be updated to reflect text
+     * replacements that shorten or lengthen the text between
+     * <code>index[CURSOR]</code> and <code>index[LIMIT]</code>.  Upon
+     * return, neither <code>index[CURSOR]</code> nor
+     * <code>index[LIMIT]</code> should be less than the initial value
+     * of <code>index[CURSOR]</code>.  <code>index[START]</code> should
+     * <em>not</em> be changed; the caller recomputes it afterwards.
+     *
+     * @param text the buffer holding transliterated and
+     * untransliterated text
+     * @param index an array of three integers; keyboardTransliterate()
+     * validates it before calling this method.  See {@link
+     * #keyboardTransliterate(Replaceable, int[], String)}.
+     */
+    protected abstract void handleKeyboardTransliterate(Replaceable text,
+                                                        int[] index);
+
+    /**
+     * Returns the length of the longest context required by this transliterator.
+     * This is <em>preceding</em> context.  The default implementation supplied
+     * by <code>Transliterator</code> returns zero; subclasses that use preceding
+     * context should override this method to return the correct value.  For
+     * example, if a transliterator translates "ddd" (where d is any digit) to
+     * "555" when preceded by "(ddd)", then the preceding context length is 5,
+     * the length of "(ddd)".  <code>keyboardTransliterate()</code> keeps
+     * <code>index[START]</code> at most this far behind <code>index[CURSOR]</code>.
+     * @return The maximum number of preceding context characters this
+     * transliterator needs to examine
+     */
+    protected int getMaximumContextLength() {
+        return 0;
+    }
+
+    /**
+     * Returns the programmatic identifier given to this transliterator
+     * at construction.  If the ID is registered, passing it to
+     * <code>getInstance()</code> yields a transliterator for that ID.
+     * @see #registerInstance
+     * @see #registerClass
+     * @see #getAvailableIDs
+     */
+    public final String getID() {
+        return ID;
+    }
+
+    /**
+     * Returns a name for this transliterator that is appropriate for
+     * display to the user in the default locale.  See {@link
+     * #getDisplayName(Locale)} for details.
+     */
+    public final String getDisplayName() {
+        return getDisplayName(Locale.getDefault());
+    }
+
+    /**
+     * Returns a name for this transliterator that is appropriate for
+     * display to the user in the given locale.  This name is taken
+     * from the locale resource data in the standard manner of the
+     * <code>java.text</code> package.
+     *
+     * <p>If no localized name exists for this ID, a name is synthesized
+     * using a localized <code>MessageFormat</code> pattern from the
+     * resource data.  The arguments to this pattern are an integer
+     * followed by one or two strings.  The integer is the number of
+     * strings, either 1 or 2.  The strings are formed by splitting the
+     * ID for this transliterator at the first '-'.  If there is no '-',
+     * then the entire ID forms the only string.
+     * @param inLocale the Locale in which the display name should be
+     * localized.
+     * @return the localized or synthesized display name
+     * @exception RuntimeException if the resource data holds neither a
+     * name for this ID nor the display name pattern
+     * @see java.text.MessageFormat
+     */
+    public String getDisplayName(Locale inLocale) {
+        ResourceBundle bundle = ResourceBundle.getBundle(
+            RB_LOCALE_ELEMENTS, inLocale);
+        try {
+            return bundle.getString(RB_DISPLAY_NAME_PREFIX + ID);
+        } catch (MissingResourceException e) {
+            // No explicit name for this ID; synthesize one below.
+        }
+
+        try {
+            // Fetch the pattern first; if it is missing we skip to the catch.
+            MessageFormat format = new MessageFormat(
+                    bundle.getString(RB_DISPLAY_NAME_PATTERN));
+            int i = ID.indexOf('-');
+            Object[] args = (i < 0)
+                ? new Object[] { new Integer(1), ID }
+                : new Object[] { new Integer(2), ID.substring(0, i),
+                                 ID.substring(i+1) };
+            return format.format(args);
+        } catch (MissingResourceException e2) {
+            // Only reachable if the build is broken or the pattern has
+            // been deleted from the root RB_LOCALE_ELEMENTS resource.
+            throw new RuntimeException("Missing resource \""
+                + RB_DISPLAY_NAME_PATTERN + "\" for transliterator " + ID);
+        }
+    }
+
+    /**
+     * Returns the filter used by this transliterator.
+     * @return the current filter, or <tt>null</tt> if there is none
+     */
+    public UnicodeFilter getFilter() {
+        return filter;
+    }
+
+    /**
+     * Changes the filter used by this transliterator.
+     *
+     * <p>Callers must take care if a transliterator is in use by
+     * multiple threads.  The filter should not be changed by one
+     * thread while another thread may be transliterating.
+     * @param filter the new filter, or <tt>null</tt> for no filtering
+     */
+    public void setFilter(UnicodeFilter filter) {
+        this.filter = filter;
+    }
+
+    /**
+     * Returns this transliterator's inverse.  See the class
+     * documentation for details.  This implementation simply inverts
+     * the two entities in the ID and attempts to retrieve the
+     * resulting transliterator.  That is, if <code>getID()</code>
+     * returns "A-B", then this method will return the result of
+     * <code>getInstance("B-A")</code>, or <code>null</code> if that
+     * call fails.
+     *
+     * <p>This method does not take filtering into account.  The
+     * returned transliterator will have no filter.
+     *
+     * <p>Subclasses with knowledge of their inverse may wish to
+     * override this method.
+     *
+     * @return a transliterator that is an inverse, not necessarily
+     * exact, of this transliterator, or <code>null</code> if no such
+     * transliterator is registered.
+     * @see #registerInstance
+     */
+    public Transliterator getInverse() {
+        final int dash = ID.indexOf('-');
+        if (dash < 0) {
+            return null;  // no '-' in the ID, so there is nothing to swap
+        }
+        String swapped = ID.substring(dash + 1) + '-' + ID.substring(0, dash);
+        return internalGetInstance(swapped);
+    }
+
+    /**
+     * Returns a <code>Transliterator</code> object given its ID.
+     * The ID must be either a system transliterator ID or a ID registered
+     * using <code>registerInstance()</code>.
+     *
+     * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
+     * @return A <code>Transliterator</code> object with the given ID
+     * @exception IllegalArgumentException if the given ID is invalid.
+     * @see #registerInstance
+     * @see #getAvailableIDs
+     * @see #getID
+     */
+    public static Transliterator getInstance(String ID) {
+        final Transliterator found = internalGetInstance(ID);
+        if (found == null) {
+            throw new IllegalArgumentException("Unsupported transliterator: "
+                                               + ID);
+        }
+        return found;
+    }
+
+    /**
+     * Returns a transliterator object given its ID.  Unlike getInstance(),
+     * this method returns null if it cannot make use of the given ID.
+     * A new object is created on each successful call.  An ID whose cache
+     * entry is neither a Class nor a parsed Data object -- including an
+     * ID with no entry at all -- is looked up as a rule resource bundle,
+     * and the parsed rule is then cached under that ID.
+     */
+    private static Transliterator internalGetInstance(String ID) {
+        Object obj = cache.get(ID);
+        RuleBasedTransliterator.Data data = null;
+        if (obj instanceof RuleBasedTransliterator.Data) {
+            data = (RuleBasedTransliterator.Data) obj;
+            // Fall through to construct transliterator from cached Data object.
+        } else if (obj instanceof Class) {
+            try {
+                return (Transliterator) ((Class) obj).newInstance();
+            } catch (InstantiationException e) {
+            } catch (IllegalAccessException e2) {} // either failure: return null below
+        } else {
+            synchronized (cache) { // one thread at a time loads a rule and updates the cache
+                boolean isReverse = (obj == REVERSE_RULE_BASED_PLACEHOLDER);
+                String resourceName = RB_RULE_BASED_PREFIX;
+                int i = ID.indexOf('-');
+                if (i < 0) {
+                    resourceName += ID;
+                } else {
+                    String IDLeft  = ID.substring(0, i);
+                    String IDRight = ID.substring(i+1);
+                    resourceName += isReverse ? (IDRight + IDLeft)
+                                              : (IDLeft + IDRight);
+                }
+                try {
+                    ResourceBundle resource = ResourceBundle.getBundle(resourceName);
+                    data = RuleBasedTransliterator.parse(resource.getString(RB_RULE),
+                                                         isReverse
+                                                         ? RuleBasedTransliterator.REVERSE
+                                                         : RuleBasedTransliterator.FORWARD);
+                    cache.put(ID, data);
+                    // Fall through to construct transliterator from Data object.
+                } catch (MissingResourceException e) {} // no such rule: data stays null
+            }
+        }
+
+        if (data != null) {
+            return new RuleBasedTransliterator(ID, data, null);
+        }
+        return null;
+    }
+
+    /**
+     * Registers a subclass of <code>Transliterator</code> with the
+     * system, replacing any entry already registered under
+     * <code>ID</code>.  The subclass must have a public no-argument
+     * constructor, and objects it creates must return <code>ID</code>
+     * from <code>getID()</code>.  Neither requirement is checked here.
+     *
+     * @param ID the result of <code>getID()</code> for this
+     * transliterator
+     * @param transClass a subclass of <code>Transliterator</code>
+     * @see #registerInstance
+     * @see #unregister
+     */
+    public static void registerClass(String ID, Class transClass) {
+        cache.put(ID, transClass);
+    }
+
+    /**
+     * Unregisters a transliterator or class, system or user.
+     * NOTE(review): <code>getInstance()</code> can still resolve the ID
+     * afterwards if a matching rule resource bundle exists.
+     * @param ID the ID of the transliterator or class
+     * @return the <code>Object</code> that was registered with
+     * <code>ID</code>, or <code>null</code> if none was
+     * @see #registerInstance
+     * @see #registerClass
+     */
+    public static Object unregister(String ID) {
+        return cache.remove(ID);
+    }
+
+    /**
+     * Returns an enumeration over the programmatic names of registered
+     * <code>Transliterator</code> objects, that is, over the keys
+     * currently held in the registry.  This includes both system
+     * transliterators and user transliterators.  The enumerated names
+     * may be passed to <code>getInstance()</code>.
+     *
+     * @return An <code>Enumeration</code> over <code>String</code> objects
+     * @see #getInstance
+     * @see #registerInstance
+     */
+    public static final Enumeration getAvailableIDs() {
+        return cache.keys();
+    }
+
+    static {
+        ResourceBundle bundle = ResourceBundle.getBundle(RB_LOCALE_ELEMENTS);
+        // Create the cache outside the try block.  If the ID list below is
+        // missing, the built-in registrations at the end must still find a
+        // non-null cache instead of failing class initialization.
+        cache = new Hashtable();
+        try {
+            String[] ruleBasedIDs = bundle.getStringArray(RB_RULE_BASED_IDS);
+            for (int i=0; i<ruleBasedIDs.length; ++i) {
+                String ID = ruleBasedIDs[i];
+                // A leading '*' marks an ID whose rule is used in reverse.
+                boolean isReverse = (ID.charAt(0) == '*');
+                if (isReverse) {
+                    ID = ID.substring(1);
+                }
+                cache.put(ID, isReverse ? REVERSE_RULE_BASED_PLACEHOLDER
+                                        : RULE_BASED_PLACEHOLDER);
+            }
+        } catch (MissingResourceException e) {}  // no rule-based IDs listed
+        cache.put(HexToUnicodeTransliterator._ID,
+                  HexToUnicodeTransliterator.class);
+        cache.put(UnicodeToHexTransliterator._ID,
+                  UnicodeToHexTransliterator.class);
+    }
+}
diff --git a/icu4j/src/com/ibm/text/UnicodeFilter.java b/icu4j/src/com/ibm/text/UnicodeFilter.java
new file mode 100755
index 00000000000..3753883a476
--- /dev/null
+++ b/icu4j/src/com/ibm/text/UnicodeFilter.java
@@ -0,0 +1,22 @@
+package com.ibm.text;
+
+/**
+ * <code>UnicodeFilter</code> defines a protocol for selecting a
+ * subset of the full range (U+0000 to U+FFFF) of Unicode characters.
+ * Currently, filters are used in conjunction with classes like {@link
+ * Transliterator} to only process selected characters through a
+ * transformation.
+ *
+ * @see UnicodeFilterLogic
+ */
+
+public interface UnicodeFilter {
+
+    /**
+     * Tests whether a character is in the selected subset.
+     * @param c the character to test
+     * @return <tt>true</tt> if <tt>c</tt> is in the subset; <tt>false</tt>
+     * if <tt>c</tt> is <b>to be filtered</b> out
+     */
+    public boolean isIn(char c);
+}
diff --git a/icu4j/src/com/ibm/text/UnicodeFilterLogic.java b/icu4j/src/com/ibm/text/UnicodeFilterLogic.java
new file mode 100755
index 00000000000..f9e6ec1c609
--- /dev/null
+++ b/icu4j/src/com/ibm/text/UnicodeFilterLogic.java
@@ -0,0 +1,112 @@
+package com.ibm.text;
+
+/**
+ * <code>UnicodeFilterLogic</code> provides logical operators on
+ * {@link UnicodeFilter} objects.  This class cannot be instantiated;
+ * it consists only of static methods.  The static methods return
+ * filter objects that perform logical inversion (<tt>not</tt>),
+ * intersection (<tt>and</tt>), or union (<tt>or</tt>) of the given
+ * filter objects.
+ */
+public final class UnicodeFilterLogic {
+
+    /** Not instantiable: this class consists only of static methods. */
+    private UnicodeFilterLogic() {}
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that inverts the given filter.
+     */
+    public static UnicodeFilter not(final UnicodeFilter f) {
+        return new UnicodeFilter() {
+            public boolean isIn(char c) {
+                return !f.isIn(c);
+            }
+        };
+    }
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that implements a short
+     * circuit AND of the result of the two given filters.  That is,
+     * if <tt>f.isIn()</tt> is <tt>false</tt>, then <tt>g.isIn()</tt>
+     * is not called, and <tt>isIn()</tt> returns <tt>false</tt>.
+     * <p>If either filter is <tt>null</tt>, the other is returned as is.
+     */
+    public static UnicodeFilter and(final UnicodeFilter f,
+                                    final UnicodeFilter g) {
+        if (f == null) {
+            return g;
+        }
+        if (g == null) {
+            return f;
+        }
+        return new UnicodeFilter() {
+            public boolean isIn(char c) {
+                return f.isIn(c) && g.isIn(c);
+            }
+        };
+    }
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that implements a short
+     * circuit AND of the result of the given filters.  That is, if
+     * <tt>f[i].isIn()</tt> is <tt>false</tt>, then
+     * <tt>f[j].isIn()</tt> is not called, where <tt>j > i</tt>, and
+     * <tt>isIn()</tt> returns <tt>false</tt>.
+     */
+    public static UnicodeFilter and(final UnicodeFilter[] f) {
+        return new UnicodeFilter() {
+            public boolean isIn(char c) {
+                for (int i=0; i<f.length; ++i) {
+                    if (!f[i].isIn(c)) {
+                        return false;
+                    }
+                }
+                return true;
+            }
+        };
+    }
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that implements a short
+     * circuit OR of the result of the two given filters.  That is, if
+     * <tt>f.isIn()</tt> is <tt>true</tt>, then <tt>g.isIn()</tt> is
+     * not called, and <tt>isIn()</tt> returns <tt>true</tt>.
+     * <p>If either filter is <tt>null</tt>, the other is returned as is.
+     */
+    public static UnicodeFilter or(final UnicodeFilter f,
+                                   final UnicodeFilter g) {
+        if (f == null) {
+            return g;
+        }
+        if (g == null) {
+            return f;
+        }
+        return new UnicodeFilter() {
+            public boolean isIn(char c) {
+                return f.isIn(c) || g.isIn(c);
+            }
+        };
+    }
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that implements a short
+     * circuit OR of the result of the given filters.  That is, if
+     * <tt>f[i].isIn()</tt> is <tt>true</tt>, then
+     * <tt>f[j].isIn()</tt> is not called, where <tt>j > i</tt>, and
+     * <tt>isIn()</tt> returns <tt>true</tt>.
+     */
+    public static UnicodeFilter or(final UnicodeFilter[] f) {
+        return new UnicodeFilter() {
+            public boolean isIn(char c) {
+                for (int i=0; i<f.length; ++i) {
+                    if (f[i].isIn(c)) {
+                        return true;
+                    }
+                }
+                return false;
+            }
+        };
+    }
+
+    // TODO: Add nand() & nor() for convenience, if needed.
+}
diff --git a/icu4j/src/com/ibm/text/UnicodeSet.java b/icu4j/src/com/ibm/text/UnicodeSet.java
new file mode 100755
index 00000000000..0d8db3021bb
--- /dev/null
+++ b/icu4j/src/com/ibm/text/UnicodeSet.java
@@ -0,0 +1,1354 @@
+package com.ibm.text;
+
+import java.text.*;
+
+/**
+ * A mutable set of Unicode characters.  Objects of this class
+ * represent <em>character classes</em> used in regular expressions.
+ * Such classes specify a subset of the set of all Unicode characters,
+ * which in this implementation is the characters from U+0000 to
+ * U+FFFF, ignoring surrogates.
+ *
+ * <p>This class supports two APIs.  The first is modeled after Java 2's
+ * <code>java.util.Set</code> interface, although this class does not
+ * implement that interface.  All methods of <code>Set</code> are
+ * supported, with the modification that they take a character range
+ * or single character instead of an <code>Object</code>, and they
+ * take a <code>UnicodeSet</code> instead of a <code>Collection</code>.
+ *
+ * <p>The second API is the
+ * <code>applyPattern()</code>/<code>toPattern()</code> API from the
+ * <code>java.text.Format</code>-derived classes.  Unlike the
+ * methods that add characters, add categories, and control the logic
+ * of the set, the method <code>applyPattern()</code> sets all
+ * attributes of a <code>UnicodeSet</code> at once, based on a
+ * string pattern.
+ *
+ * <p>In addition, the set complement operation is supported through
+ * the <code>complement()</code> method.
+ *
+ * <p><b>Pattern syntax</b></p>
+ *
+ * Patterns are accepted by the constructors and the
+ * <code>applyPattern()</code> methods and returned by the
+ * <code>toPattern()</code> method.  These patterns follow a syntax
+ * similar to that employed by version 8 regular expression character
+ * classes:
+ *
+ * <blockquote>
+ *   <table>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>pattern :=&nbsp; </code></td>
+ *       <td valign="top"><code>('[' '^'? item* ']') |
+ *       ('[:' '^'? category ':]')</code></td>
+ *     </tr>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>item :=&nbsp; </code></td>
+ *       <td valign="top"><code>char | (char '-' char) | pattern-expr<br>
+ *       </code></td>
+ *     </tr>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>pattern-expr :=&nbsp; </code></td>
+ *       <td valign="top"><code>pattern | pattern-expr pattern |
+ *       pattern-expr op pattern<br>
+ *       </code></td>
+ *     </tr>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>op :=&nbsp; </code></td>
+ *       <td valign="top"><code>'&amp;' | '-'<br>
+ *       </code></td>
+ *     </tr>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>special :=&nbsp; </code></td>
+ *       <td valign="top"><code>'[' | ']' | '-'<br>
+ *       </code></td>
+ *     </tr>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>char :=&nbsp; </code></td>
+ *       <td valign="top"><em>any character that is not</em><code> special<br>
+ *       | ('\u005C' </code><em>any character</em><code>)<br>
+ *       | ('\u005Cu' hex hex hex hex)<br>
+ *       </code></td>
+ *     </tr>
+ *     <tr align="top">
+ *       <td nowrap valign="top" align="right"><code>hex :=&nbsp; </code></td>
+ *       <td valign="top"><em>any character for which
+ *       </em><code>Character.digit(c, 16)</code><em>
+ *       returns a non-negative result</em></td>
+ *     </tr>
+ *     <tr>
+ *       <td nowrap valign="top" align="right"><code>category :=&nbsp; </code></td>
+ *       <td valign="top"><code>'M' | 'N' | 'Z' | 'C' | 'L' | 'P' |
+ *       'S' | 'Mn' | 'Mc' | 'Me' | 'Nd' | 'Nl' | 'No' | 'Zs' | 'Zl' |
+ *       'Zp' | 'Cc' | 'Cf' | 'Cs' | 'Co' | 'Cn' | 'Lu' | 'Ll' | 'Lt'
+ *       | 'Lm' | 'Lo' | 'Pc' | 'Pd' | 'Ps' | 'Pe' | 'Po' | 'Sm' |
+ *       'Sc' | 'Sk' | 'So'</code></td>
+ *     </tr>
+ *   </table>
+ *   <br>
+ *   <table border="1">
+ *     <tr>
+ *       <td>Legend: <table>
+ *         <tr>
+ *           <td nowrap valign="top"><code>a := b</code></td>
+ *           <td width="20" valign="top">&nbsp; </td>
+ *           <td valign="top"><code>a</code> may be replaced by <code>b</code> </td>
+ *         </tr>
+ *         <tr>
+ *           <td nowrap valign="top"><code>a?</code></td>
+ *           <td valign="top"></td>
+ *           <td valign="top">zero or one instance of <code>a</code><br>
+ *           </td>
+ *         </tr>
+ *         <tr>
+ *           <td nowrap valign="top"><code>a*</code></td>
+ *           <td valign="top"></td>
+ *           <td valign="top">zero or more instances of <code>a</code><br>
+ *           </td>
+ *         </tr>
+ *         <tr>
+ *           <td nowrap valign="top"><code>a | b</code></td>
+ *           <td valign="top"></td>
+ *           <td valign="top">either <code>a</code> or <code>b</code><br>
+ *           </td>
+ *         </tr>
+ *         <tr>
+ *           <td nowrap valign="top"><code>'a'</code></td>
+ *           <td valign="top"></td>
+ *           <td valign="top">the literal string between the quotes </td>
+ *         </tr>
+ *       </table>
+ *       </td>
+ *     </tr>
+ *   </table>
+ * </blockquote>
+ *
+ * Patterns specify individual characters, ranges of characters, and
+ * Unicode character categories.  When elements are concatenated, they
+ * specify their union.  To complement a set, place a '^' immediately
+ * after the opening '[' or '[:'.  In any other location, '^' has no
+ * special meaning.
+ *
+ * <p>Ranges are indicated by placing a '-' between two
+ * characters, as in "a-z".  This specifies the range of all
+ * characters from the left to the right, in Unicode order.  If the
+ * left and right characters are the same, then the range consists of
+ * just that character.  If the left character is greater than the
+ * right character it is a syntax error.  If a '-' occurs as the first
+ * character after the opening '[' or '[^', or if it occurs as the
+ * last character before the closing ']', then it is taken as a
+ * literal.  Thus "[a\u005C-b]", "[-ab]", and "[ab-]" all indicate the same
+ * set of three characters, 'a', 'b', and '-'.
+ *
+ * <p>Sets may be intersected using the '&' operator or the asymmetric
+ * set difference may be taken using the '-' operator, for example,
+ * "[[:L:]&[\u005Cu0000-\u005Cu0FFF]]" indicates the set of all Unicode letters
+ * with values less than 4096.  Operators ('&' and '-') have equal
+ * precedence and bind left-to-right.  Thus
+ * "[[:L:]-[a-z]-[\u005Cu0100-\u005Cu01FF]]" is equivalent to
+ * "[[[:L:]-[a-z]]-[\u005Cu0100-\u005Cu01FF]]".  This only really matters for
+ * difference; intersection is commutative.
+ *
+ * <table>
+ * <tr valign=top><td nowrap><code>[a]</code><td>The set containing 'a'
+ * <tr valign=top><td nowrap><code>[a-z]</code><td>The set containing 'a'
+ * through 'z' and all letters in between, in Unicode order
+ * <tr valign=top><td nowrap><code>[^a-z]</code><td>The set containing
+ * all characters but 'a' through 'z',
+ * that is, U+0000 through 'a'-1 and 'z'+1 through U+FFFF
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>][<em>pat2</em>]]</code>
+ * <td>The union of sets specified by <em>pat1</em> and <em>pat2</em>
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>]&[<em>pat2</em>]]</code>
+ * <td>The intersection of sets specified by <em>pat1</em> and <em>pat2</em>
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>]-[<em>pat2</em>]]</code>
+ * <td>The asymmetric difference of sets specified by <em>pat1</em> and
+ * <em>pat2</em>
+ * <tr valign=top><td nowrap><code>[:Lu:]</code>
+ * <td>The set of characters belonging to the given
+ * Unicode category, as defined by <code>Character.getType()</code>; in
+ * this case, Unicode uppercase letters
+ * <tr valign=top><td nowrap><code>[:L:]</code>
+ * <td>The set of characters belonging to all Unicode categories
+ * starting with 'L', that is, <code>[[:Lu:][:Ll:][:Lt:][:Lm:][:Lo:]]</code>.
+ * </table>
+ *
+ * <p><b>Character categories.</b>
+ *
+ * Character categories are specified using the POSIX-like syntax
+ * '[:Lu:]'.  The complement of a category is specified by inserting
+ * '^' after the opening '[:'.  The following category names are
+ * recognized.  Actual determination of category data uses
+ * <code>Character.getType()</code>, so it reflects the underlying
+ * implementation used by <code>Character</code>.  As of Java 2 and
+ * JDK 1.1.8, this is Unicode 2.1.2.
+ *
+ * <pre>
+ * Normative
+ *     Mn = Mark, Non-Spacing
+ *     Mc = Mark, Spacing Combining
+ *     Me = Mark, Enclosing
+ * 
+ *     Nd = Number, Decimal Digit
+ *     Nl = Number, Letter
+ *     No = Number, Other
+ * 
+ *     Zs = Separator, Space
+ *     Zl = Separator, Line
+ *     Zp = Separator, Paragraph
+ * 
+ *     Cc = Other, Control
+ *     Cf = Other, Format
+ *     Cs = Other, Surrogate
+ *     Co = Other, Private Use
+ *     Cn = Other, Not Assigned
+ * 
+ * Informative
+ *     Lu = Letter, Uppercase
+ *     Ll = Letter, Lowercase
+ *     Lt = Letter, Titlecase
+ *     Lm = Letter, Modifier
+ *     Lo = Letter, Other
+ * 
+ *     Pc = Punctuation, Connector
+ *     Pd = Punctuation, Dash
+ *     Ps = Punctuation, Open
+ *     Pe = Punctuation, Close
+ *    *Pi = Punctuation, Initial quote
+ *    *Pf = Punctuation, Final quote
+ *     Po = Punctuation, Other
+ * 
+ *     Sm = Symbol, Math
+ *     Sc = Symbol, Currency
+ *     Sk = Symbol, Modifier
+ *     So = Symbol, Other
+ * </pre>
+ * *Unsupported by Java (and hence unsupported by UnicodeSet).
+ *
+ * @author Alan Liu
+ * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $ */
+public class UnicodeSet {
+    /**
+     * The internal representation is a StringBuffer of even length.
+     * Each pair of characters represents a range that is included in
+     * the set.  A single character c is represented as cc.  Thus, the
+     * ranges in the set are (a,b), a and b inclusive, where a =
+     * pairs.charAt(i) and b = pairs.charAt(i+1) for all even i, 0 <=
+     * i <= pairs.length()-2.  Pairs are always stored in ascending
+     * Unicode order.  Pairs are always stored in shortest form.  For
+     * example, if the pair "hh", representing the single character
+     * 'h', is added to the pairs list "agik", representing the ranges
+     * 'a'-'g' and 'i'-'k', the result is "ak", not "aghhik".
+     *
+     * This representation format was originally used in Richard
+     * Gillam's CharSet class.
+     */
+    private StringBuffer pairs;
+
+    private static final String CATEGORY_NAMES =
+        //                    1 1 1 1 1 1 1   1 1 2 2 2 2 2 2 2 2 2
+        //0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6   8 9 0 1 2 3 4 5 6 7 8
+        "CnLuLlLtLmLoMnMeMcNdNlNoZsZlZpCcCf--CoCsPdPsPePcPoSmScSkSo";
+
+    private static final int UNSUPPORTED_CATEGORY = 17;
+
+    private static final int CATEGORY_COUNT = 29;
+
+    /**
+     * A cache mapping character category integers, as returned by
+     * Character.getType(), to pairs strings.  Entries are initially
+     * null and are created on demand.
+     */
+    private static final String[] CATEGORY_PAIRS_CACHE =
+        new String[CATEGORY_COUNT];
+
+    //----------------------------------------------------------------
+    // Debugging and testing
+    //----------------------------------------------------------------
+
+    /**
+     * Returns this set's internal range list as a string: two chars
+     * (first, last) per range, in ascending Unicode order.  For
+     * example, the set [a-zA-M3] yields "33AMaz".  Debug/test aid.
+     */
+    public String getPairs() {
+        return pairs.toString();
+    }
+
+    //----------------------------------------------------------------
+    // Public API
+    //----------------------------------------------------------------
+
+    /**
+     * Constructs an empty set (one with no character ranges).
+     */
+    public UnicodeSet() {
+        pairs = new StringBuffer();
+    }
+
+    /**
+     * Constructs a set from the given pattern, without stripping
+     * spaces from it.  See the class description for the syntax.
+     * @param pattern a string specifying what characters are in the set
+     * @exception IllegalArgumentException if the pattern contains
+     * a syntax error.
+     */
+    public UnicodeSet(String pattern) {
+        applyPattern(pattern, false);
+    }
+
+    /**
+     * Constructs a set from the given pattern, optionally ignoring
+     * white space.  See the class description for the syntax of the
+     * pattern language.
+     * @param pattern a string specifying what characters are in the set
+     * @param ignoreSpaces if <code>true</code>, all spaces in the
+     * pattern are ignored, except those preceded by '\u005C'.  Spaces are
+     * those characters for which <code>Character.isSpaceChar()</code>
+     * is <code>true</code>.
+     * @exception IllegalArgumentException if the pattern
+     * contains a syntax error.
+     */
+    public UnicodeSet(String pattern, boolean ignoreSpaces) {
+        applyPattern(pattern, ignoreSpaces);
+    }
+
+    /**
+     * Constructs the set of all characters in one Unicode category.
+     * @param category a category code as returned by
+     * <code>Character.getType()</code>.
+     * @exception IllegalArgumentException if the code is out of
+     * range or is the unsupported slot.
+     */
+    public UnicodeSet(int category) {
+        boolean inRange = 0 <= category && category < CATEGORY_COUNT;
+        if (!inRange || category == UNSUPPORTED_CATEGORY) {
+            throw new IllegalArgumentException("Invalid category");
+        }
+        pairs = new StringBuffer(getCategoryPairs(category));
+    }
+
+    /**
+     * Modifies this set to represent the set specified by the given
+     * pattern, without stripping spaces from it.  See the class
+     * description for the syntax of the pattern language.
+     * @param pattern a string specifying what characters are in the set
+     * @exception IllegalArgumentException if the pattern
+     * contains a syntax error.
+     */
+    public final void applyPattern(String pattern) {
+        applyPattern(pattern, false);
+    }
+
+    /**
+     * Modifies this set to represent the set specified by the given
+     * pattern, optionally ignoring white space.  See the class
+     * description for the syntax of the pattern language.  If the
+     * pattern is rejected, this set is left unchanged.
+     * @param pattern a string specifying what characters are in the set
+     * @param ignoreSpaces if <code>true</code>, all unescaped spaces
+     * in the pattern are dropped before parsing.  Spaces are those
+     * characters for which <code>Character.isSpaceChar()</code> is
+     * <code>true</code>.  A space preceded by '\\' is kept, together
+     * with its backslash.
+     * @exception IllegalArgumentException if the pattern contains a
+     * syntax error, or is not consumed entirely by the parse.
+     */
+    public void applyPattern(String pattern, boolean ignoreSpaces) {
+        // To ignore spaces, build a copy of the pattern without them.
+        // A '\' and the character after it (if any -- parse deals
+        // with a trailing '\') are copied verbatim, which is what
+        // lets an escaped space survive.
+        if (ignoreSpaces) {
+            StringBuffer pat = new StringBuffer();
+            for (int i=0; i<pattern.length(); ++i) {
+                char c = pattern.charAt(i);
+                if (Character.isSpaceChar(c)) {
+                    continue;
+                }
+                if (c == '\\' && (i+1) < pattern.length()) {
+                    pat.append(c);
+                    c = pattern.charAt(++i);
+                    // Fall through and append the following char
+                }
+                pat.append(c);
+            }
+            pattern = pat.toString();
+        }
+
+        // Parse into a local first so that a rejected pattern does
+        // not clobber the current contents of this set.
+        ParsePosition pos = new ParsePosition(0);
+        StringBuffer parsed = parse(pattern, pos);
+        if (pos.getIndex() != pattern.length()) {
+            throw new IllegalArgumentException("Parse of \"" + pattern +
+                "\" failed at " + pos.getIndex());
+        }
+        pairs = parsed;
+    }
+
+    /**
+     * Returns a string representation of this set.  If the result of
+     * calling this function is passed to a UnicodeSet constructor, it
+     * will produce another set that is equal to this one.
+     */
+    public String toPattern() {
+        StringBuffer result = new StringBuffer();
+        result.append('[');
+        
+        // iterate through the ranges in the UnicodeSet
+        for (int i=0; i<pairs.length(); i+=2) {
+            // for a range with the same beginning and ending point,
+            // output that character, otherwise, output the start and
+            // end points of the range separated by a dash
+            result.append(pairs.charAt(i));
+            if (pairs.charAt(i) != pairs.charAt(i+1)) {
+                result.append('-').append(pairs.charAt(i+1));
+            }
+        }
+        
+        return result.append(']').toString();        
+    }
+
+    /**
+     * Counts the characters in this set by summing the lengths of
+     * its ranges.  The result lies between 0 and 65536 inclusive.
+     *
+     * @return the number of elements in this set (its cardinality).
+     */
+    public int size() {
+        int total = 0;
+        for (int lo=0, hi=1; lo<pairs.length(); lo+=2, hi+=2) {
+            total += 1 + (pairs.charAt(hi) - pairs.charAt(lo));
+        }
+        return total;
+    }
+
+    /**
+     * Tests whether this set is empty, i.e. has no ranges stored.
+     *
+     * @return <tt>true</tt> if this set contains no elements.
+     */
+    public boolean isEmpty() {
+        return pairs.length() == 0;
+    }
+
+    /**
+     * Returns <tt>true</tt> if a single range of this set covers
+     * every char from <tt>first</tt> through <tt>last</tt>.
+     * @return <tt>true</tt> if this set contains the specified range.
+     */
+    public boolean contains(char first, char last) {
+        // The first range ending at or after last decides the answer.
+        for (int end=1; end<pairs.length(); end+=2) {
+            if (last <= pairs.charAt(end)) {
+                return first >= pairs.charAt(end-1);
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Returns <tt>true</tt> if this set contains the specified char.
+     * Equivalent to <tt>contains(c, c)</tt>.
+     * @return <tt>true</tt> if this set contains the specified char.
+     */
+    public boolean contains(char c) {
+        return contains(c, c);
+    }
+
+    /**
+     * Adds every character from <code>first</code> through
+     * <code>last</code> to this set.  Characters already present are
+     * unaffected.  If <code>first > last</code> the range is empty
+     * and the set is left unchanged.
+     *
+     * @param first first character, inclusive, of range to be added
+     * @param last last character, inclusive, of range to be added
+     * to this set.
+     */
+    public void add(char first, char last) {
+        if (first > last) {
+            return;
+        }
+        addPair(pairs, first, last);
+    }
+
+    /**
+     * Adds the specified character to this set if it is not already
+     * present; otherwise the set is left unchanged.  Equivalent to
+     * <code>add(c, c)</code>.
+     */
+    public final void add(char c) {
+        add(c, c);
+    }
+
+    /**
+     * Removes every character from <code>first</code> through
+     * <code>last</code> from this set, so that none of them is
+     * contained once the call returns.  If <code>first > last</code>
+     * the range is empty and the set is left unchanged.
+     *
+     * @param first first character, inclusive, of range to be removed
+     * @param last last character, inclusive, of range to be removed
+     * from this set.
+     */
+    public void remove(char first, char last) {
+        if (first > last) {
+            return;
+        }
+        removePair(pairs, first, last);
+    }
+
+    /**
+     * Removes the specified character from this set if it is present.
+     * The set will not contain the character once the call returns.
+     * Equivalent to <code>remove(c, c)</code>.
+     */
+    public final void remove(char c) {
+        remove(c, c);
+    }
+
+    /**
+     * Returns <tt>true</tt> if the specified set is a <i>subset</i>
+     * of this set.
+     *
+     * @param c set to be checked for containment in this set.
+     * @return <tt>true</tt> if this set contains all of the elements of the
+     * 	       specified set.
+     */
+    public boolean containsAll(UnicodeSet c) {
+        // The specified set is a subset if all of its pairs are contained
+        // in this set.
+        int i = 1;
+        for (int j=0; j<c.pairs.length(); j+=2) {
+            char last = c.pairs.charAt(j+1);
+            // Set i to the end of the smallest range such that its
+            // end point >= last, or pairs.length() if no such range
+            // exists.
+            while (i<pairs.length() && last>pairs.charAt(i)) i+=2;
+            if (i>pairs.length() || c.pairs.charAt(j) < pairs.charAt(i-1)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Adds all of the elements in the specified set to this set if
+     * they're not already present.  This operation effectively
+     * modifies this set so that its value is the <i>union</i> of the two
+     * sets.  The behavior of this operation is unspecified if the specified
+     * set is modified while the operation is in progress.
+     *
+     * @param c set whose elements are to be added to this set.
+     * @see #add(char, char)
+     */
+    public void addAll(UnicodeSet c) {
+        doUnion(pairs, c.pairs.toString());
+    }
+
+    /**
+     * Retains only the elements in this set that are contained in the
+     * specified set.  In other words, removes from this set all of
+     * its elements that are not contained in the specified set.  This
+     * operation effectively modifies this set so that its value is
+     * the <i>intersection</i> of the two sets.
+     * @param c set that defines which elements this set will retain.
+     * @see #removeAll(UnicodeSet)
+     */
+    public void retainAll(UnicodeSet c) {
+        doIntersection(pairs, c.pairs.toString());
+    }
+
+    /**
+     * Removes from this set all of its elements that are contained in the
+     * specified set.  This operation effectively modifies this
+     * set so that its value is the <i>asymmetric set difference</i> of
+     * the two sets.
+     *
+     * @param c set whose elements will be removed from this set.
+     * @see #remove(char, char)
+     */
+    public void removeAll(UnicodeSet c) {
+        doDifference(pairs, c.pairs.toString());
+    }
+
+    /**
+     * Inverts this set.  This operation modifies this set so that
+     * its value is its complement.  This is equivalent to the pseudo code:
+     * <code>this = new UnicodeSet("[\u005Cu0000-\u005CuFFFF]").removeAll(this)</code>.
+     */
+    public void complement() {
+        doComplement(pairs);
+    }
+
+    /**
+     * Removes all of the elements from this set by truncating its
+     * range list; <tt>isEmpty()</tt> is true after this call returns.
+     */
+    public void clear() {
+        pairs.setLength(0);
+    }
+
+    /**
+     * Compares the specified object with this set for equality.  Returns
+     * <tt>true</tt> if the specified object is also a set and both sets
+     * contain exactly the same characters.  Range lists are kept in a
+     * canonical form, so this is a comparison of their contents;
+     * StringBuffer.equals() would only compare buffer identity.
+     *
+     * @param o Object to be compared for equality with this set.
+     * @return <tt>true</tt> if the specified Object is equal to this set.
+     */
+    public boolean equals(Object o) {
+        return o instanceof UnicodeSet &&
+            pairs.toString().equals(((UnicodeSet)o).pairs.toString());
+    }
+
+    /**
+     * Returns the hash code value for this set, computed from the
+     * contents of its range list so that equal sets hash alike.
+     * @return the hash code value for this set.
+     * @see Object#hashCode()
+     */
+    public int hashCode() {
+        return pairs.toString().hashCode();
+    }
+
+    //----------------------------------------------------------------
+    // Implementation: Pattern parsing
+    //----------------------------------------------------------------
+
+    /**
+     * Parses the given pattern, starting at the given position.  The
+     * character at pattern.charAt(pos.getIndex()) must be '[', or the
+     * parse fails.  Parsing continues until the corresponding closing
+     * ']'.  If a syntax error is encountered between the opening and
+     * closing brace, the parse fails.  Upon return from a successful
+     * parse, the ParsePosition is updated to point to the character
+     * following the closing ']', and a StringBuffer containing a
+     * pairs list for the parsed pattern is returned.  This method calls
+     * itself recursively to parse embedded subpatterns.
+     *
+     * @param pattern the string containing the pattern to be parsed.
+     * The portion of the string from pos.getIndex(), which must be a
+     * '[', to the corresponding closing ']', is parsed.
+     * @param pos upon entry, the position at which to being parsing.
+     * The character at pattern.charAt(pos.getIndex()) must be a '['.
+     * Upon return from a successful parse, pos.getIndex() is either
+     * the character after the closing ']' of the parsed pattern, or
+     * pattern.length() if the closing ']' is the last character of
+     * the pattern string.
+     * @return a StringBuffer containing a pairs list for the parsed
+     * substring of <code>pattern</code>
+     * @exception IllegalArgumentException if the parse fails.
+     */
+    private static StringBuffer parse(String pattern, ParsePosition pos) {
+
+        boolean invert = false;
+        StringBuffer pairsBuf = new StringBuffer();
+
+        /**
+         * Nodes:  0 - idle, waiting for '['
+         *        10 - like 11, but immediately after "[" or "[^"
+         *        11 - awaiting x, "]", "[...]", or "[:...:]"
+         *        21 - after x
+         *        23 - after x-
+         * 
+         * The parsing state machine moves from node 0 through zero or more
+         * other nodes back to node 0, in a successful parse.
+         */
+        int node = 0;
+        char first = 0;
+        int i;
+
+        /**
+         * This loop iterates over the characters in the pattern.  We
+         * start at the position specified by pos.  We exit the loop
+         * when either a matching closing ']' is seen, or we read all
+         * characters of the pattern.
+         */
+        for (i=pos.getIndex(); i<pattern.length(); ++i) {
+            char c = pattern.charAt(i);
+
+            /**
+             * Handle escapes here.  If a character is escaped, then
+             * it assumes its literal value.  This is true for all
+             * characters, both special characters and characters with
+             * no special meaning.  We also interpret '\\uxxxx' Unicode
+             * escapes here.
+             */
+            boolean isLiteral = false;
+            if (c == '\\') {
+                ++i;
+                if (i < pattern.length()) {
+                    c = pattern.charAt(i);
+                    isLiteral = true;
+                    if (c == 'u') {
+                        if ((i+4) >= pattern.length()) {
+                            throw new IllegalArgumentException("Invalid \\u escape");
+                        }
+                        c = '\u0000';
+                        for (int j=(++i)+4; i<j; ++i) { // [sic]
+                            int digit = Character.digit(pattern.charAt(i), 16);
+                            if (digit<0) {
+                                throw new IllegalArgumentException("Invalid \\u escape");
+                            }
+                            c = (char) ((c << 4) | digit);
+                        }
+                        --i; // Move i back to last parsed character
+                    }
+                } else {
+                    throw new IllegalArgumentException("Trailing '\\'");
+                }
+            }
+
+            /**
+             * Within this loop, we handle each of the four
+             * conditions: '[', ']', '-', other.  The first three
+             * characters must not be escaped.
+             */
+
+            /**
+             * An opening bracket indicates either the first bracket
+             * of the entire subpattern we are parsing, in which case
+             * we are in node 0 and move into node 10.  We also check
+             * for an immediately following '^', indicating the
+             * complement of the following pattern.  ('^' is any other
+             * position has no special meaning.)  If we are not in
+             * node 0, '[' represents a nested subpattern that must be
+             * recursively parsed and checked for following operators
+             * ('&' or '|').  If two nested subpatterns follow one
+             * another with no operator, their union is formed, just
+             * as with any other elements that follow one another
+             * without intervening operator.  The other thing we
+             * handle here is the syntax "[:Xx:]" or "[:X:]" that
+             * indicates a Unicode category or supercategory.
+             */
+            if (!isLiteral && c == '[') {
+                boolean parseOp = false;
+                char d = charAfter(pattern, i);
+                // "[:...:]" represents a character category
+                if (d == ':') {
+                    if (node == 23) {
+                        throw new IllegalArgumentException("Unexpected \"[:\"");
+                    }
+                    if (node == 21) {
+                        addPair(pairsBuf, first, first);
+                        node = 11;
+                    }
+                    i += 2;
+                    int j = pattern.indexOf(":]", i);
+                    if (j < 0) {
+                        throw new IllegalArgumentException("Missing \":]\"");
+                    }
+                    doUnion(pairsBuf,
+                            getCategoryPairs(pattern.substring(i, j)));
+                    i = j+1;
+                    if (node == 10) {
+                        node = 11;
+                        parseOp = true;
+                    } else if (node == 0) {
+                        break;
+                    }
+                } else {
+                    if (node == 0) {
+                        node = 10;
+                        if (d == '^') {
+                            invert = true;
+                            ++i;
+                        }
+                    } else {
+                        // Nested '['
+                        pos.setIndex(i);
+                        doUnion(pairsBuf, parse(pattern, pos)
+                                .toString());
+                        i = pos.getIndex() - 1; // Subtract 1 to point at ']'
+                        parseOp = true;
+                    }
+                }
+                /**
+                 * parseOp is true after "[:...:]" or a nested
+                 * "[...]".  It is false only after the final closing
+                 * ']'.  If parseOp is true, we look past the closing
+                 * ']' to see if we have an operator character.  If
+                 * so, we parse the subsequent "[...]" recursively,
+                 * then perform the operation.  We do this in a loop
+                 * until there are no more operators.  Note that this
+                 * means the operators have equal precedence and are
+                 * bound left-to-right.
+                 */
+                if (parseOp) {
+                    for (;;) {
+                        // Is the next character an operator?
+                        char op = charAfter(pattern, i);
+                        if (op == '-' || op == '&') {
+                            pos.setIndex(i+2); // Add 2 to point AFTER op
+                            String rhs = parse(pattern, pos).toString();
+                            if (op == '-') {
+                                doDifference(pairsBuf, rhs);
+                            } else if (op == '&') {
+                                doIntersection(pairsBuf, rhs);
+                            }
+                            i = pos.getIndex() - 1; // - 1 to point at ']'
+                        } else {
+                            break;
+                        }
+                    }
+                }          
+            }
+
+            /**
+             * A closing bracket can only be a closing bracket for
+             * "[...]", since the closing bracket for "[:...:]" is
+             * taken care of when the initial "[:" is seen.  When we
+             * see a closing bracket, we then know, if we were in node
+             * 21 (after x) or 23 (after x-) that nothing more is
+             * coming, and we add the last character(s) we saw to the
+             * set.  Note that a trailing '-' assumes its literal
+             * meaning, just as a leading '-' after "[" or "[^".
+             */
+            else if (!isLiteral && c == ']') {
+                if (node == 0) {
+                    throw new IllegalArgumentException("Unexpected ']'");
+                }
+                if (node == 21 || node == 23) {
+                    addPair(pairsBuf, first, first);
+                    if (node == 23) {
+                        addPair(pairsBuf, '-', '-');
+                    }
+                }
+                node = 0;
+                break;
+            }
+
+            /**
+             * '-' has the following interpretations: 1. Within
+             * "[...]", between two letters, it indicates a range.
+             * 2. Between two nested bracket patterns, "[[...]-[...]",
+             * it indicates asymmetric difference.  3. At the start of
+             * a bracket pattern, "[-...]", "[^-...]", it indicates
+             * the literal character '-'.  4. At the end of a bracket
+             * pattern, "[...-]", it indicates the literal character
+             * '-'.
+             *
+             * We handle cases 1 and 3 here.  Cases 2 and 4 are
+             * handled in the ']' parsing code.
+             */
+            else if (!isLiteral && c == '-') {
+                if (node == 10) {
+                    addPair(pairsBuf, c, c); // Handle "[-...]", "[^-...]"
+                } else if (node == 21) {
+                    node = 23;
+                } else {
+                    throw new IllegalArgumentException("Unexpected '-'");
+                }
+            } 
+
+            /**
+             * If we fall through to this point, we have a literal
+             * character, either one that has been escaped with a
+             * backslash, escaped with a backslash u, or that isn't
+             * a special '[', ']', or '-'.
+             *
+             * Literals can either start a range "x-...", end a range,
+             * "...-x", or indicate a single character "x".
+             */
+            else {
+                if (node == 10 || node == 11) {
+                    first = c;
+                    node = 21;
+                } else if (node == 21) {
+                    addPair(pairsBuf, first, first);
+                    first = c;
+                    node = 21;
+                } else if (node == 23) {
+                    if (c < first) {
+                        throw new IllegalArgumentException("Bad range");
+                    }
+                    addPair(pairsBuf, first, c);
+                    node = 11;
+                } else {
+                    throw new IllegalArgumentException("Expected '[', got '" + c + '\'');
+                }
+            }
+        }
+
+        if (node != 0) {
+            throw new IllegalArgumentException("Missing ']'");
+        }
+
+        /**
+         * i indexes the last character we parsed or is
+         * pattern.length().  In the latter case, the node will not be
+         * zero, since we have run off the end without finding a
+         * closing ']'.  Therefore, the above statement will have
+         * thrown an exception, and we'll never get here.  If we get
+         * here, we know i < pattern.length(), and we set the
+         * ParsePosition to the next character to be parsed.
+         */
+        pos.setIndex(i+1);
+
+        /**
+         * If we saw a '^' after the initial '[' of this pattern, then
+         * perform the complement.  (Inversion after '[:' is handled
+         * elsewhere.)
+         */
+        if (invert) {
+            doComplement(pairsBuf);
+        }
+
+        return pairsBuf;
+    }
+
+    //----------------------------------------------------------------
+    // Implementation: Efficient in-place union & difference
+    //----------------------------------------------------------------
+
+    /**
+     * Performs a union operation: adds the range 'c'-'d' to the given
+     * pairs list.  The pairs list is modified in place.  The result
+     * is normalized (in order and as short as possible).  For
+     * example, addPair("am", 'l', 'q') => "aq".  addPair("ampz", 'n',
+     * 'o') => "az".
+     *
+     * @param pairs normalized pairs list, modified in place
+     * @param c first character of the range to add, inclusive
+     * @param d last character of the range to add, inclusive; the
+     * caller is expected to pass c <= d
+     */
+    private static void addPair(StringBuffer pairs, char c, char d) {
+        // Skip every range that lies entirely before c-d without
+        // touching it, that is, every range a-b with b+1 < c.  The
+        // arithmetic is done in int, so U+FFFF + 1 cannot wrap.
+        int i = 0;
+        while (i < pairs.length() && (pairs.charAt(i+1) + 1) < c) {
+            i += 2;
+        }
+
+        // If there is no range left, or the range at i starts beyond
+        // d+1, then c-d overlaps and touches nothing: insert it here.
+        if (i == pairs.length() || (d + 1) < pairs.charAt(i)) {
+            pairs.insert(i, new char[] { c, d });
+            return;
+        }
+
+        // Otherwise merge with the range at i, and then absorb every
+        // following range that the merged range overlaps or touches.
+        // A wide c-d may swallow several ranges, not just one; for
+        // example, adding b-e to "abdf" must yield "af".
+        char start = (char) Math.min(c, pairs.charAt(i));
+        char end = (char) Math.max(d, pairs.charAt(i+1));
+        while ((i+2) < pairs.length() && pairs.charAt(i+2) <= (end + 1)) {
+            end = (char) Math.max(end, pairs.charAt(i+3));
+            stringBufferDelete(pairs, i+2, i+4);
+        }
+        pairs.setCharAt(i, start);
+        pairs.setCharAt(i+1, end);
+    }
+
+    /**
+     * Performs an asymmetric difference: removes the range 'c'-'d'
+     * from the pairs list.  The pairs list is modified in place.  The
+     * result is normalized (in order and as short as possible).  For
+     * example, removePair("am", 'l', 'q') => "ak".
+     * removePair("ampz", 'l', 'q') => "akrz".  A range that is only
+     * partly covered is trimmed, a range that strictly contains
+     * 'c'-'d' is split in two, and a range that is wholly covered is
+     * deleted.
+     *
+     * @param pairs normalized pairs list, modified in place
+     * @param c first character of the range to remove, inclusive
+     * @param d last character of the range to remove, inclusive
+     */
+    private static void removePair(StringBuffer pairs, char c, char d) {
+        // i always indexes the start of a range.
+        int i = 0;
+        while (i < pairs.length()) {
+            char lo = pairs.charAt(i);
+            char hi = pairs.charAt(i+1);
+
+            if (hi < c) {
+                // lo-hi lies entirely before c-d: nothing to remove
+                // from it, so move on to the next range.
+                i += 2;
+                continue;
+            }
+            if (d < lo) {
+                // lo-hi lies entirely after c-d.  The ranges are in
+                // order, so no later range can overlap either.
+                return;
+            }
+
+            // From here on c <= hi and lo <= d, so lo-hi and c-d
+            // overlap.  What survives of lo-hi is a head lo..c-1 (if
+            // c is past lo) and a tail d+1..hi (if d is short of hi).
+            boolean headSurvives = lo < c;
+            boolean tailSurvives = d < hi;
+
+            if (headSurvives && tailSurvives) {
+                // c-d is strictly inside lo-hi: split it in two.  The
+                // head reuses the slot at i and the tail is inserted
+                // right after it; then step over both.
+                pairs.setCharAt(i+1, (char)(c-1));
+                pairs.insert(i+2, new char[] { (char)(d+1), hi });
+                i += 4;
+            } else if (headSurvives) {
+                // c-d covers the end of lo-hi: trim the end.
+                pairs.setCharAt(i+1, (char)(c-1));
+                i += 2;
+            } else if (tailSurvives) {
+                // c-d covers the start of lo-hi: trim the start.
+                pairs.setCharAt(i, (char)(d+1));
+                i += 2;
+            } else {
+                // c-d covers all of lo-hi: delete it.  The following
+                // range slides into slot i, so i must not advance.
+                stringBufferDelete(pairs, i, i+2);
+            }
+        }
+    }
+
+    //----------------------------------------------------------------
+    // Implementation: Fundamental operators
+    //----------------------------------------------------------------
+
+    /**
+     * Changes the pairs list to represent the complement of the set it
+     * currently represents.  The pairs list will be normalized (in
+     * order and in shortest possible form) if the original pairs list
+     * was normalized.
+     *
+     * @param pairs pairs list to complement, modified in place
+     */
+    private static void doComplement(StringBuffer pairs) {
+        // The complement of the empty set is everything.
+        if (pairs.length() == 0) {
+            pairs.append('\u0000').append('\uffff');
+            return;
+        }
+
+        // Turn the ranges into the gaps between them: each start moves
+        // down one and each end moves up one.  The (char) cast wraps,
+        // so U+0000 - 1 becomes U+FFFF and U+FFFF + 1 becomes U+0000.
+        for (int k = 0; k < pairs.length(); k += 2) {
+            pairs.setCharAt(k, (char) (pairs.charAt(k) - 1));
+            pairs.setCharAt(k+1, (char) (pairs.charAt(k+1) + 1));
+        }
+
+        // A wrapped value at either extreme means the set reached
+        // that extreme, so there is no gap there and the character
+        // is dropped.  Otherwise the outermost gap is still missing
+        // its outer bound, which is supplied here.
+        if (pairs.charAt(0) != '\uFFFF') {
+            pairs.insert(0, '\u0000');
+        } else {
+            stringBufferDelete(pairs, 0, 1);
+        }
+        int tail = pairs.length() - 1;
+        if (pairs.charAt(tail) != '\u0000') {
+            pairs.append('\uFFFF');
+        } else {
+            pairs.setLength(tail);
+        }
+    }
+
+    /**
+     * Given two pairs lists, changes the first in place to represent
+     * the union of the two sets.  This implementation format was
+     * stolen from Richard Gillam's CharSet class.
+     * @param pairs first operand; overwritten with the union
+     * @param c2 second operand, a pairs string; only read
+     */
+    private static void doUnion(StringBuffer pairs, String c2) {
+        StringBuffer result = new StringBuffer();
+        String c1 = pairs.toString();
+
+        int i = 0;
+        int j = 0;
+
+        // consider all the characters in both strings
+        while (i < c1.length() && j < c2.length()) {
+            char ub;
+            
+            // the first character in the result is the lower of the
+            // starting characters of the two strings, and "ub" gets
+            // set to the upper bound of that range
+            if (c1.charAt(i) < c2.charAt(j)) {
+                result.append(c1.charAt(i));
+                ub = c1.charAt(++i);
+            }
+            else {
+                result.append(c2.charAt(j));
+                ub = c2.charAt(++j);
+            }
+            
+            // for as long as one of our two pointers is pointing to a range's
+            // end point (an odd index), or i is pointing to a character that is
+            // at most "ub" plus one (the "plus one" stitches touching ranges)...
+            while (i % 2 == 1 || j % 2 == 1 || (i < c1.length() && c1.charAt(i)
+                            <= ub + 1)) {
+                // advance i to the first character that is greater than
+                // "ub" plus one
+                while (i < c1.length() && c1.charAt(i) <= ub + 1)
+                    ++i;
+                    
+                // if i points to the endpoint of a range, update "ub"
+                // to that character, or if i points to the start of
+                // a range and the endpoint of the preceding range is
+                // greater than "ub", update "ub" to _that_ character
+                if (i % 2 == 1)
+                    ub = c1.charAt(i);
+                else if (i > 0 && c1.charAt(i - 1) > ub)
+                    ub = c1.charAt(i - 1);
+
+                // now advance j to the first character that is greater
+                // than "ub" plus one
+                while (j < c2.length() && c2.charAt(j) <= ub + 1)
+                    ++j;
+                    
+                // if j points to the endpoint of a range, update "ub"
+                // to that character, or if j points to the start of
+                // a range and the endpoint of the preceding range is
+                // greater than "ub", update "ub" to _that_ character
+                if (j % 2 == 1)
+                    ub = c2.charAt(j);
+                else if (j > 0 && c2.charAt(j - 1) > ub)
+                    ub = c2.charAt(j - 1);
+            }
+            // when we finally fall out of this loop, we will have stitched
+            // together a series of ranges that overlap or touch, i and j
+            // will both point to starting points of ranges, and "ub" will
+            // be the endpoint of the range we're working on.  Write "ub"
+            // to the result
+            result.append(ub);
+            
+        // loop back around to create the next range in the result
+        }
+        
+        // we fall out to here when we've exhausted all the characters in
+        // one of the operands.  We can append all of the remaining characters
+        // in the other operand without doing any extra work.
+        if (i < c1.length())
+            result.append(c1.substring(i));
+        if (j < c2.length())
+            result.append(c2.substring(j));
+
+        pairs.setLength(0);
+        pairs.append(result.toString());
+    }
+
+    /**
+     * Removes from the first pairs list, in place, every character in
+     * the second: the difference A - B is computed as A & ~B.
+     */
+    private static void doDifference(StringBuffer pairs, String pairs2) {
+        StringBuffer inverse = new StringBuffer(pairs2);
+        doComplement(inverse);
+        doIntersection(pairs, inverse.toString());
+    }
+
+    /**
+     * Given two pairs lists, changes the first in place to represent
+     * the intersection of the two sets.  This implementation format
+     * was stolen from Richard Gillam's CharSet class.
+     * @param pairs first operand; overwritten with the intersection
+     * @param c2 second operand, a pairs string; only read
+     */
+    private static void doIntersection(StringBuffer pairs, String c2) {
+        StringBuffer result = new StringBuffer();
+        String c1 = pairs.toString();
+
+        int i = 0;
+        int j = 0;
+        int oldI;
+        int oldJ;
+
+        // iterate until we've exhausted one of the operands
+        while (i < c1.length() && j < c2.length()) {
+            
+            // advance j until it points to a character that is at least as
+            // large as the one i points to.  If this is the beginning of a
+            // one-character range, advance j to point to the end
+            if (i < c1.length() && i % 2 == 0) {
+                while (j < c2.length() && c2.charAt(j) < c1.charAt(i))
+                    ++j;
+                if (j < c2.length() && j % 2 == 0 && c2.charAt(j) == c1.charAt(i))
+                    ++j;
+            }
+
+            // if j points to the endpoint of a range, save the current
+            // value of i, then advance i until it reaches a character
+            // which is larger than the character pointed at
+            // by j.  All of the characters we've advanced over (except
+            // the one currently pointed to by i) are added to the result
+            oldI = i;
+            while (j % 2 == 1 && i < c1.length() && c1.charAt(i) <= c2.charAt(j))
+                ++i;
+            result.append(c1.substring(oldI, i));
+
+            // if i points to the endpoint of a range, save the current
+            // value of j, then advance j until it reaches a character
+            // which is larger than the character pointed at
+            // by i.  All of the characters we've advanced over (except
+            // the one currently pointed to by j) are added to the result
+            oldJ = j;
+            while (i % 2 == 1 && j < c2.length() && c2.charAt(j) <= c1.charAt(i))
+                ++j;
+            result.append(c2.substring(oldJ, j));
+
+            // advance i until it points to a character at least as large
+            // as the one j points to.  If it points at the beginning of a
+            // one-character range, advance it to the end of that range
+            if (j < c2.length() && j % 2 == 0) {
+                while (i < c1.length() && c1.charAt(i) < c2.charAt(j))
+                    ++i;
+                if (i < c1.length() && i % 2 == 0 && c2.charAt(j) == c1.charAt(i))
+                    ++i;
+            }
+        }
+
+        pairs.setLength(0);
+        pairs.append(result.toString());
+    }
+
+    //----------------------------------------------------------------
+    // Implementation: Generation of pairs for Unicode categories
+    //----------------------------------------------------------------
+    
+    /**
+     * Returns a pairs string for the given category, given its name.
+     * The category name must be either a two-letter name, such as
+     * "Lu", or a one letter name, such as "L".  One-letter names
+     * indicate the logical union of all two-letter names that start
+     * with that letter.  Case is significant.  If the name starts
+     * with the character '^' then the complement of the given
+     * character set is returned.
+     *
+     * Although individual categories such as "Lu" are cached, we do
+     * not currently cache single-letter categories such as "L" or
+     * complements such as "^Lu" or "^L".  It would be easy to cache
+     * these as well in a hashtable should the need arise.
+     *
+     * @param catName category name, optionally preceded by '^'
+     * @return pairs string for the named category or its complement
+     * @throws IllegalArgumentException if no supported category matches
+     */
+    private static String getCategoryPairs(String catName) {
+        final boolean invert = (catName.length() > 1 &&
+                                catName.charAt(0) == '^');
+        final String name = invert ? catName.substring(1) : catName;
+        StringBuffer set = null;
+        switch (name.length()) {
+        case 2: {
+            // Only a hit at an even offset of CATEGORY_NAMES counts;
+            // half that offset is then the category number.
+            int offset = CATEGORY_NAMES.indexOf(name);
+            boolean aligned = (offset >= 0 && offset % 2 == 0);
+            if (aligned && (offset / 2) != UNSUPPORTED_CATEGORY) {
+                String cached = getCategoryPairs(offset / 2);
+                if (!invert) {
+                    return cached;
+                }
+                set = new StringBuffer(cached);
+            }
+            break;
+        }
+        case 1: {
+            // One-letter names select every supported category whose
+            // two-letter name begins with that letter; union them.
+            char letter = name.charAt(0);
+            for (int k = 0; k < CATEGORY_COUNT; ++k) {
+                if (k == UNSUPPORTED_CATEGORY ||
+                    CATEGORY_NAMES.charAt(2*k) != letter) {
+                    continue;
+                }
+                if (set == null) {
+                    set = new StringBuffer(getCategoryPairs(k));
+                } else {
+                    doUnion(set, getCategoryPairs(k));
+                }
+            }
+            break;
+        }
+        }
+
+        if (set == null) {
+            throw new IllegalArgumentException("Bad category");
+        }
+        if (invert) {
+            doComplement(set);
+        }
+        return set.toString();
+    }
+
+    /**
+     * Returns a pairs string for the given category.  This string is
+     * cached and returned again if this method is called again with
+     * the same parameter.
+     * @param cat category number, as returned by Character.getType()
+     * @return pairs string listing the ranges that have that category
+     */
+    private static String getCategoryPairs(int cat) {
+        String cached = CATEGORY_PAIRS_CACHE[cat];
+        if (cached != null) {
+            return cached;
+        }
+        // Scan the code units in ascending order and track the run of
+        // consecutive members we are inside, if any.  Because the
+        // scan is ordered, each finished run can simply be appended.
+        StringBuffer buf = new StringBuffer();
+        int runStart = -1; // negative while outside a run
+        for (int ch = 0; ch <= 0xFFFF; ++ch) {
+            boolean member = (Character.getType((char) ch) == cat);
+            if (member && runStart < 0) {
+                runStart = ch;
+            } else if (!member && runStart >= 0) {
+                buf.append((char) runStart).append((char) (ch - 1));
+                runStart = -1;
+            }
+        }
+        // A run still open here extends to the last code unit.
+        if (runStart >= 0) {
+            buf.append((char) runStart).append('\uFFFF');
+        }
+        cached = buf.toString();
+        CATEGORY_PAIRS_CACHE[cat] = cached;
+        return cached;
+    }
+
+    //----------------------------------------------------------------
+    // Implementation: Utility methods
+    //----------------------------------------------------------------
+
+    /**
+     * Returns the character that follows index i in str, or the
+     * sentinel '\uFFFF' when i+1 is at or beyond the end of str.
+     */
+    private static final char charAfter(String str, int i) {
+        int next = i + 1;
+        return (next < str.length()) ? str.charAt(next) : '\uFFFF';
+    }
+    
+    /**
+     * Deletes a range of characters from a StringBuffer, from start
+     * to limit-1.  This is not part of JDK 1.1 StringBuffer, but is
+     * present in Java 2.
+     * @param buf the buffer to edit in place
+     * @param start inclusive start of range
+     * @param limit exclusive end of range
+     */
+    private static void stringBufferDelete(StringBuffer buf,
+                                           int start, int limit) {
+        // In Java 2 just use buf.delete(start, limit).
+        final int end = buf.length();
+        if (end <= limit) {
+            buf.setLength(start);
+            return;
+        }
+        char[] tail = new char[end - limit];
+        buf.getChars(limit, end, tail, 0);
+        buf.setLength(start);
+        buf.append(tail);
+    }
+}
diff --git a/icu4j/src/com/ibm/text/UnicodeToHexTransliterator.java b/icu4j/src/com/ibm/text/UnicodeToHexTransliterator.java
new file mode 100755
index 00000000000..1e688f65fa9
--- /dev/null
+++ b/icu4j/src/com/ibm/text/UnicodeToHexTransliterator.java
@@ -0,0 +1,172 @@
+package com.ibm.text;
+import java.util.*;
+
+/**
+ * A transliterator that converts from Unicode characters to
+ * hexadecimal Unicode escape sequences.  It outputs a
+ * prefix specified in the constructor and optionally converts the hex
+ * digits to uppercase.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: UnicodeToHexTransliterator.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class UnicodeToHexTransliterator extends Transliterator {
+
+    /**
+     * Package accessible ID for this transliterator.
+     */
+    static String _ID = "Unicode-Hex";
+
+    private String prefix;
+
+    private boolean uppercase;
+
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Constructs a transliterator.
+     * @param prefix the string that will precede the four hex
+     * digits of every escape sequence that is output
+     * @param uppercase if true, the four hex digits will be
+     * converted to uppercase; otherwise they will be lowercase
+     * @param filter the filter handed to the superclass constructor;
+     * may be null, as it is in the no-argument constructor
+     */
+    public UnicodeToHexTransliterator(String prefix, boolean uppercase,
+                                      UnicodeFilter filter) {
+        super(_ID, filter);
+        this.prefix = prefix;
+        this.uppercase = uppercase;
+    }
+
+    /**
+     * Constructs a transliterator with the default prefix "&#092;u"
+     * that outputs uppercase hex digits.
+     */
+    public UnicodeToHexTransliterator() {
+        this("\\u", true, null);
+    }
+
+    /**
+     * Returns the string that precedes the four hex digits.
+     * @return prefix string
+     */
+    public String getPrefix() {
+        return prefix;
+    }
+
+    /**
+     * Sets the string that precedes the four hex digits.
+     *
+     * <p>Callers must take care if a transliterator is in use by
+     * multiple threads.  The prefix should not be changed by one
+     * thread while another thread may be transliterating.
+     * @param prefix prefix string
+     */
+    public void setPrefix(String prefix) {
+        this.prefix = prefix;
+    }
+
+    /**
+     * Returns true if this transliterator outputs uppercase hex digits.
+     */
+    public boolean isUppercase() {
+        return uppercase;
+    }
+
+    /**
+     * Sets if this transliterator outputs uppercase hex digits.
+     *
+     * <p>Callers must take care if a transliterator is in use by
+     * multiple threads.  The uppercase mode should not be changed by
+     * one thread while another thread may be transliterating.
+     * @param outputUppercase if true, then this transliterator
+     * outputs uppercase hex digits.
+     */
+    public void setUppercase(boolean outputUppercase) {
+        uppercase = outputUppercase;
+    }
+
+    /**
+     * Transliterates a segment of a string.  <code>Transliterator</code> API.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 &lt;= start
+     * &lt;= limit</code>.
+     * @param limit the ending index, exclusive; <code>start &lt;= limit
+     * &lt;= text.length()</code>.
+     * @return the new limit index
+     */
+    public int transliterate(Replaceable text, int start, int limit) {
+        int[] offsets = { start, limit, start };
+        handleKeyboardTransliterate(text, offsets);
+        return offsets[LIMIT];
+    }
+
+    /**
+     * Implements {@link Transliterator#handleKeyboardTransliterate}.
+     */
+    protected void handleKeyboardTransliterate(Replaceable text,
+                                               int[] offsets) {
+        /**
+         * Performs transliteration changing all characters to
+         * Unicode hexadecimal escapes.  For example, '@' -> "U+0040",
+         * assuming the prefix is "U+".
+         */
+        int cursor = offsets[CURSOR];
+        int limit = offsets[LIMIT];
+
+        UnicodeFilter filter = getFilter();
+
+        // Characters rejected by the filter, if any, are stepped over.
+        while (cursor < limit) {
+            char c = text.charAt(cursor);
+            if (filter != null && !filter.isIn(c)) {
+                ++cursor;
+                continue;
+            }
+            String hex = hex(c);
+            text.replace(cursor, cursor+1, hex);
+            int len = hex.length();
+            cursor += len; // Advance cursor by 1 and adjust for new text
+            --len; // one character was replaced, so the text grew by len-1
+            limit += len;
+        }
+
+        offsets[LIMIT] = limit;
+        offsets[CURSOR] = cursor;
+    }
+
+    /**
+     * Return the length of the longest context required by this transliterator.
+     * This is <em>preceding</em> context.  This transliterator reports
+     * that it needs none.
+     * @return maximum number of preceding context characters this
+     * transliterator needs to examine
+     */
+    protected int getMaximumContextLength() {
+        return 0;
+    }
+
+    /**
+     * Form escape sequence: the prefix, then c as four hex digits.
+     */
+    private final String hex(char c) {
+        StringBuffer buf = new StringBuffer();
+        buf.append(prefix);
+        if (c < 0x1000) { // pad with leading zeros up to four digits
+            buf.append('0');
+            if (c < 0x100) {
+                buf.append('0');
+                if (c < 0x10) {
+                    buf.append('0');
+                }
+            }
+        }
+        String h = Integer.toHexString(c);
+        buf.append(uppercase ? h.toUpperCase() : h);
+        return buf.toString();
+    }
+}
diff --git a/icu4j/src/com/ibm/text/components/AppletFrame.java b/icu4j/src/com/ibm/text/components/AppletFrame.java
new file mode 100755
index 00000000000..cf6cc399ddd
--- /dev/null
+++ b/icu4j/src/com/ibm/text/components/AppletFrame.java
@@ -0,0 +1,126 @@
+package com.ibm.text.components;
+import java.applet.*;
+import java.net.URL;
+import java.util.Enumeration;
+import java.awt.*;
+import java.awt.event.*;
+
+/**
+ * <p>A Frame that runs an Applet within itself, making it possible
+ * for an applet to run as an application.  Usage:
+ *
+ * <pre>
+ * public class MyApplet extends Applet {
+ *     public static void main(String args[]) {
+ *         MyApplet applet = new MyApplet();
+ *         new AppletFrame("My Applet Running As An App", applet, 640, 480);
+ *     }
+ *     ...
+ * }
+ * </pre>
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: AppletFrame.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class AppletFrame extends Frame implements AppletStub, AppletContext {
+
+    Applet applet;
+
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Construct a Frame running the given Applet with the default size
+     * of 640 by 480.
+     * When the Frame is closed, the applet's stop() method is called,
+     * the Frame is dispose()d of, and System.exit(0) is called.
+     *
+     * @param name the Frame title
+     * @param applet the applet to be run
+     */
+    public AppletFrame(String name, Applet applet) {
+        this(name, applet, 640, 480);
+    }
+
+    /**
+     * Construct a Frame running the given Applet with the given size.
+     * When the Frame is closed, the applet's stop() method is called,
+     * the Frame is dispose()d of, and System.exit(0) is called.
+     *
+     * @param name the Frame title
+     * @param applet the applet to be run
+     * @param width width of the Frame
+     * @param height height of the Frame
+     */
+    public AppletFrame(String name, Applet applet, int width, int height) {
+        super(name);
+        this.applet = applet;
+        applet.setStub(this);
+
+        setSize(width, height); // JDK 1.1 name for the deprecated resize()
+        add("Center", applet);
+        setVisible(true); // JDK 1.1 name for the deprecated show()
+        addWindowListener(new WindowAdapter() {
+            public void windowClosing(WindowEvent e) {
+                AppletFrame.this.applet.stop();
+                dispose();
+                System.exit(0);
+            }
+        });
+
+        applet.init();
+        applet.start();
+    }
+
+    // AppletStub API
+    public void appletResize(int width,
+                             int height) {
+        setSize(width, height);
+    }
+
+    public AppletContext getAppletContext() {
+        return this;
+    }
+
+    public URL getCodeBase() {
+        return null;
+    }
+
+    public URL getDocumentBase() {
+        return null;
+    }
+    
+    public String getParameter(String name) {
+        return "PARAMETER"; // same fixed value whatever the name is
+    }
+
+    public boolean isActive() {
+        return true;
+    }
+
+    // AppletContext API
+    public Applet getApplet(String name) {
+        return applet; // the one hosted applet, whatever the name is
+    }
+
+    public Enumeration getApplets() {
+        return null;
+    }
+
+    public AudioClip getAudioClip(URL url) {
+        return null;
+    }
+
+    public Image getImage(URL url) {
+        return null;
+    }
+
+    public void showDocument(URL url) {}
+    public void showDocument(URL url, String target) {}
+
+    public void showStatus(String status) {
+        System.out.println(status);
+    }
+}
diff --git a/icu4j/src/com/ibm/text/components/DumbTextComponent.java b/icu4j/src/com/ibm/text/components/DumbTextComponent.java
new file mode 100755
index 00000000000..a400b9a76f1
--- /dev/null
+++ b/icu4j/src/com/ibm/text/components/DumbTextComponent.java
@@ -0,0 +1,708 @@
+package com.ibm.text.components;
+import java.awt.*;
+import java.awt.event.*;
+import java.text.*;
+import java.awt.datatransfer.*;
+
+// LIU: Changed from final to non-final
+public class DumbTextComponent extends Canvas
+  implements KeyListener, MouseListener, MouseMotionListener, FocusListener
+  {
+    private transient static final String copyright =
+      "Copyright \u00A9 1998, Mark Davis. All Rights Reserved.";
+    // When true, event handlers and painting print trace messages.
+    private transient static boolean DEBUG = false;
+
+    // The text being displayed and edited.
+    private String contents = "";
+    // The current selection (anchor/caret offsets into contents).
+    private Selection selection = new Selection();
+    // Whether user input may modify contents.
+    private boolean editable = true;
+
+    // Scratch selection reused while computing a new selection.
+    private transient Selection tempSelection = new Selection();
+    // True while this component owns the keyboard focus.
+    private transient boolean focus;
+    // Break iterators; setText2() points them at contents.
+    private transient BreakIterator lineBreaker = BreakIterator.getLineInstance();
+    private transient BreakIterator wordBreaker = BreakIterator.getWordInstance();
+    private transient BreakIterator charBreaker = BreakIterator.getCharacterInstance();
+    // Font metrics captured by paint2().
+    private transient int lineAscent;
+    private transient int lineHeight;
+    private transient int lineLeading;
+    // Dimensions recorded by paint2() and reported by the size getters.
+    private transient int lastHeight = 10;
+    private transient int lastWidth = 50;
+    private static final int MAX_LINES = 200; // LIU: Use symbolic name
+    // lineStarts[i] is the offset in contents where display line i begins;
+    // lineStarts[lineCount] is the end of the last line.
+    private transient int[] lineStarts = new int[MAX_LINES]; // LIU
+    private transient int lineCount = 1;
+
+    // False when the cached off-screen image must be re-rendered.
+    private transient boolean valid = false;
+    private transient FontMetrics fm;
+    // True when lineStarts must be recomputed before drawing.
+    private transient boolean redoLines = true;
+    // True when the most recent mouse press had a click count above one.
+    private transient boolean doubleClick = false;
+    private transient TextListener textListener;
+    private transient ActionListener selectionListener;
+    // Off-screen buffer that paint() copies to the screen.
+    private transient Image cacheImage;
+    private transient Dimension mySize;
+    // Margins, in pixels, between the component edge and the text.
+    private transient int xInset = 5;
+    private transient int yInset = 5;
+    // Scratch points reused by drawSelection().
+    private transient Point startPoint = new Point();
+    private transient Point endPoint = new Point();
+    private transient Point caretPoint = new Point();
+    // In-process clipboard; static, so shared by all instances.
+    private transient static String clipBoard;
+
+    private static final char CR = '\015'; // LIU
+
+    // ============================================
+
+    /**
+     * Creates an empty, editable text component that listens to its
+     * own mouse, key and focus events and shows a text cursor.
+     */
+    public DumbTextComponent() {
+        // The component is its own listener for every kind of input.
+        addMouseListener(this);
+        addMouseMotionListener(this);
+        addKeyListener(this);
+        addFocusListener(this);
+
+        Cursor textCursor = Cursor.getPredefinedCursor(Cursor.TEXT_CURSOR);
+        setCursor(textCursor);
+    }
+
+// ================ Events ====================
+
+    /** Always true, so the component can receive focus via keyboard traversal. */
+    public boolean isFocusTraversable() { return true; }
+
+	/** Adds a listener that is notified whenever the selection changes. */
+	public void addActionListener(ActionListener l) {
+        selectionListener = AWTEventMulticaster.add(selectionListener, l);
+	}
+
+	/** Removes a listener previously added with addActionListener. */
+	public void removeActionListener(ActionListener l) {
+        selectionListener = AWTEventMulticaster.remove(selectionListener, l);
+	}
+
+	/** Adds a listener that is notified whenever the text changes. */
+	public void addTextListener(TextListener l) {
+        textListener = AWTEventMulticaster.add(textListener, l);
+	}
+
+	/** Removes a listener previously added with addTextListener. */
+	public void removeTextListener(TextListener l) {
+        textListener = AWTEventMulticaster.remove(textListener, l);
+	}
+
+    /** True between a mouse press and the matching release. */
+    private transient boolean pressed;
+
+    /**
+     * Starts a new selection at the pressed point.  A press that
+     * arrives while a button is already down merely extends the
+     * current selection.
+     */
+	public void mousePressed(MouseEvent e) {
+        if (DEBUG) {
+            System.out.println("mousePressed");
+        }
+        if (pressed) {
+            // Already tracking a press: treat this like a drag.
+            select(e, false);
+            return;
+        }
+        doubleClick = e.getClickCount() > 1;
+        requestFocus();
+        select(e, true);
+        pressed = true;
+	}
+
+	/** Extends the current selection to the dragged-to point. */
+	public void mouseDragged(MouseEvent e) {
+	    if (DEBUG) System.out.println("mouseDragged");
+	    select(e, false);
+	}
+
+	/** Ends the press that mousePressed started tracking. */
+	public void mouseReleased(MouseEvent e) {
+	    if (DEBUG) System.out.println("mouseReleased");
+	    pressed = false;
+	}
+
+	/** No-op; extending the selection on enter is disabled. */
+	public void mouseEntered(MouseEvent e) {
+	    //if (pressed) select(e, false);
+	}
+
+	/** No-op; extending the selection on exit is disabled. */
+	public void mouseExited(MouseEvent e){
+	    //if (pressed) select(e, false);
+	}
+
+	// Clicks and plain moves are ignored; press/drag do all the work.
+	public void mouseClicked(MouseEvent e) {}
+	public void mouseMoved(MouseEvent e) {}
+
+
+    /** Records that focus was gained and schedules a repaint. */
+    public void focusGained(FocusEvent e) {
+        noteFocusChange(true, "focusGained");
+    }
+
+    /** Records that focus was lost and schedules a repaint. */
+    public void focusLost(FocusEvent e) {
+        noteFocusChange(false, "focusLost");
+    }
+
+    /**
+     * Stores the new focus state, invalidates the cached image and
+     * requests a repaint within 16 ms.
+     */
+    private void noteFocusChange(boolean hasFocus, String debugLabel) {
+        if (DEBUG) {
+            System.out.println(debugLabel);
+        }
+        focus = hasFocus;
+        valid = false;
+        repaint(16);
+    }
+
+    /**
+     * Updates the selection from a mouse event.
+     * @param e the mouse event whose point becomes the caret
+     * @param first true for the initial press; unless Shift is held,
+     * the anchor is then moved to the caret as well
+     */
+    public void select(MouseEvent e, boolean first) {
+        point2Offset(e.getPoint(), tempSelection);
+        if (first && (e.getModifiers() & InputEvent.SHIFT_MASK) == 0) {
+            // Unshifted press: collapse the selection onto the caret.
+            tempSelection.anchor = tempSelection.caret;
+        }
+        if (doubleClick) {
+            // Double click: grow both ends out to word boundaries.
+            tempSelection.expand(wordBreaker);
+        }
+        select(tempSelection);
+    }
+
+	/**
+	 * Handles editing shortcuts and caret navigation.
+	 * <ul>
+	 * <li>Ctrl+Q: hex conversion via fixHex() (editable only)
+	 * <li>Ctrl+C / Ctrl+X / Ctrl+V: copy, cut and paste through the
+	 *     in-process clipboard; cut and paste modify the text only
+	 *     when the component is editable
+	 * <li>Ctrl+A: select all
+	 * <li>Arrow keys: move the caret (Ctrl moves by word for
+	 *     left/right, Shift extends the selection)
+	 * <li>Delete: remove the selection or the character after the caret
+	 * </ul>
+	 */
+	public void keyPressed(KeyEvent e) {
+        int code = e.getKeyCode();
+        if (DEBUG) System.out.println("keyPressed "
+          + hex((char)code) + ", " + hex((char)e.getModifiers()));
+        boolean shift = (e.getModifiers() & KeyEvent.SHIFT_MASK) != 0;
+        boolean ctrl = (e.getModifiers() & KeyEvent.CTRL_MASK) != 0;
+        switch (code) {
+        case KeyEvent.VK_Q:
+            if (!ctrl || !editable) break;
+            fixHex();
+            break;
+        case KeyEvent.VK_V:
+            if (!ctrl || !editable) break;
+            // Nothing has been copied yet: pasting null would fail.
+            if (clipBoard == null) break;
+            insertText(clipBoard);
+            break;
+        case KeyEvent.VK_C:
+            if (!ctrl) break;
+            clipBoard = contents.substring(selection.getStart(), selection.getEnd());
+            break;
+        case KeyEvent.VK_X:
+            if (!ctrl) break;
+            clipBoard = contents.substring(selection.getStart(), selection.getEnd());
+            // A read-only component copies but must not delete.
+            if (!editable) break;
+            insertText("");
+            break;
+        case KeyEvent.VK_A:
+            if (!ctrl) break;
+            // Out-of-range offsets are pinned to the text by select().
+            select(Integer.MAX_VALUE, 0, false);
+            break;
+        case KeyEvent.VK_RIGHT:
+            tempSelection.set(selection);
+            tempSelection.nextBound(ctrl ? wordBreaker : charBreaker, +1, shift);
+            select(tempSelection);
+            break;
+        case KeyEvent.VK_LEFT:
+            tempSelection.set(selection);
+            tempSelection.nextBound(ctrl ? wordBreaker : charBreaker, -1, shift);
+            select(tempSelection);
+            break;
+        case KeyEvent.VK_UP: // LIU: Add support for up arrow
+            tempSelection.set(selection);
+            tempSelection.caret = lineDelta(tempSelection.caret, -1);
+            if (!shift) {
+                tempSelection.anchor = tempSelection.caret;
+            }
+            select(tempSelection);
+            break;
+        case KeyEvent.VK_DOWN: // LIU: Add support for down arrow
+            tempSelection.set(selection);
+            tempSelection.caret = lineDelta(tempSelection.caret, +1);
+            if (!shift) {
+                tempSelection.anchor = tempSelection.caret;
+            }
+            select(tempSelection);
+            break;
+        case KeyEvent.VK_DELETE: { // LIU: Add delete key support
+            if (!editable) break;
+            if (contents.length() == 0) break;
+            int start = selection.getStart();
+            int end = selection.getEnd();
+            if (start == end) {
+                // No selection: delete the character after the caret.
+                ++end;
+                if (end > contents.length()) {
+                    getToolkit().beep();
+                    return;
+                }
+            }
+            replaceRange("", start, end);
+            break;
+        }
+        }
+    }
+
+    /**
+     * LIU: Given an offset into contents, moves up or down by lines,
+     * according to lineStarts[].  The column (offset within the line)
+     * is kept where the target line is long enough; otherwise the
+     * result is the position just before the start of the following line.
+     * Moving above the first line yields the start of the first line;
+     * moving below the last line yields contents.length().
+     * @param off the offset into contents
+     * @param delta how many lines to move up (< 0) or down (> 0)
+     * @return the new offset into contents
+     */
+    private int lineDelta(int off, int delta) {
+        int line = findLine(off, false);
+        int posInLine = off - lineStarts[line];
+        // System.out.println("off=" + off + " at " + line + ":" + posInLine);
+        line += delta;
+        if (line < 0) {
+            line = posInLine = 0;
+        } else if (line >= lineCount) {
+            return contents.length();
+        }
+        off = lineStarts[line] + posInLine;
+        // Clamp to the last position of the target line.  With empty
+        // contents this yields -1; callers pass the result to select(),
+        // which pins it back into range.
+        if (off >= lineStarts[line+1]) {
+            off = lineStarts[line+1] - 1;
+        }
+        return off;
+    }
+      
+	public void keyReleased(KeyEvent e) {
+        int code = e.getKeyCode();
+        if (DEBUG) System.out.println("keyReleased "
+          + hex((char)code) + ", " + hex((char)e.getModifiers()));
+    }
+
+	/**
+	 * Handles a typed character.  Control-modified and undefined
+	 * characters are ignored.  Backspace deletes the selection or the
+	 * character before the caret.  Anything else is forwarded to
+	 * handleKeyTyped().  Nothing is changed when the component is not
+	 * editable.
+	 */
+	public void keyTyped(KeyEvent e) {
+        char typed = e.getKeyChar();
+        if (DEBUG) System.out.println("keyTyped "
+          + hex((char)typed) + ", " + hex((char)e.getModifiers()));
+        if ((e.getModifiers() & KeyEvent.CTRL_MASK) != 0) return;
+        if (typed == KeyEvent.CHAR_UNDEFINED) return;
+        if (!editable) return;
+
+        if (typed != KeyEvent.VK_BACK_SPACE) {
+            // LIU: Dispatch to subclass API
+            handleKeyTyped(e);
+            return;
+        }
+
+        if (contents.length() == 0) return;
+        int from = selection.getStart();
+        int to = selection.getEnd();
+        if (from == to) {
+            // No selection: remove the character before the caret.
+            --from;
+            if (from < 0) {
+                getToolkit().beep(); // LIU: Add audio feedback of NOP
+                return;
+            }
+        }
+        replaceRange("", from, to);
+    }
+
+    /**
+     * LIU: Subclass API for handling of key typing.  This default
+     * implementation inserts the typed character at the selection.
+     */
+    protected void handleKeyTyped(KeyEvent e) {
+        char typed = e.getKeyChar();
+        insertText(String.valueOf(typed));
+    }
+
+// ===================== Control ======================
+
+    /** Enables or disables modification of the text through user input. */
+    public synchronized void setEditable(boolean b) {
+        editable = b;
+    }
+
+    /** Returns whether user input may modify the text. */
+    public boolean isEditable() {
+        return editable;
+    }
+
+    public void select(Selection newSelection) {
+        newSelection.pin(contents);
+        if (!selection.equals(newSelection)) {
+            selection.set(newSelection);
+            if (selectionListener != null) {
+                selectionListener.actionPerformed(
+                  new ActionEvent(this, ActionEvent.ACTION_PERFORMED,
+                    "Selection Changed", 0));
+            }
+            repaint(10);
+            valid = false;
+        }
+    }
+
+    /** Selects from start (anchor) to end (caret) with clickAfter false. */
+    public void select(int start, int end) {
+        select(start, end, false);
+    }
+
+    /**
+     * Selects using start as the anchor and end as the caret.
+     * @param clickAfter stored as the selection's clickAfter flag
+     */
+    public void select(int start, int end, boolean clickAfter) {
+        tempSelection.set(start, end, clickAfter);
+        select(tempSelection);
+    }
+
+    /** Returns the smaller of the selection's anchor and caret offsets. */
+    public int getSelectionStart() {
+        return selection.getStart();
+    }
+
+    /** Returns the larger of the selection's anchor and caret offsets. */
+    public int getSelectionEnd() {
+        return selection.getEnd();
+    }
+
+    /** Sets the bounds and marks the line layout for recomputation. */
+    public void setBounds(int x, int y, int w, int h) {
+        super.setBounds(x,y,w,h);
+        redoLines = true;
+    }
+
+    /** Returns the width and height recorded by the last paint2() call. */
+    public Dimension getPreferredSize() {
+        return new Dimension(lastWidth,lastHeight);
+    }
+
+    /** Same as the preferred size. */
+    public Dimension getMaximumSize() {
+        return new Dimension(lastWidth,lastHeight);
+    }
+
+    // NOTE(review): the minimum width is lastHeight, not lastWidth;
+    // confirm that a square minimum size is intended.
+    public Dimension getMinimumSize() {
+        return new Dimension(lastHeight,lastHeight);
+    }
+
+    public void setText(String text) {
+        setText2(text);
+        select(tempSelection.set(selection).pin(contents));
+    }
+
+    public void setText2(String text) {
+        contents = text;
+        charBreaker.setText(text);
+        wordBreaker.setText(text);
+        lineBreaker.setText(text);
+        redoLines = true;
+        if (textListener != null)
+            textListener.textValueChanged(
+              new TextEvent(this, TextEvent.TEXT_VALUE_CHANGED));
+        repaint(16);
+    }
+
+    public void insertText(String text) {
+        replaceRange(text, selection.getStart(), selection.getEnd());
+    }
+
+    public void replaceRange(String s, int start, int end) {
+        setText2(contents.substring(0,start) + s
+          + contents.substring(end));
+        select(tempSelection.set(selection).
+          fixAfterReplace(start, end, s.length()));
+    }
+
+    /** Returns the current text. */
+    public String getText() {
+        return contents;
+    }
+
+    /** Sets the font, marks the line layout as stale and requests a repaint. */
+    public void setFont(Font font) {
+        super.setFont(font);
+        redoLines = true;
+        repaint(16);
+    }
+
+    // ================== Graphics ======================
+
+    public void update(Graphics g) {
+        if (DEBUG) System.out.println("update");
+        paint(g);
+    }
+
+    public void paint(Graphics g) {
+        mySize = getSize();
+        if (cacheImage == null
+          || cacheImage.getHeight(this) != mySize.height
+          || cacheImage.getWidth(this) != mySize.width) {
+            cacheImage = createImage(mySize.width, mySize.height);
+            valid = false;
+        }
+        if (!valid || redoLines) {
+            if (DEBUG) System.out.println("painting");
+            paint2(cacheImage.getGraphics());
+            valid = true;
+        }
+        //getToolkit().sync();
+        if (DEBUG) System.out.println("copying");
+        g.drawImage(cacheImage,
+          0, 0, mySize.width, mySize.height,
+          0, 0, mySize.width, mySize.height,
+          this);
+    }
+
+    /**
+     * Renders the border, the text lines and the selection into g.
+     * Also captures the font metrics, recomputes the line layout when
+     * it is stale, and records the size reported by getPreferredSize().
+     */
+    public void paint2(Graphics g) {
+        g.clearRect(0, 0, mySize.width, mySize.height);
+        if (DEBUG) System.out.println("print");
+        // The border is black with focus, gray without.
+        if (focus) g.setColor(Color.black);
+        else g.setColor(Color.gray);
+        g.drawRect(0,0,mySize.width-1,mySize.height-1);
+        // Keep all further drawing inside the border.
+        g.setClip(1,1,
+          mySize.width-2,mySize.height-2);
+        g.setColor(Color.black);
+        g.setFont(getFont());
+        fm = g.getFontMetrics();
+        lineAscent = fm.getAscent();
+        lineLeading = fm.getLeading();
+        lineHeight = lineAscent + fm.getDescent() + lineLeading;
+        // y is the baseline of the line being drawn.
+        int y = yInset + lineAscent;
+        String lastSubstring = "";
+        if (redoLines) fixLineStarts(mySize.width-xInset-xInset);
+        for (int i = 0; i < lineCount; y += lineHeight, ++i) {
+            // LIU: Don't display terminating ^M characters
+            int lim = lineStarts[i+1];
+            if (lim > 0 && contents.length() > 0 &&
+                contents.charAt(lim-1) == CR) --lim;
+            lastSubstring = contents.substring(lineStarts[i],lim);
+            g.drawString(lastSubstring, xInset, y);
+        }
+        drawSelection(g, lastSubstring);
+        lastHeight = y + yInset - lineHeight + yInset;
+        lastWidth = mySize.width-xInset-xInset;
+    }
+
+    /**
+     * Draws a selection rectangle: filled when the component has
+     * focus, outlined otherwise.
+     */
+    void paintRect(Graphics g, int x, int y, int w, int h) {
+        if (!focus) {
+            g.drawRect(x, y, w-1, h-1);
+            return;
+        }
+        g.fillRect(x, y, w, h);
+    }
+
+    /**
+     * Draws the selection highlight and the caret in XOR mode.
+     * @param g the graphics context to draw into
+     * @param lastSubstring not used by this method
+     */
+    public void drawSelection(Graphics g, String lastSubstring) {
+        g.setXORMode(Color.black);
+        if (selection.isCaret()) {
+            offset2Point(selection.caret, selection.clickAfter, caretPoint);
+        } else {
+            if (focus) g.setColor(Color.blue);
+            else g.setColor(Color.yellow);
+            offset2Point(selection.getStart(), true, startPoint);
+            offset2Point(selection.getEnd(), false, endPoint);
+            // The caret sits at whichever end of the range it equals.
+            if (selection.getStart() == selection.caret)
+                caretPoint.setLocation(startPoint);
+            else caretPoint.setLocation(endPoint);
+            if (startPoint.y == endPoint.y) {
+                // Range lies on one line: a single rectangle.
+                paintRect(g, startPoint.x, startPoint.y,
+                  Math.max(1,endPoint.x-startPoint.x), lineHeight);
+            } else {
+                // First line: from the start point to the right margin.
+                paintRect(g, startPoint.x, startPoint.y,
+                  (mySize.width-xInset)-startPoint.x, lineHeight);
+                // Full lines in between, if any.
+                if (startPoint.y + lineHeight < endPoint.y)
+                  paintRect(g, xInset, startPoint.y + lineHeight,
+                  (mySize.width-xInset)-xInset, endPoint.y - startPoint.y - lineHeight);
+                // Last line: from the left margin to the end point.
+                paintRect(g, xInset, endPoint.y, endPoint.x-xInset, lineHeight);
+            }
+        }
+        if (focus || selection.isCaret()) {
+            if (focus) g.setColor(Color.green);
+            else g.setColor(Color.red);
+            // Vertical caret bar plus small horizontal marks at top and
+            // bottom; clickAfter decides which side they are drawn on.
+            int line = caretPoint.x - (selection.clickAfter ? 0 : 1);
+            g.fillRect(line, caretPoint.y, 1, lineHeight);
+            int w = lineHeight/12 + 1;
+            int braces = line - (selection.clickAfter ? -1 : w);
+            g.fillRect(braces, caretPoint.y, w, 1);
+            g.fillRect(braces, caretPoint.y + lineHeight - 1, w, 1);
+        }
+    }
+
+    public Point offset2Point(int off, boolean start, Point p) {
+        int line = findLine(off, start);
+        int width = 0;
+        try {
+            width = fm.stringWidth(
+              contents.substring(lineStarts[line], off));
+        } catch (Exception e) {
+            System.out.println(e);
+        }
+        p.x = width + xInset;
+        if (p.x > mySize.width - xInset)
+            p.x = mySize.width - xInset;
+        p.y = lineHeight * line + yInset;
+        return p;
+    }
+
+    /**
+     * Returns the index of the display line containing an offset.
+     * @param off offset into contents
+     * @param start if true, an offset on a line boundary is assigned
+     * to the following line
+     */
+    private int findLine(int off, boolean start) {
+        // if it is start, then go to the next line!
+        int probe = start ? off + 1 : off;
+        // LIU: The comparison was <= ; changed to < to make caret after
+        // final CR in line appear at START of next line.
+        for (int next = 1; next < lineCount; ++next) {
+            if (probe < lineStarts[next]) {
+                return next - 1;
+            }
+        }
+        // LIU: Check for special case; after CR at end of the last line
+        boolean afterFinalCR = probe == lineStarts[lineCount]
+            && probe > 0
+            && contents.length() > 0
+            && contents.charAt(probe - 1) == CR;
+        return afterFinalCR ? lineCount : lineCount - 1;
+    }
+
+    /**
+     * Converts a pixel position to the nearest text offset.
+     * Only o.caret and o.clickAfter are written; o.anchor is left alone.
+     * @param p the point, in component coordinates
+     * @param o receives the result
+     * @return o
+     */
+    // offsets on any line will go from start,true to end,false
+    // excluding start,false and end,true
+    public Selection point2Offset(Point p, Selection o) {
+        // Above the text: start of the text.
+        if (p.y < yInset) {
+            o.caret = 0;
+            o.clickAfter = true;
+            return o;
+        }
+        int line = (p.y - yInset)/lineHeight;
+        // Below the last line: end of the text.
+        if (line >= lineCount) {
+            o.caret = contents.length();
+            o.clickAfter = false;
+            return o;
+        }
+        int target = p.x - xInset;
+        // Left of the text: start of the line.
+        if (target <= 0) {
+            o.caret = lineStarts[line];
+            o.clickAfter = true;
+            return o;
+        }
+        int lowGuess = lineStarts[line];
+        int lowWidth = 0;
+        int highGuess = lineStarts[line+1];
+        int highWidth = fm.stringWidth(contents.substring(lineStarts[line],highGuess));
+        // Right of the text: end of the line.
+        if (target >= highWidth) {
+            o.caret = lineStarts[line+1];
+            o.clickAfter = false;
+            return o;
+        }
+        // Binary search for the offset whose prefix width brackets target.
+        while (lowGuess < highGuess - 1) {
+            int guess = (lowGuess + highGuess)/2;
+            int width = fm.stringWidth(contents.substring(lineStarts[line],guess));
+            if (width <= target) {
+                lowGuess = guess;
+                lowWidth = width;
+                if (width == target) break;
+            } else {
+                highGuess = guess;
+                highWidth = width;
+            }
+        }
+        // at end, either lowWidth < target < width(low+1), or lowWidth = target
+        int highBound = charBreaker.following(lowGuess);
+        int lowBound = charBreaker.previous();
+        // we are now at character boundaries
+        if (lowBound != lowGuess)
+            lowWidth = fm.stringWidth(contents.substring(lineStarts[line],lowBound));
+        if (highBound != highGuess)
+            highWidth = fm.stringWidth(contents.substring(lineStarts[line],highBound));
+        // we now have the right widths
+        // Pick whichever boundary is closer to the target x.
+        if (target - lowWidth < highWidth - target) {
+            o.caret = lowBound;
+            o.clickAfter = true;
+        } else {
+            o.caret = highBound;
+            o.clickAfter = false;
+        }
+        // we now have the closest!
+        return o;
+    }
+
+    /**
+     * Recomputes lineStarts[] and lineCount for the given text width
+     * and clears the redoLines flag.  At most MAX_LINES - 1 lines are
+     * laid out; text beyond that is not assigned to any line.
+     * @param width available width for text, in pixels
+     */
+    private void fixLineStarts(int width) {
+        lineCount = 1;
+        lineStarts[0] = 0;
+        if (contents.length() == 0) {
+            lineStarts[1] = 0;
+            // The layout is up to date for empty text too; without this
+            // every subsequent paint would re-render needlessly.
+            redoLines = false;
+            return;
+        }
+        int end = 0;
+        // LIU: Add check for MAX_LINES
+        for (int start = 0; start < contents.length() && lineCount < MAX_LINES;
+             start = end) {
+            end = nextLine(fm, start, width);
+            lineStarts[lineCount++] = end;
+            if (end == start) { // LIU: Assertion
+                throw new RuntimeException("nextLine broken");
+            }
+        }
+        // lineCount was advanced once per stored end offset, starting at 1.
+        --lineCount;
+        redoLines = false;
+    }
+
+    /**
+     * Finds where the display line beginning at start ends.
+     * LIU: Enhanced to wrap long lines.  Bug with return of start fixed.
+     * @param fm metrics used to measure text
+     * @param start offset into contents where the line begins
+     * @param width available width in pixels
+     * @return offset just past the end of the line, including its line
+     * separator (a CR LF pair counts as one separator)
+     */
+    public int nextLine(FontMetrics fm, int start, int width) {
+        final int textLen = contents.length();
+        int len = textLen;
+        for (int i = start; i < textLen; ++i) {
+            // check for line separator
+            char ch = (contents.charAt(i));
+            if (ch >= 0x000A && ch <= 0x000D || ch == 0x2028 || ch == 0x2029) {
+                len = i + 1;
+                // crlf: compare against the full text length (not the
+                // just-shortened len) so the LF is actually found.
+                if (ch == 0x000D && len < textLen && contents.charAt(len) == 0x000A)
+                    ++len; // grab extra char
+                break;
+            }
+        }
+        String subject = contents.substring(start,len);
+        if (visibleWidth(fm, subject) <= width)
+          return len;
+
+        // LIU: Remainder of this method rewritten to accomodate lines
+        // longer than the component width by first trying to break
+        // into lines; then words; finally chars.
+        int n = findFittingBreak(fm, subject, width, lineBreaker);
+        if (n == 0) {
+            n = findFittingBreak(fm, subject, width, wordBreaker);
+        }
+        if (n == 0) {
+            n = findFittingBreak(fm, subject, width, charBreaker);
+        }
+        // If not even one character fits, keep the whole line.
+        return n > 0 ? start + n : len;
+    }
+
+    /**
+     * LIU: Finds the longest substring that fits a given width
+     * composed of subunits returned by a BreakIterator.  If the smallest
+     * subunit is too long, returns 0.  The breaker is temporarily
+     * pointed at line and is reset to the full contents before
+     * returning, so offset-based users of the shared iterators
+     * are not left working on a substring.
+     * @param fm metrics to use
+     * @param line the string to be fit into width
+     * @param width line.substring(0, result) must be <= width
+     * @param breaker the BreakIterator that will be used to find subunits
+     * @return maximum characters, at boundaries returned by breaker,
+     * that fit into width, or zero on failure
+     */
+    private int findFittingBreak(FontMetrics fm, String line, int width,
+                                 BreakIterator breaker) {
+        breaker.setText(line);
+        try {
+            int last = breaker.first();
+            int end = breaker.next();
+            while (end != BreakIterator.DONE &&
+                   visibleWidth(fm, line.substring(0, end)) <= width) {
+                last = end;
+                end = breaker.next();
+            }
+            return last;
+        } finally {
+            // Restore the text that setText2() installed.
+            breaker.setText(contents);
+        }
+    }
+
+    /**
+     * Measures s without its trailing spaces and line separators.
+     * @param fm metrics used to measure the text
+     * @param s the string to measure
+     * @return pixel width of s up to its last visible character, or 0
+     * if s has no visible character
+     */
+    public int visibleWidth(FontMetrics fm, String s) {
+        int visibleEnd = s.length();
+        while (visibleEnd > 0) {
+            char ch = s.charAt(visibleEnd - 1);
+            boolean invisible = ch == ' '
+                || (ch >= 0x000A && ch <= 0x000D)
+                || ch == 0x2028
+                || ch == 0x2029;
+            if (!invisible) {
+                return fm.stringWidth(s.substring(0, visibleEnd));
+            }
+            --visibleEnd;
+        }
+        return 0;
+    }
+
+// =============== Utility ====================
+
+    /**
+     * Converts between hex digits and characters just before the end
+     * of the selection.  Up to eight trailing hex digits are read:
+     * <ul>
+     * <li>eight digits spelling a surrogate pair become that pair
+     * <li>a value in 0x10000..0x10FFFF becomes its surrogate pair
+     * <li>otherwise, four or more digits: the last four become one char
+     * <li>otherwise the preceding character (or surrogate pair) is
+     *     replaced by its four-digit hex form
+     * </ul>
+     */
+    private void fixHex() {
+        if (selection.getEnd() == 0) return;
+        int store = 0;
+        int places = 1;
+        int count = 0;
+        int min = Math.min(8,selection.getEnd());
+        // Accumulate hex digits right to left, least significant first.
+        for (int i = 0; i < min; ++i) {
+            char ch = contents.charAt(selection.getEnd()-1-i);
+            // Character.digit accepts only genuine radix-16 digits;
+            // getNumericValue also accepted other numerics (e.g. Roman
+            // numeral characters) with values 0..15.
+            int value = Character.digit(ch, 16);
+            if (value < 0) break;
+            store += places * value;
+            ++count;
+            places *= 16;
+        }
+        String add = "";
+        int bottom = store & 0xFFFF;
+        // The range test works on signed ints because both bounds and
+        // any 0xD8xxxxxx..0xDBxxxxxx value are negative and ordered alike.
+        if (store >= 0xD8000000 && store < 0xDC000000
+          && bottom >= 0xDC00 && bottom < 0xE000) { // surrogates
+            add = "" + (char)(store >> 16) + (char)bottom;
+        } else if (store > 0xFFFF && store <= 0x10FFFF) {
+            store -= 0x10000;
+            add = "" + (char)(((store >> 10) & 0x3FF) + 0xD800)
+              + (char)((store & 0x3FF) + 0xDC00);
+
+        } else if (count >= 4) {
+            count = 4;
+            add = ""+(char)(store & 0xFFFF);
+        } else {
+            // Reverse direction: character(s) to hex.
+            count = 1;
+            char ch = contents.charAt(selection.getEnd()-1);
+            add = hex(ch);
+            if (ch >= 0xDC00 && ch <= 0xDFFF && selection.getEnd() > 1) {
+                ch = contents.charAt(selection.getEnd()-2);
+                if (ch >= 0xD800 && ch <= 0xDBFF) {
+                    count = 2;
+                    add = hex(ch) + add;
+                }
+            }
+        }
+        replaceRange(add, selection.getEnd()-count, selection.getEnd());
+    }
+
+    /**
+     * Formats a char as exactly four upper-case hexadecimal digits.
+     * @param ch the character to format
+     * @return the zero-padded hex code, e.g. "0041" for 'A'
+     */
+    public static String hex(char ch) {
+        String digits = Integer.toString(ch,16).toUpperCase();
+        StringBuffer padded = new StringBuffer(4);
+        for (int missing = 4 - digits.length(); missing > 0; --missing) {
+            padded.append('0');
+        }
+        padded.append(digits);
+        return padded.toString();
+    }
+}
diff --git a/icu4j/src/com/ibm/text/components/Selection.java b/icu4j/src/com/ibm/text/components/Selection.java
new file mode 100755
index 00000000000..985b36f3521
--- /dev/null
+++ b/icu4j/src/com/ibm/text/components/Selection.java
@@ -0,0 +1,155 @@
+package com.ibm.text.components;
+import java.text.*;
+
+/**
+ * A mutable text selection described by two offsets: the anchor (the
+ * fixed end) and the caret (the moving end).  The caret may lie before
+ * or after the anchor; when both are equal the selection is a bare
+ * caret.  Most mutators return this object so calls can be chained.
+ */
+public final class Selection {
+
+    /** Offset of the fixed end of the selection. */
+    public int anchor;
+    /** Offset of the moving end of the selection. */
+    public int caret;
+    /** Flag stored with the caret; used by clients to pick a side at line boundaries. */
+    public boolean clickAfter;
+
+    /** Returns the smaller of anchor and caret. */
+    public int getStart() {
+        return anchor < caret ? anchor : caret;
+    }
+
+    /** Returns the larger of anchor and caret. */
+    public int getEnd() {
+        return anchor > caret ? anchor : caret;
+    }
+
+    /** Returns true if the selection is empty (anchor equals caret). */
+    public boolean isCaret() {
+        return anchor == caret;
+    }
+
+    /** Copies all fields from other and returns this. */
+    public Selection set(Selection other) {
+        anchor = other.anchor;
+        caret = other.caret;
+        clickAfter = other.clickAfter;
+        return this;
+    }
+
+    /** Sets all fields and returns this. */
+    public Selection set(int anchor, int caret, boolean clickAfter) {
+        this.anchor = anchor;
+        this.caret = caret;
+        this.clickAfter = clickAfter;
+        return this;
+    }
+
+    /**
+     * Two selections are equal when anchor, caret and clickAfter all
+     * match.  Returns false for null and for objects of other types.
+     */
+    public boolean equals(Object other) {
+        if (this == other) {
+            return true;
+        }
+        if (!(other instanceof Selection)) {
+            return false;
+        }
+        Selection other2 = (Selection)other;
+        return anchor == other2.anchor
+          && caret == other2.caret
+          && clickAfter == other2.clickAfter;
+    }
+
+    /** Hash code consistent with equals(). */
+    public int hashCode() {
+        int h = anchor;
+        h = 31 * h + caret;
+        h = 31 * h + (clickAfter ? 1 : 0);
+        return h;
+    }
+
+    /** Returns true if this selection starts before other ends. */
+    public boolean isLessThan(Selection other) {
+        return getStart() < other.getEnd();
+    }
+
+    /**
+     * Clamps anchor and caret into [0, text.length()].  When the caret
+     * is clamped, clickAfter is set to true at the end of the text and
+     * to false at the start.
+     * @return this
+     */
+    public Selection pin(String text) {
+        if (anchor > text.length()) {
+            anchor = text.length();
+        } else if (anchor < 0) {
+            anchor = 0;
+        }
+        if (caret > text.length()) {
+            caret = text.length();
+            clickAfter = true;
+        } else if (caret < 0) {
+            caret = 0;
+            clickAfter = false;
+        }
+        return this;
+    }
+
+    /** Exchanges all fields with after and returns this. */
+    public Selection swap(Selection after) {
+        int temp = anchor;
+        anchor = after.anchor;
+        after.anchor = temp;
+        temp = caret;
+        caret = after.caret;
+        after.caret = temp;
+        boolean b = clickAfter;
+        clickAfter = after.clickAfter;
+        after.clickAfter = b;
+        return this;
+    }
+
+    /**
+     * Adjusts the offsets after the text range [start, end) has been
+     * replaced by len characters.  Offsets inside the replaced range
+     * move to the end of the replacement; offsets after it shift by
+     * the change in length; offsets before it are untouched.
+     * @return this
+     */
+    public Selection fixAfterReplace(int start, int end, int len) {
+        if (anchor >= start) {
+            if (anchor < end) anchor = end;
+            anchor = start + len + anchor - end;
+        }
+        if (caret >= start) {
+            if (caret < end) caret = end;
+            caret = start + len + caret - end;
+        }
+        return this;
+    }
+
+        // Mac & Windows considerably different
+        // Mac: end++. If start!=end, start=end
+        //  SHIFT: move end right
+        //  CTL: no different
+        // Windows:
+        //  UNSHIFTED: if start!=end, start = end, else start=end=end+1;
+        //       anchor = tip = start
+        //  SHIFT: tip++
+        //  CTL: if start!=end, start = end = nextbound(end-1),
+        //   else start=end=nextbound(end)
+        //       anchor = tip = start
+        //  CTL/SHIFT: tip = nextbound(tip)
+
+    /**
+     * Moves the caret to the next boundary reported by breaker.
+     * @param breaker supplies the boundaries (characters, words, ...)
+     * @param direction +1 to move forward, -1 to move backward
+     * @param extend if true the anchor stays put, extending the
+     * selection; otherwise the selection collapses onto the caret
+     * @return this
+     */
+    public Selection nextBound(BreakIterator breaker,
+      int direction, boolean extend) {
+        if (!extend && anchor != caret) caret -= direction;
+        caret = next(caret, breaker, direction, true);
+        if (!extend) anchor = caret;
+        clickAfter = false;
+        return this;
+    }
+
+    /**
+     * Expands both ends outward to boundaries reported by breaker,
+     * leaving an end alone if it is already on a boundary.
+     */
+    public void expand(BreakIterator breaker) {
+        if (anchor <= caret) {
+            anchor = next(anchor,breaker,-1,false);
+            caret = next(caret,breaker,1,false);
+        } else {
+            anchor = next(anchor,breaker,1,false);
+            caret = next(caret,breaker,-1,false);
+        }
+    }
+
+    /**
+     * Finds a boundary near position.
+     * @param position starting offset
+     * @param breaker supplies the boundaries
+     * @param direction positive to search forward, otherwise backward
+     * @param different false - move to next boundary, unless on one;
+     * true - move to next boundary, even if on one
+     * @return the boundary found, or the adjusted starting position if
+     * the breaker rejects the offset
+     */
+    public static int next(int position, BreakIterator breaker,
+      int direction, boolean different) {
+        if (!different) position -= direction;
+        try {
+            if (direction > 0) {
+                position = breaker.following(position);
+            } else {
+                breaker.following(position-1);
+                position = breaker.previous();
+            }
+        } catch (Exception ignored) {
+            // Deliberately best-effort: the breaker throws for offsets
+            // outside its text; the position is then returned as is and
+            // callers are expected to pin it into range.
+        }
+        return position;
+    }
+}
+
diff --git a/icu4j/src/com/ibm/text/components/TransliteratingTextComponent.java b/icu4j/src/com/ibm/text/components/TransliteratingTextComponent.java
new file mode 100755
index 00000000000..02bcd5996a5
--- /dev/null
+++ b/icu4j/src/com/ibm/text/components/TransliteratingTextComponent.java
@@ -0,0 +1,191 @@
+package com.ibm.text.components;
+
+import java.awt.*;
+import java.awt.event.*;
+import java.text.*;
+import java.awt.datatransfer.*;
+import com.ibm.text.*;
+
+/**
+ * A subclass of {@link DumbTextComponent} that passes key events through
+ * a {@link com.ibm.text.Transliterator}.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: TransliteratingTextComponent.java,v $ $Revision: 1.1 $ $Date: 1999/12/20 18:29:21 $
+ */
+public class TransliteratingTextComponent extends DumbTextComponent {
+
+    // Compile-time switch for the trace output in handleKeyTyped().
+    private static final boolean DEBUG = false;
+
+    // Transliterator applied to typed characters.  While this is null,
+    // key events are passed straight to the superclass.
+    private Transliterator translit = null;
+
+    // Index into getText() where the start of transliteration is.
+    // As we commit text during keyboardTransliteration, we advance
+    // this.
+    private int start = 0;
+
+    // Index into getText() where the cursor is; cursor >= start
+    private int cursor = 0;
+
+    private static final String COPYRIGHT =
+        "\u00A9 IBM Corporation 1999. All rights reserved.";
+
+    /**
+     * Constructor.  Registers an action listener on this component that
+     * restarts transliteration at the current selection start.
+     */
+    public TransliteratingTextComponent() {
+        super();
+        addActionListener(new ActionListener() {
+            public void actionPerformed(ActionEvent e) {
+                // We get an ActionEvent only when the selection changes
+                resetTransliterationStart();
+            }
+        });
+    }
+
+    /**
+     * {@link DumbTextComponent} API.  Framework method that is called
+     * when a <code>KeyEvent</code> is received.  This implementation
+     * runs the new character through the current
+     * <code>Transliterator</code>, if one is set, and inserts the
+     * transliterated text into the buffer.  If no transliterator is
+     * set the event is handled by the superclass.
+     *
+     * @param e the key event carrying the typed character
+     */
+    protected void handleKeyTyped(KeyEvent e) {
+        char ch = e.getKeyChar();
+
+        if (translit == null) {
+            super.handleKeyTyped(e);
+            return;
+        }
+
+        // ------------------------------------------------------------
+        // The following case motivates the two lines that recompute
+        // start and cursor below.
+
+        //      "     "
+        // a b c q r|s t u m m
+        // 0 1 2 3 4 5 6 7 8 9
+        //       0 1 2
+
+        // start 3, cursor 5, sel 6 -> { 0, 3, 2 }
+        // : new int[] { 0, sel - start, cursor - start };
+
+        // sz>99|9
+
+        //      "     {   "
+        // a b c q r 9 9|9 t u m m
+        // 0 1 2 3 4 5 6 7 8 9 a b
+        //       0 1 2 3 4
+
+        // { 3, 5, 4 } -> start 6, cursor 7, sel 8
+        // : start += index[0];
+        // : cursor = start + index[2] - index[0];
+        // ------------------------------------------------------------
+
+        // Need to save start because calls to replaceRange will update
+        // start and cursor.
+        int saveStart = start;
+
+        // Working copy of the text between the transliteration start and
+        // the selection start; the transliterator edits this buffer.
+        ReplaceableString buf = new ReplaceableString();
+        buf.getStringBuffer().append(getText().substring(start,
+                                                         getSelectionStart()));
+
+        // Offsets relative to buf (zero-based), see the diagram above.
+        int[] index = new int[] { 0, getSelectionStart() - start,
+                                  cursor - start};
+
+        StringBuffer log = null;
+        if (DEBUG) {
+            log = new StringBuffer();
+            log.append("start " + start + ", cursor " + cursor);
+            log.append(", sel " + getSelectionStart());
+            log.append(", {" + index[0] + ", " + index[1] + ", " + index[2] + "}, ");
+            log.append('"' + buf.toString() + "\" + '" + ch + "' -> \"");
+        }
+
+        translit.keyboardTransliterate(buf, index, ch);
+        replaceRange(buf.toString(), start, getSelectionEnd());
+        // At this point start has been changed by the callback to
+        // resetTransliterationStart() via replaceRange() -- so use our
+        // local copy, saveStart.
+
+        // The START index is zero-based.  On entry to keyboardTransliterate(),
+        // it was zero.  We can therefore just add it to our original
+        // getText()-based index value of start (in saveStart) to get
+        // the new getText()-based start.
+        start = saveStart + index[Transliterator.START];
+
+        // Make the cursor getText()-based.  The CURSOR index is zero-based.
+        cursor = start + index[Transliterator.CURSOR]
+            - index[Transliterator.START];
+
+        if (DEBUG) {
+            String out = buf.toString();
+            log.append(out.substring(0, index[Transliterator.START])).
+                append('{').
+                append(out.substring(index[Transliterator.START],
+                                     index[Transliterator.CURSOR])).
+                append('|').
+                append(out.substring(index[Transliterator.CURSOR])).
+                append('"');
+            log.append(", {" + index[0] + ", " + index[1] + ", " + index[2] + "}, ");
+            log.append("start " + start + ", cursor " + cursor);
+            log.append(", sel " + getSelectionStart());
+            System.out.println(escape(log.toString()));
+        }
+    }
+
+    /**
+     * Set the {@link com.ibm.text.Transliterator} to use to process
+     * incoming <code>KeyEvent</code>s.  Switching to a different
+     * transliterator restarts transliteration at the current selection
+     * start.
+     * @param t the {@link com.ibm.text.Transliterator} to use, or
+     * <code>null</code> to pass key events through untransliterated
+     */
+    public void setTransliterator(Transliterator t) {
+        if (translit != t) { // [sic] pointer compare ok; singletons
+            resetTransliterationStart();
+        }
+        translit = t;
+    }
+
+    /**
+     * Reset the start point at which transliteration begins.  This
+     * needs to be done when the user moves the cursor or when the
+     * current {@link com.ibm.text.Transliterator} is changed.
+     */
+    private void resetTransliterationStart() {
+        start = getSelectionStart();
+        cursor = start;
+    }
+
+    /**
+     * Escape everything except printable ASCII as a Unicode escape.
+     * JUST FOR DEBUGGING OUTPUT.
+     *
+     * <p>Characters from space through tilde are copied as they are,
+     * except that a backslash is doubled.  Every other character,
+     * including the control characters below space and DEL (0x7F), is
+     * written as a backslash, the letter u and four lowercase hex digits.
+     *
+     * @param s the string to escape
+     * @return the escaped string
+     */
+    public static final String escape(String s) {
+        StringBuffer buf = new StringBuffer();
+        for (int i=0; i<s.length(); ++i) {
+            char c = s.charAt(i);
+            // 0x7F (DEL) is a control character, so it is excluded from
+            // the pass-through range just like those below space.
+            if (c >= ' ' && c < 0x007F) {
+                if (c == '\\') {
+                    buf.append("\\\\"); // That is, "\\"
+                } else {
+                    buf.append(c);
+                }
+            } else {
+                buf.append("\\u");
+                // Left-pad with zeros to exactly four hex digits.
+                if (c < 0x1000) {
+                    buf.append('0');
+                    if (c < 0x100) {
+                        buf.append('0');
+                        if (c < 0x10) {
+                            buf.append('0');
+                        }
+                    }
+                }
+                buf.append(Integer.toHexString(c));
+            }
+        }
+        return buf.toString();
+    }
+}
diff --git a/icu4j/src/com/ibm/text/resources/TransliterationRule$KeyboardEscape$Latin1.java b/icu4j/src/com/ibm/text/resources/TransliterationRule$KeyboardEscape$Latin1.java
new file mode 100755
index 00000000000..fa9a89b2d60
--- /dev/null
+++ b/icu4j/src/com/ibm/text/resources/TransliterationRule$KeyboardEscape$Latin1.java
@@ -0,0 +1,132 @@
+package com.ibm.text.resources;
+
+import java.util.ListResourceBundle;
+
+/**
+ * Resource bundle holding the rule text of a keyboard transliterator
+ * for the Latin-1 block.  Each rule maps a short key sequence that ends
+ * in the escape character to one Latin-1 character.
+ */
+public class TransliterationRuleKeyboardEscapeLatin1 extends ListResourceBundle {
+    /**
+     * Overrides ListResourceBundle
+     *
+     * @return the bundle contents: a "Description" entry and a "Rule"
+     * entry whose value is the newline-separated rule text
+     */
+    public Object[][] getContents() {
+        return new Object[][] {
+            { "Description",
+                "Keyboard transliterator for Latin-1 block" },
+
+            { "Rule",
+                // Variable definitions (name=text).  Note that esc and
+                // acute are defined identically, as are hat and super.
+                "esc=''\n"
+                + "grave=`\n"
+                + "acute=''\n"
+                + "hat=^\n"
+                + "tilde=~\n"
+                + "umlaut=:\n"
+                + "ring=.\n"
+                + "cedilla=,\n"
+                + "slash=/\n"
+                + "super=^\n"
+
+                // Make keyboard entry of {esc} possible
+                // and of backslash
+                + "'\\'{esc}>{esc}\n"
+                + "'\\\\'>'\\'\n"
+
+                // Long keys
+                + "cur{esc}>\u00A4\n"
+                + "sec{esc}>\u00A7\n"
+                + "not{esc}>\u00AC\n"
+                + "mul{esc}>\u00D7\n"
+                + "div{esc}>\u00F7\n"
+
+                + " {esc}>\u00A0\n" // non-breaking space
+                + "!{esc}>\u00A1\n" // inverted exclamation
+                + "c/{esc}>\u00A2\n" // cent sign
+                + "lb{esc}>\u00A3\n" // pound sign
+                + "'|'{esc}>\u00A6\n" // broken vertical bar
+                + ":{esc}>\u00A8\n" // umlaut
+                + "{super}a{esc}>\u00AA\n" // feminine ordinal
+                + "'<<'{esc}>\u00AB\n"
+                + "r{esc}>\u00AE\n"
+                + "--{esc}>\u00AF\n"
+                + "-{esc}>\u00AD\n"
+                + "+-{esc}>\u00B1\n"
+                + "{super}2{esc}>\u00B2\n"
+                + "{super}3{esc}>\u00B3\n"
+                + "{acute}{esc}>\u00B4\n"
+                + "m{esc}>\u00B5\n"
+                + "para{esc}>\u00B6\n"
+                + "dot{esc}>\u00B7\n"
+                + "{cedilla}{esc}>\u00B8\n"
+                + "{super}1{esc}>\u00B9\n"
+                + "{super}o{esc}>\u00BA\n" // masculine ordinal
+                + "'>>'{esc}>\u00BB\n"
+                + "1/4{esc}>\u00BC\n"
+                + "1/2{esc}>\u00BD\n"
+                + "3/4{esc}>\u00BE\n"
+                + "?{esc}>\u00BF\n"
+                + "A{grave}{esc}>\u00C0\n"
+                + "A{acute}{esc}>\u00C1\n"
+                + "A{hat}{esc}>\u00C2\n"
+                + "A{tilde}{esc}>\u00C3\n"
+                + "A{umlaut}{esc}>\u00C4\n"
+                + "A{ring}{esc}>\u00C5\n"
+                + "AE{esc}>\u00C6\n"
+                + "C{cedilla}{esc}>\u00C7\n"
+                + "E{grave}{esc}>\u00C8\n"
+                + "E{acute}{esc}>\u00C9\n"
+                + "E{hat}{esc}>\u00CA\n"
+                + "E{umlaut}{esc}>\u00CB\n"
+                + "I{grave}{esc}>\u00CC\n"
+                + "I{acute}{esc}>\u00CD\n"
+                + "I{hat}{esc}>\u00CE\n"
+                + "I{umlaut}{esc}>\u00CF\n"
+                + "D-{esc}>\u00D0\n"
+                + "N{tilde}{esc}>\u00D1\n"
+                + "O{grave}{esc}>\u00D2\n"
+                + "O{acute}{esc}>\u00D3\n"
+                + "O{hat}{esc}>\u00D4\n"
+                + "O{tilde}{esc}>\u00D5\n"
+                + "O{umlaut}{esc}>\u00D6\n"
+                + "O{slash}{esc}>\u00D8\n"
+                + "U{grave}{esc}>\u00D9\n"
+                + "U{acute}{esc}>\u00DA\n"
+                + "U{hat}{esc}>\u00DB\n"
+                + "U{umlaut}{esc}>\u00DC\n"
+                + "Y{acute}{esc}>\u00DD\n"
+                + "TH{esc}>\u00DE\n"
+                + "ss{esc}>\u00DF\n"
+                + "a{grave}{esc}>\u00E0\n"
+                + "a{acute}{esc}>\u00E1\n"
+                + "a{hat}{esc}>\u00E2\n"
+                + "a{tilde}{esc}>\u00E3\n"
+                + "a{umlaut}{esc}>\u00E4\n"
+                + "a{ring}{esc}>\u00E5\n"
+                + "ae{esc}>\u00E6\n"
+                + "c{cedilla}{esc}>\u00E7\n"
+                + "c{esc}>\u00A9\n" // copyright - after c{cedilla}
+                + "e{grave}{esc}>\u00E8\n"
+                + "e{acute}{esc}>\u00E9\n"
+                + "e{hat}{esc}>\u00EA\n"
+                + "e{umlaut}{esc}>\u00EB\n"
+                + "i{grave}{esc}>\u00EC\n"
+                + "i{acute}{esc}>\u00ED\n"
+                + "i{hat}{esc}>\u00EE\n"
+                + "i{umlaut}{esc}>\u00EF\n"
+                + "d-{esc}>\u00F0\n"
+                + "n{tilde}{esc}>\u00F1\n"
+                + "o{grave}{esc}>\u00F2\n"
+                + "o{acute}{esc}>\u00F3\n"
+                + "o{hat}{esc}>\u00F4\n"
+                + "o{tilde}{esc}>\u00F5\n"
+                + "o{umlaut}{esc}>\u00F6\n"
+                + "o{slash}{esc}>\u00F8\n"
+                + "o{esc}>\u00B0\n"
+                + "u{grave}{esc}>\u00F9\n"
+                + "u{acute}{esc}>\u00FA\n"
+                + "u{hat}{esc}>\u00FB\n"
+                + "u{umlaut}{esc}>\u00FC\n"
+                + "y{acute}{esc}>\u00FD\n"
+                // y with diaeresis (U+00FF).  This rule used to be keyed
+                // on "ss", which is already taken by sharp s (U+00DF).
+                + "y{umlaut}{esc}>\u00FF\n"
+                + "y{esc}>\u00A5\n" // yen sign - after the accented y forms
+                + "th{esc}>\u00FE\n"
+            }
+        };
+    }
+}
diff --git a/icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Arabic.java b/icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Arabic.java
new file mode 100755
index 00000000000..bb96443d051
--- /dev/null
+++ b/icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Arabic.java
@@ -0,0 +1,243 @@
+package com.ibm.text.resources;
+
+import java.util.ListResourceBundle;
+
+/**
+ * Resource bundle holding the rule text for transliterating between
+ * Latin letters and Arabic script.  The rule text contains variable
+ * definitions (<code>name=char</code>), Latin-to-Arabic rules
+ * (<code>&gt;</code>) and Arabic-to-Latin rules (<code>&lt;</code>).
+ */
+public class TransliterationRuleLatinArabic extends ListResourceBundle {
+    /**
+     * Overrides ListResourceBundle
+     *
+     * @return the bundle contents: a "HasInverse" flag and a "Rule"
+     * entry whose value is the newline-separated rule text
+     */
+    public Object[][] getContents() {
+        return new Object[][] {
+            { "HasInverse", "1" },
+
+            { "Rule",
+                // To Do: finish adding shadda, add sokoon
+
+                // Variable definitions: one named Arabic letter per line.
+                "alefmadda=\u0622\n"+
+                "alefuhamza=\u0623\n"+
+                "wauuhamza=\u0624\n"+
+                "alefhamza=\u0625\n"+
+                "yehuhamza=\u0626\n"+
+                "alef=\u0627\n"+
+                "beh=\u0628\n"+
+                "tehmarbuta=\u0629\n"+
+                "teh=\u062A\n"+
+                "theh=\u062B\n"+
+                "geem=\u062C\n"+
+                "hah=\u062D\n"+
+                "kha=\u062E\n"+
+                "dal=\u062F\n"+
+                "dhal=\u0630\n"+
+                "reh=\u0631\n"+
+                "zain=\u0632\n"+
+                "seen=\u0633\n"+
+                "sheen=\u0634\n"+
+                "sad=\u0635\n"+
+                "dad=\u0636\n"+
+                "tah=\u0637\n"+
+                "zah=\u0638\n"+
+                "ein=\u0639\n"+
+                "ghein=\u063A\n"+
+                "feh=\u0641\n"+
+                "qaaf=\u0642\n"+
+                "kaf=\u0643\n"+
+                "lam=\u0644\n"+
+                "meem=\u0645\n"+
+                "noon=\u0646\n"+
+                "heh=\u0647\n"+
+                "wau=\u0648\n"+
+                "yehmaqsura=\u0649\n"+
+                "yeh=\u064A\n"+
+                // NOTE(review): U+06A4 is named ARABIC LETTER VEH in the
+                // Unicode charts; PEH is U+067E.  Confirm which one is
+                // intended for Latin 'p'.
+                "peh=\u06A4\n"+
+
+                // Variable definitions: hamza and the combining marks.
+                "hamza=\u0621\n"+
+                "fathatein=\u064B\n"+
+                "dammatein=\u064C\n"+
+                "kasratein=\u064D\n"+
+                "fatha=\u064E\n"+
+                "damma=\u064F\n"+
+                "kasra=\u0650\n"+
+                "shadda=\u0651\n"+
+                "sokoon=\u0652\n"+
+
+                // convert English to Arabic
+                // The first rule replaces the whole word "Arabic" with a
+                // fixed Arabic phrase.
+                "Arabic>"+
+                "\u062a\u062a\u0645\u062a\u0639\u0020"+
+                "\u0627\u0644\u0644\u063a\u0629\u0020"+
+                "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
+                "\u0628\u0628\u0646\u0638\u0645\u0020"+
+                "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
+                "\u062c\u0645\u064a\u0644\u0629\n"+
+
+                // Letter rules.  Multi-letter keys are listed before the
+                // single-letter key they start with (presumably because
+                // earlier rules take precedence -- confirm against the
+                // rule engine).
+                "ai>{alefmadda}\n"+
+                "ae>{alefuhamza}\n"+
+                "ao>{alefhamza}\n"+
+                "aa>{alef}\n"+
+                "an>{fathatein}\n"+
+                "a>{fatha}\n"+
+                "b>{beh}\n"+
+                "c>{kaf}\n"+
+                // A repeated dh / dd / d directly after its own letter
+                // becomes a shadda instead of a second letter.
+                "{dhal}]dh>{shadda}\n"+
+                "dh>{dhal}\n"+
+                "{dad}]dd>{shadda}\n"+
+                "dd>{dad}\n"+
+                "{dal}]d>{shadda}\n"+
+                "d>{dal}\n"+
+                "e>{ein}\n"+
+                "f>{feh}\n"+
+                "gh>{ghein}\n"+
+                "g>{geem}\n"+
+                "hh>{hah}\n"+
+                "h>{heh}\n"+
+                "ii>{kasratein}\n"+
+                "i>{kasra}\n"+
+                "j>{geem}\n"+
+                "kh>{kha}\n"+
+                "k>{kaf}\n"+
+                "l>{lam}\n"+
+                "m>{meem}\n"+
+                "n>{noon}\n"+
+                "o>{hamza}\n"+
+                "p>{peh}\n"+
+                "q>{qaaf}\n"+
+                "r>{reh}\n"+
+                "sh>{sheen}\n"+
+                "ss>{sad}\n"+
+                "s>{seen}\n"+
+                "th>{theh}\n"+
+                "tm>{tehmarbuta}\n"+
+                "tt>{tah}\n"+
+                "t>{teh}\n"+
+                "uu>{dammatein}\n"+
+                "u>{damma}\n"+
+                "v>{beh}\n"+
+                "we>{wauuhamza}\n"+
+                "w>{wau}\n"+
+                "x>{kaf}{shadda}{seen}\n"+
+                "ye>{yehuhamza}\n"+
+                "ym>{yehmaqsura}\n"+
+                "y>{yeh}\n"+
+                "zz>{zah}\n"+
+                "z>{zain}\n"+
+
+                // Digits and punctuation.
+                "0>\u0660\n"+ // Arabic digit 0
+                "1>\u0661\n"+ // Arabic digit 1
+                "2>\u0662\n"+ // Arabic digit 2
+                "3>\u0663\n"+ // Arabic digit 3
+                "4>\u0664\n"+ // Arabic digit 4
+                "5>\u0665\n"+ // Arabic digit 5
+                "6>\u0666\n"+ // Arabic digit 6
+                "7>\u0667\n"+ // Arabic digit 7
+                "8>\u0668\n"+ // Arabic digit 8
+                "9>\u0669\n"+ // Arabic digit 9
+                "%>\u066A\n"+ // Arabic %
+                ".>\u066B\n"+ // Arabic decimal separator
+                ",>\u066C\n"+ // Arabic thousands separator
+                "*>\u066D\n"+ // Arabic five-pointed star
+
+                "`0>0\n"+ // Escaped forms of the above
+                "`1>1\n"+
+                "`2>2\n"+
+                "`3>3\n"+
+                "`4>4\n"+
+                "`5>5\n"+
+                "`6>6\n"+
+                "`7>7\n"+
+                "`8>8\n"+
+                "`9>9\n"+
+                "`%>%\n"+
+                "`.>.\n"+
+                "`,>,\n"+
+                "`*>*\n"+
+                "``>`\n"+
+
+                // Maps '' to nothing.
+                "''>\n"+
+
+                // now Arabic to English
+                // Rules whose output starts with '' carry a preceding
+                // Latin letter as context (the text before "]").  They
+                // keep the output from forming one of the multi-letter
+                // keys above.
+                // NOTE(review): not every combination is covered (there
+                // is e.g. no ''i rule after 'a' and no ''n rule after
+                // 'a'), and there are no reverse rules for the digits,
+                // punctuation or sokoon -- confirm whether that is
+                // intended.
+
+                "''ai<a]{alefmadda}\n"+
+                "ai<{alefmadda}\n"+
+                "''ae<a]{alefuhamza}\n"+
+                "ae<{alefuhamza}\n"+
+                "''ao<a]{alefhamza}\n"+
+                "ao<{alefhamza}\n"+
+                "''aa<a]{alef}\n"+
+                "aa<{alef}\n"+
+                "''an<a]{fathatein}\n"+
+                "an<{fathatein}\n"+
+                "''a<a]{fatha}\n"+
+                "a<{fatha}\n"+
+                "b<{beh}\n"+
+                "''dh<d]{dhal}\n"+
+                "dh<{dhal}\n"+
+                "''dd<d]{dad}\n"+
+                "dd<{dad}\n"+
+                "''d<d]{dal}\n"+
+                "d<{dal}\n"+
+                "''e<a]{ein}\n"+
+                "''e<w]{ein}\n"+
+                "''e<y]{ein}\n"+
+                "e<{ein}\n"+
+                "f<{feh}\n"+
+                "gh<{ghein}\n"+
+                "''hh<d]{hah}\n"+
+                "''hh<t]{hah}\n"+
+                "''hh<k]{hah}\n"+
+                "''hh<s]{hah}\n"+
+                "hh<{hah}\n"+
+                "''h<d]{heh}\n"+
+                "''h<t]{heh}\n"+
+                "''h<k]{heh}\n"+
+                "''h<s]{heh}\n"+
+                "h<{heh}\n"+
+                "''ii<i]{kasratein}\n"+
+                "ii<{kasratein}\n"+
+                "''i<i]{kasra}\n"+
+                "i<{kasra}\n"+
+                "j<{geem}\n"+
+                "kh<{kha}\n"+
+                "x<{kaf}{shadda}{seen}\n"+
+                "k<{kaf}\n"+
+                "l<{lam}\n"+
+                "''m<y]{meem}\n"+
+                "''m<t]{meem}\n"+
+                "m<{meem}\n"+
+                "n<{noon}\n"+
+                "''o<a]{hamza}\n"+
+                "o<{hamza}\n"+
+                "p<{peh}\n"+
+                "q<{qaaf}\n"+
+                "r<{reh}\n"+
+                "sh<{sheen}\n"+
+                "''ss<s]{sad}\n"+
+                "ss<{sad}\n"+
+                "''s<s]{seen}\n"+
+                "s<{seen}\n"+
+                "th<{theh}\n"+
+                "tm<{tehmarbuta}\n"+
+                "''tt<t]{tah}\n"+
+                "tt<{tah}\n"+
+                "''t<t]{teh}\n"+
+                "t<{teh}\n"+
+                "''uu<u]{dammatein}\n"+
+                "uu<{dammatein}\n"+
+                "''u<u]{damma}\n"+
+                "u<{damma}\n"+
+                "we<{wauuhamza}\n"+
+                "w<{wau}\n"+
+                "ye<{yehuhamza}\n"+
+                "ym<{yehmaqsura}\n"+
+                "''y<y]{yeh}\n"+
+                "y<{yeh}\n"+
+                "''zz<z]{zah}\n"+
+                "zz<{zah}\n"+
+                "''z<z]{zain}\n"+
+                "z<{zain}\n"+
+
+                // Shadda: written by repeating the Latin key that
+                // precedes it.
+                "dh<dh]{shadda}\n"+
+                "dd<dd]{shadda}\n"+
+                "''d<d]{shadda}\n"
+            }
+        };
+    }
+}
diff --git a/icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Cyrillic.java b/icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Cyrillic.java
new file mode 100755
index 00000000000..29035d1f9f2
--- /dev/null
+++ b/icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Cyrillic.java
@@ -0,0 +1,367 @@
+package com.ibm.text.resources;
+
+import java.util.ListResourceBundle;
+
+/**
+ * Resource bundle holding the rule text for transliterating between
+ * Latin letters and Russian Cyrillic.  The rule text contains variable
+ * definitions (<code>name=value</code>), Latin-to-Cyrillic rules
+ * (<code>&gt;</code>) and Cyrillic-to-Latin rules (<code>&lt;</code>).
+ */
+public class TransliterationRuleLatinRussian extends ListResourceBundle {
+    /**
+     * Overrides ListResourceBundle
+     *
+     * @return the bundle contents: a "Description" entry and a "Rule"
+     * entry whose value is the newline-separated rule text
+     */
+    public Object[][] getContents() {
+        return new Object[][] {
+            // NOTE(review): the description is still placeholder text.
+            { "Description",
+                "xxxxxxxxxxxx" },
+
+            { "Rule",
+              // Russian Letters
+
+              // Uppercase.  The values follow the Unicode Cyrillic
+              // block: U+0410..U+042F in alphabet order, with capital
+              // IO (cyYo) at U+0401 outside that run.  Each value
+              // matches its lowercase counterpart below.
+              "cyA=\u0410\n" +
+              "cyBe=\u0411\n" +
+              "cyVe=\u0412\n" +
+              "cyGe=\u0413\n" +
+              "cyDe=\u0414\n" +
+              "cyYe=\u0415\n" +
+              "cyYo=\u0401\n" +
+              "cyZhe=\u0416\n" +
+              "cyZe=\u0417\n" +
+              "cyYi=\u0418\n" +
+              "cyY=\u0419\n" +
+              "cyKe=\u041a\n" +
+              "cyLe=\u041b\n" +
+              "cyMe=\u041c\n" +
+              "cyNe=\u041d\n" +
+              "cyO=\u041e\n" +
+              "cyPe=\u041f\n" +
+
+              "cyRe=\u0420\n" +
+              "cySe=\u0421\n" +
+              "cyTe=\u0422\n" +
+              "cyU=\u0423\n" +
+              "cyFe=\u0424\n" +
+              "cyKhe=\u0425\n" +
+              "cyTse=\u0426\n" +
+              "cyChe=\u0427\n" +
+              "cyShe=\u0428\n" +
+              "cyShche=\u0429\n" +
+              "cyHard=\u042a\n" +
+              "cyI=\u042b\n" +
+              "cySoft=\u042c\n" +
+              "cyE=\u042d\n" +
+              "cyYu=\u042e\n" +
+              "cyYa=\u042f\n" +
+
+              // Lowercase: U+0430..U+044F, with small IO at U+0451.
+              "cya=\u0430\n" +
+              "cybe=\u0431\n" +
+              "cyve=\u0432\n" +
+              "cyge=\u0433\n" +
+              "cyde=\u0434\n" +
+              "cyye=\u0435\n" +
+              "cyzhe=\u0436\n" +
+              "cyze=\u0437\n" +
+              "cyyi=\u0438\n" +
+              "cyy=\u0439\n" +
+              "cyke=\u043a\n" +
+              "cyle=\u043b\n" +
+              "cyme=\u043c\n" +
+              "cyne=\u043d\n" +
+              "cyo=\u043e\n" +
+              "cype=\u043f\n" +
+
+              "cyre=\u0440\n" +
+              "cyse=\u0441\n" +
+              "cyte=\u0442\n" +
+              "cyu=\u0443\n" +
+              "cyfe=\u0444\n" +
+              "cykhe=\u0445\n" +
+              "cytse=\u0446\n" +
+              "cyche=\u0447\n" +
+              "cyshe=\u0448\n" +
+              "cyshche=\u0449\n" +
+              "cyhard=\u044a\n" +
+              "cyi=\u044b\n" +
+              "cysoft=\u044c\n" +
+              "cye=\u044d\n" +
+              "cyyu=\u044e\n" +
+              "cyya=\u044f\n" +
+
+              "cyyo=\u0451\n" +
+
+              // Case-insensitive Latin letters used as the second and
+              // later letters of multi-letter keys.
+              "a=[aA]\n" +
+              "c=[cC]\n" +
+              "e=[eE]\n" +
+              "h=[hH]\n" +
+              "i=[iI]\n" +
+              "o=[oO]\n" +
+              "s=[sS]\n" +
+              "t=[tT]\n" +
+              "u=[uU]\n" +
+              "iey=[ieyIEY]\n" +
+              // Lowercase letters (general category Ll).  The reverse
+              // rules use this as following context to choose mixed
+              // case ("Zh") over all caps ("ZH").
+              "lower=[:Ll:]\n" +
+
+              // convert English to Russian
+              "Russian>\u041f\u0420\u0410\u0412\u0414\u0410\u00D1\u0020\u0411\u044d\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f\u002c\u0020\u043a\u044b\u0440\u0433\u044b\u0437\u002c\u0020\u041c\u043e\u043b\u0434\u043e\u0432\u044d\u043d\u044f\u0441\u043a\u044d\u002e\n" +
+
+              //special equivs for ay, oy, ...
+              "Y{a}{i}>{cyYa}{cyY}\n" +
+              "Y{e}{i}>{cyYe}{cyY}\n" +
+              "Y{i}{i}>{cyYi}{cyY}\n" +
+              "Y{o}{i}>{cyYo}{cyY}\n" +
+              "Y{u}{i}>{cyYu}{cyY}\n" +
+              "A{i}>{cyA}{cyY}\n" +
+              "E{i}>{cyE}{cyY}\n" +
+              //skip II, since it is the soft sign
+              "O{i}>{cyO}{cyY}\n" +
+              "U{i}>{cyU}{cyY}\n" +
+
+              "A>{cyA}\n" +
+              "B>{cyBe}\n" +
+              "C{h}>{cyChe}\n" +
+              "C[{iey}>{cySe}\n" +
+              "C>{cyKe}\n" +
+              "D>{cyDe}\n" +
+              "E>{cyE}\n" +
+              "F>{cyFe}\n" +
+              "G>{cyGe}\n" +
+              "H>{cyHard}\n" +
+              "I{i}>{cySoft}\n" +
+              "I>{cyI}\n" +
+              "J>{cyDe}{cyZhe}\n" +
+              "K{h}>{cyKhe}\n" +
+              "K>{cyKe}\n" +
+              "L>{cyLe}\n" +
+              "M>{cyMe}\n" +
+              "N>{cyNe}\n" +
+              "O>{cyO}\n" +
+              "P>{cyPe}\n" +
+              "Q{u}>{cyKe}{cyVe}\n" +
+              "R>{cyRe}\n" +
+              "S{h}{t}{c}{h}>{cyShche}\n" +
+              "S{h}{c}{h}>{cyShche}\n" +
+              "S{h}>{cyShe}\n" +
+              "S>{cySe}\n" +
+              "T{c}{h}>{cyChe}\n" +
+              "T{h}>{cyZe}\n" +
+              "T{s}>{cyTse}\n" +
+              "T>{cyTe}\n" +
+              "U>{cyU}\n" +
+              "V>{cyVe}\n" +
+              "W{h}>{cyVe}\n" +
+              "W>{cyVe}\n" +
+              "X>{cyKe}{cySe}\n" +
+              "Y{e}>{cyYe}\n" +
+              "Y{o}>{cyYo}\n" +
+              "Y{u}>{cyYu}\n" +
+              "Y{a}>{cyYa}\n" +
+              "Y{i}>{cyYi}\n" +
+              "Y>{cyY}\n" +
+              "Z{h}>{cyZhe}\n" +
+              "Z>{cyZe}\n" +
+              "X>{cyKe}{cySe}\n" +
+
+              //lower case: doesn''t solve join bug
+              "y{a}{i}>{cyya}{cyy}\n" +
+              "y{e}{i}>{cyye}{cyy}\n" +
+              "y{i}{i}>{cyyi}{cyy}\n" +
+              "y{o}{i}>{cyyo}{cyy}\n" +
+              "y{u}{i}>{cyyu}{cyy}\n" +
+              "a{i}>{cya}{cyy}\n" +
+              "e{i}>{cye}{cyy}\n" +
+              //skip ii, since it is the soft sign
+              "o{i}>{cyo}{cyy}\n" +
+              "u{i}>{cyu}{cyy}\n" +
+
+              "a>{cya}\n" +
+              "b>{cybe}\n" +
+              "c{h}>{cyche}\n" +
+              "c[{iey}>{cyse}\n" +
+              "c>{cyke}\n" +
+              "d>{cyde}\n" +
+              "e>{cye}\n" +
+              "f>{cyfe}\n" +
+              "g>{cyge}\n" +
+              "h>{cyhard}\n" +
+              "i{i}>{cysoft}\n" +
+              "i>{cyi}\n" +
+              "j>{cyde}{cyzhe}\n" +
+              "k{h}>{cykhe}\n" +
+              "k>{cyke}\n" +
+              "l>{cyle}\n" +
+              "m>{cyme}\n" +
+              "n>{cyne}\n" +
+              "o>{cyo}\n" +
+              "p>{cype}\n" +
+              "q{u}>{cyke}{cyve}\n" +
+              "r>{cyre}\n" +
+              "s{h}{t}{c}{h}>{cyshche}\n" +
+              "s{h}{c}{h}>{cyshche}\n" +
+              "s{h}>{cyshe}\n" +
+              "s>{cyse}\n" +
+              "t{c}{h}>{cyche}\n" +
+              "t{h}>{cyze}\n" +
+              "t{s}>{cytse}\n" +
+              "t>{cyte}\n" +
+              "u>{cyu}\n" +
+              "v>{cyve}\n" +
+              "w{h}>{cyve}\n" +
+              "w>{cyve}\n" +
+              "x>{cyke}{cyse}\n" +
+              "y{e}>{cyye}\n" +
+              "y{o}>{cyyo}\n" +
+              "y{u}>{cyyu}\n" +
+              "y{a}>{cyya}\n" +
+              "y{i}>{cyyi}\n" +
+              "y>{cyy}\n" +
+              "z{h}>{cyzhe}\n" +
+              "z>{cyze}\n" +
+              "x>{cyke}{cyse}\n" +
+
+              //generally the last rule
+              "''>\n" +
+
+              //now Russian to English
+
+              "Y''<{cyY}[{cyA}\n" +
+              "Y''<{cyY}[{cyE}\n" +
+              "Y''<{cyY}[{cyI}\n" +
+              "Y''<{cyY}[{cyO}\n" +
+              "Y''<{cyY}[{cyU}\n" +
+              "Y''<{cyY}[{cya}\n" +
+              "Y''<{cyY}[{cye}\n" +
+              "Y''<{cyY}[{cyi}\n" +
+              "Y''<{cyY}[{cyo}\n" +
+              "Y''<{cyY}[{cyu}\n" +
+              "A<{cyA}\n" +
+              "B<{cyBe}\n" +
+              "J<{cyDe}{cyZhe}\n" +
+              "J<{cyDe}{cyzhe}\n" +
+              "D<{cyDe}\n" +
+              "V<{cyVe}\n" +
+              "G<{cyGe}\n" +
+              "Zh<{cyZhe}[{lower}\n" +
+              "ZH<{cyZhe}\n" +
+              "Z''<{cyZe}[{cyHard}\n" +
+              "Z''<{cyZe}[{cyhard}\n" +
+              "Z<{cyZe}\n" +
+              "Ye<{cyYe}[{lower}\n" +
+              "YE<{cyYe}\n" +
+              "Yo<{cyYo}[{lower}\n" +
+              "YO<{cyYo}\n" +
+              "Yu<{cyYu}[{lower}\n" +
+              "YU<{cyYu}\n" +
+              "Ya<{cyYa}[{lower}\n" +
+              "YA<{cyYa}\n" +
+              "Yi<{cyYi}[{lower}\n" +
+              "YI<{cyYi}\n" +
+              "Y<{cyY}\n" +
+              "Kh<{cyKhe}[{lower}\n" +
+              "KH<{cyKhe}\n" +
+              "K''<{cyKe}[{cyHard}\n" +
+              "K''<{cyKe}[{cyhard}\n" +
+              "X<{cyKe}{cySe}\n" +
+              "X<{cyKe}{cyse}\n" +
+              "K<{cyKe}\n" +
+              "L<{cyLe}\n" +
+              "M<{cyMe}\n" +
+              "N<{cyNe}\n" +
+              "O<{cyO}\n" +
+              "P<{cyPe}\n" +
+
+              "R<{cyRe}\n" +
+              "Shch<{cyShche}[{lower}\n" +
+              "SHCH<{cyShche}\n" +
+              "Sh''<{cyShe}[{cyche}\n" +
+              "SH''<{cyShe}[{cyChe}\n" +
+              "Sh<{cyShe}[{lower}\n" +
+              "SH<{cyShe}\n" +
+              "S''<{cySe}[{cyHard}\n" +
+              "S''<{cySe}[{cyhard}\n" +
+              "S<{cySe}\n" +
+              "Ts<{cyTse}[{lower}\n" +
+              "TS<{cyTse}\n" +
+              "T''<{cyTe}[{cySe}\n" +
+              "T''<{cyTe}[{cyse}\n" +
+              "T''<{cyTe}[{cyHard}\n" +
+              "T''<{cyTe}[{cyhard}\n" +
+              "T<{cyTe}\n" +
+              "U<{cyU}\n" +
+              "F<{cyFe}\n" +
+              "Ch<{cyChe}[{lower}\n" +
+              "CH<{cyChe}\n" +
+              "H<{cyHard}\n" +
+              "I''<{cyI}[{cyI}\n" +
+              "I''<{cyI}[{cyi}\n" +
+              "I<{cyI}\n" +
+              "Ii<{cySoft}[{lower}\n" +
+              "II<{cySoft}\n" +
+              "E<{cyE}\n" +
+
+              //lowercase
+              "y''<{cyy}[{cya}\n" +
+              "y''<{cyy}[{cye}\n" +
+              "y''<{cyy}[{cyi}\n" +
+              "y''<{cyy}[{cyo}\n" +
+              "y''<{cyy}[{cyu}\n" +
+              "y''<{cyy}[{cyA}\n" +
+              "y''<{cyy}[{cyE}\n" +
+              "y''<{cyy}[{cyI}\n" +
+              "y''<{cyy}[{cyO}\n" +
+              "y''<{cyy}[{cyU}\n" +
+              "a<{cya}\n" +
+              "b<{cybe}\n" +
+              "j<{cyde}{cyzhe}\n" +
+              "j<{cyde}{cyZhe}\n" +
+              "d<{cyde}\n" +
+              "v<{cyve}\n" +
+              "g<{cyge}\n" +
+              "zh<{cyzhe}\n" +
+              "z''<{cyze}[{cyhard}\n" +
+              "z''<{cyze}[{cyHard}\n" +
+              "z<{cyze}\n" +
+              "ye<{cyye}\n" +
+              "yo<{cyyo}\n" +
+              "yu<{cyyu}\n" +
+              "ya<{cyya}\n" +
+              "yi<{cyyi}\n" +
+              "y<{cyy}\n" +
+              "kh<{cykhe}\n" +
+              "k''<{cyke}[{cyhard}\n" +
+              "k''<{cyke}[{cyHard}\n" +
+              "x<{cyke}{cyse}\n" +
+              "x<{cyke}{cySe}\n" +
+              "k<{cyke}\n" +
+              "l<{cyle}\n" +
+              "m<{cyme}\n" +
+              "n<{cyne}\n" +
+              "o<{cyo}\n" +
+              "p<{cype}\n" +
+
+              "r<{cyre}\n" +
+              "shch<{cyshche}\n" +
+              "sh''<{cyshe}[{cyche}\n" +
+              "sh''<{cyshe}[{cyChe}\n" +
+              "sh<{cyshe}\n" +
+              "s''<{cyse}[{cyhard}\n" +
+              "s''<{cyse}[{cyHard}\n" +
+              "s<{cyse}\n" +
+              "ts<{cytse}\n" +
+              "t''<{cyte}[{cyse}\n" +
+              "t''<{cyte}[{cySe}\n" +
+              "t''<{cyte}[{cyhard}\n" +
+              "t''<{cyte}[{cyHard}\n" +
+              "t<{cyte}\n" +
+              "u<{cyu}\n" +
+              "f<{cyfe}\n" +
+              "ch<{cyche}\n" +
+              "h<{cyhard}\n" +
+              "i''<{cyi}[{cyI}\n" +
+              "i''<{cyi}[{cyi}\n" +
+              "i<{cyi}\n" +
+              "ii<{cysoft}\n" +
+              "e<{cye}\n" +
+
+              //generally the last rule
+              "''>\n"
+              //the end
+            }
+        };
+    }
+}
diff --git a/icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Devanagari.java b/icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Devanagari.java
new file mode 100755
index 00000000000..d359adde14a
--- /dev/null
+++ b/icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Devanagari.java
@@ -0,0 +1,412 @@
+package com.ibm.text.resources;
+
+import java.util.ListResourceBundle;
+
+public class TransliterationRuleLatinDevanagari extends ListResourceBundle {
+    /** Overrides ListResourceBundle: supplies the Latin/Devanagari transliteration rule table.
+     * @return two key/value pairs, "Description" and "Rule"; the rule text holds one definition or rule per line
+     */
+    public Object[][] getContents() {
+        return new Object[][] {
+            { "Description",
+                "Latin to Devanagari" },
+
+            { "Rule",
+                //#####################################################################
+                // Keyboard Transliteration Table (Latin <-> Devanagari)
+                //#####################################################################
+                // Conversions should be:
+                // 1. complete
+                //  * convert every sequence of Latin letters (a to z plus apostrophe)
+                //    to a sequence of Native letters
+                //  * convert every sequence of Native letters to Latin letters
+                // 2. reversible
+                //  * any string of Native converted to Latin and back should be the same
+                //  * this is not true for Latin text converted to Native & back, e.g.:
+                //      both "k" and "q" produce {ka} (k>{ka}, q>{ka}),
+                //      but {ka} always converts back to a "k" form (k'', k or ka)
+                //#####################################################################
+                // Sequences of Latin letters may convert to a single Native letter.
+                // When this is the case, an apostrophe can be used to indicate separate
+                // letters.
+                // E.g.  sh  selects the {sha} rule
+                //       s'h selects the {sa} rule, then the {ha} rule
+                //       ss  selects the {ssa} rule
+                //       s's selects the {sa} rule twice
+                //#####################################################################
+                // To Do:
+                //  finish adding shadda, add sokoon, fix uppercase; make two transliteration tables: one with vowels, one without
+                //  NOTE(review): no rule below defines shadda or sokoon - these items may be stale; confirm
+                //#####################################################################
+                // Modifications
+                //  Devanagari Transliterator:  broken up with consonants/vowels
+                //#####################################################################
+                // Unicode character name definitions
+                //#####################################################################
+
+                // candrabindu, bindu (anusvara) and visarga signs
+                  "candrabindu=\u0901\n"
+                + "bindu=\u0902\n"
+                + "visarga=\u0903\n"
+
+                // w<vowel> represents the stand-alone form
+                + "wa=\u0905\n"
+                + "waa=\u0906\n"
+                + "wi=\u0907\n"
+                + "wii=\u0908\n"
+                + "wu=\u0909\n"
+                + "wuu=\u090A\n"
+                + "wr=\u090B\n"
+                + "wl=\u090C\n"
+                + "we=\u090F\n"
+                + "wai=\u0910\n"
+                + "wo=\u0913\n"
+                + "wau=\u0914\n"
+                // consonants
+                + "ka=\u0915\n"
+                + "kha=\u0916\n"
+                + "ga=\u0917\n"
+                + "gha=\u0918\n"
+                + "nga=\u0919\n"
+
+                + "ca=\u091A\n"
+                + "cha=\u091B\n"
+                + "ja=\u091C\n"
+                + "jha=\u091D\n"
+                + "nya=\u091E\n"
+
+                + "tta=\u091F\n"
+                + "ttha=\u0920\n"
+                + "dda=\u0921\n"
+                + "ddha=\u0922\n"
+                + "nna=\u0923\n"
+
+                + "ta=\u0924\n"
+                + "tha=\u0925\n"
+                + "da=\u0926\n"
+                + "dha=\u0927\n"
+                + "na=\u0928\n"
+
+                + "pa=\u092A\n"
+                + "pha=\u092B\n"
+                + "ba=\u092C\n"
+                + "bha=\u092D\n"
+                + "ma=\u092E\n"
+                // la must be U+0932 (DEVANAGARI LETTER LA); U+0933 is the distinct letter LLA
+                + "ya=\u092F\n"
+                + "ra=\u0930\n"
+                + "rra=\u0931\n"
+                + "la=\u0932\n"
+                + "va=\u0935\n"
+
+                + "sha=\u0936\n"
+                + "ssa=\u0937\n"
+                + "sa=\u0938\n"
+                + "ha=\u0939\n"
+
+                // <vowel> represents the dependent form; rh, rrh and lh are the signs paired with wr, wrr and wl
+                + "aa=\u093E\n"
+                + "i=\u093F\n"
+                + "ii=\u0940\n"
+                + "u=\u0941\n"
+                + "uu=\u0942\n"
+                + "rh=\u0943\n"
+                + "rrh=\u0944\n"
+                + "e=\u0947\n"
+                + "ai=\u0948\n"
+                + "o=\u094B\n"
+                + "au=\u094C\n"
+
+                + "virama=\u094D\n"
+                // U+0960 is LETTER VOCALIC RR; U+0962 is VOWEL SIGN VOCALIC L (the dependent form of wl)
+                + "wrr=\u0960\n"
+                + "lh=\u0962\n"
+
+                  + "danda=\u0964\n"
+                  + "doubleDanda=\u0965\n"
+                  + "depVowelAbove=[\u093E-\u0940\u0945-\u094C]\n"
+                  + "depVowelBelow=[\u0941-\u0944]\n"
+                  + "endThing=[{danda}{doubleDanda}\u0000-\u08FF\u0980-\uFFFF]\n"
+
+                + "&=[{virama}{aa}{ai}{au}{ii}{i}{uu}{u}{rrh}{rh}{lh}{e}{o}]\n"
+                + "%=[bcdfghjklmnpqrstvwxyz]\n"
+
+                //#####################################################################
+                // convert from Latin letters to Native letters
+                //#####################################################################
+                //Hindi>\u092d\u093e\u0930\u0924--\u0020\u0926\u0947\u0936\u0020\u092c\u0928\u094d\u0927\u0941\u002e
+
+                // special forms with no good conversion
+
+                + "mm>{bindu}\n"
+                + "x>{visarga}\n"
+
+                // convert to independent forms at start of word or syllable:
+                // e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
+                // Moved up [LIU]
+
+                + "aa>{waa}\n"
+                + "ai>{wai}\n"
+                + "au>{wau}\n"
+                + "ii>{wii}\n"
+                + "i>{wi}\n"
+                + "uu>{wuu}\n"
+                + "u>{wu}\n"
+                + "rrh>{wrr}\n"
+                + "rh>{wr}\n"
+                + "lh>{wl}\n"
+                + "e>{we}\n"
+                + "o>{wo}\n"
+                + "a>{wa}\n"
+
+                // normal consonants: each emits the letter followed by a virama, so that the {virama}<vowel>
+                // rules further down can match (| presumably marks where matching resumes - confirm in the rule parser)
+                + "kh>{kha}|{virama}\n"
+                + "k>{ka}|{virama}\n"
+                + "q>{ka}|{virama}\n"
+                + "gh>{gha}|{virama}\n"
+                + "g>{ga}|{virama}\n"
+                + "ng>{nga}|{virama}\n"
+                + "ch>{cha}|{virama}\n"
+                + "c>{ca}|{virama}\n"
+                + "jh>{jha}|{virama}\n"
+                + "j>{ja}|{virama}\n"
+                + "ny>{nya}|{virama}\n"
+                + "tth>{ttha}|{virama}\n"
+                + "tt>{tta}|{virama}\n"
+                + "ddh>{ddha}|{virama}\n"
+                + "dd>{dda}|{virama}\n"
+                + "nn>{nna}|{virama}\n"
+                + "th>{tha}|{virama}\n"
+                + "t>{ta}|{virama}\n"
+                + "dh>{dha}|{virama}\n"
+                + "d>{da}|{virama}\n"
+                + "n>{na}|{virama}\n"
+                + "ph>{pha}|{virama}\n"
+                + "p>{pa}|{virama}\n"
+                + "bh>{bha}|{virama}\n"
+                + "b>{ba}|{virama}\n"
+                + "m>{ma}|{virama}\n"
+                + "y>{ya}|{virama}\n"
+                + "r>{ra}|{virama}\n"
+                + "l>{la}|{virama}\n"
+                + "v>{va}|{virama}\n"
+                + "f>{va}|{virama}\n"
+                + "w>{va}|{virama}\n"
+                + "sh>{sha}|{virama}\n"
+                + "ss>{ssa}|{virama}\n"
+                + "s>{sa}|{virama}\n"
+                + "z>{sa}|{virama}\n"
+                + "h>{ha}|{virama}\n"
+                // "." becomes a danda; "~" after a dependent vowel becomes bindu or candrabindu
+                  + ".>{danda}\n"
+                  + "{danda}.>{doubleDanda}\n"
+                  + "{depVowelAbove}]~>{bindu}\n"
+                  + "{depVowelBelow}]~>{candrabindu}\n"
+
+                // convert to dependent forms after consonant with no vowel:
+                // e.g. kai -> {ka}{virama}ai -> {ka}{ai}
+
+                + "{virama}aa>{aa}\n"
+                + "{virama}ai>{ai}\n"
+                + "{virama}au>{au}\n"
+                + "{virama}ii>{ii}\n"
+                + "{virama}i>{i}\n"
+                + "{virama}uu>{uu}\n"
+                + "{virama}u>{u}\n"
+                + "{virama}rrh>{rrh}\n"
+                + "{virama}rh>{rh}\n"
+                + "{virama}lh>{lh}\n"
+                + "{virama}e>{e}\n"
+                + "{virama}o>{o}\n"
+                + "{virama}a>\n"
+
+                // otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
+
+                + "{virama}''aa>{waa}\n"
+                + "{virama}''ai>{wai}\n"
+                + "{virama}''au>{wau}\n"
+                + "{virama}''ii>{wii}\n"
+                + "{virama}''i>{wi}\n"
+                + "{virama}''uu>{wuu}\n"
+                + "{virama}''u>{wu}\n"
+                + "{virama}''rrh>{wrr}\n"
+                + "{virama}''rh>{wr}\n"
+                + "{virama}''lh>{wl}\n"
+                + "{virama}''e>{we}\n"
+                + "{virama}''o>{wo}\n"
+                + "{virama}''a>{wa}\n"
+                // drop the pending virama when the next character is in endThing
+                  + "{virama}[{endThing}>\n"
+
+                // convert any left-over apostrophes used for separation
+
+                + "''>\n"
+
+                //#####################################################################
+                // convert from Native letters to Latin letters
+                //#####################################################################
+
+                // special forms with no good conversion
+
+                + "mm<{bindu}\n"
+                + "x<{visarga}\n"
+
+                // normal consonants; the C''<{C}{virama}[{X} rules insert an apostrophe so the pair does not read back as a digraph
+
+                + "kh<{kha}[&\n"
+                + "kha<{kha}\n"
+                + "k''<{ka}{virama}[{ha}\n"
+                + "k<{ka}[&\n"
+                + "ka<{ka}\n"
+                + "gh<{gha}[&\n"
+                + "gha<{gha}\n"
+                + "g''<{ga}{virama}[{ha}\n"
+                + "g<{ga}[&\n"
+                + "ga<{ga}\n"
+                + "ng<{nga}[&\n"
+                + "nga<{nga}\n"
+                + "ch<{cha}[&\n"
+                + "cha<{cha}\n"
+                + "c''<{ca}{virama}[{ha}\n"
+                + "c<{ca}[&\n"
+                + "ca<{ca}\n"
+                + "jh<{jha}[&\n"
+                + "jha<{jha}\n"
+                + "j''<{ja}{virama}[{ha}\n"
+                + "j<{ja}[&\n"
+                + "ja<{ja}\n"
+                + "ny<{nya}[&\n"
+                + "nya<{nya}\n"
+                + "tth<{ttha}[&\n"
+                + "ttha<{ttha}\n"
+                + "tt''<{tta}{virama}[{ha}\n"
+                + "tt<{tta}[&\n"
+                + "tta<{tta}\n"
+                + "ddh<{ddha}[&\n"
+                + "ddha<{ddha}\n"
+                + "dd''<{dda}{virama}[{ha}\n" // same shape as tt'' above: only a virama-joined {ha} needs the apostrophe
+                + "dd<{dda}[&\n"
+                + "dda<{dda}\n"
+                + "dh<{dha}[&\n"
+                + "dha<{dha}\n"
+                + "d''<{da}{virama}[{ha}\n"
+                + "d''<{da}{virama}[{ddha}\n"
+                + "d''<{da}{virama}[{dda}\n"
+                + "d''<{da}{virama}[{dha}\n"
+                + "d''<{da}{virama}[{da}\n"
+                + "d<{da}[&\n"
+                + "da<{da}\n"
+                + "th<{tha}[&\n"
+                + "tha<{tha}\n"
+                + "t''<{ta}{virama}[{ha}\n"
+                + "t''<{ta}{virama}[{ttha}\n"
+                + "t''<{ta}{virama}[{tta}\n"
+                + "t''<{ta}{virama}[{tha}\n"
+                + "t''<{ta}{virama}[{ta}\n"
+                + "t<{ta}[&\n"
+                + "ta<{ta}\n"
+                + "n''<{na}{virama}[{ga}\n"
+                + "n''<{na}{virama}[{ya}\n"
+                + "n<{na}[&\n"
+                + "na<{na}\n"
+                + "ph<{pha}[&\n"
+                + "pha<{pha}\n"
+                + "p''<{pa}{virama}[{ha}\n"
+                + "p<{pa}[&\n"
+                + "pa<{pa}\n"
+                + "bh<{bha}[&\n"
+                + "bha<{bha}\n"
+                + "b''<{ba}{virama}[{ha}\n"
+                + "b<{ba}[&\n"
+                + "ba<{ba}\n"
+                + "m''<{ma}{virama}[{ma}\n"
+                + "m''<{ma}{virama}[{bindu}\n"
+                + "m<{ma}[&\n"
+                + "ma<{ma}\n"
+                + "y<{ya}[&\n"
+                + "ya<{ya}\n"
+                + "r''<{ra}{virama}[{ha}\n"
+                + "r<{ra}[&\n"
+                + "ra<{ra}\n"
+                + "l''<{la}{virama}[{ha}\n"
+                + "l<{la}[&\n"
+                + "la<{la}\n"
+                + "v<{va}[&\n"
+                + "va<{va}\n"
+                + "sh<{sha}[&\n"
+                + "sha<{sha}\n"
+                + "ss<{ssa}[&\n"
+                + "ssa<{ssa}\n"
+                + "s''<{sa}{virama}[{ha}\n"
+                + "s''<{sa}{virama}[{sha}\n"
+                + "s''<{sa}{virama}[{ssa}\n"
+                + "s''<{sa}{virama}[{sa}\n"
+                + "s<{sa}[&\n"
+                + "sa<{sa}\n"
+                + "h<{ha}[&\n"
+                + "ha<{ha}\n"
+
+                // dependent vowels (should never occur except following consonants)
+
+                + "aa<{aa}\n"
+                + "ai<{ai}\n"
+                + "au<{au}\n"
+                + "ii<{ii}\n"
+                + "i<{i}\n"
+                + "uu<{uu}\n"
+                + "u<{u}\n"
+                + "rrh<{rrh}\n"
+                + "rh<{rh}\n"
+                + "lh<{lh}\n"
+                + "e<{e}\n"
+                + "o<{o}\n"
+
+                // independent vowels (when following consonants)
+
+                + "''aa<a]{waa}\n"
+                + "''aa<%]{waa}\n"
+                + "''ai<a]{wai}\n"
+                + "''ai<%]{wai}\n"
+                + "''au<a]{wau}\n"
+                + "''au<%]{wau}\n"
+                + "''ii<a]{wii}\n"
+                + "''ii<%]{wii}\n"
+                + "''i<a]{wi}\n"
+                + "''i<%]{wi}\n"
+                + "''uu<a]{wuu}\n"
+                + "''uu<%]{wuu}\n"
+                + "''u<a]{wu}\n"
+                + "''u<%]{wu}\n"
+                + "''rrh<%]{wrr}\n"
+                + "''rh<%]{wr}\n"
+                + "''lh<%]{wl}\n"
+                + "''e<%]{we}\n"
+                + "''o<%]{wo}\n"
+                + "''a<a]{wa}\n"
+                + "''a<%]{wa}\n"
+
+
+                // independent vowels (otherwise)
+
+                + "aa<{waa}\n"
+                + "ai<{wai}\n"
+                + "au<{wau}\n"
+                + "ii<{wii}\n"
+                + "i<{wi}\n"
+                + "uu<{wuu}\n"
+                + "u<{wu}\n"
+                + "rrh<{wrr}\n"
+                + "rh<{wr}\n"
+                + "lh<{wl}\n"
+                + "e<{we}\n"
+                + "o<{wo}\n"
+                + "a<{wa}\n"
+
+                // blow away any remaining viramas
+
+                + "<{virama}\n"
+            }
+        };
+    }
+}
diff --git a/icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Greek.java b/icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Greek.java
new file mode 100755
index 00000000000..76a4dda5dbf
--- /dev/null
+++ b/icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Greek.java
@@ -0,0 +1,384 @@
+package com.ibm.text.resources;
+
+import java.util.ListResourceBundle;
+
+public class TransliterationRuleLatinGreek extends ListResourceBundle {
+    /** Overrides ListResourceBundle: supplies the Latin/Greek transliteration rule table.
+     * @return two key/value pairs, "Description" and "Rule"; the rule text holds one definition or rule per line
+     */
+    public Object[][] getContents() {
+        return new Object[][] {
+            { "Description",
+                "Latin to Greek" },
+
+            { "Rule",
+                // Greek letters: uppercase names (grXx) first, then lowercase names (grxx)
+
+                "grAl=\u0391\n"
+                + "grBe=\u0392\n"
+                + "grGa=\u0393\n"
+                + "grDe=\u0394\n"
+                + "grEp=\u0395\n"
+                + "grZe=\u0396\n"
+                + "grEt=\u0397\n"
+                + "grTh=\u0398\n"
+                + "grIo=\u0399\n"
+                + "grKa=\u039A\n"
+                + "grLa=\u039B\n"
+                + "grMu=\u039C\n"
+                + "grNu=\u039D\n"
+                + "grKs=\u039E\n"
+                + "grOm=\u039F\n"
+                + "grPi=\u03A0\n"
+                + "grRh=\u03A1\n"
+                + "grSi=\u03A3\n"
+                + "grTa=\u03A4\n"
+                + "grUp=\u03A5\n"
+                + "grPh=\u03A6\n"
+                + "grKh=\u03A7\n"
+                + "grPs=\u03A8\n"
+                + "grOme=\u03A9\n"
+
+                + "gral=\u03B1\n"
+                + "grbe=\u03B2\n"
+                + "grga=\u03B3\n"
+                + "grde=\u03B4\n"
+                + "grep=\u03B5\n"
+                + "grze=\u03B6\n"
+                + "gret=\u03B7\n"
+                + "grth=\u03B8\n"
+                + "grio=\u03B9\n"
+                + "grka=\u03BA\n"
+                + "grla=\u03BB\n"
+                + "grmu=\u03BC\n"
+                + "grnu=\u03BD\n"
+                + "grks=\u03BE\n"
+                + "grom=\u03BF\n"
+                + "grpi=\u03C0\n"
+                + "grrh=\u03C1\n"
+                + "grsi=\u03C3\n"
+                + "grta=\u03C4\n"
+                + "grup=\u03C5\n"
+                + "grph=\u03C6\n"
+                + "grkh=\u03C7\n"
+                + "grps=\u03C8\n"
+                + "grome=\u03C9\n"
+
+                // special forms: final sigma, then accented (Ac/ac) and diaeresis (Di/di) letters
+                + "grfinal=\u03C2\n"
+
+                + "grAcAl=\u0386\n"
+                + "grAcEp=\u0388\n"
+                + "grAcEt=\u0389\n"
+                + "grAcIo=\u038A\n"
+                + "grAcOm=\u038C\n"
+                + "grAcUp=\u038E\n"
+                + "grAcOme=\u038F\n"
+                + "grDiIo=\u03AA\n"
+                + "grDiUp=\u03AB\n"
+
+                + "gracal=\u03AC\n"
+                + "gracep=\u03AD\n"
+                + "gracet=\u03AE\n"
+                + "gracio=\u03AF\n"
+                + "gracom=\u03CC\n"
+                + "gracup=\u03CD\n"
+                + "gracome=\u03CE\n"
+                + "grdiio=\u03CA\n"
+                + "grdiup=\u03CB\n"
+
+                //gracdiio=\u00FD
+                //gracdiup=\u00FE
+
+                + "letter=[[:Lu:][:Ll:]]\n"
+
+                // convert Roman to Native (uppercase); the first rule maps the literal word "Greek" to a fixed Greek phrase
+                + "Greek>\u039c\u0397\u039d\u0399\u039d\u0020\u0391\u0395\u0399\u0394\u0395\u002c\u0020\u0398\u0395\u0391\u002c\u0020--\u0397\u039b\u0397\u0399\u0391\u0394\u0395\u03a9\u0020\u0391\u03a7\u0399\u039b\u0397\u039f\u03a3\n"
+
+                + "AV`>{grAl}{grAcUp}\n"
+                + "EV`>{grEp}{grAcUp}\n"
+                + "AV>{grAl}{grUp}\n"
+                + "EV>{grEp}{grUp}\n"
+                + "NG>{grGa}{grGa}\n"
+                + "NK>{grGa}{grKa}\n"
+                + "NX>{grGa}{grKs}\n"
+                + "NCH>{grGa}{grKh}\n"
+
+                //+ "final = [ .;]\n" // Syntax error, unused anyway - Liu
+
+                + "A`>{grAcAl}\n"
+                + "EE`>{grAcEt}\n"
+                + "E`>{grAcEp}\n"
+                + "I`>{grAcIo}\n"
+                + "U`>{grAcUp}\n"
+                + "OO`>{grAcOme}\n"
+                + "O`>{grAcOm}\n"
+                + "''I>{grDiIo}\n"
+                + "''U>{grDiUp}\n"
+                + "A>{grAl}\n"
+                + "B>{grBe}\n"
+                + "C[I>{grSi}\n"
+                + "C[E>{grSi}\n"
+                + "C[Y>{grSi}\n"
+                + "CH>{grKh}\n"
+                + "C>{grKa}\n"
+                + "D>{grDe}\n"
+                + "EE>{grEt}\n"
+                + "E>{grEp}\n"
+                + "F>{grPh}\n"
+                + "G>{grGa}\n"
+                + "H>{grKh}\n"
+                + "I>{grIo}\n"
+                + "J>{grIo}\n"
+                + "KS>{grKs}\n"
+                + "KH>{grKh}\n"
+                + "K>{grKa}\n"
+                + "L>{grLa}\n"
+                + "M>{grMu}\n"
+                + "N>{grNu}\n"
+                + "OO>{grOme}\n"
+                + "O>{grOm}\n"
+                + "PS>{grPs}\n"
+                + "PH>{grPh}\n"
+                + "P>{grPi}\n"
+                + "Q>{grKa}\n"
+                + "R>{grRh}\n"
+                + "S>{grSi}\n"
+                + "TH>{grTh}\n"
+                + "T>{grTa}\n"
+                + "W>{grUp}{grUp}\n"
+                + "U>{grUp}\n"
+                + "V>{grUp}\n"
+                + "X>{grKs}\n"
+                + "Y>{grUp}\n"
+                + "Z>{grZe}\n"
+
+                // now Native to Roman (uppercase)
+
+                + "AV<{grAl}{grUp}\n"
+                + "EV<{grEp}{grUp}\n"
+                + "AV`<{grAl}{grAcUp}\n"
+                + "EV`<{grEp}{grAcUp}\n"
+                + "N''<{grNu}[{grGa}\n"
+                + "NG<{grGa}{grGa}\n"
+                + "N''<{grNu}[{grKa}\n"
+                + "NK<{grGa}{grKa}\n"
+                + "N''<{grNu}[{grKs}\n"
+                + "NX<{grGa}{grKs}\n"
+                + "N''<{grNu}[{grKh}\n"
+                + "NCH<{grGa}{grKh}\n"
+
+                + "A<{grAl}\n"
+                + "B<{grBe}\n"
+                + "G<{grGa}\n"
+                + "D<{grDe}\n"
+                + "E''<{grEp}[{grEp}\n"
+                + "E''<{grEp}[{grEt}\n"
+                + "E''<{grEp}[{grAcEp}\n"
+                + "E''<{grEp}[{grAcEt}\n"
+                + "E<{grEp}\n"
+                + "Z<{grZe}\n"
+                + "EE<{grEt}\n"
+                + "TH<{grTh}\n"
+                + "I<{grIo}\n"
+                + "K<{grKa}\n"
+                + "L<{grLa}\n"
+                + "M<{grMu}\n"
+                + "N<{grNu}\n"
+                + "X<{grKs}\n"
+                + "O''<{grOm}[{grOm}\n"
+                + "O''<{grOm}[{grOme}\n"
+                + "O''<{grOm}[{grAcOm}\n"
+                + "O''<{grOm}[{grAcOme}\n"
+                + "O<{grOm}\n"
+                + "P''<{grPi}[{grSi}\n"
+                + "P''<{grPi}[{grfinal}\n"
+                + "P<{grPi}\n"
+                + "R<{grRh}\n"
+                + "S<{grSi}\n"
+                + "T<{grTa}\n"
+                + "W<{grUp}{grUp}\n"
+
+                + "V<{grUp}[{grAcAl}\n"
+                + "V<{grUp}[{grAcEp}\n"
+                + "V<{grUp}[{grAcEt}\n"
+                + "V<{grUp}[{grAcIo}\n"
+                + "V<{grUp}[{grAcOm}\n"
+                + "V<{grUp}[{grAcUp}\n"
+                + "V<{grUp}[{grAcOme}\n"
+
+                + "V<{grUp}[{grAl}\n"
+                + "V<{grUp}[{grEp}\n"
+                + "V<{grUp}[{grEt}\n"
+                + "V<{grUp}[{grIo}\n"
+                + "V<{grUp}[{grOm}\n"
+                //{grUp}[{grUp}<V
+                + "V<{grUp}[{grOme}\n"
+
+                + "U<{grUp}\n"
+                + "PH<{grPh}\n"
+                + "CH<{grKh}\n"
+                + "PS<{grPs}\n"
+                + "OO<{grOme}\n"
+                // accented and diaeresis forms
+                + "A`<{grAcAl}\n"
+                + "E`<{grAcEp}\n"
+                + "EE`<{grAcEt}\n"
+                + "I`<{grAcIo}\n"
+                + "O`<{grAcOm}\n"
+                + "U`<{grAcUp}\n"
+                + "OO`<{grAcOme}\n"
+                + "''I<{grDiIo}\n"
+                + "''U<{grDiUp}\n"
+
+                //{gracdiio}<XX
+                //{gracdiup}<XX
+                  //{grfinal}<XX
+                // convert Roman to Native (lowercase)
+                + "av`>{gral}{gracup}\n"
+                + "ev`>{grep}{gracup}\n"
+                + "av>{gral}{grup}\n"
+                + "ev>{grep}{grup}\n"
+                + "ng>{grga}{grga}\n"
+                + "nk>{grga}{grka}\n"
+                + "nx>{grga}{grks}\n"
+                + "nch>{grga}{grkh}\n"
+
+                + "a`>{gracal}\n"
+                + "ee`>{gracet}\n"
+                + "e`>{gracep}\n"
+                + "i`>{gracio}\n"
+                + "u`>{gracup}\n"
+                + "oo`>{gracome}\n"
+                + "o`>{gracom}\n"
+                + "''i>{grdiio}\n"
+                + "''u>{grdiup}\n"
+                + "a>{gral}\n"
+                + "b>{grbe}\n"
+                + "c[i>{grsi}\n"
+                + "c[e>{grsi}\n"
+                + "c[y>{grsi}\n"
+                + "ch>{grkh}\n"
+                + "c>{grka}\n"
+                + "d>{grde}\n"
+                + "ee>{gret}\n"
+                + "e>{grep}\n"
+                + "f>{grph}\n"
+                + "g>{grga}\n"
+                + "h>{grkh}\n"
+                + "i>{grio}\n"
+                + "j>{grio}\n"
+                + "ks>{grks}\n"
+                + "kh>{grkh}\n"
+                + "k>{grka}\n"
+                + "l>{grla}\n"
+                + "m>{grmu}\n"
+                + "n>{grnu}\n"
+                + "oo>{grome}\n"
+                + "o>{grom}\n"
+                + "ps>{grps}\n"
+                + "ph>{grph}\n"
+                + "p>{grpi}\n"
+                + "q>{grka}\n"
+                + "r>{grrh}\n"
+                + "s>|{grfinal}\n" // s is first written as the final-sigma form ...
+                + "{grfinal}[{letter}>{grsi}\n" // ... and is replaced by {grsi} when a letter follows it
+                + "th>{grth}\n"
+                + "t>{grta}\n"
+                + "w>{grup}{grup}\n"
+                + "u>{grup}\n"
+                + "v>{grup}\n"
+                + "x>{grks}\n"
+                + "y>{grup}\n"
+                + "z>{grze}\n"
+
+
+                // remove any left-over apostrophes used for separation
+                + "''>\n"
+                // now Native to Roman (lowercase)
+
+                + "av<{gral}{grup}\n"
+                + "ev<{grep}{grup}\n"
+                + "av`<{gral}{gracup}\n"
+                + "ev`<{grep}{gracup}\n"
+                + "n''<{grnu}[{grga}\n"
+                + "ng<{grga}{grga}\n"
+                + "n''<{grnu}[{grka}\n"
+                + "nk<{grga}{grka}\n"
+                + "n''<{grnu}[{grks}\n"
+                + "nx<{grga}{grks}\n"
+                + "n''<{grnu}[{grkh}\n"
+                + "nch<{grga}{grkh}\n"
+
+                + "a<{gral}\n"
+                + "b<{grbe}\n"
+                + "g<{grga}\n"
+                + "d<{grde}\n"
+                + "e''<{grep}[{grep}\n"
+                + "e''<{grep}[{gret}\n"
+                + "e''<{grep}[{gracep}\n"
+                + "e''<{grep}[{gracet}\n"
+                + "e<{grep}\n"
+                + "z<{grze}\n"
+                + "ee<{gret}\n"
+                + "th<{grth}\n"
+                + "i<{grio}\n"
+                + "k<{grka}\n"
+                + "l<{grla}\n"
+                + "m<{grmu}\n"
+                + "n<{grnu}\n"
+                + "x<{grks}\n"
+                + "o''<{grom}[{grom}\n"
+                + "o''<{grom}[{grome}\n"
+                + "o''<{grom}[{gracom}\n"
+                + "o''<{grom}[{gracome}\n"
+                + "o<{grom}\n"
+                + "p''<{grpi}[{grsi}\n"
+                + "p''<{grpi}[{grfinal}\n"
+                + "p<{grpi}\n"
+                + "r<{grrh}\n"
+                + "s<{grsi}\n"
+                + "s<{grfinal}\n"
+                + "t<{grta}\n"
+                + "w<{grup}{grup}\n"
+
+                + "v<{grup}[{gracal}\n"
+                + "v<{grup}[{gracep}\n"
+                + "v<{grup}[{gracet}\n"
+                + "v<{grup}[{gracio}\n"
+                + "v<{grup}[{gracom}\n"
+                + "v<{grup}[{gracup}\n"
+                + "v<{grup}[{gracome}\n"
+
+                + "v<{grup}[{gral}\n"
+                + "v<{grup}[{grep}\n"
+                + "v<{grup}[{gret}\n"
+                + "v<{grup}[{grio}\n"
+                + "v<{grup}[{grom}\n"
+                //{grup}[{grup}<v
+                + "v<{grup}[{grome}\n"
+
+                + "u<{grup}\n"
+                + "ph<{grph}\n"
+                + "ch<{grkh}\n"
+                + "ps<{grps}\n"
+                + "oo<{grome}\n"
+                // accented and diaeresis forms
+                + "a`<{gracal}\n"
+                + "e`<{gracep}\n"
+                + "ee`<{gracet}\n"
+                + "i`<{gracio}\n"
+                + "o`<{gracom}\n"
+                + "u`<{gracup}\n"
+                + "oo`<{gracome}\n"
+                + "''i<{grdiio}\n"
+                + "''u<{grdiup}\n"
+                + "<''\n" // Native-to-Roman direction: an apostrophe maps to nothing
+
+                //{gracdiio}<xx
+                //{gracdiup}<xx
+                //{grfinal}<xx
+            }
+        };
+    }
+}
diff --git a/icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Hebrew.java b/icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Hebrew.java
new file mode 100755
index 00000000000..3604a8f0130
--- /dev/null
+++ b/icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Hebrew.java
@@ -0,0 +1,283 @@
+package com.ibm.text.resources;
+
+import java.util.ListResourceBundle;
+
+public class TransliterationRuleLatinHebrew extends ListResourceBundle {
+    /** Overrides ListResourceBundle.  Supplies two entries: "Description", a short label,
+     * and "Rule", a single string assembled below from newline-terminated lines: variable
+     * definitions first, then forward ('>') rules, then reverse ('<') rules. */
+    public Object[][] getContents() {
+        return new Object[][] {
+            { "Description",
+                "Latin to Hebrew" },
+
+            { "Rule",
+                // Variable definitions (NAME=value); the names are derived from the Unicode character names.
+
+                "POINT_SHEVA=\u05B0\n"
+                + "POINT_HATAF_SEGOL=\u05B1\n"
+                + "POINT_HATAF_PATAH=\u05B2\n"
+                + "POINT_HATAF_QAMATS=\u05B3\n"
+                + "POINT_HIRIQ=\u05B4\n"
+                + "POINT_TSERE=\u05B5\n"
+                + "POINT_SEGOL=\u05B6\n"
+                + "POINT_PATAH=\u05B7\n"
+                + "POINT_QAMATS=\u05B8\n"
+                + "POINT_HOLAM=\u05B9\n"
+                + "POINT_QUBUTS=\u05BB\n"
+                + "POINT_DAGESH_OR_MAPIQ=\u05BC\n"
+                + "POINT_METEG=\u05BD\n"
+                + "PUNCTUATION_MAQAF=\u05BE\n"
+                + "POINT_RAFE=\u05BF\n"
+                + "PUNCTUATION_PASEQ=\u05C0\n"
+                + "POINT_SHIN_DOT=\u05C1\n"
+                + "POINT_SIN_DOT=\u05C2\n"
+                + "PUNCTUATION_SOF_PASUQ=\u05C3\n"
+                + "ALEF=\u05D0\n"
+                + "BET=\u05D1\n"
+                + "GIMEL=\u05D2\n"
+                + "DALET=\u05D3\n"
+                + "HE=\u05D4\n"
+                + "VAV=\u05D5\n"
+                + "ZAYIN=\u05D6\n"
+                + "HET=\u05D7\n"
+                + "TET=\u05D8\n"
+                + "YOD=\u05D9\n"
+                + "FINAL_KAF=\u05DA\n"
+                + "KAF=\u05DB\n"
+                + "LAMED=\u05DC\n"
+                + "FINAL_MEM=\u05DD\n"
+                + "MEM=\u05DE\n"
+                + "FINAL_NUN=\u05DF\n"
+                + "NUN=\u05E0\n"
+                + "SAMEKH=\u05E1\n"
+                + "AYIN=\u05E2\n"
+                + "FINAL_PE=\u05E3\n"
+                + "PE=\u05E4\n"
+                + "FINAL_TSADI=\u05E5\n"
+                + "TSADI=\u05E6\n"
+                + "QOF=\u05E7\n"
+                + "RESH=\u05E8\n"
+                + "SHIN=\u05E9\n"
+                + "TAV=\u05EA\n"
+                + "YIDDISH_DOUBLE_VAV=\u05F0\n"
+                + "YIDDISH_VAV_YOD=\u05F1\n"
+                + "YIDDISH_DOUBLE_YOD=\u05F2\n"
+                + "PUNCTUATION_GERESH=\u05F3\n"
+                + "PUNCTUATION_GERSHAYIM=\u05F4\n"
+
+                // Wildcards: character sets that the rules below reference as {name}.
+                // The values can be anything we don't use in this file: start at E000.  NOTE(review): no E000-range value is defined in this file; this remark may be stale.
+
+                + "letter=[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ]\n"
+
+                + "softvowel=[eiyEIY]\n"
+
+                + "vowellike=[{ALEF}{AYIN}{YOD}{VAV}]\n"
+                // Commented-out placeholders ('?' stands where a Latin source would go); they contribute no rules.
+                //?>{POINT_SHEVA}
+                //?>{POINT_HATAF_SEGOL}
+                //?>{POINT_HATAF_PATAH}
+                //?>{POINT_HATAF_QAMATS}
+                //?>{POINT_HIRIQ}
+                //?>{POINT_TSERE}
+                //?>{POINT_SEGOL}
+                //?>{POINT_PATAH}
+                //?>{POINT_QAMATS}
+                //?>{POINT_HOLAM}
+                //?>{POINT_QUBUTS}
+                //?>{POINT_DAGESH_OR_MAPIQ}
+                //?>{POINT_METEG}
+                //?>{PUNCTUATION_MAQAF}
+                //?>{POINT_RAFE}
+                //?>{PUNCTUATION_PASEQ}
+                //?>{POINT_SHIN_DOT}
+                //?>{POINT_SIN_DOT}
+                //?>{PUNCTUATION_SOF_PASUQ}
+                // Forward rules (Latin > Hebrew).  NOTE(review): "x[{set}" looks like "x when followed by a member of {set}" -- confirm with the rule parser.
+                + "a>{ALEF}\n"
+                + "A>{ALEF}\n"
+
+                + "b>{BET}\n"
+                + "B>{BET}\n"
+
+                + "c[{softvowel}>{SAMEKH}\n" // c with a {softvowel} context maps to SAMEKH
+                + "C[{softvowel}>{SAMEKH}\n"
+                + "c[{letter}>{KAF}\n" // c with any other {letter} context maps to KAF
+                + "C[{letter}>{KAF}\n"
+                + "c>{FINAL_KAF}\n" // fallback when neither context rule applied: the final form
+                + "C>{FINAL_KAF}\n"
+
+                + "d>{DALET}\n"
+                + "D>{DALET}\n"
+
+                + "e>{AYIN}\n"
+                + "E>{AYIN}\n"
+
+                + "f[{letter}>{PE}\n"
+                + "f>{FINAL_PE}\n"
+                + "F[{letter}>{PE}\n"
+                + "F>{FINAL_PE}\n"
+
+                + "g>{GIMEL}\n"
+                + "G>{GIMEL}\n"
+
+                + "h>{HE}\n"
+                + "H>{HE}\n"
+
+                + "i>{YOD}\n"
+                + "I>{YOD}\n"
+
+                + "j>{DALET}{SHIN}\n"
+                + "J>{DALET}{SHIN}\n"
+
+                + "kH>{HET}\n" // all four case combinations of "kh" are listed explicitly
+                + "kh>{HET}\n"
+                + "KH>{HET}\n"
+                + "Kh>{HET}\n"
+                + "k[{letter}>{KAF}\n"
+                + "K[{letter}>{KAF}\n"
+                + "k>{FINAL_KAF}\n"
+                + "K>{FINAL_KAF}\n"
+
+                + "l>{LAMED}\n"
+                + "L>{LAMED}\n"
+
+                + "m[{letter}>{MEM}\n"
+                + "m>{FINAL_MEM}\n"
+                + "M[{letter}>{MEM}\n"
+                + "M>{FINAL_MEM}\n"
+
+                + "n[{letter}>{NUN}\n"
+                + "n>{FINAL_NUN}\n"
+                + "N[{letter}>{NUN}\n"
+                + "N>{FINAL_NUN}\n"
+
+                + "o>{VAV}\n"
+                + "O>{VAV}\n"
+
+                + "p[{letter}>{PE}\n"
+                + "p>{FINAL_PE}\n"
+                + "P[{letter}>{PE}\n"
+                + "P>{FINAL_PE}\n"
+
+                + "q>{QOF}\n"
+                + "Q>{QOF}\n"
+
+                + "r>{RESH}\n"
+                + "R>{RESH}\n"
+
+                + "sH>{SHIN}\n"
+                + "sh>{SHIN}\n"
+                + "SH>{SHIN}\n"
+                + "Sh>{SHIN}\n"
+                + "s>{SAMEKH}\n"
+                + "S>{SAMEKH}\n"
+
+                + "th>{TAV}\n"
+                + "tH>{TAV}\n"
+                + "TH>{TAV}\n"
+                + "Th>{TAV}\n"
+                + "tS[{letter}>{TSADI}\n"
+                + "ts[{letter}>{TSADI}\n"
+                + "Ts[{letter}>{TSADI}\n"
+                + "TS[{letter}>{TSADI}\n"
+                + "tS>{FINAL_TSADI}\n"
+                + "ts>{FINAL_TSADI}\n"
+                + "Ts>{FINAL_TSADI}\n"
+                + "TS>{FINAL_TSADI}\n"
+                + "t>{TET}\n"
+                + "T>{TET}\n"
+
+                + "u>{VAV}\n" // u, v and w (and o above) all map to VAV
+                + "U>{VAV}\n"
+
+                + "v>{VAV}\n"
+                + "V>{VAV}\n"
+
+                + "w>{VAV}\n"
+                + "W>{VAV}\n"
+
+                + "x>{KAF}{SAMEKH}\n"
+                + "X>{KAF}{SAMEKH}\n"
+
+                + "y>{YOD}\n"
+                + "Y>{YOD}\n"
+
+                + "z>{ZAYIN}\n"
+                + "Z>{ZAYIN}\n"
+
+                //#?>{YIDDISH_DOUBLE_VAV}
+                //?>{YIDDISH_VAV_YOD}
+                //?>{YIDDISH_DOUBLE_YOD}
+                //?>{PUNCTUATION_GERESH}
+                //?>{PUNCTUATION_GERSHAYIM}
+
+                + "''>\n" // empty right-hand side: the '' token is dropped (presumably '' denotes a literal apostrophe -- confirm)
+
+                //{POINT_SHEVA}>@
+                //{POINT_HATAF_SEGOL}>@
+                //{POINT_HATAF_PATAH}>@
+                //{POINT_HATAF_QAMATS}>@
+                //{POINT_HIRIQ}>@
+                //{POINT_TSERE}>@
+                //{POINT_SEGOL}>@
+                //{POINT_PATAH}>@
+                //{POINT_QAMATS}>@
+                //{POINT_HOLAM}>@
+                //{POINT_QUBUTS}>@
+                //{POINT_DAGESH_OR_MAPIQ}>@
+                //{POINT_METEG}>@
+                //{PUNCTUATION_MAQAF}>@
+                //{POINT_RAFE}>@
+                //{PUNCTUATION_PASEQ}>@
+                //{POINT_SHIN_DOT}>@
+                //{POINT_SIN_DOT}>@
+                //{PUNCTUATION_SOF_PASUQ}>@
+                // Reverse rules: Hebrew pattern on the right of '<', Latin output on the left.
+                + "a<{ALEF}\n"
+                + "e<{AYIN}\n"
+                + "b<{BET}\n"
+                + "d<{DALET}\n"
+                + "k<{FINAL_KAF}\n"
+                + "m<{FINAL_MEM}\n"
+                + "n<{FINAL_NUN}\n"
+                + "p<{FINAL_PE}\n"
+                + "ts<{FINAL_TSADI}\n"
+                + "g<{GIMEL}\n"
+                + "kh<{HET}\n"
+                + "h<{HE}\n"
+                + "k''<{KAF}[{HE}\n" // KAF with HE context yields k'' so the output is not read back as "kh" (HET) -- presumably for round-tripping
+                + "k<{KAF}\n"
+                + "l<{LAMED}\n"
+                + "m<{MEM}\n"
+                + "n<{NUN}\n"
+                + "p<{PE}\n"
+                + "q<{QOF}\n"
+                + "r<{RESH}\n"
+                + "s''<{SAMEKH}[{HE}\n" // same device for "s"+"h" versus "sh" (SHIN)
+                + "s<{SAMEKH}\n"
+                + "sh<{SHIN}\n"
+                + "th<{TAV}\n"
+                + "t''<{TET}[{HE}\n"
+                + "t''<{TET}[{HE}\n" // NOTE(review): exact duplicate of the previous rule; looks redundant
+                + "t''<{TET}[{SAMEKH}\n"
+                + "t''<{TET}[{SHIN}\n"
+                + "t<{TET}\n"
+                + "ts<{TSADI}\n"
+                + "v<{VAV}[{vowellike}\n" // VAV with a {vowellike} context yields v; otherwise the next rule yields u
+                + "u<{VAV}\n"
+                + "y<{YOD}\n"
+                + "z<{ZAYIN}\n"
+
+                //{YIDDISH_DOUBLE_VAV}>@
+                //{YIDDISH_VAV_YOD}>@
+                //{YIDDISH_DOUBLE_YOD}>@
+                //{PUNCTUATION_GERESH}>@
+                //{PUNCTUATION_GERSHAYIM}>@
+
+                + "<''\n" // reverse counterpart of the "''>" rule: empty Latin output for the '' token
+            }
+        };
+    }
+}
diff --git a/icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Kana.java b/icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Kana.java
new file mode 100755
index 00000000000..47b6e2a3de2
--- /dev/null
+++ b/icu4j/src/com/ibm/text/resources/TransliterationRule$Latin$Kana.java
@@ -0,0 +1,883 @@
+package com.ibm.text.resources;
+
+import java.util.ListResourceBundle;
+
+/**
+ * Rewritten April 1999 to implement Hepburn (Hebon-shiki)
+ * transliteration.  Reference: CJKV Information Processing, Lunde,
+ * 1999, pp. 30-35.
+ * @author Alan Liu
+ */
+public class TransliterationRuleLatinKana extends ListResourceBundle {
+    /** Overrides ListResourceBundle.  Supplies "Description" and "Rule"; the rule string
+     * defines kana variables, then forward ('>') rules, then reverse ('<') rules.
+     * NOTE(review): every forward rule here has a lowercase Latin source; none is uppercase. */
+    public Object[][] getContents() {
+        return new Object[][] {
+            { "Description",
+                "Lowercase Latin to Hiragana; Uppercase Latin to Katakana" },
+
+            {   "Rule",
+
+                //------------------------------------------------------------
+                // Variables
+                //------------------------------------------------------------
+
+                // Hiragana.  These are named according to the
+                // regularized Nippon romanization (the naming system
+                // used by Unicode).  Thus \u3062 is called "di", not
+                // "ji".  "x_" is the small form of "_", e.g. "xa" is
+                // small "a".
+
+                "xa=\u3041\n"
+                + "a=\u3042\n"
+                + "xi=\u3043\n"
+                + "i=\u3044\n"
+                + "xu=\u3045\n"
+                + "u=\u3046\n"
+                + "xe=\u3047\n"
+                + "e=\u3048\n"
+                + "xo=\u3049\n"
+                + "o=\u304A\n"
+
+                + "ka=\u304B\n"
+                + "ga=\u304C\n"
+                + "ki=\u304D\n"
+                + "gi=\u304E\n"
+                + "ku=\u304F\n"
+                + "gu=\u3050\n"
+                + "ke=\u3051\n"
+                + "ge=\u3052\n"
+                + "ko=\u3053\n"
+                + "go=\u3054\n"
+
+                + "sa=\u3055\n"
+                + "za=\u3056\n"
+                + "si=\u3057\n"
+                + "zi=\u3058\n"
+                + "su=\u3059\n"
+                + "zu=\u305A\n"
+                + "se=\u305B\n"
+                + "ze=\u305C\n"
+                + "so=\u305D\n"
+                + "zo=\u305E\n"
+
+                + "ta=\u305F\n"
+                + "da=\u3060\n"
+                + "ti=\u3061\n"
+                + "di=\u3062\n"
+                + "xtu=\u3063\n"
+                + "tu=\u3064\n"
+                + "du=\u3065\n"
+                + "te=\u3066\n"
+                + "de=\u3067\n"
+                + "to=\u3068\n"
+                + "do=\u3069\n"
+
+                + "na=\u306A\n"
+                + "ni=\u306B\n"
+                + "nu=\u306C\n"
+                + "ne=\u306D\n"
+                + "no=\u306E\n"
+
+                + "ha=\u306F\n"
+                + "ba=\u3070\n"
+                + "pa=\u3071\n"
+                + "hi=\u3072\n"
+                + "bi=\u3073\n"
+                + "pi=\u3074\n"
+                + "hu=\u3075\n"
+                + "bu=\u3076\n"
+                + "pu=\u3077\n"
+                + "he=\u3078\n"
+                + "be=\u3079\n"
+                + "pe=\u307A\n"
+                + "ho=\u307B\n"
+                + "bo=\u307C\n"
+                + "po=\u307D\n"
+
+                + "ma=\u307E\n"
+                + "mi=\u307F\n"
+                + "mu=\u3080\n"
+                + "me=\u3081\n"
+                + "mo=\u3082\n"
+
+                + "xya=\u3083\n"
+                + "ya=\u3084\n"
+                + "xyu=\u3085\n"
+                + "yu=\u3086\n"
+                + "xyo=\u3087\n"
+                + "yo=\u3088\n"
+
+                + "ra=\u3089\n"
+                + "ri=\u308A\n"
+                + "ru=\u308B\n"
+                + "re=\u308C\n"
+                + "ro=\u308D\n"
+
+                + "xwa=\u308E\n"
+                + "wa=\u308F\n"
+                + "wi=\u3090\n"
+                + "we=\u3091\n"
+                + "wo=\u3092\n"
+
+                + "n=\u3093\n"
+                + "vu=\u3094\n"
+
+                // Katakana.  "X_" is the small form of "_", e.g. "XA"
+                // is small "A".
+
+                + "XA=\u30A1\n"
+                + "A=\u30A2\n"
+                + "XI=\u30A3\n"
+                + "I=\u30A4\n"
+                + "XU=\u30A5\n"
+                + "U=\u30A6\n"
+                + "XE=\u30A7\n"
+                + "E=\u30A8\n"
+                + "XO=\u30A9\n"
+                + "O=\u30AA\n"
+
+                + "KA=\u30AB\n"
+                + "GA=\u30AC\n"
+                + "KI=\u30AD\n"
+                + "GI=\u30AE\n"
+                + "KU=\u30AF\n"
+                + "GU=\u30B0\n"
+                + "KE=\u30B1\n"
+                + "GE=\u30B2\n"
+                + "KO=\u30B3\n"
+                + "GO=\u30B4\n"
+
+                + "SA=\u30B5\n"
+                + "ZA=\u30B6\n"
+                + "SI=\u30B7\n"
+                + "ZI=\u30B8\n"
+                + "SU=\u30B9\n"
+                + "ZU=\u30BA\n"
+                + "SE=\u30BB\n"
+                + "ZE=\u30BC\n"
+                + "SO=\u30BD\n"
+                + "ZO=\u30BE\n"
+
+                + "TA=\u30BF\n"
+                + "DA=\u30C0\n"
+                + "TI=\u30C1\n"
+                + "DI=\u30C2\n"
+                + "XTU=\u30C3\n"
+                + "TU=\u30C4\n"
+                + "DU=\u30C5\n"
+                + "TE=\u30C6\n"
+                + "DE=\u30C7\n"
+                + "TO=\u30C8\n"
+                + "DO=\u30C9\n"
+
+                + "NA=\u30CA\n"
+                + "NI=\u30CB\n"
+                + "NU=\u30CC\n"
+                + "NE=\u30CD\n"
+                + "NO=\u30CE\n"
+
+                + "HA=\u30CF\n"
+                + "BA=\u30D0\n"
+                + "PA=\u30D1\n"
+                + "HI=\u30D2\n"
+                + "BI=\u30D3\n"
+                + "PI=\u30D4\n"
+                + "HU=\u30D5\n"
+                + "BU=\u30D6\n"
+                + "PU=\u30D7\n"
+                + "HE=\u30D8\n"
+                + "BE=\u30D9\n"
+                + "PE=\u30DA\n"
+                + "HO=\u30DB\n"
+                + "BO=\u30DC\n"
+                + "PO=\u30DD\n"
+
+                + "MA=\u30DE\n"
+                + "MI=\u30DF\n"
+                + "MU=\u30E0\n"
+                + "ME=\u30E1\n"
+                + "MO=\u30E2\n"
+
+                + "XYA=\u30E3\n"
+                + "YA=\u30E4\n"
+                + "XYU=\u30E5\n"
+                + "YU=\u30E6\n"
+                + "XYO=\u30E7\n"
+                + "YO=\u30E8\n"
+
+                + "RA=\u30E9\n"
+                + "RI=\u30EA\n"
+                + "RU=\u30EB\n"
+                + "RE=\u30EC\n"
+                + "RO=\u30ED\n"
+
+                + "XWA=\u30EE\n"
+                + "WA=\u30EF\n"
+                + "WI=\u30F0\n"
+                + "WE=\u30F1\n"
+                + "WO=\u30F2\n"
+
+                + "N=\u30F3\n"
+                + "VU=\u30F4\n"
+
+                + "XKA=\u30F5\n"
+                + "XKE=\u30F6\n"
+
+                + "VA=\u30F7\n"
+                + "VI=\u30F8\n"
+                + "VE=\u30F9\n"
+                + "VO=\u30FA\n"
+
+                + "DOT=\u30FB\n"  // Middle dot
+                + "LONG=\u30FC\n" // Prolonged sound mark
+ 
+                // Categories and programmatic variables
+                
+                + "vowel=[aiueo]\n" // Latin vowels; used as the context set in the "_y[{vowel}" rules
+                + "small=\uE000\n" // marker written by the forward "_y[{vowel}" rules and consumed by the "{small}_" rules
+                + "hvr=\uE001\n" // marker written by the reverse "_y|{hvr}" rules and consumed by the "_<{hvr}_" rules
+                + "hv=[{xya}{xi}{xyu}{xe}{xyo}]\n" // the small kana that the reverse rules test for as context
+
+                //------------------------------------------------------------
+                // Rules
+                //------------------------------------------------------------
+                /* Inactive, commented-out rule listing (runs to the closing marker below).  NOTE(review): looks like an earlier draft -- confirm before removing.
+// Hepburn equivalents
+
+shi>|si
+ji>|zi
+chi>|ti
+// ji>|di // By default we use the ji-zi mapping
+tsu>|tu
+fu>|hu
+
+sh[{vowel}>|sy
+ja>|zya
+// ji = zi
+ju>|zyu
+je>|zye
+jo>|zyo
+cha>|tya
+// chi = ti
+chu>|tyu
+che>|tye
+cho>|tyo
+// j[{vowel} = dy{vowel}, but we use zy{vowel} by default
+
+// Historically, m preceded b, p, or m; now n is used
+// in all cases
+m[b>n
+m[p>n
+m[m>n
+
+// Compatibility
+
+// 'f' group
+fa>{fu}{xa}
+fi>{fu}{xi}
+// fu = hu
+fe>{fu}{xe}
+fo>{fu}{xo}
+
+// 'jy' group; these will not round-trip, except for "jyi"
+// See also the 'j' group.
+jya>|zya
+jyi>{zi}{xyi}
+jyu>|zyu
+jye>|zye
+jyo>|zyo
+
+// Nippon romanized forms
+
+a>{a}
+i>{i}
+u>{u}
+e>{e}
+o>{o}
+ka>{ka}
+ki>{ki}
+ku>{ku}
+ke>{ke}
+ko>{ko}
+ga>{ga}
+gi>{gi}
+gu>{gu}
+ge>{ge}
+go>{go}
+sa>{sa}
+si>{si}
+su>{su}
+se>{se}
+so>{so}
+za>{za}
+zi>{zi}
+zu>{zu}
+ze>{ze}
+zo>{zo}
+ta>{ta}
+ti>{ti}
+tu>{tu}
+te>{te}
+to>{to}
+da>{da}
+di>{di}
+du>{du}
+de>{de}
+do>{do}
+na>{na}
+ni>{ni}
+nu>{nu}
+ne>{ne}
+no>{no}
+ha>{ha}
+hi>{hi}
+hu>{hu}
+he>{he}
+ho>{ho}
+ba>{ba}
+bi>{bi}
+bu>{bu}
+be>{be}
+bo>{bo}
+pa>{pa}
+pi>{pi}
+pu>{pu}
+pe>{pe}
+po>{po}
+ma>{ma}
+mi>{mi}
+mu>{mu}
+me>{me}
+mo>{mo}
+ya>{ya}
+yu>{yu}
+yo>{yo}
+ra>{ra}
+ri>{ri}
+ru>{ru}
+re>{re}
+ro>{ro}
+wa>{wa}
+wi>{wi}
+// No "wu"
+we>{we}
+wo>{wo} // Reverse {wo} to "o", not "wo"
+n''>{n}
+n>{n}
+
+// Palatized Nippon romanized syllables
+
+ky[{vowel}>{ki}|{small}
+gy[{vowel}>{gi}|{small}
+sy[{vowel}>{si}|{small}
+zy[{vowel}>{zi}|{small}
+ty[{vowel}>{ti}|{small}
+dy[{vowel}>{di}|{small}
+ny[{vowel}>{ni}|{small}
+my[{vowel}>{mi}|{small}
+hy[{vowel}>{hi}|{small}
+by[{vowel}>{bi}|{small}
+py[{vowel}>{pi}|{small}
+ry[{vowel}>{ri}|{small}
+
+// Doubled consonants
+
+c[c>{xtu}
+k[k>{xtu}
+g[g>{xtu}
+s[s>{xtu}
+z[z>{xtu}
+j[j>{xtu}
+t[t>{xtu}
+d[d>{xtu}
+h[h>{xtu}
+f[f>{xtu}
+p[p>{xtu}
+b[b>{xtu}
+m[m>{xtu}
+y[y>{xtu}
+r[r>{xtu}
+w[w>{xtu}
+                */
+                // Active forward rules (Latin > kana), grouped by leading Latin letter.  NOTE(review): "x[y" looks like "x when followed by y" -- confirm with the rule parser.
+                + "a>{a}\n"
+
+                + "ba>{ba}\n"
+                + "bi>{bi}\n"
+                + "bu>{bu}\n"
+                + "be>{be}\n"
+                + "bo>{bo}\n"
+                + "by[{vowel}>{bi}|{small}\n" // writes {bi} plus the {small} marker; presumably '|' marks where matching resumes, so a "{small}_" rule then handles the vowel
+                + "b[b>{xtu}\n" // doubled consonant: the first b becomes {xtu} (small tu)
+
+                + "da>{da}\n"
+                + "di>{di}\n"
+                + "du>{du}\n"
+                + "de>{de}\n"
+                + "do>{do}\n"
+                + "dy[{vowel}>{di}|{small}\n"
+                + "dh[{vowel}>{de}|{small}\n"
+                + "d[d>{xtu}\n"
+
+                + "e>{e}\n"
+
+                + "fa>{hu}{xa}\n"
+                + "fi>{hu}{xi}\n"
+                + "fe>{hu}{xe}\n"
+                + "fo>{hu}{xo}\n"
+                + "fya>{hu}{xya}\n"
+                + "fyu>{hu}{xyu}\n"
+                + "fyo>{hu}{xyo}\n"
+                + "f[f>{xtu}\n"
+
+                + "ga>{ga}\n"
+                + "gi>{gi}\n"
+                + "gu>{gu}\n"
+                + "ge>{ge}\n"
+                + "go>{go}\n"
+                + "gy[{vowel}>{gi}|{small}\n"
+                + "gwa>{gu}{xwa}\n"
+                + "gwi>{gu}{xi}\n"
+                + "gwu>{gu}{xu}\n"
+                + "gwe>{gu}{xe}\n"
+                + "gwo>{gu}{xo}\n"
+                + "g[g>{xtu}\n"
+
+                + "ha>{ha}\n"
+                + "hi>{hi}\n"
+                + "hu>{hu}\n"
+                + "he>{he}\n"
+                + "ho>{ho}\n"
+                + "hy[{vowel}>{hi}|{small}\n"
+                + "h[h>{xtu}\n"
+
+                + "i>{i}\n"
+
+                + "ka>{ka}\n"
+                + "ki>{ki}\n"
+                + "ku>{ku}\n"
+                + "ke>{ke}\n"
+                + "ko>{ko}\n"
+                + "kwa>{ku}{xwa}\n"
+                + "kwi>{ku}{xi}\n"
+                + "kwu>{ku}{xu}\n"
+                + "kwe>{ku}{xe}\n"
+                + "kwo>{ku}{xo}\n"
+                + "ky[{vowel}>{ki}|{small}\n"
+                + "k[k>{xtu}\n"
+
+                + "ma>{ma}\n"
+                + "mi>{mi}\n"
+                + "mu>{mu}\n"
+                + "me>{me}\n"
+                + "mo>{mo}\n"
+                + "my[{vowel}>{mi}|{small}\n"
+                + "m[b>{n}\n" // m with a b/f/m/p/v context is written as {n}
+                + "m[f>{n}\n"
+                + "m[m>{n}\n"
+                + "m[p>{n}\n"
+                + "m[v>{n}\n"
+                + "m''>{n}\n"
+
+                + "na>{na}\n"
+                + "ni>{ni}\n"
+                + "nu>{nu}\n"
+                + "ne>{ne}\n"
+                + "no>{no}\n"
+                + "ny[{vowel}>{ni}|{small}\n"
+                + "nn>{n}\n"
+                + "n''>{n}\n"
+                + "n>{n}\n"
+
+                + "o>{o}\n"
+
+                + "pa>{pa}\n"
+                + "pi>{pi}\n"
+                + "pu>{pu}\n"
+                + "pe>{pe}\n"
+                + "po>{po}\n"
+                + "py[{vowel}>{pi}|{small}\n"
+                + "p[p>{xtu}\n"
+
+                + "qa>{ku}{xa}\n"
+                + "qi>{ku}{xi}\n"
+                + "qu>{ku}{xu}\n"
+                + "qe>{ku}{xe}\n"
+                + "qo>{ku}{xo}\n"
+                + "qy[{vowel}>{ku}|{small}\n"
+                + "q[q>{xtu}\n"
+
+                + "ra>{ra}\n"
+                + "ri>{ri}\n"
+                + "ru>{ru}\n"
+                + "re>{re}\n"
+                + "ro>{ro}\n"
+                + "ry[{vowel}>{ri}|{small}\n"
+                + "r[r>{xtu}\n"
+
+                + "sa>{sa}\n"
+                + "si>{si}\n"
+                + "su>{su}\n"
+                + "se>{se}\n"
+                + "so>{so}\n"
+                + "sy[{vowel}>{si}|{small}\n"
+                + "s[sh>{xtu}\n"
+                + "s[s>{xtu}\n"
+
+                + "ta>{ta}\n"
+                + "ti>{ti}\n"
+                + "tu>{tu}\n"
+                + "te>{te}\n"
+                + "to>{to}\n"
+                + "th[{vowel}>{te}|{small}\n"
+                + "tsa>{tu}{xa}\n"
+                + "tsi>{tu}{xi}\n"
+                + "tse>{tu}{xe}\n"
+                + "tso>{tu}{xo}\n"
+                + "ty[{vowel}>{ti}|{small}\n"
+                + "t[ts>{xtu}\n"
+                + "t[ch>{xtu}\n"
+                + "t[t>{xtu}\n"
+
+                + "u>{u}\n"
+
+                + "va>{VA}\n" // note: va/vi/ve/vo use the uppercase (Katakana-block) variables; only vu has a Hiragana-block variable
+                + "vi>{VI}\n"
+                + "vu>{vu}\n"
+                + "ve>{VE}\n"
+                + "vo>{VO}\n"
+                + "vy[{vowel}>{VI}|{small}\n"
+                + "v[v>{xtu}\n"
+
+                + "wa>{wa}\n"
+                + "wi>{wi}\n"
+                + "we>{we}\n"
+                + "wo>{wo}\n"
+                + "w[w>{xtu}\n"
+
+                + "ya>{ya}\n"
+                + "yu>{yu}\n"
+                + "ye>{i}{xe}\n"
+                + "yo>{yo}\n"
+                + "y[y>{xtu}\n"
+
+                + "za>{za}\n"
+                + "zi>{zi}\n"
+                + "zu>{zu}\n"
+                + "ze>{ze}\n"
+                + "zo>{zo}\n"
+                + "zy[{vowel}>{zi}|{small}\n"
+                + "z[z>{xtu}\n"
+
+                + "xa>{xa}\n" // "x" prefix: direct entry of the small forms
+                + "xi>{xi}\n"
+                + "xu>{xu}\n"
+                + "xe>{xe}\n"
+                + "xo>{xo}\n"
+                + "xka>{XKA}\n"
+                + "xke>{XKE}\n"
+                + "xtu>{xtu}\n"
+                + "xwa>{xwa}\n"
+                + "xya>{xya}\n"
+                + "xyu>{xyu}\n"
+                + "xyo>{xyo}\n"
+
+                // optional mappings
+                + "wu>{u}\n"
+
+                + "ca>{ka}\n"
+                + "ci>{si}\n"
+                + "cu>{ku}\n"
+                + "ce>{se}\n"
+                + "co>{ko}\n"
+                + "cha>{ti}{xya}\n"
+                + "chi>{ti}\n"
+                + "chu>{ti}{xyu}\n"
+                + "che>{ti}{xe}\n"
+                + "cho>{ti}{xyo}\n"
+                + "cy[{vowel}>{ti}|{small}\n"
+                + "c[k>{xtu}\n"
+                + "c[c>{xtu}\n"
+
+                + "fu>{hu}\n"
+
+                + "ja>{zi}{xya}\n"
+                + "ji>{zi}\n"
+                + "ju>{zi}{xyu}\n"
+                + "je>{zi}{xe}\n"
+                + "jo>{zi}{xyo}\n"
+                + "jy[{vowel}>{zi}|{small}\n"
+                + "j[j>{xtu}\n"
+
+                + "la>{ra}\n"
+                + "li>{ri}\n"
+                + "lu>{ru}\n"
+                + "le>{re}\n"
+                + "lo>{ro}\n"
+                + "ly[{vowel}>{ri}|{small}\n"
+                + "l[l>{xtu}\n"
+
+                + "sha>{si}{xya}\n"
+                + "shi>{si}\n"
+                + "shu>{si}{xyu}\n"
+                + "she>{si}{xe}\n"
+                + "sho>{si}{xyo}\n"
+
+                + "tsu>{tu}\n"
+
+                + "yi>{i}\n"
+
+                + "xtsu>{xtu}\n"
+                + "xyi>{xi}\n"
+                + "xye>{xe}\n"
+
+
+
+
+
+
+
+                // Convert vowels to small form: each rule consumes the {small} marker written by a "_y[{vowel}" (or dh/th) rule above
+                + "{small}a>{xya}\n"
+                + "{small}i>{xi}\n"
+                + "{small}u>{xyu}\n"
+                + "{small}e>{xe}\n"
+                + "{small}o>{xyo}\n"
+
+
+
+                // Reverse rules: kana pattern on the right of '<', Latin output on the left.
+                + "gy|{hvr}<{gi}[{hv}\n" // {gi} with a small-kana ({hv}) context yields "gy" plus the {hvr} marker, resolved by the last five rules
+                + "gwa<{gu}{xwa}\n"
+                + "gwi<{gu}{xi}\n"
+                + "gwu<{gu}{xu}\n"
+                + "gwe<{gu}{xe}\n"
+                + "gwo<{gu}{xo}\n"
+                + "ga<{ga}\n"
+                + "gi<{gi}\n"
+                + "gu<{gu}\n"
+                + "ge<{ge}\n"
+                + "go<{go}\n"
+
+                + "ky|{hvr}<{ki}[{hv}\n"
+                + "kwa<{ku}{xwa}\n"
+                + "kwi<{ku}{xi}\n"
+                + "kwu<{ku}{xu}\n"
+                + "kwe<{ku}{xe}\n"
+                + "kwo<{ku}{xo}\n"
+                + "qa<{ku}{xa}\n"
+                + "qya<{ku}{xya}\n"
+                + "qyu<{ku}{xyu}\n"
+                + "qyo<{ku}{xyo}\n"
+                + "ka<{ka}\n"
+                + "ki<{ki}\n"
+                + "ku<{ku}\n"
+                + "ke<{ke}\n"
+                + "ko<{ko}\n"
+
+                + "j|{hvr}<{zi}[{hv}\n" // Hepburn
+                + "za<{za}\n"
+                + "ji<{zi}\n" // Hepburn
+                + "zu<{zu}\n"
+                + "ze<{ze}\n"
+                + "zo<{zo}\n"
+
+                + "sh|{hvr}<{si}[{hv}\n" // Hepburn
+                + "sa<{sa}\n"
+                + "shi<{si}\n"
+                + "su<{su}\n"
+                + "se<{se}\n"
+                + "so<{so}\n"
+
+                + "j|{hvr}<{di}[{hv}\n" // Hepburn
+                + "dh|{hvr}<{de}[{hv}\n" 
+                + "da<{da}\n"
+                + "ji<{di}\n" // Hepburn
+                + "de<{de}\n"
+                + "do<{do}\n"
+                + "zu<{du}\n" // Hepburn
+
+                + "ch|{hvr}<{ti}[{hv}\n" // Hepburn
+                + "tsa<{tu}{xa}\n"
+                + "tsi<{tu}{xi}\n"
+                + "tse<{tu}{xe}\n"
+                + "tso<{tu}{xo}\n"
+                + "th|{hvr}<{te}[{hv}\n"
+                + "ta<{ta}\n"
+                + "chi<{ti}\n" // Hepburn
+                + "tsu<{tu}\n" // Hepburn
+                + "te<{te}\n"
+                + "to<{to}\n"
+
+                + "ny|{hvr}<{ni}[{hv}\n"
+                + "na<{na}\n"
+                + "ni<{ni}\n"
+                + "nu<{nu}\n"
+                + "ne<{ne}\n"
+                + "no<{no}\n"
+
+                + "by|{hvr}<{bi}[{hv}\n"
+                + "ba<{ba}\n"
+                + "bi<{bi}\n"
+                + "bu<{bu}\n"
+                + "be<{be}\n"
+                + "bo<{bo}\n"
+
+                + "py|{hvr}<{pi}[{hv}\n"
+                + "pa<{pa}\n"
+                + "pi<{pi}\n"
+                + "pu<{pu}\n"
+                + "pe<{pe}\n"
+                + "po<{po}\n"
+
+                + "hy|{hvr}<{hi}[{hv}\n"
+                + "fa<{hu}{xa}\n"
+                + "fi<{hu}{xi}\n"
+                + "fe<{hu}{xe}\n"
+                + "fo<{hu}{xo}\n"
+                + "fya<{hu}{xya}\n"
+                + "fyu<{hu}{xyu}\n"
+                + "fyo<{hu}{xyo}\n"
+                + "ha<{ha}\n"
+                + "hi<{hi}\n"
+                + "fu<{hu}\n" // Hepburn
+                + "he<{he}\n"
+                + "ho<{ho}\n"
+
+                + "my|{hvr}<{mi}[{hv}\n"
+                + "ma<{ma}\n"
+                + "mi<{mi}\n"
+                + "mu<{mu}\n"
+                + "me<{me}\n"
+                + "mo<{mo}\n"
+
+                + "ya<{ya}\n"
+                + "yu<{yu}\n"
+                + "ye<{i}{xe}\n"
+                + "yo<{yo}\n"
+                + "xya<{xya}\n"
+                + "xyu<{xyu}\n"
+                + "xyo<{xyo}\n"
+
+                + "ry|{hvr}<{ri}[{hv}\n"
+                + "ra<{ra}\n"
+                + "ri<{ri}\n"
+                + "ru<{ru}\n"
+                + "re<{re}\n"
+                + "ro<{ro}\n"
+
+                + "wa<{wa}\n"
+                + "wi<{wi}\n"
+                + "we<{we}\n"
+                + "wo<{wo}\n"
+
+                + "vu<{vu}\n"
+                + "vy|{hvr}<{VI}[{hv}\n"
+                + "v<{xtu}[{vu}\n"
+
+                + "xa<{xa}\n"
+                + "xi<{xi}\n"
+                + "xu<{xu}\n"
+                + "xe<{xe}\n"
+                + "xo<{xo}\n"
+
+                + "n''<{n}[{a}\n" // {n} with a vowel, n-row, y-row or {n} context yields n'' rather than plain n -- presumably to keep the Latin output unambiguous
+                + "n''<{n}[{i}\n"
+                + "n''<{n}[{u}\n"
+                + "n''<{n}[{e}\n"
+                + "n''<{n}[{o}\n"
+                + "n''<{n}[{na}\n"
+                + "n''<{n}[{ni}\n"
+                + "n''<{n}[{nu}\n"
+                + "n''<{n}[{ne}\n"
+                + "n''<{n}[{no}\n"
+                + "n''<{n}[{ya}\n"
+                + "n''<{n}[{yu}\n"
+                + "n''<{n}[{yo}\n"
+                + "n''<{n}[{n}\n"
+                + "n<{n}\n"
+
+                // Small tu ({xtu}) reverses to a copy of the consonant of the kana given as context.
+                + "g<{xtu}[{ga}\n"
+                + "g<{xtu}[{gi}\n"
+                + "g<{xtu}[{gu}\n"
+                + "g<{xtu}[{ge}\n"
+                + "g<{xtu}[{go}\n"
+                + "k<{xtu}[{ka}\n"
+                + "k<{xtu}[{ki}\n"
+                + "k<{xtu}[{ku}\n"
+                + "k<{xtu}[{ke}\n"
+                + "k<{xtu}[{ko}\n"
+
+                + "z<{xtu}[{za}\n"
+                + "z<{xtu}[{zi}\n"
+                + "z<{xtu}[{zu}\n"
+                + "z<{xtu}[{ze}\n"
+                + "z<{xtu}[{zo}\n"
+                + "s<{xtu}[{sa}\n"
+                + "s<{xtu}[{si}\n"
+                + "s<{xtu}[{su}\n"
+                + "s<{xtu}[{se}\n"
+                + "s<{xtu}[{so}\n"
+
+                + "d<{xtu}[{da}\n"
+                + "d<{xtu}[{di}\n"
+                + "d<{xtu}[{du}\n"
+                + "d<{xtu}[{de}\n"
+                + "d<{xtu}[{do}\n"
+                + "t<{xtu}[{ta}\n"
+                + "t<{xtu}[{ti}\n"
+                + "t<{xtu}[{tu}\n"
+                + "t<{xtu}[{te}\n"
+                + "t<{xtu}[{to}\n"
+
+
+                + "b<{xtu}[{ba}\n"
+                + "b<{xtu}[{bi}\n"
+                + "b<{xtu}[{bu}\n"
+                + "b<{xtu}[{be}\n"
+                + "b<{xtu}[{bo}\n"
+                + "p<{xtu}[{pa}\n"
+                + "p<{xtu}[{pi}\n"
+                + "p<{xtu}[{pu}\n"
+                + "p<{xtu}[{pe}\n"
+                + "p<{xtu}[{po}\n"
+                + "h<{xtu}[{ha}\n"
+                + "h<{xtu}[{hi}\n"
+                + "h<{xtu}[{hu}\n"
+                + "h<{xtu}[{he}\n"
+                + "h<{xtu}[{ho}\n"
+
+
+                + "r<{xtu}[{ra}\n"
+                + "r<{xtu}[{ri}\n"
+                + "r<{xtu}[{ru}\n"
+                + "r<{xtu}[{re}\n"
+                + "r<{xtu}[{ro}\n"
+
+                + "w<{xtu}[{wa}\n"
+                + "xtu<{xtu}\n" // fallback: a small tu with none of the listed contexts is spelled out as "xtu"
+
+                + "a<{a}\n"
+                + "i<{i}\n"
+                + "u<{u}\n"
+                + "e<{e}\n"
+                + "o<{o}\n"
+
+
+
+                // Convert small forms to vowels: each rule consumes the {hvr} marker written by a "_|{hvr}<" rule above
+                + "a<{hvr}{xya}\n"
+                + "i<{hvr}{xi}\n"
+                + "u<{hvr}{xyu}\n"
+                + "e<{hvr}{xe}\n"
+                + "o<{hvr}{xyo}\n"              
+            }
+        };
+    }
+}
+
+
+
diff --git a/icu4j/src/com/ibm/text/resources/TransliterationRule$StraightQuotes$CurlyQuotes.java b/icu4j/src/com/ibm/text/resources/TransliterationRule$StraightQuotes$CurlyQuotes.java
new file mode 100755
index 00000000000..409d0a1e29b
--- /dev/null
+++ b/icu4j/src/com/ibm/text/resources/TransliterationRule$StraightQuotes$CurlyQuotes.java
@@ -0,0 +1,87 @@
+package com.ibm.text.resources;
+
+import java.util.ListResourceBundle;
+
+public class TransliterationRuleStraightQuotesCurlyQuotes extends ListResourceBundle {
+    /**
+     * Overrides ListResourceBundle; returns a freshly allocated table with a "Description" and a "Rule" entry.
+     */
+    public Object[][] getContents() {
+        return new Object[][] {
+            { "Description",
+                "Use left and right double quotes" },
+            // "Rule" is one concatenated string: newline-terminated name=value lines first, then the rules.
+            {   "Rule",
+                // Rewritten using character codes [LIU]. white: Zs/Zl/Zp separators; black: its complement; open: Ps.
+                "white=[[:Zs:][:Zl:][:Zp:]]\n"
+                + "black=[^[:Zs:][:Zl:][:Zp:]]\n"
+                + "open=[[:Ps:]]\n"
+                + "dquote=\"\n"
+                // Opening brackets (U+3008, U+300A, '[', '{'); none of these four names is referenced by a rule below.
+                + "lAng=\u3008\n"
+                + "ldAng=\u300A\n"
+                + "lBrk='['\n"
+                + "lBrc='{'\n"
+                // Curly single (U+2018 / U+2019) and curly double (U+201C / U+201D) quotation marks.
+                + "lquote=\u2018\n"
+                + "rquote=\u2019\n"
+                + "ldquote=\u201C\n"
+                + "rdquote=\u201D\n"
+                // Double (U+00AB / U+00BB) and single (U+2039 / U+203A) guillemets.
+                + "ldguill=\u00AB\n"
+                + "rdguill=\u00BB\n"
+                + "lguill=\u2039\n"
+                + "rguill=\u203A\n"
+                // Em dash, U+2014.
+                + "mdash=\u2014\n"
+
+                //#######################################
+                // Conversions from input
+                //#######################################
+                // Every rule in this section is written with '>'; {name} refers to a definition above.
+                // join single quotes: a curly single quote followed by '' or by itself becomes the curly double quote
+                + "{lquote}''>{ldquote}\n"
+                + "{lquote}{lquote}>{ldquote}\n"
+                + "{rquote}''>{rdquote}\n"
+                + "{rquote}{rquote}>{rdquote}\n"
+                // NOTE(review): '' presumably means a literal apostrophe and text before ']' preceding context - confirm in the rule parser.
+                // smart single quotes: left quote after {white} or {open}, right quote after {black}, left quote otherwise
+                + "{white}]''>{lquote}\n"
+                + "{open}]''>{lquote}\n"
+                + "{black}]''>{rquote}\n"
+                + "''>{lquote}\n"
+
+                // smart doubles: the same four cases, applied to {dquote}
+                + "{white}]{dquote}>{ldquote}\n"
+                + "{open}]{dquote}>{ldquote}\n"
+                + "{black}]{dquote}>{rdquote}\n"
+                + "{dquote}>{ldquote}\n"
+
+                // join single guillemets: two single guillemets, or quoted '>>' / '<<', become one double guillemet
+                + "{rguill}{rguill}>{rdguill}\n"
+                + "'>>'>{rdguill}\n"
+                + "{lguill}{lguill}>{ldguill}\n"
+                + "'<<'>{ldguill}\n"
+
+                // prevent double spaces (as written: a space with a space before the ']' maps to an empty right-hand side)
+                + " ] >\n"
+
+                // join hyphens into dash: "--" becomes {mdash}
+                + "-->{mdash}\n"
+
+                //#######################################
+                // Conversions back to input
+                //#######################################
+                // Every rule in this section is written with '<': ASCII text on the left, typographic character on the right.
+                // smart quotes: left and right curly forms share the same straight-quote text
+                + "''<{lquote}\n"
+                + "''<{rquote}\n"
+                + "{dquote}<{ldquote}\n"
+                + "{dquote}<{rdquote}\n"
+
+                // hyphens: {mdash} back to "--"; no '<' rule is given here for the guillemets
+                + "--<{mdash}\n"
+            }
+        };
+    }
+}

`[abc]`	The set containing the characters 'a', 'b', and 'c'.
`[^abc]`	The set of all characters except 'a', 'b', and 'c'.
`[A-Z]`	The set of all characters from 'A' to 'Z' in Unicode order.
`[:Lu:]`	The set of Unicode uppercase letters. See www.unicode.org for a complete list of categories and their two-letter codes.
`[^a-z[:Lu:][:Ll:]]`	The set of all characters except 'a' through 'z' and uppercase or lowercase letters.
`\|adefabcdefz`	Initial state, no rules match. Advance cursor.
`a\|defabcdefz`	Still no match. Rule 1 does not match because the preceding context is not present.
`ad\|efabcdefz`	Still no match. Keep advancing until there is a match...
`ade\|fabcdefz`	...
`adef\|abcdefz`	...
`adefa\|bcdefz`	...
`adefab\|cdefz`	...
`adefabc\|defz`	Rule 1 matches; replace "`def`" with "`xy`" and back up the cursor to before the '`y`'.
`adefabcx\|yz`	Although "`xyz`" is present, rule 2 does not match because the cursor is before the '`y`', not before the '`x`'. Rule 3 does match. Replace "`yz`" with "`q`".
`adefabcxq\|`	The cursor is at the end; transliteration is complete.
`pattern :=`	`('[' '^'? item* ']') \| ('[:' '^'? category ':]')`
`item :=`	`char \| (char '-' char) \| pattern-expr`
`pattern-expr :=`	`pattern \| pattern-expr pattern \| pattern-expr op pattern`
`op :=`	`'&' \| '-'`
`special :=`	`'[' \| ']' \| '-'`
`char :=`	any character that is not `special` \| `('\u005C'` any character`)` \| `('\u005Cu' hex hex hex hex)`
`hex :=`	any character for which `Character.digit(c, 16)` returns a non-negative result
`category :=`	`'M' \| 'N' \| 'Z' \| 'C' \| 'L' \| 'P' \| 'S' \| 'Mn' \| 'Mc' \| 'Me' \| 'Nd' \| 'Nl' \| 'No' \| 'Zs' \| 'Zl' \| 'Zp' \| 'Cc' \| 'Cf' \| 'Cs' \| 'Co' \| 'Cn' \| 'Lu' \| 'Ll' \| 'Lt' \| 'Lm' \| 'Lo' \| 'Pc' \| 'Pd' \| 'Ps' \| 'Pe' \| 'Po' \| 'Sm' \| 'Sc' \| 'Sk' \| 'So'`
`[a]`	The set containing 'a'
`[a-z]`	The set containing 'a' through 'z' and all letters in between, in Unicode order
`[^a-z]`	The set containing all characters but 'a' through 'z', that is, U+0000 through 'a'-1 and 'z'+1 through U+FFFF
`[[pat1][pat2]]`	The union of sets specified by pat1 and pat2
`[[pat1]&[pat2]]`	The intersection of sets specified by pat1 and pat2
`[[pat1]-[pat2]]`	The asymmetric difference of sets specified by pat1 and pat2
`[:Lu:]`	The set of characters belonging to the given Unicode category, as defined by `Character.getType()`; in this case, Unicode uppercase letters
`[:L:]`	The set of characters belonging to all Unicode categories starting with 'L', that is, `[[:Lu:][:Ll:][:Lt:][:Lm:][:Lo:]]`.