ICU-114 Transliterator framework first working version

X-SVN-Rev: 194
2025-04-10 07:39:16 +00:00 · 1999-11-20 00:40:50 +00:00 · 1999-11-20 00:40:50 +00:00 · bd14077b79
commit bd14077b79
parent a2f31432aa
35 changed files with 14712 additions and 1 deletions
--- a/icu4c/data/translit/expcon.txt
+++ b/icu4c/data/translit/expcon.txt
--- a/icu4c/data/translit/kbdescl1.txt
+++ b/icu4c/data/translit/kbdescl1.txt
@ -0,0 +1,128 @@
+//--------------------------------------------------------------------
+//  Copyright (C) 1999, International Business Machines
+//  Corporation and others.  All Rights Reserved.
+//--------------------------------------------------------------------
+//  Date        Name        Description
+//  11/17/99    aliu        Creation.
+//--------------------------------------------------------------------
+
+// KeyboardEscape-Latin1
+
+kbdescl1 {
+    Rule {
+        // Variable definitions, one "name=value" per line; the rules below
+        // refer to them as {name}.
+        // Note: esc and acute are defined with the same value, as are hat
+        // and super.
+        "esc=''\n"
+        "grave=`\n"
+        "acute=''\n"
+        "hat=^\n"
+        "tilde=~\n"
+        "umlaut=:\n"
+        "ring=.\n"
+        "cedilla=,\n"
+        "slash=/\n"
+        "super=^\n"
+
+        // Make keyboard entry of {esc} possible
+        // and of backslash
+        "'\\'{esc}>{esc}\n"
+        "'\\\\'>'\\'\n"
+
+        // Long keys
+        "cur{esc}>\u00A4\n"
+        "sec{esc}>\u00A7\n"
+        "not{esc}>\u00AC\n"
+        "mul{esc}>\u00D7\n"
+        "div{esc}>\u00F7\n"
+
+        " {esc}>\u00A0\n" // non-breaking space
+        "!{esc}>\u00A1\n" // inverted exclamation
+        "c/{esc}>\u00A2\n" // cent sign
+        "lb{esc}>\u00A3\n" // pound sign
+        "'|'{esc}>\u00A6\n" // broken vertical bar
+        ":{esc}>\u00A8\n" // umlaut
+        "{super}a{esc}>\u00AA\n" // feminine ordinal
+        "'<<'{esc}>\u00AB\n"
+        "r{esc}>\u00AE\n"
+        "--{esc}>\u00AF\n"
+        "-{esc}>\u00AD\n"
+        "+-{esc}>\u00B1\n"
+        "{super}2{esc}>\u00B2\n"
+        "{super}3{esc}>\u00B3\n"
+        "{acute}{esc}>\u00B4\n"
+        "m{esc}>\u00B5\n"
+        "para{esc}>\u00B6\n"
+        "dot{esc}>\u00B7\n"
+        "{cedilla}{esc}>\u00B8\n"
+        "{super}1{esc}>\u00B9\n"
+        "{super}o{esc}>\u00BA\n" // masculine ordinal
+        "'>>'{esc}>\u00BB\n"
+        "1/4{esc}>\u00BC\n"
+        "1/2{esc}>\u00BD\n"
+        "3/4{esc}>\u00BE\n"
+        "?{esc}>\u00BF\n"
+        "A{grave}{esc}>\u00C0\n"
+        "A{acute}{esc}>\u00C1\n"
+        "A{hat}{esc}>\u00C2\n"
+        "A{tilde}{esc}>\u00C3\n"
+        "A{umlaut}{esc}>\u00C4\n"
+        "A{ring}{esc}>\u00C5\n"
+        "AE{esc}>\u00C6\n"
+        "C{cedilla}{esc}>\u00C7\n"
+        "E{grave}{esc}>\u00C8\n"
+        "E{acute}{esc}>\u00C9\n"
+        "E{hat}{esc}>\u00CA\n"
+        "E{umlaut}{esc}>\u00CB\n"
+        "I{grave}{esc}>\u00CC\n"
+        "I{acute}{esc}>\u00CD\n"
+        "I{hat}{esc}>\u00CE\n"
+        "I{umlaut}{esc}>\u00CF\n"
+        "D-{esc}>\u00D0\n"
+        "N{tilde}{esc}>\u00D1\n"
+        "O{grave}{esc}>\u00D2\n"
+        "O{acute}{esc}>\u00D3\n"
+        "O{hat}{esc}>\u00D4\n"
+        "O{tilde}{esc}>\u00D5\n"
+        "O{umlaut}{esc}>\u00D6\n"
+        "O{slash}{esc}>\u00D8\n"
+        "U{grave}{esc}>\u00D9\n"
+        "U{acute}{esc}>\u00DA\n"
+        "U{hat}{esc}>\u00DB\n"
+        "U{umlaut}{esc}>\u00DC\n"
+        "Y{acute}{esc}>\u00DD\n"
+        "TH{esc}>\u00DE\n"
+        "ss{esc}>\u00DF\n"
+        "a{grave}{esc}>\u00E0\n"
+        "a{acute}{esc}>\u00E1\n"
+        "a{hat}{esc}>\u00E2\n"
+        "a{tilde}{esc}>\u00E3\n"
+        "a{umlaut}{esc}>\u00E4\n"
+        "a{ring}{esc}>\u00E5\n"
+        "ae{esc}>\u00E6\n"
+        "c{cedilla}{esc}>\u00E7\n"
+        "c{esc}>\u00A9\n" // copyright - after c{cedilla}
+        "e{grave}{esc}>\u00E8\n"
+        "e{acute}{esc}>\u00E9\n"
+        "e{hat}{esc}>\u00EA\n"
+        "e{umlaut}{esc}>\u00EB\n"
+        "i{grave}{esc}>\u00EC\n"
+        "i{acute}{esc}>\u00ED\n"
+        "i{hat}{esc}>\u00EE\n"
+        "i{umlaut}{esc}>\u00EF\n"
+        "d-{esc}>\u00F0\n"
+        "n{tilde}{esc}>\u00F1\n"
+        "o{grave}{esc}>\u00F2\n"
+        "o{acute}{esc}>\u00F3\n"
+        "o{hat}{esc}>\u00F4\n"
+        "o{tilde}{esc}>\u00F5\n"
+        "o{umlaut}{esc}>\u00F6\n"
+        "o{slash}{esc}>\u00F8\n"
+        "o{esc}>\u00B0\n"
+        "u{grave}{esc}>\u00F9\n"
+        "u{acute}{esc}>\u00FA\n"
+        "u{hat}{esc}>\u00FB\n"
+        "u{umlaut}{esc}>\u00FC\n"
+        "y{acute}{esc}>\u00FD\n"
+        "y{esc}>\u00A5\n" // yen sign
+        "th{esc}>\u00FE\n"
+        // U+00FF is y with diaeresis.  The key used to be a second "ss{esc}",
+        // which is already mapped to \u00DF above, so this rule could never
+        // fire and \u00FF could not be entered.
+        "y{umlaut}{esc}>\u00FF\n"
+    }
+}
--- a/icu4c/data/translit/larabic.txt
+++ b/icu4c/data/translit/larabic.txt
@ -0,0 +1,240 @@
+//--------------------------------------------------------------------
+//  Copyright (C) 1999, International Business Machines
+//  Corporation and others.  All Rights Reserved.
+//--------------------------------------------------------------------
+//  Date        Name        Description
+//  11/17/99    aliu        Creation.
+//--------------------------------------------------------------------
+
+// Latin-Arabic
+
+larabic {
+    Rule {
+        // To Do: finish adding shadda, add sokoon
+
+        "alefmadda=\u0622\n"
+        "alefuhamza=\u0623\n"
+        "wauuhamza=\u0624\n"
+        "alefhamza=\u0625\n"
+        "yehuhamza=\u0626\n"
+        "alef=\u0627\n"
+        "beh=\u0628\n"
+        "tehmarbuta=\u0629\n"
+        "teh=\u062A\n"
+        "theh=\u062B\n"
+        "geem=\u062C\n"
+        "hah=\u062D\n"
+        "kha=\u062E\n"
+        "dal=\u062F\n"
+        "dhal=\u0630\n"
+        "reh=\u0631\n"
+        "zain=\u0632\n"
+        "seen=\u0633\n"
+        "sheen=\u0634\n"
+        "sad=\u0635\n"
+        "dad=\u0636\n"
+        "tah=\u0637\n"
+        "zah=\u0638\n"
+        "ein=\u0639\n"
+        "ghein=\u063A\n"
+        "feh=\u0641\n"
+        "qaaf=\u0642\n"
+        "kaf=\u0643\n"
+        "lam=\u0644\n"
+        "meem=\u0645\n"
+        "noon=\u0646\n"
+        "heh=\u0647\n"
+        "wau=\u0648\n"
+        "yehmaqsura=\u0649\n"
+        "yeh=\u064A\n"
+        "peh=\u067E\n" // U+067E ARABIC LETTER PEH (U+06A4 is VEH)
+
+        "hamza=\u0621\n"
+        "fathatein=\u064B\n"
+        "dammatein=\u064C\n"
+        "kasratein=\u064D\n"
+        "fatha=\u064E\n"
+        "damma=\u064F\n"
+        "kasra=\u0650\n"
+        "shadda=\u0651\n"
+        "sokoon=\u0652\n"
+
+        // convert English to Arabic
+        "Arabic>"
+        "\u062a\u062a\u0645\u062a\u0639\u0020"
+        "\u0627\u0644\u0644\u063a\u0629\u0020"
+        "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"
+        "\u0628\u0628\u0646\u0638\u0645\u0020"
+        "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"
+        "\u062c\u0645\u064a\u0644\u0629\n"
+
+        "ai>{alefmadda}\n"
+        "ae>{alefuhamza}\n"
+        "ao>{alefhamza}\n"
+        "aa>{alef}\n"
+        "an>{fathatein}\n"
+        "a>{fatha}\n"
+        "b>{beh}\n"
+        "c>{kaf}\n"
+        "{dhal}]dh>{shadda}\n"
+        "dh>{dhal}\n"
+        "{dad}]dd>{shadda}\n"
+        "dd>{dad}\n"
+        "{dal}]d>{shadda}\n"
+        "d>{dal}\n"
+        "e>{ein}\n"
+        "f>{feh}\n"
+        "gh>{ghein}\n"
+        "g>{geem}\n"
+        "hh>{hah}\n"
+        "h>{heh}\n"
+        "ii>{kasratein}\n"
+        "i>{kasra}\n"
+        "j>{geem}\n"
+        "kh>{kha}\n"
+        "k>{kaf}\n"
+        "l>{lam}\n"
+        "m>{meem}\n"
+        "n>{noon}\n"
+        "o>{hamza}\n"
+        "p>{peh}\n"
+        "q>{qaaf}\n"
+        "r>{reh}\n"
+        "sh>{sheen}\n"
+        "ss>{sad}\n"
+        "s>{seen}\n"
+        "th>{theh}\n"
+        "tm>{tehmarbuta}\n"
+        "tt>{tah}\n"
+        "t>{teh}\n"
+        "uu>{dammatein}\n"
+        "u>{damma}\n"
+        "v>{beh}\n"
+        "we>{wauuhamza}\n"
+        "w>{wau}\n"
+        "x>{kaf}{shadda}{seen}\n"
+        "ye>{yehuhamza}\n"
+        "ym>{yehmaqsura}\n"
+        "y>{yeh}\n"
+        "zz>{zah}\n"
+        "z>{zain}\n"
+
+        // Digits and numeric punctuation.  As in the rest of this file the
+        // rule strings are simply juxtaposed; the trailing '+' operators left
+        // over from the Java source have been removed.
+        "0>\u0660\n" // Arabic digit 0
+        "1>\u0661\n" // Arabic digit 1
+        "2>\u0662\n" // Arabic digit 2
+        "3>\u0663\n" // Arabic digit 3
+        "4>\u0664\n" // Arabic digit 4
+        "5>\u0665\n" // Arabic digit 5
+        "6>\u0666\n" // Arabic digit 6
+        "7>\u0667\n" // Arabic digit 7
+        "8>\u0668\n" // Arabic digit 8
+        "9>\u0669\n" // Arabic digit 9
+        "%>\u066A\n" // Arabic %
+        ".>\u066B\n" // Arabic decimal separator
+        ",>\u066C\n" // Arabic thousands separator
+        "*>\u066D\n" // Arabic five-pointed star
+
+        "`0>0\n" // Escaped forms of the above
+        "`1>1\n"
+        "`2>2\n"
+        "`3>3\n"
+        "`4>4\n"
+        "`5>5\n"
+        "`6>6\n"
+        "`7>7\n"
+        "`8>8\n"
+        "`9>9\n"
+        "`%>%\n"
+        "`.>.\n"
+        "`,>,\n"
+        "`*>*\n"
+        "``>`\n"
+
+        "''>\n"
+
+        // now Arabic to English
+
+        "''ai<a]{alefmadda}\n"
+        "ai<{alefmadda}\n"
+        "''ae<a]{alefuhamza}\n"
+        "ae<{alefuhamza}\n"
+        "''ao<a]{alefhamza}\n"
+        "ao<{alefhamza}\n"
+        "''aa<a]{alef}\n"
+        "aa<{alef}\n"
+        "''an<a]{fathatein}\n"
+        "an<{fathatein}\n"
+        "''a<a]{fatha}\n"
+        "a<{fatha}\n"
+        "b<{beh}\n"
+        "''dh<d]{dhal}\n"
+        "dh<{dhal}\n"
+        "''dd<d]{dad}\n"
+        "dd<{dad}\n"
+        "''d<d]{dal}\n"
+        "d<{dal}\n"
+        "''e<a]{ein}\n"
+        "''e<w]{ein}\n"
+        "''e<y]{ein}\n"
+        "e<{ein}\n"
+        "f<{feh}\n"
+        "gh<{ghein}\n"
+        "''hh<d]{hah}\n"
+        "''hh<t]{hah}\n"
+        "''hh<k]{hah}\n"
+        "''hh<s]{hah}\n"
+        "hh<{hah}\n"
+        "''h<d]{heh}\n"
+        "''h<t]{heh}\n"
+        "''h<k]{heh}\n"
+        "''h<s]{heh}\n"
+        "h<{heh}\n"
+        "''ii<i]{kasratein}\n"
+        "ii<{kasratein}\n"
+        "''i<i]{kasra}\n"
+        "i<{kasra}\n"
+        "j<{geem}\n"
+        "kh<{kha}\n"
+        "x<{kaf}{shadda}{seen}\n"
+        "k<{kaf}\n"
+        "l<{lam}\n"
+        "''m<y]{meem}\n"
+        "''m<t]{meem}\n"
+        "m<{meem}\n"
+        "n<{noon}\n"
+        "''o<a]{hamza}\n"
+        "o<{hamza}\n"
+        "p<{peh}\n"
+        "q<{qaaf}\n"
+        "r<{reh}\n"
+        "sh<{sheen}\n"
+        "''ss<s]{sad}\n"
+        "ss<{sad}\n"
+        "''s<s]{seen}\n"
+        "s<{seen}\n"
+        "th<{theh}\n"
+        "tm<{tehmarbuta}\n"
+        "''tt<t]{tah}\n"
+        "tt<{tah}\n"
+        "''t<t]{teh}\n"
+        "t<{teh}\n"
+        "''uu<u]{dammatein}\n"
+        "uu<{dammatein}\n"
+        "''u<u]{damma}\n"
+        "u<{damma}\n"
+        "we<{wauuhamza}\n"
+        "w<{wau}\n"
+        "ye<{yehuhamza}\n"
+        "ym<{yehmaqsura}\n"
+        "''y<y]{yeh}\n"
+        "y<{yeh}\n"
+        "''zz<z]{zah}\n"
+        "zz<{zah}\n"
+        "''z<z]{zain}\n"
+        "z<{zain}\n"
+
+        "dh<dh]{shadda}\n"
+        "dd<dd]{shadda}\n"
+        "''d<d]{shadda}\n"
+    }
+}
--- a/icu4c/data/translit/ldevan.txt
+++ b/icu4c/data/translit/ldevan.txt
@ -0,0 +1,411 @@
+//--------------------------------------------------------------------
+//  Copyright (C) 1999, International Business Machines
+//  Corporation and others.  All Rights Reserved.
+//--------------------------------------------------------------------
+//  Date        Name        Description
+//  11/17/99    aliu        Creation.
+//--------------------------------------------------------------------
+
+// Latin-Devanagari
+
+ldevan {
+    Rule {
+        //#####################################################################
+        //	Keyboard Transliteration Table
+        //#####################################################################
+        // Conversions should be:
+        // 1. complete
+        //  * convert every sequence of Latin letters (a to z plus apostrophe) 
+        //    to a sequence of Native letters
+        //  * convert every sequence of Native letters to Latin letters
+        // 2. reversible
+        //  * any string of Native converted to Latin and back should be the same
+        //  * this is not true for English converted to Native & back, e.g.:
+        //		k -> {ka} -> k
+        //		q -> {ka} -> k
+        //#####################################################################
+        // Sequences of Latin letters may convert to a single Native letter.
+        // When this is the case, an apostrophe can be used to indicate separate
+        // letters.
+        // E.g.	sh -> {sha}
+        //		s'h -> {sa}{ha}
+        // 		ss -> {ssa}
+        // 		s's -> {sa}{sa}
+        //#####################################################################
+        // To Do:
+        //	finish adding shadda, add sokoon, fix uppercase
+        //	make two transliteration tables: one with vowels, one without
+        //#####################################################################
+        // Modifications
+        //	Devanagari Transliterator:  broken up with consonants/vowels
+        //#####################################################################
+        // Unicode character name definitions
+        //#####################################################################
+
+        // signs
+        "candrabindu=\u0901\n"
+        "bindu=\u0902\n"
+        "visarga=\u0903\n"
+
+        // w<vowel> represents the stand-alone form
+        "wa=\u0905\n"
+        "waa=\u0906\n"
+        "wi=\u0907\n"
+        "wii=\u0908\n"
+        "wu=\u0909\n"
+        "wuu=\u090A\n"
+        "wr=\u090B\n"
+        "wl=\u090C\n"
+        "we=\u090F\n"
+        "wai=\u0910\n"
+        "wo=\u0913\n"
+        "wau=\u0914\n"
+
+        "ka=\u0915\n"
+        "kha=\u0916\n"
+        "ga=\u0917\n"
+        "gha=\u0918\n"
+        "nga=\u0919\n"
+
+        "ca=\u091A\n"
+        "cha=\u091B\n"
+        "ja=\u091C\n"
+        "jha=\u091D\n"
+        "nya=\u091E\n"
+
+        "tta=\u091F\n"
+        "ttha=\u0920\n"
+        "dda=\u0921\n"
+        "ddha=\u0922\n"
+        "nna=\u0923\n"
+
+        "ta=\u0924\n"
+        "tha=\u0925\n"
+        "da=\u0926\n"
+        "dha=\u0927\n"
+        "na=\u0928\n"
+
+        "pa=\u092A\n"
+        "pha=\u092B\n"
+        "ba=\u092C\n"
+        "bha=\u092D\n"
+        "ma=\u092E\n"
+
+        "ya=\u092F\n"
+        "ra=\u0930\n"
+        "rra=\u0931\n"
+        "la=\u0932\n" // U+0932 DEVANAGARI LETTER LA (U+0933 is LLA)
+        "va=\u0935\n"
+
+        "sha=\u0936\n"
+        "ssa=\u0937\n"
+        "sa=\u0938\n"
+        "ha=\u0939\n"
+
+        // <vowel> represents the dependent form
+        "aa=\u093E\n"
+        "i=\u093F\n"
+        "ii=\u0940\n"
+        "u=\u0941\n"
+        "uu=\u0942\n"
+        // Dependent vocalic signs must pair with the independent letters
+        // defined earlier: wr=\u090B (R), wl=\u090C (L), wrr=\u0960 (RR).
+        // U+0943 = sign vocalic R, U+0944 = sign vocalic RR,
+        // U+0962 = sign vocalic L.  lh and rrh were previously swapped.
+        "rh=\u0943\n"
+        "lh=\u0962\n"
+        "e=\u0947\n"
+        "ai=\u0948\n"
+        "o=\u094B\n"
+        "au=\u094C\n"
+
+        "virama=\u094D\n"
+
+        "wrr=\u0960\n"
+        "rrh=\u0944\n"
+
+        "danda=\u0964\n"
+        "doubleDanda=\u0965\n"
+        "depVowelAbove=[\u093E-\u0940\u0945-\u094C]\n"
+        "depVowelBelow=[\u0941-\u0944]\n"
+        // Ech: Double escape U+0000, so UnicodeString doesn't consider it
+        // to be the end of the string.  This is only necessary for U+0000
+        // right now. [liu]
+        "endThing=[{danda}{doubleDanda}\\u0000-\u08FF\u0980-\uFFFF]\n"
+
+        "&=[{virama}{aa}{ai}{au}{ii}{i}{uu}{u}{rrh}{rh}{lh}{e}{o}]\n"
+        "%=[bcdfghjklmnpqrstvwxyz]\n"
+
+        //#####################################################################
+        // convert from Latin letters to Native letters
+        //#####################################################################
+        //Hindi>\u092d\u093e\u0930\u0924--\u0020\u0926\u0947\u0936\u0020\u092c\u0928\u094d\u0927\u0941\u002e
+
+        // special forms with no good conversion
+
+        "mm>{bindu}\n"
+        "x>{visarga}\n"
+
+        // convert to independent forms at start of word or syllable: 
+        // e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
+        // Moved up [LIU]
+
+        "aa>{waa}\n"
+        "ai>{wai}\n"
+        "au>{wau}\n"
+        "ii>{wii}\n"
+        "i>{wi}\n"
+        "uu>{wuu}\n"
+        "u>{wu}\n"
+        "rrh>{wrr}\n"
+        "rh>{wr}\n"
+        "lh>{wl}\n"
+        "e>{we}\n"
+        "o>{wo}\n"
+        "a>{wa}\n"
+
+        // normal consonants
+
+        "kh>{kha}|{virama}\n"
+        "k>{ka}|{virama}\n"
+        "q>{ka}|{virama}\n"
+        "gh>{gha}|{virama}\n"
+        "g>{ga}|{virama}\n"
+        "ng>{nga}|{virama}\n"
+        "ch>{cha}|{virama}\n"
+        "c>{ca}|{virama}\n"
+        "jh>{jha}|{virama}\n"
+        "j>{ja}|{virama}\n"
+        "ny>{nya}|{virama}\n"
+        "tth>{ttha}|{virama}\n"
+        "tt>{tta}|{virama}\n"
+        "ddh>{ddha}|{virama}\n"
+        "dd>{dda}|{virama}\n"
+        "nn>{nna}|{virama}\n"
+        "th>{tha}|{virama}\n"
+        "t>{ta}|{virama}\n"
+        "dh>{dha}|{virama}\n"
+        "d>{da}|{virama}\n"
+        "n>{na}|{virama}\n"
+        "ph>{pha}|{virama}\n"
+        "p>{pa}|{virama}\n"
+        "bh>{bha}|{virama}\n"
+        "b>{ba}|{virama}\n"
+        "m>{ma}|{virama}\n"
+        "y>{ya}|{virama}\n"
+        "r>{ra}|{virama}\n"
+        "l>{la}|{virama}\n"
+        "v>{va}|{virama}\n"
+        "f>{va}|{virama}\n"
+        "w>{va}|{virama}\n"
+        "sh>{sha}|{virama}\n"
+        "ss>{ssa}|{virama}\n"
+        "s>{sa}|{virama}\n"
+        "z>{sa}|{virama}\n"
+        "h>{ha}|{virama}\n"
+
+        ".>{danda}\n"
+        "{danda}.>{doubleDanda}\n"
+        "{depVowelAbove}]~>{bindu}\n"
+        "{depVowelBelow}]~>{candrabindu}\n"
+
+        // convert to dependent forms after consonant with no vowel: 
+        // e.g. kai -> {ka}{virama}ai -> {ka}{ai}
+
+        "{virama}aa>{aa}\n"
+        "{virama}ai>{ai}\n"
+        "{virama}au>{au}\n"
+        "{virama}ii>{ii}\n"
+        "{virama}i>{i}\n"
+        "{virama}uu>{uu}\n"
+        "{virama}u>{u}\n"
+        "{virama}rrh>{rrh}\n"
+        "{virama}rh>{rh}\n"
+        "{virama}lh>{lh}\n"
+        "{virama}e>{e}\n"
+        "{virama}o>{o}\n"
+        "{virama}a>\n"
+
+        // otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
+
+        "{virama}''aa>{waa}\n"
+        "{virama}''ai>{wai}\n"
+        "{virama}''au>{wau}\n"
+        "{virama}''ii>{wii}\n"
+        "{virama}''i>{wi}\n"
+        "{virama}''uu>{wuu}\n"
+        "{virama}''u>{wu}\n"
+        "{virama}''rrh>{wrr}\n"
+        "{virama}''rh>{wr}\n"
+        "{virama}''lh>{wl}\n"
+        "{virama}''e>{we}\n"
+        "{virama}''o>{wo}\n"
+        "{virama}''a>{wa}\n"
+
+        "{virama}[{endThing}>\n"
+
+        // convert any left-over apostrophes used for separation
+
+        "''>\n"
+
+        //#####################################################################
+        // convert from Native letters to Latin letters
+        //#####################################################################
+
+        // special forms with no good conversion
+
+        "mm<{bindu}\n"
+        "x<{visarga}\n"
+
+        // normal consonants
+
+        "kh<{kha}[&\n"
+        "kha<{kha}\n"
+        "k''<{ka}{virama}[{ha}\n"
+        "k<{ka}[&\n"
+        "ka<{ka}\n"
+        "gh<{gha}[&\n"
+        "gha<{gha}\n"
+        "g''<{ga}{virama}[{ha}\n"
+        "g<{ga}[&\n"
+        "ga<{ga}\n"
+        "ng<{nga}[&\n"
+        "nga<{nga}\n"
+        "ch<{cha}[&\n"
+        "cha<{cha}\n"
+        "c''<{ca}{virama}[{ha}\n"
+        "c<{ca}[&\n"
+        "ca<{ca}\n"
+        "jh<{jha}[&\n"
+        "jha<{jha}\n"
+        "j''<{ja}{virama}[{ha}\n"
+        "j<{ja}[&\n"
+        "ja<{ja}\n"
+        "ny<{nya}[&\n"
+        "nya<{nya}\n"
+        "tth<{ttha}[&\n"
+        "ttha<{ttha}\n"
+        "tt''<{tta}{virama}[{ha}\n"
+        "tt<{tta}[&\n"
+        "tta<{tta}\n"
+        "ddh<{ddha}[&\n"
+        "ddha<{ddha}\n"
+        // Emit the separating apostrophe only for {dda}{virama} followed by
+        // {ha}, like the sibling consonant rules; the former "[&{ha}" context
+        // also matched {dda} + dependent vowel + {ha}.
+        "dd''<{dda}{virama}[{ha}\n"
+        "dd<{dda}[&\n"
+        "dda<{dda}\n"
+        "dh<{dha}[&\n"
+        "dha<{dha}\n"
+        "d''<{da}{virama}[{ha}\n"
+        "d''<{da}{virama}[{ddha}\n"
+        "d''<{da}{virama}[{dda}\n"
+        "d''<{da}{virama}[{dha}\n"
+        "d''<{da}{virama}[{da}\n"
+        "d<{da}[&\n"
+        "da<{da}\n"
+        "th<{tha}[&\n"
+        "tha<{tha}\n"
+        "t''<{ta}{virama}[{ha}\n"
+        "t''<{ta}{virama}[{ttha}\n"
+        "t''<{ta}{virama}[{tta}\n"
+        "t''<{ta}{virama}[{tha}\n"
+        "t''<{ta}{virama}[{ta}\n"
+        "t<{ta}[&\n"
+        "ta<{ta}\n"
+        "n''<{na}{virama}[{ga}\n"
+        "n''<{na}{virama}[{ya}\n"
+        "n<{na}[&\n"
+        "na<{na}\n"
+        "ph<{pha}[&\n"
+        "pha<{pha}\n"
+        "p''<{pa}{virama}[{ha}\n"
+        "p<{pa}[&\n"
+        "pa<{pa}\n"
+        "bh<{bha}[&\n"
+        "bha<{bha}\n"
+        "b''<{ba}{virama}[{ha}\n"
+        "b<{ba}[&\n"
+        "ba<{ba}\n"
+        "m''<{ma}{virama}[{ma}\n"
+        "m''<{ma}{virama}[{bindu}\n"
+        "m<{ma}[&\n"
+        "ma<{ma}\n"
+        "y<{ya}[&\n"
+        "ya<{ya}\n"
+        "r''<{ra}{virama}[{ha}\n"
+        "r<{ra}[&\n"
+        "ra<{ra}\n"
+        "l''<{la}{virama}[{ha}\n"
+        "l<{la}[&\n"
+        "la<{la}\n"
+        "v<{va}[&\n"
+        "va<{va}\n"
+        "sh<{sha}[&\n"
+        "sha<{sha}\n"
+        "ss<{ssa}[&\n"
+        "ssa<{ssa}\n"
+        "s''<{sa}{virama}[{ha}\n"
+        "s''<{sa}{virama}[{sha}\n"
+        "s''<{sa}{virama}[{ssa}\n"
+        "s''<{sa}{virama}[{sa}\n"
+        "s<{sa}[&\n"
+        "sa<{sa}\n"
+        "h<{ha}[&\n"
+        "ha<{ha}\n"
+
+        // dependent vowels (should never occur except following consonants)
+
+        "aa<{aa}\n"
+        "ai<{ai}\n"
+        "au<{au}\n"
+        "ii<{ii}\n"
+        "i<{i}\n"
+        "uu<{uu}\n"
+        "u<{u}\n"
+        "rrh<{rrh}\n"
+        "rh<{rh}\n"
+        "lh<{lh}\n"
+        "e<{e}\n"
+        "o<{o}\n"
+
+        // independent vowels (when following consonants)
+
+        "''aa<a]{waa}\n"
+        "''aa<%]{waa}\n"
+        "''ai<a]{wai}\n"
+        "''ai<%]{wai}\n"
+        "''au<a]{wau}\n"
+        "''au<%]{wau}\n"
+        "''ii<a]{wii}\n"
+        "''ii<%]{wii}\n"
+        "''i<a]{wi}\n"
+        "''i<%]{wi}\n"
+        "''uu<a]{wuu}\n"
+        "''uu<%]{wuu}\n"
+        "''u<a]{wu}\n"
+        "''u<%]{wu}\n"
+        "''rrh<%]{wrr}\n"
+        "''rh<%]{wr}\n"
+        "''lh<%]{wl}\n"
+        "''e<%]{we}\n"
+        "''o<%]{wo}\n"
+        "''a<a]{wa}\n"
+        "''a<%]{wa}\n"
+
+
+        // independent vowels (otherwise)
+
+        "aa<{waa}\n"
+        "ai<{wai}\n"
+        "au<{wau}\n"
+        "ii<{wii}\n"
+        "i<{wi}\n"
+        "uu<{wuu}\n"
+        "u<{wu}\n"
+        "rrh<{wrr}\n"
+        "rh<{wr}\n"
+        "lh<{wl}\n"
+        "e<{we}\n"
+        "o<{wo}\n"
+        "a<{wa}\n"
+
+        // blow away any remaining viramas
+
+        "<{virama}\n"
+    }
+}
--- a/icu4c/data/translit/lgreek.txt
+++ b/icu4c/data/translit/lgreek.txt
@ -0,0 +1,380 @@
+//--------------------------------------------------------------------
+//  Copyright (C) 1999, International Business Machines
+//  Corporation and others.  All Rights Reserved.
+//--------------------------------------------------------------------
+//  Date        Name        Description
+//  11/17/99    aliu        Creation.
+//--------------------------------------------------------------------
+
+// Latin-Greek
+
+lgreek {
+    Rule {
+        // Greek Letters
+
+        "grAl=\u0391\n"
+        "grBe=\u0392\n"
+        "grGa=\u0393\n"
+        "grDe=\u0394\n"
+        "grEp=\u0395\n"
+        "grZe=\u0396\n"
+        "grEt=\u0397\n"
+        "grTh=\u0398\n"
+        "grIo=\u0399\n"
+        "grKa=\u039A\n"
+        "grLa=\u039B\n"
+        "grMu=\u039C\n"
+        "grNu=\u039D\n"
+        "grKs=\u039E\n"
+        "grOm=\u039F\n"
+        "grPi=\u03A0\n"
+        "grRh=\u03A1\n"
+        "grSi=\u03A3\n"
+        "grTa=\u03A4\n"
+        "grUp=\u03A5\n"
+        "grPh=\u03A6\n"
+        "grKh=\u03A7\n"
+        "grPs=\u03A8\n"
+        "grOme=\u03A9\n"
+
+        "gral=\u03B1\n"
+        "grbe=\u03B2\n"
+        "grga=\u03B3\n"
+        "grde=\u03B4\n"
+        "grep=\u03B5\n"
+        "grze=\u03B6\n"
+        "gret=\u03B7\n"
+        "grth=\u03B8\n"
+        "grio=\u03B9\n"
+        "grka=\u03BA\n"
+        "grla=\u03BB\n"
+        "grmu=\u03BC\n"
+        "grnu=\u03BD\n"
+        "grks=\u03BE\n"
+        "grom=\u03BF\n"
+        "grpi=\u03C0\n"
+        "grrh=\u03C1\n"
+        "grsi=\u03C3\n"
+        "grta=\u03C4\n"
+        "grup=\u03C5\n"
+        "grph=\u03C6\n"
+        "grkh=\u03C7\n"
+        "grps=\u03C8\n"
+        "grome=\u03C9\n"
+
+        //forms
+        "grfinal=\u03C2\n"
+
+        "grAcAl=\u0386\n"
+        "grAcEp=\u0388\n"
+        "grAcEt=\u0389\n"
+        "grAcIo=\u038A\n"
+        "grAcOm=\u038C\n"
+        "grAcUp=\u038E\n"
+        "grAcOme=\u038F\n"
+        "grDiIo=\u03AA\n"
+        "grDiUp=\u03AB\n"
+
+        "gracal=\u03AC\n"
+        "gracep=\u03AD\n"
+        "gracet=\u03AE\n"
+        "gracio=\u03AF\n"
+        "gracom=\u03CC\n"
+        "gracup=\u03CD\n"
+        "gracome=\u03CE\n"
+        "grdiio=\u03CA\n"
+        "grdiup=\u03CB\n"
+
+        //gracdiio=\u00FD
+        //gracdiup=\u00FE
+
+        "letter=[[:Lu:][:Ll:]]\n"
+
+        // convert Roman to Native
+        "Greek>\u039c\u0397\u039d\u0399\u039d\u0020\u0391\u0395\u0399\u0394\u0395\u002c\u0020\u0398\u0395\u0391\u002c\u0020--\u0397\u039b\u0397\u0399\u0391\u0394\u0395\u03a9\u0020\u0391\u03a7\u0399\u039b\u0397\u039f\u03a3\n"
+
+        "AV`>{grAl}{grAcUp}\n"
+        "EV`>{grEp}{grAcUp}\n"
+        "AV>{grAl}{grUp}\n"
+        "EV>{grEp}{grUp}\n"
+        "NG>{grGa}{grGa}\n"
+        "NK>{grGa}{grKa}\n"
+        "NX>{grGa}{grKs}\n"
+        "NCH>{grGa}{grKh}\n"
+
+        //+ "final = [ .;]\n" // Syntax error, unused anyway - Liu
+
+        "A`>{grAcAl}\n"
+        "EE`>{grAcEt}\n"
+        "E`>{grAcEp}\n"
+        "I`>{grAcIo}\n"
+        "U`>{grAcUp}\n"
+        "OO`>{grAcOme}\n"
+        "O`>{grAcOm}\n"
+        "''I>{grDiIo}\n"
+        "''U>{grDiUp}\n"
+        "A>{grAl}\n"
+        "B>{grBe}\n"
+        "C[I>{grSi}\n"
+        "C[E>{grSi}\n"
+        "C[Y>{grSi}\n"
+        "CH>{grKh}\n"
+        "C>{grKa}\n"
+        "D>{grDe}\n"
+        "EE>{grEt}\n"
+        "E>{grEp}\n"
+        "F>{grPh}\n"
+        "G>{grGa}\n"
+        "H>{grKh}\n"
+        "I>{grIo}\n"
+        "J>{grIo}\n"
+        "KS>{grKs}\n"
+        "KH>{grKh}\n"
+        "K>{grKa}\n"
+        "L>{grLa}\n"
+        "M>{grMu}\n"
+        "N>{grNu}\n"
+        "OO>{grOme}\n"
+        "O>{grOm}\n"
+        "PS>{grPs}\n"
+        "PH>{grPh}\n"
+        "P>{grPi}\n"
+        "Q>{grKa}\n"
+        "R>{grRh}\n"
+        "S>{grSi}\n"
+        "TH>{grTh}\n"
+        "T>{grTa}\n"
+        "W>{grUp}{grUp}\n"
+        "U>{grUp}\n"
+        "V>{grUp}\n"
+        "X>{grKs}\n"
+        "Y>{grUp}\n"
+        "Z>{grZe}\n"
+
+        //now Native to Roman
+
+        "AV<{grAl}{grUp}\n"
+        "EV<{grEp}{grUp}\n"
+        "AV`<{grAl}{grAcUp}\n"
+        "EV`<{grEp}{grAcUp}\n"
+        "N''<{grNu}[{grGa}\n"
+        "NG<{grGa}{grGa}\n"
+        "N''<{grNu}[{grKa}\n"
+        "NK<{grGa}{grKa}\n"
+        "N''<{grNu}[{grKs}\n"
+        "NX<{grGa}{grKs}\n"
+        "N''<{grNu}[{grKh}\n"
+        "NCH<{grGa}{grKh}\n"
+
+        "A<{grAl}\n"
+        "B<{grBe}\n"
+        "G<{grGa}\n"
+        "D<{grDe}\n"
+        "E''<{grEp}[{grEp}\n"
+        "E''<{grEp}[{grEt}\n"
+        "E''<{grEp}[{grAcEp}\n"
+        "E''<{grEp}[{grAcEt}\n"
+        "E<{grEp}\n"
+        "Z<{grZe}\n"
+        "EE<{grEt}\n"
+        "TH<{grTh}\n"
+        "I<{grIo}\n"
+        "K<{grKa}\n"
+        "L<{grLa}\n"
+        "M<{grMu}\n"
+        "N<{grNu}\n"
+        "X<{grKs}\n"
+        "O''<{grOm}[{grOm}\n"
+        "O''<{grOm}[{grOme}\n"
+        "O''<{grOm}[{grAcOm}\n"
+        "O''<{grOm}[{grAcOme}\n"
+        "O<{grOm}\n"
+        "P''<{grPi}[{grSi}\n"
+        "P''<{grPi}[{grfinal}\n"
+        "P<{grPi}\n"
+        "R<{grRh}\n"
+        "S<{grSi}\n"
+        "T<{grTa}\n"
+        "W<{grUp}{grUp}\n"
+
+        "V<{grUp}[{grAcAl}\n"
+        "V<{grUp}[{grAcEp}\n"
+        "V<{grUp}[{grAcEt}\n"
+        "V<{grUp}[{grAcIo}\n"
+        "V<{grUp}[{grAcOm}\n"
+        "V<{grUp}[{grAcUp}\n"
+        "V<{grUp}[{grAcOme}\n"
+
+        "V<{grUp}[{grAl}\n"
+        "V<{grUp}[{grEp}\n"
+        "V<{grUp}[{grEt}\n"
+        "V<{grUp}[{grIo}\n"
+        "V<{grUp}[{grOm}\n"
+        //{grUp}[{grUp}<V
+        "V<{grUp}[{grOme}\n"
+
+        "U<{grUp}\n"
+        "PH<{grPh}\n"
+        "CH<{grKh}\n"
+        "PS<{grPs}\n"
+        "OO<{grOme}\n"
+        //forms
+        "A`<{grAcAl}\n"
+        "E`<{grAcEp}\n"
+        "EE`<{grAcEt}\n"
+        "I`<{grAcIo}\n"
+        "O`<{grAcOm}\n"
+        "U`<{grAcUp}\n"
+        "OO`<{grAcOme}\n"
+        "''I<{grDiIo}\n"
+        "''U<{grDiUp}\n"
+
+        //{gracdiio}<XX
+        //{gracdiup}<XX
+          //{grfinal}<XX
+
+        "av`>{gral}{gracup}\n"
+        "ev`>{grep}{gracup}\n"
+        "av>{gral}{grup}\n"
+        "ev>{grep}{grup}\n"
+        "ng>{grga}{grga}\n"
+        "nk>{grga}{grka}\n"
+        "nx>{grga}{grks}\n"
+        "nch>{grga}{grkh}\n"
+
+        "a`>{gracal}\n"
+        "ee`>{gracet}\n"
+        "e`>{gracep}\n"
+        "i`>{gracio}\n"
+        "u`>{gracup}\n"
+        "oo`>{gracome}\n"
+        "o`>{gracom}\n"
+        "''i>{grdiio}\n"
+        "''u>{grdiup}\n"
+        "a>{gral}\n"
+        "b>{grbe}\n"
+        "c[i>{grsi}\n"
+        "c[e>{grsi}\n"
+        "c[y>{grsi}\n"
+        "ch>{grkh}\n"
+        "c>{grka}\n"
+        "d>{grde}\n"
+        "ee>{gret}\n"
+        "e>{grep}\n"
+        "f>{grph}\n"
+        "g>{grga}\n"
+        "h>{grkh}\n"
+        "i>{grio}\n"
+        "j>{grio}\n"
+        "ks>{grks}\n"
+        "kh>{grkh}\n"
+        "k>{grka}\n"
+        "l>{grla}\n"
+        "m>{grmu}\n"
+        "n>{grnu}\n"
+        "oo>{grome}\n"
+        "o>{grom}\n"
+        "ps>{grps}\n"
+        "ph>{grph}\n"
+        "p>{grpi}\n"
+        "q>{grka}\n"
+        "r>{grrh}\n"
+        // Lower-case sigma is handled in two steps: "s" is first written as
+        // {grfinal}, and the next rule rewrites {grfinal} to {grsi} when a
+        // {letter} follows it.
+        // NOTE(review): the '|' presumably leaves the cursor before {grfinal}
+        // so that the following rule can re-examine it -- confirm against the
+        // rule parser.
+        "s>|{grfinal}\n"
+        "{grfinal}[{letter}>{grsi}\n"
+        "th>{grth}\n"
+        "t>{grta}\n"
+        "w>{grup}{grup}\n"
+        "u>{grup}\n"
+        "v>{grup}\n"
+        "x>{grks}\n"
+        "y>{grup}\n"
+        "z>{grze}\n"
+
+
+        //forms
+        "''>\n"
+        //now native to roman
+
+        "av<{gral}{grup}\n"
+        "ev<{grep}{grup}\n"
+        "av`<{gral}{gracup}\n"
+        "ev`<{grep}{gracup}\n"
+        "n''<{grnu}[{grga}\n"
+        "ng<{grga}{grga}\n"
+        "n''<{grnu}[{grka}\n"
+        "nk<{grga}{grka}\n"
+        "n''<{grnu}[{grks}\n"
+        "nx<{grga}{grks}\n"
+        "n''<{grnu}[{grkh}\n"
+        "nch<{grga}{grkh}\n"
+
+        "a<{gral}\n"
+        "b<{grbe}\n"
+        "g<{grga}\n"
+        "d<{grde}\n"
+        "e''<{grep}[{grep}\n"
+        "e''<{grep}[{gret}\n"
+        "e''<{grep}[{gracep}\n"
+        "e''<{grep}[{gracet}\n"
+        "e<{grep}\n"
+        "z<{grze}\n"
+        "ee<{gret}\n"
+        "th<{grth}\n"
+        "i<{grio}\n"
+        "k<{grka}\n"
+        "l<{grla}\n"
+        "m<{grmu}\n"
+        "n<{grnu}\n"
+        "x<{grks}\n"
+        "o''<{grom}[{grom}\n"
+        "o''<{grom}[{grome}\n"
+        "o''<{grom}[{gracom}\n"
+        "o''<{grom}[{gracome}\n"
+        "o<{grom}\n"
+        "p''<{grpi}[{grsi}\n"
+        "p''<{grpi}[{grfinal}\n"
+        "p<{grpi}\n"
+        "r<{grrh}\n"
+        "s<{grsi}\n"
+        "s<{grfinal}\n"
+        "t<{grta}\n"
+        "w<{grup}{grup}\n"
+
+        "v<{grup}[{gracal}\n"
+        "v<{grup}[{gracep}\n"
+        "v<{grup}[{gracet}\n"
+        "v<{grup}[{gracio}\n"
+        "v<{grup}[{gracom}\n"
+        "v<{grup}[{gracup}\n"
+        "v<{grup}[{gracome}\n"
+
+        "v<{grup}[{gral}\n"
+        "v<{grup}[{grep}\n"
+        "v<{grup}[{gret}\n"
+        "v<{grup}[{grio}\n"
+        "v<{grup}[{grom}\n"
+        //{grup}[{grup}<v
+        "v<{grup}[{grome}\n"
+
+        "u<{grup}\n"
+        "ph<{grph}\n"
+        "ch<{grkh}\n"
+        "ps<{grps}\n"
+        "oo<{grome}\n"
+        //forms
+        "a`<{gracal}\n"
+        "e`<{gracep}\n"
+        "ee`<{gracet}\n"
+        "i`<{gracio}\n"
+        "o`<{gracom}\n"
+        "u`<{gracup}\n"
+        "oo`<{gracome}\n"
+        "''i<{grdiio}\n"
+        "''u<{grdiup}\n"
+        "<''\n"
+
+        //{gracdiio}<xx
+        //{gracdiup}<xx
+        //{grfinal}<xx
+    }
+}
--- a/icu4c/data/translit/lhalfwid.txt
+++ b/icu4c/data/translit/lhalfwid.txt
--- a/icu4c/data/translit/lhebrew.txt
+++ b/icu4c/data/translit/lhebrew.txt
@ -0,0 +1,279 @@
+//--------------------------------------------------------------------
+//  Copyright (C) 1999, International Business Machines
+//  Corporation and others.  All Rights Reserved.
+//--------------------------------------------------------------------
+//  Date        Name        Description
+//  11/17/99    aliu        Creation.
+//--------------------------------------------------------------------
+
+// Latin-Hebrew
+
+lhebrew {
+    Rule {
+        // Latin <-> Hebrew transliteration rule set.
+        //   name=value   defines a variable, referenced later as {name}
+        //   x>y          Latin-to-Hebrew rule
+        //   x<y          Hebrew-to-Latin rule
+        //   x[ctx        x matches only when followed by ctx
+        // Within a group the context-specific rules are listed before the
+        // unconditional fallback, so the order of the lines matters.
+        // NOTE(review): syntax summary inferred from usage in this file;
+        // confirm against the rule parser.
+
+        //variable names, derived from the Unicode names.
+
+        "POINT_SHEVA=\u05B0\n"
+        "POINT_HATAF_SEGOL=\u05B1\n"
+        "POINT_HATAF_PATAH=\u05B2\n"
+        "POINT_HATAF_QAMATS=\u05B3\n"
+        "POINT_HIRIQ=\u05B4\n"
+        "POINT_TSERE=\u05B5\n"
+        "POINT_SEGOL=\u05B6\n"
+        "POINT_PATAH=\u05B7\n"
+        "POINT_QAMATS=\u05B8\n"
+        "POINT_HOLAM=\u05B9\n"
+        "POINT_QUBUTS=\u05BB\n"
+        "POINT_DAGESH_OR_MAPIQ=\u05BC\n"
+        "POINT_METEG=\u05BD\n"
+        "PUNCTUATION_MAQAF=\u05BE\n"
+        "POINT_RAFE=\u05BF\n"
+        "PUNCTUATION_PASEQ=\u05C0\n"
+        "POINT_SHIN_DOT=\u05C1\n"
+        "POINT_SIN_DOT=\u05C2\n"
+        "PUNCTUATION_SOF_PASUQ=\u05C3\n"
+        "ALEF=\u05D0\n"
+        "BET=\u05D1\n"
+        "GIMEL=\u05D2\n"
+        "DALET=\u05D3\n"
+        "HE=\u05D4\n"
+        "VAV=\u05D5\n"
+        "ZAYIN=\u05D6\n"
+        "HET=\u05D7\n"
+        "TET=\u05D8\n"
+        "YOD=\u05D9\n"
+        "FINAL_KAF=\u05DA\n"
+        "KAF=\u05DB\n"
+        "LAMED=\u05DC\n"
+        "FINAL_MEM=\u05DD\n"
+        "MEM=\u05DE\n"
+        "FINAL_NUN=\u05DF\n"
+        "NUN=\u05E0\n"
+        "SAMEKH=\u05E1\n"
+        "AYIN=\u05E2\n"
+        "FINAL_PE=\u05E3\n"
+        "PE=\u05E4\n"
+        "FINAL_TSADI=\u05E5\n"
+        "TSADI=\u05E6\n"
+        "QOF=\u05E7\n"
+        "RESH=\u05E8\n"
+        "SHIN=\u05E9\n"
+        "TAV=\u05EA\n"
+        "YIDDISH_DOUBLE_VAV=\u05F0\n"
+        "YIDDISH_VAV_YOD=\u05F1\n"
+        "YIDDISH_DOUBLE_YOD=\u05F2\n"
+        "PUNCTUATION_GERESH=\u05F3\n"
+        "PUNCTUATION_GERSHAYIM=\u05F4\n"
+
+        //character classes
+        //Bracketed sets used as following-context in the rules below.
+
+        "letter=[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ]\n"
+
+        "softvowel=[eiyEIY]\n"
+
+        "vowellike=[{ALEF}{AYIN}{YOD}{VAV}]\n"
+
+        //Placeholders: no Latin source has been assigned to the points
+        //and punctuation marks yet, so these rules stay disabled.
+        //?>{POINT_SHEVA}
+        //?>{POINT_HATAF_SEGOL}
+        //?>{POINT_HATAF_PATAH}
+        //?>{POINT_HATAF_QAMATS}
+        //?>{POINT_HIRIQ}
+        //?>{POINT_TSERE}
+        //?>{POINT_SEGOL}
+        //?>{POINT_PATAH}
+        //?>{POINT_QAMATS}
+        //?>{POINT_HOLAM}
+        //?>{POINT_QUBUTS}
+        //?>{POINT_DAGESH_OR_MAPIQ}
+        //?>{POINT_METEG}
+        //?>{PUNCTUATION_MAQAF}
+        //?>{POINT_RAFE}
+        //?>{PUNCTUATION_PASEQ}
+        //?>{POINT_SHIN_DOT}
+        //?>{POINT_SIN_DOT}
+        //?>{PUNCTUATION_SOF_PASUQ}
+
+        //Latin to Hebrew.  Letters that have a final form use the regular
+        //form when another Latin letter follows and the final form otherwise.
+
+        "a>{ALEF}\n"
+        "A>{ALEF}\n"
+
+        "b>{BET}\n"
+        "B>{BET}\n"
+
+        "c[{softvowel}>{SAMEKH}\n"
+        "C[{softvowel}>{SAMEKH}\n"
+        "c[{letter}>{KAF}\n"
+        "C[{letter}>{KAF}\n"
+        "c>{FINAL_KAF}\n"
+        "C>{FINAL_KAF}\n"
+
+        "d>{DALET}\n"
+        "D>{DALET}\n"
+
+        "e>{AYIN}\n"
+        "E>{AYIN}\n"
+
+        "f[{letter}>{PE}\n"
+        "f>{FINAL_PE}\n"
+        "F[{letter}>{PE}\n"
+        "F>{FINAL_PE}\n"
+
+        "g>{GIMEL}\n"
+        "G>{GIMEL}\n"
+
+        "h>{HE}\n"
+        "H>{HE}\n"
+
+        "i>{YOD}\n"
+        "I>{YOD}\n"
+
+        "j>{DALET}{SHIN}\n"
+        "J>{DALET}{SHIN}\n"
+
+        "kH>{HET}\n"
+        "kh>{HET}\n"
+        "KH>{HET}\n"
+        "Kh>{HET}\n"
+        "k[{letter}>{KAF}\n"
+        "K[{letter}>{KAF}\n"
+        "k>{FINAL_KAF}\n"
+        "K>{FINAL_KAF}\n"
+
+        "l>{LAMED}\n"
+        "L>{LAMED}\n"
+
+        "m[{letter}>{MEM}\n"
+        "m>{FINAL_MEM}\n"
+        "M[{letter}>{MEM}\n"
+        "M>{FINAL_MEM}\n"
+
+        "n[{letter}>{NUN}\n"
+        "n>{FINAL_NUN}\n"
+        "N[{letter}>{NUN}\n"
+        "N>{FINAL_NUN}\n"
+
+        "o>{VAV}\n"
+        "O>{VAV}\n"
+
+        "p[{letter}>{PE}\n"
+        "p>{FINAL_PE}\n"
+        "P[{letter}>{PE}\n"
+        "P>{FINAL_PE}\n"
+
+        "q>{QOF}\n"
+        "Q>{QOF}\n"
+
+        "r>{RESH}\n"
+        "R>{RESH}\n"
+
+        "sH>{SHIN}\n"
+        "sh>{SHIN}\n"
+        "SH>{SHIN}\n"
+        "Sh>{SHIN}\n"
+        "s>{SAMEKH}\n"
+        "S>{SAMEKH}\n"
+
+        "th>{TAV}\n"
+        "tH>{TAV}\n"
+        "TH>{TAV}\n"
+        "Th>{TAV}\n"
+        "tS[{letter}>{TSADI}\n"
+        "ts[{letter}>{TSADI}\n"
+        "Ts[{letter}>{TSADI}\n"
+        "TS[{letter}>{TSADI}\n"
+        "tS>{FINAL_TSADI}\n"
+        "ts>{FINAL_TSADI}\n"
+        "Ts>{FINAL_TSADI}\n"
+        "TS>{FINAL_TSADI}\n"
+        "t>{TET}\n"
+        "T>{TET}\n"
+
+        "u>{VAV}\n"
+        "U>{VAV}\n"
+
+        "v>{VAV}\n"
+        "V>{VAV}\n"
+
+        "w>{VAV}\n"
+        "W>{VAV}\n"
+
+        "x>{KAF}{SAMEKH}\n"
+        "X>{KAF}{SAMEKH}\n"
+
+        "y>{YOD}\n"
+        "Y>{YOD}\n"
+
+        "z>{ZAYIN}\n"
+        "Z>{ZAYIN}\n"
+
+        //#?>{YIDDISH_DOUBLE_VAV}
+        //?>{YIDDISH_VAV_YOD}
+        //?>{YIDDISH_DOUBLE_YOD}
+        //?>{PUNCTUATION_GERESH}
+        //?>{PUNCTUATION_GERSHAYIM}
+
+        //Drop the apostrophe, which the reverse rules below insert as a
+        //separator (e.g. t'' before HE so the result is not read as "th").
+        "''>\n"
+
+        //{POINT_SHEVA}>@
+        //{POINT_HATAF_SEGOL}>@
+        //{POINT_HATAF_PATAH}>@
+        //{POINT_HATAF_QAMATS}>@
+        //{POINT_HIRIQ}>@
+        //{POINT_TSERE}>@
+        //{POINT_SEGOL}>@
+        //{POINT_PATAH}>@
+        //{POINT_QAMATS}>@
+        //{POINT_HOLAM}>@
+        //{POINT_QUBUTS}>@
+        //{POINT_DAGESH_OR_MAPIQ}>@
+        //{POINT_METEG}>@
+        //{PUNCTUATION_MAQAF}>@
+        //{POINT_RAFE}>@
+        //{PUNCTUATION_PASEQ}>@
+        //{POINT_SHIN_DOT}>@
+        //{POINT_SIN_DOT}>@
+        //{PUNCTUATION_SOF_PASUQ}>@
+
+        //Hebrew to Latin.  An apostrophe is emitted after k, s and t when
+        //the next Hebrew letter would otherwise produce a Latin digraph
+        //(kh, sh, th, ts) that the forward rules map to a different letter.
+
+        "a<{ALEF}\n"
+        "e<{AYIN}\n"
+        "b<{BET}\n"
+        "d<{DALET}\n"
+        "k<{FINAL_KAF}\n"
+        "m<{FINAL_MEM}\n"
+        "n<{FINAL_NUN}\n"
+        "p<{FINAL_PE}\n"
+        "ts<{FINAL_TSADI}\n"
+        "g<{GIMEL}\n"
+        "kh<{HET}\n"
+        "h<{HE}\n"
+        "k''<{KAF}[{HE}\n"
+        "k<{KAF}\n"
+        "l<{LAMED}\n"
+        "m<{MEM}\n"
+        "n<{NUN}\n"
+        "p<{PE}\n"
+        "q<{QOF}\n"
+        "r<{RESH}\n"
+        "s''<{SAMEKH}[{HE}\n"
+        "s<{SAMEKH}\n"
+        "sh<{SHIN}\n"
+        "th<{TAV}\n"
+        "t''<{TET}[{HE}\n"
+        "t''<{TET}[{SAMEKH}\n"
+        "t''<{TET}[{SHIN}\n"
+        "t<{TET}\n"
+        "ts<{TSADI}\n"
+        "v<{VAV}[{vowellike}\n"
+        "u<{VAV}\n"
+        "y<{YOD}\n"
+        "z<{ZAYIN}\n"
+
+        //{YIDDISH_DOUBLE_VAV}>@
+        //{YIDDISH_VAV_YOD}>@
+        //{YIDDISH_DOUBLE_YOD}>@
+        //{PUNCTUATION_GERESH}>@
+        //{PUNCTUATION_GERSHAYIM}>@
+
+        "<''\n"
+    }
+}
--- a/icu4c/data/translit/lkana.txt
+++ b/icu4c/data/translit/lkana.txt
@ -0,0 +1,877 @@
+//--------------------------------------------------------------------
+//  Copyright (C) 1999, International Business Machines
+//  Corporation and others.  All Rights Reserved.
+//--------------------------------------------------------------------
+//  Date        Name        Description
+//  11/17/99    aliu        Creation.
+//--------------------------------------------------------------------
+
+// Latin-Kana
+
+// Rewritten April 1999 to implement Hepburn (Hebon-shiki)
+// transliteration.  Reference: CJKV Information Processing, Lunde,
+// 1999, pp. 30-35.
+// @author Alan Liu
+
+lkana {
+    Rule {
+        // Latin <-> kana transliteration rule set.
+        //   name=value   defines a variable, referenced later as {name}
+        //   x>y          Latin-to-kana rule
+        //   x<y          kana-to-Latin rule
+        //   x[ctx        x matches only when followed by ctx
+        //   |            appears in the output of rules that emit a marker
+        //                ({small} or {hvr}) which a later rule consumes
+        // NOTE(review): syntax summary inferred from usage in this file;
+        // confirm against the rule parser.
+
+        //------------------------------------------------------------
+        // Variables
+        //------------------------------------------------------------
+
+        // Hiragana.  These are named according to the
+        // regularized Nippon romanization (the naming system
+        // used by Unicode).  Thus \u3062 is called "di", not
+        // "ji".  "x_" is the small form of "_", e.g. "xa" is
+        // small "a".
+
+        "xa=\u3041\n"
+        "a=\u3042\n"
+        "xi=\u3043\n"
+        "i=\u3044\n"
+        "xu=\u3045\n"
+        "u=\u3046\n"
+        "xe=\u3047\n"
+        "e=\u3048\n"
+        "xo=\u3049\n"
+        "o=\u304A\n"
+
+        "ka=\u304B\n"
+        "ga=\u304C\n"
+        "ki=\u304D\n"
+        "gi=\u304E\n"
+        "ku=\u304F\n"
+        "gu=\u3050\n"
+        "ke=\u3051\n"
+        "ge=\u3052\n"
+        "ko=\u3053\n"
+        "go=\u3054\n"
+
+        "sa=\u3055\n"
+        "za=\u3056\n"
+        "si=\u3057\n"
+        "zi=\u3058\n"
+        "su=\u3059\n"
+        "zu=\u305A\n"
+        "se=\u305B\n"
+        "ze=\u305C\n"
+        "so=\u305D\n"
+        "zo=\u305E\n"
+
+        "ta=\u305F\n"
+        "da=\u3060\n"
+        "ti=\u3061\n"
+        "di=\u3062\n"
+        "xtu=\u3063\n"
+        "tu=\u3064\n"
+        "du=\u3065\n"
+        "te=\u3066\n"
+        "de=\u3067\n"
+        "to=\u3068\n"
+        "do=\u3069\n"
+
+        "na=\u306A\n"
+        "ni=\u306B\n"
+        "nu=\u306C\n"
+        "ne=\u306D\n"
+        "no=\u306E\n"
+
+        "ha=\u306F\n"
+        "ba=\u3070\n"
+        "pa=\u3071\n"
+        "hi=\u3072\n"
+        "bi=\u3073\n"
+        "pi=\u3074\n"
+        "hu=\u3075\n"
+        "bu=\u3076\n"
+        "pu=\u3077\n"
+        "he=\u3078\n"
+        "be=\u3079\n"
+        "pe=\u307A\n"
+        "ho=\u307B\n"
+        "bo=\u307C\n"
+        "po=\u307D\n"
+
+        "ma=\u307E\n"
+        "mi=\u307F\n"
+        "mu=\u3080\n"
+        "me=\u3081\n"
+        "mo=\u3082\n"
+
+        "xya=\u3083\n"
+        "ya=\u3084\n"
+        "xyu=\u3085\n"
+        "yu=\u3086\n"
+        "xyo=\u3087\n"
+        "yo=\u3088\n"
+
+        "ra=\u3089\n"
+        "ri=\u308A\n"
+        "ru=\u308B\n"
+        "re=\u308C\n"
+        "ro=\u308D\n"
+
+        "xwa=\u308E\n"
+        "wa=\u308F\n"
+        "wi=\u3090\n"
+        "we=\u3091\n"
+        "wo=\u3092\n"
+
+        "n=\u3093\n"
+        "vu=\u3094\n"
+
+        // Katakana.  "X_" is the small form of "_", e.g. "XA"
+        // is small "A".
+
+        "XA=\u30A1\n"
+        "A=\u30A2\n"
+        "XI=\u30A3\n"
+        "I=\u30A4\n"
+        "XU=\u30A5\n"
+        "U=\u30A6\n"
+        "XE=\u30A7\n"
+        "E=\u30A8\n"
+        "XO=\u30A9\n"
+        "O=\u30AA\n"
+
+        "KA=\u30AB\n"
+        "GA=\u30AC\n"
+        "KI=\u30AD\n"
+        "GI=\u30AE\n"
+        "KU=\u30AF\n"
+        "GU=\u30B0\n"
+        "KE=\u30B1\n"
+        "GE=\u30B2\n"
+        "KO=\u30B3\n"
+        "GO=\u30B4\n"
+
+        "SA=\u30B5\n"
+        "ZA=\u30B6\n"
+        "SI=\u30B7\n"
+        "ZI=\u30B8\n"
+        "SU=\u30B9\n"
+        "ZU=\u30BA\n"
+        "SE=\u30BB\n"
+        "ZE=\u30BC\n"
+        "SO=\u30BD\n"
+        "ZO=\u30BE\n"
+
+        "TA=\u30BF\n"
+        "DA=\u30C0\n"
+        "TI=\u30C1\n"
+        "DI=\u30C2\n"
+        "XTU=\u30C3\n"
+        "TU=\u30C4\n"
+        "DU=\u30C5\n"
+        "TE=\u30C6\n"
+        "DE=\u30C7\n"
+        "TO=\u30C8\n"
+        "DO=\u30C9\n"
+
+        "NA=\u30CA\n"
+        "NI=\u30CB\n"
+        "NU=\u30CC\n"
+        "NE=\u30CD\n"
+        "NO=\u30CE\n"
+
+        "HA=\u30CF\n"
+        "BA=\u30D0\n"
+        "PA=\u30D1\n"
+        "HI=\u30D2\n"
+        "BI=\u30D3\n"
+        "PI=\u30D4\n"
+        "HU=\u30D5\n"
+        "BU=\u30D6\n"
+        "PU=\u30D7\n"
+        "HE=\u30D8\n"
+        "BE=\u30D9\n"
+        "PE=\u30DA\n"
+        "HO=\u30DB\n"
+        "BO=\u30DC\n"
+        "PO=\u30DD\n"
+
+        "MA=\u30DE\n"
+        "MI=\u30DF\n"
+        "MU=\u30E0\n"
+        "ME=\u30E1\n"
+        "MO=\u30E2\n"
+
+        "XYA=\u30E3\n"
+        "YA=\u30E4\n"
+        "XYU=\u30E5\n"
+        "YU=\u30E6\n"
+        "XYO=\u30E7\n"
+        "YO=\u30E8\n"
+
+        "RA=\u30E9\n"
+        "RI=\u30EA\n"
+        "RU=\u30EB\n"
+        "RE=\u30EC\n"
+        "RO=\u30ED\n"
+
+        "XWA=\u30EE\n"
+        "WA=\u30EF\n"
+        "WI=\u30F0\n"
+        "WE=\u30F1\n"
+        "WO=\u30F2\n"
+
+        "N=\u30F3\n"
+        "VU=\u30F4\n"
+
+        "XKA=\u30F5\n"
+        "XKE=\u30F6\n"
+
+        "VA=\u30F7\n"
+        "VI=\u30F8\n"
+        "VE=\u30F9\n"
+        "VO=\u30FA\n"
+
+        "DOT=\u30FB\n"  // Middle dot
+        "LONG=\u30FC\n" // Prolonged sound mark
+
+        // Categories and programmatic variables
+        // {small} and {hvr} are private-use code points used only as
+        // temporary markers between rules; {hv} is the set of small kana
+        // that the reverse rules treat as a following vowel.
+
+        "vowel=[aiueo]\n"
+        "small=\uE000\n"
+        "hvr=\uE001\n"
+        "hv=[{xya}{xi}{xyu}{xe}{xyo}]\n"
+
+        //------------------------------------------------------------
+        // Rules
+        //------------------------------------------------------------
+        /*
+// Hepburn equivalents
+
+shi>|si
+ji>|zi
+chi>|ti
+// ji>|di // By default we use the ji-zi mapping
+tsu>|tu
+fu>|hu
+
+sh[{vowel}>|sy
+ja>|zya
+// ji = zi
+ju>|zyu
+je>|zye
+jo>|zyo
+cha>|tya
+// chi = ti
+chu>|tyu
+che>|tye
+cho>|tyo
+// j[{vowel} = dy{vowel}, but we use zy{vowel} by default
+
+// Historically, m preceded b, p, or m; now n is used
+// in all cases
+m[b>n
+m[p>n
+m[m>n
+
+// Compatibility
+
+// 'f' group
+fa>{fu}{xa}
+fi>{fu}{xi}
+// fu = hu
+fe>{fu}{xe}
+fo>{fu}{xo}
+
+// 'jy' group; these will not round-trip, except for "jyi"
+// See also the 'j' group.
+jya>|zya
+jyi>{zi}{xyi}
+jyu>|zyu
+jye>|zye
+jyo>|zyo
+
+// Nippon romanized forms
+
+a>{a}
+i>{i}
+u>{u}
+e>{e}
+o>{o}
+ka>{ka}
+ki>{ki}
+ku>{ku}
+ke>{ke}
+ko>{ko}
+ga>{ga}
+gi>{gi}
+gu>{gu}
+ge>{ge}
+go>{go}
+sa>{sa}
+si>{si}
+su>{su}
+se>{se}
+so>{so}
+za>{za}
+zi>{zi}
+zu>{zu}
+ze>{ze}
+zo>{zo}
+ta>{ta}
+ti>{ti}
+tu>{tu}
+te>{te}
+to>{to}
+da>{da}
+di>{di}
+du>{du}
+de>{de}
+do>{do}
+na>{na}
+ni>{ni}
+nu>{nu}
+ne>{ne}
+no>{no}
+ha>{ha}
+hi>{hi}
+hu>{hu}
+he>{he}
+ho>{ho}
+ba>{ba}
+bi>{bi}
+bu>{bu}
+be>{be}
+bo>{bo}
+pa>{pa}
+pi>{pi}
+pu>{pu}
+pe>{pe}
+po>{po}
+ma>{ma}
+mi>{mi}
+mu>{mu}
+me>{me}
+mo>{mo}
+ya>{ya}
+yu>{yu}
+yo>{yo}
+ra>{ra}
+ri>{ri}
+ru>{ru}
+re>{re}
+ro>{ro}
+wa>{wa}
+wi>{wi}
+// No "wu"
+we>{we}
+wo>{wo} // Reverse {wo} to "o", not "wo"
+n''>{n}
+n>{n}
+
+// Palatized Nippon romanized syllables
+
+ky[{vowel}>{ki}|{small}
+gy[{vowel}>{gi}|{small}
+sy[{vowel}>{si}|{small}
+zy[{vowel}>{zi}|{small}
+ty[{vowel}>{ti}|{small}
+dy[{vowel}>{di}|{small}
+ny[{vowel}>{ni}|{small}
+my[{vowel}>{mi}|{small}
+hy[{vowel}>{hi}|{small}
+by[{vowel}>{bi}|{small}
+py[{vowel}>{pi}|{small}
+ry[{vowel}>{ri}|{small}
+
+// Doubled consonants
+
+c[c>{xtu}
+k[k>{xtu}
+g[g>{xtu}
+s[s>{xtu}
+z[z>{xtu}
+j[j>{xtu}
+t[t>{xtu}
+d[d>{xtu}
+h[h>{xtu}
+f[f>{xtu}
+p[p>{xtu}
+b[b>{xtu}
+m[m>{xtu}
+y[y>{xtu}
+r[r>{xtu}
+w[w>{xtu}
+        */
+
+        // Latin to hiragana, grouped by leading Latin letter.  Within a
+        // group: plain syllables, then "Cy" + vowel (emits the i-row kana
+        // followed by the {small} marker), then doubled consonants, which
+        // become the small tu.
+
+        "a>{a}\n"
+
+        "ba>{ba}\n"
+        "bi>{bi}\n"
+        "bu>{bu}\n"
+        "be>{be}\n"
+        "bo>{bo}\n"
+        "by[{vowel}>{bi}|{small}\n"
+        "b[b>{xtu}\n"
+
+        "da>{da}\n"
+        "di>{di}\n"
+        "du>{du}\n"
+        "de>{de}\n"
+        "do>{do}\n"
+        "dy[{vowel}>{di}|{small}\n"
+        "dh[{vowel}>{de}|{small}\n"
+        "d[d>{xtu}\n"
+
+        "e>{e}\n"
+
+        "fa>{hu}{xa}\n"
+        "fi>{hu}{xi}\n"
+        "fe>{hu}{xe}\n"
+        "fo>{hu}{xo}\n"
+        "fya>{hu}{xya}\n"
+        "fyu>{hu}{xyu}\n"
+        "fyo>{hu}{xyo}\n"
+        "f[f>{xtu}\n"
+
+        "ga>{ga}\n"
+        "gi>{gi}\n"
+        "gu>{gu}\n"
+        "ge>{ge}\n"
+        "go>{go}\n"
+        "gy[{vowel}>{gi}|{small}\n"
+        "gwa>{gu}{xwa}\n"
+        "gwi>{gu}{xi}\n"
+        "gwu>{gu}{xu}\n"
+        "gwe>{gu}{xe}\n"
+        "gwo>{gu}{xo}\n"
+        "g[g>{xtu}\n"
+
+        "ha>{ha}\n"
+        "hi>{hi}\n"
+        "hu>{hu}\n"
+        "he>{he}\n"
+        "ho>{ho}\n"
+        "hy[{vowel}>{hi}|{small}\n"
+        "h[h>{xtu}\n"
+
+        "i>{i}\n"
+
+        "ka>{ka}\n"
+        "ki>{ki}\n"
+        "ku>{ku}\n"
+        "ke>{ke}\n"
+        "ko>{ko}\n"
+        "kwa>{ku}{xwa}\n"
+        "kwi>{ku}{xi}\n"
+        "kwu>{ku}{xu}\n"
+        "kwe>{ku}{xe}\n"
+        "kwo>{ku}{xo}\n"
+        "ky[{vowel}>{ki}|{small}\n"
+        "k[k>{xtu}\n"
+
+        // "m" before b, f, m, p, v (or an apostrophe) is written as the
+        // syllabic n rather than as a doubled consonant.
+        "ma>{ma}\n"
+        "mi>{mi}\n"
+        "mu>{mu}\n"
+        "me>{me}\n"
+        "mo>{mo}\n"
+        "my[{vowel}>{mi}|{small}\n"
+        "m[b>{n}\n"
+        "m[f>{n}\n"
+        "m[m>{n}\n"
+        "m[p>{n}\n"
+        "m[v>{n}\n"
+        "m''>{n}\n"
+
+        "na>{na}\n"
+        "ni>{ni}\n"
+        "nu>{nu}\n"
+        "ne>{ne}\n"
+        "no>{no}\n"
+        "ny[{vowel}>{ni}|{small}\n"
+        "nn>{n}\n"
+        "n''>{n}\n"
+        "n>{n}\n"
+
+        "o>{o}\n"
+
+        "pa>{pa}\n"
+        "pi>{pi}\n"
+        "pu>{pu}\n"
+        "pe>{pe}\n"
+        "po>{po}\n"
+        "py[{vowel}>{pi}|{small}\n"
+        "p[p>{xtu}\n"
+
+        "qa>{ku}{xa}\n"
+        "qi>{ku}{xi}\n"
+        "qu>{ku}{xu}\n"
+        "qe>{ku}{xe}\n"
+        "qo>{ku}{xo}\n"
+        "qy[{vowel}>{ku}|{small}\n"
+        "q[q>{xtu}\n"
+
+        "ra>{ra}\n"
+        "ri>{ri}\n"
+        "ru>{ru}\n"
+        "re>{re}\n"
+        "ro>{ro}\n"
+        "ry[{vowel}>{ri}|{small}\n"
+        "r[r>{xtu}\n"
+
+        "sa>{sa}\n"
+        "si>{si}\n"
+        "su>{su}\n"
+        "se>{se}\n"
+        "so>{so}\n"
+        "sy[{vowel}>{si}|{small}\n"
+        "s[sh>{xtu}\n"
+        "s[s>{xtu}\n"
+
+        "ta>{ta}\n"
+        "ti>{ti}\n"
+        "tu>{tu}\n"
+        "te>{te}\n"
+        "to>{to}\n"
+        "th[{vowel}>{te}|{small}\n"
+        "tsa>{tu}{xa}\n"
+        "tsi>{tu}{xi}\n"
+        "tse>{tu}{xe}\n"
+        "tso>{tu}{xo}\n"
+        "ty[{vowel}>{ti}|{small}\n"
+        "t[ts>{xtu}\n"
+        "t[ch>{xtu}\n"
+        "t[t>{xtu}\n"
+
+        "u>{u}\n"
+
+        // Only vu is defined in the hiragana variable list; va/vi/ve/vo
+        // are defined only in the katakana list, so those variables are
+        // used here.
+        "va>{VA}\n"
+        "vi>{VI}\n"
+        "vu>{vu}\n"
+        "ve>{VE}\n"
+        "vo>{VO}\n"
+        "vy[{vowel}>{VI}|{small}\n"
+        "v[v>{xtu}\n"
+
+        "wa>{wa}\n"
+        "wi>{wi}\n"
+        "we>{we}\n"
+        "wo>{wo}\n"
+        "w[w>{xtu}\n"
+
+        "ya>{ya}\n"
+        "yu>{yu}\n"
+        "ye>{i}{xe}\n"
+        "yo>{yo}\n"
+        "y[y>{xtu}\n"
+
+        "za>{za}\n"
+        "zi>{zi}\n"
+        "zu>{zu}\n"
+        "ze>{ze}\n"
+        "zo>{zo}\n"
+        "zy[{vowel}>{zi}|{small}\n"
+        "z[z>{xtu}\n"
+
+        // Explicit entry of the small kana via an "x" prefix.
+        "xa>{xa}\n"
+        "xi>{xi}\n"
+        "xu>{xu}\n"
+        "xe>{xe}\n"
+        "xo>{xo}\n"
+        "xka>{XKA}\n"
+        "xke>{XKE}\n"
+        "xtu>{xtu}\n"
+        "xwa>{xwa}\n"
+        "xya>{xya}\n"
+        "xyu>{xyu}\n"
+        "xyo>{xyo}\n"
+
+        // optional mappings
+        "wu>{u}\n"
+
+        "ca>{ka}\n"
+        "ci>{si}\n"
+        "cu>{ku}\n"
+        "ce>{se}\n"
+        "co>{ko}\n"
+        "cha>{ti}{xya}\n"
+        "chi>{ti}\n"
+        "chu>{ti}{xyu}\n"
+        "che>{ti}{xe}\n"
+        "cho>{ti}{xyo}\n"
+        "cy[{vowel}>{ti}|{small}\n"
+        "c[k>{xtu}\n"
+        "c[c>{xtu}\n"
+
+        "fu>{hu}\n"
+
+        "ja>{zi}{xya}\n"
+        "ji>{zi}\n"
+        "ju>{zi}{xyu}\n"
+        "je>{zi}{xe}\n"
+        "jo>{zi}{xyo}\n"
+        "jy[{vowel}>{zi}|{small}\n"
+        "j[j>{xtu}\n"
+
+        "la>{ra}\n"
+        "li>{ri}\n"
+        "lu>{ru}\n"
+        "le>{re}\n"
+        "lo>{ro}\n"
+        "ly[{vowel}>{ri}|{small}\n"
+        "l[l>{xtu}\n"
+
+        "sha>{si}{xya}\n"
+        "shi>{si}\n"
+        "shu>{si}{xyu}\n"
+        "she>{si}{xe}\n"
+        "sho>{si}{xyo}\n"
+
+        "tsu>{tu}\n"
+
+        "yi>{i}\n"
+
+        "xtsu>{xtu}\n"
+        "xyi>{xi}\n"
+        "xye>{xe}\n"
+
+
+
+
+
+
+
+        // Convert vowels to small form
+        // These consume the {small} marker emitted by the "Cy" + vowel
+        // rules above together with the vowel that follows it.
+        "{small}a>{xya}\n"
+        "{small}i>{xi}\n"
+        "{small}u>{xyu}\n"
+        "{small}e>{xe}\n"
+        "{small}o>{xyo}\n"
+
+
+
+
+        // Kana to Latin.  An i-row (or other listed) kana followed by a
+        // small kana in {hv} emits its consonant part plus the {hvr}
+        // marker; the rules at the very end turn {hvr} + small kana into
+        // the Latin vowel.  Lines tagged "Hepburn" emit the Hepburn
+        // spelling rather than the variable's Nippon-style name.
+        // NOTE(review): a small tu before {zi}/{di} yields "z"/"d" followed
+        // by "ji", and before {hu} yields "h" followed by "fu"; the forward
+        // rules have no "z[j", "d[j" or "h[f" rule, so these may not
+        // round-trip -- confirm whether that is intended.
+
+        "gy|{hvr}<{gi}[{hv}\n"
+        "gwa<{gu}{xwa}\n"
+        "gwi<{gu}{xi}\n"
+        "gwu<{gu}{xu}\n"
+        "gwe<{gu}{xe}\n"
+        "gwo<{gu}{xo}\n"
+        "ga<{ga}\n"
+        "gi<{gi}\n"
+        "gu<{gu}\n"
+        "ge<{ge}\n"
+        "go<{go}\n"
+
+        "ky|{hvr}<{ki}[{hv}\n"
+        "kwa<{ku}{xwa}\n"
+        "kwi<{ku}{xi}\n"
+        "kwu<{ku}{xu}\n"
+        "kwe<{ku}{xe}\n"
+        "kwo<{ku}{xo}\n"
+        "qa<{ku}{xa}\n"
+        "qya<{ku}{xya}\n"
+        "qyu<{ku}{xyu}\n"
+        "qyo<{ku}{xyo}\n"
+        "ka<{ka}\n"
+        "ki<{ki}\n"
+        "ku<{ku}\n"
+        "ke<{ke}\n"
+        "ko<{ko}\n"
+
+        "j|{hvr}<{zi}[{hv}\n" // Hepburn
+        "za<{za}\n"
+        "ji<{zi}\n" // Hepburn
+        "zu<{zu}\n"
+        "ze<{ze}\n"
+        "zo<{zo}\n"
+
+        "sh|{hvr}<{si}[{hv}\n" // Hepburn
+        "sa<{sa}\n"
+        "shi<{si}\n"
+        "su<{su}\n"
+        "se<{se}\n"
+        "so<{so}\n"
+
+        "j|{hvr}<{di}[{hv}\n" // Hepburn
+        "dh|{hvr}<{de}[{hv}\n" 
+        "da<{da}\n"
+        "ji<{di}\n" // Hepburn
+        "de<{de}\n"
+        "do<{do}\n"
+        "zu<{du}\n" // Hepburn
+
+        "ch|{hvr}<{ti}[{hv}\n" // Hepburn
+        "tsa<{tu}{xa}\n"
+        "tsi<{tu}{xi}\n"
+        "tse<{tu}{xe}\n"
+        "tso<{tu}{xo}\n"
+        "th|{hvr}<{te}[{hv}\n"
+        "ta<{ta}\n"
+        "chi<{ti}\n" // Hepburn
+        "tsu<{tu}\n" // Hepburn
+        "te<{te}\n"
+        "to<{to}\n"
+
+        "ny|{hvr}<{ni}[{hv}\n"
+        "na<{na}\n"
+        "ni<{ni}\n"
+        "nu<{nu}\n"
+        "ne<{ne}\n"
+        "no<{no}\n"
+
+        "by|{hvr}<{bi}[{hv}\n"
+        "ba<{ba}\n"
+        "bi<{bi}\n"
+        "bu<{bu}\n"
+        "be<{be}\n"
+        "bo<{bo}\n"
+
+        "py|{hvr}<{pi}[{hv}\n"
+        "pa<{pa}\n"
+        "pi<{pi}\n"
+        "pu<{pu}\n"
+        "pe<{pe}\n"
+        "po<{po}\n"
+
+        "hy|{hvr}<{hi}[{hv}\n"
+        "fa<{hu}{xa}\n"
+        "fi<{hu}{xi}\n"
+        "fe<{hu}{xe}\n"
+        "fo<{hu}{xo}\n"
+        "fya<{hu}{xya}\n"
+        "fyu<{hu}{xyu}\n"
+        "fyo<{hu}{xyo}\n"
+        "ha<{ha}\n"
+        "hi<{hi}\n"
+        "fu<{hu}\n" // Hepburn
+        "he<{he}\n"
+        "ho<{ho}\n"
+
+        "my|{hvr}<{mi}[{hv}\n"
+        "ma<{ma}\n"
+        "mi<{mi}\n"
+        "mu<{mu}\n"
+        "me<{me}\n"
+        "mo<{mo}\n"
+
+        "ya<{ya}\n"
+        "yu<{yu}\n"
+        "ye<{i}{xe}\n"
+        "yo<{yo}\n"
+        "xya<{xya}\n"
+        "xyu<{xyu}\n"
+        "xyo<{xyo}\n"
+
+        "ry|{hvr}<{ri}[{hv}\n"
+        "ra<{ra}\n"
+        "ri<{ri}\n"
+        "ru<{ru}\n"
+        "re<{re}\n"
+        "ro<{ro}\n"
+
+        "wa<{wa}\n"
+        "wi<{wi}\n"
+        "we<{we}\n"
+        "wo<{wo}\n"
+
+        "vu<{vu}\n"
+        "vy|{hvr}<{VI}[{hv}\n"
+        "v<{xtu}[{vu}\n"
+
+        "xa<{xa}\n"
+        "xi<{xi}\n"
+        "xu<{xu}\n"
+        "xe<{xe}\n"
+        "xo<{xo}\n"
+
+        // Syllabic n gets a trailing apostrophe before a vowel, an n-row
+        // kana, a y-row kana or another syllabic n, so the Latin result is
+        // not read back as a single na/ni/.../nya/... syllable.
+        "n''<{n}[{a}\n"
+        "n''<{n}[{i}\n"
+        "n''<{n}[{u}\n"
+        "n''<{n}[{e}\n"
+        "n''<{n}[{o}\n"
+        "n''<{n}[{na}\n"
+        "n''<{n}[{ni}\n"
+        "n''<{n}[{nu}\n"
+        "n''<{n}[{ne}\n"
+        "n''<{n}[{no}\n"
+        "n''<{n}[{ya}\n"
+        "n''<{n}[{yu}\n"
+        "n''<{n}[{yo}\n"
+        "n''<{n}[{n}\n"
+        "n<{n}\n"
+
+
+        // Small tu becomes a copy of the consonant that starts the kana
+        // following it; a small tu with no listed follower falls through
+        // to "xtu" at the end of this group.
+        "g<{xtu}[{ga}\n"
+        "g<{xtu}[{gi}\n"
+        "g<{xtu}[{gu}\n"
+        "g<{xtu}[{ge}\n"
+        "g<{xtu}[{go}\n"
+        "k<{xtu}[{ka}\n"
+        "k<{xtu}[{ki}\n"
+        "k<{xtu}[{ku}\n"
+        "k<{xtu}[{ke}\n"
+        "k<{xtu}[{ko}\n"
+
+        "z<{xtu}[{za}\n"
+        "z<{xtu}[{zi}\n"
+        "z<{xtu}[{zu}\n"
+        "z<{xtu}[{ze}\n"
+        "z<{xtu}[{zo}\n"
+        "s<{xtu}[{sa}\n"
+        "s<{xtu}[{si}\n"
+        "s<{xtu}[{su}\n"
+        "s<{xtu}[{se}\n"
+        "s<{xtu}[{so}\n"
+
+        "d<{xtu}[{da}\n"
+        "d<{xtu}[{di}\n"
+        "d<{xtu}[{du}\n"
+        "d<{xtu}[{de}\n"
+        "d<{xtu}[{do}\n"
+        "t<{xtu}[{ta}\n"
+        "t<{xtu}[{ti}\n"
+        "t<{xtu}[{tu}\n"
+        "t<{xtu}[{te}\n"
+        "t<{xtu}[{to}\n"
+
+
+        "b<{xtu}[{ba}\n"
+        "b<{xtu}[{bi}\n"
+        "b<{xtu}[{bu}\n"
+        "b<{xtu}[{be}\n"
+        "b<{xtu}[{bo}\n"
+        "p<{xtu}[{pa}\n"
+        "p<{xtu}[{pi}\n"
+        "p<{xtu}[{pu}\n"
+        "p<{xtu}[{pe}\n"
+        "p<{xtu}[{po}\n"
+        "h<{xtu}[{ha}\n"
+        "h<{xtu}[{hi}\n"
+        "h<{xtu}[{hu}\n"
+        "h<{xtu}[{he}\n"
+        "h<{xtu}[{ho}\n"
+
+
+        "r<{xtu}[{ra}\n"
+        "r<{xtu}[{ri}\n"
+        "r<{xtu}[{ru}\n"
+        "r<{xtu}[{re}\n"
+        "r<{xtu}[{ro}\n"
+
+        "w<{xtu}[{wa}\n"
+        "xtu<{xtu}\n"
+
+        "a<{a}\n"
+        "i<{i}\n"
+        "u<{u}\n"
+        "e<{e}\n"
+        "o<{o}\n"
+
+
+
+        // Convert small forms to vowels
+        // These consume the {hvr} marker emitted by the "...|{hvr}<" rules
+        // above together with the small kana that follows it.
+        "a<{hvr}{xya}\n"
+        "i<{hvr}{xi}\n"
+        "u<{hvr}{xyu}\n"
+        "e<{hvr}{xe}\n"
+        "o<{hvr}{xyo}\n"              
+    }
+}
+
+
+
--- a/icu4c/data/translit/lrussian.txt
+++ b/icu4c/data/translit/lrussian.txt
@ -0,0 +1,315 @@
+//--------------------------------------------------------------------
+//  Copyright (C) 1999, International Business Machines
+//  Corporation and others.  All Rights Reserved.
+//--------------------------------------------------------------------
+//  Date        Name        Description
+//  11/17/99    aliu        Creation.
+//--------------------------------------------------------------------
+
+// Latin-Russian
+
+lrussian {
+    Rule {
+        // Latin <-> Russian (Cyrillic) transliteration rule set.
+        //   name=value   defines a variable, referenced later as {name}
+        //   x>y          Latin-to-Cyrillic rule
+        //   x<y          Cyrillic-to-Latin rule
+        //   x[ctx        x matches only when followed by ctx
+        // Longer Latin sequences are listed before their prefixes
+        // (e.g. SHCH before SH before S), so the order of the lines matters.
+        // NOTE(review): syntax summary inferred from usage in this file;
+        // confirm against the rule parser.
+
+        // Russian Letters
+
+        // Upper case.  Each value is the Unicode capital of the matching
+        // lower-case variable below: U+0410..U+042F pair with
+        // U+0430..U+044F, and Yo is U+0401 (lower case U+0451).
+        "cyA=\u0410\n"
+        "cyBe=\u0411\n"
+        "cyVe=\u0412\n"
+        "cyGe=\u0413\n"
+        "cyDe=\u0414\n"
+        "cyYe=\u0415\n"
+        "cyYo=\u0401\n"
+        "cyZhe=\u0416\n"
+        "cyZe=\u0417\n"
+        "cyYi=\u0418\n"
+        "cyY=\u0419\n"
+        "cyKe=\u041a\n"
+        "cyLe=\u041b\n"
+        "cyMe=\u041c\n"
+        "cyNe=\u041d\n"
+        "cyO=\u041e\n"
+        "cyPe=\u041f\n"
+
+        "cyRe=\u0420\n"
+        "cySe=\u0421\n"
+        "cyTe=\u0422\n"
+        "cyU=\u0423\n"
+        "cyFe=\u0424\n"
+        "cyKhe=\u0425\n"
+        "cyTse=\u0426\n"
+        "cyChe=\u0427\n"
+        "cyShe=\u0428\n"
+        "cyShche=\u0429\n"
+        "cyHard=\u042a\n"
+        "cyI=\u042b\n"
+        "cySoft=\u042c\n"
+        "cyE=\u042d\n"
+        "cyYu=\u042e\n"
+        "cyYa=\u042f\n"
+
+        // Lower case.
+        "cya=\u0430\n"
+        "cybe=\u0431\n"
+        "cyve=\u0432\n"
+        "cyge=\u0433\n"
+        "cyde=\u0434\n"
+        "cyye=\u0435\n"
+        "cyzhe=\u0436\n"
+        "cyze=\u0437\n"
+        "cyyi=\u0438\n"
+        "cyy=\u0439\n"
+        "cyke=\u043a\n"
+        "cyle=\u043b\n"
+        "cyme=\u043c\n"
+        "cyne=\u043d\n"
+        "cyo=\u043e\n"
+        "cype=\u043f\n"
+
+        "cyre=\u0440\n"
+        "cyse=\u0441\n"
+        "cyte=\u0442\n"
+        "cyu=\u0443\n"
+        "cyfe=\u0444\n"
+        "cykhe=\u0445\n"
+        "cytse=\u0446\n"
+        "cyche=\u0447\n"
+        "cyshe=\u0448\n"
+        "cyshche=\u0449\n"
+        "cyhard=\u044a\n"
+        "cyi=\u044b\n"
+        "cysoft=\u044c\n"
+        "cye=\u044d\n"
+        "cyyu=\u044e\n"
+        "cyya=\u044f\n"
+
+        "cyyo=\u0451\n"
+
+        // convert English to Russian
+        // NOTE(review): this rule replaces the literal word "Russian" with
+        // a fixed Cyrillic sample string; it looks like test/demo data --
+        // confirm it is meant to ship.
+        "Russian>\u041f\u0420\u0410\u0412\u0414\u0410\u00D1\u0020\u0411\u044d\u043b\u0430\u0440\u0443\u0441\u043a\u0430\u044f\u002c\u0020\u043a\u044b\u0440\u0433\u044b\u0437\u002c\u0020\u041c\u043e\u043b\u0434\u043e\u0432\u044d\u043d\u044f\u0441\u043a\u044d\u002e\n"
+
+        //special equivs for ay, oy, ...
+        "YAI>{cyYa}{cyY}\n"
+        "YEI>{cyYe}{cyY}\n"
+        "YII>{cyYi}{cyY}\n"
+        "YOI>{cyYo}{cyY}\n"
+        "YUI>{cyYu}{cyY}\n"
+        "AI>{cyA}{cyY}\n"
+        "EI>{cyE}{cyY}\n"
+        //skip II, since it is the soft sign
+        "OI>{cyO}{cyY}\n"
+        "UI>{cyU}{cyY}\n"
+
+        "A>{cyA}\n"
+        "B>{cyBe}\n"
+        "CH>{cyChe}\n"
+        "C[I>{cySe}\n"
+        "C[E>{cySe}\n"
+        "C[Y>{cySe}\n"
+        "C>{cyKe}\n"
+        "D>{cyDe}\n"
+        "E>{cyE}\n"
+        "F>{cyFe}\n"
+        "G>{cyGe}\n"
+        "H>{cyHard}\n"
+        "II>{cySoft}\n"
+        "I>{cyI}\n"
+        "J>{cyDe}{cyZhe}\n"
+        "KH>{cyKhe}\n"
+        "K>{cyKe}\n"
+        "L>{cyLe}\n"
+        "M>{cyMe}\n"
+        "N>{cyNe}\n"
+        "O>{cyO}\n"
+        "P>{cyPe}\n"
+        "QU>{cyKe}{cyVe}\n"
+        "R>{cyRe}\n"
+        "SHTCH>{cyShche}\n"
+        "SHCH>{cyShche}\n"
+        "SH>{cyShe}\n"
+        "S>{cySe}\n"
+        "TCH>{cyChe}\n"
+        "TH>{cyZe}\n"
+        "TS>{cyTse}\n"
+        "T>{cyTe}\n"
+        "U>{cyU}\n"
+        "V>{cyVe}\n"
+        "WH>{cyVe}\n"
+        "W>{cyVe}\n"
+        "X>{cyKe}{cySe}\n"
+        "YE>{cyYe}\n"
+        "YO>{cyYo}\n"
+        "YU>{cyYu}\n"
+        "YA>{cyYa}\n"
+        "YI>{cyYi}\n"
+        "Y>{cyY}\n"
+        "ZH>{cyZhe}\n"
+        "Z>{cyZe}\n"
+
+        //lower case: doesn't solve join bug
+        "yai>{cyya}{cyy}\n"
+        "yei>{cyye}{cyy}\n"
+        "yii>{cyyi}{cyy}\n"
+        "yoi>{cyyo}{cyy}\n"
+        "yui>{cyyu}{cyy}\n"
+        "ai>{cya}{cyy}\n"
+        "ei>{cye}{cyy}\n"
+        //skip ii, since it is the soft sign
+        "oi>{cyo}{cyy}\n"
+        "ui>{cyu}{cyy}\n"
+
+        "a>{cya}\n"
+        "b>{cybe}\n"
+        "ch>{cyche}\n"
+        "c[i>{cyse}\n"
+        "c[e>{cyse}\n"
+        "c[y>{cyse}\n"
+        "c>{cyke}\n"
+        "d>{cyde}\n"
+        "e>{cye}\n"
+        "f>{cyfe}\n"
+        "g>{cyge}\n"
+        "h>{cyhard}\n"
+        "ii>{cysoft}\n"
+        "i>{cyi}\n"
+        "j>{cyde}{cyzhe}\n"
+        "kh>{cykhe}\n"
+        "k>{cyke}\n"
+        "l>{cyle}\n"
+        "m>{cyme}\n"
+        "n>{cyne}\n"
+        "o>{cyo}\n"
+        "p>{cype}\n"
+        "qu>{cyke}{cyve}\n"
+        "r>{cyre}\n"
+        "shtch>{cyshche}\n"
+        "shch>{cyshche}\n"
+        "sh>{cyshe}\n"
+        "s>{cyse}\n"
+        "tch>{cyche}\n"
+        "th>{cyze}\n"
+        "ts>{cytse}\n"
+        "t>{cyte}\n"
+        "u>{cyu}\n"
+        "v>{cyve}\n"
+        "wh>{cyve}\n"
+        "w>{cyve}\n"
+        "x>{cyke}{cyse}\n"
+        "ye>{cyye}\n"
+        "yo>{cyyo}\n"
+        "yu>{cyyu}\n"
+        "ya>{cyya}\n"
+        "yi>{cyyi}\n"
+        "y>{cyy}\n"
+        "zh>{cyzhe}\n"
+        "z>{cyze}\n"
+
+        //generally the last rule
+        //Drops the apostrophe that the reverse rules below insert as a
+        //separator (e.g. T'' before {cySe} so the result is not read as TS).
+        "''>\n"
+
+        //now Russian to English
+        //An apostrophe is emitted after a letter when the next Cyrillic
+        //letter would otherwise complete a Latin sequence that the forward
+        //rules map to a different letter (YA, ZH, KH, SHCH, SH, TS, TH, II).
+
+        "Y''<{cyY}[{cyA}\n"
+        "Y''<{cyY}[{cyE}\n"
+        "Y''<{cyY}[{cyI}\n"
+        "Y''<{cyY}[{cyO}\n"
+        "Y''<{cyY}[{cyU}\n"
+        "A<{cyA}\n"
+        "B<{cyBe}\n"
+        "J<{cyDe}{cyZhe}\n"
+        "D<{cyDe}\n"
+        "V<{cyVe}\n"
+        "G<{cyGe}\n"
+        "ZH<{cyZhe}\n"
+        "Z''<{cyZe}[{cyHard}\n"
+        "Z<{cyZe}\n"
+        "YE<{cyYe}\n"
+        "YO<{cyYo}\n"
+        "YU<{cyYu}\n"
+        "YA<{cyYa}\n"
+        "YI<{cyYi}\n"
+        "Y<{cyY}\n"
+        "KH<{cyKhe}\n"
+        "K''<{cyKe}[{cyHard}\n"
+        "X<{cyKe}{cySe}\n"
+        "K<{cyKe}\n"
+        "L<{cyLe}\n"
+        "M<{cyMe}\n"
+        "N<{cyNe}\n"
+        "O<{cyO}\n"
+        "P<{cyPe}\n"
+
+        "R<{cyRe}\n"
+        "SHCH<{cyShche}\n"
+        "SH''<{cyShe}[{cyChe}\n"
+        "SH<{cyShe}\n"
+        "S''<{cySe}[{cyHard}\n"
+        "S<{cySe}\n"
+        "TS<{cyTse}\n"
+        "T''<{cyTe}[{cySe}\n"
+        "T''<{cyTe}[{cyHard}\n"
+        "T<{cyTe}\n"
+        "U<{cyU}\n"
+        "F<{cyFe}\n"
+        "CH<{cyChe}\n"
+        "H<{cyHard}\n"
+        "I''<{cyI}[{cyI}\n"
+        "I<{cyI}\n"
+        "II<{cySoft}\n"
+        "E<{cyE}\n"
+
+        //lowercase
+        "y''<{cyy}[{cya}\n"
+        "y''<{cyy}[{cye}\n"
+        "y''<{cyy}[{cyi}\n"
+        "y''<{cyy}[{cyo}\n"
+        "y''<{cyy}[{cyu}\n"
+        "a<{cya}\n"
+        "b<{cybe}\n"
+        "j<{cyde}{cyzhe}\n"
+        "d<{cyde}\n"
+        "v<{cyve}\n"
+        "g<{cyge}\n"
+        "zh<{cyzhe}\n"
+        "z''<{cyze}[{cyhard}\n"
+        "z<{cyze}\n"
+        "ye<{cyye}\n"
+        "yo<{cyyo}\n"
+        "yu<{cyyu}\n"
+        "ya<{cyya}\n"
+        "yi<{cyyi}\n"
+        "y<{cyy}\n"
+        "kh<{cykhe}\n"
+        "k''<{cyke}[{cyhard}\n"
+        "x<{cyke}{cyse}\n"
+        "k<{cyke}\n"
+        "l<{cyle}\n"
+        "m<{cyme}\n"
+        "n<{cyne}\n"
+        "o<{cyo}\n"
+        "p<{cype}\n"
+
+        "r<{cyre}\n"
+        "shch<{cyshche}\n"
+        "sh''<{cyshe}[{cyche}\n"
+        "sh<{cyshe}\n"
+        "s''<{cyse}[{cyhard}\n"
+        "s<{cyse}\n"
+        "ts<{cytse}\n"
+        "t''<{cyte}[{cyse}\n"
+        "t''<{cyte}[{cyhard}\n"
+        "t<{cyte}\n"
+        "u<{cyu}\n"
+        "f<{cyfe}\n"
+        "ch<{cyche}\n"
+        "h<{cyhard}\n"
+        "i''<{cyi}[{cyi}\n"
+        "i<{cyi}\n"
+        "ii<{cysoft}\n"
+        "e<{cye}\n"
+
+        // NOTE(review): the forward rule "''>" is already defined once at
+        // the end of the Latin-to-Russian section, so it is not repeated
+        // here.  lhebrew.txt closes its reverse section with "<''";
+        // confirm whether that reverse rule is wanted here as well.
+        //the end
+    }
+}
--- a/icu4c/data/translit/quotes.txt
+++ b/icu4c/data/translit/quotes.txt
@ -0,0 +1,83 @@
+//--------------------------------------------------------------------
+//  Copyright (C) 1999, International Business Machines
+//  Corporation and others.  All Rights Reserved.
+//--------------------------------------------------------------------
+//  Date        Name        Description
+//  11/17/99    aliu        Creation.
+//--------------------------------------------------------------------
+
+// StraightQuotes-CurlyQuotes
+
+quotes {
+    Rule {
+        // Rule set for the StraightQuotes-CurlyQuotes transliterator.
+        // Lines of the form name=value define variables; rules refer
+        // to them as {name}.  Rules written with '>' are listed under
+        // "Conversions from input" and rules written with '<' under
+        // "Conversions back to input".
+        // NOTE(review): '' appears to denote a literal apostrophe and
+        // text before ']' appears to be preceding context that is
+        // matched but not replaced -- confirm against the rule parser.
+
+        // Rewritten using character codes [LIU]
+
+        // Character classes: white = Unicode categories Zs, Zl, Zp;
+        // black = everything not in those categories; open = Ps.
+        "white=[[:Zs:][:Zl:][:Zp:]]\n"
+        "black=[^[:Zs:][:Zl:][:Zp:]]\n"
+        "open=[[:Ps:]]\n"
+        "dquote=\"\n"
+
+        // NOTE(review): lAng, ldAng, lBrk and lBrc are defined here
+        // but are not referenced by any rule in this rule set.
+        "lAng=\u3008\n"
+        "ldAng=\u300A\n"
+        "lBrk='['\n"
+        "lBrc='{'\n"
+
+        // Curly single and double quotation marks
+        "lquote=\u2018\n"
+        "rquote=\u2019\n"
+        "ldquote=\u201C\n"
+        "rdquote=\u201D\n"
+
+        // Double and single guillemets
+        "ldguill=\u00AB\n"
+        "rdguill=\u00BB\n"
+        "lguill=\u2039\n"
+        "rguill=\u203A\n"
+
+        "mdash=\u2014\n"
+
+        //#######################################
+        // Conversions from input
+        //#######################################
+
+        // join single quotes
+        "{lquote}''>{ldquote}\n"
+        "{lquote}{lquote}>{ldquote}\n"
+        "{rquote}''>{rdquote}\n"
+        "{rquote}{rquote}>{rdquote}\n"
+
+        //smart single quotes
+        // The last rule has no context and maps the remaining
+        // apostrophes to {lquote}.
+        "{white}]''>{lquote}\n"
+        "{open}]''>{lquote}\n"
+        "{black}]''>{rquote}\n"
+        "''>{lquote}\n"
+
+        //smart doubles
+        // Same pattern as the single quotes above, using {dquote}.
+        "{white}]{dquote}>{ldquote}\n"
+        "{open}]{dquote}>{ldquote}\n"
+        "{black}]{dquote}>{rdquote}\n"
+        "{dquote}>{ldquote}\n"
+
+        // join single guillemets
+        "{rguill}{rguill}>{rdguill}\n"
+        "'>>'>{rdguill}\n"
+        "{lguill}{lguill}>{ldguill}\n"
+        "'<<'>{ldguill}\n"
+
+        // prevent double spaces
+        " ] >\n"
+
+        // join hyphens into dash
+        "-->{mdash}\n"
+
+        //#######################################
+        // Conversions back to input
+        //#######################################
+
+        //smart quotes
+        // Both curly forms map back to the same straight character.
+        "''<{lquote}\n"
+        "''<{rquote}\n"
+        "{dquote}<{ldquote}\n"
+        "{dquote}<{rdquote}\n"
+
+        //hyphens
+        "--<{mdash}\n"
+    }
+}
--- a/icu4c/data/translit/ucname.txt
+++ b/icu4c/data/translit/ucname.txt
--- a/icu4c/source/i18n/cpdtrans.cpp
+++ b/icu4c/source/i18n/cpdtrans.cpp
@ -0,0 +1,277 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#include "cpdtrans.h"
+#include "unifilt.h"
+#include "unifltlg.h"
+
+/**
+ * Constructs an empty compound transliterator.  The ID and the
+ * adopted filter are forwarded to the Transliterator base class;
+ * the chain starts out with no component transliterators.
+ */
+CompoundTransliterator::CompoundTransliterator(const UnicodeString& ID,
+                                               UnicodeFilter* adoptedFilter)
+    : Transliterator(ID, adoptedFilter),
+      trans(0),
+      count(0)
+{
+}
+
+/**
+ * Constructs a new compound transliterator from an array of
+ * transliterators.  The array may have any length, including zero
+ * or one, although a useful compound transliterator has at least
+ * two components.  Each element is cloned by setTransliterators(),
+ * so the array passed in is not retained.
+ * @param ID the ID forwarded to the Transliterator base class
+ * @param transliterators array of <code>Transliterator</code>
+ * objects
+ * @param transCount number of elements in <tt>transliterators</tt>
+ * @param adoptedFilter the filter, forwarded to the Transliterator
+ * base class.  Any character for which <tt>isIn()</tt> returns
+ * <tt>false</tt> will not be altered by this transliterator.  If
+ * it is <tt>null</tt> then no filtering is applied.
+ */
+CompoundTransliterator::CompoundTransliterator(const UnicodeString& ID,
+                                               Transliterator* const transliterators[],
+                                               int32_t transCount,
+                                               UnicodeFilter* adoptedFilter)
+    : Transliterator(ID, adoptedFilter),
+      trans(0),
+      count(0)
+{
+    // Begin with an empty chain, then install the components.
+    setTransliterators(transliterators, transCount);
+}
+
+/**
+ * Copy constructor.  Starts with an empty chain and then reuses
+ * the assignment operator to clone every component of <tt>t</tt>.
+ */
+CompoundTransliterator::CompoundTransliterator(const CompoundTransliterator& t)
+    : Transliterator(t),
+      trans(0),
+      count(0)
+{
+    operator=(t);
+}
+
+/**
+ * Destructor.  Deletes every component transliterator and the
+ * array that holds them (see freeTransliterators()).
+ */
+CompoundTransliterator::~CompoundTransliterator() {
+    freeTransliterators();
+}
+
+/**
+ * Deletes all component transliterators and the array holding
+ * them, leaving this object with an empty chain (trans == 0,
+ * count == 0).
+ */
+void CompoundTransliterator::freeTransliterators() {
+    int32_t n = 0;
+    while (n < count) {
+        delete trans[n];
+        ++n;
+    }
+    delete[] trans;
+    count = 0;
+    trans = 0;
+}
+
+/**
+ * Assignment operator.  Replaces this object's chain with clones
+ * of the transliterators in <tt>t</tt>.  The existing array is
+ * reused when it is already large enough.
+ * @param t the compound transliterator to copy from
+ * @return a reference to this object
+ */
+CompoundTransliterator& CompoundTransliterator::operator=(
+                                             const CompoundTransliterator& t) {
+    // Guard against self-assignment: without it the loop below would
+    // delete our own components and then clone from the freed pointers.
+    if (this == &t) {
+        return *this;
+    }
+    Transliterator::operator=(t);
+    int32_t i;
+    // Release the components we currently own.
+    for (i=0; i<count; ++i) {
+        delete trans[i];
+        trans[i] = 0;
+    }
+    // Grow the array only if the source chain is longer than ours.
+    if (t.count > count) {
+        delete[] trans;
+        trans = new Transliterator*[t.count];
+    }
+    count = t.count;
+    for (i=0; i<count; ++i) {
+        trans[i] = t.trans[i]->clone();
+    }
+    return *this;
+}
+
+/**
+ * Transliterator API.  Returns a newly allocated copy of this
+ * object, made with the copy constructor.
+ */
+Transliterator* CompoundTransliterator::clone() const {
+    Transliterator* copy = new CompoundTransliterator(*this);
+    return copy;
+}
+
+/**
+ * Returns the number of transliterators in this chain.  This is
+ * zero for a compound transliterator with no components.
+ * @return number of transliterators in this chain.
+ */
+int32_t CompoundTransliterator::getCount() const {
+    return count;
+}
+
+/**
+ * Returns the transliterator at the given index in this chain.
+ * The index is not range-checked here; the caller must keep it
+ * within bounds.
+ * @param index index into chain, from 0 to <code>getCount() - 1</code>
+ * @return transliterator at the given index
+ */
+const Transliterator& CompoundTransliterator::getTransliterator(int32_t index) const {
+    const Transliterator* component = trans[index];
+    return *component;
+}
+
+
+/**
+ * Replaces the chain with clones of the given transliterators.
+ * The array passed in and its elements are not retained.
+ * @param transliterators array of transliterators to clone
+ * @param transCount number of elements in <tt>transliterators</tt>
+ */
+void CompoundTransliterator::setTransliterators(Transliterator* const transliterators[],
+                                                int32_t transCount) {
+    Transliterator** copies = new Transliterator*[transCount];
+    int32_t n = 0;
+    while (n < transCount) {
+        copies[n] = transliterators[n]->clone();
+        ++n;
+    }
+    // Hand the freshly built array over; it is owned by this object
+    // from here on.
+    adoptTransliterators(copies, transCount);
+}
+
+/**
+ * Replaces the chain with the given array, taking ownership of
+ * both the array and the transliterators it points to: they are
+ * later released with delete[] and delete by
+ * freeTransliterators().
+ * @param adoptedTransliterators array of transliterators to adopt
+ * @param transCount number of elements in the array
+ */
+void CompoundTransliterator::adoptTransliterators(Transliterator* adoptedTransliterators[],
+                                                  int32_t transCount) {
+    // Release the current chain first; this also resets trans and count.
+    freeTransliterators();
+    trans = adoptedTransliterators;
+    count = transCount;
+}
+
+/**
+ * Transliterates a segment of a string.  <code>Transliterator</code> API.
+ * Each component is applied in chain order to the same start
+ * index; the limit returned by one component is the limit passed
+ * to the next.
+ * @param text the string to be transliterated
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @return the new limit index
+ */
+int32_t CompoundTransliterator::transliterate(Replaceable& text,
+                                              int32_t start, int32_t limit) const {
+    int32_t n = 0;
+    while (n < count) {
+        limit = trans[n]->transliterate(text, start, limit);
+        ++n;
+    }
+    return limit;
+}
+
+/**
+ * Implements {@link Transliterator#handleKeyboardTransliterate}.
+ * Runs every component transliterator in chain order over the
+ * text, temporarily combining this object's filter (if any) with
+ * each component's own filter.
+ * @param text the text being transliterated
+ * @param index offsets array; index[CURSOR] and index[LIMIT] are
+ * read on entry and updated on return
+ */
+void CompoundTransliterator::handleKeyboardTransliterate(Replaceable& text,
+                                                         int32_t index[3]) const {
+    /* Call each transliterator with the same start value and
+     * initial cursor index, but with the limit index as modified
+     * by preceding transliterators.  The cursor index must be
+     * reset for each transliterator to give each a chance to
+     * transliterate the text.  The initial cursor index is known
+     * to still point to the same place after each transliterator
+     * is called because each transliterator will not change the
+     * text between start and the initial value of cursor.
+     *
+     * IMPORTANT: After the first transliterator, each subsequent
+     * transliterator only gets to transliterate text committed by
+     * preceding transliterators; that is, the cursor (output
+     * value) of transliterator i becomes the limit (input value)
+     * of transliterator i+1.  Finally, the overall limit is fixed
+     * up before we return.
+     *
+     * Assumptions we make here:
+     * (1) start <= cursor <= limit    ;cursor valid on entry
+     * (2) cursor <= cursor' <= limit' ;cursor doesn't move back
+     * (3) cursor <= limit'            ;text before cursor unchanged
+     * - cursor' is the value of cursor after calling handleKT
+     * - limit' is the value of limit after calling handleKT
+     */
+
+    /**
+     * Example: 3 transliterators.  This example illustrates the
+     * mechanics we need to implement.  S, C, and L are the start,
+     * cursor, and limit.  gl is the globalLimit.
+     *
+     * 1. h-u, changes hex to Unicode
+     *
+     *    4  7  a  d  0      4  7  a
+     *    abc/u0061/u    =>  abca/u
+     *    S  C       L       S   C L   gl=f->a
+     *
+     * 2. upup, changes "x" to "XX"
+     *
+     *    4  7  a       4  7  a
+     *    abca/u    =>  abcAA/u
+     *    S  CL         S    C
+     *                       L    gl=a->b
+     * 3. u-h, changes Unicode to hex
+     *
+     *    4  7  a        4  7  a  d  0  3
+     *    abcAA/u    =>  abc/u0041/u0041/u
+     *    S  C L         S              C
+     *                                  L   gl=b->15
+     * 4. return
+     *
+     *    4  7  a  d  0  3
+     *    abc/u0041/u0041/u
+     *    S C L
+     */
+
+    if (count < 1) {
+        return; // Short circuit for empty compound transliterators
+    }
+
+    /**
+     * One more wrinkle.  If there is a filter F for the compound
+     * transliterator as a whole, then we need to modify every
+     * filter f in the chain to be f' = F & f.  A component with no
+     * filter of its own simply gets a copy of F.  Then, when we're
+     * done, we restore the original filters.
+     *
+     * A possible future optimization is to change f to f' at
+     * construction time, but then if anyone else is using the
+     * transliterators in the chain outside of this context, they
+     * will get unexpected results.
+     */
+    const UnicodeFilter* F = getFilter();
+    UnicodeFilter** f = 0;
+    if (F != 0) {
+        f = new UnicodeFilter*[count];
+        for (int32_t i=0; i<count; ++i) {
+            if (trans[i]->getFilter() == 0) {
+                // The component has no filter: there is nothing to
+                // clone or to AND with, so F alone applies.  Remember
+                // "no filter" (0) so that it can be restored below.
+                f[i] = 0;
+                trans[i]->adoptFilter(F->clone());
+            } else {
+                // Keep a private copy of the component's filter; the
+                // component's current one is replaced by F & f.
+                f[i] = trans[i]->getFilter()->clone();
+                trans[i]->adoptFilter(UnicodeFilterLogic::createAnd(*F, *f[i]));
+            }
+        }
+    }
+
+    int32_t cursor = index[CURSOR];
+    int32_t limit = index[LIMIT];
+    int32_t globalLimit = limit;
+    /* globalLimit is the overall limit.  We keep track of this
+     * since we overwrite index[LIMIT] with the previous
+     * index[CURSOR].  After each transliteration, we update
+     * globalLimit for insertions or deletions that have happened.
+     */
+
+    for (int32_t i=0; i<count; ++i) {
+        index[CURSOR] = cursor; // Reset cursor
+        index[LIMIT] = limit;
+
+        trans[i]->handleKeyboardTransliterate(text, index);
+
+        // Adjust overall limit for insertions/deletions
+        globalLimit += index[LIMIT] - limit;
+        limit = index[CURSOR]; // Move limit to end of committed text
+    }
+    // Cursor is good where it is -- where the last
+    // transliterator left it.  Limit needs to be put back
+    // where it was, modulo adjustments for deletions/insertions.
+    index[LIMIT] = globalLimit;
+
+    // Fixup the transliterator filters, if we had to modify them.
+    // f[i] is 0 for components that originally had no filter, which
+    // puts them back into the unfiltered state.
+    if (f != 0) {
+        for (int32_t i=0; i<count; ++i) {
+            trans[i]->adoptFilter(f[i]);
+        }
+        delete[] f;
+    }
+}
+
+/**
+ * Returns the length of the longest context required by this transliterator.
+ * This is <em>preceding</em> context.  The result is the largest
+ * value reported by any component, or zero for an empty chain.
+ * @return maximum number of preceding context characters this
+ * transliterator needs to examine
+ */
+int32_t CompoundTransliterator::getMaximumContextLength() const {
+    int32_t longest = 0;
+    int32_t n = 0;
+    while (n < count) {
+        int32_t candidate = trans[n]->getMaximumContextLength();
+        if (longest < candidate) {
+            longest = candidate;
+        }
+        ++n;
+    }
+    return longest;
+}
--- a/icu4c/source/i18n/cpdtrans.h
+++ b/icu4c/source/i18n/cpdtrans.h
@ -0,0 +1,133 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#ifndef CPDTRANS_H
+#define CPDTRANS_H
+
+#include "translit.h"
+
+/**
+ * A transliterator that is composed of two or more other
+ * transliterator objects linked together.  For example, if one
+ * transliterator transliterates from script A to script B, and
+ * another transliterates from script B to script C, the two may be
+ * combined to form a new transliterator from A to C.
+ *
+ * <p>Composed transliterators may not behave as expected.  For
+ * example, inverses may not combine to form the identity
+ * transliterator.  See the class documentation for {@link
+ * Transliterator} for details.
+ *
+ * <p>If a non-<tt>null</tt> <tt>UnicodeFilter</tt> is applied to a
+ * <tt>CompoundTransliterator</tt>, it has the effect of being
+ * logically <b>and</b>ed with the filter of each transliterator in
+ * the chain.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: cpdtrans.h,v $ $Revision: 1.1 $ $Date: 1999/11/20 00:36:43 $
+ */
+class U_I18N_API CompoundTransliterator : public Transliterator {
+
+    /**
+     * Array of the component transliterators, in the order in which
+     * they are applied.  Both the array and its elements are owned
+     * by this object.
+     */
+    Transliterator** trans;
+
+    /**
+     * Number of elements in <tt>trans</tt>.
+     */
+    int32_t count;
+
+public:
+
+    /**
+     * Constructs a new compound transliterator given an array of
+     * transliterators.  The array of transliterators may be of any
+     * length, including zero or one, however, useful compound
+     * transliterators have at least two components.
+     * @param ID the ID passed on to the <tt>Transliterator</tt>
+     * base class
+     * @param transliterators array of <code>Transliterator</code>
+     * objects; each one is cloned
+     * @param count number of elements in <tt>transliterators</tt>
+     * @param adoptedFilter the filter.  Any character for which
+     * <tt>isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If it is
+     * <tt>null</tt> then no filtering is applied.
+     */
+    CompoundTransliterator(const UnicodeString& ID,
+                           Transliterator* const transliterators[],
+                           int32_t count,
+                           UnicodeFilter* adoptedFilter = 0);
+
+    /**
+     * Constructs a compound transliterator with no components.
+     * @param ID the ID passed on to the <tt>Transliterator</tt>
+     * base class
+     * @param adoptedFilter the filter, or <tt>null</tt> for none
+     */
+    CompoundTransliterator(const UnicodeString& ID,
+                           UnicodeFilter* adoptedFilter = 0);
+
+    /**
+     * Destructor.
+     */
+    virtual ~CompoundTransliterator();
+
+    /**
+     * Copy constructor.
+     */
+    CompoundTransliterator(const CompoundTransliterator&);
+
+    /**
+     * Assignment operator.
+     */
+    CompoundTransliterator& operator=(const CompoundTransliterator&);
+
+    /**
+     * Transliterator API.
+     */
+    Transliterator* clone() const;
+
+    /**
+     * Returns the number of transliterators in this chain.
+     * @return number of transliterators in this chain.
+     */
+    virtual int32_t getCount() const;
+
+    /**
+     * Returns the transliterator at the given index in this chain.
+     * @param index index into chain, from 0 to <code>getCount() - 1</code>
+     * @return transliterator at the given index
+     */
+    virtual const Transliterator& getTransliterator(int32_t index) const;
+
+    /**
+     * Replaces the chain with clones of the given transliterators.
+     * @param transliterators array of transliterators to clone
+     * @param count number of elements in <tt>transliterators</tt>
+     */
+    void setTransliterators(Transliterator* const transliterators[],
+                            int32_t count);
+
+    /**
+     * Replaces the chain with the given array.  This object takes
+     * ownership of the array and of the transliterators in it.
+     * @param adoptedTransliterators array of transliterators to adopt
+     * @param count number of elements in the array
+     */
+    void adoptTransliterators(Transliterator* adoptedTransliterators[],
+                              int32_t count);
+
+    /**
+     * Transliterates a segment of a string.  <code>Transliterator</code> API.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @return the new limit index
+     */
+    virtual int32_t transliterate(Replaceable& text, int32_t start, int32_t limit) const;
+
+    /**
+     * Implements {@link Transliterator#handleKeyboardTransliterate}.
+     */
+    virtual void handleKeyboardTransliterate(Replaceable& text,
+                                             int32_t index[3]) const;
+
+    /**
+     * Returns the length of the longest context required by this transliterator.
+     * This is <em>preceding</em> context.
+     * @return maximum number of preceding context characters this
+     * transliterator needs to examine
+     */
+    virtual int32_t getMaximumContextLength() const;
+
+private:
+
+    /**
+     * Deletes the component transliterators and the array holding
+     * them, leaving the chain empty.
+     */
+    void freeTransliterators();
+};
+#endif
--- a/icu4c/source/i18n/hextouni.cpp
+++ b/icu4c/source/i18n/hextouni.cpp
@ -0,0 +1,155 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#include "hextouni.h"
+#include "rep.h"
+#include "unifilt.h"
+#include "uniset.h" // For UnicodeSet::digit REMOVE LATER
+
+/**
+ * ID for this transliterator.  The constructor passes it to the
+ * Transliterator base class.
+ */
+const char* HexToUnicodeTransliterator::_ID = "Hex-Unicode";
+
+/**
+ * Constructs a transliterator with the fixed ID <tt>_ID</tt>.
+ * @param adoptedFilter the filter forwarded to the Transliterator
+ * base class
+ */
+HexToUnicodeTransliterator::HexToUnicodeTransliterator(UnicodeFilter* adoptedFilter)
+    : Transliterator(_ID, adoptedFilter)
+{
+}
+
+/**
+ * Copy constructor.  This class adds no state of its own, so the
+ * base class copy constructor does all the work.
+ */
+HexToUnicodeTransliterator::HexToUnicodeTransliterator(const HexToUnicodeTransliterator& o)
+    : Transliterator(o)
+{
+}
+
+/**
+ * Assignment operator.  Delegates to the base class assignment
+ * operator; nothing else is copied here.
+ * @param o the transliterator to copy from
+ * @return a reference to this object
+ */
+HexToUnicodeTransliterator& HexToUnicodeTransliterator::operator=(
+                                             const HexToUnicodeTransliterator& o) {
+    Transliterator::operator=(o);
+    return *this;
+}
+
+/**
+ * Transliterator API.  Returns a newly allocated copy of this
+ * object, made with the copy constructor.
+ */
+Transliterator* HexToUnicodeTransliterator::clone() const {
+    Transliterator* copy = new HexToUnicodeTransliterator(*this);
+    return copy;
+}
+
+/**
+ * Transliterates a segment of a string.  <code>Transliterator</code> API.
+ * Implemented by running handleKeyboardTransliterate() over the
+ * whole segment and reporting the limit it leaves behind.
+ * @param text the string to be transliterated
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @return the new limit index
+ */
+int32_t HexToUnicodeTransliterator::transliterate(Replaceable& text,
+                                                  int32_t start, int32_t limit) const {
+    // NOTE(review): the slot order here presumably corresponds to
+    // START, LIMIT, CURSOR -- confirm against the index constants.
+    int32_t offsets[3];
+    offsets[0] = start;
+    offsets[1] = limit;
+    offsets[2] = start;
+    handleKeyboardTransliterate(text, offsets);
+    return offsets[LIMIT];
+}
+
+/**
+ * Implements {@link Transliterator#handleKeyboardTransliterate}.
+ * Scans the text from offsets[CURSOR] up to offsets[LIMIT] in
+ * six-character windows (two prefix characters followed by four
+ * hex digits) and replaces every complete escape with the single
+ * character it denotes.
+ * @param text the text being transliterated
+ * @param offsets offsets array; offsets[CURSOR] and offsets[LIMIT]
+ * are read on entry and updated on return
+ */
+void HexToUnicodeTransliterator::handleKeyboardTransliterate(Replaceable& text,
+                                                             int32_t offsets[3]) const {
+    /**
+     * Performs transliteration changing Unicode hexadecimal
+     * escapes to characters.  For example, "U+0040" -> '@'.  A fixed
+     * set of prefixes is recognized: "&#92;u", "&#92;U", "u+", "U+".
+     */
+    int32_t cursor = offsets[CURSOR];
+    int32_t limit = offsets[LIMIT];
+
+    // Last position at which a full six-character escape still fits.
+    int32_t maxCursor = limit - 6;
+
+    while (cursor <= maxCursor) {
+        // Examine the window back to front, starting with the last
+        // of the four hex digits.
+        UChar c = filteredCharAt(text, cursor + 5);
+        int32_t digit0 = UnicodeSet::digit(c, 16);
+        if (digit0 < 0) {
+            // Not a hex digit, so no escape starts at cursor.  Skip
+            // ahead, but only so far that a prefix character found
+            // here can still be part of the next window: a backslash
+            // becomes its first character, U/u/+ its second.
+            if (c == '\\') {
+                cursor += 5;
+            } else if (c == 'U' || c == 'u' || c == '+') {
+                cursor += 4;
+            } else {
+                cursor += 6;
+            }
+            continue;
+        }
+
+        // u accumulates the code unit, lowest hex digit first.
+        int32_t u = digit0;
+        bool_t toTop = FALSE;
+
+        // Remaining three hex digits, at window positions 4, 3 and 2.
+        for (int32_t i=4; i>=2; --i) {
+            c = filteredCharAt(text, cursor + i);
+            int32_t digit = UnicodeSet::digit(c, 16);
+            if (digit < 0) {
+                // As above: realign so that U/u/+ lands on the second
+                // position of the next window, otherwise skip the
+                // whole window.
+                if (c == 'U' || c == 'u' || c == '+') {
+                    cursor += i-1;
+                } else {
+                    cursor += 6;
+                }
+                toTop = TRUE; // This is a little awkward -- it was a "continue loop:"
+                break;        // statement in Java, where loop marked the while().
+            } else {
+                // Position 4 supplies bits 4-7, position 3 bits 8-11,
+                // position 2 bits 12-15.
+                u |= digit << (4 * (5-i));
+            }
+        }
+
+        if (toTop) {
+            continue;
+        }
+
+        // Four hex digits found; now check the two prefix characters.
+        c = filteredCharAt(text, cursor);
+        UChar d = filteredCharAt(text, cursor + 1);
+        if (((c == 'U' || c == 'u') && d == '+')
+            || (c == '\\' && (d == 'U' || d == 'u'))) {
+            
+            // At this point, we have a match; replace cursor..cursor+5
+            // with u.
+            text.handleReplaceBetween(cursor, cursor+6, UnicodeString((UChar)u));
+            // Six characters became one, so the text shrank by five.
+            limit -= 5;
+            maxCursor -= 5;
+
+            ++cursor;
+        } else {
+            cursor += 6;
+        }
+    }
+
+    // Report the adjusted limit and how far the scan got.
+    offsets[LIMIT] = limit;
+    offsets[CURSOR] = cursor;
+}
+
+/**
+ * Returns the character at index <tt>i</tt> of <tt>text</tt>, or
+ * 0xFFFF if this transliterator has a filter and the filter's
+ * isIn() rejects that character.
+ * @param text the text to read from
+ * @param i index of the character to fetch
+ * @return the character, or 0xFFFF if it is filtered out
+ */
+UChar HexToUnicodeTransliterator::filteredCharAt(Replaceable& text, int32_t i) const {
+    const UnicodeFilter* filter = getFilter();
+    UChar ch = text.charAt(i);
+    if (filter != 0 && !filter->isIn(ch)) {
+        return (UChar)0xFFFF;
+    }
+    return ch;
+}
+
+/**
+ * Return the length of the longest context required by this transliterator.
+ * This is <em>preceding</em> context.  This transliterator always
+ * reports zero.
+ * @return maximum number of preceding context characters this
+ * transliterator needs to examine
+ */
+int32_t HexToUnicodeTransliterator::getMaximumContextLength() const {
+    return 0;
+}
--- a/icu4c/source/i18n/hextouni.h
+++ b/icu4c/source/i18n/hextouni.h
@ -0,0 +1,95 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#ifndef HEXTOUNI_H
+#define HEXTOUNI_H
+
+#include "translit.h"
+
+/**
+ * A transliterator that converts from hexadecimal Unicode
+ * escape sequences to the characters they represent.  For example,
+ * "U+0040" and "&#92;u0040" are both converted to '@'.  It
+ * recognizes the prefixes "U+", "u+", "&#92;U", and "&#92;u",
+ * each followed by four hex digits.  Hex values may be
+ * upper- or lowercase.
+ *
+ * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
+ *
+ * @author Alan Liu
+ * @version $RCSfile: hextouni.h,v $ $Revision: 1.1 $ $Date: 1999/11/20 00:36:43 $
+ */
+class U_I18N_API HexToUnicodeTransliterator : public Transliterator {
+
+    /**
+     * ID for this transliterator.
+     */
+    static const char* _ID;
+
+public:
+
+    /**
+     * Constructs a transliterator.
+     * @param adoptedFilter the filter passed on to the
+     * <tt>Transliterator</tt> base class, or <tt>null</tt> for none
+     */
+    HexToUnicodeTransliterator(UnicodeFilter* adoptedFilter = 0);
+
+    /**
+     * Destructor.
+     */
+    virtual ~HexToUnicodeTransliterator();
+
+    /**
+     * Copy constructor.
+     */
+    HexToUnicodeTransliterator(const HexToUnicodeTransliterator&);
+
+    /**
+     * Assignment operator.
+     */
+    HexToUnicodeTransliterator& operator=(const HexToUnicodeTransliterator&);
+
+    /**
+     * Transliterator API.
+     */
+    Transliterator* clone() const;
+
+    /**
+     * Transliterates a segment of a string.  <code>Transliterator</code> API.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @return the new limit index
+     */
+    virtual int32_t transliterate(Replaceable &text,
+                                  int32_t start, int32_t limit) const;
+
+    /**
+     * Implements {@link Transliterator#handleKeyboardTransliterate}.
+     */
+    virtual void handleKeyboardTransliterate(Replaceable& text,
+                                             int32_t offsets[3]) const;
+
+    /**
+     * Return the length of the longest context required by this transliterator.
+     * This is <em>preceding</em> context.
+     * @return maximum number of preceding context characters this
+     * transliterator needs to examine
+     */
+    virtual int32_t getMaximumContextLength() const;
+
+private:
+
+    /**
+     * Returns the character of <tt>text</tt> at index <tt>i</tt>
+     * after applying this transliterator's filter, if it has one.
+     */
+    UChar filteredCharAt(Replaceable& text, int32_t i) const;
+};
+
+// Destructor: empty, since this class declares no data members of its own.
+inline HexToUnicodeTransliterator::~HexToUnicodeTransliterator() {}
+
+#endif
--- a/icu4c/source/i18n/i18n.dsp
+++ b/icu4c/source/i18n/i18n.dsp
@ -69,7 +69,7 @@ LINK32=link.exe
 # PROP Ignore_Export_Lib 0
 # PROP Target_Dir ""
 # ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "I18N_EXPORTS" /YX /FD /GZ /c
-# ADD CPP /nologo /MDd /Za /W3 /Gm /GX /ZI /Od /I "..\..\include" /I "..\..\source\common" /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "I18N_EXPORTS" /D "U_I18N_IMPLEMENTATION" /YX /FD /GZ /c
+# ADD CPP /nologo /MDd /Za /W3 /Gm /GX /ZI /Od /I "..\..\include" /I "..\..\source\common" /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "I18N_EXPORTS" /D "U_I18N_IMPLEMENTATION" /FR /YX /FD /GZ /c
 # ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32
 # ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32
 # ADD BASE RSC /l 0x409 /d "_DEBUG"
@ -124,6 +124,10 @@ SOURCE=.\colrules.cpp
 # End Source File
 # Begin Source File

+SOURCE=.\cpdtrans.cpp
+# End Source File
+# Begin Source File
+
 SOURCE=.\datefmt.cpp
 # End Source File
 # Begin Source File
@ -153,6 +157,10 @@ SOURCE=.\gregocal.cpp
 # End Source File
 # Begin Source File

+SOURCE=.\hextouni.cpp
+# End Source File
+# Begin Source File
+
 SOURCE=.\lnbkdat.cpp
 # End Source File
 # Begin Source File
@ -173,6 +181,26 @@ SOURCE=.\ptnentry.cpp
 # End Source File
 # Begin Source File

+SOURCE=.\rbt.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=.\rbt_data.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=.\rbt_pars.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=.\rbt_rule.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=.\rbt_set.cpp
+# End Source File
+# Begin Source File
+
 SOURCE=.\simpletz.cpp
 # End Source File
 # Begin Source File
@ -209,6 +237,10 @@ SOURCE=.\timezone.cpp
 # End Source File
 # Begin Source File

+SOURCE=.\translit.cpp
+# End Source File
+# Begin Source File
+
 SOURCE=.\txtbdat.cpp
 # End Source File
 # Begin Source File
@ -241,10 +273,22 @@ SOURCE=.\unicdcm.cpp
 # End Source File
 # Begin Source File

+SOURCE=.\unifltlg.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=.\unirange.cpp
+# End Source File
+# Begin Source File
+
 SOURCE=.\uniset.cpp
 # End Source File
 # Begin Source File

+SOURCE=.\unitohex.cpp
+# End Source File
+# Begin Source File
+
 SOURCE=.\unum.cpp
 # End Source File
 # Begin Source File
@ -404,6 +448,33 @@ SOURCE=.\colrules.h
 # End Source File
 # Begin Source File

+SOURCE=.\cpdtrans.h
+
+!IF  "$(CFG)" == "i18n - Win32 Release"
+
+# Begin Custom Build
+InputPath=.\cpdtrans.h
+
+"..\..\include\cpdtrans.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy               cpdtrans.h                ..\..\include
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "i18n - Win32 Debug"
+
+# Begin Custom Build
+InputPath=.\cpdtrans.h
+
+"..\..\include\cpdtrans.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy               cpdtrans.h                ..\..\include
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
 SOURCE=.\datefmt.h

 !IF  "$(CFG)" == "i18n - Win32 Release"
@ -620,6 +691,33 @@ InputPath=.\gregocal.h
 # End Source File
 # Begin Source File

+SOURCE=.\hextouni.h
+
+!IF  "$(CFG)" == "i18n - Win32 Release"
+
+# Begin Custom Build
+InputPath=.\hextouni.h
+
+"..\..\include\hextouni.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy               hextouni.h                ..\..\include
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "i18n - Win32 Debug"
+
+# Begin Custom Build
+InputPath=.\hextouni.h
+
+"..\..\include\hextouni.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy               hextouni.h                ..\..\include
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
 SOURCE=.\mergecol.h
 # End Source File
 # Begin Source File
@ -709,6 +807,57 @@ SOURCE=.\ptnentry.h
 # End Source File
 # Begin Source File

+SOURCE=.\rbbi.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\rbbi_bld.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\rbt.h
+
+!IF  "$(CFG)" == "i18n - Win32 Release"
+
+# Begin Custom Build
+InputPath=.\rbt.h
+
+"..\..\include\rbt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy               rbt.h                ..\..\include
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "i18n - Win32 Debug"
+
+# Begin Custom Build
+InputPath=.\rbt.h
+
+"..\..\include\rbt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy               rbt.h                ..\..\include
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\rbt_data.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\rbt_pars.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\rbt_rule.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\rbt_set.h
+# End Source File
+# Begin Source File
+
 SOURCE=.\simpletz.h

 !IF  "$(CFG)" == "i18n - Win32 Release"
@ -860,6 +1009,33 @@ InputPath=.\timezone.h
 # End Source File
 # Begin Source File

+SOURCE=.\translit.h
+
+!IF  "$(CFG)" == "i18n - Win32 Release"
+
+# Begin Custom Build
+InputPath=.\translit.h
+
+"..\..\include\translit.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy               translit.h                ..\..\include
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "i18n - Win32 Debug"
+
+# Begin Custom Build
+InputPath=.\translit.h
+
+"..\..\include\translit.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy               translit.h                ..\..\include
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
 SOURCE=.\txtbdat.h
 # End Source File
 # Begin Source File
@ -1007,6 +1183,64 @@ SOURCE=.\unicdcm.h
 # End Source File
 # Begin Source File

+SOURCE=.\unifilt.h
+
+!IF  "$(CFG)" == "i18n - Win32 Release"
+
+# Begin Custom Build
+InputPath=.\unifilt.h
+
+"..\..\include\unifilt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy               unifilt.h                ..\..\include
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "i18n - Win32 Debug"
+
+# Begin Custom Build
+InputPath=.\unifilt.h
+
+"..\..\include\unifilt.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy               unifilt.h                ..\..\include
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\unifltlg.h
+
+!IF  "$(CFG)" == "i18n - Win32 Release"
+
+# Begin Custom Build
+InputPath=.\unifltlg.h
+
+"..\..\include\unifltlg.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy               unifltlg.h                ..\..\include
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "i18n - Win32 Debug"
+
+# Begin Custom Build
+InputPath=.\unifltlg.h
+
+"..\..\include\unifltlg.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy               unifltlg.h                ..\..\include
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\unirange.h
+# End Source File
+# Begin Source File
+
 SOURCE=.\uniset.h

 !IF  "$(CFG)" == "i18n - Win32 Release"
@ -1034,6 +1268,33 @@ InputPath=.\uniset.h
 # End Source File
 # Begin Source File

+SOURCE=.\unitohex.h
+
+!IF  "$(CFG)" == "i18n - Win32 Release"
+
+# Begin Custom Build
+InputPath=.\unitohex.h
+
+"..\..\include\unitohex.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy               unitohex.h                ..\..\include
+
+# End Custom Build
+
+!ELSEIF  "$(CFG)" == "i18n - Win32 Debug"
+
+# Begin Custom Build
+InputPath=.\unitohex.h
+
+"..\..\include\unitohex.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy               unitohex.h                ..\..\include
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
 SOURCE=.\unum.h

 !IF  "$(CFG)" == "i18n - Win32 Release"
--- a/icu4c/source/i18n/rbt.cpp
+++ b/icu4c/source/i18n/rbt.cpp
@ -0,0 +1,227 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#include "rbt.h"
+#include "rbt_pars.h"
+#include "rbt_data.h"
+#include "rbt_rule.h"
+#include "rep.h"
+
+/**
+ * Shared helper for the rule-string constructors.  Parses the rules for
+ * one direction and stores the resulting data object, which this
+ * instance is flagged as owning.
+ * @param rules the rule text handed to the parser
+ * @param direction selects whether forward or reverse rules are parsed
+ * @param status in/out error code.  If it already indicates failure on
+ * entry, nothing is parsed and data stays null.  It is set to
+ * U_ILLEGAL_ARGUMENT_ERROR when the parser returns no data object.
+ */
+void RuleBasedTransliterator::_construct(const UnicodeString& rules,
+                                         Direction direction,
+                                         UErrorCode& status) {
+    isDataOwned = TRUE;
+    data = 0;
+    if (U_FAILURE(status)) {
+        // Caller already failed: leave data null and status untouched.
+        return;
+    }
+    data = TransliterationRuleParser::parse(rules, direction);
+    if (data == 0) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+}
+
+/**
+ * Constructs a transliterator around an already-parsed rule data object.
+ * The data object is shared rather than adopted: isDataOwned is FALSE.
+ * @param ID identifier passed on to the Transliterator base class
+ * @param theData the parsed rules this instance will use
+ * @param adoptedFilter filter passed on to the Transliterator base class
+ */
+RuleBasedTransliterator::RuleBasedTransliterator(const UnicodeString& ID,
+                                 const TransliterationRuleData* theData,
+                                 UnicodeFilter* adoptedFilter) :
+    Transliterator(ID, adoptedFilter),
+    // The data member is declared as a pointer to non-const, so the
+    // const qualifier must be cast away explicitly; an implicit
+    // const T* -> T* conversion is not valid C++.
+    data((TransliterationRuleData*) theData),
+    isDataOwned(FALSE) {}
+
+/**
+ * Copy constructor.  Since the data object is immutable, we can share
+ * it with other objects -- no need to clone it.  The copy only borrows
+ * the data object, so it is never marked as its owner.
+ * @param other the transliterator whose ID, filter and rule data are
+ * copied/shared
+ */
+RuleBasedTransliterator::RuleBasedTransliterator(
+        const RuleBasedTransliterator& other) :
+    Transliterator(other), data(other.data),
+    // The ownership flag must be initialized explicitly; otherwise the
+    // copy would carry an indeterminate value.
+    isDataOwned(FALSE) {}
+
+/**
+ * Destructor.  Releases nothing: the data object is never deleted here.
+ * The copy constructor shares the data pointer between instances, so an
+ * unconditional delete would not be safe.
+ *
+ * NOTE(review): _construct() stores a freshly parsed data object and sets
+ * the ownership flag to TRUE, yet that object is not freed here either.
+ * This looks like a leak for rule-string-constructed instances; confirm
+ * who is expected to release it before adding a delete.
+ */
+RuleBasedTransliterator::~RuleBasedTransliterator() {}
+
+/**
+ * Implements the Transliterator clone API by copy-constructing a new
+ * heap-allocated RuleBasedTransliterator from this one.
+ * @return the newly allocated copy, typed as the base class
+ */
+Transliterator* // Covariant return NOT ALLOWED (for portability)
+RuleBasedTransliterator::clone() const {
+    Transliterator* copy = new RuleBasedTransliterator(*this);
+    return copy;
+}
+
+/**
+ * Transliterates a segment of a string.  <code>Transliterator</code> API.
+ * The source text is left untouched; the output is built up in
+ * <code>result</code>.
+ * @param text the string to be transliterated
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param result buffer to receive the transliterated text; previous
+ * contents are discarded
+ */
+void RuleBasedTransliterator::transliterate(const UnicodeString& text,
+                                            int32_t start, int32_t limit,
+                                            UnicodeString& result) const {
+    /* In the following loop there is a virtual buffer consisting of the
+     * text transliterated so far followed by the untransliterated text.  There is
+     * also a cursor, which may be in the already transliterated buffer or just
+     * before the untransliterated text.
+     *
+     * Example: rules 1. ab>x|y
+     *                2. yc>z
+     *
+     * []|eabcd  start - no match, copy e to transliterated buffer
+     * [e]|abcd  match rule 1 - copy output & adjust cursor
+     * [ex|y]cd  match rule 2 - copy output & adjust cursor
+     * [exz]|d   no match, copy d to transliterated buffer
+     * [exzd]|   done
+     *
+     * cursor: an index into the virtual buffer, 0..result.length()-1.
+     * Matches take place at the cursor.  If there is no match, the cursor
+     * is advanced, and one character is moved from the source text to the
+     * result buffer.
+     *
+     * start, limit: these designate the substring of the source text which
+     * has not been processed yet.  The range of offsets is start..limit-1.
+     * At any moment the virtual buffer consists of result +
+     * text.substring(start, limit).
+     */
+    int32_t cursor = 0;
+    result.remove();
+    // Keep going while there is unread source text, or while the cursor
+    // still sits inside already-produced output (a rule may have backed
+    // it up).
+    while (start < limit || cursor < result.length()) {
+        TransliterationRule* r = data->ruleSet.findMatch(text, start, limit,
+                                                         result,
+                                                         cursor,
+                                                         *data,
+                                                         getFilter());
+        if (r == 0) {
+            // No rule applies here.  Only when the cursor is at the very
+            // end of result does a source character have to be consumed;
+            // otherwise the cursor just steps over existing output.
+            if (cursor == result.length()) {
+                result.append(text.charAt(start++));
+            }
+            ++cursor;
+        } else {
+            // At this point we have a match of one or more
+            // characters.  The characters cover the range [cursor,
+            // cursor + r->getKeyLength()) - a half-open interval.
+            // The index values refer to a virtual buffer with result
+            // holding [0, result.length()) and text holding
+            // [result.length(),...).
+
+            // First, figure out the range of result being replaced.
+            int32_t rfirst = cursor;
+            int32_t rlimit = icu_min(result.length(),
+                                     cursor + r->getKeyLength());
+
+            // resultPad is length of result to right of cursor; >= 0
+            int32_t resultPad = result.length() - cursor;
+
+            // Any part of the key that extends past the end of result
+            // was matched against unread source text; mark that many
+            // source characters as consumed.
+            if (r->getKeyLength() > resultPad) {
+                start += r->getKeyLength() - resultPad;
+            }
+            
+            result.replaceBetween(rfirst, rlimit,
+                                  r->getOutput());
+
+            // Reposition relative to the start of the replacement.
+            cursor += r->getCursorPos();
+        }
+    }
+}
+
+/**
+ * Transliterates a segment of a string in place.
+ * <code>Transliterator</code> API.
+ * @param text the text to be transliterated; modified directly
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @return The new limit index, adjusted for every replacement whose
+ * output length differs from its key length
+ */
+int32_t RuleBasedTransliterator::transliterate(Replaceable& text,
+                                               int32_t start,
+                                               int32_t limit) const {
+    /* With a Replaceable there is a single buffer, so the bookkeeping is
+     * light: start stays put, limit shifts by the size difference of each
+     * replacement, and the position walks from start towards limit with
+     * replacements happening underneath it.
+     *
+     * Example: rules 1. ab>x|y
+     *                2. yc>z
+     *
+     * |eabcd   start - no match, advance
+     * e|abcd   match rule 1 - change text & reposition
+     * ex|ycd   match rule 2 - change text & reposition
+     * exz|d    no match, advance
+     * exzd|    done
+     */
+    for (int32_t pos = start; pos < limit; ) {
+        TransliterationRule* rule =
+            data->ruleSet.findMatch(text, start, limit,
+                                    pos, *data,
+                                    getFilter());
+        if (rule == 0) {
+            // Nothing matches at this position; move on by one.
+            ++pos;
+            continue;
+        }
+        int32_t keyLen = rule->getKeyLength();
+        text.handleReplaceBetween(pos, pos + keyLen,
+                                  rule->getOutput());
+        // The text grew or shrank by (output length - key length).
+        limit += rule->getOutput().length() - keyLen;
+        pos += rule->getCursorPos();
+    }
+    return limit;
+}
+
+/**
+ * Implements {@link Transliterator#handleKeyboardTransliterate}.
+ * Applies rules from index[CURSOR] onwards until the cursor reaches
+ * index[LIMIT] or a partial match is reported, then stores the updated
+ * limit and cursor back into the array.  index[START] is read but never
+ * written.
+ * @param text the text being edited; modified in place
+ * @param index the START, LIMIT and CURSOR offsets; LIMIT and CURSOR
+ * are updated on return
+ */
+void
+RuleBasedTransliterator::handleKeyboardTransliterate(Replaceable& text,
+                                                     int32_t index[3]) const {
+    int32_t start = index[START];
+    int32_t limit = index[LIMIT];
+    int32_t pos = index[CURSOR];
+
+    bool_t isPartial;
+
+    while (pos < limit) {
+        TransliterationRule* rule = data->ruleSet.findIncrementalMatch(
+                text, start, limit, pos,
+                *data, isPartial,
+                getFilter());
+        if (rule != 0) {
+            // Full match: swap the key for the rule output, shift the
+            // limit by the size difference and reposition.
+            int32_t keyLen = rule->getKeyLength();
+            text.handleReplaceBetween(pos, pos + keyLen,
+                                      rule->getOutput());
+            limit += rule->getOutput().length() - keyLen;
+            pos += rule->getCursorPos();
+            continue;
+        }
+        if (isPartial) {
+            // A rule might match once more text arrives; stop here and
+            // leave the cursor where it is.
+            break;
+        }
+        // No match and no partial match at this position: step over it.
+        ++pos;
+    }
+
+    index[LIMIT] = limit;
+    index[CURSOR] = pos;
+}
+
+/**
+ * Returns the length of the longest context required by this transliterator.
+ * This is <em>preceding</em> context.  The value is obtained from the
+ * rule set held by the data object.
+ * @return Maximum number of preceding context characters this
+ * transliterator needs to examine
+ */
+int32_t RuleBasedTransliterator::getMaximumContextLength() const {
+    return data->ruleSet.getMaximumContextLength();
+}
--- a/icu4c/source/i18n/rbt.h
+++ b/icu4c/source/i18n/rbt.h
@ -0,0 +1,377 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#ifndef RBT_H
+#define RBT_H
+
+#include "translit.h"
+#include "uhash.h"
+#include "utypes.h"
+
+class TransliterationRuleData;
+
+/**
+ * A transliterator that reads a set of rules in order to determine how to
+ * perform translations.  Rules are stored in resource bundles indexed by name.
+ * Rules are separated by newline characters ('\n'); to include a literal
+ * newline, prefix it with a backslash ('\\\n').  Whitespace is significant.  If
+ * the first character on a line is '#', the entire line is ignored as a
+ * comment.
+ *
+ * <p>Each set of rules consists of two groups, one forward, and one reverse.
+ * This is a convention that is not enforced; rules for one direction may be
+ * omitted, with the result that translations in that direction will not modify
+ * the source text.
+ *
+ * <p><b>Rule syntax</b>
+ *
+ * <p>Rule statements take one of the following forms:
+ * <dl>
+ *   <dt><code>alefmadda=&#092;u0622</code></dt>
+ *
+ *   <dd><strong>Variable definition.</strong> The name on the left is
+ *   assigned the character or expression on the right. Names may not
+ *   contain any special characters (see list below). Duplicate names
+ *   (including duplicates of simple variables or category names)
+ *   cause an exception to be thrown.  If the right hand side consists
+ *   of one character, then the variable stands for that character.
+ *   In this example, after this statement, instances of the left hand
+ *   name surrounded by braces, &quot;<code>{alefmadda}</code>&quot;,
+ *   will be replaced by the Unicode character U+0622.  If the
+ *   right hand side is longer than one character, then it is
+ *   interpreted as a character category expression; see below for
+ *   details.</dd>
+ *
+ *   <dt><code>softvowel=[eiyEIY]</code></dt>
+ *
+ *   <dd><strong>Category definition.</strong> The name on the left is assigned
+ *   to stand for a set of characters.  The same rules for names of simple
+ *   variables apply. After this statement, the left hand variable will be
+ *   interpreted as indicating a set of characters in appropriate contexts. The
+ *   pattern syntax defining sets of characters is defined by {@link UnicodeSet}.
+ *   Examples of valid patterns are:<table>
+ *
+ *       <tr valign=top>
+ *         <td nowrap><code>[abc]</code></td>
+ *         <td>The set containing the characters 'a', 'b', and 'c'.</td>
+ *       </tr>
+ *       <tr valign=top>
+ *         <td nowrap><code>[^abc]</code></td>
+ *         <td>The set of all characters <em>except</em> 'a', 'b', and 'c'.</td>
+ *       </tr>
+ *       <tr valign=top>
+ *         <td nowrap><code>[A-Z]</code></td>
+ *         <td>The set of all characters from 'A' to 'Z' in Unicode order.</td>
+ *       </tr>
+ *       <tr valign=top>
+ *         <td nowrap><code>[:Lu:]</code></td>
+ *         <td>The set of Unicode uppercase letters. See
+ *         <a href="http://www.unicode.org">www.unicode.org</a>
+ *         for a complete list of categories and their two-letter codes.</td>
+ *       </tr>
+ *       <tr valign=top>
+ *         <td nowrap><code>[^a-z[:Lu:][:Ll:]]</code></td>
+ *         <td>The set of all characters <em>except</em> 'a' through 'z' and
+ *         uppercase or lowercase letters.</td>
+ *       </tr>
+ *     </table>
+ *
+ *   See {@link UnicodeSet} for more documentation and examples.
+ *   </dd>
+ *
+ *   <dt><code>ai&gt;{alefmadda}</code></dt>
+ *
+ *   <dd><strong>Forward translation rule.</strong> This rule states that the
+ *   string on the left will be changed to the string on the right when
+ *   performing forward transliteration.</dd>
+ *
+ *   <dt><code>ai&lt;{alefmadda}</code></dt>
+ *
+ *   <dd><strong>Reverse translation rule.</strong> This rule states that the
+ *   string on the right will be changed to the string on the left when
+ *   performing reverse transliteration.</dd>
+ *
+ * </dl>
+ *
+ * <p>Forward and reverse translation rules consist of a <em>match
+ * pattern</em> and an <em>output string</em>.  The match pattern consists
+ * of literal characters, optionally preceded by context, and optionally
+ * followed by context.  Context characters, like literal pattern characters,
+ * must be matched in the text being transliterated.  However, unlike literal
+ * pattern characters, they are not replaced by the output text.  For example,
+ * the pattern "<code>[abc]def</code>" indicates the characters
+ * "<code>def</code>" must be preceded by "<code>abc</code>" for a successful
+ * match.  If there is a successful match, "<code>def</code>" will be replaced,
+ * but not "<code>abc</code>".  The initial '<code>[</code>' is optional, so
+ * "<code>abc]def</code>" is equivalent to "<code>[abc]def</code>".  Another
+ * example is "<code>123[456]</code>" (or "<code>123[456</code>") in which the
+ * literal pattern "<code>123</code>" must be followed by "<code>456</code>".
+ *
+ * <p>The output string of a forward or reverse rule consists of characters to
+ * replace the literal pattern characters.  If the output string contains the
+ * character '<code>|</code>', this is taken to indicate the location of the
+ * <em>cursor</em> after replacement.  The cursor is the point in the text
+ * at which the next replacement, if any, will be applied.
+ *
+ * <p><b>Example</b>
+ *
+ * <p>The following example rules illustrate many of the features of the rule
+ * language.
+ * <table cellpadding="4">
+ * <tr valign=top><td>Rule 1.</td>
+ *     <td nowrap><code>abc]def&gt;x|y</code></td></tr>
+ * <tr valign=top><td>Rule 2.</td>
+ *     <td nowrap><code>xyz&gt;r</code></td></tr>
+ * <tr valign=top><td>Rule 3.</td>
+ *     <td nowrap><code>yz&gt;q</code></td></tr>
+ * </table>
+ *
+ * <p>Applying these rules to the string "<code>adefabcdefz</code>" yields the
+ * following results:
+ *
+ * <table cellpadding="4">
+ * <tr valign=top><td nowrap><code>|adefabcdefz</code></td>
+ *     <td>Initial state, no rules match.  Advance cursor.</td></tr>
+ * <tr valign=top><td nowrap><code>a|defabcdefz</code></td>
+ *     <td>Still no match.  Rule 1 does not match because the preceding
+ *     context is not present.</td></tr>
+ * <tr valign=top><td nowrap><code>ad|efabcdefz</code></td>
+ *     <td>Still no match.  Keep advancing until there is a match...</td></tr>
+ * <tr valign=top><td nowrap><code>ade|fabcdefz</code></td>
+ *     <td>...</td></tr>
+ * <tr valign=top><td nowrap><code>adef|abcdefz</code></td>
+ *     <td>...</td></tr>
+ * <tr valign=top><td nowrap><code>adefa|bcdefz</code></td>
+ *     <td>...</td></tr>
+ * <tr valign=top><td nowrap><code>adefab|cdefz</code></td>
+ *     <td>...</td></tr>
+ * <tr valign=top><td nowrap><code>adefabc|defz</code></td>
+ *     <td>Rule 1 matches; replace "<code>def</code>" with "<code>xy</code>"
+ *     and back up the cursor to before the '<code>y</code>'.</td></tr>
+ * <tr valign=top><td nowrap><code>adefabcx|yz</code></td>
+ *     <td>Although "<code>xyz</code>" is present, rule 2 does not match
+ *     because the cursor is before the '<code>y</code>', not before the
+ *     '<code>x</code>'.  Rule 3 does match.  Replace "<code>yz</code>" with
+ *     "<code>q</code>".</td></tr>
+ * <tr valign=top><td nowrap><code>adefabcxq|</code></td>
+ *     <td>The cursor is at the end; transliteration is complete.</td></tr>
+ * </table>
+ *
+ * <p>The order of rules is significant.  If multiple rules may match at some
+ * point, the first matching rule is applied.
+ *
+ * <p>Forward and reverse rules may have an empty output string.  Otherwise, an
+ * empty left or right hand side of any statement is a syntax error.
+ *
+ * <p>Single quotes are used to quote the special characters
+ * <code>=&gt;&lt;{}[]|</code>.  To specify a single quote itself, inside or
+ * outside of quotes, use two single quotes in a row.  For example, the rule
+ * "<code>'&gt;'&gt;o''clock</code>" changes the string "<code>&gt;</code>" to
+ * the string "<code>o'clock</code>".
+ *
+ * <p><b>Notes</b>
+ *
+ * <p>While a RuleBasedTransliterator is being built, it checks that the rules
+ * are added in proper order.  For example, if the rule "a>x" is followed by the
+ * rule "ab>y", then the second rule will throw an exception.  The reason is
+ * that the second rule can never be triggered, since the first rule always
+ * matches anything it matches.  In other words, the first rule <em>masks</em>
+ * the second rule.  There is a cost of O(n^2) to make this check; in real-world
+ * tests it appears to approximately double build time.
+ *
+ * <p>One optimization that can be made is to add a pragma to the rule language,
+ * "#pragma order", that turns off ordering checking.  This pragma can then be
+ * added to all of our resource-based rules (after we build these once and
+ * determine that there are no ordering errors).  I haven't made this change yet
+ * in the interests of keeping the code from getting too byzantine.
+ *
+ * @author Alan Liu
+ */
+class U_I18N_API RuleBasedTransliterator : public Transliterator {
+
+    /**
+     * The data object is immutable, so we can freely share it with
+     * other instances of RBT, as long as we do NOT own this object.
+     */
+    TransliterationRuleData* data;
+
+    /**
+     * If true, we own the data object and must delete it.
+     * The name matches the one used by the implementation in rbt.cpp.
+     */
+    bool_t isDataOwned;
+
+public:
+
+    /**
+     * Direction constant passed to constructor to specify whether forward
+     * or reverse rules are parsed.  The other rules are ignored.
+     */
+    enum Direction {
+        /**
+         * Direction constant passed to constructor to create a transliterator
+         * using the forward rules.
+         */
+        FORWARD,
+
+        /**
+         * Direction constant passed to constructor to create a transliterator
+         * using the reverse rules.
+         */
+        REVERSE
+    };
+
+    /**
+     * Constructs a new transliterator from the given rules.
+     * @param ID identifier passed to the Transliterator base class
+     * @param rules rules, separated by '\n'
+     * @param direction either FORWARD or REVERSE.
+     * @param adoptedFilter filter passed to the Transliterator base class
+     * @param status in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR
+     * when the rule parser returns no data.
+     */
+    RuleBasedTransliterator(const UnicodeString& ID,
+                            const UnicodeString& rules,
+                            Direction direction,
+                            UnicodeFilter* adoptedFilter,
+                            UErrorCode& status);
+
+    /**
+     * Convenience constructor with no filter.
+     */
+    RuleBasedTransliterator(const UnicodeString& ID,
+                            const UnicodeString& rules,
+                            Direction direction,
+                            UErrorCode& status);
+
+    /**
+     * Convenience constructor with no filter and FORWARD direction.
+     */
+    RuleBasedTransliterator(const UnicodeString& ID,
+                            const UnicodeString& rules,
+                            UErrorCode& status);
+
+    /**
+     * Convenience constructor with FORWARD direction.
+     */
+    RuleBasedTransliterator(const UnicodeString& ID,
+                            const UnicodeString& rules,
+                            UnicodeFilter* adoptedFilter,
+                            UErrorCode& status);
+
+    /**
+     * Constructs a transliterator that uses an existing, already parsed
+     * rule data object.  The data object is shared, not owned.
+     * @param ID identifier passed to the Transliterator base class
+     * @param theData the parsed rules to use
+     * @param adoptedFilter filter passed to the Transliterator base
+     * class; defaults to none
+     */
+    RuleBasedTransliterator(const UnicodeString& ID,
+                            const TransliterationRuleData* theData,
+                            UnicodeFilter* adoptedFilter = 0);
+
+    /**
+     * Copy constructor.  The copy shares the source's data object.
+     */
+    RuleBasedTransliterator(const RuleBasedTransliterator&);
+
+    virtual ~RuleBasedTransliterator();
+
+    /**
+     * Implement Transliterator API.
+     */
+    Transliterator* clone() const;
+
+    /**
+     * Transliterates a segment of a string.  <code>Transliterator</code> API.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param result buffer to receive the transliterated text; previous
+     * contents are discarded
+     */
+    virtual void transliterate(const UnicodeString& text,
+                               int32_t start, int32_t limit,
+                               UnicodeString& result) const;
+
+    /**
+     * Transliterates a segment of a string.  <code>Transliterator</code> API.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @return The new limit index
+     */
+    virtual int32_t transliterate(Replaceable& text,
+                                  int32_t start, int32_t limit) const;
+
+    /**
+     * Implements {@link Transliterator#handleKeyboardTransliterate}.
+     */
+    virtual void handleKeyboardTransliterate(Replaceable& text,
+                                             int32_t index[3]) const;
+
+    /**
+     * Returns the length of the longest context required by this transliterator.
+     * This is <em>preceding</em> context.
+     * @return Maximum number of preceding context characters this
+     * transliterator needs to examine
+     */
+    virtual int32_t getMaximumContextLength() const;
+
+private:
+
+    /**
+     * Shared helper for the rule-string constructors: parses the rules
+     * for the given direction and stores the resulting data object.
+     */
+    void _construct(const UnicodeString& rules,
+                    Direction direction,
+                    UErrorCode& status);
+};
+
+/**
+ * Constructs a new transliterator from the given rules.
+ * @param ID identifier passed to the Transliterator base class
+ * @param rules rules, separated by '\n'
+ * @param direction either FORWARD or REVERSE.
+ * @param adoptedFilter filter passed to the Transliterator base class
+ * @param status in/out error code, forwarded to _construct(), which
+ * does the actual parsing and error reporting
+ */
+inline RuleBasedTransliterator::RuleBasedTransliterator(
+                            const UnicodeString& ID,
+                            const UnicodeString& rules,
+                            Direction direction,
+                            UnicodeFilter* adoptedFilter,
+                            UErrorCode& status) :
+    Transliterator(ID, adoptedFilter) {
+    _construct(rules, direction, status);
+}
+
+/**
+ * Convenience constructor with no filter: a null filter is passed to the
+ * Transliterator base class and the rules are parsed for the given
+ * direction.
+ */
+inline RuleBasedTransliterator::RuleBasedTransliterator(
+                            const UnicodeString& ID,
+                            const UnicodeString& rules,
+                            Direction direction,
+                            UErrorCode& status) :
+    Transliterator(ID, 0) {
+    _construct(rules, direction, status);
+}
+
+/**
+ * Convenience constructor with no filter and FORWARD direction: a null
+ * filter is passed to the Transliterator base class and the forward
+ * rules are parsed.
+ */
+inline RuleBasedTransliterator::RuleBasedTransliterator(
+                            const UnicodeString& ID,
+                            const UnicodeString& rules,
+                            UErrorCode& status) :
+    Transliterator(ID, 0) {
+    _construct(rules, FORWARD, status);
+}
+
+/**
+ * Convenience constructor with FORWARD direction: the given filter is
+ * passed to the Transliterator base class and the forward rules are
+ * parsed.
+ */
+inline RuleBasedTransliterator::RuleBasedTransliterator(
+                            const UnicodeString& ID,
+                            const UnicodeString& rules,
+                            UnicodeFilter* adoptedFilter,
+                            UErrorCode& status) :
+    Transliterator(ID, adoptedFilter) {
+    _construct(rules, FORWARD, status);
+}
+
+#endif
--- a/icu4c/source/i18n/rbt_data.cpp
+++ b/icu4c/source/i18n/rbt_data.cpp
@ -0,0 +1,83 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#include "rbt_data.h"
+#include "uhash.h"
+#include "unistr.h"
+
+/**
+ * Creates an empty rule data object and opens its two hash tables.
+ * @param status in/out error code.  If it already indicates failure on
+ * entry, neither table is opened and both pointers stay null; otherwise
+ * it is handed to both uhash_open calls.
+ */
+TransliterationRuleData::TransliterationRuleData(UErrorCode& status) :
+    variableNames(0), setVariables(0) {
+    if (!U_FAILURE(status)) {
+        variableNames = uhash_open(uhash_hashUString, &status);
+        setVariables = uhash_open(0, &status);
+    }
+}
+
+/**
+ * Destructor.  Closes whichever of the two hash tables was opened; a
+ * table pointer may be null if the constructor bailed out early.
+ *
+ * NOTE(review): setVariables holds UnicodeSet objects that
+ * defineVariable() adopts.  Whether uhash_close() releases them is not
+ * visible here -- confirm they are not leaked.
+ */
+TransliterationRuleData::~TransliterationRuleData() {
+    if (variableNames != 0) {
+        uhash_close(variableNames);
+    }
+    if (setVariables != 0) {
+        uhash_close(setVariables);
+    }
+}
+
+/**
+ * Defines a variable that stands for a single literal character.
+ * The entry is keyed by the name's hash code with the sign bit masked
+ * off, and the character itself is stored as the table value.
+ * @param name the variable name
+ * @param value the character the variable stands for
+ * @param status in/out error code; nothing is stored if it already
+ * indicates failure on entry
+ */
+void
+TransliterationRuleData::defineVariable(const UnicodeString& name,
+                                        UChar value,
+                                        UErrorCode& status) {
+    // Same guard as the set-valued overload: after a failure (including
+    // a failed constructor, which leaves variableNames null) the table
+    // must not be touched.
+    if (U_FAILURE(status)) {
+        return;
+    }
+    uhash_putKey(variableNames, name.hashCode() & 0x7FFFFFFF,
+                 (void*) value,
+                 &status);
+}
+
+/**
+ * Defines a variable that stands for a set of characters.  The name is
+ * mapped to the stand-in character in variableNames, and the stand-in
+ * is mapped to the set in setVariables.
+ * @param name the variable name
+ * @param standIn the character that represents the set
+ * @param adoptedSet the set the variable stands for; a null pointer is
+ * reported as U_MEMORY_ALLOCATION_ERROR
+ * @param status in/out error code; nothing is stored if it already
+ * indicates failure on entry
+ */
+void
+TransliterationRuleData::defineVariable(const UnicodeString& name,
+                                        UChar standIn,
+                                        UnicodeSet* adoptedSet,
+                                        UErrorCode& status) {
+    if (!U_FAILURE(status)) {
+        if (adoptedSet != 0) {
+            // name -> stand-in character
+            uhash_putKey(variableNames, name.hashCode() & 0x7FFFFFFF,
+                         (void*) standIn,
+                         &status);
+            // stand-in character -> set
+            uhash_putKey(setVariables, (int32_t) (standIn & 0x7FFFFFFF),
+                         adoptedSet,
+                         &status);
+        } else {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+/**
+ * Looks up the character stored for a variable name.
+ * @param name the variable name
+ * @param status in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR when
+ * the table has no entry for the name
+ * @return the stored character, or 0 if status indicated failure on
+ * entry or no entry was found
+ */
+UChar
+TransliterationRuleData::lookupVariable(const UnicodeString& name,
+                                        UErrorCode& status) const {
+    if (U_FAILURE(status)) {
+        return 0;
+    }
+    void* entry = uhash_get(variableNames, name.hashCode() & 0x7FFFFFFF);
+    if (entry != 0) {
+        // The character was stored directly in the pointer-sized slot.
+        return (UChar) (int32_t) entry;
+    }
+    status = U_ILLEGAL_ARGUMENT_ERROR;
+    return 0;
+}
+
+/**
+ * Looks up the set registered for a stand-in character.
+ * @param standIn the stand-in character used as the table key
+ * @return whatever setVariables holds for that key, cast to a set
+ * pointer
+ */
+UnicodeSet*
+TransliterationRuleData::lookupSet(UChar standIn) const {
+    return (UnicodeSet*) uhash_get(setVariables,
+                                   (int32_t) (standIn & 0x7FFFFFFF));
+}
+
+/**
+ * Tells whether variableNames holds a non-null entry for the name.
+ * @param name the variable name
+ * @return true if the lookup yields a non-null value
+ */
+bool_t
+TransliterationRuleData::isVariableDefined(const UnicodeString& name) const {
+    void* entry = uhash_get(variableNames, name.hashCode() & 0x7FFFFFFF);
+    return entry != 0;
+}
--- a/icu4c/source/i18n/rbt_data.h
+++ b/icu4c/source/i18n/rbt_data.h
@ -0,0 +1,85 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#ifndef RBT_DATA_H
+#define RBT_DATA_H
+
+#include "rbt_set.h"
+
+class UnicodeString;
+class UnicodeSet;
+struct UHashtable;
+
+/**
+ * The rule data for a RuleBasedTransliterators.  RBT objects hold
+ * a const pointer to a TRD object that they do not own.  TRD objects
+ * are essentially the parsed rules in compact, usable form.  The
+ * TRD objects themselves are held for the life of the process in
+ * a static cache owned by Transliterator.
+ *
+ * Instances own two hash tables that the destructor closes, so they
+ * must not be copied; copying is disabled below.
+ */
+class TransliterationRuleData {
+
+public:
+
+    /**
+     * Rule table.  May be empty.
+     *
+     * PUBLIC DATA MEMBER for internal use by RBT
+     */
+    TransliterationRuleSet ruleSet;
+
+    /**
+     * Map variable name (UnicodeString) to variable (Character).
+     * A variable name may correspond to a single literal
+     * character, in which case the character is stored in this
+     * hash.  It may also correspond to a UnicodeSet, in which
+     * case a character is again stored in this hash, but the
+     * character is a stand-in: it is a key for a secondary lookup
+     * in data.setVariables.  The stand-in also represents the
+     * UnicodeSet in the stored rules.
+     *
+     * PUBLIC DATA MEMBER for internal use by RBT
+     */
+    UHashtable* variableNames;
+
+    /**
+     * Map category variable (UChar) to set (UnicodeSet).
+     * Variables that correspond to a set of characters are mapped
+     * from variable name to a stand-in character in
+     * data.variableNames.  The stand-in then serves as a key in
+     * this hash to lookup the actual UnicodeSet object.  In
+     * addition, the stand-in is stored in the rule text to
+     * represent the set of characters.
+     *
+     * PUBLIC DATA MEMBER for internal use by RBT
+     */
+    UHashtable* setVariables;
+
+    /**
+     * Creates an empty data object and opens both hash tables.
+     * @param status in/out error code
+     */
+    TransliterationRuleData(UErrorCode& status);
+
+    /**
+     * Closes the hash tables that were opened.
+     */
+    ~TransliterationRuleData();
+
+    /**
+     * Defines a variable that stands for a single character.
+     */
+    void defineVariable(const UnicodeString& name,
+                        UChar value,
+                        UErrorCode& status);
+
+    /**
+     * Defines a variable that stands for a set of characters,
+     * represented in the rules by the given stand-in character.
+     */
+    void defineVariable(const UnicodeString& name,
+                        UChar standIn,
+                        UnicodeSet* adoptedSet,
+                        UErrorCode& status);
+
+    /**
+     * Returns the character stored for the given variable name.
+     */
+    UChar lookupVariable(const UnicodeString& name,
+                         UErrorCode& status) const;
+
+    /**
+     * Returns the set registered for the given stand-in character.
+     */
+    UnicodeSet* lookupSet(UChar standIn) const;
+
+    /**
+     * Tells whether a variable of the given name has been defined.
+     */
+    bool_t isVariableDefined(const UnicodeString& name) const;
+
+private:
+
+    /**
+     * Copying is disabled (declared but never defined).  A member-wise
+     * copy would duplicate the raw hash table pointers, and both
+     * objects' destructors would then close the same tables.
+     */
+    TransliterationRuleData(const TransliterationRuleData&);
+    TransliterationRuleData& operator=(const TransliterationRuleData&);
+};
+
+#endif
--- a/icu4c/source/i18n/rbt_pars.cpp
+++ b/icu4c/source/i18n/rbt_pars.cpp
@ -0,0 +1,640 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#include "rbt_pars.h"
+#include "rbt.h"
+#include "rbt_rule.h"
+#include "unirange.h"
+#include "rbt_data.h"
+#include "uniset.h"
+
+// Operators
+// Each rule is split into a left and a right side at its first unquoted
+// operator character (see parseRule()).
+const UChar TransliterationRuleParser::VARIABLE_DEF_OP = '='; // name=value variable definition
+const UChar TransliterationRuleParser::FORWARD_RULE_OP = '>'; // match>output; kept when direction is FORWARD
+const UChar TransliterationRuleParser::REVERSE_RULE_OP = '<'; // output<match; kept when direction is REVERSE
+const char* TransliterationRuleParser::OPERATORS = "=><";     // all three operators, for searching
+
+// Other special characters
+const UChar TransliterationRuleParser::QUOTE = '\'';             // delimits literal text; '' is a literal quote
+const UChar TransliterationRuleParser::VARIABLE_REF_OPEN = '{';  // opens a variable reference: {name}
+const UChar TransliterationRuleParser::VARIABLE_REF_CLOSE = '}'; // closes a variable reference
+const UChar TransliterationRuleParser::CONTEXT_OPEN = '[';       // key[postContext
+const UChar TransliterationRuleParser::CONTEXT_CLOSE = ']';      // anteContext]key
+const UChar TransliterationRuleParser::CURSOR_POS = '|';         // cursor marker in an output pattern
+const UChar TransliterationRuleParser::RULE_COMMENT_CHAR = '#';  // a line starting with this is skipped
+
+
+/**
+ * Specials must be quoted in rules to be used as literals.
+ * Specials may not occur in variable names.
+ *
+ * This string is a superset of OPERATORS.
+ */
+const char* TransliterationRuleParser::SPECIALS = "'{}[]|#=><";
+
+/**
+ * Specials that must be quoted in variable definitions.
+ * This is a subset of SPECIALS; the remaining characters of SPECIALS
+ * are taken literally on the right-hand side of a variable definition
+ * (see parseDefPattern()).
+ */
+const char* TransliterationRuleParser::DEF_SPECIALS = "'{}";
+
+/**
+ * Parses a complete rule string in one step.
+ * @param rules the rule text; individual rules are separated by newlines
+ * @param direction selects whether forward or reverse rules are kept
+ * @return the rule data built by the parser, or 0 if parsing failed.
+ * NOTE(review): the returned object is presumably owned by the caller --
+ * confirm against the call sites.
+ */
+TransliterationRuleData*
+TransliterationRuleParser::parse(const UnicodeString& rules,
+                                 RuleBasedTransliterator::Direction direction) {
+    TransliterationRuleParser parser(rules, direction);
+    parser.parseRules();
+
+    if (U_SUCCESS(parser.status)) {
+        return parser.data;
+    }
+
+    // Parsing failed: throw away whatever was built so far and report
+    // the failure to the caller as a null result.
+    delete parser.data;
+    parser.data = 0;
+    return 0;
+}
+
+/**
+ * Constructs a parser over the given rule text.  Nothing is parsed here;
+ * parseRules() does the actual work and is also what first assigns the
+ * error status.
+ * @param theRules list of rules, separated by newline characters.  The
+ * string is referenced, not copied.
+ * @param theDirection selects whether forward or reverse rules are kept
+ */
+TransliterationRuleParser::TransliterationRuleParser(
+        const UnicodeString& theRules,
+        RuleBasedTransliterator::Direction theDirection)
+    : rules(theRules),
+      direction(theDirection),
+      data(0) {
+}
+
+/**
+ * Parses the rule string held by this parser as a sequence of rules,
+ * separated by newline characters ('\n'), and builds the corresponding
+ * rule data.  Any previously built data is discarded.  Typically this
+ * method is called exactly once, right after construction.
+ *
+ * Errors are not thrown; they are reported through the <code>status</code>
+ * member, and parsing stops at the first rule that fails.
+ */
+void TransliterationRuleParser::parseRules() {
+    status = U_ZERO_ERROR;
+
+    // Start from a clean slate.
+    delete data;
+    data = new TransliterationRuleData(status);
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    determineVariableRange();
+
+    const int32_t length = rules.length();
+    for (int32_t lineStart = 0; lineStart < length && U_SUCCESS(status); ) {
+        int32_t lineEnd = rules.indexOf('\n', lineStart);
+
+        // A newline preceded by a backslash is escaped and does not end
+        // the rule; keep looking for the next one.
+        while (lineEnd > 0 && rules.charAt(lineEnd-1) == '\\') {
+            lineEnd = rules.indexOf('\n', lineEnd+1);
+        }
+
+        // No further newline: the last rule runs to the end of the text.
+        if (lineEnd < 0) {
+            lineEnd = length;
+        }
+
+        // Empty lines and lines that begin with '#' carry no rule.
+        if (lineStart < lineEnd
+            && rules.charAt(lineStart) != RULE_COMMENT_CHAR) {
+            applyRule(lineStart, lineEnd);
+        }
+
+        lineStart = lineEnd + 1;
+    }
+
+    data->ruleSet.freeze();
+}
+
+/**
+ * Parse the given substring as a rule, and append it to the rules currently
+ * represented in this object.
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= rules.length()</code>.
+ *
+ * On a syntax error <code>status</code> is set to a failure code and
+ * nothing is added.
+ */
+void TransliterationRuleParser::applyRule(int32_t start, int32_t limit) {
+    /* General description of parsing: Initially, rules contain two types of
+     * quoted characters.  First, there are variable references, such as
+     * "{alpha}".  Second, there are quotes, such as "'<'" or "''".  One of
+     * the first steps in parsing a rule is to resolve such quoted matter.
+     * Quotes are removed early, leaving unquoted literal matter.  Variable
+     * references are resolved and replaced by single characters.  In some
+     * instances these characters represent themselves; in others, they
+     * stand for categories of characters.  Character categories are
+     * defined by the user using a variable definition statement
+     * (e.g., "vowels=[aeiouAEIOU]").
+     *
+     * Another early step in parsing is to split each rule into component
+     * pieces.  These pieces are, for every rule, a left-hand side, a right-
+     * hand side, and an operator.  The left- and right-hand sides may not
+     * be empty, except for the output patterns of forward and reverse
+     * rules.  In addition to this partitioning, the match patterns of
+     * forward and reverse rules must be partitioned into antecontext,
+     * postcontext, and literal pattern, where the context portions may or
+     * may not be present.  Finally, output patterns must have the cursor
+     * indicator '|' detected and removed, with its position recorded.
+     *
+     * Quote removal, variable resolution, and sub-pattern splitting must
+     * all happen at once.  This is due chiefly to the quoting mechanism,
+     * which allows special characters to appear at arbitrary positions in
+     * the final unquoted text.  (For this reason, alteration of the rule
+     * language is somewhat clumsy; it entails reassessment and revision of
+     * the parsing methods as a whole.)
+     *
+     * After this processing of rules is complete, the final end products
+     * are unquoted pieces of text of various types, and an integer cursor
+     * position, if one is specified.  These processed raw materials are now
+     * easy to deal with; other classes such as UnicodeSet and
+     * TransliterationRule need know nothing of quoting or variables.
+     */
+    UnicodeString left;
+    UnicodeString right;
+    UnicodeString anteContext;
+    UnicodeString postContext;
+
+    // -1 means "no cursor specified", the same value the sub-pattern
+    // parser stores when it finds no '|'.  It must be set here because
+    // parseRule() does not parse an output pattern at all when the output
+    // side of a rule is empty, and would otherwise leave this variable
+    // indeterminate.
+    int32_t cursorPos = -1;
+
+    UChar op = parseRule(start, limit, left, right,
+                         anteContext, postContext, cursorPos);
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    switch (op) {
+    case VARIABLE_DEF_OP:
+        applyVariableDef(left, right);
+        break;
+    case FORWARD_RULE_OP:
+        // match > output
+        if (direction == RuleBasedTransliterator::FORWARD) {
+            data->ruleSet.addRule(new TransliterationRule(
+                                     left, right,
+                                     anteContext, postContext,
+                                     cursorPos, status),
+                                  status);
+        } // otherwise ignore the rule; it's not the direction we want
+        break;
+    case REVERSE_RULE_OP:
+        // output < match: the right side is the match pattern
+        if (direction == RuleBasedTransliterator::REVERSE) {
+            data->ruleSet.addRule(new TransliterationRule(
+                                     right, left,
+                                     anteContext, postContext,
+                                     cursorPos, status),
+                                  status);
+        } // otherwise ignore the rule; it's not the direction we want
+        break;
+    }
+}
+
+/**
+ * Records a variable definition in the rule data.
+ * @param name the name of the variable.  It must not contain special
+ * characters and must not already be defined.
+ * @param pattern the value of the variable.  A single character defines
+ * a literal variable; anything longer is handed to UnicodeSet as a
+ * set pattern.
+ *
+ * On error <code>status</code> is set to U_ILLEGAL_ARGUMENT_ERROR (or to
+ * whatever the UnicodeSet constructor or defineVariable() reports).
+ */
+void TransliterationRuleParser::applyVariableDef(const UnicodeString& name,
+                                                 const UnicodeString& pattern) {
+    validateVariableName(name);
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    // Two ways a definition can be rejected outright: the name is
+    // already taken ("Duplicate variable definition"), or there is no
+    // value at all ("Variable definition missing").
+    // Note: reserved (Unicode category) names are not checked here.
+    if (data->isVariableDefined(name) || pattern.length() < 1) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    // Exactly one character: the variable stands for that character.
+    if (pattern.length() == 1) {
+        data->defineVariable(name, pattern.charAt(0), status);
+        return;
+    }
+
+    // More than one character: the variable names a set of characters,
+    // which needs a stand-in character from the available range.
+    if (!(variableNext < variableLimit)) {
+        // "Private use variables exhausted"
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    const UChar standIn = variableNext++;
+    UnicodeSet* set = new UnicodeSet(pattern, status);
+    data->defineVariable(name, standIn, set, status);
+}
+
+/**
+ * Given a rule, parses it into three pieces: The left side, the right side,
+ * and the operator.  Returns the operator.  Quotes and variable references
+ * are resolved; the output text in all <code>UnicodeString</code> parameters
+ * is literal text.  This method delegates to other parsing methods to
+ * handle the match pattern, output pattern, and other sub-patterns in the
+ * rule.
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= rules.length()</code>.
+ * @param left left side of rule is appended to this buffer
+ * with the quotes removed and variables resolved
+ * @param right right side of rule is appended to this buffer
+ * with the quotes removed and variables resolved
+ * @param anteContext the preceding context of the match pattern,
+ * if there is one, is appended to this buffer
+ * @param postContext the following context of the match pattern,
+ * if there is one, is appended to this buffer
+ * @param cursorPos receives the offset of the cursor in the output
+ * pattern, or -1 if the rule has no cursor, no output pattern, or is
+ * not a forward/reverse rule.  It is always written.
+ * @return The operator character, one of the characters in OPERATORS,
+ * or 0 if the rule is malformed, in which case <code>status</code> is
+ * set to U_ILLEGAL_ARGUMENT_ERROR.
+ */
+UChar TransliterationRuleParser::parseRule(int32_t start, int32_t limit,
+                                           UnicodeString& left,
+                                           UnicodeString& right,
+                                           UnicodeString& anteContext,
+                                           UnicodeString& postContext,
+                                           int32_t& cursorPos) {
+    // Give the out-parameter a defined value up front.  Several paths
+    // below (empty output side, variable definitions, error returns)
+    // never reach parseOutputPattern(), which is the only other place
+    // that assigns it.
+    cursorPos = -1;
+
+    /* Parse the rule into three pieces -- left, operator, and right,
+     * parsing out quotes.  The result is that left and right will have
+     * unquoted text.  E.g., "gt<'>'" will have right = ">".  Unquoted
+     * specials inside a side are reported as errors.  Two quotes inside
+     * or outside quotes indicates a quote literal.  E.g., "o''clock" ->
+     * "o'clock".
+     */
+    int32_t i = quotedIndexOf(rules, start, limit, OPERATORS);
+    if (i < 0) {
+        // Syntax error: no operator in the rule
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    UChar c = rules.charAt(i);
+    switch (c) {
+    case FORWARD_RULE_OP:
+        // match > output; the output may be empty, the match may not
+        if (i == start) {
+            // Empty left side
+            status = U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+        parseMatchPattern(start, i, left, anteContext, postContext);
+        if (i != (limit-1)) {
+            parseOutputPattern(i+1, limit, right, cursorPos);
+        }
+        break;
+    case REVERSE_RULE_OP:
+        // output < match; the output may be empty, the match may not
+        if (i == (limit-1)) {
+            // Empty right side
+            status = U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+        if (i != start) {
+            parseOutputPattern(start, i, left, cursorPos);
+        }
+        parseMatchPattern(i+1, limit, right, anteContext, postContext);
+        break;
+    default:
+        // Variable definition, name = value; neither side may be empty
+        if (i == start || i == (limit-1)) {
+            // Empty left or right side
+            status = U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+        parseSubPattern(start, i, left);
+        parseDefPattern(i+1, limit, right);
+        break;
+    }
+    return c;
+}
+
+/**
+ * Parses the match pattern of a forward or reverse rule, splitting it
+ * into the key to be matched and the optional context on either side.
+ * All quotes and variables are resolved.
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= rules.length()</code>.
+ * @param text the key to be matched will be appended to this buffer
+ * @param anteContext the preceding context, if any, will be appended
+ * to this buffer.
+ * @param postContext the following context, if any, will be appended
+ * to this buffer.
+ *
+ * On a syntax error <code>status</code> is set to
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ */
+void TransliterationRuleParser::parseMatchPattern(int32_t start, int32_t limit,
+                                                  UnicodeString& text,
+                                                  UnicodeString& anteContext,
+                                                  UnicodeString& postContext) {
+    if (!(start < limit)) {
+        // Empty expression in rule
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    // An opening '[' at the very start and a closing ']' at the very
+    // end are optional and carry no information; drop them.
+    if (rules.charAt(start) == CONTEXT_OPEN) {
+        ++start;
+    }
+    if (rules.charAt(limit-1) == CONTEXT_CLOSE) {
+        --limit;
+    }
+
+    // What remains has one of four shapes:
+    //             key
+    // anteContext]key
+    // anteContext]key[postContext
+    //             key[postContext
+    const int32_t anteEnd = quotedIndexOf(rules, start, limit, CONTEXT_CLOSE);
+    const int32_t postStart = quotedIndexOf(rules, start, limit, CONTEXT_OPEN);
+
+    // When both are present, the ']' must come before the '['.
+    if (anteEnd >= 0 && postStart >= 0 && postStart < anteEnd) {
+        // Syntax error in context specifier
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    // Peel the contexts off both ends, narrowing [start, limit) to the key.
+    if (anteEnd >= 0) {
+        parseSubPattern(start, anteEnd, anteContext);
+        start = anteEnd + 1;
+    }
+    if (postStart >= 0) {
+        parseSubPattern(postStart + 1, limit, postContext);
+        limit = postStart;
+    }
+
+    parseSubPattern(start, limit, text);
+}
+
+/**
+ * Parses a sub-pattern in which no cursor is allowed, using the full
+ * SPECIALS set as the characters that must be quoted.
+ * @param start the beginning index, inclusive
+ * @param limit the ending index, exclusive
+ * @param text the resolved text will be appended to this buffer
+ */
+void TransliterationRuleParser::parseSubPattern(int32_t start, int32_t limit,
+                                                UnicodeString& text) {
+    int32_t* const noCursor = 0;
+    parseSubPattern(start, limit, text, noCursor, SPECIALS);
+}
+
+/**
+ * Parses the value side of a variable definition.  The only difference
+ * from an ordinary sub-pattern is the set of characters treated as
+ * special: just DEF_SPECIALS.  In particular '[' and ']' are taken
+ * literally here, because UnicodeSet patterns use them.
+ * @param start the beginning index, inclusive
+ * @param limit the ending index, exclusive
+ * @param text the resolved text will be appended to this buffer
+ */
+void TransliterationRuleParser::parseDefPattern(int32_t start, int32_t limit,
+                                                UnicodeString& text) {
+    int32_t* const noCursor = 0;
+    parseSubPattern(start, limit, text, noCursor, DEF_SPECIALS);
+}
+
+/**
+ * Parses the output pattern of a forward or reverse rule, producing the
+ * output text and the position of the cursor, if any.  All quotes and
+ * variables are resolved.
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= rules.length()</code>.
+ * @param text the output text will be appended to this buffer
+ * @param cursorPos set to the cursor position within the output text,
+ * or to -1 if the pattern contains no cursor
+ */
+void TransliterationRuleParser::parseOutputPattern(int32_t start, int32_t limit,
+                                                   UnicodeString& text,
+                                                   int32_t& cursorPos) {
+    int32_t* const cursorOut = &cursorPos;
+    parseSubPattern(start, limit, text, cursorOut, SPECIALS);
+}
+
+/**
+ * Parses a sub-pattern of a rule.  Produces the text and the position of
+ * the cursor, if any.  Resolves all quotes and variables.
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= rules.length()</code>.
+ * @param text the output text will be appended to this buffer
+ * @param cursorPos if this parameter is not null, then *cursorPos
+ * will be set to the cursor position, or -1 if there is none.  If this
+ * parameter is null, then a '|' is treated like any other character:
+ * an error if it is in <code>specials</code>, literal text otherwise.
+ * @param specials characters that must be quoted; typically either
+ * SPECIALS or DEF_SPECIALS.
+ *
+ * On a syntax error <code>status</code> is set to
+ * U_ILLEGAL_ARGUMENT_ERROR and parsing of the sub-pattern stops; text
+ * appended before the error stays in <code>text</code>.
+ */
+void TransliterationRuleParser::parseSubPattern(int32_t start, int32_t limit,
+                                                UnicodeString& text,
+                                                int32_t* cursorPos,
+                                                const UnicodeString& specials) {
+    bool_t inQuote = FALSE;
+
+    if (start >= limit) {
+        // Empty expression in rule
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    if (cursorPos != 0) {
+        *cursorPos = -1;
+    }
+    for (int32_t i=start; i<limit; ++i) {
+        UChar c = rules.charAt(i);
+        if (c == QUOTE) {
+            // Check for double quote, which is a literal quote both
+            // inside and outside a quoted run
+            if ((i+1) < limit
+                && rules.charAt(i+1) == QUOTE) {
+                text.append(QUOTE);
+                ++i; // Skip over both quotes
+            } else {
+                inQuote = !inQuote;
+            }
+        } else if (inQuote) {
+            // Everything inside quotes is literal
+            text.append(c);
+        } else if (c == VARIABLE_REF_OPEN) {
+            ++i;
+            int32_t j = rules.indexOf(VARIABLE_REF_CLOSE, i);
+            // The closing brace must lie inside this sub-pattern.  The
+            // search above runs to the end of the whole rule string, so
+            // a match at or beyond limit belongs to some later text and
+            // means this reference is unterminated.
+            if (i == j || j < 0 || j >= limit) { // empty or unterminated
+                // Illegal variable reference
+                status = U_ILLEGAL_ARGUMENT_ERROR;
+                return;
+            }
+            UnicodeString name;
+            rules.extractBetween(i, j, name);
+            validateVariableName(name);
+            if (U_FAILURE(status)) {
+                return;
+            }
+            UChar ch = data->lookupVariable(name, status);
+            if (U_FAILURE(status)) {
+                return;
+            }
+            text.append(ch);
+            i = j; // continue after the closing brace
+        } else if (c == CURSOR_POS && cursorPos != 0) {
+            if (*cursorPos >= 0) {
+                // Multiple cursors
+                status = U_ILLEGAL_ARGUMENT_ERROR;
+                return;
+            }
+            // The cursor sits before the next character to be appended
+            *cursorPos = text.length();
+        } else if (specials.indexOf(c) >= 0) {
+            // Unquoted special character
+            status = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        } else {
+            text.append(c);
+        }
+    }
+    // NOTE(review): a quote still open at this point is accepted
+    // silently; confirm whether an unterminated quote should be an error.
+}
+
+/**
+ * Checks that a variable name contains none of the characters in
+ * SPECIALS.  If it does, <code>status</code> is set to
+ * U_ILLEGAL_ARGUMENT_ERROR; otherwise <code>status</code> is left alone.
+ * @param name the variable name to check
+ */
+void TransliterationRuleParser::validateVariableName(const UnicodeString& name) {
+    if (indexOf(name, SPECIALS) < 0) {
+        return; // no special character anywhere in the name
+    }
+    // Special character in variable name
+    status = U_ILLEGAL_ARGUMENT_ERROR;
+}
+
+/**
+ * Returns the single character value of the given variable name.  Defined
+ * names are recognized.
+ *
+ * NO LONGER SUPPORTED:
+ * If a Unicode category name is given, a standard character variable
+ * in the range firstCategoryVariable to lastCategoryVariable is returned,
+ * with value firstCategoryVariable + n, where n is the category
+ * number.
+ * @exception IllegalArgumentException if the name is unknown.
+ */
+//$ UChar TransliterationRuleParser::getVariableDef(const UnicodeString& name) {
+//$     UChar ch = data->lookupVariable(name, status);
+//$ //!         if (ch == null) {
+//$ //!             int id = UnicodeSet.getCategoryID(name);
+//$ //!             if (id >= 0) {
+//$ //!                 ch = new Character((char) (firstCategoryVariable + id));
+//$ //!                 data->variableNames.put(name, ch);
+//$ //!                 data->setVariables.put(ch, new UnicodeSet(id));
+//$ //!             }
+//$ //!         }
+//$     if (ch == 0) {
+//$         throw new IllegalArgumentException("Undefined variable: "
+//$                                            + name);
+//$     }
+//$     return ch;
+//$ }
+
+/**
+ * Determines what part of the private use region of Unicode we can use for
+ * variable stand-ins, and stores it in <code>variableNext</code> and
+ * <code>variableLimit</code>.  The correct way to do this is as follows:
+ * Parse each rule, and for forward and reverse rules, take the FROM
+ * expression, and make a hash of all characters used.  The TO expression
+ * should be ignored.  When done, everything not in the hash is available
+ * for use.  This implementation instead asks UnicodeRange for the largest
+ * sub-range not used by the rule text.
+ *
+ * If no stand-in is available, <code>status</code> is set to
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ */
+void TransliterationRuleParser::determineVariableRange() {
+    // Private use area; the arguments are presumably (start, length),
+    // matching the start/length fields read below -- TODO confirm.
+    UnicodeRange privateUse(0xE000, 0x1900);
+
+    UnicodeRange* unused = privateUse.largestUnusedSubrange(rules);
+
+    if (unused == 0) {
+        // Nothing available: leave an empty range.
+        variableNext = (UChar) 0;
+        variableLimit = (UChar) 0;
+    } else {
+        variableNext = unused->start;
+        variableLimit = (UChar) (unused->start + unused->length);
+        delete unused;
+    }
+
+    // An empty range means there is no stand-in character to hand out.
+    if (!(variableNext < variableLimit)) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+}
+
+/**
+ * Finds the first character of a set within a range of text, skipping
+ * over quoted text.  For example, in the string "abc'hide'h", the 'h' in
+ * "hide" will not be found by a search for "h".  The search is for any
+ * one character of <code>setOfChars</code>, not for the string as a whole.
+ * A quoted run with no closing quote extends to <code>limit</code>.
+ * @param text text to be searched
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param setOfChars string with one or more distinct characters
+ * @return Offset of the first unquoted character in
+ * <code>setOfChars</code> found, or -1 if not found.
+ * @see #indexOf
+ */
+int32_t TransliterationRuleParser::quotedIndexOf(const UnicodeString& text,
+                                                 int32_t start, int32_t limit,
+                                                 const UnicodeString& setOfChars) {
+    int32_t pos = start;
+    while (pos < limit) {
+        const UChar ch = text.charAt(pos);
+        if (ch == QUOTE) {
+            // Advance to the matching closing quote, or to limit if
+            // there is none; the step below then moves past it.
+            do {
+                ++pos;
+            } while (pos < limit && text.charAt(pos) != QUOTE);
+        } else if (setOfChars.indexOf(ch) >= 0) {
+            return pos;
+        }
+        ++pos;
+    }
+    return -1;
+}
+
+/**
+ * Finds the first character of a set within a range of text.  The search
+ * is for any one character of <code>setOfChars</code>, not for the string
+ * as a whole.  Quoting is not taken into account.
+ * @param text text to be searched
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param setOfChars string with one or more distinct characters
+ * @return Offset of the first character in <code>setOfChars</code>
+ * found, or -1 if not found.
+ * @see #quotedIndexOf
+ */
+int32_t TransliterationRuleParser::indexOf(const UnicodeString& text,
+                                           int32_t start, int32_t limit,
+                                           const UnicodeString& setOfChars) {
+    int32_t pos = start;
+    while (pos < limit) {
+        if (setOfChars.indexOf(text.charAt(pos)) >= 0) {
+            return pos;
+        }
+        ++pos;
+    }
+    return -1;
+}
+
+/**
+ * Finds the first character of a set anywhere in a string.  The search is
+ * for any one character of <code>setOfChars</code>, not for the string as
+ * a whole.  Quoting is not taken into account.
+ * @param text text to be searched, from its first to its last character
+ * @param setOfChars string with one or more distinct characters
+ * @return Offset of the first character in <code>setOfChars</code>
+ * found, or -1 if not found.
+ * @see #quotedIndexOf
+ */
+int32_t TransliterationRuleParser::indexOf(const UnicodeString& text,
+                                           const UnicodeString& setOfChars) {
+    const int32_t end = text.length();
+    return indexOf(text, 0, end, setOfChars);
+}
--- a/icu4c/source/i18n/rbt_pars.h
+++ b/icu4c/source/i18n/rbt_pars.h
@ -0,0 +1,302 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#ifndef RBT_PARS_H
+#define RBT_PARS_H
+
+#include "rbt.h"
+
+class TransliterationRuleData;
+
+class TransliterationRuleParser { // Rule-text parser; the static parse() is its only public member.
+
+    /**
+     * This is a reference to external data we don't own.  This works because
+     * we only hold this for the duration of the call to parse().
+     */
+    const UnicodeString& rules;
+
+    RuleBasedTransliterator::Direction direction;
+
+    TransliterationRuleData* data;
+
+    /**
+     * We use a single error code during parsing.  Rather than pass it
+     * through each API, we keep it here.
+     */
+    UErrorCode status;
+
+    /**
+     * The next available stand-in for variables.  This starts at some point in
+     * the private use area (discovered dynamically) and increments up toward
+     * <code>variableLimit</code>.  At any point during parsing, available
+     * variables are <code>variableNext..variableLimit-1</code>.
+     */
+    UChar variableNext;
+
+    /**
+     * The exclusive upper bound of the stand-ins available for variables
+     * (one past the last usable stand-in, per the range given below).  This
+     * is discovered dynamically.  At any point during parsing, available
+     * variables are <code>variableNext..variableLimit-1</code>.
+     */
+    UChar variableLimit;
+
+    // Operators
+    static const UChar VARIABLE_DEF_OP;
+    static const UChar FORWARD_RULE_OP;
+    static const UChar REVERSE_RULE_OP;
+    static const char* OPERATORS;
+
+
+    // Other special characters
+    static const UChar QUOTE;
+    static const UChar VARIABLE_REF_OPEN;
+    static const UChar VARIABLE_REF_CLOSE;
+    static const UChar CONTEXT_OPEN;
+    static const UChar CONTEXT_CLOSE;
+    static const UChar CURSOR_POS;
+    static const UChar RULE_COMMENT_CHAR;
+
+
+    /**
+     * Specials must be quoted in rules to be used as literals.
+     * Specials may not occur in variable names.
+     */
+    static const char* SPECIALS;
+
+    /**
+     * Specials that must be quoted in variable definitions.
+     */
+    static const char* DEF_SPECIALS;
+
+public:
+
+    static TransliterationRuleData*
+        parse(const UnicodeString& rules,
+              RuleBasedTransliterator::Direction direction); // NOTE(review): ownership of the returned pointer is not visible in this header -- confirm in the .cpp.
+    
+private:
+
+    /**
+     * Private constructor; instances are not created directly by clients.
+     * @param rules list of rules, separated by newline characters
+     * @param direction the rule direction, stored for use during parsing
+     * @exception IllegalArgumentException if there is a syntax error in the
+     * rules.  NOTE(review): nothing in this declaration throws; the class
+     * keeps a UErrorCode in <code>status</code>, so syntax errors are
+     * presumably reported there -- confirm against the implementation.
+     */
+    TransliterationRuleParser(const UnicodeString& rules,
+                              RuleBasedTransliterator::Direction direction);
+
+    /**
+     * Parse the given string as a sequence of rules, separated by newline
+     * characters ('\n'), and cause this object to implement those rules.  Any
+     * previous rules are discarded.  Typically this method is called exactly
+     * once, during construction.
+     * @exception IllegalArgumentException if there is a syntax error in the
+     * rules (see the NOTE(review) on the constructor about how errors are
+     * presumably reported).
+     */
+    void parseRules();
+
+    /**
+     * Parse the given substring as a rule, and append it to the rules currently
+     * represented in this object.
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= rules.length()</code>.
+     * @exception IllegalArgumentException if there is a syntax error in the
+     * rules (see the NOTE(review) on the constructor about how errors are
+     * presumably reported).
+     */
+    void applyRule(int32_t start, int32_t limit);
+
+    /**
+     * Add a variable definition.
+     * @param name the name of the variable.  It must not already be defined.
+     * @param pattern the value of the variable.  It may be a single character
+     * or a pattern describing a character set.
+     * @exception IllegalArgumentException if there is a syntax error (see
+     * the NOTE(review) on the constructor about how errors are presumably
+     * reported).
+     */
+    void applyVariableDef(const UnicodeString& name,
+                          const UnicodeString& pattern);
+
+    /**
+     * Given a rule, parses it into three pieces: The left side, the right side,
+     * and the operator.  Returns the operator.  Quotes and variable references
+     * are resolved; the output text in all <code>UnicodeString</code>
+     * parameters is literal text.  This method delegates to other parsing
+     * methods to handle the match pattern, output pattern, and other
+     * sub-patterns in the rule.
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= rules.length()</code>.
+     * @param left left side of rule is appended to this buffer
+     * with the quotes removed and variables resolved
+     * @param right right side of rule is appended to this buffer
+     * with the quotes removed and variables resolved
+     * @param anteContext the preceding context of the match pattern,
+     * if there is one, is appended to this buffer
+     * @param postContext the following context of the match pattern,
+     * if there is one, is appended to this buffer
+     * @param cursorPos if there is a cursor in the output pattern, its
+     * offset is stored in <code>cursorPos</code> (passed by reference)
+     * @return The operator character, one of the characters in OPERATORS.
+     */
+    UChar parseRule(int32_t start, int32_t limit,
+                    UnicodeString& left, UnicodeString& right,
+                    UnicodeString& anteContext,
+                    UnicodeString& postContext,
+                    int32_t& cursorPos);
+
+    /**
+     * Parses the match pattern of a forward or reverse rule.  Given the raw
+     * match pattern, return the match text and the context on both sides, if
+     * any.  Resolves all quotes and variables.
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= rules.length()</code>.
+     * @param text the key to be matched will be appended to this buffer
+     * @param anteContext the preceding context, if any, will be appended
+     * to this buffer.
+     * @param postContext the following context, if any, will be appended
+     * to this buffer.
+     */
+    void parseMatchPattern(int32_t start, int32_t limit,
+                           UnicodeString& text,
+                           UnicodeString& anteContext,
+                           UnicodeString& postContext);
+
+    void parseSubPattern(int32_t start, int32_t limit,
+                         UnicodeString& text); // NOTE(review): undocumented overload; presumably a shorthand for the five-argument parseSubPattern() -- confirm.
+    
+    /**
+     * Parse a variable definition sub pattern.  This kind of sub
+     * pattern differs in the set of characters that are considered
+     * special.  In particular, the '[' and ']' characters are not
+     * special, since these are used in UnicodeSet patterns.
+     * @param start the beginning index, inclusive
+     * @param limit the ending index, exclusive
+     * @param text buffer receiving the parsed text (NOTE(review): presumably
+     * appended to, as in the other parse methods -- confirm)
+     */
+    void parseDefPattern(int32_t start, int32_t limit,
+                         UnicodeString& text);
+    
+    /**
+     * Parses the output pattern of a forward or reverse rule.  Given the
+     * output pattern, return the output text and the position of the cursor,
+     * if any.  Resolves all quotes and variables.  There is no separate
+     * string parameter; the indices below refer to the <code>rules</code>
+     * member.
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= rules.length()</code>.
+     * @param text the output text will be appended to this buffer
+     * @param cursorPos will be set to the cursor position, or -1 if there
+     * is none.  It is passed by reference, so it cannot be null.
+     */
+    void parseOutputPattern(int32_t start, int32_t limit,
+                            UnicodeString& text,
+                            int32_t& cursorPos);
+
+    /**
+     * Parses a sub-pattern of a rule.  Return the text and the position of the cursor,
+     * if any.  Resolves all quotes and variables.  There is no separate
+     * string parameter; the indices below refer to the <code>rules</code>
+     * member.
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= rules.length()</code>.
+     * @param text the output text will be appended to this buffer
+     * @param cursorPos if this parameter is not null, then
+     * <code>*cursorPos</code> will be set to the cursor position, or -1 if
+     * there is none.  If this parameter is null, then cursors will be
+     * disallowed.
+     * @param specials characters that must be quoted; typically either
+     * SPECIALS or DEF_SPECIALS.
+     */
+    void parseSubPattern(int32_t start, int32_t limit,
+                         UnicodeString& text,
+                         int32_t* cursorPos,
+                         const UnicodeString& specials);
+
+    void validateVariableName(const UnicodeString& name); // NOTE(review): undocumented; presumably enforces "Specials may not occur in variable names" -- confirm.
+
+    /**
+     * Returns the single character value of the given variable name.  Defined
+     * names are recognized.
+     *
+     * NO LONGER SUPPORTED:
+     * If a Unicode category name is given, a standard character variable
+     * in the range firstCategoryVariable to lastCategoryVariable is returned,
+     * with value firstCategoryVariable + n, where n is the category
+     * number.
+     * @exception IllegalArgumentException if the name is unknown.
+     *
+     * The declaration below is commented out, so this method is not part of
+     * the class.
+     */
+    //$ Character getVariableDef(const UnicodeString& name);
+
+    /**
+     * Determines what part of the private use region of Unicode we can use for
+     * variable stand-ins.  The correct way to do this is as follows: Parse each
+     * rule, and for forward and reverse rules, take the FROM expression, and
+     * make a hash of all characters used.  The TO expression should be ignored.
+     * When done, everything not in the hash is available for use.  In practice,
+     * this method may employ some other algorithm for improved speed.
+     */
+    void determineVariableRange();
+
+    /**
+     * Returns the index of the first character in a set, ignoring quoted text.
+     * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
+     * found by a search for "h".  Unlike String.indexOf(), this method searches
+     * not for a single character, but for any character of the string
+     * <code>setOfChars</code>.
+     * @param text text to be searched
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param setOfChars string with one or more distinct characters
+     * @return Offset of the first character in <code>setOfChars</code>
+     * found, or -1 if not found.
+     * @see #indexOf
+     */
+    static int32_t quotedIndexOf(const UnicodeString& text,
+                                 int32_t start, int32_t limit,
+                                 const UnicodeString& setOfChars);
+
+    /**
+     * Returns the index of the first character in a set.  Unlike
+     * String.indexOf(), this method searches not for a single character, but
+     * for any character of the string <code>setOfChars</code>.
+     * @param text text to be searched
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param setOfChars string with one or more distinct characters
+     * @return Offset of the first character in <code>setOfChars</code>
+     * found, or -1 if not found.
+     * @see #quotedIndexOf
+     */
+    static int32_t indexOf(const UnicodeString& text,
+                           int32_t start, int32_t limit,
+                           const UnicodeString& setOfChars);
+    
+    /**
+     * Returns the index of the first character in a set.  Unlike
+     * String.indexOf(), this method searches not for a single character, but
+     * for any character of the string <code>setOfChars</code>.  This
+     * overload takes no range and searches the whole of <code>text</code>.
+     * @param text text to be searched
+     * @param setOfChars string with one or more distinct characters
+     * @return Offset of the first character in <code>setOfChars</code>
+     * found, or -1 if not found.
+     * @see #quotedIndexOf
+     */
+    static int32_t indexOf(const UnicodeString& text,
+                           const UnicodeString& setOfChars);
+    
+};
+
+#endif
--- a/icu4c/source/i18n/rbt_rule.cpp
+++ b/icu4c/source/i18n/rbt_rule.cpp
@ -0,0 +1,436 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#include "rbt_rule.h"
+#include "rep.h"
+#include "rbt_data.h"
+#include "unifilt.h"
+#include "uniset.h"
+
+/**
+ * Builds a rule from its match key, its output text, and up to two
+ * optional context strings, plus an optional cursor position within the
+ * output.
+ * @param theKey the string to match
+ * @param theOutput the string to produce when the key is seen
+ * @param theAnteContext context that must precede the key; an empty string
+ * means no preceding context
+ * @param thePostContext context that must follow the key; an empty string
+ * means no following context
+ * @param theCursorPos where the cursor is left after the output is
+ * emitted.  A negative value is replaced by <code>output.length()</code>,
+ * i.e. the cursor goes after the output.  A value greater than
+ * <code>output.length()</code> is an error.
+ * @param status in/out error code.  If it already indicates failure on
+ * entry, only the member initializers run and nothing else is done.  It is
+ * set to U_ILLEGAL_ARGUMENT_ERROR for an out-of-range cursor position and
+ * to U_MEMORY_ALLOCATION_ERROR if the mask key cannot be allocated.
+ */
+TransliterationRule::TransliterationRule(const UnicodeString& theKey,
+                                         const UnicodeString& theOutput,
+                                         const UnicodeString& theAnteContext,
+                                         const UnicodeString& thePostContext,
+                                         int32_t theCursorPos,
+                                         UErrorCode &status) :
+    key(theKey), output(theOutput),
+    anteContext(theAnteContext),
+    postContext(thePostContext),
+    cursorPos(theCursorPos),
+    maskKey(0) {
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    // Normalize a negative cursor to "after the output"; otherwise reject
+    // a cursor that lies beyond the end of the output.
+    const int32_t outputLength = output.length();
+    if (cursorPos < 0) {
+        cursorPos = outputLength;
+    } else if (cursorPos > outputLength) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+
+    /* maskKey is key + postContext.  It exists purely to make masks() fast
+     * while individual rules are being added to a rule set; the measured
+     * numbers were 13.0 seconds without it versus 6.2 seconds with it.
+     * After all rules have been added it is dead weight, and freeze()
+     * releases it.  masks() must NOT be called once freeze() has run.
+     */
+    maskKey = new UnicodeString(key);
+    if (maskKey != 0) {
+        maskKey->append(postContext);
+    } else {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+}
+
+/**
+ * Destructor.  Releases the mask key if it is still allocated; after
+ * freeze() the pointer is zero and the delete is a no-op.
+ */
+TransliterationRule::~TransliterationRule() {
+    delete this->maskKey;
+}
+
+/**
+ * Reports how many characters the match key has; the same value as
+ * <code>getKey().length()</code>.
+ * @return the length of the match key.
+ */
+int32_t TransliterationRule::getKeyLength() const {
+    const int32_t keyLength = this->key.length();
+    return keyLength;
+}
+
+/**
+ * Accessor for the match key.
+ * @return a reference to this rule's match key.
+ */
+const UnicodeString& TransliterationRule::getKey() const {
+    return this->key;
+}
+
+/**
+ * Accessor for the output text.
+ * @return a reference to the string this rule emits when it matches.
+ */
+const UnicodeString& TransliterationRule::getOutput() const {
+    return this->output;
+}
+
+/**
+ * Accessor for the cursor position within the output string.
+ * @return the stored cursor position.  For a rule whose construction
+ * succeeded this is a value from 0 to <code>getOutput().length()</code>,
+ * inclusive.
+ */
+int32_t TransliterationRule::getCursorPos() const {
+    return this->cursorPos;
+}
+
+/**
+ * Reports the length of the preceding context.  It exists to support the
+ * <code>Transliterator</code> method
+ * <code>getMaximumContextLength()</code>.
+ * @return the number of characters in the preceding context.
+ */
+int32_t TransliterationRule::getAnteContextLength() const {
+    const int32_t anteLength = this->anteContext.length();
+    return anteLength;
+}
+
+/**
+ * Tells whether this rule masks another rule.  If r1 masks r2 then r1
+ * matches any input string that r2 matches.  If r1 masks r2 and r2 masks
+ * r1 then r1 == r2.  Examples: "a>x" masks "ab>y".  "a>x" masks "a[b]>y".
+ * "[c]a>x" masks "[dc]a>y".
+ *
+ * <p>This method reads the mask key (key + following context) of both
+ * rules, so it must not be called on, or with, a rule on which freeze()
+ * has been called.
+ * @param r2 the rule that may be masked by this one
+ * @return true if this rule masks <code>r2</code>
+ */
+bool_t TransliterationRule::masks(const TransliterationRule& r2) const {
+    /* Masking comes in three flavors; rule1 is the masking rule each time.
+     *
+     * 1. KEY mask: len(key1) < len(key2), key2 starts with key1.
+     *
+     * 2. PREFIX mask: key1 == key2, len(prefix1) < len(prefix2),
+     * prefix2 ends with prefix1, suffix2 starts with suffix1.
+     *
+     * 3. SUFFIX mask: key1 == key2, len(suffix1) < len(suffix2),
+     * prefix2 ends with prefix1, suffix2 starts with suffix1.
+     */
+
+    /* LIMITATION: not every masking is detected.  For example,
+     * "{Lu}]a>x" masks "A]a>y", but recognizing that needs a subset
+     * operator on UnicodeSet objects, which does not exist yet.  It can be
+     * added later.
+     */
+    const UnicodeString& myMask = *maskKey;
+    const UnicodeString& otherMask = *r2.maskKey;
+
+    // Our mask key is a proper prefix of the other rule's mask key.
+    if (myMask.length() < otherMask.length() &&
+        otherMask.startsWith(myMask)) {
+        return TRUE;
+    }
+
+    // From here on the mask keys must be equal and the other rule must
+    // have some preceding context.
+    if (r2.anteContext.length() == 0 || !(myMask == otherMask)) {
+        return FALSE;
+    }
+
+    // No preceding context of our own: we match wherever r2 does.
+    if (anteContext.length() == 0) {
+        return TRUE;
+    }
+
+    // Otherwise our preceding context must be a proper suffix of r2's.
+    return anteContext.length() < r2.anteContext.length() &&
+           r2.anteContext.endsWith(anteContext);
+}
+
+/**
+ * Releases the mask key to save space.  After this call masks() must NOT
+ * be used with this rule: it dereferences the mask key, which is zero
+ * from here on.
+ */
+void TransliterationRule::freeze() {
+    UnicodeString* discarded = maskKey;
+    maskKey = 0;
+    delete discarded;
+}
+
+/**
+ * Tests whether this rule matches at the cursor.  The text being matched
+ * lives in a virtual buffer formed by <code>result</code> followed by the
+ * substring <code>[start, limit)</code> of <code>text</code>;
+ * <code>cursor</code> indexes into that virtual buffer and must lie
+ * between 0 and <code>result.length() + limit - start</code>.  The
+ * preceding context (if any), the key, and the following context (if any)
+ * are checked in that order, and checking stops at the first failure.
+ * @param text the untranslated text
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param result translated text so far
+ * @param cursor position at which to translate next, an offset into result.
+ * If greater than or equal to result.length(), represents offset start +
+ * cursor - result.length() into text.
+ * @param data variable data handed through to regionMatches()
+ * @param filter the filter.  Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator.  If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ * @return true if the contexts and the key all match
+ */
+bool_t TransliterationRule::matches(const UnicodeString& text,
+                                    int32_t start, int32_t limit,
+                                    const UnicodeString& result,
+                                    int32_t cursor,
+                                    const TransliterationRuleData& data,
+                                    const UnicodeFilter* filter) const {
+    // Preceding context, when present, sits immediately before the cursor.
+    if (anteContext.length() != 0 &&
+        !regionMatches(text, start, limit, result,
+                       cursor - anteContext.length(),
+                       anteContext, data, filter)) {
+        return FALSE;
+    }
+
+    // The key itself starts at the cursor.
+    if (!regionMatches(text, start, limit, result, cursor,
+                       key, data, filter)) {
+        return FALSE;
+    }
+
+    // Following context, when present, starts right after the key.
+    if (postContext.length() == 0) {
+        return TRUE;
+    }
+    return regionMatches(text, start, limit, result,
+                         cursor + key.length(),
+                         postContext, data, filter);
+}
+
+/**
+ * Tests whether this rule matches the given text at the cursor.  The
+ * preceding context (if any), the key, and the following context (if any)
+ * are checked in that order, and checking stops at the first failure.
+ * @param text the text, both translated and untranslated
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param cursor position at which to translate next, representing offset
+ * into text.  This value must be between <code>start</code> and
+ * <code>limit</code>.
+ * @param data variable data handed through to regionMatches()
+ * @param filter the filter.  Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator.  If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ * @return true if the contexts and the key all match
+ */
+bool_t TransliterationRule::matches(const Replaceable& text,
+                                    int32_t start, int32_t limit,
+                                    int32_t cursor,
+                                    const TransliterationRuleData& data,
+                                    const UnicodeFilter* filter) const {
+    // Preceding context, when present, sits immediately before the cursor.
+    if (anteContext.length() != 0 &&
+        !regionMatches(text, start, limit, cursor - anteContext.length(),
+                       anteContext, data, filter)) {
+        return FALSE;
+    }
+
+    // The key itself starts at the cursor.
+    if (!regionMatches(text, start, limit, cursor,
+                       key, data, filter)) {
+        return FALSE;
+    }
+
+    // Following context, when present, starts right after the key.
+    if (postContext.length() == 0) {
+        return TRUE;
+    }
+    return regionMatches(text, start, limit, cursor + key.length(),
+                         postContext, data, filter);
+}
+
+/**
+ * Classifies how well this rule matches the given text: a mismatch, a
+ * partial match, or a full match.  A mismatch means at least one character
+ * of the text does not match the context or key.  A partial match means
+ * the characters compared so far match, but the text ends before all key
+ * and following-context characters could be compared.  A full match means
+ * all context and key characters match.  The preceding context, when
+ * present, must match in full or the result is a mismatch.
+ * @param text the text, both translated and untranslated
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param cursor position at which to translate next, representing offset
+ * into text.  This value must be between <code>start</code> and
+ * <code>limit</code>.
+ * @param data variable data handed through to the region matchers
+ * @param filter the filter.  Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator.  If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ * @return one of <code>MISMATCH</code>, <code>PARTIAL_MATCH</code>, or
+ * <code>FULL_MATCH</code>.
+ * @see #MISMATCH
+ * @see #PARTIAL_MATCH
+ * @see #FULL_MATCH
+ */
+int32_t TransliterationRule::getMatchDegree(const Replaceable& text,
+                                            int32_t start, int32_t limit,
+                                            int32_t cursor,
+                                            const TransliterationRuleData& data,
+                                            const UnicodeFilter* filter) const {
+    // The preceding context is all-or-nothing.
+    const int32_t anteLength = anteContext.length();
+    if (anteLength != 0) {
+        if (!regionMatches(text, start, limit, cursor - anteLength,
+                           anteContext, data, filter)) {
+            return MISMATCH;
+        }
+    }
+
+    // The key may match completely, partially (text ran out), or not at all.
+    const int32_t keyMatched = getRegionMatchLength(text, start, limit,
+                                                    cursor, key,
+                                                    data, filter);
+    if (keyMatched < 0) {
+        return MISMATCH;
+    }
+    const int32_t keyLength = key.length();
+    if (keyMatched < keyLength) {
+        return PARTIAL_MATCH;
+    }
+
+    // With the key fully matched, the following context decides the rest.
+    const int32_t postLength = postContext.length();
+    if (postLength == 0) {
+        return FULL_MATCH;
+    }
+    const int32_t postMatched = getRegionMatchLength(text, start, limit,
+                                                     cursor + keyLength,
+                                                     postContext,
+                                                     data, filter);
+    if (postMatched < 0) {
+        return MISMATCH;
+    }
+    if (postMatched == postLength) {
+        return FULL_MATCH;
+    }
+    return PARTIAL_MATCH;
+}
+
+/**
+ * Tests whether a template matches the text at the cursor, over the
+ * template's entire length.  As in <code>matches()</code>, the text lives
+ * in a virtual buffer made of <code>result</code> followed by the
+ * substring <code>[start, limit)</code> of <code>text</code>.  A cursor
+ * below zero, or a template that would run past the end of the virtual
+ * buffer, yields false.
+ * @param text the untranslated text
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param result translated text so far
+ * @param cursor position at which to translate next, an offset into result.
+ * If greater than or equal to result.length(), represents offset start +
+ * cursor - result.length() into text.
+ * @param templ the text to match against.  All characters must match.
+ * @param data variable data consulted by charMatches() for each template
+ * character
+ * @param filter the filter.  Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator.  If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ * @return true if there is a match
+ */
+bool_t TransliterationRule::regionMatches(const UnicodeString& text,
+                                          int32_t start, int32_t limit,
+                                          const UnicodeString& result,
+                                          int32_t cursor,
+                                          const UnicodeString& templ,
+                                          const TransliterationRuleData& data,
+                                          const UnicodeFilter* filter) const {
+    const int32_t resultLength = result.length();
+    const int32_t templLength = templ.length();
+
+    // The template has to fit entirely inside the virtual buffer.
+    if (cursor < 0) {
+        return FALSE;
+    }
+    if ((cursor + templLength) > (resultLength + limit - start)) {
+        return FALSE;
+    }
+
+    int32_t pos = cursor;
+    int32_t t = 0;
+    while (t < templLength) {
+        // Positions below result.length() come from result; the rest map
+        // onto text, offset by start.
+        UChar textChar;
+        if (pos < resultLength) {
+            textChar = result.charAt(pos);
+        } else {
+            textChar = text.charAt(pos - resultLength + start);
+        }
+        if (!charMatches(templ.charAt(t), textChar, data, filter)) {
+            return FALSE;
+        }
+        ++t;
+        ++pos;
+    }
+    return TRUE;
+}
+
+/**
+ * Tests whether a template matches the text at the cursor, over the
+ * template's entire length.  A cursor before <code>start</code>, or a
+ * template that would extend beyond <code>limit</code>, yields false.
+ * @param text the text, both translated and untranslated
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param cursor position at which to translate next, representing offset
+ * into text.  This value must be between <code>start</code> and
+ * <code>limit</code>.
+ * @param templ the text to match against.  All characters must match.
+ * @param data variable data consulted by charMatches() for each template
+ * character
+ * @param filter the filter.  Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator.  If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ * @return true if there is a match
+ */
+bool_t TransliterationRule::regionMatches(const Replaceable& text,
+                                          int32_t start, int32_t limit,
+                                          int32_t cursor,
+                                          const UnicodeString& templ,
+                                          const TransliterationRuleData& data,
+                                          const UnicodeFilter* filter) const {
+    const int32_t templLength = templ.length();
+
+    // The template has to fit entirely inside [start, limit).
+    if (cursor < start) {
+        return FALSE;
+    }
+    if ((cursor + templLength) > limit) {
+        return FALSE;
+    }
+
+    int32_t pos = cursor;
+    int32_t t = 0;
+    while (t < templLength) {
+        if (!charMatches(templ.charAt(t), text.charAt(pos),
+                         data, filter)) {
+            return FALSE;
+        }
+        ++t;
+        ++pos;
+    }
+    return TRUE;
+}
+
+/**
+ * Counts how many leading characters of a template match the text at the
+ * cursor.  Comparison stops at the end of the template or at
+ * <code>limit</code>, whichever comes first.  Any mismatching character,
+ * or a cursor before <code>start</code>, gives -1.  If the text is not
+ * long enough to compare any characters, the result is 0.
+ * @param text the text, both translated and untranslated
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param cursor position at which to translate next, representing offset
+ * into text.  This value must be between <code>start</code> and
+ * <code>limit</code>.
+ * @param templ the text to match against.  All characters must match.
+ * @param data variable data consulted by charMatches() for each template
+ * character
+ * @param filter the filter.  Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator.  If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ * @return -1 if there is a mismatch, 0 if the text is not long enough to
+ * match any characters, otherwise the number of characters of text that
+ * match this rule.
+ */
+int32_t TransliterationRule::getRegionMatchLength(const Replaceable& text,
+                                          int32_t start,
+                                          int32_t limit, int32_t cursor,
+                                          const UnicodeString& templ,
+                                          const TransliterationRuleData& data,
+                                          const UnicodeFilter* filter) const {
+    if (cursor < start) {
+        return -1;
+    }
+
+    const int32_t templLength = templ.length();
+    int32_t matched = 0;
+    while (matched < templLength && cursor < limit) {
+        if (!charMatches(templ.charAt(matched), text.charAt(cursor),
+                         data, filter)) {
+            return -1;
+        }
+        ++matched;
+        ++cursor;
+    }
+    return matched;
+}
+
+/**
+ * Return true if the given key matches the given text.  This method
+ * accounts for the fact that the key character may represent a character
+ * set.  Note that the key and text characters may not be interchanged
+ * without altering the results.
+ *
+ * <p>A text character rejected by the filter never matches.  The previous
+ * single-expression form relied on <code>&&</code> binding more loosely
+ * than <code>?:</code>, which is not the case in C++; with a rejecting
+ * filter it fell through to <code>set->contains()</code> on a null
+ * pointer.
+ * @param keyChar a character in the match key
+ * @param textChar a character in the text being transliterated
+ * @param data variable data; <code>data.lookupSet(keyChar)</code> yields
+ * the set that <code>keyChar</code> stands for, or zero if
+ * <code>keyChar</code> is to be compared literally
+ * @param filter the filter.  Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator.  If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ * @return true if <code>textChar</code> passes the filter and either
+ * equals <code>keyChar</code> or is contained in the set that
+ * <code>keyChar</code> represents
+ */
+bool_t TransliterationRule::charMatches(UChar keyChar, UChar textChar,
+                                        const TransliterationRuleData& data,
+                                        const UnicodeFilter* filter) const {
+    // Filtered-out characters must be left alone, so they match nothing.
+    if (filter != 0 && !filter->isIn(textChar)) {
+        return FALSE;
+    }
+
+    // A key character either stands for a set or is taken literally.
+    UnicodeSet* set = data.lookupSet(keyChar);
+    return (set == 0) ? (keyChar == textChar) : set->contains(textChar);
+}
--- a/icu4c/source/i18n/rbt_rule.h
+++ b/icu4c/source/i18n/rbt_rule.h
@ -0,0 +1,380 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#ifndef RBT_RULE_H
+#define RBT_RULE_H
+
+#include "unistr.h"
+
+class Replaceable;
+class TransliterationRuleData;
+class UnicodeFilter;
+
+/**
+ * A transliteration rule used by
+ * <code>RuleBasedTransliterator</code>.
+ * <code>TransliterationRule</code> is an immutable object.
+ *
+ * <p>A rule consists of an input pattern and an output string.  When
+ * the input pattern is matched, the output string is emitted.  The
+ * input pattern consists of zero or more characters which are matched
+ * exactly (the key) and optional context.  Context must match if it
+ * is specified.  Context may be specified before the key, after the
+ * key, or both.  The key, preceding context, and following context
+ * may contain variables.  Variables represent a set of Unicode
+ * characters, such as the letters <i>a</i> through <i>z</i>.
+ * Variables are detected by looking up each character in a supplied
+ * variable list to see if it has been so defined. 
+ *
+ * @author Alan Liu
+ */
+class TransliterationRule {
+
+public:
+
+    /**
+     * Constants returned by <code>getMatchDegree()</code> indicating
+     * the degree of match between the text and this rule.
+     * @see #getMatchDegree
+     */
+    enum {
+        /**
+         * Constant returned by <code>getMatchDegree()</code>
+         * indicating a mismatch between the text and this rule.  One
+         * or more characters of the context or key do not match the
+         * text.
+         */
+        MISMATCH,
+
+        /**
+         * Constant returned by <code>getMatchDegree()</code>
+         * indicating a partial match between the text and this rule.
+         * All characters of the text match the corresponding context
+         * or key, but more characters are required for a complete
+         * match.  There are some key or context characters at the end
+         * of the pattern that remain unmatched because the text isn't
+         * long enough.
+         */
+        PARTIAL_MATCH,
+        
+        /**
+         * Constant returned by <code>getMatchDegree()</code>
+         * indicating a complete match between the text and this rule.
+         * The text matches all context and key characters.
+         */
+        FULL_MATCH
+    };
+
+private:
+
+    /**
+     * The string that must be matched.
+     */
+    UnicodeString key;
+
+    /**
+     * The string that is emitted if the key, anteContext, and postContext
+     * are matched.
+     */
+    UnicodeString output;
+
+    /**
+     * The string that must match before the key.  If empty, then
+     * there is no matching requirement before the key.
+     */
+    UnicodeString anteContext;
+
+    /**
+     * The string that must match after the key.  If empty, then there
+     * is no matching requirement after the key.
+     */
+    UnicodeString postContext;
+
+    /**
+     * The position of the cursor after emitting the output string, from 0 to
+     * output.length().  For most rules with no special cursor specification,
+     * the cursorPos is output.length().
+     */
+    int32_t cursorPos;
+
+    /**
+     * A string used to implement masks().
+     * @see #freeze
+     */
+    UnicodeString* maskKey;
+
+public:
+
+    /**
+     * Construct a new rule with the given key, output text, and other
+     * attributes.  Zero, one, or two context strings may be specified.  A
+     * cursor position may be specified for the output text.
+     * @param theKey the string to match
+     * @param theOutput the string to produce when the key is seen
+     * @param theAnteContext if not empty, then it must be matched before
+     * the key
+     * @param thePostContext if not empty, then it must be matched after
+     * the key
+     * @param theCursorPos a position for the cursor after the output is
+     * emitted.  If less than zero, then the cursor is placed after the
+     * output; that is, -1 is equivalent to <code>output.length()</code>.
+     * Must not be greater than <code>output.length()</code>.
+     * @param status error code.  NOTE(review): an out-of-range cursor
+     * position is presumably reported here (this signature has no
+     * IllegalArgumentException) -- confirm against the constructor body.
+     */
+    TransliterationRule(const UnicodeString& theKey,
+                        const UnicodeString& theOutput,
+                        const UnicodeString& theAnteContext,
+                        const UnicodeString& thePostContext,
+                        int32_t theCursorPos,
+                        UErrorCode &status);
+
+    /**
+     * Destructor.
+     */
+    virtual ~TransliterationRule();
+
+    /**
+     * Return the length of the key.  Equivalent to <code>getKey().length()</code>.
+     * @return the length of the match key.
+     */
+    virtual int32_t getKeyLength() const;
+
+    /**
+     * Return the key.
+     * @return the match key.
+     */
+    virtual const UnicodeString& getKey() const;
+
+    /**
+     * Return the output string.
+     * @return the output string.
+     */
+    virtual const UnicodeString& getOutput() const;
+
+    /**
+     * Return the position of the cursor within the output string.
+     * @return a value from 0 to <code>getOutput().length()</code>, inclusive.
+     */
+    virtual int32_t getCursorPos() const;
+
+    /**
+     * Return the preceding context length.  This method is needed to
+     * support the <code>Transliterator</code> method
+     * <code>getMaximumContextLength()</code>.
+     */
+    virtual int32_t getAnteContextLength() const;
+
+    /**
+     * Return true if this rule masks another rule.  If r1 masks r2 then
+     * r1 matches any input string that r2 matches.  If r1 masks r2 and r2 masks
+     * r1 then r1 == r2.  Examples: "a>x" masks "ab>y".  "a>x" masks "a[b]>y".
+     * "[c]a>x" masks "[dc]a>y".
+     *
+     * <p>This method must not be called after freeze() is called.
+     */
+    virtual bool_t masks(const TransliterationRule& r2) const;
+
+    /**
+     * Free up space.  After this call, masks() must NOT be called.
+     * NOTE(review): earlier text said an exception is thrown if it is; confirm.
+     */
+    virtual void freeze();
+
+    /**
+     * Return true if this rule matches the given text.  The text being matched
+     * occupies a virtual buffer consisting of the contents of
+     * <code>result</code> concatenated to a substring of <code>text</code>.
+     * The substring is specified by <code>start</code> and <code>limit</code>.
+     * The value of <code>cursor</code> is an index into this virtual buffer,
+     * from 0 to the length of the buffer.  In terms of the parameters,
+     * <code>cursor</code> must be between 0 and <code>result.length() + limit -
+     * start</code>.
+     * @param text the untranslated text
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param result translated text so far
+     * @param cursor position at which to translate next, an offset into result.
+     * If greater than or equal to result.length(), represents offset start +
+     * cursor - result.length() into text.
+     * @param data a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter, or null for no filtering.  Any character for
+     * which <tt>filter.isIn()</tt> returns <tt>false</tt> will not be altered.
+     */
+    virtual bool_t matches(const UnicodeString& text,
+                           int32_t start, int32_t limit,
+                           const UnicodeString& result,
+                           int32_t cursor,
+                           const TransliterationRuleData& data,
+                           const UnicodeFilter* filter) const;
+
+    /**
+     * Return true if this rule matches the given text.
+     * @param text the text, both translated and untranslated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param cursor position at which to translate next, representing offset
+     * into text.  This value must be between <code>start</code> and
+     * <code>limit</code>.
+     * @param data a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter, or null for no filtering.  Any character for
+     * which <tt>filter.isIn()</tt> returns <tt>false</tt> will not be altered.
+     */
+    virtual bool_t matches(const Replaceable& text,
+                           int32_t start, int32_t limit,
+                           int32_t cursor,
+                           const TransliterationRuleData& data,
+                           const UnicodeFilter* filter) const;
+
+    /**
+     * Return the degree of match between this rule and the given text.  The
+     * degree of match may be mismatch, a partial match, or a full match.  A
+     * mismatch means at least one character of the text does not match the
+     * context or key.  A partial match means some context and key characters
+     * match, but the text is not long enough to match all of them.  A full
+     * match means all context and key characters match.
+     * @param text the text, both translated and untranslated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param cursor position at which to translate next, representing offset
+     * into text.  This value must be between <code>start</code> and
+     * <code>limit</code>.
+     * @param data a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter, or null for no filtering.  Any character for
+     * which <tt>filter.isIn()</tt> returns <tt>false</tt> will not be altered.
+     * @return one of <code>MISMATCH</code>, <code>PARTIAL_MATCH</code>, or
+     * <code>FULL_MATCH</code>.
+     * @see #MISMATCH
+     * @see #PARTIAL_MATCH
+     * @see #FULL_MATCH
+     */
+    virtual int32_t getMatchDegree(const Replaceable& text,
+                                   int32_t start, int32_t limit,
+                                   int32_t cursor,
+                                   const TransliterationRuleData& data,
+                                   const UnicodeFilter* filter) const;
+
+    /**
+     * Return true if a template matches the text.  The entire length of the
+     * template is compared to the text at the cursor.  As in
+     * <code>matches()</code>, the text being matched occupies a virtual buffer
+     * consisting of the contents of <code>result</code> concatenated to a
+     * substring of <code>text</code>.  See <code>matches()</code> for details.
+     * @param text the untranslated text
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param result translated text so far
+     * @param cursor position at which to translate next, an offset into result.
+     * If greater than or equal to result.length(), represents offset start +
+     * cursor - result.length() into text.
+     * @param templ the text to match against.  All characters must match.
+     * @param data a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return true if there is a match
+     */
+    virtual bool_t regionMatches(const UnicodeString& text,
+                                 int32_t start, int32_t limit,
+                                 const UnicodeString& result,
+                                 int32_t cursor,
+                                 const UnicodeString& templ,
+                                 const TransliterationRuleData& data,
+                                 const UnicodeFilter* filter) const;
+
+    /**
+     * Return true if a template matches the text.  The entire length of the
+     * template is compared to the text at the cursor.
+     * @param text the text, both translated and untranslated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param cursor position at which to translate next, representing offset
+     * into text.  This value must be between <code>start</code> and
+     * <code>limit</code>.
+     * @param templ the text to match against.  All characters must match.
+     * @param data a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return true if there is a match
+     */
+    virtual bool_t regionMatches(const Replaceable& text,
+                                 int32_t start, int32_t limit,
+                                 int32_t cursor,
+                                 const UnicodeString& templ,
+                                 const TransliterationRuleData& data,
+                                 const UnicodeFilter* filter) const;
+
+    /**
+     * Return the number of characters of the text that match this rule.  If
+     * there is a mismatch, return -1.  If the text is not long enough to match
+     * any characters, return 0.
+     * @param text the text, both translated and untranslated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param cursor position at which to translate next, representing offset
+     * into text.  This value must be between <code>start</code> and
+     * <code>limit</code>.
+     * @param templ the text to match against.  All characters must match.
+     * @param data a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return -1 if there is a mismatch, 0 if the text is not long enough to
+     * match any characters, otherwise the number of characters of text that
+     * match this rule.
+     */
+    virtual int32_t getRegionMatchLength(const Replaceable& text, int32_t start,
+                                         int32_t limit, int32_t cursor,
+                                         const UnicodeString& templ,
+                                         const TransliterationRuleData& data,
+                                         const UnicodeFilter* filter) const;
+    
+    /**
+     * Return true if the given key matches the given text.  This method
+     * accounts for the fact that the key character may represent a character
+     * set.  Note that the key and text characters may not be interchanged
+     * without altering the results.
+     * @param keyChar a character in the match key
+     * @param textChar a character in the text being transliterated
+     * @param data a dictionary of variables mapping <code>Character</code>
+     * to <code>UnicodeSet</code>
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     */
+    virtual bool_t charMatches(UChar keyChar, UChar textChar,
+                               const TransliterationRuleData& data,
+                               const UnicodeFilter* filter) const;
+};
+
+#endif
--- a/icu4c/source/i18n/rbt_set.cpp
+++ b/icu4c/source/i18n/rbt_set.cpp
@ -0,0 +1,217 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#include "rbt_set.h"
+#include "rbt_rule.h"
+#include "unistr.h"
+
+/* Note: There was an old implementation that indexed by first letter of
+ * key.  Problem with this is that key may not have a meaningful first
+ * letter; e.g., {Lu}>*.  One solution is to keep a separate vector of all
+ * rules whose initial key letter is a category variable.  However, the
+ * problem is that they must be kept in order with respect to other rules.
+ * One solution -- add a sequence number to each rule.  Do the usual
+ * first-letter lookup, and also a lookup from the spare bin with rules like
+ * {Lu}>*.  Take the lower sequence number.  This seems complex and not
+ * worth the trouble, but we may revisit this later.  For documentation (or
+ * possible resurrection) the old code is included below, commented out
+ * with the remark "// OLD INDEXED IMPLEMENTATION".  Under the old
+ * implementation, <code>rules</code> is a Hashtable, not a Vector.
+ */
+
+/**
+ * Create a rule set that holds no rules; the maximum context length is 0.
+ */
+TransliterationRuleSet::TransliterationRuleSet() :
+    maxContextLength(0) {
+}
+
+/**
+ * Report the maximum context length recorded by addRule() so far.
+ * @return the length of the longest preceding context.
+ */
+int32_t TransliterationRuleSet::getMaximumContextLength() const {
+    return this->maxContextLength;
+}
+
+/**
+ * Add a rule to this set.  Rules are added in order, and order is
+ * significant.  Once freeze() is called, this method must not be called.
+ * @param adoptedRule the rule to add; deleted here if it cannot be added
+ * @param status set to U_ILLEGAL_ARGUMENT_ERROR if adoptedRule is null or
+ * is masked by a rule already in the set (it would have to precede that rule)
+ */
+void TransliterationRuleSet::addRule(TransliterationRule* adoptedRule,
+                                     UErrorCode& status) {
+    // Build time, no checking  : 3562 ms
+    // Build time, with checking: 6234 ms
+
+    if (U_FAILURE(status)) {
+        delete adoptedRule;
+        return;
+    }
+    if (adoptedRule == 0) { // nothing to dereference or store below
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    for (int32_t i=0; i<rules.size(); ++i) {
+        TransliterationRule* r = (TransliterationRule*) rules.elementAt(i);
+        if (r->masks(*adoptedRule)) {
+            status = U_ILLEGAL_ARGUMENT_ERROR;
+            delete adoptedRule;
+            return;
+        }
+    }
+
+    rules.addElement(adoptedRule);
+    int32_t len = adoptedRule->getAnteContextLength();
+    if (len > maxContextLength) {
+        maxContextLength = len;
+    }
+}
+
+/**
+ * Free up space by freezing every rule; addRule() must NOT be called after.
+ */
+void TransliterationRuleSet::freeze() {
+    int32_t index = 0;
+    while (index < rules.size()) {
+        TransliterationRule* r = (TransliterationRule*) rules.elementAt(index++);
+        r->freeze();
+    }
+}
+
+/**
+ * Attempt to find a matching rule at the specified point in the text.  The
+ * text being matched occupies a virtual buffer consisting of the contents
+ * of <code>result</code> concatenated to a substring of <code>text</code>.
+ * The substring is specified by <code>start</code> and <code>limit</code>.
+ * The value of <code>cursor</code> is an index into this virtual buffer,
+ * from 0 to the length of the buffer.  In terms of the parameters,
+ * <code>cursor</code> must be between 0 and <code>result.length() + limit -
+ * start</code>.
+ * @param text the untranslated text
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param result translated text
+ * @param cursor position at which to translate next, an offset into result.
+ * If greater than or equal to result.length(), represents offset start +
+ * cursor - result.length() into text.
+ * @param data a dictionary mapping variables to the sets they
+ * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
+ * @param filter the filter.  Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator.  If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ * @return the matching rule, or null if none found.
+ */
+TransliterationRule*
+TransliterationRuleSet::findMatch(const UnicodeString& text,
+                                  int32_t start, int32_t limit,
+                                  const UnicodeString& result,
+                                  int32_t cursor,
+                                  const TransliterationRuleData& data,
+                                  const UnicodeFilter* filter) const {
+    // Rules are tried in the order they were added; the first match wins.
+    int32_t index = 0;
+    while (index < rules.size()) {
+        TransliterationRule* candidate =
+            (TransliterationRule*) rules.elementAt(index++);
+        if (candidate->matches(text, start, limit, result,
+                               cursor, data, filter)) return candidate;
+    }
+    return 0;
+}
+
+/**
+ * Attempt to find a matching rule at the specified point in the text.
+ * @param text the text, both translated and untranslated
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param cursor position at which to translate next, representing offset
+ * into text.  This value must be between <code>start</code> and
+ * <code>limit</code>.
+ * @param data a dictionary mapping variables to the sets they
+ * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
+ * @param filter the filter.  Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator.  If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ * @return the matching rule, or null if none found.
+ */
+TransliterationRule*
+TransliterationRuleSet::findMatch(const Replaceable& text,
+                                  int32_t start, int32_t limit,
+                                  int32_t cursor,
+                                  const TransliterationRuleData& data,
+                                  const UnicodeFilter* filter) const {
+    // Rules are tried in the order they were added; the first match wins.
+    int32_t index = 0;
+    while (index < rules.size()) {
+        TransliterationRule* candidate =
+            (TransliterationRule*) rules.elementAt(index++);
+        if (candidate->matches(text, start, limit,
+                               cursor, data, filter)) return candidate;
+    }
+    return 0;
+}
+
+/**
+ * Attempt to find a matching rule at the specified point in the text.
+ * Unlike <code>findMatch()</code>, this method does an incremental match.
+ * An incremental match requires that there be no partial matches that might
+ * pre-empt the full match that is found.  If there are partial matches,
+ * then null is returned.  A non-null result indicates that a full match has
+ * been found, and that it cannot be pre-empted by a partial match
+ * regardless of what additional text is added to the translation buffer.
+ * @param text the text, both translated and untranslated
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param cursor position at which to translate next, representing offset
+ * into text.  This value must be between <code>start</code> and
+ * <code>limit</code>.
+ * @param data a dictionary mapping variables to the sets they
+ * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
+ * @param isPartial output parameter.  Set to true if the search stopped
+ * at a partial match (null is then returned), otherwise set to false.
+ * @param filter the filter.  Any character for which
+ * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+ * altered by this transliterator.  If <tt>filter</tt> is
+ * <tt>null</tt> then no filtering is applied.
+ * @return the matching rule, or null if none found, or if the text buffer
+ * does not have enough text yet to unambiguously match a rule.
+ */
+TransliterationRule*
+TransliterationRuleSet::findIncrementalMatch(const Replaceable& text,
+                                             int32_t start,
+                                             int32_t limit, int32_t cursor,
+                                             const TransliterationRuleData& data,
+                                             bool_t& isPartial,
+                                             const UnicodeFilter* filter) const {
+    isPartial = FALSE;
+    int32_t index = 0;
+    while (index < rules.size()) {
+        TransliterationRule* candidate =
+            (TransliterationRule*) rules.elementAt(index++);
+        const int32_t degree = candidate->getMatchDegree(text, start, limit,
+                                                         cursor, data, filter);
+        if (degree == TransliterationRule::FULL_MATCH) return candidate;
+        if (degree == TransliterationRule::PARTIAL_MATCH) {
+            // More text could still complete this earlier rule, so stop here.
+            isPartial = TRUE;
+            return 0;
+        }
+    }
+    return 0;
+}
--- a/icu4c/source/i18n/rbt_set.h
+++ b/icu4c/source/i18n/rbt_set.h
@ -0,0 +1,164 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#ifndef RBT_SET_H
+#define RBT_SET_H
+
+#include "uvector.h"
+
+class Replaceable;
+class TransliterationRule;
+class TransliterationRuleData;
+class UnicodeFilter;
+class UnicodeString;
+
+/**
+ * A set of rules for a <code>RuleBasedTransliterator</code>.  This set encodes
+ * the transliteration in one direction from one set of characters or short
+ * strings to another.  A <code>RuleBasedTransliterator</code> consists of up to
+ * two such sets, one for the forward direction, and one for the reverse.
+ *
+ * <p>A <code>TransliterationRuleSet</code> has one important operation, that of
+ * finding a matching rule at a given point in the text.  This is accomplished
+ * by the <code>findMatch()</code> method.
+ *
+ * @author Alan Liu
+ */
+class TransliterationRuleSet {
+    /**
+     * Vector of rules, in the order added.
+     */
+    UVector rules;
+
+    /**
+     * Length of the longest preceding context
+     */
+    int32_t maxContextLength;
+
+public:
+
+    /**
+     * Construct a new empty rule set.
+     */
+    TransliterationRuleSet();
+
+    /**
+     * Return the maximum context length.
+     * @return the length of the longest preceding context.
+     */
+    virtual int32_t getMaximumContextLength() const;
+
+    /**
+     * Add a rule to this set.  Rules are added in order, and order is
+     * significant.
+     * <p>Once freeze() is called, this method must not be called.
+     * @param adoptedRule the rule to add
+     * @param status error code; set if adoptedRule cannot be added
+     */
+    virtual void addRule(TransliterationRule* adoptedRule,
+                         UErrorCode& status);
+
+    /**
+     * Free up space.  Once this method is called, addRule() must NOT
+     * be called again.
+     */
+    virtual void freeze();
+
+    /**
+     * Attempt to find a matching rule at the specified point in the text.  The
+     * text being matched occupies a virtual buffer consisting of the contents
+     * of <code>result</code> concatenated to a substring of <code>text</code>.
+     * The substring is specified by <code>start</code> and <code>limit</code>.
+     * The value of <code>cursor</code> is an index into this virtual buffer,
+     * from 0 to the length of the buffer.  In terms of the parameters,
+     * <code>cursor</code> must be between 0 and <code>result.length() + limit -
+     * start</code>.
+     * @param text the untranslated text
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param result translated text
+     * @param cursor position at which to translate next, an offset into result.
+     * If greater than or equal to result.length(), represents offset start +
+     * cursor - result.length() into text.
+     * @param data a dictionary mapping variables to the sets they
+     * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return the matching rule, or null if none found.
+     */
+    virtual TransliterationRule* findMatch(const UnicodeString& text,
+                                           int32_t start, int32_t limit,
+                                           const UnicodeString& result,
+                                           int32_t cursor,
+                                           const TransliterationRuleData& data,
+                                           const UnicodeFilter* filter) const;
+
+    /**
+     * Attempt to find a matching rule at the specified point in the text.
+     * @param text the text, both translated and untranslated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param cursor position at which to translate next, representing offset
+     * into text.  This value must be between <code>start</code> and
+     * <code>limit</code>.
+     * @param data a dictionary mapping variables to the sets they
+     * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return the matching rule, or null if none found.
+     */
+    virtual TransliterationRule* findMatch(const Replaceable& text,
+                                           int32_t start, int32_t limit,
+                                           int32_t cursor,
+                                           const TransliterationRuleData& data,
+                                           const UnicodeFilter* filter) const;
+    
+    /**
+     * Attempt to find a matching rule at the specified point in the text.
+     * Unlike <code>findMatch()</code>, this method does an incremental match.
+     * An incremental match requires that there be no partial matches that might
+     * pre-empt the full match that is found.  If there are partial matches,
+     * then null is returned.  A non-null result indicates that a full match has
+     * been found, and that it cannot be pre-empted by a partial match
+     * regardless of what additional text is added to the translation buffer.
+     * @param text the text, both translated and untranslated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param cursor position at which to translate next, representing offset
+     * into text.  This value must be between <code>start</code> and
+     * <code>limit</code>.
+     * @param data a dictionary mapping variables to the sets they
+     * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
+     * @param isPartial output parameter.  Set to true if the search stopped
+     * at a partial match (null is then returned), otherwise set to false.
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return the matching rule, or null if none found, or if the text buffer
+     * does not have enough text yet to unambiguously match a rule.
+     */
+    virtual TransliterationRule* findIncrementalMatch(const Replaceable& text,
+                                              int32_t start,
+                                              int32_t limit, int32_t cursor,
+                                              const TransliterationRuleData& data,
+                                              bool_t& isPartial,
+                                              const UnicodeFilter* filter) const;
+};
+#endif
--- a/icu4c/source/i18n/translit.cpp
+++ b/icu4c/source/i18n/translit.cpp
@ -0,0 +1,879 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#include "translit.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "hextouni.h"
+#include "locid.h"
+#include "msgfmt.h"
+#include "mutex.h"
+#include "rbt_data.h"
+#include "rbt_pars.h"
+#include "rep.h"
+#include "resbund.h"
+#include "uhash.h"
+#include "unifilt.h"
+#include "unitohex.h"
+
+/**
+ * Cache of known transliterators.  Entries are looked up by the
+ * hash code of the transliterator ID (see hash()); each value is a
+ * <code>CacheEntry*</code> whose <code>entryType</code> is one of
+ * the following:
+ *
+ * <ul><li><code>PROTOTYPE</code>: the entry holds a
+ * <code>Transliterator</code> object that is cloned on request.
+ *
+ * <li><code>RBT_DATA</code>: the entry holds parsed rule data from
+ * which a RuleBasedTransliterator is constructed on request.
+ *
+ * <li><code>RULE_BASED_PLACEHOLDER</code>: the rules have not been
+ * loaded yet.  The entry's <code>rbFile</code> names the resource
+ * bundle from which the RB_RULE key is looked up to obtain the rule.
+ *
+ * <li><code>REVERSE_RULE_BASED_PLACEHOLDER</code>.  Like
+ * <code>RULE_BASED_PLACEHOLDER</code>, except that the rules are
+ * parsed in the reverse direction.
+ * </ul>
+ */
+UHashtable* Transliterator::cache = 0;
+
+/**
+ * The mutex controlling access to the cache.
+ */
+UMTX Transliterator::cacheMutex = NULL;
+
+/**
+ * When set to TRUE, the cache has been initialized.  Any code must
+ * check this boolean before accessing the cache, and if the boolean
+ * is FALSE, it must call initializeCache().  We do this form of lazy
+ * evaluation for two reasons: (1) so we don't initialize if we don't
+ * have to (i.e., if no one is using Transliterator, but has included
+ * the code as part of a shared library), and (2) to avoid static
+ * initialization problems.
+ */
+bool_t Transliterator::cacheInitialized = FALSE;
+
+/**
+ * Prefix for resource bundle key for the display name for a
+ * transliterator.  The ID is appended to this to form the key.
+ * The resource bundle value should be a String.
+ */
+const char* Transliterator::RB_DISPLAY_NAME_PREFIX = "T:";
+
+/**
+ * Resource bundle key for display name pattern.
+ * The resource bundle value should be a String forming a
+ * MessageFormat pattern, e.g.:
+ * "{0,choice,0#|1#{1} Transliterator|2#{1} to {2} Transliterator}".
+ */
+const char* Transliterator::RB_DISPLAY_NAME_PATTERN =
+    "TransliteratorNamePattern";
+
+/**
+ * Resource bundle key for the list of RuleBasedTransliterator IDs.
+ * initializeCache() reads the value as a 2-d string array with three
+ * columns per row: the forward ID, the reverse ID (may be empty),
+ * and the name of the resource bundle file in which the RB_RULE key
+ * will be sought.
+ */
+const char* Transliterator::RB_RULE_BASED_IDS =
+    "RuleBasedTransliteratorIDs";
+
+/**
+ * Resource bundle key for the RuleBasedTransliterator rule.
+ */
+const char* Transliterator::RB_RULE = "Rule";
+
+/**
+ * Constructs a transliterator with the given ID and filter.
+ * @param theID the string identifier for this transliterator
+ * @param adoptedFilter the filter, or <tt>null</tt> for no
+ * filtering.  Any character for which <tt>filter.isIn()</tt> returns
+ * <tt>false</tt> will not be altered by this transliterator.  This
+ * object takes ownership of the filter and deletes it in its
+ * destructor.
+ */
+Transliterator::Transliterator(const UnicodeString& theID,
+                               UnicodeFilter* adoptedFilter)
+    : ID(theID),
+      filter(adoptedFilter)
+{
+}
+
+/**
+ * Destructor.  Deletes the filter owned by this transliterator, if
+ * any.
+ */
+Transliterator::~Transliterator() {
+    delete filter;
+}
+
+/**
+ * Copy constructor.  Copies the ID and, because each transliterator
+ * owns its filter, clones the other object's filter rather than
+ * sharing it.
+ */
+Transliterator::Transliterator(const Transliterator& other)
+    : ID(other.ID),
+      filter(other.filter != 0 ? other.filter->clone() : 0)
+{
+}
+
+/**
+ * Assignment operator.  Copies the ID and replaces this object's
+ * filter with a clone of the other object's filter (or with no
+ * filter if the other object has none).  The filter previously owned
+ * by this object is deleted.  Self-assignment is a no-op.
+ */
+Transliterator& Transliterator::operator=(const Transliterator& other) {
+    if (this != &other) {
+        // Clone first, then release the old filter, so that this
+        // object never holds a pointer to a deleted filter.
+        UnicodeFilter* newFilter = (other.filter == 0) ?
+            0 : other.filter->clone();
+        delete filter;
+        filter = newFilter;
+        ID = other.ID;
+    }
+    return *this;
+}
+
+/**
+ * Transliterates the segment of a string that begins at the character
+ * at offset <code>start</code> and extends to the character at offset
+ * <code>limit - 1</code>.  A default implementation is provided here;
+ * subclasses should provide a more efficient implementation if
+ * possible.
+ * @param text the string to be transliterated
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @param result buffer to receive the transliterated text; previous
+ * contents are discarded
+ */
+void Transliterator::transliterate(const UnicodeString& text,
+                                   int32_t start, int32_t limit,
+                                   UnicodeString& result) const {
+    /* This is a default implementation that should be replaced by
+     * a more efficient subclass implementation if possible.
+     */
+    // Copy the requested segment of text into result...
+    text.extractBetween(start, limit, result);
+    // ...then transliterate that copy in place, so that text itself
+    // is never modified.
+    transliterate(result);
+}
+
+/**
+ * Convenience method that transliterates a whole string, i.e. the
+ * range from offset 0 up to <code>text.length()</code>.
+ * @param text the string to be transliterated
+ * @param result buffer to receive the transliterated text; previous
+ * contents are discarded
+ */
+void Transliterator::transliterate(const UnicodeString& text,
+                                   UnicodeString& result) const {
+    const int32_t wholeLength = text.length();
+    transliterate(text, 0, wholeLength, result);
+}
+
+/**
+ * Convenience method that transliterates a whole string in place,
+ * i.e. the range from offset 0 up to <code>text.length()</code>.
+ * @param text the string to be transliterated
+ */
+void Transliterator::transliterate(Replaceable& text) const {
+    const int32_t wholeLength = text.length();
+    transliterate(text, 0, wholeLength);
+}
+
+/**
+ * Transliterates the portion of the text buffer that can be
+ * transliterated unambiguously after new text has been inserted,
+ * typically as a result of a keyboard event.  The new text in
+ * <code>insertion</code> will be inserted into <code>text</code>
+ * at <code>index[LIMIT]</code>, advancing
+ * <code>index[LIMIT]</code> by <code>insertion.length()</code>.
+ * Then the transliterator will try to transliterate characters of
+ * <code>text</code> between <code>index[CURSOR]</code> and
+ * <code>index[LIMIT]</code>.  Characters before
+ * <code>index[CURSOR]</code> will not be changed.
+ *
+ * <p>Upon return, values in <code>index[]</code> will be updated.
+ * <code>index[START]</code> will be advanced to the first
+ * character that future calls to this method will read.
+ * <code>index[CURSOR]</code> and <code>index[LIMIT]</code> will
+ * be adjusted to delimit the range of text that future calls to
+ * this method may change.
+ *
+ * <p>Typical usage of this method begins with an initial call
+ * with <code>index[START]</code> and <code>index[LIMIT]</code>
+ * set to indicate the portion of <code>text</code> to be
+ * transliterated, and <code>index[CURSOR] == index[START]</code>.
+ * Thereafter, <code>index[]</code> can be used without
+ * modification in future calls, provided that all changes to
+ * <code>text</code> are made via this method.
+ *
+ * <p>This method assumes that future calls may be made that will
+ * insert new text into the buffer.  As a result, it only performs
+ * unambiguous transliterations.  After the last call to this
+ * method, there may be untransliterated text that is waiting for
+ * more input to resolve an ambiguity.  In order to perform these
+ * pending transliterations, clients should call {@link
+ * #finishKeyboardTransliteration} after the last call to this
+ * method has been made.
+ * 
+ * @param text the buffer holding transliterated and untransliterated text
+ * @param index an array of three integers.
+ *
+ * <ul><li><code>index[START]</code>: the beginning index,
+ * inclusive; <code>0 <= index[START] <= index[LIMIT]</code>.
+ *
+ * <li><code>index[LIMIT]</code>: the ending index, exclusive;
+ * <code>index[START] <= index[LIMIT] <= text.length()</code>.
+ * <code>insertion</code> is inserted at
+ * <code>index[LIMIT]</code>.
+ *
+ * <li><code>index[CURSOR]</code>: the next character to be
+ * considered for transliteration; <code>index[START] <=
+ * index[CURSOR] <= index[LIMIT]</code>.  Characters before
+ * <code>index[CURSOR]</code> will not be changed by future calls
+ * to this method.</ul>
+ *
+ * @param insertion text to be inserted and possibly
+ * transliterated into the translation buffer at
+ * <code>index[LIMIT]</code>.  If <code>null</code> then no text
+ * is inserted.
+ * @see #START
+ * @see #LIMIT
+ * @see #CURSOR
+ * @see #handleKeyboardTransliterate
+ * @param status set to U_ILLEGAL_ARGUMENT_ERROR if
+ * <code>index[]</code> is invalid
+ */
+void Transliterator::keyboardTransliterate(Replaceable& text,
+                                           int32_t index[3],
+                                           const UnicodeString& insertion,
+                                           UErrorCode &status) const {
+    // All overloads share _keyboardTransliterate(); the insertion is
+    // passed by address because that method accepts 0 to mean
+    // "nothing to insert".
+    _keyboardTransliterate(text, index, &insertion, status);
+}
+
+/**
+ * Convenience overload of keyboardTransliterate() for a single
+ * inserted character, typically the result of a keyboard event.
+ * The character is inserted at <code>index[LIMIT]</code> and the
+ * portion of the buffer that can be transliterated unambiguously is
+ * then transliterated; see {@link
+ * #keyboardTransliterate(Replaceable, int[], String)} for details.
+ * @param text the buffer holding transliterated and
+ * untransliterated text
+ * @param index an array of three integers.  See {@link
+ * #keyboardTransliterate(Replaceable, int[], String)}.
+ * @param insertion the character to be inserted and possibly
+ * transliterated into the translation buffer at
+ * <code>index[LIMIT]</code>.
+ * @param status passed through to the shared implementation
+ * @see #keyboardTransliterate(Replaceable, int[], String)
+ */
+void Transliterator::keyboardTransliterate(Replaceable& text,
+                                           int32_t index[3],
+                                           UChar insertion,
+                                           UErrorCode& status) const {
+    // Wrap the character in a one-character string so that the
+    // string-based implementation can be reused.
+    const UnicodeString single(insertion);
+    _keyboardTransliterate(text, index, &single, status);
+}
+
+/**
+ * Transliterates the portion of the text buffer that can be
+ * transliterated unambiguously, without inserting any new text.
+ * This is a convenience method; see
+ * {@link #keyboardTransliterate(Replaceable, int[], String)} for
+ * details.
+ * @param text the buffer holding transliterated and
+ * untransliterated text
+ * @param index an array of three integers.  See {@link
+ * #keyboardTransliterate(Replaceable, int[], String)}.
+ * @param status passed through to the shared implementation
+ * @see #keyboardTransliterate(Replaceable, int[], String)
+ */
+void Transliterator::keyboardTransliterate(Replaceable& text,
+                                           int32_t index[3],
+                                           UErrorCode& status) const {
+    // A null insertion pointer tells the implementation to skip the
+    // insertion step.
+    _keyboardTransliterate(text, index, 0, status);
+}
+
+/**
+ * Completes a keyboard transliteration session by transliterating
+ * the range <code>index[START]</code> to <code>index[LIMIT]</code>
+ * non-incrementally, so that text that was waiting for more input
+ * is processed.  Clients should call this method as the last call
+ * after a sequence of one or more calls to
+ * <code>keyboardTransliterate()</code>.
+ * @param text the buffer holding transliterated and
+ * untransliterated text.
+ * @param index the array of indices previously passed to {@link
+ * #keyboardTransliterate}
+ */
+void Transliterator::finishKeyboardTransliteration(Replaceable& text,
+                                                   int32_t index[3]) const {
+    const int32_t from = index[START];
+    const int32_t to = index[LIMIT];
+    transliterate(text, from, to);
+}
+
+/**
+ * Shared implementation of the keyboardTransliterate() overloads.
+ * Validates <code>index[]</code>, inserts 'insertion' (if non-null)
+ * into 'text' at <code>index[LIMIT]</code>, and then calls the
+ * framework method handleKeyboardTransliterate() to do the actual
+ * work.  Afterwards <code>index[START]</code> is advanced to
+ * <code>index[CURSOR]</code> less the maximum context length, but
+ * never to before its value on entry.
+ *
+ * Does nothing if 'status' already indicates a failure; sets it to
+ * U_ILLEGAL_ARGUMENT_ERROR if <code>index[]</code> is invalid.
+ */
+void Transliterator::_keyboardTransliterate(Replaceable& text,
+                                            int32_t index[3],
+                                            const UnicodeString* insertion,
+                                            UErrorCode &status) const {
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    // Require 0 <= START <= CURSOR <= LIMIT <= text.length()
+    const bool_t indexIsValid =
+        index[START] >= 0 &&
+        index[LIMIT] <= text.length() &&
+        index[CURSOR] >= index[START] &&
+        index[CURSOR] <= index[LIMIT];
+    if (!indexIsValid) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    const int32_t startOnEntry = index[START];
+
+    if (insertion != 0) {
+        // Insert the new text at LIMIT and extend LIMIT to cover it
+        const int32_t insertAt = index[LIMIT];
+        text.handleReplaceBetween(insertAt, insertAt, *insertion);
+        index[LIMIT] = insertAt + insertion->length();
+    }
+
+    handleKeyboardTransliterate(text, index);
+
+    // Keep enough preceding context before the cursor for later
+    // calls, without ever moving START backwards.
+    const int32_t contextStart = index[CURSOR] - getMaximumContextLength();
+    index[START] = icu_max(contextStart, startOnEntry);
+}
+
+/**
+ * Returns the length of the longest context required by this transliterator.
+ * This is <em>preceding</em> context.  The default implementation supplied
+ * by <code>Transliterator</code> returns zero; subclasses
+ * that use preceding context should override this method to return the
+ * correct value.  For example, if a transliterator translates "ddd" (where
+ * d is any digit) to "555" when preceded by "(ddd)", then the preceding
+ * context length is 5, the length of "(ddd)".
+ *
+ * <p>_keyboardTransliterate() uses this value to decide how far
+ * before <code>index[CURSOR]</code> to place <code>index[START]</code>.
+ *
+ * @return The maximum number of preceding context characters this
+ * transliterator needs to examine
+ */
+int32_t Transliterator::getMaximumContextLength() const {
+    // The base class uses no preceding context.
+    return 0;
+}
+
+/**
+ * Returns the programmatic identifier for this transliterator, as
+ * supplied to the constructor or copied from another transliterator.
+ * The returned reference is to this object's own ID member, so it is
+ * valid only as long as this object exists.
+ * @see #registerInstance
+ * @see #getAvailableIDs
+ */
+const UnicodeString& Transliterator::getID() const {
+    return ID;
+}
+
+/**
+ * Returns a name for this transliterator that is appropriate for
+ * display to the user in the default locale.  See {@link
+ * #getDisplayName(Locale)} for details.
+ * @param result buffer that receives the display name
+ * @return a reference to <code>result</code>
+ */
+UnicodeString& Transliterator::getDisplayName(UnicodeString& result) const {
+    return getDisplayName(Locale::getDefault(), result);
+}
+
+/**
+ * Returns a name for this transliterator that is appropriate for
+ * display to the user in the given locale.  The name is looked up in
+ * the locale resource data under the key formed by appending the ID
+ * to RB_DISPLAY_NAME_PREFIX.
+ *
+ * <p>If no localized name exists in the resource bundles,
+ * a name is synthesized using a localized
+ * <code>MessageFormat</code> pattern from the resource data (key
+ * RB_DISPLAY_NAME_PATTERN).  The arguments to this pattern are an
+ * integer followed by one or two strings.  The integer is the number
+ * of strings, either 1 or 2.  The strings are formed by splitting
+ * the ID for this transliterator at the first '-'.  If there is no
+ * '-', then the entire ID forms the only string.
+ *
+ * <p>If neither lookup succeeds, the ID itself is returned.  In all
+ * cases the previous contents of <code>result</code> are replaced.
+ * @param inLocale the Locale in which the display name should be
+ * localized.
+ * @param result buffer that receives the display name
+ * @return a reference to <code>result</code>
+ */
+UnicodeString& Transliterator::getDisplayName(const Locale& inLocale,
+                                              UnicodeString& result) const {
+    UErrorCode status = U_ZERO_ERROR;
+    ResourceBundle bundle(Locale::getDataDirectory(), inLocale, status);
+    // Suspend checking status until later...
+
+    UnicodeString key(RB_DISPLAY_NAME_PREFIX);
+    key.append(ID);
+
+    // Try to retrieve a UnicodeString* from the bundle.  The result,
+    // if any, should NOT be deleted.
+    const UnicodeString* resString = bundle.getString(key, status);
+
+    if (U_SUCCESS(status) && resString != 0) {
+        return result = *resString; // [sic] assign & return
+    }
+
+    // We have failed to get a name from the locale data.  This is
+    // typical, since most transliterators will not have localized
+    // name data.  The next step is to retrieve the MessageFormat
+    // pattern from the locale data and to use it to synthesize the
+    // name from the ID.
+
+    status = U_ZERO_ERROR;
+    resString = bundle.getString(RB_DISPLAY_NAME_PATTERN, status);
+
+    if (U_SUCCESS(status) && resString != 0) {
+        MessageFormat msg(*resString, inLocale, status);
+        // Suspend checking status until later...
+
+        // We pass either 2 or 3 Formattable objects to msg.
+        Formattable args[3];
+        int32_t i = ID.indexOf((UChar)'-');
+        int32_t nargs;
+        if (i < 0) {
+            args[0].setLong(1); // # of args to follow
+            args[1].setString(ID);
+            nargs = 2;
+        } else {
+            UnicodeString left, right;
+            ID.extractBetween(0, i, left);
+            ID.extractBetween(i+1, ID.length(), right);
+            args[0].setLong(2); // # of args to follow
+            args[1].setString(left);
+            args[2].setString(right);
+            nargs = 3;
+        }
+        FieldPosition pos; // ignored by msg
+
+        // MessageFormat::format() appends to its output string.
+        // Format into an empty scratch string so that whatever the
+        // caller left in 'result' is replaced, not extended, which
+        // matches the other return paths of this method.
+        UnicodeString formatted;
+        msg.format(args, nargs, formatted, pos, status);
+        if (U_SUCCESS(status)) {
+            return result = formatted;
+        }
+    }
+
+    // We should not reach this point unless there is something
+    // wrong with the build or the RB_DISPLAY_NAME_PATTERN has
+    // been deleted from the root RB_LOCALE_ELEMENTS resource.
+    result = ID;
+    return result;
+}
+
+/**
+ * Returns the filter used by this transliterator, or <tt>null</tt>
+ * if this transliterator uses no filter.  The filter remains owned
+ * by this transliterator; the caller must not delete the result.
+ */
+const UnicodeFilter* Transliterator::getFilter() const {
+    return filter;
+}
+
+/**
+ * Changes the filter used by this transliterator.  If the filter
+ * is set to <tt>null</tt> then no filtering will occur.  This
+ * object takes ownership of <code>filterToAdopt</code> and deletes
+ * the filter it previously owned.  Passing the filter that is
+ * already in use is a no-op.
+ *
+ * <p>Callers must take care if a transliterator is in use by
+ * multiple threads.  The filter should not be changed by one
+ * thread while another thread may be transliterating.
+ */
+void Transliterator::adoptFilter(UnicodeFilter* filterToAdopt) {
+    // Guard against re-adopting the current filter, which would
+    // otherwise be deleted and then kept as a dangling pointer.
+    if (filterToAdopt != filter) {
+        delete filter;
+        filter = filterToAdopt;
+    }
+}
+
+/**
+ * Returns this transliterator's inverse.  This implementation
+ * splits the ID at its first '-', swaps the two halves, and asks
+ * the cache for a transliterator with the resulting ID.  That is,
+ * if <code>getID()</code> returns "A-B", then this method looks up
+ * "B-A".
+ *
+ * <p>This method does not take filtering into account.
+ *
+ * <p>Subclasses with knowledge of their inverse may wish to
+ * override this method.
+ *
+ * @return a transliterator that is an inverse, not necessarily
+ * exact, of this transliterator, or <code>null</code> if the ID
+ * contains no '-' or no such transliterator is registered.
+ * @see #registerInstance
+ */
+Transliterator* Transliterator::createInverse() const {
+    const int32_t dash = ID.indexOf((UChar)'-');
+    if (dash < 0) {
+        // Not of the form "A-B"; there is nothing to swap.
+        return 0;
+    }
+
+    UnicodeString source;
+    ID.extractBetween(0, dash, source);
+
+    // Start with the part after the dash, then add "-" and the
+    // part before it.
+    UnicodeString inverseID;
+    ID.extractBetween(dash + 1, ID.length(), inverseID);
+    inverseID.append((UChar)'-');
+    inverseID.append(source);
+
+    return _createInstance(inverseID);
+}
+
+/**
+ * Returns a <code>Transliterator</code> object given its ID.
+ * The ID must be either a system transliterator ID or an ID
+ * registered using <code>registerInstance()</code>.
+ *
+ * @param ID a valid ID, as enumerated by <code>getAvailableID()</code>
+ * @return A <code>Transliterator</code> object with the given ID,
+ * or <code>null</code> if the ID cannot be used.
+ * @see #registerInstance
+ * @see #getAvailableID
+ * @see #getID
+ */
+Transliterator* Transliterator::createInstance(const UnicodeString& ID) {
+    return _createInstance(ID);
+}
+
+/**
+ * This is the path to the subdirectory within the locale data
+ * directory that contains the rule-based transliterator resource
+ * bundle files.  This is constructed dynamically the first time
+ * Transliterator::getDataDirectory() is called.  The buffer is
+ * allocated with icu_malloc() there and is not freed anywhere in
+ * this file.
+ */
+char* Transliterator::DATA_DIR = 0;
+
+/**
+ * This is the name of a subdirectory within the locale data directory
+ * that contains the rule-based transliterator resource bundle files.
+ */
+const char* Transliterator::RESOURCE_SUB_DIR = "translit";
+
+/**
+ * Returns the directory in which the transliterator resource bundle
+ * files are located.  This is a subdirectory, named RESOURCE_SUB_DIR,
+ * under Locale::getDataDirectory().  It ends in a path separator,
+ * which is taken to be the last character of the locale data
+ * directory.
+ *
+ * <p>The path is built once and cached in DATA_DIR.  If the buffer
+ * for it cannot be allocated, nothing is cached and the locale data
+ * directory itself is returned, so that callers always receive a
+ * valid string; transliterator bundles will then simply not be
+ * found there.
+ */
+const char* Transliterator::getDataDirectory() {
+    if (DATA_DIR == 0) {
+        Mutex lock; // Okay to use the global mutex here
+        if (DATA_DIR == 0) {
+            /* Construct the transliterator data directory path.  This
+             * is a subdirectory of the locale data directory.  For
+             * now, we get the separator from the data directory
+             * assuming a path separator of one character.  In the
+             * future we might add API to get the separator.
+             *
+             * TODO: Fix this to get the path separator in some better
+             * way.  File an rfe for this.
+             */
+            const char* data = Locale::getDataDirectory();
+            int32_t len = icu_strlen(data);
+            char sep[2];
+            // An empty data directory gives us no separator to copy;
+            // don't read before the start of the string.
+            sep[0] = (len > 0) ? data[len-1] : 0;
+            sep[1] = 0;
+            char* dir = (char*) icu_malloc(
+                            len + icu_strlen(RESOURCE_SUB_DIR) + 2);
+            if (dir == 0) {
+                // Out of memory.  Leave DATA_DIR unset so that a
+                // later call can retry, and fall back to the parent
+                // directory rather than writing through a null
+                // pointer.
+                return data;
+            }
+            icu_strcpy(dir, data);
+            icu_strcat(dir, RESOURCE_SUB_DIR);
+            icu_strcat(dir, sep);
+
+            // Publish the pointer only after the string is complete;
+            // the unlocked check above must never observe a
+            // partially built path.
+            DATA_DIR = dir;
+        }
+    }
+    return DATA_DIR;
+}
+
+// Returns the cache key for an ID: the string's hash code with the
+// sign bit masked off, so the result is never negative.
+inline int32_t Transliterator::hash(const UnicodeString& str) {
+    return str.hashCode() & 0x7FFFFFFF;
+}
+
+/**
+ * Returns a new transliterator object given its ID, or null if the
+ * ID is not in the cache or its rules cannot be loaded.
+ *
+ * <p>The cache entry for the ID determines what happens: a
+ * PROTOTYPE entry is cloned; an RBT_DATA entry supplies cached rule
+ * data for a new RuleBasedTransliterator; a placeholder entry causes
+ * the rules to be loaded from the entry's resource bundle and
+ * parsed, after which the entry is converted to RBT_DATA.  If no
+ * rule data can be obtained, the ID is removed from the cache.
+ */
+Transliterator* Transliterator::_createInstance(const UnicodeString& ID) {
+    UErrorCode status = U_ZERO_ERROR;
+
+    if (!cacheInitialized) {
+        initializeCache();
+    }
+
+    Mutex lock(&cacheMutex);
+
+    CacheEntry* entry = (CacheEntry*) uhash_get(cache, hash(ID));
+    TransliterationRuleData* data = 0;
+
+    if (entry == 0) {
+        return 0;
+    }
+
+    if (entry->entryType == CacheEntry::RBT_DATA) {
+        data = entry->u.data;
+        // Fall through to construct transliterator from cached Data object.
+    } else if (entry->entryType == CacheEntry::PROTOTYPE) {
+        return entry->u.prototype->clone();
+    } else {
+        // At this point entry type must be either RULE_BASED_PLACEHOLDER
+        // or REVERSE_RULE_BASED_PLACEHOLDER.
+        bool_t isReverse =
+            (entry->entryType ==
+             CacheEntry::REVERSE_RULE_BASED_PLACEHOLDER);
+
+        // We use the file name, taken from another resource bundle
+        // 2-d array at static init time, as a locale language.  We're
+        // just using the locale mechanism to map through to a file
+        // name; this in no way represents an actual locale.
+        Locale fakeLocale(entry->rbFile);
+
+        ResourceBundle bundle(Transliterator::getDataDirectory(),
+                              fakeLocale, status);
+
+        // Call RBT to parse the rules from the resource bundle
+
+        // We don't own the rules - 'rules' is an alias pointer to
+        // a string in the RB cache.
+        const UnicodeString* rules = bundle.getString(RB_RULE, status);
+
+        // If rules == 0 at this point, or if the status indicates a
+        // failure, then we don't have any rules -- there is probably
+        // an installation error.  The list in the root locale should
+        // correspond to all the installed transliterators; if it
+        // lists something that's not installed, we'll get a null
+        // pointer here.
+        if (rules != 0 && U_SUCCESS(status)) {
+
+            // REVERSE and FORWARD are members of the
+            // RuleBasedTransliterator class, so they are named with
+            // the scope operator (not the Java-style '.').
+            data = TransliterationRuleParser::parse(*rules, isReverse
+                                                    ? RuleBasedTransliterator::REVERSE
+                                                    : RuleBasedTransliterator::FORWARD);
+
+            // Double check to see if someone has modified the entry
+            // since we last looked at it.
+            if (entry->entryType != CacheEntry::RBT_DATA) {
+                entry->entryType = CacheEntry::RBT_DATA;
+                entry->u.data = data;
+            } else {
+                // Oops!  Another thread has updated this cache entry
+                // already to point to a data object.  Discard the
+                // one we just created and use the one in the cache
+                // instead.
+                delete data;
+                data = entry->u.data;
+            }
+        }
+    }
+
+    if (data != 0) {
+        return new RuleBasedTransliterator(ID, data);
+    } else {
+        // We have a failure of some kind.  Remove the ID from the
+        // cache so we don't keep trying.  NOTE: This will throw off
+        // anyone who is, at the moment, trying to iterate over the
+        // available IDs.  That's acceptable since we should never
+        // really get here except under installation, configuration,
+        // or unrecoverable run time memory failures.
+        _unregister(ID);
+        return 0;
+    }
+}
+
+/**
+ * Registers an instance <tt>adoptedPrototype</tt> of a subclass of
+ * <code>Transliterator</code> with the system.  This object must
+ * implement the <tt>clone()</tt> method.  When
+ * <tt>createInstance()</tt> is called with an ID string that is
+ * equal to <tt>adoptedPrototype->getID()</tt>, then a clone of the
+ * prototype is returned.
+ *
+ * @param adoptedPrototype an instance of subclass of
+ * <code>Transliterator</code> that defines <tt>clone()</tt>.  The
+ * cache takes ownership of this object.
+ * @param status if this already indicates a failure on entry, the
+ * prototype is not registered
+ * @see #createInstance
+ * @see #unregister
+ */
+void Transliterator::registerInstance(Transliterator* adoptedPrototype,
+                                      UErrorCode &status) {    
+    if (!cacheInitialized) {
+        initializeCache();
+    }
+
+    // _registerInstance() requires the caller to hold cacheMutex.
+    Mutex lock(&cacheMutex);
+    _registerInstance(adoptedPrototype, status);
+}
+
+/**
+ * This internal method registers a prototype instance in the cache.
+ * The CALLER MUST MUTEX using cacheMutex before calling this method.
+ *
+ * <p>If an entry already exists for the prototype's ID, that entry
+ * is reused and the ID is listed only once in cacheIDs.
+ *
+ * @param adoptedPrototype the prototype to register; must not be
+ * null
+ * @param status if this indicates a failure on entry, nothing is
+ * done; set to U_ILLEGAL_ARGUMENT_ERROR if
+ * <code>adoptedPrototype</code> is null
+ */
+void Transliterator::_registerInstance(Transliterator* adoptedPrototype,
+                                       UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    if (adoptedPrototype == 0) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    // This needs explaining: The string reference that getID returns
+    // is to the ID data member of Transliterator.  As long as the
+    // Transliterator object exists, this reference is valid, and in
+    // fact we can take its address and store it in cacheIDs.  The
+    // only thing we have to be sure of is that before a prototype
+    // is deleted, the pointer to its ID is removed from cacheIDs.
+    const UnicodeString& id = adoptedPrototype->getID();
+    int32_t hashCode = hash(id);
+
+    CacheEntry* entry = (CacheEntry*) uhash_get(cache, hashCode);
+    if (entry == 0) {
+        entry = new CacheEntry();
+    } else {
+        // The ID is already registered.  Drop the existing element
+        // from cacheIDs (it compares equal to 'id') now, while the
+        // string it points to is still alive: adoptPrototype() below
+        // may delete the prototype that owns that string.  This also
+        // keeps the ID from being listed twice.
+        cacheIDs.removeElement((void*) &id);
+    }
+
+    entry->adoptPrototype(adoptedPrototype);
+
+    cacheIDs.addElement((void*) &id);
+
+    uhash_putKey(cache, hashCode, entry, &status);
+}
+
+/**
+ * Unregisters a transliterator or class.  This may be either
+ * a system transliterator or a user transliterator or class.
+ * The cache is initialized first if necessary, and the removal is
+ * performed while holding cacheMutex.
+ * 
+ * @param ID the ID of the transliterator or class
+ * @see #registerInstance
+ */
+void Transliterator::unregister(const UnicodeString& ID) {
+    if (!cacheInitialized) {
+        initializeCache();
+    }
+    Mutex lock(&cacheMutex);
+    _unregister(ID);
+}
+
+/**
+ * Internal implementation of unregister().  Removes the ID from
+ * the list of available IDs and, if the cache holds an entry for
+ * it, removes that entry from the cache and deletes it.
+ * Prerequisites: The cache must be initialized, and the
+ * caller must own the cacheMutex.
+ */
+void Transliterator::_unregister(const UnicodeString& ID) {
+    const int32_t key = hash(ID);
+
+    cacheIDs.removeElement((void*) &ID);
+
+    CacheEntry* doomed = (CacheEntry*) uhash_get(cache, key);
+    if (doomed == 0) {
+        return;
+    }
+
+    UErrorCode ec = U_ZERO_ERROR;
+    uhash_remove(cache, key, &ec);
+    delete doomed;
+}
+
+/**
+ * Vector of registered IDs.  Each element is a pointer to a
+ * UnicodeString that the vector does not own: either the ID member
+ * of a registered prototype or a string obtained from the resource
+ * bundle in initializeCache().  Elements are compared by string
+ * value using compareIDs().
+ */
+UVector Transliterator::cacheIDs;
+
+/**
+ * Return the number of IDs currently registered with the system.
+ * To retrieve the actual IDs, call getAvailableID(i) with
+ * i from 0 to countAvailableIDs() - 1.  The cache is initialized
+ * first if necessary.
+ */
+int32_t Transliterator::countAvailableIDs() {
+    if (!cacheInitialized) {
+        initializeCache();
+    }
+    Mutex lock(&cacheMutex);
+    return cacheIDs.size();
+}
+
+/**
+ * Return the index-th available ID.  index must be between 0
+ * and countAvailableIDs() - 1, inclusive.  If index is out of
+ * range, the result of getAvailableID(0) is returned.
+ *
+ * <p>The cache is initialized first if necessary, so the range
+ * check is made against the populated list of IDs.
+ *
+ * NOTE(review): if no IDs are registered at all, index 0 is itself
+ * out of range; what happens then depends on UVector::operator[] --
+ * confirm.
+ */
+const UnicodeString& Transliterator::getAvailableID(int32_t index) {
+    if (!cacheInitialized) {
+        initializeCache();
+    }
+    Mutex lock(&cacheMutex);
+    // Validate the index only after the cache has been populated
+    // and while holding the mutex, so that the size we check is the
+    // size we index into.
+    if (index < 0 || index >= cacheIDs.size()) {
+        index = 0;
+    }
+    return *(const UnicodeString*) cacheIDs[index];
+}
+
+/**
+ * Equality callback installed on the cacheIDs vector.  Both
+ * arguments are void* pointers to UnicodeString objects; the
+ * strings are compared by value, not by address.
+ */
+bool_t Transliterator::compareIDs(void* a, void* b) {
+    return *(const UnicodeString*) a == *(const UnicodeString*) b;
+}
+
+// Builds the transliterator cache: creates the hash table, adds a
+// placeholder entry for every rule-based ID listed in the resource
+// bundle, and registers the built-in prototype transliterators.
+// Acquires cacheMutex itself and does nothing if the cache has
+// already been initialized.
+void Transliterator::initializeCache() {
+    // Lock first, check init boolean second
+    Mutex lock(&cacheMutex);
+    if (cacheInitialized) {
+        return;
+    }
+
+    UErrorCode ec = U_ZERO_ERROR;
+
+    // Construct the cache before looking for the resource, so that
+    // a cache object exists even if the resource is absent.
+    cache = uhash_open(uhash_hashUString, &ec);
+    cacheIDs.setComparer(compareIDs);
+
+    /* The resource is expected to be an n x 3 table in which each
+     * row holds a forward ID, a reverse ID (possibly empty), and a
+     * resource file name, e.g.:
+     *
+     * RuleBasedTransliteratorIDs {
+     *     { "Latin-Arabic", "Arabic-Latin", "larabic" }
+     *     ...
+     * }
+     */
+    ResourceBundle bundle(Locale::getDataDirectory(),
+                          Locale::getDefault(),
+                          ec);
+    int32_t rows, cols;
+    const UnicodeString** table =
+        bundle.get2dArray(RB_RULE_BASED_IDS, rows, cols, ec);
+
+    if (U_SUCCESS(ec) && (cols == 3)) {
+        for (int32_t r = 0; r < rows; ++r) {
+            const UnicodeString* row = table[r];
+
+            // Column 0 is the forward ID, column 1 the reverse ID
+            for (int32_t c = 0; c < 2; ++c) {
+                const UnicodeString& id = row[c];
+                if (id.length() <= 0) {
+                    continue; // no transliterator in this direction
+                }
+
+                CacheEntry* entry = new CacheEntry();
+                if (c == 0) {
+                    entry->entryType = CacheEntry::RULE_BASED_PLACEHOLDER;
+                } else {
+                    entry->entryType =
+                        CacheEntry::REVERSE_RULE_BASED_PLACEHOLDER;
+                }
+                entry->rbFile = row[2];
+                uhash_putKey(cache, hash(id), entry, &ec);
+
+                /* We store the address of the string from the
+                 * resource bundle under the assumption that the RB
+                 * is caching these, and that they stay around
+                 * forever.  If this changes, the id vector must be
+                 * changed to own its strings and a copy must be
+                 * made here.
+                 */
+                cacheIDs.addElement((void*) &id);
+            }
+        }
+    }
+
+    // Manually add prototypes that the system knows about to the
+    // cache.  This is how new non-rule-based transliterators are
+    // added to the system.
+
+    ec = U_ZERO_ERROR; // Reset status for following calls
+    _registerInstance(new HexToUnicodeTransliterator(), ec);
+    _registerInstance(new UnicodeToHexTransliterator(), ec);
+
+    cacheInitialized = TRUE;
+}
+
+// Creates an empty cache entry: type NONE with a null payload.
+Transliterator::CacheEntry::CacheEntry() {
+    entryType = NONE;
+    u.prototype = 0;
+}
+
+// Destroys the entry.  Only a PROTOTYPE payload is deleted here.
+// NOTE(review): an RBT_DATA payload (u.data) is not deleted --
+// presumably because rule data may still be in use by
+// transliterators created from it; confirm the intended ownership.
+Transliterator::CacheEntry::~CacheEntry() {
+    if (entryType == PROTOTYPE) {
+        delete u.prototype;
+    }
+}
+
+// Makes this a PROTOTYPE entry that owns 'adopted'.  A prototype
+// previously owned by this entry is deleted, unless it is the very
+// object being adopted.
+void Transliterator::CacheEntry::adoptPrototype(Transliterator* adopted) {
+    // Without the identity check, re-adopting the current prototype
+    // would delete it and then store the dangling pointer.
+    if (entryType == PROTOTYPE && u.prototype != adopted) {
+        delete u.prototype;
+    }
+    entryType = PROTOTYPE;
+    u.prototype = adopted;
+}
--- a/icu4c/source/i18n/translit.h
+++ b/icu4c/source/i18n/translit.h
@ -0,0 +1,860 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#ifndef TRANSLIT_H
+#define TRANSLIT_H
+
+#include "unistr.h"
+#include "umutex.h"
+#include "uvector.h"
+
+class Replaceable;
+class UnicodeFilter;
+class TransliterationRuleData;
+struct UHashtable;
+
+/**
+ * <code>Transliterator</code> is an abstract class that
+ * transliterates text from one format to another.  The most common
+ * kind of transliterator is a script, or alphabet, transliterator.
+ * For example, a Russian to Latin transliterator changes Russian text
+ * written in Cyrillic characters to phonetically equivalent Latin
+ * characters.  It does not <em>translate</em> Russian to English!
+ * Transliteration, unlike translation, operates on characters, without
+ * reference to the meanings of words and sentences.
+ *
+ * <p>Although script conversion is its most common use, a
+ * transliterator can actually perform a more general class of tasks.
+ * In fact, <code>Transliterator</code> defines a very general API
+ * which specifies only that a segment of the input text is replaced
+ * by new text.  The particulars of this conversion are determined
+ * entirely by subclasses of <code>Transliterator</code>.
+ *
+ * <p><b>Transliterators are stateless</b>
+ *
+ * <p><code>Transliterator</code> objects are <em>stateless</em>; they
+ * retain no information between calls to
+ * <code>transliterate()</code>.  (However, this does <em>not</em>
+ * mean that threads may share transliterators without synchronizing
+ * them.  Transliterators are not immutable, so they must be
+ * synchronized when shared between threads.)  This might seem to
+ * limit the complexity of the transliteration operation.  In
+ * practice, subclasses perform complex transliterations by delaying
+ * the replacement of text until it is known that no other
+ * replacements are possible.  In other words, although the
+ * <code>Transliterator</code> objects are stateless, the source text
+ * itself embodies all the needed information, and delayed operation
+ * allows arbitrary complexity.
+ *
+ * <p><b>Batch transliteration</b>
+ *
+ * <p>The simplest way to perform transliteration is all at once, on a
+ * string of existing text.  This is referred to as <em>batch</em>
+ * transliteration.  For example, given a string <code>input</code>
+ * and a transliterator <code>t</code>, the call
+ *
+ * <blockquote><code>String result = t.transliterate(input);
+ * </code></blockquote>
+ *
+ * will transliterate it and return the result.  Other methods allow
+ * the client to specify a substring to be transliterated and to use
+ * {@link Replaceable} objects instead of strings, in order to
+ * preserve out-of-band information (such as text styles).
+ *
+ * <p><b>Keyboard transliteration</b>
+ *
+ * <p>Somewhat more involved is <em>keyboard</em>, or incremental
+ * transliteration.  This is the transliteration of text that is
+ * arriving from some source (typically the user's keyboard) one
+ * character at a time, or in some other piecemeal fashion.
+ *
+ * <p>In keyboard transliteration, a <code>Replaceable</code> buffer
+ * stores the text.  As text is inserted, as much as possible is
+ * transliterated on the fly.  This means a GUI that displays the
+ * contents of the buffer may show text being modified as each new
+ * character arrives.
+ *
+ * <p>Consider the simple <code>RuleBasedTransliterator</code>:
+ *
+ * <blockquote><code>
+ * th&gt;{theta}<br>
+ * t&gt;{tau}
+ * </code></blockquote>
+ *
+ * When the user types 't', nothing will happen, since the
+ * transliterator is waiting to see if the next character is 'h'.  To
+ * remedy this, we introduce the notion of a cursor, marked by a '|'
+ * in the output string:
+ *
+ * <blockquote><code>
+ * t&gt;|{tau}<br>
+ * {tau}h&gt;{theta}
+ * </code></blockquote>
+ *
+ * Now when the user types 't', tau appears, and if the next character
+ * is 'h', the tau changes to a theta.  This is accomplished by
+ * maintaining a cursor position (independent of the insertion point,
+ * and invisible in the GUI) across calls to
+ * <code>keyboardTransliterate()</code>.  Typically, the cursor will
+ * be coincident with the insertion point, but in a case like the one
+ * above, it will precede the insertion point.
+ *
+ * <p>Keyboard transliteration methods maintain a set of three indices
+ * that are updated with each call to
+ * <code>keyboardTransliterate()</code>, including the cursor, start,
+ * and limit.  Since these indices are changed by the method, they are
+ * passed in an <code>int[]</code> array. The <code>START</code> index
+ * marks the beginning of the substring that the transliterator will
+ * look at.  It is advanced as text becomes committed (but it is not
+ * the committed index; that's the <code>CURSOR</code>).  The
+ * <code>CURSOR</code> index, described above, marks the point at
+ * which the transliterator last stopped, either because it reached
+ * the end, or because it required more characters to disambiguate
+ * between possible inputs.  The <code>CURSOR</code> can also be
+ * explicitly set by rules in a <code>RuleBasedTransliterator</code>.
+ * Any characters before the <code>CURSOR</code> index are frozen;
+ * future keyboard transliteration calls within this input sequence
+ * will not change them.  New text is inserted at the
+ * <code>LIMIT</code> index, which marks the end of the substring that
+ * the transliterator looks at.
+ *
+ * <p>Because keyboard transliteration assumes that more characters
+ * are to arrive, it is conservative in its operation.  It only
+ * transliterates when it can do so unambiguously.  Otherwise it waits
+ * for more characters to arrive.  When the client code knows that no
+ * more characters are forthcoming, perhaps because the user has
+ * performed some input termination operation, then it should call
+ * <code>finishKeyboardTransliteration()</code> to complete any
+ * pending transliterations.
+ *
+ * <p><b>Inverses</b>
+ *
+ * <p>Pairs of transliterators may be inverses of one another.  For
+ * example, if transliterator <b>A</b> transliterates characters by
+ * incrementing their Unicode value (so "abc" -> "def"), and
+ * transliterator <b>B</b> decrements character values, then <b>A</b>
+ * is an inverse of <b>B</b> and vice versa.  If we compose <b>A</b>
+ * with <b>B</b> in a compound transliterator, the result is the
+ * identity transliterator, that is, a transliterator that does not
+ * change its input text.
+ *
+ * The <code>Transliterator</code> method <code>getInverse()</code>
+ * returns a transliterator's inverse, if one exists, or
+ * <code>null</code> otherwise.  However, the result of
+ * <code>getInverse()</code> usually will <em>not</em> be a true
+ * mathematical inverse.  This is because true inverse transliterators
+ * are difficult to formulate.  For example, consider two
+ * transliterators: <b>AB</b>, which transliterates the character 'A'
+ * to 'B', and <b>BA</b>, which transliterates 'B' to 'A'.  It might
+ * seem that these are exact inverses, since
+ *
+ * <blockquote>"A" x <b>AB</b> -> "B"<br>
+ * "B" x <b>BA</b> -> "A"</blockquote>
+ *
+ * where 'x' represents transliteration.  However,
+ *
+ * <blockquote>"ABCD" x <b>AB</b> -> "BBCD"<br>
+ * "BBCD" x <b>BA</b> -> "AACD"</blockquote>
+ *
+ * so <b>AB</b> composed with <b>BA</b> is not the
+ * identity. Nonetheless, <b>BA</b> may be usefully considered to be
+ * <b>AB</b>'s inverse, and it is on this basis that
+ * <b>AB</b><code>.getInverse()</code> could legitimately return
+ * <b>BA</b>.
+ *
+ * <p><b>IDs and display names</b>
+ *
+ * <p>A transliterator is designated by a short identifier string or
+ * <em>ID</em>.  IDs follow the format <em>source-destination</em>,
+ * where <em>source</em> describes the entity being replaced, and
+ * <em>destination</em> describes the entity replacing
+ * <em>source</em>.  The entities may be the names of scripts,
+ * particular sequences of characters, or whatever else it is that the
+ * transliterator converts to or from.  For example, a transliterator
+ * from Russian to Latin might be named "Russian-Latin".  A
+ * transliterator from keyboard escape sequences to Latin-1 characters
+ * might be named "KeyboardEscape-Latin1".  By convention, system
+ * entity names are in English, with the initial letters of words
+ * capitalized; user entity names may follow any format so long as
+ * they do not contain dashes.
+ *
+ * <p>In addition to programmatic IDs, transliterator objects have
+ * display names for presentation in user interfaces, returned by
+ * {@link #getDisplayName}.
+ *
+ * <p><b>Factory methods and registration</b>
+ *
+ * <p>In general, client code should use the factory method
+ * <code>getInstance()</code> to obtain an instance of a
+ * transliterator given its ID.  Valid IDs may be enumerated using
+ * <code>getAvailableIDs()</code>.  Since transliterators are mutable,
+ * multiple calls to <code>getInstance()</code> with the same ID will
+ * return distinct objects.
+ *
+ * <p>In addition to the system transliterators registered at startup,
+ * user transliterators may be registered by calling
+ * <code>registerInstance()</code> at run time.  A registered instance
+ * acts as a template; future calls to <tt>getInstance()</tt> with the ID
+ * of the registered object return clones of that object.  Thus any
+ * object passed to <tt>registerInstance()</tt> must implement
+ * <tt>clone()</tt> properly.  To register a transliterator subclass
+ * without instantiating it (until it is needed), users may call
+ * <code>registerClass()</code>.  In this case, the objects are
+ * instantiated by invoking the zero-argument public constructor of
+ * the class.
+ *
+ * <p><b>Subclassing</b>
+ *
+ * <p>Subclasses must implement the abstract
+ * <code>transliterate()</code> method.  They should also override the
+ * <code>transliterate()</code> method taking a <code>String</code>
+ * and <code>StringBuffer</code> if the performance of these methods
+ * can be improved over the performance obtained by the default
+ * implementations in this class.  Subclasses must also implement
+ * <code>handleKeyboardTransliterate()</code>.
+ *
+ * @author Alan Liu
+ */
+class U_I18N_API Transliterator {
+
+public:
+
+    enum {
+        /**
+         * In the <code>keyboardTransliterate()</code>
+         * <code>index[]</code> array, the beginning index, inclusive
+         * @see #keyboardTransliterate
+         */
+        START = 0,
+
+        /**
+         * In the <code>keyboardTransliterate()</code>
+         * <code>index[]</code> array, the ending index, exclusive
+         * @see #keyboardTransliterate
+         */
+        LIMIT = 1,
+
+        /**
+         * In the <code>keyboardTransliterate()</code>
+         * <code>index[]</code> array, the next character to be considered
+         * for transliteration
+         * @see #keyboardTransliterate
+         */
+        CURSOR = 2
+    };
+
+private:
+
+    /**
+     * Programmatic name, e.g., "Latin-Arabic".
+     */
+    UnicodeString ID;
+
+    /** 
+     * This transliterator's filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     */
+    UnicodeFilter* filter;
+
+    /**
+     * Dictionary of known transliterators.  Keys are <code>String</code>
+     * names, values are one of the following:
+     *
+     * <ul><li><code>Transliterator</code> objects
+     *
+     * <li><code>Class</code> objects.  Such objects must represent
+     * subclasses of <code>Transliterator</code>, and must satisfy the
+     * constraints described in <code>registerClass()</code>
+     *
+     * <li><code>RULE_BASED_PLACEHOLDER</code>, in which case the ID
+     * will have its first '-' removed and be appended to
+     * RB_RULE_BASED_PREFIX to form a resource bundle name from which
+     * the RB_RULE key is looked up to obtain the rule.
+     *
+     * <li><code>REVERSE_RULE_BASED_PLACEHOLDER</code>.  Like
+     * <code>RULE_BASED_PLACEHOLDER</code>, except the entity names in
+     * the ID are reversed, and the argument
+     * RuleBasedTransliterator.REVERSE is passed to the
+     * RuleBasedTransliterator constructor.
+     * </ul>
+     */
+    static UHashtable* cache;
+
+    /**
+     * The mutex controlling access to the cache.
+     */
+    static UMTX cacheMutex;
+
+    /**
+     * When set to TRUE, the cache has been initialized.  Any code must
+     * check this boolean before accessing the cache, and if the boolean
+     * is FALSE, it must call initializeCache().  We do this form of lazy
+     * evaluation for two reasons: (1) so we don't initialize if we don't
+     * have to (i.e., if no one is using Transliterator, but has included
+     * the code as part of a shared library), and (2) to avoid static
+     * initialization problems.
+     */
+    static bool_t cacheInitialized;
+
+    /**
+     * In Java, the cache stores objects of different types and
+     * singleton objects as placeholders for rule-based
+     * transliterators to be built as needed.  In C++ we use the
+     * following struct to achieve the same purpose.  Instances of
+     * this struct can be placeholders, can represent prototype
+     * transliterators to be cloned, or can represent
+     * RuleBasedTransliterator::Data objects.  We don't support
+     * storing classes in the cache because we don't have the rtti
+     * infrastructure for it.  We could easily add this if there is a
+     * need for it in the future.  The rbFile is the resource bundle
+     * file name for rule-based transliterators.
+     */
+    struct CacheEntry { // One value stored in the transliterator cache
+        enum Type { // Tells which of the fields below is in use
+            RULE_BASED_PLACEHOLDER, // Placeholder for a rule-based transliterator; see rbFile
+            REVERSE_RULE_BASED_PLACEHOLDER, // Placeholder for the reverse direction; see rbFile
+            PROTOTYPE, // u.prototype holds a transliterator that this entry deletes
+            RBT_DATA, // u.data holds rule data
+            NONE // Only used for uninitialized entries
+        } entryType; // Current kind of this entry
+        UnicodeString rbFile; // For *PLACEHOLDER: resource bundle file name
+        union {
+            Transliterator* prototype; // For PROTOTYPE
+            TransliterationRuleData* data; // For RBT_DATA
+        } u;
+        CacheEntry(); // Creates a NONE entry with a null prototype pointer
+        ~CacheEntry(); // Deletes u.prototype if this is a PROTOTYPE entry
+        void adoptPrototype(Transliterator* adopted); // Becomes a PROTOTYPE entry holding adopted
+    };
+
+    /**
+     * Prefix for resource bundle key for the display name for a
+     * transliterator.  The ID is appended to this to form the key.
+     * The resource bundle value should be a String.
+     */
+    static const char* RB_DISPLAY_NAME_PREFIX;
+
+    /**
+     * Resource bundle key for display name pattern.
+     * The resource bundle value should be a String forming a
+     * MessageFormat pattern, e.g.:
+     * "{0,choice,0#|1#{1} Transliterator|2#{1} to {2} Transliterator}".
+     */
+    static const char* RB_DISPLAY_NAME_PATTERN;
+
+    /**
+     * Resource bundle key for the list of RuleBasedTransliterator IDs.
+     * The resource bundle value should be a String[] with each element
+     * being a valid ID.  The ID will be appended to RB_RULE_BASED_PREFIX
+     * to obtain the class name in which the RB_RULE key will be sought.
+     */
+    static const char* RB_RULE_BASED_IDS;
+
+    /**
+     * Resource bundle key for the RuleBasedTransliterator rule.
+     */
+    static const char* RB_RULE;
+
+protected:
+
+    /**
+     * Default constructor.
+     * @param ID the string identifier for this transliterator
+     * @param adoptedFilter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     */
+    Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter);
+
+    /**
+     * Copy constructor.
+     */
+    Transliterator(const Transliterator&);
+
+    /**
+     * Assignment operator.
+     */
+    Transliterator& operator=(const Transliterator&);
+
+public:
+
+    /**
+     * Destructor.
+     */
+    virtual ~Transliterator();
+
+    /**
+     * Implements Cloneable.
+     * All subclasses are encouraged to implement this method if it is
+     * possible and reasonable to do so.  Subclasses that are to be
+     * registered with the system using <tt>registerInstance()</tt>
+     * are required to implement this method.  If a subclass does not
+     * implement clone() properly and is registered with the system
+     * using registerInstance(), then the default clone() implementation
+     * will return null, and calls to createInstance() will fail.
+     *
+     * @see #registerInstance
+     */
+    virtual Transliterator* clone() const { return 0; }
+
+    /**
+     * Transliterates the segment of a string that begins at the
+     * character at offset <code>start</code> and extends to the
+     * character at offset <code>limit - 1</code>, with optional
+     * filtering.  A default implementation is provided here;
+     * subclasses should provide a more efficient implementation if
+     * possible.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param result buffer to receive the transliterated text; previous
+     * contents are discarded
+     */
+    virtual void transliterate(const UnicodeString& text,
+                               int32_t start, int32_t limit,
+                               UnicodeString& result) const;
+
+    /**
+     * Transliterates a segment of a string, with optional filtering.
+     * Subclasses must override this abstract method.
+     *
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @param filter the filter.  Any character for which
+     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
+     * altered by this transliterator.  If <tt>filter</tt> is
+     * <tt>null</tt> then no filtering is applied.
+     * @return The new limit index.  The text previously occupying <code>[start,
+     * limit)</code> has been transliterated, possibly to a string of a different
+     * length, at <code>[start, </code><em>new-limit</em><code>)</code>, where
+     * <em>new-limit</em> is the return value.
+     */
+    virtual int32_t transliterate(Replaceable& text,
+                                  int32_t start, int32_t limit) const = 0;
+
+    /**
+     * Transliterates an entire string. Convenience method.
+     * @param text the string to be transliterated
+     * @param result buffer to receive the transliterated text; previous
+     * contents are discarded
+     */
+    virtual void transliterate(const UnicodeString& text,
+                               UnicodeString& result) const;
+
+    /**
+     * Transliterates an entire string in place. Convenience method.
+     * @param text the string to be transliterated
+     */
+    virtual void transliterate(Replaceable& text) const;
+
+    /**
+     * Transliterates the portion of the text buffer that can be
+     * transliterated unambiguously after new text has been inserted,
+     * typically as a result of a keyboard event.  The new text in
+     * <code>insertion</code> will be inserted into <code>text</code>
+     * at <code>index[LIMIT]</code>, advancing
+     * <code>index[LIMIT]</code> by <code>insertion.length()</code>.
+     * Then the transliterator will try to transliterate characters of
+     * <code>text</code> between <code>index[CURSOR]</code> and
+     * <code>index[LIMIT]</code>.  Characters before
+     * <code>index[CURSOR]</code> will not be changed.
+     *
+     * <p>Upon return, values in <code>index[]</code> will be updated.
+     * <code>index[START]</code> will be advanced to the first
+     * character that future calls to this method will read.
+     * <code>index[CURSOR]</code> and <code>index[LIMIT]</code> will
+     * be adjusted to delimit the range of text that future calls to
+     * this method may change.
+     *
+     * <p>Typical usage of this method begins with an initial call
+     * with <code>index[START]</code> and <code>index[LIMIT]</code>
+     * set to indicate the portion of <code>text</code> to be
+     * transliterated, and <code>index[CURSOR] == index[START]</code>.
+     * Thereafter, <code>index[]</code> can be used without
+     * modification in future calls, provided that all changes to
+     * <code>text</code> are made via this method.
+     *
+     * <p>This method assumes that future calls may be made that will
+     * insert new text into the buffer.  As a result, it only performs
+     * unambiguous transliterations.  After the last call to this
+     * method, there may be untransliterated text that is waiting for
+     * more input to resolve an ambiguity.  In order to perform these
+     * pending transliterations, clients should call {@link
+     * #finishKeyboardTransliteration} after the last call to this
+     * method has been made.
+     * 
+     * @param text the buffer holding transliterated and untransliterated text
+     * @param index an array of three integers.
+     *
+     * <ul><li><code>index[START]</code>: the beginning index,
+     * inclusive; <code>0 <= index[START] <= index[LIMIT]</code>.
+     *
+     * <li><code>index[LIMIT]</code>: the ending index, exclusive;
+     * <code>index[START] <= index[LIMIT] <= text.length()</code>.
+     * <code>insertion</code> is inserted at
+     * <code>index[LIMIT]</code>.
+     *
+     * <li><code>index[CURSOR]</code>: the next character to be
+     * considered for transliteration; <code>index[START] <=
+     * index[CURSOR] <= index[LIMIT]</code>.  Characters before
+     * <code>index[CURSOR]</code> will not be changed by future calls
+     * to this method.</ul>
+     *
+     * @param insertion text to be inserted and possibly
+     * transliterated into the translation buffer at
+     * <code>index[LIMIT]</code>.  If <code>null</code> then no text
+     * is inserted.
+     * @see #START
+     * @see #LIMIT
+     * @see #CURSOR
+     * @see #handleKeyboardTransliterate
+     * @exception IllegalArgumentException if <code>index[]</code>
+     * is invalid
+     */
+    virtual void keyboardTransliterate(Replaceable& text,
+                                       int32_t index[3],
+                                       const UnicodeString& insertion,
+                                       UErrorCode& status) const;
+
+    /**
+     * Transliterates the portion of the text buffer that can be
+     * transliterated unambiguously after a new character has been
+     * inserted, typically as a result of a keyboard event.  This is a
+     * convenience method; see {@link
+     * #keyboardTransliterate(Replaceable, int[], String)} for details.
+     * @param text the buffer holding transliterated and
+     * untransliterated text
+     * @param index an array of three integers.  See {@link
+     * #keyboardTransliterate(Replaceable, int[], String)}.
+     * @param insertion text to be inserted and possibly
+     * transliterated into the translation buffer at
+     * <code>index[LIMIT]</code>.
+     * @see #keyboardTransliterate(Replaceable, int[], String)
+     */
+    virtual void keyboardTransliterate(Replaceable& text, int32_t index[3],
+                                       UChar insertion,
+                                       UErrorCode& status) const;
+
+    /**
+     * Transliterates the portion of the text buffer that can be
+     * transliterated unambiguously.  This is a convenience method; see
+     * {@link #keyboardTransliterate(Replaceable, int[], String)} for
+     * details.
+     * @param text the buffer holding transliterated and
+     * untransliterated text
+     * @param index an array of three integers.  See {@link
+     * #keyboardTransliterate(Replaceable, int[], String)}.
+     * @see #keyboardTransliterate(Replaceable, int[], String)
+     */
+    virtual void keyboardTransliterate(Replaceable& text, int32_t index[3],
+                                       UErrorCode& status) const;
+
+    /**
+     * Finishes any pending transliterations that were waiting for
+     * more characters.  Clients should call this method as the last
+     * call after a sequence of one or more calls to
+     * <code>keyboardTransliterate()</code>.
+     * @param text the buffer holding transliterated and
+     * untransliterated text.
+     * @param index the array of indices previously passed to {@link
+     * #keyboardTransliterate}
+     */
+    virtual void finishKeyboardTransliteration(Replaceable& text,
+                                               int32_t index[3]) const;
+
+private:
+
+    /**
+     * This internal method does keyboard transliteration.  If the
+     * 'insertion' is non-null then we append it to 'text' before
+     * proceeding.  This method calls through to the pure virtual
+     * framework method handleKeyboardTransliterate() to do the actual
+     * work.
+     */
+    void _keyboardTransliterate(Replaceable& text,
+                                int32_t index[3],
+                                const UnicodeString* insertion,
+                                UErrorCode &status) const;
+
+protected:
+
+    /**
+     * Abstract method that concrete subclasses define to implement
+     * keyboard transliteration.  This method should transliterate all
+     * characters between <code>index[CURSOR]</code> and
+     * <code>index[LIMIT]</code> that can be unambiguously
+     * transliterated, regardless of future insertions of text at
+     * <code>index[LIMIT]</code>.  <code>index[CURSOR]</code> should
+     * be advanced past committed characters (those that will not
+     * change in future calls to this method).
+     * <code>index[LIMIT]</code> should be updated to reflect text
+     * replacements that shorten or lengthen the text between
+     * <code>index[CURSOR]</code> and <code>index[LIMIT]</code>.  Upon
+     * return, neither <code>index[CURSOR]</code> nor
+     * <code>index[LIMIT]</code> should be less than the initial value
+     * of <code>index[CURSOR]</code>.  <code>index[START]</code>
+     * should <em>not</em> be changed.
+     *
+     * @param text the buffer holding transliterated and
+     * untransliterated text
+     * @param index an array of three integers.  See {@link
+     * #keyboardTransliterate(Replaceable, int[], String)}.
+     * @see #keyboardTransliterate
+     */
+    virtual void handleKeyboardTransliterate(Replaceable& text,
+                                             int32_t index[3]) const = 0;
+
+    // C++ requires this friend declaration so CompoundTransliterator
+    // can access handleKeyboardTransliterate.  Alternatively, we could
+    // make handleKeyboardTransliterate public.
+    friend class CompoundTransliterator;
+
+public:
+
+    /**
+     * Returns the length of the longest context required by this transliterator.
+     * This is <em>preceding</em> context.  The default implementation supplied
+     * by <code>Transliterator</code> returns zero; subclasses
+     * that use preceding context should override this method to return the
+     * correct value.  For example, if a transliterator translates "ddd" (where
+     * d is any digit) to "555" when preceded by "(ddd)", then the preceding
+     * context length is 5, the length of "(ddd)".
+     *
+     * @return The maximum number of preceding context characters this
+     * transliterator needs to examine
+     */
+    virtual int32_t getMaximumContextLength() const;
+
+    /**
+     * Returns a programmatic identifier for this transliterator.
+     * If this identifier is passed to <code>getInstance()</code>, it
+     * will return this object, if it has been registered.
+     * @see #registerInstance
+     * @see #registerClass
+     * @see #getAvailableIDs
+     */
+    virtual const UnicodeString& getID() const;
+
+    /**
+     * Returns a name for this transliterator that is appropriate for
+     * display to the user in the default locale.  See {@link
+     * #getDisplayName(Locale)} for details.
+     */
+    virtual UnicodeString& getDisplayName(UnicodeString& result) const;
+
+    /**
+     * Returns a name for this transliterator that is appropriate for
+     * display to the user in the given locale.  This name is taken
+     * from the locale resource data in the standard manner of the
+     * <code>java.text</code> package.
+     *
+     * <p>If no localized names exist in the system resource bundles,
+     * a name is synthesized using a localized
+     * <code>MessageFormat</code> pattern from the resource data.  The
+     * arguments to this pattern are an integer followed by one or two
+     * strings.  The integer is the number of strings, either 1 or 2.
+     * The strings are formed by splitting the ID for this
+     * transliterator at the first '-'.  If there is no '-', then the
+     * entire ID forms the only string.
+     * @param inLocale the Locale in which the display name should be
+     * localized.
+     * @see java.text.MessageFormat
+     */
+    virtual UnicodeString& getDisplayName(const Locale& inLocale,
+                                          UnicodeString& result) const;
+
+    /**
+     * Returns the filter used by this transliterator, or <tt>null</tt>
+     * if this transliterator uses no filter.
+     */
+    virtual const UnicodeFilter* getFilter() const;
+
+    /**
+     * Changes the filter used by this transliterator.  If the filter
+     * is set to <tt>null</tt> then no filtering will occur.
+     *
+     * <p>Callers must take care if a transliterator is in use by
+     * multiple threads.  The filter should not be changed by one
+     * thread while another thread may be transliterating.
+     */
+    virtual void adoptFilter(UnicodeFilter* adoptedFilter);
+
+    /**
+     * Returns this transliterator's inverse.  See the class
+     * documentation for details.  This implementation simply inverts
+     * the two entities in the ID and attempts to retrieve the
+     * resulting transliterator.  That is, if <code>getID()</code>
+     * returns "A-B", then this method will return the result of
+     * <code>getInstance("B-A")</code>, or <code>null</code> if that
+     * call fails.
+     *
+     * <p>This method does not take filtering into account.  The
+     * returned transliterator will have no filter.
+     *
+     * <p>Subclasses with knowledge of their inverse may wish to
+     * override this method.
+     *
+     * @return a transliterator that is an inverse, not necessarily
+     * exact, of this transliterator, or <code>null</code> if no such
+     * transliterator is registered.
+     * @see #registerInstance
+     */
+    virtual Transliterator* createInverse() const;
+
+    /**
+     * Returns a <code>Transliterator</code> object given its ID.
+     * The ID must be either a system transliterator ID or an ID registered
+     * using <code>registerInstance()</code>.
+     *
+     * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
+     * @return A <code>Transliterator</code> object with the given ID
+     * @exception IllegalArgumentException if the given ID is invalid.
+     * @see #registerInstance
+     * @see #getAvailableIDs
+     * @see #getID
+     */
+    static Transliterator* createInstance(const UnicodeString& ID);
+
+private:
+
+    /**
+     * This is the path to the subdirectory within the locale data
+     * directory that contains the rule-based transliterator resource
+     * bundle files.  This is constructed dynamically the first time
+     * Transliterator::getDataDirectory() is called.
+     */
+    static char* DATA_DIR;
+    
+    /**
+     * This is the name of a subdirectory within the locale data directory
+     * that contains the rule-based transliterator resource bundle files.
+     */
+    static const char* RESOURCE_SUB_DIR;
+
+    /**
+     * Returns the directory in which the transliterator resource bundle
+     * files are located.  This is a subdirectory, named RESOURCE_SUB_DIR,
+     * under Locale::getDataDirectory().  It ends in a path separator.
+     */
+    static const char* getDataDirectory();
+
+    /**
+     * Returns an int32_t hash code computed from the given string.
+     * NOTE(review): presumably used to key the cache of registered
+     * IDs -- confirm against the implementation.
+     */
+    static int32_t hash(const UnicodeString& str);
+
+    /**
+     * Returns a transliterator object given its ID.  Unlike
+     * createInstance(), this method returns null if it cannot make
+     * use of the given ID.
+     */
+    static Transliterator* _createInstance(const UnicodeString& ID);
+
+public:
+
+    /**
+     * Registers an instance <tt>adoptedObj</tt> of a subclass of
+     * <code>Transliterator</code> with the system.  When
+     * <tt>createInstance()</tt> is called with an ID string that is
+     * equal to <tt>adoptedObj->getID()</tt>, then
+     * <tt>adoptedObj->clone()</tt> is returned.
+     *
+     * After this call the Transliterator class owns the adoptedObj
+     * and will delete it.
+     *
+     * @param adoptedObj an instance of subclass of
+     * <code>Transliterator</code> that defines <tt>clone()</tt>
+     * @param status error code in/out parameter.  NOTE(review): the
+     * conditions under which it is set are not visible from this
+     * declaration -- confirm against the implementation.
+     * @see #createInstance
+     * @see #registerClass
+     * @see #unregister
+     */
+    static void registerInstance(Transliterator* adoptedObj,
+                                 UErrorCode& status);
+
+private:
+
+    /**
+     * This internal method registers a prototype instance in the cache.
+     * The CALLER MUST MUTEX using cacheMutex before calling this method.
+     */
+    static void _registerInstance(Transliterator* adoptedPrototype,
+                                  UErrorCode &status);
+
+public:
+
+    /**
+     * Unregisters a transliterator or class.  This may be either
+     * a system transliterator or a user transliterator or class.
+     *
+     * This function returns nothing; the caller gets no indication of
+     * whether anything was registered under <code>ID</code>.
+     *
+     * @param ID the ID of the transliterator or class
+     * @see #registerInstance
+     * @see #registerClass
+     */
+    static void unregister(const UnicodeString& ID);
+
+private:
+
+    /**
+     * Unregisters a transliterator or class.  Internal method.
+     * Prerequisites: The cache must be initialized, and the
+     * caller must own the cacheMutex.
+     */
+    static void _unregister(const UnicodeString& ID);
+
+    /**
+     * Returns an enumeration over the programmatic names of registered
+     * <code>Transliterator</code> objects.  This includes both system
+     * transliterators and user transliterators registered using
+     * <code>registerInstance()</code>.  The enumerated names may be
+     * passed to <code>getInstance()</code>.
+     *
+     * @return An <code>Enumeration</code> over <code>String</code> objects
+     * @see #getInstance
+     * @see #registerInstance
+     */
+    // virtual Enumeration getAvailableIDs();
+
+    /**
+     * Vector of registered IDs.
+     */
+    static UVector cacheIDs;
+
+public:
+
+    /**
+     * Return the number of IDs currently registered with the system.
+     * To retrieve the actual IDs, call getAvailableID(i) with
+     * i from 0 to countAvailableIDs() - 1.
+     */
+    static int32_t countAvailableIDs();
+
+    /**
+     * Return the index-th available ID.  index must be between 0
+     * and countAvailableIDs() - 1, inclusive.  If index is out of
+     * range, the result of getAvailableID(0) is returned.
+     */
+    static const UnicodeString& getAvailableID(int32_t index);
+
+private:
+    /**
+     * Comparison function for UVector.  Compares two UnicodeString
+     * objects given void* pointers to them.
+     */
+    static bool_t compareIDs(void* a, void* b);
+
+    /**
+     * Initializes the cache of registered transliterators.
+     * NOTE(review): when this is called and whether the caller must
+     * hold cacheMutex is not visible from this declaration -- confirm.
+     */
+    static void initializeCache();
+};
+
+#endif
--- a/icu4c/source/i18n/unifilt.h
+++ b/icu4c/source/i18n/unifilt.h
@ -0,0 +1,51 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#ifndef UNIFILT_H
+#define UNIFILT_H
+
+// bool_t, UChar and U_I18N_API are used below.  Include their
+// definitions so that this header can be included on its own,
+// independent of what the including file pulled in before it.
+#include "utypes.h"
+
+/**
+ * <code>UnicodeFilter</code> defines a protocol for selecting a
+ * subset of the full range (U+0000 to U+FFFF) of Unicode characters.
+ * Currently, filters are used in conjunction with classes like {@link
+ * Transliterator} to only process selected characters through a
+ * transformation.
+ *
+ * <p>This is an abstract base class: <tt>isIn()</tt> and
+ * <tt>clone()</tt> are pure virtual and the constructor is
+ * protected, so only subclasses can be instantiated.
+ *
+ * @see UnicodeFilterLogic
+ */
+class U_I18N_API UnicodeFilter {
+
+public:
+
+    /**
+     * Virtual destructor, so that a filter can be deleted through a
+     * <tt>UnicodeFilter*</tt>.
+     */
+    virtual ~UnicodeFilter();
+
+    /**
+     * Returns <tt>true</tt> for characters that are in the selected
+     * subset.  In other words, if a character is <b>to be
+     * filtered</b>, then <tt>isIn()</tt> returns
+     * <b><tt>false</tt></b>.
+     */
+    virtual bool_t isIn(UChar c) const = 0;
+
+    /**
+     * Returns a copy of this object.  All UnicodeFilter objects have
+     * to support cloning in order to allow classes using
+     * UnicodeFilters, such as Transliterator, to implement cloning.
+     */
+    virtual UnicodeFilter* clone() const = 0;
+
+protected:
+
+    /**
+     * Default constructor; protected because only subclasses are
+     * meant to be constructed.
+     */
+    UnicodeFilter();
+};
+
+inline UnicodeFilter::UnicodeFilter() {}
+inline UnicodeFilter::~UnicodeFilter() {}
+
+#endif
--- a/icu4c/source/i18n/unifltlg.cpp
+++ b/icu4c/source/i18n/unifltlg.cpp
@ -0,0 +1,139 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#include "unifltlg.h"
+#include "unifilt.h"
+
+/**
+ * Filter that accepts exactly the characters rejected by the filter
+ * it wraps.  The wrapped filter is owned by this object and deleted
+ * in the destructor; copying clones the wrapped filter.
+ */
+class UnicodeNotFilter : public UnicodeFilter {
+    UnicodeFilter* filt; // owned
+
+    // Declared private and deliberately not implemented.  The
+    // compiler-generated assignment would copy the owning pointer,
+    // leaking one wrapped filter and deleting the other twice.
+    UnicodeNotFilter& operator=(const UnicodeNotFilter&);
+public:
+    UnicodeNotFilter(UnicodeFilter* adopted);
+    UnicodeNotFilter(const UnicodeNotFilter&);
+    virtual ~UnicodeNotFilter();
+    virtual bool_t isIn(UChar c) const;
+    virtual UnicodeFilter* clone() const;
+};
+
+// Takes ownership of the given filter.
+UnicodeNotFilter::UnicodeNotFilter(UnicodeFilter* adopted) : filt(adopted) {}
+// Deep copy: the new object gets its own clone of the wrapped filter.
+UnicodeNotFilter::UnicodeNotFilter(const UnicodeNotFilter& f) : filt(f.filt->clone()) {}
+UnicodeNotFilter::~UnicodeNotFilter() { delete filt; }
+// Logical negation of the wrapped filter's answer.
+bool_t UnicodeNotFilter::isIn(UChar c) const { return !filt->isIn(c); }
+UnicodeFilter* UnicodeNotFilter::clone() const { return new UnicodeNotFilter(*this); }
+
+/**
+ * Creates a filter whose <tt>isIn()</tt> is the logical negation of
+ * <tt>f.isIn()</tt>.  The new filter works on its own clone of
+ * <tt>f</tt>; <tt>f</tt> itself is not retained.
+ */
+UnicodeFilter* UnicodeFilterLogic::createNot(const UnicodeFilter& f) {
+    UnicodeFilter* inner = f.clone();
+    return new UnicodeNotFilter(inner);
+}
+
+/**
+ * Filter that accepts a character only if both wrapped filters
+ * accept it.  Both wrapped filters are owned by this object and
+ * deleted in the destructor; copying clones both of them.
+ */
+class UnicodeAndFilter : public UnicodeFilter {
+    UnicodeFilter* filt1; // owned
+    UnicodeFilter* filt2; // owned
+
+    // Declared private and deliberately not implemented.  The
+    // compiler-generated assignment would copy the owning pointers,
+    // leaking two wrapped filters and deleting the other two twice.
+    UnicodeAndFilter& operator=(const UnicodeAndFilter&);
+public:
+    UnicodeAndFilter(UnicodeFilter* adopted1, UnicodeFilter* adopted2);
+    UnicodeAndFilter(const UnicodeAndFilter&);
+    virtual ~UnicodeAndFilter();
+    virtual bool_t isIn(UChar c) const;
+    virtual UnicodeFilter* clone() const;
+};
+
+// Takes ownership of both given filters.
+UnicodeAndFilter::UnicodeAndFilter(UnicodeFilter* f1, UnicodeFilter* f2) : filt1(f1), filt2(f2) {}
+// Deep copy: the new object gets its own clones of both wrapped filters.
+UnicodeAndFilter::UnicodeAndFilter(const UnicodeAndFilter& f) :
+    filt1(f.filt1->clone()), filt2(f.filt2->clone()) {}
+UnicodeAndFilter::~UnicodeAndFilter() { delete filt1; delete filt2; }
+// Short circuit: filt2 is not consulted when filt1 rejects c.
+bool_t UnicodeAndFilter::isIn(UChar c) const { return filt1->isIn(c) && filt2->isIn(c); }
+UnicodeFilter* UnicodeAndFilter::clone() const { return new UnicodeAndFilter(*this); }
+
+/**
+ * Creates a filter that is the short circuit AND of the two given
+ * filters: when <tt>f.isIn()</tt> is <tt>false</tt> the result is
+ * <tt>false</tt> and <tt>g.isIn()</tt> is never called.
+ *
+ * <p>The new filter works on its own clones of <tt>f</tt> and
+ * <tt>g</tt>; the arguments themselves are not retained.
+ */
+UnicodeFilter* UnicodeFilterLogic::createAnd(const UnicodeFilter& f,
+                                             const UnicodeFilter& g) {
+    UnicodeFilter* first = f.clone();
+    UnicodeFilter* second = g.clone();
+    return new UnicodeAndFilter(first, second);
+}
+
+/**
+ * Returns a <tt>UnicodeFilter</tt> that implements a short
+ * circuit AND of the result of the given filters.  That is, if
+ * <tt>f[i].isIn()</tt> is <tt>false</tt>, then
+ * <tt>f[j].isIn()</tt> is not called, where <tt>j > i</tt>, and
+ * <tt>isIn()</tt> returns <tt>false</tt>.
+ */
+//!UnicodeFilter* UnicodeFilterLogic::and(const UnicodeFilter** f) {
+//!    return new UnicodeFilter() {
+//!        public bool_t isIn(UChar c) {
+//!            for (int32_t i=0; i<f.length; ++i) {
+//!                if (!f[i].isIn(c)) {
+//!                    return FALSE;
+//!                }
+//!            }
+//!            return TRUE;
+//!        }
+//!    };
+//!}
+
+/**
+ * Filter that accepts a character if either wrapped filter accepts
+ * it.  Both wrapped filters are owned by this object and deleted in
+ * the destructor; copying clones both of them.
+ */
+class UnicodeOrFilter : public UnicodeFilter {
+    UnicodeFilter* filt1; // owned
+    UnicodeFilter* filt2; // owned
+
+    // Declared private and deliberately not implemented.  The
+    // compiler-generated assignment would copy the owning pointers,
+    // leaking two wrapped filters and deleting the other two twice.
+    UnicodeOrFilter& operator=(const UnicodeOrFilter&);
+public:
+    UnicodeOrFilter(UnicodeFilter* adopted1, UnicodeFilter* adopted2);
+    UnicodeOrFilter(const UnicodeOrFilter&);
+    virtual ~UnicodeOrFilter();
+    virtual bool_t isIn(UChar c) const;
+    virtual UnicodeFilter* clone() const;
+};
+
+// Takes ownership of both given filters.
+UnicodeOrFilter::UnicodeOrFilter(UnicodeFilter* f1, UnicodeFilter* f2) : filt1(f1), filt2(f2) {}
+// Deep copy: the new object gets its own clones of both wrapped filters.
+UnicodeOrFilter::UnicodeOrFilter(const UnicodeOrFilter& f) :
+    filt1(f.filt1->clone()), filt2(f.filt2->clone()) {}
+UnicodeOrFilter::~UnicodeOrFilter() { delete filt1; delete filt2; }
+// Short circuit: filt2 is not consulted when filt1 accepts c.
+bool_t UnicodeOrFilter::isIn(UChar c) const { return filt1->isIn(c) || filt2->isIn(c); }
+UnicodeFilter* UnicodeOrFilter::clone() const { return new UnicodeOrFilter(*this); }
+
+/**
+ * Creates a filter that is the short circuit OR of the two given
+ * filters: when <tt>f.isIn()</tt> is <tt>true</tt> the result is
+ * <tt>true</tt> and <tt>g.isIn()</tt> is never called.
+ *
+ * <p>The new filter works on its own clones of <tt>f</tt> and
+ * <tt>g</tt>; the arguments themselves are not retained.
+ */
+UnicodeFilter* UnicodeFilterLogic::createOr(const UnicodeFilter& f,
+                                            const UnicodeFilter& g) {
+    UnicodeFilter* first = f.clone();
+    UnicodeFilter* second = g.clone();
+    return new UnicodeOrFilter(first, second);
+}
+
+/**
+ * Returns a <tt>UnicodeFilter</tt> that implements a short
+ * circuit OR of the result of the given filters.  That is, if
+ * <tt>f[i].isIn()</tt> is <tt>true</tt>, then
+ * <tt>f[j].isIn()</tt> is not called, where <tt>j > i</tt>, and
+ * <tt>isIn()</tt> returns <tt>true</tt>.
+ */
+//!UnicodeFilter* UnicodeFilterLogic::or(const UnicodeFilter** f) {
+//!    return new UnicodeFilter() {
+//!        public bool_t isIn(UChar c) {
+//!            for (int32_t i=0; i<f.length; ++i) {
+//!                if (f[i].isIn(c)) {
+//!                    return TRUE;
+//!                }
+//!            }
+//!            return FALSE;
+//!        }
+//!    };
+//!}
+
+// TODO: Add nand() & nor() for convenience, if needed.
--- a/icu4c/source/i18n/unifltlg.h
+++ b/icu4c/source/i18n/unifltlg.h
@ -0,0 +1,84 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#ifndef UNIFLTLG_H
+#define UNIFLTLG_H
+
+#include "utypes.h"
+
+class UnicodeFilter;
+
+/**
+ * <code>UnicodeFilterLogic</code> provides logical operators on
+ * {@link UnicodeFilter} objects.  This class cannot be instantiated;
+ * it consists only of static methods.  The static methods return
+ * filter objects that perform logical inversion (<tt>createNot</tt>),
+ * intersection (<tt>createAnd</tt>), or union (<tt>createOr</tt>) of
+ * the given filter objects.
+ */
+class U_I18N_API UnicodeFilterLogic {
+
+public:
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that implements the inverse of
+     * the given filter.  The result works on a clone of <tt>f</tt>.
+     */
+    static UnicodeFilter* createNot(const UnicodeFilter& f);
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that implements a short
+     * circuit AND of the result of the two given filters.  That is,
+     * if <tt>f.isIn()</tt> is <tt>false</tt>, then <tt>g.isIn()</tt>
+     * is not called, and <tt>isIn()</tt> returns <tt>false</tt>.
+     *
+     * <p>The result works on clones of <tt>f</tt> and <tt>g</tt>.
+     */
+    static UnicodeFilter* createAnd(const UnicodeFilter& f,
+                                    const UnicodeFilter& g);
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that implements a short
+     * circuit AND of the result of the given filters.  That is, if
+     * <tt>f[i].isIn()</tt> is <tt>false</tt>, then
+     * <tt>f[j].isIn()</tt> is not called, where <tt>j > i</tt>, and
+     * <tt>isIn()</tt> returns <tt>false</tt>.
+     * (Not currently available; the declaration is commented out.)
+     */
+    // static UnicodeFilter* and(const UnicodeFilter** f);
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that implements a short
+     * circuit OR of the result of the two given filters.  That is, if
+     * <tt>f.isIn()</tt> is <tt>true</tt>, then <tt>g.isIn()</tt> is
+     * not called, and <tt>isIn()</tt> returns <tt>true</tt>.
+     *
+     * <p>The result works on clones of <tt>f</tt> and <tt>g</tt>.
+     */
+    static UnicodeFilter* createOr(const UnicodeFilter& f,
+                                   const UnicodeFilter& g);
+
+    /**
+     * Returns a <tt>UnicodeFilter</tt> that implements a short
+     * circuit OR of the result of the given filters.  That is, if
+     * <tt>f[i].isIn()</tt> is <tt>true</tt>, then
+     * <tt>f[j].isIn()</tt> is not called, where <tt>j > i</tt>, and
+     * <tt>isIn()</tt> returns <tt>true</tt>.
+     * (Not currently available; the declaration is commented out.)
+     */
+    // static UnicodeFilter* or(const UnicodeFilter** f);
+
+    // TODO: Add nand() & nor() for convenience, if needed.
+
+private:
+    // Disallow instantiation
+    UnicodeFilterLogic();
+};
+
+inline UnicodeFilterLogic::UnicodeFilterLogic() {}
+
+#endif
--- a/icu4c/source/i18n/unirange.cpp
+++ b/icu4c/source/i18n/unirange.cpp
@ -0,0 +1,108 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#include "unirange.h"
+#include "uvector.h"
+#include "unistr.h"
+
+/**
+ * Constructs the range of theLength characters that begins at
+ * theStart.
+ */
+UnicodeRange::UnicodeRange(UChar theStart, int32_t theLength) :
+    start(theStart),
+    length(theLength) {
+}
+
+/**
+ * Returns a newly allocated range with the same start and length
+ * as this one.
+ *
+ * CALLER OWNS RESULT.
+ */
+UnicodeRange* UnicodeRange::clone() const {
+    UnicodeRange* copy = new UnicodeRange(*this);
+    return copy;
+}
+
+/**
+ * Returns true if c lies inside this range, that is, if
+ * start <= c and c - start < length.
+ */
+bool_t UnicodeRange::contains(UChar c) const {
+    return !(c < start || (c - start) >= length);
+}
+
+/**
+ * Removes the character c from this range, assuming contains(c) is
+ * true.  When c is strictly inside the range, this range is shrunk
+ * in place to the part below c and a newly allocated range covering
+ * the part above c is returned.  When c is the first or the last
+ * character of the range, this range is simply shortened by that
+ * character and null is returned.  In no case does either range
+ * include c.
+ *
+ * MODIFIES THIS RANGE IN PLACE.
+ *
+ * CALLER OWNS RESULT.
+ */
+UnicodeRange* UnicodeRange::split(UChar c) {
+    if (c == start) {
+        // c is the first character: drop it from the front.
+        ++start;
+        --length;
+        return 0;
+    }
+    if (c - start == length - 1) {
+        // c is the last character: drop it from the end.
+        --length;
+        return 0;
+    }
+    // c is interior: the upper part starts right after c and runs to
+    // the old end; this range keeps the part below c.
+    const UChar upperStart = (UChar) (c + 1);
+    UnicodeRange* upper = new UnicodeRange(upperStart,
+                                           start + length - upperStart);
+    length = c - start;
+    return upper;
+}
+
+/**
+ * Finds the largest subrange of this range that is unused by the
+ * given string.  A subrange is unused by a string if the string
+ * contains no characters in that range.  If the given string
+ * contains no characters in this range, then a copy of this range
+ * is returned.  This range itself is not modified; all splitting is
+ * done on a clone.
+ *
+ * If several subranges share the largest length, the one that comes
+ * first in the working list is returned.
+ *
+ * CALLER OWNS RESULT.
+ */
+UnicodeRange*
+UnicodeRange::largestUnusedSubrange(const UnicodeString& str) const {
+    int32_t n = str.length();
+
+    // Working list of disjoint subranges, seeded with a copy of this
+    // range.  NOTE(review): presumably the deleter makes the vector
+    // free whatever elements it still holds when it goes out of
+    // scope -- confirm against UVector.
+    UVector v;
+    v.setDeleter(UnicodeRange::deleter);
+    v.addElement(clone());
+    for (int32_t i=0; i<n; ++i) {
+        UChar c = str.charAt(i);
+        // Only characters inside the original range can affect the list.
+        if (contains(c)) {
+            // Find the subrange that still holds c, if any, and cut c
+            // out of it.  If c was already removed by an earlier
+            // occurrence, no subrange matches and nothing happens.
+            for (int32_t j=0; j<v.size(); ++j) {
+                UnicodeRange* r = (UnicodeRange*) v.elementAt(j);
+                if (r->contains(c)) {
+                    // split() shrinks r in place and may return the
+                    // upper remainder as a new range.
+                    r = r->split(c);
+                    if (r != 0) {
+                        v.addElement(r);
+                    }
+                    break;
+                }
+            }
+        }
+    }
+
+    // Pick the longest remaining subrange; the first one wins ties.
+    UnicodeRange* bestRange = 0;
+    int32_t ibest = -1;
+    for (int32_t j=0; j<v.size(); ++j) {
+        UnicodeRange* r = (UnicodeRange*) v.elementAt(j);
+        if (bestRange == 0 || r->length > bestRange->length) {
+            bestRange = r;
+            ibest = j;
+        }
+    }
+
+    v.orphanElementAt(ibest); // So bestRange doesn't get deleted
+
+    return bestRange;
+}
+
+// Element deleter for a UVector holding UnicodeRange* objects:
+// deletes the given element as a UnicodeRange.
+void UnicodeRange::deleter(void* e) {
+    UnicodeRange* range = (UnicodeRange*) e;
+    delete range;
+}
--- a/icu4c/source/i18n/unirange.h
+++ b/icu4c/source/i18n/unirange.h
@ -0,0 +1,79 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#ifndef UNIRANGE_H
+#define UNIRANGE_H
+
+#include "utypes.h"
+
+class UnicodeString;
+
+/**
+ * %%% INTERNAL CLASS USED BY RuleBasedTransliterator %%%
+ *
+ * A range of Unicode characters.  Support the operations of testing for
+ * inclusion (does this range contain this character?) and splitting.
+ * Splitting involves breaking a range into two smaller ranges around a
+ * character inside the original range.  The split character is not included
+ * in either range.  If the split character is at either extreme end of the
+ * range, one of the split products is an empty range.
+ *
+ * This class is used internally to determine the largest available private
+ * use character range for variable stand-ins.
+ */
+class UnicodeRange {
+
+public:
+
+    /**
+     * First character of the range.
+     */
+    UChar start;
+
+    /**
+     * Number of characters in the range; the range covers
+     * start through start + length - 1.
+     */
+    int32_t length;
+
+    /**
+     * Constructs the range of the given length beginning at start.
+     */
+    UnicodeRange(UChar start, int32_t length);
+
+    /**
+     * Returns a newly allocated copy of this range.
+     *
+     * CALLER OWNS RESULT.
+     */
+    UnicodeRange* clone() const;
+
+    /**
+     * Returns true if c is at or after start and fewer than length
+     * characters past it.
+     */
+    bool_t contains(UChar c) const;
+
+    /**
+     * Assume that contains(c) is true.  Split this range into two new
+     * ranges around the character c.  Make this range one of the new ranges
+     * (modify it in place) and return the other new range.  The character
+     * itself is not included in either range.  If the split results in an
+     * empty range (that is, if c == start or c == start + length - 1) then
+     * return null; this range is still shortened by c in that case.
+     *
+     * MODIFIES THIS RANGE IN PLACE.
+     *
+     * CALLER OWNS RESULT.
+     */
+    UnicodeRange* split(UChar c);
+
+    /**
+     * Finds the largest subrange of this range that is unused by the
+     * given string.  A subrange is unused by a string if the string
+     * contains no characters in that range.  If the given string
+     * contains no characters in this range, then a copy of this range
+     * is returned.
+     *
+     * CALLER OWNS RESULT.
+     */
+    UnicodeRange* largestUnusedSubrange(const UnicodeString& str) const;
+
+private:
+
+    // For UVector of UnicodeRange* objects: deletes the element.
+    static void deleter(void*);
+
+};
+
+#endif
--- a/icu4c/source/i18n/unitohex.cpp
+++ b/icu4c/source/i18n/unitohex.cpp
@ -0,0 +1,204 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#include "unitohex.h"
+#include "rep.h"
+#include "unifilt.h"
+
+/**
+ * ID for this transliterator.
+ */
+const char* UnicodeToHexTransliterator::_ID = "Unicode-Hex";
+
+/**
+ * Prefix used by the constructor that takes no prefix argument:
+ * a backslash followed by 'u'.
+ */
+const char* UnicodeToHexTransliterator::DEFAULT_PREFIX = "\\u";
+
+/**
+ * Constructs a transliterator.
+ * @param hexPrefix the string that will precede the four hex
+ * digits of each escape sequence.
+ * @param isUppercase if true, the four hex digits will be
+ * output in uppercase; otherwise they will be lowercase.
+ * @param adoptedFilter filter handed to the Transliterator base
+ * constructor; may be 0, in which case every character is
+ * converted.  NOTE(review): presumably adopted (owned) by the base
+ * class -- confirm against Transliterator.
+ */
+UnicodeToHexTransliterator::UnicodeToHexTransliterator(
+                                const UnicodeString& hexPrefix,
+                                bool_t isUppercase,
+                                UnicodeFilter* adoptedFilter) :
+    Transliterator(_ID, adoptedFilter),
+    prefix(hexPrefix),
+    uppercase(isUppercase) {
+}
+
+/**
+ * Constructs a transliterator with the default prefix (DEFAULT_PREFIX,
+ * a backslash followed by 'u') that outputs uppercase hex digits.
+ * @param adoptedFilter filter handed to the Transliterator base
+ * constructor; may be 0, in which case every character is
+ * converted.  NOTE(review): presumably adopted (owned) by the base
+ * class -- confirm against Transliterator.
+ */
+UnicodeToHexTransliterator::UnicodeToHexTransliterator(
+                                UnicodeFilter* adoptedFilter) :
+    Transliterator(_ID, adoptedFilter),
+    prefix(DEFAULT_PREFIX),
+    uppercase(TRUE) {
+}
+
+/**
+ * Copy constructor.  Copy-constructs the Transliterator base from
+ * <code>other</code> and copies its prefix and uppercase flag.
+ */
+UnicodeToHexTransliterator::UnicodeToHexTransliterator(
+                                const UnicodeToHexTransliterator& other) :
+    Transliterator(other), prefix(other.prefix),
+    uppercase(other.uppercase) {
+}
+
+/**
+ * Assignment operator.  Assigns the Transliterator base state and
+ * then takes over the uppercase flag and the prefix of
+ * <code>other</code>.
+ */
+UnicodeToHexTransliterator&
+UnicodeToHexTransliterator::operator=(const UnicodeToHexTransliterator& other) {
+    Transliterator::operator=(other);
+    uppercase = other.uppercase;
+    prefix = other.prefix;
+    return *this;
+}
+
+/**
+ * Transliterator API.  Returns a newly allocated copy of this
+ * object, made with the copy constructor.
+ */
+Transliterator*
+UnicodeToHexTransliterator::clone() const {
+    UnicodeToHexTransliterator* copy = new UnicodeToHexTransliterator(*this);
+    return copy;
+}
+
+/**
+ * Returns the string that precedes the four hex digits.  The result
+ * is a reference to this object's own prefix member.
+ * @return prefix string
+ */
+const UnicodeString& UnicodeToHexTransliterator::getPrefix() const {
+    return prefix;
+}
+
+/**
+ * Sets the string that precedes the four hex digits.
+ *
+ * <p>Callers must take care if a transliterator is in use by
+ * multiple threads.  The prefix should not be changed by one
+ * thread while another thread may be transliterating.
+ * @param hexPrefix the new prefix string; it is copied
+ */
+void UnicodeToHexTransliterator::setPrefix(const UnicodeString& hexPrefix) {
+    prefix = hexPrefix;
+}
+
+/**
+ * Returns true if this transliterator outputs uppercase hex digits,
+ * false if it outputs lowercase hex digits.
+ */
+bool_t UnicodeToHexTransliterator::isUppercase() const {
+    return uppercase;
+}
+
+/**
+ * Sets whether this transliterator outputs uppercase hex digits.
+ *
+ * <p>Callers must take care if a transliterator is in use by
+ * multiple threads.  The uppercase mode should not be changed by
+ * one thread while another thread may be transliterating.
+ * @param outputUppercase if true, then this transliterator
+ * outputs uppercase hex digits; otherwise lowercase.
+ */
+void UnicodeToHexTransliterator::setUppercase(bool_t outputUppercase) {
+    uppercase = outputUppercase;
+}
+
+/**
+ * Transliterates a segment of a string.  <code>Transliterator</code> API.
+ * The work is delegated to handleKeyboardTransliterate().
+ * @param text the string to be transliterated
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= text.length()</code>.
+ * @return the new limit index
+ */
+int32_t UnicodeToHexTransliterator::transliterate(Replaceable& text,
+                                                  int32_t start,
+                                                  int32_t limit) const {
+    // Positional initialization; assumes the offsets array is laid
+    // out as start, limit, cursor -- TODO confirm against the
+    // START/LIMIT/CURSOR index constants.
+    int32_t pos[3] = { start, limit, start };
+    handleKeyboardTransliterate(text, pos);
+    const int32_t newLimit = pos[LIMIT];
+    return newLimit;
+}
+
+/**
+ * Implements {@link Transliterator#handleKeyboardTransliterate}.
+ *
+ * Replaces every character between offsets[CURSOR] and offsets[LIMIT]
+ * that passes the filter (or every character, when there is no
+ * filter) with its hexadecimal escape.  For example, '@' -> "U+0040",
+ * assuming the prefix is "U+".  On return offsets[LIMIT] and
+ * offsets[CURSOR] reflect the lengthened text.
+ */
+void UnicodeToHexTransliterator::handleKeyboardTransliterate(Replaceable& text,
+                                     int32_t offsets[3]) const {
+    int32_t pos = offsets[CURSOR];
+    int32_t end = offsets[LIMIT];
+
+    const UnicodeFilter* activeFilter = getFilter();
+    UnicodeString escape;
+
+    while (pos < end) {
+        const UChar ch = text.charAt(pos);
+        if (activeFilter == 0 || activeFilter->isIn(ch)) {
+            toHex(escape, ch);
+            text.handleReplaceBetween(pos, pos + 1, escape);
+            const int32_t escapeLength = escape.length();
+            // Skip over the escape just written; the text grew by
+            // one less than the escape length, since one character
+            // was replaced.
+            pos += escapeLength;
+            end += escapeLength - 1;
+        } else {
+            // Character is filtered out: leave it untouched.
+            ++pos;
+        }
+    }
+
+    offsets[LIMIT] = end;
+    offsets[CURSOR] = pos;
+}
+
+/**
+ * Return the length of the longest context required by this transliterator.
+ * This is <em>preceding</em> context.  This transliterator always
+ * returns 0.  The function takes no arguments.
+ * @return maximum number of preceding context characters this
+ * transliterator needs to examine
+ */
+int32_t UnicodeToHexTransliterator::getMaximumContextLength() {
+    return 0;
+}
+
+/**
+ * Hex digit lookup table used by itoh().  Entries 0-15 are the
+ * digits with lowercase letters; entries 16-31 are the same digits
+ * with uppercase letters, so OR-ing 16 into a digit value selects
+ * the uppercase form.
+ */
+UChar UnicodeToHexTransliterator::HEX_DIGITS[32] = {
+    // If necessary, replace these character constants with their hex values
+    '0', '1', '2', '3', '4', '5', '6', '7',
+    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
+    '0', '1', '2', '3', '4', '5', '6', '7',
+    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
+};
+
+/**
+ * Given an integer, return the hex digit for its least significant
+ * four bits, in uppercase or lowercase according to the uppercase
+ * setting of this transliterator.
+ */
+UChar UnicodeToHexTransliterator::itoh(int32_t i) const {
+    const int32_t nibble = i & 0xF;
+    // The uppercase digits occupy the second half of HEX_DIGITS.
+    const int32_t tableOffset = uppercase ? 16 : 0;
+    return HEX_DIGITS[tableOffset | nibble];
+}
+
+/**
+ * Form escape sequence: overwrites result with the prefix followed
+ * by the four hex digits of c, most significant digit first, and
+ * returns result.
+ */
+UnicodeString& UnicodeToHexTransliterator::toHex(UnicodeString& result,
+                                                 UChar c) const {
+    result = prefix;
+    // Emit the four nibbles from the top one down to the bottom one.
+    for (int32_t shift = 12; shift >= 0; shift -= 4) {
+        result.append(itoh(c >> shift));
+    }
+    return result;
+}
--- a/icu4c/source/i18n/unitohex.h
+++ b/icu4c/source/i18n/unitohex.h
@ -0,0 +1,157 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   Date        Name        Description
+*   11/17/99    aliu        Creation.
+**********************************************************************
+*/
+#ifndef UNITOHEX_H
+#define UNITOHEX_H
+
+#include "translit.h"
+#include "unistr.h"
+
+class UnicodeFilter;
+
+/**
+ * A transliterator that converts from Unicode characters to
+ * hexadecimal Unicode escape sequences.  It outputs a
+ * prefix specified in the constructor followed by four hex digits,
+ * which are output in uppercase or lowercase as configured.
+ *
+ * @author Alan Liu
+ */
+class U_I18N_API UnicodeToHexTransliterator : public Transliterator {
+
+private:
+
+    /**
+     * ID for this transliterator.
+     */
+    static const char* _ID;
+
+    /**
+     * Prefix used when none is given to the constructor.
+     */
+    static const char* DEFAULT_PREFIX;
+
+    /**
+     * String output before the four hex digits of each escape.
+     */
+    UnicodeString prefix;
+
+    /**
+     * True if the hex digits are output in uppercase.
+     */
+    bool_t uppercase;
+
+public:
+
+    /**
+     * Constructs a transliterator.
+     * @param hexPrefix the string that will precede the four hex
+     * digits of each escape sequence.
+     * @param isUppercase if true, the four hex digits will be
+     * output in uppercase; otherwise they will be lowercase.
+     * @param adoptedFilter filter handed to the Transliterator base
+     * class; may be 0.  NOTE(review): presumably adopted (owned) by
+     * the base class -- confirm against Transliterator.
+     */
+    UnicodeToHexTransliterator(const UnicodeString& hexPrefix,
+                               bool_t isUppercase,
+                               UnicodeFilter* adoptedFilter = 0);
+
+    /**
+     * Constructs a transliterator with the default prefix (a
+     * backslash followed by 'u') that outputs uppercase hex digits.
+     * @param adoptedFilter filter handed to the Transliterator base
+     * class; may be 0.  NOTE(review): presumably adopted (owned) by
+     * the base class -- confirm against Transliterator.
+     */
+    UnicodeToHexTransliterator(UnicodeFilter* adoptedFilter = 0);
+
+    /**
+     * Destructor.
+     */
+    virtual ~UnicodeToHexTransliterator();
+
+    /**
+     * Copy constructor.
+     */
+    UnicodeToHexTransliterator(const UnicodeToHexTransliterator&);
+
+    /**
+     * Assignment operator.
+     */
+    UnicodeToHexTransliterator& operator=(const UnicodeToHexTransliterator&);
+
+    /**
+     * Transliterator API.  Returns a newly allocated copy of this
+     * object.
+     */
+    virtual Transliterator* clone() const;
+
+    /**
+     * Returns the string that precedes the four hex digits.
+     * @return prefix string
+     */
+    virtual const UnicodeString& getPrefix() const;
+
+    /**
+     * Sets the string that precedes the four hex digits.
+     *
+     * <p>Callers must take care if a transliterator is in use by
+     * multiple threads.  The prefix should not be changed by one
+     * thread while another thread may be transliterating.
+     * @param prefix prefix string
+     */
+    virtual void setPrefix(const UnicodeString& prefix);
+
+    /**
+     * Returns true if this transliterator outputs uppercase hex digits.
+     */
+    virtual bool_t isUppercase() const;
+
+    /**
+     * Sets whether this transliterator outputs uppercase hex digits.
+     *
+     * <p>Callers must take care if a transliterator is in use by
+     * multiple threads.  The uppercase mode should not be changed by
+     * one thread while another thread may be transliterating.
+     * @param outputUppercase if true, then this transliterator
+     * outputs uppercase hex digits.
+     */
+    virtual void setUppercase(bool_t outputUppercase);
+
+    /**
+     * Transliterates a segment of a string.  <code>Transliterator</code> API.
+     * @param text the string to be transliterated
+     * @param start the beginning index, inclusive; <code>0 <= start
+     * <= limit</code>.
+     * @param limit the ending index, exclusive; <code>start <= limit
+     * <= text.length()</code>.
+     * @return the new limit index
+     */
+    virtual int32_t transliterate(Replaceable& text, int32_t start, int32_t limit) const;
+
+    /**
+     * Implements {@link Transliterator#handleKeyboardTransliterate}.
+     */
+    virtual void handleKeyboardTransliterate(Replaceable& text,
+                                             int32_t offsets[3]) const;
+
+    /**
+     * Return the length of the longest context required by this transliterator.
+     * This is <em>preceding</em> context.  The function takes no
+     * arguments.
+     * @return maximum number of preceding context characters this
+     * transliterator needs to examine
+     */
+    virtual int32_t getMaximumContextLength();
+
+private:
+
+    /**
+     * Hex digit lookup table used by itoh().
+     */
+    static UChar HEX_DIGITS[32];
+
+    /**
+     * Given an integer, return its least significant hex digit.
+     */
+    UChar itoh(int32_t i) const;
+
+    /**
+     * Form escape sequence.
+     */
+    UnicodeString& toHex(UnicodeString& result, UChar c) const;
+};
+
+inline UnicodeToHexTransliterator::~UnicodeToHexTransliterator() {}
+
+#endif