ICU-352 rbt support for segments, cursor offset, and new syntax

X-SVN-Rev: 1422
2025-04-09 15:27:38 +00:00 · 2000-05-20 04:40:29 +00:00 · 2000-05-20 04:40:29 +00:00 · 563d9e5006
commit 563d9e5006
parent 6a59bb39c8
25 changed files with 6885 additions and 5751 deletions
--- a/icu4c/data/fullhalf.txt
+++ b/icu4c/data/fullhalf.txt
@@ -1,142 +1,147 @@
 //--------------------------------------------------------------------
-//  Copyright (c) 2000, International Business Machines
-//  Corporation and others.  All Rights Reserved.
+// Copyright (c) 1999-2000, International Business Machines
+// Corporation and others.  All Rights Reserved.
 //--------------------------------------------------------------------
-//  Date        Name        Description
-//  01/13/2000  aliu        Creation.
+// THIS IS A MACHINE-GENERATED FILE
+// Tool: src\com\ibm\tools\translit\dumpICUrules.bat
+// Source: src\com\ibm\text\resources/TransliterationRule_Fullwidth_Halfwidth.java
+// Date: Fri May 19 15:50:22 2000
 //--------------------------------------------------------------------

+// Fullwidth-Halfwidth
+
 fullhalf {
-    Rule {
+  Rule {
+    
    // Mechanically generated from Unicode Character Database
-
+    
    // multicharacter
-
-    "\u30AC<>\uFF76\uFF9E;"   //  to KATAKANA LETTER GA
-    "\u30AE<>\uFF77\uFF9E;"   //  to KATAKANA LETTER GI
-    "\u30B0<>\uFF78\uFF9E;"   //  to KATAKANA LETTER GU
-    "\u30B2<>\uFF79\uFF9E;"   //  to KATAKANA LETTER GE
-    "\u30B4<>\uFF7A\uFF9E;"   //  to KATAKANA LETTER GO
-    "\u30B6<>\uFF7B\uFF9E;"   //  to KATAKANA LETTER ZA
-    "\u30B8<>\uFF7C\uFF9E;"   //  to KATAKANA LETTER ZI
-    "\u30BA<>\uFF7D\uFF9E;"   //  to KATAKANA LETTER ZU
-    "\u30BC<>\uFF7E\uFF9E;"   //  to KATAKANA LETTER ZE
-    "\u30BE<>\uFF7F\uFF9E;"   //  to KATAKANA LETTER ZO
-    "\u30C0<>\uFF80\uFF9E;"   //  to KATAKANA LETTER DA
-    "\u30C2<>\uFF81\uFF9E;"   //  to KATAKANA LETTER DI
-    "\u30C5<>\uFF82\uFF9E;"   //  to KATAKANA LETTER DU
-    "\u30C7<>\uFF83\uFF9E;"   //  to KATAKANA LETTER DE
-    "\u30C9<>\uFF84\uFF9E;"   //  to KATAKANA LETTER DO
-    "\u30D0<>\uFF8A\uFF9E;"   //  to KATAKANA LETTER BA
-    "\u30D1<>\uFF8A\uFF9F;"   //  to KATAKANA LETTER PA
-    "\u30D3<>\uFF8B\uFF9E;"   //  to KATAKANA LETTER BI
-    "\u30D4<>\uFF8B\uFF9F;"   //  to KATAKANA LETTER PI
-    "\u30D6<>\uFF8C\uFF9E;"   //  to KATAKANA LETTER BU
-    "\u30D7<>\uFF8C\uFF9F;"   //  to KATAKANA LETTER PU
-    "\u30D9<>\uFF8D\uFF9E;"   //  to KATAKANA LETTER BE
-    "\u30DA<>\uFF8D\uFF9F;"   //  to KATAKANA LETTER PE
-    "\u30DC<>\uFF8E\uFF9E;"   //  to KATAKANA LETTER BO
-    "\u30DD<>\uFF8E\uFF9F;"   //  to KATAKANA LETTER PO
-    "\u30F4<>\uFF73\uFF9E;"   //  to KATAKANA LETTER VU
-    "\u30F7<>\uFF9C\uFF9E;"   //  to KATAKANA LETTER VA
-    "\u30FA<>\uFF66\uFF9E;"   //  to KATAKANA LETTER VO
+    
+    "\u30AC<>\uFF76\uFF9E;" //  to KATAKANA LETTER GA
+    "\u30AE<>\uFF77\uFF9E;" //  to KATAKANA LETTER GI
+    "\u30B0<>\uFF78\uFF9E;" //  to KATAKANA LETTER GU
+    "\u30B2<>\uFF79\uFF9E;" //  to KATAKANA LETTER GE
+    "\u30B4<>\uFF7A\uFF9E;" //  to KATAKANA LETTER GO
+    "\u30B6<>\uFF7B\uFF9E;" //  to KATAKANA LETTER ZA
+    "\u30B8<>\uFF7C\uFF9E;" //  to KATAKANA LETTER ZI
+    "\u30BA<>\uFF7D\uFF9E;" //  to KATAKANA LETTER ZU
+    "\u30BC<>\uFF7E\uFF9E;" //  to KATAKANA LETTER ZE
+    "\u30BE<>\uFF7F\uFF9E;" //  to KATAKANA LETTER ZO
+    "\u30C0<>\uFF80\uFF9E;" //  to KATAKANA LETTER DA
+    "\u30C2<>\uFF81\uFF9E;" //  to KATAKANA LETTER DI
+    "\u30C5<>\uFF82\uFF9E;" //  to KATAKANA LETTER DU
+    "\u30C7<>\uFF83\uFF9E;" //  to KATAKANA LETTER DE
+    "\u30C9<>\uFF84\uFF9E;" //  to KATAKANA LETTER DO
+    "\u30D0<>\uFF8A\uFF9E;" //  to KATAKANA LETTER BA
+    "\u30D1<>\uFF8A\uFF9F;" //  to KATAKANA LETTER PA
+    "\u30D3<>\uFF8B\uFF9E;" //  to KATAKANA LETTER BI
+    "\u30D4<>\uFF8B\uFF9F;" //  to KATAKANA LETTER PI
+    "\u30D6<>\uFF8C\uFF9E;" //  to KATAKANA LETTER BU
+    "\u30D7<>\uFF8C\uFF9F;" //  to KATAKANA LETTER PU
+    "\u30D9<>\uFF8D\uFF9E;" //  to KATAKANA LETTER BE
+    "\u30DA<>\uFF8D\uFF9F;" //  to KATAKANA LETTER PE
+    "\u30DC<>\uFF8E\uFF9E;" //  to KATAKANA LETTER BO
+    "\u30DD<>\uFF8E\uFF9F;" //  to KATAKANA LETTER PO
+    "\u30F4<>\uFF73\uFF9E;" //  to KATAKANA LETTER VU
+    "\u30F7<>\uFF9C\uFF9E;" //  to KATAKANA LETTER VA
+    "\u30FA<>\uFF66\uFF9E;" //  to KATAKANA LETTER VO
    
    // single character
-
-    "\uFF01<>'!';"    //  from FULLWIDTH EXCLAMATION MARK
-    "\uFF02<>'\"';"   //  from FULLWIDTH QUOTATION MARK
-    "\uFF03<>'#';"    //  from FULLWIDTH NUMBER SIGN
-    "\uFF04<>'$';"    //  from FULLWIDTH DOLLAR SIGN
-    "\uFF05<>'%';"    //  from FULLWIDTH PERCENT SIGN
-    "\uFF06<>'&';"    //  from FULLWIDTH AMPERSAND
+    
+    "\uFF01<>'!';" //  from FULLWIDTH EXCLAMATION MARK
+    "\uFF02<>'\"';" //  from FULLWIDTH QUOTATION MARK
+    "\uFF03<>'#';" //  from FULLWIDTH NUMBER SIGN
+    "\uFF04<>'$';" //  from FULLWIDTH DOLLAR SIGN
+    "\uFF05<>'%';" //  from FULLWIDTH PERCENT SIGN
+    "\uFF06<>'&';" //  from FULLWIDTH AMPERSAND
    "\uFF07<>'';" //  from FULLWIDTH APOSTROPHE
-    "\uFF08<>'(';"    //  from FULLWIDTH LEFT PARENTHESIS
-    "\uFF09<>')';"    //  from FULLWIDTH RIGHT PARENTHESIS
-    "\uFF0A<>'*';"    //  from FULLWIDTH ASTERISK
-    "\uFF0B<>'+';"    //  from FULLWIDTH PLUS SIGN
-    "\uFF0C<>',';"    //  from FULLWIDTH COMMA
-    "\uFF0D<>'-';"    //  from FULLWIDTH HYPHEN-MINUS
-    "\uFF0E<>'.';"    //  from FULLWIDTH FULL STOP
-    "\uFF0F<>'/';"    //  from FULLWIDTH SOLIDUS
-    "\uFF10<>'0';"    //  from FULLWIDTH DIGIT ZERO
-    "\uFF11<>'1';"    //  from FULLWIDTH DIGIT ONE
-    "\uFF12<>'2';"    //  from FULLWIDTH DIGIT TWO
-    "\uFF13<>'3';"    //  from FULLWIDTH DIGIT THREE
-    "\uFF14<>'4';"    //  from FULLWIDTH DIGIT FOUR
-    "\uFF15<>'5';"    //  from FULLWIDTH DIGIT FIVE
-    "\uFF16<>'6';"    //  from FULLWIDTH DIGIT SIX
-    "\uFF17<>'7';"    //  from FULLWIDTH DIGIT SEVEN
-    "\uFF18<>'8';"    //  from FULLWIDTH DIGIT EIGHT
-    "\uFF19<>'9';"    //  from FULLWIDTH DIGIT NINE
-    "\uFF1A<>':';"    //  from FULLWIDTH COLON
-    "\uFF1B<>';';"    //  from FULLWIDTH SEMICOLON
-    "\uFF1C<>'<';"    //  from FULLWIDTH LESS-THAN SIGN
-    "\uFF1D<>'=';"    //  from FULLWIDTH EQUALS SIGN
-    "\uFF1E<>'>';"    //  from FULLWIDTH GREATER-THAN SIGN
-    "\uFF1F<>'?';"    //  from FULLWIDTH QUESTION MARK
-    "\uFF20<>'@';"    //  from FULLWIDTH COMMERCIAL AT
-    "\uFF21<>A;"  //  from FULLWIDTH LATIN CAPITAL LETTER A
-    "\uFF22<>B;"  //  from FULLWIDTH LATIN CAPITAL LETTER B
-    "\uFF23<>C;"  //  from FULLWIDTH LATIN CAPITAL LETTER C
-    "\uFF24<>D;"  //  from FULLWIDTH LATIN CAPITAL LETTER D
-    "\uFF25<>E;"  //  from FULLWIDTH LATIN CAPITAL LETTER E
-    "\uFF26<>F;"  //  from FULLWIDTH LATIN CAPITAL LETTER F
-    "\uFF27<>G;"  //  from FULLWIDTH LATIN CAPITAL LETTER G
-    "\uFF28<>H;"  //  from FULLWIDTH LATIN CAPITAL LETTER H
-    "\uFF29<>I;"  //  from FULLWIDTH LATIN CAPITAL LETTER I
-    "\uFF2A<>J;"  //  from FULLWIDTH LATIN CAPITAL LETTER J
-    "\uFF2B<>K;"  //  from FULLWIDTH LATIN CAPITAL LETTER K
-    "\uFF2C<>L;"  //  from FULLWIDTH LATIN CAPITAL LETTER L
-    "\uFF2D<>M;"  //  from FULLWIDTH LATIN CAPITAL LETTER M
-    "\uFF2E<>N;"  //  from FULLWIDTH LATIN CAPITAL LETTER N
-    "\uFF2F<>O;"  //  from FULLWIDTH LATIN CAPITAL LETTER O
-    "\uFF30<>P;"  //  from FULLWIDTH LATIN CAPITAL LETTER P
-    "\uFF31<>Q;"  //  from FULLWIDTH LATIN CAPITAL LETTER Q
-    "\uFF32<>R;"  //  from FULLWIDTH LATIN CAPITAL LETTER R
-    "\uFF33<>S;"  //  from FULLWIDTH LATIN CAPITAL LETTER S
-    "\uFF34<>T;"  //  from FULLWIDTH LATIN CAPITAL LETTER T
-    "\uFF35<>U;"  //  from FULLWIDTH LATIN CAPITAL LETTER U
-    "\uFF36<>V;"  //  from FULLWIDTH LATIN CAPITAL LETTER V
-    "\uFF37<>W;"  //  from FULLWIDTH LATIN CAPITAL LETTER W
-    "\uFF38<>X;"  //  from FULLWIDTH LATIN CAPITAL LETTER X
-    "\uFF39<>Y;"  //  from FULLWIDTH LATIN CAPITAL LETTER Y
-    "\uFF3A<>Z;"  //  from FULLWIDTH LATIN CAPITAL LETTER Z
-    "\uFF3B<>'[';"    //  from FULLWIDTH LEFT SQUARE BRACKET
-    "\uFF3C<>'\\';"    //  from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
-    "\uFF3D<>']';"    //  from FULLWIDTH RIGHT SQUARE BRACKET
-    "\uFF3E<>'^';"    //  from FULLWIDTH CIRCUMFLEX ACCENT
-    "\uFF3F<>'_';"    //  from FULLWIDTH LOW LINE
-    "\uFF40<>'`';"    //  from FULLWIDTH GRAVE ACCENT
-    "\uFF41<>a;"  //  from FULLWIDTH LATIN SMALL LETTER A
-    "\uFF42<>b;"  //  from FULLWIDTH LATIN SMALL LETTER B
-    "\uFF43<>c;"  //  from FULLWIDTH LATIN SMALL LETTER C
-    "\uFF44<>d;"  //  from FULLWIDTH LATIN SMALL LETTER D
-    "\uFF45<>e;"  //  from FULLWIDTH LATIN SMALL LETTER E
-    "\uFF46<>f;"  //  from FULLWIDTH LATIN SMALL LETTER F
-    "\uFF47<>g;"  //  from FULLWIDTH LATIN SMALL LETTER G
-    "\uFF48<>h;"  //  from FULLWIDTH LATIN SMALL LETTER H
-    "\uFF49<>i;"  //  from FULLWIDTH LATIN SMALL LETTER I
-    "\uFF4A<>j;"  //  from FULLWIDTH LATIN SMALL LETTER J
-    "\uFF4B<>k;"  //  from FULLWIDTH LATIN SMALL LETTER K
-    "\uFF4C<>l;"  //  from FULLWIDTH LATIN SMALL LETTER L
-    "\uFF4D<>m;"  //  from FULLWIDTH LATIN SMALL LETTER M
-    "\uFF4E<>n;"  //  from FULLWIDTH LATIN SMALL LETTER N
-    "\uFF4F<>o;"  //  from FULLWIDTH LATIN SMALL LETTER O
-    "\uFF50<>p;"  //  from FULLWIDTH LATIN SMALL LETTER P
-    "\uFF51<>q;"  //  from FULLWIDTH LATIN SMALL LETTER Q
-    "\uFF52<>r;"  //  from FULLWIDTH LATIN SMALL LETTER R
-    "\uFF53<>s;"  //  from FULLWIDTH LATIN SMALL LETTER S
-    "\uFF54<>t;"  //  from FULLWIDTH LATIN SMALL LETTER T
-    "\uFF55<>u;"  //  from FULLWIDTH LATIN SMALL LETTER U
-    "\uFF56<>v;"  //  from FULLWIDTH LATIN SMALL LETTER V
-    "\uFF57<>w;"  //  from FULLWIDTH LATIN SMALL LETTER W
-    "\uFF58<>x;"  //  from FULLWIDTH LATIN SMALL LETTER X
-    "\uFF59<>y;"  //  from FULLWIDTH LATIN SMALL LETTER Y
-    "\uFF5A<>z;"  //  from FULLWIDTH LATIN SMALL LETTER Z
-    "\uFF5B<>'{';"    //  from FULLWIDTH LEFT CURLY BRACKET
-    "\uFF5C<>'|';"    //  from FULLWIDTH VERTICAL LINE
-    "\uFF5D<>'}';"    //  from FULLWIDTH RIGHT CURLY BRACKET
-    "\uFF5E<>'~';"    //  from FULLWIDTH TILDE
+    "\uFF08<>'(';" //  from FULLWIDTH LEFT PARENTHESIS
+    "\uFF09<>')';" //  from FULLWIDTH RIGHT PARENTHESIS
+    "\uFF0A<>'*';" //  from FULLWIDTH ASTERISK
+    "\uFF0B<>'+';" //  from FULLWIDTH PLUS SIGN
+    "\uFF0C<>',';" //  from FULLWIDTH COMMA
+    "\uFF0D<>'-';" //  from FULLWIDTH HYPHEN-MINUS
+    "\uFF0E<>'.';" //  from FULLWIDTH FULL STOP
+    "\uFF0F<>'/';" //  from FULLWIDTH SOLIDUS
+    "\uFF10<>'0';" //  from FULLWIDTH DIGIT ZERO
+    "\uFF11<>'1';" //  from FULLWIDTH DIGIT ONE
+    "\uFF12<>'2';" //  from FULLWIDTH DIGIT TWO
+    "\uFF13<>'3';" //  from FULLWIDTH DIGIT THREE
+    "\uFF14<>'4';" //  from FULLWIDTH DIGIT FOUR
+    "\uFF15<>'5';" //  from FULLWIDTH DIGIT FIVE
+    "\uFF16<>'6';" //  from FULLWIDTH DIGIT SIX
+    "\uFF17<>'7';" //  from FULLWIDTH DIGIT SEVEN
+    "\uFF18<>'8';" //  from FULLWIDTH DIGIT EIGHT
+    "\uFF19<>'9';" //  from FULLWIDTH DIGIT NINE
+    "\uFF1A<>':';" //  from FULLWIDTH COLON
+    "\uFF1B<>';';" //  from FULLWIDTH SEMICOLON
+    "\uFF1C<>'<';" //  from FULLWIDTH LESS-THAN SIGN
+    "\uFF1D<>'=';" //  from FULLWIDTH EQUALS SIGN
+    "\uFF1E<>'>';" //  from FULLWIDTH GREATER-THAN SIGN
+    "\uFF1F<>'?';" //  from FULLWIDTH QUESTION MARK
+    "\uFF20<>'@';" //  from FULLWIDTH COMMERCIAL AT
+    "\uFF21<>A;" //  from FULLWIDTH LATIN CAPITAL LETTER A
+    "\uFF22<>B;" //  from FULLWIDTH LATIN CAPITAL LETTER B
+    "\uFF23<>C;" //  from FULLWIDTH LATIN CAPITAL LETTER C
+    "\uFF24<>D;" //  from FULLWIDTH LATIN CAPITAL LETTER D
+    "\uFF25<>E;" //  from FULLWIDTH LATIN CAPITAL LETTER E
+    "\uFF26<>F;" //  from FULLWIDTH LATIN CAPITAL LETTER F
+    "\uFF27<>G;" //  from FULLWIDTH LATIN CAPITAL LETTER G
+    "\uFF28<>H;" //  from FULLWIDTH LATIN CAPITAL LETTER H
+    "\uFF29<>I;" //  from FULLWIDTH LATIN CAPITAL LETTER I
+    "\uFF2A<>J;" //  from FULLWIDTH LATIN CAPITAL LETTER J
+    "\uFF2B<>K;" //  from FULLWIDTH LATIN CAPITAL LETTER K
+    "\uFF2C<>L;" //  from FULLWIDTH LATIN CAPITAL LETTER L
+    "\uFF2D<>M;" //  from FULLWIDTH LATIN CAPITAL LETTER M
+    "\uFF2E<>N;" //  from FULLWIDTH LATIN CAPITAL LETTER N
+    "\uFF2F<>O;" //  from FULLWIDTH LATIN CAPITAL LETTER O
+    "\uFF30<>P;" //  from FULLWIDTH LATIN CAPITAL LETTER P
+    "\uFF31<>Q;" //  from FULLWIDTH LATIN CAPITAL LETTER Q
+    "\uFF32<>R;" //  from FULLWIDTH LATIN CAPITAL LETTER R
+    "\uFF33<>S;" //  from FULLWIDTH LATIN CAPITAL LETTER S
+    "\uFF34<>T;" //  from FULLWIDTH LATIN CAPITAL LETTER T
+    "\uFF35<>U;" //  from FULLWIDTH LATIN CAPITAL LETTER U
+    "\uFF36<>V;" //  from FULLWIDTH LATIN CAPITAL LETTER V
+    "\uFF37<>W;" //  from FULLWIDTH LATIN CAPITAL LETTER W
+    "\uFF38<>X;" //  from FULLWIDTH LATIN CAPITAL LETTER X
+    "\uFF39<>Y;" //  from FULLWIDTH LATIN CAPITAL LETTER Y
+    "\uFF3A<>Z;" //  from FULLWIDTH LATIN CAPITAL LETTER Z
+    "\uFF3B<>'[';" //  from FULLWIDTH LEFT SQUARE BRACKET
+    "\uFF3C<>'\\';" //  from FULLWIDTH REVERSE SOLIDUS {double escape - aliu}
+    "\uFF3D<>']';" //  from FULLWIDTH RIGHT SQUARE BRACKET
+    "\uFF3E<>'^';" //  from FULLWIDTH CIRCUMFLEX ACCENT
+    "\uFF3F<>'_';" //  from FULLWIDTH LOW LINE
+    "\uFF40<>'`';" //  from FULLWIDTH GRAVE ACCENT
+    "\uFF41<>a;" //  from FULLWIDTH LATIN SMALL LETTER A
+    "\uFF42<>b;" //  from FULLWIDTH LATIN SMALL LETTER B
+    "\uFF43<>c;" //  from FULLWIDTH LATIN SMALL LETTER C
+    "\uFF44<>d;" //  from FULLWIDTH LATIN SMALL LETTER D
+    "\uFF45<>e;" //  from FULLWIDTH LATIN SMALL LETTER E
+    "\uFF46<>f;" //  from FULLWIDTH LATIN SMALL LETTER F
+    "\uFF47<>g;" //  from FULLWIDTH LATIN SMALL LETTER G
+    "\uFF48<>h;" //  from FULLWIDTH LATIN SMALL LETTER H
+    "\uFF49<>i;" //  from FULLWIDTH LATIN SMALL LETTER I
+    "\uFF4A<>j;" //  from FULLWIDTH LATIN SMALL LETTER J
+    "\uFF4B<>k;" //  from FULLWIDTH LATIN SMALL LETTER K
+    "\uFF4C<>l;" //  from FULLWIDTH LATIN SMALL LETTER L
+    "\uFF4D<>m;" //  from FULLWIDTH LATIN SMALL LETTER M
+    "\uFF4E<>n;" //  from FULLWIDTH LATIN SMALL LETTER N
+    "\uFF4F<>o;" //  from FULLWIDTH LATIN SMALL LETTER O
+    "\uFF50<>p;" //  from FULLWIDTH LATIN SMALL LETTER P
+    "\uFF51<>q;" //  from FULLWIDTH LATIN SMALL LETTER Q
+    "\uFF52<>r;" //  from FULLWIDTH LATIN SMALL LETTER R
+    "\uFF53<>s;" //  from FULLWIDTH LATIN SMALL LETTER S
+    "\uFF54<>t;" //  from FULLWIDTH LATIN SMALL LETTER T
+    "\uFF55<>u;" //  from FULLWIDTH LATIN SMALL LETTER U
+    "\uFF56<>v;" //  from FULLWIDTH LATIN SMALL LETTER V
+    "\uFF57<>w;" //  from FULLWIDTH LATIN SMALL LETTER W
+    "\uFF58<>x;" //  from FULLWIDTH LATIN SMALL LETTER X
+    "\uFF59<>y;" //  from FULLWIDTH LATIN SMALL LETTER Y
+    "\uFF5A<>z;" //  from FULLWIDTH LATIN SMALL LETTER Z
+    "\uFF5B<>'{';" //  from FULLWIDTH LEFT CURLY BRACKET
+    "\uFF5C<>'|';" //  from FULLWIDTH VERTICAL LINE
+    "\uFF5D<>'}';" //  from FULLWIDTH RIGHT CURLY BRACKET
+    "\uFF5E<>'~';" //  from FULLWIDTH TILDE
    "\u3002<>\uFF61;" //  to HALFWIDTH IDEOGRAPHIC FULL STOP
    "\u300C<>\uFF62;" //  to HALFWIDTH LEFT CORNER BRACKET
    "\u300D<>\uFF63;" //  to HALFWIDTH RIGHT CORNER BRACKET
@@ -252,12 +257,12 @@ fullhalf {
    "\u1173<>\uFFDA;" //  to HALFWIDTH HANGUL LETTER EU
    "\u1174<>\uFFDB;" //  to HALFWIDTH HANGUL LETTER YI
    "\u1175<>\uFFDC;" //  to HALFWIDTH HANGUL LETTER I
-    "\uFFE0<>'\u00a2';"    //  from FULLWIDTH CENT SIGN
-    "\uFFE1<>'\u00a3';"    //  from FULLWIDTH POUND SIGN
-    "\uFFE2<>'\u00ac';"    //  from FULLWIDTH NOT SIGN
-    "\uFFE3<>' '\u0304;"  //  from FULLWIDTH MACRON
-    "\uFFE4<>'\u00a6';"    //  from FULLWIDTH BROKEN BAR
-    "\uFFE5<>'\u00a5';"    //  from FULLWIDTH YEN SIGN
+    "\uFFE0<>'\u00a2';" //  from FULLWIDTH CENT SIGN
+    "\uFFE1<>'\u00a3';" //  from FULLWIDTH POUND SIGN
+    "\uFFE2<>'\u00ac';" //  from FULLWIDTH NOT SIGN
+    "\uFFE3<>' '\u0304;" //  from FULLWIDTH MACRON
+    "\uFFE4<>'\u00a6';" //  from FULLWIDTH BROKEN BAR
+    "\uFFE5<>'\u00a5';" //  from FULLWIDTH YEN SIGN
    "\uFFE6<>\u20A9;" //  from FULLWIDTH WON SIGN
    "\u2502<>\uFFE8;" //  to HALFWIDTH FORMS LIGHT VERTICAL
    "\u2190<>\uFFE9;" //  to HALFWIDTH LEFTWARDS ARROW
@@ -266,5 +271,6 @@ fullhalf {
    "\u2193<>\uFFEC;" //  to HALFWIDTH DOWNWARDS ARROW
    "\u25A0<>\uFFED;" //  to HALFWIDTH BLACK SQUARE
    "\u25CB<>\uFFEE;" //  to HALFWIDTH WHITE CIRCLE
-    }
+    
+  }
 }
--- a/icu4c/data/kbdescl1.txt
+++ b/icu4c/data/kbdescl1.txt
@@ -1,128 +1,130 @@
 //--------------------------------------------------------------------
-//  Copyright (C) 1999, International Business Machines
-//  Corporation and others.  All Rights Reserved.
+// Copyright (c) 1999-2000, International Business Machines
+// Corporation and others.  All Rights Reserved.
 //--------------------------------------------------------------------
-//  Date        Name        Description
-//  11/17/99    aliu        Creation.
+// THIS IS A MACHINE-GENERATED FILE
+// Tool: src\com\ibm\tools\translit\dumpICUrules.bat
+// Source: src\com\ibm\text\resources/TransliterationRule_KeyboardEscape_Latin1.java
+// Date: Fri May 19 15:50:22 2000
 //--------------------------------------------------------------------

 // KeyboardEscape-Latin1

 kbdescl1 {
-    Rule {
-        "esc='';"
-        "grave=`;"
-        "acute='';"
-        "hat=^;"
-        "tilde=~;"
-        "umlaut=:;"
-        "ring=.;"
-        "cedilla=,;"
-        "slash=/;"
-        "super=^;"
-
-        // Make keyboard entry of {esc} possible
-        // and of backslash
-        "'\\'{esc}>{esc};"
-        "'\\\\'>'\\';"
-
-        // Long keys
-        "cur{esc}>\u00A4;"
-        "sec{esc}>\u00A7;"
-        "not{esc}>\u00AC;"
-        "mul{esc}>\u00D7;"
-        "div{esc}>\u00F7;"
-
-        "\\ {esc}>\u00A0;" // non-breaking space
-        "!{esc}>\u00A1;" // inverted exclamation
-        "c/{esc}>\u00A2;" // cent sign
-        "lb{esc}>\u00A3;" // pound sign
-        "'|'{esc}>\u00A6;" // broken vertical bar
-        ":{esc}>\u00A8;" // umlaut
-        "{super}a{esc}>\u00AA;" // feminine ordinal
-        "'<<'{esc}>\u00AB;"
-        "r{esc}>\u00AE;"
-        "--{esc}>\u00AF;"
-        "-{esc}>\u00AD;"
-        "+-{esc}>\u00B1;"
-        "{super}2{esc}>\u00B2;"
-        "{super}3{esc}>\u00B3;"
-        "{acute}{esc}>\u00B4;"
-        "m{esc}>\u00B5;"
-        "para{esc}>\u00B6;"
-        "dot{esc}>\u00B7;"
-        "{cedilla}{esc}>\u00B8;"
-        "{super}1{esc}>\u00B9;"
-        "{super}o{esc}>\u00BA;" // masculine ordinal
-        "'>>'{esc}>\u00BB;"
-        "1/4{esc}>\u00BC;"
-        "1/2{esc}>\u00BD;"
-        "3/4{esc}>\u00BE;"
-        "?{esc}>\u00BF;"
-        "A{grave}{esc}>\u00C0;"
-        "A{acute}{esc}>\u00C1;"
-        "A{hat}{esc}>\u00C2;"
-        "A{tilde}{esc}>\u00C3;"
-        "A{umlaut}{esc}>\u00C4;"
-        "A{ring}{esc}>\u00C5;"
-        "AE{esc}>\u00C6;"
-        "C{cedilla}{esc}>\u00C7;"
-        "E{grave}{esc}>\u00C8;"
-        "E{acute}{esc}>\u00C9;"
-        "E{hat}{esc}>\u00CA;"
-        "E{umlaut}{esc}>\u00CB;"
-        "I{grave}{esc}>\u00CC;"
-        "I{acute}{esc}>\u00CD;"
-        "I{hat}{esc}>\u00CE;"
-        "I{umlaut}{esc}>\u00CF;"
-        "D-{esc}>\u00D0;"
-        "N{tilde}{esc}>\u00D1;"
-        "O{grave}{esc}>\u00D2;"
-        "O{acute}{esc}>\u00D3;"
-        "O{hat}{esc}>\u00D4;"
-        "O{tilde}{esc}>\u00D5;"
-        "O{umlaut}{esc}>\u00D6;"
-        "O{slash}{esc}>\u00D8;"
-        "U{grave}{esc}>\u00D9;"
-        "U{acute}{esc}>\u00DA;"
-        "U{hat}{esc}>\u00DB;"
-        "U{umlaut}{esc}>\u00DC;"
-        "Y{acute}{esc}>\u00DD;"
-        "TH{esc}>\u00DE;"
-        "ss{esc}>\u00DF;"
-        "a{grave}{esc}>\u00E0;"
-        "a{acute}{esc}>\u00E1;"
-        "a{hat}{esc}>\u00E2;"
-        "a{tilde}{esc}>\u00E3;"
-        "a{umlaut}{esc}>\u00E4;"
-        "a{ring}{esc}>\u00E5;"
-        "ae{esc}>\u00E6;"
-        "c{cedilla}{esc}>\u00E7;"
-        "c{esc}>\u00A9;" // copyright - after c{cedilla}
-        "e{grave}{esc}>\u00E8;"
-        "e{acute}{esc}>\u00E9;"
-        "e{hat}{esc}>\u00EA;"
-        "e{umlaut}{esc}>\u00EB;"
-        "i{grave}{esc}>\u00EC;"
-        "i{acute}{esc}>\u00ED;"
-        "i{hat}{esc}>\u00EE;"
-        "i{umlaut}{esc}>\u00EF;"
-        "d-{esc}>\u00F0;"
-        "n{tilde}{esc}>\u00F1;"
-        "o{grave}{esc}>\u00F2;"
-        "o{acute}{esc}>\u00F3;"
-        "o{hat}{esc}>\u00F4;"
-        "o{tilde}{esc}>\u00F5;"
-        "o{umlaut}{esc}>\u00F6;"
-        "o{slash}{esc}>\u00F8;"
-        "o{esc}>\u00B0;"
-        "u{grave}{esc}>\u00F9;"
-        "u{acute}{esc}>\u00FA;"
-        "u{hat}{esc}>\u00FB;"
-        "u{umlaut}{esc}>\u00FC;"
-        "y{acute}{esc}>\u00FD;"
-        "y{esc}>\u00A5;" // yen sign
-        "th{esc}>\u00FE;"
-      //masked:  "ss{esc}>\u00FF;"
-    }
+  Rule {
+    "$esc='';"
+    "$grave='`';"
+    "$acute='';"
+    "$hat='^';"
+    "$tilde='~';"
+    "$umlaut=':';"
+    "$ring='.';"
+    "$cedilla=',';"
+    "$slash='/';"
+    "$super='^';"
+    
+    // Make keyboard entry of $esc possible
+    // and of backslash
+    "'\\'$esc>$esc;"
+    "'\\\\'>'\\';"
+    
+    // Long keys
+    "cur$esc>\u00A4;"
+    "sec$esc>\u00A7;"
+    "not$esc>\u00AC;"
+    "mul$esc>\u00D7;"
+    "div$esc>\u00F7;"
+    
+    "\\ $esc>\u00A0;" // non-breaking space
+    "'!'$esc>\u00A1;" // inverted exclamation
+    "c'/'$esc>\u00A2;" // cent sign
+    "lb$esc>\u00A3;" // pound sign
+    "'|'$esc>\u00A6;" // broken vertical bar
+    "':'$esc>\u00A8;" // umlaut
+    "$super a$esc>\u00AA;" // feminine ordinal
+    "'<<'$esc>\u00AB;"
+    "r$esc>\u00AE;"
+    "'--'$esc>\u00AF;"
+    "'-'$esc>\u00AD;"
+    "'+-'$esc>\u00B1;"
+    "$super 2$esc>\u00B2;"
+    "$super 3$esc>\u00B3;"
+    "$acute$esc>\u00B4;"
+    "m$esc>\u00B5;"
+    "para$esc>\u00B6;"
+    "dot$esc>\u00B7;"
+    "$cedilla$esc>\u00B8;"
+    "$super 1$esc>\u00B9;"
+    "$super o$esc>\u00BA;" // masculine ordinal
+    "'>>'$esc>\u00BB;"
+    "'1/4'$esc>\u00BC;"
+    "'1/2'$esc>\u00BD;"
+    "'3/4'$esc>\u00BE;"
+    "'?'$esc>\u00BF;"
+    "A$grave$esc>\u00C0;"
+    "A$acute$esc>\u00C1;"
+    "A$hat$esc>\u00C2;"
+    "A$tilde$esc>\u00C3;"
+    "A$umlaut$esc>\u00C4;"
+    "A$ring$esc>\u00C5;"
+    "AE$esc>\u00C6;"
+    "C$cedilla$esc>\u00C7;"
+    "E$grave$esc>\u00C8;"
+    "E$acute$esc>\u00C9;"
+    "E$hat$esc>\u00CA;"
+    "E$umlaut$esc>\u00CB;"
+    "I$grave$esc>\u00CC;"
+    "I$acute$esc>\u00CD;"
+    "I$hat$esc>\u00CE;"
+    "I$umlaut$esc>\u00CF;"
+    "'D-'$esc>\u00D0;"
+    "N$tilde$esc>\u00D1;"
+    "O$grave$esc>\u00D2;"
+    "O$acute$esc>\u00D3;"
+    "O$hat$esc>\u00D4;"
+    "O$tilde$esc>\u00D5;"
+    "O$umlaut$esc>\u00D6;"
+    "O$slash$esc>\u00D8;"
+    "U$grave$esc>\u00D9;"
+    "U$acute$esc>\u00DA;"
+    "U$hat$esc>\u00DB;"
+    "U$umlaut$esc>\u00DC;"
+    "Y$acute$esc>\u00DD;"
+    "TH$esc>\u00DE;"
+    "ss$esc>\u00DF;"
+    "a$grave$esc>\u00E0;"
+    "a$acute$esc>\u00E1;"
+    "a$hat$esc>\u00E2;"
+    "a$tilde$esc>\u00E3;"
+    "a$umlaut$esc>\u00E4;"
+    "a$ring$esc>\u00E5;"
+    "ae$esc>\u00E6;"
+    "c$cedilla$esc>\u00E7;"
+    "c$esc>\u00A9;" // copyright - after c$cedilla
+    "e$grave$esc>\u00E8;"
+    "e$acute$esc>\u00E9;"
+    "e$hat$esc>\u00EA;"
+    "e$umlaut$esc>\u00EB;"
+    "i$grave$esc>\u00EC;"
+    "i$acute$esc>\u00ED;"
+    "i$hat$esc>\u00EE;"
+    "i$umlaut$esc>\u00EF;"
+    "'d-'$esc>\u00F0;"
+    "n$tilde$esc>\u00F1;"
+    "o$grave$esc>\u00F2;"
+    "o$acute$esc>\u00F3;"
+    "o$hat$esc>\u00F4;"
+    "o$tilde$esc>\u00F5;"
+    "o$umlaut$esc>\u00F6;"
+    "o$slash$esc>\u00F8;"
+    "o$esc>\u00B0;"
+    "u$grave$esc>\u00F9;"
+    "u$acute$esc>\u00FA;"
+    "u$hat$esc>\u00FB;"
+    "u$umlaut$esc>\u00FC;"
+    "y$acute$esc>\u00FD;"
+    "y$esc>\u00A5;" // yen sign
+    "th$esc>\u00FE;"
+    //masked: + "ss$esc>\u00FF;"
+  }
 }
--- a/icu4c/data/larabic.txt
+++ b/icu4c/data/larabic.txt
@@ -1,240 +1,257 @@
 //--------------------------------------------------------------------
-//  Copyright (C) 1999, International Business Machines
-//  Corporation and others.  All Rights Reserved.
+// Copyright (c) 1999-2000, International Business Machines
+// Corporation and others.  All Rights Reserved.
 //--------------------------------------------------------------------
-//  Date        Name        Description
-//  11/17/99    aliu        Creation.
+// THIS IS A MACHINE-GENERATED FILE
+// Tool: src\com\ibm\tools\translit\dumpICUrules.bat
+// Source: src\com\ibm\text\resources/TransliterationRule_Latin_Arabic.java
+// Date: Fri May 19 15:50:22 2000
 //--------------------------------------------------------------------

 // Latin-Arabic

 larabic {
-    Rule {
-        // To Do: finish adding shadda, add sokoon
-
-        "alefmadda=\u0622;"
-        "alefuhamza=\u0623;"
-        "wauuhamza=\u0624;"
-        "alefhamza=\u0625;"
-        "yehuhamza=\u0626;"
-        "alef=\u0627;"
-        "beh=\u0628;"
-        "tehmarbuta=\u0629;"
-        "teh=\u062A;"
-        "theh=\u062B;"
-        "geem=\u062C;"
-        "hah=\u062D;"
-        "kha=\u062E;"
-        "dal=\u062F;"
-        "dhal=\u0630;"
-        "reh=\u0631;"
-        "zain=\u0632;"
-        "seen=\u0633;"
-        "sheen=\u0634;"
-        "sad=\u0635;"
-        "dad=\u0636;"
-        "tah=\u0637;"
-        "zah=\u0638;"
-        "ein=\u0639;"
-        "ghein=\u063A;"
-        "feh=\u0641;"
-        "qaaf=\u0642;"
-        "kaf=\u0643;"
-        "lam=\u0644;"
-        "meem=\u0645;"
-        "noon=\u0646;"
-        "heh=\u0647;"
-        "wau=\u0648;"
-        "yehmaqsura=\u0649;"
-        "yeh=\u064A;"
-        "peh=\u06A4;"
-
-        "hamza=\u0621;"
-        "fathatein=\u064B;"
-        "dammatein=\u064C;"
-        "kasratein=\u064D;"
-        "fatha=\u064E;"
-        "damma=\u064F;"
-        "kasra=\u0650;"
-        "shadda=\u0651;"
-        "sokoon=\u0652;"
-
-        // convert English to Arabic
-        "Arabic>"
-        "\u062a\u062a\u0645\u062a\u0639' '"
-        "\u0627\u0644\u0644\u063a\u0629' '"
-        "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629' '"
-        "\u0628\u0628\u0646\u0638\u0645' '"
-        "\u0643\u062a\u0627\u0628\u0628\u064a\u0629' '"
-        "\u062c\u0645\u064a\u0644\u0629;"
-
-        "ai>{alefmadda};"
-        "ae>{alefuhamza};"
-        "ao>{alefhamza};"
-        "aa>{alef};"
-        "an>{fathatein};"
-        "a>{fatha};"
-        "b>{beh};"
-        "c>{kaf};"
-        "{dhal})dh>{shadda};"
-        "dh>{dhal};"
-        "{dad})dd>{shadda};"
-        "dd>{dad};"
-        "{dal})d>{shadda};"
-        "d>{dal};"
-        "e>{ein};"
-        "f>{feh};"
-        "gh>{ghein};"
-        "g>{geem};"
-        "hh>{hah};"
-        "h>{heh};"
-        "ii>{kasratein};"
-        "i>{kasra};"
-        "j>{geem};"
-        "kh>{kha};"
-        "k>{kaf};"
-        "l>{lam};"
-        "m>{meem};"
-        "n>{noon};"
-        "o>{hamza};"
-        "p>{peh};"
-        "q>{qaaf};"
-        "r>{reh};"
-        "sh>{sheen};"
-        "ss>{sad};"
-        "s>{seen};"
-        "th>{theh};"
-        "tm>{tehmarbuta};"
-        "tt>{tah};"
-        "t>{teh};"
-        "uu>{dammatein};"
-        "u>{damma};"
-        "v>{beh};"
-        "we>{wauuhamza};"
-        "w>{wau};"
-        "x>{kaf}{shadda}{seen};"
-        "ye>{yehuhamza};"
-        "ym>{yehmaqsura};"
-        "y>{yeh};"
-        "zz>{zah};"
-        "z>{zain};"
-
-        "0>\u0660;"+ // Arabic digit 0
-        "1>\u0661;"+ // Arabic digit 1
-        "2>\u0662;"+ // Arabic digit 2
-        "3>\u0663;"+ // Arabic digit 3
-        "4>\u0664;"+ // Arabic digit 4
-        "5>\u0665;"+ // Arabic digit 5
-        "6>\u0666;"+ // Arabic digit 6
-        "7>\u0667;"+ // Arabic digit 7
-        "8>\u0668;"+ // Arabic digit 8
-        "9>\u0669;"+ // Arabic digit 9
-        "%>\u066A;"+ // Arabic %
-        ".>\u066B;"+ // Arabic decimal separator
-        ",>\u066C;"+ // Arabic thousands separator
-        "*>\u066D;"+ // Arabic five-pointed star
-
-        "`0>0;"+ // Escaped forms of the above
-        "`1>1;"
-        "`2>2;"
-        "`3>3;"
-        "`4>4;"
-        "`5>5;"
-        "`6>6;"
-        "`7>7;"
-        "`8>8;"
-        "`9>9;"
-        "`%>%;"
-        "`.>.;"
-        "`,>,;"
-        "`*>*;"
-        "``>`;"
-
-        "''>;"
-
-        // now Arabic to English
-
-        "''ai<a){alefmadda};"
-        "ai<{alefmadda};"
-        "''ae<a){alefuhamza};"
-        "ae<{alefuhamza};"
-        "''ao<a){alefhamza};"
-        "ao<{alefhamza};"
-        "''aa<a){alef};"
-        "aa<{alef};"
-        "''an<a){fathatein};"
-        "an<{fathatein};"
-        "''a<a){fatha};"
-        "a<{fatha};"
-        "b<{beh};"
-        "''dh<d){dhal};"
-        "dh<{dhal};"
-        "''dd<d){dad};"
-        "dd<{dad};"
-        "''d<d){dal};"
-        "d<{dal};"
-        "''e<a){ein};"
-        "''e<w){ein};"
-        "''e<y){ein};"
-        "e<{ein};"
-        "f<{feh};"
-        "gh<{ghein};"
-        "''hh<d){hah};"
-        "''hh<t){hah};"
-        "''hh<k){hah};"
-        "''hh<s){hah};"
-        "hh<{hah};"
-        "''h<d){heh};"
-        "''h<t){heh};"
-        "''h<k){heh};"
-        "''h<s){heh};"
-        "h<{heh};"
-        "''ii<i){kasratein};"
-        "ii<{kasratein};"
-        "''i<i){kasra};"
-        "i<{kasra};"
-        "j<{geem};"
-        "kh<{kha};"
-        "x<{kaf}{shadda}{seen};"
-        "k<{kaf};"
-        "l<{lam};"
-        "''m<y){meem};"
-        "''m<t){meem};"
-        "m<{meem};"
-        "n<{noon};"
-        "''o<a){hamza};"
-        "o<{hamza};"
-        "p<{peh};"
-        "q<{qaaf};"
-        "r<{reh};"
-        "sh<{sheen};"
-        "''ss<s){sad};"
-        "ss<{sad};"
-        "''s<s){seen};"
-        "s<{seen};"
-        "th<{theh};"
-        "tm<{tehmarbuta};"
-        "''tt<t){tah};"
-        "tt<{tah};"
-        "''t<t){teh};"
-        "t<{teh};"
-        "''uu<u){dammatein};"
-        "uu<{dammatein};"
-        "''u<u){damma};"
-        "u<{damma};"
-        "we<{wauuhamza};"
-        "w<{wau};"
-        "ye<{yehuhamza};"
-        "ym<{yehmaqsura};"
-        "''y<y){yeh};"
-        "y<{yeh};"
-        "''zz<z){zah};"
-        "zz<{zah};"
-        "''z<z){zain};"
-        "z<{zain};"
-
-        "dh<dh){shadda};"
-        "dd<dd){shadda};"
-        "''d<d){shadda};"
-    }
+  Rule {
+    // To Do: finish adding shadda, add sokoon
+    
+    "$alefmadda=\u0622;"
+    "$alefuhamza=\u0623;"
+    "$wauuhamza=\u0624;"
+    "$alefhamza=\u0625;"
+    "$yehuhamza=\u0626;"
+    "$alef=\u0627;"
+    "$beh=\u0628;"
+    "$tehmarbuta=\u0629;"
+    "$teh=\u062A;"
+    "$theh=\u062B;"
+    "$geem=\u062C;"
+    "$hah=\u062D;"
+    "$kha=\u062E;"
+    "$dal=\u062F;"
+    "$dhal=\u0630;"
+    "$reh=\u0631;"
+    "$zain=\u0632;"
+    "$seen=\u0633;"
+    "$sheen=\u0634;"
+    "$sad=\u0635;"
+    "$dad=\u0636;"
+    "$tah=\u0637;"
+    "$zah=\u0638;"
+    "$ein=\u0639;"
+    "$ghein=\u063A;"
+    "$feh=\u0641;"
+    "$qaaf=\u0642;"
+    "$kaf=\u0643;"
+    "$lam=\u0644;"
+    "$meem=\u0645;"
+    "$noon=\u0646;"
+    "$heh=\u0647;"
+    "$wau=\u0648;"
+    "$yehmaqsura=\u0649;"
+    "$yeh=\u064A;"
+    "$peh=\u06A4;"
+    
+    "$hamza=\u0621;"
+    "$fathatein=\u064B;"
+    "$dammatein=\u064C;"
+    "$kasratein=\u064D;"
+    "$fatha=\u064E;"
+    "$damma=\u064F;"
+    "$kasra=\u0650;"
+    "$shadda=\u0651;"
+    "$sokoon=\u0652;"
+    
+    // handle doubles - liu
+    "t'' < {$teh} [$teh$theh$tehmarbuta$tah];"
+    "h'' < {$heh} [$heh$hah];"
+    "s'' < {$seen} $sheen;"
+    // handle a few pathological special cases to make round
+    // trip work. - liu
+    "d'~'d   <> $dal $dal;"
+    "dh'~'dh <> $dhal $dhal;"
+    "dd'~'dd <> $dad $dad;"
+    
+    // convert English to Arabic
+    "Arabic>"
+    "\u062a\u062a\u0645\u062a\u0639' '"
+    "\u0627\u0644\u0644\u063a\u0629' '"
+    "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629' '"
+    "\u0628\u0628\u0646\u0638\u0645' '"
+    "\u0643\u062a\u0627\u0628\u0628\u064a\u0629' '"
+    "\u062c\u0645\u064a\u0644\u0629;"
+    
+    "ai>$alefmadda;"
+    "ae>$alefuhamza;"
+    "ao>$alefhamza;"
+    "aa>$alef;"
+    "an>$fathatein;"
+    "a>$fatha;"
+    "b>$beh;"
+    "c>$kaf;"
+    // To be symmetrical with shadda rules below, we want to
+    // map $dad $shadda to dd'dd, etc.  - liu
+    "$dhal{dh>$shadda;"
+    "dh>$dhal;"
+    "$dad{dd>$shadda;"
+    "dd>$dad;"
+    "$dal{d>$shadda;"
+    "d>$dal;"
+    "e>$ein;"
+    "f>$feh;"
+    "gh>$ghein;"
+    "g>$geem;"
+    "hh>$hah;"
+    "h>$heh;"
+    "ii>$kasratein;"
+    "i>$kasra;"
+    "j>$geem;"
+    "kh>$kha;"
+    "k>$kaf;"
+    "l>$lam;"
+    "m>$meem;"
+    "n>$noon;"
+    "o>$hamza;"
+    "p>$peh;"
+    "q>$qaaf;"
+    "r>$reh;"
+    "sh>$sheen;"
+    "ss>$sad;"
+    "s>$seen;"
+    "th>$theh;"
+    "tm>$tehmarbuta;"
+    "tt>$tah;"
+    "t>$teh;"
+    "uu>$dammatein;"
+    "u>$damma;"
+    "v>$beh;"
+    "we>$wauuhamza;"
+    "w>$wau;"
+    "x>$kaf$shadda$seen;"
+    "ye>$yehuhamza;"
+    "ym>$yehmaqsura;"
+    "y>$yeh;"
+    "zz>$zah;"
+    "z>$zain;"
+    
+    "0>\u0660;" // Arabic digit 0
+    "1>\u0661;" // Arabic digit 1
+    "2>\u0662;" // Arabic digit 2
+    "3>\u0663;" // Arabic digit 3
+    "4>\u0664;" // Arabic digit 4
+    "5>\u0665;" // Arabic digit 5
+    "6>\u0666;" // Arabic digit 6
+    "7>\u0667;" // Arabic digit 7
+    "8>\u0668;" // Arabic digit 8
+    "9>\u0669;" // Arabic digit 9
+    "'%'>\u066A;" // Arabic %
+    "'.'>\u066B;" // Arabic decimal separator
+    "','>\u066C;" // Arabic thousands separator
+    "'*'>\u066D;" // Arabic five-pointed star
+    
+    "'`0'>0;" // Escaped forms of the above
+    "'`1'>1;"
+    "'`2'>2;"
+    "'`3'>3;"
+    "'`4'>4;"
+    "'`5'>5;"
+    "'`6'>6;"
+    "'`7'>7;"
+    "'`8'>8;"
+    "'`9'>9;"
+    "'`%'>'%';"
+    "'`.'>'.';"
+    "'`,'>',';"
+    "'`*'>'*';"
+    "'``'>'`';"
+    
+    "''>;"
+    
+    // now Arabic to English
+    
+    "''ai<a{$alefmadda;"
+    "ai<$alefmadda;"
+    "''ae<a{$alefuhamza;"
+    "ae<$alefuhamza;"
+    "''ao<a{$alefhamza;"
+    "ao<$alefhamza;"
+    "''aa<a{$alef;"
+    "aa<$alef;"
+    "''an<a{$fathatein;"
+    "an<$fathatein;"
+    "''a<a{$fatha;"
+    "a<$fatha;"
+    "b<$beh;"
+    "''dh<d{$dhal;"
+    "dh<$dhal;"
+    "''dd<d{$dad;"
+    "dd<$dad;"
+    "''d<d{$dal;"
+    "d<$dal;"
+    "''e<a{$ein;"
+    "''e<w{$ein;"
+    "''e<y{$ein;"
+    "e<$ein;"
+    "f<$feh;"
+    "gh<$ghein;"
+    "''hh<d{$hah;"
+    "''hh<t{$hah;"
+    "''hh<k{$hah;"
+    "''hh<s{$hah;"
+    "hh<$hah;"
+    "''h<d{$heh;"
+    "''h<t{$heh;"
+    "''h<k{$heh;"
+    "''h<s{$heh;"
+    "h<$heh;"
+    "''ii<i{$kasratein;"
+    "ii<$kasratein;"
+    "''i<i{$kasra;"
+    "i<$kasra;"
+    "j<$geem;"
+    "kh<$kha;"
+    "x<$kaf$shadda$seen;"
+    "k<$kaf;"
+    "l<$lam;"
+    "''m<y{$meem;"
+    "''m<t{$meem;"
+    "m<$meem;"
+    "n<$noon;"
+    "''o<a{$hamza;"
+    "o<$hamza;"
+    "p<$peh;"
+    "q<$qaaf;"
+    "r<$reh;"
+    "sh<$sheen;"
+    "''ss<s{$sad;"
+    "ss<$sad;"
+    "''s<s{$seen;"
+    "s<$seen;"
+    "th<$theh;"
+    "tm<$tehmarbuta;"
+    "''tt<t{$tah;"
+    "tt<$tah;"
+    "''t<t{$teh;"
+    "t<$teh;"
+    "''uu<u{$dammatein;"
+    "uu<$dammatein;"
+    "''u<u{$damma;"
+    "u<$damma;"
+    "we<$wauuhamza;"
+    "w<$wau;"
+    "ye<$yehuhamza;"
+    "ym<$yehmaqsura;"
+    "''y<y{$yeh;"
+    "y<$yeh;"
+    "''zz<z{$zah;"
+    "zz<$zah;"
+    "''z<z{$zain;"
+    "z<$zain;"
+    
+    // The following three rules map x $shadda to x x, where
+    // x is dh or dd.  If x is d, d'd is output.  Net effect
+    // is to map something like $dad $shadda to dd'dd. - liu
+    "dh<dh{$shadda;"
+    "dd<dd{$shadda;"
+    "''d<d{$shadda;"
+  }
 }
--- a/icu4c/data/lcyril.txt
+++ b/icu4c/data/lcyril.txt
@ -1,307 +1,312 @@
 //--------------------------------------------------------------------
-//  Copyright (C) 1999, International Business Machines
-//  Corporation and others.  All Rights Reserved.
+// Copyright (c) 1999-2000, International Business Machines
+// Corporation and others.  All Rights Reserved.
 //--------------------------------------------------------------------
-//  Date        Name        Description
-//  11/17/99    aliu        Creation.
-//  12/10/99    aliu        Fix case handling.
+// THIS IS A MACHINE-GENERATED FILE
+// Tool: src\com\ibm\tools\translit\dumpICUrules.bat
+// Source: src\com\ibm\text\resources/TransliterationRule_Latin_Cyrillic.java
+// Date: Fri May 19 15:50:22 2000
 //--------------------------------------------------------------------

+// Latin-Cyrillic
+
 lcyril {
-    Rule {
-//*     /* This class is designed to be a general Latin-Cyrillic
-//*        transliteration. The standard Russian transliterations
-//*        are generally used for the letters from Russian,
-//*        with additional Cyrillic characters given consistent
-//*        mappings.
-//*     */
-
-        "S-hacek=\u0160;"
-        "s-hacek=\u0161;"
-
-        "YO=\u0401;"
-        "J=\u0408;"
-        "A=\u0410;"
-        "B=\u0411;"
-        "V=\u0412;"
-        "G=\u0413;"
-        "D=\u0414;"
-        "YE=\u0415;"
-        "ZH=\u0416;"
-        "Z=\u0417;"
-        "YI=\u0418;"
-        "Y=\u0419;"
-        "K=\u041A;"
-        "L=\u041B;"
-        "M=\u041C;"
-        "N=\u041D;"
-        "O=\u041E;"
-        "P=\u041F;"
-        "R=\u0420;"
-        "S=\u0421;"
-        "T=\u0422;"
-        "U=\u0423;"
-        "F=\u0424;"
-        "KH=\u0425;"
-        "TS=\u0426;"
-        "CH=\u0427;"
-        "SH=\u0428;"
-        "SHCH=\u0429;"
-        "HARD=\u042A;"
-        "I=\u042B;"
-        "SOFT=\u042C;"
-        "E=\u042D;"
-        "YU=\u042E;"
-        "YA=\u042F;"
-
-        // Lowercase
-
-        "a=\u0430;"
-        "b=\u0431;"
-        "v=\u0432;"
-        "g=\u0433;"
-        "d=\u0434;"
-        "ye=\u0435;"
-        "zh=\u0436;"
-        "z=\u0437;"
-        "yi=\u0438;"
-        "y=\u0439;"
-        "k=\u043a;"
-        "l=\u043b;"
-        "m=\u043c;"
-        "n=\u043d;"
-        "o=\u043e;"
-        "p=\u043f;"
-        "r=\u0440;"
-        "s=\u0441;"
-        "t=\u0442;"
-        "u=\u0443;"
-        "f=\u0444;"
-        "kh=\u0445;"
-        "ts=\u0446;"
-        "ch=\u0447;"
-        "sh=\u0448;"
-        "shch=\u0449;"
-        "hard=\u044a;"
-        "i=\u044b;"
-        "soft=\u044c;"
-        "e=\u044d;"
-        "yu=\u044e;"
-        "ya=\u044f;"
-
-        "yo=\u0451;"
-        "j=\u0458;"
-
-        // variables
-        // some are duplicated so lowercasing works
-
-        "csoft=[eiyEIY];"
-        "CSOFT=[eiyEIY];"
-
-        "BECOMES_H=[{HARD}{hard}];"
-        "becomes_h=[{HARD}{hard}];"
-
-        "BECOMES_S=[{S}{s}];"
-        "becomes_s=[{S}{s}];"
-
-        "BECOMES_C=[{CH}{ch}];"
-        "becomes_c=[{CH}{ch}];"
-
-        "BECOMES_VOWEL=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];"
-        "becomes_vowel=[{A}{E}{I}{O}{U}{a}{e}{i}{o}{u}];"
-
-        "letter=[[:Lu:][:Ll:]];"
-        "lower=[[:Ll:]];"     
-
-//*     /*
-//*         Modified to combine display transliterator and typing transliterator.
-//*         The display mapping uses accents for the "soft" vowels.
-//*         It does not, although it could, use characters like \u0161 instead of digraphs
-//*         like sh.
-//*     */
-
-        // #############################################
-        // Special titlecase forms, not duplicated
-        // #############################################
-
-        "Ch>{CH};" "Ch<{CH}({lower};"
-        "Kh>{KH};" "Kh<{KH}({lower};"
-        "Shch>{SHCH};" "Shch<{SHCH}({lower};"
-        "Sh>{SH};" "Sh<{SH}({lower};"
-        "Ts>{TS};"  "Ts<{TS}({lower};"
-        "Zh>{ZH};" "Zh<{ZH}({lower};"
-        "Yi>{YI};"  //+ "Yi<{YI}({lower};"
-        "Ye>{YE};"  //+ "Ye<{YE}({lower};"
-        "Yo>{YO};" //+ "Yo<{YO}({lower};"
-        "Yu>{YU};" //+ "Yu<{YU}({lower};"
-        "Ya>{YA};" //+ "Ya<{YA}({lower};"
-
-        // #############################################
-        // Rules to Duplicate
-        // To get the lowercase versions, copy these and lowercase
-        // #############################################
-
-        // variant spellings in English
-
-        "SHTCH>{SHCH};"
-        "TCH>{CH};"
-        "TH>{Z};"
-        "Q>{K};"
-        "WH>{V};"
-        "W>{V};"
-        "X>{K}{S};"      //+ "X<{K}{S};"
-
-        // Separate letters that would otherwise join
-
-        "SH''<{SH}({BECOMES_C};"
-        "T''<{T}({BECOMES_S};"
-
-        "K''<{K}({BECOMES_H};"
-        "S''<{S}({BECOMES_H};"
-        "T''<{T}({BECOMES_H};"
-        "Z''<{Z}({BECOMES_H};"
-
-        "Y''<{Y}({BECOMES_VOWEL};"
-
-        // Main letters
-
-        "A<>{A};"
-        "B<>{B};"
-        "CH<>{CH};"
-        "D<>{D};"
-        "E<>{E};"
-        "F<>{F};"
-        "G<>{G};"
-        "\u00cc<>{YI};"
-        "I<>{I};"
-        "KH<>{KH};"
-        "K<>{K};"
-        "L<>{L};"
-        "M<>{M};"
-        "N<>{N};"
-        "O<>{O};"
-        "P<>{P};"
-        "R<>{R};"
-        "SHCH<>{SHCH};"
-        "SH>{SH};"       //+ "SH<{SH};"
-        "{S-hacek}<>{SH};"
-        "S<>{S};"
-        "TS<>{TS};"
-        "T<>{T};"
-        "U<>{U};"
-        "V<>{V};"
-        //\u00cc\u00c0\u00c8\u00d2\u00d9
-        "YE>{YE};"       //+ "YE<{YE};"
-        "\u00c8<>{YE};"
-        "YO>{YO};"       //+ "YO<{YO};"
-        "\u00d2<>{YO};"
-        "YU>{YU};"       //+ "YU<{YU};"
-        "\u00d9<>{YU};"
-        "YA>{YA};"       //+ "YA<{YA};"
-        "\u00c0<>{YA};"
-        "Y<>{Y};"
-        "ZH<>{ZH};"
-        "Z<>{Z};"
-
-        "H<>{HARD};"
-        "\u0178<>{SOFT};"
-
-        // Non-russian
-
-        "J<>{J};"
-
-        // variant spellings in English
-
-        "C({csoft}>{S};"
-        "C>{K};"
-
-        // #############################################
-        // Duplicated Rules
-        // Copy and lowercase the above rules
-        // #############################################
-
-         // variant spellings in english
-
-        "shtch>{shch};"
-        "tch>{ch};"
-        "th>{z};"
-        "q>{k};"
-        "wh>{v};"
-        "w>{v};"
-        "x>{k}{s};"      //+ "x<{k}{s};"
-
-        // separate letters that would otherwise join
-
-        "sh''<{sh}({becomes_c};"
-        "t''<{t}({becomes_s};"
-
-        "k''<{k}({becomes_h};"
-        "s''<{s}({becomes_h};"
-        "t''<{t}({becomes_h};"
-        "z''<{z}({becomes_h};"
-
-        "y''<{y}({becomes_vowel};"
-
-        // main letters
-
-        "a<>{a};"
-        "b<>{b};"
-        "ch<>{ch};"
-        "d<>{d};"
-        "e<>{e};"
-        "f<>{f};"
-        "g<>{g};"
-        "\u00ec<>{yi};"
-        "i<>{i};"
-        "kh<>{kh};"
-        "k<>{k};"
-        "l<>{l};"
-        "m<>{m};"
-        "n<>{n};"
-        "o<>{o};"
-        "p<>{p};"
-        "r<>{r};"
-        "shch<>{shch};"
-        "sh>{sh};"       //+ "sh<{sh};"
-        "{s-hacek}<>{sh};"
-        "s<>{s};"
-        "ts<>{ts};"
-        "t<>{t};"
-        "u<>{u};"
-        "v<>{v};"
-        //\u00ec\u00e0\u00e8\u00f2\u00f9
-        "ye>{ye};"       //+ "ye<{ye};"
-        "\u00e8<>{ye};"
-        "yo>{yo};"       //+ "yo<{yo};"
-        "\u00f2<>{yo};"
-        "yu>{yu};"       //+ "yu<{yu};"
-        "\u00f9<>{yu};"
-        "ya>{ya};"       //+ "ya<{ya};"
-        "\u00e0<>{ya};"
-        "y<>{y};"
-        "zh<>{zh};"
-        "z<>{z};"
-
-        "h<>{hard};"
-        "\u00ff<>{soft};"
-
-        // non-russian
-
-        "j<>{j};"
-
-        // variant spellings in english
-
-        "c({csoft}>{s};"
-        "c>{k};"
-
-
-
-        // #############################################
-        // End of Duplicated Rules
-        // #############################################
-
-        //generally the last rule
-        "''>;"
-        //the end
-    }
+  Rule {
+    
+    // This class is designed to be a general Latin-Cyrillic
+    // transliteration. The standard Russian transliterations
+    // are generally used for the letters from Russian,
+    // with additional Cyrillic characters given consistent
+    // mappings.
+    
+    "$S_hacek=\u0160;"
+    "$s_hacek=\u0161;"
+    
+    "$YO=\u0401;"
+    "$J=\u0408;"
+    "$A=\u0410;"
+    "$B=\u0411;"
+    "$V=\u0412;"
+    "$G=\u0413;"
+    "$D=\u0414;"
+    "$YE=\u0415;"
+    "$ZH=\u0416;"
+    "$Z=\u0417;"
+    "$YI=\u0418;"
+    "$Y=\u0419;"
+    "$K=\u041A;"
+    "$L=\u041B;"
+    "$M=\u041C;"
+    "$N=\u041D;"
+    "$O=\u041E;"
+    "$P=\u041F;"
+    "$R=\u0420;"
+    "$S=\u0421;"
+    "$T=\u0422;"
+    "$U=\u0423;"
+    "$F=\u0424;"
+    "$KH=\u0425;"
+    "$TS=\u0426;"
+    "$CH=\u0427;"
+    "$SH=\u0428;"
+    "$SHCH=\u0429;"
+    "$HARD=\u042A;"
+    "$I=\u042B;"
+    "$SOFT=\u042C;"
+    "$E=\u042D;"
+    "$YU=\u042E;"
+    "$YA=\u042F;"
+    
+    // Lowercase
+    
+    "$a=\u0430;"
+    "$b=\u0431;"
+    "$v=\u0432;"
+    "$g=\u0433;"
+    "$d=\u0434;"
+    "$ye=\u0435;"
+    "$zh=\u0436;"
+    "$z=\u0437;"
+    "$yi=\u0438;"
+    "$y=\u0439;"
+    "$k=\u043a;"
+    "$l=\u043b;"
+    "$m=\u043c;"
+    "$n=\u043d;"
+    "$o=\u043e;"
+    "$p=\u043f;"
+    "$r=\u0440;"
+    "$s=\u0441;"
+    "$t=\u0442;"
+    "$u=\u0443;"
+    "$f=\u0444;"
+    "$kh=\u0445;"
+    "$ts=\u0446;"
+    "$ch=\u0447;"
+    "$sh=\u0448;"
+    "$shch=\u0449;"
+    "$hard=\u044a;"
+    "$i=\u044b;"
+    "$soft=\u044c;"
+    "$e=\u044d;"
+    "$yu=\u044e;"
+    "$ya=\u044f;"
+    
+    "$yo=\u0451;"
+    "$j=\u0458;"
+    
+    // variables
+    // some are duplicated so lowercasing works
+    
+    "$csoft=[eiyEIY];"
+    "$CSOFT=[eiyEIY];"
+    
+    "$BECOMES_H=[$HARD$hard];"
+    "$becomes_h=[$HARD$hard];"
+    
+    "$BECOMES_S=[$S$s];"
+    "$becomes_s=[$S$s];"
+    
+    "$BECOMES_C=[$CH$ch];"
+    "$becomes_c=[$CH$ch];"
+    
+    "$BECOMES_VOWEL=[$A$E$I$O$U$a$e$i$o$u];"
+    "$becomes_vowel=[$A$E$I$O$U$a$e$i$o$u];"
+    
+    "$letter=[[:Lu:][:Ll:]];"
+    "$lower=[[:Ll:]];"
+    
+    // Modified to combine display transliterator and typing transliterator.
+    // The display mapping uses accents for the "soft" vowels.
+    // It does not, although it could, use characters like \u0161 instead of digraphs
+    // like sh.
+    
+    // #############################################
+    // Special titlecase forms, not duplicated
+    // #############################################
+    
+    "Sh''ch<>$SH$ch;" // LIU Distinguish $SH$ch from $SHCH
+    
+    "Ch>$CH;" "Ch<$CH}$lower;"
+    "Kh>$KH;" "Kh<$KH}$lower;"
+    "Shch>$SHCH;" "Shch<$SHCH}$lower;"
+    "Sh>$SH;" "Sh<$SH}$lower;"
+    "Ts>$TS;" "Ts<$TS}$lower;"
+    "Zh>$ZH;" "Zh<$ZH}$lower;"
+    "Yi>$YI;" //+ "Yi<$YI}$lower;"
+    "Ye>$YE;" //+ "Ye<$YE}$lower;"
+    "Yo>$YO;" //+ "Yo<$YO}$lower;"
+    "Yu>$YU;" //+ "Yu<$YU}$lower;"
+    "Ya>$YA;" //+ "Ya<$YA}$lower;"
+    
+    // #############################################
+    // Rules to Duplicate
+    // To get the lowercase versions, copy these and lowercase
+    // #############################################
+    
+    // variant spellings in English
+    
+    "SHTCH>$SHCH;"
+    "TCH>$CH;"
+    "TH>$Z;"
+    "Q>$K;"
+    "WH>$V;"
+    "W>$V;"
+    "X>$K$S;" //+ "X<$K$S;"
+    
+    // Separate letters that would otherwise join
+    
+    "SH''<$SH}$BECOMES_C;"
+    "T''<$T}$BECOMES_S;"
+    "T''<$T}[$CH$SHCH$shch];" // LIU add special cases
+    
+    "K''<$K}$BECOMES_H;"
+    "S''<$S}$BECOMES_H;"
+    "T''<$T}$BECOMES_H;"
+    "Z''<$Z}$BECOMES_H;"
+    
+    "Y''<$Y}$BECOMES_VOWEL;"
+    
+    // Main letters
+    
+    "A<>$A;"
+    "B<>$B;"
+    "CH<>$CH;"
+    "D<>$D;"
+    "E<>$E;"
+    "F<>$F;"
+    "G<>$G;"
+    "\u00cc<>$YI;"
+    "I<>$I;"
+    "KH<>$KH;"
+    "K<>$K;"
+    "L<>$L;"
+    "M<>$M;"
+    "N<>$N;"
+    "O<>$O;"
+    "P<>$P;"
+    "R<>$R;"
+    "SHCH<>$SHCH;"
+    "SH>$SH;" //+ "SH<$SH;"
+    "$S_hacek<>$SH;"
+    "S<>$S;"
+    "TS<>$TS;"
+    "T<>$T;"
+    "U<>$U;"
+    "V<>$V;"
+    //\u00cc\u00c0\u00c8\u00d2\u00d9
+    "YE>$YE;" //+ "YE<$YE;"
+    "\u00c8<>$YE;"
+    "YO>$YO;" //+ "YO<$YO;"
+    "\u00d2<>$YO;"
+    "YU>$YU;" //+ "YU<$YU;"
+    "\u00d9<>$YU;"
+    "YA>$YA;" //+ "YA<$YA;"
+    "\u00c0<>$YA;"
+    "Y<>$Y;"
+    "ZH<>$ZH;"
+    "Z<>$Z;"
+    
+    "H<>$HARD;"
+    "\u0178<>$SOFT;"
+    
+    // Non-russian
+    
+    "J<>$J;"
+    
+    // variant spellings in English
+    
+    "C}$csoft>$S;"
+    "C>$K;"
+    
+    // #############################################
+    // Duplicated Rules
+    // Copy and lowercase the above rules
+    // #############################################
+    
+    // variant spellings in english
+    
+    "shtch>$shch;"
+    "tch>$ch;"
+    "th>$z;"
+    "q>$k;"
+    "wh>$v;"
+    "w>$v;"
+    "x>$k$s;" //+ "x<$k$s;"
+    
+    // separate letters that would otherwise join
+    
+    "sh''<$sh}$becomes_c;"
+    "t''<$t}$becomes_s;"
+    "t''<$t}[$ch$shch];" // LIU add special cases
+    
+    "k''<$k}$becomes_h;"
+    "s''<$s}$becomes_h;"
+    "t''<$t}$becomes_h;"
+    "z''<$z}$becomes_h;"
+    
+    "y''<$y}$becomes_vowel;"
+    
+    // main letters
+    
+    "a<>$a;"
+    "b<>$b;"
+    "ch<>$ch;"
+    "d<>$d;"
+    "e<>$e;"
+    "f<>$f;"
+    "g<>$g;"
+    "\u00ec<>$yi;"
+    "i<>$i;"
+    "kh<>$kh;"
+    "k<>$k;"
+    "l<>$l;"
+    "m<>$m;"
+    "n<>$n;"
+    "o<>$o;"
+    "p<>$p;"
+    "r<>$r;"
+    "shch<>$shch;"
+    "sh>$sh;" //+ "sh<$sh;"
+    "$s_hacek<>$sh;"
+    "s<>$s;"
+    "ts<>$ts;"
+    "t<>$t;"
+    "u<>$u;"
+    "v<>$v;"
+    //\u00ec\u00e0\u00e8\u00f2\u00f9
+    "ye>$ye;" //+ "ye<$ye;"
+    "\u00e8<>$ye;"
+    "yo>$yo;" //+ "yo<$yo;"
+    "\u00f2<>$yo;"
+    "yu>$yu;" //+ "yu<$yu;"
+    "\u00f9<>$yu;"
+    "ya>$ya;" //+ "ya<$ya;"
+    "\u00e0<>$ya;"
+    "y<>$y;"
+    "zh<>$zh;"
+    "z<>$z;"
+    
+    "h<>$hard;"
+    "\u00ff<>$soft;"
+    
+    // non-russian
+    
+    "j<>$j;"
+    
+    // variant spellings in english
+    
+    "c}$csoft>$s;"
+    "c>$k;"
+    
+    
+    
+    // #############################################
+    // End of Duplicated Rules
+    // #############################################
+    
+    //generally the last rule
+    "''>;"
+    //the end
+  }
 }
--- a/icu4c/data/ldevan.txt
+++ b/icu4c/data/ldevan.txt
@ -1,411 +1,411 @@
 //--------------------------------------------------------------------
-//  Copyright (C) 1999, International Business Machines
-//  Corporation and others.  All Rights Reserved.
+// Copyright (c) 1999-2000, International Business Machines
+// Corporation and others.  All Rights Reserved.
 //--------------------------------------------------------------------
-//  Date        Name        Description
-//  11/17/99    aliu        Creation.
+// THIS IS A MACHINE-GENERATED FILE
+// Tool: src\com\ibm\tools\translit\dumpICUrules.bat
+// Source: src\com\ibm\text\resources/TransliterationRule_Latin_Devanagari.java
+// Date: Fri May 19 15:50:22 2000
 //--------------------------------------------------------------------

 // Latin-Devanagari

 ldevan {
-    Rule {
-        //#####################################################################
-        //	Keyboard Transliteration Table
-        //#####################################################################
-        // Conversions should be:
-        // 1. complete
-        //  * convert every sequence of Latin letters (a to z plus apostrophe) 
-        //    to a sequence of Native letters
-        //  * convert every sequence of Native letters to Latin letters
-        // 2. reversable
-        //  * any string of Native converted to Latin and back should be the same
-        //  * this is not true for English converted to Native & back, e.g.:
-        //		k -> {kaf} -> k
-        //		c -> {kaf} -> k
-        //#####################################################################
-        // Sequences of Latin letters may convert to a single Native letter.
-        // When this is the case, an apostrophe can be used to indicate separate
-        // letters.$
-        // E.g.	sh -> {shin}
-        //		s'h -> {sin}{heh}
-        // 		ss -> {sad}
-        // 		s's -> {sin}{shadda}
-        //#####################################################################
-        // To Do:
-        //	finish adding shadda, add sokoon, fix uppercase
-        //	make two transliteration tables: one with vowels, one without
-        //#####################################################################
-        // Modifications
-        //	Devanagari Transliterator:  broken up with consonsants/vowels
-        //#####################################################################
-        // Unicode character name definitions
-        //#####################################################################
-
-        //consonants
-        "candrabindu=\u0901;"
-        "bindu=\u0902;"
-        "visarga=\u0903;"
-
-        // w<vowel> represents the stand-alone form
-        "wa=\u0905;"
-        "waa=\u0906;"
-        "wi=\u0907;"
-        "wii=\u0908;"
-        "wu=\u0909;"
-        "wuu=\u090A;"
-        "wr=\u090B;"
-        "wl=\u090C;"
-        "we=\u090F;"
-        "wai=\u0910;"
-        "wo=\u0913;"
-        "wau=\u0914;"
-
-        "ka=\u0915;"
-        "kha=\u0916;"
-        "ga=\u0917;"
-        "gha=\u0918;"
-        "nga=\u0919;"
-
-        "ca=\u091A;"
-        "cha=\u091B;"
-        "ja=\u091C;"
-        "jha=\u091D;"
-        "nya=\u091E;"
-
-        "tta=\u091F;"
-        "ttha=\u0920;"
-        "dda=\u0921;"
-        "ddha=\u0922;"
-        "nna=\u0923;"
-
-        "ta=\u0924;"
-        "tha=\u0925;"
-        "da=\u0926;"
-        "dha=\u0927;"
-        "na=\u0928;"
-
-        "pa=\u092A;"
-        "pha=\u092B;"
-        "ba=\u092C;"
-        "bha=\u092D;"
-        "ma=\u092E;"
-
-        "ya=\u092F;"
-        "ra=\u0930;"
-        "rra=\u0931;"
-        "la=\u0933;"
-        "va=\u0935;"
-
-        "sha=\u0936;"
-        "ssa=\u0937;"
-        "sa=\u0938;"
-        "ha=\u0939;"
-
-        // <vowel> represents the dependent form
-        "aa=\u093E;"
-        "i=\u093F;"
-        "ii=\u0940;"
-        "u=\u0941;"
-        "uu=\u0942;"
-        "rh=\u0943;"
-        "lh=\u0944;"
-        "e=\u0947;"
-        "ai=\u0948;"
-        "o=\u094B;"
-        "au=\u094C;"
-
-        "virama=\u094D;"
-
-        "wrr=\u0960;"
-        "rrh=\u0962;"
-
-        "danda=\u0964;"
-        "doubleDanda=\u0965;"
-        "depVowelAbove=[\u093E-\u0940\u0945-\u094C];"
-        "depVowelBelow=[\u0941-\u0944];"
-        // Ech: Double escape U+0000, so UnicodeString doesn't consider it
-        // to be the end of the string.  This is only necessary for U+0000
-        // right now. [liu]
-        "endThing=[{danda}{doubleDanda}\\u0000-\u08FF\u0980-\uFFFF];"
-
-        "&=[{virama}{aa}{ai}{au}{ii}{i}{uu}{u}{rrh}{rh}{lh}{e}{o}];"
-        "%=[bcdfghjklmnpqrstvwxyz];"
-
-        //#####################################################################
-        // convert from Latin letters to Native letters
-        //#####################################################################
-        //Hindi>\u092d\u093e\u0930\u0924--\u0020\u0926\u0947\u0936\u0020\u092c\u0928\u094d\u0927\u0941\u002e
-
-        // special forms with no good conversion
-
-        "mm>{bindu};"
-        "x>{visarga};"
-
-        // convert to independent forms at start of word or syllable: 
-        // e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
-        // Moved up [LIU]
-
-        "aa>{waa};"
-        "ai>{wai};"
-        "au>{wau};"
-        "ii>{wii};"
-        "i>{wi};"
-        "uu>{wuu};"
-        "u>{wu};"
-        "rrh>{wrr};"
-        "rh>{wr};"
-        "lh>{wl};"
-        "e>{we};"
-        "o>{wo};"
-        "a>{wa};"
-
-        // normal consonants
-
-        "kh>{kha}|{virama};"
-        "k>{ka}|{virama};"
-        "q>{ka}|{virama};"
-        "gh>{gha}|{virama};"
-        "g>{ga}|{virama};"
-        "ng>{nga}|{virama};"
-        "ch>{cha}|{virama};"
-        "c>{ca}|{virama};"
-        "jh>{jha}|{virama};"
-        "j>{ja}|{virama};"
-        "ny>{nya}|{virama};"
-        "tth>{ttha}|{virama};"
-        "tt>{tta}|{virama};"
-        "ddh>{ddha}|{virama};"
-        "dd>{dda}|{virama};"
-        "nn>{nna}|{virama};"
-        "th>{tha}|{virama};"
-        "t>{ta}|{virama};"
-        "dh>{dha}|{virama};"
-        "d>{da}|{virama};"
-        "n>{na}|{virama};"
-        "ph>{pha}|{virama};"
-        "p>{pa}|{virama};"
-        "bh>{bha}|{virama};"
-        "b>{ba}|{virama};"
-        "m>{ma}|{virama};"
-        "y>{ya}|{virama};"
-        "r>{ra}|{virama};"
-        "l>{la}|{virama};"
-        "v>{va}|{virama};"
-        "f>{va}|{virama};"
-        "w>{va}|{virama};"
-        "sh>{sha}|{virama};"
-        "ss>{ssa}|{virama};"
-        "s>{sa}|{virama};"
-        "z>{sa}|{virama};"
-        "h>{ha}|{virama};"
-
-        ".>{danda};"
-        "{danda}.>{doubleDanda};"
-        "{depVowelAbove})~>{bindu};"
-        "{depVowelBelow})~>{candrabindu};"
-
-        // convert to dependent forms after consonant with no vowel: 
-        // e.g. kai -> {ka}{virama}ai -> {ka}{ai}
-
-        "{virama}aa>{aa};"
-        "{virama}ai>{ai};"
-        "{virama}au>{au};"
-        "{virama}ii>{ii};"
-        "{virama}i>{i};"
-        "{virama}uu>{uu};"
-        "{virama}u>{u};"
-        "{virama}rrh>{rrh};"
-        "{virama}rh>{rh};"
-        "{virama}lh>{lh};"
-        "{virama}e>{e};"
-        "{virama}o>{o};"
-        "{virama}a>;"
-
-        // otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
-
-        "{virama}''aa>{waa};"
-        "{virama}''ai>{wai};"
-        "{virama}''au>{wau};"
-        "{virama}''ii>{wii};"
-        "{virama}''i>{wi};"
-        "{virama}''uu>{wuu};"
-        "{virama}''u>{wu};"
-        "{virama}''rrh>{wrr};"
-        "{virama}''rh>{wr};"
-        "{virama}''lh>{wl};"
-        "{virama}''e>{we};"
-        "{virama}''o>{wo};"
-        "{virama}''a>{wa};"
-
-        "{virama}({endThing}>;"
-
-        // convert any left-over apostrophes used for separation
-
-        "''>;"
-
-        //#####################################################################
-        // convert from Native letters to Latin letters
-        //#####################################################################
-
-        // special forms with no good conversion
-
-        "mm<{bindu};"
-        "x<{visarga};"
-
-        // normal consonants
-
-        "kh<{kha}(&;"
-        "kha<{kha};"
-        "k''<{ka}{virama}({ha};"
-        "k<{ka}(&;"
-        "ka<{ka};"
-        "gh<{gha}(&;"
-        "gha<{gha};"
-        "g''<{ga}{virama}({ha};"
-        "g<{ga}(&;"
-        "ga<{ga};"
-        "ng<{nga}(&;"
-        "nga<{nga};"
-        "ch<{cha}(&;"
-        "cha<{cha};"
-        "c''<{ca}{virama}({ha};"
-        "c<{ca}(&;"
-        "ca<{ca};"
-        "jh<{jha}(&;"
-        "jha<{jha};"
-        "j''<{ja}{virama}({ha};"
-        "j<{ja}(&;"
-        "ja<{ja};"
-        "ny<{nya}(&;"
-        "nya<{nya};"
-        "tth<{ttha}(&;"
-        "ttha<{ttha};"
-        "tt''<{tta}{virama}({ha};"
-        "tt<{tta}(&;"
-        "tta<{tta};"
-        "ddh<{ddha}(&;"
-        "ddha<{ddha};"
-        "dd''<{dda}(&{ha};"
-        "dd<{dda}(&;"
-        "dda<{dda};"
-        "dh<{dha}(&;"
-        "dha<{dha};"
-        "d''<{da}{virama}({ha};"
-        "d''<{da}{virama}({ddha};"
-        "d''<{da}{virama}({dda};"
-        "d''<{da}{virama}({dha};"
-        "d''<{da}{virama}({da};"
-        "d<{da}(&;"
-        "da<{da};"
-        "th<{tha}(&;"
-        "tha<{tha};"
-        "t''<{ta}{virama}({ha};"
-        "t''<{ta}{virama}({ttha};"
-        "t''<{ta}{virama}({tta};"
-        "t''<{ta}{virama}({tha};"
-        "t''<{ta}{virama}({ta};"
-        "t<{ta}(&;"
-        "ta<{ta};"
-        "n''<{na}{virama}({ga};"
-        "n''<{na}{virama}({ya};"
-        "n<{na}(&;"
-        "na<{na};"
-        "ph<{pha}(&;"
-        "pha<{pha};"
-        "p''<{pa}{virama}({ha};"
-        "p<{pa}(&;"
-        "pa<{pa};"
-        "bh<{bha}(&;"
-        "bha<{bha};"
-        "b''<{ba}{virama}({ha};"
-        "b<{ba}(&;"
-        "ba<{ba};"
-        "m''<{ma}{virama}({ma};"
-        "m''<{ma}{virama}({bindu};"
-        "m<{ma}(&;"
-        "ma<{ma};"
-        "y<{ya}(&;"
-        "ya<{ya};"
-        "r''<{ra}{virama}({ha};"
-        "r<{ra}(&;"
-        "ra<{ra};"
-        "l''<{la}{virama}({ha};"
-        "l<{la}(&;"
-        "la<{la};"
-        "v<{va}(&;"
-        "va<{va};"
-        "sh<{sha}(&;"
-        "sha<{sha};"
-        "ss<{ssa}(&;"
-        "ssa<{ssa};"
-        "s''<{sa}{virama}({ha};"
-        "s''<{sa}{virama}({sha};"
-        "s''<{sa}{virama}({ssa};"
-        "s''<{sa}{virama}({sa};"
-        "s<{sa}(&;"
-        "sa<{sa};"
-        "h<{ha}(&;"
-        "ha<{ha};"
-
-        // dependent vowels (should never occur except following consonants)
-
-        "aa<{aa};"
-        "ai<{ai};"
-        "au<{au};"
-        "ii<{ii};"
-        "i<{i};"
-        "uu<{uu};"
-        "u<{u};"
-        "rrh<{rrh};"
-        "rh<{rh};"
-        "lh<{lh};"
-        "e<{e};"
-        "o<{o};"
-
-        // independent vowels (when following consonants)
-
-        "''aa<a){waa};"
-        "''aa<%){waa};"
-        "''ai<a){wai};"
-        "''ai<%){wai};"
-        "''au<a){wau};"
-        "''au<%){wau};"
-        "''ii<a){wii};"
-        "''ii<%){wii};"
-        "''i<a){wi};"
-        "''i<%){wi};"
-        "''uu<a){wuu};"
-        "''uu<%){wuu};"
-        "''u<a){wu};"
-        "''u<%){wu};"
-        "''rrh<%){wrr};"
-        "''rh<%){wr};"
-        "''lh<%){wl};"
-        "''e<%){we};"
-        "''o<%){wo};"
-        "''a<a){wa};"
-        "''a<%){wa};"
-
-
-        // independent vowels (otherwise)
-
-        "aa<{waa};"
-        "ai<{wai};"
-        "au<{wau};"
-        "ii<{wii};"
-        "i<{wi};"
-        "uu<{wuu};"
-        "u<{wu};"
-        "rrh<{wrr};"
-        "rh<{wr};"
-        "lh<{wl};"
-        "e<{we};"
-        "o<{wo};"
-        "a<{wa};"
-
-        // blow away any remaining viramas
-
-        "<{virama};"
-    }
+  Rule {
+    //#####################################################################
+    //	Keyboard Transliteration Table
+    //#####################################################################
+    // Conversions should be:
+    // 1. complete
+    //  * convert every sequence of Latin letters (a to z plus apostrophe)
+    //    to a sequence of Native letters
+    //  * convert every sequence of Native letters to Latin letters
+    // 2. reversible
+    //  * any string of Native converted to Latin and back should be the same
+    //  * this is not true for English converted to Native & back, e.g.:
+    //		k -> {kaf} -> k
+    //		c -> {kaf} -> k
+    //#####################################################################
+    // Sequences of Latin letters may convert to a single Native letter.
+    // When this is the case, an apostrophe can be used to indicate separate
+    // letters.
+    // E.g.	sh -> {shin}
+    //		s'h -> {sin}{heh}
+    // 		ss -> {sad}
+    // 		s's -> {sin}{shadda}
+    //#####################################################################
+    // To Do:
+    //	finish adding shadda, add sokoon, fix uppercase
+    //	make two transliteration tables: one with vowels, one without
+    //#####################################################################
+    // Modifications
+    //	Devanagari Transliterator:  broken up with consonants/vowels
+    //#####################################################################
+    // Unicode character name definitions
+    //#####################################################################
+    
+    //consonants
+    "$candrabindu=\u0901;"
+    "$bindu=\u0902;"
+    "$visarga=\u0903;"
+    
+    // w<vowel> represents the stand-alone form
+    "$wa=\u0905;"
+    "$waa=\u0906;"
+    "$wi=\u0907;"
+    "$wii=\u0908;"
+    "$wu=\u0909;"
+    "$wuu=\u090A;"
+    "$wr=\u090B;"
+    "$wl=\u090C;"
+    "$we=\u090F;"
+    "$wai=\u0910;"
+    "$wo=\u0913;"
+    "$wau=\u0914;"
+    
+    "$ka=\u0915;"
+    "$kha=\u0916;"
+    "$ga=\u0917;"
+    "$gha=\u0918;"
+    "$nga=\u0919;"
+    
+    "$ca=\u091A;"
+    "$cha=\u091B;"
+    "$ja=\u091C;"
+    "$jha=\u091D;"
+    "$nya=\u091E;"
+    
+    "$tta=\u091F;"
+    "$ttha=\u0920;"
+    "$dda=\u0921;"
+    "$ddha=\u0922;"
+    "$nna=\u0923;"
+    
+    "$ta=\u0924;"
+    "$tha=\u0925;"
+    "$da=\u0926;"
+    "$dha=\u0927;"
+    "$na=\u0928;"
+    
+    "$pa=\u092A;"
+    "$pha=\u092B;"
+    "$ba=\u092C;"
+    "$bha=\u092D;"
+    "$ma=\u092E;"
+    
+    "$ya=\u092F;"
+    "$ra=\u0930;"
+    "$rra=\u0931;"
+    "$la=\u0933;"
+    "$va=\u0935;"
+    
+    "$sha=\u0936;"
+    "$ssa=\u0937;"
+    "$sa=\u0938;"
+    "$ha=\u0939;"
+    
+    // <vowel> represents the dependent form
+    "$aa=\u093E;"
+    "$i=\u093F;"
+    "$ii=\u0940;"
+    "$u=\u0941;"
+    "$uu=\u0942;"
+    "$rh=\u0943;"
+    "$lh=\u0944;"
+    "$e=\u0947;"
+    "$ai=\u0948;"
+    "$o=\u094B;"
+    "$au=\u094C;"
+    
+    "$virama=\u094D;"
+    
+    "$wrr=\u0960;"
+    "$rrh=\u0962;"
+    
+    "$danda=\u0964;"
+    "$doubleDanda=\u0965;"
+    "$depVowelAbove=[\u093E-\u0940\u0945-\u094C];"
+    "$depVowelBelow=[\u0941-\u0944];"
+    "$endThing=[$danda$doubleDanda \\u0000-\u08FF\u0980-\uFFFF];"
+    
+    // $x was originally called '&'; $z was '%'
+    "$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o];"
+    "$z=[bcdfghjklmnpqrstvwxyz];"
+    
+    //#####################################################################
+    // convert from Latin letters to Native letters
+    //#####################################################################
+    //Hindi>\u092d\u093e\u0930\u0924--\u0020\u0926\u0947\u0936\u0020\u092c\u0928\u094d\u0927\u0941\u002e
+    
+    // special forms with no good conversion
+    
+    "mm>$bindu;"
+    "x>$visarga;"
+    
+    // convert to independent forms at start of word or syllable:
+    // e.g. keai -> {ka}{e}{wai}; k'ai -> {ka}{wai}; (ai) -> ({wai})
+    // Moved up [LIU]
+    
+    "aa>$waa;"
+    "ai>$wai;"
+    "au>$wau;"
+    "ii>$wii;"
+    "i>$wi;"
+    "uu>$wuu;"
+    "u>$wu;"
+    "rrh>$wrr;"
+    "rh>$wr;"
+    "lh>$wl;"
+    "e>$we;"
+    "o>$wo;"
+    "a>$wa;"
+    
+    // normal consonants
+    
+    "kh>$kha|$virama;"
+    "k>$ka|$virama;"
+    "q>$ka|$virama;"
+    "gh>$gha|$virama;"
+    "g>$ga|$virama;"
+    "ng>$nga|$virama;"
+    "ch>$cha|$virama;"
+    "c>$ca|$virama;"
+    "jh>$jha|$virama;"
+    "j>$ja|$virama;"
+    "ny>$nya|$virama;"
+    "tth>$ttha|$virama;"
+    "tt>$tta|$virama;"
+    "ddh>$ddha|$virama;"
+    "dd>$dda|$virama;"
+    "nn>$nna|$virama;"
+    "th>$tha|$virama;"
+    "t>$ta|$virama;"
+    "dh>$dha|$virama;"
+    "d>$da|$virama;"
+    "n>$na|$virama;"
+    "ph>$pha|$virama;"
+    "p>$pa|$virama;"
+    "bh>$bha|$virama;"
+    "b>$ba|$virama;"
+    "m>$ma|$virama;"
+    "y>$ya|$virama;"
+    "r>$ra|$virama;"
+    "l>$la|$virama;"
+    "v>$va|$virama;"
+    "f>$va|$virama;"
+    "w>$va|$virama;"
+    "sh>$sha|$virama;"
+    "ss>$ssa|$virama;"
+    "s>$sa|$virama;"
+    "z>$sa|$virama;"
+    "h>$ha|$virama;"
+    
+    "'.'>$danda;"
+    "$danda'.'>$doubleDanda;"
+    "$depVowelAbove{'~'>$bindu;"
+    "$depVowelBelow{'~'>$candrabindu;"
+    
+    // convert to dependent forms after consonant with no vowel:
+    // e.g. kai -> {ka}{virama}ai -> {ka}{ai}
+    
+    "$virama aa>$aa;"
+    "$virama ai>$ai;"
+    "$virama au>$au;"
+    "$virama ii>$ii;"
+    "$virama i>$i;"
+    "$virama uu>$uu;"
+    "$virama u>$u;"
+    "$virama rrh>$rrh;"
+    "$virama rh>$rh;"
+    "$virama lh>$lh;"
+    "$virama e>$e;"
+    "$virama o>$o;"
+    "$virama a>;"
+    
+    // otherwise convert independent forms when separated by ': k'ai -> {ka}{virama}{wai}
+    
+    "$virama''aa>$waa;"
+    "$virama''ai>$wai;"
+    "$virama''au>$wau;"
+    "$virama''ii>$wii;"
+    "$virama''i>$wi;"
+    "$virama''uu>$wuu;"
+    "$virama''u>$wu;"
+    "$virama''rrh>$wrr;"
+    "$virama''rh>$wr;"
+    "$virama''lh>$wl;"
+    "$virama''e>$we;"
+    "$virama''o>$wo;"
+    "$virama''a>$wa;"
+    
+    "$virama}$endThing>;"
+    
+    // convert any left-over apostrophes used for separation
+    
+    "''>;"
+    
+    //#####################################################################
+    // convert from Native letters to Latin letters
+    //#####################################################################
+    
+    // special forms with no good conversion
+    
+    "mm<$bindu;"
+    "x<$visarga;"
+    
+    // normal consonants
+    
+    "kh<$kha}$x;"
+    "kha<$kha;"
+    "k''<$ka$virama}$ha;"
+    "k<$ka}$x;"
+    "ka<$ka;"
+    "gh<$gha}$x;"
+    "gha<$gha;"
+    "g''<$ga$virama}$ha;"
+    "g<$ga}$x;"
+    "ga<$ga;"
+    "ng<$nga}$x;"
+    "nga<$nga;"
+    "ch<$cha}$x;"
+    "cha<$cha;"
+    "c''<$ca$virama}$ha;"
+    "c<$ca}$x;"
+    "ca<$ca;"
+    "jh<$jha}$x;"
+    "jha<$jha;"
+    "j''<$ja$virama}$ha;"
+    "j<$ja}$x;"
+    "ja<$ja;"
+    "ny<$nya}$x;"
+    "nya<$nya;"
+    "tth<$ttha}$x;"
+    "ttha<$ttha;"
+    "tt''<$tta$virama}$ha;"
+    "tt<$tta}$x;"
+    "tta<$tta;"
+    "ddh<$ddha}$x;"
+    "ddha<$ddha;"
+    "dd''<$dda}$x$ha;"
+    "dd<$dda}$x;"
+    "dda<$dda;"
+    "dh<$dha}$x;"
+    "dha<$dha;"
+    "d''<$da$virama}$ha;"
+    "d''<$da$virama}$ddha;"
+    "d''<$da$virama}$dda;"
+    "d''<$da$virama}$dha;"
+    "d''<$da$virama}$da;"
+    "d<$da}$x;"
+    "da<$da;"
+    "th<$tha}$x;"
+    "tha<$tha;"
+    "t''<$ta$virama}$ha;"
+    "t''<$ta$virama}$ttha;"
+    "t''<$ta$virama}$tta;"
+    "t''<$ta$virama}$tha;"
+    "t''<$ta$virama}$ta;"
+    "t<$ta}$x;"
+    "ta<$ta;"
+    "n''<$na$virama}$ga;"
+    "n''<$na$virama}$ya;"
+    "n<$na}$x;"
+    "na<$na;"
+    "ph<$pha}$x;"
+    "pha<$pha;"
+    "p''<$pa$virama}$ha;"
+    "p<$pa}$x;"
+    "pa<$pa;"
+    "bh<$bha}$x;"
+    "bha<$bha;"
+    "b''<$ba$virama}$ha;"
+    "b<$ba}$x;"
+    "ba<$ba;"
+    "m''<$ma$virama}$ma;"
+    "m''<$ma$virama}$bindu;"
+    "m<$ma}$x;"
+    "ma<$ma;"
+    "y<$ya}$x;"
+    "ya<$ya;"
+    "r''<$ra$virama}$ha;"
+    "r<$ra}$x;"
+    "ra<$ra;"
+    "l''<$la$virama}$ha;"
+    "l<$la}$x;"
+    "la<$la;"
+    "v<$va}$x;"
+    "va<$va;"
+    "sh<$sha}$x;"
+    "sha<$sha;"
+    "ss<$ssa}$x;"
+    "ssa<$ssa;"
+    "s''<$sa$virama}$ha;"
+    "s''<$sa$virama}$sha;"
+    "s''<$sa$virama}$ssa;"
+    "s''<$sa$virama}$sa;"
+    "s<$sa}$x;"
+    "sa<$sa;"
+    "h<$ha}$x;"
+    "ha<$ha;"
+    
+    // dependent vowels (should never occur except following consonants)
+    
+    "aa<$aa;"
+    "ai<$ai;"
+    "au<$au;"
+    "ii<$ii;"
+    "i<$i;"
+    "uu<$uu;"
+    "u<$u;"
+    "rrh<$rrh;"
+    "rh<$rh;"
+    "lh<$lh;"
+    "e<$e;"
+    "o<$o;"
+    
+    // independent vowels (when following consonants)
+    
+    "''aa<a{$waa;"
+    "''aa<$z{$waa;"
+    "''ai<a{$wai;"
+    "''ai<$z{$wai;"
+    "''au<a{$wau;"
+    "''au<$z{$wau;"
+    "''ii<a{$wii;"
+    "''ii<$z{$wii;"
+    "''i<a{$wi;"
+    "''i<$z{$wi;"
+    "''uu<a{$wuu;"
+    "''uu<$z{$wuu;"
+    "''u<a{$wu;"
+    "''u<$z{$wu;"
+    "''rrh<$z{$wrr;"
+    "''rh<$z{$wr;"
+    "''lh<$z{$wl;"
+    "''e<$z{$we;"
+    "''o<$z{$wo;"
+    "''a<a{$wa;"
+    "''a<$z{$wa;"
+    
+    
+    // independent vowels (otherwise)
+    
+    "aa<$waa;"
+    "ai<$wai;"
+    "au<$wau;"
+    "ii<$wii;"
+    "i<$wi;"
+    "uu<$wuu;"
+    "u<$wu;"
+    "rrh<$wrr;"
+    "rh<$wr;"
+    "lh<$wl;"
+    "e<$we;"
+    "o<$wo;"
+    "a<$wa;"
+    
+    // blow away any remaining viramas
+    
+    "<$virama;"
+  }
 }
--- a/icu4c/data/lgreek.txt
+++ b/icu4c/data/lgreek.txt
@ -1,376 +1,380 @@
 //--------------------------------------------------------------------
-//  Copyright (C) 1999, International Business Machines
-//  Corporation and others.  All Rights Reserved.
+// Copyright (c) 1999-2000, International Business Machines
+// Corporation and others.  All Rights Reserved.
 //--------------------------------------------------------------------
-//  Date        Name        Description
-//  11/17/99    aliu        Creation.
+// THIS IS A MACHINE-GENERATED FILE
+// Tool: src\com\ibm\tools\translit\dumpICUrules.bat
+// Source: src\com\ibm\text\resources/TransliterationRule_Latin_Greek.java
+// Date: Fri May 19 15:50:22 2000
 //--------------------------------------------------------------------

 // Latin-Greek

 lgreek {
-    Rule {
-                // ==============================================
-                // Modern Greek Transliteration Rules
-                //
-                // This transliterates modern Greek characters, but using rules
-                // that are traditional for Ancient Greek, and
-                // thus more resemble Greek words that have become part
-                // of English. It differs from the official Greek
-                // transliteration, which is more phonetic (since
-                // most modern Greek vowels, for example, have
-                // degenerated simply to sound like "ee").
-                //
-                // There are only a few tricky parts.
-                // 1. eta and omega don't map directly to Latin vowels,
-                //    so we use a macron on e and o, and some
-                //    other combinations if they are accented.
-                // 2. The accented, diaeresis i and y are substituted too.
-                // 3. Some letters use digraphs, like "ph". While typical,
-                //    they need some special handling.
-                // 4. A gamma before a gamma or a few other letters is
-                //    transliterated as an "n", as in "Anglo"
-                // 5. An ypsilon after a vowel is a "u", as in
-                //    "Mouseio". Otherwise it is a "y" as in "Physikon"
-                // 6. The construction of the rules is made simpler by making sure
-                //    that most rules for lowercase letters exactly correspond to the
-                //    rules for uppercase letters, *except* for the case of the letters
-                //    in the rule itself. That way, after modifying the uppercase rules,
-                //    you can just copy, paste, and "set to lowercase" to get
-                //    the rules for lowercase letters!
-                // ==============================================
-            
-                // ==============================================
-                // Variables, used to make the rules more comprehensible
-                // and for conditionals.
-                // ==============================================
-                
-                // Latin Letters
-                
-                "E-MACRON=\u0112;"
-                "e-macron=\u0113;"
-                "O-MACRON=\u014C;"
-                "o-macron=\u014D;"
-                "Y-UMLAUT=\u0178;"
-                "y-umlaut=\u00FF;"
-                
-                //! // with real accents.
-                //! + "E-MACRON-ACUTE=\u0112\u0301;"
-                //! + "e-macron-acute=\u0113\u0301;"
-                //! + "O-MACRON-ACUTE=\u014C\u0301;"
-                //! + "o-macron-acute=\u014D\u0301;"
-                //! + "y-umlaut-acute=\u00FF\u0301;"
-                //! + "\u00ef-acute=\u00ef\u0301;"
-                //! + "\u00fc-acute=\u00fc\u0301;"
-                //! //
- 
-                // single letter equivalents
-                
-                "E-MACRON-ACUTE=\u00CA;"
-                "e-macron-acute=\u00EA;"
-                "O-MACRON-ACUTE=\u00D4;"
-                "o-macron-acute=\u00F4;"
-                "y-umlaut-acute=\u0177;"
-                "\u00ef-acute=\u00EE;"
-                "\u00fc-acute=\u00FB;"       
-                
-                // Greek Letters
-
-                "ALPHA=\u0391;"
-                "BETA=\u0392;"
-                "GAMMA=\u0393;"
-                "DELTA=\u0394;"
-                "EPSILON=\u0395;"
-                "ZETA=\u0396;"
-                "ETA=\u0397;"
-                "THETA=\u0398;"
-                "IOTA=\u0399;"
-                "KAPPA=\u039A;"
-                "LAMBDA=\u039B;"
-                "MU=\u039C;"
-                "NU=\u039D;"
-                "XI=\u039E;"
-                "OMICRON=\u039F;"
-                "PI=\u03A0;"
-                "RHO=\u03A1;"
-                "SIGMA=\u03A3;"
-                "TAU=\u03A4;"
-                "YPSILON=\u03A5;"
-                "PHI=\u03A6;"
-                "CHI=\u03A7;"
-                "PSI=\u03A8;"
-                "OMEGA=\u03A9;"
-
-                "ALPHA+=\u0386;"
-                "EPSILON+=\u0388;"
-                "ETA+=\u0389;"
-                "IOTA+=\u038A;"
-                "OMICRON+=\u038C;"
-                "YPSILON+=\u038E;"
-                "OMEGA+=\u038F;"
-                "IOTA_DIAERESIS=\u03AA;"
-                "YPSILON_DIAERESIS=\u03AB;"
-
-                "alpha=\u03B1;"
-                "beta=\u03B2;"
-                "gamma=\u03B3;"
-                "delta=\u03B4;"
-                "epsilon=\u03B5;"
-                "zeta=\u03B6;"
-                "eta=\u03B7;"
-                "theta=\u03B8;"
-                "iota=\u03B9;"
-                "kappa=\u03BA;"
-                "lambda=\u03BB;"
-                "mu=\u03BC;"
-                "nu=\u03BD;"
-                "xi=\u03BE;"
-                "omicron=\u03BF;"
-                "pi=\u03C0;"
-                "rho=\u03C1;"
-                "sigma=\u03C3;"
-                "tau=\u03C4;"
-                "ypsilon=\u03C5;"
-                "phi=\u03C6;"
-                "chi=\u03C7;"
-                "psi=\u03C8;"
-                "omega=\u03C9;"
-
-                //forms
-
-                "alpha+=\u03AC;"
-                "epsilon+=\u03AD;"
-                "eta+=\u03AE;"
-                "iota+=\u03AF;"
-                "omicron+=\u03CC;"
-                "ypsilon+=\u03CD;"
-                "omega+=\u03CE;"
-                "iota_diaeresis=\u03CA;"
-                "ypsilon_diaeresis=\u03CB;"
-                "iota_diaeresis+=\u0390;"
-                "ypsilon_diaeresis+=\u03B0;"
-                "sigma+=\u03C2;"
-
-                // Variables for conditional mappings
-                
-                // Use lowercase for all variable names, to allow cut/paste below.
-
-                "letter=[~[:Lu:][:Ll:]];"
-                "lower=[[:Ll:]];"
-                "softener=[eiyEIY];"
-                "vowel=[aeiouAEIOU"
-                  "{ALPHA}{EPSILON}{ETA}{IOTA}{OMICRON}{YPSILON}{OMEGA}"
-                  "{ALPHA+}{EPSILON+}{ETA+}{IOTA+}{OMICRON+}{YPSILON+}{OMEGA+}"
-                  "{IOTA_DIAERESIS}{YPSILON_DIAERESIS}"
-                  "{alpha}{epsilon}{eta}{iota}{omicron}{ypsilon}{omega}"
-                  "{alpha+}{epsilon+}{eta+}{iota+}{omicron+}{ypsilon+}{omega+}"
-                  "{iota_diaeresis}{ypsilon_diaeresis}"
-                  "{iota_diaeresis+}{ypsilon_diaeresis+}"
-                  "];"
-                "n-gamma=[GKXCgkxc];"
-                "gamma-n=[{GAMMA}{KAPPA}{CHI}{XI}{gamma}{kappa}{chi}{xi}];"
-                "pp=[Pp];"
-
-                // ==============================================
-                // Rules
-                // ==============================================
-                // The following are special titlecases, and should
-                // not be copied when duplicating the lowercase
-                // ==============================================
-                
-                "Th <> {THETA}({lower};"
-                "Ph <> {PHI}({lower};"
-                "Ch <> {CHI}({lower};"
-              //masked: + "Ps<{PHI}({lower};"
-                
-                // Because there is no uppercase forms for final sigma,
-                // we had to move all the sigma rules up here.
-                
-                // Remember to insert ' to preserve round trip, for double letters
-                // don't need to do this for the digraphs with h,
-                // since it is not created when mapping back from greek
-                
-                // use special form for s
-                
-                "''S <> ({pp}) {SIGMA} ;" // handle PS
-                "S <> {SIGMA};"
-                
-                // The following are a bit tricky. 's' takes two forms in greek
-                // final or non final. 
-                // We use ~s to represent the abnormal form: final before letter
-                // or non-final before non-letter.
-                // We use 's to separate p and s (otherwise ps is one letter)
-                // so, we break out the following forms:
-                
-                "''s < ({pp}) {sigma} ({letter});"
-                "s <          {sigma} ({letter});"
-                "~s <         {sigma} ;"
-
-                "~s <         {sigma+} ({letter});"
-                "''s < ({pp}) {sigma+} ;"
-                "s <          {sigma+} ;"
-
-                "~s ({letter})  > {sigma+};"
-                "~s             > {sigma};"
-                "''s ({letter}) > {sigma};"
-                "''s            > {sigma+};"
-                "s ({letter})   > {sigma};"
-                "s              > {sigma+};"
-                
-                // because there are no uppercase forms, had to move these up too.
-                
-                "i\"`>{iota_diaeresis+};"
-                "y\"`>{ypsilon_diaeresis+};"
-                
-                "{\u00ef-acute} <> {iota_diaeresis+};"
-                "{\u00fc-acute} <> {vowel}){ypsilon_diaeresis+};"
-                "{y-umlaut-acute} <> {ypsilon_diaeresis+};"
-                                
-                // ==============================================
-                // Uppercase Forms.
-                // To make lowercase forms, just copy and lowercase below
-                // ==============================================
- 
-                // Typing variants, in case the keyboard doesn't have accents
-                
-                "A`>{ALPHA+};"
-                "E`>{EPSILON+};"
-                "EE`>{ETA+};"
-                "EE>{ETA};" 
-                "I`>{IOTA+};"
-                "O`>{OMICRON+};"
-                "OO`>{OMEGA+};"
-                "OO>{OMEGA};"
-                "I\">{IOTA_DIAERESIS};"
-                "Y\">{YPSILON_DIAERESIS};"
-                
-                // Basic Letters
-                
-                "A<>{ALPHA};"
-                "\u00c1<>{ALPHA+};"
-                "B<>{BETA};"
-                "N ({n-gamma}) <> {GAMMA} ({gamma-n});"
-                "G<>{GAMMA};"
-                "D<>{DELTA};"
-                "''E <> ([Ee]){EPSILON};" // handle EE
-                "E<>{EPSILON};"
-                "\u00c9<>{EPSILON+};"
-                "Z<>{ZETA};"
-                "{E-MACRON-ACUTE}<>{ETA+};"
-                "{E-MACRON}<>{ETA};"
-                "TH<>{THETA};"
-                "I<>{IOTA};"
-                "\u00cd<>{IOTA+};"
-                "\u00cf<>{IOTA_DIAERESIS};"
-                "K<>{KAPPA};"
-                "L<>{LAMBDA};"
-                "M<>{MU};"
-                "N'' <> {NU} ({gamma-n});"
-                "N<>{NU};"
-                "X<>{XI};"
-                "''O <> ([Oo]) {OMICRON};" // handle OO
-                "O<>{OMICRON};"
-                "\u00d3<>{OMICRON+};"
-                "PH<>{PHI};" // needs ordering before P
-                "PS<>{PSI};" // needs ordering before P
-                "P<>{PI};"
-                "R<>{RHO};"
-                "T<>{TAU};"
-                "U <> ({vowel}) {YPSILON};"
-                "\u00da <> ({vowel}) {YPSILON+};"
-                "\u00dc <> ({vowel}) {YPSILON_DIAERESIS};"
-                "Y<>{YPSILON};"
-                "\u00dd<>{YPSILON+};"
-                "{Y-UMLAUT}<>{YPSILON_DIAERESIS};"
-                "CH<>{CHI};"
-                "{O-MACRON-ACUTE}<>{OMEGA+};"
-                "{O-MACRON}<>{OMEGA};"
-
-                // Extra English Letters. Mapped for completeness
-                
-                "C({softener})>|S;"
-                "C>|K;"
-                "F>|PH;"
-                "H>|CH;"
-                "J>|I;"
-                "Q>|K;"
-                "V>|U;"
-                "W>|U;"
-                
-                // ==============================================
-                // Lowercase Forms. Just copy above and lowercase
-                // ==============================================
-
-                // typing variants, in case the keyboard doesn't have accents
-                
-                "a`>{alpha+};"
-                "e`>{epsilon+};"
-                "ee`>{eta+};"
-                "ee>{eta};" 
-                "i`>{iota+};"
-                "o`>{omicron+};"
-                "oo`>{omega+};"
-                "oo>{omega};"
-                "i\">{iota_diaeresis};"
-                "y\">{ypsilon_diaeresis};"
-                
-                // basic letters
-                
-                "a<>{alpha};"
-                "\u00e1<>{alpha+};"
-                "b<>{beta};"
-                "n ({n-gamma}) <> {gamma} ({gamma-n});"
-                "g<>{gamma};"
-                "d<>{delta};"
-                "''e <> ([Ee]){epsilon};" // handle EE
-                "e<>{epsilon};"
-                "\u00e9<>{epsilon+};"
-                "z<>{zeta};"
-                "{e-macron-acute}<>{eta+};"
-                "{e-macron}<>{eta};"
-                "th<>{theta};"
-                "i<>{iota};"
-                "\u00ed<>{iota+};"
-                "\u00ef<>{iota_diaeresis};"
-                "k<>{kappa};"
-                "l<>{lambda};"
-                "m<>{mu};"
-                "n'' <> {nu} ({gamma-n});"
-                "n<>{nu};"
-                "x<>{xi};"
-                "''o <> ([Oo]) {omicron};" // handle OO
-                "o<>{omicron};"
-                "\u00f3<>{omicron+};"
-                "ph<>{phi};" // needs ordering before p
-                "ps<>{psi};" // needs ordering before p
-                "p<>{pi};"
-                "r<>{rho};"
-                "t<>{tau};"
-                "u <> ({vowel}){ypsilon};"
-                "\u00fa <> ({vowel}){ypsilon+};"
-                "\u00fc <> ({vowel}){ypsilon_diaeresis};"
-                "y<>{ypsilon};"
-                "\u00fd<>{ypsilon+};"
-                "{y-umlaut}<>{ypsilon_diaeresis};"
-                "ch<>{chi};"
-                "{o-macron-acute}<>{omega+};"
-                "{o-macron}<>{omega};"
-
-                // extra english letters. mapped for completeness
-                
-                "c({softener})>|s;"
-                "c>|k;"
-                "f>|ph;"
-                "h>|ch;"
-                "j>|i;"
-                "q>|k;"
-                "v>|u;"
-                "w>|u;"
-                
-                // ====================================
-                // Normal final rule: remove '
-                // ====================================
-                
-                //+ "''>;"
-    }
+  Rule {
+    // ==============================================
+    // Modern Greek Transliteration Rules
+    //
+    // This transliterates modern Greek characters, but using rules
+    // that are traditional for Ancient Greek, so that the results
+    // more closely resemble Greek words that have become part
+    // of English. It differs from the official Greek
+    // transliteration, which is more phonetic (since
+    // most modern Greek vowels, for example, have
+    // degenerated simply to sound like "ee").
+    //
+    // There are only a few tricky parts.
+    // 1. eta and omega don't map directly to Latin vowels,
+    //    so we use a macron on e and o, and some
+    //    other combinations if they are accented.
+    // 2. The accented i and y with diaeresis are substituted too.
+    // 3. Some letters use digraphs, like "ph". While typical,
+    //    they need some special handling.
+    // 4. A gamma before a gamma or a few other letters is
+    //    transliterated as an "n", as in "Anglo"
+    // 5. An ypsilon after a vowel is a "u", as in
+    //    "Mouseio". Otherwise it is a "y" as in "Physikon"
+    // 6. The construction of the rules is made simpler by making sure
+    //    that most rules for lowercase letters exactly correspond to the
+    //    rules for uppercase letters, *except* for the case of the letters
+    //    in the rule itself. That way, after modifying the uppercase rules,
+    //    you can just copy, paste, and "set to lowercase" to get
+    //    the rules for lowercase letters!
+    // ==============================================
+    
+    // ==============================================
+    // Variables, used to make the rules more comprehensible
+    // and for conditionals.
+    // ==============================================
+    
+    "$quote='\"';"
+    
+    // Latin Letters
+    
+    "$E_MACRON=\u0112;"
+    "$e_macron=\u0113;"
+    "$O_MACRON=\u014C;"
+    "$o_macron=\u014D;"
+    "$Y_UMLAUT=\u0178;"
+    "$y_umlaut=\u00FF;"
+    
+    //! // with real accents.
+    //! + "$E_MACRON_ACUTE=\u0112\u0301;"
+    //! + "$e_macron_acute=\u0113\u0301;"
+    //! + "$O_MACRON_ACUTE=\u014C\u0301;"
+    //! + "$o_macron_acute=\u014D\u0301;"
+    //! + "$y_umlaut_acute=\u00FF\u0301;"
+    //! + "$u00ef_acute=\u00ef\u0301;"
+    //! + "$u00fc_acute=\u00fc\u0301;"
+    //! //
+    
+    // single letter equivalents
+    
+    "$E_MACRON_ACUTE=\u00CA;"
+    "$e_macron_acute=\u00EA;"
+    "$O_MACRON_ACUTE=\u00D4;"
+    "$o_macron_acute=\u00F4;"
+    "$y_umlaut_acute=\u0177;"
+    "$u00ef_acute=\u00EE;"
+    "$u00fc_acute=\u00FB;"
+    
+    // Greek Letters
+    
+    "$ALPHA=\u0391;"
+    "$BETA=\u0392;"
+    "$GAMMA=\u0393;"
+    "$DELTA=\u0394;"
+    "$EPSILON=\u0395;"
+    "$ZETA=\u0396;"
+    "$ETA=\u0397;"
+    "$THETA=\u0398;"
+    "$IOTA=\u0399;"
+    "$KAPPA=\u039A;"
+    "$LAMBDA=\u039B;"
+    "$MU=\u039C;"
+    "$NU=\u039D;"
+    "$XI=\u039E;"
+    "$OMICRON=\u039F;"
+    "$PI=\u03A0;"
+    "$RHO=\u03A1;"
+    "$SIGMA=\u03A3;"
+    "$TAU=\u03A4;"
+    "$YPSILON=\u03A5;"
+    "$PHI=\u03A6;"
+    "$CHI=\u03A7;"
+    "$PSI=\u03A8;"
+    "$OMEGA=\u03A9;"
+    
+    "$ALPHA2=\u0386;"
+    "$EPSILON2=\u0388;"
+    "$ETA2=\u0389;"
+    "$IOTA2=\u038A;"
+    "$OMICRON2=\u038C;"
+    "$YPSILON2=\u038E;"
+    "$OMEGA2=\u038F;"
+    "$IOTA_DIAERESIS=\u03AA;"
+    "$YPSILON_DIAERESIS=\u03AB;"
+    
+    "$alpha=\u03B1;"
+    "$beta=\u03B2;"
+    "$gamma=\u03B3;"
+    "$delta=\u03B4;"
+    "$epsilon=\u03B5;"
+    "$zeta=\u03B6;"
+    "$eta=\u03B7;"
+    "$theta=\u03B8;"
+    "$iota=\u03B9;"
+    "$kappa=\u03BA;"
+    "$lambda=\u03BB;"
+    "$mu=\u03BC;"
+    "$nu=\u03BD;"
+    "$xi=\u03BE;"
+    "$omicron=\u03BF;"
+    "$pi=\u03C0;"
+    "$rho=\u03C1;"
+    "$sigma=\u03C3;"
+    "$tau=\u03C4;"
+    "$ypsilon=\u03C5;"
+    "$phi=\u03C6;"
+    "$chi=\u03C7;"
+    "$psi=\u03C8;"
+    "$omega=\u03C9;"
+    
+    // accented, diaeresis, and final-sigma forms
+    
+    "$alpha2=\u03AC;"
+    "$epsilon2=\u03AD;"
+    "$eta2=\u03AE;"
+    "$iota2=\u03AF;"
+    "$omicron2=\u03CC;"
+    "$ypsilon2=\u03CD;"
+    "$omega2=\u03CE;"
+    "$iota_diaeresis=\u03CA;"
+    "$ypsilon_diaeresis=\u03CB;"
+    "$iota_diaeresis2=\u0390;"
+    "$ypsilon_diaeresis2=\u03B0;"
+    "$sigma2=\u03C2;"
+    
+    // Variables for conditional mappings
+    
+    // Use lowercase for all variable names, to allow cut/paste below.
+    
+    "$letter=[~[:Lu:][:Ll:]];"
+    "$lower=[[:Ll:]];"
+    "$softener=[eiyEIY];"
+    "$vowel=[aeiouAEIOU"
+    "$ALPHA$EPSILON$ETA$IOTA$OMICRON$YPSILON$OMEGA"
+    "$ALPHA2$EPSILON2$ETA2$IOTA2$OMICRON2$YPSILON2$OMEGA2"
+    "$IOTA_DIAERESIS$YPSILON_DIAERESIS"
+    "$alpha$epsilon$eta$iota$omicron$ypsilon$omega"
+    "$alpha2$epsilon2$eta2$iota2$omicron2$ypsilon2$omega2"
+    "$iota_diaeresis$ypsilon_diaeresis"
+    "$iota_diaeresis2$ypsilon_diaeresis2"
+    "];"
+    "$n_gamma=[GKXCgkxc];"
+    "$gamma_n=[$GAMMA$KAPPA$CHI$XI$gamma$kappa$chi$xi];"
+    "$pp=[Pp];"
+    
+    // ==============================================
+    // Rules
+    // ==============================================
+    // The following are special titlecases, and should
+    // not be copied when duplicating the lowercase
+    // ==============================================
+    
+    "Th <> $THETA}$lower;"
+    "Ph <> $PHI}$lower;"
+    "Ch <> $CHI}$lower;"
+    //masked: + "Ps<$PSI}$lower;"
+    
+    // Because there are no uppercase forms for final sigma,
+    // we had to move all the sigma rules up here.
+    
+    // Remember to insert ' to preserve round trip for double letters.
+    // We don't need to do this for the digraphs with h,
+    // since a lone h is never created when mapping back from Greek.
+    
+    // use special form for s
+    
+    "''S <> $pp{$SIGMA;" // handle PS
+    "S <> $SIGMA;"
+    
+    // The following are a bit tricky. 's' takes two forms in Greek:
+    // final or non-final.
+    // We use ~s to represent the abnormal form: final before a letter
+    // or non-final before a non-letter.
+    // We use 's to separate p and s (otherwise ps is one letter),
+    // so we break out the following forms:
+    
+    "''s < $pp{$sigma}$letter;"
+    "s <          $sigma}$letter;"
+    "'~'s <         $sigma;"
+    
+    "'~'s <         $sigma2}$letter;"
+    "''s < $pp{$sigma2;"
+    "s <          $sigma2;"
+    
+    "'~'s }$letter>$sigma2;"
+    "'~'s             > $sigma;"
+    "''s }$letter>$sigma;"
+    "''s            > $sigma2;"
+    "s }$letter>$sigma;"
+    "s              > $sigma2;"
+    
+    // because there are no uppercase forms, had to move these up too.
+    
+    "i$quote'`'>$iota_diaeresis2;"
+    "y$quote'`'>$ypsilon_diaeresis2;"
+    
+    "$u00ef_acute<>$iota_diaeresis2;"
+    "$u00fc_acute<>$vowel{$ypsilon_diaeresis2;"
+    "$y_umlaut_acute<>$ypsilon_diaeresis2;"
+    
+    // ==============================================
+    // Uppercase Forms.
+    // To make lowercase forms, just copy and lowercase below
+    // ==============================================
+    
+    // Typing variants, in case the keyboard doesn't have accents
+    
+    "'A`'>$ALPHA2;"
+    "'E`'>$EPSILON2;"
+    "'EE`'>$ETA2;"
+    "EE>$ETA;"
+    "'I`'>$IOTA2;"
+    "'O`'>$OMICRON2;"
+    "'OO`'>$OMEGA2;"
+    "OO>$OMEGA;"
+    "I$quote>$IOTA_DIAERESIS;"
+    "Y$quote>$YPSILON_DIAERESIS;"
+    
+    // Basic Letters
+    
+    "A<>$ALPHA;"
+    "\u00c1<>$ALPHA2;"
+    "B<>$BETA;"
+    "N }$n_gamma<>$GAMMA}$gamma_n;"
+    "G<>$GAMMA;"
+    "D<>$DELTA;"
+    "''E <> [Ee]{$EPSILON;" // handle EE
+    "E<>$EPSILON;"
+    "\u00c9<>$EPSILON2;"
+    "Z<>$ZETA;"
+    "$E_MACRON_ACUTE<>$ETA2;"
+    "$E_MACRON<>$ETA;"
+    "TH<>$THETA;"
+    "I<>$IOTA;"
+    "\u00cd<>$IOTA2;"
+    "\u00cf<>$IOTA_DIAERESIS;"
+    "K<>$KAPPA;"
+    "L<>$LAMBDA;"
+    "M<>$MU;"
+    "N'' <> $NU}$gamma_n;"
+    "N<>$NU;"
+    "X<>$XI;"
+    "''O <> [Oo]{ $OMICRON;" // handle OO
+    "O<>$OMICRON;"
+    "\u00d3<>$OMICRON2;"
+    "PH<>$PHI;" // needs ordering before P
+    "PS<>$PSI;" // needs ordering before P
+    "P<>$PI;"
+    "R<>$RHO;"
+    "T<>$TAU;"
+    "U <> $vowel{$YPSILON;"
+    "\u00da <> $vowel{$YPSILON2;"
+    "\u00dc <> $vowel{$YPSILON_DIAERESIS;"
+    "Y<>$YPSILON;"
+    "\u00dd<>$YPSILON2;"
+    "$Y_UMLAUT<>$YPSILON_DIAERESIS;"
+    "CH<>$CHI;"
+    "$O_MACRON_ACUTE<>$OMEGA2;"
+    "$O_MACRON<>$OMEGA;"
+    
+    // Extra English Letters. Mapped for completeness
+    
+    "C}$softener>|S;"
+    "C>|K;"
+    "F>|PH;"
+    "H>|CH;"
+    "J>|I;"
+    "Q>|K;"
+    "V>|U;"
+    "W>|U;"
+    
+    // ==============================================
+    // Lowercase Forms. Just copy above and lowercase
+    // ==============================================
+    
+    // typing variants, in case the keyboard doesn't have accents
+    
+    "'a`'>$alpha2;"
+    "'e`'>$epsilon2;"
+    "'ee`'>$eta2;"
+    "ee>$eta;"
+    "'i`'>$iota2;"
+    "'o`'>$omicron2;"
+    "'oo`'>$omega2;"
+    "oo>$omega;"
+    "i$quote>$iota_diaeresis;"
+    "y$quote>$ypsilon_diaeresis;"
+    
+    // basic letters
+    
+    "a<>$alpha;"
+    "\u00e1<>$alpha2;"
+    "b<>$beta;"
+    "n }$n_gamma<>$gamma}$gamma_n;"
+    "g<>$gamma;"
+    "d<>$delta;"
+    "''e <> [Ee]{$epsilon;" // handle EE
+    "e<>$epsilon;"
+    "\u00e9<>$epsilon2;"
+    "z<>$zeta;"
+    "$e_macron_acute<>$eta2;"
+    "$e_macron<>$eta;"
+    "th<>$theta;"
+    "i<>$iota;"
+    "\u00ed<>$iota2;"
+    "\u00ef<>$iota_diaeresis;"
+    "k<>$kappa;"
+    "l<>$lambda;"
+    "m<>$mu;"
+    "n'' <> $nu}$gamma_n;"
+    "n<>$nu;"
+    "x<>$xi;"
+    "''o <> [Oo]{ $omicron;" // handle OO
+    "o<>$omicron;"
+    "\u00f3<>$omicron2;"
+    "ph<>$phi;" // needs ordering before p
+    "ps<>$psi;" // needs ordering before p
+    "p<>$pi;"
+    "r<>$rho;"
+    "t<>$tau;"
+    "u <> $vowel{$ypsilon;"
+    "\u00fa <> $vowel{$ypsilon2;"
+    "\u00fc <> $vowel{$ypsilon_diaeresis;"
+    "y<>$ypsilon;"
+    "\u00fd<>$ypsilon2;"
+    "$y_umlaut<>$ypsilon_diaeresis;"
+    "ch<>$chi;"
+    "$o_macron_acute<>$omega2;"
+    "$o_macron<>$omega;"
+    
+    // extra english letters. mapped for completeness
+    
+    "c}$softener>|s;"
+    "c>|k;"
+    "f>|ph;"
+    "h>|ch;"
+    "j>|i;"
+    "q>|k;"
+    "v>|u;"
+    "w>|u;"
+    
+    // ====================================
+    // Normal final rule: remove '
+    // ====================================
+    
+    //+ "''>;"
+  }
 }
--- a/icu4c/data/lhebrew.txt
+++ b/icu4c/data/lhebrew.txt
@ -1,278 +1,309 @@
 //--------------------------------------------------------------------
-//  Copyright (C) 1999, International Business Machines
-//  Corporation and others.  All Rights Reserved.
+// Copyright (c) 1999-2000, International Business Machines
+// Corporation and others.  All Rights Reserved.
 //--------------------------------------------------------------------
-//  Date        Name        Description
-//  11/17/99    aliu        Creation.
+// THIS IS A MACHINE-GENERATED FILE
+// Tool: src\com\ibm\tools\translit\dumpICUrules.bat
+// Source: src\com\ibm\text\resources/TransliterationRule_Latin_Hebrew.java
+// Date: Fri May 19 15:50:22 2000
 //--------------------------------------------------------------------

 // Latin-Hebrew

 lhebrew {
-    Rule {
-        //variable names, derived from the Unicode names.
-
-        "POINT_SHEVA=\u05B0;"
-        "POINT_HATAF_SEGOL=\u05B1;"
-        "POINT_HATAF_PATAH=\u05B2;"
-        "POINT_HATAF_QAMATS=\u05B3;"
-        "POINT_HIRIQ=\u05B4;"
-        "POINT_TSERE=\u05B5;"
-        "POINT_SEGOL=\u05B6;"
-        "POINT_PATAH=\u05B7;"
-        "POINT_QAMATS=\u05B8;"
-        "POINT_HOLAM=\u05B9;"
-        "POINT_QUBUTS=\u05BB;"
-        "POINT_DAGESH_OR_MAPIQ=\u05BC;"
-        "POINT_METEG=\u05BD;"
-        "PUNCTUATION_MAQAF=\u05BE;"
-        "POINT_RAFE=\u05BF;"
-        "PUNCTUATION_PASEQ=\u05C0;"
-        "POINT_SHIN_DOT=\u05C1;"
-        "POINT_SIN_DOT=\u05C2;"
-        "PUNCTUATION_SOF_PASUQ=\u05C3;"
-        "ALEF=\u05D0;"
-        "BET=\u05D1;"
-        "GIMEL=\u05D2;"
-        "DALET=\u05D3;"
-        "HE=\u05D4;"
-        "VAV=\u05D5;"
-        "ZAYIN=\u05D6;"
-        "HET=\u05D7;"
-        "TET=\u05D8;"
-        "YOD=\u05D9;"
-        "FINAL_KAF=\u05DA;"
-        "KAF=\u05DB;"
-        "LAMED=\u05DC;"
-        "FINAL_MEM=\u05DD;"
-        "MEM=\u05DE;"
-        "FINAL_NUN=\u05DF;"
-        "NUN=\u05E0;"
-        "SAMEKH=\u05E1;"
-        "AYIN=\u05E2;"
-        "FINAL_PE=\u05E3;"
-        "PE=\u05E4;"
-        "FINAL_TSADI=\u05E5;"
-        "TSADI=\u05E6;"
-        "QOF=\u05E7;"
-        "RESH=\u05E8;"
-        "SHIN=\u05E9;"
-        "TAV=\u05EA;"
-        "YIDDISH_DOUBLE_VAV=\u05F0;"
-        "YIDDISH_VAV_YOD=\u05F1;"
-        "YIDDISH_DOUBLE_YOD=\u05F2;"
-        "PUNCTUATION_GERESH=\u05F3;"
-        "PUNCTUATION_GERSHAYIM=\u05F4;"
-
-        //wildcards
-        //The values can be anything we don't use in this file: start at E000.
-
-        "letter=[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ];"
-
-        "softvowel=[eiyEIY];"
-
-        "vowellike=[{ALEF}{AYIN}{YOD}{VAV}];"
-
-        //?>{POINT_SHEVA}
-        //?>{POINT_HATAF_SEGOL}
-        //?>{POINT_HATAF_PATAH}
-        //?>{POINT_HATAF_QAMATS}
-        //?>{POINT_HIRIQ}
-        //?>{POINT_TSERE}
-        //?>{POINT_SEGOL}
-        //?>{POINT_PATAH}
-        //?>{POINT_QAMATS}
-        //?>{POINT_HOLAM}
-        //?>{POINT_QUBUTS}
-        //?>{POINT_DAGESH_OR_MAPIQ}
-        //?>{POINT_METEG}
-        //?>{PUNCTUATION_MAQAF}
-        //?>{POINT_RAFE}
-        //?>{PUNCTUATION_PASEQ}
-        //?>{POINT_SHIN_DOT}
-        //?>{POINT_SIN_DOT}
-        //?>{PUNCTUATION_SOF_PASUQ}
-
-        "a>{ALEF};"
-        "A>{ALEF};"
-
-        "b>{BET};"
-        "B>{BET};"
-
-        "c({softvowel}>{SAMEKH};"
-        "C({softvowel}>{SAMEKH};"
-        "c({letter}>{KAF};"
-        "C({letter}>{KAF};"
-        "c>{FINAL_KAF};"
-        "C>{FINAL_KAF};"
-
-        "d>{DALET};"
-        "D>{DALET};"
-
-        "e>{AYIN};"
-        "E>{AYIN};"
-
-        "f({letter}>{PE};"
-        "f>{FINAL_PE};"
-        "F({letter}>{PE};"
-        "F>{FINAL_PE};"
-
-        "g>{GIMEL};"
-        "G>{GIMEL};"
-
-        "h>{HE};"
-        "H>{HE};"
-
-        "i>{YOD};"
-        "I>{YOD};"
-
-        "j>{DALET}{SHIN};"
-        "J>{DALET}{SHIN};"
-
-        "kH>{HET};"
-        "kh>{HET};"
-        "KH>{HET};"
-        "Kh>{HET};"
-        "k({letter}>{KAF};"
-        "K({letter}>{KAF};"
-        "k>{FINAL_KAF};"
-        "K>{FINAL_KAF};"
-
-        "l>{LAMED};"
-        "L>{LAMED};"
-
-        "m({letter}>{MEM};"
-        "m>{FINAL_MEM};"
-        "M({letter}>{MEM};"
-        "M>{FINAL_MEM};"
-
-        "n({letter}>{NUN};"
-        "n>{FINAL_NUN};"
-        "N({letter}>{NUN};"
-        "N>{FINAL_NUN};"
-
-        "o>{VAV};"
-        "O>{VAV};"
-
-        "p({letter}>{PE};"
-        "p>{FINAL_PE};"
-        "P({letter}>{PE};"
-        "P>{FINAL_PE};"
-
-        "q>{QOF};"
-        "Q>{QOF};"
-
-        "r>{RESH};"
-        "R>{RESH};"
-
-        "sH>{SHIN};"
-        "sh>{SHIN};"
-        "SH>{SHIN};"
-        "Sh>{SHIN};"
-        "s>{SAMEKH};"
-        "S>{SAMEKH};"
-
-        "th>{TAV};"
-        "tH>{TAV};"
-        "TH>{TAV};"
-        "Th>{TAV};"
-        "tS({letter}>{TSADI};"
-        "ts({letter}>{TSADI};"
-        "Ts({letter}>{TSADI};"
-        "TS({letter}>{TSADI};"
-        "tS>{FINAL_TSADI};"
-        "ts>{FINAL_TSADI};"
-        "Ts>{FINAL_TSADI};"
-        "TS>{FINAL_TSADI};"
-        "t>{TET};"
-        "T>{TET};"
-
-        "u>{VAV};"
-        "U>{VAV};"
-
-        "v>{VAV};"
-        "V>{VAV};"
-
-        "w>{VAV};"
-        "W>{VAV};"
-
-        "x>{KAF}{SAMEKH};"
-        "X>{KAF}{SAMEKH};"
-
-        "y>{YOD};"
-        "Y>{YOD};"
-
-        "z>{ZAYIN};"
-        "Z>{ZAYIN};"
-
-        //#?>{YIDDISH_DOUBLE_VAV}
-        //?>{YIDDISH_VAV_YOD}
-        //?>{YIDDISH_DOUBLE_YOD}
-        //?>{PUNCTUATION_GERESH}
-        //?>{PUNCTUATION_GERSHAYIM}
-
-        "''>;"
-
-        //{POINT_SHEVA}>@
-        //{POINT_HATAF_SEGOL}>@
-        //{POINT_HATAF_PATAH}>@
-        //{POINT_HATAF_QAMATS}>@
-        //{POINT_HIRIQ}>@
-        //{POINT_TSERE}>@
-        //{POINT_SEGOL}>@
-        //{POINT_PATAH}>@
-        //{POINT_QAMATS}>@
-        //{POINT_HOLAM}>@
-        //{POINT_QUBUTS}>@
-        //{POINT_DAGESH_OR_MAPIQ}>@
-        //{POINT_METEG}>@
-        //{PUNCTUATION_MAQAF}>@
-        //{POINT_RAFE}>@
-        //{PUNCTUATION_PASEQ}>@
-        //{POINT_SHIN_DOT}>@
-        //{POINT_SIN_DOT}>@
-        //{PUNCTUATION_SOF_PASUQ}>@
-
-        "a<{ALEF};"
-        "e<{AYIN};"
-        "b<{BET};"
-        "d<{DALET};"
-        "k<{FINAL_KAF};"
-        "m<{FINAL_MEM};"
-        "n<{FINAL_NUN};"
-        "p<{FINAL_PE};"
-        "ts<{FINAL_TSADI};"
-        "g<{GIMEL};"
-        "kh<{HET};"
-        "h<{HE};"
-        "k''<{KAF}({HE};"
-        "k<{KAF};"
-        "l<{LAMED};"
-        "m<{MEM};"
-        "n<{NUN};"
-        "p<{PE};"
-        "q<{QOF};"
-        "r<{RESH};"
-        "s''<{SAMEKH}({HE};"
-        "s<{SAMEKH};"
-        "sh<{SHIN};"
-        "th<{TAV};"
-        "t''<{TET}({HE};"
-        "t''<{TET}({SAMEKH};"
-        "t''<{TET}({SHIN};"
-        "t<{TET};"
-        "ts<{TSADI};"
-        "v<{VAV}({vowellike};"
-        "u<{VAV};"
-        "y<{YOD};"
-        "z<{ZAYIN};"
-
-        //{YIDDISH_DOUBLE_VAV}>@
-        //{YIDDISH_VAV_YOD}>@
-        //{YIDDISH_DOUBLE_YOD}>@
-        //{PUNCTUATION_GERESH}>@
-        //{PUNCTUATION_GERSHAYIM}>@
-
-        "<'';"
-    }
+  Rule {
+    //variable names, derived from the Unicode names.
+    
+    "$POINT_SHEVA=\u05B0;"
+    "$POINT_HATAF_SEGOL=\u05B1;"
+    "$POINT_HATAF_PATAH=\u05B2;"
+    "$POINT_HATAF_QAMATS=\u05B3;"
+    "$POINT_HIRIQ=\u05B4;"
+    "$POINT_TSERE=\u05B5;"
+    "$POINT_SEGOL=\u05B6;"
+    "$POINT_PATAH=\u05B7;"
+    "$POINT_QAMATS=\u05B8;"
+    "$POINT_HOLAM=\u05B9;"
+    "$POINT_QUBUTS=\u05BB;"
+    "$POINT_DAGESH_OR_MAPIQ=\u05BC;"
+    "$POINT_METEG=\u05BD;"
+    "$PUNCTUATION_MAQAF=\u05BE;"
+    "$POINT_RAFE=\u05BF;"
+    "$PUNCTUATION_PASEQ=\u05C0;"
+    "$POINT_SHIN_DOT=\u05C1;"
+    "$POINT_SIN_DOT=\u05C2;"
+    "$PUNCTUATION_SOF_PASUQ=\u05C3;"
+    "$ALEF=\u05D0;"
+    "$BET=\u05D1;"
+    "$GIMEL=\u05D2;"
+    "$DALET=\u05D3;"
+    "$HE=\u05D4;"
+    "$VAV=\u05D5;"
+    "$ZAYIN=\u05D6;"
+    "$HET=\u05D7;"
+    "$TET=\u05D8;"
+    "$YOD=\u05D9;"
+    "$FINAL_KAF=\u05DA;"
+    "$KAF=\u05DB;"
+    "$LAMED=\u05DC;"
+    "$FINAL_MEM=\u05DD;"
+    "$MEM=\u05DE;"
+    "$FINAL_NUN=\u05DF;"
+    "$NUN=\u05E0;"
+    "$SAMEKH=\u05E1;"
+    "$AYIN=\u05E2;"
+    "$FINAL_PE=\u05E3;"
+    "$PE=\u05E4;"
+    "$FINAL_TSADI=\u05E5;"
+    "$TSADI=\u05E6;"
+    "$QOF=\u05E7;"
+    "$RESH=\u05E8;"
+    "$SHIN=\u05E9;"
+    "$TAV=\u05EA;"
+    "$YIDDISH_DOUBLE_VAV=\u05F0;"
+    "$YIDDISH_VAV_YOD=\u05F1;"
+    "$YIDDISH_DOUBLE_YOD=\u05F2;"
+    "$PUNCTUATION_GERESH=\u05F3;"
+    "$PUNCTUATION_GERSHAYIM=\u05F4;"
+    
+    //character classes
+    //Each is a UnicodeSet pattern; the rules below refer to it by its $name.
+    
+    "$letter=[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ];"
+    
+    "$softvowel=[eiyEIY];"
+    
+    "$vowellike=[$ALEF$AYIN$YOD$VAV];"
+    
+    "$hebrew=[\u0590-\u05FF];" // the whole block -liu
+    
+    //?>{POINT_SHEVA}
+    //?>{POINT_HATAF_SEGOL}
+    //?>{POINT_HATAF_PATAH}
+    //?>{POINT_HATAF_QAMATS}
+    //?>{POINT_HIRIQ}
+    //?>{POINT_TSERE}
+    //?>{POINT_SEGOL}
+    //?>{POINT_PATAH}
+    //?>{POINT_QAMATS}
+    //?>{POINT_HOLAM}
+    //?>{POINT_QUBUTS}
+    //?>{POINT_DAGESH_OR_MAPIQ}
+    //?>{POINT_METEG}
+    //?>{PUNCTUATION_MAQAF}
+    //?>{POINT_RAFE}
+    //?>{PUNCTUATION_PASEQ}
+    //?>{POINT_SHIN_DOT}
+    //?>{POINT_SIN_DOT}
+    //?>{PUNCTUATION_SOF_PASUQ}
+    
+    // why is this a special case? -liu
+    "k''h <> $KAF $HE ;"
+    
+    // mark non-final forms in final position as x~ -liu
+    "k    <  $KAF   } $hebrew ;"
+    "m    <  $MEM   } $hebrew ;"
+    "n    <  $NUN   } $hebrew ;"
+    "p    <  $PE    } $hebrew ;"
+    "ts   <  $TSADI } $hebrew ;"
+    "k'~' <> $KAF   ;"
+    "m'~' <> $MEM   ;"
+    "n'~' <> $NUN   ;"
+    "p'~' <> $PE    ;"
+    "ts'~'<> $TSADI ;"
+    
+    // mark final forms in non-final position as x^ -liu
+    "k'^' <> $FINAL_KAF   } $hebrew ;"
+    "m'^' <> $FINAL_MEM   } $hebrew ;"
+    "n'^' <> $FINAL_NUN   } $hebrew ;"
+    "p'^' <> $FINAL_PE    } $hebrew ;"
+    "ts'^'<> $FINAL_TSADI } $hebrew ;"
+    "k    <  $FINAL_KAF;"
+    "m    <  $FINAL_MEM;"
+    "n    <  $FINAL_NUN;"
+    "p    <  $FINAL_PE;"
+    "ts   <  $FINAL_TSADI;"
+    
+    "a>$ALEF;"
+    "A>$ALEF;"
+    
+    "b>$BET;"
+    "B>$BET;"
+    
+    "c}$softvowel>$SAMEKH;"
+    "C}$softvowel>$SAMEKH;"
+    "c}$letter>$KAF;"
+    "C}$letter>$KAF;"
+    "c>$FINAL_KAF;"
+    "C>$FINAL_KAF;"
+    
+    "d>$DALET;"
+    "D>$DALET;"
+    
+    "e>$AYIN;"
+    "E>$AYIN;"
+    
+    "f}$letter>$PE;"
+    "f>$FINAL_PE;"
+    "F}$letter>$PE;"
+    "F>$FINAL_PE;"
+    
+    "g>$GIMEL;"
+    "G>$GIMEL;"
+    
+    "h>$HE;"
+    "H>$HE;"
+    
+    "i>$YOD;"
+    "I>$YOD;"
+    
+    "j>$DALET$SHIN;"
+    "J>$DALET$SHIN;"
+    
+    "kH>$HET;"
+    "kh>$HET;"
+    "KH>$HET;"
+    "Kh>$HET;"
+    "k}$letter>$KAF;"
+    "K}$letter>$KAF;"
+    "k>$FINAL_KAF;"
+    "K>$FINAL_KAF;"
+    
+    "l>$LAMED;"
+    "L>$LAMED;"
+    
+    "m}$letter>$MEM;"
+    "m>$FINAL_MEM;"
+    "M}$letter>$MEM;"
+    "M>$FINAL_MEM;"
+    
+    "n}$letter>$NUN;"
+    "n>$FINAL_NUN;"
+    "N}$letter>$NUN;"
+    "N>$FINAL_NUN;"
+    
+    "o>$VAV;"
+    "O>$VAV;"
+    
+    "p}$letter>$PE;"
+    "p>$FINAL_PE;"
+    "P}$letter>$PE;"
+    "P>$FINAL_PE;"
+    
+    "q>$QOF;"
+    "Q>$QOF;"
+    
+    "r>$RESH;"
+    "R>$RESH;"
+    
+    "sH>$SHIN;"
+    "sh>$SHIN;"
+    "SH>$SHIN;"
+    "Sh>$SHIN;"
+    "s>$SAMEKH;"
+    "S>$SAMEKH;"
+    
+    "th>$TAV;"
+    "tH>$TAV;"
+    "TH>$TAV;"
+    "Th>$TAV;"
+    "tS}$letter>$TSADI;"
+    "ts}$letter>$TSADI;"
+    "Ts}$letter>$TSADI;"
+    "TS}$letter>$TSADI;"
+    "tS>$FINAL_TSADI;"
+    "ts>$FINAL_TSADI;"
+    "Ts>$FINAL_TSADI;"
+    "TS>$FINAL_TSADI;"
+    "t>$TET;"
+    "T>$TET;"
+    
+    "u>$VAV;"
+    "U>$VAV;"
+    
+    "v>$VAV;"
+    "V>$VAV;"
+    
+    "w>$VAV;"
+    "W>$VAV;"
+    
+    "x>$KAF$SAMEKH;"
+    "X>$KAF$SAMEKH;"
+    
+    "y>$YOD;"
+    "Y>$YOD;"
+    
+    "z>$ZAYIN;"
+    "Z>$ZAYIN;"
+    
+    //#?>{YIDDISH_DOUBLE_VAV}
+    //?>{YIDDISH_VAV_YOD}
+    //?>{YIDDISH_DOUBLE_YOD}
+    //?>{PUNCTUATION_GERESH}
+    //?>{PUNCTUATION_GERSHAYIM}
+    
+    "''>;"
+    
+    //{POINT_SHEVA}>@
+    //{POINT_HATAF_SEGOL}>@
+    //{POINT_HATAF_PATAH}>@
+    //{POINT_HATAF_QAMATS}>@
+    //{POINT_HIRIQ}>@
+    //{POINT_TSERE}>@
+    //{POINT_SEGOL}>@
+    //{POINT_PATAH}>@
+    //{POINT_QAMATS}>@
+    //{POINT_HOLAM}>@
+    //{POINT_QUBUTS}>@
+    //{POINT_DAGESH_OR_MAPIQ}>@
+    //{POINT_METEG}>@
+    //{PUNCTUATION_MAQAF}>@
+    //{POINT_RAFE}>@
+    //{PUNCTUATION_PASEQ}>@
+    //{POINT_SHIN_DOT}>@
+    //{POINT_SIN_DOT}>@
+    //{PUNCTUATION_SOF_PASUQ}>@
+    
+    "a<$ALEF;"
+    "e<$AYIN;"
+    "b<$BET;"
+    "d<$DALET;"
+    //+ "k<$FINAL_KAF;"
+    //+ "m<$FINAL_MEM;"
+    //+ "n<$FINAL_NUN;"
+    //+ "p<$FINAL_PE;"
+    //+ "ts<$FINAL_TSADI;"
+    "g<$GIMEL;"
+    "kh<$HET;"
+    "h<$HE;"
+    //+ "k''<$KAF}$HE;"
+    //+ "k<$KAF;"
+    "l<$LAMED;"
+    //+ "m<$MEM;"
+    //+ "n<$NUN;"
+    //+ "p<$PE;"
+    "q<$QOF;"
+    "r<$RESH;"
+    "s''<$SAMEKH}$HE;"
+    "s<$SAMEKH;"
+    "sh<$SHIN;"
+    "th<$TAV;"
+    "t''<$TET}$HE;"
+    "t''<$TET}$SAMEKH;"
+    "t''<$TET}$SHIN;"
+    "t<$TET;"
+    //+ "ts<$TSADI;"
+    "v<$VAV}$vowellike;"
+    "u<$VAV;"
+    "y<$YOD;"
+    "z<$ZAYIN;"
+    
+    //{YIDDISH_DOUBLE_VAV}>@
+    //{YIDDISH_VAV_YOD}>@
+    //{YIDDISH_DOUBLE_YOD}>@
+    //{PUNCTUATION_GERESH}>@
+    //{PUNCTUATION_GERSHAYIM}>@
+    
+    "<'';"
+  }
 }
--- a/icu4c/data/ljamo.txt
+++ b/icu4c/data/ljamo.txt
--- a/icu4c/data/lkana.txt
+++ b/icu4c/data/lkana.txt
--- a/icu4c/data/quotes.txt
+++ b/icu4c/data/quotes.txt
@ -1,83 +1,82 @@
 //--------------------------------------------------------------------
-//  Copyright (C) 1999, International Business Machines
-//  Corporation and others.  All Rights Reserved.
+// Copyright (c) 1999-2000, International Business Machines
+// Corporation and others.  All Rights Reserved.
 //--------------------------------------------------------------------
-//  Date        Name        Description
-//  11/17/99    aliu        Creation.
+// THIS IS A MACHINE-GENERATED FILE
+// Tool: src\com\ibm\tools\translit\dumpICUrules.bat
+// Source: src\com\ibm\text\resources/TransliterationRule_StraightQuotes_CurlyQuotes.java
+// Date: Fri May 19 15:50:23 2000
 //--------------------------------------------------------------------

 // StraightQuotes-CurlyQuotes

 quotes {
-    Rule {
-        // Rewritten using character codes [LIU]
-        "white=[[:Zs:][:Zl:][:Zp:]];"
-        "black=[^{white}];"
-        "open=[:Ps:];"
-        "dquote=\";"
-
-        "lAng=\u3008;"
-        "ldAng=\u300A;"
-        "lBrk='[';"
-        "lBrc='{';"
-
-        "lquote=\u2018;"
-        "rquote=\u2019;"
-        "ldquote=\u201C;"
-        "rdquote=\u201D;"
-
-        "ldguill=\u00AB;"
-        "rdguill=\u00BB;"
-        "lguill=\u2039;"
-        "rguill=\u203A;"
-
-        "mdash=\u2014;"
-
-        //#######################################
-        // Conversions from input
-        //#######################################
-
-        // join single quotes
-        "{lquote}''>{ldquote};"
-        "{lquote}{lquote}>{ldquote};"
-        "{rquote}''>{rdquote};"
-        "{rquote}{rquote}>{rdquote};"
-
-        //smart single quotes
-        "{white})''>{lquote};"
-        "{open})''>{lquote};"
-        "{black})''>{rquote};"
-        "''>{lquote};"
-
-        //smart doubles
-        "{white}){dquote}>{ldquote};"
-        "{open}){dquote}>{ldquote};"
-        "{black}){dquote}>{rdquote};"
-        "{dquote}>{ldquote};"
-
-        // join single guillemets
-        "{rguill}{rguill}>{rdguill};"
-        "'>>'>{rdguill};"
-        "{lguill}{lguill}>{ldguill};"
-        "'<<'>{ldguill};"
-
-        // prevent double spaces
-        "\\ )\\ >;"
-
-        // join hyphens into dash
-        "-->{mdash};"
-
-        //#######################################
-        // Conversions back to input
-        //#######################################
-
-        //smart quotes
-        "''<{lquote};"
-        "''<{rquote};"
-        "{dquote}<{ldquote};"
-        "{dquote}<{rdquote};"
-        
-        //hyphens
-        "--<{mdash};"
-    }
+  Rule {
+    // Rewritten using character codes [LIU]
+    "$white=[[:Zs:][:Zl:][:Zp:]];"
+    "$black=[^$white];"
+    "$open=[:Ps:];"
+    "$dquote='\"';"
+    
+    "$lAng=\u3008;"
+    "$ldAng=\u300A;"
+    "$lBrk='[';"
+    "$lBrc='{';"
+    
+    "$lquote=\u2018;"
+    "$rquote=\u2019;"
+    "$ldquote=\u201C;"
+    "$rdquote=\u201D;"
+    
+    "$ldguill=\u00AB;"
+    "$rdguill=\u00BB;"
+    "$lguill=\u2039;"
+    "$rguill=\u203A;"
+    
+    "$mdash=\u2014;"
+    
+    //#######################################
+    // Conversions from input
+    //#######################################
+    
+    // join single quotes
+    "$lquote''>$ldquote;"
+    "$lquote$lquote>$ldquote;"
+    "$rquote''>$rdquote;"
+    "$rquote$rquote>$rdquote;"
+    
+    //smart single quotes
+    "$white{''>$lquote;"
+    "$open{''>$lquote;"
+    "$black{''>$rquote;"
+    "''>$lquote;"
+    
+    //smart doubles
+    "$white{$dquote>$ldquote;"
+    "$open{$dquote>$ldquote;"
+    "$black{$dquote>$rdquote;"
+    "$dquote>$ldquote;"
+    
+    // join single guillemets
+    "$rguill$rguill>$rdguill;"
+    "'>>'>$rdguill;"
+    "$lguill$lguill>$ldguill;"
+    "'<<'>$ldguill;"
+    
+    // prevent double spaces
+    "\\ {\\ >;"
+    
+    // join hyphens into dash ### BIDIRECTIONAL ###
+    "'--'<>$mdash;"
+    
+    //#######################################
+    // Conversions back to input
+    //#######################################
+    
+    //smart quotes
+    "''<$lquote;"
+    "''<$rquote;"
+    "$dquote<$ldquote;"
+    "$dquote<$rdquote;"
+  }
 }
--- a/icu4c/data/ucname.txt
+++ b/icu4c/data/ucname.txt
--- a/icu4c/source/i18n/rbt.cpp
+++ b/icu4c/source/i18n/rbt.cpp
@ -135,9 +135,8 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, Position& index,
                ++cursor;
            }
        } else {
-            text.handleReplaceBetween(cursor, cursor + r->getKeyLength(),
-                                      r->getOutput());
-            limit += r->getOutput().length() - r->getKeyLength();
+            // Delegate replacement to TransliterationRule object
+            limit += r->replace(text, cursor, *data);
            cursor += r->getCursorPos();
            ++loopCount;
        }
--- a/icu4c/source/i18n/rbt_data.cpp
+++ b/icu4c/source/i18n/rbt_data.cpp
@ -10,6 +10,7 @@
 #include "rbt_data.h"
 #include "hash.h"
 #include "unicode/unistr.h"
+#include "unicode/uniset.h"

 TransliterationRuleData::TransliterationRuleData(UErrorCode& status) :
    variableNames(0), setVariables(0) {
@ -17,37 +18,21 @@ TransliterationRuleData::TransliterationRuleData(UErrorCode& status) :
        return;
    }
    variableNames = new Hashtable(status);
+    if (U_SUCCESS(status)) {
+        variableNames->setValueDeleter(uhash_deleteUnicodeString);
+    }
    setVariables = 0;
    setVariablesLength = 0;
 }

 TransliterationRuleData::~TransliterationRuleData() {
    delete variableNames;
-    delete[] setVariables;
-}
-
-void
-TransliterationRuleData::defineVariable(const UnicodeString& name,
-                                        UChar value,
-                                        UErrorCode& status) {
-    int32_t v = value | 0x10000; // Set bit 16
-    variableNames->put(name, (void*) v, status);
-}
-
-UChar
-TransliterationRuleData::lookupVariable(const UnicodeString& name,
-                                        UErrorCode& status) const {
-    if (U_FAILURE(status)) {
-        return 0;
+    if (setVariables != 0) { // nothing to free if the set array was never built
+        for (int32_t i=0; i<setVariablesLength; ++i) {
+            delete setVariables[i]; // free each UnicodeSet before the array that holds the pointers
+        }
+        delete[] setVariables;
    }
-    void* value = variableNames->get(name);
-    /* Even U+0000 can be stored in the table because we set
-     * bit 16 in defineVariable().
-     */
-    if (value == 0) {
-        status = U_ILLEGAL_ARGUMENT_ERROR;
-    }
-    return (UChar) (int32_t) (unsigned long) value;
 }

 const UnicodeSet*
@ -56,7 +41,8 @@ TransliterationRuleData::lookupSet(UChar standIn) const {
    return (i >= 0 && i < setVariablesLength) ? setVariables[i] : 0;
 }

-UBool
-TransliterationRuleData::isVariableDefined(const UnicodeString& name) const {
-    return 0 != variableNames->get(name);
+int32_t
+TransliterationRuleData::lookupSegmentReference(UChar c) const {
+    int32_t i = c - segmentBase; // offset of c from the stand-in for segment 1
+    return (i >= 0 && i < 9) ? i : -1; // nine stand-ins (segments 1..9) map to 0..8; anything else is -1
 }
--- a/icu4c/source/i18n/rbt_data.h
+++ b/icu4c/source/i18n/rbt_data.h
@ -35,42 +35,39 @@ class TransliterationRuleData {

 public:

+    // PUBLIC DATA MEMBERS
+
    /**
     * Rule table.  May be empty.
-     *
-     * PUBLIC DATA MEMBER for internal use by RBT
     */
    TransliterationRuleSet ruleSet;

    /**
-     * Map variable name (UnicodeString) to variable (Character).
-     * A variable name may correspond to a single literal
-     * character, in which case the character is stored in this
-     * hash.  It may also correspond to a UnicodeSet, in which
-     * case a character is again stored in this hash, but the
-     * character is a stand-in: it is a key for a secondary lookup
-     * in data.setVariables.  The stand-in also represents the
-     * UnicodeSet in the stored rules.
-     *
-     * PUBLIC DATA MEMBER for internal use by RBT
+     * Map variable name (UnicodeString) to variable (UnicodeString).  A variable name
+     * corresponds to zero or more characters, stored in a UnicodeString in
+     * this hash.  One or more of these chars may also correspond to a
+     * UnicodeSet, in which case the character in the UnicodeString in this hash is
+     * a stand-in: it is an index for a secondary lookup in
+     * data.setVariables.  The stand-in also represents the UnicodeSet in
+     * the stored rules.
     */
    Hashtable* variableNames;
    
    /**
-     * Map category variable (Character) to set (UnicodeSet).
+     * Map category variable (UChar) to set (UnicodeSet).
     * Variables that correspond to a set of characters are mapped
     * from variable name to a stand-in character in data.variableNames.
     * The stand-in then serves as a key in this hash to lookup the
     * actual UnicodeSet object.  In addition, the stand-in is
     * stored in the rule text to represent the set of characters.
     * setVariables[i] represents character (setVariablesBase + i).
-     *
-     * PUBLIC DATA MEMBER for internal use by RBT
     */
    UnicodeSet** setVariables;
    
    /**
-     * The character represented by setVariables[0].
+     * The character that represents setVariables[0].  Characters
+     * setVariablesBase through setVariablesBase +
+     * setVariablesLength - 1 represent UnicodeSet objects.
     */
    UChar setVariablesBase;

@ -79,20 +76,34 @@ public:
     */
    int32_t setVariablesLength;

+    /**
+     * The character that represents segment 1.  Characters segmentBase
+     * through segmentBase + 8 represent segments 1 through 9.
+     */
+    UChar segmentBase;
+
+public:
+
    TransliterationRuleData(UErrorCode& status);

    ~TransliterationRuleData();
    
-    void defineVariable(const UnicodeString& name,
-                        UChar value,
-                        UErrorCode& status);
-        
-    UChar lookupVariable(const UnicodeString& name,
-                         UErrorCode& status) const;
-    
 	const UnicodeSet* lookupSet(UChar standIn) const;

-    UBool isVariableDefined(const UnicodeString& name) const;
+    /**
+     * Return the zero-based index of the segment represented by the given
+     * character, or -1 if none.  Repeat: This is a zero-based return value,
+     * 0..8, even though these are notated "$1".."$9".
+     */
+    int32_t lookupSegmentReference(UChar c) const;
+
+    /**
+     * Return the character used to stand for the given segment reference.
+     * The reference must be in the range 1..9; it is not validated here.
+     * Segment 1 maps to segmentBase, segment 9 to segmentBase + 8.
+     */
+    UChar getSegmentStandin(int32_t ref) const {
+        // The sum is computed as an int; narrow it back to UChar explicitly.
+        return (UChar) (segmentBase + ref - 1);
+    }
 };

 #endif
--- a/icu4c/source/i18n/rbt_pars.cpp
+++ b/icu4c/source/i18n/rbt_pars.cpp
@ -17,27 +17,31 @@
 #include "unicode/parsepos.h"
 #include "symtable.h"
 #include "unicode/parseerr.h"
+#include "hash.h"

 // Operators
-const UChar TransliterationRuleParser::VARIABLE_DEF_OP = 0x003D/*=*/;
-const UChar TransliterationRuleParser::FORWARD_RULE_OP = 0x003E/*>*/;
-const UChar TransliterationRuleParser::REVERSE_RULE_OP = 0x003C/*<*/;
-const UChar TransliterationRuleParser::FWDREV_RULE_OP  = 0x007E/*~*/; // internal rep of <> op
-const UnicodeString TransliterationRuleParser::OPERATORS = UNICODE_STRING("=><", 3);
+#define VARIABLE_DEF_OP ((UChar)0x003D) /*=*/
+#define FORWARD_RULE_OP ((UChar)0x003E) /*>*/
+#define REVERSE_RULE_OP ((UChar)0x003C) /*<*/
+#define FWDREV_RULE_OP  ((UChar)0x007E) /*~*/ // internal rep of <> op
+#define OPERATORS       UNICODE_STRING("=><", 3)

 // Other special characters
-const UChar TransliterationRuleParser::QUOTE = 0x0027/*'*/;
-const UChar TransliterationRuleParser::ESCAPE = 0x005C/*\*/;
-const UChar TransliterationRuleParser::END_OF_RULE = 0x003B/*;*/;
-const UChar TransliterationRuleParser::RULE_COMMENT_CHAR = 0x0023/*#*/;
+#define QUOTE             ((UChar)0x0027) /*'*/
+#define ESCAPE            ((UChar)0x005C) /*\*/
+#define END_OF_RULE       ((UChar)0x003B) /*;*/
+#define RULE_COMMENT_CHAR ((UChar)0x0023) /*#*/

-const UChar TransliterationRuleParser::VARIABLE_REF_OPEN = 0x007B/*{*/;
-const UChar TransliterationRuleParser::VARIABLE_REF_CLOSE = 0x007D/*}*/;
-const UChar TransliterationRuleParser::CONTEXT_OPEN = 0x0028/*(*/;
-const UChar TransliterationRuleParser::CONTEXT_CLOSE = 0x0029/*)*/;
-const UChar TransliterationRuleParser::SET_OPEN = 0x005B/*[*/;
-const UChar TransliterationRuleParser::SET_CLOSE = 0x005D/*]*/;
-const UChar TransliterationRuleParser::CURSOR_POS = 0x007C/*|*/;
+#define SEGMENT_OPEN       ((UChar)0x0028) /*(*/
+#define SEGMENT_CLOSE      ((UChar)0x0029) /*)*/
+#define CONTEXT_ANTE       ((UChar)0x007B) /*{*/
+#define CONTEXT_POST       ((UChar)0x007D) /*}*/
+#define SET_OPEN           ((UChar)0x005B) /*[*/
+#define SET_CLOSE          ((UChar)0x005D) /*]*/
+#define CURSOR_POS         ((UChar)0x007C) /*|*/
+#define CURSOR_OFFSET      ((UChar)0x0040) /*@*/
+
+const UnicodeString TransliterationRuleParser::gOPERATORS = OPERATORS;

 //----------------------------------------------------------------------
 // BEGIN ParseData
@ -58,14 +62,12 @@ public:
    ParseData(const TransliterationRuleData* data = 0,
              const UVector* setVariablesVector = 0);

-    /**
-     * Lookup the object associated with this string and return it.
-     * Return U_ILLEGAL_ARGUMENT_ERROR status if the name does not
-     * exist.  Return a non-NULL set if the name is mapped to a set;
-     * otherwise return a NULL set.
-     */
-    virtual void lookup(const UnicodeString& name, UChar& c, UnicodeSet*& set,
-                        UErrorCode& status) const;
+    virtual const UnicodeString* lookup(const UnicodeString& s) const;
+
+    virtual const UnicodeSet* lookupSet(UChar ch) const;
+
+    virtual UnicodeString parseReference(const UnicodeString& text,
+                                         ParsePosition& pos, int32_t limit) const;
 };

 ParseData::ParseData(const TransliterationRuleData* d,
@ -73,21 +75,389 @@ ParseData::ParseData(const TransliterationRuleData* d,
    data(d), setVariablesVector(sets) {}

 /**
- * Implement SymbolTable API.  Lookup a variable, returning
- * either a Character, a UnicodeSet, or null.
+ * Implement SymbolTable API.
 */
-void ParseData::lookup(const UnicodeString& name, UChar& c, UnicodeSet*& set,
-                       UErrorCode& status) const {
-    c = data->lookupVariable(name, status);
-    if (U_SUCCESS(status)) {
-        int32_t i = c - data->setVariablesBase;
+const UnicodeString* ParseData::lookup(const UnicodeString& name) const {
+    // The name table hands back an untyped pointer; the caller gets it
+    // typed as the UnicodeString holding the variable's value.
+    void* value = data->variableNames->get(name);
+    return (const UnicodeString*) value;
+}
+
+/**
+ * Implement SymbolTable API.  Return the UnicodeSet that the stand-in
+ * character ch represents, or NULL if ch is not a set stand-in.
+ * Stand-ins are consecutive characters starting at
+ * data->setVariablesBase; element i of setVariablesVector belongs to
+ * the stand-in (data->setVariablesBase + i).
+ */
+const UnicodeSet* ParseData::lookupSet(UChar ch) const {
+    // Note that we cannot use data.lookupSet() because the
+    // set array has not been constructed yet.
+    const UnicodeSet* set = NULL;
+    int32_t i = ch - data->setVariablesBase;
+    if (i >= 0 && i < setVariablesVector->size()) {
        set = (i < setVariablesVector->size()) ?
            (UnicodeSet*) setVariablesVector->elementAt(i) : 0;
    }
+    return set;
+}
+
+/**
+ * Implement SymbolTable API.  Parse out a symbol reference
+ * name: the longest run of characters that starts at pos, ends
+ * before limit, and forms a Unicode identifier.  On success pos is
+ * advanced past the name and the name is returned.  If there is no
+ * valid name at pos, pos is left alone and an empty string is
+ * returned.
+ */
+UnicodeString ParseData::parseReference(const UnicodeString& text,
+                                        ParsePosition& pos, int32_t limit) const {
+    const int32_t begin = pos.getIndex();
+    int32_t end = begin;
+    for (; end < limit; ++end) {
+        UChar ch = text.charAt(end);
+        if (!Unicode::isUnicodeIdentifierPart(ch)) {
+            break;
+        }
+        // The first character must additionally be a valid start.
+        if (end == begin && !Unicode::isUnicodeIdentifierStart(ch)) {
+            break;
+        }
+    }
+
+    UnicodeString name;
+    if (end > begin) {
+        pos.setIndex(end);
+        text.extractBetween(begin, end, name);
+    }
+    return name; // an empty string indicates failure
+}

 //----------------------------------------------------------------------
-// END ParseData
+// BEGIN RuleHalf
+//----------------------------------------------------------------------
+
+/**
+ * A class representing one side of a rule.  This class knows how to
+ * parse half of a rule.  It is tightly coupled to the method
+ * TransliterationRuleParser::parseRule(), which creates one RuleHalf
+ * for the left side and one for the right side of each rule.
+ */
+class RuleHalf {
+
+public:
+
+    UnicodeString text; // parsed text of this side; parse() appends to it
+
+    int32_t cursor; // position of cursor in text, or -1 if none
+    int32_t ante;   // position of ante context marker '{' in text, or -1 if none
+    int32_t post;   // position of post context marker '}' in text, or -1 if none
+
+    // Record the position of the segment substrings and references.  A
+    // given side should have segments or segment references, but not
+    // both.
+    UVector* segments; // ref substring start,limits; NULL until the first '(' or ')'; deleted by ~RuleHalf
+    int32_t maxRef;       // index of largest ref (1..9), or -1 if none
+
+    // Record the offset to the cursor either to the left or to the
+    // right of the key.  This is indicated by characters on the output
+    // side that allow the cursor to be positioned arbitrarily within
+    // the matching text.  For example, abc{def} > | @@@ xyz; changes
+    // def to xyz and moves the cursor to before abc.  Offset characters
+    // must be at the start or end, and they cannot move the cursor past
+    // the ante- or postcontext text.  Placeholders are only valid in
+    // output text.
+    int32_t cursorOffset; // only nonzero on output side
+
+    TransliterationRuleParser& parser; // parser through which syntax errors are reported
+
+    static const UnicodeString gOperators; // the operator characters "=><"; any of them ends a rule half
+
+    //--------------------------------------------------
+    // Methods
+
+    RuleHalf(TransliterationRuleParser& parser);
+    ~RuleHalf();
+
+    /**
+     * Parse one side of a rule, stopping at either the limit,
+     * the END_OF_RULE character, or an operator.  Return
+     * the pos of the terminating character (or limit).
+     * On a syntax error the value returned by syntaxError() is
+     * returned instead.
+     */
+    int32_t parse(const UnicodeString& rule, int32_t pos, int32_t limit,
+                  TransliterationRuleParser& parser);
+
+    /**
+     * Remove context: keep only the text between the ante context
+     * marker and the post context marker, then reset both markers
+     * to -1.
+     */
+    void removeContext();
+
+    /**
+     * Create and return a new int32_t array holding a copy of the
+     * recorded segment offsets, or NULL if no segments were recorded.
+     * The array is allocated with new[]; presumably the caller takes
+     * ownership of it (confirm against the caller).
+     */
+    int32_t* createSegments() const;
+
+    int syntaxError(int32_t code, // convenience: forwards to parser.syntaxError()
+                    const UnicodeString& rule,
+                    int32_t start) {
+        return parser.syntaxError(code, rule, start);
+    }
+};
+
+const UnicodeString RuleHalf::gOperators = OPERATORS;
+
+RuleHalf::RuleHalf(TransliterationRuleParser& p) :
+    cursor(-1),       // no cursor seen yet
+    ante(-1),         // no ante context marker seen yet
+    post(-1),         // no post context marker seen yet
+    segments(NULL),   // allocated on demand by parse()
+    maxRef(-1),       // no segment reference seen yet
+    cursorOffset(0),
+    parser(p) {
+}
+
+RuleHalf::~RuleHalf() {
+    delete segments; // still NULL if no '(' or ')' was ever parsed; deleting NULL is a no-op
+}
+
+/**
+ * Parse one side of a rule, stopping at either the limit,
+ * the END_OF_RULE character, or an operator.  Return
+ * the pos of the terminating character (or limit).
+ *
+ * The parsed result is appended to this->text.  The positions of the
+ * cursor, the context markers and the segment delimiters, and the
+ * cursor offset count, are recorded in the corresponding members as
+ * they are encountered.
+ *
+ * @param rule   the rule source text
+ * @param pos    index in rule at which to start parsing
+ * @param limit  index in rule at which to stop
+ * @param parser the parser supplying variable, set and segment lookups
+ * @return the index of the terminating operator or END_OF_RULE
+ * character, or limit if neither was found.  On a syntax error the
+ * value returned by syntaxError() is returned instead.
+ */
+int32_t RuleHalf::parse(const UnicodeString& rule, int32_t pos, int32_t limit,
+              TransliterationRuleParser& parser) {
+    int32_t start = pos;
+    UnicodeString& buf = text;
+    ParsePosition pp;
+    int32_t cursorOffsetPos = 0; // Position of first CURSOR_OFFSET on _right_
+    UnicodeString scratch;
+    bool_t done = FALSE;
+
+    while (pos < limit && !done) {
+        UChar c = rule.charAt(pos++);
+        if (Unicode::isWhitespace(c)) {
+            // Ignore whitespace.  Note that this is not Unicode
+            // spaces, but Java spaces -- a subset, representing
+            // whitespace likely to be seen in code.
+            continue;
+        }
+        // Handle escapes
+        if (c == ESCAPE) {
+            if (pos == limit) {
+                return syntaxError(RuleBasedTransliterator::TRAILING_BACKSLASH, rule, start);
+            }
+
+            // UNLIKE THE JAVA version, we parse \uXXXX escapes.  We
+            // do not do this in Java because the compiler has already
+            // done it when the ResourceBundle file was compiled.
+            // Parse \uXXXX escapes
+            c = rule.charAt(pos++);
+            if (c == 0x0075/*u*/) {
+                if ((pos+4) > limit) {
+                    return syntaxError(RuleBasedTransliterator::MALFORMED_UNICODE_ESCAPE, rule, start);
+                }
+                c = (UChar)0x0000;
+                for (int32_t plim=pos+4; pos<plim; ++pos) { // [sic]
+                    int32_t digit = Unicode::digit(rule.charAt(pos), 16);
+                    if (digit<0) {
+                        return syntaxError(RuleBasedTransliterator::MALFORMED_UNICODE_ESCAPE, rule, start);
+                    }
+                    c = (UChar) ((c << 4) | digit);
+                }
+            }
+ 
+            buf.append(c);
+            continue;
+        }
+        // Handle quoted matter
+        if (c == QUOTE) {
+            int32_t iq = rule.indexOf(QUOTE, pos);
+            if (iq == pos) {
+                buf.append(c); // Parse [''] outside quotes as [']
+                ++pos;
+            } else {
+                /* This loop picks up a segment of quoted text of the
+                 * form 'aaaa' each time through.  If this segment
+                 * hasn't really ended ('aaaa''bbbb') then it keeps
+                 * looping, each time adding on a new segment.  When it
+                 * reaches the final quote it breaks.
+                 */
+                for (;;) {
+                    if (iq < 0) {
+                        return syntaxError(RuleBasedTransliterator::UNTERMINATED_QUOTE, rule, start);
+                    }
+                    scratch.truncate(0);
+                    rule.extractBetween(pos, iq, scratch);
+                    buf.append(scratch);
+                    pos = iq+1;
+                    if (pos < limit && rule.charAt(pos) == QUOTE) {
+                        // Parse [''] inside quotes as [']
+                        iq = rule.indexOf(QUOTE, pos+1);
+                        // Continue looping
+                    } else {
+                        break;
+                    }
+                }
+            }
+            continue;
+        }
+        if (gOperators.indexOf(c) >= 0) {
+            --pos; // Backup to point to operator
+            break;
+        }
+        switch (c) {
+        case SEGMENT_OPEN:
+        case SEGMENT_CLOSE:
+            // Handle segment definitions "(" and ")": record the current
+            // length of buf; '(' must fall on an even count, ')' on an odd one.
+            if (segments == NULL) {
+                segments = new UVector();
+            }
+            if ((c == SEGMENT_OPEN) !=
+                (segments->size() % 2 == 0)) {
+                return syntaxError(RuleBasedTransliterator::MISMATCHED_SEGMENT_DELIMITERS,
+                                   rule, start);
+            }
+            segments->addElement((void*) buf.length());
+            break;
+        case END_OF_RULE:
+            --pos; // Backup to point to END_OF_RULE
+            done = TRUE;
+            break;
+        case SymbolTable::SYMBOL_REF:
+            // Handle variable references and segment references "$1" .. "$9"
+            {
+                // A variable reference must be followed immediately
+                // by a Unicode identifier start and zero or more
+                // Unicode identifier part characters, or by a digit
+                // 1..9 if it is a segment reference.
+                if (pos == limit) {
+                    return syntaxError(RuleBasedTransliterator::MALFORMED_SYMBOL_REFERENCE, rule, start);
+                }
+                // Parse "$1" "$2" .. "$9"
+                c = rule.charAt(pos);
+                int32_t r = Unicode::digit(c, 10);
+                if (r >= 1 && r <= 9) {
+                    if (r > maxRef) {
+                        maxRef = r;
+                    }
+                    buf.append(parser.data->getSegmentStandin(r));
+                    ++pos;
+                } else {
+                    pp.setIndex(pos);
+                    UnicodeString name = parser.parseData->
+                                    parseReference(rule, pp, limit);
+                    if (name.length() == 0) {
+                        return syntaxError(RuleBasedTransliterator::MALFORMED_VARIABLE_REFERENCE,
+                                           rule, start);
+                    }
+                    pos = pp.getIndex();
+                    // If this is a variable definition statement,
+                    // then the LHS variable will be undefined.  In
+                    // that case appendVariableDef() will append the
+                    // special placeholder char variableLimit-1.
+
+                    //buf.append(parser.getVariableDef(name));
+                    parser.appendVariableDef(name, buf);
+                }
+            }
+            break;
+        case CONTEXT_ANTE:
+            if (ante >= 0) {
+                return syntaxError(RuleBasedTransliterator::MULTIPLE_ANTE_CONTEXTS, rule, start);
+            }
+            ante = buf.length();
+            break;
+        case CONTEXT_POST:
+            if (post >= 0) {
+                return syntaxError(RuleBasedTransliterator::MULTIPLE_POST_CONTEXTS, rule, start);
+            }
+            post = buf.length();
+            break;
+        case SET_OPEN:
+            pp.setIndex(pos-1); // Backup to opening '['
+            buf.append(parser.parseSet(rule, pp));
+            if (U_FAILURE(parser.status)) {
+                return syntaxError(RuleBasedTransliterator::MALFORMED_SET, rule, start);
+            }
+            pos = pp.getIndex();
+            break;
+        case CURSOR_POS:
+            if (cursor >= 0) {
+                return syntaxError(RuleBasedTransliterator::MULTIPLE_CURSORS, rule, start);
+            }
+            cursor = buf.length();
+            break;
+        case CURSOR_OFFSET:
+            if (cursorOffset < 0) { // continuing a run of '@' that began right after a leading '|'
+                if (buf.length() > 0) {
+                    return syntaxError(RuleBasedTransliterator::MISPLACED_CURSOR_OFFSET, rule, start);
+                }
+                --cursorOffset;
+            } else if (cursorOffset > 0) { // continuing a run of '@' that precedes the '|'
+                if (buf.length() != cursorOffsetPos || cursor >= 0) {
+                    return syntaxError(RuleBasedTransliterator::MISPLACED_CURSOR_OFFSET, rule, start);
+                }
+                ++cursorOffset;
+            } else {
+                if (cursor == 0 && buf.length() == 0) { // first '@', directly after a leading '|'
+                    cursorOffset = -1;
+                } else if (cursor < 0) { // first '@', no '|' seen yet
+                    cursorOffsetPos = buf.length();
+                    cursorOffset = 1;
+                } else {
+                    return syntaxError(RuleBasedTransliterator::MISPLACED_CURSOR_OFFSET, rule, start);
+                }
+            }
+            break;
+        // case SET_CLOSE:
+        default:
+            // Disallow unquoted characters other than [0-9A-Za-z]
+            // in the printable ASCII range.  These characters are
+            // reserved for possible future use.
+            if (c >= 0x0021 && c <= 0x007E &&
+                !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) ||
+                  (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) ||
+                  (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) {
+                return syntaxError(RuleBasedTransliterator::UNQUOTED_SPECIAL, rule, start);
+            }
+            buf.append(c);
+            break;
+        }
+    }
+
+    if (cursorOffset > 0 && cursor != cursorOffsetPos) { // a right-hand '@' run needs a '|' at the same text position
+        return syntaxError(RuleBasedTransliterator::MISPLACED_CURSOR_OFFSET, rule, start);
+    }
+    // No copy-back is needed here: buf is a reference to text.
+    return pos;
+}
+
+/**
+ * Remove context: keep only the text that lies between the ante
+ * context marker and the post context marker, then forget both
+ * markers.
+ */
+void RuleHalf::removeContext() {
+    // Cut the tail first.  Removing from 'post' onward leaves every
+    // earlier offset, including 'ante', unchanged.
+    if (post >= 0) {
+        text.remove(post);
+    }
+    post = -1;
+
+    // Then drop everything that precedes the ante context marker.
+    if (ante >= 0) {
+        text.removeBetween(0, ante);
+    }
+    ante = -1;
+}
+
+/**
+ * Copy the recorded segment offsets into a newly allocated
+ * int32_t[] array and return it.  Returns NULL when no segment
+ * delimiters were recorded.
+ */
+int32_t* RuleHalf::createSegments() const {
+    int32_t* offsets = NULL;
+    if (segments != NULL) {
+        const int32_t count = segments->size();
+        offsets = new int32_t[count];
+        int32_t k = 0;
+        while (k < count) {
+            // Each element holds an offset stored as a void*.
+            offsets[k] = (int32_t) segments->elementAt(k);
+            ++k;
+        }
+    }
+    return offsets;
+}
+
+//----------------------------------------------------------------------
+// END RuleHalf
 //----------------------------------------------------------------------

 TransliterationRuleData*
@ -206,251 +576,142 @@ int32_t TransliterationRuleParser::parseRule(int32_t pos, int32_t limit) {
    // Locate the left side, operator, and right side
    int32_t start = pos;
    UChar op = 0;
+    const UnicodeString& rule = rules; // TEMPORARY: FIX LATER

-    UnicodeString buf;
-    int32_t cursor = -1; // position of cursor in buf
-    int32_t ante = -1;   // position of ante context marker ')' in buf
-    int32_t post = -1;   // position of post context marker '(' in buf
-    int32_t postClose = -1; // position of post context close ')' in buf
+    // Use pointers to automatics to make swapping possible.
+    RuleHalf _left(*this), _right(*this);
+    RuleHalf* left = &_left;
+    RuleHalf* right = &_right;

-    // Assigned to buf and its adjuncts after the LHS has been
-    // parsed.  Thereafter, buf etc. refer to the RHS.
-    UnicodeString left;
-    int32_t leftCursor = -1, leftAnte = -1, leftPost = -1, leftPostClose = -1;
-
-    UnicodeString scratch;
-
-    while (pos < limit) {
-        UChar c = rules.charAt(pos++);
-        if (Unicode::isWhitespace(c)) {
-            // Ignore whitespace.  Note that this is not Unicode
-            // spaces, but Java spaces -- a subset, representing
-            // whitespace likely to be seen in code.
-            continue;
-        }
-        // Handle escapes
-        if (c == ESCAPE) {
-            if (pos == limit) {
-                return syntaxError(RuleBasedTransliterator::TRAILING_BACKSLASH, rules, start);
-            }
-            // Parse \uXXXX escapes
-            c = rules.charAt(pos++);
-            if (c == 0x0075/*u*/) {
-                if ((pos+4) > limit) {
-                    return syntaxError(RuleBasedTransliterator::MALFORMED_UNICODE_ESCAPE, rules, start);
-                }
-                c = (UChar)0x0000;
-                for (int32_t plim=pos+4; pos<plim; ++pos) { // [sic]
-                    int32_t digit = Unicode::digit(rules.charAt(pos), 16);
-                    if (digit<0) {
-                        return syntaxError(RuleBasedTransliterator::MALFORMED_UNICODE_ESCAPE, rules, start);
-                    }
-                    c = (UChar) ((c << 4) | digit);
-                }
-            }
-
-            buf.append(c);
-            continue;
-        }
-        // Handle quoted matter
-        if (c == QUOTE) {
-            int32_t iq = rules.indexOf(QUOTE, pos);
-            if (iq == pos) {
-                buf.append(c); // Parse [''] outside quotes as [']
-                ++pos;
-            } else {
-                /* This loop picks up a segment of quoted text of the
-                 * form 'aaaa' each time through.  If this segment
-                 * hasn't really ended ('aaaa''bbbb') then it keeps
-                 * looping, each time adding on a new segment.  When it
-                 * reaches the final quote it breaks.
-                 */
-                for (;;) {
-                    if (iq < 0) {
-                        return syntaxError(RuleBasedTransliterator::UNTERMINATED_QUOTE, rules, start);
-                    }
-                    scratch.truncate(0);
-                    rules.extractBetween(pos, iq, scratch);
-                    buf.append(scratch);
-                    pos = iq+1;
-                    if (pos < limit && rules.charAt(pos) == QUOTE) {
-                        // Parse [''] inside quotes as [']
-                        iq = rules.indexOf(QUOTE, pos+1);
-                        // Continue looping
-                    } else {
-                        break;
-                    }
-                }
-            }
-            continue;
-        }
-        if (OPERATORS.indexOf(c) >= 0) {
-            if (op != 0) {
-                return syntaxError(RuleBasedTransliterator::UNQUOTED_SPECIAL, rules, start);
-            }
-            // Found an operator char.  Check for forward-reverse operator.
-            if (c == REVERSE_RULE_OP &&
-                (pos < limit && rules.charAt(pos) == FORWARD_RULE_OP)) {
-                ++pos;
-                op = FWDREV_RULE_OP;
-            } else {
-                op = c;
-            }
-            left = buf; // lhs
-            leftCursor = cursor;
-            leftAnte = ante;
-            leftPost = post;
-            leftPostClose = postClose;
-
-            buf.truncate(0);
-            cursor = ante = post = postClose = -1;
-            continue;
-        }
-        if (c == END_OF_RULE) {
-            break;
-        }
-        switch (c) {
-        case VARIABLE_REF_OPEN:
-            {
-                int32_t j = rules.indexOf(VARIABLE_REF_CLOSE, pos);
-                if (pos == j || j < 0) { // empty or unterminated
-                    return syntaxError(RuleBasedTransliterator::MALFORMED_VARIABLE_REFERENCE, rules, start);
-                }
-                scratch.truncate(0);
-                rules.extractBetween(pos, j, scratch);
-                pos = j+1;
-                UChar v = data->lookupVariable(scratch, status);
-                if (U_FAILURE(status)) {
-                    return syntaxError(RuleBasedTransliterator::UNDEFINED_VARIABLE, rules, start);
-                }
-                buf.append(v);
-            }
-            break;
-        case CONTEXT_OPEN:
-            if (post >= 0) {
-                return syntaxError(RuleBasedTransliterator::MULTIPLE_POST_CONTEXTS, rules, start);
-            }
-            // Ignore CONTEXT_OPEN if buffer length is zero -- that means
-            // this is the optional opening delimiter for the ante context.
-            if (buf.length() > 0) {
-                post = buf.length();
-            }
-            break;
-        case CONTEXT_CLOSE:
-            if (postClose >= 0) {
-                return syntaxError(RuleBasedTransliterator::UNEXPECTED_CLOSE_CONTEXT, rules, start);
-            }
-            if (post >= 0) {
-                // This is probably the optional closing delimiter
-                // for the post context; save the pos and check later.
-                postClose = buf.length();
-            } else if (ante >= 0) {
-                return syntaxError(RuleBasedTransliterator::MULTIPLE_ANTE_CONTEXTS, rules, start);
-            } else {
-                ante = buf.length();
-            }
-            break;
-        case SET_OPEN: {
-            ParsePosition pp(pos-1); // Backup to opening '['
-            buf.append(registerSet(new UnicodeSet(rules, pp, *parseData, status)));
-            if (U_FAILURE(status)) {
-                return syntaxError(RuleBasedTransliterator::MALFORMED_SET, rules, start);
-            }
-            pos = pp.getIndex(); }
-            break;
-        case VARIABLE_REF_CLOSE:
-        case SET_CLOSE:
-            return syntaxError(RuleBasedTransliterator::UNQUOTED_SPECIAL, rules, start);
-        case CURSOR_POS:
-            if (cursor >= 0) {
-                return syntaxError(RuleBasedTransliterator::MULTIPLE_CURSORS, rules, start);
-            }
-            cursor = buf.length();
-            break;
-        default:
-            buf.append(c);
-            break;
-        }
-    }
-    if (op == 0) {
-        return syntaxError(RuleBasedTransliterator::MISSING_OPERATOR, rules, start);
+    undefinedVariableName.remove();
+    pos = left->parse(rule, pos, limit, *this);
+    if (U_FAILURE(status)) {
+        return start;
    }

-    // Check context close parameters
-    if ((leftPostClose >= 0 && leftPostClose != left.length()) ||
-        (postClose >= 0 && postClose != buf.length())) {
-        return syntaxError(RuleBasedTransliterator::TEXT_AFTER_CLOSE_CONTEXT, rules, start);
+    if (pos == limit ||
+        gOPERATORS.indexOf(op = rule.charAt(pos++)) < 0) {
+        return syntaxError(RuleBasedTransliterator::MISSING_OPERATOR, rule, start);
    }

-    // Context is only allowed on the input side; that is, the left side
-    // for forward rules.  Cursors are only allowed on the output side;
-    // that is, the right side for forward rules.  Bidirectional rules
-    // ignore elements that do not apply.
+    // Found an operator char.  Check for forward-reverse operator.
+    if (op == REVERSE_RULE_OP &&
+        (pos < limit && rule.charAt(pos) == FORWARD_RULE_OP)) {
+        ++pos;
+        op = FWDREV_RULE_OP;
+    }

-    switch (op) {
-    case VARIABLE_DEF_OP:
+    pos = right->parse(rule, pos, limit, *this);
+    if (U_FAILURE(status)) {
+        return start;
+    }
+
+    if (pos < limit) {
+        if (rule.charAt(pos) == END_OF_RULE) {
+            ++pos;
+        } else {
+            // RuleHalf parser must have terminated at an operator
+            return syntaxError(RuleBasedTransliterator::UNQUOTED_SPECIAL, rule, start);
+        }
+    }
+
+    if (op == VARIABLE_DEF_OP) {
        // LHS is the name.  RHS is a single character, either a literal
        // or a set (already parsed).  If RHS is longer than one
        // character, it is either a multi-character string, or multiple
        // sets, or a mixture of chars and sets -- syntax error.
-        if (buf.length() != 1) {
-            return syntaxError(RuleBasedTransliterator::MALFORMED_RHS, rules, start);
-        }
-        if (data->isVariableDefined(left)) {
-            return syntaxError(RuleBasedTransliterator::DUPLICATE_VARIABLE_DEFINITION, rules, start);
-        }
-        data->defineVariable(left, buf.charAt(0), status);
-        break;

-    case FORWARD_RULE_OP:
-        if (direction == RuleBasedTransliterator::FORWARD) {
-            if (ante >= 0 || post >= 0 || leftCursor >= 0) {
-                return syntaxError(RuleBasedTransliterator::MALFORMED_RULE, rules, start);
-            }
-            data->ruleSet.addRule(new TransliterationRule(
-                                     left, leftAnte, leftPost,
-                                     buf, cursor, status), status);
-        } // otherwise ignore the rule; it's not the direction we want
-        break;
-
-    case REVERSE_RULE_OP:
-        if (direction == RuleBasedTransliterator::REVERSE) {
-            if (leftAnte >= 0 || leftPost >= 0 || cursor >= 0) {
-                return syntaxError(RuleBasedTransliterator::MALFORMED_RULE, rules, start);
-            }
-            data->ruleSet.addRule(new TransliterationRule(
-                                     buf, ante, post,
-                                     left, leftCursor, status), status);
-        } // otherwise ignore the rule; it's not the direction we want
-        break;
-
-    case FWDREV_RULE_OP:
-        if (direction == RuleBasedTransliterator::FORWARD) {
-            // The output side is the right; trim off any context
-            if (post >= 0) {
-                buf.remove(post);
-            }
-            if (ante >= 0) {
-                buf.removeBetween(0, ante);
-            }
-            data->ruleSet.addRule(new TransliterationRule(
-                                     left, leftAnte, leftPost,
-                                     buf, cursor, status), status);
-        } else {
-            // The output side is the left; trim off any context
-            if (leftPost >= 0) {
-                left.remove(leftPost);
-            }
-            if (leftAnte >= 0) {
-                left.removeBetween(0, leftAnte);
-            }
-            data->ruleSet.addRule(new TransliterationRule(
-                                     buf, ante, post,
-                                     left, leftCursor, status), status);
+        // We expect to see a single undefined variable (the one being
+        // defined).
+        if (undefinedVariableName.length() == 0) {
+            // "Missing '$' or duplicate definition"
+            return syntaxError(RuleBasedTransliterator::BAD_VARIABLE_DEFINITION, rule, start);
        }
-        break;
+        if (left->text.length() != 1 || left->text.charAt(0) != variableLimit) {
+            // "Malformed LHS"
+            return syntaxError(RuleBasedTransliterator::MALFORMED_VARIABLE_DEFINITION, rule, start);
+        }
+        // We allow anything on the right, including an empty string.
+        UnicodeString* value = new UnicodeString(right->text);
+        data->variableNames->put(undefinedVariableName, value, status);
+
+        ++variableLimit;
+        return pos;
    }

+    // If this is not a variable definition rule, we shouldn't have
+    // any undefined variable names.  Return immediately: syntaxError()
+    // only records the failure in 'status', so falling through would
+    // go on to build a rule from text that still contains the
+    // undefined-variable placeholder character.
+    if (undefinedVariableName.length() != 0) {
+        return syntaxError(// "Undefined variable $" + undefinedVariableName,
+                           RuleBasedTransliterator::UNDEFINED_VARIABLE,
+                           rule, start);
+    }
+
+    // If the direction we want doesn't match the rule
+    // direction, do nothing.
+    if (op != FWDREV_RULE_OP &&
+        ((direction == Transliterator::FORWARD) != (op == FORWARD_RULE_OP))) {
+        return pos;
+    }
+
+    // Transform the rule into a forward rule by swapping the
+    // sides if necessary.
+    if (direction == Transliterator::REVERSE) {
+        left = &_right;
+        right = &_left;
+    }
+
+    // Remove non-applicable elements in forward-reverse
+    // rules.  Bidirectional rules ignore elements that do not
+    // apply.
+    if (op == FWDREV_RULE_OP) {
+        right->removeContext();
+        delete right->segments;
+        right->segments = NULL;
+        left->cursor = left->maxRef = -1;
+        left->cursorOffset = 0;
+    }
+
+    // Normalize context
+    if (left->ante < 0) {
+        left->ante = 0;
+    }
+    if (left->post < 0) {
+        left->post = left->text.length();
+    }
+
+    // Context is only allowed on the input side.  Cursors are only
+    // allowed on the output side.  Segment delimiters can only appear
+    // on the left, and references on the right.  Cursor offset
+    // cannot appear without an explicit cursor.  Cursor offset
+    // cannot place the cursor outside the limits of the context.
+    if (right->ante >= 0 || right->post >= 0 || left->cursor >= 0 ||
+        right->segments != NULL || left->maxRef >= 0 ||
+        (right->cursorOffset != 0 && right->cursor < 0) ||
+        (right->cursorOffset > (left->text.length() - left->post)) ||
+        (-right->cursorOffset > left->ante)) {
+        return syntaxError(RuleBasedTransliterator::MALFORMED_RULE, rule, start);
+    }
+
+    // Check integrity of segments and segment references.  Each
+    // segment's start must have a corresponding limit, and the
+    // references must not refer to segments that do not exist.
+    if (left->segments != NULL) {
+        int n = left->segments->size();
+        if (n % 2 != 0) {
+            return syntaxError(RuleBasedTransliterator::MISSING_SEGMENT_CLOSE, rule, start);
+        }
+        n /= 2;
+        if (right->maxRef > n) {
+            return syntaxError(RuleBasedTransliterator::UNDEFINED_SEGMENT_REFERENCE, rule, start);
+        }
+    }
+
+    data->ruleSet.addRule(new TransliterationRule(
+                                 left->text, left->ante, left->post,
+                                 right->text, right->cursor, right->cursorOffset,
+                                 left->createSegments(), status), status);
+
    return pos;
 }

@ -474,6 +735,9 @@ int32_t TransliterationRuleParser::syntaxError(int32_t parseErrorCode,
        if (end < 0) {
            end = rule.length();
        }
+        if (end > (start + 80)) { // In case end wasn't found
+            end = start + 80;
+        }
        rule.extractBetween(start, end, parseError->context); // Current rule
    }
    status = U_ILLEGAL_ARGUMENT_ERROR;
@ -481,20 +745,52 @@ int32_t TransliterationRuleParser::syntaxError(int32_t parseErrorCode,
 }

 /**
- * Allocate a private-use substitution character for the given set,
- * register it in the setVariables hash, and return the substitution
- * character.
+ * Parse a UnicodeSet out, store it, and return the stand-in character
+ * used to represent it.
 */
-UChar TransliterationRuleParser::registerSet(UnicodeSet* adoptedSet) {
+UChar TransliterationRuleParser::parseSet(const UnicodeString& rule,
+                                          ParsePosition& pos) {
+    UnicodeSet* set = new UnicodeSet(rule, pos, *parseData, status);
    if (variableNext >= variableLimit) {
        // throw new RuntimeException("Private use variables exhausted");
+        // The set has not been stored in setVariablesVector, so this
+        // local is the only pointer to it; free it before bailing out.
+        delete set;
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
-    setVariablesVector.addElement(adoptedSet);
+    setVariablesVector.addElement(set);
    return variableNext++;
 }

+/**
+ * Append the value of the given variable name to the given
+ * UnicodeString.
+ */
+void TransliterationRuleParser::appendVariableDef(const UnicodeString& name,
+                                                  UnicodeString& buf) {
+    const UnicodeString* value =
+        (const UnicodeString*) data->variableNames->get(name);
+
+    // Known variable: append its stored definition and finish.
+    if (value != NULL) {
+        buf.append(*value);
+        return;
+    }
+
+    // Only one undefined name is tolerated at a time (it may be the
+    // variable that the current statement is defining).  A further
+    // unknown name is an error.
+    if (undefinedVariableName.length() != 0) {
+        //throw new IllegalArgumentException("Undefined variable $"
+        //                                   + name);
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    // First undefined name: remember it, then hand out the placeholder
+    // character variableLimit-1 by shrinking variableLimit.
+    undefinedVariableName = name;
+    if (variableNext >= variableLimit) {
+        // throw new RuntimeException("Private use variables exhausted");
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    buf.append((UChar) --variableLimit);
+}
+
 /**
 * Determines what part of the private use region of Unicode we can use for
 * variable stand-ins.  The correct way to do this is as follows: Parse each
@ -511,7 +807,9 @@ void TransliterationRuleParser::determineVariableRange(void) {
    data->setVariablesBase = variableNext = variableLimit = (UChar) 0;
    
    if (r != 0) {
-        data->setVariablesBase = variableNext = r->start;
+        // Allocate 9 characters for segment references 1 through 9
+        data->segmentBase = r->start;
+        data->setVariablesBase = variableNext = (UChar) (data->segmentBase + 9);
        variableLimit = (UChar) (r->start + r->length);
        delete r;
    }
--- a/icu4c/source/i18n/rbt_pars.h
+++ b/icu4c/source/i18n/rbt_pars.h
@ -15,6 +15,8 @@
 class TransliterationRuleData;
 class UnicodeSet;
 class ParseData;
+class RuleHalf;
+class ParsePosition;

 class TransliterationRuleParser {

@ -67,25 +69,16 @@ class TransliterationRuleParser {
     */
    UChar variableLimit;

-    // Operators
-    static const UChar VARIABLE_DEF_OP;
-    static const UChar FORWARD_RULE_OP;
-    static const UChar REVERSE_RULE_OP;
-    static const UChar FWDREV_RULE_OP; // internal rep of <> op
-    static const UnicodeString OPERATORS;
+    /**
+     * When we encounter an undefined variable, we do not immediately signal
+     * an error, in case we are defining this variable, e.g., "$a = [a-z];".
+     * Instead, we save the name of the undefined variable, and substitute
+     * in the placeholder char variableLimit - 1, and decrement
+     * variableLimit.
+     */
+    UnicodeString undefinedVariableName;

-    // Other special characters
-    static const UChar QUOTE;
-    static const UChar ESCAPE;
-    static const UChar END_OF_RULE;
-    static const UChar RULE_COMMENT_CHAR;
-    static const UChar VARIABLE_REF_OPEN;
-    static const UChar VARIABLE_REF_CLOSE;
-    static const UChar CONTEXT_OPEN;
-    static const UChar CONTEXT_CLOSE;
-    static const UChar SET_OPEN;
-    static const UChar SET_CLOSE;
-    static const UChar CURSOR_POS;
+    static const UnicodeString gOPERATORS;

 public:

@ -151,8 +144,22 @@ private:
     * register it in the setVariables hash, and return the substitution
     * character.
     */
-    UChar registerSet(UnicodeSet* adoptedSet);
+    //UChar registerSet(UnicodeSet* adoptedSet);
 
+    /**
+     * Parse a UnicodeSet out, store it, and return the stand-in character
+     * used to represent it.
+     */
+    UChar parseSet(const UnicodeString& rule,
+                   ParsePosition& pos);
+
+    /**
+     * Append the value of the given variable name to the given
+     * UnicodeString.
+     */
+    void appendVariableDef(const UnicodeString& name,
+                           UnicodeString& buf);
+        
    /**
     * Determines what part of the private use region of Unicode we can use for
     * variable stand-ins.  The correct way to do this is as follows: Parse each
@ -178,6 +185,8 @@ private:
    static int32_t quotedIndexOf(const UnicodeString& text,
                                 int32_t start, int32_t limit,
                                 UChar c);
+
+    friend class RuleHalf;
 };

 #endif
--- a/icu4c/source/i18n/rbt_rule.cpp
+++ b/icu4c/source/i18n/rbt_rule.cpp
@ -13,6 +13,38 @@
 #include "unicode/unifilt.h"
 #include "unicode/uniset.h"

+/**
+ * Construct a new rule with the given input, output text, and other
+ * attributes.  A cursor position may be specified for the output text.
+ * @param input input string, including key and optional ante and
+ * post context
+ * @param anteContextPos offset into input to end of ante context, or -1 if
+ * none.  Must be <= input.length() if not -1.
+ * @param postContextPos offset into input to start of post context, or -1
+ * if none.  Must be <= input.length() if not -1, and must be >=
+ * anteContextPos.
+ * @param output output string
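+ * @param cursorPos offset into output at which cursor is located, or -1 if
+ * none.  If less than zero, then the cursor is placed after the
+ * <code>output</code>; that is, -1 is equivalent to
+ * <code>output.length()</code>.  If greater than
+ * <code>output.length()</code> then <code>status</code> is set to
+ * <code>U_ILLEGAL_ARGUMENT_ERROR</code>.
+ * @param cursorOffset signed adjustment that is added to the resolved
+ * <code>cursorPos</code> to give the final cursor position; zero leaves
+ * the cursor where <code>cursorPos</code> puts it.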
+ * @param adoptedSegs array of 2n integers.  Each of n pairs consists of offset,
+ * limit for a segment of the input string.  Characters in the output string
+ * refer to these segments if they are in a special range determined by the
+ * associated RuleBasedTransliterator.Data object.  May be null if there are
+ * no segments.
+ */
+TransliterationRule::TransliterationRule(const UnicodeString& input,
+                                         int32_t anteContextPos, int32_t postContextPos,
+                                         const UnicodeString& output,
+                                         int32_t cursorPos, int32_t cursorOffset,
+                                         int32_t* adoptedSegs,
+                                         UErrorCode& status) :
+    // init() returns early on failure without assigning 'segments',
+    // and the destructor delete[]s it unconditionally, so it must
+    // start out null.
+    segments(NULL) {
+    init(input, anteContextPos, postContextPos,
+         output, cursorPos, cursorOffset, adoptedSegs, status);
+    // The array is adopted whether or not construction succeeds.  If
+    // init() bailed out before taking it over, release it here so it
+    // is not leaked.  After a successful init() the two pointers are
+    // equal and nothing is freed.
+    if (segments != adoptedSegs) {
+        delete[] adoptedSegs;
+    }
+}
+
 /**
 * Construct a new rule with the given input, output text, and other
 * attributes.  A cursor position may be specified for the output text.
@ -35,6 +67,16 @@ TransliterationRule::TransliterationRule(const UnicodeString& input,
                                         const UnicodeString& output,
                                         int32_t cursorPos,
                                         UErrorCode& status) {
+    init(input, anteContextPos, postContextPos,
+         output, cursorPos, 0, NULL, status);
+}
+
+void TransliterationRule::init(const UnicodeString& input,
+                               int32_t anteContextPos, int32_t postContextPos,
+                               const UnicodeString& output,
+                               int32_t cursorPos, int32_t cursorOffset,
+                               int32_t* adoptedSegs,
+                               UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }
@ -61,35 +103,24 @@ TransliterationRule::TransliterationRule(const UnicodeString& input,
        keyLength = postContextPos - anteContextLength;
    }
    if (cursorPos < 0) {
-        this->cursorPos = output.length();
+        cursorPos = output.length();
    } else {
        if (cursorPos > output.length()) {
            // throw new IllegalArgumentException("Invalid cursor position");
            status = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
-        this->cursorPos = cursorPos;
    }
+    this->cursorPos = cursorPos + cursorOffset;
    pattern = input;
    this->output = output;
+    // We don't validate the segments array.  The caller must
+    // guarantee that the segments are well-formed.
+    this->segments = adoptedSegs;
 }

-TransliterationRule::~TransliterationRule() {}
-
-/**
- * Return the length of the key.  Equivalent to <code>getKey().length()</code>.
- * @return the length of the match key.
- */
-int32_t TransliterationRule::getKeyLength(void) const {
-    return keyLength;
-}
-
-/**
- * Return the output string.
- * @return the output string.
- */
-const UnicodeString& TransliterationRule::getOutput(void) const {
-    return output;
+TransliterationRule::~TransliterationRule() {
+    // Free the segment offset array that init() stored from its
+    // adoptedSegs argument.  It may be null when the rule has no
+    // segments; delete[] on a null pointer does nothing.
+    delete[] segments;
 }

 /**
@ -115,7 +146,7 @@ int32_t TransliterationRule::getAnteContextLength(void) const {
 * unless the first character of the key is a set.  If it's a
 * set, or otherwise can match multiple keys, the index value is -1.
 */
-int16_t TransliterationRule::getIndexValue(const TransliterationRuleData& data) {
+int16_t TransliterationRule::getIndexValue(const TransliterationRuleData& data) const {
    if (anteContextLength == pattern.length()) {
        // A pattern with just ante context {such as foo)>bar} can
        // match any key.
@ -125,6 +156,71 @@ int16_t TransliterationRule::getIndexValue(const TransliterationRuleData& data)
    return data.lookupSet(c) == NULL ? (c & 0xFF) : -1;
 }

+/**
+ * Do a replacement of the input pattern with the output text in
+ * the given string, at the given offset.  This method assumes
+ * that a match has already been found in the given text at the
+ * given position.
+ * @param text the text containing the substring to be replaced
+ * @param offset the offset into the text at which the pattern
+ * matches.  This is the offset to the point after the ante
+ * context, if any, and before the match string and any post
+ * context.
+ * @param data the RuleBasedTransliterator.Data object specifying
+ * context for this transliterator.
+ * @return the change in the length of the text
+ */
+int32_t TransliterationRule::replace(Replaceable& text, int32_t offset,
+                                     const TransliterationRuleData& data) const {
+    // End of the matched key in 'text'.
+    const int32_t keyLimit = offset + keyLength;
+
+    // Without segments the output is plain text: overwrite the key with
+    // it in a single call.
+    if (segments == NULL) {
+        text.handleReplaceBetween(offset, keyLimit, output);
+        return output.length() - keyLength;
+    }
+
+    /* With segments, the replacement is assembled directly after the
+     * key and the key is deleted at the very end.  Segment text is
+     * moved with Replaceable::copy() so that out-of-band data travels
+     * with it.  Literal characters are batched in a buffer and flushed
+     * only when a segment reference (or the end of the output) is
+     * reached, which keeps the number of Replaceable calls small.
+     */
+    const int32_t patternStart = offset - anteContextLength; // text position of pattern offset 0
+    int32_t insertAt = keyLimit;                             // next write position
+    UnicodeString literal;                                   // pending non-segment characters
+    const int32_t outputLength = output.length();
+
+    for (int32_t i = 0; i < outputLength; ++i) {
+        const UChar ch = output.charAt(i);
+        const int32_t ref = data.lookupSegmentReference(ch);
+        if (ref < 0) {
+            // Not a segment reference: queue the character.
+            literal.append(ch);
+            continue;
+        }
+        // Flush queued literal text ahead of the segment.
+        if (literal.length() > 0) {
+            text.handleReplaceBetween(insertAt, insertAt, literal);
+            insertAt += literal.length();
+            literal.remove();
+        }
+        // segments[] holds (start, limit) pairs relative to the pattern.
+        const int32_t segStart = segments[2 * ref];
+        const int32_t segLimit = segments[2 * ref + 1];
+        text.copy(patternStart + segStart, patternStart + segLimit, insertAt);
+        insertAt += segLimit - segStart;
+    }
+
+    // Flush whatever literal text followed the last segment reference.
+    if (literal.length() > 0) {
+        text.handleReplaceBetween(insertAt, insertAt, literal);
+        insertAt += literal.length();
+    }
+
+    // Remove the key by replacing it with an empty string.
+    literal.remove();
+    text.handleReplaceBetween(offset, keyLimit, literal);
+
+    // (characters inserted after the key) - (characters removed).
+    return insertAt - keyLimit - keyLength;
+}
+
 /**
 * Internal method.  Returns true if this rule matches the given
 * index value.  The index value is an 8-bit integer, 0..255,
@ -136,7 +232,7 @@ int16_t TransliterationRule::getIndexValue(const TransliterationRuleData& data)
 * then it will match any key.
 */
 UBool TransliterationRule::matchesIndexValue(uint8_t v,
-                                   const TransliterationRuleData& data) {
+                                   const TransliterationRuleData& data) const {
    if (anteContextLength == pattern.length()) {
        // A pattern with just ante context {such as foo)>bar} can
        // match any key.
--- a/icu4c/source/i18n/rbt_rule.h
+++ b/icu4c/source/i18n/rbt_rule.h
@ -86,6 +86,21 @@ private:
     */
    UnicodeString output;

+    /**
+     * Array of segments.  These are segments of the input string that may be
+     * referenced and appear in the output string.  Each segment is stored as an
+     * offset, limit pair.  Segments are referenced by a 1-based index;
+     * reference i thus includes characters at offset segments[2*i-2] to
+     * segments[2*i-1]-1 in the pattern string.
+     *
+     * In the output string, a segment reference is indicated by a character in
+     * a special range, as defined by RuleBasedTransliterator.Data.
+     *
+     * Most rules have no segments, in which case segments is null, and the
+     * output string need not be checked for segment reference characters.
+     */
+    int32_t* segments;
+
    /**
     * The length of the string that must match before the key.  If
     * zero, then there is no matching requirement before the key.
@ -109,6 +124,35 @@ private:

 public:

+    /**
+     * Construct a new rule with the given input, output text, and other
+     * attributes.  A cursor position may be specified for the output text.
+     * @param input input string, including key and optional ante and
+     * post context
+     * @param anteContextPos offset into input to end of ante context, or -1 if
+     * none.  Must be <= input.length() if not -1.
+     * @param postContextPos offset into input to start of post context, or -1
+     * if none.  Must be <= input.length() if not -1, and must be >=
+     * anteContextPos.
+     * @param output output string
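+     * @param cursorPos offset into output at which cursor is located, or -1 if
+     * none.  If less than zero, then the cursor is placed after the
+     * <code>output</code>; that is, -1 is equivalent to
+     * <code>output.length()</code>.  If greater than
+     * <code>output.length()</code> then <code>status</code> is set to
+     * <code>U_ILLEGAL_ARGUMENT_ERROR</code>.
+     * @param cursorOffset signed adjustment that is added to the resolved
+     * <code>cursorPos</code> to give the final cursor position; zero leaves
+     * the cursor where <code>cursorPos</code> puts it.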
+     * @param adoptedSegs array of 2n integers.  Each of n pairs consists of offset,
+     * limit for a segment of the input string.  Characters in the output string
+     * refer to these segments if they are in a special range determined by the
+     * associated RuleBasedTransliterator.Data object.  May be null if there are
+     * no segments.
+     */
+    TransliterationRule(const UnicodeString& input,
+                        int32_t anteContextPos, int32_t postContextPos,
+                        const UnicodeString& output,
+                        int32_t cursorPos, int32_t cursorOffset,
+                        int32_t* adoptedSegs,
+                        UErrorCode& status);
+
    /**
     * Construct a new rule with the given input, output text, and other
     * attributes.  A cursor position may be specified for the output text.
@ -137,18 +181,6 @@ public:
     */
    virtual ~TransliterationRule();

-    /**
-     * Return the length of the key.  Equivalent to <code>getKey().length()</code>.
-     * @return the length of the match key.
-     */
-    virtual int32_t getKeyLength(void) const;
-
-    /**
-     * Return the output string.
-     * @return the output string.
-     */
-    virtual const UnicodeString& getOutput(void) const;
-
    /**
     * Return the position of the cursor within the output string.
     * @return a value from 0 to <code>getOutput().length()</code>, inclusive.
@ -168,7 +200,24 @@ public:
     * unless the first character of the key is a set.  If it's a
     * set, or otherwise can match multiple keys, the index value is -1.
     */
-    int16_t getIndexValue(const TransliterationRuleData& data);
+    int16_t getIndexValue(const TransliterationRuleData& data) const;
+
+    /**
+     * Do a replacement of the input pattern with the output text in
+     * the given string, at the given offset.  This method assumes
+     * that a match has already been found in the given text at the
+     * given position.
+     * @param text the text containing the substring to be replaced
+     * @param offset the offset into the text at which the pattern
+     * matches.  This is the offset to the point after the ante
+     * context, if any, and before the match string and any post
+     * context.
+     * @param data the TransliterationRuleData object specifying
+     * context for this transliterator.
+     * @return the change in the length of the text
+     */
+    int32_t replace(Replaceable& text, int32_t offset,
+                    const TransliterationRuleData& data) const;

    /**
     * Internal method.  Returns true if this rule matches the given
@ -181,7 +230,7 @@ public:
     * then it will match any key.
     */
    UBool matchesIndexValue(uint8_t v,
-                             const TransliterationRuleData& data);
+                             const TransliterationRuleData& data) const;

    /**
     * Return true if this rule masks another rule.  If r1 masks r2 then
@ -289,6 +338,15 @@ public:
    virtual UBool charMatches(UChar keyChar, UChar textChar,
                               const TransliterationRuleData& data,
                               const UnicodeFilter* filter) const;
+
+private:
+
+    void init(const UnicodeString& input,
+              int32_t anteContextPos, int32_t postContextPos,
+              const UnicodeString& output,
+              int32_t cursorPos, int32_t cursorOffset,
+              int32_t* adoptedSegs,
+              UErrorCode& status);
 };

 #endif
--- a/icu4c/source/i18n/symtable.h
+++ b/icu4c/source/i18n/symtable.h
@ -10,20 +10,56 @@
 #ifndef SYMTABLE_H
 #define SYMTABLE_H

+class ParsePosition;
+class UnicodeSet;
+class UnicodeString;
+
 /**
- * An abstract class that maps strings to objects.
+ * An interface that maps strings to objects.  This interface defines
+ * both lookup protocol and parsing.  This allows different components
+ * to share a symbol table and to handle name parsing uniformly.  It
+ * is expected that client parse code look for the SYMBOL_REF
+ * character and, when seen, attempt to parse the characters after it
+ * using parseReference().
+ *
+ * <p>Currently, RuleBasedTransliterator and UnicodeSet use this
+ * interface to share variable definitions.
 */
 class SymbolTable {
 public:

    /**
-     * Lookup the object associated with this string and return it.
-     * Return U_ILLEGAL_ARGUMENT_ERROR status if the name does not
-     * exist.  Return a non-NULL set if the name is mapped to a set;
-     * otherwise return a NULL set.
+     * The character preceding a symbol reference name.
     */
-    virtual void lookup(const UnicodeString& name, UChar& c, UnicodeSet*& set,
-                        UErrorCode& status) const = 0;
+    enum { SYMBOL_REF = 0x0024 /*$*/ };
+
+    /**
+     * Look up the characters associated with this string and return them.
+     * Return <tt>NULL</tt> if no such name exists.  The resultant
+     * string may have length zero.
+     */
+    virtual const UnicodeString* lookup(const UnicodeString& s) const = 0;
+
+    /**
+     * Look up the UnicodeSet associated with the given character, and
+     * return it.  Return <tt>NULL</tt> if not found.
+     */
+    virtual const UnicodeSet* lookupSet(UChar ch) const = 0;
+
+    /**
+     * Parse a symbol reference name from the given string, starting
+     * at the given position.  If no valid symbol reference name is
+     * found, throw an exception.
+     * @param text the text to parse for the name
+     * @param pos on entry, the index of the first character to parse.
+     * This is the character following the SYMBOL_REF character.  On
+     * exit, the index after the last parsed character.
+     * @param limit the index after the last character to be parsed.
+     * @return the parsed name.
+     * @exception IllegalArgumentException if no valid name is found.
+     */
+    virtual UnicodeString parseReference(const UnicodeString& text,
+                                         ParsePosition& pos, int32_t limit) const = 0;
 };

 #endif
--- a/icu4c/source/i18n/unicode/rbt.h
+++ b/icu4c/source/i18n/unicode/rbt.h
@ -17,194 +17,237 @@
 class TransliterationRuleData;

 /**
- * A transliterator that reads a set of rules in order to determine how to perform
- * translations. Rules are stored in resource bundles indexed by name. Rules are separated by
- * semicolons (';'). To include a literal semicolon, prefix it with a backslash ('\;').
- * Whitespace, as defined by <code>Character.isWhitespace()</code>, is ignored. If the first
- * non-blank character on a line is '#', the entire line is ignored as a comment. </p>
+ * <code>RuleBasedTransliterator</code> is a transliterator
+ * that reads a set of rules in order to determine how to perform
+ * translations. Rule sets are stored in resource bundles indexed by
+ * name. Rules within a rule set are separated by semicolons (';').
+ * To include a literal semicolon, prefix it with a backslash ('\').
+ * Whitespace, as defined by <code>Character.isWhitespace()</code>,
+ * is ignored. If the first non-blank character on a line is '#',
+ * the entire line is ignored as a comment. </p>
 * 
- * <p>Each set of rules consists of two groups, one forward, and one reverse. This is a
- * convention that is not enforced; rules for one direction may be omitted, with the result
- * that translations in that direction will not modify the source text. </p>
+ * <p>Each set of rules consists of two groups, one forward, and one
+ * reverse. This is a convention that is not enforced; rules for one
+ * direction may be omitted, with the result that translations in
+ * that direction will not modify the source text. In addition,
+ * bidirectional forward-reverse rules may be specified for
+ * symmetrical transformations.</p>
 * 
 * <p><b>Rule syntax</b> </p>
 * 
- * <p>Rule statements take one of the following forms: 
+ * <p>Rule statements take one of the following forms: </p>
 * 
 * <dl>
- *   <dt><code>alefmadda=\u0622</code></dt>
- *   <dd><strong>Variable definition.</strong> The name on the left is assigned the character or
- *     expression on the right. Names may not contain any special characters (see list below).
- *     Duplicate names (including duplicates of simple variables or category names) cause an
- *     exception to be thrown. If the right hand side consists of one character, then the
- *     variable stands for that character. In this example, after this statement, instances of
- *     the left hand name surrounded by braces, &quot;<code>{alefmadda}</code>&quot;, will be
- *     replaced by the Unicode character U+0622. If the right hand side is longer than one
- *     character, then it is interpreted as a character category expression; see below for
- *     details.</dd>
- *   <dt>&nbsp;</dt>
- *   <dt><code>softvowel=[eiyEIY]</code></dt>
- *   <dd><strong>Category definition.</strong> The name on the left is assigned to stand for a
- *     set of characters. The same rules for names of simple variables apply. After this
- *     statement, the left hand variable will be interpreted as indicating a set of characters in
- *     appropriate contexts. The pattern syntax defining sets of characters is defined by {@link
- *     UnicodeSet}. Examples of valid patterns are:<table>
- *       <tr valign="top">
- *         <td nowrap><code>[abc]</code></td>
- *         <td>The set containing the characters 'a', 'b', and 'c'.</td>
- *       </tr>
- *       <tr valign="top">
- *         <td nowrap><code>[^abc]</code></td>
- *         <td>The set of all characters <em>except</em> 'a', 'b', and 'c'.</td>
- *       </tr>
- *       <tr valign="top">
- *         <td nowrap><code>[A-Z]</code></td>
- *         <td>The set of all characters from 'A' to 'Z' in Unicode order.</td>
- *       </tr>
- *       <tr valign="top">
- *         <td nowrap><code>[:Lu:]</code></td>
- *         <td>The set of Unicode uppercase letters. See <a href="http://www.unicode.org">www.unicode.org</a>
- *         for a complete list of categories and their two-letter codes.</td>
- *       </tr>
- *       <tr valign="top">
- *         <td nowrap><code>[^a-z[:Lu:][:Ll:]]</code></td>
- *         <td>The set of all characters <em>except</em> 'a' through 'z' and uppercase or lowercase
- *         letters.</td>
- *       </tr>
- *     </table>
- *     <p>See {@link UnicodeSet} for more documentation and examples. </p>
- *   </dd>
- *   <dt><code>ai&gt;{alefmadda}</code></dt>
- *   <dd><strong>Forward translation rule.</strong> This rule states that the string on the left
- *     will be changed to the string on the right when performing forward transliteration.</dd>
- *   <dt>&nbsp;</dt>
- *   <dt><code>ai&lt;{alefmadda}</code></dt>
- *   <dd><strong>Reverse translation rule.</strong> This rule states that the string on the right
- *     will be changed to the string on the left when performing reverse transliteration.</dd>
+ *     <dt><code>$alefmadda=\u0622;</code></dt>
+ *     <dd><strong>Variable definition.</strong> The name on the
+ *         left is assigned the text on the right. In this example,
+ *         after this statement, instances of the left hand name,
+ *         &quot;<code>$alefmadda</code>&quot;, will be replaced by
+ *         the Unicode character U+0622. Variable names must begin
+ *         with a letter and consist only of letters, digits, and
+ *         underscores. Case is significant. Duplicate names cause
+ *         an exception to be thrown, that is, variables cannot be
+ *         redefined. The right hand side may contain well-formed
+ *         text of any length, including no text at all (&quot;<code>$empty=;</code>&quot;).
+ *         The right hand side may contain embedded <code>UnicodeSet</code>
+ *         patterns, for example, &quot;<code>$softvowel=[eiyEIY]</code>&quot;.</dd>
+ *     <dd>&nbsp;</dd>
+ *     <dt><code>ai&gt;$alefmadda;</code></dt>
+ *     <dd><strong>Forward translation rule.</strong> This rule
+ *         states that the string on the left will be changed to the
+ *         string on the right when performing forward
+ *         transliteration.</dd>
+ *     <dt>&nbsp;</dt>
+ *     <dt><code>ai&lt;$alefmadda;</code></dt>
+ *     <dd><strong>Reverse translation rule.</strong> This rule
+ *         states that the string on the right will be changed to
+ *         the string on the left when performing reverse
+ *         transliteration.</dd>
 * </dl>
 * 
 * <dl>
- *   <dt><code>ai&lt;&gt;{alefmadda}</code></dt>
- *   <dd><strong>Bidirectional translation rule.</strong> This rule states that the string on the
- *     right will be changed to the string on the left when performing forward transliteration,
- *     and vice versa when performing reverse transliteration.</dd>
+ *     <dt><code>ai&lt;&gt;$alefmadda;</code></dt>
+ *     <dd><strong>Bidirectional translation rule.</strong> This
+ *         rule states that the string on the left will be changed
+ *         to the string on the right when performing forward
+ *         transliteration, and vice versa when performing reverse
+ *         transliteration.</dd>
 * </dl>
 * 
- * <p>Forward and reverse translation rules consist of a <em>match pattern</em> and an <em>output
- * string</em>. The match pattern consists of literal characters, optionally preceded by
- * context, and optionally followed by context. Context characters, like literal pattern
- * characters, must be matched in the text being transliterated. However, unlike literal
- * pattern characters, they are not replaced by the output text. For example, the pattern
- * &quot;<code>(abc)def</code>&quot; indicates the characters &quot;<code>def</code>&quot;
- * must be preceded by &quot;<code>abc</code>&quot; for a successful match. If there is a
- * successful match, &quot;<code>def</code>&quot; will be replaced, but not &quot;<code>abc</code>&quot;.
- * The initial '<code>(</code>' is optional, so &quot;<code>abc)def</code>&quot; is
- * equivalent to &quot;<code>(abc)def</code>&quot;. Another example is &quot;<code>123(456)</code>&quot;
- * (or &quot;<code>123(456</code>&quot;) in which the literal pattern &quot;<code>123</code>&quot;
- * must be followed by &quot;<code>456</code>&quot;. </p>
+ * <p>Translation rules consist of a <em>match pattern</em> and an <em>output
+ * string</em>. The match pattern consists of literal characters,
+ * optionally preceded by context, and optionally followed by
+ * context. Context characters, like literal pattern characters,
+ * must be matched in the text being transliterated. However, unlike
+ * literal pattern characters, they are not replaced by the output
+ * text. For example, the pattern &quot;<code>abc{def}</code>&quot;
+ * indicates the characters &quot;<code>def</code>&quot; must be
+ * preceded by &quot;<code>abc</code>&quot; for a successful match.
+ * If there is a successful match, &quot;<code>def</code>&quot; will
+ * be replaced, but not &quot;<code>abc</code>&quot;. The final '<code>}</code>'
+ * is optional, so &quot;<code>abc{def</code>&quot; is equivalent to
+ * &quot;<code>abc{def}</code>&quot;. Another example is &quot;<code>{123}456</code>&quot;
+ * (or &quot;<code>123}456</code>&quot;) in which the literal
+ * pattern &quot;<code>123</code>&quot; must be followed by &quot;<code>456</code>&quot;.
+ * </p>
 * 
- * <p>The output string of a forward or reverse rule consists of characters to replace the
- * literal pattern characters. If the output string contains the character '<code>|</code>',
- * this is taken to indicate the location of the <em>cursor</em> after replacement. The
- * cursor is the point in the text at which the next replacement, if any, will be applied. </p>
- * 
- * <p>In addition to being defined in variables, <code>UnicodeSet</code> patterns may be
- * embedded directly into rule strings. Thus, the following two rules are equivalent:</p>
+ * <p>The output string of a forward or reverse rule consists of
+ * characters to replace the literal pattern characters. If the
+ * output string contains the character '<code>|</code>', this is
+ * taken to indicate the location of the <em>cursor</em> after
+ * replacement. The cursor is the point in the text at which the
+ * next replacement, if any, will be applied. The cursor is usually
+ * placed within the replacement text; however, it can actually be
+ * placed into the preceding or following context by using the
+ * special character '<code>@</code>'. Examples:</p>
 * 
 * <blockquote>
- *   <p><code>vowel=[aeiou]; {vowel}&gt;*; # One way to do this<br>
- *   [aeiou]&gt;*;
- *   &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; #
- *   Another way</code></p>
+ *     <p><code>a {foo} z &gt; | @ bar; # foo -&gt; bar, move cursor
+ *     before a<br>
+ *     {foo} xyz &gt; bar @@|; #&nbsp;foo -&gt; bar, cursor between
+ *     y and z</code></p>
 * </blockquote>
 * 
+ * <p><b>UnicodeSet</b></p>
+ * 
+ * <p><code>UnicodeSet</code> patterns may appear anywhere that
+ * makes sense. They may appear in variable definitions.
+ * Contrariwise, <code>UnicodeSet</code> patterns may themselves
+ * contain variable references, such as &quot;<code>$a=[a-z];$not_a=[^$a]</code>&quot;,
+ * or &quot;<code>$range=a-z;$ll=[$range]</code>&quot;.</p>
+ * 
+ * <p><code>UnicodeSet</code> patterns may also be embedded directly
+ * into rule strings. Thus, the following two rules are equivalent:</p>
+ * 
+ * <blockquote>
+ *     <p><code>$vowel=[aeiou]; $vowel&gt;'*'; # One way to do this<br>
+ *     [aeiou]&gt;'*';
+ *     &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;#
+ *     Another way</code></p>
+ * </blockquote>
+ * 
+ * <p>See {@link UnicodeSet} for more documentation and examples.</p>
+ * 
+ * <p><b>Segments</b></p>
+ * 
+ * <p>Segments of the input string can be matched and copied to the
+ * output string. This makes certain sets of rules simpler and more
+ * general, and makes reordering possible. For example:</p>
+ * 
+ * <blockquote>
+ *     <p><code>([a-z]) &gt; $1 $1;
+ *     &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;#
+ *     double lowercase letters<br>
+ *     ([:Lu:]) ([:Ll:]) &gt; $2 $1; # reverse order of Lu-Ll pairs</code></p>
+ * </blockquote>
+ * 
+ * <p>The segment of the input string to be copied is delimited by
+ * &quot;<code>(</code>&quot; and &quot;<code>)</code>&quot;. Up to
+ * nine segments may be defined. Segments may not overlap. In the
+ * output string, &quot;<code>$1</code>&quot; through &quot;<code>$9</code>&quot;
+ * represent the input string segments, in left-to-right order of
+ * definition.</p>
+ * 
 * <p><b>Example</b> </p>
 * 
- * <p>The following example rules illustrate many of the features of the rule language. </p>
+ * <p>The following example rules illustrate many of the features of
+ * the rule language. </p>
 * 
- * <table cellpadding="4">
- *   <tr valign="top">
- *     <td>Rule 1.</td>
- *     <td nowrap><code>(abc)def&gt;x|y</code></td>
- *   </tr>
- *   <tr valign="top">
- *     <td>Rule 2.</td>
- *     <td nowrap><code>xyz&gt;r</code></td>
- *   </tr>
- *   <tr valign="top">
- *     <td>Rule 3.</td>
- *     <td nowrap><code>yz&gt;q</code></td>
- *   </tr>
+ * <table border="0" cellpadding="4">
+ *     <tr>
+ *         <td valign="top">Rule 1.</td>
+ *         <td valign="top" nowrap><code>abc{def}&gt;x|y</code></td>
+ *     </tr>
+ *     <tr>
+ *         <td valign="top">Rule 2.</td>
+ *         <td valign="top" nowrap><code>xyz&gt;r</code></td>
+ *     </tr>
+ *     <tr>
+ *         <td valign="top">Rule 3.</td>
+ *         <td valign="top" nowrap><code>yz&gt;q</code></td>
+ *     </tr>
 * </table>
 * 
- * <p>Applying these rules to the string &quot;<code>adefabcdefz</code>&quot; yields the
- * following results: </p>
+ * <p>Applying these rules to the string &quot;<code>adefabcdefz</code>&quot;
+ * yields the following results: </p>
 * 
- * <table cellpadding="4">
- *   <tr valign="top">
- *     <td nowrap><code>|adefabcdefz</code></td>
- *     <td>Initial state, no rules match. Advance cursor.</td>
- *   </tr>
- *   <tr valign="top">
- *     <td nowrap><code>a|defabcdefz</code></td>
- *     <td>Still no match. Rule 1 does not match because the preceding context is not present.</td>
- *   </tr>
- *   <tr valign="top">
- *     <td nowrap><code>ad|efabcdefz</code></td>
- *     <td>Still no match. Keep advancing until there is a match...</td>
- *   </tr>
- *   <tr valign="top">
- *     <td nowrap><code>ade|fabcdefz</code></td>
- *     <td>...</td>
- *   </tr>
- *   <tr valign="top">
- *     <td nowrap><code>adef|abcdefz</code></td>
- *     <td>...</td>
- *   </tr>
- *   <tr valign="top">
- *     <td nowrap><code>adefa|bcdefz</code></td>
- *     <td>...</td>
- *   </tr>
- *   <tr valign="top">
- *     <td nowrap><code>adefab|cdefz</code></td>
- *     <td>...</td>
- *   </tr>
- *   <tr valign="top">
- *     <td nowrap><code>adefabc|defz</code></td>
- *     <td>Rule 1 matches; replace &quot;<code>def</code>&quot; with &quot;<code>xy</code>&quot;
- *     and back up the cursor to before the '<code>y</code>'.</td>
- *   </tr>
- *   <tr valign="top">
- *     <td nowrap><code>adefabcx|yz</code></td>
- *     <td>Although &quot;<code>xyz</code>&quot; is present, rule 2 does not match because the
- *     cursor is before the '<code>y</code>', not before the '<code>x</code>'. Rule 3 does match.
- *     Replace &quot;<code>yz</code>&quot; with &quot;<code>q</code>&quot;.</td>
- *   </tr>
- *   <tr valign="top">
- *     <td nowrap><code>adefabcxq|</code></td>
- *     <td>The cursor is at the end; transliteration is complete.</td>
- *   </tr>
+ * <table border="0" cellpadding="4">
+ *     <tr>
+ *         <td valign="top" nowrap><code>|adefabcdefz</code></td>
+ *         <td valign="top">Initial state, no rules match. Advance
+ *         cursor.</td>
+ *     </tr>
+ *     <tr>
+ *         <td valign="top" nowrap><code>a|defabcdefz</code></td>
+ *         <td valign="top">Still no match. Rule 1 does not match
+ *         because the preceding context is not present.</td>
+ *     </tr>
+ *     <tr>
+ *         <td valign="top" nowrap><code>ad|efabcdefz</code></td>
+ *         <td valign="top">Still no match. Keep advancing until
+ *         there is a match...</td>
+ *     </tr>
+ *     <tr>
+ *         <td valign="top" nowrap><code>ade|fabcdefz</code></td>
+ *         <td valign="top">...</td>
+ *     </tr>
+ *     <tr>
+ *         <td valign="top" nowrap><code>adef|abcdefz</code></td>
+ *         <td valign="top">...</td>
+ *     </tr>
+ *     <tr>
+ *         <td valign="top" nowrap><code>adefa|bcdefz</code></td>
+ *         <td valign="top">...</td>
+ *     </tr>
+ *     <tr>
+ *         <td valign="top" nowrap><code>adefab|cdefz</code></td>
+ *         <td valign="top">...</td>
+ *     </tr>
+ *     <tr>
+ *         <td valign="top" nowrap><code>adefabc|defz</code></td>
+ *         <td valign="top">Rule 1 matches; replace &quot;<code>def</code>&quot;
+ *         with &quot;<code>xy</code>&quot; and back up the cursor
+ *         to before the '<code>y</code>'.</td>
+ *     </tr>
+ *     <tr>
+ *         <td valign="top" nowrap><code>adefabcx|yz</code></td>
+ *         <td valign="top">Although &quot;<code>xyz</code>&quot; is
+ *         present, rule 2 does not match because the cursor is
+ *         before the '<code>y</code>', not before the '<code>x</code>'.
+ *         Rule 3 does match. Replace &quot;<code>yz</code>&quot;
+ *         with &quot;<code>q</code>&quot;.</td>
+ *     </tr>
+ *     <tr>
+ *         <td valign="top" nowrap><code>adefabcxq|</code></td>
+ *         <td valign="top">The cursor is at the end;
+ *         transliteration is complete.</td>
+ *     </tr>
 * </table>
 * 
- * <p>The order of rules is significant. If multiple rules may match at some point, the first
- * matching rule is applied. </p>
+ * <p>The order of rules is significant. If multiple rules may match
+ * at some point, the first matching rule is applied. </p>
 * 
- * <p>Forward and reverse rules may have an empty output string. Otherwise, an empty left or
- * right hand side of any statement is a syntax error. </p>
+ * <p>Forward and reverse rules may have an empty output string.
+ * Otherwise, an empty left or right hand side of any statement is a
+ * syntax error. </p>
 * 
- * <p>Single quotes are used to quote the special characters <code>=&gt;&lt;{}[]()|</code>.
- * To specify a single quote itself, inside or outside of quotes, use two single quotes in a
- * row. For example, the rule &quot;<code>'&gt;'&gt;o''clock</code>&quot; changes the string
- * &quot;<code>&gt;</code>&quot; to the string &quot;<code>o'clock</code>&quot;. </p>
+ * <p>Single quotes are used to quote any character other than a
+ * digit or letter. To specify a single quote itself, inside or
+ * outside of quotes, use two single quotes in a row. For example,
+ * the rule &quot;<code>'&gt;'&gt;o''clock</code>&quot; changes the
+ * string &quot;<code>&gt;</code>&quot; to the string &quot;<code>o'clock</code>&quot;.
+ * </p>
 * 
 * <p><b>Notes</b> </p>
 * 
- * <p>While a RuleBasedTransliterator is being built, it checks that the rules are added in
- * proper order. For example, if the rule &quot;a&gt;x&quot; is followed by the rule
- * &quot;ab&gt;y&quot;, then the second rule will throw an exception. The reason is that the
- * second rule can never be triggered, since the first rule always matches anything it
- * matches. In other words, the first rule <em>masks</em> the second rule. </p>
- *
+ * <p>While a RuleBasedTransliterator is being built, it checks that
+ * the rules are added in proper order. For example, if the rule
+ * &quot;a&gt;x&quot; is followed by the rule &quot;ab&gt;y&quot;,
+ * then the second rule will throw an exception. The reason is that
+ * the second rule can never be triggered, since the first rule
+ * always matches anything it matches. In other words, the first
+ * rule <em>masks</em> the second rule. </p>
+ * 
 * @author Alan Liu
 * @draft
 */
@ -312,20 +355,23 @@ public:
     */
    enum {
        PARSE_ERROR_BASE = 0x10000,
-        DUPLICATE_VARIABLE_DEFINITION,
-        MALFORMED_RHS,
+        BAD_VARIABLE_DEFINITION,
        MALFORMED_RULE,
        MALFORMED_SET,
+        MALFORMED_SYMBOL_REFERENCE,
        MALFORMED_UNICODE_ESCAPE,
+        MALFORMED_VARIABLE_DEFINITION,
        MALFORMED_VARIABLE_REFERENCE,
+        MISMATCHED_SEGMENT_DELIMITERS,
+        MISPLACED_CURSOR_OFFSET,
        MISSING_OPERATOR,
+        MISSING_SEGMENT_CLOSE,
        MULTIPLE_ANTE_CONTEXTS,
        MULTIPLE_CURSORS,
        MULTIPLE_POST_CONTEXTS,
-        TEXT_AFTER_CLOSE_CONTEXT,
        TRAILING_BACKSLASH,
+        UNDEFINED_SEGMENT_REFERENCE,
        UNDEFINED_VARIABLE,
-        UNEXPECTED_CLOSE_CONTEXT,
        UNQUOTED_SPECIAL,
        UNTERMINATED_QUOTE
    };
--- a/icu4c/source/i18n/unicode/uniset.h
+++ b/icu4c/source/i18n/unicode/uniset.h
@ -276,18 +276,6 @@ class U_I18N_API UnicodeSet : public UnicodeFilter {
     * ":]".  Example: "[:Lu:]".
     */
    static const UnicodeString CATEGORY_CLOSE;
-
-    /**
-     * Delimiter char beginning a variable reference:
-     * "{".  Example: "{var}".
-     */
-    static const UChar VARIABLE_REF_OPEN;
-    
-    /**
-     * Delimiter char ending a variable reference:
-     * "}".  Example: "{var}".
-     */
-    static const UChar VARIABLE_REF_CLOSE;
    
    // More special characters...
    static const UChar SET_OPEN;
--- a/icu4c/source/i18n/uniset.cpp
+++ b/icu4c/source/i18n/uniset.cpp
@ -30,18 +30,6 @@ UnicodeString* UnicodeSet::CATEGORY_PAIRS_CACHE =
 */
 const UnicodeString UnicodeSet::CATEGORY_CLOSE = UNICODE_STRING(":]", 2);

-/**
- * Delimiter char beginning a variable reference:
- * "{".  Example: "{var}".
- */
-const UChar UnicodeSet::VARIABLE_REF_OPEN = 0x007B /*{*/;
-
-/**
- * Delimiter char ending a variable reference:
- * "}".  Example: "{var}".
- */
-const UChar UnicodeSet::VARIABLE_REF_CLOSE = 0x007D /*}*/;
-
 // Define UChar constants using hex for EBCDIC compatibility
 const UChar UnicodeSet::SET_OPEN     = 0x005B; /*[*/
 const UChar UnicodeSet::SET_CLOSE    = 0x005D; /*]*/
@ -497,9 +485,15 @@ UnicodeString& UnicodeSet::parse(UnicodeString& pairsBuf /*result*/,
    int32_t i = pos.getIndex();
    int32_t limit = pattern.length();
    UnicodeString nestedAux;
-    UnicodeString* nestedPairs;
+    const UnicodeString* nestedPairs;
    UnicodeString scratch;
-    for (; i<limit; ++i) {
+    /* In the case of an embedded SymbolTable variable, we look it up and
+     * then take characters from the resultant UnicodeString.  These chars
+     * are subjected to an extra level of lookup in the SymbolTable in case
+     * they are stand-ins for a nested UnicodeSet.  */
+    const UnicodeString* varValueBuffer = NULL;
+    int32_t ivarValueBuffer = 0;
+    for (; i<limit; i+=((varValueBuffer==NULL)?1:0)) {
        /* If the next element is a single character, c will be set to it,
         * and nestedPairs will be null.  In this case isLiteral indicates
         * whether the character should assume special meaning if it has
@ -508,9 +502,24 @@ UnicodeString& UnicodeSet::parse(UnicodeString& pairsBuf /*result*/,
         * nestedPairs will be set to the pairs list for the nested set, and
         * c's value should be ignored.
         */
-        UChar c = pattern.charAt(i);
        nestedPairs = NULL;
        UBool isLiteral = FALSE;
+        UChar c;
+        if (varValueBuffer != NULL) {
+            if (ivarValueBuffer < varValueBuffer->length()) {
+                c = varValueBuffer->charAt(ivarValueBuffer++);
+                const UnicodeSet* s = symbols->lookupSet(c);
+                if (s != NULL) {
+                    //nestedSet = s;
+                    nestedPairs = &s->pairs;
+                }
+            } else {
+                varValueBuffer = NULL;
+                c = pattern.charAt(i);
+            }
+        } else {
+            c = pattern.charAt(i);
+        }

        // Ignore whitespace.  This is not Unicode whitespace, but Java
        // whitespace, a subset of Unicode whitespace.
@ -556,103 +565,104 @@ UnicodeString& UnicodeSet::parse(UnicodeString& pairsBuf /*result*/,
        // will be 2 if we want a closing ']', or 3 if we should parse a
        // category and close with ":]".

-        /* Handle escapes.  If a character is escaped, then it assumes its
-         * literal value.  This is true for all characters, both special
-         * characters and characters with no special meaning.  We also
-         * interpret '\\uxxxx' Unicode escapes here (as literals).
-         */
-        if (c == BACKSLASH) {
-            ++i;
-            if (i < pattern.length()) {
-                c = pattern.charAt(i);
-                isLiteral = TRUE;
-                if (c == 0x0075 /*u*/) {
-                    if ((i+4) >= pattern.length()) {
-						status = U_ILLEGAL_ARGUMENT_ERROR;
-						return pairsBuf;
-                    }
-                    c = (UChar)0x0000;
-                    for (int32_t j=(++i)+4; i<j; ++i) { // [sic]
-                        int32_t digit = Unicode::digit(pattern.charAt(i), 16);
-                        if (digit<0) {
+        // Only process escapes, variable references, and nested sets
+        // if we are _not_ retrieving characters from the variable
+        // buffer.  Characters in the variable buffer have already
+        // been through escape and variable reference processing.
+        if (varValueBuffer == NULL) {
+            /* Handle escapes.  If a character is escaped, then it assumes its
+             * literal value.  This is true for all characters, both special
+             * characters and characters with no special meaning.  We also
+             * interpret '\\uxxxx' Unicode escapes here (as literals).
+             */
+            if (c == BACKSLASH) {
+                ++i;
+                if (i < pattern.length()) {
+                    c = pattern.charAt(i);
+                    isLiteral = TRUE;
+                    if (c == 0x0075 /*u*/) {
+                        if ((i+4) >= pattern.length()) {
                            status = U_ILLEGAL_ARGUMENT_ERROR;
                            return pairsBuf;
                        }
-                        c = (UChar) ((c << 4) | digit);
+                        c = (UChar)0x0000;
+                        for (int32_t j=(++i)+4; i<j; ++i) { // [sic]
+                            int32_t digit = Unicode::digit(pattern.charAt(i), 16);
+                            if (digit<0) {
+                                status = U_ILLEGAL_ARGUMENT_ERROR;
+                                return pairsBuf;
+                            }
+                            c = (UChar) ((c << 4) | digit);
+                        }
+                        --i; // Move i back to last parsed character
                    }
-                    --i; // Move i back to last parsed character
-                }
-            } else {
-                status = U_ILLEGAL_ARGUMENT_ERROR;
-                return pairsBuf;
-            }
-        }
-
-        /* Parse variable references.  These are treated as literals.  If a
-         * variable refers to a UnicodeSet, nestedPairs is assigned here.
-         * Variable names are only parsed if varNameToChar is not null.
-         * Set variables are only looked up if varCharToSet is not null.
-         */
-        else if (symbols != NULL && !isLiteral && c == VARIABLE_REF_OPEN) {
-            ++i;
-            int32_t j = pattern.indexOf(VARIABLE_REF_CLOSE, i);
-            UnicodeSet* set = NULL;
-            if (i == j || j < 0) { // empty or unterminated
-                // throw new IllegalArgumentException("Illegal variable reference");
-                status = U_ILLEGAL_ARGUMENT_ERROR;
-            } else {
-                scratch.truncate(0);
-                pattern.extractBetween(i, j, scratch);
-                symbols->lookup(scratch, c, set, status);
-            }
-            if (U_FAILURE(status)) {
-                // Either the reference was ill-formed (empty name, or no
-                // closing '}', or the specified name is not defined.
-                return pairsBuf;
-            }
-            isLiteral = TRUE;
-
-            if (set != NULL) {
-                nestedPairs = &set->pairs;
-            }
-            i = j; // Make i point to '}'
-        }
-
-        /* An opening bracket indicates the first bracket of a nested
-         * subpattern, either a normal pattern or a category pattern.  We
-         * recognize these here and set nestedPairs accordingly.
-         */
-        else if (!isLiteral && c == SET_OPEN) {
-            // Handle "[:...:]", representing a character category
-            UChar d = charAfter(pattern, i);
-            if (d == COLON) {
-                i += 2;
-                int32_t j = pattern.indexOf(CATEGORY_CLOSE, i);
-                if (j < 0) {
-                    // throw new IllegalArgumentException("Missing \":]\"");
+                } else {
                    status = U_ILLEGAL_ARGUMENT_ERROR;
                    return pairsBuf;
                }
-                scratch.truncate(0);
-                pattern.extractBetween(i, j, scratch);
-                nestedPairs = &getCategoryPairs(nestedAux, scratch, status);
-                if (U_FAILURE(status)) {
+            }
+
+            /* Parse variable references.  These are treated as literals.  If a
+             * variable refers to a UnicodeSet, its stand-in character is
+             * returned in the variable's value string (varValueBuffer).
+             * Variable references are only parsed if symbols is not null;
+             * stand-in characters are resolved via symbols->lookupSet().
+             */
+            else if (symbols != NULL && !isLiteral && c == SymbolTable::SYMBOL_REF) {
+                pos.setIndex(++i);
+                UnicodeString name = symbols->parseReference(pattern, pos, limit);
+                if (name.length() == 0) {
+                    status = U_ILLEGAL_ARGUMENT_ERROR;
                    return pairsBuf;
                }
-                i = j+1; // Make i point to ']' in ":]"
-                if (mode == 3) {
-                    // Entire pattern is a category; leave parse loop
-                    pairsBuf.append(*nestedPairs);
-                    break;
-                }
-            } else {
-                // Recurse to get the pairs for this nested set.
-                pos.setIndex(i);
-                nestedPairs = &parse(nestedAux, pattern, pos, symbols, status);
-                if (U_FAILURE(status)) {
+                varValueBuffer = symbols->lookup(name);
+                if (varValueBuffer == NULL) {
+                    //throw new IllegalArgumentException("Undefined variable: "
+                    //                                   + name);
+                    status = U_ILLEGAL_ARGUMENT_ERROR;
                    return pairsBuf;
                }
-                i = pos.getIndex() - 1; // - 1 to point at ']'
+                ivarValueBuffer = 0;
+                i = pos.getIndex(); // Make i point PAST last char of var name
+                continue; // Back to the top to get varValueBuffer[0]
+            }
+
+            /* An opening bracket indicates the first bracket of a nested
+             * subpattern, either a normal pattern or a category pattern.  We
+             * recognize these here and set nestedPairs accordingly.
+             */
+            else if (!isLiteral && c == SET_OPEN) {
+                // Handle "[:...:]", representing a character category
+                UChar d = charAfter(pattern, i);
+                if (d == COLON) {
+                    i += 2;
+                    int32_t j = pattern.indexOf(CATEGORY_CLOSE, i);
+                    if (j < 0) {
+                        // throw new IllegalArgumentException("Missing \":]\"");
+                        status = U_ILLEGAL_ARGUMENT_ERROR;
+                        return pairsBuf;
+                    }
+                    scratch.truncate(0);
+                    pattern.extractBetween(i, j, scratch);
+                    nestedPairs = &getCategoryPairs(nestedAux, scratch, status);
+                    if (U_FAILURE(status)) {
+                        return pairsBuf;
+                    }
+                    i = j+1; // Make i point to ']' in ":]"
+                    if (mode == 3) {
+                        // Entire pattern is a category; leave parse loop
+                        pairsBuf.append(*nestedPairs);
+                        break;
+                    }
+                } else {
+                    // Recurse to get the pairs for this nested set.
+                    pos.setIndex(i);
+                    nestedPairs = &parse(nestedAux, pattern, pos, symbols, status);
+                    if (U_FAILURE(status)) {
+                        return pairsBuf;
+                    }
+                    i = pos.getIndex() - 1; // - 1 to point at ']'
+                }
            }
        }

--- a/icu4c/source/test/intltest/transapi.cpp
+++ b/icu4c/source/test/intltest/transapi.cpp
@ -117,9 +117,10 @@ void TransliteratorAPITest::TestgetInverse() {
         Transliterator* invt1 = Transliterator::createInstance("Latin-Kana");
         Transliterator* t2    = Transliterator::createInstance("Latin-Devanagari");
         Transliterator* invt2 = Transliterator::createInstance("Devanagari-Latin");
-         if(t1 == 0 || invt1 == 0 || t2 == 0 || invt2 == 0)
+         if(t1 == 0 || invt1 == 0 || t2 == 0 || invt2 == 0) {
             errln("FAIL: in instantiation");
-			 
+			 return;
+         }

         Transliterator* inverse1=t1->createInverse();
         Transliterator* inverse2=t2->createInverse();
@ -235,7 +236,7 @@ void TransliteratorAPITest::TestTransliterate1(){
             "Unicode-Hex",         "hello",                               "\\u0068\\u0065\\u006C\\u006C\\u006F" ,
             "Hex-Unicode",         "\\u0068\\u0065\\u006C\\u006C\\u006F", "hello"  ,
             "Latin-Devanagari",    "bhaarata",                            CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924") ,
-             "Devanagari-Latin",    CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"),        "bhaaarata" ,
+             "Devanagari-Latin",    CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"),        "bhaarata" ,
         //  "Contracted-Expanded", CharsToUnicodeString("\\u00C0\\u00C1\\u0042"),               CharsToUnicodeString("\\u0041\\u0300\\u0041\\u0301\\u0042") ,
         //  "Expanded-Contracted", CharsToUnicodeString("\\u0041\\u0300\\u0041\\u0301\\u0042"), CharsToUnicodeString("\\u00C0\\u00C1\\u0042") ,
             "Latin-Arabic",        "aap",                                 CharsToUnicodeString("\\u0627\\u06A4")     ,
@ -472,13 +473,17 @@ void TransliteratorAPITest::TestKeyboardTransliterator2(){
        t=Transliterator::createInstance("Unicode-Hex");
 		if(t == 0)
 			errln("FAIL : construction");
-		keyboardAux(t, Data, rs, 0, 20);
+        else {
+		    keyboardAux(t, Data, rs, 0, 20);
+            delete t;
+        }

        rs="Hindi --";
 		t=Transliterator::createInstance("Latin-Devanagari");
 		if(t == 0)
 			errln("FAIL : construction");
-        keyboardAux(t, Data, rs, 20, 40);
+        else
+            keyboardAux(t, Data, rs, 20, 40);

 		
      //  rs="Add here:";
--- a/icu4c/source/test/intltest/transtst.cpp
+++ b/icu4c/source/test/intltest/transtst.cpp
@ -55,6 +55,9 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
        CASE(12,TestJ277);
        CASE(13,TestJ243);
        CASE(14,TestJ329);
+        CASE(15,TestSegments);
+        CASE(16,TestCursorOffset);
+        CASE(17,TestArbitraryVariableValues);
        default: name = ""; break;
    }
 }
@ -140,17 +143,15 @@ void TransliteratorTest::TestSimpleRules(void) {
    UErrorCode status = U_ZERO_ERROR;
    RuleBasedTransliterator t(
        "<ID>",
-        UnicodeString("dummy=").append((UChar)0xE100) +
-        UnicodeString(
-        ";"
-        "          vowel = [aeiouAEIOU];"
-        "             lu = [:Lu:];"
-
-        " {vowel} ({lu}) > ! ;"
-        " {vowel}        > & ;"
-        "        !) {lu} > ^ ;"
-        "           {lu} > * ;"
-        "              a > ERROR", ""),
+        UnicodeString("$dummy=").append((UChar)0xE100) +
+        UnicodeString(";"
+                      "$vowel=[aeiouAEIOU];"
+                      "$lu=[:Lu:];"
+                      "$vowel } $lu > '!';"
+                      "$vowel > '&';"
+                      "'!' { $lu > '^';"
+                      "$lu > '*';"
+                      "a > ERROR", ""),
        status);
    if (U_FAILURE(status)) {
        errln("FAIL: RBT constructor failed");
@ -163,16 +164,16 @@ void TransliteratorTest::TestSimpleRules(void) {
 * Test inline set syntax and set variable syntax.
 */
 void TransliteratorTest::TestInlineSet(void) {
-    expect("[:Ll:] (x) > y; [:Ll:] > z;", "aAbxq", "zAyzz");
+    expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
    expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
    
    expect(UnicodeString(
-           "digit = [0-9];"
-           "alpha = [a-zA-Z];"
-           "alphanumeric = [{digit}{alpha}];" // ***
-           "special = [^{alphanumeric}];"     // ***
-           "{alphanumeric} > -;"
-           "{special} > *;", ""),
+           "$digit = [0-9];"
+           "$alpha = [a-zA-Z];"
+           "$alphanumeric = [$digit $alpha];" // ***
+           "$special = [^$alphanumeric];"     // ***
+           "$alphanumeric > '-';"
+           "$special > '*';", ""),
           
           "thx-1138", "---*----");
 }
@ -498,6 +499,10 @@ void TransliteratorTest::TestPatternQuoting(void) {
 void TransliteratorTest::TestJ277(void) {
    UErrorCode status = U_ZERO_ERROR;
    Transliterator *gl = Transliterator::createInstance("Greek-Latin");
+    if (gl == NULL) {
+        errln("FAIL: createInstance(Greek-Latin) returned NULL");
+        return;
+    }

    UChar sigma = 0x3C3;
    UChar upsilon = 0x3C5;
@ -520,17 +525,17 @@ void TransliteratorTest::TestJ277(void) {

    // Again, using a smaller rule set
    UnicodeString rules(
-                "alpha   = \\u03B1;"
-                "nu      = \\u03BD;"
-                "sigma   = \\u03C3;"
-                "ypsilon = \\u03C5;"
-                "vowel   = [aeiouAEIOU{alpha}{ypsilon}];"
-                "s <>           {sigma};"
-                "a <>           {alpha};"
-                "u <> ({vowel}) {ypsilon};"
-                "y <>           {ypsilon};"
-                "n <>           {nu};"
-                );
+                "$alpha   = \\u03B1;"
+                "$nu      = \\u03BD;"
+                "$sigma   = \\u03C3;"
+                "$ypsilon = \\u03C5;"
+                "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
+                "s <>           $sigma;"
+                "a <>           $alpha;"
+                "u <>  $vowel { $ypsilon;"
+                "y <>           $ypsilon;"
+                "n <>           $nu;",
+                "");
    RuleBasedTransliterator mini("mini", rules, Transliterator::REVERSE, status);
    if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
    expect(mini, syn, "syn");
@ -626,6 +631,100 @@ void TransliteratorTest::TestJ329(void) {
    }
 }

+/**
+ * Test segments and segment references.
+ */
+void TransliteratorTest::TestSegments(void) {
+    // Array of 3n items
+    // Each item is <rules>, <input>, <expected output>
+    UnicodeString DATA[] = {
+        "([a-z]) '.' ([0-9]) > $2 '-' $1",
+        "abc.123.xyz.456",
+        "ab1-c23.xy4-z56",
+    };
+    int32_t DATA_length = sizeof(DATA)/sizeof(*DATA);
+
+    for (int32_t i=0; i<DATA_length; i+=3) {
+        logln("Pattern: " + prettify(DATA[i]));
+        UErrorCode status = U_ZERO_ERROR;
+        RuleBasedTransliterator t("<ID>", DATA[i], status);
+        if (U_FAILURE(status)) {
+            errln("FAIL: RBT constructor");
+        } else {
+            expect(t, DATA[i+1], DATA[i+2]);
+        }
+    }
+}
+
+/**
+ * Test cursor positioning outside of the key
+ */
+void TransliteratorTest::TestCursorOffset(void) {
+    // Array of 3n items
+    // Each item is <rules>, <input>, <expected output>
+    UnicodeString DATA[] = {
+        "pre {alpha} post > | @ ALPHA ;" 
+        "eALPHA > beta ;" 
+        "pre {beta} post > BETA @@ | ;" 
+        "post > xyz",
+
+        "prealphapost prebetapost",
+
+        "prbetaxyz preBETApost",
+    };
+    int32_t DATA_length = sizeof(DATA)/sizeof(*DATA);
+
+    for (int32_t i=0; i<DATA_length; i+=3) {
+        logln("Pattern: " + prettify(DATA[i]));
+        UErrorCode status = U_ZERO_ERROR;
+        RuleBasedTransliterator t("<ID>", DATA[i], status);
+        if (U_FAILURE(status)) {
+            errln("FAIL: RBT constructor");
+        } else {
+            expect(t, DATA[i+1], DATA[i+2]);
+        }
+    }
+}
+
+/**
+ * Test zero length and > 1 char length variable values.  Test
+ * use of variable refs in UnicodeSets.
+ */
+void TransliteratorTest::TestArbitraryVariableValues(void) {
+    // Array of 3n items
+    // Each item is <rules>, <input>, <expected output>
+    UnicodeString DATA[] = {
+        "$abe = ab;" 
+        "$pat = x[yY]z;" 
+        "$ll  = 'a-z';" 
+        "$llZ = [$ll];" 
+        "$llY = [$ll$pat];" 
+        "$emp = ;" 
+
+        "$abe > ABE;" 
+        "$pat > END;" 
+        "$llZ > 1;" 
+        "$llY > 2;" 
+        "7$emp 8 > 9;" 
+        "",
+
+        "ab xYzxyz stY78",
+        "ABE ENDEND 1129",
+    };
+    int32_t DATA_length = sizeof(DATA)/sizeof(*DATA);
+
+    for (int32_t i=0; i<DATA_length; i+=3) {
+        logln("Pattern: " + prettify(DATA[i]));
+        UErrorCode status = U_ZERO_ERROR;
+        RuleBasedTransliterator t("<ID>", DATA[i], status);
+        if (U_FAILURE(status)) {
+            errln("FAIL: RBT constructor");
+        } else {
+            expect(t, DATA[i+1], DATA[i+2]);
+        }
+    }
+}
+
 //======================================================================
 // Support methods
 //======================================================================
--- a/icu4c/source/test/intltest/transtst.h
+++ b/icu4c/source/test/intltest/transtst.h
@ -96,6 +96,22 @@ class TransliteratorTest : public IntlTest {
     */
    void TestJ329(void);

+    /**
+     * Test segments and segment references.
+     */
+    void TestSegments();
+    
+    /**
+     * Test cursor positioning outside of the key
+     */
+    void TestCursorOffset();
+    
+    /**
+     * Test zero length and > 1 char length variable values.  Test
+     * use of variable refs in UnicodeSets.
+     */
+    void TestArbitraryVariableValues();
+
    //======================================================================
    // Support methods
    //======================================================================