diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java b/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java
index cf1c99d5bc6..9f7237e516b 100644
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java,v $
-* $Date: 2004/02/18 03:08:59 $
-* $Revision: 1.11 $
+* $Date: 2004/04/17 18:21:39 $
+* $Revision: 1.12 $
 *
 *******************************************************************************
 */
@@ -782,11 +782,11 @@ abstract public class GenerateBreakTest implements UCD_Types {
 
         public boolean isBreak(String source, int offset) {
             
-            setRule("1: sot ÷");
+            setRule("1: sot Ã·");
             if (offset < 0 || offset > source.length()) return false;
             if (offset == 0) return true;
 
-            setRule("2: ÷ eot");
+            setRule("2: Ã· eot");
             if (offset == source.length()) return true;
 
             // UTF-16: never break in the middle of a code point
@@ -801,29 +801,29 @@ abstract public class GenerateBreakTest implements UCD_Types {
             byte before = getResolvedType(cpBefore);
             byte after = getResolvedType(cpAfter);
 
-            setRule("3: CR × LF");
+            setRule("3: CR Ã— LF");
             if (before == CR && after == LF) return false;
 
-            setRule("4: ( Control | CR | LF ) ÷");
+            setRule("4: ( Control | CR | LF ) Ã·");
             if (before == CR || before == LF || before == Control) return true;
 
-            setRule("5: ÷ ( Control | CR | LF )");
+            setRule("5: Ã· ( Control | CR | LF )");
             if (after == Control || after == LF || after == CR) return true;
 
-            setRule("6: L × ( L | V | LV | LVT )");
+            setRule("6: L Ã— ( L | V | LV | LVT )");
             if (before == L && (after == L || after == V || after == LV || after == LVT)) return false;
 
-            setRule("7: ( LV | V ) × ( V | T )");
+            setRule("7: ( LV | V ) Ã— ( V | T )");
             if ((before == LV || before == V) && (after == V || after == T)) return false;
 
-            setRule("8: ( LVT | T ) × T");
+            setRule("8: ( LVT | T ) Ã— T");
             if ((before == LVT || before == T) && (after == T)) return false;
 
-            setRule("9: × Extend");
+            setRule("9: Ã— Extend");
             if (after == Extend) return false;
 
             // Otherwise break after all characters.
-            setRule("10: Any ÷ Any");
+            setRule("10: Any Ã· Any");
             return true;
 
         }
@@ -914,12 +914,12 @@ abstract public class GenerateBreakTest implements UCD_Types {
 
         public boolean isBreak(String source, int offset) {
 
-            setRule("1: sot ÷");
+            setRule("1: sot Ã·");
             if (offset < 0 || offset > source.length()) return false;
   
             if (offset == 0) return true;
 
-            setRule("2: ÷ eot");
+            setRule("2: Ã· eot");
             if (offset == source.length()) return true;
 
             // Treat a grapheme cluster as if it were a single character:
@@ -943,43 +943,43 @@ abstract public class GenerateBreakTest implements UCD_Types {
 
             //Don't break between most letters
 
-            setRule("5: ALetter × ALetter");
+            setRule("5: ALetter Ã— ALetter");
             if (before == ALetter && after == ALetter) return false;
 
-            // Don’t break letters across certain punctuation
+            // Don’t break letters across certain punctuation
 
-            setRule("6: ALetter × (MidLetter | MidNumLet) ALetter");
+            setRule("6: ALetter Ã— (MidLetter | MidNumLet) ALetter");
             if (before == ALetter && (after == MidLetter || after == MidNumLet) && after2 == ALetter) return false;
 
-            setRule("7: ALetter (MidLetter | MidNumLet) × ALetter");
+            setRule("7: ALetter (MidLetter | MidNumLet) Ã— ALetter");
             if (before2 == ALetter && (before == MidLetter || before == MidNumLet) && after == ALetter) return false;
 
-            // Don’t break within sequences of digits, or digits adjacent to letters.
+            // Don’t break within sequences of digits, or digits adjacent to letters.
 
-            setRule("8: Numeric × Numeric");
+            setRule("8: Numeric Ã— Numeric");
             if (before == Numeric && after == Numeric) return false;
 
-            setRule("9: ALetter × Numeric");
+            setRule("9: ALetter Ã— Numeric");
             if (before == ALetter && after == Numeric) return false;
 
-            setRule("10: Numeric × ALetter");
+            setRule("10: Numeric Ã— ALetter");
             if (before == Numeric && after == ALetter) return false;
 
 
-            // Don’t break within sequences like: '-3.2'
-            setRule("11: Numeric (MidNum | MidNumLet) × Numeric");
+            // Don’t break within sequences like: '-3.2'
+            setRule("11: Numeric (MidNum | MidNumLet) Ã— Numeric");
             if (before2 == Numeric && (before == MidNum || before == MidNumLet) && after == Numeric) return false;
 
-            setRule("12: Numeric × (MidNum | MidNumLet) Numeric");
+            setRule("12: Numeric Ã— (MidNum | MidNumLet) Numeric");
             if (before == Numeric && (after == MidNum || after == MidNumLet) && after2 == Numeric) return false;
 
             // Don't break between Katakana
 
-            setRule("13: Katakana × Katakana");
+            setRule("13: Katakana Ã— Katakana");
             if (before == Katakana && after == Katakana) return false;
 
             // Otherwise break always.
-            setRule("14: Any ÷ Any");
+            setRule("14: Any Ã· Any");
             return true;
 
         }
@@ -1235,7 +1235,7 @@ abstract public class GenerateBreakTest implements UCD_Types {
 
             // LB 2a  Never break at the start of text
 
-            setRule("2a: × sot");
+            setRule("2a: Ã— sot");
             if (offset <= 0) return false;
 
             // LB 2b  Always break at the end of text
@@ -1269,26 +1269,26 @@ abstract public class GenerateBreakTest implements UCD_Types {
             //byte after = getResolvedType(cpAfter);
 
 
-            setRule("3a: CR × LF ; ( BK | CR | LF | NL ) !");
+            setRule("3a: CR Ã— LF ; ( BK | CR | LF | NL ) !");
             
             // Always break after hard line breaks (but never between CR and LF).
             // CR ^ LF
             if (before == LB_CR && after == LB_LF) return false;
             if (before == LB_BK || before == LB_LF || before == LB_CR) return true;
 
-            //LB 3b  Don’t break before hard line breaks.
-            setRule("3b: × ( BK | CR | LF )");
+            //LB 3b  Don’t break before hard line breaks.
+            setRule("3b: Ã— ( BK | CR | LF )");
             if (after == LB_BK || after == LB_LF || after == LB_CR) return false;
 
-            // LB 4  Don’t break before spaces or zero-width space.
-            setRule("4: × ( SP | ZW )");
+            // LB 4  Don’t break before spaces or zero-width space.
+            setRule("4: Ã— ( SP | ZW )");
             if (after == LB_SP || after == LB_ZW) return false;
 
             // LB 5 Break after zero-width space.
-            setRule("5: ZW ÷");
+            setRule("5: ZW Ã·");
             if (before == LB_ZW) return true;
 
-            // LB 6  Don’t break graphemes (before combining marks, around virama or on sequences of conjoining Jamos.
+            // LB 6  Don’t break graphemes (before combining marks, around virama or on sequences of conjoining Jamos).
             setRule("6: DGC -> FC");
             if (!grapheme.isBreak( source,  offset)) return false;
             
@@ -1324,9 +1324,9 @@ abstract public class GenerateBreakTest implements UCD_Types {
             if (setBase && backBase == -1) before = LB_AL;
 
             
-            // LB 8  Don’t break before ‘]’ or ‘!’ or ‘;’ or ‘/’,  even after spaces.
-            // × CL, × EX, × IS, × SY
-            setRule("8: × ( CL | EX | IS | SY )");
+            // LB 8  Don’t break before ‘]’ or ‘!’ or ‘;’ or ‘/’,  even after spaces.
+            // × CL, × EX, × IS, × SY
+            setRule("8: Ã— ( CL | EX | IS | SY )");
             if (after == LB_CL || after == LB_EX || after == LB_SY | after == LB_IS) return false;
 
 
@@ -1339,97 +1339,97 @@ abstract public class GenerateBreakTest implements UCD_Types {
                 }
             }
 
-            // LB 9  Don’t break after ‘[’, even after spaces.
-            // OP SP* ×
-            setRule("9: OP SP* ×");
+            // LB 9  Don’t break after ‘[’, even after spaces.
+            // OP SP* ×
+            setRule("9: OP SP* Ã—");
             if (lastNonSpace == LB_OP) return false;
 
-            // LB 10  Don’t break within ‘”[’, , even with intervening spaces.
-            // QU SP* × OP
-            setRule("10: QU SP* × OP");
+            // LB 10  Don’t break within ‘”[’, even with intervening spaces.
+            // QU SP* × OP
+            setRule("10: QU SP* Ã— OP");
             if (lastNonSpace == LB_QU && after == LB_OP) return false;
 
-            // LB 11  Don’t break within ‘]h’, even with intervening spaces.
-            // CL SP* × NS
-            setRule("11: CL SP* × NS");
+            // LB 11  Don’t break within ‘]h’, even with intervening spaces.
+            // CL SP* × NS
+            setRule("11: CL SP* Ã— NS");
             if (lastNonSpace == LB_CL && after == LB_NS) return false;
 
-            // LB 11a  Don’t break within ‘——’, even with intervening spaces.
-            // B2 × B2
-            setRule("11a: B2 × B2");
+            // LB 11a  Don’t break within ‘——’, even with intervening spaces.
+            // B2 × B2
+            setRule("11a: B2 Ã— B2");
             if (lastNonSpace == LB_B2 && after == LB_B2) return false;
 
 
-            // LB 13  Don’t break before or after NBSP or WORD JOINER
-            // × GL
-            // GL ×
+            // LB 13  Don’t break before or after NBSP or WORD JOINER
+            // × GL
+            // GL ×
 
-            setRule("11b: × WJ ; WJ ×");
+            setRule("11b: Ã— WJ ; WJ Ã—");
             if (after == LB_WJ || before == LB_WJ) return false;
 
             // [Note: by this time, all of the "X" in the table are accounted for. We can safely break after spaces.]
 
             // LB 12  Break after spaces
-            setRule("12: SP ÷");
+            setRule("12: SP Ã·");
             if (before == LB_SP) return true;
 
-            // LB 13  Don’t break before or after NBSP or WORD JOINER
-            setRule("13: × GL ; GL ×");
+            // LB 13  Don’t break before or after NBSP or WORD JOINER
+            setRule("13: Ã— GL ; GL Ã—");
             if (after == LB_GL || before == LB_GL) return false;
 
-            // LB 14  Don’t break before or after ‘”’
-            setRule("14: × QU ; QU ×");
+            // LB 14  Don’t break before or after ‘”’
+            setRule("14: Ã— QU ; QU Ã—");
             if (before == LB_QU || after == LB_QU) return false;
 
             // LB 14a  Break before and after CB
-            setRule("14a: ÷ CB ; CB ÷");
+            setRule("14a: Ã· CB ; CB Ã·");
             if (before == LB_CB || after == LB_CB) return true;
 
-            // LB 15  Don’t break before hyphen-minus, other hyphens, fixed-width spaces,
+            // LB 15  Don’t break before hyphen-minus, other hyphens, fixed-width spaces,
             // small kana and other non- starters,  or after acute accents:
 
-            setRule("15: × ( BA | HY | NS ) ; BB ×");
+            setRule("15: Ã— ( BA | HY | NS ) ; BB Ã—");
             if (after == LB_NS) return false;
             if (after == LB_HY) return false;
             if (after == LB_BA) return false;
             if (before == LB_BB) return false;
 
 
-            //setRule("15a: HY × NU"); // NEW
+            //setRule("15a: HY Ã— NU"); // NEW
             //if (before == LB_HY && after == LB_NU) return false;
 
-            // LB 16  Don’t break between two ellipses, or between letters or numbers and ellipsis:
-            // Examples: ’9...’, ‘a...’, ‘H...’
-            setRule("16: ( AL | ID | IN | NU ) × IN");
+            // LB 16  Don’t break between two ellipses, or between letters or numbers and ellipsis:
+            // Examples: ‘9...’, ‘a...’, ‘H...’
+            setRule("16: ( AL | ID | IN | NU ) Ã— IN");
             if ((before == LB_NU || before == LB_AL || before == LB_ID) && after == LB_IN) return false;
             if (before == LB_IN && after == LB_IN) return false;
 
             // Don't break alphanumerics.
-            // LB 17  Don’t break within ‘a9’, ‘3a’, or ‘H%’
+            // LB 17  Don’t break within ‘a9’, ‘3a’, or ‘H%’
             // Numbers are of the form PR ? ( OP | HY ) ? NU (NU | IS) * CL ?  PO ?
-            // Examples:   $(12.35)    2,1234    (12)¢    12.54¢
+            // Examples:   $(12.35)    2,1234    (12)¢    12.54¢
             // This is approximated with the following rules. (Some cases already handled above,
-            // like ‘9,’, ‘[9’.)
-            setRule("17: ID × PO ; AL × NU; NU × AL");
+            // like ‘9,’, ‘[9’.)
+            setRule("17: ID Ã— PO ; AL Ã— NU; NU Ã— AL");
             if (before == LB_ID && after == LB_PO) return false;
             if (before == LB_AL && after == LB_NU) return false;
             if (before == LB_NU && after == LB_AL) return false;
 
-            // LB 18  Don’t break between the following pairs of classes.
-            // CL × PO
-            // HY × NU
-            // IS × NU
-            // NU × NU
-            // NU × PO
-            // PR × AL
-            // PR × HY
-            // PR × ID
-            // PR × NU
-            // PR × OP
-            // SY × NU
-            // Example pairs: ‘$9’, ‘$[’, ‘$-‘, ‘-9’, ‘/9’, ‘99’, ‘,9’,  ‘9%’ ‘]%’
+            // LB 18  Don’t break between the following pairs of classes.
+            // CL × PO
+            // HY × NU
+            // IS × NU
+            // NU × NU
+            // NU × PO
+            // PR × AL
+            // PR × HY
+            // PR × ID
+            // PR × NU
+            // PR × OP
+            // SY × NU
+            // Example pairs: ‘$9’, ‘$[’, ‘$-’, ‘-9’, ‘/9’, ‘99’, ‘,9’,  ‘9%’ ‘]%’
 
-            setRule("18: CL × PO ; NU × PO ; ( IS | NU | HY | PR | SY ) × NU ; PR × ( AL | HY | ID | OP )");
+            setRule("18: CL Ã— PO ; NU Ã— PO ; ( IS | NU | HY | PR | SY ) Ã— NU ; PR Ã— ( AL | HY | ID | OP )");
             if (before == LB_CL && after == LB_PO) return false;
             if (before == LB_IS && after == LB_NU) return false;
             if (before == LB_NU && after == LB_NU) return false;
@@ -1446,30 +1446,30 @@ abstract public class GenerateBreakTest implements UCD_Types {
             if (before == LB_SY && after == LB_NU) return false;
 
             // LB 15b  Break after hyphen-minus, and before acute accents:
-            setRule("18b: HY ÷ ; ÷ BB");
+            setRule("18b: HY Ã· ; Ã· BB");
             if (before == LB_HY) return true;
             if (after == LB_BB) return true;
 
-            // LB 19  Don’t break between alphabetics (“at”)
-            // AL × AL
+            // LB 19  Don’t break between alphabetics (“at”)
+            // AL × AL
 
-            setRule("19: AL × AL");
+            setRule("19: AL Ã— AL");
             if (before == LB_AL && after == LB_AL) return false;
 
             // LB 20  Break everywhere else
-            // ALL ÷
-            // ÷ ALL
+            // ALL ÷
+            // ÷ ALL
 
             if (ucd.getCompositeVersion() > 0x040000) {
-                setRule("19b: IS × AL");
+                setRule("19b: IS Ã— AL");
                 if (before == LB_IS && after == LB_AL) return false;
             }
 
             // LB 20  Break everywhere else
-            // ALL ÷
-            // ÷ ALL
+            // ALL ÷
+            // ÷ ALL
 
-            setRule("20: ALL ÷ ; ÷ ALL");
+            setRule("20: ALL Ã· ; Ã· ALL");
             return true;
         }
     }
@@ -1498,8 +1498,8 @@ abstract public class GenerateBreakTest implements UCD_Types {
                 "U.S.A\u0300.", 
                 "3.4", 
                 "c.d",
-                "etc.)\u2019 \u2018(the",
-                "etc.)\u2019 \u2018(The",
+                "etc.)\u2019Â \u2018(the",
+                "etc.)\u2019Â \u2018(The",
                 "the resp. leaders are",
                 "\u5B57.\u5B57",
                 "etc.\u5B83",
@@ -1631,15 +1631,15 @@ abstract public class GenerateBreakTest implements UCD_Types {
         public boolean isBreak(String source, int offset) {
     
             // Break at the start and end of text.
-            setRule("1: sot ÷");
+            setRule("1: sot Ã·");
             if (offset < 0 || offset > source.length()) return false;
   
             if (offset == 0) return true;
 
-            setRule("2: ÷ eot");
+            setRule("2: Ã· eot");
             if (offset == source.length()) return true;
 
-            setRule("3: Sep ÷");
+            setRule("3: Sep Ã·");
             byte beforeChar = getResolvedType(source.charAt(offset-1));
             if (beforeChar == Sep) return true;
             
@@ -1662,22 +1662,22 @@ abstract public class GenerateBreakTest implements UCD_Types {
             
             // HACK COPY for rule collection!
             if (collectingRules) {
-                setRule("6: ATerm × ( Numeric | Lower )");
-                setRule("7: Upper ATerm × Upper");
-                setRule("8: ATerm Close* Sp* × ( ¬(OLetter | Upper | Lower) )* Lower");
-                setRule("9: ( Term | ATerm ) Close* × ( Close | Sp | Sep )");
-                setRule("10: ( Term | ATerm ) Close* Sp × ( Sp | Sep )");
-                setRule("11: ( Term | ATerm ) Close* Sp* ÷");
-                setRule("12: Any × Any");
+                setRule("6: ATerm Ã— ( Numeric | Lower )");
+                setRule("7: Upper ATerm Ã— Upper");
+                setRule("8: ATerm Close* Sp* Ã— ( Â¬(OLetter | Upper | Lower) )* Lower");
+                setRule("9: ( Term | ATerm ) Close* Ã— ( Close | Sp | Sep )");
+                setRule("10: ( Term | ATerm ) Close* Sp Ã— ( Sp | Sep )");
+                setRule("11: ( Term | ATerm ) Close* Sp* Ã·");
+                setRule("12: Any Ã— Any");
                 collectingRules = false;
             }
             
             // Do not break after ambiguous terminators like period, if immediately followed by a number or lowercase letter, is between uppercase letters, or if the first following letter (optionally after certain punctuation) is lowercase. For example, a period may be an abbreviation or numeric period, and not mark the end of a sentence.
             
             if (before == ATerm) {
-                setRule("6: ATerm × ( Numeric | Lower )");
+                setRule("6: ATerm Ã— ( Numeric | Lower )");
                 if (after == Lower || after == Numeric) return false;
-                setRule("7: Upper ATerm × Upper");
+                setRule("7: Upper ATerm Ã— Upper");
                 if (DEBUG_GRAPHEMES) System.out.println(context + ", " + Upper);
                 if (before2 == Upper && after == Upper) return false;
             }
@@ -1736,17 +1736,17 @@ abstract public class GenerateBreakTest implements UCD_Types {
             
             if (lookAfter == -1) {
                 // Otherwise, do not break
-                // Any × Any (11)
-                setRule("12: Any × Any");
+                // Any × Any (11)
+                setRule("12: Any Ã— Any");
                 return false;
             }
                 
-            // ATerm Close* Sp*×(¬( OLetter))* Lower(8)
+            // ATerm Close* Sp*×(¬( OLetter))* Lower(8)
             
             // Break after sentence terminators, but include closing punctuation, trailing spaces, and (optionally) a paragraph separator.
-            // ( Term | ATerm ) Close*×( Close | Sp | Sep )(9)
-            // ( Term | ATerm ) Close* Sp×( Sp | Sep )(10)
-            // ( Term | ATerm ) Close* Sp*÷(11)
+            // ( Term | ATerm ) Close*×( Close | Sp | Sep )(9)
+            // ( Term | ATerm ) Close* Sp×( Sp | Sep )(10)
+            // ( Term | ATerm ) Close* Sp*÷(11)
 
                         
             // We DID find one. Loop to see if the right side is ok.
@@ -1764,16 +1764,16 @@ abstract public class GenerateBreakTest implements UCD_Types {
                 if (isFirst) {
                     isFirst = false;
                     if (lookAfter == ATerm && t == Upper) {
-                        setRule("8: ATerm Close* Sp* × ( ¬(OLetter | Upper | Lower) )* Lower");
+                        setRule("8: ATerm Close* Sp* Ã— ( Â¬(OLetter | Upper | Lower) )* Lower");
                         return false;
                     }
                     if (gotSpace) {
                         if (t == Sp || t == Sep) {
-                            setRule("10: ( Term | ATerm ) Close* Sp × ( Sp | Sep )");
+                            setRule("10: ( Term | ATerm ) Close* Sp Ã— ( Sp | Sep )");
                             return false;
                         }
                     } else if (t == Close || t == Sp || t == Sep) {
-                        setRule("9: ( Term | ATerm ) Close* × ( Close | Sp | Sep )");
+                        setRule("9: ( Term | ATerm ) Close* Ã— ( Close | Sp | Sep )");
                         return false;
                     }
                     if (lookAfter == Term) break;
@@ -1782,12 +1782,12 @@ abstract public class GenerateBreakTest implements UCD_Types {
                 // at this point, we have an ATerm. All other conditions are ok, but we need to verify 6
                 if (t != OLetter && t != Upper && t != Lower) continue;
                 if (t == Lower) {
-                    setRule("8: ATerm Close* Sp* × ( ¬(OLetter | Upper | Lower) )* Lower");
+                    setRule("8: ATerm Close* Sp* Ã— ( Â¬(OLetter | Upper | Lower) )* Lower");
                     return false;
                 }
                 break;
             }
-            setRule("11: ( Term | ATerm ) Close* Sp* ÷");
+            setRule("11: ( Term | ATerm ) Close* Sp* Ã·");
             return true;
         }
     }
diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java b/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java
index 3c4fc7a9a65..6816bd49df8 100644
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateHanTransliterator.java,v $
-* $Date: 2004/02/07 01:01:15 $
-* $Revision: 1.14 $
+* $Date: 2004/04/17 18:21:39 $
+* $Revision: 1.15 $
 *
 *******************************************************************************
 */
@@ -547,18 +547,18 @@ public final class GenerateHanTransliterator implements UCD_Types {
         "e", "ei", "er", "en", "eng",
         "i", "ia", "iao", "ie", "iu", "ian", "in", "iang", "ing", "iong",
         "u", "ua", "uo", "uai", "ui", "uan", "un", "uang", "ueng",
-        "ü", "üe", "üan", "ün"
+        "Ã¼", "Ã¼e", "Ã¼an", "Ã¼n"
     };
     // Don't bother with the following rules; just add w,y to initials
-    // When “i” stands alone, a “y” will be added before it as “yi”. 
-    //      If “i” is the first letter of the syllable it will be changed to “y”. 
-    // When “u” stands alone, a “w” will be added before it as “wu”. 
-    //      If “u” is the first letter of the syllable it will be changed to “w”. e.g. “uang -> wang”. 
-    // When “ü” stands alone, a “y” will be added before it and “ü” will be changed to “u” as “yu”. 
-    //      If “ü” is the first letter of the syllable, then the spelling will be changed to “yu”. e.g. “üan -> yuan”. 
-    //Note: The nasal final “ueng” never occurs after an initial but always form a syllable by itself.
-    // The “o” in “iou” is hidden, so it will be wrote as “iu”. But, don’t forget to pronounce it. 
-    // The “e” in “uei” is hidden, so it will be wrote as “ui”. But, don’t forget to pronounce it. 
+    // When “i” stands alone, a “y” will be added before it as “yi”.
+    //      If “i” is the first letter of the syllable it will be changed to “y”.
+    // When “u” stands alone, a “w” will be added before it as “wu”.
+    //      If “u” is the first letter of the syllable it will be changed to “w”. e.g. “uang -> wang”.
+    // When “ü” stands alone, a “y” will be added before it and “ü” will be changed to “u” as “yu”.
+    //      If “ü” is the first letter of the syllable, then the spelling will be changed to “yu”. e.g. “üan -> yuan”.
+    //Note: The nasal final “ueng” never occurs after an initial but always forms a syllable by itself.
+    // The “o” in “iou” is hidden, so it will be written as “iu”. But, don’t forget to pronounce it.
+    // The “e” in “uei” is hidden, so it will be written as “ui”. But, don’t forget to pronounce it.
     
     
     public static final String[] pinyin_bopomofo = {
@@ -749,9 +749,9 @@ public final class GenerateHanTransliterator implements UCD_Types {
 	"long", "\u310c\u3128\u3125",
 	"lou", "\u310c\u3121",
 	"lu", "\u310c\u3128",
-	"lü", "\u310c\u3129",
+	"lÃ¼", "\u310c\u3129",
 	"luan", "\u310c\u3128\u3122",
-	"lüe", "\u310c\u3129\u311d",
+	"lÃ¼e", "\u310c\u3129\u311d",
 	"lun", "\u310c\u3128\u3123",
 	"luo", "\u310c\u3128\u311b",
 	"m", "\u3107",
@@ -796,9 +796,9 @@ public final class GenerateHanTransliterator implements UCD_Types {
 	"nong", "\u310b\u3128\u3125",
 	"nou", "\u310b\u3121",
 	"nu", "\u310b\u3128",
-	"nü", "\u310b\u3129",
+	"nÃ¼", "\u310b\u3129",
 	"nuan", "\u310b\u3128\u3122",
-	"nüe", "\u310b\u3129\u311d",
+	"nÃ¼e", "\u310b\u3129\u311d",
 	"nuo", "\u310b\u3128\u311b",
 	"o", "\u311b",
 	"ou", "\u3121",
@@ -1007,52 +1007,52 @@ public final class GenerateHanTransliterator implements UCD_Types {
     }
     
     /*
-    U+347C	·	liù	#lyuè  
-U+3500	·	lüè	#lvè
-U+3527	·	liù	#lyù
-U+3729	·	ào	#àu
-U+380E	·	jí	#jjí
-U+3825	·	l·	#lv·
-U+3A3C	·	lüè	#luè
-U+3B5A	·	li·	#ly· *** lü?
-U+3CB6	·	l·	#lv·
-U+3D56	·	niù	#nyù *** nü?
-U+3D88	·	li·ng	#li·ng
-U+3EF2	·	li·	#ly·*** lü?
-U+3F94	·	li·	#ly·*** lü?
-U+4071	·	ào	#àu
-U+40AE	·	liù	#lyuè *** lüe?
-U+430E	·	liù	#lyuè *** lüe?
-U+451E	·	liù	#lyù *** lü?
-U+4588	·	nüè	#nuè
-U+458B	·	nüè	#nuè
-U+45A1	·	niù	#nyù *** nü?
-U+4610	·	niù	#nyù *** nü?
-U+46BC	·	niù	#nyù *** nü?
-U+46DA	·	liù	#lyuè *** lüe?
-U+4896	·	liù	#lyù *** lü?
-U+4923	·	liù	#lyuè *** lüe?
-U+4968	·	liù	#lyù *** lü?
-U+4A0B	·	niù	#nyuè *** nüe?
-U+4AC4	·	chuò	#chuà
-U+4D08	·	·o	#·u
-U+4D8A	·	niù	#nyù *** nü?
-U+51CA	·	qíng	#qýng
-U+51D6	·	zhu·n	#zhu·n *** this is probably zh·n 
-U+5481	·	gàn	#gèm
-U+5838	·	féng	#fúng
-U+639F	·	lü·	#lu· *** this pronunciation surprises me, but I don't know...
-U+66D5	·	yàn	#yiàn
-U+6B3B	·	chu·	#chu· *** chua _is_ ok after all, my table missed an entry
-U+6B56	·	chu·	#chu· *** chua 
-U+6C7C	·	ni·	#ni·u
-U+6E6D	·	qiú	#qióu
-U+6F71	·	y·	#yi·
-U+7493	·	xiù	#xiòu
-U+7607	·	zh·ng	#zh·ng *** I suspect zh·ng
-U+7674	·	luán	#lüán
-U+7867	·	y·ng	#i·ng
-U+7878	·	nüè	#nuè
+    U+347C	·	liù	#lyuè  
+U+3500	·	lüè	#lvè
+U+3527	·	liù	#lyù
+U+3729	·	ào	#àu
+U+380E	·	jí	#jjí
+U+3825	·	l·	#lv·
+U+3A3C	·	lüè	#luè
+U+3B5A	·	li·	#ly· *** lü?
+U+3CB6	·	l·	#lv·
+U+3D56	·	niù	#nyù *** nü?
+U+3D88	·	li·ng	#li·ng
+U+3EF2	·	li·	#ly·*** lü?
+U+3F94	·	li·	#ly·*** lü?
+U+4071	·	ào	#àu
+U+40AE	·	liù	#lyuè *** lüe?
+U+430E	·	liù	#lyuè *** lüe?
+U+451E	·	liù	#lyù *** lü?
+U+4588	·	nüè	#nuè
+U+458B	·	nüè	#nuè
+U+45A1	·	niù	#nyù *** nü?
+U+4610	·	niù	#nyù *** nü?
+U+46BC	·	niù	#nyù *** nü?
+U+46DA	·	liù	#lyuè *** lüe?
+U+4896	·	liù	#lyù *** lü?
+U+4923	·	liù	#lyuè *** lüe?
+U+4968	·	liù	#lyù *** lü?
+U+4A0B	·	niù	#nyuè *** nüe?
+U+4AC4	·	chuò	#chuà
+U+4D08	·	·o	#·u
+U+4D8A	·	niù	#nyù *** nü?
+U+51CA	·	qíng	#qýng
+U+51D6	·	zhu·n	#zhu·n *** this is probably zh·n 
+U+5481	·	gàn	#gèm
+U+5838	·	féng	#fúng
+U+639F	·	lü·	#lu· *** this pronunciation surprises me, but I don't know...
+U+66D5	·	yàn	#yiàn
+U+6B3B	·	chu·	#chu· *** chua _is_ ok after all, my table missed an entry
+U+6B56	·	chu·	#chu· *** chua 
+U+6C7C	·	ni·	#ni·u
+U+6E6D	·	qiú	#qióu
+U+6F71	·	y·	#yi·
+U+7493	·	xiù	#xiòu
+U+7607	·	zh·ng	#zh·ng *** I suspect zh·ng
+U+7674	·	luán	#lüán
+U+7867	·	y·ng	#i·ng
+U+7878	·	nüè	#nuè
 */
     
     static Transliterator fixTypos = Transliterator.createFromRules("fix_typos", 
@@ -1061,12 +1061,12 @@ U+7878	
         +"$cons{iou}$nlet   > iu;"
         +"$cons{em}$nlet    > an;"
         +"$cons{uen}$nlet   > ueng;"
-        +"$cons{ve}$nlet    > üe;"
-        +"$cons{v}$nlet     > ü;"
+        +"$cons{ve}$nlet    > Ã¼e;"
+        +"$cons{v}$nlet     > Ã¼;"
         +"$cons{yue}$nlet   > iu;"
         +"$cons{yng}$nlet   > ing;"
         +"$cons{yu}$nlet    > iu;"
-        //+"$cons{ue}       > üe;"
+        //+"$cons{ue}       > Ã¼e;"
         +"jj                > j;"
         //+"$nlet{ng}$nlet  > eng;"
         //+"$nlet{n}$nlet   > en;"
@@ -1076,13 +1076,13 @@ U+7878	
         // new fixes        
         +"zhueng}$nlet       > zhong;"
         +"zhuen}$nlet       > zhuan;"
-        +"lue > lüe;"
+        +"lue > lÃ¼e;"
         +"liong > liang;"
-        +"nue > nüe;"
+        +"nue > nÃ¼e;"
         +"chua > chuo;"
         +"yian > yan;"
         +"yie > ye;"
-        +"lüan > luan;"
+        +"lÃ¼an > luan;"
         +"iong > yong;"
         , Transliterator.FORWARD);
     
@@ -1113,7 +1113,7 @@ U+7878	
         try {
             
             // chinese_frequency.txt
-            // 1	çš„	1588561	1588561	3.5008%
+            // 1	的	1588561	1588561	3.5008%
             // japanese_frequency.txt
             // 1 ? 17176
             
@@ -1421,7 +1421,7 @@ U+7878	
     @Unihan Data
 
 Bad pinyin data: \u4E7F	?	LE
-\u7684	?	de, de, dí, dì
+\u7684	?	de, de, dí, dì
 */
 
     static void fixChineseOverrides() throws IOException {
@@ -2024,7 +2024,7 @@ Bad pinyin data: \u4E7F	?	LE
                     + "# otherwise 'o'\n"
                     + "# otherwise last vowel\n"
                     + "::NFC;\n"
-                    + "$vowel = [aAeEiIoOuUüÜ];\n"
+                    + "$vowel = [aAeEiIoOuUÃ¼Ãœ];\n"
                     + "$consonant = [[a-z A-Z] - [$vowel]];\n"
                     + "$digit = [1-5];\n"
                     + "([aAeE]) ($vowel* $consonant*) ($digit) > $1 &digit-tone($3) $2;\n"
@@ -2054,10 +2054,10 @@ Bad pinyin data: \u4E7F	?	LE
                         if (i > 0) {
                             char last = result.charAt(result.length()-1);
                             if (last == 'u') {
-                                result.setCharAt(result.length()-1, 'ü');
+                                result.setCharAt(result.length()-1, 'Ã¼');
                                 continue main;
                             } else if (last == 'U') {
-                                result.setCharAt(result.length()-1, 'Ü');
+                                result.setCharAt(result.length()-1, 'Ãœ');
                                 continue main;
                             }
                         }
@@ -2085,22 +2085,22 @@ Bad pinyin data: \u4E7F	?	LE
             for (int i = source.length()-2; i >= 0; --i) {
                 ch = source.charAt(i);
                 if (ch == ':') {
-                    ch = 'Ü';
+                    ch = 'Ãœ';
                     --i;
                 }
                 if ('0' <= ch && ch <= '9') break;
-                if (ch != 'Ü' && (ch < 'A' || ch > 'Z')) {
+                if (ch != 'Ãœ' && (ch < 'A' || ch > 'Z')) {
                     Utility.fixDot();
                     System.out.println("Warning: non-ASCII in " + hex.transliterate(source) + " (" + hex.transliterate(debugLine) + ")");
                     break;
                 }
                 if (!gotIt) switch (ch) {
-                    case 'A': ch = "AÁ\u0102À\u0100".charAt(num); gotIt = true; break;
-                    case 'E': ch = "EÉ\u0114È\u0112".charAt(num); gotIt = true; break;
-                    case 'I': ch = "IÍ\u012CÌ\u012A".charAt(num); gotIt = true; break;
-                    case 'O': ch = "OÓ\u014EÒ\u014C".charAt(num); gotIt = true; break;
-                    case 'U': ch = "UÚ\u016CÙ\u016A".charAt(num); gotIt = true; break;
-                    case 'Ü': ch = "Ü\u01D7\u01D9\u01DB\u01D5".charAt(num); gotIt = true; break;
+                    case 'A': ch = "AÃ\u0102Ã€\u0100".charAt(num); gotIt = true; break;
+                    case 'E': ch = "EÃ‰\u0114Ãˆ\u0112".charAt(num); gotIt = true; break;
+                    case 'I': ch = "IÃ\u012CÃŒ\u012A".charAt(num); gotIt = true; break;
+                    case 'O': ch = "OÃ“\u014EÃ’\u014C".charAt(num); gotIt = true; break;
+                    case 'U': ch = "UÃš\u016CÃ™\u016A".charAt(num); gotIt = true; break;
+                    case 'Ãœ': ch = "Ãœ\u01D7\u01D9\u01DB\u01D5".charAt(num); gotIt = true; break;
                 }
                 handlePinyinTemp.insert(0,ch);
             }
diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java b/tools/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java
index fbb451d2465..f77a3760400 100644
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateLineBreakTest.java,v $
-* $Date: 2004/02/07 01:01:15 $
-* $Revision: 1.4 $
+* $Date: 2004/04/17 18:21:39 $
+* $Revision: 1.5 $
 *
 *******************************************************************************
 */
@@ -398,23 +398,23 @@ public class GenerateLineBreakTest implements UCD_Types {
         if (before == LB_CR && after == LB_LF) return false;
         if (before == LB_BK || before == LB_LF || before == LB_CR) return true;
 
-        //LB 3b  Don’t break before hard line breaks.
+        //LB 3b  Don’t break before hard line breaks.
         rule="3b";
         if (after == LB_BK || after == LB_LF | after == LB_CR) return false;
 
-        // LB 4  Don’t break before spaces or zero-width space.
-        // × SP
-        // × ZW
+        // LB 4  Don’t break before spaces or zero-width space.
+        // × SP
+        // × ZW
 
         rule="4";
         if (after == LB_SP || after == LB_ZW) return false;
 
         // LB 5 Break after zero-width space.
-        // ZW ÷
+        // ZW ÷
         rule="5";
         if (before == LB_ZW) return true;
 
-        // LB 6  Don’t break graphemes (before combining marks, around virama or on sequences of conjoining Jamos.
+        // LB 6  Don’t break graphemes (before combining marks, around virama or on sequences of conjoining Jamos.
         rule="6";
         if (after == LB_CM) return false;
         
@@ -441,8 +441,8 @@ public class GenerateLineBreakTest implements UCD_Types {
         rule="7";
         if (setBase && before == LB_SP) before = LB_ID;
 
-        // LB 8  Don’t break before ‘]’ or ‘!’ or ‘;’ or ‘/’,  even after spaces.
-        // × CL, × EX, × IS, × SY
+        // LB 8  Don’t break before ‘]’ or ‘!’ or ‘;’ or ‘/’,  even after spaces.
+        // × CL, × EX, × IS, × SY
         rule="8";
         if (after == LB_CL || after == LB_EX || after == LB_SY | after == LB_IS) return false;
 
@@ -456,31 +456,31 @@ public class GenerateLineBreakTest implements UCD_Types {
             }
         }
 
-        // LB 9  Don’t break after ‘[’, even after spaces.
-        // OP SP* ×
+        // LB 9  Don’t break after ‘[’, even after spaces.
+        // OP SP* ×
         rule="9";
         if (lastNonSpace == LB_OP) return false;
 
-        // LB 10  Don’t break within ‘”[’, , even with intervening spaces.
-        // QU SP* × OP
+        // LB 10  Don’t break within ‘”[’, , even with intervening spaces.
+        // QU SP* × OP
         rule="10";
         if (lastNonSpace == LB_QU && after == LB_OP) return false;
 
-        // LB 11  Don’t break within ‘]h’, even with intervening spaces.
-        // CL SP* × NS
+        // LB 11  Don’t break within ‘]h’, even with intervening spaces.
+        // CL SP* × NS
         rule="11";
         if (lastNonSpace == LB_CL && after == LB_NS) return false;
 
-        // LB 11a  Don’t break within ‘——’, even with intervening spaces.
-        // B2 × B2
+        // LB 11a  Don’t break within ‘——’, even with intervening spaces.
+        // B2 × B2
         rule="11a";
         if (lastNonSpace == LB_B2 && after == LB_B2) return false;
 
 
         if (recommended) {
-            // LB 13  Don’t break before or after NBSP or WORD JOINER
-            // × GL
-            // GL ×
+            // LB 13  Don’t break before or after NBSP or WORD JOINER
+            // × GL
+            // GL ×
 
             rule="11b";
             if (after == LB_GL || before == LB_GL) return false;
@@ -490,36 +490,36 @@ public class GenerateLineBreakTest implements UCD_Types {
 
         rule="12";
         // LB 12  Break after spaces
-        // SP ÷
+        // SP ÷
 
         if (before == LB_SP) return true;
 
         if (!recommended) {
-            // LB 13  Don’t break before or after NBSP or WORD JOINER
-            // × GL
-            // GL ×
+            // LB 13  Don’t break before or after NBSP or WORD JOINER
+            // × GL
+            // GL ×
 
             rule="13";
             if (after == LB_GL || before == LB_GL) return false;
         }
 
         rule="14";
-        // LB 14  Don’t break before or after ‘”’
-        // × QU
-        // QU ×
+        // LB 14  Don’t break before or after ‘”’
+        // × QU
+        // QU ×
         if (before == LB_QU || after == LB_QU) return false;
 
-        // LB 15  Don’t break before hyphen-minus, other hyphens, fixed-width spaces,
+        // LB 15  Don’t break before hyphen-minus, other hyphens, fixed-width spaces,
         // small kana and other non- starters,  or after acute accents:
-        // × BA
-        // × HY
-        // × NS
-        // BB ×
+        // × BA
+        // × HY
+        // × NS
+        // BB ×
         
         if (recommended) {
         // LB 14a  Break before and after CB
-        // CB ÷
-        // ÷ CB
+        // CB ÷
+        // ÷ CB
             if (before == LB_CB || after == LB_CB) return true;       
         
         }
@@ -532,51 +532,51 @@ public class GenerateLineBreakTest implements UCD_Types {
 
         if (!recommended) {
             // LB 15b  Break after hyphen-minus, and before acute accents:
-            // HY ÷
-            // ÷ BB
+            // HY ÷
+            // ÷ BB
 
             rule="15b";
             if (before == LB_HY) return true;
             if (after == LB_BB) return true;
         }
 
-        // LB 16  Don’t break between two ellipses, or between letters or numbers and ellipsis:
-        // AL × IN
-        // ID × IN
-        // IN × IN
-        // NU × IN
-        // Examples: ’9...’, ‘a...’, ‘H...’
+        // LB 16  Don’t break between two ellipses, or between letters or numbers and ellipsis:
+        // AL × IN
+        // ID × IN
+        // IN × IN
+        // NU × IN
+        // Examples: ’9...’, ‘a...’, ‘H...’
         rule="16";
         if ((before == LB_NU || before == LB_AL || before == LB_ID) && after == LB_IN) return false;
         if (before == LB_IN && after == LB_IN) return false;
 
         // Don't break alphanumerics.
-        // LB 17  Don’t break within ‘a9’, ‘3a’, or ‘H%’
-        // ID × PO
-        // AL × NU
-        // NU × AL
+        // LB 17  Don’t break within ‘a9’, ‘3a’, or ‘H%’
+        // ID × PO
+        // AL × NU
+        // NU × AL
         // Numbers are of the form PR ? ( OP | HY ) ? NU (NU | IS) * CL ?  PO ?
-        // Examples:   $(12.35)    2,1234    (12)¢    12.54¢
+        // Examples:   $(12.35)    2,1234    (12)¢    12.54¢
         // This is approximated with the following rules. (Some cases already handled above,
-        // like ‘9,’, ‘[9’.)
+        // like ‘9,’, ‘[9’.)
         rule="17";
         if (before == LB_ID && after == LB_PO) return false;
         if (before == LB_AL && after == LB_NU) return false;
         if (before == LB_NU && after == LB_AL) return false;
 
-        // LB 18  Don’t break between the following pairs of classes.
-        // CL × PO
-        // HY × NU
-        // IS × NU
-        // NU × NU
-        // NU × PO
-        // PR × AL
-        // PR × HY
-        // PR × ID
-        // PR × NU
-        // PR × OP
-        // SY × NU
-        // Example pairs: ‘$9’, ‘$[’, ‘$-‘, ‘-9’, ‘/9’, ‘99’, ‘,9’,  ‘9%’ ‘]%’
+        // LB 18  Don’t break between the following pairs of classes.
+        // CL × PO
+        // HY × NU
+        // IS × NU
+        // NU × NU
+        // NU × PO
+        // PR × AL
+        // PR × HY
+        // PR × ID
+        // PR × NU
+        // PR × OP
+        // SY × NU
+        // Example pairs: ‘$9’, ‘$[’, ‘$-‘, ‘-9’, ‘/9’, ‘99’, ‘,9’,  ‘9%’ ‘]%’
 
         rule="18";
         if (before == LB_CL && after == LB_PO) return false;
@@ -595,23 +595,23 @@ public class GenerateLineBreakTest implements UCD_Types {
 
         if (recommended) {
             // LB 15b  Break after hyphen-minus, and before acute accents:
-            // HY ÷
-            // ÷ BB
+            // HY ÷
+            // ÷ BB
 
             rule="18b";
             if (before == LB_HY) return true;
             if (after == LB_BB) return true;
         }
 
-        // LB 19  Don’t break between alphabetics (“at”)
-        // AL × AL
+        // LB 19  Don’t break between alphabetics (“at”)
+        // AL × AL
 
         rule="19";
         if (before == LB_AL && after == LB_AL) return false;
 
         // LB 20  Break everywhere else
-        // ALL ÷
-        // ÷ ALL
+        // ALL ÷
+        // ÷ ALL
 
         rule="20";
         return true;
@@ -754,7 +754,7 @@ public class GenerateLineBreakTest implements UCD_Types {
                 // Do not break between linking characters and letters, or before linking characters. This provides for Indic graphemes, where virama (halant) will link character clusters together.
      
                 rule = "12";
-                //Link Extend* × LetterBase  (12) 
+                //Link Extend* × LetterBase  (12) 
                 if (after == LetterBase || after == L || after == V || after == T || after == LV || after == LVT) {
                     int backOffset = findLastNon(source, offset, Extend, recommended);
                     if (backOffset >= 0) {
diff --git a/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java b/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
index f175b5e531e..9e272ecfdcb 100644
--- a/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
@@ -1169,6 +1169,7 @@ public class MakeUnicodeFiles {
             String line = in.readLine();
             if (line == null) break;
             if (line.startsWith("\uFEFF")) line = line.substring(1);
+            out.println(line);
             line = line.trim();
             int pos = line.indexOf('#');
             if (pos >= 0) line = line.substring(0,pos).trim();
@@ -1232,9 +1233,9 @@ public class MakeUnicodeFiles {
                     break;
                 default: throw new IllegalArgumentException("Internal Error");
             }
-            out.println(String.valueOf(ok).toUpperCase(Locale.ENGLISH) + ":\t" + line);
             if (ok) continue;
             out.println();
+            out.println(String.valueOf(ok).toUpperCase(Locale.ENGLISH));
             out.println("**** START Error Info ****");
             bf.showSetDifferences(out, rightSide, rightSet, leftSide, leftSet);
             out.println("**** END Error Info ****");
diff --git a/tools/unicodetools/com/ibm/text/UCD/Normalizer.java b/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
index beedcc1dd69..163bf2de87a 100644
--- a/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Normalizer.java,v $
-* $Date: 2004/02/06 18:30:20 $
-* $Revision: 1.15 $
+* $Date: 2004/04/17 18:21:39 $
+* $Revision: 1.16 $
 *
 *******************************************************************************
 */
@@ -22,7 +22,7 @@ import com.ibm.text.utility.*;
 /**
  * Implements Unicode Normalization Forms C, D, KC, KD.<br>
  * See UTR#15 for details.<br>
- * Copyright © 1998-1999 Unicode, Inc. All Rights Reserved.<br>
+ * Copyright © 1998-1999 Unicode, Inc. All Rights Reserved.<br>
  * The Unicode Consortium makes no expressed or implied warranty of any
  * kind, and assumes no liability for errors or omissions.
  * No liability is assumed for incidental and consequential damages
diff --git a/tools/unicodetools/com/ibm/text/UCD/NormalizerSample.java b/tools/unicodetools/com/ibm/text/UCD/NormalizerSample.java
index acd52ebe4bf..9c780e844e4 100644
--- a/tools/unicodetools/com/ibm/text/UCD/NormalizerSample.java
+++ b/tools/unicodetools/com/ibm/text/UCD/NormalizerSample.java
@@ -10,7 +10,7 @@ import com.ibm.text.utility.*;
 /**
  * Implements Unicode Normalization Forms C, D, KC, KD.<br>
  * See UTR#15 for details.<br>
- * Copyright © 1998-1999 Unicode, Inc. All Rights Reserved.<br>
+ * Copyright © 1998-1999 Unicode, Inc. All Rights Reserved.<br>
  * The Unicode Consortium makes no expressed or implied warranty of any
  * kind, and assumes no liability for errors or omissions.
  * No liability is assumed for incidental and consequential damages
diff --git a/tools/unicodetools/com/ibm/text/UCD/UnicodeInvariants.txt b/tools/unicodetools/com/ibm/text/UCD/UnicodeInvariants.txt
index bc101f29ef0..6d587175267 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UnicodeInvariants.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/UnicodeInvariants.txt
@@ -41,6 +41,8 @@
 #$East_Asian_Width:Neutral ? $GC:Uppercase_Letter
 $GC:Zs ? $Name:Â«.*SPACE.*Â»
 
+[$script:greek&$gc:Â«.*letter.*Â»] = [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126]
+
 # Examples of parsing errors
 
 # $LBA:Neutral =  $GC:Zp # example of non-existant property
@@ -54,7 +56,35 @@ $Line_Break:Unknown = [$General_Category:Unassigned $GeneralCategory:PrivateUse]
 $LB:OP = $GC:Ps
 $General_Category:Decimal_Number = $Numeric_Type:Decimal
 $Whitespace âŠƒ [$GC:Zs $GC:Zp $GC:Zl]
+
+# Comparisons across versions
+
 $ID_Start âŠ‡ $Ã—ID_Start
 $ID_Continue âŠ‡ $Ã—ID_Continue
 
+#$age:4.0.1 = $age4.0.0
 
+# Derivations
+
+$Math = [$GC:Sm $Other_Math]
+$Alphabetic = [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl $Other_Alphabetic]
+$Lowercase = [$GC:Ll $Other_Lowercase]
+$Uppercase = [$GC:Lu $Other_Uppercase]
+$ID_Start = [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl $Other_ID_Start]
+$ID_Continue = [$ID_Start $GC:Mn $GC:Mc $GC:Nd $GC:Pc]
+$Default_Ignorable_Code_Point = [[$Other_Default_Ignorable_Code_Point $GC:Cf $GC:Cc $GC:Cs $Variation_Selector $Noncharacter_Code_Point] - [$White_Space\uFFF9-\uFFFB]]
+$Grapheme_Extend = [$GC:Me $GC:Mn $Other_Grapheme_Extend]
+$Grapheme_Base = [^$GC:Cc $GC:Cf $GC:Cs $GC:Co $GC:Cn $GC:Zl $GC:Zp $Grapheme_Extend]
+
+# "Minimal" Other_: NOT hard requirements; just if we want to be minimal
+
+$Other_Math = [$Math - $GC:Sm]
+$Other_Alphabetic = [$Alphabetic - [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl]]
+$Other_Lowercase = [$Lowercase - $GC:Ll]
+$Other_Uppercase = [$Uppercase - $GC:Lu]
+$Other_ID_Start = [$ID_Start - [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl]]
+$Other_Default_Ignorable_Code_Point = [$Default_Ignorable_Code_Point - [[$GC:Cf $GC:Cc $GC:Cs $Variation_Selector $Noncharacter_Code_Point] - [$White_Space\uFFF9-\uFFFB]]]
+$Other_Grapheme_Extend = [$Grapheme_Extend - [$GC:Me $GC:Mn]]
+
+# Testing
+$script:greek = $Ã—script:greek
diff --git a/tools/unicodetools/com/ibm/text/utility/UTF32.java b/tools/unicodetools/com/ibm/text/utility/UTF32.java
index 5e34251930c..138abbcfa15 100644
--- a/tools/unicodetools/com/ibm/text/utility/UTF32.java
+++ b/tools/unicodetools/com/ibm/text/utility/UTF32.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/UTF32.java,v $
-* $Date: 2001/08/31 00:19:16 $
-* $Revision: 1.2 $
+* $Date: 2004/04/17 18:21:38 $
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */
@@ -53,28 +53,28 @@ package com.ibm.text.utility;
 <pre>
 // iteration forwards: Original
 for (int i = 0; i < s.length(); ++i) {
-    char ch = s.charAt(i);
-    doSomethingWith(ch);
+    char ch = s.charAt(i);
+    doSomethingWith(ch);
 }
 
 // iteration forwards: Changes for UTF-32
 int ch;
 for (int i = 0; i < s.length(); i+=UTF32.count16(ch)) {
-    ch = UTF32.char32At(s,i);
-    doSomethingWith(ch);
+    ch = UTF32.char32At(s,i);
+    doSomethingWith(ch);
 }
 
 // iteration backwards: Original
 for (int i = s.length()-1; i >= 0; --i) {
-    char ch = s.charAt(i);
-    doSomethingWith(ch);
+    char ch = s.charAt(i);
+    doSomethingWith(ch);
 }
 
 // iteration backwards: Changes for UTF-32
 int ch;
 for (int i = s.length()-1; i > 0; i-=UTF32.count16(ch)) {
-    ch = UTF32.char32At(s,i);
-    doSomethingWith(ch);
+    ch = UTF32.char32At(s,i);
+    doSomethingWith(ch);
 }
 
 * </pre>