ICU-5149 update unicode tools after cvs problems.

X-SVN-Rev: 19520
2025-04-05 21:45:37 +00:00 · 2006-04-05 22:13:04 +00:00 · 2006-04-05 22:13:04 +00:00 · 557bade86a
commit 557bade86a
parent ea4cd7f0fa
33 changed files with 964 additions and 346 deletions
--- a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $ 
-* $Date: 2005/06/08 01:44:48 $ 
-* $Revision: 1.42 $
+* $Date: 2006/04/05 22:12:46 $ 
+* $Revision: 1.43 $
 *
 *******************************************************************************
 */
@ -18,6 +18,7 @@ import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.CanonicalIterator;
 import com.ibm.icu.dev.test.util.BagFormatter;
+import com.ibm.icu.dev.test.util.TransliteratorUtilities;
 import com.ibm.icu.dev.test.util.UnicodeProperty;
 import com.ibm.icu.dev.test.util.UnicodePropertySource;
 import com.ibm.icu.impl.UCharacterProperty;
@ -33,7 +34,6 @@ import java.text.DateFormat;
 import java.text.SimpleDateFormat;

 import com.ibm.text.UCD.*;
-import com.ibm.text.UCD.UCD_Types;
 import com.ibm.text.utility.*;
 import com.ibm.text.UCD.Normalizer;

@ -4104,8 +4104,8 @@ F900..FAFF; CJK Compatibility Ideographs
        bf.setLineSeparator("<br>\r\n");
        ToolUnicodePropertySource ups = ToolUnicodePropertySource.make("");
        bf.setUnicodePropertyFactory(ups);
-        bf.setShowLiteral(bf.toHTML);
-        bf.setFixName(bf.toHTML);
+        bf.setShowLiteral(TransliteratorUtilities.toHTML);
+        bf.setFixName(TransliteratorUtilities.toHTML);
        UCD ucd = Default.ucd();
        UnicodeProperty cat = ups.getProperty("gc");
        UnicodeSet ucd410 = cat.getSet("Cn")
--- a/tools/unicodetools/com/ibm/text/UCD/CaseFoldingHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/CaseFoldingHeader.txt
@ -10,7 +10,7 @@
 # The data supports both implementations that require simple case foldings
 # (where string lengths don't change), and implementations that allow full case folding
 # (where string lengths may grow). Note that where they can be supported, the
-# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
+# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
 #
 # All code points not listed in this file map to themselves.
 #
--- a/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $
-* $Date: 2005/11/01 00:10:53 $
-* $Revision: 1.17 $
+* $Date: 2006/04/05 22:12:44 $
+* $Revision: 1.18 $
 *
 *******************************************************************************
 */
@ -840,6 +840,13 @@ public final class ConvertUCD implements UCD_Types {

            } else if (fieldName.equals("gc")) {
                uData.generalCategory = Utility.lookup(fieldValue, UCD_Names.GENERAL_CATEGORY, true);
+//                if (major >= 5 && uData.script == Unknown_Script
+//                		&& uData.generalCategory != Cn
+//                		&& uData.generalCategory != Cs
+//                		&& uData.generalCategory != Co) {
+//                	uData.script = COMMON_SCRIPT;
+//                	System.out.println("Resetting to Common Script: " + Utility.hex(uData.codePoint));
+//                }
            } else if (fieldName.equals("bc")) {
                uData.bidiClass = Utility.lookup(fieldValue, UCD_Names.BIDI_CLASS, true);
            } else if (fieldName.equals("dt")) {
@ -878,8 +885,17 @@ public final class ConvertUCD implements UCD_Types {
                uData.numericValue = Utility.doubleFrom(fieldValue);
            } else if (fieldName.equals("cc")) {
                uData.combiningClass = (byte)Utility.intFrom(fieldValue);
+                if (uData.combiningClass == 9 && major >= 5) {
+                	System.out.println("setting Grapheme_Link " + Utility.hex(uData.codePoint) + "\t" + uData.name);
+                	uData.binaryProperties |= (1<<GraphemeLink);
+                	System.out.println(uData);
+            	}
            } else if (fieldName.equals("bp")) {
                uData.binaryProperties = (byte)Utility.longFrom(fieldValue);
+//                if (major >= 5 && (uData.binaryProperties & 1<<Noncharacter_Code_Point) != 0) {
+//                	uData.script = Unknown_Script;
+//                }
+                System.out.println("Resetting: " + uData);
            } else {
                throw new IllegalArgumentException("Unknown fieldName");
            }
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateBreakTest.java,v $
-* $Date: 2005/03/30 17:19:32 $
-* $Revision: 1.13 $
+* $Date: 2006/04/05 22:12:44 $
+* $Revision: 1.14 $
 *
 *******************************************************************************
 */
@ -310,7 +310,7 @@ abstract public class GenerateBreakTest implements UCD_Types {
        //printLine(out, samples[LB_ZW], "", samples[LB_CL]);
        //printLine(out, samples[LB_ZW], " ", samples[LB_CL]);

-        UnicodeDataFile fc = UnicodeDataFile.openHTMLAndWriteHeader("auxiliary\\", fileName + "BreakTest");
+        UnicodeDataFile fc = UnicodeDataFile.openHTMLAndWriteHeader("DerivedData\\auxiliary\\", fileName + "BreakTest");
        PrintWriter out = fc.out;

 /*        PrintWriter out = Utility.openPrintWriter("auxiliary\\" 
@ -354,7 +354,7 @@ abstract public class GenerateBreakTest implements UCD_Types {
        String[] testCase = new String[50];
        // do main test

-        UnicodeDataFile fc = UnicodeDataFile.openAndWriteHeader("auxiliary\\", fileName + "BreakTest" 
+        UnicodeDataFile fc = UnicodeDataFile.openAndWriteHeader("DerivedData\\auxiliary\\", fileName + "BreakTest" 
                + (shortVersion ? "_SHORT" : ""));
        PrintWriter out = fc.out;
 /*        PrintWriter out = Utility.openPrintWriter("TR29\\" + fileName + "BreakTest" 
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
-* $Date: 2005/03/26 05:40:04 $
-* $Revision: 1.17 $
+* $Date: 2006/04/05 22:12:45 $
+* $Revision: 1.18 $
 *
 *******************************************************************************
 */
@ -585,8 +585,8 @@ public class GenerateCaseFolding implements UCD_Types {
        out.println("# SpecialCasing" + UnicodeDataFile.getFileSuffix(false));
        out.println(UnicodeDataFile.generateDateLine());
        out.println("#");
-        Utility.appendFile("SpecialCasingHeader.txt", Utility.UTF8, out);
 */        
+        //Utility.appendFile("com/ibm/text/UCD/SpecialCasingHeader.txt", Utility.UTF8, out);

        Iterator it = sorted.keySet().iterator();
        int lastOrder = -1;
@ -609,7 +609,7 @@ public class GenerateCaseFolding implements UCD_Types {
                case 3: out.println("# Ligatures"); break;
                case 4: skipLine = true; break;
                case 5: out.println("# No corresponding uppercase precomposed character"); break;
-                case 6: Utility.appendFile("SpecialCasingIota.txt", Utility.UTF8, out); break;
+                case 6: Utility.appendFile("com/ibm/text/UCD/SpecialCasingIota.txt", Utility.UTF8, out); break;
                case 7: out.println("# Some characters with YPOGEGRAMMENI also have no corresponding titlecases"); break;
                case 8: skipLine = true; break;
                }
@ -617,7 +617,7 @@ public class GenerateCaseFolding implements UCD_Types {
            }
            out.println(line);
        }
-        //Utility.appendFile("SpecialCasingFooter.txt", Utility.UTF8, out);
+        Utility.appendFile("com/ibm/text/UCD/SpecialCasingFooter.txt", Utility.UTF8, out);
        udf.close();
        //Utility.renameIdentical(mostRecent, Utility.getOutputName(newFile), batName[0]);
    }
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateConfusables.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateConfusables.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateConfusables.java,v $
-* $Date: 2005/11/19 05:39:39 $
-* $Revision: 1.9 $
+* $Date: 2006/04/05 22:12:45 $
+* $Revision: 1.10 $
 *
 *******************************************************************************
 */
@ -38,6 +38,7 @@ import com.ibm.icu.dev.demo.translit.InfoDialog;
 import com.ibm.icu.dev.test.util.ArrayComparator;
 import com.ibm.icu.dev.test.util.BagFormatter;
 import com.ibm.icu.dev.test.util.ICUPropertyFactory;
+import com.ibm.icu.dev.test.util.TransliteratorUtilities;
 import com.ibm.icu.dev.test.util.UnicodeLabel;
 import com.ibm.icu.dev.test.util.UnicodeMap;
 import com.ibm.icu.dev.test.util.UnicodeProperty;
@ -423,15 +424,15 @@ public class GenerateConfusables {
 			BagFormatter bf = new BagFormatter();
 			bf.setUnicodePropertyFactory(ups);
 			bf.setLabelSource(null);
-			bf.setShowLiteral(bf.toHTMLControl);
+			bf.setShowLiteral(TransliteratorUtilities.toHTMLControl);
 			bf.setMergeRanges(true);

 			PrintWriter out = BagFormatter.openUTF8Writer(outdir, "review.txt");
 			//reviews.putAll(UNASSIGNED, "");
 			out.print("\uFEFF");
 			out.println("# Review List for IDN");
-			out.println("# $Revision: 1.9 $");
-			out.println("# $Date: 2005/11/19 05:39:39 $");
+			out.println("# $Revision: 1.10 $");
+			out.println("# $Date: 2006/04/05 22:12:45 $");
 			out.println("");

 			UnicodeSet fullSet = reviews.getSet("").complement();
@ -478,7 +479,7 @@ public class GenerateConfusables {
 			BagFormatter bf = new BagFormatter();
 			bf.setUnicodePropertyFactory(ups);
 			bf.setLabelSource(null);
-			bf.setShowLiteral(bf.toHTMLControl);
+			bf.setShowLiteral(TransliteratorUtilities.toHTMLControl);
 			bf.setMergeRanges(true);
 			
 			UnicodeSet letters = new UnicodeSet("[[:Alphabetic:][:Mark:][:Nd:]]");
@ -486,8 +487,8 @@ public class GenerateConfusables {
 			PrintWriter out = BagFormatter.openUTF8Writer(outdir, "idnchars.txt");

 			out.println("# Recommended Identifier Profiles for IDN");
-			out.println("# $Revision: 1.9 $");
-			out.println("# $Date: 2005/11/19 05:39:39 $");
+			out.println("# $Revision: 1.10 $");
+			out.println("# $Date: 2006/04/05 22:12:45 $");

 			out.println("");
 			out.println("# Output Characters");
@ -549,15 +550,15 @@ public class GenerateConfusables {
 			BagFormatter bf = new BagFormatter();
 			bf.setUnicodePropertyFactory(ups);
 			bf.setLabelSource(null);
-			bf.setShowLiteral(bf.toHTMLControl);
+			bf.setShowLiteral(TransliteratorUtilities.toHTMLControl);
 			bf.setMergeRanges(true);

 			PrintWriter out = BagFormatter.openUTF8Writer(outdir,
 					"xidmodifications.txt");

 			out.println("# Security Profile for General Identifiers");
-			out.println("# $Revision: 1.9 $");
-			out.println("# $Date: 2005/11/19 05:39:39 $");
+			out.println("# $Revision: 1.10 $");
+			out.println("# $Date: 2006/04/05 22:12:45 $");
 			out.println("");

 			out.println("# Characters restricted");
@ -613,8 +614,8 @@ public class GenerateConfusables {
 			//someRemovals = removals;
 			out = BagFormatter.openUTF8Writer(outdir, "draft-restrictions.txt");
 			out.println("# Characters restricted in domain names");
-			out.println("# $Revision: 1.9 $");
-			out.println("# $Date: 2005/11/19 05:39:39 $");
+			out.println("# $Revision: 1.10 $");
+			out.println("# $Date: 2006/04/05 22:12:45 $");
 			out.println("#");
 			out.println("# This file contains a draft list of characters for use in");
 			out.println("#     UTR #36: Unicode Security Considerations");
@ -1148,8 +1149,8 @@ public class GenerateConfusables {
 		public void writeSource(String directory, String filename) throws IOException {
 			PrintWriter out = BagFormatter.openUTF8Writer(directory, filename);
 			out.println("# Source File for IDN Confusables");
-			out.println("# $Revision: 1.9 $");
-			out.println("# $Date: 2005/11/19 05:39:39 $");
+			out.println("# $Revision: 1.10 $");
+			out.println("# $Date: 2006/04/05 22:12:45 $");
 			out.println("");
 			dataMixedAnycase.writeSource(out);
 			out.close();
@ -1159,8 +1160,8 @@ public class GenerateConfusables {
 			PrintWriter out = BagFormatter.openUTF8Writer(directory, filename);
 			out.print('\uFEFF');
 			out.println("# Recommended confusable mapping for IDN");
-			out.println("# $Revision: 1.9 $");
-			out.println("# $Date: 2005/11/19 05:39:39 $");
+			out.println("# $Revision: 1.10 $");
+			out.println("# $Date: 2006/04/05 22:12:45 $");
 			out.println("");

 			if (appendFile) {
@ -1347,7 +1348,7 @@ public class GenerateConfusables {
 			}				
 		}

-		static class MyCollectionFilter implements CollectionUtilities.Filter {
+		static class MyCollectionFilter implements CollectionUtilities.ObjectMatcher {
 			UnicodeSet outputAllowed;
 			int minLength;
 			public boolean matches(Object o) {
@ -1368,8 +1369,8 @@ public class GenerateConfusables {
 			UnicodeSet representable = new UnicodeSet();
 			out.print('\uFEFF');
 			out.println("# Summary: Recommended confusable mapping for IDN");
-			out.println("# $Revision: 1.9 $");
-			out.println("# $Date: 2005/11/19 05:39:39 $");
+			out.println("# $Revision: 1.10 $");
+			out.println("# $Date: 2006/04/05 22:12:45 $");
 			out.println("");
 			MyEquivalenceClass data = dataMixedAnycase;
 			Set items = data.getOrderedExplicitItems();
@ -1446,7 +1447,7 @@ public class GenerateConfusables {
 				representable.removeAll(script);
 				BagFormatter bf = new BagFormatter();
 				bf.setValueSource(ups.getProperty("script"));
-				bf.setShowLiteral(bf.toHTMLControl);
+				bf.setShowLiteral(TransliteratorUtilities.toHTMLControl);
 				bf.showSetNames(out, representable);
 			}
 			out.close();
@ -1493,8 +1494,8 @@ public class GenerateConfusables {
 			PrintWriter out = BagFormatter.openUTF8Writer(outdir, filename);
 			out.print('\uFEFF');
 			out.println("# Summary: Whole-Script Confusables");
-			out.println("# $Revision: 1.9 $");
-			out.println("# $Date: 2005/11/19 05:39:39 $");
+			out.println("# $Revision: 1.10 $");
+			out.println("# $Date: 2006/04/05 22:12:45 $");
 			out.println("# This data is used for determining whether a strings is a");
 			out.println("# whole-script or mixed-script confusable.");
 			out.println("# The mappings here ignore common and inherited script characters,");
@ -1539,7 +1540,7 @@ public class GenerateConfusables {
 				script_set[i] = new UnicodeSet("[:script=" + UScript.getName(i) + ":]"); // ugly hack
 			}
 			bf.setValueSource(ups.getProperty("script"));
-			bf.setShowLiteral(bf.toHTMLControl);
+			bf.setShowLiteral(TransliteratorUtilities.toHTMLControl);
 			bf.setLabelSource(UnicodeLabel.NULL);
 		}
 		WholeScript(UnicodeSet filterSet, String label) {
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
-* $Date: 2005/10/11 19:39:15 $
-* $Revision: 1.39 $
+* $Date: 2006/04/05 22:12:44 $
+* $Revision: 1.40 $
 *
 *******************************************************************************
 */
@ -756,41 +756,41 @@ public class GenerateData implements UCD_Types {

        //log.println("# " + fileName + UnicodeDataFile.getFileSuffix(false));
        //log.println(UnicodeDataFile.generateDateLine());
-        log.println("#");
-        log.println("# Normalization Test Suite");
-        log.println("# Format:");
-        log.println("#");
-        log.println("#   Columns (c1, c2,...) are separated by semicolons");
-        log.println("#   Comments are indicated with hash marks");
-        log.println("#");
-        log.println("# CONFORMANCE:");
-        log.println("# 1. The following invariants must be true for all conformant implementations");
-        log.println("#");
-        log.println("#    NFC");
-        log.println("#      c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3)");
-        log.println("#      c4 ==  NFC(c4) ==  NFC(c5)");
-        log.println("#");
-        log.println("#    NFD");
-        log.println("#      c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3)");
-        log.println("#      c5 ==  NFD(c4) ==  NFD(c5)");
-        log.println("#");
-        log.println("#    NFKC");
-        log.println("#      c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)");
-        log.println("#");
-        log.println("#    NFKD");
-        log.println("#      c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)");
-        log.println("#");
-        log.println("# 2. For every code point X assigned in this version of Unicode that is not specifically");
-        log.println("#    listed in Part 1, the following invariants must be true for all conformant");
-        log.println("#    implementations:");
-        log.println("#");
-        log.println("#      X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)");
+//        log.println("#");
+//        log.println("# Normalization Test Suite");
+//        log.println("# Format:");
+//        log.println("#");
+//        log.println("#   Columns (c1, c2,...) are separated by semicolons");
+//        log.println("#   Comments are indicated with hash marks");
+//        log.println("#");
+//        log.println("# CONFORMANCE:");
+//        log.println("# 1. The following invariants must be true for all conformant implementations");
+//        log.println("#");
+//        log.println("#    NFC");
+//        log.println("#      c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3)");
+//        log.println("#      c4 ==  NFC(c4) ==  NFC(c5)");
+//        log.println("#");
+//        log.println("#    NFD");
+//        log.println("#      c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3)");
+//        log.println("#      c5 ==  NFD(c4) ==  NFD(c5)");
+//        log.println("#");
+//        log.println("#    NFKC");
+//        log.println("#      c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)");
+//        log.println("#");
+//        log.println("#    NFKD");
+//        log.println("#      c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)");
+//        log.println("#");
+//        log.println("# 2. For every code point X assigned in this version of Unicode that is not specifically");
+//        log.println("#    listed in Part 1, the following invariants must be true for all conformant");
+//        log.println("#    implementations:");
+//        log.println("#");
+//        log.println("#      X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)");

        System.out.println("Writing Part 1");

-        log.println("#");
-        log.println("@Part0 # Specific cases");
-        log.println("#");
+//        log.println("#");
+//        log.println("@Part0 # Specific cases");
+//        log.println("#");

        for (int j = 0; j < testSuiteCases.length; ++j) {
            writeLine(testSuiteCases[j], log, false);
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateNamedSequences.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateNamedSequences.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateNamedSequences.java,v $
-* $Date: 2005/05/27 21:40:51 $
-* $Revision: 1.1 $
+* $Date: 2006/04/05 22:12:45 $
+* $Revision: 1.2 $
 *
 *******************************************************************************
 */
@ -117,7 +117,7 @@ public final class GenerateNamedSequences implements UCD_Types {
            "@date@", Default.getDate(),
            "@table@", table};
                
-        Utility.appendFile("NamedSequences-Template.html", Utility.UTF8, out, replacementList);
+        Utility.appendFile("com/ibm/text/UCD/NamedSequences-Template.html", Utility.UTF8, out, replacementList);
     
        out.close();
        //Utility.renameIdentical(mostRecent, Utility.getOutputName(filename), batName[0]);
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java,v $
-* $Date: 2005/10/11 19:39:15 $
-* $Revision: 1.6 $
+* $Date: 2006/04/05 22:12:44 $
+* $Revision: 1.7 $
 *
 *******************************************************************************
 */
@ -104,18 +104,31 @@ public final class GenerateStandardizedVariants implements UCD_Types {
        
        String version = Default.ucd().getVersion();
        int lastDot = version.lastIndexOf('.');
-        String updateDirectory = version.substring(0,lastDot) + "-Update";
-        int updateV = version.charAt(version.length()-1) - '0';
-        if (updateV != 0) updateDirectory += (char)('1' + updateV);
-        if (DEBUG) System.out.println("updateDirectory: " + updateDirectory);
+        String updateDirectory;
+        String partialFilename;
+        if (version.compareTo("4.1.0") < 0) {
+        	updateDirectory = version.substring(0,lastDot) + "-Update";
+            int updateV = version.charAt(version.length()-1) - '0';
+            if (updateV != 0) updateDirectory += (char)('1' + updateV);
+            if (DEBUG) System.out.println("updateDirectory: " + updateDirectory);
+            partialFilename = "StandardizedVariants-" + Default.ucd().getVersion();
+        } else if (version.compareTo("4.1.0") == 0) {			
+        	updateDirectory = version.substring(0,lastDot) + "/ucd";
+            partialFilename = "StandardizedVariants";
+        } else {			
+        	updateDirectory = version + "/ucd";
+            partialFilename = "StandardizedVariants";
+        }
+
        
        String[] replacementList = {
            "@revision@", Default.ucd().getVersion(),
            "@updateDirectory@", updateDirectory,
+            "@filename@", partialFilename,
            "@date@", Default.getDate(),
            "@table@", table};
                
-        Utility.appendFile("StandardizedVariants-Template.html", Utility.UTF8, out, replacementList);
+        Utility.appendFile("com/ibm/text/UCD/StandardizedVariants-Template.html", Utility.UTF8, out, replacementList);
     
        out.close();
        //Utility.renameIdentical(mostRecent, Utility.getOutputName(filename), batName[0]);
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateStringPrep.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateStringPrep.java
@ -15,6 +15,7 @@ import java.util.Set;
 import java.util.TreeSet;

 import com.ibm.icu.dev.test.util.BagFormatter;
+import com.ibm.icu.dev.test.util.TransliteratorUtilities;
 import com.ibm.icu.dev.test.util.UnicodeLabel;
 import com.ibm.icu.dev.test.util.UnicodeMap;
 import com.ibm.icu.dev.test.util.UnicodeMap.Composer;
@ -91,7 +92,7 @@ class GenerateStringPrep implements UCD_Types {

 	void genStringPrep() throws IOException {
 		//showScriptToBlock();
-		bf.setShowLiteral(BagFormatter.toHTMLControl);
+		bf.setShowLiteral(TransliteratorUtilities.toHTMLControl);
 		bf.setUnicodePropertyFactory(ups);
 		//bf.setValueSource(UnicodeLabel.NULL);
 		if (false) {
@ -508,7 +509,7 @@ class GenerateStringPrep implements UCD_Types {
 		}	
 		return "<span title='" + ucd.getCodeAndName(string) + "'>"
 		+ pad1
-		+ BagFormatter.toHTMLControl.transliterate(string)
+		+ TransliteratorUtilities.toHTMLControl.transliterate(string)
 		+ pad
 		+ "</span> ";
 	}
--- a/tools/unicodetools/com/ibm/text/UCD/IDNTester.java
+++ b/tools/unicodetools/com/ibm/text/UCD/IDNTester.java
@ -0,0 +1,142 @@
+package com.ibm.text.UCD;
+
+import java.io.IOException;
+import java.io.PrintWriter;
+
+import com.ibm.icu.dev.test.util.BagFormatter;
+import com.ibm.icu.impl.PrettyPrinter;
+import com.ibm.icu.text.IDNA;
+import com.ibm.icu.text.StringPrepParseException;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.text.utility.Utility;
+
+/**
+ * Tool that sorts code points into "output" and "input-only" sets, once by
+ * round-tripping each code point through ICU's IDNA conversion and once by
+ * case-folding/NFKC-normalizing the members of the Mod_ID_Continue_NO_Cf
+ * derived property under Unicode 3.2.0 and 5.0.0. The sizes and patterns of
+ * the resulting sets are written to idnCount.html.
+ * <p>
+ * All state lives in static fields, and the scratch buffers are shared and
+ * mutated without synchronization.
+ */
+public class IDNTester {
+	// Scratch buffers reused by getIDNAType for every code point.
+	static StringBuffer inbuffer = new StringBuffer();
+	static StringBuffer intermediate, outbuffer;
+	// Result codes returned by getIDNAType.
+	static final int OK = 0, DELETED = 1, ILLEGAL = 2, REMAPPED = 3, IDNA_TYPE_LIMIT = 4;
+	// Code points whose IDNA type is DELETED or REMAPPED.
+	static UnicodeSet IDNInputOnly = new UnicodeSet();
+	// Code points whose IDNA type is OK.
+	static UnicodeSet IDNOutput = new UnicodeSet();
+	// Set to true once initialize() has filled all of the sets.
+	static boolean initialized = false;
+	static UnicodeSet IDInputOnly32 = new UnicodeSet();
+	static UnicodeSet IDOutput32 = new UnicodeSet();
+	static UnicodeSet IDInputOnly50 = new UnicodeSet();
+	static UnicodeSet IDOutput50 = new UnicodeSet();
+	static PrettyPrinter pp = new PrettyPrinter();
+	// Report writer; assigned in main and used by showSet.
+	static PrintWriter pw;
+	
+	/**
+	 * Builds all sets and writes them, plus a few differences and
+	 * intersections between them, to idnCount.html in Utility.GEN_DIR.
+	 *
+	 * @param args ignored
+	 * @throws IOException if the report file cannot be opened
+	 */
+	public static void main(String[] args) throws IOException {
+		initialize();
+		pw = BagFormatter.openUTF8Writer(Utility.GEN_DIR, "idnCount.html");
+		try {
+			pw.println("<html><body>");
+			showSet("IDN InputOnly: ", IDNInputOnly);
+			showSet("IDN Output: ", IDNOutput);
+			showSet("ID InputOnly, U3.2: ", IDInputOnly32);
+			showSet("ID Output, U3.2: ", IDOutput32);
+			
+			showSet("IDN Output - ID Output, U3.2: ", new UnicodeSet(IDNOutput).removeAll(IDOutput32));
+			showSet("IDN Output & ID Output, U3.2: ", new UnicodeSet(IDNOutput).retainAll(IDOutput32));
+			showSet("ID Output - IDN Output, U3.2: ", new UnicodeSet(IDOutput32).removeAll(IDNOutput));
+			
+			showSet("ID InputOnly, U5.0: ", IDInputOnly50);
+			showSet("ID Output, U5.0: ", IDOutput50);
+			showSet("ID Output, U5.0 - U3.2: ", new UnicodeSet(IDOutput50).removeAll(IDOutput32));
+			
+			pw.println("</body></html>");
+		} finally {
+			// Close (and thereby flush) the report even if printing a set fails.
+			pw.close();
+		}
+	}
+	
+	/**
+	 * Writes one report section: a heading made of the title followed by the
+	 * set's size, then the set's pattern, then a blank line.
+	 *
+	 * @param title heading text; the set size is appended directly to it
+	 * @param set   set to display
+	 */
+	public static void showSet(String title, UnicodeSet set) {
+		pw.println("<h2>" + title + set.size() + "</h2>" + "<p>" + pp.toPattern(set) + "</p>");
+		pw.println();
+	}
+	
+	/**
+	 * Returns the set of code points classified as IDN input-only,
+	 * building the sets first if that has not happened yet.
+	 */
+	static UnicodeSet getIDNInput() {
+		if (!initialized) initialize();
+		return IDNInputOnly;
+	}
+
+	/**
+	 * Returns the set of code points classified as IDN output,
+	 * building the sets first if that has not happened yet.
+	 */
+	static UnicodeSet getIDNOutput() {
+		if (!initialized) initialize();
+		// Previously returned IDNInputOnly, a copy-paste slip from getIDNInput().
+		return IDNOutput;
+	}
+
+	/**
+	 * Fills every static set by scanning the whole code point range.
+	 * Prints the current code point to System.out every 0x1000 code points
+	 * as a progress indicator.
+	 */
+	private static void initialize() {
+		// Characters removed from both identifier input sets before scanning.
+		UnicodeSet oddballs = new UnicodeSet("[\u034F \u180B-\u180D \uFE00-\uFE0F _]");
+		UCD U32 = UCD.make("3.2.0");
+		Normalizer nfkc32 = new Normalizer(Normalizer.NFKC, "3.2.0");
+		UCDProperty xid32 = DerivedProperty.make(UCD.Mod_ID_Continue_NO_Cf,U32);
+		UnicodeSet IDInput32 = xid32.getSet();
+		IDInput32.add('-').removeAll(oddballs);
+		
+		UCD U50 = UCD.make("5.0.0");
+		Normalizer nfkc50 = new Normalizer(Normalizer.NFKC, "5.0.0");
+		UCDProperty xid50 = DerivedProperty.make(UCD.Mod_ID_Continue_NO_Cf,U50);
+		UnicodeSet IDInput50 = xid50.getSet();
+		IDInput50.add('-').removeAll(oddballs);
+		
+		// Inclusive upper bound: U+10FFFF is the last code point and was
+		// skipped by the former "i < 0x10FFFF" condition.
+		for (int i = 0; i <= 0x10FFFF; ++i) {
+			if ((i & 0xFFF) == 0) {
+				System.out.println(i);
+				System.out.flush();
+			}
+			int type = getIDNAType(i);
+			if (type == OK) {
+				IDNOutput.add(i);
+			} else if (type != ILLEGAL) {
+				// DELETED or REMAPPED: accepted as input but never produced.
+				IDNInputOnly.add(i);
+			}
+			if (IDInput32.contains(i)) {
+				splitSet(IDInputOnly32, IDOutput32, U32, nfkc32, i);
+			}
+			if (IDInput50.contains(i)) {
+				splitSet(IDInputOnly50, IDOutput50, U50, nfkc50, i);
+			}
+		}
+		initialized = true;
+	}
+
+	/**
+	 * Adds code point i to outputSet when it is below 0x7F or is left
+	 * unchanged by full case folding, then NFKC, then full case folding
+	 * again; otherwise adds it to inputOnlySet.
+	 *
+	 * @param inputOnlySet receives code points changed by one of the steps
+	 * @param outputSet    receives code points that survive all steps
+	 * @param ucd          source of the case-folding data
+	 * @param nfkc         normalizer applied between the two foldings
+	 * @param i            code point to classify
+	 */
+	private static void splitSet(UnicodeSet inputOnlySet, UnicodeSet outputSet, UCD ucd, Normalizer nfkc, int i) {
+		if (i < 0x7F) {
+			outputSet.add(i);
+			return;
+		}
+		String v = UTF16.valueOf(i);
+		String s = ucd.getCase(i, UCD.FULL, UCD.FOLD);
+		if (s.equals(v)) {
+			s = nfkc.normalize(s);
+			if (s.equals(v)) {
+				s = ucd.getCase(s, UCD.FULL, UCD.FOLD);
+				if (s.equals(v)) {
+					outputSet.add(i);
+					return;
+				}
+			}
+		}
+		inputOnlySet.add(i);
+	}
+
+	/**
+	 * Classifies a single code point by converting it to ASCII and back
+	 * with ICU's IDNA.
+	 *
+	 * @param cp code point to test
+	 * @return OK for '-' or when the round trip equals the input (per
+	 *         TestData.equals); DELETED when the ASCII form is empty;
+	 *         REMAPPED when the round trip differs from the input;
+	 *         ILLEGAL when either conversion throws
+	 */
+	static public int getIDNAType(int cp) {
+		if (cp == '-') return OK;
+		inbuffer.setLength(0);
+		UTF16.append(inbuffer, cp);
+		try {
+			intermediate = IDNA.convertToASCII(inbuffer,
+					IDNA.DEFAULT); // USE_STD3_RULES
+			if (intermediate.length() == 0)
+				return DELETED;
+			outbuffer = IDNA.convertToUnicode(intermediate,
+					IDNA.USE_STD3_RULES);
+		} catch (StringPrepParseException e) {
+			return ILLEGAL;
+		} catch (Exception e) {
+			// Anything other than a StringPrep failure is logged, then
+			// treated the same as an illegal code point.
+			System.out.println("Failure at: " + Utility.hex(cp));
+			return ILLEGAL;
+		}
+		if (!TestData.equals(inbuffer, outbuffer))
+			return REMAPPED;
+		return OK;
+	}
+
+}
--- a/tools/unicodetools/com/ibm/text/UCD/InvariantTest.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/InvariantTest.txt
@ -0,0 +1,75 @@
+Let $letter = [$gc:Lu $gc:Ll $gc:Lt $gc:Lo $gc:Lm];
+Let $number = [$gc:Nd $gc:Nl $gc:No]
+Let $mark = [$gc:mn $gc:me $gc:mc]
+Let $LMN = [$letter $number $mark]
+Let $gcAllPunctuation = [$gc:Open_Punctuation $gc:Close_Punctuation $gc:Dash_Punctuation $gc:Connector_Punctuation $gc:Other_Punctuation $gc:Initial_Punctuation $gc:Final_Punctuation]
+Let $gcAllSymbols = [$gc:Currency_Symbol $gc:Modifier_Symbol $gc:Math_Symbol $gc:Other_Symbol]
+Let $nfc = [^$NFC_Quick_Check:No]
+
+Show $nfc
+
+Show [$alphabetic - [$mark $letter $number]]
+
+
+Let $oldCJK = [\u1100-\u11FF \u3040-\u30FF \u3130-\u318F \u31F0-\u31FF \u3400-\u4DBF \u4E00-\u9FFF \uAC00-\uD7AF \uF900-\uFAFF \uFF65-\uFFDC]
+
+Show [$oldCJK & $gc:cn]
+
+Let $fixedOld = [$oldCJK-$gc:cn]
+
+
+#List the non-alphabetic old items
+#Show [$oldCJK-$gc:cn-$alphabetic]
+
+#Check for differences
+#Test $fixedOld = $trialNew
+
+#ShowEach $mark
+
+Let $uax29_outliers = [\u3031-\u3035 \u309B-\u309C \u30A0 \u30FC \uFF70 \uFF9E-\uFF9F]
+Let $other_outliers = [\u3099-\u309A \u3006 \u303C \u302A-\u302E \u302F \U000E0100-\U000E01EF]
+
+# ==========================================
+
+# Outliers from UAX29
+Show $uax29_outliers
+
+# Additional outliers
+Show $other_outliers
+
+# Take the 5 CJK scripts
+Let $trialScripts = [$script:hani $script:hang $script:kana $script:hira $script:bopo]
+
+# Remove the non-LMN
+Let $trialNewBase = [$trialScripts & $LMN]
+
+# Add the outliers
+Let $trialNew = [$trialNewBase $uax29_outliers $other_outliers]
+
+# Show our result
+Show $trialNew
+
+# As a double-check, show script characters we're tossing
+Show [$trialScripts - $trialNew]
+
+# Compare snippets stuff
+Let $guessClose = [$lb:QU $lb:Close_Punctuation]
+Let $__closing_punc = ["')>\]`\}\u00AB\u00BB\u2018\u2019\u201C\u201D\u2039\u203A\u207E\u208E\u27E7\u27E9\u27EB\u2984\u2986\u2988\u298A\u298C\u298E\u2990\u2992\u2994\u2996\u2998\u29D9\u29DB\u29FD\u3009\u300B\u300D\u300F\u3011\u3015\u3017\u3019\u301B\u301E\u301F\uFD3F\uFE42\uFE44\uFE5A\uFE5C\uFF02\uFF07\uFF09\uFF3D\uFF5D\uFF63]
+
+$guessClose = $__closing_punc
+
+Let $guessClose = [$gc:pf $gc:pe $gc:pi]
+$guessClose = $__closing_punc
+
+Let $guessTerm = [$sb:aterm $sb:sterm]
+$guessTerm = [? ? !?? ? ? ? ? ??? ? ? ? ? ? ? ? .?? … ? ? ? ? ? ? ? ?? ? ? ? ? ? ? ?]
+
+Let $__issymotherr = [\u00A6\u00A7\u06FD\u06FE\u0F01-\u0F03\u0F13-\u0F17\u0F1A-\u0F1F\u0FBE-\u0FC5\u0FC7-\u0FCC\u2100\u2101\u2104-\u2106\u2108\u2109\u2117\u2118\u211E-\u2121\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u2400-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u2600-\u2613\u2619-\u266E\u2670\u2671\u2701-\u2704\u2706-\u2709\u270C-\u2727\u2729-\u274B\u274F-\u2752\u2758-\u275E\u2761-\u2794\u2798-\u27AF\u27B1-\u27BE\u2800-\u28FF\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3012\u3013\u3036\u3037\u303E\u303F\u3190\u3191\u3196-\u319F\u3200-\u321C\u322A-\u3243\u3260-\u327B\u328A-\u32B0\u32C0-\u32CB\u32D0-\u32FE\u3300-\u3376\u337B-\u33DD\u33E0-\u33FE\uA490-\uA4A1\uA4A4-\uA4B3\uA4B5-\uA4C0\uA4C2-\uA4C4\uFFED\uFFEE\uFFFC\uFFFD]
+Let $__issymothers = [\u00B6\u0482\u06E9\u09FA\u0B70\u0F34\u0F36\u0F38\u0FCF\u2114\u2123\u2125\u2127\u2129\u212E\u2132\u213A\u21D3\u220E\u2617\u274D\u2756\u3004\u3020\u327F\uA4C6\uFFE4\uFFE8]
+
+Let $symOther = [$__issymotherr $__issymothers]
+
+$symOther = $gcAllSymbols
+
+
+[$symOther & $nfc] = [$gcAllSymbols & $nfc]
--- a/tools/unicodetools/com/ibm/text/UCD/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Main.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
-* $Date: 2005/10/11 19:39:15 $
-* $Revision: 1.36 $
+* $Date: 2006/04/05 22:12:44 $
+* $Revision: 1.37 $
 *
 *******************************************************************************
 */
@ -160,8 +160,9 @@ public final class Main implements UCD_Types {
                //else if (arg.equalsIgnoreCase("TrailingZeros")) GenerateData.genTrailingZeros();
                else if (arg.equalsIgnoreCase("GenerateThaiBreaks")) GenerateThaiBreaks.main(null);
                
-    			else if (arg.equalsIgnoreCase("TestData")) TestData.main(new String[]{args[++i]});
-                
+                else if (arg.equalsIgnoreCase("TestData")) TestData.main(new String[]{args[++i]});
+                else if (arg.equalsIgnoreCase("MakeUnicodeFiles")) MakeUnicodeFiles.main(new String[]{});
+
                //else if (arg.equalsIgnoreCase("checkAgainstUInfo")) checkAgainstUInfo();
                else if (arg.equalsIgnoreCase("checkScripts")) VerifyUCD.checkScripts();
                else if (arg.equalsIgnoreCase("IdentifierTest")) VerifyUCD.IdentifierTest();
--- a/tools/unicodetools/com/ibm/text/UCD/MakeNamesChart.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MakeNamesChart.java
@ -16,6 +16,7 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import com.ibm.icu.dev.test.util.BagFormatter;
+import com.ibm.icu.dev.test.util.TransliteratorUtilities;
 import com.ibm.icu.dev.test.util.UnicodeMap;
 import com.ibm.icu.dev.test.util.UnicodePropertySource;
 import com.ibm.icu.text.Collator;
@ -71,7 +72,7 @@ public class MakeNamesChart {
 			System.out.println("file: " + chartPrefix + fileName);
 			PrintWriter out = BagFormatter.openUTF8Writer("C:/DATA/GEN/charts/namelist/", chartPrefix + fileName);
 			out.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'><title>" +
-					BagFormatter.toHTML.transliterate(getHeading(lineParts[2])) +
+					TransliteratorUtilities.toHTML.transliterate(getHeading(lineParts[2])) +
 					"</title><link rel='stylesheet' type='text/css' href='namelist.css'>" +
 					"<base target='names'></head><body>");

@ -117,7 +118,7 @@ public class MakeNamesChart {
 					String hexcp = Utility.hex(it.codepoint, 4);
 					String title = "";
 					String name = Default.ucd().getName(it.codepoint);
-					if (name != null) title = " title='" + BagFormatter.toHTML.transliterate(name.toLowerCase()) + "'";
+					if (name != null) title = " title='" + TransliteratorUtilities.toHTML.transliterate(name.toLowerCase()) + "'";
 					out.println("<td class='" + tdclass + "'"
 							+ title
 							+ ">\u00A0"
@ -347,7 +348,7 @@ public class MakeNamesChart {
 	static Matcher escapeMatch = Pattern.compile("\\&[A-Z][a-z]*\\;").matcher("");
 	
 	private static String showTextConvertingHex(String body, boolean addCharToHex) {
-		body = BagFormatter.toHTML.transliterate(body);
+		body = TransliteratorUtilities.toHTML.transliterate(body);
 		if (addCharToHex) {
 			int position = 0;
 			while (position < body.length()) {
@ -411,7 +412,7 @@ public class MakeNamesChart {
 		if (type == UCD.Cn || type == UCD.Co || type == UCD.Cs) {
 			return "\u2588";
 		}
-		String result = BagFormatter.toHTML.transliterate(UTF16.valueOf(cp));
+		String result = TransliteratorUtilities.toHTML.transliterate(UTF16.valueOf(cp));
 		if (type == UCD.Me || type == UCD.Mn) {
 			result = "\u25CC" + result;
 		} else if (rtl.contains(cp)) {
--- a/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.java
@ -68,6 +68,7 @@ public class MakeUnicodeFiles {
    
    public static void main(String[] args) throws IOException {
        generateFile();
+        System.out.println("DONE");
    }

    static class Format {
@ -294,7 +295,7 @@ public class MakeUnicodeFiles {
            */
            try {
                BufferedReader br =
-                    Utility.openReadFile("MakeUnicodeFiles.txt", Utility.UTF8);
+                    Utility.openReadFile("com/ibm/text/UCD/MakeUnicodeFiles.txt", Utility.UTF8);
                String key = null;
                String file = null, property = null, value = "", comments = "";
                while (true) {
@ -594,6 +595,7 @@ public class MakeUnicodeFiles {
        pw.println(SEPARATOR);
        pw.println("# Total:    " + count);
        pw.println();
+        pw.println("# EOF");
        udf.close();       
    }
    
@ -710,6 +712,8 @@ public class MakeUnicodeFiles {
                pw.println(line);
            }
        }
+        pw.println();
+        pw.println("# EOF");
        udf.close();
    }
    
@ -769,10 +773,16 @@ public class MakeUnicodeFiles {
                 ps.valueStyle = "none";
             }

-             if (ps.noLabel) bf.setLabelSource(null);
-             if (ps.nameStyle.equals("none")) bf.setPropName(null);
-             else if (ps.nameStyle.equals("short")) bf.setPropName(prop.getFirstNameAlias());
-             else bf.setPropName(name);
+             if (ps.noLabel) {
+            	 bf.setLabelSource(null);
+             }
+             if (ps.nameStyle.equals("none")) {
+            	 bf.setPropName(null);
+             } else if (ps.nameStyle.equals("short")) {
+            	 bf.setPropName(prop.getFirstNameAlias());
+             } else {
+            	 bf.setPropName(name);
+             }
            
             if (ps.interleaveValues) {
                writeInterleavedValues(pw, bf, prop, ps);
@ -784,6 +794,8 @@ public class MakeUnicodeFiles {
                 writeEnumeratedValues(pw, bf, unassigned, prop, ps);
             }
         }
+         pw.println();
+         pw.println("# EOF");
         udf.close();
     }
     
@ -809,6 +821,15 @@ public class MakeUnicodeFiles {
             temp2.addAll(aliases);
             aliases = temp2;
         }
+         System.out.println("Check: " + prop.getValue(0xE000));
+         String missing = ps.skipUnassigned != null ? ps.skipUnassigned : ps.skipValue;
+         if (missing != null && !missing.equals("False")) {
+        	 pw.println();
+        	 String propName = bf.getPropName();
+        	 if (propName == null) propName = "";
+        	 else if (propName.length() != 0) propName = propName + "; ";
+        	 pw.println("# @missing: 0000..10FFFF; " + propName + missing);
+         }
         for (Iterator it = aliases.iterator(); it.hasNext();) {
             String value = (String)it.next();
             if (DEBUG) System.out.println("Getting value " + value);
@ -891,6 +912,7 @@ public class MakeUnicodeFiles {
             pw.println();
             //if (s.size() != 0) 
             bf.showSetNames(pw, s);
+             //System.out.println(bf.showSetNames(s));
        }
        
    }
--- a/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.txt
@ -1,6 +1,6 @@
-Generate: NamedSequences
+Generate: .*
 DeltaVersion: 14
-CopyrightYear: 2005
+CopyrightYear: 2006

 File: auxiliary/GraphemeBreakProperty
 Property: Grapheme_Cluster_Break
@ -65,7 +65,10 @@ Value:	4.0
 # Newly assigned in Unicode 4.0.0 (April, 2003)

 Value:	4.1
-# Newly assigned in Unicode 4.1.0 (XXX, 2005)
+# Newly assigned in Unicode 4.1.0 (March, 2005)
+
+Value:	5.0
+# Newly assigned in Unicode 5.0.0 (XXX, 2006)

 File:	extracted/DerivedBidiClass
 Property:	Bidi_Class
@ -158,6 +161,10 @@ Property:	Grapheme_Base
 #  Note: depending on an application's interpretation of Co (private use),
 #  they may be either in Grapheme_Base, or in Grapheme_Extend, or in neither.

+Property:	Grapheme_Link
+# Derived Property: Grapheme_Link (deprecated)
+#  Generated from: Canonical_Combining_Class=Virama
+#  Use Canonical_Combining_Class=Virama directly instead

 File:	extracted/DerivedDecompositionType
 Property:	Decomposition_Type
@ -316,8 +323,6 @@ Property:	Noncharacter_Code_Point

 Property:	Other_Grapheme_Extend

-Property:	Grapheme_Link
-
 Property:	IDS_Binary_Operator

 Property:	IDS_Trinary_Operator
@ -353,7 +358,7 @@ Property: SPECIAL

 File:	Scripts
 Property:	Script
-Format:	nameStyle=none skipUnassigned=Common
+Format:	nameStyle=none skipValue=Unknown

 File:	SpecialCasing
 Property: SPECIAL
--- a/tools/unicodetools/com/ibm/text/UCD/NFSkippable.java
+++ b/tools/unicodetools/com/ibm/text/UCD/NFSkippable.java
@ -1,6 +1,10 @@
 package com.ibm.text.UCD;
+import com.ibm.icu.impl.CollectionUtilities;
+import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.util.ULocale;
+
 import java.util.BitSet;
 import com.ibm.text.utility.*;
 import java.io.PrintWriter;
@ -194,6 +198,7 @@ public final class NFSkippable extends UCDProperty {
        
        
        PrintWriter out = Utility.openPrintWriter("NFSafeSets.txt", Utility.UTF8_WINDOWS);
+        out.println(Utility.BOM);
        out.println("NFSafeSets");
        out.println("Version: " + Default.ucd().getVersion());
        out.println("Date: " + Default.getDate());
@ -212,6 +217,8 @@ public final class NFSkippable extends UCDProperty {
        out.close();
    }
    
+    static Collator UCA = Collator.getInstance(ULocale.ROOT);
+    
    static void generateSet(PrintWriter out, String label, UCDProperty up) {
        System.out.println("Generating: " + up.getName(NORMAL));
        UnicodeSet result = new UnicodeSet();
@ -227,11 +234,17 @@ public final class NFSkippable extends UCDProperty {
        out.println(label + " = new UnicodeSet(");
        writeStringInPieces(out, rSet, ", false);");
            
-        rSet = result.toPattern(false);
+        if (true) {
+        	rSet = result.toPattern(false);
+        } else {
+        	rSet = CollectionUtilities.prettyPrint(result, true, null, null, UCA, UCA);
+        }
+        
        out.println("/*Unicode: ");
        writeStringInPieces(out, rSet, "*/");
        out.println();
        out.flush();
+        System.out.println("Done");
    }
    
            /*
--- a/tools/unicodetools/com/ibm/text/UCD/QuickTest.java
+++ b/tools/unicodetools/com/ibm/text/UCD/QuickTest.java
@ -5,30 +5,42 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/QuickTest.java,v $
-* $Date: 2005/11/19 05:39:39 $
-* $Revision: 1.10 $
+* $Date: 2006/04/05 22:12:43 $
+* $Revision: 1.11 $
 *
 *******************************************************************************
 */

 package com.ibm.text.UCD;

-import java.util.*;
-import java.io.*;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.PrintStream;
+import java.io.PrintWriter;
+import java.io.StreamTokenizer;
+import java.io.StringReader;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.StringTokenizer;
+import java.util.TreeMap;
+import java.util.TreeSet;

 import com.ibm.icu.dev.demo.translit.CaseIterator;
 import com.ibm.icu.dev.test.util.BagFormatter;
 import com.ibm.icu.dev.test.util.UnicodeMap;
-import com.ibm.icu.dev.test.util.UnicodeProperty;
-import com.ibm.icu.dev.test.util.UnicodePropertySource;
-import com.ibm.icu.dev.test.util.UnicodeMap.MapIterator;
 import com.ibm.icu.impl.PrettyPrinter;
 import com.ibm.icu.impl.Utility;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.text.CanonicalIterator;
 import com.ibm.icu.text.Collator;
-import com.ibm.icu.text.Normalizer;
+//import com.ibm.icu.text.Normalizer;
 import com.ibm.icu.text.RuleBasedCollator;
 import com.ibm.icu.text.Transliterator;
 import com.ibm.icu.text.UTF16;
@ -36,27 +48,27 @@ import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.text.UnicodeSetIterator;
 import com.ibm.icu.util.ULocale;

-import com.ibm.text.utility.*;
-
 public class QuickTest implements UCD_Types {
 	public static void main(String[] args) throws IOException {
 		try {
-			
+
+			checkCase();
+			if (true) return;
+
+			getCaseFoldingUnstable();
+
 			getCaseLengths("Lower", UCD.LOWER);
 			getCaseLengths("Upper", UCD.UPPER);
 			getCaseLengths("Title", UCD.TITLE);
 			getCaseLengths("Fold", UCD.FOLD);

-			if (true) return;
 			checkUnicodeSet();
 			getLengths("NFC", Default.nfc());
 			getLengths("NFD", Default.nfd());
 			getLengths("NFKC", Default.nfkc());
 			getLengths("NFKD", Default.nfkd());

-			//getCaseFoldingUnstable();
 			
-			checkCase();
 			if (true) return;
 			tem();
 			//checkPrettyPrint();
@ -643,13 +655,13 @@ public class QuickTest implements UCD_Types {
 		if (!text.equals(x)) alpha.put("Lowercase", x);
 		String title = x = UCharacter.toTitleCase(ULocale.ENGLISH,text,null);
 		if (!text.equals(x)) alpha.put("Titlecase", x);
-		String nfc = x = Normalizer.normalize(text,Normalizer.NFC);
+		String nfc = x = com.ibm.icu.text.Normalizer.normalize(text,com.ibm.icu.text.Normalizer.NFC);
 		if (!text.equals(x)) alpha.put("NFC", x);
-		String nfd = x = Normalizer.normalize(text,Normalizer.NFD);
+		String nfd = x = com.ibm.icu.text.Normalizer.normalize(text,com.ibm.icu.text.Normalizer.NFD);
 		if (!text.equals(x)) alpha.put("NFD", x);
-		x = Normalizer.normalize(text,Normalizer.NFKD);
+		x = com.ibm.icu.text.Normalizer.normalize(text,com.ibm.icu.text.Normalizer.NFKD);
 		if (!text.equals(x)) alpha.put("NFKD", x);
-		x = Normalizer.normalize(text,Normalizer.NFKC);
+		x = com.ibm.icu.text.Normalizer.normalize(text,com.ibm.icu.text.Normalizer.NFKC);
 		if (!text.equals(x)) alpha.put("NFKC", x);
 		
 		CanonicalIterator ci = new CanonicalIterator(text);
--- a/tools/unicodetools/com/ibm/text/UCD/SpecialCasingFooter.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/SpecialCasingFooter.txt
@ -70,3 +70,6 @@
 # Note: the following case is already in the UnicodeData file.

 # 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I
+
+# EOF
+
--- a/tools/unicodetools/com/ibm/text/UCD/SpecialCasingHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/SpecialCasingHeader.txt
@ -31,9 +31,10 @@
 # A locale ID is defined by taking any language tag as defined by
 # RFC 3066 (or its successor), and replacing '-' by '_'.
 #
-# A context for a character C is defined by Section 3.13 Default Case Operations,
-# on p. 89-90 of The Unicode Standard, Version 4.0, as amended by Unicode 4.1.0,
-# as specified in http://www.unicode.org/versions/Unicode4.1.0/
+# A context for a character C is defined by Section 3.13 Default Case 
+# Operations, of The Unicode Standard, Version 5.0.
+# (This is identical to the context defined by Unicode 4.1.0,
+#  as specified in http://www.unicode.org/versions/Unicode4.1.0/)
 #
 # Parsers of this file must be prepared to deal with future additions to this format:
 #  * Additional contexts
--- a/tools/unicodetools/com/ibm/text/UCD/StandardizedVariants-Template.html
+++ b/tools/unicodetools/com/ibm/text/UCD/StandardizedVariants-Template.html
@ -1,13 +1,10 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
-
-       "http://www.w3.org/TR/REC-html40/loose.dtd"> 
-
+<!doctype HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
 <html>

 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 <meta http-equiv="Content-Language" content="en-us">
-<meta name="GENERATOR" content="Microsoft FrontPage 4.0">
+<meta name="GENERATOR" content="Microsoft FrontPage 5.0">
 <meta name="ProgId" content="FrontPage.Editor.Document">
 <meta name="keywords" content="unicode, variant glyphs">
 <meta name="description" content="Describes and displays standardized variant glyphs">
@ -19,8 +16,9 @@

 <table class="header">
  <tr>
-    <td class="icon"><a href="http://www.unicode.org"><img align="middle" alt="[Unicode]" border="0" src="http://www.unicode.org/webscripts/logo60s2.gif" width="34" height="33"></a>&nbsp;&nbsp;<a class="bar" href="http://www.unicode.org/ucd">Unicode 
-      Character Database</a></td>
+    <td class="icon"><a href="http://www.unicode.org">
+    <img align="middle" alt="[Unicode]" border="0" src="http://www.unicode.org/webscripts/logo60s2.gif" width="34" height="33"></a>&nbsp;&nbsp;<a class="bar" href="http://www.unicode.org/ucd">Unicode 
+    Character Database</a></td>
  </tr>
  <tr>
    <td class="gray">&nbsp;</td>
@ -29,105 +27,78 @@
 <blockquote>
  <h1>Standardized Variants</h1>
  <table class="wide">
-    <tbody>
-      <tr>
-        <td valign="top" width="144">Revision</td>
-        <td valign="top">@revision@</td>
-      </tr>
-      <tr>
-        <td valign="top" width="144">Authors</td>
-        <td valign="top">Members of the Editorial Committee</td>
-      </tr>
-      <tr>
-        <td valign="top" width="144">Date</td>
-        <td valign="top">@date@</td>
-      </tr>
-      <tr>
-        <td valign="top" width="144">This Version</td>
-        <td valign="top"><a href="http://www.unicode.org/Public/@updateDirectory@/StandardizedVariants-@revision@.html">http://www.unicode.org/Public/@updateDirectory@/StandardizedVariants-@revision@.html</a></td>
-      </tr>
-      <tr>
-        <td valign="top" width="144">Previous Version</td>
-        <td valign="top"><a href="http://www.unicode.org/Public/3.2-Update/StandardizedVariants-3.2.0.html">http://www.unicode.org/Public/3.2-Update/StandardizedVariants-3.2.0.html</a></td>
-      </tr>
-      <tr>
-        <td valign="top" width="144">Latest Version</td>
-        <td valign="top"><a href="http://www.unicode.org/Public/UNIDATA/StandardizedVariants.html">http://www.unicode.org/Public/UNIDATA/StandardizedVariants.html</a></td>
-      </tr>
-    </tbody>
+    <tr>
+      <td valign="top" width="144">Revision</td>
+      <td valign="top">@revision@</td>
+    </tr>
+    <tr>
+      <td valign="top" width="144">Authors</td>
+      <td valign="top">Members of the Editorial Committee</td>
+    </tr>
+    <tr>
+      <td valign="top" width="144">Date</td>
+      <td valign="top">@date@</td>
+    </tr>
+    <tr>
+      <td valign="top" width="144">This Version</td>
+      <td valign="top"><a href="http://www.unicode.org/Public/@updateDirectory@/@filename@.html">
+      http://www.unicode.org/Public/@updateDirectory@/@filename@.html</a></td>
+    </tr>
+    <tr>
+      <td valign="top" width="144">Previous Version</td>
+      <td valign="top"><a href="http://www.unicode.org/Public/4.1.0/ucd/StandardizedVariants.html">
+      http://www.unicode.org/Public/4.1.0/ucd/StandardizedVariants.html</a></td>
+    </tr>
+    <tr>
+      <td valign="top" width="144">Latest Version</td>
+      <td valign="top"><a href="http://www.unicode.org/Public/UNIDATA/StandardizedVariants.html">
+      http://www.unicode.org/Public/UNIDATA/StandardizedVariants.html</a></td>
+    </tr>
  </table>
  <h3><br>
  <i>Summary</i></h3>
  <blockquote>
-    <p>This file provides a visual display of the standard variant sequences 
-    derived from StandardizedVariants.txt.</p>
+    <p>This file provides a visual display of the standard variant sequences derived from 
+    StandardizedVariants.txt.</p>
  </blockquote>
  <h3><i>Status</i></h3>
  <blockquote>
-    <p><i>The file and the files described herein are part of the <a href="http://www.unicode.org/ucd">Unicode 
-    Character Database</a> (UCD) and are governed by the <a href="#Terms of Use">UCD 
-    Terms of Use</a> stated at the end.</i></p>
+    <p><i>This file and the files described herein are part of the Unicode Character Database and 
+    are governed by the terms of use at <a href="http://www.unicode.org/terms_of_use.html">
+    http://www.unicode.org/terms_of_use.html</a>.</i></p>
  </blockquote>
  <hr width="50%">
  <h2>Introduction</h2>
-  <p>The tables here <i>exhaustively</i> lists the valid, registered 
-  combinations of base character plus variation indicator. All combinations not 
-  listed in StandardizedVariants.txt are unspecified and are reserved for future 
-  standardization; no conformant process may interpret them as standardized 
-  variants. Variation selectors and their use are described in The Unicode 
-  Standard.</p>
-  <p>These mathematical variants are all produced with the addition of Variation 
-  Selector 1 (VS1 or U+FE00) to mathematical operator base characters. There is 
-  no variation according to context. The Mongolian variants use the Mongolian 
-  Variant Selectors, and may vary according to context. That is, if a contextual 
-  shape is not listed below, then the variation sequence has an unmodified 
+  <p>The tables here <i>exhaustively</i> list the valid, registered combinations of base character 
+  plus variation indicator. All combinations not listed in StandardizedVariants.txt are unspecified 
+  and are reserved for future standardization; no conformant process may interpret them as 
+  standardized variants. Variation selectors and their use are described in The Unicode Standard.</p>
+  <p>These mathematical variants are all produced with the addition of Variation Selector 1 (VS1 or 
+  U+FE00) to mathematical operator base characters. There is no variation according to context. The 
+  Mongolian variants use the Mongolian Variant Selectors, and may vary according to context. That 
+  is, if a contextual shape is not listed below, then the variation sequence has an unmodified 
  appearance. At this time no Han variants exist.</p>
  <blockquote>
-    <p><a name="fonts"><b>Note: </b></a>The glyphs used to show the variations 
-    are often derived from different physical fonts than the representative 
-    glyphs in the standard. They may therefore exhibit minor differences in 
-    size, proportion, or weight <i>unrelated</i> to the intentional difference 
-    in feature that is the defining element of the variation. Such minor 
-    differences should be ignored. Likewise, in some cases the existing 
-    representative fonts may not yet contain newly encoded characters and hence 
-    some representative glyphs shown in these tables may have a slightly 
-    different style than others.</p>
+    <p><a name="fonts"><b>Note: </b></a>The glyphs used to show the variations are often derived 
+    from different physical fonts than the representative glyphs in the standard. They may therefore 
+    exhibit minor differences in size, proportion, or weight <i>unrelated</i> to the intentional 
+    difference in feature that is the defining element of the variation. Such minor differences 
+    should be ignored. Likewise, in some cases the existing representative fonts may not yet contain 
+    newly encoded characters and hence some representative glyphs shown in these tables may have a 
+    slightly different style than others.</p>
  </blockquote>
  <p>@table@</p>
  <hr width="50%">
-  <h2>UCD <a name="Terms of Use">Terms of Use</a></h2>
-  <h3><i>Disclaimer</i></h3>
-  <blockquote>
-    <p><i>The Unicode Character Database is provided as is by Unicode, Inc. No 
-    claims are made as to fitness for any particular purpose. No warranties of 
-    any kind are expressed or implied. The recipient agrees to determine 
-    applicability of information provided. If this file has been purchased on 
-    magnetic or optical media from Unicode, Inc., the sole remedy for any claim 
-    will be exchange of defective media within 90 days of receipt.</i></p>
-    <p><i>This disclaimer is applicable for all other data files accompanying 
-    the Unicode Character Database, some of which have been compiled by the 
-    Unicode Consortium, and some of which have been supplied by other sources.</i></p>
-  </blockquote>
-  <h3><i>Limitations on Rights to Redistribute This Data</i></h3>
-  <blockquote>
-    <p><i>Recipient is granted the right to make copies in any form for internal 
-    distribution and to freely use the information supplied in the creation of 
-    products supporting the Unicode<sup>TM</sup> Standard. The files in the 
-    Unicode Character Database can be redistributed to third parties or other 
-    organizations (whether for profit or not) as long as this notice and the 
-    disclaimer notice are retained. Information can be extracted from these 
-    files and used in documentation or programs, as long as there is an 
-    accompanying notice indicating the source.</i></p>
-  </blockquote>
-  <hr width="50%">
  <div align="center">
    <center>
    <table cellspacing="0" cellpadding="0" border="0">
      <tr>
-        <td><a href="http://www.unicode.org/unicode/copyright.html"><img src="http://www.unicode.org/img/hb_notice.gif" border="0" alt="Access to Copyright and terms of use" width="216" height="50"></a></td>
+        <td><a href="http://www.unicode.org/unicode/copyright.html">
+        <img src="http://www.unicode.org/img/hb_notice.gif" border="0" alt="Access to Copyright and terms of use" width="216" height="50"></a></td>
      </tr>
    </table>
-    <script language="Javascript" type="text/javascript" src="http://www.unicode.org/webscripts/lastModified.js"></script>
+    <script language="Javascript" type="text/javascript" src="http://www.unicode.org/webscripts/lastModified.js">
+    </script>
    </center>
  </div>
 </blockquote>
--- a/tools/unicodetools/com/ibm/text/UCD/TestData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/TestData.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $
-* $Date: 2005/11/19 05:39:39 $
-* $Revision: 1.24 $
+* $Date: 2006/04/05 22:12:43 $
+* $Revision: 1.25 $
 *
 *******************************************************************************
 */
@ -27,6 +27,7 @@ import com.ibm.icu.impl.CollectionUtilities;
 import com.ibm.icu.impl.ICUData;
 import com.ibm.icu.impl.ICUResourceBundle;
 import com.ibm.icu.impl.UCharArrayIterator;
+import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.text.NumberFormat;
 import com.ibm.icu.text.StringPrep;
 import com.ibm.icu.text.StringPrepParseException;
@ -45,8 +46,17 @@ public class TestData implements UCD_Types {
    static UnicodeProperty.Factory upf;
    
 	public static void main (String[] args) throws IOException {
-		//checkChars(false);
-        
+		tryConsole2();
+		if (true) return;
+		
+		showNonCompatFull(false);
+		showNonCompatFull(true);
+		
+
+		checkForCaseStability(false);
+		//countChars();
+		foo();
+       
        System.out.println("main: " + Default.getDate());
        upf = ICUPropertyFactory.make();
        System.out.println("after factory: " + Default.getDate());
@ -146,8 +156,152 @@ public class TestData implements UCD_Types {
 			}
 		} finally {
 			log.close();
+				}
+	}
+	
+	/**
+	 * Prints the Unicode 4.1.0 code points (skipping unassigned and private-use
+	 * ones) whose simple lowercase mapping differs from their full case folding,
+	 * followed by a count of the entries printed.
+	 *
+	 * @param compat when true, lists only code points whose full case folding equals
+	 *               the simple lowercase of their NFKC form; when false, lists only
+	 *               those for which it does not
+	 */
+	private static void showNonCompatFull(boolean compat) {
+		UCD ucd = UCD.make("4.1.0");
+		Normalizer nfkc = new Normalizer(Normalizer.NFKC, ucd.getVersion());
+		String heading = compat ? "Full Fold = Simple Lower of NFKC" : "Full Fold != Simple Lower of NFKC";
+		System.out.println();
+		System.out.println(heading);
+		System.out.println();
+		int listed = 0;
+		for (int cp = 0; cp <= 0x10FFFF; ++cp) {
+			int gc = ucd.getCategory(cp);
+			if (gc == Cn || gc == PRIVATE_USE) {
+				continue;
+			}
+			//if (compat == (ucd.getDecompositionType(cp) > UCD.CANONICAL)) continue;
+			String str = UTF16.valueOf(cp);
+			String simpleLower = ucd.getCase(str, SIMPLE, LOWER);
+			String fullFold = ucd.getCase(str, FULL, FOLD);
+			if (simpleLower.equals(fullFold)) {
+				continue; // nothing to report when the two mappings agree
+			}
+
+			String simpleLowerNfkc = ucd.getCase(nfkc.normalize(str), SIMPLE, LOWER);
+			boolean foldIsLowerOfNfkc = fullFold.equals(simpleLowerNfkc);
+			if (compat != foldIsLowerOfNfkc) {
+				continue; // belongs to the other of the two listings
+			}
+
+			System.out.println(ucd.getCodeAndName(cp));
+			System.out.println("\tSimple Lower:\t" + ucd.getCodeAndName(simpleLower));
+			System.out.println("\tFull Fold:\t" + ucd.getCodeAndName(fullFold));
+			++listed;
+		}
+		System.out.println("Count:\t" + listed);
+	}
+
+	/**
+	 * Prints one line per code point from U+0001 up to (but not including) U+FFFF:
+	 * the hex code point, the character itself in parentheses, its UTF-8 bytes as
+	 * space-separated two-digit hex values in brackets, and its extended name.
+	 *
+	 * @throws UnsupportedEncodingException if the "UTF-8" charset is unavailable
+	 */
+	private static void tryConsole() throws UnsupportedEncodingException {
+		for (int cp = 1; cp < 0xFFFF; ++cp) {
+			String text = UTF32.valueOf32(cp);
+			byte[] utf8 = text.getBytes("UTF-8");
+			StringBuffer hexBytes = new StringBuffer();
+			for (int k = 0; k < utf8.length; ++k) {
+				if (k > 0) {
+					hexBytes.append(" ");
+				}
+				// mask to 0..255 so bytes with the high bit set are not shown as negative
+				hexBytes.append(Utility.hex(utf8[k]&0xFF,2));
+			}
+			String charName = UCharacter.getExtendedName(cp);
+			System.out.println(Utility.hex(cp) + "\t(" + text + ")\t[" + hexBytes + "]\t" + charName);
 		}
 	}
+	
+	/**
+	 * Counts the code points in U+0001..U+10FFFF whose UTF-8 encoding contains any
+	 * of the bytes 0x81, 0x8D, 0x8F, 0x90 or 0x9D, then prints the total and its
+	 * percentage of 0x110000.
+	 * NOTE(review): these look like the byte values left undefined in Windows code
+	 * page 1252, which would explain the "corrupted" wording -- confirm.
+	 *
+	 * @throws UnsupportedEncodingException if the "UTF-8" charset is unavailable
+	 */
+	private static void tryConsole2() throws UnsupportedEncodingException {
+		UnicodeSet failures = new UnicodeSet();
+		for (int cp = 1; cp <= 0x10FFFF; ++cp) {
+			byte[] utf8 = UTF32.valueOf32(cp).getBytes("UTF-8");
+			boolean hasBadByte = false;
+			// stop at the first offending byte; one is enough to flag the code point
+			for (int k = 0; k < utf8.length && !hasBadByte; ++k) {
+				int unit = utf8[k]&0xFF;
+				hasBadByte = unit == 0x81 || unit == 0x8D || unit == 0x8F
+						|| unit == 0x90 || unit == 0x9D;
+			}
+			if (hasBadByte) {
+				failures.add(cp);
+			}
+		}
+		int total = failures.size();
+		System.out.println("Total corrupted characters: " + total);
+		System.out.println("Percent corrupted characters: " + ((total + 0.0) / 0x110000 * 100.0 + "%"));
+		//BagFormatter bf = new BagFormatter();
+		//System.out.println(bf.showSetNames(failures));
+	}
+
+
+	/**
+	 * Prints a tab-separated table of code point counts for each Unicode version
+	 * in AGE_VERSIONS (index 0 is skipped). There is one row per general category,
+	 * plus a row for noncharacters (which are counted there instead of under their
+	 * general category) and a row for code points that are Alphabetic or Nd.
+	 * The first row printed is a header naming the versions.
+	 */
+	private static void countChars() {
+		int[][] totals = new int[AGE_VERSIONS.length][50];
+
+		// Pass 1: tally every code point once per version.
+		for (int version = 1; version < AGE_VERSIONS.length; ++version) {
+			UCD ucd = UCD.make(AGE_VERSIONS[version]);
+			UCDProperty alpha = DerivedProperty.make(ucd.PropAlphabetic, ucd);
+			int[] versionTotals = totals[version];
+
+			for (int cp = 0; cp <= 0x10FFFF; ++cp) {
+				int type = ucd.getCategory(cp);
+				if (ucd.isNoncharacter(cp)) {
+					type = LIMIT_CATEGORY;
+				}
+				++versionTotals[type];
+				if (alpha.hasValue(cp) || type == ucd.Nd) {
+					++versionTotals[LIMIT_CATEGORY+1];
+				}
+			}
+		}
+
+		// Pass 2: row -1 is the header; the last two rows are the synthetic ones.
+		for (byte row = -1; row < LIMIT_CATEGORY+2; ++row) {
+			if (row == -1) {
+				System.out.print("\t\t");
+			} else if (row == LIMIT_CATEGORY) {
+				System.out.print("Noncharacter" + "\t" + "NCCP");
+			} else if (row == LIMIT_CATEGORY+1) {
+				System.out.print("Alphabetic" + "\t" + "alpha");
+			} else {
+				System.out.print(UCD.getCategoryID_fromIndex(row,UCD.LONG) + "\t" + UCD.getCategoryID_fromIndex(row));
+			}
+			for (int version = 1; version < AGE_VERSIONS.length; ++version) {
+				if (row < 0) {
+					System.out.print("\t*" + AGE_VERSIONS[version] + "*");
+				} else {
+					System.out.print("\t" + totals[version][row]);
+				}
+			}
+			System.out.println();
+		}
+
+	}
+
+	/**
+	 * Sorts a fixed list of "vice president" spellings with a French collator
+	 * (quaternary strength, alternate handling not shifted), prints the sorted
+	 * entries one per line with a leading tab, then prints them again on a single
+	 * line, each followed by ';'.
+	 */
+	private static void foo() {
+		String[] samples = {
+				"vicepresident",
+				"vice president",
+				"vice-president",
+				"vice-président",
+				"vice-president's offices",
+				"vice-presidents' offices",
+				"vice-presidents offices",
+				"vice-presidentsoffices",
+		};
+		RuleBasedCollator frenchCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("fr"));
+		frenchCollator.setStrength(RuleBasedCollator.QUATERNARY);
+		frenchCollator.setAlternateHandlingShifted(false);
+
+		Arrays.sort(samples, frenchCollator);
+
+		// Each entry is still compared with its predecessor, but the code that
+		// printed the differing level and the relation sign is disabled.
+		String previous = "";
+		int[] level = new int[1];
+		for (int i = 0; i < samples.length; ++i) {
+			String current = samples[i];
+			int relation = levelCompare(frenchCollator, previous, current, level);
+			//System.out.print(levelStrings[level[0]]);
+			//System.out.print(relation < 0 ? "<" : relation == 0 ? "=" : ">");
+			System.out.println("\t" + current);
+			previous = current;
+		}
+
+		StringBuffer joined = new StringBuffer();
+		for (int i = 0; i < samples.length; ++i) {
+			joined.append(samples[i]).append(";");
+		}
+		System.out.println(joined.toString());
+	}
+	
+	// Display markers of one to five dots, meant to be indexed by the level that
+	// levelCompare stores in level[0]; in the visible code the only use is the
+	// disabled print statement in foo().
+	static String[] levelStrings = {".", "..", "...", "....", "....."};
+	
+	/**
+	 * Finds the weakest collation strength at which two strings differ.
+	 * Strengths are tried in the order PRIMARY, SECONDARY, TERTIARY, QUATERNARY,
+	 * IDENTICAL. Only these constants are passed to setStrength: it rejects any
+	 * other value, so counting 0..14 would throw IllegalArgumentException at 4
+	 * for strings that are equal through QUATERNARY, and would never try
+	 * IDENTICAL (15). The collator's original strength is restored on exit.
+	 *
+	 * @param col collator used for the comparisons
+	 * @param a first string
+	 * @param b second string
+	 * @param level output parameter: level[0] receives the index (0..4, usable as
+	 *              an index into levelStrings) of the first strength that
+	 *              distinguishes a and b, or 0 if no strength does
+	 * @return the non-zero result of col.compare(a, b) at that strength, or 0 if
+	 *         the strings compare equal at every strength
+	 */
+	static int levelCompare(RuleBasedCollator col, String a, String b, int[] level) {
+		final int[] strengths = {
+				RuleBasedCollator.PRIMARY,
+				RuleBasedCollator.SECONDARY,
+				RuleBasedCollator.TERTIARY,
+				RuleBasedCollator.QUATERNARY,
+				RuleBasedCollator.IDENTICAL,
+		};
+		final int savedStrength = col.getStrength();
+		level[0] = 0;
+		try {
+			for (int i = 0; i < strengths.length; ++i) {
+				col.setStrength(strengths[i]);
+				int diff = col.compare(a, b);
+				if (diff != 0) {
+					level[0] = i;
+					return diff;
+				}
+			}
+			return 0;
+		} finally {
+			// do not leave the caller's collator at whatever strength we stopped on
+			col.setStrength(savedStrength);
+		}
+	}
+
 	Matcher m;
 	
 	/**
@ -163,12 +317,12 @@ public class TestData implements UCD_Types {
 		return true;
 	}

-	private static void checkChars(boolean mergeRanges) {
+	private static void checkForCaseStability(boolean mergeRanges) {
 		UCD ucd = Default.ucd();
 		ToolUnicodePropertySource ups = ToolUnicodePropertySource.make("");
-		UnicodeSet isUpper = ups.getSet("Uppercase=true");
-		UnicodeSet isLower = ups.getSet("Lowercase=true");
-		UnicodeSet isTitle = ups.getSet("gc=Lt");
+		UnicodeSet propUppercase = ups.getSet("Uppercase=true");
+		UnicodeSet propLowercase = ups.getSet("Lowercase=true");
+		UnicodeSet isGcLt = ups.getSet("gc=Lt");
 		UnicodeSet otherAlphabetic = ups.getSet("Alphabetic=true").addAll(ups.getSet("gc=Sk"));
 		// create the following
 		UnicodeSet hasFold = new UnicodeSet();
@ -177,6 +331,10 @@ public class TestData implements UCD_Types {
 		UnicodeSet hasTitle = new UnicodeSet();
 		UnicodeSet compat = new UnicodeSet();
 		UnicodeSet bicameralsScripts = new UnicodeSet();
+		
+		UnicodeSet isFUppercase = new UnicodeSet();
+		UnicodeSet isFLowercase = new UnicodeSet();
+		UnicodeSet isFTitlecase = new UnicodeSet();

 		UCD u40 = UCD.make("4.0.0");
 		BitSet scripts = new BitSet();
@ -184,41 +342,83 @@ public class TestData implements UCD_Types {
 			int gc = ucd.getCategory(i);
 			if (gc == Cn || gc == PRIVATE_USE) continue;
 			String str = UTF16.valueOf(i);
-			if (!str.equals(ucd.getCase(str, FULL, FOLD))) hasFold.add(i);
-			if (!str.equals(ucd.getCase(str, FULL, UPPER))) hasUpper.add(i);
+			if (!str.equals(ucd.getCase(str, FULL, FOLD))) {
+				hasFold.add(i);
+				scripts.set(ucd.getScript(i));
+			}
+			if (!str.equals(ucd.getCase(str, FULL, UPPER))) {
+				hasUpper.add(i);
+				scripts.set(ucd.getScript(i));
+			} else {
+				isFUppercase.add(i);
+			}
 			if (!str.equals(ucd.getCase(str, FULL, LOWER))) {
 				hasLower.add(i);
 				scripts.set(ucd.getScript(i));
+			} else {
+				isFLowercase.add(i);
+			}
+			if (!str.equals(ucd.getCase(str, FULL, TITLE))) {
+				hasTitle.add(i);
+				scripts.set(ucd.getScript(i));
+			} else {
+				isFTitlecase.add(i);
 			}
-			if (!str.equals(ucd.getCase(str, FULL, TITLE))) hasTitle.add(i);
 			if (!str.equals(Default.nfkd().normalize(str))) compat.add(i);
 			//System.out.println(ucd.getCodeAndName(i) + "\t" + (u40.isAllocated(i) ? "already in 4.0" : "new in 4.1"));
 		}
 		BagFormatter bf = new BagFormatter();
+		Transliterator nullTrans = Transliterator.getInstance("null");
+		bf.setShowLiteral(nullTrans);
 		bf.setMergeRanges(mergeRanges);
 		bf.setUnicodePropertyFactory(ups);
-		printItems(bf, compat, "isUpper or isTitle without hasLower", 
-				new UnicodeSet(isUpper).addAll(isTitle).removeAll(hasLower));
-		printItems(bf, compat, "hasLower, but not isUpper or isTitle", 
-				new UnicodeSet(hasLower).removeAll(isTitle).removeAll(isUpper));
-		printItems(bf, compat, "isLower without hasUpper", 
-				new UnicodeSet(isLower).addAll(isTitle).removeAll(hasUpper));
-		printItems(bf, compat, "hasUpper, but not isLower or isTitle", 
-				new UnicodeSet(hasUpper).removeAll(isTitle).removeAll(isLower));
+		
+		UnicodeSet allCased = new UnicodeSet().addAll(hasUpper).addAll(hasLower).addAll(hasTitle);
+		isFUppercase.retainAll(allCased);
+		isFLowercase.retainAll(allCased);
+		isFTitlecase.retainAll(allCased);
+		System.out.println(Utility.BOM);
+
+		printItems(bf, compat, "Uppercase=true or gc=Lt without hasLower", 
+				new UnicodeSet(propUppercase).addAll(isGcLt).removeAll(hasLower));
+		printItems(bf, compat, "hasLower, but not (Uppercase=true or gc=Lt)", 
+				new UnicodeSet(hasLower).removeAll(isGcLt).removeAll(propUppercase));
+		printItems(bf, compat, "Lowercase=true without hasUpper", 
+				new UnicodeSet(propLowercase).addAll(isGcLt).removeAll(hasUpper));
+		printItems(bf, compat, "hasUpper, but not (Lowercase=true or gc=Lt)", 
+				new UnicodeSet(hasUpper).removeAll(isGcLt).removeAll(propLowercase));
+
+		
+		printItems(bf, compat, "Functionally Uppercase, but not Uppercase=true", 
+				new UnicodeSet(isFUppercase).removeAll(propUppercase));
+		printItems(bf, compat, "Uppercase=true, but not functionally Uppercase", 
+				new UnicodeSet(propUppercase).removeAll(isFUppercase));
+		
+		printItems(bf, compat, "Functionally Lowercase, but not Lowercase=true", 
+				new UnicodeSet(isFLowercase).removeAll(propLowercase));
+		printItems(bf, compat, "Lowercase=true, but not functionally Lowercase", 
+				new UnicodeSet(propLowercase).removeAll(isFLowercase));
+

 		UnicodeSet scriptSet = new UnicodeSet();
 		UnicodeProperty scriptProp = ups.getProperty("Script");
+		bf.setMergeRanges(true);
+		System.out.println();
+		System.out.println("Bicameral Scripts: those with at least one functionally cased character.");
+		System.out.println();
 		for (int i = 0; i < scripts.size(); ++i) {
 			if (!scripts.get(i)) continue;
-			if (i == COMMON_SCRIPT) continue;
+			//if (i == COMMON_SCRIPT) continue;
 			String scriptName = ucd.getScriptID_fromIndex((byte)i);
-			System.out.println(scriptName);
-			scriptSet.addAll(scriptProp.getSet(scriptName));
+			UnicodeSet scriptUSet = scriptProp.getSet(scriptName);
+			scriptSet.addAll(scriptUSet);
+			printItems(bf, compat, "Bicameral Script: " + scriptName,
+					new UnicodeSet(allCased).retainAll(scriptUSet));
 		}
-		UnicodeSet allCased = new UnicodeSet().addAll(isUpper).addAll(isLower).addAll(isTitle);
-		printItems(bf, compat, "(Bicameral) isAlpha or Symbol Modifier, but not isCased", 
+		bf.setMergeRanges(false);
+		printItems(bf, compat, "Bicameral Script: isAlpha or Symbol Modifier, but not isCased", 
 				new UnicodeSet(scriptSet).retainAll(otherAlphabetic).removeAll(allCased));
-		printItems(bf, compat, "(Bicameral) isCased, but not isAlpha or Symbol Modifier", 
+		printItems(bf, compat, "Bicameral Script: isCased, but not isAlpha or Symbol Modifier", 
 				new UnicodeSet(scriptSet).retainAll(allCased).removeAll(otherAlphabetic));
 	}

@ -302,21 +502,21 @@ public class TestData implements UCD_Types {
    	}
 	}

-	public static class RegexMatcher implements UnicodeProperty.Matcher {
+	public static class RegexMatcher implements UnicodeProperty.PatternMatcher {
        private Matcher matcher;
        
-        public UnicodeProperty.Matcher set(String pattern) {
+        public UnicodeProperty.PatternMatcher set(String pattern) {
            matcher = Pattern.compile(pattern).matcher("");
            return this;
        }
-        public boolean matches(String value) {
-            matcher.reset(value);
+        public boolean matches(Object value) {
+            matcher.reset((String)value);
            return matcher.matches();
        }       
    }

    static BagFormatter bf = new BagFormatter();
-    static UnicodeProperty.Matcher matcher = new RegexMatcher();
+    static UnicodeProperty.PatternMatcher matcher = new RegexMatcher();

    private static void showPropDiff(String p1, UnicodeSet s1, String p2, UnicodeSet s2) {
        System.out.println("Property Listing");
--- a/tools/unicodetools/com/ibm/text/UCD/TestIdentifiers.java
+++ b/tools/unicodetools/com/ibm/text/UCD/TestIdentifiers.java
@ -26,8 +26,16 @@ public class TestIdentifiers {
 	public static void main(String[] args) throws IOException {
 		String[] tests = { "SØS", "façade", "MOPE", "VOP", "scope", "ibm", "vop",
 				"toys-я-us", "1iνе", "back", "boгing" };
+		
 		TestIdentifiers ti = new TestIdentifiers("L");
 		TestIdentifiers tiany = new TestIdentifiers("A");
+		ti.loadIdentifiers();
+		UnicodeSet idnCharSet = ti.idnChars.getSet("output", new UnicodeSet());
+		System.out.println("idnCharSet: " + idnCharSet.size());
+		UnicodeSet idnCharNonStarting = ti.nonstarting;
+		System.out.println("idnCharNonStarting: " + idnCharSet);
+		if (true) return;
+		
 		for (int i = 0; i < tests.length; ++i) {
 			System.out.print(tests[i]);
 			String folded = UCharacter.foldCase(tests[i], true);
--- a/tools/unicodetools/com/ibm/text/UCD/TestUnicodeInvariants.java
+++ b/tools/unicodetools/com/ibm/text/UCD/TestUnicodeInvariants.java
@ -10,6 +10,7 @@ import java.util.List;
 import java.util.Locale;

 import com.ibm.icu.dev.test.util.BagFormatter;
+import com.ibm.icu.dev.tool.UOption;
 import com.ibm.icu.text.SymbolTable;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeMatcher;
@ -17,9 +18,26 @@ import com.ibm.icu.text.UnicodeSet;
 import com.ibm.text.utility.Utility;

 public class TestUnicodeInvariants {
+    private static final int
+    HELP1 = 0,
+    FILE = 1,
+    RANGE = 2
+    ;

+    private static final UOption[] options = {
+        UOption.HELP_H(),
+        UOption.create("file", 'f', UOption.REQUIRES_ARG),
+        UOption.create("range", 'r', UOption.NO_ARG),
+    };
+    
    public static void main(String[] args) throws IOException {
-        testInvariants();
+        UOption.parseArgs(args, options);
+
+    	String file = "UnicodeInvariants.txt";
+    	if (options[FILE].doesOccur) file = options[FILE].value;
+    	boolean doRange = options[RANGE].doesOccur;
+    	
+        testInvariants(file, doRange);
    }

    /**
@ -68,19 +86,19 @@ public class TestUnicodeInvariants {
   
   static final UnicodeSet INVARIANT_RELATIONS = new UnicodeSet("[\\~ \\= \\! \\? \\< \\> \u2264 \u2265 \u2282 \u2286 \u2283 \u2287]");
   
-   public static void testInvariants() throws IOException {
+   public static void testInvariants(String outputFile, boolean doRange) throws IOException {
       String[][] variables = new String[100][2];
       int variableCount = 0;
       PrintWriter out = BagFormatter.openUTF8Writer(UCD_Types.GEN_DIR, "UnicodeInvariantResults.txt");
       out.write('\uFEFF'); // BOM
-       BufferedReader in = BagFormatter.openUTF8Reader("com/ibm/text/UCD/", "UnicodeInvariants.txt");
+       BufferedReader in = BagFormatter.openUTF8Reader("com/ibm/text/UCD/", outputFile);
       BagFormatter bf = new BagFormatter();
       bf.setUnicodePropertyFactory(ToolUnicodePropertySource.make(""));
       BagFormatter bf2 = new BagFormatter();
       bf2.setUnicodePropertyFactory(ToolUnicodePropertySource.make(""));
-       bf2.setMergeRanges(false);
+       bf2.setMergeRanges(doRange);
       ChainedSymbolTable st = new ChainedSymbolTable(new SymbolTable[] {
-           ToolUnicodePropertySource.make("4.0.0").getSymbolTable("\u00D7"),
+           ToolUnicodePropertySource.make(UCD.lastVersion).getSymbolTable("\u00D7"),
           ToolUnicodePropertySource.make(Default.ucdVersion()).getSymbolTable("")});
       ParsePosition pp = new ParsePosition(0);
       int parseErrorCount = 0;
@ -113,11 +131,20 @@ public class TestUnicodeInvariants {
           // detect variables
           if (line.startsWith("Show")) {
           		String part = line.substring(4).trim();
+           		if (part.startsWith("Each")) {
+           			part = part.substring(4).trim();
+           			bf2.setMergeRanges(false);
+           		}
           		pp.setIndex(0);
           		UnicodeSet leftSet = new UnicodeSet(part, pp, st);
           		bf2.showSetNames(out, leftSet);
+           		bf2.setMergeRanges(doRange);
 				continue;
           }
+           
+           if (line.startsWith("Test")) {
+        	   line = line.substring(4).trim();
+           }

          char relation = 0;
           String rightSide = null;
@ -166,7 +193,7 @@ public class TestUnicodeInvariants {
           
           boolean ok = true;
           switch(relation) {
-               case '=': ok = leftSet.equals(rightSet); break;
+               case '=': case '\u2261': ok = leftSet.equals(rightSet); break;
               case '<': case '\u2282': ok = rightSet.containsAll(leftSet) && !leftSet.equals(rightSet); break;
               case '>': case '\u2283': ok = leftSet.containsAll(rightSet) && !leftSet.equals(rightSet); break;
               case '\u2264': case '\u2286': ok = rightSet.containsAll(leftSet); break;
--- a/tools/unicodetools/com/ibm/text/UCD/ToolUnicodePropertySource.java
+++ b/tools/unicodetools/com/ibm/text/UCD/ToolUnicodePropertySource.java
@ -254,18 +254,19 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
 						, "Katakana");
        		Object foo = unicodeMap.getSet("Katakana");
        		UnicodeSet graphemeExtend = getProperty("Grapheme_Extend").getSet("true");
+        		UnicodeProperty lineBreak = getProperty("Line_Break");
        		unicodeMap.putAll(getProperty("Alphabetic").getSet("true")
        				.add(0xA0).add(0x05F3)
 						.removeAll(getProperty("Ideographic").getSet("true"))
 						.removeAll(unicodeMap.getSet("Katakana"))
-						.removeAll(script.getSet("Thai"))
-						.removeAll(script.getSet("Lao"))
+						//.removeAll(script.getSet("Thai"))
+						//.removeAll(script.getSet("Lao"))
+						.removeAll(lineBreak.getSet("SA"))
 						.removeAll(script.getSet("Hiragana"))
 						.removeAll(graphemeExtend),
 						"ALetter");
        		unicodeMap.putAll(new UnicodeSet("[\\u0027\\u00B7\\u05F4\\u2019\\u2027\\u003A]")
 								,"MidLetter");
-        		UnicodeProperty lineBreak = getProperty("Line_Break");
        		unicodeMap.putAll(lineBreak.getSet("Infix_Numeric")
        				.remove(0x003A), "MidNum");
        		unicodeMap.putAll(lineBreak.getSet("Numeric"), "Numeric");
--- a/tools/unicodetools/com/ibm/text/UCD/UCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
-* $Date: 2005/11/01 00:10:54 $
-* $Revision: 1.40 $
+* $Date: 2006/04/05 22:12:44 $
+* $Revision: 1.41 $
 *
 *******************************************************************************
 */
@ -43,7 +43,8 @@ public final class UCD implements UCD_Types {
    /**
     * Used for the default version.
     */
-    public static final String latestVersion = "5.1.0";
+    public static final String latestVersion = "5.0.0";
+    public static final String lastVersion = "4.1.0";

    /**
     * Create singleton instance for default (latest) version
@ -803,6 +804,9 @@ public final class UCD implements UCD_Types {
    }

    public byte getScript(int codePoint) {
+    	if (codePoint == 0xE000) {
+    		codePoint += 0;
+    	}
        return get(codePoint, false).script;
    }
    
@ -1398,6 +1402,7 @@ to guarantee identifier closure.
        }
        if (isHangul) {
            if (fixStrings) result.decompositionMapping = getHangulDecompositionPair(codePoint);
+            if (isLV(codePoint)) result.lineBreak = LB_H2; else result.lineBreak = LB_H3;
            result.decompositionType = CANONICAL;
        }
        return result;
@ -1612,6 +1617,9 @@ to guarantee identifier closure.
                }

                combiningClassSet.set(uData.combiningClass & 0xFF);
+                if (cp == 0xE000) {
+                	System.out.println("Check: " + uData.script);
+                }
                add(uData);
            }
            /*
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
-* $Date: 2005/03/10 02:37:20 $
-* $Revision: 1.31 $
+* $Date: 2006/04/05 22:12:44 $
+* $Revision: 1.32 $
 *
 *******************************************************************************
 */
@ -345,7 +345,12 @@ final class UCD_Names implements UCD_Types {
    "SYLOTI_NAGRI",
    "OLD_PERSIAN",
    "KHAROSHTHI",
-    
+    "Balinese",
+    "Cuneiform",
+    "Phoenician",
+    "Phags-pa",
+    "Nko",
+    "Unknown"
  };

 	public static final Map EXTRA_SCRIPT = new HashMap();
@ -426,11 +431,14 @@ final class UCD_Names implements UCD_Types {
    "Sylo",
    "Xpeo",
    "Khar",
-
+    "Bali",
+    "Xsux",
+    "Phnx",
+    "Phag",
+    "Nkoo",
+    "Zzzz"
  };

-
-
  static final String[] AGE = {
    "unassigned",
    "1.1",
@ -441,9 +449,9 @@ final class UCD_Names implements UCD_Types {
    "3.2",
    "4.0",
 	"4.1",
+	"5.0",
  };

-
    static final String[] GENERAL_CATEGORY = {
        "Cn", // = Other, Not Assigned 0

--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
-* $Date: 2005/11/01 00:10:54 $
-* $Revision: 1.32 $
+* $Date: 2006/04/05 22:12:44 $
+* $Revision: 1.33 $
 *
 *******************************************************************************
 */
@ -398,8 +398,14 @@ public interface UCD_Types {
 	    SYLOTI_NAGRI = 60,
 	    OLD_PERSIAN = 61,
 	    KHAROSHTHI = 62,
+	    Balinese = 63,
+	    Cuneiform = 64,
+	    Phoenician = 65,
+	    Phags_Pa = 66,
+	    NKo = 67,
+	    Unknown_Script = 68,

-        LIMIT_SCRIPT = 63;
+        LIMIT_SCRIPT = 69;

  static final int
    UNKNOWN = 0,
@ -411,7 +417,8 @@ public interface UCD_Types {
    AGE32 = 6,
    AGE40 = 7,
    AGE41 = 8,
-    LIMIT_AGE = 9;
+    AGE50 = 9,
+    LIMIT_AGE = 10;

    static final String[] AGE_VERSIONS = {
        "?",
@ -422,7 +429,8 @@ public interface UCD_Types {
        "3.1.0",
        "3.2.0",
        "4.0.0",  
-        "4.1.0"       
+        "4.1.0",      
+        "5.0.0"       
    };

 public static byte
--- a/tools/unicodetools/com/ibm/text/UCD/UData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UData.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UData.java,v $
-* $Date: 2004/02/12 08:23:16 $
-* $Revision: 1.11 $
+* $Date: 2006/04/05 22:12:44 $
+* $Revision: 1.12 $
 *
 *******************************************************************************
 */
@ -45,7 +45,7 @@ class UData implements UCD_Types {
    byte lineBreak = LB_XX;
    byte joiningType = -1;
    byte joiningGroup = NO_SHAPING;
-    byte script = COMMON_SCRIPT;
+    byte script = Unknown_Script;
    byte age = 0;

    static final UData UNASSIGNED = new UData();
--- a/tools/unicodetools/com/ibm/text/data/chinese_override.txt
+++ b/tools/unicodetools/com/ibm/text/data/chinese_override.txt
@ -1,10 +1,4 @@
-#/**
-# *******************************************************************************
-# * Copyright (C) 2002-2004, International Business Machines Corporation and    *
-# * others. All Rights Reserved.                                                *
-# *******************************************************************************
-# */
-#Override	List
+#Override	List
 #Format is <code><tab><char><tab><pinyin>(<tab><comment>)?
 #Note: the 'code' field is currently discarded; only the char is important.
 #Note: if there is conflict, the FIRST char wins.
--- a/tools/unicodetools/com/ibm/text/utility/Counter.java
+++ b/tools/unicodetools/com/ibm/text/utility/Counter.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Counter.java,v $
-* $Date: 2005/10/11 19:39:15 $
-* $Revision: 1.3 $
+* $Date: 2006/04/05 22:12:45 $
+* $Revision: 1.4 $
 *
 *******************************************************************************
 */
@ -21,7 +21,7 @@ import java.io.*;
 import java.text.*;

 public final class Counter {
-    Map map = new HashMap();
+    private Map map = new HashMap();

    static public final class RWInteger implements Comparable {
        static int uniqueCount;
@ -92,5 +92,11 @@ public final class Counter {
        return result;
    }

-
+    /**
+     * Returns a read-only view of the keys of the backing map.
+     * The view is unmodifiable, matching getMap(), so callers cannot alter
+     * the private map through it.
+     *
+     * @return unmodifiable set view of the keys
+     */
+    public Set keySet() {
+        return Collections.unmodifiableSet(map.keySet());
+    }
+    
+    /**
+     * Returns an unmodifiable view of the backing map.
+     *
+     * @return read-only view; attempts to modify it are rejected
+     */
+    public Map getMap() {
+        final Map readOnlyView = Collections.unmodifiableMap(map);
+        return readOnlyView;
+    }
 }
--- a/tools/unicodetools/com/ibm/text/utility/UnicodeDataFile.java
+++ b/tools/unicodetools/com/ibm/text/utility/UnicodeDataFile.java
@ -48,7 +48,7 @@ public class UnicodeDataFile {
 	        out.println("# For documentation, see UCD.html");
    	}
        try {
-            Utility.appendFile(filename + "Header" + fileType, Utility.UTF8_UNIX, out);
+            Utility.appendFile("com/ibm/text/UCD/" + filename + "Header" + fileType, Utility.UTF8_UNIX, out);
        } catch (FileNotFoundException e) {
            /*
            out.println("# Unicode Character Database: Derived Property Data");
--- a/tools/unicodetools/readme.html
+++ b/tools/unicodetools/readme.html
@ -77,7 +77,10 @@ exist:<br>
 &lt;UCD_DIR&gt;/EXTRAS-Update</p>
 <h3>2. Download all of the UnicodeData files for each version into UCD_DIR.</h3>
 <p>The folder names must be of the form: &quot;3.2.0-Update&quot;, so rename the folders on the<br>
-Unicode site to this format.</p>
+Unicode site to this format. <span style="background-color: #FFFF00">If the 
+downloaded folder contains a ucd directory, make the contents of that directory be the contents of 
+the x.x.x-Update directory. That is, each x.x.x-Update directory will directly contain files 
+like PropList....txt</span></p>
 <h4>2a Ensure Complete Release</h4>
 <p>If you are downloading any &quot;incomplete&quot; release (one that does not contain a complete set of data 
 files for that release, you need to also download the previous complete release). Most of the N.M-Update 
@ -87,6 +90,7 @@ directoriess are complete, *except*:</p>
 <p>Also, make the following changes to UnicodeData for 1.1.5:</p>
 <p><b>Delete</b></p>
 <pre>3400;HANGUL SYLLABLE KIYEOK A;Lo;0;L;1100 1161;;;;N;;;;;
+...
 4DFF;HANGUL SYLLABLE MIEUM WEO RIEUL-THIEUTH;Lo;0;L;1106 116F 11B4;;;;N;;;;;
 4E00;<cjk IDEOGRAPH REPRESENTATIVE>;Lo;0;L;;;;;N;;;;;</pre>
 <p><b>Add:</b></p>
@ -106,18 +110,19 @@ BASE_DIR + &quot;Collation\allkeys&quot; + VERSION + &quot;.txt&quot;.<br>
 <br>
 If you have it in a different location, change that value for KEYS in UCA.java, and <br>
 the value for BASE_DIR</p>
-<h4>2c. Here is an example of the default directory structure with files:</h4>
+<h4>2c. Here is an example of the default directory structure with files. All of 
+the yellow ones should exist</h4>
 <pre>C://DATA/

        BIN/
    
-        Collation/
+<span style="background-color: #FFFF00">        Collation/
            allkeys-3.1.1.txt
-        
+</span>        
        GEN/
            DerivedData/
                ExtractedProperties
-        UCD/
+<span style="background-color: #FFFF00">        </span><span style="background-color: #FFFF00">UCD/
            3.0.0-Update/
                Unihan-3.2.0.txt
                ...
@ -133,69 +138,145 @@ the value for BASE_DIR</p>
                ArabicShaping-4.0.0d14b.txt
                BidiMirroring-4.0.0d1b.txt
                ...
-            EXTRAS-Update/</pre>
+            EXTRAS-Update/</span></pre>
 <h3>3. Versions</h3>
 <p>All of the following have &quot;version X&quot; in the options you give to Java (either on the&nbsp; 
 command line, or in the Eclipse 'run' options. If you want a specific version like 3.1.0, then you 
 would write &quot;version 3.1.1&quot;. If you want the latest version (4.1.0), you can omit the &quot;version X&quot;.</p>
-<h3>4. Running UCD, you will use com.ibm.text.UCD.Main as your main class.</h3>
-<p>The Working directory has to be C:\ICU4J\unicodetools\com\ibm\text\UCD<br>
-(In Eclipse you can also use ${workspace_loc:UnicodeTools/com/ibm/text/UCD}, which abstracts away 
-the location.)<br>
-<br>
-The same for UCA:</p>
-<p>main: com.ibm.text.UCD.Main<br>
-directory: <a href="file:///C:/ICU4J/unicodetools/com/ibm/text/UCA">
-C:\ICU4J\unicodetools\com\ibm\text\UCA</a></p>
-<h4>4a. BIN</h4>
-<p>For each version, the tools build a set of binary data in BIN that contain the information for 
-that release. This is done automatically, or you can manually do it with the options<br>
-<br>
-version X build<br>
-<br>
-This builds an compressed format of all the UCD data (except blocks and Unihan) into the BIN 
-directory. Don't worry about the voluminous console messages, unless one says &quot;FAIL&quot;.<br>
-<br>
-<font color="#FF0000"><i>You have to manually do this if you change any of the data files in that 
-version!!</i></font></p>
-<p>Note: if for any reason you modify the binary format of the BIN files, you also have to bump the 
-value in that file:<br>
-<br>
-static final byte BINARY_FORMAT = 8; // bumped if binary format of UCD changes</p>
-<h4>4b. To build the Unicode files for a particular version X, run the Main with the following 
-argument:</h4>
-<p>MakeUnicodeFiles.generateFile</p>
-<p>This will execute the commands in the file MakeUnicodeFiles.txt.</p>
-<p>You will edit that file if you want a different 'd' version for the files, OR if you want to 
-change which files are built. At the top of the file you will see the following text:</p>
-<pre>Generate: </pre>
-<pre>DeltaVersion: 7</pre>
-<h4>4c. To change which files are built, put any number of regular expressions separated by spaces 
-after Generate. Eg,</h4>
-<pre>Generate: .*line.* prop.*</pre>
-<p>The matching is case-insensitive.</p>
-<h4>4d. To change the 'd' number that is appended to the generated files names, change the 
-DeltaVersion.</h4>
-<h4>4e. To run basic consistency checking, run:</h4>
-<p>version X verify<br>
-<br>
-Don't worry about any console messages except those that say FAIL.</p>
-<h4>4f. Output</h4>
-<p>The files will be generated in the GEN directories.</p>
-<ul>
-  <li>If they are the same as previous files (except for the first line and Date), they will be 
-  renamed to UNCHANGED... </li>
-  <li>If they are not, then a bat file will be generated in the DIFF directory. Double-clicking on 
-  this file will launch CompareIt, which is a nice diff program. Get compareIt from
-  <a class="xurl" href="http://www.grigsoft.com/files.htm">http://www.grigsoft.com/files.htm</a> (be 
-  sure to get the Unicode version),then you can also set it as the diff program in CVS with 
-  Admin/Preferences/WinCVS, External Diff = C:\Program Files\Compare It!\wincmp3.exe (or equiv).</li>
-</ul>
-<h3>5. Running UCA, you will use com.ibm.text.UCA.Main as your main class.</h3>
-<h4>5a. To build all the UCA files used by ICU, use the option:</h4>
-<p>java &lt;UCA&gt;Main ICU</p>
-<h4>6. To build all the charts, use the UCA project, with options: normalizationChart caseChart 
-scriptChart indexChart</h4>
+<h3>4. Building Files</h3>
+<ol>
+	<li><b>Setup</b><ol>
+		<li>In Eclipse, open the Package Explorer (Use Window&gt;Show View if you 
+		don't see it)</li>
+		<li>Open UnicodeTools<ul>
+			<li>com.ibm.text.UCD<ul>
+				<li>MakeUnicodeFiles.<span style="background-color: #FFFF00">txt</span><p>This file drives the production of 
+				the derived Unicode files. The first three lines contain 
+				parameters that you may need to modify from time to time:</p>
+				<pre>Generate: <b>.*script.*</b> <i>// this is a regular expression. Use .* for all files</i>
+DeltaVersion: <b>10</b> <i>    // This gets appended to the file name. Pick 1+ the highest value in Public</i>
+CopyrightYear: <b>2006</b> <i> // Pick the current year</i></pre>
+				</li>
+			</ul>
+			</li>
+		</ul>
+		</li>
+		<li>Open in Package Explorer 
+		<ul>
+			<li>com.ibm.text.UCD<ul>
+				<li>Main</li>
+			</ul>
+			</li>
+		</ul>
+		</li>
+		<li>Run&gt;Run As...<ol>
+			<li>Choose Java Application<ul>
+				<li>it will fail, don't worry; you need to set some parameters</li>
+			</ul>
+			</li>
+		</ol>
+		</li>
+		<li>Run&gt;Run...<ul>
+			<li>Select the Arguments tab, and fill in the following<ul>
+				<li>Program arguments:<pre>build 5.0 MakeUnicodeFiles</pre>
+				</li>
+				<li>VM arguments: 
+				<pre>-Xms512m -Xmx512m</pre>
+				</li>
+			</ul>
+			</li>
+			<li>Close and Save</li>
+		</ul>
+		</li>
+	</ol>
+	</li>
+	<li><b>Run</b><ol>
+		<li>You'll see it build the 5.0 files, with something like the following 
+		results:<pre>Writing UCD_Data5.0.0
+Data Size: 109,802
+Wrote Data 109802</pre>
+		</li>
+		<li>For each version, the tools build a set of binary data in BIN that 
+		contain the information for that release. This is done automatically, or 
+		you can manually do it with the Program Arguments<pre>version X build</pre>
+		<p>This builds a compressed format of all the UCD data (except blocks 
+		and Unihan) into the BIN directory. Don't worry about the voluminous 
+		console messages, unless one says &quot;FAIL&quot;.</p>
+		<p><font color="#FF0000"><i>You have to manually do this if you change 
+		any of the data files in that version!</i></font></p>
+		<p>Note: if for any reason you modify the binary format of the BIN files, you also have to bump the 
+value in that file:</p>
+		<pre>static final byte BINARY_FORMAT = 8; // bumped if binary format of UCD changes</pre>
+		</li>
+	</ol>
+	</li>
+	<li>Results in <a href="file:///C:/DATA/GEN/DerivedData">
+	C:\DATA\GEN\DerivedData</a><ol>
+		<li>The files will be in this directory.</li>
+		<li>There are also DIFF folders, that contain BAT files that you can run 
+		on Windows with CompareIt. (You can modify the code to build BATs with 
+		another Diff program if you want).<ol>
+			<li>For any file with a significant difference, it will build two 
+			BAT files, such as the first two below.<pre>Diff_PropList-5.0.0d10.txt.bat
+OLDER-Diff_PropList-5.0.0d10.txt.bat
+
+UNCHANGED-Diff_PropertyValueAliases-5.0.0d10.txt.bat</pre>
+			</li>
+		</ol>
+		</li>
+		<li>Any files without significant changes will have &quot;UNCHANGED&quot; as a 
+		prefix: ignore them.&nbsp; The OLDER prefix is the comparison to the 
+		last version of Unicode.</li>
+		<li>On Windows you can run these BATs to compare files.</li>
+	</ol>
+	</li>
+</ol>
+<h3>5. Invariant Checking</h3>
+<ol>
+	<li>Setup<ol>
+		<li>Open in Package Explorer<ul>
+			<li>com.ibm.text.UCD<ul>
+				<li>TestUnicodeInvariants.java</li>
+			</ul>
+			</li>
+		</ul>
+		</li>
+		<li>Run&gt;Run As... Java Application<br>
+		Will create the following file of results:<pre><a href="file:///C:/DATA/GEN/UnicodeInvariantResults.txt">C:\DATA\GEN\UnicodeInvariantResults.txt</a></pre>
+		</li>
+		<li>Open that file and search for &quot;**** START Error Info ****&quot; Each such 
+		point provides a dump of comparison information.</li>
+	</ol>
+	</li>
+</ol>
+<h3>6. Options</h3>
+<ol>
+	<li>If you want to see files that are opened while processing, do the 
+	following:<ol>
+		<li>Run&gt;Run</li>
+		<li>Select the Arguments tab, and add the following<ol>
+			<li>VM arguments:
+			<pre>-DSHOW_FILES</pre>
+			</li>
+		</ol>
+		</li>
+	</ol>
+	</li>
+</ol>
+<h3>7. UCA</h3>
+<ol>
+	<li>
+	<h3>You will use com.ibm.text.UCA.Main as your main class, creating a run 
+	configuration along the same lines as above.</h3></li>
+	<li>
+	<h4>To build all the UCA files used by ICU, use the Program arguments:</h4>
+	<pre>Main ICU</pre>
+	</li>
+	<li>
+	<h4>To build all the charts, use the UCA project, with options: </h4>
+	<pre>normalizationChart caseChart scriptChart indexChart</pre>
+	</li>
+</ol>

 </body>