Revisions for specialcasing & misc fixes

X-SVN-Rev: 7349
2025-04-21 12:40:02 +00:00 · 2001-12-13 23:36:29 +00:00 · 2001-12-13 23:36:29 +00:00 · a903b84867
commit a903b84867
parent ac085286cd
26 changed files with 1742 additions and 814 deletions
--- a/tools/unicodetools/com/ibm/text/UCD/BuildNames.java
+++ b/tools/unicodetools/com/ibm/text/UCD/BuildNames.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/BuildNames.java,v $
-* $Date: 2001/08/31 00:30:17 $
-* $Revision: 1.2 $
+* $Date: 2001/12/13 23:35:54 $
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */
@ -26,12 +26,9 @@ public class BuildNames implements UCD_Types {

    static final boolean DEBUG = true;

-    static UCD ucd;
-
    public static void main(String[] args) throws IOException {

-        ucd = UCD.make();
-
+        Main.setUCD();
        collectWords();
    }

@ -85,8 +82,8 @@ public class BuildNames implements UCD_Types {
        int used = 0;
        int sum = 0;
        for (int i = 0; i < 0x10FFFF; ++i) {
-            if (ucd.hasComputableName(i)) continue;
-            String name = transform(ucd.getName(i));
+            if (Main.ucd.hasComputableName(i)) continue;
+            String name = transform(Main.ucd.getName(i));


            sum += name.length();
--- a/tools/unicodetools/com/ibm/text/UCD/CaseFoldingHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/CaseFoldingHeader.txt
@ -0,0 +1,41 @@
+# Case Folding Properties
+#
+# This file is a supplement to the UnicodeData file.
+# It provides a case folding mapping generated from the Unicode Character Database.
+# If all characters are mapped according to the full mapping below, then
+# case differences (according to UnicodeData.txt and SpecialCasing.txt)
+# are eliminated.
+#
+# The data supports both implementations that require simple case foldings
+# (where string lengths don't change), and implementations that allow full case folding
+# (where string lengths may grow). Note that where they can be supported, the
+# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
+#
+# NOTE: case folding does not preserve normalization forms!
+#
+# For information on case folding, see
+# UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/
+#
+# ================================================================================
+# Format
+# ================================================================================
+# The entries in this file are in the following machine-readable format:
+#
+# <code>; <status>; <mapping>; # <name>
+#
+# The status field is:
+# C: common case folding, common mappings shared by both simple and full mappings.
+# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
+# S: simple case folding, mappings to single characters where different from F.
+# I: special case for dotted uppercase I and dotless lowercase i
+#    - If this mapping is included, the result is case-insensitive, but dotless and dotted I's are not distinguished.
+#    - If this mapping is excluded, the result is not fully case-insensitive, but dotless and dotted I's are distinguished.
+#
+# Usage:
+#  A. To do a simple case folding, use the mappings with status C + S + I.
+#  B. To do a full case folding, use the mappings with status C + F + I.
+#    The mappings with status I can be omitted depending on the desired case-folding
+#    behavior. (The default option is to retain them.)
+#
+# =================================================================
+
--- a/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java
+++ b/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
-* $Date: 2001/12/06 00:05:53 $
-* $Revision: 1.9 $
+* $Date: 2001/12/13 23:35:54 $
+* $Revision: 1.10 $
 *
 *******************************************************************************
 */
@ -87,8 +87,6 @@ public final class DerivedProperty implements UCD_Types {
    }
    */
    private UnicodeProperty[] dprops = new UnicodeProperty[50];
-    private Normalizer[] nf = new Normalizer[4];
-    private Normalizer nfd, nfc, nfkd, nfkc;

    static final String[] CaseNames = {
                "Uppercase", 
@ -99,7 +97,7 @@ public final class DerivedProperty implements UCD_Types {
        Normalizer nfx;
        ExDProp(int i) {
            type = DERIVED_NORMALIZATION;
-            nfx = nf[i];
+            nfx = Main.nf[i];
            name = "Expands_On_" + nfx.getName();
            shortName = "XO_" + nfx.getName();
            header = "# Derived Property: " + name
@ -123,7 +121,7 @@ public final class DerivedProperty implements UCD_Types {
        NF_UnsafeStartProp(int i) {
            isStandard = false;
            type = DERIVED_NORMALIZATION;
-            nfx = nf[i];
+            nfx = Main.nf[i];
            name = nfx.getName() + "_UnsafeStart";
            shortName = nfx.getName() + "_SS";
            header = "# Derived Property: " + name
@ -159,7 +157,7 @@ public final class DerivedProperty implements UCD_Types {
                case NFC_TrailingNonZero: bitsets[1] = bitset = new BitSet(); break;
            }
            filter = bitsets[1] != null;
-            nfc.getCompositionStatus(bitsets[0], bitsets[1], bitsets[2]);
+            Main.nfc.getCompositionStatus(bitsets[0], bitsets[1], bitsets[2]);
            
            name = Names[i-NFC_Leading];
            shortName = SNames[i-NFC_Leading];
@ -193,19 +191,19 @@ public final class DerivedProperty implements UCD_Types {
        
        GenDProp (int i) {
            isStandard = false;
-            valueVaries = true;
+            setValueType(NON_ENUMERATED);
            type = DERIVED_NORMALIZATION;
-            nfx = nf[i];
+            nfx = Main.nf[i];
            name = nfx.getName();
            String compName = "the character itself";
            
            if (i == NFKC || i == NFD) {
                name += "-NFC";
-                nfComp = nfc;
+                nfComp = Main.nfc;
                compName = "NFC for the character";
            } else if (i == NFKD) {
                name += "-NFD";
-                nfComp = nfd;
+                nfComp = Main.nfd;
                compName = "NFD for the character";
            }
            header = "# Derived Property: " + name              
@ -269,9 +267,9 @@ public final class DerivedProperty implements UCD_Types {
        String MAYBE;
        Normalizer nfx;
        QuickDProp (int i) {
-            valueVaries = true;
+            setValueType((i == NFC || i == NFKC) ? ENUMERATED : BINARY);
            type = DERIVED_NORMALIZATION;
-            nfx = nf[i];
+            nfx = Main.nf[i];
            NO = nfx.getName() + "_NO";
            MAYBE = nfx.getName() + "_MAYBE";
            name = nfx.getName() + "_QuickCheck";
@ -291,11 +289,6 @@ public final class DerivedProperty implements UCD_Types {
    };

    {
-        nfd = nf[0] = new Normalizer(Normalizer.NFD);
-        nfc = nf[1] = new Normalizer(Normalizer.NFC);
-        nfkd = nf[2] = new Normalizer(Normalizer.NFKD);
-        nfkc = nf[3] = new Normalizer(Normalizer.NFKC);
-
        for (int i = ExpandsOnNFD; i <= ExpandsOnNFKC; ++i) {
            dprops[i] = new ExDProp(i-ExpandsOnNFD);
        }
@ -493,7 +486,7 @@ of characters, the first of which has a non-zero combining class.
        dprops[FC_NFKC_Closure] = new UnicodeProperty() {
            {
                type = DERIVED_NORMALIZATION;
-                valueVaries = true;
+                setValueType(NON_ENUMERATED);
                name = "FC_NFKC_Closure";
                shortName = "FC_NFKC";
                header = "# Derived Property: " + name
@ -503,8 +496,8 @@ of characters, the first of which has a non-zero combining class.
            }
            public String getValue(int cp, byte style) { 
                if (!ucdData.isRepresented(cp)) return "";
-                String b = nfkc.normalize(fold(cp));
-                String c = nfkc.normalize(fold(b));
+                String b = Main.nfkc.normalize(fold(cp));
+                String c = Main.nfkc.normalize(fold(b));
                if (c.equals(b)) return "";
                return "FNC; " + Utility.hex(c);
            } // default
@ -516,7 +509,7 @@ of characters, the first of which has a non-zero combining class.
                type = DERIVED_NORMALIZATION;
                isStandard = false;
                name = "FC_NFC_Closure";
-                valueVaries = true;
+                setValueType(NON_ENUMERATED);
                shortName = "FC_NFC";
                header = "# Derived Property: " + name
                    + "\r\n#  Generated from computing: b = NFC(Fold(a)); c = NFC(Fold(b));"
@ -525,8 +518,8 @@ of characters, the first of which has a non-zero combining class.
            }
            public String getValue(int cp, byte style) { 
                if (!ucdData.isRepresented(cp)) return "";
-                String b = nfc.normalize(fold(cp));
-                String c = nfc.normalize(fold(b));
+                String b = Main.nfc.normalize(fold(cp));
+                String c = Main.nfc.normalize(fold(b));
                if (c.equals(b)) return "";
                return "FN; " + Utility.hex(c);
            } // default
@ -603,8 +596,9 @@ of characters, the first of which has a non-zero combining class.
        dprops[Type_i] = new UnicodeProperty() {
            {
                type = DERIVED_CORE;
-                name = "Soft_Dotted";
-                shortName = "SDot";
+                isStandard = false;
+                name = "DSoft_Dotted";
+                shortName = "DSDot";
                header = header = "# Derived Property: " + name
                    + "\r\n#  Generated from: all characters whose canonical decompositions end with a combining character sequence that"
                    + "\r\n# - starts with i or j"
@ -613,21 +607,24 @@ of characters, the first of which has a non-zero combining class.
                ;
            }
            boolean hasValue(int cp) {
-                if (cp == 'i' || cp == 'j') return true;
-                if (!nfkd.hasDecomposition(cp)) return false;
-                String decomp = nfd.normalize(cp);
+                if (hasSoftDot(cp)) return true;
+                if (!Main.nfkd.hasDecomposition(cp)) return false;
+                String decomp = Main.nfd.normalize(cp);
                boolean ok = false;
                for (int i = decomp.length()-1; i >= 0; --i) {
-                    char ch = decomp.charAt(i);
+                    int ch = UTF16.charAt(decomp, i);
                    int cc = ucdData.getCombiningClass(ch);
                    if (cc == 230) return false;
                    if (cc == 0) {
-                        if (ch == 'i' || ch == 'j') ok = true;
-                        else return false;
+                        if (!hasSoftDot(ch)) return false;
+                        ok = true;
                    }
                }
                return ok;
            }
+            boolean hasSoftDot(int ch) {
+                return ch == 'i' || ch == 'j' || ch == 0x0268 || ch == 0x0456 || ch == 0x0458;
+            }
        };
        
        dprops[Case_Ignorable] = new UnicodeProperty() {
@ -666,7 +663,7 @@ of characters, the first of which has a non-zero combining class.
        for (int i = 0; i < dprops.length; ++i) {
            UnicodeProperty up = dprops[i];
            if (up == null) continue;
-            if (up.valueVaries()) continue;
+            if (up.getValueType() != BINARY) continue;
            up.setValue(NUMBER, "1");
            up.setValue(SHORT, "Y");
            up.setValue(LONG, "YES");
@ -681,11 +678,11 @@ of characters, the first of which has a non-zero combining class.
            || ucdData.getBinaryProperty(cp, Other_Lowercase)) return Ll;
        if (cat == Lt || cat == Lo || cat == Lm || cat == Nl) return cat;
        
-       // if (true) throw new IllegalArgumentException("FIX nf[2]");
+       // if (true) throw new IllegalArgumentException("FIX Main.nf[2]");
        
-        if (!nf[NFKD].normalizationDiffers(cp)) return Lo;
+        if (!Main.nf[NFKD].normalizationDiffers(cp)) return Lo;

-        String norm = nf[NFKD].normalize(cp);
+        String norm = Main.nf[NFKD].normalize(cp);
        int cp2;
        boolean gotUpper = false;
        boolean gotLower = false;
@ -723,8 +720,8 @@ of characters, the first of which has a non-zero combining class.
    }
    
    public static void test() {
-        UCD ucd = UCD.make();
-        DerivedProperty dprop = new DerivedProperty(ucd);
+        Main.setUCD();
+        DerivedProperty dprop = new DerivedProperty(Main.ucd);
        /*
        for (int j = 0; j < LIMIT; ++j) {
            System.out.println();
@ -735,9 +732,9 @@ of characters, the first of which has a non-zero combining class.
        
        for (int cp = 0xA0; cp < 0xFF; ++cp) {
            System.out.println();
-            System.out.println(ucd.getCodeAndName(cp));
+            System.out.println(Main.ucd.getCodeAndName(cp));
            for (int j = 0; j < DERIVED_PROPERTY_LIMIT; ++j) {
-                String prop = make(j, ucd).getValue(cp);
+                String prop = make(j, Main.ucd).getValue(cp);
                if (prop.length() != 0) System.out.println("\t" + prop);
            }
        }
--- a/tools/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java,v $
-* $Date: 2001/12/06 00:05:53 $
-* $Revision: 1.7 $
+* $Date: 2001/12/13 23:35:56 $
+* $Revision: 1.8 $
 *
 *******************************************************************************
 */
@ -34,7 +34,7 @@ final class DerivedPropertyLister extends PropertyLister {
        this.ucdData = ucd;
        // this.dprop = new DerivedProperty(ucd);
        uprop = DerivedProperty.make(propMask, ucd);
-        varies = uprop.valueVaries();
+        varies = uprop.getValueType() != BINARY;

        width = super.minPropertyWidth();
        switch (propMask) {
@ -56,7 +56,7 @@ final class DerivedPropertyLister extends PropertyLister {
    }

    public String valueName(int cp) {
-        if (uprop.valueVaries()) return uprop.getValue(cp, LONG);
+        if (uprop.getValueType() != BINARY) return uprop.getValue(cp, LONG);
        return uprop.getProperty(LONG);
    }

--- a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
-* $Date: 2001/09/19 23:33:16 $
-* $Revision: 1.3 $
+* $Date: 2001/12/13 23:35:56 $
+* $Revision: 1.4 $
 *
 *******************************************************************************
 */
@ -15,38 +15,72 @@ package com.ibm.text.UCD;

 import java.util.*;
 import java.io.*;
+import com.ibm.text.UTF16;

 import com.ibm.text.utility.*;

 public class GenerateCaseFolding implements UCD_Types {
    public static boolean DEBUG = false;
-    public static UCD ucd = UCD.make("");
-
-    public static void main(String[] args) throws java.io.IOException {
-        makeCaseFold();
+    public static boolean COMMENT_DIFFS = false; // ON if we want a comment on mappings != lowercase
+    public static boolean PICK_SHORT = false; // picks short value for SIMPLE if in FULL, changes weighting
+    public static boolean NF_CLOSURE = false; // ON to also close each equivalence class under NFD/NFC/NFKD/NFKC normalization
+    // PICK_SHORT & NF_CLOSURE = false for old style
+    
+    
+    /*public static void main(String[] args) throws java.io.IOException {
+        makeCaseFold(arg[0]);
        //getAge();
    }
-
-    public static void makeCaseFold() throws java.io.IOException {
+    */
+    
+    static PrintWriter log;
+    
+    public static void makeCaseFold(boolean normalized) throws java.io.IOException {
+        PICK_SHORT = NF_CLOSURE = normalized;
+        
+        Main.setUCD();
+        log = Utility.openPrintWriter("CaseFoldingLog" + GenerateData.getFileSuffix(true));
+        System.out.println("Writing Log: " + "CaseFoldingLog" + GenerateData.getFileSuffix(true));
+        
        System.out.println("Making Full Data");
-        Map fullData = getCaseFolding(true);
+        Map fullData = getCaseFolding(true, NF_CLOSURE);
+        Utility.fixDot();
        System.out.println("Making Simple Data");
-        Map simpleData = getCaseFolding(false);
+        Map simpleData = getCaseFolding(false, NF_CLOSURE);
        // write the data

+        Utility.fixDot();
        System.out.println("Writing");
+        String filename = "CaseFolding";
+        if (normalized) filename += "-Normalized";
+        String directory = "DerivedData/";
+        PrintWriter out = Utility.openPrintWriter(directory + filename + GenerateData.getFileSuffix(true));
+        GenerateData.generateBat(directory, filename, GenerateData.getFileSuffix(true));
+        
+        out.println("# CaseFolding" + GenerateData.getFileSuffix(false));
+        out.println("#");
+        out.println("# Generated: " + new Date() + ", MD");
+        Utility.appendFile("CaseFoldingHeader.txt", false, out);
+        
+        /*
        PrintWriter out = new PrintWriter(
            new BufferedWriter(
            new OutputStreamWriter(
-                new FileOutputStream("CaseFoldingSample.txt"),
+                new FileOutputStream(directory + fileRoot + GenerateData.getFileSuffix()),
                "UTF8"),
            4*1024));
+        */
+        
+        for (int ch = 0; ch <= 0x10FFFF; ++ch) {
+            Utility.dot(ch);

-        for (int ch = 0; ch < 0x10FFFF; ++ch) {
+            if (!charsUsed.get(ch)) continue;
+            
            String rFull = (String)fullData.get(UTF32.valueOf32(ch));
            String rSimple = (String)simpleData.get(UTF32.valueOf32(ch));
            if (rFull == null && rSimple == null) continue;
-            if (rFull != null && rFull.equals(rSimple)) {
+            if (rFull != null && rFull.equals(rSimple) 
+              || (PICK_SHORT && UTF16.countCodePoint(rFull) == 1)) {
                String type = "C";
                if (ch == 0x130 || ch == 0x131) type = "I";
                drawLine(out, ch, type, rFull);
@ -60,35 +94,63 @@ public class GenerateCaseFolding implements UCD_Types {
            }
        }
        out.close();
+        log.close();
    }

    static void drawLine(PrintWriter out, int ch, String type, String result) {
+        String comment = "";
+        if (COMMENT_DIFFS) {
+            String lower = Main.ucd.getCase(UTF16.valueOf(ch), FULL, LOWER);
+            if (!lower.equals(result)) {
+                String upper = Main.ucd.getCase(UTF16.valueOf(ch), FULL, UPPER);
+                String lower2 = Main.ucd.getCase(UTF16.valueOf(ch), FULL, LOWER);
+                if (lower.equals(lower2)) {
+                    comment = "[Diff " + Utility.hex(lower, " ") + "] ";
+                } else {
+                    Utility.fixDot();
+                    System.out.println("PROBLEM WITH: " + Main.ucd.getCodeAndName(ch));
+                    comment = "[DIFF " + Utility.hex(lower, " ") + ", " + Utility.hex(lower2, " ") + "] ";
+                }
+            }
+        }
+        
        out.println(Utility.hex(ch)
-            + "; " + type +
-            "; " + Utility.hex(result, " ") +
-            "; # " + ucd.getName(ch));
+            + "; " + type
+            + "; " + Utility.hex(result, " ")
+            + "; # " + comment + Main.ucd.getName(ch));
    }

+    static int probeCh = 0x01f0;
+    static String shower = UTF16.valueOf(probeCh);

-    static Map getCaseFolding(boolean full) throws java.io.IOException {
+    static Map getCaseFolding(boolean full, boolean nfClose) throws java.io.IOException {
        Map data = new TreeMap();
        Map repChar = new TreeMap();
        //String option = "";

        // get the equivalence classes

-        for (int ch = 0; ch < 0x10FFFF; ++ch) {
-            if ((ch & 0x3FF) == 0) System.out.println(Utility.hex(ch));
-            if (!ucd.isRepresented(ch)) continue;
-            getClosure(ch, data, full);
+        for (int ch = 0; ch <= 0x10FFFF; ++ch) {
+            Utility.dot(ch);
+            //if ((ch & 0x3FF) == 0) System.out.println(Utility.hex(ch));
+            if (!Main.ucd.isRepresented(ch)) continue;
+            getClosure(ch, data, full, nfClose);
        }

        // get the representative characters
-
+        
        Iterator it = data.keySet().iterator();
        while (it.hasNext()) {
            String s = (String) it.next();
            Set set = (Set) data.get(s);
+            show = set.contains(shower);
+            if (show) {
+                Utility.fixDot();
+                System.out.println(toString(set));
+            }
+            
+        // Pick the best available representative
+            
            String rep = null;
            int repGood = 0;
            String dup = null;
@ -104,30 +166,63 @@ public class GenerateCaseFolding implements UCD_Types {
                    dup = s2;
                }
            }
-            if (rep == null) System.err.println("No representative for: " + toString(set));
-            else if (repGood < 128) {
-                System.err.println("Non-optimal!!: "
-                    + ucd.getName(rep) + ", " + toString(set,true));
+            if (rep == null) {
+                Utility.fixDot();
+                System.err.println("No representative for: " + toString(set));
+            } else if ((repGood & (NFC_FORMAT | ISLOWER)) != (NFC_FORMAT | ISLOWER)) {
+                String message = "";
+                if ((repGood & NFC_FORMAT) == 0) {
+                    message += " [NOT NFC FORMAT]";
+                }
+                if ((repGood & ISLOWER) == 0) {
+                    message += " [NOT LOWERCASE]";
+                }
+                Utility.fixDot();
+                log.println("Non-Optimal Representative " + message);
+                log.println(" Rep:\t" + Main.ucd.getCodeAndName(rep));
+                log.println(" Set:\t" + toString(set,true, true));
            }
+            
+        // Add it for all the elements of the set
+        
            it2 = set.iterator();
            while (it2.hasNext()) {
                String s2 = (String)it2.next();
-                if (s2.length() == 1 && !s2.equals(rep)) repChar.put(UTF32.getCodePointSubstring(s2,0), rep);
+                if (UTF16.countCodePoint(s2) == 1 && !s2.equals(rep)) {
+                    repChar.put(UTF32.getCodePointSubstring(s2,0), rep);
+                    charsUsed.set(UTF16.charAt(s2, 0));
+                }
            }
        }
        return repChar;
    }
+    
+    static BitSet charsUsed = new BitSet();
+    static boolean show = false;
+    static final int NFC_FORMAT = 64;
+    static final int ISLOWER = 128;

    static int goodness(String s, boolean full) {
        if (s == null) return 0;
-        int result = s.length();
-        if (s.equals(lower(upper(s, full), full))) result |= 128;
-        if (s.equals(NFC.normalize(s))) result |= 64;
+        int result = 32-s.length();
+        if (!PICK_SHORT) {
+            result = s.length();
+        }
+        if (!full) result <<= 8;
+        String low = lower(upper(s, full), full);
+        if (s.equals(low)) result |= ISLOWER;
+        else if (PICK_SHORT && Main.nfd.normalize(s).equals(Main.nfd.normalize(low))) result |= ISLOWER;
+        
+        if (s.equals(Main.nfc.normalize(s))) result |= NFC_FORMAT;
+        
+        if (show) {
+            Utility.fixDot();
+            System.out.println(Utility.hex(result) + ", " + Main.ucd.getCodeAndName(s));
+        }
        return result;
    }


-    static Normalizer NFC = new Normalizer(Normalizer.NFC);
    /*
    static HashSet temp = new HashSet();
    static void normalize(HashSet set) {
@ -151,33 +246,33 @@ public class GenerateCaseFolding implements UCD_Types {

            /*
            String
-            String lower1 = ucd.getLowercase(ch);
-            String lower2 = ucd.toLowercase(ch,option);
+            String lower1 = Main.ucd.getLowercase(ch);
+            String lower2 = Main.ucd.toLowercase(ch,option);

-            char ch2 = ucd.getLowercase(ucd.getUppercase(ch).charAt(0)).charAt(0);
-            //String lower1 = String.valueOf(ucd.getLowercase(ch));
-            //String lower = ucd.toLowercase(ch2,option);
-            String upper = ucd.toUppercase(ch2,option);
-            String lowerUpper = ucd.toLowercase(upper,option);
-            //String title = ucd.toTitlecase(ch2,option);
-            //String lowerTitle = ucd.toLowercase(upper,option);
+            char ch2 = Main.ucd.getLowercase(Main.ucd.getUppercase(ch).charAt(0)).charAt(0);
+            //String lower1 = String.valueOf(Main.ucd.getLowercase(ch));
+            //String lower = Main.ucd.toLowercase(ch2,option);
+            String upper = Main.ucd.toUppercase(ch2,option);
+            String lowerUpper = Main.ucd.toLowercase(upper,option);
+            //String title = Main.ucd.toTitlecase(ch2,option);
+            //String lowerTitle = Main.ucd.toLowercase(upper,option);

            if (ch != ch2 || lowerUpper.length() != 1 || ch != lowerUpper.charAt(0)) { //
                output.println(Utility.hex(ch)
                    + "; " + (lowerUpper.equals(lower1) ? "L" : lowerUpper.equals(lower2) ? "S" : "E")
                    + "; " + Utility.hex(lowerUpper," ")
-                    + ";\t#" + ucd.getName(ch)
+                    + ";\t#" + Main.ucd.getName(ch)
                    );
                //if (!lowerUpper.equals(lower)) {
-                //    output.println("Warning1: " + Utility.hex(lower) + " " + ucd.getName(lower));
+                //    output.println("Warning1: " + Utility.hex(lower) + " " + Main.ucd.getName(lower));
                //}
                //if (!lowerUpper.equals(lowerTitle)) {
-                //    output.println("Warning2: " + Utility.hex(lowerTitle) + " " + ucd.getName(lowerTitle));
+                //    output.println("Warning2: " + Utility.hex(lowerTitle) + " " + Main.ucd.getName(lowerTitle));
                //}
            }
            */

-    static void getClosure(int ch, Map data, boolean full) {
+    static void getClosure(int ch, Map data, boolean full, boolean nfClose) {
        String charStr = UTF32.valueOf32(ch);
        String lowerStr = lower(charStr, full);
        String titleStr = title(charStr, full);
@ -202,7 +297,13 @@ public class GenerateCaseFolding implements UCD_Types {
            while (it.hasNext()) {
                String s = (String) it.next();
                // do funny stuff since we can't modify set while iterating
-                //if (add(set, NFC.normalize(s), data)) continue main;
+                // Only do this when nfClose is set: if the source is not normalized, we don't want to normalize
+                if (nfClose) {
+                    if (add(set, Main.nfd.normalize(s), data)) continue main;
+                    if (add(set, Main.nfc.normalize(s), data)) continue main;
+                    if (add(set, Main.nfkd.normalize(s), data)) continue main;
+                    if (add(set, Main.nfkc.normalize(s), data)) continue main;
+                }
                if (add(set, lower(s, full), data)) continue main;
                if (add(set, title(s, full), data)) continue main;
                if (add(set, upper(s, full), data)) continue main;
@ -216,31 +317,34 @@ public class GenerateCaseFolding implements UCD_Types {
        return result.replace('\u03C2', '\u03C3'); // HACK for lower
    }

-    // These functions are no longer necessary, since UCD is parameterized,
+    // These functions are no longer necessary, since Main.ucd is parameterized,
    // but it's not worth changing

    static String lower2(String s, boolean full) {
-        if (!full) {
+        /*if (!full) {
            if (s.length() != 1) return s;
-            return ucd.getCase(UTF32.char32At(s,0), SIMPLE, LOWER);
+            return Main.ucd.getCase(UTF32.char32At(s,0), SIMPLE, LOWER);
        }
-        return ucd.getCase(s, FULL, LOWER);
+        */
+        return Main.ucd.getCase(s, full ? FULL : SIMPLE, LOWER);
    }

    static String upper(String s, boolean full) {
-        if (!full) {
+        /* if (!full) {
            if (s.length() != 1) return s;
-            return ucd.getCase(UTF32.char32At(s,0), FULL, UPPER);
+            return Main.ucd.getCase(UTF32.char32At(s,0), FULL, UPPER);
        }
-        return ucd.getCase(s, SIMPLE, UPPER);
+        */
+        return Main.ucd.getCase(s, full ? FULL : SIMPLE, UPPER);
    }

    static String title(String s, boolean full) {
-        if (!full) {
+        /*if (!full) {
            if (s.length() != 1) return s;
-            return ucd.getCase(UTF32.char32At(s,0), FULL, TITLE);
+            return Main.ucd.getCase(UTF32.char32At(s,0), FULL, TITLE);
        }
-        return ucd.getCase(s, SIMPLE, TITLE);
+        */
+        return Main.ucd.getCase(s, full ? FULL : SIMPLE, TITLE);
    }

    static boolean add(Set set, String s, Map data) {
@ -261,28 +365,173 @@ public class GenerateCaseFolding implements UCD_Types {
    }

    static String toString(Set set) {
-        String result = "{";
-        Iterator it2 = set.iterator();
-        boolean first = true;
-        while (it2.hasNext()) {
-            String s2 = (String) it2.next();
-            if (!first) result += ", ";
-            first = false;
-            result += Utility.hex(s2, " ");
-        }
-        return result + "}";
+        return toString(set, false, false);
    }

-    static String toString(Set set, boolean t) {
+    static String toString(Set set, boolean name, boolean crtab) {
        String result = "{";
        Iterator it2 = set.iterator();
        boolean first = true;
        while (it2.hasNext()) {
            String s2 = (String) it2.next();
-            if (!first) result += ", ";
+            if (!first) {
+                if (crtab) {
+                    result += ";\r\n\t";
+                } else {
+                    result += "; ";
+                }
+            }
            first = false;
-            result += ucd.getName(s2);
+            if (name) {
+                result += Main.ucd.getCodeAndName(s2);
+            } else {
+                result += Utility.hex(s2, " ");
+            }
        }
        return result + "}";
    }
+    
+    static boolean specialNormalizationDiffers(int ch) { // same answer as Main.nfkd.normalizationDiffers(ch), except that U+00DF always reports true
+        if (ch == 0x00DF) return true;                  // es-zed: forced to "differs" without consulting the normalizer
+        return Main.nfkd.normalizationDiffers(ch); // every other code point defers to the NFKD normalizer
+    }
+    
+    static String specialNormalization(String s) { // NFKD of s, except that a string consisting solely of U+00DF becomes "ss"
+        if (s.equals("\u00DF")) return "ss"; // exact match only: a longer string that contains U+00DF is not special-cased here
+        return Main.nfkd.normalize(s); // everything else goes through the NFKD normalizer
+    }
+    
+    static boolean isExcluded(int ch) { // true for code points on the hard-coded exclusion list or with decomposition type COMPAT_SQUARE
+        if (ch == 0x130) return true;                  // skip LATIN CAPITAL LETTER I WITH DOT ABOVE
+        if (ch == 0x0132 || ch == 0x0133) return true; // skip IJ, ij
+        if (ch == 0x037A) return true;                 // skip GREEK YPOGEGRAMMENI
+        if (0x249C <= ch && ch <= 0x24B5) return true; // skip PARENTHESIZED LATIN SMALL LETTER A.. (inclusive range ending at U+24B5)
+        if (0x20A8 <= ch && ch <= 0x217B) return true; // skip Rupee.. (the whole inclusive range U+20A8 through U+217B)
+        
+        byte type = Main.ucd.getDecompositionType(ch);  // the remaining exclusion depends on the decomposition type
+        if (type == COMPAT_SQUARE) return true; // excluded by type rather than by explicit code point
+        //if (type == COMPAT_UNSPECIFIED) return true;
+        return false; // not excluded
+    }
+    
+    static void generateSpecialCasing() throws IOException { // writes DerivedData/SpecialCasing<suffix> and a SpecialCasingExceptions<suffix> log of excluded mappings
+        Main.setUCD();
+        Map sorted = new TreeMap(); // key = (order << 24) | code point, value = formatted mapping line; iteration is therefore grouped by order, then by code point
+        
+        PrintWriter log = Utility.openPrintWriter("SpecialCasingExceptions" + GenerateData.getFileSuffix(true)); // NOTE(review): not closed if the loop below throws
+        
+        for (int ch = 0; ch <= 0x10FFFF; ++ch) {
+            Utility.dot(ch); // presumably a progress indicator -- TODO confirm against Utility
+            if (!Main.ucd.isRepresented(ch)) continue;
+            if (!specialNormalizationDiffers(ch)) continue; // only code points whose special normalization differs (or U+00DF) are candidates
+
+            String lower = Main.nfc.normalize(Main.ucd.getCase(ch, SIMPLE, LOWER)); // SIMPLE case mappings of the code point itself, NFC-normalized
+            String upper = Main.nfc.normalize(Main.ucd.getCase(ch, SIMPLE, UPPER));
+            String title = Main.nfc.normalize(Main.ucd.getCase(ch, SIMPLE, TITLE));
+            
+            String chstr = UTF16.valueOf(ch);
+            
+            String decomp = specialNormalization(chstr);
+            String flower = Main.nfc.normalize(Main.ucd.getCase(decomp, SIMPLE, LOWER)); // case mappings applied to the decomposed string, NFC-normalized
+            String fupper = Main.nfc.normalize(Main.ucd.getCase(decomp, SIMPLE, UPPER));
+            String ftitle = Main.nfc.normalize(Main.ucd.getCase(decomp, SIMPLE, TITLE));
+            
+            String base = Main.nfc.normalize(decomp); // the decomposition recomposed, with no case mapping applied
+            String blower = Main.nfc.normalize(specialNormalization(lower)); // the per-code-point mappings, special-normalized then recomposed
+            String bupper = Main.nfc.normalize(specialNormalization(upper));
+            String btitle = Main.nfc.normalize(specialNormalization(title));
+            
+            if (ch == 0x249c) { // debugging dump of every intermediate value for this one code point
+                System.out.println("Code: " + Main.ucd.getCodeAndName(ch));
+                System.out.println("Decomp: " + Main.ucd.getCodeAndName(decomp));
+                System.out.println("Base: " + Main.ucd.getCodeAndName(base));
+                System.out.println("SLower: " + Main.ucd.getCodeAndName(lower));
+                System.out.println("FLower: " + Main.ucd.getCodeAndName(flower));
+                System.out.println("BLower: " + Main.ucd.getCodeAndName(blower));
+                System.out.println("STitle: " + Main.ucd.getCodeAndName(title));
+                System.out.println("FTitle: " + Main.ucd.getCodeAndName(ftitle));
+                System.out.println("BTitle: " + Main.ucd.getCodeAndName(btitle));
+                System.out.println("SUpper: " + Main.ucd.getCodeAndName(upper));
+                System.out.println("FUpper: " + Main.ucd.getCodeAndName(fupper));
+                System.out.println("BUpper: " + Main.ucd.getCodeAndName(bupper));
+            }
+            
+            // presumably if there is a single code point, it would already be in the simple mappings
+            
+            if (UTF16.countCodePoint(flower) == 1 && UTF16.countCodePoint(fupper) == 1 
+                && UTF16.countCodePoint(title) == 1) continue; // NOTE(review): tests title, while the two sibling tests use flower/fupper -- confirm ftitle was not intended
+            
+            // if there is no change from the base, skip
+            
+            if (flower.equals(base) && fupper.equals(base) && ftitle.equals(base)) continue;
+            
+            // fix special cases
+            // if (flower.equals(blower) && fupper.equals(bupper) && ftitle.equals(btitle)) continue;
+            if (flower.equals(blower)) flower = lower; // where the two derivations agree, fall back to the mapping of the code point itself
+            if (fupper.equals(bupper)) fupper = upper;
+            if (ftitle.equals(btitle)) ftitle = title;
+            
+            // if there are no changes from the original, or the expanded original, skip
+            
+            if (flower.equals(lower) && fupper.equals(upper) && ftitle.equals(title)) continue;
+            
+            String name = Main.ucd.getName(ch);
+            String mapping = Utility.hex(ch) // fields in output order: code; lower; title; upper; # name
+                + "; " + Utility.hex(flower.equals(base) ? chstr : flower) // a mapping equal to base is written as the original character
+                + "; " + Utility.hex(ftitle.equals(base) ? chstr : ftitle)
+                + "; " + Utility.hex(fupper.equals(base) ? chstr : fupper)
+                + "; # " + Main.ucd.getName(ch); // same call as the one that produced name above
+            
+            int order = name.equals("LATIN SMALL LETTER SHARP S") ? 1 // section number; it selects the heading printed when the file is written below
+                : name.indexOf("ARMENIAN SMALL LIGATURE") >= 0 ? 3 // tested before the generic LIGATURE case so Armenian ligatures get their own group
+                : name.indexOf("LIGATURE") >= 0 ? 2
+                : name.indexOf("GEGRAMMENI") < 0 ? 4 // every remaining name that does not contain GEGRAMMENI
+                : UTF16.countCodePoint(ftitle) == 1 ? 5 // names containing GEGRAMMENI are split by title/upper length into 5, 6, 7
+                : UTF16.countCodePoint(fupper) == 2 ? 6
+                : 7;
+            
+            
+            // special exclusions 
+            if (isExcluded(ch)) {
+                log.println("# " + mapping); // excluded mappings go to the log, commented out
+            } else {
+                sorted.put(new Integer((order << 24) | ch), mapping); // ch is at most 0x10FFFF, so it occupies the low 24 bits below order
+            }
+        }
+        log.close();
+        
+        System.out.println("Writing");
+        PrintWriter out = Utility.openPrintWriter("DerivedData/SpecialCasing" + GenerateData.getFileSuffix(true)); // NOTE(review): not closed if anything below throws
+        GenerateData.generateBat("DerivedData/", "SpecialCasing", GenerateData.getFileSuffix(true));
+        Utility.appendFile("SpecialCasingHeader.txt", true, out); // header text first, then the generated sections, then the footer
+
+        Iterator it = sorted.keySet().iterator();
+        int lastOrder = -1; // no real order is negative, so the first entry always starts a section
+        while (it.hasNext()) {
+            Integer key = (Integer) it.next();
+            String line = (String) sorted.get(key);
+            int order = key.intValue() >> 24; // recover the section number packed into the key
+            if (order != lastOrder) { // first line of a new section
+                lastOrder = order;
+                out.println(); // blank line before every section
+                boolean skipLine = false;
+                switch(order) {
+                case 1: 
+                    out.println("# The German es-zed is special--the normal mapping is to SS.");
+                    out.println("# Note: the titlecase should never occur in practice. It is equal to titlecase(uppercase(<es-zed>))");
+                    break;
+                case 2: out.println("# Ligatures"); break;
+                case 3: skipLine = true; break; // no heading of its own: only the single blank line separates it from section 2
+                case 4: out.println("# No corresponding uppercase precomposed character"); break;
+                case 5: Utility.appendFile("SpecialCasingIota.txt", true, out); break; // this heading comes from a separate text file
+                case 6: out.println("# Some characters with YPOGEGRAMMENI are also have no corresponding titlecases"); break; // NOTE(review): emitted text reads "are also have" -- grammar slip in generated output
+                case 7: skipLine = true; break; // no heading of its own, same treatment as section 3
+                }
+                if (!skipLine) out.println(); // blank line after a heading
+            }
+            out.println(line);
+        }
+        Utility.appendFile("SpecialCasingFooter.txt", true, out);
+        out.close();
+    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
--- a/tools/unicodetools/com/ibm/text/UCD/MLStreamWriter.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MLStreamWriter.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MLStreamWriter.java,v $
-* $Date: 2001/08/31 00:30:17 $
-* $Revision: 1.2 $
+* $Date: 2001/12/13 23:35:57 $
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */
@ -245,7 +245,7 @@ public class MLStreamWriter extends Writer {
    boolean isHTML;
    ArrayList stack = new ArrayList();
    boolean inElement = false;
-    Normalizer formC = new Normalizer(Normalizer.NFC);
+    Normalizer formC = new Normalizer(Normalizer.NFC, "");
    int len;
    int maxLineLength = 60;
    // later, add better line end management, indenting
--- a/tools/unicodetools/com/ibm/text/UCD/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Main.java
@ -5,17 +5,59 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
-* $Date: 2001/12/06 00:05:53 $
-* $Revision: 1.7 $
+* $Date: 2001/12/13 23:35:56 $
+* $Revision: 1.8 $
 *
 *******************************************************************************
 */

 package com.ibm.text.UCD;
 import com.ibm.text.utility.*;
+import java.util.Date;

-public final class Main {
-    static String ucdVersion = "";
+public final class Main implements UCD_Types {
+    
+    static String ucdVersion = UCD.latestVersion;
+    static UCD ucd;
+    static Normalizer nfc;
+    static Normalizer nfd;
+    static Normalizer nfkc;
+    static Normalizer nfkd;
+    static Normalizer[] nf = new Normalizer[4];
+    
+    static void setUCD() { // (re)creates the shared UCD and the four normalizers for the current Main.ucdVersion
+        ucd = UCD.make(Main.ucdVersion);
+        nfd = nf[NFD] = new Normalizer(Normalizer.NFD, Main.ucdVersion); // each normalizer is stored twice: in its named field and in nf[] under its form constant
+        nfc = nf[NFC] = new Normalizer(Normalizer.NFC, Main.ucdVersion);
+        nfkd = nf[NFKD] = new Normalizer(Normalizer.NFKD, Main.ucdVersion);
+        nfkc = nf[NFKC] = new Normalizer(Normalizer.NFKC, Main.ucdVersion);
+        System.out.println("Loaded UCD" + ucd.getVersion() + " " + (new Date(Main.ucd.getDate()))); // NOTE(review): no separator between "Loaded UCD" and the version -- confirm intended
+    }
+
+    static final String[] ALL_FILES = { // option names that extras() appends to its argument list when it sees "All"
+        "CaseFolding",
+        "CompositionExclusions",
+        "DerivedBidiClass",
+        "DerivedBinaryProperties",
+        "DerivedCombiningClass",
+        "DerivedCoreProperties",
+        "DerivedDecompositionType",
+        "DerivedEastAsianWidth",
+        "DerivedGeneralCategory",
+        "DerivedJoiningGroup",
+        "DerivedJoiningType",
+        "DerivedLineBreak",
+        "DerivedNormalizationProperties",
+        "DerivedNumericType",
+        "DerivedNumericValues",
+        "NormalizationTest",
+        "PropertyAliases",
+        "PropList",
+        "Scripts",
+        "SpecialCasing",
+        "DerivedAge",
+        //"OtherDerivedProperties",
+    };

    public static void main (String[] args) throws Exception {

@ -26,19 +68,19 @@ public final class Main {
            Utility.fixDot();
            System.out.println("Argument: " + args[i]);

-            if (arg.equalsIgnoreCase("all")) {
-                //checkCase();
+            if (arg.equalsIgnoreCase("verify")) {
+                VerifyUCD.verify();
                VerifyUCD.checkCanonicalProperties();
                VerifyUCD.CheckCaseFold();
                VerifyUCD.checkAgainstUInfo();

-            } else if (arg.equalsIgnoreCase("build")) {
-                ConvertUCD.main(new String[]{ucdVersion});
-            } else if (arg.equalsIgnoreCase("version")) ucdVersion = args[++i];
+            } else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{ucdVersion});
+            else if (arg.equalsIgnoreCase("version")) ucdVersion = args[++i];
            else if (arg.equalsIgnoreCase("testskippable")) NFSkippable.main(null);
            else if (arg.equalsIgnoreCase("generateXML")) VerifyUCD.generateXML();
            else if (arg.equalsIgnoreCase("checkSpeed")) VerifyUCD.checkSpeed();
            else if (arg.equalsIgnoreCase("generateHanTransliterator")) GenerateHanTransliterator.main();
+            else if (arg.equalsIgnoreCase("compareBlueberry")) VerifyUCD.compareBlueberry();

            else if (arg.equalsIgnoreCase("testDerivedProperties")) DerivedProperty.test();
            else if (arg.equalsIgnoreCase("checkCase")) VerifyUCD.checkCase();
@ -52,19 +94,180 @@ public final class Main {
            //else if (arg.equalsIgnoreCase("checkAgainstUInfo")) checkAgainstUInfo();
            else if (arg.equalsIgnoreCase("checkScripts")) VerifyUCD.checkScripts();
            else if (arg.equalsIgnoreCase("IdentifierTest")) VerifyUCD.IdentifierTest();
-            else if (arg.equalsIgnoreCase("Generate")) GenerateData.main(ucdVersion, Utility.split(args[++i],','));
            else if (arg.equalsIgnoreCase("BuildNames")) BuildNames.main(null);
            else if (arg.equalsIgnoreCase("JavascriptProperties")) WriteJavaScriptInfo.assigned();
            /*else if (arg.equalsIgnoreCase("writeNormalizerTestSuite"))
                GenerateData.writeNormalizerTestSuite("NormalizationTest-3.1.1d1.txt");
                */
-            else {
-                System.out.println("Unknown option -- must be one of the following (case-insensitive)");
-                System.out.println("generateXML, checkCase, checkCanonicalProperties, CheckCaseFold,");
-                System.out.println("VerifyIDN, NFTest, test1, ");
-                // System.out.println(checkAgainstUInfo,");
-                System.out.println("checkScripts, IdentifierTest, writeNormalizerTestSuite");
-            }
+            else extras(new String[] {arg});
        }
    }
+    
+    public static void extras (String[] args) throws Exception { // runs the generator named by each argument; "All" expands to every name in ALL_FILES
+        //ubp = new UnifiedBinaryProperty(ucd);
+        
+        boolean expanding = false; // becomes true once "All" has been expanded; from then on each argument is echoed
+        
+        for (int i = 0; i < args.length; ++i) { // args may be replaced by a longer array inside the loop, so the bound is re-read every pass
+            String arg = args[i];
+            if (arg.charAt(0) == '#') return; // skip rest of line; NOTE(review): charAt(0) throws StringIndexOutOfBoundsException for an empty argument
+            long mask = 0; // only referenced by commented-out code further down
+
+            Utility.fixDot();
+            if (expanding) System.out.println("Argument: " + args[i]);
+
+            if (arg.equalsIgnoreCase("All")) {
+                // Append all args at end
+                String[] temp = new String[args.length + ALL_FILES.length];
+                System.arraycopy(args, 0, temp, 0, args.length);
+                System.arraycopy(ALL_FILES, 0, temp, args.length, ALL_FILES.length);
+                args = temp; // the appended names are picked up by later iterations of this same loop
+                expanding = true;
+
+            // EXTRACTED PROPERTIES
+            
+            } else if (arg.equalsIgnoreCase("DerivedBidiClass")) {
+                GenerateData.generateVerticalSlice(BIDI_CLASS, BIDI_CLASS+NEXT_ENUM, GenerateData.HEADER_DERIVED,
+                    "DerivedData/DerivedExtractedProperties/", "DerivedBidiClass");
+                    
+            } else if (arg.equalsIgnoreCase("DerivedBinaryProperties")) {
+                GenerateData.generateVerticalSlice(BINARY_PROPERTIES, BINARY_PROPERTIES+1, GenerateData.HEADER_DERIVED,
+                    "DerivedData/DerivedExtractedProperties/", "DerivedBinaryProperties" );
+                    
+            } else if (arg.equalsIgnoreCase("DerivedCombiningClass")) {
+                GenerateData.generateVerticalSlice(COMBINING_CLASS, COMBINING_CLASS+NEXT_ENUM, GenerateData.HEADER_DERIVED,
+                    "DerivedData/DerivedExtractedProperties/", "DerivedCombiningClass" );
+                    
+            } else if (arg.equalsIgnoreCase("DerivedDecompositionType")) {
+                GenerateData.generateVerticalSlice(DECOMPOSITION_TYPE, DECOMPOSITION_TYPE+NEXT_ENUM, GenerateData.HEADER_DERIVED,
+                    "DerivedData/DerivedExtractedProperties/", "DerivedDecompositionType" );
+
+            } else if (arg.equalsIgnoreCase("DerivedEastAsianWidth")) {
+                GenerateData.generateVerticalSlice(EAST_ASIAN_WIDTH, EAST_ASIAN_WIDTH+NEXT_ENUM, GenerateData.HEADER_DERIVED,
+                    "DerivedData/DerivedExtractedProperties/", "DerivedEastAsianWidth" );
+                    
+            } else if (arg.equalsIgnoreCase("DerivedGeneralCategory")) {
+                GenerateData.generateVerticalSlice(CATEGORY, CATEGORY+NEXT_ENUM, GenerateData.HEADER_DERIVED,
+                    "DerivedData/DerivedExtractedProperties/", "DerivedGeneralCategory" );
+                    
+            } else if (arg.equalsIgnoreCase("DerivedJoiningGroup")) {
+                GenerateData.generateVerticalSlice(JOINING_GROUP, JOINING_GROUP+NEXT_ENUM, GenerateData.HEADER_DERIVED,
+                    "DerivedData/DerivedExtractedProperties/", "DerivedJoiningGroup" );
+                    
+            } else if (arg.equalsIgnoreCase("DerivedJoiningType")) {
+                GenerateData.generateVerticalSlice(JOINING_TYPE, JOINING_TYPE+NEXT_ENUM, GenerateData.HEADER_DERIVED,
+                    "DerivedData/DerivedExtractedProperties/", "DerivedJoiningType" );
+                    
+            } else if (arg.equalsIgnoreCase("DerivedLineBreak")) {
+                GenerateData.generateVerticalSlice(LINE_BREAK, LINE_BREAK+NEXT_ENUM, GenerateData.HEADER_DERIVED,
+                    "DerivedData/DerivedExtractedProperties/", "DerivedLineBreak" );
+
+            } else if (arg.equalsIgnoreCase("DerivedNumericType")) {
+                GenerateData.generateVerticalSlice(NUMERIC_TYPE, NUMERIC_TYPE+NEXT_ENUM, GenerateData.HEADER_DERIVED,
+                    "DerivedData/DerivedExtractedProperties/", "DerivedNumericType" );
+
+            } else if (arg.equalsIgnoreCase("DerivedNumericValues")) {
+                GenerateData.generateVerticalSlice(LIMIT_ENUM, LIMIT_ENUM, GenerateData.HEADER_DERIVED, // NOTE(review): both bounds are LIMIT_ENUM, unlike the sibling branches -- confirm intended
+                    "DerivedData/DerivedExtractedProperties/", "DerivedNumericValues" );
+            
+    // OTHER STANDARD PROPERTIES
+    
+            } else if (arg.equalsIgnoreCase("CaseFolding")) {
+                GenerateCaseFolding.makeCaseFold(true); // generated twice, once per flag value
+                GenerateCaseFolding.makeCaseFold(false);
+                    
+            } else if (arg.equalsIgnoreCase("SpecialCasing")) {
+                GenerateCaseFolding.generateSpecialCasing();
+                    
+            } else if (arg.equalsIgnoreCase("CompositionExclusions")) {
+                GenerateData.generateCompExclusions();
+
+            } else if (arg.equalsIgnoreCase("DerivedAge")) {
+                GenerateData.generateAge("DerivedData/", "DerivedAge");
+                
+            } else if (arg.equalsIgnoreCase("DerivedCoreProperties")) {
+                GenerateData.generateDerived(DERIVED_CORE, true, GenerateData.HEADER_DERIVED, "DerivedData/", "DerivedCoreProperties");
+                
+            } else if (arg.equalsIgnoreCase("DerivedNormalizationProperties")) {
+                GenerateData.generateDerived(DERIVED_NORMALIZATION, true, GenerateData.HEADER_DERIVED, "DerivedData/", 
+                    "DerivedNormalizationProperties" );
+                    
+            } else if (arg.equalsIgnoreCase("NormalizationTest")) {
+                GenerateData.writeNormalizerTestSuite("DerivedData/", "NormalizationTest");
+                
+            } else if (arg.equalsIgnoreCase("PropertyAliases")) {
+                GenerateData.generatePropertyAliases();                
+
+            } else if (arg.equalsIgnoreCase("PropList")) {
+                GenerateData.generateVerticalSlice(BINARY_PROPERTIES + White_space, BINARY_PROPERTIES + NEXT_ENUM,
+                        GenerateData.HEADER_EXTEND, "DerivedData/", "PropList");
+
+            } else if (arg.equalsIgnoreCase("Scripts")) {
+                GenerateData.generateVerticalSlice(SCRIPT+1, SCRIPT + NEXT_ENUM, 
+                        GenerateData.HEADER_SCRIPTS, "DerivedData/", "Scripts");
+    // OTHER TESTING
+            
+            } else if (arg.equalsIgnoreCase("OtherDerivedProperties")) {
+                //mask = Utility.setBits(0, NFC_Leading, NFC_Resulting);
+                GenerateData.generateDerived(ALL, false, GenerateData.HEADER_DERIVED, "OtherData/", "OtherDerivedProperties");
+
+            } else if (arg.equalsIgnoreCase("AllBinary")) {
+                GenerateData.generateVerticalSlice(BINARY_PROPERTIES, BINARY_PROPERTIES + NEXT_ENUM,
+                        GenerateData.HEADER_EXTEND, "OtherDerived/", "AllBinary"); // NOTE(review): directory is "OtherDerived/" here but "OtherData/" in the branch above -- confirm which is intended
+                        
+            } else if (arg.equalsIgnoreCase("DerivedGeneralCategoryTEST")) {
+                GenerateData.generateVerticalSlice(CATEGORY+29, CATEGORY+32, GenerateData.HEADER_DERIVED,
+                    "DerivedData/", "DerivedGeneralCategory" ); // NOTE(review): same file name as the real DerivedGeneralCategory branch, different directory
+                    
+            } else if (arg.equalsIgnoreCase("differences")) {
+                GenerateData.listDifferences();
+                
+            } else if (arg.equalsIgnoreCase("partition")) {
+                GenerateData.partitionProperties();
+                
+            } else if (arg.equalsIgnoreCase("listAccents")) {
+                GenerateData.listCombiningAccents();
+                
+            } else if (arg.equalsIgnoreCase("listGreekVowels")) {
+                GenerateData.listGreekVowels();
+                
+            } else if (arg.equalsIgnoreCase("listKatakana")) {
+                GenerateData.listKatakana();
+                
+            /* 
+            } else if (arg.equalsIgnoreCase("DerivedFullNormalization")) {
+                mask = Utility.setBits(0, DerivedProperty.GenNFD, DerivedProperty.GenNFKC);
+                GenerateData.generateDerived(mask, GenerateData.HEADER_DERIVED, "DerivedData/", "DerivedFullNormalization" );
+            } else if (arg.equalsIgnoreCase("caseignorable")) {
+                mask = Utility.setBits(0, DerivedProperty.Other_Case_Ignorable, DerivedProperty.Type_i);
+                GenerateData.generateDerived(mask, GenerateData.HEADER_DERIVED, "OtherData/", "CaseIgnorable" );
+            } else if (arg.equalsIgnoreCase("nfunsafestart")) {
+                mask = Utility.setBits(0, NFD_UnsafeStart, NFKC_UnsafeStart);
+                GenerateData.generateDerived(mask, GenerateData.HEADER_DERIVED, "OtherData/", "NFUnsafeStart");
+            */
+            
+            } else {
+                throw new IllegalArgumentException(" ! Unknown option -- see Main.java for options"); // an unrecognized name aborts the run; arguments after it are not processed
+            }
+
+
+            //checkHoffman("\u05B8\u05B9\u05B1\u0591\u05C3\u05B0\u05AC\u059F");
+            //checkHoffman("\u0592\u05B7\u05BC\u05A5\u05B0\u05C0\u05C4\u05AD");
+
+
+                //GenerateData.generateDerived(Utility.setBits(0, DerivedProperty.PropMath, DerivedProperty.Mod_ID_Continue_NO_Cf),
+                //    GenerateData.HEADER_DERIVED, "DerivedData/", "DerivedPropData2" );
+            //GenerateData.generateVerticalSlice(SCRIPT, SCRIPT+1, "ScriptCommon" );
+            //listStrings("LowerCase" , 0,0);
+            //GenerateData.generateVerticalSlice(0, LIMIT_ENUM, SKIP_SPECIAL, PROPLIST1, "DerivedData/", "DerivedPropData1" );
+
+            // AGE stuff
+            //UCD ucd = UCD.make();
+            //System.out.println(ucd.getAgeID(0x61));
+            //System.out.println(ucd.getAgeID(0x2FA1D));
+
+            //
+        }
+    }
+
 }
--- a/tools/unicodetools/com/ibm/text/UCD/MyPropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MyPropertyLister.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MyPropertyLister.java,v $
-* $Date: 2001/12/06 00:05:53 $
-* $Revision: 1.6 $
+* $Date: 2001/12/13 23:35:57 $
+* $Revision: 1.7 $
 *
 *******************************************************************************
 */
@ -53,6 +53,7 @@ final class MyPropertyLister extends PropertyLister {
    }

    public String valueName(int cp) {
+        if (up.getValueType() == BINARY) return up.getName();
        return up.getValue(cp);
    }

--- a/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Normalizer.java,v $
-* $Date: 2001/12/03 19:29:35 $
-* $Revision: 1.5 $
+* $Date: 2001/12/13 23:35:57 $
+* $Revision: 1.6 $
 *
 *******************************************************************************
 */
@ -49,9 +49,9 @@ public final class Normalizer implements UCD_Types {
    /**
     * Create a normalizer for a given form.
     */
-    public Normalizer(byte form) {
-        this(form,"");
-    }
+    // public Normalizer(byte form) {
+    //    this(form,"");
+    //}

    /**
     * Return string name
--- a/tools/unicodetools/com/ibm/text/UCD/PropertyAliasHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/PropertyAliasHeader.txt
@ -26,13 +26,12 @@
 #
 # NOTE: The property value names are NOT unique across properties, especially
 # with loose matches. For example,
+#
 # AL means Arabic Letter for the Bidi_Class property, and
 # AL means Alpha_Left for the Combining_Class property, and
 # AL means Alphabetic for the Line_Break property.
 #
-# In addition, some property names may be the same as some property value names:
-# cc means Combining_Class property, and
-# cc means the General_Category property value Control (cc)
+# In addition, some property names may be the same as some property value names.
 #
 # The combination of property value and property name is, however, unique.
 # For more information, see UTR #24: Regular Expression Guidelines
--- a/tools/unicodetools/com/ibm/text/UCD/PropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/PropertyLister.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/PropertyLister.java,v $
-* $Date: 2001/12/06 00:05:53 $
-* $Revision: 1.6 $
+* $Date: 2001/12/13 23:35:57 $
+* $Revision: 1.7 $
 *
 *******************************************************************************
 */
@ -168,13 +168,20 @@ abstract public class PropertyLister implements UCD_Types {
        return lastSpace;
    }
    
+    private static final byte FAKERC = 63; // fake category for comparison
    private static final byte FAKELC = 63; // fake category for comparison
    private static final byte FAKENC = 64; // fake category for comparison
    
    private byte getModCat(int cp) {
        byte cat = ucdData.getCategory(cp);
-        if (cat == Lt || cat == Ll || cat == Lu) cat = FAKELC;
-        if (cat == Cn && ucdData.isNoncharacter(cp)) cat = FAKENC;
+        if (cat == UNASSIGNED && ucdData.isNoncharacter(cp)) cat = FAKENC;
+        else if (breakByCategory) {
+            if (cat == Lt || cat == Ll || cat == Lu) cat = FAKELC;
+        } else {
+            // MASH almost everything together
+            if (cat != CONTROL && cat != FORMAT && cat != SURROGATE 
+                && cat != PRIVATE_USE && cat != UNASSIGNED) cat = FAKERC;
+        }
        return cat;
    }

@ -196,7 +203,7 @@ abstract public class PropertyLister implements UCD_Types {
            byte s = status(cp);
            if (alwaysBreaks && s == INCLUDE) s = BREAK;
            if (s == INCLUDE && firstRealCp != -1) {
-                if (breakByCategory && getModCat(cp) != firstRealCpCat) s = BREAK;
+                if (getModCat(cp) != firstRealCpCat) s = BREAK;
            }

            switch(s) {
--- a/tools/unicodetools/com/ibm/text/UCD/PropertyValueAliasHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/PropertyValueAliasHeader.txt
@ -22,6 +22,9 @@
 #
 # Third Field: The third field is a long name.
 #
+# In the case of ccc, there are 4 fields. The second field is numeric, third
+# is abbreviated, and fourth is long.
+#
 # With loose matching of property names, the case distinctions, whitespace,
 # and '_' are ignored.
 #
--- a/tools/unicodetools/com/ibm/text/UCD/SpecialCasingFooter.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/SpecialCasingFooter.txt
@ -0,0 +1,67 @@
+# ================================================================================
+# Conditional mappings
+# ================================================================================
+
+# Special case for final form of sigma
+
+03A3; 03C2; 03A3; 03A3; FINAL_SIGMA; # GREEK CAPITAL LETTER SIGMA
+
+# Note: the following cases for non-final are already in the UnicodeData file.
+
+# 03A3; 03C3; 03A3; 03A3; # GREEK CAPITAL LETTER SIGMA
+# 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA
+# 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA
+
+# Note: the following cases are not included, since they would case-fold in lowercasing
+
+# 03C3; 03C2; 03A3; 03A3; FINAL_SIGMA; # GREEK SMALL LETTER SIGMA
+# 03C2; 03C3; 03A3; 03A3; NOT_FINAL_SIGMA; # GREEK SMALL LETTER FINAL SIGMA
+
+# ================================================================================
+# Locale-sensitive mappings
+# ================================================================================
+
+# Lithuanian
+
+# Lithuanian retains the dot in a lowercase i when followed by accents.
+
+# Remove DOT ABOVE after "i" with upper or titlecase
+
+0307; 0307; ; ; lt AFTER_i # COMBINING DOT ABOVE
+
+# Introduce an explicit dot above when lowercasing capital I's and J's
+# whenever there are more accents above
+# (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
+
+0049; 0069 0307; 0049; 0049; lt MORE_ABOVE # LATIN CAPITAL LETTER I
+004A; 006A 0307; 004A; 004A; lt MORE_ABOVE # LATIN CAPITAL LETTER J
+012E; 012F 0307; 012E; 012E; lt MORE_ABOVE # LATIN CAPITAL LETTER I WITH OGONEK
+00CC; 0069 0307 0300; 00CC; 00CC; lt # LATIN CAPITAL LETTER I WITH GRAVE
+00CD; 0069 0307 0301; 00CD; 00CD; lt # LATIN CAPITAL LETTER I WITH ACUTE
+0128; 0069 0307 0303; 0128; 0128; lt # LATIN CAPITAL LETTER I WITH TILDE
+
+# ================================================================================
+
+# Turkish and Azeri
+
+# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
+# The following rules handle those cases.
+
+# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
+# This matches the behavior of the canonically equivalent I-dot_above
+
+0307; ; 0307; 0307; AFTER_I # COMBINING DOT ABOVE
+# When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
+
+0049; 0131; 0049; 0049; tr NOT_BEFORE_DOT; # LATIN CAPITAL LETTER I
+0049; 0131; 0049; 0049; az NOT_BEFORE_DOT; # LATIN CAPITAL LETTER I
+
+# When uppercasing, i turns into a dotted capital I
+
+0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
+0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
+
+# Note: the following cases are already in the UnicodeData file.
+
+# 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I
+# 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
--- a/tools/unicodetools/com/ibm/text/UCD/SpecialCasingHeader.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/SpecialCasingHeader.txt
@ -0,0 +1,60 @@
+# SpecialCasing-6.txt
+#
+# Special Casing Properties
+#
+# This file is a supplement to the UnicodeData file.
+# It contains additional information about the casing of Unicode characters.
+# (For compatibility, the UnicodeData.txt file only contains case mappings for
+# characters where they are 1-1, and does not have locale-specific mappings.)
+# For more information, see
+# UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/
+#
+# ================================================================================
+# Format
+# ================================================================================
+# The entries in this file are in the following machine-readable format:
+#
+# <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)? # <comment>
+#
+# <code>, <lower>, <title>, and <upper> provide character values in hex. If there is more than
+# one character, they are separated by spaces.
+#
+# The <condition_list> is optional. Where present, it consists of one or more locales or contexts,
+# separated by spaces. In these conditions:
+# - A condition list overrides the normal behavior if all of the listed conditions are true.
+# - Case distinctions in the condition list are not significant.
+# - Conditions preceded by "NOT_" represent the negation of the condition.
+# - A cased letter is any character with general category = Ll or Lu or Lt
+# - An ignorable sequence is a sequence of *zero* or more characters from
+#    the set {HYPHEN, SOFT HYPHEN, general category = Mn}.
+#
+# A locale is defined as:
+# <locale> := <ISO_639_code> ( "_" <ISO_3166_code> ( "_" <variant> )? )?
+# <ISO_3166_code> := 2-letter ISO country code,
+# <ISO_639_code> :=  2-letter ISO language code
+#
+# A context is a locale or one of the following choices:
+#   CFINAL:      The character is not followed by a sequence consisting of
+#                an ignorable sequence and then a cased letter.
+#   CINITIAL:    The character is not preceded by a sequence consisting of
+#                a cased letter and an ignorable sequence.
+#   FINAL_SIGMA: CFINAL and NOT_CINITIAL
+#   TYPE_i:      The character is "i" (0069), "j" (006A),
+#                or has a canonical decomposition that begins with an "i" or "j"
+#                but has no combining characters above (i.e., i-ogonek (012F),
+#                i-tilde-below (1E2D), or i-dot-below (1ECB)).
+#   AFTER_i:     The last preceding base character was TYPE_i, and
+#                no combining character class 230 (above) has intervened.
+#   MORE_ABOVE:  The character is followed by one or more characters of
+#                combining class 230 (ABOVE) in the combining character sequence
+#
+# Other than as used to separate elements, spaces are to be ignored.
+#
+# Parsers of this file must be prepared to deal with future additions to this format:
+#  * Additional contexts
+#  * Additional fields
+# ================================================================================
+
+# ================================================================================
+# Unconditional mappings
+# ================================================================================
--- a/tools/unicodetools/com/ibm/text/UCD/SpecialCasingIota.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/SpecialCasingIota.txt
@ -0,0 +1,13 @@
+# IMPORTANT-when capitalizing iota-subscript (0345)
+#  It MUST be in normalized form--moved to the end of any sequence of combining marks.
+#  This is because logically it represents a following base character!
+#  E.g. <iota_subscript> (<Mn> | <Mc> | <Me>)+ => (<Mn> | <Mc> | <Me>)+ <iota_subscript>
+# It should never be the first character in a word, so in titlecasing it can be left as is.
+
+# The following cases are already in the UnicodeData file, so are only commented here.
+
+# 0345; 0345; 0345; 0399; # COMBINING GREEK YPOGEGRAMMENI
+
+# All letters with YPOGEGRAMMENI (iota-subscript) or PROSGEGRAMMENI (iota adscript)
+# have special uppercases.
+# Note: characters with PROSGEGRAMMENI are actually titlecase, not uppercase!
--- a/tools/unicodetools/com/ibm/text/UCD/TestData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/TestData.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestData.java,v $
-* $Date: 2001/12/06 00:05:53 $
-* $Revision: 1.7 $
+* $Date: 2001/12/13 23:35:57 $
+* $Revision: 1.8 $
 *
 *******************************************************************************
 */
@ -21,6 +21,7 @@ import java.text.SimpleDateFormat;
 import com.ibm.text.utility.*;

 public class TestData implements UCD_Types {
+    /*

    public static void main (String[] args) throws IOException {
        System.out.println("START");
@ -200,7 +201,6 @@ public class TestData implements UCD_Types {
        }
        output.close();
    }
-    */

    public static void generateCompExclusions() throws IOException {
        PrintWriter output = Utility.openPrintWriter("CompositionExclusionsDelta.txt");
@ -246,7 +246,7 @@ public class TestData implements UCD_Types {
        System.out.println(ucd.getData(0x100000-3));
        if (true) return;
        String test2 = ucd.getName(0x2A6D6);
-        //*/
+        //* /


        PrintWriter output = Utility.openPrintWriter(file);
@ -485,5 +485,5 @@ public class TestData implements UCD_Types {
        "E\u0304\u0300",
        "E\u0300\u0304",
    };
-
+//*/
 }
--- a/tools/unicodetools/com/ibm/text/UCD/TestNormalization.java
+++ b/tools/unicodetools/com/ibm/text/UCD/TestNormalization.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestNormalization.java,v $
-* $Date: 2001/08/31 00:30:17 $
-* $Revision: 1.2 $
+* $Date: 2001/12/13 23:35:57 $
+* $Revision: 1.3 $
 *
 *******************************************************************************
 */
@ -25,12 +25,6 @@ public final class TestNormalization {
    static PrintWriter out = null;
    static BufferedReader in = null;

-    static Normalizer nfc;
-    static Normalizer nfd;
-    static Normalizer nfkc;
-    static Normalizer nfkd;
-    static UCD ucd;
-
    static BitSet charsListed = new BitSet(0x110000);
    static int errorCount = 0;
    static int lineErrorCount = 0;
@ -39,18 +33,14 @@ public final class TestNormalization {

    public static void main(String[] args)  throws java.io.IOException {
        System.out.println("Creating Normalizers");
-        ucd = UCD.make("");
+        Main.setUCD();

-        nfc = new Normalizer(Normalizer.NFC);
-        nfd = new Normalizer(Normalizer.NFD);
-        nfkc = new Normalizer(Normalizer.NFKC);
-        nfkd = new Normalizer(Normalizer.NFKD);

            String x = UTF32.valueOf32(0x10000);
-            check("NFC", nfc, x);
-            check("NFD", nfd, x);
-            check("NFKC", nfkc, x);
-            check("NFKD", nfkd, x);
+            check("NFC", Main.nfc, x);
+            check("NFD", Main.nfd, x);
+            check("NFKC", Main.nfkc, x);
+            check("NFKD", Main.nfkd, x);


        out = new PrintWriter(
@ -97,36 +87,36 @@ public final class TestNormalization {
                }

                // c2 == NFC(c1) == NFC(c2) == NFC(c3)
-                errorCount += check("NFCa", nfc, parts[1], parts[0]);
-                errorCount += check("NFCb", nfc, parts[1], parts[1]);
-                errorCount += check("NFCc", nfc, parts[1], parts[2]);
+                errorCount += check("NFCa", Main.nfc, parts[1], parts[0]);
+                errorCount += check("NFCb", Main.nfc, parts[1], parts[1]);
+                errorCount += check("NFCc", Main.nfc, parts[1], parts[2]);

                // c4 == NFC(c4) == NFC(c5)
-                errorCount += check("NFCd", nfc, parts[3], parts[3]);
-                errorCount += check("NFCe", nfc, parts[3], parts[4]);
+                errorCount += check("NFCd", Main.nfc, parts[3], parts[3]);
+                errorCount += check("NFCe", Main.nfc, parts[3], parts[4]);

                // c3 == NFD(c1) == NFD(c2) == NFD(c3)
-                errorCount += check("NFDa", nfd, parts[2], parts[0]);
-                errorCount += check("NFDb", nfd, parts[2], parts[1]);
-                errorCount += check("NFDc", nfd, parts[2], parts[2]);
+                errorCount += check("NFDa", Main.nfd, parts[2], parts[0]);
+                errorCount += check("NFDb", Main.nfd, parts[2], parts[1]);
+                errorCount += check("NFDc", Main.nfd, parts[2], parts[2]);

                // c5 == NFD(c4) == NFD(c5)
-                errorCount += check("NFDd", nfd, parts[4], parts[3]);
-                errorCount += check("NFDe", nfd, parts[4], parts[4]);
+                errorCount += check("NFDd", Main.nfd, parts[4], parts[3]);
+                errorCount += check("NFDe", Main.nfd, parts[4], parts[4]);

                // c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
-                errorCount += check("NFKCa", nfkc, parts[3], parts[0]);
-                errorCount += check("NFKCb", nfkc, parts[3], parts[1]);
-                errorCount += check("NFKCc", nfkc, parts[3], parts[2]);
-                errorCount += check("NFKCd", nfkc, parts[3], parts[3]);
-                errorCount += check("NFKCe", nfkc, parts[3], parts[4]);
+                errorCount += check("NFKCa", Main.nfkc, parts[3], parts[0]);
+                errorCount += check("NFKCb", Main.nfkc, parts[3], parts[1]);
+                errorCount += check("NFKCc", Main.nfkc, parts[3], parts[2]);
+                errorCount += check("NFKCd", Main.nfkc, parts[3], parts[3]);
+                errorCount += check("NFKCe", Main.nfkc, parts[3], parts[4]);

                // c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
-                errorCount += check("NFKDa", nfkd, parts[4], parts[0]);
-                errorCount += check("NFKDb", nfkd, parts[4], parts[1]);
-                errorCount += check("NFKDc", nfkd, parts[4], parts[2]);
-                errorCount += check("NFKDd", nfkd, parts[4], parts[3]);
-                errorCount += check("NFKDe", nfkd, parts[4], parts[4]);
+                errorCount += check("NFKDa", Main.nfkd, parts[4], parts[0]);
+                errorCount += check("NFKDb", Main.nfkd, parts[4], parts[1]);
+                errorCount += check("NFKDc", Main.nfkd, parts[4], parts[2]);
+                errorCount += check("NFKDd", Main.nfkd, parts[4], parts[3]);
+                errorCount += check("NFKDe", Main.nfkd, parts[4], parts[4]);
            }
            System.out.println("Total errors in file: " + errorCount
                + ", lines: " + lineErrorCount);
@ -160,21 +150,21 @@ public final class TestNormalization {
                }
                String otherList = "";
                if (!base.equals(other)) {
-                    otherList = "(" + ucd.getCodeAndName(other) + ")";
+                    otherList = "(" + Main.ucd.getCodeAndName(other) + ")";
                }
                out.println("DIFF " + type + ": "
-                    + ucd.getCodeAndName(base) + " != "
+                    + Main.ucd.getCodeAndName(base) + " != "
                    + type
                    + otherList
-                    + " == " + ucd.getCodeAndName(trans)
+                    + " == " + Main.ucd.getCodeAndName(trans)
                    + temp
                );
                return 1;
            }
        } catch (Exception e) {
            throw new ChainException("DIFF " + type + ": "
-                + ucd.getCodeAndName(base) + " != "
-                + type + "(" + ucd.getCodeAndName(other) + ")", new Object[]{}, e);
+                + Main.ucd.getCodeAndName(base) + " != "
+                + type + "(" + Main.ucd.getCodeAndName(other) + ")", new Object[]{}, e);
        }
        return 0;
    }
@ -188,10 +178,10 @@ public final class TestNormalization {
            if ((missing & 0xFFF) == 0) System.out.println("# " + Utility.hex(missing));
            if (charsListed.get(missing)) continue;
            String x = UTF32.valueOf32(missing);
-            errorCount += check("NFC", nfc, x);
-            errorCount += check("NFD", nfd, x);
-            errorCount += check("NFKC", nfkc, x);
-            errorCount += check("NFKD", nfkd, x);
+            errorCount += check("NFC", Main.nfc, x);
+            errorCount += check("NFD", Main.nfd, x);
+            errorCount += check("NFKC", Main.nfkc, x);
+            errorCount += check("NFKD", Main.nfkd, x);
        }
    }

--- a/tools/unicodetools/com/ibm/text/UCD/UCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
-* $Date: 2001/12/06 00:05:53 $
-* $Revision: 1.8 $
+* $Date: 2001/12/13 23:35:57 $
+* $Revision: 1.9 $
 *
 *******************************************************************************
 */
@ -48,6 +48,7 @@ public final class UCD implements UCD_Types {
        if (version.indexOf('.') < 0) throw new IllegalArgumentException("Version must be of form 3.1.1");
        UCD result = (UCD)versionCache.get(version);
        if (result == null) {
+            //System.out.println(Utility.getStack());
            result = new UCD();
            result.fillFromFile(version);
            versionCache.put(version, result);
@ -569,7 +570,8 @@ public final class UCD implements UCD_Types {
    }

    static String getCombiningClassID_fromIndex (short index, byte style) {
-        if (style == NORMAL || style == NUMBER) return String.valueOf(index & 0xFF);
+        index &= 0xFF;
+        if (style == NORMAL || style == NUMBER) return String.valueOf(index);
        String s = "Fixed";
        switch (index) {
            case 0: s = style < LONG ? "NR" : "NotReordered"; break;
@ -619,7 +621,7 @@ public final class UCD implements UCD_Types {
    }

    public static String getDecompositionTypeID_fromIndex(byte prop) {
-        return getDecompositionTypeID_fromIndex(NORMAL);
+        return getDecompositionTypeID_fromIndex(prop, NORMAL);
    }
    public static String getDecompositionTypeID_fromIndex(byte prop, byte style) {
        return style == SHORT ? UCD_Names.SHORT_DT[prop] : UCD_Names.DT[prop];
@ -1069,7 +1071,7 @@ to guarantee identifier closure.
                    uData.joiningType = JT_T;
                }
                if (!didJoiningHack && uData.joiningType != old) {
-                    System.out.println("HACK: Setting "
+                    System.out.println("HACK " + foundVersion + ": Setting "
                        + UCD_Names.LONG_JOINING_TYPE[uData.joiningType]
                        + ": " + Utility.hex(cp) + " " + uData.name);
                    didJoiningHack = true;
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
-* $Date: 2001/12/06 00:05:53 $
-* $Revision: 1.10 $
+* $Date: 2001/12/13 23:35:57 $
+* $Revision: 1.11 $
 *
 *******************************************************************************
 */
@ -119,6 +119,8 @@ final class UCD_Names implements UCD_Types {
        "Unified_Ideograph",
        "Other_Default_Ignorable_Code_Point",
        "Deprecated",
+        "Soft_Dotted",
+        "Logical_Order_Exception",
    };

    static final String[] SHORT_BP = {
@ -151,6 +153,8 @@ final class UCD_Names implements UCD_Types {
        "UIdeo",
        "ODI",
        "Dep",
+        "SD",
+        "LOE",
    };

    /*
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
-* $Date: 2001/12/06 00:05:53 $
-* $Revision: 1.7 $
+* $Date: 2001/12/13 23:35:57 $
+* $Revision: 1.8 $
 *
 *******************************************************************************
 */
@ -28,6 +28,11 @@ public interface UCD_Types {
        DERIVED_NORMALIZATION = 4, 
        DERIVED_ALL = 6, 
        ALL = (byte)-1;
+        
+     static final byte
+        NON_ENUMERATED = -1,
+        ENUMERATED = 0,
+        BINARY = 1;
    
    /*
  0	Code value in 4-digit hexadecimal format.
@ -180,7 +185,9 @@ public interface UCD_Types {
        UnifiedIdeograph = 26,
        Reserved_Cf_Code_Point = 27,
        Deprecated = 28,
-	    LIMIT_BINARY_PROPERTIES = 29;
+        Soft_Dotted = 29,
+        Logical_Order_Exception = 30,
+	    LIMIT_BINARY_PROPERTIES = 31;

 	/*
    static final int
--- a/tools/unicodetools/com/ibm/text/UCD/UnicodeProperty.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UnicodeProperty.java
@ -1,11 +1,15 @@
 package com.ibm.text.UCD;
+import com.ibm.text.UnicodeSet;
+import com.ibm.text.utility.*;
+
 public abstract class UnicodeProperty implements UCD_Types {
  
    protected UCD       ucd;
    protected boolean   isStandard = true;
    protected byte      type = NOT_DERIVED;
+    private byte      valueType = BINARY;
    protected boolean   hasUnassigned = false;
-    protected boolean   valueVaries = false;
+    protected boolean   isBinary = true;
    protected byte      defaultValueStyle = SHORT;
    protected byte      defaultPropertyStyle = LONG;
    protected String    valueName;
@ -29,11 +33,17 @@ public abstract class UnicodeProperty implements UCD_Types {
      public void setStandard(boolean in) { isStandard = in; }
      
      /**
-       * What type is it?
+       * What type is it? DERIVED..
       */
      public byte getType() { return type; }
      public void setType(byte in) { type = in; }
      
+      /**
+       * What kind of values does it have? NON_ENUMERATED, ENUMERATED, or BINARY.
+       */
+      public byte getValueType() { return valueType; }
+      public void setValueType(byte in) { valueType = in; }
+            
      /**
       * Does it apply to any unassigned characters?
       */
@ -66,7 +76,7 @@ public abstract class UnicodeProperty implements UCD_Types {
      public String getProperty(byte style) { 
            if (style == NORMAL) style = defaultPropertyStyle;
            switch (style) {
-                case LONG: return name.toString();
+                case LONG: return Utility.getUnskeleton(name.toString(), false);
                case SHORT: return shortName.toString();
                case NUMBER: return numberName.toString();
                default: throw new IllegalArgumentException("Bad property: " + style);
@ -78,7 +88,7 @@ public abstract class UnicodeProperty implements UCD_Types {
      public void setProperty(byte style, String in) {
            if (style == NORMAL) style = defaultPropertyStyle;
            switch (style) {
-              case LONG: name = in; break;
+              case LONG: name = Utility.getUnskeleton(in, false); break;
              case SHORT: shortName = in; break;
              case NUMBER: numberName = in; break;
              default: throw new IllegalArgumentException("Bad property: " + style);
@ -98,10 +108,10 @@ public abstract class UnicodeProperty implements UCD_Types {
      public String getValue(int cp) { return getValue(cp, NORMAL); }

      public void setValue(byte style, String in) {
-            if (valueVaries) throw new IllegalArgumentException("Can't set varying value: " + style);
+            if (getValueType() != BINARY) throw new IllegalArgumentException("Can't set varying value: " + style);
            if (style == NORMAL) style = defaultValueStyle;
            switch (style) {
-              case LONG: valueName = in; break;
+              case LONG: valueName = Utility.getUnskeleton(in, false); break;
              case SHORT: shortValueName = in; break;
              case NUMBER: numberValueName = in; break;
              default: throw new IllegalArgumentException("Bad value: " + style);
@ -109,12 +119,12 @@ public abstract class UnicodeProperty implements UCD_Types {
      }
      
      public String getValue(byte style) {
-            if (valueVaries) throw new IllegalArgumentException(
+            if (getValueType() != BINARY) throw new IllegalArgumentException(
                "Value varies in " + getName(LONG) + "; call getValue(cp)");
            try {
                if (style == NORMAL) style = defaultValueStyle;
                switch (style) {
-                    case LONG: return valueName.toString();
+                    case LONG: return Utility.getUnskeleton(valueName.toString(), false);
                    case SHORT: return shortValueName.toString();
                    case NUMBER: return numberValueName.toString();
                    default: throw new IllegalArgumentException("Bad property: " + style);
@ -124,17 +134,27 @@ public abstract class UnicodeProperty implements UCD_Types {
            }
      }
      
-      /**
-       * Does getProperty vary in contents?
-       */
-      public boolean valueVaries() { return valueVaries; }
-      public void setValueVaries(boolean in) { valueVaries = in; }
-      
      /**
       * Does it have the propertyValue?
       */
      abstract boolean hasValue(int cp);
      
+      /**
+       * Get the set of characters it contains
+       */
+      
+      private UnicodeSet cache = null;
+      
+      public UnicodeSet getSet() {
+        if (cache == null) {
+            cache = new UnicodeSet();
+            for (int cp = 0; cp <= 0x10FFFF; ++cp) {
+                if (hasValue(cp)) cache.add(cp);
+            }
+        }
+        return (UnicodeSet) cache.clone();
+      }
+      
      ///////////////////////////////////////////
      
      // Old Name for compatibility
--- a/tools/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UnifiedBinaryProperty.java,v $
-* $Date: 2001/12/06 00:05:53 $
-* $Revision: 1.3 $
+* $Date: 2001/12/13 23:35:57 $
+* $Revision: 1.4 $
 *
 *******************************************************************************
 */
@ -16,6 +16,7 @@ import java.io.*;
 import java.util.*;

 import com.ibm.text.utility.*;
+import com.ibm.text.UnicodeSet;

 final class UnifiedBinaryProperty extends UnicodeProperty {
    int majorProp;
@ -30,6 +31,54 @@ final class UnifiedBinaryProperty extends UnicodeProperty {
        return getCached(propMask, ucd);
    }
    
+    public static UnicodeProperty make(String propAndValue, UCD ucd) {
+        return make(getPropmask(propAndValue, ucd), ucd);
+    }
+    
+    public static UnicodeSet getSet(int propMask, UCD ucd) {
+        UnicodeProperty up = make(propMask, ucd);
+        return up.getSet();
+    }
+    
+    public static UnicodeSet getSet(String propAndValue, UCD ucd) {
+        return getSet(getPropmask(propAndValue, ucd), ucd);
+    }
+    
+    private static Map propNameCache = null;
+    
+    public static int getPropmask(String propAndValue, UCD ucd) {
+        
+        // cache the names
+        if (propNameCache == null) {
+            System.out.println("Caching Property Names");
+            propNameCache = new HashMap();
+        
+            for (int i = 0; i < LIMIT_ENUM; ++i) {
+                UnicodeProperty up = UnifiedBinaryProperty.make(i, ucd);
+                if (up == null) continue;
+                if (!up.isStandard()) continue;
+                if (up.getValueType() != BINARY) continue;
+                String shortValue = Utility.getSkeleton(up.getValue(SHORT));
+                String shortName = Utility.getSkeleton(up.getProperty(SHORT));
+                String longValue = Utility.getSkeleton(up.getValue(LONG));
+                String longName = Utility.getSkeleton(up.getProperty(LONG));
+                Integer result = new Integer(i);
+                propNameCache.put(longName + "=" + longValue, result);
+                propNameCache.put(longName + "=" + shortValue, result);
+                propNameCache.put(shortName + "=" + longValue, result);
+                propNameCache.put(shortName + "=" + shortValue, result);
+            }
+            System.out.println("Done Caching");
+        }
+        
+        propAndValue = Utility.getSkeleton(propAndValue);
+        Integer indexObj = (Integer) propNameCache.get(propAndValue);
+        if (indexObj == null) {
+            throw new IllegalArgumentException("No property found for " + propAndValue);
+        }
+        return indexObj.intValue();
+    }
+    
    static Map cache = new HashMap();
    static UCD lastUCD = null;
    static int lastPropMask = -1;
@ -76,7 +125,16 @@ final class UnifiedBinaryProperty extends UnicodeProperty {
        shortValueName = _getValue(SHORT);
        numberValueName = _getValue(NUMBER);
        defaultValueStyle = _getDefaultStyle();
-        System.out.println("Value = " + getValue(defaultValueStyle));
+        
+        if (majorProp == (BINARY_PROPERTIES>>8)) {
+            name = valueName;
+            shortName = shortValueName;
+            defaultPropertyStyle = defaultValueStyle;
+            valueName = "YES";
+            shortValueName = "Y";
+        }
+        
+        // System.out.println("Value = " + getValue(defaultValueStyle));
        // System.out.println(majorProp + ", " + propValue + ", " + name);
        // dp = new DerivedProperty(ucd);
    }
@ -247,9 +305,7 @@ final class UnifiedBinaryProperty extends UnicodeProperty {
                return UCD_Names.LONG_JOINING_TYPE[propValue];
            case JOINING_GROUP>>8: if (propValue >= LIMIT_JOINING_GROUP) break;
                return ucd.getJoiningGroupID_fromIndex((byte)propValue);
-            case BINARY_PROPERTIES>>8: if (propValue >= LIMIT_BINARY_PROPERTIES) break;
-                if (style != SHORT) return ucd.getBinaryPropertiesID_fromIndex((byte)propValue);
-                return UCD_Names.SHORT_BP[propValue];
+            case BINARY_PROPERTIES>>8: return ucd.getBinaryPropertiesID_fromIndex((byte)propValue, style);
            case SCRIPT>>8: if (propValue >= LIMIT_SCRIPT) break;
                if (style != SHORT) return ucd.getScriptID_fromIndex((byte)propValue);
                return UCD_Names.ABB_SCRIPT[propValue];
@ -263,7 +319,7 @@ final class UnifiedBinaryProperty extends UnicodeProperty {
                */
            }
        } catch (RuntimeException e) {
-            throw new ChainException("Illegal property Number {0}, {1}", new Object[]{
+            throw new ChainException("Illegal property Number* {0}, {1}", new Object[]{
                 new Integer(majorProp), new Integer(propValue)}, e);
        }
        throw new ChainException("Illegal property Number {0}, {1}", new Object[]{
--- a/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java
--- a/tools/unicodetools/com/ibm/text/utility/Utility.java
+++ b/tools/unicodetools/com/ibm/text/utility/Utility.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
-* $Date: 2001/12/06 00:05:52 $
-* $Revision: 1.9 $
+* $Date: 2001/12/13 23:35:57 $
+* $Revision: 1.10 $
 *
 *******************************************************************************
 */
@ -16,6 +16,8 @@ package com.ibm.text.utility;
 import java.util.*;
 import java.text.*;
 import java.io.*;
+import com.ibm.text.UnicodeSet;
+import com.ibm.text.UCD.*;

 public final class Utility {    // COMMON UTILITIES

@ -85,7 +87,65 @@ public final class Utility {    // COMMON UTILITIES
        }
        return -1;
    }
-
+    
+    /**
+     * These routines use the Java functions, because they only need to act on ASCII.
+     * Removes space, _, and lowercases.
+     */
+    
+    public static String getSkeleton(String source) {
+        StringBuffer result = new StringBuffer();
+        boolean gotOne = false;
+        // remove spaces, '_'
+        // we can do this with char, since no surrogates are involved
+        for (int i = 0; i < source.length(); ++i) {
+            char ch = source.charAt(i);
+            if (ch == '_' || ch == ' ') {
+                gotOne = true;
+            } else {
+                char ch2 = Character.toLowerCase(ch);
+                if (ch2 != ch) {
+                    gotOne = true;
+                    result.append(ch2);
+                } else {
+                    result.append(ch);
+                }
+            }
+        }
+        if (!gotOne) return source; // avoid string creation
+        return result.toString();
+    }
+    
+    /**
+     * These routines use the Java functions, because they only need to act on ASCII
+     * Changes space, - into _, inserts _ between lower and UPPER.
+     */
+    
+    public static String getUnskeleton(String source, boolean titlecaseStart) {
+        StringBuffer result = new StringBuffer();
+        int lastCat = -1;
+        boolean haveFirstCased = true;
+        for (int i = 0; i < source.length(); ++i) {
+            char c = source.charAt(i);
+            if (c == ' ' || c == '-') c = '_';
+            int cat = Character.getType(c);
+            if (lastCat == Character.LOWERCASE_LETTER && cat == Character.UPPERCASE_LETTER) {
+                result.append('_');
+            }
+            if (haveFirstCased && (cat == Character.LOWERCASE_LETTER 
+                    || cat == Character.TITLECASE_LETTER || cat == Character.UPPERCASE_LETTER)) {
+                if (titlecaseStart) {
+                    c = Character.toUpperCase(c);
+                }
+                haveFirstCased = false;
+            }
+            result.append(c);
+            lastCat = cat;
+        }
+        return result.toString();
+    }
+    
+    
    public static String findSubstring(String source, Set target, boolean invert) {
        Iterator it = target.iterator();
        while (it.hasNext()) {
@ -178,6 +238,10 @@ public final class Utility {    // COMMON UTILITIES
 	    return result.toString();
 	}

+    /**
+     * Returns a string containing count copies of s.
+     * If count <= 0, returns "".
+     */
 	public static String repeat(String s, int count) {
 	    if (count <= 0) return "";
 	    if (count == 1) return s;
@ -260,6 +324,10 @@ public final class Utility {    // COMMON UTILITIES
        return output.toString();
    }

+    /**
+     * Splits a string containing divider into pieces, storing in output
+     * and returns the number of pieces.
+     */
 	public static int split(String s, char divider, String[] output) {
 	    int last = 0;
 	    int current = 0;
@ -407,19 +475,22 @@ public final class Utility {    // COMMON UTILITIES
        return (aEnd - aStart) - (bEnd - bStart);
    }

-    public static String join(int[] array, String sep) {
+    /**
+     * Joins an array together, using divider between the pieces
+     */
+    public static String join(int[] array, String divider) {
        String result = "{";
        for (int i = 0; i < array.length; ++i) {
-            if (i != 0) result += sep;
+            if (i != 0) result += divider;
            result += array[i];
        }
        return result + "}";
    }

-    public static String join(long[] array, String sep) {
+    public static String join(long[] array, String divider) {
        String result = "{";
        for (int i = 0; i < array.length; ++i) {
-            if (i != 0) result += sep;
+            if (i != 0) result += divider;
            result += array[i];
        }
        return result + "}";
@ -506,16 +577,18 @@ public final class Utility {    // COMMON UTILITIES
    }

    public static BufferedReader openUnicodeFile(String filename, String version, boolean show) throws IOException {
-        String name = getMostRecentUnicodeDataFile(filename, version, show);
+        String name = getMostRecentUnicodeDataFile(filename, version, true, show);
        if (name == null) return null;
        return new BufferedReader(new FileReader(name),32*1024);
    }

-    public static String getMostRecentUnicodeDataFile(String filename, String version, boolean show) throws IOException {
+    public static String getMostRecentUnicodeDataFile(String filename, String version, 
+      boolean acceptLatest, boolean show) throws IOException {
        // get all the files in the directory

+        int compValue = acceptLatest ? 0 : 1;
        for (int i = 0; i < searchPath.length; ++i) {
-            if (version.length() != 0 && version.compareTo(searchPath[i]) < 0) continue;
+            if (version.length() != 0 && version.compareTo(searchPath[i]) < compValue) continue;

            String directoryName = DATA_DIR + File.separator + searchPath[i] + "-Update" + File.separator;
            if (show) System.out.println("Trying: '" + directoryName + "', '" + filename + "'");
@ -549,6 +622,9 @@ public final class Utility {    // COMMON UTILITIES
        log.println("</head><body>");
    }
    
+    /**
+     * Replaces all occurrences of piece with replacement, and returns the new String.
+     */
    public static String replace(String source, String piece, String replacement) {
        while (true) {
            int pos = source.indexOf(piece);
@ -556,4 +632,30 @@ public final class Utility {    // COMMON UTILITIES
            source = source.substring(0,pos) + source.substring(pos + piece.length());
        }
    }
+    
+    public static String getStack() {
+        Exception e = new Exception();
+        StringWriter sw = new StringWriter();
+        PrintWriter pw = new PrintWriter(sw);
+        e.printStackTrace(pw);
+        pw.flush();
+        return "Showing Stack with fake " + sw.getBuffer().toString();
+    }
+    
+    public static void showSetNames(String prefix, UnicodeSet set, boolean all, UCD ucd) {
+        int count = set.getRangeCount();
+        for (int i = 0; i < count; ++i) {
+            int start = set.getRangeStart(i);
+            int end = set.getRangeEnd(i);
+            if (all) {
+                for (int cp = start; cp <= end; ++cp) {
+                    if (!set.contains(cp)) continue;
+                    System.out.println(prefix + ucd.getCodeAndName(cp));
+                }
+            } else {
+                System.out.println(prefix + ucd.getCodeAndName(start) + 
+                    ((start != end) ? (".." + ucd.getCodeAndName(end)) : ""));
+            }
+        }
+    }
 }
--- a/tools/unicodetools/readme.txt
+++ b/tools/unicodetools/readme.txt
@ -3,7 +3,54 @@ WARNING!!
 These directories contain some Unicode tools used to build various files,
 and to check the consistency of the Unicode releases.

-They are NOT production level code, and should never be used in programs.
-The API is subject to change without notice, and will not be maintained.
-The source is uncommented, and not well structured -- classic spaghetti style.
-There is no build mechanism.
+- They are NOT production level code, and should never be used in programs.
+- The API is subject to change without notice, and will not be maintained.
+- The source is uncommented, and not well structured -- classic spaghetti style.
+- There is no build mechanism.
+- I have not checked to make sure it works on Unix; probably the only change that
+  needs to be made is to fix the file separator.
+
+Instructions:
+
+1. You must edit UCD_Types at the top, to set the directories for the build:
+
+    public static final String DATA_DIR = "C:\\DATA\\";
+    public static final String BIN_DIR = DATA_DIR + "BIN\\";
+    public static final String GEN_DIR = DATA_DIR + "GEN\\";
+
+Make sure that each of these directories exists. Also make sure that the following directories exist:
+<GEN_DIR>/DerivedData
+<GEN_DIR>/DerivedData/ExtractedProperties
+
+
+2. Download all of the UnicodeData files for each version into DATA_DIR.
+The folder names must be of the form: "3.2.0-Update"
+
+
+3. For each version X (like 3.1.0), run
+
+  java version X build
+
+This builds a compressed format of all the UCD data (except blocks and Unihan)
+into the BIN directory. Don't worry about the voluminous console messages, unless one says
+"FAIL".
+
+
+4. To build all of the files for a particular version X, run
+
+  java version X all
+
+To build a particular file, like CaseFolding, use that file name instead of all
+
+  java version X CaseFolding
+
+To change the D version, edit the line in GenerateData.java:
+
+    static final int dVersion = 2; // change to fix the generated file D version. If less than zero, no "d"
+
+
+5. To run basic consistency checking, run:
+
+  java version X verify
+
+Don't worry about any console messages except those that say FAIL.