diff --git a/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java b/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java
index 74ecd74f009..a30d9585cef 100644
--- a/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $
-* $Date: 2002/03/15 00:34:46 $
-* $Revision: 1.5 $
+* $Date: 2002/03/20 00:21:43 $
+* $Revision: 1.6 $
 *
 *******************************************************************************
 */
@@ -25,7 +25,7 @@ import java.io.*;
  */
 
 public final class ConvertUCD implements UCD_Types {
-    public static final boolean SHOW = true;
+    public static final boolean SHOW = false;
     public static final boolean DEBUG = false;
 
     public static int major;
@@ -201,7 +201,7 @@ public final class ConvertUCD implements UCD_Types {
     // MAIN!!
 
     public static void main (String[] args) throws Exception {
-        System.out.println("ConvertUCD");
+        System.out.println("Building binary version of UCD");
 
         log = new PrintWriter(new BufferedWriter(
             new OutputStreamWriter(
@@ -260,8 +260,17 @@ public final class ConvertUCD implements UCD_Types {
             UData value = (UData) charData.get(key);
             value.compact();
         }
-        UData ud = getEntry(0x2A6D6);
+        
+        UData ud;
+        ud = getEntry(0x5e);
+        System.out.println("SPOT-CHECK: 5e: " + ud);
+        
+        ud = getEntry(0x130);
+        System.out.println("SPOT-CHECK: 130: " + ud);
+        
+        ud = getEntry(0x2A6D6);
         System.out.println("SPOT-CHECK: 2A6D6: " + ud);
+        
         ud = getEntry(0xFFFF);
         System.out.println("SPOT-CHECK: FFFF: " + ud);
 
@@ -493,7 +502,16 @@ public final class ConvertUCD implements UCD_Types {
                                 if (type.equals("I")) {
                                     data.simpleCaseFolding = val;
                                     setBinaryProperty(cps, CaseFoldTurkishI);
-                                    System.out.println("SPOT-CHECK: <" + parts[i-1] + "> Setting " + Utility.hex(cps) + ": " + Utility.hex(val));
+                                    System.out.println("SPOT-CHECK: <" + parts[i-1] + "> Setting " 
+                                    	+ Utility.hex(cps) + ": " + Utility.hex(val));
+                                }
+                            } else if (labels[0].equals("SpecialCasing")   // special handling for special casing
+                            			&& labels[4].equals("sc")
+                                		&& parts[4].trim().length() > 0) {
+                                if (i < 4) {
+                                	if (DEBUG) System.out.println("Got special: " + Utility.hex(cps) + ", " 
+                                		+ Utility.hex(key) + ":" + Utility.hex(val));
+                                	addCharData(cps, "sc", parts[4].trim() + ":" + key + ":" + val);
                                 }
                             } else {
                                 /*if (key.equals("sn")) { // SKIP UNDEFINED!!
@@ -782,12 +800,16 @@ public final class ConvertUCD implements UCD_Types {
             } else if (fieldName.equals("su")) {
                 uData.fullUppercase = fieldValue;
             } else if (fieldName.equals("sl")) {
+            	if (DEBUG) System.out.println("Setting full lowercase to " + Utility.hex(fieldValue) + uData);
                 uData.fullLowercase = fieldValue;
             } else if (fieldName.equals("st")) {
                 uData.fullTitlecase = fieldValue;
 
             } else if (fieldName.equals("sc")) {
-                uData.specialCasing = fieldValue;
+            	if (uData.specialCasing.length() > 0) {
+            		uData.specialCasing += ";";
+            	}
+                uData.specialCasing += fieldValue;
 
             } else if (fieldName.equals("xp")) {
                 uData.binaryProperties |= 1 << Utility.lookup(fieldValue, UCD_Names.BP, true);
diff --git a/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java b/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java
index ab472d32619..fe7ce3ba69d 100644
--- a/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java
+++ b/tools/unicodetools/com/ibm/text/UCD/DerivedProperty.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
-* $Date: 2002/03/15 01:57:01 $
-* $Revision: 1.11 $
+* $Date: 2002/03/20 00:21:43 $
+* $Revision: 1.12 $
 *
 *******************************************************************************
 */
@@ -285,6 +285,11 @@ public final class DerivedProperty implements UCD_Types {
             else if (nfx.isTrailing(cp)) return MAYBE;
             else return "";
         }
+        
+		public String getListingValue(int cp) {
+    		return getValue(cp, LONG);
+    	}
+        
         boolean hasValue(int cp) { return getValue(cp).length() != 0; }
     };
 
@@ -460,6 +465,12 @@ of characters, the first of which has a non-zero combining class.
                 if (isCompEx(cp)) return true;
                 return false;
             }
+            /*
+			public String getListingValue(int cp) {
+        		if (getValueType() != BINARY) return getValue(cp, SHORT);
+        		return getProperty(SHORT);
+			}
+			*/
         };
         
         dprops[FullCompInclusion] = new UnicodeProperty() {
@@ -537,37 +548,15 @@ of characters, the first of which has a non-zero combining class.
                 hasUnassigned = true;
                 shortName = "DI";
                 header = header = "# Derived Property: " + name
-                    + "\r\n#  Generated from Other_Default_Ignorable_Code_Point + Cf + Cc + Cs - White_Space";
+                    + "\r\n#  Generated from <2060..206F, FFF0..FFFB, E0000..E0FFF>"
+                    + "\r\n#    + Other_Default_Ignorable_Code_Point + (Cf + Cc + Cs - White_Space)";
             }
             boolean hasValue(int cp) {
+            	if (0x2060 <= cp && cp <= 0x206F || 0xFFF0 <= cp && cp <= 0xFFFB || 0xE0000 <= cp && cp <= 0xE0FFF) return true;
+                if (ucdData.getBinaryProperty(cp,Other_Default_Ignorable_Code_Point)) return true;
                 if (ucdData.getBinaryProperty(cp, White_space)) return false;
                 byte cat = ucdData.getCategory(cp);
-                if (cat == Cf || cat == Cs || cat == Cc
-                || ucdData.getBinaryProperty(cp,Reserved_Cf_Code_Point)) return true;
-                return false;
-            }
-        };
-
-/*
-        GraphemeExtend = 27,
-        GraphemeBase = 28,
-# GraphemeExtend := Me + Mn + Mc + Other_GraphemeExtend - GraphemeLink
-# GraphemeBase := 
-
-*/
-        dprops[GraphemeExtend] = new UnicodeProperty() {
-            {
-                type = DERIVED_CORE;
-                name = "Grapheme_Extend";
-                shortName = "GrExt";
-                header = header = "# Derived Property: " + name
-                    + "\r\n#  Generated from: Me + Mn + Mc + Other_Grapheme_Extend - Grapheme_Link";
-            }
-            boolean hasValue(int cp) {
-                if (ucdData.getBinaryProperty(cp, GraphemeExtend)) return false;
-                byte cat = ucdData.getCategory(cp);
-                if (cat == Me || cat == Mn || cat == Mc
-                || ucdData.getBinaryProperty(cp,Other_GraphemeExtend)) return true;
+                if (cat == Cf || cat == Cs || cat == Cc) return true;
                 return false;
             }
         };
@@ -576,6 +565,7 @@ of characters, the first of which has a non-zero combining class.
             {
                 name = "Other_Case_Ignorable";
                 shortName = "OCI";
+                isStandard = false;
                 
                 header = header = "# Binary Property";
             }
@@ -608,7 +598,7 @@ of characters, the first of which has a non-zero combining class.
             }
             boolean hasValue(int cp) {
                 if (hasSoftDot(cp)) return true;
-                if (!Main.nfkd.hasDecomposition(cp)) return false;
+                if (!Main.nfkd.normalizationDiffers(cp)) return false;
                 String decomp = Main.nfd.normalize(cp);
                 boolean ok = false;
                 for (int i = decomp.length()-1; i >= 0; --i) {
@@ -630,6 +620,7 @@ of characters, the first of which has a non-zero combining class.
         dprops[Case_Ignorable] = new UnicodeProperty() {
             {
                 name = "Case_Ignorable";
+                isStandard = false;
                 shortName = "CI";
                 header = header = "# Derived Property: " + name
                     + "\r\n#  Generated from: Other_Case_Ignorable + Lm + Mn + Me + Cf";
@@ -642,6 +633,33 @@ of characters, the first of which has a non-zero combining class.
             }
         };
         
+/*
+        GraphemeExtend = 27,
+        GraphemeBase = 28,
+# GraphemeExtend := Me + Mn + Mc + Other_GraphemeExtend - GraphemeLink
+# GraphemeBase := 
+
+*/
+        dprops[GraphemeExtend] = new UnicodeProperty() {
+            {
+                type = DERIVED_CORE;
+                name = "Grapheme_Extend";
+                shortName = "GrExt";
+                header = header = "# Derived Property: " + name
+                    + "\r\n#  Generated from: Me + Mn + Mc + Other_Grapheme_Extend - Grapheme_Link - CGJ"
+                    + "\r\n#  (CGJ = U+034F)";
+                     
+            }
+            boolean hasValue(int cp) {
+            	if (cp == 0x034F) return false;
+                if (ucdData.getBinaryProperty(cp, GraphemeLink)) return false;
+                byte cat = ucdData.getCategory(cp);
+                if (cat == Me || cat == Mn || cat == Mc
+                || ucdData.getBinaryProperty(cp,Other_GraphemeExtend)) return true;
+                return false;
+            }
+        };
+
         dprops[GraphemeBase] = new UnicodeProperty() {
             {
                 type = DERIVED_CORE;
@@ -649,9 +667,11 @@ of characters, the first of which has a non-zero combining class.
                 shortName = "GrBase";
                 
                 header = header = "# Derived Property: " + name
-                    + "\r\n#  Generated from: [0..10FFFF] - Cc - Cf - Cs - Co - Cn - Zl - Zp - Grapheme_Link - Grapheme_Extend";
+                    + "\r\n#  Generated from: [0..10FFFF] - Cc - Cf - Cs - Co - Cn - Zl - Zp"
+                    + "\r\n#    - Grapheme_Extend - Grapheme_Link - CGJ";
             }
             boolean hasValue(int cp) {
+            	if (cp == 0x034F) return false;
                 byte cat = ucdData.getCategory(cp);
                 if (cat == Cc || cat == Cf || cat == Cs || cat == Co || cat == Cn || cat == Zl || cat == Zp
                 || ucdData.getBinaryProperty(cp,GraphemeLink)) return false;
diff --git a/tools/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java b/tools/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java
index c25cd6ff2ae..23834bc8458 100644
--- a/tools/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java,v $
-* $Date: 2002/03/15 00:34:46 $
-* $Revision: 1.9 $
+* $Date: 2002/03/20 00:21:43 $
+* $Revision: 1.10 $
 *
 *******************************************************************************
 */
@@ -56,8 +56,7 @@ final class DerivedPropertyLister extends PropertyLister {
     }
 
     public String valueName(int cp) {
-        if (uprop.getValueType() != BINARY) return uprop.getValue(cp, LONG);
-        return uprop.getProperty(LONG);
+    	return uprop.getListingValue(cp);
     }
 
     //public String optionalComment(int cp) {
diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
index a69dac59e38..534b264ccf9 100644
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java,v $
-* $Date: 2002/03/15 01:57:01 $
-* $Revision: 1.6 $
+* $Date: 2002/03/20 00:21:43 $
+* $Revision: 1.7 $
 *
 *******************************************************************************
 */
@@ -24,6 +24,8 @@ public class GenerateCaseFolding implements UCD_Types {
     public static boolean COMMENT_DIFFS = false; // ON if we want a comment on mappings != lowercase
     public static boolean PICK_SHORT = false; // picks short value for SIMPLE if in FULL, changes weighting
     public static boolean NF_CLOSURE = false; // picks short value for SIMPLE if in FULL, changes weighting
+    static final int CHECK_CHAR = -1; // for debugging, change to actual character (e.g. 0x130), otherwise -1 (disables the debug output)
+     
     // PICK_SHORT & NF_CLOSURE = false for old style
     
     
@@ -83,8 +85,14 @@ public class GenerateCaseFolding implements UCD_Types {
             if (rFull != null && rFull.equals(rSimple) 
               || (PICK_SHORT && UTF16.countCodePoint(rFull) == 1)) {
                 String type = "C";
-                if (ch == 0x130 || ch == 0x131) type = "I";
-                drawLine(out, ch, type, rFull);
+                if (ch == 0x130) {
+                	drawLine(out, ch, "F", "i\u0307");
+                	drawLine(out, ch, "I", "\u0130");
+                } else if (ch == 0x131) {
+                	drawLine(out, ch, "I", "i");
+                } else {
+                	drawLine(out, ch, type, rFull);
+                }
             } else {
                 if (rFull != null) {
                     drawLine(out, ch, "F", rFull);
@@ -404,7 +412,7 @@ public class GenerateCaseFolding implements UCD_Types {
     }
     
     static boolean isExcluded(int ch) {
-        if (ch == 0x130) return true;                  // skip LATIN CAPITAL LETTER I WITH DOT ABOVE
+        // if (ch == 0x130) return true;                  // skip LATIN CAPITAL LETTER I WITH DOT ABOVE
         if (ch == 0x0132 || ch == 0x0133) return true; // skip IJ, ij
         if (ch == 0x037A) return true;                 // skip GREEK YPOGEGRAMMENI
         if (0x249C <= ch && ch <= 0x24B5) return true; // skip PARENTHESIZED LATIN SMALL LETTER A..
@@ -456,7 +464,7 @@ public class GenerateCaseFolding implements UCD_Types {
                 btitle = Main.nfc.normalize(btitle);
             }
             
-            if (ch == -1) {// for debugging, change to actual character
+            if (ch == CHECK_CHAR) {
                 System.out.println("Code: " + Main.ucd.getCodeAndName(ch));
                 System.out.println("Decomp: " + Main.ucd.getCodeAndName(decomp));
                 System.out.println("Base: " + Main.ucd.getCodeAndName(base));
@@ -474,11 +482,17 @@ public class GenerateCaseFolding implements UCD_Types {
             // presumably if there is a single code point, it would already be in the simple mappings
             
             if (UTF16.countCodePoint(flower) == 1 && UTF16.countCodePoint(fupper) == 1 
-                && UTF16.countCodePoint(title) == 1) continue;
+                	&& UTF16.countCodePoint(title) == 1) {
+            	if (ch == CHECK_CHAR) System.out.println("Skipping single code point: " + Main.ucd.getCodeAndName(ch));
+            	continue;
+            }
             
             // if there is no change from the base, skip
             
-            if (flower.equals(base) && fupper.equals(base) && ftitle.equals(base)) continue;
+            if (flower.equals(base) && fupper.equals(base) && ftitle.equals(base)) {
+            	if (ch == CHECK_CHAR) System.out.println("Skipping equals base: " + Main.ucd.getCodeAndName(ch));
+            	continue;
+            }
             
             // fix special cases
             // if (flower.equals(blower) && fupper.equals(bupper) && ftitle.equals(btitle)) continue;
@@ -488,20 +502,26 @@ public class GenerateCaseFolding implements UCD_Types {
             
             // if there are no changes from the original, or the expanded original, skip
             
-            if (flower.equals(lower) && fupper.equals(upper) && ftitle.equals(title)) continue;
+            if (flower.equals(lower) && fupper.equals(upper) && ftitle.equals(title)) {
+            	if (ch == CHECK_CHAR) System.out.println("Skipping unchanged: " + Main.ucd.getCodeAndName(ch));
+            	continue;
+            }
             
             String name = Main.ucd.getName(ch);
             
             int order = name.equals("LATIN SMALL LETTER SHARP S") ? 1
-                : name.indexOf("ARMENIAN SMALL LIGATURE") >= 0 ? 3
-                : name.indexOf("LIGATURE") >= 0 ? 2
-                : name.indexOf("GEGRAMMENI") < 0 ? 4
-                : UTF16.countCodePoint(ftitle) == 1 ? 5
-                : UTF16.countCodePoint(fupper) == 2 ? 6
-                : 7;
+                : ch == 0x130 ? 2
+                : name.indexOf("ARMENIAN SMALL LIGATURE") >= 0 ? 4
+                : name.indexOf("LIGATURE") >= 0 ? 3
+                : name.indexOf("GEGRAMMENI") < 0 ? 5
+                : UTF16.countCodePoint(ftitle) == 1 ? 6
+                : UTF16.countCodePoint(fupper) == 2 ? 7
+                : 8;
+            
+            if (ch == CHECK_CHAR) System.out.println("Order: " + order + " for " + Main.ucd.getCodeAndName(ch));
             
             // HACK
-            boolean denormalize = !normalize && order != 5 && order != 6;
+            boolean denormalize = !normalize && order != 6 && order != 7;
             
             String mapping = Utility.hex(ch)
                 + "; " + Utility.hex(flower.equals(base) ? chstr : denormalize ? Main.nfd.normalize(flower) : flower)
@@ -544,12 +564,15 @@ public class GenerateCaseFolding implements UCD_Types {
                     out.println("# The German es-zed is special--the normal mapping is to SS.");
                     out.println("# Note: the titlecase should never occur in practice. It is equal to titlecase(uppercase(<es-zed>))");
                     break;
-                case 2: out.println("# Ligatures"); break;
-                case 3: skipLine = true; break;
-                case 4: out.println("# No corresponding uppercase precomposed character"); break;
-                case 5: Utility.appendFile("SpecialCasingIota.txt", true, out); break;
-                case 6: out.println("# Some characters with YPOGEGRAMMENI are also have no corresponding titlecases"); break;
-                case 7: skipLine = true; break;
+                case 2:
+                    out.println("# Preserve canonical equivalence for I with dot. Turkic is handled below.");
+					break;                	
+                case 3: out.println("# Ligatures"); break;
+                case 4: skipLine = true; break;
+                case 5: out.println("# No corresponding uppercase precomposed character"); break;
+                case 6: Utility.appendFile("SpecialCasingIota.txt", true, out); break;
+                case 7: out.println("# Some characters with YPOGEGRAMMENI are also have no corresponding titlecases"); break;
+                case 8: skipLine = true; break;
                 }
                 if (!skipLine) out.println();
             }
diff --git a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
index 2c2f6a69631..83b000f167e 100644
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
-* $Date: 2002/03/15 01:57:01 $
-* $Revision: 1.15 $
+* $Date: 2002/03/20 00:21:42 $
+* $Revision: 1.16 $
 *
 *******************************************************************************
 */
@@ -1183,7 +1183,7 @@ public class GenerateData implements UCD_Types {
             Utility.dot(i);
             if (!Main.ucd.isRepresented(i)) continue;
             
-            if (!Main.nfd.hasDecomposition(i)) {
+            if (!Main.nfd.normalizationDiffers(i)) {
                 if (Main.ucd.getScript(i) == LATIN_SCRIPT) {
                     int cp = i;
                     String hex = "u" + Utility.hex(cp, 4);
diff --git a/tools/unicodetools/com/ibm/text/UCD/Main.java b/tools/unicodetools/com/ibm/text/UCD/Main.java
index 85117936b80..a7984d422d1 100644
--- a/tools/unicodetools/com/ibm/text/UCD/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Main.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
-* $Date: 2002/03/15 00:34:46 $
-* $Revision: 1.9 $
+* $Date: 2002/03/20 00:21:42 $
+* $Revision: 1.10 $
 *
 *******************************************************************************
 */
@@ -77,8 +77,11 @@ public final class Main implements UCD_Types {
             } else if (arg.equalsIgnoreCase("build")) ConvertUCD.main(new String[]{ucdVersion});
             else if (arg.equalsIgnoreCase("version")) ucdVersion = args[++i];
             else if (arg.equalsIgnoreCase("testskippable")) NFSkippable.main(null);
+            else if (arg.equalsIgnoreCase("diffIgnorable")) VerifyUCD.diffIgnorable();
             else if (arg.equalsIgnoreCase("generateXML")) VerifyUCD.generateXML();
             else if (arg.equalsIgnoreCase("checkSpeed")) VerifyUCD.checkSpeed();
+            else if (arg.equalsIgnoreCase("verifyNormalizationStability")) VerifyUCD.verifyNormalizationStability();
+            
             else if (arg.equalsIgnoreCase("generateHanTransliterator")) GenerateHanTransliterator.main();
             else if (arg.equalsIgnoreCase("compareBlueberry")) VerifyUCD.compareBlueberry();
 
diff --git a/tools/unicodetools/com/ibm/text/UCD/MyPropertyLister.java b/tools/unicodetools/com/ibm/text/UCD/MyPropertyLister.java
index 7c64b852b60..17249bcbd0e 100644
--- a/tools/unicodetools/com/ibm/text/UCD/MyPropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MyPropertyLister.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MyPropertyLister.java,v $
-* $Date: 2001/12/13 23:35:57 $
-* $Revision: 1.7 $
+* $Date: 2002/03/20 00:21:42 $
+* $Revision: 1.8 $
 *
 *******************************************************************************
 */
@@ -85,7 +85,7 @@ final class MyPropertyLister extends PropertyLister {
 
         if (cat == Cn
             && propMask != (BINARY_PROPERTIES | Noncharacter_Code_Point)
-            && propMask != (BINARY_PROPERTIES | Reserved_Cf_Code_Point)
+            && propMask != (BINARY_PROPERTIES | Other_Default_Ignorable_Code_Point)
             && propMask != (CATEGORY | Cn)) {
             if (BRIDGE) return CONTINUE;
             else return EXCLUDE;
diff --git a/tools/unicodetools/com/ibm/text/UCD/Normalizer.java b/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
index 69faa4382bb..f5409577b0c 100644
--- a/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Normalizer.java,v $
-* $Date: 2002/03/15 01:57:01 $
-* $Revision: 1.7 $
+* $Date: 2002/03/20 00:21:42 $
+* $Revision: 1.8 $
 *
 *******************************************************************************
 */
@@ -67,6 +67,13 @@ public final class Normalizer implements UCD_Types {
         return getName(form);
     }
 
+    /**
+     * Return the UCD version string reported by the underlying normalization data
+     */
+    public String getUCDVersion() {
+        return data.getUCDVersion();
+    }
+
     /**
      * Does compose?
      */
@@ -120,7 +127,6 @@ public final class Normalizer implements UCD_Types {
     }
 
     /**
-    */
     private StringBuffer hasDecompositionBuffer = new StringBuffer();
 
     public boolean hasDecomposition(int cp) {
@@ -129,6 +135,7 @@ public final class Normalizer implements UCD_Types {
         if (hasDecompositionBuffer.length() != 1) return true;
         return cp != hasDecompositionBuffer.charAt(0);
     }
+    */
 
     /**
      * Does a quick check to see if the string is in the current form. Checks canonical order and
@@ -427,6 +434,11 @@ public final class Normalizer implements UCD_Types {
                 if (ucd.
             */
         }
+        
+        String getUCDVersion() {
+        	return ucd.getVersion();
+        }
+        
         /*
 Problem: differs: true, call: false U+0385 GREEK DIALYTIKA TONOS
 Problem: differs: true, call: false U+03D3 GREEK UPSILON WITH ACUTE AND HOOK SYMBOL
diff --git a/tools/unicodetools/com/ibm/text/UCD/SpecialCasingFooter.txt b/tools/unicodetools/com/ibm/text/UCD/SpecialCasingFooter.txt
index 93feb19c801..3cba2ab3a89 100644
--- a/tools/unicodetools/com/ibm/text/UCD/SpecialCasingFooter.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/SpecialCasingFooter.txt
@@ -48,10 +48,14 @@
 # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
 # The following rules handle those cases.
 
+0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0130; 0069; 0130; 0130; az; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+
 # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
 # This matches the behavior of the canonically equivalent I-dot_above
 
-0307; ; 0307; 0307; After_Soft_Dotted; # COMBINING DOT ABOVE
+0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE
+0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE
 
 # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
 
@@ -63,7 +67,6 @@
 0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
 
-# Note: the following cases are already in the UnicodeData file.
+# Note: the following case is already in the UnicodeData file.
 
 # 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I
-# 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD.java b/tools/unicodetools/com/ibm/text/UCD/UCD.java
index 9a0cb8a2592..84410be901d 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
-* $Date: 2001/12/13 23:35:57 $
-* $Revision: 1.9 $
+* $Date: 2002/03/20 00:21:42 $
+* $Revision: 1.10 $
 *
 *******************************************************************************
 */
@@ -1027,6 +1027,19 @@ to guarantee identifier closure.
     }
 
     private void fillFromFile(String version) {
+    	try {
+    		fillFromFile2(version);
+    	} catch (ChainException e) {
+    		try {
+    			ConvertUCD.main(new String[]{version});
+    		} catch (Exception e2) {
+            	throw new ChainException("Can't build data file for {0}", new Object[]{version}, e2);
+    		}
+    		fillFromFile2(version);
+    	}
+    }
+    
+    private void fillFromFile2(String version) {
         DataInputStream dataIn = null;
         String fileName = BIN_DIR + "UCD_Data" + version + ".bin";
         int uDataFileCount = 0;
diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
index 4502192e1b0..faab929bdd1 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
-* $Date: 2002/03/15 00:34:46 $
-* $Revision: 1.12 $
+* $Date: 2002/03/20 00:21:42 $
+* $Revision: 1.13 $
 *
 *******************************************************************************
 */
@@ -636,6 +636,7 @@ final class UCD_Names implements UCD_Types {
         "TEH_MARBUTA",
         "TETH",
         "WAW",
+        "SYRIAC_WAW", // underscore form, consistent with the other multi-word names in this array
         "YEH",
         "YEH_BARREE",
         "YEH_WITH_TAIL",
@@ -652,21 +653,21 @@ final class UCD_Names implements UCD_Types {
         "BEH",
         "BETH",
         "DAL",
-        "DALATH RISH",
+        "DALATH_RISH",
         "E",
         "FEH",
-        "FINAL SEMKATH",
+        "FINAL_SEMKATH",
         "GAF",
         "GAMAL",
         "HAH",
-        "HAMZA ON HEH GOAL",
+        "HAMZA_ON_HEH_GOAL",
         "HE",
         "HEH",
-        "HEH GOAL",
+        "HEH_GOAL",
         "HETH",
         "KAF",
         "KAPH",
-        "KNOTTED HEH",
+        "KNOTTED_HEH",
         "LAM",
         "LAMADH",
         "MEEM",
@@ -677,23 +678,24 @@ final class UCD_Names implements UCD_Types {
         "QAF",
         "QAPH",
         "REH",
-        "REVERSED PE",
+        "REVERSED_PE",
         "SAD",
         "SADHE",
         "SEEN",
         "SEMKATH",
         "SHIN",
-        "SWASH KAF",
+        "SWASH_KAF",
         "TAH",
         "TAW",
-        "TEH MARBUTA",
+        "TEH_MARBUTA",
         "TETH",
         "WAW",
+        "SYRIAC_WAW", // underscore form, matching the space-to-underscore conversion applied to the rest of this array
         "YEH",
-        "YEH BARREE",
-        "YEH WITH TAIL",
+        "YEH_BARREE",
+        "YEH_WITH_TAIL",
         "YUDH",
-        "YUDH HE",
+        "YUDH_HE",
         "ZAIN",
     };
 
diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
index 2a76aea26e6..060beaf4eb9 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
-* $Date: 2002/03/15 00:34:46 $
-* $Revision: 1.9 $
+* $Date: 2002/03/20 00:21:42 $
+* $Revision: 1.10 $
 *
 *******************************************************************************
 */
@@ -15,7 +15,7 @@ package com.ibm.text.UCD;
 
 public interface UCD_Types {
     
-    public static final int dVersion = 7; // change to fix the generated file D version. If less than zero, no "d"
+    public static final int dVersion = 8; // change to fix the generated file D version. If less than zero, no "d"
     
     public static final String BASE_DIR = "C:\\DATA\\";
     public static final String UCD_DIR = BASE_DIR + "UCD\\";
@@ -23,7 +23,7 @@ public interface UCD_Types {
     public static final String GEN_DIR = BASE_DIR + "GEN\\";
 
 
-    static final byte BINARY_FORMAT = 5; // bumped if binary format of UCD changes
+    static final byte BINARY_FORMAT = 6; // bumped if binary format of UCD changes
     
     // Unicode Property Types
     static final byte 
@@ -188,7 +188,7 @@ public interface UCD_Types {
         IDS_TrinaryOperator = 24,
         Radical = 25,
         UnifiedIdeograph = 26,
-        Reserved_Cf_Code_Point = 27,
+        Other_Default_Ignorable_Code_Point = 27,
         Deprecated = 28,
         Soft_Dotted = 29,
         Logical_Order_Exception = 30,
@@ -407,13 +407,14 @@ public static byte
     TEH_MARBUTA = 41,
     TETH = 42,
     WAW = 43,
-    YEH = 44,
-    YEH_BARREE = 45,
-    YEH_WITH_TAIL = 46,
-    YUDH = 47,
-    YUDH_HE = 48,
-    ZAIN = 49,
-    LIMIT_JOINING_GROUP = 50;
+    SYRIAC_WAW = 44,
+    YEH = 45,
+    YEH_BARREE = 46,
+    YEH_WITH_TAIL = 47,
+    YUDH = 48,
+    YUDH_HE = 49,
+    ZAIN = 50,
+    LIMIT_JOINING_GROUP = 51;
     
     static final byte NFD = 0, NFC = 1, NFKD = 2, NFKC = 3;    
     public static final int
diff --git a/tools/unicodetools/com/ibm/text/UCD/UnicodeProperty.java b/tools/unicodetools/com/ibm/text/UCD/UnicodeProperty.java
index ae344277088..027839710c8 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UnicodeProperty.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UnicodeProperty.java
@@ -137,6 +137,14 @@ public abstract class UnicodeProperty implements UCD_Types {
             }
       }
       
+      /**
+       * Returns getValue(cp, LONG) for non-BINARY properties and getProperty(LONG) for BINARY ones (special hack for NFD/NFKD).
+       */
+		public String getListingValue(int cp) {
+        	if (getValueType() != BINARY) return getValue(cp, LONG); // non-binary: value looked up for this code point
+        	return getProperty(LONG); // binary: result of getProperty(LONG), which does not depend on cp
+		}
+      
       /**
        * Does it have the propertyValue?
        */
diff --git a/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java b/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java
index d10b87d675f..bf2a1b9a4f4 100644
--- a/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $
-* $Date: 2002/03/15 01:57:01 $
-* $Revision: 1.10 $
+* $Date: 2002/03/20 00:21:42 $
+* $Revision: 1.11 $
 *
 *******************************************************************************
 */
@@ -674,12 +674,12 @@ can help you narrow these down.
             if (cp == 0x3131) {
                 System.out.println("Debug: " + idnProhibited
                     + ", " + idnUnassigned
-                    + ", " + Main.nfkc.hasDecomposition(cp)
+                    + ", " + Main.nfkd.normalizationDiffers(cp)
                     + ", " + Main.ucd.getCodeAndName(Main.nfkc.normalize(cp))
                     + ", " + Main.ucd.getCodeAndName(Main.nfc.normalize(cp)));
             } 
             
-            if (!idnProhibited && ! idnUnassigned && Main.nfkc.hasDecomposition(cp)) {
+            if (!idnProhibited && ! idnUnassigned && Main.nfkd.normalizationDiffers(cp)) {
                 String kc = Main.nfkc.normalize(cp);
                 String c = Main.nfc.normalize(cp);
                 if (kc.equals(c)) continue;
@@ -1045,6 +1045,47 @@ E0020-E007F; [TAGGING CHARACTERS]
         }
         return result;
     }
+    
+    /*
+                    + "\r\n#  Generated from <2060..206F, FFF0..FFFB, E0000..E0FFF>"
+                    + "\r\n#    + Other_Default_Ignorable_Code_Point + (Cf + Cc + Cs - White_Space)";
+    */
+    
+    public static void diffIgnorable () { // prints the Other_Default_Ignorable_Code_Point members not covered by Cf + Cc + Cs - White_space + fixed ranges
+        Main.setUCD();
+    	// Build the "control" set step by step, printing it after every step.
+    	UnicodeSet control = UnifiedBinaryProperty.make(CATEGORY + Cf, Main.ucd).getSet();
+    	// NOTE(review): control and odicp are modified in place below; confirm getSet() returns a copy and not a shared set.
+    	System.out.println("Cf");
+    	Utility.showSetNames("", control, false, Main.ucd);
+    	
+    	control.addAll(UnifiedBinaryProperty.make(CATEGORY + Cc, Main.ucd).getSet());
+
+    	System.out.println("Cf + Cc");
+    	Utility.showSetNames("", control, false, Main.ucd);
+    	
+    	control.addAll(UnifiedBinaryProperty.make(CATEGORY + Cs, Main.ucd).getSet());
+
+    	System.out.println("Cf + Cc + Cs");
+    	Utility.showSetNames("", control, false, Main.ucd);
+    	
+    	control.removeAll(UnifiedBinaryProperty.make(BINARY_PROPERTIES + White_space, Main.ucd).getSet());
+    	
+    	System.out.println("Cf + Cc + Cs - WhiteSpace");
+    	Utility.showSetNames("", control, false, Main.ucd);
+
+    	control.add(0x2060,0x206f).add(0xFFF0,0xFFFB).add(0xE0000,0xE0FFF);
+    	
+    	System.out.println("(Cf + Cc + Cs - WhiteSpace) + ranges");
+    	Utility.showSetNames("", control, false, Main.ucd);
+    	// What remains of Other_Default_Ignorable_Code_Point once everything in "control" is removed.
+    	UnicodeSet odicp = UnifiedBinaryProperty.make(BINARY_PROPERTIES + Other_Default_Ignorable_Code_Point, Main.ucd).getSet();
+    	
+    	odicp.removeAll(control);
+    	
+    	System.out.println("Minimal Default Ignorable Code Points");
+    	Utility.showSetNames("", odicp, true, Main.ucd);
+    }
 
 
     public static void IdentifierTest() {
@@ -1241,6 +1282,95 @@ E0020-E007F; [TAGGING CHARACTERS]
         if (cat == Lu || cat == Lt || cat == Ll) return "LC";
         return Main.ucd.getCategoryID(cp);
     }
+    
+    static public void verifyNormalizationStability() { // runs the stability check against UCD "3.1.0" and then "3.0.0"
+        Main.setUCD();
+		verifyNormalizationStability2("3.1.0");
+		verifyNormalizationStability2("3.0.0");
+    }
+    
+    static public void verifyNormalizationStability2(String version) { // compares Main's UCD and normalizers with those built for the given older version
+        // NOTE(review): the result of this call is discarded; presumably it forces Main.nfd to initialize - confirm.
+        Main.nfd.normalizationDiffers(0x10300);
+        
+        UCD older = UCD.make(version); // Main.ucd.getPreviousVersion();
+        // One normalizer per form, built from the older UCD's version.
+        Normalizer oldNFC = new Normalizer(Normalizer.NFC, older.getVersion());
+        Normalizer oldNFD = new Normalizer(Normalizer.NFD, older.getVersion());
+        Normalizer oldNFKC = new Normalizer(Normalizer.NFKC, older.getVersion());
+        Normalizer oldNFKD = new Normalizer(Normalizer.NFKD, older.getVersion());
+        
+        System.out.println("Testing " + Main.nfd.getUCDVersion() + " against " + oldNFD.getUCDVersion());
+        // Scan every code point; skip those unassigned in Main.ucd, and categories Cs and PRIVATE_USE.
+        for (int i = 0; i <= 0x10FFFF; ++i) {
+        	Utility.dot(i);
+            if (!Main.ucd.isAssigned(i)) continue;
+            byte cat = Main.ucd.getCategory(i);
+            if (cat == Cs || cat == PRIVATE_USE) continue;
+            // Leftover debugging aid: prints the new and old NFKD results for U+005E.
+            if (i == 0x5e) {
+            	System.out.println("debug");
+            	String test1 = Main.nfkd.normalize(i);
+            	String test2 = oldNFKD.normalize(i);
+        		System.out.println("Testing (new/old)" + Main.ucd.getCodeAndName(i));
+    			System.out.println("\t" + Main.ucd.getCodeAndName(test1));
+    			System.out.println("\t" + Main.ucd.getCodeAndName(test2));
+            }
+            	
+            if (older.isAssigned(i)) {
+            	// Assigned in both versions: the combining class and all four normalization forms must agree.
+            	int newCan = Main.ucd.getCombiningClass(i);
+            	int oldCan = older.getCombiningClass(i);
+            	if (newCan != oldCan) {
+            		System.out.println("FAILS CCC STABILITY: " + newCan + " != " + oldCan
+            			+ "; " + Main.ucd.getCodeAndName(i));
+            	}
+            	
+            	verifyEquals(i, "NFD STABILITY (new/old)", Main.nfd.normalize(i), oldNFD.normalize(i));
+            	verifyEquals(i, "NFC STABILITY (new/old)", Main.nfc.normalize(i), oldNFC.normalize(i));
+            	verifyEquals(i, "NFKD STABILITY (new/old)", Main.nfkd.normalize(i), oldNFKD.normalize(i));
+            	verifyEquals(i, "NFKC STABILITY (new/old)", Main.nfkc.normalize(i), oldNFKC.normalize(i));
+            	
+            } else {
+            	// Not assigned in the older version.
+            	// (1) If it has an NFD decomposition and noneHaveCategory(decomp, Cn, older) holds, NFC of that decomposition must NOT give back i.
+            	if (Main.nfd.normalizationDiffers(i)) {
+            		String decomp = Main.nfd.normalize(i);
+            		if (noneHaveCategory(decomp, Cn, older)) {
+            			String recomp = Main.nfc.normalize(decomp);
+            			if (recomp.equals(UTF16.valueOf(i))) {
+        					Utility.fixDot();
+            				System.out.println("FAILS COMP STABILITY: " + Main.ucd.getCodeAndName(i));
+    						System.out.println("\t" + Main.ucd.getCodeAndName(decomp));
+    						System.out.println("\t" + Main.ucd.getCodeAndName(recomp));
+    						System.out.println();
+    						throw new IllegalArgumentException("Comp stability"); // unlike the checks above, this ends the scan at the first failure
+            			}
+            		}
+            	}
+            }
+        }
+    }
+    
+    public static boolean noneHaveCategory(String s, byte cat, UCD ucd) { // true iff no code point of s has category cat according to ucd
+    	int cp;
+    	for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) { // step by whole code points (1 or 2 UTF-16 units)
+    		cp = UTF16.charAt(s, i);
+    		byte cat2 = ucd.getCategory(cp); // fix: look up the decoded code point, not the UTF-16 index i
+    		if (cat == cat2) return false;
+    	}
+    	return true;
+    }
+    
+    public static void verifyEquals(int cp, String message, String a, String b) { // prints a "FAILS <message>" report for cp when a and b differ; silent otherwise
+    	if (!a.equals(b)) {
+        	Utility.fixDot();
+    		System.out.println("FAILS " + message + ": " + Main.ucd.getCodeAndName(cp));
+    		System.out.println("\t" + Main.ucd.getCodeAndName(a)); // first value (callers in this file pass the new normalization)
+    		System.out.println("\t" + Main.ucd.getCodeAndName(b)); // second value (callers in this file pass the old normalization)
+    		System.out.println();
+    	}
+    }
 
     public static void checkAgainstUInfo() {
     /*