First check-in

X-SVN-Rev: 5636
2025-04-11 08:01:32 +00:00 · 2001-08-30 20:50:18 +00:00 · 2001-08-30 20:50:18 +00:00 · 1cd275c205
commit 1cd275c205
parent b3321bad52
48 changed files with 20878 additions and 0 deletions
--- a/tools/unicodetools/com/ibm/text/UCA/CEList.java
+++ b/tools/unicodetools/com/ibm/text/UCA/CEList.java
@ -0,0 +1,216 @@
+package com.ibm.text.UCA;
+import com.ibm.text.UCD.*;
+import com.ibm.text.utility.*;
+
+/**
+ * A read-only sequence of collation elements (CEs), each packed into one int.
+ *
+ * <p>A list is a window <code>[startOffset, endOffset)</code> onto a backing array.
+ * The public constructors copy their input; {@link #sub}, {@link #start} and
+ * {@link #end} return views that share the backing array instead of copying it.
+ * Every index accepted by the public methods is relative to the list it is
+ * called on, so valid positions run from 0 to <code>length() - 1</code>.
+ * No method of this class modifies a list after it has been constructed.
+ */
+public final class CEList implements java.lang.Comparable, UCD_Types {
+    /** Backing array; may be shared with the views created from this list. */
+    int[] contents;
+    /** Index in {@link #contents} of the first element of this list. */
+    int startOffset;
+    /** Index in {@link #contents} one past the last element of this list. */
+    int endOffset;
+    /** Number of elements, always <code>endOffset - startOffset</code>. */
+    int count;
+
+    /**
+     * Creates a list holding a private copy of <code>source[start..end)</code>.
+     *
+     * @param source array to copy from
+     * @param start index of the first element to copy
+     * @param end index one past the last element to copy
+     */
+    public CEList(int[] source, int start, int end) {
+        count = end - start;
+        contents = new int[count];
+        System.arraycopy(source, start, contents, 0, count);
+        startOffset = 0;
+        endOffset = count;
+    }
+
+    /**
+     * Creates a list holding a private copy of the whole of <code>source</code>.
+     *
+     * @param source array to copy
+     */
+    public CEList(int[] source) {
+        this(source, 0, source.length);
+    }
+
+    /**
+     * Creates a list that uses <code>source</code> directly, without copying.
+     *
+     * @param source backing array to share
+     * @param start absolute index of the first element
+     * @param end absolute index one past the last element
+     * @param spare ignored; it only gives this constructor a signature that
+     *              differs from the copying constructor
+     */
+    private CEList(int[] source, int start, int end, boolean spare) {
+        contents = source;
+        startOffset = start;
+        endOffset = end;
+        count = end - start;
+    }
+
+    /**
+     * Returns a new list made of this list's elements followed by those of
+     * <code>that</code>. Neither operand is changed.
+     *
+     * @param that list to append
+     * @return the concatenation, backed by a fresh array
+     */
+    public CEList append(CEList that) {
+        int[] newContents = new int[count + that.count];
+        System.arraycopy(contents, startOffset, newContents, 0, count);
+        System.arraycopy(that.contents, that.startOffset, newContents, count, that.count);
+        return new CEList(newContents, 0, count + that.count, true);
+    }
+
+    /**
+     * Returns the view <code>[start, end)</code> of this list.
+     *
+     * @param start position of the first element, relative to this list
+     * @param end position one past the last element, relative to this list
+     * @return a list sharing this list's backing array
+     * @throws IndexOutOfBoundsException unless 0 &lt;= start &lt;= end &lt;= length()
+     */
+    public CEList sub(int start, int end) {
+        return view(start, end);
+    }
+
+    /**
+     * Returns the view made of the first <code>end</code> elements of this list.
+     *
+     * @param end position one past the last element, relative to this list
+     * @return a list sharing this list's backing array
+     * @throws IndexOutOfBoundsException unless 0 &lt;= end &lt;= length()
+     */
+    public CEList start(int end) {
+        return view(0, end);
+    }
+
+    /**
+     * Returns the view from position <code>start</code> to the end of this list.
+     *
+     * @param start position of the first element, relative to this list
+     * @return a list sharing this list's backing array
+     * @throws IndexOutOfBoundsException unless 0 &lt;= start &lt;= length()
+     */
+    public CEList end(int start) {
+        return view(start, count);
+    }
+
+    /**
+     * Validates a relative range and turns it into a view on the backing array.
+     * The relative positions are shifted by startOffset so that a view taken
+     * from another view still addresses the right elements.
+     */
+    private CEList view(int from, int to) {
+        if (from < 0 || to < from || to > count) {
+            throw new IndexOutOfBoundsException(
+                "range " + from + ".." + to + " outside list of length " + count);
+        }
+        return new CEList(contents, startOffset + from, startOffset + to, true);
+    }
+
+    /**
+     * @return the number of elements in this list
+     */
+    public int length() {
+        return count;
+    }
+
+    /**
+     * Returns the element at position <code>i</code> of this list.
+     *
+     * @param i position relative to this list, from 0 to length() - 1
+     * @return the collation element stored there
+     * @throws ArrayIndexOutOfBoundsException if <code>i</code> is out of range
+     */
+    public int at(int i) {
+        if (i < 0 || i >= count) throw new ArrayIndexOutOfBoundsException(i);
+        // The window may start anywhere in the backing array.
+        return contents[startOffset + i];
+    }
+
+    /**
+     * Hash over the length and the elements inside the window only, so that
+     * equal lists hash alike whatever backing array they sit on.
+     */
+    public int hashCode() {
+        int result = count;
+        for (int i = startOffset; i < endOffset; ++i) {
+            result *= 37;
+            result += contents[i];
+        }
+        return result;
+    }
+
+    /**
+     * Two lists are equal when they have the same length and the same
+     * elements in the same order. Returns false for null and for objects
+     * that are not CELists.
+     */
+    public boolean equals(Object other) {
+        if (!(other instanceof CEList)) return false;
+        CEList that = (CEList) other;
+        if (count != that.count) return false;
+        for (int i = 0; i < count; ++i) {
+            if (contents[startOffset + i] != that.contents[that.startOffset + i]) return false;
+        }
+        return true;
+    }
+
+    /**
+     * Orders lists element by element; when one list is a prefix of the other
+     * the shorter one sorts first. Elements are compared as signed ints.
+     *
+     * @param other the CEList to compare with
+     * @return -1, 0 or 1
+     * @throws ClassCastException if <code>other</code> is not a CEList
+     * @throws NullPointerException if <code>other</code> is null
+     */
+    public int compareTo(Object other) {
+        CEList that = (CEList) other;
+        int shared = (count < that.count) ? count : that.count;
+        for (int i = 0; i < shared; ++i) {
+            int mine = contents[startOffset + i];
+            int theirs = that.contents[that.startOffset + i];
+            if (mine != theirs) return (mine < theirs) ? -1 : 1;
+        }
+        if (count < that.count) return -1;
+        if (count > that.count) return 1;
+        return 0;
+    }
+
+    /**
+     * Computes the tertiary value to use for a character with the given
+     * decomposition type. The values produced line up with the indices of
+     * {@link #NAME3} (for example 0xE is "HIRA" and 0x11 is "KATA").
+     *
+     * @param ch the character, used for the hiragana, katakana and
+     *           narrow-small range tests
+     * @param type decomposition type, one of the UCD_Types constants
+     * @param t incoming tertiary value; several cases test it against 8,
+     *          which is "CAP" in NAME3
+     * @return the remapped value, or <code>t</code> (possibly replaced by the
+     *         kana value) when <code>type</code> matches no case below
+     */
+    public static byte remap(int ch, byte type, int t) {
+        // Kana get their own base value first, unless the decomposition is canonical.
+        if (type != CANONICAL) {
+            if (0x3041 <= ch && ch <= 0x3094) t = 0xE; // hiragana
+            else if (0x30A1 <= ch && ch <= 0x30FA) t = 0x11; // katakana
+        }
+        switch (type) {
+            case COMPATIBILITY: t = (t == 8) ? 0xA : 4; break;
+            case COMPAT_FONT:  t = (t == 8) ? 0xB : 5; break;
+            case COMPAT_NOBREAK: t = 0x1B; break;
+            case COMPAT_INITIAL: t = 0x17; break;
+            case COMPAT_MEDIAL: t = 0x18; break;
+            case COMPAT_FINAL: t = 0x19; break;
+            case COMPAT_ISOLATED: t = 0x1A; break;
+            case COMPAT_CIRCLE: t = (t == 0x11) ? 0x13 : (t == 8) ? 0xC : 6; break;
+            case COMPAT_SUPER: t = 0x14; break;
+            case COMPAT_SUB: t = 0x15; break;
+            case COMPAT_VERTICAL: t = 0x16; break;
+            case COMPAT_WIDE: t= (t == 8) ? 9 : 3; break;
+            case COMPAT_NARROW: t = (0xFF67 <= ch && ch <= 0xFF6F) ? 0x10 : 0x12; break;
+            case COMPAT_SMALL: t = (t == 0xE) ? 0xE : 0xF; break;
+            case COMPAT_SQUARE: t = (t == 8) ? 0x1D : 0x1C; break;
+            case COMPAT_FRACTION: t = 0x1E; break;
+        }
+        return (byte)t;
+    }
+
+    /**
+     * Formats every element with {@link #toString(int)}, separated by single spaces.
+     */
+    public String toString() {
+        StringBuffer result = new StringBuffer();
+        for (int i = startOffset; i < endOffset; ++i) {
+            if (i != startOffset) result.append(' ');
+            result.append(toString(contents[i]));
+        }
+        return result.toString();
+    }
+
+    /**
+     * Formats one collation element as <code>[primary.secondary.tertiary](NAME)</code>,
+     * with the three weights in hex and NAME taken from {@link #NAME3}.
+     *
+     * @param ce packed collation element
+     * @return the formatted element
+     */
+    public static String toString(int ce) {
+        return "[" + Utility.hex(UCA.getPrimary(ce)) + "."
+          + Utility.hex(UCA.getSecondary(ce)) + "."
+          + Utility.hex(UCA.getTertiary(ce)) + "](" + NAME3[UCA.getTertiary(ce)] + ")";
+    }
+
+    /** Display names for tertiary values; the array index is the tertiary value. */
+    static final String[] NAME3 = {
+        "IGNORE",    // 0
+        "BLK",     // Unused?
+        "MIN",
+        "WIDE",
+        "COMPAT",
+        "FONT",
+        "CIRCLE",
+        "RES-2",
+        "CAP",
+        "WIDECAP",
+        "COMPATCAP",
+        "FONTCAP",
+        "CIRCLECAP",
+        "HIRA-SMALL",
+        "HIRA",
+        "SMALL",
+        "SMALL-NARROW",
+        "KATA",
+        "NARROW",
+        "CIRCLE-KATA",
+        "SUP-MNN",
+        "SUB-MNS",
+        "VERT", // Missing??
+        "AINI",
+        "AMED",
+        "AFIN",
+        "AISO",
+        "NOBREAK", // Missing?
+        "SQUARED",
+        "SQUAREDCAP",
+        "FRACTION",
+        "MAX"
+    };
+
+    // testing
+
+    /**
+     * Small self-test that prints comparison, view, equality and hash results
+     * to standard output.
+     */
+    public static void main(String args[]) throws Exception {
+        /* This: [0241.0020.0004], that: [0F6B.0020.0002]
+            1, 2, 1, 2
+            0, 1, 1, 1
+        */
+        CEList t1 = new CEList(new int[] {0, 0x02412004});
+        t1 = t1.sub(1,2);
+        CEList t2 = new CEList(new int[] {0x0F6B2002});
+        System.out.println(t1.compareTo(t2));
+
+
+        CEList foo = new CEList(new int[] {0, 1, 2, 3, 4});
+        CEList fuu = new CEList(new int[] {});
+        int cc = foo.compareTo(fuu);
+        System.out.println(cc);
+
+        System.out.println(foo);
+        System.out.println(foo.start(2));
+        System.out.println(foo.end(1));
+        CEList fii = new CEList(new int[] {2, 3});
+        CEList foo2 = foo.sub(2,4);
+        System.out.println(fii.equals(foo2));
+        System.out.println(fii.compareTo(foo2));
+        System.out.println(fii.compareTo(foo));
+        System.out.println(fii.hashCode() == foo2.hashCode());
+
+    }
+}
+    
--- a/tools/unicodetools/com/ibm/text/UCA/Case.java
+++ b/tools/unicodetools/com/ibm/text/UCA/Case.java
@ -0,0 +1,813 @@
+package com.ibm.text.UCA;
+
+public final class Case {
+    
+    /**
+     * No longer used by the fold methods, which now build their result in a
+     * local buffer. Retained because it is package-visible and code outside
+     * this class may still refer to it.
+     */
+    static StringBuffer out = new StringBuffer();
+
+    /**
+     * Case-folds a single char.
+     *
+     * @param c the char to fold
+     * @return the folding of <code>c</code>, which may be longer than one char
+     */
+    static String fold(char c) {
+        return fold(String.valueOf(c));
+    }
+
+    /**
+     * Case-folds a string one char at a time using the CF table: a char with
+     * an entry is replaced by that entry, any other char is copied unchanged.
+     *
+     * @param in the string to fold; must not be null
+     * @return the folded string
+     */
+    static String fold(String in) {
+        // A per-call buffer keeps callers independent of each other: no global
+        // lock is needed and no oversized buffer stays alive between calls.
+        StringBuffer folded = new StringBuffer(in.length());
+        for (int i = 0; i < in.length(); ++i) {
+            char c = in.charAt(i);
+            String f = CF[c];
+            if (f == null) folded.append(c);
+            else folded.append(f);
+        }
+        return folded.toString();
+    }
+    
+    /**
+     * Case-folding table with one slot per char value (0x0000 to 0xFFFF).
+     * An entry holds the replacement text for that char; a null entry means
+     * the char is copied through unchanged by fold(String). The entries are
+     * filled in by the static initializer that follows.
+     */
+    static String[] CF = new String[65536];
+    static {
+	 CF[0x0041]="\u0061";
+	 CF[0x0042]="\u0062";
+	 CF[0x0043]="\u0063";
+	 CF[0x0044]="\u0064";
+	 CF[0x0045]="\u0065";
+	 CF[0x0046]="\u0066";
+	 CF[0x0047]="\u0067";
+	 CF[0x0048]="\u0068";
+	 CF[0x0049]="\u0069";
+	 CF[0x004A]="\u006A";
+	 CF[0x004B]="\u006B";
+	 CF[0x004C]="\u006C";
+	 CF[0x004D]="\u006D";
+	 CF[0x004E]="\u006E";
+	 CF[0x004F]="\u006F";
+	 CF[0x0050]="\u0070";
+	 CF[0x0051]="\u0071";
+	 CF[0x0052]="\u0072";
+	 CF[0x0053]="\u0073";
+	 CF[0x0054]="\u0074";
+	 CF[0x0055]="\u0075";
+	 CF[0x0056]="\u0076";
+	 CF[0x0057]="\u0077";
+	 CF[0x0058]="\u0078";
+	 CF[0x0059]="\u0079";
+	 CF[0x005A]="\u007A";
+	 CF[0x00B5]="\u03BC";
+	 CF[0x00C0]="\u00E0";
+	 CF[0x00C1]="\u00E1";
+	 CF[0x00C2]="\u00E2";
+	 CF[0x00C3]="\u00E3";
+	 CF[0x00C4]="\u00E4";
+	 CF[0x00C5]="\u00E5";
+	 CF[0x00C6]="\u00E6";
+	 CF[0x00C7]="\u00E7";
+	 CF[0x00C8]="\u00E8";
+	 CF[0x00C9]="\u00E9";
+	 CF[0x00CA]="\u00EA";
+	 CF[0x00CB]="\u00EB";
+	 CF[0x00CC]="\u00EC";
+	 CF[0x00CD]="\u00ED";
+	 CF[0x00CE]="\u00EE";
+	 CF[0x00CF]="\u00EF";
+	 CF[0x00D0]="\u00F0";
+	 CF[0x00D1]="\u00F1";
+	 CF[0x00D2]="\u00F2";
+	 CF[0x00D3]="\u00F3";
+	 CF[0x00D4]="\u00F4";
+	 CF[0x00D5]="\u00F5";
+	 CF[0x00D6]="\u00F6";
+	 CF[0x00D8]="\u00F8";
+	 CF[0x00D9]="\u00F9";
+	 CF[0x00DA]="\u00FA";
+	 CF[0x00DB]="\u00FB";
+	 CF[0x00DC]="\u00FC";
+	 CF[0x00DD]="\u00FD";
+	 CF[0x00DE]="\u00FE";
+	 CF[0x00DF]="\u0073\u0073";
+	 CF[0x0100]="\u0101";
+	 CF[0x0102]="\u0103";
+	 CF[0x0104]="\u0105";
+	 CF[0x0106]="\u0107";
+	 CF[0x0108]="\u0109";
+	 CF[0x010A]="\u010B";
+	 CF[0x010C]="\u010D";
+	 CF[0x010E]="\u010F";
+	 CF[0x0110]="\u0111";
+	 CF[0x0112]="\u0113";
+	 CF[0x0114]="\u0115";
+	 CF[0x0116]="\u0117";
+	 CF[0x0118]="\u0119";
+	 CF[0x011A]="\u011B";
+	 CF[0x011C]="\u011D";
+	 CF[0x011E]="\u011F";
+	 CF[0x0120]="\u0121";
+	 CF[0x0122]="\u0123";
+	 CF[0x0124]="\u0125";
+	 CF[0x0126]="\u0127";
+	 CF[0x0128]="\u0129";
+	 CF[0x012A]="\u012B";
+	 CF[0x012C]="\u012D";
+	 CF[0x012E]="\u012F";
+	 CF[0x0130]="\u0069";
+	 CF[0x0131]="\u0069";
+	 CF[0x0132]="\u0133";
+	 CF[0x0134]="\u0135";
+	 CF[0x0136]="\u0137";
+	 CF[0x0139]="\u013A";
+	 CF[0x013B]="\u013C";
+	 CF[0x013D]="\u013E";
+	 CF[0x013F]="\u0140";
+	 CF[0x0141]="\u0142";
+	 CF[0x0143]="\u0144";
+	 CF[0x0145]="\u0146";
+	 CF[0x0147]="\u0148";
+	 CF[0x0149]="\u02BC\u006E";
+	 CF[0x014A]="\u014B";
+	 CF[0x014C]="\u014D";
+	 CF[0x014E]="\u014F";
+	 CF[0x0150]="\u0151";
+	 CF[0x0152]="\u0153";
+	 CF[0x0154]="\u0155";
+	 CF[0x0156]="\u0157";
+	 CF[0x0158]="\u0159";
+	 CF[0x015A]="\u015B";
+	 CF[0x015C]="\u015D";
+	 CF[0x015E]="\u015F";
+	 CF[0x0160]="\u0161";
+	 CF[0x0162]="\u0163";
+	 CF[0x0164]="\u0165";
+	 CF[0x0166]="\u0167";
+	 CF[0x0168]="\u0169";
+	 CF[0x016A]="\u016B";
+	 CF[0x016C]="\u016D";
+	 CF[0x016E]="\u016F";
+	 CF[0x0170]="\u0171";
+	 CF[0x0172]="\u0173";
+	 CF[0x0174]="\u0175";
+	 CF[0x0176]="\u0177";
+	 CF[0x0178]="\u00FF";
+	 CF[0x0179]="\u017A";
+	 CF[0x017B]="\u017C";
+	 CF[0x017D]="\u017E";
+	 CF[0x017F]="\u0073";
+	 CF[0x0181]="\u0253";
+	 CF[0x0182]="\u0183";
+	 CF[0x0184]="\u0185";
+	 CF[0x0186]="\u0254";
+	 CF[0x0187]="\u0188";
+	 CF[0x0189]="\u0256";
+	 CF[0x018A]="\u0257";
+	 CF[0x018B]="\u018C";
+	 CF[0x018E]="\u01DD";
+	 CF[0x018F]="\u0259";
+	 CF[0x0190]="\u025B";
+	 CF[0x0191]="\u0192";
+	 CF[0x0193]="\u0260";
+	 CF[0x0194]="\u0263";
+	 CF[0x0196]="\u0269";
+	 CF[0x0197]="\u0268";
+	 CF[0x0198]="\u0199";
+	 CF[0x019C]="\u026F";
+	 CF[0x019D]="\u0272";
+	 CF[0x019F]="\u0275";
+	 CF[0x01A0]="\u01A1";
+	 CF[0x01A2]="\u01A3";
+	 CF[0x01A4]="\u01A5";
+	 CF[0x01A6]="\u0280";
+	 CF[0x01A7]="\u01A8";
+	 CF[0x01A9]="\u0283";
+	 CF[0x01AC]="\u01AD";
+	 CF[0x01AE]="\u0288";
+	 CF[0x01AF]="\u01B0";
+	 CF[0x01B1]="\u028A";
+	 CF[0x01B2]="\u028B";
+	 CF[0x01B3]="\u01B4";
+	 CF[0x01B5]="\u01B6";
+	 CF[0x01B7]="\u0292";
+	 CF[0x01B8]="\u01B9";
+	 CF[0x01BC]="\u01BD";
+	 CF[0x01C4]="\u01C6";
+	 CF[0x01C5]="\u01C6";
+	 CF[0x01C7]="\u01C9";
+	 CF[0x01C8]="\u01C9";
+	 CF[0x01CA]="\u01CC";
+	 CF[0x01CB]="\u01CC";
+	 CF[0x01CD]="\u01CE";
+	 CF[0x01CF]="\u01D0";
+	 CF[0x01D1]="\u01D2";
+	 CF[0x01D3]="\u01D4";
+	 CF[0x01D5]="\u01D6";
+	 CF[0x01D7]="\u01D8";
+	 CF[0x01D9]="\u01DA";
+	 CF[0x01DB]="\u01DC";
+	 CF[0x01DE]="\u01DF";
+	 CF[0x01E0]="\u01E1";
+	 CF[0x01E2]="\u01E3";
+	 CF[0x01E4]="\u01E5";
+	 CF[0x01E6]="\u01E7";
+	 CF[0x01E8]="\u01E9";
+	 CF[0x01EA]="\u01EB";
+	 CF[0x01EC]="\u01ED";
+	 CF[0x01EE]="\u01EF";
+	 CF[0x01F0]="\u006A\u030C";
+	 CF[0x01F1]="\u01F3";
+	 CF[0x01F2]="\u01F3";
+	 CF[0x01F4]="\u01F5";
+	 CF[0x01F6]="\u0195";
+	 CF[0x01F7]="\u01BF";
+	 CF[0x01F8]="\u01F9";
+	 CF[0x01FA]="\u01FB";
+	 CF[0x01FC]="\u01FD";
+	 CF[0x01FE]="\u01FF";
+	 CF[0x0200]="\u0201";
+	 CF[0x0202]="\u0203";
+	 CF[0x0204]="\u0205";
+	 CF[0x0206]="\u0207";
+	 CF[0x0208]="\u0209";
+	 CF[0x020A]="\u020B";
+	 CF[0x020C]="\u020D";
+	 CF[0x020E]="\u020F";
+	 CF[0x0210]="\u0211";
+	 CF[0x0212]="\u0213";
+	 CF[0x0214]="\u0215";
+	 CF[0x0216]="\u0217";
+	 CF[0x0218]="\u0219";
+	 CF[0x021A]="\u021B";
+	 CF[0x021C]="\u021D";
+	 CF[0x021E]="\u021F";
+	 CF[0x0222]="\u0223";
+	 CF[0x0224]="\u0225";
+	 CF[0x0226]="\u0227";
+	 CF[0x0228]="\u0229";
+	 CF[0x022A]="\u022B";
+	 CF[0x022C]="\u022D";
+	 CF[0x022E]="\u022F";
+	 CF[0x0230]="\u0231";
+	 CF[0x0232]="\u0233";
+	 CF[0x0345]="\u03B9";
+	 CF[0x0386]="\u03AC";
+	 CF[0x0388]="\u03AD";
+	 CF[0x0389]="\u03AE";
+	 CF[0x038A]="\u03AF";
+	 CF[0x038C]="\u03CC";
+	 CF[0x038E]="\u03CD";
+	 CF[0x038F]="\u03CE";
+	 CF[0x0390]="\u03B9\u0308\u0301";
+	 CF[0x0391]="\u03B1";
+	 CF[0x0392]="\u03B2";
+	 CF[0x0393]="\u03B3";
+	 CF[0x0394]="\u03B4";
+	 CF[0x0395]="\u03B5";
+	 CF[0x0396]="\u03B6";
+	 CF[0x0397]="\u03B7";
+	 CF[0x0398]="\u03B8";
+	 CF[0x0399]="\u03B9";
+	 CF[0x039A]="\u03BA";
+	 CF[0x039B]="\u03BB";
+	 CF[0x039C]="\u03BC";
+	 CF[0x039D]="\u03BD";
+	 CF[0x039E]="\u03BE";
+	 CF[0x039F]="\u03BF";
+	 CF[0x03A0]="\u03C0";
+	 CF[0x03A1]="\u03C1";
+	 CF[0x03A3]="\u03C2";
+	 CF[0x03A4]="\u03C4";
+	 CF[0x03A5]="\u03C5";
+	 CF[0x03A6]="\u03C6";
+	 CF[0x03A7]="\u03C7";
+	 CF[0x03A8]="\u03C8";
+	 CF[0x03A9]="\u03C9";
+	 CF[0x03AA]="\u03CA";
+	 CF[0x03AB]="\u03CB";
+	 CF[0x03B0]="\u03C5\u0308\u0301";
+	 CF[0x03C3]="\u03C2";
+	 CF[0x03D0]="\u03B2";
+	 CF[0x03D1]="\u03B8";
+	 CF[0x03D5]="\u03C6";
+	 CF[0x03D6]="\u03C0";
+	 CF[0x03DA]="\u03DB";
+	 CF[0x03DC]="\u03DD";
+	 CF[0x03DE]="\u03DF";
+	 CF[0x03E0]="\u03E1";
+	 CF[0x03E2]="\u03E3";
+	 CF[0x03E4]="\u03E5";
+	 CF[0x03E6]="\u03E7";
+	 CF[0x03E8]="\u03E9";
+	 CF[0x03EA]="\u03EB";
+	 CF[0x03EC]="\u03ED";
+	 CF[0x03EE]="\u03EF";
+	 CF[0x03F0]="\u03BA";
+	 CF[0x03F1]="\u03C1";
+	 CF[0x03F2]="\u03C2";
+	 CF[0x0400]="\u0450";
+	 CF[0x0401]="\u0451";
+	 CF[0x0402]="\u0452";
+	 CF[0x0403]="\u0453";
+	 CF[0x0404]="\u0454";
+	 CF[0x0405]="\u0455";
+	 CF[0x0406]="\u0456";
+	 CF[0x0407]="\u0457";
+	 CF[0x0408]="\u0458";
+	 CF[0x0409]="\u0459";
+	 CF[0x040A]="\u045A";
+	 CF[0x040B]="\u045B";
+	 CF[0x040C]="\u045C";
+	 CF[0x040D]="\u045D";
+	 CF[0x040E]="\u045E";
+	 CF[0x040F]="\u045F";
+	 CF[0x0410]="\u0430";
+	 CF[0x0411]="\u0431";
+	 CF[0x0412]="\u0432";
+	 CF[0x0413]="\u0433";
+	 CF[0x0414]="\u0434";
+	 CF[0x0415]="\u0435";
+	 CF[0x0416]="\u0436";
+	 CF[0x0417]="\u0437";
+	 CF[0x0418]="\u0438";
+	 CF[0x0419]="\u0439";
+	 CF[0x041A]="\u043A";
+	 CF[0x041B]="\u043B";
+	 CF[0x041C]="\u043C";
+	 CF[0x041D]="\u043D";
+	 CF[0x041E]="\u043E";
+	 CF[0x041F]="\u043F";
+	 CF[0x0420]="\u0440";
+	 CF[0x0421]="\u0441";
+	 CF[0x0422]="\u0442";
+	 CF[0x0423]="\u0443";
+	 CF[0x0424]="\u0444";
+	 CF[0x0425]="\u0445";
+	 CF[0x0426]="\u0446";
+	 CF[0x0427]="\u0447";
+	 CF[0x0428]="\u0448";
+	 CF[0x0429]="\u0449";
+	 CF[0x042A]="\u044A";
+	 CF[0x042B]="\u044B";
+	 CF[0x042C]="\u044C";
+	 CF[0x042D]="\u044D";
+	 CF[0x042E]="\u044E";
+	 CF[0x042F]="\u044F";
+	 CF[0x0460]="\u0461";
+	 CF[0x0462]="\u0463";
+	 CF[0x0464]="\u0465";
+	 CF[0x0466]="\u0467";
+	 CF[0x0468]="\u0469";
+	 CF[0x046A]="\u046B";
+	 CF[0x046C]="\u046D";
+	 CF[0x046E]="\u046F";
+	 CF[0x0470]="\u0471";
+	 CF[0x0472]="\u0473";
+	 CF[0x0474]="\u0475";
+	 CF[0x0476]="\u0477";
+	 CF[0x0478]="\u0479";
+	 CF[0x047A]="\u047B";
+	 CF[0x047C]="\u047D";
+	 CF[0x047E]="\u047F";
+	 CF[0x0480]="\u0481";
+	 CF[0x048C]="\u048D";
+	 CF[0x048E]="\u048F";
+	 CF[0x0490]="\u0491";
+	 CF[0x0492]="\u0493";
+	 CF[0x0494]="\u0495";
+	 CF[0x0496]="\u0497";
+	 CF[0x0498]="\u0499";
+	 CF[0x049A]="\u049B";
+	 CF[0x049C]="\u049D";
+	 CF[0x049E]="\u049F";
+	 CF[0x04A0]="\u04A1";
+	 CF[0x04A2]="\u04A3";
+	 CF[0x04A4]="\u04A5";
+	 CF[0x04A6]="\u04A7";
+	 CF[0x04A8]="\u04A9";
+	 CF[0x04AA]="\u04AB";
+	 CF[0x04AC]="\u04AD";
+	 CF[0x04AE]="\u04AF";
+	 CF[0x04B0]="\u04B1";
+	 CF[0x04B2]="\u04B3";
+	 CF[0x04B4]="\u04B5";
+	 CF[0x04B6]="\u04B7";
+	 CF[0x04B8]="\u04B9";
+	 CF[0x04BA]="\u04BB";
+	 CF[0x04BC]="\u04BD";
+	 CF[0x04BE]="\u04BF";
+	 CF[0x04C1]="\u04C2";
+	 CF[0x04C3]="\u04C4";
+	 CF[0x04C7]="\u04C8";
+	 CF[0x04CB]="\u04CC";
+	 CF[0x04D0]="\u04D1";
+	 CF[0x04D2]="\u04D3";
+	 CF[0x04D4]="\u04D5";
+	 CF[0x04D6]="\u04D7";
+	 CF[0x04D8]="\u04D9";
+	 CF[0x04DA]="\u04DB";
+	 CF[0x04DC]="\u04DD";
+	 CF[0x04DE]="\u04DF";
+	 CF[0x04E0]="\u04E1";
+	 CF[0x04E2]="\u04E3";
+	 CF[0x04E4]="\u04E5";
+	 CF[0x04E6]="\u04E7";
+	 CF[0x04E8]="\u04E9";
+	 CF[0x04EA]="\u04EB";
+	 CF[0x04EC]="\u04ED";
+	 CF[0x04EE]="\u04EF";
+	 CF[0x04F0]="\u04F1";
+	 CF[0x04F2]="\u04F3";
+	 CF[0x04F4]="\u04F5";
+	 CF[0x04F8]="\u04F9";
+	 CF[0x0531]="\u0561";
+	 CF[0x0532]="\u0562";
+	 CF[0x0533]="\u0563";
+	 CF[0x0534]="\u0564";
+	 CF[0x0535]="\u0565";
+	 CF[0x0536]="\u0566";
+	 CF[0x0537]="\u0567";
+	 CF[0x0538]="\u0568";
+	 CF[0x0539]="\u0569";
+	 CF[0x053A]="\u056A";
+	 CF[0x053B]="\u056B";
+	 CF[0x053C]="\u056C";
+	 CF[0x053D]="\u056D";
+	 CF[0x053E]="\u056E";
+	 CF[0x053F]="\u056F";
+	 CF[0x0540]="\u0570";
+	 CF[0x0541]="\u0571";
+	 CF[0x0542]="\u0572";
+	 CF[0x0543]="\u0573";
+	 CF[0x0544]="\u0574";
+	 CF[0x0545]="\u0575";
+	 CF[0x0546]="\u0576";
+	 CF[0x0547]="\u0577";
+	 CF[0x0548]="\u0578";
+	 CF[0x0549]="\u0579";
+	 CF[0x054A]="\u057A";
+	 CF[0x054B]="\u057B";
+	 CF[0x054C]="\u057C";
+	 CF[0x054D]="\u057D";
+	 CF[0x054E]="\u057E";
+	 CF[0x054F]="\u057F";
+	 CF[0x0550]="\u0580";
+	 CF[0x0551]="\u0581";
+	 CF[0x0552]="\u0582";
+	 CF[0x0553]="\u0583";
+	 CF[0x0554]="\u0584";
+	 CF[0x0555]="\u0585";
+	 CF[0x0556]="\u0586";
+	 CF[0x0587]="\u0565\u0582";
+	 CF[0x1E00]="\u1E01";
+	 CF[0x1E02]="\u1E03";
+	 CF[0x1E04]="\u1E05";
+	 CF[0x1E06]="\u1E07";
+	 CF[0x1E08]="\u1E09";
+	 CF[0x1E0A]="\u1E0B";
+	 CF[0x1E0C]="\u1E0D";
+	 CF[0x1E0E]="\u1E0F";
+	 CF[0x1E10]="\u1E11";
+	 CF[0x1E12]="\u1E13";
+	 CF[0x1E14]="\u1E15";
+	 CF[0x1E16]="\u1E17";
+	 CF[0x1E18]="\u1E19";
+	 CF[0x1E1A]="\u1E1B";
+	 CF[0x1E1C]="\u1E1D";
+	 CF[0x1E1E]="\u1E1F";
+	 CF[0x1E20]="\u1E21";
+	 CF[0x1E22]="\u1E23";
+	 CF[0x1E24]="\u1E25";
+	 CF[0x1E26]="\u1E27";
+	 CF[0x1E28]="\u1E29";
+	 CF[0x1E2A]="\u1E2B";
+	 CF[0x1E2C]="\u1E2D";
+	 CF[0x1E2E]="\u1E2F";
+	 CF[0x1E30]="\u1E31";
+	 CF[0x1E32]="\u1E33";
+	 CF[0x1E34]="\u1E35";
+	 CF[0x1E36]="\u1E37";
+	 CF[0x1E38]="\u1E39";
+	 CF[0x1E3A]="\u1E3B";
+	 CF[0x1E3C]="\u1E3D";
+	 CF[0x1E3E]="\u1E3F";
+	 CF[0x1E40]="\u1E41";
+	 CF[0x1E42]="\u1E43";
+	 CF[0x1E44]="\u1E45";
+	 CF[0x1E46]="\u1E47";
+	 CF[0x1E48]="\u1E49";
+	 CF[0x1E4A]="\u1E4B";
+	 CF[0x1E4C]="\u1E4D";
+	 CF[0x1E4E]="\u1E4F";
+	 CF[0x1E50]="\u1E51";
+	 CF[0x1E52]="\u1E53";
+	 CF[0x1E54]="\u1E55";
+	 CF[0x1E56]="\u1E57";
+	 CF[0x1E58]="\u1E59";
+	 CF[0x1E5A]="\u1E5B";
+	 CF[0x1E5C]="\u1E5D";
+	 CF[0x1E5E]="\u1E5F";
+	 CF[0x1E60]="\u1E61";
+	 CF[0x1E62]="\u1E63";
+	 CF[0x1E64]="\u1E65";
+	 CF[0x1E66]="\u1E67";
+	 CF[0x1E68]="\u1E69";
+	 CF[0x1E6A]="\u1E6B";
+	 CF[0x1E6C]="\u1E6D";
+	 CF[0x1E6E]="\u1E6F";
+	 CF[0x1E70]="\u1E71";
+	 CF[0x1E72]="\u1E73";
+	 CF[0x1E74]="\u1E75";
+	 CF[0x1E76]="\u1E77";
+	 CF[0x1E78]="\u1E79";
+	 CF[0x1E7A]="\u1E7B";
+	 CF[0x1E7C]="\u1E7D";
+	 CF[0x1E7E]="\u1E7F";
+	 CF[0x1E80]="\u1E81";
+	 CF[0x1E82]="\u1E83";
+	 CF[0x1E84]="\u1E85";
+	 CF[0x1E86]="\u1E87";
+	 CF[0x1E88]="\u1E89";
+	 CF[0x1E8A]="\u1E8B";
+	 CF[0x1E8C]="\u1E8D";
+	 CF[0x1E8E]="\u1E8F";
+	 CF[0x1E90]="\u1E91";
+	 CF[0x1E92]="\u1E93";
+	 CF[0x1E94]="\u1E95";
+	 CF[0x1E96]="\u0068\u0331";
+	 CF[0x1E97]="\u0074\u0308";
+	 CF[0x1E98]="\u0077\u030A";
+	 CF[0x1E99]="\u0079\u030A";
+	 CF[0x1E9A]="\u0061\u02BE";
+	 CF[0x1E9B]="\u1E61";
+	 CF[0x1EA0]="\u1EA1";
+	 CF[0x1EA2]="\u1EA3";
+	 CF[0x1EA4]="\u1EA5";
+	 CF[0x1EA6]="\u1EA7";
+	 CF[0x1EA8]="\u1EA9";
+	 CF[0x1EAA]="\u1EAB";
+	 CF[0x1EAC]="\u1EAD";
+	 CF[0x1EAE]="\u1EAF";
+	 CF[0x1EB0]="\u1EB1";
+	 CF[0x1EB2]="\u1EB3";
+	 CF[0x1EB4]="\u1EB5";
+	 CF[0x1EB6]="\u1EB7";
+	 CF[0x1EB8]="\u1EB9";
+	 CF[0x1EBA]="\u1EBB";
+	 CF[0x1EBC]="\u1EBD";
+	 CF[0x1EBE]="\u1EBF";
+	 CF[0x1EC0]="\u1EC1";
+	 CF[0x1EC2]="\u1EC3";
+	 CF[0x1EC4]="\u1EC5";
+	 CF[0x1EC6]="\u1EC7";
+	 CF[0x1EC8]="\u1EC9";
+	 CF[0x1ECA]="\u1ECB";
+	 CF[0x1ECC]="\u1ECD";
+	 CF[0x1ECE]="\u1ECF";
+	 CF[0x1ED0]="\u1ED1";
+	 CF[0x1ED2]="\u1ED3";
+	 CF[0x1ED4]="\u1ED5";
+	 CF[0x1ED6]="\u1ED7";
+	 CF[0x1ED8]="\u1ED9";
+	 CF[0x1EDA]="\u1EDB";
+	 CF[0x1EDC]="\u1EDD";
+	 CF[0x1EDE]="\u1EDF";
+	 CF[0x1EE0]="\u1EE1";
+	 CF[0x1EE2]="\u1EE3";
+	 CF[0x1EE4]="\u1EE5";
+	 CF[0x1EE6]="\u1EE7";
+	 CF[0x1EE8]="\u1EE9";
+	 CF[0x1EEA]="\u1EEB";
+	 CF[0x1EEC]="\u1EED";
+	 CF[0x1EEE]="\u1EEF";
+	 CF[0x1EF0]="\u1EF1";
+	 CF[0x1EF2]="\u1EF3";
+	 CF[0x1EF4]="\u1EF5";
+	 CF[0x1EF6]="\u1EF7";
+	 CF[0x1EF8]="\u1EF9";
+	 CF[0x1F08]="\u1F00";
+	 CF[0x1F09]="\u1F01";
+	 CF[0x1F0A]="\u1F02";
+	 CF[0x1F0B]="\u1F03";
+	 CF[0x1F0C]="\u1F04";
+	 CF[0x1F0D]="\u1F05";
+	 CF[0x1F0E]="\u1F06";
+	 CF[0x1F0F]="\u1F07";
+	 CF[0x1F18]="\u1F10";
+	 CF[0x1F19]="\u1F11";
+	 CF[0x1F1A]="\u1F12";
+	 CF[0x1F1B]="\u1F13";
+	 CF[0x1F1C]="\u1F14";
+	 CF[0x1F1D]="\u1F15";
+	 CF[0x1F28]="\u1F20";
+	 CF[0x1F29]="\u1F21";
+	 CF[0x1F2A]="\u1F22";
+	 CF[0x1F2B]="\u1F23";
+	 CF[0x1F2C]="\u1F24";
+	 CF[0x1F2D]="\u1F25";
+	 CF[0x1F2E]="\u1F26";
+	 CF[0x1F2F]="\u1F27";
+	 CF[0x1F38]="\u1F30";
+	 CF[0x1F39]="\u1F31";
+	 CF[0x1F3A]="\u1F32";
+	 CF[0x1F3B]="\u1F33";
+	 CF[0x1F3C]="\u1F34";
+	 CF[0x1F3D]="\u1F35";
+	 CF[0x1F3E]="\u1F36";
+	 CF[0x1F3F]="\u1F37";
+	 CF[0x1F48]="\u1F40";
+	 CF[0x1F49]="\u1F41";
+	 CF[0x1F4A]="\u1F42";
+	 CF[0x1F4B]="\u1F43";
+	 CF[0x1F4C]="\u1F44";
+	 CF[0x1F4D]="\u1F45";
+	 CF[0x1F50]="\u03C5\u0313";
+	 CF[0x1F52]="\u03C5\u0313\u0300";
+	 CF[0x1F54]="\u03C5\u0313\u0301";
+	 CF[0x1F56]="\u03C5\u0313\u0342";
+	 CF[0x1F59]="\u1F51";
+	 CF[0x1F5B]="\u1F53";
+	 CF[0x1F5D]="\u1F55";
+	 CF[0x1F5F]="\u1F57";
+	 CF[0x1F68]="\u1F60";
+	 CF[0x1F69]="\u1F61";
+	 CF[0x1F6A]="\u1F62";
+	 CF[0x1F6B]="\u1F63";
+	 CF[0x1F6C]="\u1F64";
+	 CF[0x1F6D]="\u1F65";
+	 CF[0x1F6E]="\u1F66";
+	 CF[0x1F6F]="\u1F67";
+	 CF[0x1F80]="\u1F00\u03B9";
+	 CF[0x1F81]="\u1F01\u03B9";
+	 CF[0x1F82]="\u1F02\u03B9";
+	 CF[0x1F83]="\u1F03\u03B9";
+	 CF[0x1F84]="\u1F04\u03B9";
+	 CF[0x1F85]="\u1F05\u03B9";
+	 CF[0x1F86]="\u1F06\u03B9";
+	 CF[0x1F87]="\u1F07\u03B9";
+	 CF[0x1F88]="\u1F00\u03B9";
+	 CF[0x1F89]="\u1F01\u03B9";
+	 CF[0x1F8A]="\u1F02\u03B9";
+	 CF[0x1F8B]="\u1F03\u03B9";
+	 CF[0x1F8C]="\u1F04\u03B9";
+	 CF[0x1F8D]="\u1F05\u03B9";
+	 CF[0x1F8E]="\u1F06\u03B9";
+	 CF[0x1F8F]="\u1F07\u03B9";
+	 CF[0x1F90]="\u1F20\u03B9";
+	 CF[0x1F91]="\u1F21\u03B9";
+	 CF[0x1F92]="\u1F22\u03B9";
+	 CF[0x1F93]="\u1F23\u03B9";
+	 CF[0x1F94]="\u1F24\u03B9";
+	 CF[0x1F95]="\u1F25\u03B9";
+	 CF[0x1F96]="\u1F26\u03B9";
+	 CF[0x1F97]="\u1F27\u03B9";
+	 CF[0x1F98]="\u1F20\u03B9";
+	 CF[0x1F99]="\u1F21\u03B9";
+	 CF[0x1F9A]="\u1F22\u03B9";
+	 CF[0x1F9B]="\u1F23\u03B9";
+	 CF[0x1F9C]="\u1F24\u03B9";
+	 CF[0x1F9D]="\u1F25\u03B9";
+	 CF[0x1F9E]="\u1F26\u03B9";
+	 CF[0x1F9F]="\u1F27\u03B9";
+	 CF[0x1FA0]="\u1F60\u03B9";
+	 CF[0x1FA1]="\u1F61\u03B9";
+	 CF[0x1FA2]="\u1F62\u03B9";
+	 CF[0x1FA3]="\u1F63\u03B9";
+	 CF[0x1FA4]="\u1F64\u03B9";
+	 CF[0x1FA5]="\u1F65\u03B9";
+	 CF[0x1FA6]="\u1F66\u03B9";
+	 CF[0x1FA7]="\u1F67\u03B9";
+	 CF[0x1FA8]="\u1F60\u03B9";
+	 CF[0x1FA9]="\u1F61\u03B9";
+	 CF[0x1FAA]="\u1F62\u03B9";
+	 CF[0x1FAB]="\u1F63\u03B9";
+	 CF[0x1FAC]="\u1F64\u03B9";
+	 CF[0x1FAD]="\u1F65\u03B9";
+	 CF[0x1FAE]="\u1F66\u03B9";
+	 CF[0x1FAF]="\u1F67\u03B9";
+	 CF[0x1FB2]="\u1F70\u03B9";
+	 CF[0x1FB3]="\u03B1\u03B9";
+	 CF[0x1FB4]="\u03AC\u03B9";
+	 CF[0x1FB6]="\u03B1\u0342";
+	 CF[0x1FB7]="\u03B1\u0342\u03B9";
+	 CF[0x1FB8]="\u1FB0";
+	 CF[0x1FB9]="\u1FB1";
+	 CF[0x1FBA]="\u1F70";
+	 CF[0x1FBB]="\u1F71";
+	 CF[0x1FBC]="\u03B1\u03B9";
+	 CF[0x1FBE]="\u03B9";
+	 CF[0x1FC2]="\u1F74\u03B9";
+	 CF[0x1FC3]="\u03B7\u03B9";
+	 CF[0x1FC4]="\u03AE\u03B9";
+	 CF[0x1FC6]="\u03B7\u0342";
+	 CF[0x1FC7]="\u03B7\u0342\u03B9";
+	 CF[0x1FC8]="\u1F72";
+	 CF[0x1FC9]="\u1F73";
+	 CF[0x1FCA]="\u1F74";
+	 CF[0x1FCB]="\u1F75";
+	 CF[0x1FCC]="\u03B7\u03B9";
+	 CF[0x1FD2]="\u03B9\u0308\u0300";
+	 CF[0x1FD3]="\u03B9\u0308\u0301";
+	 CF[0x1FD6]="\u03B9\u0342";
+	 CF[0x1FD7]="\u03B9\u0308\u0342";
+	 CF[0x1FD8]="\u1FD0";
+	 CF[0x1FD9]="\u1FD1";
+	 CF[0x1FDA]="\u1F76";
+	 CF[0x1FDB]="\u1F77";
+	 CF[0x1FE2]="\u03C5\u0308\u0300";
+	 CF[0x1FE3]="\u03C5\u0308\u0301";
+	 CF[0x1FE4]="\u03C1\u0313";
+	 CF[0x1FE6]="\u03C5\u0342";
+	 CF[0x1FE7]="\u03C5\u0308\u0342";
+	 CF[0x1FE8]="\u1FE0";
+	 CF[0x1FE9]="\u1FE1";
+	 CF[0x1FEA]="\u1F7A";
+	 CF[0x1FEB]="\u1F7B";
+	 CF[0x1FEC]="\u1FE5";
+	 CF[0x1FF2]="\u1F7C\u03B9";
+	 CF[0x1FF3]="\u03C9\u03B9";
+	 CF[0x1FF4]="\u03CE\u03B9";
+	 CF[0x1FF6]="\u03C9\u0342";
+	 CF[0x1FF7]="\u03C9\u0342\u03B9";
+	 CF[0x1FF8]="\u1F78";
+	 CF[0x1FF9]="\u1F79";
+	 CF[0x1FFA]="\u1F7C";
+	 CF[0x1FFB]="\u1F7D";
+	 CF[0x1FFC]="\u03C9\u03B9";
+	 CF[0x2126]="\u03C9";
+	 CF[0x212A]="\u006B";
+	 CF[0x212B]="\u00E5";
+	 CF[0x2160]="\u2170";
+	 CF[0x2161]="\u2171";
+	 CF[0x2162]="\u2172";
+	 CF[0x2163]="\u2173";
+	 CF[0x2164]="\u2174";
+	 CF[0x2165]="\u2175";
+	 CF[0x2166]="\u2176";
+	 CF[0x2167]="\u2177";
+	 CF[0x2168]="\u2178";
+	 CF[0x2169]="\u2179";
+	 CF[0x216A]="\u217A";
+	 CF[0x216B]="\u217B";
+	 CF[0x216C]="\u217C";
+	 CF[0x216D]="\u217D";
+	 CF[0x216E]="\u217E";
+	 CF[0x216F]="\u217F";
+	 CF[0x24B6]="\u24D0";
+	 CF[0x24B7]="\u24D1";
+	 CF[0x24B8]="\u24D2";
+	 CF[0x24B9]="\u24D3";
+	 CF[0x24BA]="\u24D4";
+	 CF[0x24BB]="\u24D5";
+	 CF[0x24BC]="\u24D6";
+	 CF[0x24BD]="\u24D7";
+	 CF[0x24BE]="\u24D8";
+	 CF[0x24BF]="\u24D9";
+	 CF[0x24C0]="\u24DA";
+	 CF[0x24C1]="\u24DB";
+	 CF[0x24C2]="\u24DC";
+	 CF[0x24C3]="\u24DD";
+	 CF[0x24C4]="\u24DE";
+	 CF[0x24C5]="\u24DF";
+	 CF[0x24C6]="\u24E0";
+	 CF[0x24C7]="\u24E1";
+	 CF[0x24C8]="\u24E2";
+	 CF[0x24C9]="\u24E3";
+	 CF[0x24CA]="\u24E4";
+	 CF[0x24CB]="\u24E5";
+	 CF[0x24CC]="\u24E6";
+	 CF[0x24CD]="\u24E7";
+	 CF[0x24CE]="\u24E8";
+	 CF[0x24CF]="\u24E9";
+	 CF[0xFB00]="\u0066\u0066";
+	 CF[0xFB01]="\u0066\u0069";
+	 CF[0xFB02]="\u0066\u006C";
+	 CF[0xFB03]="\u0066\u0066\u0069";
+	 CF[0xFB04]="\u0066\u0066\u006C";
+	 CF[0xFB05]="\u0073\u0074";
+	 CF[0xFB06]="\u0073\u0074";
+	 CF[0xFB13]="\u0574\u0576";
+	 CF[0xFB14]="\u0574\u0565";
+	 CF[0xFB15]="\u0574\u056B";
+	 CF[0xFB16]="\u057E\u0576";
+	 CF[0xFB17]="\u0574\u056D";
+	 CF[0xFF21]="\uFF41";
+	 CF[0xFF22]="\uFF42";
+	 CF[0xFF23]="\uFF43";
+	 CF[0xFF24]="\uFF44";
+	 CF[0xFF25]="\uFF45";
+	 CF[0xFF26]="\uFF46";
+	 CF[0xFF27]="\uFF47";
+	 CF[0xFF28]="\uFF48";
+	 CF[0xFF29]="\uFF49";
+	 CF[0xFF2A]="\uFF4A";
+	 CF[0xFF2B]="\uFF4B";
+	 CF[0xFF2C]="\uFF4C";
+	 CF[0xFF2D]="\uFF4D";
+	 CF[0xFF2E]="\uFF4E";
+	 CF[0xFF2F]="\uFF4F";
+	 CF[0xFF30]="\uFF50";
+	 CF[0xFF31]="\uFF51";
+	 CF[0xFF32]="\uFF52";
+	 CF[0xFF33]="\uFF53";
+	 CF[0xFF34]="\uFF54";
+	 CF[0xFF35]="\uFF55";
+	 CF[0xFF36]="\uFF56";
+	 CF[0xFF37]="\uFF57";
+	 CF[0xFF38]="\uFF58";
+	 CF[0xFF39]="\uFF59";
+	 CF[0xFF3A]="\uFF5A";
+// 785 case foldings total
+}
+}
--- a/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java
+++ b/tools/unicodetools/com/ibm/text/UCA/GenOverlap.java
@ -0,0 +1,490 @@
+package com.ibm.text.UCA;
+
+import java.util.*;
+import java.io.*;
+import com.ibm.text.UCD.*;
+import com.ibm.text.utility.*;
+import com.ibm.text.UTF16;
+
+public class GenOverlap {
+  
+    // Maps a complete CE list (CEList) to a string that has it; filled by addString,
+    // where a later string with the same CE list replaces an earlier one.
+    static Map completes = new TreeMap();
+    // Maps each string to its complete CE list; filled by addString.
+    static Map back = new HashMap();
+    // Maps each proper initial segment of a CE list to the Set of strings
+    // whose CE lists begin with that segment; filled by addString.
+    static Map initials = new HashMap();
+    // Shared scratch buffer that collator calls fill with collation elements.
+    static int[] ces = new int[50];
+    // Collator being examined; assigned by test() and generateRevision().
+    static UCA collator;
+    // Character database; assigned from UCD.make() by test() and generateRevision().
+    static UCD ucd;
+    // NFD and NFKD normalizers; created by test() and generateRevision().
+    static Normalizer nfd;
+    static Normalizer nfkd;
+    
+    /**
+     * Gathers the CE list of every entry in the collator's contents, adds the
+     * compatibility-decomposable supplementary code points, and then writes the
+     * overlap report via fullCheck().
+     *
+     * @param collatorIn collator whose contents are examined; stored in the static field
+     */
+    public static void test(UCA collatorIn) throws Exception {
+        collator = collatorIn;
+
+        CEList.main(null);
+
+        System.out.println("# Overlap");
+        System.out.println("# Generated " + new Date());
+
+        ucd = UCD.make();
+        nfd = new Normalizer(Normalizer.NFD);
+        nfkd = new Normalizer(Normalizer.NFKD);
+
+        UCA.CollationContents contents = collator.getCollationContents(UCA.FIXED_CE, nfd);
+
+        // Index every string and its CE list so later lookups are cheap.
+        System.out.println("# Gathering Data");
+        int[] ceCount = new int[1];
+        int progress = 0;
+
+        Utility.dot(progress++);
+        for (String entry = contents.next(ces, ceCount);
+                entry != null;
+                entry = contents.next(ces, ceCount)) {
+            addString(entry, new CEList(ces, 0, ceCount[0]));
+            Utility.dot(progress++);
+        }
+
+        // Supplementary code points: only those with a compatibility-or-higher
+        // decomposition type are added, keyed by their NFKD form.
+        for (int cp = 0x10000; cp <= 0x10FFFF; ++cp) {
+            if (!ucd.isRepresented(cp)) continue;
+            byte decompType = ucd.getDecompositionType(cp);
+            if (decompType < UCD.COMPATIBILITY) continue;
+
+            String decomp = nfkd.normalize(cp);
+            CEList remapped = getCEList(cp, decomp, decompType);
+            addString(decomp, remapped);
+            System.out.println("Adding: " + ucd.getCodeAndName(cp) + "\t" + remapped);
+        }
+
+        Utility.fixDot();
+        System.out.println("# Completes Count: " + completes.size());
+        System.out.println("# Initials Count: " + initials.size());
+        System.out.println("# Writing Overlaps");
+
+        // simpleList();
+        fullCheck();
+    }
+    
+    public static void addString(String s, CEList currCEList) {
+        back.put(s, currCEList);
+        completes.put(currCEList, s);
+              
+        for (int i = 1; i < currCEList.length(); ++i) {
+            CEList start = currCEList.start(i);
+            Set bag = (Set) initials.get(start);
+            if (bag == null) {
+                bag = new TreeSet();
+                initials.put(start, bag);
+            }
+            bag.add(s);
+        }
+    }
+    
+  
+    /**
+     * Prints to System.out every complete CE list that is also an initial
+     * segment of some longer CE list, followed by the strings owning those
+     * longer lists, and finally the number of such cases.
+     */
+    static void simpleList() {
+        int progress = 0;
+        int foundCount = 0;
+
+        for (Iterator keys = completes.keySet().iterator(); keys.hasNext(); ) {
+            Utility.dot(progress++);
+
+            // is this whole CE list the start of something longer?
+            CEList whole = (CEList) keys.next();
+            String source = (String) completes.get(whole);
+            Set longer = (Set) initials.get(whole);
+            if (longer == null) continue;
+
+            Utility.fixDot();
+            ++foundCount;
+            System.out.println("Possible Overlap: ");
+            System.out.println("  " + ucd.getCodeAndName(source));
+            System.out.println("\t" + whole);
+
+            int index = 0;
+            for (Iterator matches = longer.iterator(); matches.hasNext(); ++index) {
+                String match = (String) matches.next();
+                CEList matchCEs = (CEList) back.get(match);
+                System.out.println(index + ".  " + ucd.getCodeAndName(match));
+                System.out.println("\t" + matchCEs);
+            }
+        }
+        System.out.println("# Found Count: " + foundCount);
+    }
+    
+    // When true, matchWhole prints a trace of every step to System.out.
+    static boolean PROGRESS = false;
+      
+    /**
+     * Writes the overlap report: an HTML table to "Overlap.html" and a plain
+     * list of the overlapping strings to "Overlap.txt".
+     *
+     * Both writers are closed even when report generation throws.
+     *
+     * @throws IOException if a report file cannot be opened
+     */
+    static void fullCheck() throws IOException {
+        PrintWriter log = Utility.openPrintWriter("Overlap.html");
+        try {
+            PrintWriter textList = Utility.openPrintWriter("Overlap.txt");
+            try {
+                writeOverlaps(log, textList);
+            } finally {
+                textList.close();
+            }
+        } finally {
+            log.close();
+        }
+    }
+
+    /**
+     * Runs matchWhole on every complete CE list of length two or more and
+     * writes one HTML row to log and one line to textList per match.
+     */
+    private static void writeOverlaps(PrintWriter log, PrintWriter textList) {
+        Iterator it = completes.keySet().iterator();
+        int counter = 0;
+        int foundCount = 0;
+
+        // one-element arrays act as in/out accumulators for matchWhole
+        String [] goalChars = new String[1];
+        String [] matchChars = new String[1];
+
+        log.println("<html><head>");
+        log.println("<meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
+        log.println("<title>New Page 1</title>");
+        log.println("<style><!--");
+        log.println("table        { border-style: solid; border-width: 1 }");
+        log.println("td           { border-style: solid; border-width: 1 }");
+        log.println("--></style>");
+        log.println("</head><body><table>");
+
+        while (it.hasNext()) {
+            Utility.dot(counter++);
+            CEList key = (CEList) it.next();
+            if (key.length() < 2) continue;
+
+            String val = (String) completes.get(key);
+            goalChars[0] = "";
+            matchChars[0] = "";
+            if (matchWhole(val, key, 0, goalChars, matchChars)) {
+
+                textList.println(ucd.getCodeAndName(val));
+
+                goalChars[0] = val + goalChars[0]; // fix first char
+
+                // sanity check: both sides should yield the same CE list
+                if (!getCEList(goalChars[0]).equals(getCEList(matchChars[0]))) {
+                    log.println("<tr><td colspan='6'>WARNING:" + getCEList(matchChars[0]) + "</td></tr>");
+                }
+                foundCount++;
+                log.println("<tr><td>" + val + "</td>");
+                log.println("<td>" + goalChars[0] + "</td>");
+                log.println("<td>" + matchChars[0] + "</td>");
+                log.println("<td>" + ucd.getCodeAndName(goalChars[0]) + "</td>");
+                log.println("<td>" + ucd.getCodeAndName(matchChars[0]) + "</td>");
+                log.println("<td>" + getCEList(goalChars[0]) + "</td></tr>");
+            }
+        }
+        // every row closes its own <tr>, so only the table, body and document remain open
+        log.println("</table>Number of Overlapping characters: " + foundCount + "</body></html>");
+    }
+  
+    static private CEList getCEList(String s) {
+        int len = collator.getCEs(s, true, ces);
+        return new CEList(ces, 0, len);
+    }
+  
+    static private CEList getCEList(int originalChar, String s, byte type) {
+        int len = collator.getCEs(s, true, ces);
+        for (int i = 0; i < len; ++i) {
+            ces[i] = UCA.makeKey(UCA.getPrimary(ces[i]), 
+                UCA.getSecondary(ces[i]),
+                CEList.remap(originalChar, type, UCA.getTertiary(ces[i])));
+        }
+        return new CEList(ces, 0, len);
+    }
+  
+    /**
+     * Recursively tries to find other strings (from the completes, initials and
+     * back tables) whose CE lists account for the CE list goal.
+     *
+     * On success the strings found are prepended to otherChars[0]. Note that the
+     * Condition 2 recursion swaps the two accumulator arrays, so at deeper levels
+     * text may be accumulated into what the top-level caller passed as goalChars.
+     *
+     * @param goalStr    string associated with goal; used only in the PROGRESS trace
+     * @param goal       the CE list to be matched
+     * @param depth      current recursion depth; the search gives up when it exceeds 5
+     * @param goalChars  one-element in/out accumulator for the goal side
+     * @param otherChars one-element in/out accumulator for the matching side
+     * @return true if a match was found, false otherwise
+     */
+    static boolean matchWhole(String goalStr, CEList goal, int depth, String[] goalChars, String[] otherChars) {
+        
+        if (PROGRESS) System.out.println(Utility.repeat(". ", depth) + "Trying: " + ucd.getCodeAndName(goalStr) + ", " + goal);
+        
+        // to stop infinite loops, we limit the depth to 5
+        if (depth > 5) {
+            if (PROGRESS) System.out.println(Utility.repeat(". ", depth) + "stack exhausted");
+            return false;
+        }
+        
+        String match;
+        
+        // There are 3 possible conditions. Any of which work.
+        
+        // To eliminate double matches at the top level, we test depth > 0
+        // (at depth 0 the goal is itself a key of completes, so Condition 1
+        // would otherwise succeed immediately).
+        
+        if (depth > 0) {
+        
+            // Condition 1.
+            // we have an exact match
+            
+            match = (String) completes.get(goal);
+            if (match != null) {
+                if (PROGRESS) System.out.println(Utility.repeat(". ", depth) + "Matches Exactly: " + ucd.getCodeAndName(match));
+                otherChars[0] = match + otherChars[0];
+                if (PROGRESS) System.out.println(Utility.repeat(". ", depth)
+                    + ucd.getCode(goalChars[0])
+                    + " / " + ucd.getCode(otherChars[0])
+                );
+                return true;
+            }
+            
+            
+            // Condition 2
+            // this whole string matches some initial portion of another string
+            // AND the remainder of that other string also does a matchWhole.
+            // Example: if we get the following, we search for a match to "de"
+            // abc...
+            // abcde
+            // If we find a match, we append to the strings, the string for abc
+            // and the one for abcde
+            
+            Set probe = (Set) initials.get(goal);
+            if (probe != null) {
+                Iterator it2 = probe.iterator();
+                while (it2.hasNext()) {
+                    match = (String) it2.next();
+                    if (PROGRESS) System.out.println(Utility.repeat(". ", depth) + "Matches Longer: " + ucd.getCodeAndName(match)
+                        + "\t\tswitching");
+                    // the part of the longer string's CE list that extends past goal
+                    CEList trail = ((CEList) back.get(match)).end(goal.length());
+                    // roles are switched here: the accumulators are passed in swapped order
+                    boolean doesMatch = matchWhole(match, trail, depth+1, otherChars, goalChars);
+                    if (doesMatch) {
+                        otherChars[0] = match + otherChars[0];
+                        if (PROGRESS) System.out.println(Utility.repeat(". ", depth)
+                            + ucd.getCode(goalChars[0])
+                            + " / " + ucd.getCode(otherChars[0])
+                        );
+                        return true;
+                    }
+                }
+            }
+        }
+        
+        // Condition 3
+        // the first part of this string matches a whole other string
+        // and the remainder of this string also does a matchWhole
+        // Example: if we get the following, we search for a match to "de"
+        // abcde..
+        // abc..
+        // if we find a match
+
+        // longest initial segment is tried first
+        for (int i = goal.length() - 1; i > 0; --i) {
+            CEList first = goal.start(i);
+            match = (String) completes.get(first);
+            if (match != null) {
+                if (PROGRESS) System.out.println(Utility.repeat(". ", depth) + "Matches Shorter: " + ucd.getCodeAndName(match));
+                boolean doesMatch = matchWhole("", goal.end(i), depth+1, goalChars, otherChars);
+                if (doesMatch) {
+                    otherChars[0] = match + otherChars[0];
+                    if (PROGRESS) System.out.println(Utility.repeat(". ", depth)
+                        + ucd.getCode(goalChars[0])
+                        + " / " + ucd.getCode(otherChars[0])
+                    );
+                    return true;
+                }
+            }
+        }
+        
+        // if we get this far, we failed.
+        
+        return false;
+    }
+    
+    /**
+     * Produces both revision reports: first without, then with the doMax option.
+     */
+    public static void generateRevision (UCA collatorIn) throws Exception {
+        final boolean[] maxModes = { false, true };
+        for (int k = 0; k < maxModes.length; ++k) {
+            generateRevision(collatorIn, maxModes[k]);
+        }
+    }
+        
+    /**
+     * Compares the collator's current CE list for every entry ("old") with a CE
+     * list rebuilt code point by code point ("new"), and writes the differences
+     * and the collisions of each scheme to "UCA-old-vs-new.txt" (or
+     * "UCA-old-vs-new-MAX.txt" when doMax is true).
+     *
+     * For a code point whose NFKD form differs from the code point, the new CEs
+     * are those of the NFKD form. If that form also differs from the NFD form,
+     * each CE gets a remapped tertiary weight, and a CE with a non-zero primary
+     * and a secondary other than 0x20 is split in two: (primary, 0x20, t)
+     * followed by (0, secondary, t).
+     *
+     * @param collatorIn collator whose contents are examined; stored in the static field
+     * @param doMax      when true, the tertiary of the last CE of a multi-CE
+     *                   expansion is forced to 0x1F instead of being remapped
+     */
+    public static void generateRevision (UCA collatorIn, boolean doMax) throws Exception {
+        collator = collatorIn;
+
+        CEList.main(null);
+
+        System.out.println("# Generate");
+        System.out.println("# Generated " + new Date());
+
+        ucd = UCD.make();
+
+        nfd = new Normalizer(Normalizer.NFD);
+        nfkd = new Normalizer(Normalizer.NFKD);
+
+        UCA.CollationContents cc = collator.getCollationContents(UCA.FIXED_CE, nfd);
+
+        // store data for faster lookup
+
+        System.out.println("# Gathering Data");
+        int counter = 0;
+
+        int[] lenArray = new int[1];
+
+        // Pair(newList, Pair(str, oldList)) for every entry whose lists differ
+        Set list = new TreeSet();
+        // CE list -> first string seen with it, for the new and old schemes
+        Map newCollisions = new HashMap();
+        Map oldCollisions = new HashMap();
+        // string -> Pair(earlier string with the same CE list, that CE list)
+        Map newProblems = new TreeMap();
+        Map oldProblems = new TreeMap();
+
+        // stands in for an empty CE list so that at(0) is always valid later
+        CEList nullCEList = new CEList(new int[1]);
+
+        while (true) {
+            Utility.dot(counter++);
+            String str = cc.next(ces, lenArray);
+            if (str == null) break;
+            int len = lenArray[0];
+
+            CEList oldList = new CEList(ces, 0, len);
+
+            CEList newList = new CEList(ces,0,0);
+            int cp;
+            for (int i = 0; i < str.length(); i += UTF16.getCharCount(cp)) {
+                cp = UTF16.charAt(str, i);
+                if (0xFF67 <= cp && cp <= 0xFF6F) {
+                    System.out.println("debug");
+                }
+                if (nfkd.normalizationDiffers(cp)) {
+                    String decomp = nfkd.normalize(cp);
+                    String canon = nfd.normalize(cp);
+                    len = collator.getCEs(decomp, true, ces);
+                    if (!decomp.equals(canon)) {
+                        byte type = ucd.getDecompositionType(cp);
+                        // len grows inside the loop whenever a CE is split in two
+                        for (int j = 0; j < len; ++j) {
+                            int p = (i == 0 && decomp.length() > 1 && decomp.charAt(0) == ' ' ? 0x20A : UCA.getPrimary(ces[j]));
+                            int s = UCA.getSecondary(ces[j]);
+                            boolean needsFix = (s != 0x20 && p != 0);
+                            if (needsFix) ++len;
+                            int t = (doMax && len > 1 && j == len-1 ? 0x1F : CEList.remap(cp, type, UCA.getTertiary(ces[j])));
+                            if (needsFix) {
+                                ces[j++] = UCA.makeKey(p, 0x20, t);             // Set Extra
+                                System.arraycopy(ces, j, ces, j+1, len - j);    // Insert HOLE!
+                                p = 0;
+                            }
+                            ces[j] = UCA.makeKey(p, s, t);
+                        }
+                    }
+                } else {
+                    len = collator.getCEs(UTF16.valueOf(cp), true, ces);
+                }
+                CEList inc = new CEList(ces, 0, len);
+
+                if (cp == 0xFF71 || cp == 0xFF67) {
+                    System.out.println("  String: " + ucd.getCodeAndName(cp));
+                    System.out.println("  Type: " + ucd.getDecompositionTypeID(cp));
+                    System.out.println("  xxx: " + inc);
+                }
+
+                newList = newList.append(inc);
+
+            }
+            if (newList.length() == 0) newList = nullCEList;
+            if (oldList.length() == 0) oldList = nullCEList;
+
+            if (!newList.equals(oldList)) {
+                list.add(new Pair(newList, new Pair(str, oldList)));
+            }
+
+            // check for collisions
+            if (str.equals("\u206F")) {
+                System.out.println("debug");
+            }
+            Object probe = newCollisions.get(newList);
+            if (probe == null) {
+                newCollisions.put(newList, str);
+            } else {
+                newProblems.put(str, new Pair((String)probe, newList));
+            }
+
+            probe = oldCollisions.get(oldList);
+            if (probe == null) {
+                oldCollisions.put(oldList, str);
+            } else {
+                oldProblems.put(str, new Pair((String)probe, oldList));
+            }
+
+        }
+
+        // split the colliding strings into new-only, old-only and common sets
+        Set newKeys = new TreeSet(newProblems.keySet());
+        Set oldKeys = new TreeSet(oldProblems.keySet());
+        Set joint = new TreeSet(newKeys);
+        joint.retainAll(oldKeys);
+        newKeys.removeAll(joint);
+        oldKeys.removeAll(joint);
+
+        PrintWriter log = Utility.openPrintWriter("UCA-old-vs-new" + (doMax ? "-MAX.txt" : ".txt"));
+        try {
+            Iterator it = list.iterator();
+            int last = -1;
+            while (it.hasNext()) {
+                Utility.dot(counter++);
+                Pair value = (Pair) it.next();
+                CEList newList = (CEList)value.first;
+                // a blank line separates groups that start with different primaries
+                int cur = UCA.getPrimary(newList.at(0));
+                if (cur != last) {
+                    log.println();
+                    last = cur;
+                }
+                Pair v2 = (Pair) value.second;
+                String ss = (String)v2.first;
+                // read a whole code point, so a supplementary character is not
+                // looked up by its lead surrogate
+                log.println(ucd.getCodeAndName(ss) + "\t\t" + ucd.getDecompositionTypeID(UTF16.charAt(ss, 0)));
+                log.println("\tnew:\t" + value.first);
+                log.println("\told:\t" + v2.second);
+            }
+
+            showCollisions(log, "New Collisions:", newKeys, newProblems);
+            showCollisions(log, "Old Collisions:", oldKeys, oldProblems);
+            showCollisions(log, "In Both:", joint, oldProblems);
+        } finally {
+            // release the file even if report generation fails
+            log.close();
+        }
+    }
+    
+    /**
+     * Writes one section of the collision report to log: the title followed by
+     * the number of colliding strings, then one entry per collision, sorted by
+     * CE list, with a blank line whenever the first primary weight changes.
+     *
+     * @param log   destination writer; flushed but not closed
+     * @param title section heading, printed immediately before the count
+     * @param bad   the colliding strings to report
+     * @param probs maps each string to a Pair of (other string, shared CE list)
+     */
+    static void showCollisions(PrintWriter log, String title, Set bad, Map probs) {
+        log.println();
+        log.println(title + bad.size());
+
+        // collect first, so the output is sorted by CE list
+        Set sorted = new TreeSet();
+        for (Iterator keys = bad.iterator(); keys.hasNext(); ) {
+            String key = (String) keys.next();
+            Pair entry = (Pair) probs.get(key);
+            String other = (String) entry.first;
+            CEList cel = (CEList) entry.second;
+            if (key.equals("\u0001")) {
+                System.out.println("debug");
+            }
+            String description = ucd.getCodeAndName(key) + ",\t" + ucd.getCodeAndName(other);
+            sorted.add(new Pair(cel, description));
+        }
+
+        int previousPrimary = -1;
+        for (Iterator rows = sorted.iterator(); rows.hasNext(); ) {
+            Pair row = (Pair) rows.next();
+            CEList cel = (CEList) row.first;
+            int primary = UCA.getPrimary(cel.at(0));
+            if (primary != previousPrimary) {
+                previousPrimary = primary;
+                log.println();
+            }
+            log.println("Collision between: " + row.second);
+            log.println("\t" + row.first);
+        }
+        log.flush();
+    }
+}
--- a/tools/unicodetools/com/ibm/text/UCA/RuleComparator.java
+++ b/tools/unicodetools/com/ibm/text/UCA/RuleComparator.java
@ -0,0 +1,54 @@
+package com.ibm.text.UCA;
+
+import com.ibm.text.UCD.*;
+import com.ibm.text.utility.*;
+
+/**
+ * Orders strings by comparing them char by char: the first differing char
+ * decides, and when one string is a prefix of the other the shorter one
+ * sorts first.
+ */
+public final class RuleComparator implements java.util.Comparator {
+
+    /**
+     * Compares two strings.
+     *
+     * @param s first string (must be a String)
+     * @param t second string (must be a String)
+     * @return -1 if s sorts before t, 1 if it sorts after, 0 if they are equal
+     * @throws ClassCastException if either argument is not a String
+     */
+    public int compare(Object s, Object t) {
+        String ss = (String)s;
+        String tt = (String)t;
+
+        // both indices stay below both lengths, so charAt cannot go out of range
+        int shared = Math.min(ss.length(), tt.length());
+        for (int i = 0; i < shared; ++i) {
+            char cs = ss.charAt(i);
+            char ct = tt.charAt(i);
+            if (cs == ct) continue;
+            return cs < ct ? -1 : 1;
+        }
+
+        // one string is a prefix of the other (or they are equal)
+        if (ss.length() > tt.length()) return 1;
+        if (ss.length() < tt.length()) return -1;
+        return 0;
+    }
+}
--- a/tools/unicodetools/com/ibm/text/UCA/UCA.java
+++ b/tools/unicodetools/com/ibm/text/UCA/UCA.java
--- a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
--- a/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java
--- a/tools/unicodetools/com/ibm/text/UCD/BuildNames.java
+++ b/tools/unicodetools/com/ibm/text/UCD/BuildNames.java
@ -0,0 +1,538 @@
+package com.ibm.text.UCD;
+
+import java.io.IOException;
+//import com.ibm.text.unicode.UInfo;
+import java.util.*;
+import java.io.*;
+//import java.text.*;
+
+import com.ibm.text.utility.*;
+
+
+public class BuildNames implements UCD_Types {
+    
+    // Enables the verbose tracing in lookup, putString and addString.
+    static final boolean DEBUG = true;
+    
+    // Character database; assigned in main.
+    static UCD ucd;
+    
+    /**
+     * Entry point: creates the character database, then gathers and compacts
+     * the character names.
+     */
+    public static void main(String[] args) throws IOException {
+        BuildNames.ucd = UCD.make();
+        BuildNames.collectWords();
+    }
+    
+    // Distinct words (filled by stash) and whole transformed names (filled by
+    // collectWords); both ordered by LengthFirstComparator.
+    static Set words = new TreeSet(new LengthFirstComparator());
+    static Set lines = new TreeSet(new LengthFirstComparator());
+    // Occurrence count per character, indexed by char value; only values 0..127 fit.
+    static int[] letters = new int[128];
+    
+    static void stash(String word) {
+        words.add(word);
+        for (int i = 0; i < word.length(); ++i) {
+            letters[word.charAt(i)]++;
+        }
+    }
+    
+    static String transform(String line) {
+        StringBuffer result = new StringBuffer();
+        boolean changed = false;
+        for (int i = 0; i < line.length(); ++i) {
+            char c = line.charAt(i);
+            
+            if (c == '-' || c == '<' || c == '>') {
+                if (result.length() > 0 && result.charAt(result.length()-1) != ' ') result.append(' ');
+                result.append(c);
+                if (i + 1 < line.length() && line.charAt(i+1) != ' ') result.append(' ');
+                changed = true;
+                continue;
+            }
+            
+            if ('a' <= c && c <= 'z') {
+                result.append((char)(c - 'a' + 'A'));
+                changed = true;
+                continue;
+            }
+            if ('0' <= c && c <= '9') {
+                result.append('*').append((char)(c - '0' + 'A'));
+                changed = true;
+                continue;
+            }                
+            result.append(c);
+        }
+        if (!changed) return line;
+        return result.toString().trim();
+    }
+    
+    /**
+     * Gathers the transformed name of every code point that has no computable
+     * name, splits the names into words, feeds the words and then the whole
+     * lines to CompactName, and reports every string that does not survive a
+     * round trip through its token.
+     */
+    static void collectWords() throws IOException {
+        
+        System.out.println("Gathering data");
+        String[] parts = new String[100];
+        int used = 0;
+        int sum = 0;
+        // NOTE(review): the bound excludes 0x10FFFF itself -- confirm that is intended
+        for (int i = 0; i < 0x10FFFF; ++i) {
+            if (ucd.hasComputableName(i)) continue;
+            String name = transform(ucd.getName(i));
+            
+            sum += name.length();
+            used++;
+            
+            // collect the individual words of the name
+            int len = Utility.split(name, ' ', parts);
+            for (int j = 0; j < len; ++j) {
+                stash(parts[j]);
+            }
+            
+            lines.add(name);
+        }
+        System.out.println("Overhead: " + (lastLink - used) + ", " + ((lastLink - used) * 100 / used) + "%");
+        System.out.println("Strings: " + sum + ", " + (lastLink*4));
+        
+        System.out.println();
+        System.out.println("Compacting Words");
+        System.out.println();
+        Iterator it = words.iterator();
+        int i = 0;
+        while (it.hasNext()) {
+            String s = (String) it.next();
+            int test = CompactName.addWord(s);
+            String round = CompactName.stringFromToken(test);
+            boolean goesRound = round.equals(s);
+            // change "false" to "true" to print every word, not only the failures;
+            // the round-trip detail is appended only for failures, as in the lines loop
+            if (false || !goesRound) System.out.println("Compacting: '" + s + "': " + i++ + "(" + CompactName.lastToken + ")"
+                + (!goesRound ? ": NO RT: '" + round + "'" : ""));
+        }
+        
+        System.out.println();
+        System.out.println("Compacting Lines");
+        System.out.println();
+        CompactName.startLines();
+        it = lines.iterator();
+        i = 0;
+        while (it.hasNext()) {
+            String s = (String) it.next();
+            if (s.equals("< BELL >")) {
+                System.out.println("DEBUG");
+            }
+            int test = CompactName.addLine(s);
+            String round = CompactName.stringFromToken(test);
+            boolean goesRound = round.equals(s);
+            if (false || !goesRound) System.out.println("Compacting: '" + s + "': " + i++ + "(" + CompactName.lastToken + ")"
+                + (!goesRound ? ": NO RT: '" + round + "'" : ""));
+        }
+        
+        System.out.println("Strings: " + sum
+            + ", " + (CompactName.spacedMinimum*4)
+            + ", " + (CompactName.lastToken*4)
+        );
+        
+    }
+    /*
+        Set stuff = new TreeSet();
+        for (int i = 0; i < letters.length; ++i) {
+            if (letters[i] != 0) {
+                stuff.add(new Integer((letters[i] << 8) + i));
+            }
+        }
+        
+        it = stuff.iterator();
+        while (it.hasNext()) {
+            int in = ((Integer) it.next()).intValue();
+            System.out.println((char)(in & 0xFF) + ":\t" + String.valueOf(in >> 8));
+        }
+            int r = addString(name);
+            if (!DEBUG && !rname.equals(name)) {
+                System.out.println("\tNo Round Trip: '" + rname + "'");
+            }
+    */
+    
+    // String -> Integer link index, filled by putString.
+    static Map stringToInt = new HashMap();
+    static Map intToString = new HashMap();
+    
+    // Maps a character (space, '-', '>', '<', A-Z, 0-9) to its small token
+    // number; 0 means the character has no token.
+    static final int[] remap = new int['Z'+1];
+    // One more than the largest token number assigned in remap.
+    static final int maxToken;
+    
+    static {
+        // tokens are numbered from 1 in this order: symbols, letters, digits
+        final String symbols = " -><";
+        int next = 1;
+        for (int k = 0; k < symbols.length(); ++k) {
+            remap[symbols.charAt(k)] = next++;
+        }
+        for (char letter = 'A'; letter <= 'Z'; ++letter) {
+            remap[letter] = next++;
+        }
+        for (char digit = '0'; digit <= '9'; ++digit) {
+            remap[digit] = next++;
+        }
+        maxToken = next;
+    }
+    
+    // Inverse of remap: token number -> one-character string; token 0 is the empty string.
+    static final String[] unmap = new String[maxToken];
+    static {
+        unmap[0] = "";
+        for (int ch = 0; ch < remap.length; ++ch) {
+            int token = remap[ch];
+            if (token == 0) continue;   // character has no token
+            unmap[token] = String.valueOf((char)ch);
+        }
+    }
+    
+    // Link table: putString stores one int per string, with the lead token in
+    // the high 16 bits and the trail token in the low 16 bits.
+    static int[] links = new int[40000];
+    static final int linkStart = 0;
+    // Index of the next free slot in links.
+    static int lastLink = 0;
+    // Token values above this bound are literals that encode one or two
+    // characters directly (see getInt and lookup) instead of indexing links.
+    static final int LITERAL_BOUND = 0x7FFF - maxToken * maxToken;
+    
+    static boolean isLiteral(int i) {
+        return (i & 0x7FFF) > LITERAL_BOUND;
+    }
+    
+    static String lookup(int i) {
+        String result;
+        boolean trailingSpace = false;
+        if ((i & 0x8000) != 0) {
+            i ^= 0x8000;
+            trailingSpace = true;
+        }
+        if (i > LITERAL_BOUND) {
+            i = i - LITERAL_BOUND;
+            int first = i / maxToken;
+            int second = i % maxToken;
+            result = unmap[first] + unmap[second];
+        } else {
+            int value = links[i];
+            int lead = value >>> 16;
+            int trail = value & 0xFFFF;
+            //if (DEBUG) System.out.println("lead: " + lead + ", trail: " + trail);
+            result = lookup(lead) + lookup(trail);
+        }
+        if (trailingSpace) result += ' ';
+        if (DEBUG) System.out.println("token: " + i + " => '" + result + "'");
+        return result;
+    }
+    
+    static int getInt(String s) {
+        if (s.length() < 3) {
+            if (s.length() == 0) return 0;
+            int first = s.charAt(0);
+            int second = s.length() > 1 ? s.charAt(1) : 0;
+            return LITERAL_BOUND + (remap[first] * maxToken + remap[second]);
+        }
+        Object in = stringToInt.get(s);
+        if (in == null) return -1;
+        return ((Integer)in).intValue();
+    }
+    
+    static int putString(String s, int lead, int trail) {
+        Object in = stringToInt.get(s);
+        if (in != null) throw new IllegalArgumentException();
+        int value = (lead << 16) + (trail & 0xFFFF);
+        int result = lastLink;
+        links[lastLink++] = value;
+        
+        if (DEBUG) {
+            System.out.println("'" + s + "', link[" + result + "] = lead: " + lead + ", trail: " + trail);
+            String roundTrip = lookup(result);
+            if (!roundTrip.equals(s)) {
+                System.out.println("\t*** No Round Trip: '" + roundTrip + "'");
+            }
+        }
+        stringToInt.put(s, new Integer(result));
+        return result;
+    }
+    
+    // s cannot have a trailing space. Must be <,>,-,SPACE,0-9,A-Z
+    //
+    // Returns the int for s, adding a new entry when getInt(s) reports none (-1).
+    // A new entry is a (lead, trail) pair recorded through putString: s is cut into a first
+    // and a last part, and each part is resolved with getInt or by a recursive addString call.
+    // When the cut falls right after a space, the loop below leaves that space out of the
+    // first part and ORs 0x8000 into the lead value instead.
+    static int addString(String s) {
+        int result = getInt(s);
+        if (result != -1) return result;
+        int limit = s.length() - 1;
+        int bestLen = 0;
+        int best_i = 0;
+        int bestSpaceLen = 0;
+        int bestSpace_i = 0;
+        int lastSpace = -1;
+        int spaceBits;
+        int endOfFirst;
+        
+        // invariant. We break after a space if there is one.
+        
+        // Pass 1: try every split position i; c is the character immediately before the split.
+        for (int i = 1; i < limit; ++i) {
+            char c = s.charAt(i-1);
+            spaceBits = 0;
+            endOfFirst = i;
+            if (c == ' ') {
+                // split follows a space: drop the space from the first part and flag it
+                lastSpace = i;
+                endOfFirst--;
+                spaceBits = 0x8000;
+            }
+            
+            String firstPart = s.substring(0, endOfFirst);
+            String lastPart = s.substring(i);
+            // NOTE(review): looks like a leftover debugging probe -- it prints regardless of DEBUG
+            if (firstPart.equals("<START OF ")) {
+                System.out.println("HUH");
+            }
+            int lead = getInt(firstPart);
+            int trail = getInt(lastPart);
+            if (lead >= 0 && trail >= 0) { // if both match, return immediately with pair
+                if (DEBUG) System.out.println(s + " => '" + firstPart + (spaceBits != 0 ? "*" : "")
+                    + "' # '" + lastPart + "' MATCH BOTH");
+                return putString(s, spaceBits | lead, trail);
+            }
+            // isLiteral is declared outside this excerpt; a part for which it returns false is a
+            // candidate, and the longest candidate (measured in characters) is remembered.
+            if (!isLiteral(lead)) {
+                if (i > bestLen) {
+                    bestLen = i;
+                    best_i = i;
+                }
+                // NOTE(review): stores i + 1 rather than i, so the parts rebuilt after the loop are
+                // s.substring(0, i) and s.substring(i + 1), not the firstPart/lastPart tested
+                // here -- confirm this is intended.
+                if (i > bestSpaceLen && c == ' ') {
+                    bestSpaceLen = i;
+                    bestSpace_i = i + 1;
+                }                    
+            }
+            int end_i = s.length() - i;
+            if (!isLiteral(trail)) {
+                if (end_i > bestLen) {
+                    bestLen = end_i;
+                    best_i = i;
+                }
+                if (end_i > bestSpaceLen && c == ' ') {
+                    bestSpaceLen = end_i;
+                    bestSpace_i = i + 1;
+                }
+            }
+        }
+        // If any split position followed a space, only the space-based candidate is kept
+        // (which may leave bestLen at 0 and send us to the fallback below).
+        if (lastSpace >= 0) {
+            bestLen = bestSpaceLen;
+            best_i = bestSpace_i;
+        }
+        
+        spaceBits = 0;
+        
+        if (bestLen > 0) { // if one matches, recurse -- and return pair
+            endOfFirst = best_i;
+            if (lastSpace > 0) {
+                --endOfFirst;
+                spaceBits = 0x8000;
+            }
+            String firstPart = s.substring(0, endOfFirst);
+            String lastPart = s.substring(best_i);
+            int lead = getInt(firstPart);
+            int trail = getInt(lastPart);
+            if (lead >= 0) {
+                if (DEBUG) System.out.println(s + " => '" + firstPart + (spaceBits != 0 ? "*" : "")
+                    + "' # '" + lastPart + "' MATCH FIRST");
+                return putString(s, spaceBits | lead, addString(lastPart));
+            } else {
+                if (DEBUG) System.out.println(s + " => '" + firstPart + (spaceBits != 0 ? "*" : "")
+                    + "' # '" + lastPart + "' MATCH SECOND");
+                return putString(s, spaceBits | addString(firstPart), trail);
+            }
+        }
+        // otherwise, we failed to find anything. Then break before the last word, if there is one
+        // otherwise break in the middle (but at even value)
+        
+        
+        if (lastSpace >= 0) {
+            best_i = lastSpace;
+            endOfFirst = lastSpace - 1;
+            spaceBits = 0x8000;
+        } else {
+            // ((length + 1) / 4) * 2 is an even index at or just below the midpoint
+            endOfFirst = best_i = ((s.length() + 1) / 4) * 2;
+        }
+        String firstPart = s.substring(0, endOfFirst);
+        String lastPart = s.substring(best_i);
+        if (DEBUG) System.out.println(s + " => '" + firstPart + (spaceBits != 0 ? "*" : "")
+            + "' # '" + lastPart + "' FALLBACK");
+        // both halves are registered recursively, first half first
+        return putString(s, spaceBits | addString(firstPart), addString(lastPart));
+    }
+    
+    /*
+    static int addCompression(String s) {
+        Object in = stringToInt.get(s);
+        if (in != null) return ((Integer) in).intValue();
+        // find best match, recursively
+        int bestBreak = -1;
+        boolean pickFirst = false;
+        for (int i = 1; i < s.length() - 1; ++i) {
+            char c = s.charAt(i);
+            if (c == ' ' || c == '-') {
+                Object pos1 = stringToInt.get(s.substring(0,i+1));
+                //Object pos23 = stringToInt.get(s..substring(i));
+                
+
+                    if (pos2 >= 0 && pos3 >= 0) {
+                        fullToCompressed.put(value, new Integer(index + reserved));
+                        continue main;
+                    }
+                    if (pos2 >= 0) {
+                         if (k > bestBreak) {
+                            bestBreak = k;
+                            pickFirst = true;
+                         }
+                    } else if (pos3 >= 0) {
+                        if (value.length() - k > bestBreak) {
+                            bestBreak = k;
+                            pickFirst = false;
+                        }
+                    }
+                }
+                
+            }
+        }
+    }
+        
+    static void gatherData() throws IOException {
+        System.out.println("Gathering data");
+        Counter counter = new Counter();
+        String[] parts = new String[100];
+        String[] parts2 = new String[100];
+        int total = 0;
+        for (int i = 0; i < 0x10FFFF; ++i) {
+            //if ((i & 0xFF) == 0) System.out.println(Utility.hex(i));
+            if (!ucd.isRepresented(i)) continue;
+            String s = ucd.getName(i);
+            total += s.length();
+            int len = Utility.split(s, ' ', parts);
+            for (int j = 0; j < len; ++j) {
+                if (parts[j].indexOf('-') >= 0) {
+                    // hyphen stuff
+                    int len2 = Utility.split(parts[j], '-', parts2);
+                    for (int k = 0; k < len2; ++k) {
+                        if (k == len2 - 1) {
+                            counter.add(parts2[k] + '-');
+                        } else {
+                            counter.add(parts2[k] + " ");
+                        }
+                    }
+                } else {
+                   // normal
+                    counter.add(parts[j] + " ");
+                }
+            }
+        }
+        
+        System.out.println("Sorting data");
+        Map m = counter.extract();
+        
+        System.out.println("Printing data");
+        
+        PrintWriter log = new PrintWriter(
+            new BufferedWriter(
+            new OutputStreamWriter(
+                new FileOutputStream(GEN_DIR + "NameCompression.txt")),
+            32*1024));
+        
+        log.println("total: " + total);
+        
+        Iterator it = m.keySet().iterator();
+        
+        String mondo = "";
+        int i = 0;
+        int strTotal = 0;
+        
+        int index = 0;
+        Map fullToCompressed = new HashMap();
+        
+        String mondoIndex = "";
+
+        main:
+        while (it.hasNext()) {
+            index++;
+            if ((i & 255) == 0) System.out.println("#" + i);
+            Counter.RWInteger key = (Counter.RWInteger) it.next();
+            String value =  (String)m.get(key);
+            log.println(i++ + ": " + key + ": \"" + value + "\"");
+            strTotal += value.length();
+            
+            
+            // first 128 are the highest frequency, inc. space
+            
+            if (index < 128 - SINGLES) {
+                mondo += value;
+                fullToCompressed.put(value, new String((char)(index + reserved)));
+                continue;
+            }
+            
+            int pos = mondo.indexOf(value);
+            if (pos >= 0) {
+                // try splitting!
+                
+                int bestBreak = -1;
+                boolean pickFirst = false;
+                if (value.length() > 2) for (int k = 1; k < value.length()-1; ++k) {
+                    int pos2 = mondo.indexOf(value.substring(0,k) + " ");
+                    int pos3 = mondo.indexOf(value.substring(k));
+                    if (pos2 >= 0 && pos3 >= 0) {
+                        fullToCompressed.put(value, new Integer(index + reserved));
+                        continue main;
+                    }
+                    if (pos2 >= 0) {
+                         if (k > bestBreak) {
+                            bestBreak = k;
+                            pickFirst = true;
+                         }
+                    } else if (pos3 >= 0) {
+                        if (value.length() - k > bestBreak) {
+                            bestBreak = k;
+                            pickFirst = false;
+                        }
+                    }
+                }
+                if (bestBreak > 0) {
+                    if (pickFirst) {
+                        mondo += value.substring(bestBreak);
+                    } else {
+                        mondo += value.substring(0, bestBreak) + " ";
+                    }
+                } else {
+                    mondo += value;
+                }
+            }
+            
+            // high bit on, means 2 bytes, look in array
+        }
+ 
+        log.println("strTotal: " + strTotal);
+        log.println("mondo: " + mondo.length());
+        
+        int k = 80;
+        for (; k < mondo.length(); k += 80) {
+            log.println(mondo.substring(k-80, k));
+        }
+        log.println(mondo.substring(k-80)); // last line
+        
+        log.close();
+    }
+    
+    static int indexOf(StringBuffer target, String source) {
+        int targetLen = target.length() - source.length();
+        main:
+        for (int i = 0; i <= targetLen; ++i) {
+            for (int j = 0; j < source.length(); ++j) {
+                if (target.charAt(i) != source.charAt(j)) continue main;
+            }
+            return i;
+        }
+        return -1;
+    }
+    
+    static final int SINGLES = 26 + 10 + 2;
+    */
+    
+    /*
+    static String decode(int x) {
+        if (x < SINGLES) {
+            if (x < 26) return String.valueOf(x + 'A');
+            if (x < 36) return String.valueOf(x - 26 + '0');
+            if (x == 36) return "-";
+            return " ";
+        }
+        if (x < binaryLimit) {
+            x = 
+    */
+}
--- a/tools/unicodetools/com/ibm/text/UCD/CompactName.java
+++ b/tools/unicodetools/com/ibm/text/UCD/CompactName.java
@ -0,0 +1,260 @@
+package com.ibm.text.UCD;
+
+import java.io.IOException;
+import java.util.*;
+import java.io.*;
+import java.text.*;
+
+public class CompactName {
+    
+    static final boolean DEBUG = false;
+    
+    /**
+     * Ad-hoc smoke test: round-trips the literal "ABZ" through a token, registers one long
+     * word, then prints the string form of every token registered so far.
+     */
+    public static void main(String[] args) throws IOException {
+        System.out.println(stringFromToken(tokenFromString("ABZ")));
+
+        addWord("ABSOLUTEISM");
+
+        int index = 0;
+        while (index < lastToken) {
+            System.out.println(stringFromToken(index));
+            ++index;
+        }
+    }
+    
+    
+    // compactMap: character -> 5-bit code (0 means "no code"); compactUnmap: code -> character.
+    static final char[] compactMap = new char[128];
+    static final char[] compactUnmap = new char[128];
+
+    static {
+        // Codes are handed out in this order starting at 1: A-Z, then '-', '>', '<', '*'.
+        // Code 0 is never assigned, so both tables keep their zero default for it.
+        final String symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ-><*";
+        for (int k = 0; k < symbols.length(); ++k) {
+            char symbol = symbols.charAt(k);
+            char code = (char) (k + 1);
+            compactMap[symbol] = code;
+            compactUnmap[code] = symbol;
+        }
+    }
+    
+    /*
+    static String expand(String s) {
+        StringBuffer result = new StringBuffer();
+        for (int i = 0; i < s.length(); ++i) {
+            int m = s.charAt(i);
+            if (m == 31 && i < s.length() + 1) {
+                m = 31 + s.charAt(++i);
+            }
+            result.append(compactUnmap[m]);
+        }
+        return result.toString();
+    }
+    
+    static String compact(String s) {
+        StringBuffer result = new StringBuffer();
+        for (int i = 0; i < s.length(); ++i) {
+            int m = compactMap[s.charAt(i)];
+            if (m >= 31) {
+                result.append((char)31);
+                m -= 31;
+            }
+            result.append(m);
+        }
+        return result.toString();
+    }
+    */
+    
+    // string -> Integer index into tokenList; filled by addTokenForString
+    static Map string_token = new HashMap();
+    // NOTE(review): never read or written in the code visible here -- possibly unused
+    static Map token_string = new HashMap();
+    
+    // each used slot holds (lead << 16) + (trail & 0xFFFF), see addTokenForString
+    static int[] tokenList = new int[40000];
+    static final int tokenStart = 0;
+    // index of the next free slot in tokenList (also the number of slots used)
+    static int lastToken = 0;
+    
+    // tokens at or above this index are expanded with a space between lead and trail
+    // (see stringFromToken); set by startLines()
+    static int spacedMinimum = Integer.MAX_VALUE;
+    
+    /**
+     * Tells whether bit 15 (0x8000) is set in the given token value.
+     * Note that -1, the "no token" result of tokenFromString, also has this bit set.
+     */
+    static boolean isLiteral(int i) {
+        final int literalFlag = 0x8000;
+        return (i & literalFlag) == literalFlag;
+    }
+    
+    /**
+     * Registers s as the pair (lead, trail) in the next free tokenList slot.
+     *
+     * @param s     the string being registered; must not already be in string_token
+     * @param lead  token for the first part (stored in the high 16 bits)
+     * @param trail token for the last part (only its low 16 bits are stored)
+     * @return the index of the new tokenList slot
+     * @throws IllegalArgumentException if s already has an entry in string_token
+     */
+    static int addTokenForString(String s, int lead, int trail) {
+        if (string_token.get(s) != null) {
+            throw new IllegalArgumentException();
+        }
+        final int index = lastToken++;
+        // the low 16 bits of (lead << 16) are zero, so OR-ing gives the same value as adding
+        tokenList[index] = (lead << 16) | (trail & 0xFFFF);
+
+        if (DEBUG) {
+            System.out.println("'" + s + "', tokenList[" + index + "] = lead: " + lead + ", trail: " + trail);
+            String expanded = stringFromToken(index);
+            if (!expanded.equals(s)) {
+                System.out.println("\t*** No Round Trip: '" + expanded + "'");
+            }
+        }
+        string_token.put(s, new Integer(index));
+        return index;
+    }
+    
+    /**
+     * Expands a token back into its string.
+     * <p>A token with bit 15 (0x8000) set is a literal: three 5-bit codes, each looked up in
+     * compactUnmap; a second or third code that maps to 0 contributes nothing.
+     * Any other token is an index into tokenList, whose slot holds a lead token (high 16 bits)
+     * and a trail token (low 16 bits); both are expanded recursively, and joined with a space
+     * when the index is at or above spacedMinimum.
+     *
+     * @param i the token to expand
+     * @return the expanded string
+     * @throws IllegalArgumentException if i is not a literal and is not the index of a slot
+     *         that has already been filled (that is, i is negative or not below lastToken)
+     */
+    static String stringFromToken(int i) {
+        String result;
+        if ((i & 0x8000) != 0) {
+            char first = compactUnmap[(i >> 10) & 0x1F];
+            char second = compactUnmap[(i >> 5) & 0x1F];
+            char third = compactUnmap[i & 0x1F];
+            result = String.valueOf(first);
+            if (second != 0) result += String.valueOf(second);
+            if (third != 0) result += String.valueOf(third);
+        } else if (i < 0 || i >= lastToken) {
+            // Slot lastToken itself has not been written yet: reading it would yield (0, 0)
+            // and, with an empty table, recurse on token 0 without end.
+            throw new IllegalArgumentException("bad token: " + i);
+        } else {
+            int value = tokenList[i];
+            int lead = value >>> 16;
+            int trail = value & 0xFFFF;
+            if (i >= spacedMinimum) result = stringFromToken(lead) + ' ' + stringFromToken(trail);
+            else result = stringFromToken(lead) + stringFromToken(trail);
+        }
+        if (DEBUG) System.out.println("token: " + i + " => '" + result + "'");
+        return result;
+    }
+    
+    /**
+     * Looks up the token for s without registering anything.
+     * <p>Strings of one to three characters are always encoded directly as a literal:
+     * 0x8000 plus three 5-bit compactMap codes (absent positions use compactMap[0]).
+     * Longer strings are looked up in string_token.
+     * NOTE(review): characters without a compactMap code (for example digits or space)
+     * encode as 0, and an empty string or a character above 127 raises an index exception --
+     * confirm callers never pass such input.
+     *
+     * @param s the string to look up
+     * @return the literal or registered token, or -1 if s is longer than three characters
+     *         and has not been registered
+     */
+    static int tokenFromString(String s) {
+        final int length = s.length();
+        if (length > 3) {
+            Object known = string_token.get(s);
+            return (known == null) ? -1 : ((Integer) known).intValue();
+        }
+        // pack the codes five bits at a time, first character in the highest position
+        int packed = compactMap[s.charAt(0)];
+        packed = (packed << 5) + compactMap[length > 1 ? s.charAt(1) : 0];
+        packed = (packed << 5) + compactMap[length > 2 ? s.charAt(2) : 0];
+        return 0x8000 + packed;
+    }
+    
+    
+    /**
+     * Returns the token for the word s, registering it (and any parts it needs) if necessary.
+     * Strategy, in order:
+     *  1. if tokenFromString already yields a token, return it;
+     *  2. try every split point from right to left, and if both parts have tokens, register the pair;
+     *  3. otherwise split where one part is an already registered (non-literal) token, taking
+     *     the longest such part, and recurse on the other part;
+     *  4. otherwise split at a multiple of three characters and recurse on both halves.
+     */
+    static int addWord(String s) {
+        
+        int result = tokenFromString(s);
+        if (result != -1) return result;
+        int bestLen = 0;
+        int best_i = 0;
+        
+        int limit = s.length() - 1;
+        
+        for (int i = limit; i >= 1; --i) {
+
+            String firstPart = s.substring(0, i);
+            String lastPart = s.substring(i);
+
+            int lead = tokenFromString(firstPart);
+            int trail = tokenFromString(lastPart);
+            
+            if (lead >= 0 && trail >= 0) { // if both match, return immediately with pair
+                if (DEBUG) show(s, firstPart, lastPart, "MATCH BOTH");
+                return addTokenForString(s, lead, trail);
+            }
+            // isLiteral(-1) is true (bit 15 is set in -1), so a part with no token is skipped
+            // here just like a literal; only registered tokenList indexes qualify.
+            if (!isLiteral(lead)) {
+                if (i > bestLen) {
+                    bestLen = i;
+                    best_i = i;
+                }
+            }
+            if (!isLiteral(trail)) {
+                int end_i = s.length() - i;
+                if (end_i > bestLen) {
+                    bestLen = end_i;
+                    best_i = i;
+                }
+            }
+        }
+        if (bestLen > 0) { // if one matches, recurse -- and return pair
+            String firstPart = s.substring(0, best_i);
+            String lastPart = s.substring(best_i);
+            int lead = tokenFromString(firstPart);
+            int trail = tokenFromString(lastPart);
+            if (lead >= 0) {
+                if (DEBUG) show(s, firstPart, lastPart, "MATCH FIRST");
+                return addTokenForString(s, lead, addWord(lastPart));
+            } else {
+                if (DEBUG) show(s, firstPart, lastPart, "MATCH SECOND");
+                return addTokenForString(s, addWord(firstPart), trail);
+            }
+        }
+        
+        // break at multiple of 3
+        // (the loop above already returns for any s of up to six characters, because a split
+        // at i == 3 then leaves two literal parts; so best_i is at least 3 here)
+        
+        best_i = ((s.length() + 1) / 6) * 3;
+        String firstPart = s.substring(0, best_i);
+        String lastPart = s.substring(best_i);
+        if (DEBUG) show(s, firstPart, lastPart, "Fallback");
+        // the first half is registered before the second, which fixes the slot order
+        return addTokenForString(s, addWord(firstPart), addWord(lastPart));
+    }
+    
+    /** Prints one debugging line of the form: s => 'firstPart' # 'lastPart' comment */
+    static void show(String s, String firstPart, String lastPart, String comment) {
+        StringBuffer line = new StringBuffer();
+        line.append(s).append(" => '").append(firstPart);
+        line.append("' # '").append(lastPart).append("' ").append(comment);
+        System.out.println(line.toString());
+    }
+    
+    /**
+     * Marks the boundary between the tokens registered so far and those registered afterwards:
+     * stringFromToken inserts a space between lead and trail for every token index at or
+     * above the value recorded here.
+     */
+    static void startLines() {
+        spacedMinimum = lastToken;
+    }
+    
+    /**
+     * Returns the token for a line of space-separated pieces, registering it if necessary.
+     * <p>The line is only ever split at a space (never at index 0 or at the last character),
+     * and the space itself belongs to neither piece. Split points are tried from right to
+     * left: if both pieces already have tokens the pair is registered at once; otherwise the
+     * split that leaves the longest single piece is used and the pieces that have no token
+     * yet are registered recursively.
+     * <p>NOTE(review): stringFromToken only re-inserts the space for token indexes at or
+     * above spacedMinimum, so presumably startLines() must be called before the first
+     * addLine -- confirm against the caller.
+     *
+     * @param s the line to register
+     * @return the token for s
+     * @throws IllegalArgumentException if s has no token and contains no usable space, or if
+     *         any piece that has to be resolved is in that state
+     */
+    static int addLine(String s) {
+
+        int result = tokenFromString(s);
+        if (result != -1) return result;
+        int bestLen = 0;
+        int best_i = 0;
+
+        int limit = s.length() - 2;
+
+        for (int i = limit; i >= 1; --i) {
+            char c = s.charAt(i);
+            if (c != ' ') continue;
+
+            String firstPart = s.substring(0, i);
+            String lastPart = s.substring(i+1);
+
+            int lead = tokenFromString(firstPart);
+            int trail = tokenFromString(lastPart);
+
+            if (lead >= 0 && trail >= 0) { // if both match, return immediately with pair
+                if (DEBUG) show(s, firstPart, lastPart, "MATCH BOTH");
+                return addTokenForString(s, lead, trail);
+            }
+            // remember the split with the longest piece on either side, matched or not
+            if (i > bestLen) {
+                bestLen = i;
+                best_i = i;
+            }
+
+            int end_i = s.length() - i - 1;
+            if (end_i > bestLen) {
+                bestLen = end_i;
+                best_i = i;
+            }
+        }
+        if (bestLen > 0) { // at least one space was found: recurse -- and return pair
+            String firstPart = s.substring(0, best_i);
+            String lastPart = s.substring(best_i + 1);
+            int lead = tokenFromString(firstPart);
+            int trail = tokenFromString(lastPart);
+            if (lead >= 0) {
+                if (DEBUG) show(s, firstPart, lastPart, "MATCH FIRST");
+                return addTokenForString(s, lead, addLine(lastPart));
+            } else {
+                if (DEBUG) show(s, firstPart, lastPart, "MATCH SECOND");
+                int newLead = addLine(firstPart);
+                // The last piece may be unknown as well. Storing -1 would silently record
+                // 0xFFFF (a bogus literal), so resolve it the same way as the first piece;
+                // an unknown single word then fails loudly below.
+                if (trail < 0) trail = addLine(lastPart);
+                return addTokenForString(s, newLead, trail);
+            }
+        }
+
+        System.out.println("SHOULD HAVE MATCHED!!");
+        throw new IllegalArgumentException("SHOULD HAVE MATCHED!! " + s);
+    }
+}
--- a/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java
@ -0,0 +1,831 @@
+package com.ibm.text.UCD;
+
+import com.ibm.text.utility.*;
+
+import java.util.*;
+import java.text.NumberFormat;
+import java.io.*;
+
+
+/** Simple program to merge UCD files into XML. Not yet documented!!         
+ * @author Mark Davis
+ */
+
+public final class ConvertUCD implements UCD_Types {
+    // when true, readBlocks/readSemi echo every 500th input line to System.out
+    public static final boolean SHOW = true;
+    public static final boolean DEBUG = false;
+    
+    // the three numeric components of the version being processed; set in main
+    public static int major;
+    public static int minor;
+    public static int update;
+    
+    // the version string being processed: a command-line argument, or UCD.latestVersion
+    // when that argument is empty (see main)
+    static String version;
+    
+    // varies by version
+    /*
+    public static final String BASE_DIR11 = DATA_DIR + "\\Versions\\";
+    public static final String BASE_DIR20 = DATA_DIR + "\\Versions\\";
+    public static final String BASE_DIR21 = DATA_DIR + "\\Versions\\";
+    public static final String BASE_DIR30 = DATA_DIR + "\\Update 3.0.1\\";
+    public static final String BASE_DIR31 = DATA_DIR + "\\3.1-Update\\";
+    */
+    
+    //public static final String blocksnamePlain = "Blocks.txt";
+    //public static final String blocksname31 = "Blocks-4d2.beta";
+    
+    /** First item is file name, rest are field names (skipping character).
+     *  "OMIT" is special -- means don't record
+     *  A leading "*" on a field name marks a hex value; the static initializer further down
+     *  strips the "*" from the entry in place and records the bare label in isHex.
+     */
+
+    static String[][] labelList = {
+        // Labels for the incoming files. Labels MUST match field order in file.
+        // IMPORTANT - defaults of form y-=x must occur after x is encountered!
+        // The one exception is "st", which is handled specially.
+        // So file order is important.
+        //*
+        // 01CA;LATIN CAPITAL LETTER NJ;Lu;0; L; <compat> 004E 004A;  ;  ;  ;N ;LATIN CAPITAL LETTER N J;    ;  ;01CC;01CB
+        //      n                       gc cc bc dm                 dd dv nv bm on                       cm,  uc lc   tc
+        {"UnicodeData", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"},
+        {"ExtraProperties", "xp"},
+        {"PropList", "binary"}, 
+        
+        //{"ExtraProperties", "xp"},
+        
+        {"EastAsianWidth", "ea", "OMIT"},
+        {"LineBreak", "lb", "OMIT"},
+        {"SpecialCasing", "*sl", "*st", "*su", "sc"},
+        {"CompositionExclusions", "ce"},
+        {"CaseFolding", "OMIT", "*fc"},
+        {"ArabicShaping", "OMIT", "jt", "jg"},
+        {"BidiMirroring", "*bg"},
+        {"Scripts", "sn"},
+        //{"Jamo", "jn"},
+        //{"Scripts-1d4", "RANGE", "sn"},
+        //{"Age", "*sn"},
+         //*/
+         /*
+        //*/
+    };
+    /*
+    static String[][] labelList31 = {
+        // Labels for the incoming files. Labels MUST match field order in file.
+        // IMPORTANT - defaults of form y-=x must occur after x is encountered!
+        // The one exception is "st", which is handled specially.
+        // So file order is important.
+        //*
+        // 01CA;LATIN CAPITAL LETTER NJ;Lu;0; L; <compat> 004E 004A;  ;  ;  ;N ;LATIN CAPITAL LETTER N J;    ;  ;01CC;01CB
+        //      n                       gc cc bc dm                 dd dv nv bm on                       cm,  uc lc   tc
+        {"UnicodeData-3.1.0d8.beta", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"},
+        {"PropList-3.1.0d5.beta", "binary"}, 
+        
+        {"ExtraProperties", "xp"},
+        
+        {"EastAsianWidth-4d7.beta", "ea", "OMIT"},
+        {"LineBreak-6d6.beta", "lb", "OMIT"},
+        {"SpecialCasing-4d1.beta", "*sl", "*st", "*su", "sc"},
+        {"CompositionExclusions-3d6.beta", "ce"},
+        {"CaseFolding-3d4.beta", "OMIT", "*fc"},
+        {"ArabicShaping", "OMIT", "jt", "jg"},
+        {"BidiMirroring", "*bg"},
+        {"Scripts-3.1.0d4.beta", "sn"},
+        //{"Scripts-1d4", "RANGE", "sn"},
+        //{"Age", "*sn"},
+         //*/
+         /*
+        {"Jamo", "jn"},
+        //
+    };
+    /*
+        {"UnicodeData-3.1.0d8.beta", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"},
+        {"ExtraProperties", "xp"},
+        
+        {"EastAsianWidth-4d7.beta", "ea", "OMIT"},
+        {"LineBreak-6d6.beta", "lb", "OMIT"},
+        {"SpecialCasing-4d1.beta", "*sl", "*st", "*su", "sc"},
+        {"CompositionExclusions-3d6.beta", "ce"},
+        {"CaseFolding-3d4.beta", "OMIT", "*fc"},
+        {"PropList-3.1.0d2.beta", "PROP", "OMIT"}, 
+        {"ArabicShaping", "OMIT", "jt", "jg"},
+        {"BidiMirroring", "*bg"},
+        {"Scripts-1d4", "sn"},
+        //{"Scripts-1d4", "RANGE", "sn"},
+        //{"Age", "*sn"},
+         //*/
+         /*
+        {"Jamo", "jn"},
+        //
+    
+    //"NamesList-3.1.0d1.beta"
+    
+    static String[][] labelList30 = {
+        // Labels for the incoming files. Labels MUST match field order in file.
+        // IMPORTANT - defaults of form y-=x must occur after x is encountered!
+        // The one exception is "st", which is handled specially.
+        // So file order is important.
+        //*
+        {"UnicodeData", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"},
+        {"CompositionExclusions", "ce"},
+        {"EastAsianWidth", "ea", "OMIT"},
+        {"LineBreak", "lb", "OMIT"},
+        {"SpecialCasing", "*sl", "*st", "*su", "sc"},
+        {"CaseFolding", "OMIT", "*fc"},
+        {"ArabicShaping", "OMIT", "jt", "jg"},
+        {"BidiMirroring", "*bg"},
+        /*
+        {"Jamo", "jn"},
+        {"PropList.alpha", "RANGE", "OMIT"}, 
+        //
+    };
+    
+    static String[][] labelList11 = {
+        {"UnicodeData-1.1", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"},
+    };
+    
+    static String[][] labelList20 = {
+        {"UnicodeData-2.0", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"},
+    };
+    
+    static String[][] labelList21 = {
+        {"UnicodeData-2.1", "n", "gc", "cc", "bc", "dm", "dd", "dv", "nv", "bm", "on", "OMIT", "*uc", "*lc", "*tc"},
+    };
+    */
+    
+    // handles
+    // file name that readBlocks() passes to Utility.openUnicodeFile
+    public static final String blocksname = "Blocks";
+    //public static final String[][] labelList;
+    // NOTE(review): not referenced in the code visible here
+    public static final boolean NEWPROPS = true;
+    
+    /*
+    static {
+        switch (major*10 + minor) {
+        case 31:
+            blocksname = blocksname31;
+            labelList = labelList31;
+            break;
+        case 30:
+            blocksname = blocksnamePlain;
+            labelList = labelList30;
+            break;
+        case 21:
+            blocksname = blocksnamePlain;
+            labelList = labelList21;
+            break;
+        case 20:
+            blocksname = blocksnamePlain;
+            labelList = labelList20;
+            break;
+        default:
+            blocksname = blocksnamePlain;
+            labelList = labelList11;
+            break;
+        }
+    }
+    
+    */
+    static final String dataFilePrefix = "UCD_Data";
+    
+    
+    // MAIN!!
+    
+    /**
+     * Entry point. Opens the UTF-8 log file GEN_DIR + "UCD-log.txt" (starting it with a BOM),
+     * then treats every argument as a version string of the form major.minor.update and runs
+     * toJava() for it; an empty argument stands for UCD.latestVersion. The log is closed even
+     * when a conversion fails.
+     */
+    public static void main (String[] args) throws Exception {
+        System.out.println("ConvertUCD");
+
+        FileOutputStream logFile = new FileOutputStream(GEN_DIR + "UCD-log.txt");
+        Writer utf8Log = new OutputStreamWriter(logFile, "UTF8");
+        log = new PrintWriter(new BufferedWriter(utf8Log, 32*1024));
+        log.write("\uFEFF"); // BOM
+
+        try {
+            int argIndex = 0;
+            while (argIndex < args.length) {
+                version = args[argIndex++];
+                if (version.length() == 0) version = UCD.latestVersion;
+
+                // split "major.minor.update" into its three numeric fields
+                String[] fields = new String[3];
+                Utility.split(version, '.', fields);
+                major = Integer.parseInt(fields[0]);
+                minor = Integer.parseInt(fields[1]);
+                update = Integer.parseInt(fields[2]);
+
+                toJava();
+            }
+        } finally {
+            log.close();
+        }
+    }
+    
+    /*
+    static void toXML() throws Exception {
+        // Blocks is special
+        // Unihan is special
+        // collect all the other .txt files in the directory
+        if (false) readBlocks();
+        if (true) for (int i = 0; i < labelList.length; ++i) {
+            readSemi(labelList[i]);
+        } else {
+            readSemi(labelList[0]); // TESTING ONLY
+        }
+        writeXML();
+    }
+    */
+    
+    /**
+     * Reads every file listed in labelList (in order), compacts each collected UData entry,
+     * prints two spot-check entries and finally calls writeJavaData().
+     */
+    static void toJava() throws Exception {
+        // Blocks is special
+        // Unihan is special
+        // collect all the other .txt files in the directory
+        // (readBlocks() is deliberately not called here)
+        int fileIndex = 0;
+        while (fileIndex < labelList.length) {
+            readSemi(labelList[fileIndex]);
+            ++fileIndex;
+        }
+
+        for (Iterator keys = charData.keySet().iterator(); keys.hasNext(); ) {
+            UData entry = (UData) charData.get(keys.next());
+            entry.compact();
+        }
+
+        UData spot = getEntry(0x2A6D6);
+        System.out.println("SPOT-CHECK: 2A6D6: " + spot);
+        spot = getEntry(0xFFFF);
+        System.out.println("SPOT-CHECK: FFFF: " + spot);
+
+        writeJavaData();
+    }
+    
+    // log file writer; opened and closed by main
+    static PrintWriter log;
+    //static String directory = BASE_DIR;
+    //static Map appendDuplicates = new HashMap();
+    
+    /** First item in labels is file name, rest are field names (skipping character).
+     *  "OMIT" is special -- means don't record
+     */
+    
+    // isHex: keys are the field labels that carried a leading "*" in labelList (value is "").
+    // defaults: every field label mapped to its default value; the static initializer below
+    // currently always stores null because default parsing is commented out.
+    static HashMap isHex = new HashMap();
+    static HashMap defaults = new HashMap();
+    
+    // Normalizes labelList in place and fills isHex/defaults from it.
+    // Index 0 of each row is the file name and is left alone; every later entry is a label.
+    static {
+        for (int j = 0; j < labelList.length; ++j) {
+            String[] labels = labelList[j];
+            
+            for (int i = 1; i < labels.length; ++i) {
+                boolean hex = false;
+                String def = null;
+                //char appendChar = '\u0000';
+                
+                // pull off "*": hex interpretation
+                // (the array element itself is overwritten, so later readers see the bare label)
+                if (labels[i].charAt(0) == '*') { // HEX value
+                    hex = true;
+                    labels[i] = labels[i].substring(1);
+                }
+                
+                /*
+                // pull off "$": append duplicates
+                if (labels[i].charAt(0) == '$') { // HEX value
+                    appendChar = labels[i].charAt(1);
+                    labels[i] = labels[i].substring(2);
+                }
+                
+                // pull off default values
+                int pos = labels[i].indexOf('-');
+                if (pos >= 0) {
+                    def = labels[i].substring(pos+1);
+                    labels[i] = labels[i].substring(0,pos);
+                }
+                */
+                // store results
+                // we do this after all processing, so that the label is clean!!
+                
+                if (hex) isHex.put(labels[i], "");
+                //if (appendChar != 0) appendDuplicates.put(labels[i], String.valueOf(appendChar));
+                // def is never reassigned while the block above stays commented out, so this stores null
+                defaults.put(labels[i], def);
+            }
+        }
+    }
+    
+    // Rows collected by readBlocks(): each element is a String[3] holding the first two
+    // fields of a line passed through Utility.fromHex, followed by the trimmed third field.
+    static List blockData = new LinkedList();
+
+    /**
+     * Reads the file named by blocksname for the current version into blockData.
+     * Everything from a '#' to the end of a line is ignored, as are blank lines; every
+     * remaining line must consist of exactly three ';'-separated fields.
+     * If the file cannot be opened a message is printed and nothing is read, which is the
+     * same handling readSemi applies to a missing file.
+     *
+     * @throws Exception if reading fails or a line does not have three fields; the offending
+     *         line is printed before the exception is rethrown
+     */
+    static void readBlocks() throws Exception {
+        System.out.println("Reading 'Blocks'");
+        BufferedReader input = Utility.openUnicodeFile(blocksname, version);
+        if (input == null) {
+            // without this guard the NullPointerException from readLine() would be replaced
+            // by a second one thrown from input.close() in the finally clause
+            System.out.println("COULDN'T OPEN: " + blocksname);
+            return;
+        }
+        String line = "";
+        try {
+            String[] parts = new String[20];
+            for (int lineNumber = 1; ; ++lineNumber) {
+                line = input.readLine();
+                if (line == null) break;
+                if (SHOW && (lineNumber % 500) == 0) System.out.println("//" + lineNumber + ": '" + line + "'");
+
+                // drop a trailing '#' comment; its text is not needed
+                int commentPos = line.indexOf('#');
+                if (commentPos >= 0) {
+                    line = line.substring(0, commentPos);
+                }
+                line = line.trim();
+                if (line.length() == 0) continue;
+
+                int count = Utility.split(line,';',parts);
+                if (count != 3) throw new ChainException("Bad count in Blocks", null);
+                blockData.add(new String[] {Utility.fromHex(parts[0]), Utility.fromHex(parts[1]), parts[2].trim()});
+            }
+
+        } catch (Exception e) {
+            System.out.println("Exception at: " + line);
+            throw e;
+        } finally {
+            input.close();
+        }
+    }
+    
+    static Set properties = new TreeSet();
+    
+    /**
+     * Reads one semicolon-delimited Unicode data file and folds its fields into the
+     * character table.
+     * <p>
+     * labels[0] is the file name handed to Utility.openUnicodeFile; labels[1..] name the
+     * fields that follow the leading code point (or "start..end" range) on each line.
+     * Some labels are treated specially:
+     * <ul>
+     * <li>"PROP" in labels[1]: field 1 is the (optional) range end, field 2 a binary property name</li>
+     * <li>"OMIT" and "RANGE": the field is parsed but not stored</li>
+     * <li>"binary": the value names a binary property to set</li>
+     * <li>"fc": a case folding whose type letter is taken from the preceding field</li>
+     * </ul>
+     * Text after '#' is ignored, blank lines are skipped, and empty field values are not stored.
+     * "PropList" is skipped entirely for Unicode versions before 3.1.
+     *
+     * @param labels file name followed by the label of each field
+     * @throws Exception any failure while reading or storing; the offending line is printed first
+     */
+    static void readSemi(String[] labels) throws Exception {
+        System.out.println();
+        System.out.println("Reading '" + labels[0] + "'");
+        if (major < 3 || (major == 3 && minor < 1)) {
+            // Compare by value: identity (==) only works when the caller happens to pass an interned literal.
+            if ("PropList".equals(labels[0])) {
+                System.out.println("SKIPPING old format of Proplist for " + version);
+                return;
+            }
+        }
+        String tempVersion = version;
+        if (version.equals(UCD.latestVersion)) tempVersion = "";
+        BufferedReader input = Utility.openUnicodeFile(labels[0], tempVersion);
+        if (input == null) {
+            System.out.println("COULDN'T OPEN: " + labels[0]);
+            return;
+        }
+        boolean showedSemi = false;     // "extra semicolon" warning is printed once per file
+        boolean showedShort = false;    // "line shorter than labels" warning is printed once per file
+        String line = "";
+
+        try {
+            String[] parts = new String[20];
+            for (int lineNumber = 1; ; ++lineNumber) {
+                line = input.readLine();
+                if (line == null) break;
+                if (SHOW && (lineNumber % 500) == 0) System.out.println("//" + lineNumber + ": '" + line + "'");
+
+                // strip the trailing comment, then surrounding white space
+                String original = line;
+                int commentPos = line.indexOf('#');
+                if (commentPos >= 0) {
+                    line = line.substring(0, commentPos);
+                }
+                line = line.trim();
+                if (line.length() == 0) continue;
+
+                int count = Utility.split(line,';',parts);
+
+                if (parts[0].equals("2801")) {
+                    System.out.println("debug?");
+                }
+
+                // fix malformed or simple lists.
+
+                if (count != labels.length) {
+                    if (count == labels.length + 1 && parts[count-1].equals("")) {
+                        if (!showedSemi) System.out.println("Extra semicolon in: " + original);
+                        showedSemi = true;
+                    } else if (count == 1) { // fix simple list
+                        ++count;
+                        parts[1] = "Y";
+                    } else if (count < labels.length) {
+                        if (!showedShort) System.out.println("Line shorter than labels: " + original);
+                        showedShort = true;
+                        for (int i = count; i < labels.length; ++i) {
+                            parts[i] = "";
+                        }
+                    } else {
+                        // Report the line NUMBER; parsing the line text as an Integer would itself
+                        // throw NumberFormatException and hide this error.
+                        throw new ChainException("wrong count: {0}",
+                            new Object[] {new Integer(lineNumber), new Integer(count)});
+                    }
+                }
+
+                // store char
+                // first field is always character OR range. May be UTF-32
+                int cpTop;
+                int cpStart;
+                int ddot = parts[0].indexOf(".");
+                if (ddot >= 0) {
+                    cpStart = UTF32.char32At(Utility.fromHex(parts[0].substring(0,ddot)),0);
+                    cpTop = UTF32.char32At(Utility.fromHex(parts[0].substring(ddot+2)),0);
+                    System.out.println(Utility.hex(cpStart) + " ... " + Utility.hex(cpTop));
+                } else {
+                    cpStart = UTF32.char32At(Utility.fromHex(parts[0]),0);
+                    cpTop = cpStart;
+                    // NOTE(review): the decoded RANGE end is discarded, so cpTop stays equal to cpStart.
+                    // Confirm whether cpTop was meant to be assigned here.
+                    if (labels[1].equals("RANGE")) UTF32.char32At(Utility.fromHex(parts[1]),0);
+                }
+
+                // properties first
+                if (labels[1].equals("PROP")) {
+                    String prop = parts[2].trim();
+                    // FIX!!
+                    boolean skipLetters = false;
+                    if (prop.equals("Alphabetic")) {
+                        prop = "Other_Alphabetic";
+                        skipLetters = true;
+                    }
+                    // END FIX!!
+                    properties.add(prop);
+                    if (Utility.find(prop, UCD_Names.DeletedProperties) == -1) { // only undeleted
+                        int end = UTF32.char32At(Utility.fromHex(parts[1]),0);
+                        if (end == 0) end = cpStart;
+
+                        for (int j = cpStart; j <= end; ++j) {
+                            if (j != UCD.mapToRepresentative(j, false)) continue;
+                            // NOTE(review): this tests cpStart rather than j, so a whole range is kept
+                            // or skipped based on its first code point. Confirm that is intended.
+                            if (skipLetters && getEntry(cpStart).isLetter()) continue;
+                            appendCharProperties(j, prop);
+                        }
+                    }
+                } else { // not range!
+                    String val;
+
+                    for (int i = 1; i < labels.length; ++i) {
+                        String key = labels[i];
+                        if (isHex.get(key) != null) {
+                            val = Utility.fromHex(parts[i]);
+                        } else {
+                            val = parts[i].trim();
+                        }
+                        if (key.equals("OMIT")) continue;   // parsed above, never stored
+                        if (key.equals("RANGE")) continue;  // parsed above, never stored
+                        if (val.equals("")) continue; // skip empty values, they mean default
+
+                        for (int cps = cpStart; cps <= cpTop; ++cps) {
+                            if (UCD.mapToRepresentative(cps, false) != cps) continue;    // skip condensed ranges
+
+                            if (key.equals("binary")) {
+                                appendCharProperties(cps, val);
+                            } else if (key.equals("fc")) {
+                                UData data = getEntry(cps);
+                                // the folding type letter lives in the field just before the mapping
+                                String type = parts[i-1].trim();
+                                if (type.equals("F") || type.equals("C") || type.equals("E") || type.equals("L")) {
+                                    data.fullCaseFolding = val;
+                                }
+                                if (type.equals("S") || type.equals("C") || type.equals("L")) {
+                                    data.simpleCaseFolding = val;
+                                }
+                                if (type.equals("I")) {
+                                    data.simpleCaseFolding = val;
+                                    setBinaryProperty(cps, CaseFoldTurkishI);
+                                    System.out.println("SPOT-CHECK: <" + parts[i-1] + "> Setting " + Utility.hex(cps) + ": " + Utility.hex(val));
+                                }
+                            } else {
+                                addCharData(cps, key, val);
+                            }
+                        }
+                    }
+                }
+            }
+        } catch (Exception e) {
+            System.out.println("Exception at: " + line + ", " + e.getMessage());
+            throw e;
+        } finally {
+            input.close();
+        }
+        //printValues("JOINING_TYPE", jtSet);
+        //printValues("JOINING_GROUP", jgSet);
+    }
+    
+    /**
+     * Prints Java source for a String[] constant named title that lists the values in s,
+     * followed by a run of byte constants (one per value, numbered from 0) and a final
+     * LIMIT_title constant holding the number of values.
+     * Constant names are the values upper-cased with spaces replaced by '-'.
+     *
+     * @param title name used for the array and for the LIMIT_ constant
+     * @param s     set of String values, printed in the set's iteration order
+     */
+    static void printValues(String title, Set s) {
+        Iterator names = s.iterator();
+        java.io.PrintStream out = System.out;
+
+        // the String[] declaration
+        out.println("public static String[] " + title + " = {");
+        for (; names.hasNext(); ) {
+            String name = (String) names.next();
+            out.println("    \"" + name + "\",");
+        }
+        out.println("};");
+
+        // the matching byte constants
+        Iterator again = s.iterator();
+        out.println("public static byte ");
+        int index = 0;
+        for (; again.hasNext(); ++index) {
+            String name = (String) again.next();
+            String constant = name.replace(' ', '-').toUpperCase();
+            out.println("    " + constant + " = " + index + ",");
+        }
+        out.println("    LIMIT_" + title + " = " + index);
+        out.println(";");
+    }
+    
+    static Map charData = new TreeMap(); // the character table: Integer code point -> UData, filled lazily by getEntry
+    
+    /**
+     * Writes the XML form of the database to UCD.BIN_DIR + "UCD_Data.xml", encoded as UTF-8.
+     * The output holds a version header, one block element per entry in blockData, and one
+     * e element per code point in charData. Only the code point attribute is written for
+     * each character; the per-field attribute writer is currently disabled.
+     *
+     * @throws IOException if the file cannot be created or written
+     */
+    static void writeXML() throws IOException {
+        // The message names the file that is actually written below.
+        System.out.println("Writing 'UCD_Data.xml'");
+        BufferedWriter output = new BufferedWriter(
+            new OutputStreamWriter(
+                new FileOutputStream(UCD.BIN_DIR + "UCD_Data.xml"),
+                "UTF8"),
+            32*1024);
+
+        try {
+            // write header
+
+            output.write("<?xml version='1.0' encoding='utf-8'?>\r\n");
+            output.write("<UnicodeCharacterDatabase>\r\n");
+            output.write(" <!-- IMPORTANT: see UCD-Notes.html for information on the format. This file CANNOT be read correctly without that information. -->\r\n");
+            output.write(" <unicode version='" + major + "' minor='" + minor + "' update='" + update + "'/>\r\n");
+            output.write(" <fileVersion status='DRAFT' date='" + new Date() + "'/>\r\n");
+
+            // write blocks: each entry is {start, end, name}
+
+            Iterator it = blockData.iterator();
+            while (it.hasNext()) {
+                String[] block = (String[]) it.next();
+                output.write(" <block start='" + Utility.quoteXML(block[0])
+                    + "' end='" + Utility.quoteXML(block[1])
+                    + "' name='" + Utility.quoteXML(block[2])
+                    + "'/>\r\n" );
+            }
+
+            // write char data (code point only)
+
+            it = charData.keySet().iterator();
+            while (it.hasNext()) {
+                Integer cc = (Integer) it.next();
+                output.write(" <e c='" + Utility.quoteXML(cc.intValue()) + "'" );
+                output.write("/>\r\n");
+            }
+
+            // write footer
+
+            output.write("</UnicodeCharacterDatabase>\r\n");
+        } finally {
+            output.close();
+        }
+    }
+
+    static void writeJavaData() throws IOException {
+        Iterator it = charData.keySet().iterator();
+        int codePoint = -1;
+        System.out.println("Writing " + dataFilePrefix + version);
+        DataOutputStream dataOut = new DataOutputStream(
+            new BufferedOutputStream(
+                new FileOutputStream(UCD.BIN_DIR +  dataFilePrefix + version + ".bin"),
+                128*1024));
+                
+        // write header
+        dataOut.writeByte(BINARY_FORMAT);
+        dataOut.writeByte(major);
+        dataOut.writeByte(minor);
+        dataOut.writeByte(update);
+        long millis = System.currentTimeMillis();
+        dataOut.writeLong(millis);
+        dataOut.writeInt(charData.size());
+        System.out.println("Data Size: " + NumberFormat.getInstance().format(charData.size()));
+        int count = 0;
+        
+        // write records
+        try {
+            // write char data
+
+            while (it.hasNext()) {
+                Object cc = (Object) it.next();
+                //codePoint = UTF32.char32At(cc,0);
+                if (DEBUG) System.out.println(Utility.hex(cc));
+                
+                UData uData = (UData) charData.get(cc);
+                if (false && uData.name == null) {
+                    System.out.println("Warning: NULL name\r\n" + uData);
+                    System.out.println();
+                }
+                if (uData.codePoint == 0x2801) {
+                    System.out.println("SPOT-CHECK: " + uData);
+                }
+                uData.writeBytes(dataOut);
+                count++;
+                if (DEBUG) System.out.println("Setting2");
+            }
+            System.out.println("Wrote Data " + count);
+        } catch (Exception e) {
+            throw new ChainException("Bad data write {0}", new Object [] {Utility.hex(codePoint)}, e);
+        } finally {
+            dataOut.close();
+        }
+    }
+    
+    static String[] xsSplit = new String[40]; // NOTE(review): not referenced in the visible part of this file; presumably scratch space for a split call - confirm
+    
+    // Cache a little bit for speed
+    static int getEntryCodePoint = -1; // code point of the entry most recently returned by getEntry/getEntryIfExists; starts at -1
+    static UData getEntryUData = null; // the entry that belongs to getEntryCodePoint
+    
+    /**
+     * Looks up the table entry for cp without creating one.
+     * A successful lookup is remembered in the one-element cache.
+     *
+     * @return the entry for cp, or null when the table has none
+     */
+    static UData getEntryIfExists(int cp) {
+        if (getEntryCodePoint != cp) {
+            UData found = (UData) charData.get(new Integer(cp));
+            if (found != null) {
+                // only hits are cached; a miss leaves the previous cache content alone
+                getEntryUData = found;
+                getEntryCodePoint = cp;
+            }
+            return found;
+        }
+        return getEntryUData;
+    }
+    
+    /**
+     * Returns the table entry for cp, creating and registering an empty one if the
+     * table has none yet. The returned entry is remembered in the one-element cache.
+     */
+    static UData getEntry(int cp) {
+        if (cp != getEntryCodePoint) {
+            Integer key = new Integer(cp);
+            UData entry = (UData) charData.get(key);
+            if (entry == null) {
+                // first mention of this code point: add a fresh record to the table
+                entry = new UData(cp);
+                charData.put(key, entry);
+            }
+            getEntryUData = entry;
+            getEntryCodePoint = cp;
+        }
+        return getEntryUData;
+    }
+    /**
+     * Sets bit number binProp in the binaryProperties mask of the entry for cp,
+     * creating the entry if necessary.
+     * NOTE(review): the mask is built with int arithmetic (1 << binProp); if property
+     * numbers can reach 31 or more this would need a long shift - confirm against UData.
+     */
+    static void setBinaryProperty(int cp, int binProp) {
+        getEntry(cp).binaryProperties |= (1 << binProp);
+    }
+    
+    /**
+     * Sets the binary property named key on the entry for cp.
+     * The name is translated to its bit number through the UCD_Names.BP table.
+     */
+    static void appendCharProperties(int cp, String key) {
+        setBinaryProperty(cp, Utility.lookup(key, UCD_Names.BP));
+    }
+    
+    static Set jtSet = new TreeSet(); // joining-type values; not populated by any live code in the visible part of this file
+    static Set jgSet = new TreeSet(); // joining-group values; not populated by any live code in the visible part of this file
+    
+    /**
+     * Stores one field value on the entry for cp (the entry is created if needed).
+     * Most keys are passed straight to setField; the following get special treatment:
+     * <ul>
+     * <li>"bm": sets bit 0 of binaryProperties when the value is "Y"</li>
+     * <li>"ce": sets bit 1 of binaryProperties</li>
+     * <li>"on": replaces the name with &lt;value&gt;, but only if the current name starts with '&lt;'</li>
+     * <li>"dm": splits an optional leading &lt;type&gt; tag off the mapping, stores it as "dt",
+     *     and stores the rest (converted from hex) as the mapping</li>
+     * <li>"dd", "dv", "nv": raise numericType to at least DECIMAL, DIGIT or NUMERIC
+     *     respectively, then store the value as "nv"</li>
+     * </ul>
+     *
+     * @param cp    code point the value belongs to
+     * @param key   short field label
+     * @param value field text; expected to be non-empty
+     */
+    static void addCharData(int cp, String key, String value) {
+        UData entry = getEntry(cp);
+
+        if (key.equals("bm")) {
+            if (value.equals("Y")) entry.binaryProperties |= 1;
+            return;
+        }
+        if (key.equals("ce")) {
+            entry.binaryProperties |= 2;
+            return;
+        }
+        if (key.equals("on")) {
+            if (entry.name.charAt(0) == '<') {
+                entry.name = '<' + value + '>';
+            }
+            return;
+        }
+        if (key.equals("dm")) {
+            // canonical unless a <tag> says otherwise
+            entry.decompositionType = CANONICAL;
+            if (value.charAt(0) == '<') {
+                int close = value.indexOf('>');
+                String tag = value.substring(1, close);
+                if (major < 2 && tag.charAt(0) == '+') {
+                    tag = tag.substring(1);
+                }
+                value = value.substring(close + 1);
+                setField(entry, "dt", tag);
+            }
+            // FIX OLD: pre-2.0 data may carry trailing '<...' or '{...' material; cut it off
+            if (major < 2) {
+                int cut = value.indexOf('<');
+                if (cut > 0) value = value.substring(0, cut);
+                cut = value.indexOf('{');
+                if (cut > 0) value = value.substring(0, cut);
+            }
+            setField(entry, key, Utility.fromHex(value));
+            return;
+        }
+
+        // fix the numeric fields to be more sensible
+        if (key.equals("dd")) {
+            if (entry.numericType < UCD_Types.DECIMAL) entry.numericType = UCD_Types.DECIMAL;
+            setField(entry, "nv", value);
+            return;
+        }
+        if (key.equals("dv")) {
+            if (entry.numericType < UCD_Types.DIGIT) entry.numericType = UCD_Types.DIGIT;
+            setField(entry, "nv", value);
+            return;
+        }
+        if (key.equals("nv")) {
+            if (entry.numericType < UCD_Types.NUMERIC) entry.numericType = UCD_Types.NUMERIC;
+            setField(entry, "nv", value);
+            return;
+        }
+
+        // everything else is stored as is
+        setField(entry, key, value);
+    }
+    
+    /**
+     * Stores fieldValue in the member of uData selected by the short label fieldName.
+     * String-valued fields are stored as given; enumerated fields are translated through
+     * the matching UCD_Names table; numeric fields are parsed.
+     *
+     * @param uData      record to modify
+     * @param fieldName  short field label such as "n", "gc" or "nv"
+     * @param fieldValue text of the value
+     * @throws ChainException if the label is unknown or the value cannot be converted;
+     *         the original exception is attached as the cause
+     */
+    static public void setField(UData uData, String fieldName, String fieldValue) {
+        try {
+            // --- name and mappings, stored verbatim ---
+            if (fieldName.equals("n")) { uData.name = fieldValue; return; }
+            if (fieldName.equals("dm")) { uData.decompositionMapping = fieldValue; return; }
+            if (fieldName.equals("bg")) { uData.bidiMirror = fieldValue; return; }
+            if (fieldName.equals("uc")) { uData.simpleUppercase = fieldValue; return; }
+            if (fieldName.equals("lc")) { uData.simpleLowercase = fieldValue; return; }
+            if (fieldName.equals("tc")) { uData.simpleTitlecase = fieldValue; return; }
+            if (fieldName.equals("su")) { uData.fullUppercase = fieldValue; return; }
+            if (fieldName.equals("sl")) { uData.fullLowercase = fieldValue; return; }
+            if (fieldName.equals("st")) { uData.fullTitlecase = fieldValue; return; }
+            if (fieldName.equals("sc")) { uData.specialCasing = fieldValue; return; }
+
+            // --- a single extra binary property, OR-ed into the mask ---
+            if (fieldName.equals("xp")) {
+                uData.binaryProperties |= 1 << Utility.lookup(fieldValue, UCD_Names.BP);
+                return;
+            }
+
+            // --- enumerated values, translated through the name tables ---
+            if (fieldName.equals("gc")) {
+                uData.generalCategory = Utility.lookup(fieldValue, UCD_Names.GC);
+                return;
+            }
+            if (fieldName.equals("bc")) {
+                uData.bidiClass = Utility.lookup(fieldValue, UCD_Names.BC);
+                return;
+            }
+            if (fieldName.equals("dt")) {
+                if (major < 2) {
+                    // pre-2.0 files use different tag names; map them to the current ones
+                    if (fieldValue.equals("no-break")) fieldValue = "noBreak";
+                    else if (fieldValue.equals("circled")) fieldValue = "circle";
+                    else if (fieldValue.equals("sup")) fieldValue = "super";
+                    else if (fieldValue.equals("break")) fieldValue = "compat";
+                    else if (fieldValue.equals("font variant")) fieldValue = "font";
+                    else if (fieldValue.equals("no-join")) fieldValue = "compat";
+                    else if (fieldValue.equals("join")) fieldValue = "compat";
+                }
+                uData.decompositionType = Utility.lookup(fieldValue, UCD_Names.DT);
+                return;
+            }
+            if (fieldName.equals("nt")) {
+                uData.numericType = Utility.lookup(fieldValue, UCD_Names.NT);
+                return;
+            }
+            if (fieldName.equals("ea")) {
+                uData.eastAsianWidth = Utility.lookup(fieldValue, UCD_Names.EA);
+                return;
+            }
+            if (fieldName.equals("lb")) {
+                uData.lineBreak = Utility.lookup(fieldValue, UCD_Names.LB);
+                return;
+            }
+            if (fieldName.equals("sn")) {
+                uData.script = Utility.lookup(fieldValue, UCD_Names.SCRIPT);
+                return;
+            }
+            if (fieldName.equals("jt")) {
+                uData.joiningType = Utility.lookup(fieldValue, UCD_Names.JOINING_TYPE);
+                return;
+            }
+            if (fieldName.equals("jg")) {
+                uData.joiningGroup = Utility.lookup(fieldValue, UCD_Names.OLD_JOINING_GROUP);
+                return;
+            }
+
+            // --- parsed numbers ---
+            if (fieldName.equals("nv")) {
+                // pre-2.0 files use "-" for "no value"
+                if (major < 2 && fieldValue.equals("-")) return;
+                uData.numericValue = Utility.floatFrom(fieldValue);
+                return;
+            }
+            if (fieldName.equals("cc")) {
+                uData.combiningClass = (byte)Utility.intFrom(fieldValue);
+                return;
+            }
+            if (fieldName.equals("bp")) {
+                uData.binaryProperties = (byte)Utility.intFrom(fieldValue);
+                return;
+            }
+
+            throw new IllegalArgumentException("Unknown fieldName");
+        } catch (Exception e) {
+            throw new ChainException(
+            "Bad field name= \"{0}\", value= \"{1}\"", new Object[] {fieldName, fieldValue}, e);
+        }
+    }
+    
+}
--- a/tools/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/DerivedPropertyLister.java
@ -0,0 +1,440 @@
+package com.ibm.text.UCD;
+import java.io.*;
+import java.util.*;
+
+import com.ibm.text.utility.*;
+
+final class DerivedPropertyLister extends PropertyLister {
+    static final boolean BRIDGE = false; // not referenced in the visible part of this class
+    // Selector values for propMask: each constant picks one derived property to list.
+    static int enum = 0; // NOTE(review): 'enum' became a reserved word in Java 5; this only compiles with older source levels
+    static final int
+        PropMath = 0,
+        PropAlphabetic = 1,
+        PropLowercase = 2,
+        PropUppercase = 3,
+        // identifier properties
+        ID_Start = 4,
+        ID_Continue_NO_Cf = 5,
+        // identifier properties closed under normalization; printed as XID_Start / XID_Continue
+        Mod_ID_Start = 6,
+        Mod_ID_Continue_NO_Cf = 7,
+        // printed with a "Possible_" prefix by propertyName
+        Missing_Uppercase = 8,
+        Missing_Lowercase = 9,
+        Missing_Mixedcase = 10,
+        
+        FC_NFKC_Closure = 11,
+        
+        FullCompExclusion = 12,
+        FullCompInclusion = 13,
+        // Each group of four below is in NFD, NFC, NFKD, NFKC order, so that
+        // (propMask - first constant of the group) indexes both nf[] and NAME[].
+        QuickNFD = 14,
+        QuickNFC = 15,
+        QuickNFKD = 16,
+        QuickNFKC = 17,
+        
+        ExpandsOnNFD = 18,
+        ExpandsOnNFC = 19,
+        ExpandsOnNFKD = 20,
+        ExpandsOnNFKC = 21,
+        
+        GenNFD = 22,
+        GenNFC = 23,
+        GenNFKD = 24,
+        GenNFKC = 25,
+                
+        LIMIT = 26; // one past the last selector value
+   ; // stray empty declaration
+    
+    private int propMask; // which derived property this lister produces (one of the selector constants)
+    private Normalizer[] nf = new Normalizer[4]; // normalizers indexed NFD, NFC, NFKD, NFKC (same order as NAME)
+    private Normalizer nfd, nfc, nfkd, nfkc; // the same four normalizers, by name
+    int width; // value returned by minPropertyWidth(); set in the constructor
+    
+    /**
+     * Creates a lister for one derived property.
+     *
+     * @param ucd      character database to read from
+     * @param propMask selector constant of the derived property to list
+     * @param output   stream the listing is written to
+     */
+    public DerivedPropertyLister(UCD ucd, int propMask, PrintStream output) {
+        this.propMask = propMask;
+        this.output = output;
+        this.ucdData = ucd;
+
+        // one normalizer per form, reachable both by index and by name
+        nf[0] = new Normalizer(Normalizer.NFD);
+        nfd = nf[0];
+        nf[1] = new Normalizer(Normalizer.NFC);
+        nfc = nf[1];
+        nf[2] = new Normalizer(Normalizer.NFKD);
+        nfkd = nf[2];
+        nf[3] = new Normalizer(Normalizer.NFKC);
+        nfkc = nf[3];
+
+        // start from the inherited column width, then adjust per property
+        width = super.minPropertyWidth();
+        boolean generatedForm = propMask == GenNFD || propMask == GenNFC
+                             || propMask == GenNFKD || propMask == GenNFKC;
+        if (generatedForm) {
+            alwaysBreaks = true;
+        } else if (propMask == FC_NFKC_Closure) {
+            alwaysBreaks = true;
+            width = 21;
+        } else if (propMask == QuickNFC || propMask == QuickNFKC) {
+            width = 11;
+        }
+    }
+    
+    /**
+     * Builds the comment header printed at the top of the listing for the selected
+     * derived property: the property name followed by notes on how it was generated.
+     * Lines are separated by "\r\n" and start with '#'.
+     *
+     * @return the header text; ends with "Unimplemented!!" for an unknown selector
+     */
+    public String headerString() {
+        String result = "# Derived Property: ";
+        switch (propMask) {
+          case ExpandsOnNFD: case ExpandsOnNFC: case ExpandsOnNFKD: case ExpandsOnNFKC:
+            result += "Expands_On_" + NAME[propMask-ExpandsOnNFD] + "\r\n#   Generated according to UAX #15."
+            + "\r\n#   Characters whose normalized length is not one."
+            + "\r\n#   WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact."
+            + "\r\n#            The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!";
+            break;
+          case GenNFD: case GenNFC: case GenNFKD: case GenNFKC:
+            result += NAME[propMask-GenNFD] + "\r\n#   Generated according to UAX #15."
+            + "\r\n#   Normalized forms, where different from the characters themselves."
+            // The Hangul note only applies to the decomposing forms. The composed forms are
+            // named by constant; the old literals 5 and 3 could never match in this case.
+            + ((propMask == GenNFKC || propMask == GenNFC)
+              ? ""
+              : "\r\n#   HANGUL SYLLABLES are algorithmically decomposed, and not listed explicitly.")
+            + "\r\n#   WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact."
+            + "\r\n#            It is NOT sufficient to replace characters one-by-one with these results!";
+            break;
+          case ID_Start: result +=
+            "ID_Start"
+            + "\r\n#  Characters that can start an identifier."
+            + "\r\n#  Generated from Lu+Ll+Lt+Lm+Lo+Nl";
+            break;
+          case ID_Continue_NO_Cf: result +=
+            "ID_Continue"
+            + "\r\n#  Characters that can continue an identifier."
+            + "\r\n#  Generated from: ID_Start + Mn+Mc+Nd+Pc"
+            + "\r\n#  NOTE: Cf characters should be filtered out.";
+            break;
+          case Mod_ID_Start: result +=
+            "XID_Start"
+            + "\r\n#  ID_Start modified for closure under NFKx"
+            + "\r\n#  Modified as described in UAX #15"
+            + "\r\n#  NOTE: Does NOT remove the non-NFKx characters."
+            + "\r\n#        Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string))";
+            break;
+          case Mod_ID_Continue_NO_Cf: result +=
+            "XID_Continue"
+            + "\r\n#  Mod_ID_Continue modified for closure under NFKx"
+            + "\r\n#  Modified as described in UAX #15"
+            + "\r\n#  NOTE: Cf characters should be filtered out."
+            + "\r\n#  NOTE: Does NOT remove the non-NFKx characters."
+            + "\r\n#        Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string))";
+            break;
+          case PropMath:
+            result += "Math"
+             + "\r\n#  Generated from: Sm + Other_Math";
+            break;
+          case PropAlphabetic:
+            result += "Alphabetic"
+            + "\r\n#  Generated from: Lu+Ll+Lt+Lm+Lo+Nl + Other_Alphabetic";
+            break;
+          case PropLowercase:
+            result += "Lowercase"
+            + "\r\n#  Generated from: Ll + Other_Lowercase";
+            break;
+          case PropUppercase: result +=
+            "Uppercase"
+            + "\r\n#  Generated from: Lu + Other_Uppercase";
+            break;
+          case Missing_Uppercase: result +=
+            "Missing_Uppercase"
+            + "\r\n#  Generated from: NFKD has >0 Uppercase, no other cases";
+            break;
+          case Missing_Lowercase: result +=
+            "Missing_Lowercase"
+            + "\r\n#  Generated from: NFKD has >0 Lowercase, no other cases";
+            break;
+          case Missing_Mixedcase: result +=
+            "Missing_Mixedcase"
+            + "\r\n#  Generated from: NFKD has >0 Mixedcase, no other cases";
+            break;
+          case FullCompExclusion: result +=
+            "Full Composition Exclusion"
+            + "\r\n#  Generated from: Composition Exclusions + Singletons + Non-Starter Decompositions";
+            break;
+          case FullCompInclusion: result +=
+            "Full Composition Inclusion"
+            + "\r\n#  characters with Canonical Decompositions MINUS Full Composition Exclusion";
+            break;
+          case FC_NFKC_Closure: result +=
+            "FC_NFKC_Closure"
+            + "\r\n#  Generated from computing: b = NFKC(Fold(a)); c = NFKC(Fold(b));"
+            + "\r\n#  Then if (c != b) add the mapping from a to c to the set of"
+            + "\r\n#  mappings that constitute the FC_NFKC_Closure list";
+            break;
+          case QuickNFD: case QuickNFC: case QuickNFKD: case QuickNFKC:
+            result += NAME[propMask-QuickNFD] + "_QuickCheck"
+            + "\r\n#  Generated from computing decomposibles"
+            + ((propMask == QuickNFC || propMask == QuickNFKC)
+                ? " (and characters that may compose with previous ones)" : "");
+            break;
+          default: result += "Unimplemented!!";
+        }
+        return result;
+    }
+
+    /**
+     * Returns the text printed in the property column for code point cp.
+     * For most properties this is a fixed name; the generated normalization forms,
+     * FC_NFKC_Closure and the quick-check properties also include a value computed for cp.
+     *
+     * @param cp code point being listed
+     * @return the property column text, or "Unimplemented!!" for an unknown selector
+     */
+    public String propertyName(int cp) {
+        String label;
+        switch (propMask) {
+          case ExpandsOnNFD: case ExpandsOnNFC: case ExpandsOnNFKD: case ExpandsOnNFKC:
+            label = "Expands_On_" + NAME[propMask-ExpandsOnNFD];
+            break;
+          case GenNFD: case GenNFC: case GenNFKD: case GenNFKC: {
+            String form = NAME[propMask-GenNFD];
+            if (0xAC00 <= cp && cp <= 0xD7A3) {
+                // Hangul syllables are not spelled out
+                label = form + "; " + "<algorithmic normalization>";
+            } else {
+                String hex = Utility.hex(nf[propMask-GenNFD].normalize(cp));
+                // pad the hex form out to 14 columns
+                String filler = Utility.repeat(" ", 14-hex.length());
+                label = form + "; " + hex + filler;
+            }
+            break;
+          }
+          case ID_Start:              label = "ID_Start"; break;
+          case ID_Continue_NO_Cf:     label = "ID_Continue"; break;
+          case Mod_ID_Start:          label = "XID_Start"; break;
+          case Mod_ID_Continue_NO_Cf: label = "XID_Continue"; break;
+          case PropMath:              label = "Math"; break;
+          case PropAlphabetic:        label = "Alphabetic"; break;
+          case PropLowercase:         label = "Lowercase"; break;
+          case PropUppercase:         label = "Uppercase"; break;
+          case Missing_Uppercase:     label = "Possible_Missing_Uppercase"; break;
+          case Missing_Lowercase:     label = "Possible_Missing_Lowercase"; break;
+          case Missing_Mixedcase:     label = "Possible_Missing_Titlecase"; break;
+          case FullCompExclusion:     label = "Comp_Ex"; break;
+          case FullCompInclusion:     label = "Comp_In"; break;
+          case FC_NFKC_Closure:
+            label = "FNC; " + Utility.hex(getComputedValue(cp));
+            break;
+          case QuickNFD: case QuickNFC: case QuickNFKD: case QuickNFKC:
+            label = NAME[propMask-QuickNFD] + "_" + getComputedValue(cp);
+            break;
+          default:
+            label = "Unimplemented!!";
+            break;
+        }
+        return label;
+    }
+    
+    //public String optionalComment(int cp) {
+    //    return super.optionalComment(cp) + " [" + ucdData.getCodeAndName(computedValue) + "]";
+    //}
+    
+    
+    public int minPropertyWidth() { // returns the width chosen in the constructor: the inherited value, or 21 for FC_NFKC_Closure, or 11 for QuickNFC/QuickNFKC
+        return width;
+    }
+    
+    
+    static final String[] NAME = {"NFD", "NFC", "NFKD", "NFKC"}; // printed names of the normalization forms, in the same order as nf[]
+    /*
+    public String optionalComment(int cp) {
+        String id = ucdData.getCategoryID(cp);
+        if (UCD.mainCategoryMask(ucdData.getCategory(cp)) == LETTER_MASK) return id.substring(0,1) + "*";
+        return id;
+    }
+    */
+    /*
+    public String optionalName(int cp) {
+        if ((propMask & 0xFF00) == DECOMPOSITION_TYPE) {
+            return Utility.hex(ucdData.getDecompositionMapping(cp));
+        } else {
+            return "";
+        }
+    }
+    */
+    
+        
+    /**
+     * Decides how the code point {@code cp} is treated for the derived property
+     * selected by {@code propMask}.
+     *
+     * Unassigned code points and unknown property masks are always excluded.
+     * The Gen*, FC_NFKC_Closure and Quick* cases may return BREAK; presumably
+     * that makes the lister start a new range -- confirm in PropertyLister.
+     * FC_NFKC_Closure and Quick* also record a per-code-point value through
+     * setComputedValue().
+     *
+     * @param cp code point to classify
+     * @return EXCLUDE, INCLUDE or BREAK
+     */
+    public byte status(int cp) {
+        if (!ucdData.isAssigned(cp)) return EXCLUDE;
+        byte cat = ucdData.getCategory(cp);
+
+        String cps;
+        byte xCat;
+
+        switch (propMask) {
+          default: return EXCLUDE;
+
+          case ExpandsOnNFD: case ExpandsOnNFC: case ExpandsOnNFKD: case ExpandsOnNFKC:
+            if (ucdData.getDecompositionType(cp) == NONE) return EXCLUDE;
+            cps = UTF32.valueOf32(cp);
+            // only code points whose normalized form has a different code point count are listed
+            if (UTF32.length32(nf[propMask-ExpandsOnNFD].normalize(cps)) == UTF32.length32(cps)) return EXCLUDE;
+            break; // falls out of the switch to the final INCLUDE
+          case GenNFD: case GenNFC: case GenNFKD: case GenNFKC:
+            if (ucdData.getDecompositionType(cp) == NONE) return EXCLUDE;
+            cps = UTF32.valueOf32(cp);
+            if (cps.equals(nf[propMask-GenNFD].normalize(cps))) {
+                return EXCLUDE;
+            }
+            // 0xAC00..0xD7A3 (the Hangul syllable block) is kept as one INCLUDE range
+            if (cp >= 0xAC00 && cp <= 0xD7A3) return INCLUDE;
+            return BREAK;
+          case ID_Start:
+            if (ucdData.isIdentifierStart(cp, false)) return INCLUDE;
+            return EXCLUDE;
+          case ID_Continue_NO_Cf:
+            if (ucdData.isIdentifierContinue_NO_Cf(cp, false)) return INCLUDE;
+            return EXCLUDE;
+          case Mod_ID_Start:
+            if (ucdData.isIdentifierStart(cp, true)) return INCLUDE;
+            return EXCLUDE;
+          case Mod_ID_Continue_NO_Cf:
+            if (ucdData.isIdentifierContinue_NO_Cf(cp, true)) return INCLUDE;
+            return EXCLUDE;
+          case PropMath:
+            if (cat == Sm
+             || ucdData.getBinaryProperty(cp,Math_Property)) return INCLUDE;
+            return EXCLUDE;
+          case PropAlphabetic:
+            if (cat == Lu || cat == Ll || cat == Lt || cat == Lm || cat == Lo || cat == Nl
+             || ucdData.getBinaryProperty(cp, Alphabetic)) return INCLUDE;
+            // Without this return the case fell through into PropLowercase and
+            // listed non-alphabetic Other_Lowercase code points as Alphabetic.
+            return EXCLUDE;
+          case PropLowercase:
+            if (cat == Ll
+             || ucdData.getBinaryProperty(cp, Other_Lowercase)) return INCLUDE;
+            return EXCLUDE;
+          case PropUppercase:
+            if (cat == Lu
+             || ucdData.getBinaryProperty(cp, Other_Uppercase)) return INCLUDE;
+            return EXCLUDE;
+          case Missing_Uppercase:
+            // already uppercase: nothing is missing
+            if (cat == Lu
+             || ucdData.getBinaryProperty(cp, Other_Uppercase)) return EXCLUDE;
+            xCat = getDecompCat(cp);
+            if (xCat == Lu) return INCLUDE;
+            return EXCLUDE;
+          case Missing_Lowercase:
+            // already lowercase: nothing is missing
+            if (cat == Ll
+             || ucdData.getBinaryProperty(cp, Other_Lowercase)) return EXCLUDE;
+            xCat = getDecompCat(cp);
+            if (xCat == Ll) return INCLUDE;
+            return EXCLUDE;
+          case Missing_Mixedcase:
+            if (cat == Lt) return EXCLUDE;
+            xCat = getDecompCat(cp);
+            if (xCat == Lt) return INCLUDE;
+            return EXCLUDE;
+          case FullCompExclusion:
+            /*
+(3) Singleton Decompositions: characters that  can be derived from the UnicodeData file by 
+including all characters whose canonical decomposition consists of a single character.
+(4) Non-Starter Decompositions: characters that  can be derived from the UnicodeData
+file by including all characters whose canonical decomposition consists of a sequence
+of characters, the first of which has a non-zero combining class.
+*/
+            {
+                if (!ucdData.isRepresented(cp)) return EXCLUDE;
+                byte dtype = ucdData.getDecompositionType(cp);
+                if (dtype != CANONICAL) return EXCLUDE;
+
+                if (isCompEx(cp)) return INCLUDE;
+                return EXCLUDE;
+            }
+          case FullCompInclusion:
+            // exact complement of FullCompExclusion among canonical decomposables
+            {
+                if (!ucdData.isRepresented(cp)) return EXCLUDE;
+                byte dtype = ucdData.getDecompositionType(cp);
+                if (dtype != CANONICAL) return EXCLUDE;
+
+                if (isCompEx(cp)) return EXCLUDE;
+                return INCLUDE;
+            }
+          case FC_NFKC_Closure:
+            if (!ucdData.isRepresented(cp)) return EXCLUDE;
+
+          /*
+            b = Normalize(Fold(a));
+            c = Normalize(Fold(b));
+            if (c != b) add a => c
+          */
+            {
+                String b = nfkc.normalize(fold(cp));
+                String c = nfkc.normalize(fold(b));
+                if (c.equals(b)) return EXCLUDE;
+                setComputedValue(cp, c);
+                // diagnostic trace for U+1F88, kept from the original
+                if (cp == 0x1F88) {
+                    System.out.println(ucdData.toString(cp));
+                    System.out.println("cp: " + ucdData.getCodeAndName(cp));
+                    System.out.println("fold(cp): " + ucdData.getCodeAndName(fold(cp)));
+                    System.out.println("b: " + ucdData.getCodeAndName(b));
+                    System.out.println("fold(b): " + ucdData.getCodeAndName(fold(b)));
+                    System.out.println("c: " + ucdData.getCodeAndName(c));
+                }
+                return BREAK;
+            }
+
+         case QuickNFD: case QuickNFC: case QuickNFKD: case QuickNFKC:
+            lastValue = currentValue;
+            Normalizer nfx = nf[propMask - QuickNFD];
+            if (nfx.normalizationDiffers(cp)) currentValue = "NO";
+            else if (nfx.isTrailing(cp)) currentValue = "MAYBE";
+            else return EXCLUDE;
+            setComputedValue(cp, currentValue);
+            // compare by content; the old identity test only worked because
+            // every value involved is an interned literal
+            if (!currentValue.equals(lastValue)) return BREAK;
+            return INCLUDE;
+        }
+
+        // reached only from the ExpandsOn* cases
+        return INCLUDE;
+    }
+    
+    // Values recorded by setComputedValue(), keyed by the boxed code point.
+    static Map computedValue = new HashMap();
+
+    /** Returns the value recorded for cp, or null when nothing was recorded. */
+    static String getComputedValue(int cp) {
+        Integer key = new Integer(cp);
+        Object stored = computedValue.get(key);
+        return (String) stored;
+    }
+
+    /** Records value for cp, replacing any earlier entry for the same code point. */
+    static void setComputedValue(int cp, String value) {
+        Integer key = new Integer(cp);
+        computedValue.put(key, value);
+    }
+
+    // Previous and most recent values assigned by the Quick* cases of status().
+    static String lastValue = "";
+    static String currentValue = "";
+    
+    /**
+     * Tells whether cp counts as a composition exclusion: it carries the
+     * CompositionExclusion property, its decomposition mapping is a single
+     * code point, or the first code point of that mapping has a non-zero
+     * combining class.
+     */
+    boolean isCompEx(int cp) {
+        // the decomposition is only consulted when the property is not set
+        if (!ucdData.getBinaryProperty(cp, CompositionExclusion)) {
+            String mapping = ucdData.getDecompositionMapping(cp);
+            boolean singleton = UTF32.length32(mapping) == 1;
+            if (!singleton) {
+                int lead = UTF32.char32At(mapping, 0);
+                return ucdData.getCombiningClass(lead) != 0;
+            }
+        }
+        return true;
+    }
+    
+    // Scratch buffer; not referenced anywhere in the visible part of this class.
+    StringBuffer foldBuffer = new StringBuffer();
+    
+    // Case-folds a single code point by asking the UCD for its FULL/FOLD case mapping.
+    String fold(int cp) {
+        return ucdData.getCase(cp, FULL, FOLD);
+    }
+    
+    // Case-folds a whole string by asking the UCD for its FULL/FOLD case mapping.
+    String fold(String s) {
+        return ucdData.getCase(s, FULL, FOLD);
+    }
+    
+    /**
+     * Derives a case-oriented category for cp, looking at its decomposition
+     * when cp itself is not a cased or other letter.
+     *
+     * Lu/Ll (or the matching Other_* property) and Lt/Lo/Lm/Nl are answered
+     * from cp directly. Otherwise, if nf[2] does not change cp the result is
+     * Lo; if it does, every code point of the nf[2] form is inspected and the
+     * result is Ll (only lowercase seen), Lu (only uppercase seen), Lt (any
+     * mixture), or cp's own category when nothing cased was seen.
+     * NOTE(review): nf[2] looks like NFKD given the ExpandsOnNFD..NFKC
+     * indexing used in status() -- confirm where nf is initialized.
+     *
+     * @param cp code point to classify
+     * @return Lu, Ll, Lt, Lo, Lm, Nl or cp's general category
+     */
+    byte getDecompCat(int cp) {
+        byte cat = ucdData.getCategory(cp);
+        if (cat == Lu
+            || ucdData.getBinaryProperty(cp, Other_Uppercase)) return Lu;
+        if (cat == Ll
+            || ucdData.getBinaryProperty(cp, Other_Lowercase)) return Ll;
+        if (cat == Lt || cat == Lo || cat == Lm || cat == Nl) return cat;
+        if (!nf[2].normalizationDiffers(cp)) return Lo;
+
+        String norm = nf[2].normalize(cp);
+        int cp2;
+        boolean gotUpper = false;
+        boolean gotLower = false;
+        boolean gotTitle = false;
+        for (int i = 0; i < norm.length(); i += UTF32.count16(cp2)) {
+            cp2 = UTF32.char32At(norm, i);
+            byte catx = ucdData.getCategory(cp2);
+            // Query the decomposition element, not cp: for cp both properties
+            // are already known to be false from the early returns above.
+            boolean upx = ucdData.getBinaryProperty(cp2, Other_Uppercase);
+            boolean lowx = ucdData.getBinaryProperty(cp2, Other_Lowercase);
+            // U+0345 is counted as lowercase here
+            if (catx == Ll || lowx || cp2 == 0x345) gotLower = true;
+            if (catx == Lu || upx) gotUpper = true;
+            if (catx == Lt) gotTitle = true;
+        }
+        if (gotLower && !gotUpper && !gotTitle) return Ll;
+        if (!gotLower && gotUpper && !gotTitle) return Lu;
+        if (gotLower || gotUpper || gotTitle) return Lt;
+        return cat;
+    }
+}
+    
--- a/tools/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/DiffPropertyLister.java
@ -0,0 +1,65 @@
+package com.ibm.text.UCD;
+import java.io.*;
+
+/**
+ * Lists the code points that are allocated in one UCD version but not in an
+ * older one; without an older version it lists everything allocated.
+ */
+class DiffPropertyLister extends PropertyLister {
+    // baseline version to diff against; null means "no baseline"
+    private UCD oldUCD;
+
+    /**
+     * @param oldUCDName version name of the baseline, or null for none
+     * @param newUCDName version name of the data being listed
+     * @param output     stream the listing is written to
+     */
+    public DiffPropertyLister(String oldUCDName, String newUCDName, PrintStream output) {
+        this.output = output;
+        this.ucdData = UCD.make(newUCDName);
+        this.oldUCD = (oldUCDName == null) ? null : UCD.make(oldUCDName);
+    }
+
+    /** Single-argument form: every code point is included. */
+    public byte status (int cp) {
+        return INCLUDE;
+    }
+
+    /** The listed value is the version string of the newer UCD. */
+    public String propertyName(int cp) {
+        return ucdData.getVersion();
+    }
+
+    /**
+     * Includes cp when it is allocated in the newer UCD and either there is
+     * no baseline or the baseline does not allocate it.
+     */
+    public byte status(int lastCp, int cp) {
+        if (!ucdData.isAllocated(cp)) {
+            return EXCLUDE;
+        }
+        if (oldUCD != null && oldUCD.isAllocated(cp)) {
+            return EXCLUDE;
+        }
+        return INCLUDE;
+    }
+
+    /**
+     * Writes a heading, the listing produced by the superclass, and a total
+     * line; the heading is echoed to System.out as well.
+     *
+     * @return the count reported by super.print()
+     */
+    public int print() {
+        String status = (oldUCD != null)
+            ? "# Differences between " + ucdData.getVersion() + " and " + oldUCD.getVersion()
+            : "# Allocated as of " + ucdData.getVersion();
+
+        output.println();
+        output.println();
+        output.println(status);
+        output.println();
+        System.out.println(status);
+
+        int count = super.print();
+
+        output.println();
+        String total = (oldUCD != null)
+            ? "# Total " + count + " new code points allocated in " + ucdData.getVersion()
+            : "# Total " + count + " code points allocated in " + ucdData.getVersion();
+        output.println(total);
+
+        output.println();
+        return count;
+    }
+
+}
+    
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateCaseFolding.java
@ -0,0 +1,342 @@
+package com.ibm.text.UCD;
+
+import java.util.*;
+import java.io.*;
+
+import com.ibm.text.utility.*;
+
+/**
+ * Builds a sample case folding file (CaseFoldingSample.txt) from the UCD by
+ * closing every code point under lower/title/upper mapping and choosing one
+ * representative string per equivalence class.
+ */
+public class GenerateCaseFolding implements UCD_Types {
+    public static boolean DEBUG = false;
+    public static UCD ucd = UCD.make("310");
+
+    public static void main(String[] args) throws java.io.IOException {
+        makeCaseFold();
+        //getAge();
+    }
+
+    /**
+     * Computes the full and the simple folding tables and writes one line per
+     * mapping: type C when both agree (I for U+0130/U+0131), otherwise an F
+     * line for the full mapping and/or an S line for the simple one.
+     */
+    public static void makeCaseFold() throws java.io.IOException {
+        System.out.println("Making Full Data");
+        Map fullData = getCaseFolding(true);
+        System.out.println("Making Simple Data");
+        Map simpleData = getCaseFolding(false);
+
+        System.out.println("Writing");
+        PrintWriter out = new PrintWriter(
+            new BufferedWriter(
+            new OutputStreamWriter(
+                new FileOutputStream("CaseFoldingSample.txt"),
+                "UTF8"),
+            4*1024));
+        try {
+            // <= so that the last code point, U+10FFFF, is visited too
+            for (int ch = 0; ch <= 0x10FFFF; ++ch) {
+                String key = UTF32.valueOf32(ch);
+                String rFull = (String)fullData.get(key);
+                String rSimple = (String)simpleData.get(key);
+                if (rFull == null && rSimple == null) continue;
+                if (rFull != null && rFull.equals(rSimple)) {
+                    String type = "C";
+                    if (ch == 0x130 || ch == 0x131) type = "I";
+                    drawLine(out, ch, type, rFull);
+                } else {
+                    if (rFull != null) {
+                        drawLine(out, ch, "F", rFull);
+                    }
+                    if (rSimple != null) {
+                        drawLine(out, ch, "S", rSimple);
+                    }
+                }
+            }
+        } finally {
+            // close (and flush) even when writing fails part-way
+            out.close();
+        }
+    }
+
+    /** Writes one "code; type; mapping; # name" line. */
+    static void drawLine(PrintWriter out, int ch, String type, String result) {
+        out.println(Utility.hex(ch)
+            + "; " + type +
+            "; " + Utility.hex(result, " ") +
+            "; # " + ucd.getName(ch));
+    }
+
+    /**
+     * Builds the folding table for one mode.
+     *
+     * @param full true for full (string) case mappings, false for simple
+     *             (single code point) mappings
+     * @return map from a single-code-point string to the representative
+     *         string of its case equivalence class; code points that are
+     *         their own representative have no entry
+     */
+    static Map getCaseFolding(boolean full) throws java.io.IOException {
+        Map data = new TreeMap();
+        Map repChar = new TreeMap();
+
+        // get the equivalence classes
+
+        for (int ch = 0; ch <= 0x10FFFF; ++ch) {
+            if ((ch & 0x3FF) == 0) System.out.println(Utility.hex(ch)); // progress
+            if (!ucd.isRepresented(ch)) continue;
+            getClosure(ch, data, full);
+        }
+
+        // get the representative characters
+
+        Iterator it = data.keySet().iterator();
+        while (it.hasNext()) {
+            String s = (String) it.next();
+            Set set = (Set) data.get(s);
+            String rep = null;
+            int repGood = 0;
+            Iterator it2 = set.iterator();
+            while (it2.hasNext()) {
+                String s2 = (String)it2.next();
+                int s2Good = goodness(s2, full);
+                // strictly greater: on a tie the earlier element of the set wins
+                if (s2Good > repGood) {
+                    rep = s2;
+                    repGood = s2Good;
+                }
+            }
+            if (rep == null) System.err.println("No representative for: " + toString(set));
+            else if (repGood < 128) {
+                System.err.println("Non-optimal!!: "
+                    + ucd.getName(rep) + ", " + toString(set,true));
+            }
+            it2 = set.iterator();
+            while (it2.hasNext()) {
+                String s2 = (String)it2.next();
+                // Count code points, not UTF-16 units, so that supplementary
+                // code points (two chars) get an entry as well.
+                if (UTF32.length32(s2) == 1 && !s2.equals(rep)) repChar.put(UTF32.getCodePointSubstring(s2,0), rep);
+            }
+        }
+        return repChar;
+    }
+
+    /**
+     * Scores a candidate representative: its length, plus 128 when it
+     * round-trips through upper-then-lower, plus 64 when it is already NFC.
+     * A null candidate scores 0.
+     */
+    static int goodness(String s, boolean full) {
+        if (s == null) return 0;
+        int result = s.length();
+        if (s.equals(lower(upper(s, full), full))) result |= 128;
+        if (s.equals(NFC.normalize(s))) result |= 64;
+        return result;
+    }
+
+    static Normalizer NFC = new Normalizer(Normalizer.NFC);
+
+    /**
+     * Adds the case closure of ch to data. Nothing is recorded when ch maps
+     * to itself under lower, title and upper. Otherwise a set holding ch and
+     * everything reachable by repeated case mapping is built, merging with
+     * any sets already registered in data for those strings.
+     */
+    static void getClosure(int ch, Map data, boolean full) {
+        String charStr = UTF32.valueOf32(ch);
+        String lowerStr = lower(charStr, full);
+        String titleStr = title(charStr, full);
+        String upperStr = upper(charStr, full);
+        if (charStr.equals(lowerStr) && charStr.equals(upperStr) && charStr.equals(titleStr)) return;
+        if (DEBUG) System.err.println("Closure for " + Utility.hex(ch));
+
+        // make new set
+        Set set = new TreeSet();
+        set.add(charStr);
+        data.put(charStr, set);
+
+        // add cases to get started
+        add(set, lowerStr, data);
+        add(set, upperStr, data);
+        add(set, titleStr, data);
+
+        // close it
+        main:
+        while (true) {
+            Iterator it = set.iterator();
+            while (it.hasNext()) {
+                String s = (String) it.next();
+                // the set cannot be modified while iterating, so restart the
+                // scan with a fresh iterator after every successful add
+                if (add(set, lower(s, full), data)) continue main;
+                if (add(set, title(s, full), data)) continue main;
+                if (add(set, upper(s, full), data)) continue main;
+            }
+            break;
+        }
+    }
+
+    /** Lowercases s, then maps final sigma (U+03C2) to sigma (U+03C3). */
+    static String lower(String s, boolean full) {
+        String result = lower2(s,full);
+        return result.replace('\u03C2', '\u03C3'); // HACK for lower
+    }
+
+    // These functions are no longer necessary, since UCD is parameterized,
+    // but it's not worth changing
+
+    /**
+     * Lowercase mapping: FULL on the whole string in full mode, SIMPLE on a
+     * single code point otherwise (longer strings are returned unchanged).
+     */
+    static String lower2(String s, boolean full) {
+        if (!full) {
+            if (UTF32.length32(s) != 1) return s;
+            return ucd.getCase(UTF32.char32At(s,0), SIMPLE, LOWER);
+        }
+        return ucd.getCase(s, FULL, LOWER);
+    }
+
+    /**
+     * Uppercase mapping, selected the same way as lower2(). The SIMPLE and
+     * FULL arguments used to be swapped here relative to lower2().
+     */
+    static String upper(String s, boolean full) {
+        if (!full) {
+            if (UTF32.length32(s) != 1) return s;
+            return ucd.getCase(UTF32.char32At(s,0), SIMPLE, UPPER);
+        }
+        return ucd.getCase(s, FULL, UPPER);
+    }
+
+    /**
+     * Titlecase mapping, selected the same way as lower2(). The SIMPLE and
+     * FULL arguments used to be swapped here relative to lower2().
+     */
+    static String title(String s, boolean full) {
+        if (!full) {
+            if (UTF32.length32(s) != 1) return s;
+            return ucd.getCase(UTF32.char32At(s,0), SIMPLE, TITLE);
+        }
+        return ucd.getCase(s, FULL, TITLE);
+    }
+
+    /**
+     * Adds s to set and, when data already maps s to a different set, merges
+     * that set in and repoints all of its members at set.
+     *
+     * @return false when s was already present, true when it was added
+     */
+    static boolean add(Set set, String s, Map data) {
+        if (set.contains(s)) return false;
+        set.add(s);
+        if (DEBUG) System.err.println("adding: " + toString(set));
+        Set other = (Set) data.get(s);
+        if (other != null && other != set) { // merge
+            // make all the items in the other set point to the merged set
+            Iterator it = other.iterator();
+            while (it.hasNext()) {
+                data.put(it.next(), set);
+            }
+            set.addAll(other);
+        }
+        if (DEBUG) System.err.println("done adding: " + toString(set));
+        return true;
+    }
+
+    /** Formats the set as "{hex, hex, ...}" for diagnostics. */
+    static String toString(Set set) {
+        StringBuffer result = new StringBuffer("{");
+        Iterator it2 = set.iterator();
+        boolean first = true;
+        while (it2.hasNext()) {
+            String s2 = (String) it2.next();
+            if (!first) result.append(", ");
+            first = false;
+            result.append(Utility.hex(s2, " "));
+        }
+        return result.append("}").toString();
+    }
+
+    /** Formats the set as "{name, name, ...}"; the flag t is not consulted. */
+    static String toString(Set set, boolean t) {
+        StringBuffer result = new StringBuffer("{");
+        Iterator it2 = set.iterator();
+        boolean first = true;
+        while (it2.hasNext()) {
+            String s2 = (String) it2.next();
+            if (!first) result.append(", ");
+            first = false;
+            result.append(ucd.getName(s2));
+        }
+        return result.append("}").toString();
+    }
+
+    /**
+     * Writes UnicodeAge.txt: a header followed by one DiffPropertyLister
+     * listing per consecutive pair of versions 110, 200, 210, 300, 310.
+     */
+    static final void getAge() throws IOException {
+        PrintStream log = new PrintStream(
+            new BufferedOutputStream (
+            new FileOutputStream("UnicodeAge.txt"),
+            4*1024));
+        try {
+            log.println("# Derived file showing when various code points were allocated in Unicode");
+            log.println("# author: M. Davis");
+            log.println("# generated: " + new Date());
+            log.println("# Notes:");
+            log.println("# - The old Hangul Syllables (removed from 2.0) are not included in the 110 listing.");
+            log.println("# - The supplementary private use code points, although allocated earlier,");
+            log.println("#   were NOT specifically listed in the UCD until 3.0.1, and are not included until then.");
+            new DiffPropertyLister(null, "110", log).print();
+            new DiffPropertyLister("110", "200", log).print();
+            new DiffPropertyLister("200", "210", log).print();
+            new DiffPropertyLister("210", "300", log).print();
+            new DiffPropertyLister("300", "310", log).print();
+        } finally {
+            log.close();
+        }
+    }
+
+}
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateData.java
@ -0,0 +1,667 @@
+package com.ibm.text.UCD;
+
+import java.util.*;
+import java.io.*;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+
+import com.ibm.text.utility.*;
+
+public class GenerateData implements UCD_Types {
+    
+    /**
+     * Command-line driver: loads the default UCD, then handles each argument
+     * in order as a (case-insensitive) generation command.
+     *
+     * "version" consumes the following argument and reloads the UCD with it.
+     * An argument starting with '#' ends processing immediately (the final
+     * "END" line is not printed in that case). Empty arguments are skipped.
+     *
+     * @param args commands such as DerivedBidiClass, PropList, Scripts, ...
+     */
+    public static void main (String[] args) throws IOException {
+        System.out.println("START");
+        ucd = UCD.make();
+        System.out.println("Loaded UCD " + ucd.getVersion() + " " + (new Date(ucd.getDate())));
+        String version = ucd.getVersion();
+
+        for (int i = 0; i < args.length; ++i) {
+            String arg = args[i];
+            if (arg.length() == 0) continue; // charAt(0) below would throw on ""
+            if (arg.charAt(0) == '#') return; // skip rest of line
+            int mask = 0;
+
+            Utility.fixDot();
+            System.out.println("Argument: " + arg);
+
+            if (arg.equalsIgnoreCase("version")) {
+                if (i + 1 >= args.length) {
+                    // a trailing "version" used to read past the end of args
+                    System.out.println(" ! Missing value after 'version'");
+                    break;
+                }
+                version = args[++i];
+                ucd = UCD.make(version);
+            } else if (arg.equalsIgnoreCase("partition")) {
+                partitionProperties();
+            } else if (arg.equalsIgnoreCase("list")) {
+                listProperties();
+            } else if (arg.equalsIgnoreCase("diff")) {
+                listDifferences();
+            } else if (arg.equalsIgnoreCase("DerivedBidiClass")) {
+                generateVerticalSlice(BIDI_CLASS, BIDI_CLASS+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+                    "DerivedBidiClass-" + version );
+            } else if (arg.equalsIgnoreCase("DerivedNormalizationProperties")) {
+                mask = Utility.setBits(0, DerivedPropertyLister.FC_NFKC_Closure, DerivedPropertyLister.ExpandsOnNFKC);
+                mask = Utility.clearBit(mask, DerivedPropertyLister.FullCompInclusion);
+                generateDerived(mask, HEADER_DERIVED, "DerivedNormalizationProperties-" + version );
+            } else if (arg.equalsIgnoreCase("DerivedEastAsianWidth")) {
+                generateVerticalSlice(EAST_ASIAN_WIDTH, EAST_ASIAN_WIDTH+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+                    "DerivedEastAsianWidth-" + version );
+            } else if (arg.equalsIgnoreCase("DerivedGeneralCategory")) {
+                generateVerticalSlice(CATEGORY, CATEGORY+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+                    "DerivedGeneralCategory-" + version );
+            } else if (arg.equalsIgnoreCase("DerivedCombiningClass")) {
+                generateVerticalSlice(COMBINING_CLASS, COMBINING_CLASS+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+                    "DerivedCombiningClass-" + version );
+            } else if (arg.equalsIgnoreCase("DerivedDecompositionType")) {
+                generateVerticalSlice(DECOMPOSITION_TYPE, DECOMPOSITION_TYPE+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+                    "DerivedDecompositionType-" + version );
+            } else if (arg.equalsIgnoreCase("DerivedNumericType")) {
+                generateVerticalSlice(NUMERIC_TYPE, NUMERIC_TYPE+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+                    "DerivedNumericType-" + version );
+            } else if (arg.equalsIgnoreCase("DerivedJoiningType")) {
+                generateVerticalSlice(JOINING_TYPE, JOINING_TYPE+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+                    "DerivedJoiningType-" + version );
+            } else if (arg.equalsIgnoreCase("DerivedJoiningGroup")) {
+                generateVerticalSlice(JOINING_GROUP, JOINING_GROUP+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+                    "DerivedJoiningGroup-" + version );
+            } else if (arg.equalsIgnoreCase("DerivedBinaryProperties")) {
+                generateVerticalSlice(BINARY_PROPERTIES, BINARY_PROPERTIES+1, KEEP_SPECIAL, HEADER_DERIVED,
+                    "DerivedBinaryProperties-" + version );
+            } else if (arg.equalsIgnoreCase("DerivedNumericValues")) {
+                generateVerticalSlice(LIMIT_ENUM, LIMIT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+                    "DerivedNumericValues-" + version );
+            } else if (arg.equalsIgnoreCase("DerivedCoreProperties")) {
+                mask = Utility.setBits(0, DerivedPropertyLister.PropMath, DerivedPropertyLister.Mod_ID_Continue_NO_Cf);
+                generateDerived(mask, HEADER_DERIVED, "DerivedCoreProperties-" + version );
+            } else if (arg.equalsIgnoreCase("DerivedLineBreak")) {
+                generateVerticalSlice(LINE_BREAK, LINE_BREAK+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+                    "DerivedLineBreak-" + version );
+            } else if (arg.equalsIgnoreCase("Scripts")) {
+                // NOTE(review): unlike every other target, no version is appended here -- confirm intended
+                generateVerticalSlice(SCRIPT+1, SCRIPT + NEXT_ENUM, KEEP_SPECIAL, HEADER_SCRIPTS, "Scripts-");
+            } else if (arg.equalsIgnoreCase("PropList")) {
+                generateVerticalSlice(BINARY_PROPERTIES + White_space, BINARY_PROPERTIES + Noncharacter_Code_Point + 1,
+                        KEEP_SPECIAL, HEADER_EXTEND, "PropList-" + version);
+            } else if (arg.equalsIgnoreCase("AllBinary")) {
+                generateVerticalSlice(BINARY_PROPERTIES, BINARY_PROPERTIES + NEXT_ENUM,
+                        KEEP_SPECIAL, HEADER_EXTEND, "AllBinary-" + version);
+            } else if (arg.equalsIgnoreCase("NormalizationTest")) {
+                writeNormalizerTestSuite("NormalizationTest-" + version + ".txt" );
+            } else if (arg.equalsIgnoreCase("generateCompExclusions")) {
+                generateCompExclusions();
+            } else {
+                System.out.println(" ! Unknown option -- must be one of the following (case-insensitive)");
+                System.out.println(" ! generateCompExclusions,...");
+            }
+        }
+        System.out.println("END");
+    }
+    
+   // NFKC normalizer shared by the diagnostic helpers below.
+   static Normalizer nfkc = new Normalizer(Normalizer.NFKC);
+
+    /**
+     * Diagnostic: prints test and its NFKC form in hex, then a per-code-point
+     * breakdown of each via show().
+     */
+    public static void checkHoffman(String test) {
+        String normalized = nfkc.normalize(test);
+        String summary = Utility.hex(test) + " => " + Utility.hex(normalized);
+        System.out.println(summary);
+
+        System.out.println();
+        show(test, 0);
+
+        System.out.println();
+        show(normalized, 0);
+    }
+    
+    public static void show(String s, int indent) {
+        int cp;
+        for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
+            cp = UTF32.char32At(s, i);
+            String cc = " " + ucd.getCombiningClass(cp);
+            cc = Utility.repeat(" ", 4 - cc.length()) + cc;
+            System.out.println(Utility.repeat(" ", indent) + ucd.getCode(cp) + cc + " " + ucd.getName(cp));
+            String decomp = nfkc.normalize(cp);
+            if (!decomp.equals(UTF32.valueOf32(cp))) {
+                show(decomp, indent + 4);
+            }
+        }
+    }
+    
+    
+    // Timestamp format used in generated file headers; always rendered in GMT.
+    static DateFormat myDateFormat;
+
+    static {
+        DateFormat gmtFormat = new SimpleDateFormat("yyyy-MM-dd' 'HH:mm:ss.S' GMT'");
+        gmtFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
+        myDateFormat = gmtFormat;
+    }
+    
+    /**
+     * Strips a one-digit draft suffix from a ".txt" file name, e.g.
+     * "DerivedJoiningGroup-3.1.0d1.txt" becomes "DerivedJoiningGroup-3.1.0.txt".
+     * Names that do not end in "d" + digit + ".txt" are returned unchanged.
+     *
+     * @param s file name to fix
+     * @return s without the "d<digit>" just before ".txt", or s itself
+     */
+    public static String fixFile(String s) {
+        int len = s.length();
+        if (!s.endsWith(".txt")) return s;
+        // too short to hold "d<digit>.txt"; charAt(len-6) would throw otherwise
+        if (len < 6) return s;
+        if (s.charAt(len-6) != 'd') return s;
+        char c = s.charAt(len-5);
+        if (c < '0' || '9' < c) return s;
+        System.out.println("Fixing File Name");
+        return s.substring(0,len-6) + s.substring(len-4);
+    }
+    
+    static final int HEADER_EXTEND = 0, HEADER_DERIVED = 1, HEADER_SCRIPTS = 2;
+    
+    public static void doHeader(String fileName, PrintStream output, int headerChoice) {
+        output.println("# " + fileName + ".txt");
+        output.println("#");
+        if (headerChoice == HEADER_SCRIPTS) {
+            output.println("# For documentation, see UTR #24: Script Names");
+            output.println("#   http://www.unicode.org/unicode/reports/tr24/");
+        } else if (headerChoice == HEADER_EXTEND) {
+            output.println("# Unicode Character Database: Extended Properties");
+            output.println("# For documentation, see PropList.html");
+        } else {
+            output.println("# Unicode Character Database: Derived Property Data");
+            output.println("# Generated algorithmically from the Unicode Character Database");
+            output.println("# For documentation, see DerivedProperties.html");
+        }
+        output.println("# Date: " + myDateFormat.format(new Date()) + " [MD]");
+        output.println("# Note: Unassigned and Noncharacter codepoints are omitted,");
+        output.println("#       except when listing Noncharacter or Cn.");
+        output.println("# ================================================");
+        output.println();
+    }
+   
+    /**
+     * Writes one derived-property file into GEN_DIR. After the banner, every
+     * bit set in bitMask selects the DerivedPropertyLister with that index;
+     * processing stops at the first selected index that reaches
+     * DerivedPropertyLister.LIMIT. A dot is printed to System.out per property.
+     * As a side effect the shared ucd field is reset to UCD.make("310").
+     *
+     * @param bitMask      bit i set means "list derived property i"
+     * @param headerChoice banner selector passed through to doHeader
+     * @param fileName     output file name, relative to GEN_DIR
+     * @throws IOException if the output file cannot be opened
+     */
+    public static void generateDerived (int bitMask, int headerChoice, String fileName) throws IOException {
+        ucd = UCD.make("310");
+        PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + fileName));
+        try {
+            doHeader(fileName, output, headerChoice);
+            for (int i = 0; i < 32; ++i) {
+                if ((bitMask & (1<<i)) == 0) continue;
+                if (i >= DerivedPropertyLister.LIMIT) break;
+                System.out.print('.');
+                output.println("# ================================================");
+                output.println();
+                new DerivedPropertyLister(ucd, i, output).print();
+            }
+        } finally {
+            // Release the file handle even when a lister throws.
+            output.close();
+        }
+    }
+    
+    /*
+    public static void listStrings(String file, int type, int subtype) throws IOException {
+        ucd = UCD.make("310");
+        UCD ucd30 = UCD.make("300");
+        PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + file));
+        
+        for (int i = 0; i < 0x10FFFF; ++i) {
+            if ((i & 0xFFF) == 0) System.out.println("# " + i);
+            if (!ucd.isRepresented(i)) continue;
+            if (ucd30.isRepresented(i)) continue;
+            String string = "";
+            switch(type) {
+                case 0: string = ucd.getSimpleLowercase(i);
+            }
+            if (UTF32.length32(string) == 1 && UTF32.char32At(string,0) == i) continue;
+            output.println(Utility.hex(i) + "; C; " + Utility.hex(string) + "; # " + ucd.getName(i));
+        }
+        output.close();
+    }
+    */
+    
+    /**
+     * Writes "CompositionExclusionsDelta.txt" into GEN_DIR using a CompLister.
+     *
+     * @throws IOException if the output file cannot be opened
+     */
+    public static void generateCompExclusions() throws IOException {
+        PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + "CompositionExclusionsDelta.txt"));
+        try {
+            new CompLister(output).print();
+        } finally {
+            // Release the file handle even when the lister throws.
+            output.close();
+        }
+    }
+    
+    /**
+     * Lister that selects code points whose decomposition type is CANONICAL in
+     * the "310" data but not in the "300" data, labelling each with the length
+     * (in code points) of its decomposition mapping.
+     */
+    static class CompLister extends PropertyLister {
+        UCD oldUCD;
+        /** Mapping length of the most recently selected code point. */
+        int oldLength = 0;
+
+        public CompLister(PrintStream output) {
+            this.output = output;
+            ucdData = UCD.make("310");
+            oldUCD = UCD.make("300");
+            showOnConsole = true;
+        }
+
+        /** Returns the decomposition-mapping length of cp as a decimal string. */
+        public String propertyName(int cp) {
+            return String.valueOf(UTF32.length32(ucdData.getDecompositionMapping(cp)));
+        }
+
+        /**
+         * Returns EXCLUDE unless cp is canonical in the new data only. For a
+         * selected cp, returns BREAK when its mapping length differs from the
+         * previously selected one and INCLUDE otherwise.
+         */
+        public byte status(int cp) {
+            boolean newlyCanonical = ucdData.getDecompositionType(cp) == CANONICAL
+                && oldUCD.getDecompositionType(cp) != CANONICAL;
+            if (!newlyCanonical) {
+                return EXCLUDE;
+            }
+            int previousLength = oldLength;
+            oldLength = UTF32.length32(ucdData.getDecompositionMapping(cp));
+            if (previousLength != oldLength) {
+                return BREAK;
+            }
+            return INCLUDE;
+        }
+    }
+    
+    /**
+     * Counts how many distinct combinations of unified binary property values
+     * occur among allocated code points, reporting progress on System.out.
+     * The first code point seen with each new combination is printed.
+     */
+    public static void partitionProperties() throws IOException {
+
+        // Gather the defined property ids; the JOINING_GROUP, AGE and
+        // COMBINING_CLASS groups are left out (SCRIPT is not).
+        int[] propertyIds = new int[500];
+        int propertyCount = 0;
+        for (int id = 1; id < LIMIT_ENUM; ++id) {
+            int group = id & 0xFF00;
+            boolean skippedGroup = group == JOINING_GROUP || group == AGE || group == COMBINING_CLASS;
+            if (skippedGroup) continue;
+            if (MyPropertyLister.isUnifiedBinaryPropertyDefined(ucd, id)) {
+                propertyIds[propertyCount++] = id;
+            }
+        }
+        System.out.println("props: " + propertyCount);
+
+        // signature is a scratch bit set, one bit per gathered property; a copy
+        // is stored as the map key the first time a combination shows up.
+        BitSet signature = new BitSet();
+        Map firstSeen = new HashMap();
+        int examined = 0;
+        for (int cp = 0; cp <= 0x10FFFF; ++cp) {
+            Utility.dot(cp);
+            int cat = ucd.getCategory(cp);
+            if (cat == UNASSIGNED || cat == PRIVATE_USE || cat == SURROGATE) continue;
+            if (!ucd.isAllocated(cp)) continue;
+
+            for (int k = 0; k < propertyCount; ++k) {
+                if (MyPropertyLister.getUnifiedBinaryProperty(ucd, cp, propertyIds[k])) {
+                    signature.set(k);
+                } else {
+                    signature.clear(k);
+                }
+            }
+
+            ++examined;
+            if (firstSeen.containsKey(signature)) continue;
+
+            firstSeen.put(signature.clone(), UTF32.valueOf32(cp));
+            Utility.fixDot();
+            System.out.println("Set Size: " + firstSeen.size() + ", total: " + examined + ", " + ucd.getCodeAndName(cp));
+        }
+
+        Utility.fixDot();
+        System.out.println("Set Size: " + firstSeen.size());
+    }
+    
+    /**
+     * Writes "PropertyDifferences.txt" into GEN_DIR: for every pair (i, j) of
+     * defined unified binary properties with j after i, how the two sets of
+     * allocated code points relate. JOINING_GROUP, AGE, COMBINING_CLASS and
+     * SCRIPT values are skipped, as are pairs from the same group unless that
+     * group is BINARY_PROPERTIES. Pairs where either set is empty are omitted.
+     *
+     * Each output line is tab separated: first name, second name, relation
+     * (DISJOINT, EQUALS, CONTAINS or OVERLAPS), size of the intersection,
+     * count only in the first, count only in the second. The pair is swapped
+     * when needed so that the larger "only in" count comes first.
+     *
+     * @throws IOException if the output file cannot be opened
+     */
+    public static void listDifferences() throws IOException {
+
+        PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + "PropertyDifferences.txt"));
+        try {
+            for (int i = 1; i < LIMIT_ENUM; ++i) {
+                int iType = i & 0xFF00;
+                if (iType == JOINING_GROUP || iType == AGE || iType == COMBINING_CLASS || iType == SCRIPT) continue;
+                if (!MyPropertyLister.isUnifiedBinaryPropertyDefined(ucd, i)) continue;
+                String iNameShort = MyPropertyLister.getFullUnifiedBinaryPropertyID(ucd, i, MyPropertyLister.SHORT);
+                String iNameLong = MyPropertyLister.getFullUnifiedBinaryPropertyID(ucd, i, MyPropertyLister.LONG);
+
+                System.out.println();
+                System.out.println();
+                System.out.println(iNameLong);
+                output.println("#" + iNameLong);
+
+                // Group (high byte) of the previous j, used to print a heading
+                // each time the inner loop enters a new property group.
+                int last = -1;
+                for (int j = i+1; j < LIMIT_ENUM; ++j) {
+                    int jType = j & 0xFF00;
+                    if (jType == JOINING_GROUP || jType == AGE || jType == COMBINING_CLASS || jType == SCRIPT
+                        || (jType == iType && jType != BINARY_PROPERTIES)) continue;
+                    if (!MyPropertyLister.isUnifiedBinaryPropertyDefined(ucd, j)) continue;
+
+                    if ((j >> 8) != last) {
+                        last = j >> 8;
+                        System.out.println();
+                        System.out.print("\t" + UCD_Names.SHORT_UNIFIED_PROPERTIES[last]);
+                        output.flush();
+                        output.println("#\t" + UCD_Names.SHORT_UNIFIED_PROPERTIES[last]);
+                    } else {
+                        System.out.print('.');
+                    }
+                    System.out.flush();
+
+                    // i_jPropCount: in i but not j; j_iPropCount: in j but not i.
+                    int bothCount = 0, i_jPropCount = 0, j_iPropCount = 0, iCount = 0, jCount = 0;
+
+                    for (int cp = 0; cp <= 0x10FFFF; ++cp) {
+                        int cat = ucd.getCategory(cp);
+                        if (cat == UNASSIGNED || cat == PRIVATE_USE || cat == SURROGATE) continue;
+                        if (!ucd.isAllocated(cp)) continue;
+
+                        boolean iProp = MyPropertyLister.getUnifiedBinaryProperty(ucd, cp, i);
+                        boolean jProp = MyPropertyLister.getUnifiedBinaryProperty(ucd, cp, j);
+
+                        if (jProp) ++jCount;
+                        if (iProp) {
+                            ++iCount;
+                            if (jProp) ++bothCount;
+                            else ++i_jPropCount;
+                        } else if (jProp) ++j_iPropCount;
+                    }
+                    if (iCount == 0 || jCount == 0) continue;
+
+                    String jNameShort = MyPropertyLister.getFullUnifiedBinaryPropertyID(ucd, j, MyPropertyLister.SHORT);
+
+                    // Both subset directions are labelled CONTAINS: the swap
+                    // below always puts the containing property first.
+                    String rel = bothCount == 0 ? "DISJOINT"
+                        : i_jPropCount == 0 && j_iPropCount == 0 ? "EQUALS"
+                        : i_jPropCount == 0 ? "CONTAINS" // depends on reverse output
+                        : j_iPropCount == 0 ? "CONTAINS"
+                        : "OVERLAPS";
+
+                    if (j_iPropCount > i_jPropCount) {
+                        // reverse output
+                        output.println(jNameShort + "\t" + iNameShort + "\t" + rel
+                            + "\t" + bothCount + "\t" + j_iPropCount + "\t" + i_jPropCount);
+                    } else {
+                        output.println(iNameShort + "\t" + jNameShort + "\t" + rel
+                            + "\t" + bothCount + "\t" + i_jPropCount + "\t" + j_iPropCount);
+                    }
+                }
+            }
+        } finally {
+            // Release the file handle even when a property lookup throws.
+            output.close();
+        }
+    }
+    
+    
+    public static void listProperties() {
+        for (int i = 0; i < LIMIT_ENUM; ++i) {
+            int type = i & 0xFF00;
+            if (type == JOINING_GROUP || type == AGE) continue;
+            if (!MyPropertyLister.isUnifiedBinaryPropertyDefined(ucd, i)) continue;
+            String value = MyPropertyLister.getUnifiedBinaryPropertyID(ucd, i, MyPropertyLister.LONG);
+            if (value.length() == 0) value = "none";
+            else if (value.equals("<unused>")) continue;
+            String abbvalue = MyPropertyLister.getUnifiedBinaryPropertyID(ucd, i, MyPropertyLister.SHORT);
+            if (abbvalue.length() == 0) abbvalue = "no";
+            
+            if (type == COMBINING_CLASS) {
+                value = MyPropertyLister.getCombiningName(i);
+                if (value.length() == 0) {
+                    if ((i & 0xFF) == 0) value = "99";
+                    else continue;
+                }
+                abbvalue = value;
+            }
+            
+            String elide = "";
+            if (type == CATEGORY || type == SCRIPT || type == BINARY_PROPERTIES) elide = "\\p{" 
+                + abbvalue
+                + "}";
+            String abb = "";
+            if (type != BINARY_PROPERTIES) abb = "\\p{" 
+                + UCD_Names.ABB_UNIFIED_PROPERTIES[i>>8] 
+                + "="
+                + abbvalue
+                + "}";
+            String norm = "";
+            if (type != BINARY_PROPERTIES) norm = "\\p{" 
+                + UCD_Names.SHORT_UNIFIED_PROPERTIES[i>>8] 
+                + "="
+                + value
+                + "}";
+            System.out.println("<tr><td>" + elide + "</td><td>" + abb + "</td><td>" + norm + "</td></tr>");
+        }
+    }
+    
+    /** Values for the skipSpecial argument of generateVerticalSlice. */
+    static final byte KEEP_SPECIAL = 0, SKIP_SPECIAL = 1;
+
+    /**
+     * Writes GEN_DIR + file + "dX.txt": a banner followed by one
+     * MyPropertyLister listing for each defined unified binary property in
+     * [startEnum, endEnum). When endEnum equals LIMIT_ENUM, a "Numeric Values"
+     * section is appended with one MyFloatLister listing per distinct numeric
+     * value, in ascending order. Progress is reported on System.out.
+     *
+     * @param startEnum    first property id to list (inclusive)
+     * @param endEnum      last property id to list (exclusive)
+     * @param skipSpecial  SKIP_SPECIAL to omit ids from
+     *                     (BINARY_PROPERTIES | CompositionExclusion) up to, but
+     *                     not including, AGE + NEXT_ENUM
+     * @param headerChoice banner selector passed through to doHeader
+     * @param file         base name of the output file, without "dX.txt"
+     * @throws IOException if the output file cannot be opened
+     */
+    public static void generateVerticalSlice(int startEnum, int endEnum, byte skipSpecial, 
+            int headerChoice, String file) throws IOException {
+
+        PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + file + "dX.txt"));
+        try {
+            doHeader(file, output, headerChoice);
+            // Id for which a group heading was last written.
+            int last = -1;
+            for (int i = startEnum; i < endEnum; ++i) {
+                if (!MyPropertyLister.isUnifiedBinaryPropertyDefined(ucd, i)) continue;
+                if (i == DECOMPOSITION_TYPE || i == NUMERIC_TYPE 
+                    || i == (BINARY_PROPERTIES | Non_break)
+                    || i == (JOINING_TYPE | JT_U)
+                    || i == (JOINING_GROUP | NO_SHAPING)
+                    ) continue; // skip zero case
+                if (skipSpecial == SKIP_SPECIAL
+                        && i >= (BINARY_PROPERTIES | CompositionExclusion)
+                        && i < (AGE + NEXT_ENUM)) continue;
+                // A full group heading is written when the group (high byte)
+                // changes and i is at or below BINARY_PROPERTIES or at or above
+                // SCRIPT; every other listing gets a plain separator.
+                if ((last & 0xFF00) != (i & 0xFF00) && (i <= BINARY_PROPERTIES || i >= SCRIPT)) {
+                    output.println();
+                    output.println("# ================================================");
+                    output.println("# " + UCD_Names.UNIFIED_PROPERTIES[i>>8]);
+                    output.println("# ================================================");
+                    output.println();
+                    System.out.println();
+                    System.out.println(UCD_Names.UNIFIED_PROPERTIES[i>>8]);
+                    last = i;
+                } else {
+                    output.println("# ================================================");
+                    output.println();
+                }
+                System.out.print(".");
+                new MyPropertyLister(ucd, i, output).print();
+            }
+            if (endEnum == LIMIT_ENUM) {
+                output.println();
+                output.println("# ================================================");
+                output.println("# Numeric Values (from UnicodeData.txt, field 6/7/8)");
+                output.println("# ================================================");
+                output.println();
+                System.out.println();
+                System.out.println("@NUMERIC VALUES");
+
+                // Collect the distinct numeric values; the TreeSet keeps them sorted.
+                Set floatSet = new TreeSet();
+                for (int i = 0; i < 0x10FFFF; ++i) {
+                    float nv = ucd.getNumericValue(i);
+                    if (Float.isNaN(nv)) continue;
+                    floatSet.add(new Float(nv));
+                }
+                Iterator it = floatSet.iterator();
+                while(it.hasNext()) {
+                    new MyFloatLister(ucd, ((Float)it.next()).floatValue(), output).print();
+                    output.println();
+                    System.out.print(".");
+                }
+            }
+        } finally {
+            // Release the file handle even when a lister throws.
+            output.close();
+        }
+        System.out.println();
+    }
+    
+    /** Character database shared by the static generators in this class. */
+    static UCD ucd;
+
+    /** Normalizers shared by writeNormalizerTestSuite and writeLine. */
+    static public Normalizer formC, formD, formKC, formKD;
+
+    /**
+     * Writes the normalization conformance test file: an explanatory header,
+     * then @Part0 (the hand-picked strings in testSuiteCases), @Part1 (every
+     * assigned, non-private-use code point whose normal forms are not all
+     * identical to itself) and @Part2 (canonical ordering tests built around
+     * every such code point with a non-zero combining class).
+     * Resets the shared ucd and form* fields as a side effect.
+     *
+     * @param fileName file to create; also printed, via fixFile, on the first line
+     * @throws IOException if the file cannot be opened
+     */
+    static public void writeNormalizerTestSuite(String fileName) throws IOException {
+        ucd = UCD.make();
+
+        PrintWriter log = Utility.openPrintWriter(fileName);
+        try {
+            formC = new Normalizer(Normalizer.NFC);
+            formD = new Normalizer(Normalizer.NFD);
+            formKC = new Normalizer(Normalizer.NFKC);
+            formKD = new Normalizer(Normalizer.NFKD);
+
+            // example[cc] holds one sample character for combining class cc.
+            String[] example = new String[256];
+
+            log.println("# " + fixFile(fileName));
+            log.println("#");
+            log.println("# Normalization Test Suite");
+            log.println("# Date: " + myDateFormat.format(new Date()) + " [MD]");
+            log.println("# Format:");
+            log.println("#");
+            log.println("#   Columns (c1, c2,...) are separated by semicolons");
+            log.println("#   Comments are indicated with hash marks");
+            log.println("#");
+            log.println("# CONFORMANCE:");
+            log.println("# 1. The following invariants must be true for all conformant implementations");
+            log.println("#");
+            log.println("#    NFC");
+            log.println("#      c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3)");
+            log.println("#      c4 ==  NFC(c4) ==  NFC(c5)");
+            log.println("#");
+            log.println("#    NFD");
+            log.println("#      c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3)");
+            // Closing parenthesis restored; the original emitted "NFD(c5".
+            log.println("#      c5 ==  NFD(c4) ==  NFD(c5)");
+            log.println("#");
+            log.println("#    NFKC");
+            log.println("#      c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)");
+            log.println("#");
+            log.println("#    NFKD");
+            log.println("#      c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)");
+            log.println("#");
+            log.println("# 2. For every assigned Unicode 3.1.0 code point X that is not specifically");
+            log.println("#    listed in Part 1, the following invariants must be true for all conformant");
+            log.println("#    implementations:");
+            log.println("#");
+            log.println("#      X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)");
+
+            // Console progress labels now match the @Part numbers written below.
+            System.out.println("Writing Part 0");
+
+            log.println("#");
+            log.println("@Part0 # Specific cases");
+            log.println("#");
+
+            for (int j = 0; j < testSuiteCases.length; ++j) {
+                writeLine(testSuiteCases[j], log, false);
+            }
+
+            System.out.println("Writing Part 1");
+
+            log.println("#");
+            log.println("@Part1 # Character by character test");
+            log.println("# All characters not explicitly occurring in c1 of Part 1 have identical NFC, D, KC, KD forms.");
+            log.println("#");
+
+            for (int ch = 0; ch < 0x10FFFF; ++ch) {
+                Utility.dot(ch);
+                if (!ucd.isAssigned(ch)) continue;
+                if (ucd.isPUA(ch)) continue;
+                String cc = UTF32.valueOf32(ch);
+                writeLine(cc,log, true);
+            }
+            Utility.fixDot();
+
+            System.out.println("Finding Examples");
+
+            // Remember the first character encountered for each combining class.
+            for (int ch = 0; ch < 0x10FFFF; ++ch) {
+                Utility.dot(ch);
+                if (!ucd.isAssigned(ch)) continue;
+                if (ucd.isPUA(ch)) continue;
+                int cc = ucd.getCombiningClass(ch);
+                if (example[cc] == null) example[cc] = UTF32.valueOf32(ch);
+            }
+
+            Utility.fixDot();
+            System.out.println("Writing Part 2");
+
+            log.println("#");
+            log.println("@Part2 # Canonical Order Test");
+            log.println("#");
+
+            for (int ch = 0; ch < 0x10FFFF; ++ch) {
+
+                Utility.dot(ch);
+                if (!ucd.isAssigned(ch)) continue;
+                if (ucd.isPUA(ch)) continue;
+                short c = ucd.getCombiningClass(ch);
+                if (c == 0) continue;
+
+                // add character with higher class, same class, lower class
+
+                String sample = "";
+                for (int i = c+1; i < example.length; ++i) {
+                    if (example[i] == null) continue;
+                    sample += example[i];
+                    break;
+                }
+                sample += example[c];
+                for (int i = c-1; i > 0; --i) {
+                    if (example[i] == null) continue;
+                    sample += example[i];
+                    break;
+                }
+
+                // Test the character both after and before the sample run.
+                writeLine("a" + sample + UTF32.valueOf32(ch) + "b", log, false);
+                writeLine("a" + UTF32.valueOf32(ch) + sample + "b", log, false);
+            }
+            Utility.fixDot();
+            log.println("#");
+            log.println("# END OF FILE");
+        } finally {
+            // Release the file handle even when normalization or lookup throws.
+            log.close();
+        }
+    }
+    
+    /**
+     * Writes one test line for cc: the hex code units of cc and of its NFC,
+     * NFD, NFKC and NFKD forms separated by semicolons, followed by a comment
+     * holding the same five strings in readable form and the name of cc.
+     *
+     * @param cc    source string (column c1)
+     * @param log   destination writer
+     * @param check when true, nothing is written if all four normal forms
+     *              equal cc
+     */
+    static void writeLine(String cc, PrintWriter log, boolean check) {
+        String nfc = formC.normalize(cc);
+        String nfd = formD.normalize(cc);
+        String nfkc = formKC.normalize(cc);
+        String nfkd = formKD.normalize(cc);
+
+        boolean invariant = cc.equals(nfc) && cc.equals(nfd) && cc.equals(nfkc) && cc.equals(nfkd);
+        if (check && invariant) {
+            return;
+        }
+
+        // Sanity check: decomposing the composed forms must reproduce the
+        // decomposed forms. On failure, progress tracing is switched on and
+        // the NFD computation is repeated so the trace shows it.
+        String nfdOfNfc = formD.normalize(nfc);
+        String nfdOfNfkc = formD.normalize(nfkc);
+        boolean consistent = nfdOfNfc.equals(nfd) && nfdOfNfkc.equals(nfkd);
+        if (!consistent) {
+            System.out.println("Danger Will Robinson!");
+            Normalizer.SHOW_PROGRESS = true;
+            nfd = formD.normalize(cc);
+        }
+
+        String hexColumns = Utility.hex(cc," ") + ";" + Utility.hex(nfc," ") + ";" + Utility.hex(nfd," ") + ";"
+            + Utility.hex(nfkc," ") + ";" + Utility.hex(nfkd," ");
+        String readableColumns = comma(cc) + "; " + comma(nfc) + "; " + comma(nfd) + "; "
+            + comma(nfkc) + "; " + comma(nfkd) + "; ";
+        log.println(hexColumns + "; # (" + readableColumns + ") " + ucd.getName(cc));
+    }
+    
+    /** Scratch buffer reused by comma(); this makes comma() non-reentrant. */
+    static StringBuffer commaResult = new StringBuffer();
+
+    /**
+     * Returns a copy of s in which every code point of category Mn is
+     * preceded by U+25CC (dotted circle).
+     * Not recursive and not reentrant: it works in the shared commaResult buffer.
+     *
+     * @param s string to decorate
+     * @return the decorated copy
+     */
+    static final String comma(String s) {
+        commaResult.setLength(0);
+        int cp;
+        // Advance by the width of the code point just read. The original
+        // passed the index i to count16, which looks like a slip; assumes
+        // count16 takes a code point -- TODO confirm against UTF32.
+        for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
+            cp = UTF32.char32At(s, i);
+            if (ucd.getCategory(cp) == Mn) commaResult.append('\u25CC');
+            UTF32.append32(commaResult, cp);
+        }
+        return commaResult.toString();
+    }
+    // Hand-picked source strings that writeNormalizerTestSuite writes, in this order, under the "@Part0 # Specific cases" heading.
+    static final String[] testSuiteCases = {
+        "\u1E0A",
+        "\u1E0C",
+        "\u1E0A\u0323",
+        "\u1E0C\u0307",
+        "D\u0307\u0323",
+        "D\u0323\u0307",
+        "\u1E0A\u031B",
+        "\u1E0C\u031B",
+        "\u1E0A\u031B\u0323",
+        "\u1E0C\u031B\u0307",
+        "D\u031B\u0307\u0323",
+        "D\u031B\u0323\u0307",
+        "\u00C8",
+        "\u0112",
+        "E\u0300",
+        "E\u0304",
+        "\u1E14",
+        "\u0112\u0300",
+        "\u1E14\u0304",
+        "E\u0304\u0300",
+        "E\u0300\u0304",
+        "\u05B8\u05B9\u05B1\u0591\u05C3\u05B0\u05AC\u059F",
+        "\u0592\u05B7\u05BC\u05A5\u05B0\u05C0\u05C4\u05AD"
+
+    };
+
+}
--- a/tools/unicodetools/com/ibm/text/UCD/MLStreamWriter.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MLStreamWriter.java
@ -0,0 +1,314 @@
+package com.ibm.text.utility;
+
+import java.io.*;
+import java.util.*;
+import com.ibm.text.UCD.*;
+
+/**
+ * A Writer that emits HTML- or XML-style markup through a chaining API:
+ * el() opens an element, at() adds an attribute, tx() writes escaped text
+ * and cl() closes the innermost open element. Open element names are kept
+ * on a stack; the pseudo-element "!--" is treated as a comment. Lines are
+ * broken at permitted points once they would exceed maxLineLength, and a
+ * new line is indented by one space per open element.
+ */
+public class MLStreamWriter extends Writer {
+    public static final String copyright =
+      "Copyright (C) 2000, IBM Corp. and others. All Rights Reserved.";
+
+    /**
+     * @param output destination for the markup
+     * @param HTML   true for HTML escaping rules (apostrophes are left
+     *               alone), false to also escape apostrophes as &amp;apos;
+     */
+    public MLStreamWriter (PrintWriter output, boolean HTML) {
+        out = output;
+        isHTML = HTML;
+    }
+
+    /** Creates a writer that uses the HTML escaping rules. */
+    public MLStreamWriter (PrintWriter output) {
+        this(output,true);
+    }
+
+    /**
+     * Opens an element: finishes any pending start tag, writes "&lt;" and the
+     * name, and pushes the name on the element stack. The start tag stays
+     * open so that at() can add attributes. The name "!--" opens a comment
+     * and forces a line break after it.
+     */
+    public MLStreamWriter el(String elementName) {
+        closeIfOpen();
+        print('<', AFTER);
+        print(elementName, elementName.equals("!--") ? AFTER+FORCE : AFTER);
+        stack.add(elementName);
+        inElement = true;
+        return this;
+    }
+
+    /**
+     * Writes the "&gt;" that ends a pending start tag, unless the pending
+     * element is the "!--" comment, which has no closing bracket.
+     */
+    private MLStreamWriter closeIfOpen() {
+        if (inElement && !"!--".equals(stack.get(stack.size()-1))) {
+            print('>',BEFORE+FORCE);
+        }
+        inElement = false;
+        return this;
+    }
+
+    /**
+     * Closes the innermost element and then writes elementName as text.
+     * NOTE(review): the name suggests "close, then open elementName", but
+     * the code calls tx(), not el() -- confirm intent before relying on it.
+     */
+    final public MLStreamWriter cel(String elementName) {
+        return cl().tx(elementName);
+    }
+
+    /**
+     * Adds name="value" to the start tag that is currently open; the value
+     * is escaped with quoted().
+     *
+     * @throws IllegalArgumentException if no start tag is open
+     */
+    public MLStreamWriter at(String attributeName, String attributeValue) {
+        if (!inElement) {
+            throw new IllegalArgumentException("attribute \"" + attributeName + "\" not in element");
+        }
+        print(' ', BOTH);
+        print(attributeName, AFTER);
+        print('=', AFTER);
+        print('"');
+        print(quoted(attributeValue));
+        print('"', AFTER);
+        return this;
+    }
+
+    /** Adds an attribute whose value is the decimal form of value. */
+    public MLStreamWriter at(String attributeName, int value) {
+        return at(attributeName, String.valueOf(value));
+    }
+
+    /** Finishes any pending start tag and writes a line separator. */
+    public MLStreamWriter CR() {
+        closeIfOpen();
+        out.println();
+        return this;
+    }
+
+    /** Finishes any pending start tag and writes text, escaped with quoted(). */
+    public MLStreamWriter tx(String text) {
+        closeIfOpen();
+        print(quoted(text));
+        return this;
+    }
+
+    final public MLStreamWriter tx(char text) {
+        return tx(String.valueOf(text));
+    }
+
+    final public MLStreamWriter tx(int text) {
+        return tx(String.valueOf(text));
+    }
+
+    /** Writes the space-separated 4-digit hex code units of text. */
+    final public MLStreamWriter tx16(String text) {
+        return tx(hex(text));
+    }
+
+    /** Writes text as 4 hex digits. */
+    final public MLStreamWriter tx16(char text) {
+        return tx(hex(text));
+    }
+
+    /** Writes text as 8 hex digits. */
+    final public MLStreamWriter tx16(int text) {
+        return tx(hex(text));
+    }
+
+    /**
+     * Closes the innermost open element, writing "--&gt;" for the "!--"
+     * comment or the matching end tag for anything else. The element is
+     * removed from the stack before the name check, so it stays removed
+     * even when the check fails.
+     *
+     * @param closingElement expected element name, or null to close whatever
+     *                       is innermost
+     * @throws IllegalArgumentException if closingElement is non-null and
+     *         differs from the innermost open element
+     */
+    public MLStreamWriter cl(String closingElement) {
+        closeIfOpen();
+        String lastElement = (String)stack.remove(stack.size()-1);
+        if (closingElement != null && !closingElement.equals(lastElement)) {
+            throw new IllegalArgumentException("mismatch when closing \"" + closingElement
+                + "\", current active element is \"" + lastElement + "\"");
+        }
+        if (lastElement.equals("!--")) {// hack for XML/HTML
+            print("-->",BEFORE+FORCE);
+        } else {
+            print("</");
+            print(lastElement);
+            print('>',BEFORE);
+        }
+        return this;
+    }
+
+    /** Closes the innermost open element without checking its name. */
+    final public MLStreamWriter cl() {
+        return cl(null);
+    }
+
+    /** Closes every open element, innermost first. */
+    public MLStreamWriter closeAllElements() {
+        for (int i = stack.size()-1; i >= 0; --i) {
+            cl(null);
+        }
+        return this;
+    }
+
+    // stream stuff
+
+    /**
+     * Writer contract: finishes any pending start tag and writes the given
+     * characters escaped with quoted(). This path does not update the
+     * line-length bookkeeping used for line breaking.
+     */
+    public void write(char[] source, int start, int len) {
+        closeIfOpen();
+        // later make more efficient!!
+        out.print(quoted(new String(source, start, len)));
+    }
+
+    /** Closes all open elements, then the underlying writer. */
+    public void close() {
+        closeAllElements();
+        out.close();
+    }
+
+    public void flush() {
+        out.flush();
+    }
+
+    // Utility methods
+
+    /**
+     * Writes a table-style cell: the text ch, a "br" element, and inside it a
+     * "tt" element holding the hex form of codepoint, optionally followed by
+     * a space and cat.
+     *
+     * @param ch        text shown in the cell
+     * @param type      element name, optionally followed by ".className",
+     *                  which becomes a class attribute
+     * @param codepoint string whose hex form is shown; null means use ch
+     * @param cat       extra text after the hex form, or null for none
+     */
+    final public MLStreamWriter cell(String ch, String type, String codepoint, String cat) {
+        if (codepoint == null) codepoint = ch;
+        int dotpos = type.indexOf('.');
+        if (dotpos == -1) el(type);
+        else {
+            el(type.substring(0,dotpos));
+            at("class",type.substring(dotpos+1));
+        }
+        tx(ch).el("br").el("tt").tx16(codepoint);
+        if (cat != null) tx(" ").tx(cat);
+        // Close tt, br and the cell element itself.
+        cl().cl().cl();
+        return this;
+    }
+
+    final public MLStreamWriter cell(String ch) {
+        return cell(ch,"td",null,null);
+    }
+
+    final public MLStreamWriter cell(String ch, String type) {
+        return cell(ch,type,null,null);
+    }
+
+    final public MLStreamWriter cell(String ch, String type, String codepoint) {
+        return cell(ch,type,codepoint,null);
+    }
+
+    /**
+     * Supplies an upper-case hex representation of an integer, treated as
+     * unsigned and zero-padded on the left to at least width digits. A value
+     * that needs more than width digits is returned in full; the original
+     * threw StringIndexOutOfBoundsException there and for width above 8.
+     */
+    static public String hex(int i, int width) {
+        String result = Long.toString(i & 0xFFFFFFFFL, 16).toUpperCase();
+        if (result.length() >= width) return result;
+        StringBuffer padded = new StringBuffer(width);
+        for (int k = result.length(); k < width; ++k) padded.append('0');
+        return padded.append(result).toString();
+    }
+
+    /**
+     * Supplies a zero-padded hex representation of an integer (without 0x)
+     */
+    static public String hex(int i) {
+        return hex(i,8);
+    }
+
+    /**
+     * Supplies a zero-padded hex representation of a Unicode character (without 0x, \\u)
+     */
+    static public String hex(char i) {
+        return hex(i,4);
+    }
+
+    /**
+     * Supplies a zero-padded hex representation of a Unicode String (without 0x, \\u)
+     *@param sep can be used to give a sequence, e.g. hex("ab", ",") gives "0061,0062"
+     */
+    static public String hex(String s, String sep) {
+        StringBuffer result = new StringBuffer();
+        for (int i = 0; i < s.length(); ++i) {
+            if (i != 0) result.append(sep);
+            result.append(hex(s.charAt(i)));
+        }
+        return result.toString();
+    }
+
+    /** Same as hex(s, " "). */
+    static public String hex(String s) {
+        return hex(s," ");
+    }
+
+
+    /**
+     * Writes a small-font credit: "[", a link to url labelled name, a script
+     * that appends the document's last-modified date, and "]".
+     */
+    public void author(String name, String url) {
+        el("font").at("size","-3").tx("[").el("a").at("href",url).tx(name).cl("a").el("script").el("!--");
+        tx("document.write(', ', document.lastModified);");
+        cl("!--").cl("script").tx("]").cl("font");
+    }
+
+    // ================== PRIVATES =================
+
+    PrintWriter out;
+    boolean isHTML;
+    /** Names of the currently open elements, innermost last. */
+    ArrayList stack = new ArrayList();
+    /** True while a start tag has been begun but its "&gt;" not yet written. */
+    boolean inElement = false;
+    Normalizer formC = new Normalizer(Normalizer.NFC);
+    /** Number of characters written on the current line via print(). */
+    int len;
+    int maxLineLength = 60;
+    // later, add better line end management, indenting
+
+    // Break hints for print(): a break is allowed BEFORE and/or AFTER the
+    // item; FORCE makes the break unconditional.
+    static final int NONE=0, BEFORE=1, AFTER=2, BOTH=3, FORCE = 4; // chosen for bits!!
+
+    final void print(String s) {
+        print(s,NONE);
+    }
+
+    final void print(char c) {
+        print(c,NONE);
+    }
+
+    /** Writes s, offering a line break before and/or after it per doesBreak. */
+    final void print(String s, int doesBreak) {
+        if ((doesBreak & BEFORE) != 0) tryBreak(s.length(), doesBreak);
+        len += s.length();
+        out.print(s);
+        if ((doesBreak & AFTER) != 0) tryBreak(0, doesBreak);
+    }
+
+    /** Writes c, offering a line break before and/or after it per doesBreak. */
+    final void print(char c, int doesBreak) {
+        if ((doesBreak & BEFORE) != 0) tryBreak(1, doesBreak);
+        ++len;
+        out.print(c);
+        if ((doesBreak & AFTER) != 0) tryBreak(0, doesBreak);
+    }
+
+    /**
+     * Starts a new line when FORCE is set or when adding toAdd characters
+     * would pass maxLineLength. The new line is indented by one space per
+     * open element.
+     */
+    void tryBreak(int toAdd, int doesBreak) {
+        if ((doesBreak & FORCE) != 0 || (len + toAdd) > maxLineLength) {
+            out.println();
+            len = stack.size();
+            for (int i = 0; i < len; ++i) out.print(' ');
+        }
+    }
+
+    /**
+     * Returns source normalized with formC and escaped for markup: the
+     * characters " &lt; &amp; &gt; become entities, ' becomes &amp;apos; only
+     * when not in HTML mode, and newline, carriage return and tab pass
+     * through. Other characters below space, U+007F..U+009F, surrogate code
+     * units, and U+FFFE and above are replaced by U+FFFD.
+     */
+    public String quoted(String source) {
+        source = formC.normalize(source);
+        StringBuffer result = new StringBuffer();
+        for (int i = 0; i < source.length(); ++i) {
+            char ch = source.charAt(i);
+            switch(ch) {
+            case '\'':
+                if (!isHTML) {
+                    result.append("&apos;");
+                } else {
+                    result.append(ch);
+                }
+                break;
+            case '\"':
+                result.append("&quot;");
+                break;
+            case '<':
+                result.append("&lt;");
+                break;
+            case '&':
+                result.append("&amp;");
+                break;
+            case '>':
+                result.append("&gt;");
+                break;
+            case '\n': case '\r': case '\t':
+                result.append(ch);
+                break;
+            default: if (ch < ' ' // do surrogates later
+                || ch >= '\u007F' && ch <= '\u009F'
+                || ch >= '\uD800' && ch <= '\uDFFF'
+                || ch >= '\uFFFE') {
+                    result.append('\uFFFD');
+                } else {
+                    result.append(ch);
+                }
+                break;
+            }
+        }
+        return result.toString();
+    }
+
+}
--- a/tools/unicodetools/com/ibm/text/UCD/MyFloatLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MyFloatLister.java
@ -0,0 +1,31 @@
+package com.ibm.text.UCD;
+import java.io.*;
+
+class MyFloatLister extends PropertyLister {
+    private float propMask;
+        
+    public MyFloatLister(UCD ucd, float f, PrintStream output) {
+        this.propMask = f;
+        this.output = output;
+        this.ucdData = ucd;
+    }
+        
+    public String propertyName(int cp) {
+        return ""+ucdData.getNumericValue(cp);
+    }
+        
+    public String optionalName(int cp) {
+        return ucdData.getNumericTypeID(cp);
+    }
+        
+    public byte status(int cp) {
+        //if ((cp & 0xFFF) == 0) System.out.println("# " + Utility.hex(cp));
+        if (!ucdData.isRepresented(cp)) {
+            if (ucdData.mapToRepresentative(cp, false) != cp) return PropertyLister.CONTINUE;
+            return PropertyLister.CONTINUE;
+        }
+        if (ucdData.getCategory(cp) == Cn) return PropertyLister.CONTINUE;
+        return ucdData.getNumericValue(cp) == propMask ? INCLUDE : EXCLUDE;
+    }
+}
+    
--- a/tools/unicodetools/com/ibm/text/UCD/MyPropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MyPropertyLister.java
@ -0,0 +1,270 @@
+package com.ibm.text.UCD;
+import java.io.*;
+
+import com.ibm.text.utility.*;
+
+final class MyPropertyLister extends PropertyLister {
+    
+    static final boolean BRIDGE = false;
+    
+    private int propMask;
+        
+    /**
+     * Creates a lister for one unified property value.
+     * @param ucd       character database the property is read from
+     * @param propMask  unified property number: property type in the high
+     *                  byte, value in the low byte
+     * @param output    stream stored in the inherited output field
+     */
+    public MyPropertyLister(UCD ucd, int propMask, PrintStream output) {
+        this.ucdData = ucd;
+        this.output = output;
+        this.propMask = propMask;
+        // skip gen cat: general-category listings do not get the category comment
+        if (propMask < COMBINING_CLASS) {
+            usePropertyComment = false;
+        }
+    }
+    
+    /**
+     * Returns the descriptive name of a canonical combining class.
+     * Only the low byte of propMask is examined.
+     * @param propMask  combining class, optionally with a property type in the higher bits
+     * @return the descriptive name, or "" when the class has no name in this table
+     */
+    static String getCombiningName (int propMask) {
+        final int combiningClass = propMask & 0xFF;
+        switch (combiningClass) {
+            case 0:   return "NotReordered";
+            case 1:   return "Overlay";
+            case 7:   return "Nukta";
+            case 8:   return "KanaVoicing";
+            case 9:   return "Virama";
+            case 202: return "AttachedBelowLeft";
+            case 204: return "AttachedBelow";
+            case 206: return "AttachedBelowRight";
+            case 208: return "AttachedLeft";
+            case 210: return "AttachedRight";
+            case 212: return "AttachedAboveLeft";
+            case 214: return "AttachedAbove";
+            case 216: return "AttachedAboveRight";
+            case 218: return "BelowLeft";
+            case 220: return "Below";
+            case 222: return "BelowRight";
+            case 224: return "Left";
+            case 226: return "Right";
+            case 228: return "AboveLeft";
+            case 230: return "Above";
+            case 232: return "AboveRight";
+            case 233: return "DoubleBelow";
+            case 234: return "DoubleAbove";
+            case 240: return "IotaSubscript";
+            default:  return "";
+        }
+    }
+    
+    /**
+     * Builds the comment line that heads the listing for this property value.
+     * @return "" for joining groups, a fixed line for binary properties, the
+     *         combining-class name for combining classes, and otherwise the
+     *         short ID followed by the long ID in parentheses when they differ
+     */
+    public String headerString() {
+        final int propertyType = propMask & 0xFF00;
+        if (propertyType == JOINING_GROUP) {
+            return "";
+        }
+        if (propertyType == BINARY_PROPERTIES) {
+            return "# Binary Property";
+        }
+        if (propertyType == COMBINING_CLASS) {
+            String name = getCombiningName(propMask);
+            return "# " + (name.length() == 0 ? "Other Combining Class" : name);
+        }
+        String shortID = getUnifiedBinaryPropertyID(ucdData, propMask, SHORT);
+        String longID = getUnifiedBinaryPropertyID(ucdData, propMask, LONG);
+        String longSuffix = shortID.equals(longID) ? "" : "\t(" + longID + ")";
+        return "# " + shortID + longSuffix;
+    }
+        
+    /**
+     * @param cp  ignored; the name depends only on this lister's property value
+     * @return the NORMAL-style ID of the property value being listed
+     */
+    public String propertyName(int cp) {
+        return getUnifiedBinaryPropertyID(ucdData, propMask, NORMAL);
+    }
+    
+    /**
+     * @return "" for general-category listings; otherwise "L&" for cased
+     *         letters (Lu, Ll, Lt) and the category ID of cp for everything else
+     */
+    public String optionalComment(int cp) {
+        if (propMask < COMBINING_CLASS) {
+            return ""; // skip gen cat
+        }
+        int category = ucdData.getCategory(cp);
+        boolean casedLetter = category == Lt || category == Ll || category == Lu;
+        return casedLetter ? "L&" : ucdData.getCategoryID(cp);
+    }
+    
+    /*
+    public String optionalName(int cp) {
+        if ((propMask & 0xFF00) == DECOMPOSITION_TYPE) {
+            return Utility.hex(ucdData.getDecompositionMapping(cp));
+        } else {
+            return "";
+        }
+    }
+    */
+        
+    /**
+     * Decides whether cp belongs in the listing for this property value.
+     * @return INCLUDE when cp has the property value; EXCLUDE when it does
+     *         not; for unassigned (Cn) code points, CONTINUE if BRIDGE is set
+     *         and EXCLUDE otherwise, unless the property being listed is one
+     *         of the three that apply to unassigned code points
+     */
+    public byte status(int cp) {
+        boolean unassigned = ucdData.getCategory(cp) == Cn;
+
+        // Only these three listings look at unassigned code points.
+        boolean listsUnassigned =
+            propMask == (BINARY_PROPERTIES | Noncharacter_Code_Point)
+            || propMask == (BINARY_PROPERTIES | Reserved_Cf_Code_Point)
+            || propMask == (CATEGORY | Cn);
+
+        if (unassigned && !listsUnassigned) {
+            return BRIDGE ? CONTINUE : EXCLUDE;
+        }
+
+        // A disabled HACK used to override the script of the mathematical
+        // alphanumerics U+1D400..U+1D7C9: LATIN_SCRIPT up to U+1D6A3
+        // (MATHEMATICAL MONOSPACE SMALL Z), GREEK_SCRIPT above it
+        // (U+1D6A8 MATHEMATICAL BOLD CAPITAL ALPHA .. U+1D7C9).
+        return getUnifiedBinaryProperty(cp, propMask) ? INCLUDE : EXCLUDE;
+    }
+    
+    /**
+     * Tests whether a unified property number names a defined property value.
+     * The high byte selects the property type and the low byte the value.
+     * @param ucd       database consulted for the combining classes in use
+     * @param propMask  unified property number
+     * @return true if the value is within the limit for its property type
+     *         (and is not one of the UNUSED placeholders); false for an
+     *         unknown property type
+     */
+    public static boolean isUnifiedBinaryPropertyDefined(UCD ucd, int propMask) {
+        // "enum" is a reserved word since Java 5, so the local is named propertyType.
+        int propertyType = propMask >> 8;
+        propMask &= 0xFF;
+        switch (propertyType) {
+          case CATEGORY>>8: return propMask != UNUSED_CATEGORY && propMask < LIMIT_CATEGORY;
+          case COMBINING_CLASS>>8: return ucd.isCombiningClassUsed((byte)propMask);
+          case BIDI_CLASS>>8: return propMask != BIDI_UNUSED && propMask < LIMIT_BIDI_CLASS;
+          case DECOMPOSITION_TYPE>>8: return propMask < LIMIT_DECOMPOSITION_TYPE;
+          case NUMERIC_TYPE>>8: return propMask < LIMIT_NUMERIC_TYPE;
+          case EAST_ASIAN_WIDTH>>8: return propMask < LIMIT_EAST_ASIAN_WIDTH;
+          case LINE_BREAK>>8: return propMask < LIMIT_LINE_BREAK;
+          case JOINING_TYPE>>8: return propMask < LIMIT_JOINING_TYPE;
+          case JOINING_GROUP>>8: return propMask < LIMIT_JOINING_GROUP;
+          case BINARY_PROPERTIES>>8: return propMask < LIMIT_BINARY_PROPERTIES;
+          case SCRIPT>>8: return propMask != UNUSED_SCRIPT && propMask < LIMIT_SCRIPT;
+          case AGE>>8: return propMask < LIMIT_AGE;
+          default: return false;
+        }
+    }    
+    
+    /**
+     * Instance convenience form: tests cp against propMask using this
+     * lister's own character database.
+     */
+    public boolean getUnifiedBinaryProperty(int cp, int propMask) {
+        return getUnifiedBinaryProperty(ucdData, cp, propMask);
+    }
+        
+    /**
+     * Tests whether a code point has a given unified property value.
+     * The high byte of propMask selects the property type and the low byte
+     * the value to compare against.
+     * @param ucd       character database to query
+     * @param cp        code point to test
+     * @param propMask  unified property number
+     * @return true if cp has that value (for binary properties, if the property is set)
+     * @throws ChainException if the property type is unknown or the value is
+     *         at or above the limit for its type; the message reports only
+     *         the low byte of propMask
+     */
+    static public boolean getUnifiedBinaryProperty(UCD ucd, int cp, int propMask) {
+        // "enum" is a reserved word since Java 5, so the local is named propertyType.
+        int propertyType = propMask >> 8;
+        propMask &= 0xFF;
+        switch (propertyType) {
+          case CATEGORY>>8: if (propMask >= LIMIT_CATEGORY) break;
+            return ucd.getCategory(cp) == propMask;
+          case COMBINING_CLASS>>8: if (propMask >= LIMIT_COMBINING_CLASS) break;
+            return ucd.getCombiningClass(cp) == propMask;
+          case BIDI_CLASS>>8: if (propMask >= LIMIT_BIDI_CLASS) break;
+            return ucd.getBidiClass(cp) == propMask;
+          case DECOMPOSITION_TYPE>>8: if (propMask >= LIMIT_DECOMPOSITION_TYPE) break;
+            return ucd.getDecompositionType(cp) == propMask;
+          case NUMERIC_TYPE>>8: if (propMask >= LIMIT_NUMERIC_TYPE) break;
+            return ucd.getNumericType(cp) == propMask;
+          case EAST_ASIAN_WIDTH>>8: if (propMask >= LIMIT_EAST_ASIAN_WIDTH) break;
+            return ucd.getEastAsianWidth(cp) == propMask;
+          case LINE_BREAK>>8:  if (propMask >= LIMIT_LINE_BREAK) break;
+            return ucd.getLineBreak(cp) == propMask;
+          case JOINING_TYPE>>8: if (propMask >= LIMIT_JOINING_TYPE) break;
+            return ucd.getJoiningType(cp) == propMask;
+          case JOINING_GROUP>>8: if (propMask >= LIMIT_JOINING_GROUP) break;
+            return ucd.getJoiningGroup(cp) == propMask;
+          case BINARY_PROPERTIES>>8: if (propMask >= LIMIT_BINARY_PROPERTIES) break;
+            return ucd.getBinaryProperty(cp, propMask);
+          case SCRIPT>>8: if (propMask >= LIMIT_SCRIPT) break;
+            return ucd.getScript(cp) == propMask;
+          case AGE>>8: if (propMask >= LIMIT_AGE) break;
+            return ucd.getAge(cp) == propMask;
+        }
+        // Reached for an unknown property type or an out-of-range value.
+        throw new ChainException("Illegal property Number {0}", new Object[]{new Integer(propMask)});
+    }    
+    
+    static final int SHORT = -1, NORMAL = 0, LONG = 1, BOTH = 2;
+    
+    /**
+     * Instance convenience form: returns the NORMAL-style ID of a unified
+     * property value, looked up in this lister's own character database.
+     */
+    public String getUnifiedBinaryPropertyID(int unifiedPropMask) {
+        return getUnifiedBinaryPropertyID(ucdData, unifiedPropMask, NORMAL);
+    }
+    
+    /**
+     * Returns the combined ID of a unified property value.
+     * @return the long ID alone when it equals the short ID, otherwise the
+     *         short ID immediately followed by the long ID in parentheses
+     */
+    public static String getUnifiedBinaryPropertyID(UCD ucd, int unifiedPropMask) {
+        final String longForm = getUnifiedBinaryPropertyID(ucd, unifiedPropMask, LONG);
+        final String shortForm = getUnifiedBinaryPropertyID(ucd, unifiedPropMask, SHORT);
+        return longForm.equals(shortForm)
+            ? longForm
+            : shortForm + "(" + longForm + ")";
+    }
+    
+    /**
+     * Builds a "property=value" label for a unified property value.
+     * For binary properties the value's own ID becomes the property part and
+     * the value part is "T". An empty short ID is shown as "xx" and an empty
+     * long ID as "none".
+     * @param style  below LONG selects short names, LONG selects long names,
+     *               anything above LONG gives "short(long)" when the two differ
+     */
+    public static String getFullUnifiedBinaryPropertyID(UCD ucd, int unifiedPropMask, int style) {
+        String prefix = "";
+        boolean isBinary = (unifiedPropMask & 0xFF00) == BINARY_PROPERTIES;
+        if (!isBinary) {
+            String shortPrefix = UCD_Names.ABB_UNIFIED_PROPERTIES[unifiedPropMask>>8] + "=";
+            String longPrefix = UCD_Names.SHORT_UNIFIED_PROPERTIES[unifiedPropMask>>8] + "=";
+            prefix = chooseByStyle(style, shortPrefix, longPrefix);
+        }
+
+        String shortValue = getUnifiedBinaryPropertyID(ucd, unifiedPropMask, SHORT);
+        if (shortValue.length() == 0) {
+            shortValue = "xx";
+        }
+        String longValue = getUnifiedBinaryPropertyID(ucd, unifiedPropMask, LONG);
+        if (longValue.length() == 0) {
+            longValue = "none";
+        }
+        String value = chooseByStyle(style, shortValue, longValue);
+
+        // No property prefix was built: promote the value to the property position.
+        if (prefix.length() == 0) {
+            return value + "=" + "T";
+        }
+        return prefix + value;
+    }
+
+    /** Picks the short form, the long form, or "short(long)" according to style. */
+    private static String chooseByStyle(int style, String shortForm, String longForm) {
+        if (style < LONG) {
+            return shortForm;
+        }
+        if (style == LONG || shortForm.equals(longForm)) {
+            return longForm;
+        }
+        return shortForm + "(" + longForm + ")";
+    }
+    
+    /**
+     * Returns the ID of a unified property value in the requested style.
+     * The high byte of unifiedPropMask selects the property type and the low
+     * byte the value. Which styles have a distinct table differs per type,
+     * as the cases below show.
+     * @param ucd              character database supplying the standard IDs
+     * @param unifiedPropMask  unified property number
+     * @param style            one of SHORT, NORMAL, LONG
+     * @throws ChainException if the property type is unknown or the value is
+     *         at or above the limit for its type
+     */
+    public static String getUnifiedBinaryPropertyID(UCD ucd, int unifiedPropMask, int style) {
+        // "enum" is a reserved word since Java 5, so the local is named propertyType.
+        int propertyType = unifiedPropMask >> 8;
+        // NOTE(review): the cast makes values 128..255 negative, so they pass
+        // the ">= LIMIT" checks below; only the combining-class case re-masks
+        // with 0xFF before use.
+        byte propMask = (byte)unifiedPropMask;
+        switch (propertyType) {
+          case CATEGORY>>8: if (propMask >= LIMIT_CATEGORY) break;
+            if (style != LONG) return ucd.getCategoryID_fromIndex(propMask);
+            return UCD_Names.LONG_GC[propMask];
+          case COMBINING_CLASS>>8: if (propMask >= LIMIT_COMBINING_CLASS) break;
+            String s = "";
+            if (style == LONG) {
+                s = getCombiningName(unifiedPropMask);
+                if (s.length() != 0) return s;
+                s = "fixed_";
+            }
+            return s + ucd.getCombiningClassID_fromIndex((short)(0xFF & propMask));
+          case BIDI_CLASS>>8: if (propMask >= LIMIT_BIDI_CLASS) break;
+            if (style != LONG) return ucd.getBidiClassID_fromIndex(propMask);
+            return UCD_Names.LONG_BC[propMask];
+          case DECOMPOSITION_TYPE>>8: if (propMask >= LIMIT_DECOMPOSITION_TYPE) break;
+            if (style != SHORT) return ucd.getDecompositionTypeID_fromIndex(propMask);
+            return UCD_Names.SHORT_DT[propMask];
+          case NUMERIC_TYPE>>8: if (propMask >= LIMIT_NUMERIC_TYPE) break;
+            if (style != SHORT) return ucd.getNumericTypeID_fromIndex(propMask);
+            return UCD_Names.SHORT_NT[propMask];
+          case EAST_ASIAN_WIDTH>>8: if (propMask >= LIMIT_EAST_ASIAN_WIDTH) break;
+            // NOTE(review): the LONG style reads the SHORT_EA table here - confirm intended.
+            if (style != LONG) return ucd.getEastAsianWidthID_fromIndex(propMask);
+            return UCD_Names.SHORT_EA[propMask];
+          case LINE_BREAK>>8:  if (propMask >= LIMIT_LINE_BREAK) break;
+            if (style != LONG) return ucd.getLineBreakID_fromIndex(propMask);
+            return UCD_Names.LONG_LB[propMask];
+          case JOINING_TYPE>>8: if (propMask >= LIMIT_JOINING_TYPE) break;
+            if (style != LONG) return ucd.getJoiningTypeID_fromIndex(propMask);
+            return UCD_Names.LONG_JOINING_TYPE[propMask];
+          case JOINING_GROUP>>8: if (propMask >= LIMIT_JOINING_GROUP) break;
+            return ucd.getJoiningGroupID_fromIndex(propMask);
+          case BINARY_PROPERTIES>>8: if (propMask >= LIMIT_BINARY_PROPERTIES) break;
+            if (style != SHORT) return ucd.getBinaryPropertiesID_fromIndex(propMask);
+            return UCD_Names.SHORT_BP[propMask];
+          case SCRIPT>>8: if (propMask >= LIMIT_SCRIPT) break;
+            if (style != SHORT) return ucd.getScriptID_fromIndex(propMask);
+            return UCD_Names.ABB_SCRIPT[propMask];
+          case AGE>>8: if (propMask >= LIMIT_AGE) break;
+            return ucd.getAgeID_fromIndex(propMask);
+        }
+        // Reached for an unknown property type or an out-of-range value.
+        throw new ChainException("Illegal property Number {0}", new Object[]{new Integer(propMask)});
+    }    
+    
+}
+    
--- a/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
@ -0,0 +1,475 @@
+package com.ibm.text.UCD;
+
+import java.util.*;
+import com.ibm.text.*;
+
+import com.ibm.text.utility.*;
+
+
+/**
+ * Implements Unicode Normalization Forms C, D, KC, KD.<br>
+ * See UTR#15 for details.<br>
+ * Copyright © 1998-1999 Unicode, Inc. All Rights Reserved.<br>
+ * The Unicode Consortium makes no expressed or implied warranty of any
+ * kind, and assumes no liability for errors or omissions.
+ * No liability is assumed for incidental and consequential damages
+ * in connection with or arising out of the use of the information here.
+ * @author Mark Davis
+ */
+ 
+public final class Normalizer implements UCD_Types {
+    public static final String copyright = 
+      "Copyright (C) 2000, IBM Corp. and others. All Rights Reserved.";
+      
+    public static boolean SHOW_PROGRESS = false;
+      
+    /**
+     * Creates a normalizer for a given form and Unicode version.
+     * @param form            one of NFD, NFKD, NFC, NFKC; only the
+     *                        COMPATIBILITY_MASK and COMPOSITION_MASK bits are read
+     * @param unicodeVersion  version whose data is used; "" selects UCD.latestVersion
+     */
+    public Normalizer(byte form, String unicodeVersion) {
+        final boolean wantsCompatibility = (form & COMPATIBILITY_MASK) != 0;
+        final boolean wantsComposition = (form & COMPOSITION_MASK) != 0;
+        this.compatibility = wantsCompatibility;
+        this.composition = wantsComposition;
+        this.data = getData(unicodeVersion);
+    }
+    
+    /**
+     * Creates a normalizer for a given form, using the data of the latest
+     * Unicode version (the empty version string is resolved by getData).
+     * @param form  one of NFD, NFKD, NFC, NFKC
+     */
+    public Normalizer(byte form) {
+        this(form,"");
+    }
+    
+    /**
+    * Masks for the form selector
+    */
+    public static final byte 
+        COMPATIBILITY_MASK = 1,
+        COMPOSITION_MASK = 2;
+        
+    /**
+    * Normalization Form Selector
+    */
+    public static final byte 
+        NFD = 0 , 
+        NFKD = COMPATIBILITY_MASK,
+        NFC = COMPOSITION_MASK,
+        NFKC = (byte)(COMPATIBILITY_MASK + COMPOSITION_MASK);
+    
+    /**
+    * Normalizes text according to the chosen form into the target buffer.
+    * The buffer is not cleared first: the decomposed characters are inserted
+    * into whatever it already holds, and composition then runs over the whole
+    * buffer. Pass an empty buffer to obtain just the normalized form of
+    * source. An empty source leaves target untouched.
+    * @param   source      the original text, unnormalized
+    * @param   target      buffer that receives the normalized text
+    * @return  target
+    */
+    public StringBuffer normalize(String source, StringBuffer target) {
+        if (source.length() == 0) {
+            return target;
+        }
+
+        // Decomposition always comes first; composing forms recombine afterwards.
+        internalDecompose(source, target);
+        if (composition) {
+            internalCompose(target);
+        }
+        return target;
+    }
+
+    /**
+    * Normalizes text according to the chosen form, using a fresh buffer.
+    * @param   source      the original text, unnormalized
+    * @return  the resulting normalized text
+    */
+    public String normalize(String source) {
+        return normalize(source, new StringBuffer()).toString();
+    }
+    
+    /**
+    * Normalizes a single code point according to the chosen form.
+    * @param   cp          the code point to normalize
+    * @return  the resulting normalized text
+    */
+    public String normalize(int cp) {
+        return normalize(UTF16.valueOf(cp));
+    }
+    
+    /**
+     * Scratch buffer reused by hasDecomposition.
+     */
+    private StringBuffer hasDecompositionBuffer = new StringBuffer();
+    
+    /**
+     * Tests whether normalizing a code point in this normalizer's form
+     * yields anything other than the code point itself.
+     * @param cp  the code point to test
+     * @return true if the normalized text differs from cp
+     */
+    public boolean hasDecomposition(int cp) {
+        // Compare whole strings rather than a single char: a supplementary
+        // code point occupies two UTF-16 units, so a length test of 1 would
+        // report every unchanged supplementary character as changed.
+        String original = UTF16.valueOf(cp);
+        hasDecompositionBuffer.setLength(0);
+        normalize(original, hasDecompositionBuffer);
+        return !original.equals(hasDecompositionBuffer.toString());
+    }
+    
+    /**
+     * Does a quick check to see if the string is in the current form. Checks canonical order and
+     * isAllowed().
+     * @param   source  source text
+     * @return YES, NO, MAYBE
+     */
+     /*
+    public static final int NO = 0, YES = 1, MAYBE = -1;
+     
+    public int quickCheck(String source) {
+        short lastCanonicalClass = 0;
+        int result = YES;
+        for (int i = 0; i < source.length(); ++i) {
+            char ch = source.charAt(i);
+            short canonicalClass = data.getCanonicalClass(ch);
+            if (lastCanonicalClass > canonicalClass && canonicalClass != 0) {
+                return NO;
+            }
+            int check = isAllowed(ch);
+            if (check == NO) return NO;
+            if (check == MAYBE) result = MAYBE;
+        }
+        return result;
+    }
+    
+    /**
+     * Find whether the given character is allowed in the current form.
+     * @return YES, NO, MAYBE
+     */
+     /*
+    public int isAllowed(char ch) {
+        if (composition) {
+            if (compatibility) {
+                if (data.isCompatibilityExcluded(ch)) {
+                    return NO;
+                }
+            } else {
+                if (data.isExcluded(ch)) {
+                    return NO;
+                }
+            }
+            if (data.isTrailing(ch)) {
+                return MAYBE;
+            }
+        } else { // decomposition: both NFD and NFKD
+            if (data.normalizationDiffers(compatibility,ch)) return NO;
+        }
+        return YES;
+    }
+    
+    /**
+    * Utility: Gets the combining class of a character from the
+    * Unicode Character Database. Only a byte is needed, but since bytes are signed in Java
+    * a short is returned to forestall problems.
+    * @param   ch      the source character
+    * @return          value from 0 to 255
+    */
+     
+    /**
+     * Returns the canonical combining class that the normalization data
+     * reports for ch. This form takes a char, so supplementary code points
+     * cannot be passed to it.
+     */
+    public short getCanonicalClass(char ch) {
+        return data.getCanonicalClass(ch);
+    }
+    
+    /**
+    * Utility: Checks, from the decomposition type in the Unicode Character
+    * Database, whether a character is changed by this normalizer's form.
+    * It is compatibility or canonical according to the particular
+    * normalizer; for composing forms, characters recorded as recomposing
+    * are reported as unchanged.
+    * @param   ch      the source character
+    * @return          true if normalization changes the character
+    */
+    public boolean normalizationDiffers(int ch) {
+        return data.normalizationDiffers(ch, composition, compatibility);
+    }
+    
+    /**
+    * Utility: Gets recursive decomposition of a character from the 
+    * Unicode Character Database. This normalizer's own form decides the
+    * kind: a compatibility normalizer selects the recursive compatibility
+    * AND canonical decomposition, otherwise only the recursive canonical
+    * decomposition is used.
+    * @param   ch      the source character
+    * @param   buffer  buffer the decomposition is appended to
+    */
+    public void getRecursiveDecomposition(char ch, StringBuffer buffer) {
+        data.getRecursiveDecomposition(ch, buffer, compatibility);
+    }
+        
+    /**
+    * Utility: Gets composition mapping.
+    * @return IntEnumeration with the pair -> value mapping, where the
+    * pair is firstChar << 16 | secondChar.
+    * Will need to be fixed for surrogates.
+    */
+    /*
+    public IntHashtable.IntEnumeration getComposition() {
+        return data.getComposition();
+    }
+    
+    */
+    
+    /**
+     * Tests whether cp is marked as a trailing character in the
+     * normalization data. Always false for the decomposing forms.
+     */
+    public boolean isTrailing(int cp) {
+        return composition && data.isTrailing(cp);
+    }
+    
+    
+    // ======================================
+    //                  PRIVATES
+    // ======================================
+    
+    /**
+     * The current form.
+     */
+    private boolean composition;
+    private boolean compatibility;
+
+    /**
+    * Decomposes text, either canonical or compatibility according to this
+    * normalizer's compatibility flag, and inserts the result into the
+    * target buffer in canonical order. The target is not cleared first.
+    * @param   source      the original text, unnormalized
+    * @param   target      buffer that receives the decomposed text
+    */
+    private void internalDecompose(String source, StringBuffer target) {
+        StringBuffer buffer = new StringBuffer();
+        int ch32;
+        // Step through source one code point (not one char) at a time.
+        for (int i = 0; i < source.length(); i += UTF16.getCharCount(ch32)) {
+            buffer.setLength(0);
+            ch32 = UTF16.charAt(source, i);
+            data.getRecursiveDecomposition(ch32, buffer, compatibility);
+            
+            // add all of the characters in the decomposition.
+            // (may be just the original character, if there was
+            // no decomposition mapping)
+            
+            int ch;
+            for (int j = 0; j < buffer.length(); j += UTF16.getCharCount(ch)) {
+                ch = UTF16Plus.charAt(buffer, j);
+                int chClass = data.getCanonicalClass(ch);
+                int k = target.length(); // insertion point
+                if (chClass != 0) {
+                    
+                    // bubble-sort combining marks as necessary:
+                    // move the insertion point back past every character
+                    // whose combining class is greater than this one's
+                    
+                    int ch2;
+                    for (; k > 0; k -= UTF16.getCharCount(ch2)) {
+                        // NOTE(review): presumably UTF16Plus.charAt returns the whole
+                        // code point when k-1 is a trail surrogate - confirm.
+                        ch2 = UTF16Plus.charAt(target, k-1);
+                        if (data.getCanonicalClass(ch2) <= chClass) break;
+                    }
+                }
+                target.insert(k, UTF16.valueOf(ch));
+            }
+        }
+    }
+
+    /**
+    * Composes text in place. Target must already
+    * have been decomposed, and must not be empty (the first character is
+    * read unconditionally).
+    * Uses UTF16, which is a utility class for supplementary character support in Java.
+    * @param   target      input: decomposed text.
+    *                      output: the resulting normalized text.
+    */
+    private void internalCompose(StringBuffer target) {
+        // starterPos/starterCh: the character that later marks may combine with.
+        // compPos: write position of the composed output within target.
+        int starterPos = 0;
+        int starterCh = UTF16Plus.charAt(target,0);
+        int compPos = UTF16.getCharCount(starterCh); // length of last composition
+        int lastClass = data.getCanonicalClass(starterCh);
+        if (lastClass != 0) lastClass = 256; // fix for strings starting with a combining mark
+        int oldLen = target.length();
+        
+        // Loop on the decomposed characters, combining where possible
+        
+        int ch;
+        for (int decompPos = compPos; decompPos < target.length(); decompPos += UTF16.getCharCount(ch)) {
+            ch = UTF16Plus.charAt(target, decompPos);
+            if (SHOW_PROGRESS) System.out.println(Utility.hex(target)
+                + ", decompPos: " + decompPos
+                + ", compPos: " + compPos
+                + ", ch: " + Utility.hex(ch)
+                );
+            int chClass = data.getCanonicalClass(ch);
+            int composite = data.getPairwiseComposition(starterCh, ch);
+            // Combine only if a composite exists and the previous character
+            // either has a lower combining class or has class 0.
+            if (composite != data.NOT_COMPOSITE
+            && (lastClass < chClass || lastClass == 0)) {
+                UTF16.setCharAt(target, starterPos, composite);
+                // we know that we will only be replacing non-supplementaries by non-supplementaries
+                // so we don't have to adjust the decompPos
+                starterCh = composite;
+            } else {
+                // Not combined: copy the character down to the output position.
+                if (chClass == 0) {
+                    starterPos = compPos;
+                    starterCh  = ch;
+                }
+                lastClass = chClass;
+                UTF16.setCharAt(target, compPos, ch);
+                if (target.length() != oldLen) { // MAY HAVE TO ADJUST!
+                    System.out.println("ADJUSTING: " + Utility.hex(target));
+                    decompPos += target.length() - oldLen;
+                    oldLen = target.length();
+                }
+                compPos += UTF16.getCharCount(ch);
+            }
+        }
+        // Drop the leftover tail beyond the composed output.
+        target.setLength(compPos);
+    }
+
+    static class Stub {
+        // Character database for the requested version.
+        private UCD ucd;
+        // Maps a Long key (first code point << 32 | second code point) to the
+        // Integer code point the pair composes to.
+        private HashMap compTable = new HashMap();
+        // Trailing jamo, plus every second character of a composing pair.
+        private BitSet isSecond = new BitSet();
+        // Code points entered in compTable.
+        private BitSet canonicalRecompose = new BitSet();
+        // Subset of the above whose two components have no compatibility decomposition.
+        private BitSet compatibilityRecompose = new BitSet();
+        // Returned by getPairwiseComposition when a pair does not compose.
+        static final int NOT_COMPOSITE = 0xFFFF;
+        
+        /**
+         * Builds the composition tables for one Unicode version by scanning
+         * the code points for canonical decompositions that are not
+         * composition exclusions.
+         * @param version  version string passed to UCD.make
+         */
+        Stub(String version) {
+            ucd = UCD.make(version);
+            // NOTE(review): the bound excludes U+10FFFF itself - confirm that is intended.
+            for (int i = 0; i < 0x10FFFF; ++i) {
+                if (!ucd.isAssigned(i)) continue;
+                if (ucd.isPUA(i)) continue;
+                if (ucd.isTrailingJamo(i)) isSecond.set(i);
+                byte dt = ucd.getDecompositionType(i);
+                if (dt != CANONICAL) continue;
+                if (!ucd.getBinaryProperty(i, CompositionExclusion)) {
+                    try {
+                        String s = ucd.getDecompositionMapping(i);
+                        int len = UTF16.countCodePoint(s);
+                        // Only two-code-point mappings can be recomposed pairwise.
+                        if (len != 2) {
+                            if (len > 2) throw new IllegalArgumentException("BAD LENGTH: " + len + ucd.toString(i));
+                            continue;
+                        }
+                        int a = UTF16.charAt(s, 0);
+                        // Skip mappings whose first character has a non-zero combining class.
+                        if (ucd.getCombiningClass(a) != 0) continue;
+                        
+                        int b = UTF16.charAt(s, UTF16.getCharCount(a));
+                        isSecond.set(b);
+                        
+                        // have a recomposition, so set the bit
+                        canonicalRecompose.set(i);
+                        
+                        // set the compatibility recomposition bit 
+                        // ONLY if the component characters
+                        // don't compatibility decompose
+                        if (ucd.getDecompositionType(a) <= CANONICAL
+                         && ucd.getDecompositionType(b) <= CANONICAL) {
+                            compatibilityRecompose.set(i);
+                         }
+                        
+                        long key = (((long)a)<<32) | b;
+                        
+                        /*if (i == '\u1E0A' || key == 0x004400000307) {
+                            System.out.println(Utility.hex(s));
+                            System.out.println(Utility.hex(i));
+                            System.out.println(Utility.hex(key));
+                        }*/
+                        compTable.put(new Long(key), new Integer(i));
+                    } catch (Exception e) {
+                        // Re-thrown with the offending character attached; this also
+                        // wraps the BAD LENGTH exception thrown above.
+                        throw new ChainException("Error: {0}", new Object[]{ucd.toString(i)}, e);
+                    }
+                }
+            }
+            // process compatibilityRecompose
+            // have to do this afterwards, since we don't know whether the pieces
+            // are allowable until we have processed all the characters
+            /*
+            Iterator it = compTable.keySet().iterator();
+            while (it.hasNext()) {
+                Long key = (Long)it.next();
+                int cp = compTable.get(key);
+                long keyLong = key.longValue();
+                int first = (int)(keyLong >>> 32);
+                int second = (int)keyLong;
+                if (ucd.
+            */
+        }
+        /*
+Problem: differs: true, call: false U+0385 GREEK DIALYTIKA TONOS
+Problem: differs: true, call: false U+03D3 GREEK UPSILON WITH ACUTE AND HOOK SYMBOL
+Problem: differs: true, call: false U+03D4 GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL
+Problem: differs: true, call: false U+1E9B LATIN SMALL LETTER LONG S WITH DOT ABOVE
+Problem: differs: true, call: false U+1FC1 GREEK DIALYTIKA AND PERISPOMENI
+Problem: differs: true, call: false U+1FCD GREEK PSILI AND VARIA
+Problem: differs: true, call: false U+1FCE GREEK PSILI AND OXIA
+Problem: differs: true, call: false U+1FCF GREEK PSILI AND PERISPOMENI
+Problem: differs: true, call: false U+1FDD GREEK DASIA AND VARIA
+Problem: differs: true, call: false U+1FDE GREEK DASIA AND OXIA
+Problem: differs: true, call: false U+1FDF GREEK DASIA AND PERISPOMENI
+Problem: differs: true, call: false U+1FED GREEK DIALYTIKA AND VARIA
+*/
+        
+        /** @return the combining class the character database reports for cp */
+        short getCanonicalClass(int cp) {
+            return ucd.getCombiningClass(cp);
+        }
+        
+        /** @return true if cp was recorded in isSecond while the tables were built */
+        boolean isTrailing(int cp) {
+            return isSecond.get(cp);
+        }
+        
+        /**
+         * Tests whether normalization changes a code point, judged from its
+         * decomposition type.
+         * @param composition    true for the composing forms; characters
+         *                       recorded as recomposing then count as unchanged
+         * @param compatibility  true to count compatibility decompositions as
+         *                       well as canonical ones
+         */
+        boolean normalizationDiffers(int cp, boolean composition, boolean compatibility) {
+            byte type = ucd.getDecompositionType(cp);
+            boolean decomposes = compatibility ? type >= CANONICAL : type == CANONICAL;
+            if (!decomposes || !composition) {
+                return decomposes;
+            }
+            // almost the same, except that the characters that RECOMPOSE
+            // are added back in
+            BitSet recomposed = compatibility ? compatibilityRecompose : canonicalRecompose;
+            return !recomposed.get(cp);
+        }
+        
+        /**
+         * Appends the full recursive decomposition of cp to buffer.
+         * Canonical decompositions are always expanded; other decomposition
+         * types only when compatibility is true. A code point that does not
+         * decompose is appended unchanged.
+         */
+        public void getRecursiveDecomposition(int cp, StringBuffer buffer, boolean compatibility) {
+            byte type = ucd.getDecompositionType(cp);
+            boolean decomposes = type == CANONICAL || (type > CANONICAL && compatibility);
+            if (!decomposes) {
+                UTF16.append(buffer, cp);
+                return;
+            }
+            // Expand each code point of the mapping in turn.
+            String mapping = ucd.getDecompositionMapping(cp);
+            int index = 0;
+            while (index < mapping.length()) {
+                int piece = UTF16.charAt(mapping, index);
+                getRecursiveDecomposition(piece, buffer, compatibility);
+                index += UTF16.getCharCount(piece);
+            }
+        }
+        
+        /**
+         * Looks up the composite for a pair of code points.
+         * Hangul composition is tried first, then the composition table.
+         * @return the composite code point, or NOT_COMPOSITE (0xFFFF) if the
+         *         pair does not compose
+         */
+        int getPairwiseComposition(int starterCh, int ch) {
+            int hangul = UCD.composeHangul(starterCh, ch);
+            if (hangul != 0xFFFF) {
+                return hangul;
+            }
+            Long key = new Long((((long)starterCh)<<32) | ch);
+            Integer composite = (Integer) compTable.get(key);
+            return composite == null ? NOT_COMPOSITE : composite.intValue();
+        }
+        
+    }
+    
+    /**
+    * Contains normalization data from the Unicode Character Database.
+    * use false for the minimal set, true for the real set.  
+    */
+    private Stub data;
+    
+    private static HashMap versionCache = new HashMap();
+    
+    /**
+     * Returns the normalization data for a Unicode version, building it on
+     * first use and caching it in versionCache.
+     * @param version  version string; "" selects UCD.latestVersion
+     */
+    private static Stub getData (String version) {
+        String key = version.length() == 0 ? UCD.latestVersion : version;
+        Stub cached = (Stub)versionCache.get(key);
+        if (cached != null) {
+            return cached;
+        }
+        Stub created = new Stub(key);
+        versionCache.put(key, created);
+        return created;
+    }
+    
+    /**
+    * Just accessible for testing.
+    */
+    /*
+    boolean isExcluded (char ch) {
+        return data.isExcluded(ch);
+    }
+   
+    /**
+    * Just accessible for testing.
+    */
+    /*
+    String getRawDecompositionMapping (char ch) {
+        return data.getRawDecompositionMapping(ch);
+    }
+    //*/
+}
--- a/tools/unicodetools/com/ibm/text/UCD/PropertyLister.java
+++ b/tools/unicodetools/com/ibm/text/UCD/PropertyLister.java
@ -0,0 +1,203 @@
+package com.ibm.text.UCD;
+
+import java.io.*;
+import com.ibm.text.utility.*;
+
+
+abstract public class PropertyLister implements UCD_Types {
+    
+    // When true, format() factors out the leading words shared by the first and last names of a range.
+    static final boolean COMPRESS_NAMES = false;
+    // When true, getKenName() replaces names that start with '<' ("<control>" below 0xFF, otherwise empty).
+    static final boolean DROP_INDICATORS = true;
+    
+    
+    // Source of the character data consulted by this lister (categories, names).
+    protected UCD ucdData;
+    // Destination for the listing produced by print() and format().
+    protected PrintStream output;
+    // When true, format() also echoes each line to System.out.
+    protected boolean showOnConsole;
+    // When false, optionalComment() returns the empty string.
+    protected boolean usePropertyComment = true;
+    // First and last code points of the range print() is currently accumulating;
+    // print() sets firstRealCp to -1 while no range is open.
+    protected int firstRealCp = -2;
+    protected int lastRealCp = -2;
+    protected boolean alwaysBreaks = false; // set to true if property only breaks
+    
+    // Values returned by status(); print() handles them as follows:
+    //   INCLUDE  - add the code point to the current range (opening one if needed)
+    //   BREAK    - emit the current range, then start a new range at this code point
+    //   CONTINUE - leave the current range untouched
+    //   EXCLUDE  - emit the current range, if any, and close it
+    public static final byte INCLUDE = 0, BREAK = 1, CONTINUE = 2, EXCLUDE = 3;
+    
+    /** 
+     * Classifies one code point for print(), which calls this once per code point
+     * in ascending order from 0 to 0x10FFFF.
+     * Implementations also have access to firstRealCp and lastRealCp.
+     * @param cp code point being examined
+     * @return one of INCLUDE, BREAK, CONTINUE or EXCLUDE
+     */
+    abstract public byte status(int cp);
+    
+    /**
+     * Text that print() writes, followed by a blank line, before the listing.
+     * The default is empty, which makes print() write no header.
+     */
+    public String headerString() {
+        return "";
+    }
+    
+    /**
+     * Property field for the line describing the range that starts at cp.
+     * format() prefixes a non-empty value with "; "; the default is empty.
+     */
+    public String propertyName(int cp) {
+        return "";
+    }
+    
+    /**
+     * Additional field that format() places after the property name,
+     * prefixed with "; " when non-empty; the default is empty.
+     */
+    public String optionalName(int cp) {
+        return "";
+    }
+    
+    /**
+     * Short tag that format() places at the start of the trailing "#" comment.
+     * @return "" when usePropertyComment is off; "L&amp;" for categories Lu, Ll and Lt;
+     *         otherwise the category ID reported by ucdData
+     */
+    public String optionalComment(int cp) {
+        if (usePropertyComment) {
+            final int category = ucdData.getCategory(cp);
+            boolean cased = (category == Lt || category == Ll || category == Lu);
+            return cased ? "L&" : ucdData.getCategoryID(cp);
+        }
+        return "";
+    }
+    
+    /**
+     * Minimum width of the property columns; format() pads with spaces up to
+     * this width, counting the property name and optional name together.
+     */
+    public int minPropertyWidth() {
+        return 1;
+    }
+    
+    /**
+     * Writes one listing line for the range startCp..endCp to output, and to
+     * System.out as well when showOnConsole is set.
+     * A range is written as "XXXX..YYYY", a single code point as "XXXX"; the
+     * property fields follow, then a "#" comment holding optionalComment(), a
+     * bracketed count (ranges only) and the character name(s).
+     * @param startCp first code point of the range
+     * @param endCp last code point of the range (equal to startCp for a single code point)
+     * @param realCount number of code points in the range that were actually included
+     * @throws ChainException wrapping any exception raised while building the line
+     */
+    public void format(int startCp, int endCp, int realCount) {
+        try {
+            String prop = propertyName(startCp);
+            if (prop.length() > 0) prop = "; " + prop;
+            String opt = optionalName(startCp);
+            if (opt.length() > 0) opt = "; " + opt;
+            String optCom = optionalComment(startCp);
+            if (optCom.length() > 0) optCom += " ";
+            String startName = getKenName(startCp);
+            String line;
+            // padding that brings the property fields up to minPropertyWidth()
+            String pgap = Utility.repeat(" ", minPropertyWidth() - prop.length() - opt.length());
+            if (startCp != endCp) {
+                String endName = getKenName(endCp);
+                // bridge = code points inside startCp..endCp that were not counted as included
+                int bridge = endCp - startCp + 1 - realCount;
+                String count = (bridge == 0) ? "" + realCount : realCount + "/" + bridge;
+                String countStr = Utility.repeat(" ", 3-count.length()) + "[" + count + "] ";
+                // the gap shrinks as the hex forms get longer (see width())
+                String gap = Utility.repeat(" ", 12 - width(startCp) - width(endCp));
+                
+                line = Utility.hex(startCp,4) + ".." + Utility.hex(endCp,4) + gap
+                        + prop + opt + pgap + " # " + optCom
+                        + countStr;
+                if (startName.length() != 0 || endName.length() != 0) {
+                    int com = 0;
+                    if (COMPRESS_NAMES) com = commonInitialWords(startName, endName);
+                    if (com == 0) {
+                        line += startName + ".." + endName;
+                    } else {
+                        // shared leading words once, then the differing tails in parentheses
+                        line += startName.substring(0,com) 
+                            + "(" + startName.substring(com) + ".." + endName.substring(com) + ")";
+                    }
+                }
+            } else {
+                // single code point: alwaysBreaks selects the narrower layout
+                String gap = alwaysBreaks
+                    ? Utility.repeat(" ", 6 - width(startCp))
+                    : Utility.repeat(" ", 14 - width(startCp));
+                String gap2 = alwaysBreaks
+                    ? " "
+                    : "      ";
+                line = Utility.hex(startCp,4) + gap
+                        + prop + opt + pgap + " # " + optCom + gap2
+                        + startName;
+            }
+            output.println(line);
+            if (showOnConsole) System.out.println(line);
+        } catch (Exception e) {
+            throw new ChainException("Format error {0}, {1}", 
+                new Object[]{new Integer(startCp), new Integer(endCp)}, e);
+        }
+    }
+    
+    /**
+     * Width used by format() for the hex form of cp when sizing gaps:
+     * 4 up to 0xFFFF, 5 up to 0xFFFFF, otherwise 6.
+     */
+    int width(int cp) {
+        if (cp > 0xFFFFF) return 6;
+        if (cp > 0xFFFF) return 5;
+        return 4;
+    }
+    
+    /**
+     * Returns the name shown in the trailing comment of a listing line.
+     * A missing (null) or empty name yields "". When DROP_INDICATORS is set, a
+     * name starting with '<' is replaced by "&lt;control&gt;" for code points
+     * below 0xFF and by "" otherwise.
+     */
+    String getKenName(int cp) {
+        String result = ucdData.getName(cp);
+        // an empty name is treated like a missing one; charAt(0) below would throw on it
+        if (result == null || result.length() == 0) return "";
+        if (DROP_INDICATORS && result.charAt(0) == '<') {
+            if (cp < 0xFF) return "<control>";
+            return "";
+        }
+        return result;
+    }
+    
+    
+    /**
+     * Measures the leading run of whole words that two names share.
+     * Words are delimited by SPACE or HYPHEN-MINUS. When the shorter name is a
+     * prefix of the longer one and ends exactly at a word boundary of the longer
+     * one (or the names are equal), the whole shorter name counts.
+     * @return length of the common initial part, 0 if there is none
+     */
+    public static int commonInitialWords(String a, String b) {
+        final boolean aIsLonger = a.length() > b.length();
+        final String shorter = aIsLonger ? b : a;
+        final String longer = aIsLonger ? a : b;
+        final int limit = shorter.length();
+        int boundary = 0;
+        int pos = 0;
+        while (pos < limit) {
+            char c = shorter.charAt(pos);
+            if (c != longer.charAt(pos)) return boundary;
+            ++pos;
+            if (c == ' ' || c == '-') boundary = pos;
+        }
+        // shorter is a prefix of longer at this point
+        if (longer.length() == limit) return limit;
+        char next = longer.charAt(limit);
+        return (next == ' ' || next == '-') ? limit : boundary;
+    }
+    
+    /**
+     * Produces the whole listing: the optional header, one line per range of
+     * code points (via format()), and a trailing "# Total code points" line.
+     * Every code point from 0 to 0x10FFFF is classified with status(); see the
+     * INCLUDE/BREAK/CONTINUE/EXCLUDE cases below. An INCLUDE whose general
+     * category differs from that of the range's first code point is turned into
+     * a BREAK, with Lt and Ll counted as Lu for this comparison.
+     * @return number of code points listed (those handled as INCLUDE or BREAK)
+     */
+    public int print() {
+        int count = 0;
+        firstRealCp = -1; // -1 means no range is open
+        byte firstRealCpCat = -1;
+        lastRealCp = -1;
+        int realRangeCount = 0;
+        
+        String header = headerString();
+        if (header.length() != 0) {
+            output.println(header);
+            output.println();
+        }
+        for (int cp = 0; cp <= 0x10FFFF; ++cp) {
+            byte s = status(cp);
+            // split an open range when the (case-folded) category changes
+            if (s == INCLUDE && firstRealCp != -1) {
+                byte cat = ucdData.getCategory(cp);
+                if (cat == Lt || cat == Ll) cat = Lu;
+                if (cat != firstRealCpCat) s = BREAK;
+            }
+            
+            switch(s) {
+              case CONTINUE:
+                break; // do nothing
+              case INCLUDE:
+                // open a range if needed, then extend it to cp
+                if (firstRealCp == -1) {
+                    firstRealCp = cp;
+                    firstRealCpCat = ucdData.getCategory(firstRealCp);
+                    if (firstRealCpCat == Lt || firstRealCpCat == Ll) firstRealCpCat = Lu;
+                }
+                lastRealCp = cp;
+                count++;
+                realRangeCount++;
+                break;
+              case BREAK:
+                // emit the open range, then start a new one at cp
+                if (firstRealCp != -1) {
+                    format(firstRealCp, lastRealCp, realRangeCount);
+                }
+                lastRealCp = firstRealCp = cp;
+                firstRealCpCat = ucdData.getCategory(firstRealCp);
+                if (firstRealCpCat == Lt || firstRealCpCat == Ll) firstRealCpCat = Lu;
+                
+                realRangeCount = 1;
+                count++;
+                break;
+              case EXCLUDE:
+                // emit the open range and close it
+                if (firstRealCp != -1) {
+                    format(firstRealCp, lastRealCp, realRangeCount);
+                    firstRealCp = -1;
+                    realRangeCount = 0;
+                }
+                break;
+            }
+        }
+        // emit a range still open at the end of the code space
+        if (firstRealCp != -1) {
+            format(firstRealCp, lastRealCp, realRangeCount);
+        }
+        
+        if (count == 0) System.out.println("WARNING -- ZERO COUNT FOR " + header);
+        output.println();
+        output.println("# Total code points: " + count);
+        output.println();
+        return count;
+    }
+}
--- a/tools/unicodetools/com/ibm/text/UCD/TestData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/TestData.java
@ -0,0 +1,473 @@
+package com.ibm.text.UCD;
+
+import java.util.*;
+import java.io.*;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+
+import com.ibm.text.utility.*;
+
+public class TestData implements UCD_Types {
+    
+    /**
+     * Driver: loads the UCD, prints its version and date, and runs checkHoffman()
+     * on two sample strings. The calls that generate the derived data files are
+     * kept inside an "if (false)" block, so none of them runs as written.
+     * @param args not used
+     */
+    public static void main (String[] args) throws IOException {
+        System.out.println("START");
+        ucd = UCD.make();
+        System.out.println("Loaded UCD " + ucd.getVersion() + " " + (new Date(ucd.getDate())));
+        
+        checkHoffman("\u05B8\u05B9\u05B1\u0591\u05C3\u05B0\u05AC\u059F");
+        checkHoffman("\u0592\u05B7\u05BC\u05A5\u05B0\u05C0\u05C4\u05AD");
+        
+        int mask = 0;
+        
+        // Disabled: generation of the derived property files.
+        // NOTE(review): DerivedEastAsianWidth is generated twice in this block.
+        if (false) {
+            
+        generateVerticalSlice(BIDI_CLASS, BIDI_CLASS+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+            "DerivedBidiClass-3.1.1d1.txt");
+        
+        
+        mask = Utility.setBits(0, DerivedPropertyLister.FC_NFKC_Closure, DerivedPropertyLister.ExpandsOnNFKC);
+        mask = Utility.clearBit(mask, DerivedPropertyLister.FullCompInclusion);       
+        generateDerived(mask, HEADER_DERIVED, "DerivedNormalizationProperties-3.1.0d1.txt");
+
+        generateVerticalSlice(EAST_ASIAN_WIDTH, EAST_ASIAN_WIDTH+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+            "DerivedEastAsianWidth-3.1.0d1.txt");
+       
+        generateVerticalSlice(CATEGORY, CATEGORY+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED, 
+            "DerivedGeneralCategory-3.1.0d1.txt");
+        generateVerticalSlice(COMBINING_CLASS, COMBINING_CLASS+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+            "DerivedCombiningClass-3.1.0d1.txt");
+        generateVerticalSlice(DECOMPOSITION_TYPE, DECOMPOSITION_TYPE+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+            "DerivedDecompositionType-3.1.0d1.txt");
+        generateVerticalSlice(NUMERIC_TYPE, NUMERIC_TYPE+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+            "DerivedNumericType-3.1.0d1.txt");
+        generateVerticalSlice(EAST_ASIAN_WIDTH, EAST_ASIAN_WIDTH+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+            "DerivedEastAsianWidth-3.1.0d1.txt");
+        generateVerticalSlice(JOINING_TYPE, JOINING_TYPE+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+            "DerivedJoiningType-3.1.0d1.txt");
+        generateVerticalSlice(JOINING_GROUP, JOINING_GROUP+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+            "DerivedJoiningGroup-3.1.0d1.txt");
+        generateVerticalSlice(BINARY_PROPERTIES, BINARY_PROPERTIES+1, KEEP_SPECIAL, HEADER_DERIVED,
+            "DerivedBinaryProperties-3.1.0d1.txt");
+        generateVerticalSlice(LIMIT_ENUM, LIMIT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+            "DerivedNumericValues-3.1.0d1.txt");
+
+        mask = Utility.setBits(0, DerivedPropertyLister.PropMath, DerivedPropertyLister.Mod_ID_Continue_NO_Cf);
+        generateDerived(mask, HEADER_DERIVED, "DerivedCoreProperties-3.1.0d1.txt");
+                
+        generateVerticalSlice(LINE_BREAK, LINE_BREAK+NEXT_ENUM, KEEP_SPECIAL, HEADER_DERIVED,
+            "DerivedLineBreak-3.1.0d1.txt");
+            
+        generateVerticalSlice(SCRIPT+1, SCRIPT + NEXT_ENUM, KEEP_SPECIAL, HEADER_SCRIPTS, "Scripts-3.1.0d4.txt");
+        
+        generateVerticalSlice(BINARY_PROPERTIES + White_space, BINARY_PROPERTIES + Noncharacter_Code_Point + 1,
+                KEEP_SPECIAL, HEADER_EXTEND, "PropList-3.1.0d5.txt");
+                
+                
+            writeNormalizerTestSuite("NormalizationTest-3.1.0d1.txt");
+
+        }
+        
+
+        
+        
+            //generateDerived(Utility.setBits(0, DerivedPropertyLister.PropMath, DerivedPropertyLister.Mod_ID_Continue_NO_Cf), 
+            //    HEADER_DERIVED, "DerivedPropData2-3.1.0d1.txt");
+        //generateVerticalSlice(SCRIPT, SCRIPT+1, KEEP_SPECIAL, "ScriptCommon-3.1.0d1.txt");
+        //listStrings("LowerCase-3.1.0d1.txt", 0,0);
+        //generateVerticalSlice(0, LIMIT_ENUM, SKIP_SPECIAL, PROPLIST1, "DerivedPropData1-3.1.0d1.txt");
+        
+        // AGE stuff
+        //UCD ucd = UCD.make();
+        //System.out.println(ucd.getAgeID(0x61));
+        //System.out.println(ucd.getAgeID(0x2FA1D));
+        
+        
+        //generateCompExclusions();
+        System.out.println("END");
+    }
+    
+   // NFKC normalizer shared by checkHoffman() and show()
+   static Normalizer nfkc = new Normalizer(Normalizer.NFKC);
+        
+    /**
+     * Prints the hex form of a string and of its NFKC normalization, then a
+     * per-character breakdown (see show()) of both.
+     * @param test string to normalize
+     */
+    public static void checkHoffman(String test) {
+        String result = nfkc.normalize(test);
+        System.out.println(Utility.hex(test) + " => " + Utility.hex(result));
+        System.out.println();
+        show(test, 0);
+        System.out.println();
+        show(result, 0);
+    }
+    
+    public static void show(String s, int indent) {
+        int cp;
+        for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
+            cp = UTF32.char32At(s, i);
+            String cc = " " + ucd.getCombiningClass(cp);
+            cc = Utility.repeat(" ", 4 - cc.length()) + cc;
+            System.out.println(Utility.repeat(" ", indent) + ucd.getCode(cp) + cc + " " + ucd.getName(cp));
+            String decomp = nfkc.normalize(cp);
+            if (!decomp.equals(UTF32.valueOf32(cp))) {
+                show(decomp, indent + 4);
+            }
+        }
+    }
+    
+    
+    // Format of the "# Date:" line in generated files; the pattern appends a literal " GMT".
+    static DateFormat myDateFormat = new SimpleDateFormat("yyyy-MM-dd' 'HH:mm:ss.S' GMT'");
+    
+    // Format in GMT so the literal suffix in the pattern is true.
+    static {
+        myDateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
+    }
+    
+    /**
+     * Removes a draft suffix ("d" followed by one digit) that directly precedes
+     * ".txt", e.g. DerivedJoiningGroup-3.1.0d1.txt becomes
+     * DerivedJoiningGroup-3.1.0.txt. Prints "Fixing File Name" when it does so.
+     * @param s file name
+     * @return s without the draft suffix, or s itself when it does not end in
+     *         "d", a digit and ".txt" (names shorter than six characters included)
+     */
+    public static String fixFile(String s) {
+        int len = s.length();
+        // "dN.txt" needs six characters; without this guard charAt(len-6) throws on e.g. "a.txt"
+        if (len < 6 || !s.endsWith(".txt")) return s;
+        if (s.charAt(len-6) != 'd') return s;
+        char c = s.charAt(len-5);
+        if (c < '0' || '9' < c) return s;
+        System.out.println("Fixing File Name");
+        return s.substring(0,len-6) + s.substring(len-4);
+    }
+    
+    static final int HEADER_EXTEND = 0, HEADER_DERIVED = 1, HEADER_SCRIPTS = 2;
+    
+    public static void doHeader(String fileName, PrintStream output, int headerChoice) {
+        output.println("# " + fixFile(fileName));
+        output.println("#");
+        if (headerChoice == HEADER_SCRIPTS) {
+            output.println("# For documentation, see UTR #24: Script Names");
+            output.println("#   http://www.unicode.org/unicode/reports/tr24/");
+        } else if (headerChoice == HEADER_EXTEND) {
+            output.println("# Unicode Character Database: Extended Properties");
+            output.println("# For documentation, see PropList.html");
+        } else {
+            output.println("# Unicode Character Database: Derived Property Data");
+            output.println("# Generated algorithmically from the Unicode Character Database");
+            output.println("# For documentation, see DerivedProperties.html");
+        }
+        output.println("# Date: " + myDateFormat.format(new Date()) + " [MD]");
+        output.println("# Note: Unassigned and Noncharacter codepoints are omitted,");
+        output.println("#       except when listing Noncharacter or Cn.");
+        output.println("# ================================================");
+        output.println();
+    }
+   
+    /**
+     * Writes one derived-property file into GEN_DIR. Loads UCD version "310"
+     * into the shared ucd field, writes the header, then runs a
+     * DerivedPropertyLister for every bit set in bitMask that is below
+     * DerivedPropertyLister.LIMIT.
+     * @param bitMask bit i selects derived property i
+     * @param headerChoice header variant passed to doHeader()
+     * @param fileName name of the file to create
+     * @throws IOException if the file cannot be opened
+     */
+    public static void generateDerived (int bitMask, int headerChoice, String fileName) throws IOException {
+        ucd = UCD.make("310");
+        PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + fileName));
+        try {
+            doHeader(fileName, output, headerChoice);
+            for (int i = 0; i < 32; ++i) {
+                if ((bitMask & (1<<i)) == 0) continue;
+                if (i >= DerivedPropertyLister.LIMIT) break;
+                System.out.print('.');
+                output.println("# ================================================");
+                output.println();
+                new DerivedPropertyLister(ucd, i, output).print();
+            }
+        } finally {
+            // close the file even when a lister throws
+            output.close();
+        }
+    }
+    
+    /*
+    public static void listStrings(String file, int type, int subtype) throws IOException {
+        ucd = UCD.make("310");
+        UCD ucd30 = UCD.make("300");
+        PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + file));
+        
+        for (int i = 0; i < 0x10FFFF; ++i) {
+            if ((i & 0xFFF) == 0) System.out.println("# " + i);
+            if (!ucd.isRepresented(i)) continue;
+            if (ucd30.isRepresented(i)) continue;
+            String string = "";
+            switch(type) {
+                case 0: string = ucd.getSimpleLowercase(i);
+            }
+            if (UTF32.length32(string) == 1 && UTF32.char32At(string,0) == i) continue;
+            output.println(Utility.hex(i) + "; C; " + Utility.hex(string) + "; # " + ucd.getName(i));
+        }
+        output.close();
+    }
+    */
+    
+    /**
+     * Writes CompositionExclusionsDelta.txt into GEN_DIR using a CompLister.
+     * @throws IOException if the file cannot be opened
+     */
+    public static void generateCompExclusions() throws IOException {
+        PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + "CompositionExclusionsDelta.txt"));
+        try {
+            new CompLister(output).print();
+        } finally {
+            // close the file even when the lister throws
+            output.close();
+        }
+    }
+    
+    static class CompLister extends PropertyLister {
+        UCD oldUCD;
+        int oldLength = 0;
+        
+        public CompLister(PrintStream output) {
+            this.output = output;
+            ucdData = UCD.make("310");
+            oldUCD = UCD.make("300");
+            showOnConsole = true;
+        }
+        public String propertyName(int cp) {
+            return UTF32.length32(ucdData.getDecompositionMapping(cp)) + "";
+        }
+        public byte status(int cp) {
+            if (ucdData.getDecompositionType(cp) == CANONICAL 
+              && oldUCD.getDecompositionType(cp) != CANONICAL) {
+                int temp = oldLength;
+                oldLength = UTF32.length32(ucdData.getDecompositionMapping(cp));
+                if (temp != oldLength) return BREAK;
+                return INCLUDE;
+            }
+            return EXCLUDE;
+        }
+    }
+    
+    // Values for the skipSpecial argument of generateVerticalSlice()
+    static final byte KEEP_SPECIAL = 0, SKIP_SPECIAL = 1;
+    
+    /**
+     * Writes a property listing file into GEN_DIR covering the unified property
+     * values startEnum (inclusive) to endEnum (exclusive), one MyPropertyLister
+     * section per value. Uses the shared ucd field, which must already be set.
+     * Values that are not defined, and the "zero case" values listed below, are
+     * skipped. When endEnum equals LIMIT_ENUM a section listing every distinct
+     * numeric value (via MyFloatLister) is appended.
+     * @param startEnum first unified property value to list
+     * @param endEnum limit (exclusive) of the values to list
+     * @param skipSpecial SKIP_SPECIAL also skips the values from
+     *        BINARY_PROPERTIES|CompositionExclusion up to AGE+NEXT_ENUM
+     * @param headerChoice header variant passed to doHeader()
+     * @param file name of the file to create
+     * @throws IOException if the file cannot be opened
+     */
+    public static void generateVerticalSlice(int startEnum, int endEnum, byte skipSpecial, int headerChoice, String file) throws IOException {
+        PrintStream output = new PrintStream(new FileOutputStream(GEN_DIR + file));
+        try {
+            doHeader(file, output, headerChoice);
+            int last = -1;
+            for (int i = startEnum; i < endEnum; ++i) {
+                if (!MyPropertyLister.isUnifiedBinaryPropertyDefined(ucd, i)) continue;
+                if (i == DECOMPOSITION_TYPE || i == NUMERIC_TYPE 
+                    || i == (CATEGORY | UNUSED_CATEGORY)
+                    || i == (BINARY_PROPERTIES | Non_break)
+                    || i == (JOINING_TYPE | JT_U)
+                    || i == (SCRIPT | UNUSED_SCRIPT)
+                    || i == (JOINING_GROUP | NO_SHAPING)
+                    ) continue; // skip zero case
+                if (skipSpecial == SKIP_SPECIAL
+                        && i >= (BINARY_PROPERTIES | CompositionExclusion)
+                        && i < (AGE + NEXT_ENUM)) continue;
+                // a change in the high byte starts a new property: print its heading
+                if ((last & 0xFF00) != (i & 0xFF00) && (i <= BINARY_PROPERTIES || i >= SCRIPT)) {
+                    output.println();
+                    output.println("# ================================================");
+                    output.println("# " + UCD_Names.UNIFIED_PROPERTIES[i>>8]);
+                    output.println("# ================================================");
+                    output.println();
+                    System.out.println();
+                    System.out.println(UCD_Names.UNIFIED_PROPERTIES[i>>8]);
+                    last = i;
+                } else {
+                    output.println("# ================================================");
+                    output.println();
+                }
+                System.out.print(".");
+                new MyPropertyLister(ucd, i, output).print();
+            }
+            if (endEnum == LIMIT_ENUM) {
+                output.println();
+                output.println("# ================================================");
+                output.println("# Numeric Values (from UnicodeData.txt, field 6/7/8)");
+                output.println("# ================================================");
+                output.println();
+                System.out.println();
+                System.out.println("@NUMERIC VALUES");
+                
+                // collect the distinct numeric values in ascending order
+                Set floatSet = new TreeSet();
+                for (int i = 0; i < 0x10FFFF; ++i) {
+                    float nv = ucd.getNumericValue(i);
+                    if (Float.isNaN(nv)) continue;
+                    floatSet.add(new Float(nv));
+                }
+                Iterator it = floatSet.iterator();
+                while(it.hasNext()) {
+                    new MyFloatLister(ucd, ((Float)it.next()).floatValue(), output).print();
+                    output.println();
+                    System.out.print(".");
+                }
+            }
+        } finally {
+            // close the file even when a lister throws
+            output.close();
+        }
+        System.out.println();
+    }
+    
+    // Character database shared by the static methods of this class; assigned in main() and generateDerived().
+    static UCD ucd;
+
+    // Normalizers for the four forms; created by writeNormalizerTestSuite() and used by writeLine().
+    static public Normalizer formC, formD, formKC, formKD;
+    
+    static public void writeNormalizerTestSuite(String fileName) throws IOException {
+        
+        PrintWriter log = new PrintWriter(
+            new BufferedWriter(
+            new OutputStreamWriter(
+                new FileOutputStream(GEN_DIR + fileName),
+                "UTF8"),
+            32*1024));
+	    formC = new Normalizer(Normalizer.NFC);
+	    formD = new Normalizer(Normalizer.NFD);
+	    formKC = new Normalizer(Normalizer.NFKC);
+	    formKD = new Normalizer(Normalizer.NFKD);
+	    
+        log.println("# " + fixFile(fileName));
+        log.println("#");
+        log.println("# Normalization Test Suite");
+        log.println("# Date: " + myDateFormat.format(new Date()) + " [MD]");
+        log.println("# Format:");
+        log.println("#");
+        log.println("#   Columns (c1, c2,...) are separated by semicolons");
+        log.println("#   Comments are indicated with hash marks");
+        log.println("#");
+        log.println("# CONFORMANCE:");
+        log.println("# 1. The following invariants must be true for all conformant implementations");
+        log.println("#");
+        log.println("#    NFC");
+        log.println("#      c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3)");
+        log.println("#      c4 ==  NFC(c4) ==  NFC(c5)");
+        log.println("#");
+        log.println("#    NFD");
+        log.println("#      c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3)");
+        log.println("#      c5 ==  NFD(c4) ==  NFD(c5");
+        log.println("#");
+        log.println("#    NFKC");
+        log.println("#      c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)");
+        log.println("#");
+        log.println("#    NFKD");
+        log.println("#      c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)");
+        log.println("#");
+        log.println("# 2. For every assigned Unicode 3.1.0 code point X that is not specifically");
+        log.println("#    listed in Part 1, the following invariants must be true for all conformant");
+        log.println("#    implementations:");
+        log.println("#");
+        log.println("#      X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)");
+        
+        System.out.println("Writing Part 1");
+
+        log.println("#");
+        log.println("@Part0 # Specific cases");
+        log.println("#");
+        
+        for (int j = 0; j < testSuiteCases.length; ++j) {
+            writeLine(testSuiteCases[j], log, false);
+        }
+        
+        System.out.println("Writing Part 2");
+        
+        log.println("#");
+        log.println("@Part1 # Character by character test");
+        log.println("# All characters not explicitly occurring in c1 of Part 1 have identical NFC, D, KC, KD forms.");
+        log.println("#");
+        
+        for (int ch = 0; ch < 0x10FFFF; ++ch) {
+            Utility.dot(ch);
+            if (!ucd.isAssigned(ch)) continue;
+            if (ucd.isPUA(ch)) continue;
+            String cc = UTF32.valueOf32(ch);
+            writeLine(cc,log, true);
+        }
+        Utility.fixDot();
+        
+        System.out.println("Finding Examples");
+
+        String[] example = new String[256];
+
+        for (int ch = 0; ch < 0x10FFFF; ++ch) {
+            Utility.dot(ch);
+            if (!ucd.isAssigned(ch)) continue;
+            if (ucd.isPUA(ch)) continue;
+            int cc = ucd.getCombiningClass(ch);
+            if (example[cc] == null) example[cc] = UTF32.valueOf32(ch);
+        }
+        
+        Utility.fixDot();
+        System.out.println("Writing Part 3");
+
+        log.println("#");
+        log.println("@Part2 # Canonical Order Test");
+        log.println("#");
+
+        for (int ch = 0; ch < 0x10FFFF; ++ch) {
+            Utility.dot(ch);
+            if (!ucd.isAssigned(ch)) continue;
+            if (ucd.isPUA(ch)) continue;
+            short c = ucd.getCombiningClass(ch);
+            if (c == 0) continue;
+            
+            // add character with higher class, same class, lower class
+            
+            String sample = "";
+            for (int i = c+1; i < example.length; ++i) {
+                if (example[i] == null) continue;
+                sample += example[i];
+                break;
+            }
+            sample += example[c];
+            for (int i = c-1; i > 0; --i) {
+                if (example[i] == null) continue;
+                sample += example[i];
+                break;
+            }
+            
+            writeLine("a" + sample + UTF32.valueOf32(ch) + "b", log, false);
+            writeLine("a" + UTF32.valueOf32(ch) + sample + "b", log, false);
+        }
+        Utility.fixDot();
+        log.println("#");
+        log.println("# END OF FILE");
+        log.close();
+    }
+    
+    static void writeLine(String cc, PrintWriter log, boolean check) {
+        String c = formC.normalize(cc);
+        String d = formD.normalize(cc);
+        String kc = formKC.normalize(cc);
+        String kd = formKD.normalize(cc);
+        if (check & cc.equals(c) && cc.equals(d) && cc.equals(kc) && cc.equals(kd)) return;
+        log.println(
+            Utility.hex(cc," ") + ";" + Utility.hex(c," ") + ";" + Utility.hex(d," ") + ";"
+            + Utility.hex(kc," ") + ";" + Utility.hex(kd," ")
+            + "; # (" 
+            + comma(cc) + "; " + comma(c) + "; " + comma(d) + "; " + comma(kc) + "; " + comma(kd) + "; "
+            + ") " + ucd.getName(cc));
+    }
+    
+    // Scratch buffer reused by every call to comma()
+    static StringBuffer commaResult = new StringBuffer();
+    
+    /**
+     * Returns s with U+25CC (dotted circle) inserted before every code point
+     * whose general category is Mn.
+     * Not recursive / not reentrant: all calls share the commaResult buffer.
+     */
+    static final String comma(String s) {
+        commaResult.setLength(0);
+        int cp;
+        // advance by the UTF-16 length of the code point just read (the original
+        // passed the index i here, which mis-stepped on supplementary characters)
+        for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
+            cp = UTF32.char32At(s, i);
+            if (ucd.getCategory(cp) == Mn) commaResult.append('\u25CC');
+            UTF32.append32(commaResult, cp);
+        }
+        return commaResult.toString();
+    }
+    
+    // Hand-picked strings that writeNormalizerTestSuite() emits as the @Part0 section:
+    // precomposed letters and base-plus-combining-mark sequences in varying orders.
+    static final String[] testSuiteCases = {
+        "\u1E0A",
+        "\u1E0C",
+        "\u1E0A\u0323",
+        "\u1E0C\u0307",
+        "D\u0307\u0323",
+        "D\u0323\u0307",
+        "\u1E0A\u031B",
+        "\u1E0C\u031B",
+        "\u1E0A\u031B\u0323",
+        "\u1E0C\u031B\u0307",
+        "D\u031B\u0307\u0323",
+        "D\u031B\u0323\u0307",
+        "\u00C8",
+        "\u0112",
+        "E\u0300",
+        "E\u0304",
+        "\u1E14",
+        "\u0112\u0300",
+        "\u1E14\u0304",
+        "E\u0304\u0300",
+        "E\u0300\u0304",
+    };
+
+}
--- a/tools/unicodetools/com/ibm/text/UCD/TestNormalization.java
+++ b/tools/unicodetools/com/ibm/text/UCD/TestNormalization.java
@ -0,0 +1,185 @@
+package com.ibm.text.UCD;
+
+import java.util.*;
+import java.io.*;
+
+import com.ibm.text.utility.*;
+
+public final class TestNormalization {
+    // Directory that main() reads NormalizationTest.txt from (hard-coded local path).
+    static final String DIR = "C:\\Documents and Settings\\Davis\\My Documents\\UnicodeData\\Update 3.0.1\\";
+    // Not referenced anywhere in this class.
+    static final boolean SKIP_FILE = true;
+    
+    // Log that check() writes differences to, and the test file being read; both opened by main().
+    static PrintWriter out = null;
+    static BufferedReader in = null;
+    
+    // One normalizer per form, created in main().
+    static Normalizer nfc;
+    static Normalizer nfd;
+    static Normalizer nfkc;
+    static Normalizer nfkd;
+    static UCD ucd;
+    
+    // Code points that appeared alone in column 1 of the test file; checkMissing() skips them.
+    static BitSet charsListed = new BitSet(0x110000);
+    // Failed checks, and count of distinct expected values (tracked via lastBase in check()) that failed.
+    static int errorCount = 0;
+    static int lineErrorCount = 0;
+    // Current input line as read, and the last line already echoed into the log by check().
+    static String originalLine = "";
+    static String lastLine = "";
+    
+    /**
+     * Runs the normalization conformance check: reads NormalizationTest.txt from
+     * DIR, verifies the invariants between the five columns of every data line
+     * with the four normalizers, then verifies (checkMissing()) that every code
+     * point not listed alone in column 1 is unchanged by all four forms.
+     * Differences are written to NormalizationTestLog.txt; totals go to the console.
+     * @param args not used
+     * @throws java.io.IOException if either file cannot be opened or read
+     */
+    public static void main(String[] args)  throws java.io.IOException {
+        System.out.println("Creating Normalizers");
+        ucd = UCD.make("");
+        
+        nfc = new Normalizer(Normalizer.NFC);
+        nfd = new Normalizer(Normalizer.NFD);
+        nfkc = new Normalizer(Normalizer.NFKC);
+        nfkd = new Normalizer(Normalizer.NFKD);
+        
+        try {
+            // Open the log before the first check(): check() reports through 'out'.
+            out = new PrintWriter(
+                new BufferedWriter(
+                new OutputStreamWriter(
+                    new FileOutputStream("NormalizationTestLog.txt"),
+                    "UTF8"),
+                32*1024));
+            
+            // quick sanity check on the first supplementary code point
+            String x = UTF32.valueOf32(0x10000);
+            check("NFC", nfc, x);
+            check("NFD", nfd, x);
+            check("NFKC", nfkc, x);
+            check("NFKD", nfkd, x);
+            
+            // the test file is UTF8 (its comments hold the characters themselves)
+            in = new BufferedReader (
+                new InputStreamReader (
+                    new FileInputStream (DIR + "NormalizationTest.txt"),
+                    "UTF8"),
+                32*1024);
+            
+            String[] parts = new String[10];
+            
+            System.out.println("Checking files");
+            
+            int count = 0;
+            
+            while (true) {
+                String line = in.readLine();
+                if (line == null) break;
+                if ((count++ & 0x3FF) == 0) System.out.println("#LINE: " + line);
+                originalLine = line;
+                int pos = line.indexOf('#');
+                if (pos >= 0) {
+                    line = line.substring(0,pos);
+                }
+                line = line.trim();
+                if (line.length() == 0) continue;
+                // "@Part..." lines are section markers, not test data
+                if (line.charAt(0) == '@') continue;
+                
+                int splitCount = Utility.split(line, ';', parts);
+                if (splitCount < 5) {
+                    // columns c1..c5 are all needed below; do not reuse stale fields
+                    out.println("BAD LINE (expected 5 fields, found " + splitCount + "): " + originalLine);
+                    errorCount++;
+                    lineErrorCount++;
+                    continue;
+                }
+                for (int i = 0; i < splitCount; ++i) {
+                    parts[i] = Utility.fromHex(parts[i]);
+                }
+                
+                // remember single code points in column 1 so checkMissing() can skip them
+                if (UTF32.length32(parts[0]) == 1) {
+                    int code = UTF32.char32At(parts[0],0);
+                    charsListed.set(code);
+                    if ((code & 0x3FF) == 0) System.out.println("# " + Utility.hex(code));
+                }
+                
+                // c2 == NFC(c1) == NFC(c2) == NFC(c3)
+                errorCount += check("NFCa", nfc, parts[1], parts[0]); 
+                errorCount += check("NFCb", nfc, parts[1], parts[1]); 
+                errorCount += check("NFCc", nfc, parts[1], parts[2]); 
+                
+                // c4 == NFC(c4) == NFC(c5)
+                errorCount += check("NFCd", nfc, parts[3], parts[3]); 
+                errorCount += check("NFCe", nfc, parts[3], parts[4]); 
+
+                // c3 == NFD(c1) == NFD(c2) == NFD(c3)
+                errorCount += check("NFDa", nfd, parts[2], parts[0]); 
+                errorCount += check("NFDb", nfd, parts[2], parts[1]); 
+                errorCount += check("NFDc", nfd, parts[2], parts[2]); 
+                
+                // c5 == NFD(c4) == NFD(c5)
+                errorCount += check("NFDd", nfd, parts[4], parts[3]); 
+                errorCount += check("NFDe", nfd, parts[4], parts[4]); 
+                
+                // c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
+                errorCount += check("NFKCa", nfkc, parts[3], parts[0]); 
+                errorCount += check("NFKCb", nfkc, parts[3], parts[1]); 
+                errorCount += check("NFKCc", nfkc, parts[3], parts[2]); 
+                errorCount += check("NFKCd", nfkc, parts[3], parts[3]); 
+                errorCount += check("NFKCe", nfkc, parts[3], parts[4]); 
+                
+                // c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
+                errorCount += check("NFKDa", nfkd, parts[4], parts[0]); 
+                errorCount += check("NFKDb", nfkd, parts[4], parts[1]); 
+                errorCount += check("NFKDc", nfkd, parts[4], parts[2]); 
+                errorCount += check("NFKDd", nfkd, parts[4], parts[3]); 
+                errorCount += check("NFKDe", nfkd, parts[4], parts[4]); 
+            }
+            System.out.println("Total errors in file: " + errorCount
+                + ", lines: " + lineErrorCount);
+            errorCount = lineErrorCount = 0;
+            
+            System.out.println("Checking Missing");
+            checkMissing();
+            System.out.println("Total errors in unlisted items: " + errorCount
+                + ", lines: " + lineErrorCount);
+            
+        } finally {
+            if (in != null) in.close();
+            if (out != null) out.close();
+        }
+    }
+    
+    // Expected value of the most recent failure; a change counts toward lineErrorCount.
+    static String lastBase = "";
+    
+    /**
+     * Checks that normalizing other with n yields base, logging a "DIFF" line
+     * to out when it does not. The first failure for a given input line also
+     * echoes that line; lineErrorCount grows when the expected value differs
+     * from that of the previous failure.
+     * @param type label for the check, used in the log line
+     * @param n normalizer to apply
+     * @param base expected result
+     * @param other string to normalize
+     * @return 1 if the result differs from base, otherwise 0
+     * @throws ChainException wrapping any exception raised during the check
+     */
+    public static int check(String type, Normalizer n, String base, String other) {
+        try {
+            final String actual = n.normalize(other);
+            if (actual.equals(base)) return 0;
+            
+            String lineNote = "";
+            if (!lastLine.equals(originalLine)) {
+                lineNote = "// " + originalLine;
+                lastLine = originalLine;
+            }
+            if (!base.equals(lastBase)) {
+                lastBase = base;
+                lineErrorCount++;
+            }
+            // the argument is shown only when it is not the expected value itself
+            String argument = base.equals(other) ? "" : "(" + ucd.getCodeAndName(other) + ")";
+            StringBuffer report = new StringBuffer("DIFF ");
+            report.append(type).append(": ");
+            report.append(ucd.getCodeAndName(base)).append(" != ");
+            report.append(type).append(argument);
+            report.append(" == ").append(ucd.getCodeAndName(actual)).append(lineNote);
+            out.println(report.toString());
+            return 1;
+        } catch (Exception e) {
+            throw new ChainException("DIFF " + type + ": " 
+                + ucd.getCodeAndName(base) + " != " 
+                + type + "(" + ucd.getCodeAndName(other) + ")", new Object[]{}, e);
+        }
+    }
+    
+    /**
+     * Checks that base is unchanged by n: calls the four-argument check() with
+     * base as both the expected value and the input.
+     * @return 1 if normalizing base changes it, otherwise 0
+     */
+    public static int check(String type, Normalizer n, String base) {
+        return check(type, n, base, base);
+    }
+    
+    /**
+     * Checks every code point that was not listed alone in column 1 of the test
+     * file (see charsListed): each must be unchanged by NFC, NFD, NFKC and NFKD.
+     * Failures are added to errorCount. Covers the whole range 0..0x10FFFF,
+     * matching the size of charsListed.
+     */
+    static void checkMissing() {
+        for (int missing = 0; missing <= 0x10FFFF; ++missing) {
+            if ((missing & 0xFFF) == 0) System.out.println("# " + Utility.hex(missing));
+            if (charsListed.get(missing)) continue;
+            String x = UTF32.valueOf32(missing);
+            errorCount += check("NFC", nfc, x);
+            errorCount += check("NFD", nfd, x);
+            errorCount += check("NFKC", nfkc, x);
+            errorCount += check("NFKD", nfkd, x);
+        }
+    }     
+    
+}
--- a/tools/unicodetools/com/ibm/text/UCD/UCD-in-XML-Notes.htm
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD-in-XML-Notes.htm
@ -0,0 +1,226 @@
+<html>
+
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
+<meta name="GENERATOR" content="Microsoft FrontPage 4.0">
+<meta name="ProgId" content="FrontPage.Editor.Document">
+<title>Unicode Character Database</title>
+<style>
+<!--
+table        { padding: 4 }
+td           { padding: 4 }
+-->
+</style>
+</head>
+
+<body>
+
+<span class="cb" id style="DISPLAY: block">
+<h1 align="center">Unicode Character Database (UCD) in XML Format</h1>
+<h1 align="center"><b><font color="#FF0000">WARNING: FORMAT IS DRAFT!</font></b></h1>
+<p align="center">MD 2000.10.16</p>
+<table border="1" width="40%" align="right" cellspacing="4" cellpadding="0">
+  <tr>
+    <td width="100%" bgcolor="#C0C0C0"><span class="cb" id
+      style="DISPLAY: block">
+      <h4 align="center">Using Internet Explorer</h4>
+      <p>The UCD-Main.xml file can be read in Internet Explorer (5.0 and above).
+      However:</p>
+      <ul>
+        <li>It may take a few minutes to load completely.</li>
+        <li>The XML parser in IE does not appear to be conformant: it seems to
+          break on</span> the following valid code points (and others):
+        <ul>
+          <li>&lt;IEbugs<br>
+            c1='&amp;#xFFF9;'<br>
+            c2='&amp;#xFFFA;'<br>
+            c3='&amp;#xFFFB;'<br>
+            c4='&amp;#xFFFC;'<br>
+            c5='&amp;#xFFFD;'<br>
+            c6='&amp;#xF0000;'<br>
+            c7='&amp;#xFFFFD;'<br>
+            c8='&amp;#x100000;'<br>
+            c9='&amp;#x10FFFD;'/&gt;</li>
+        </ul>
+      </li>
+      </ul>
+    </td>
+  </tr>
+</table>
+<p><a href="UCD-Main.xml">UCD-Main.xml</a> provides an XML format for the main
+files in the Unicode Character Database. These include:</p>
+<ul>
+  <li><code>UnicodeData.txt</code></li>
+  <li><code>ArabicShaping.txt</code></li>
+  <li><code>Jamo.txt</code></li>
+  <li><code>SpecialCasing.txt</code></li>
+  <li><code>CompositionExclusions.txt</code></li>
+  <li><code>EastAsianWidth.txt</code></li>
+  <li><code>LineBreak.txt</code></li>
+  <li><code>BidiMirroring.txt</code></li>
+  <li><code>CaseFolding.txt</code></li>
+  <li><code>Blocks.txt</code></li>
+  <li><code>PropList.alpha.txt</code></li>
+</ul>
+<p>Other files in the UCD have very different structure or purpose, and are best
+expressed with separate files. Some annotational data, such as that in
+NamesList.txt or the 10646 comment in UnicodeData, is also best served with
+separate files. The current UCD files not yet in XML format are:</p>
+<ul>
+  <li><code>Unihan.txt</code></li>
+  <li><code>NamesList.txt</code></li>
+  <li><code>Index.txt</code></li>
+  <li><code>NormalizationTest.txt</code></li>
+</ul>
+<h3>Format</h3>
+<p>The Unicode blocks are provided as a list of &lt;block .../&gt; elements,
+with attributes providing the start, end, and name.</p>
+<p>Each assigned code point is a &lt;e .../&gt; element, with attributes
+supplying specific properties. The meaning of the attributes is specified below.
+There is one exception: large ranges of code points&nbsp; for characters such as
+Hangul Syllables are abbreviated by indicating the start and end of the range.</p>
+<p>Because of the volume of data, the attribute names are abbreviated. A <a
+href="#AttributeAbbreviations">key</a> explains the abbreviations, and relates
+them to the fields and values of the original UCD semicolon-delimited files.
+With few exceptions, the values in the XML are directly copied from data in the
+original UCD semicolon-delimited files. Those exceptions are described <a
+href="http://www.unicode.org/Public/3.0-Update1/UnicodeCharacterDatabase-3.0.1.html#DataModifications">below</a>.</p>
+<p>Numeric character references (NCRs) are used to encode the Unicode code
+points. Some Unicode code points cannot be transmitted in XML, even as NCRs (see
+<a href="http://www.w3.org/TR/REC-xml#charsets">http://www.w3.org/TR/REC-xml#charsets</a>),
+or would not be visibly distinct (TAB, CR, LF) in the data. Such code points are
+represented by '#xX;', where X is a hex number.</p>
+<h3><a name="AttributeAbbreviations">Attribute Abbreviations</a></h3>
+<p>To reduce the size of the document, the following attribute abbreviations are
+used. If an attribute is missing, that means it gets a default value. The
+defaults are listed in parentheses below. If there is no specific default, then
+a missing attribute should be read as N/A (not applicable). A default with '='
+means the default is the value of another field (recursively!). Thus if
+the titlecase attribute is missing, then the value is the same as the uppercase.
+If that in turn is missing, then the value is the same as the code point itself.</p>
+<p>For a description of the source files, see <a
+href="http://www.unicode.org/Public/UNIDATA/UnicodeCharacterDatabase.html">UnicodeCharacterDatabase.html</a>.
+That file also has links to the descriptions of the fields within the files.
+Since the PropList values are so long, they will probably also be abbreviated in
+the future.</p>
+<table border="1" width="100%">
+  <tr>
+    <td width="50%" valign="top"><span class="cb" id style="DISPLAY: block">
+      <h4>UnicodeData</h4>
+      <p>&nbsp; c: code point<br>
+      &nbsp; n: name<br>
+      &nbsp; gc: general category (Lo)<br>
+      &nbsp; cc: combining class (0)<br>
+      &nbsp; bc: bidi category (L)<br>
+      &nbsp; dm: decomposition mapping<br>
+      &nbsp; dt: decomposition type (canonical)<br>
+      &nbsp; nt: numeric type<br>
+      &nbsp; nv: numeric value<br>
+      &nbsp; bm: bidi mirrored (N)<br>
+      &nbsp; uc: uppercase (=c)<br>
+      &nbsp; lc: lowercase (=c)<br>
+      &nbsp; tc: titlecase (=uc)</p>
+      <h4>SpecialCasing:</h4>
+      <p>&nbsp; sl: special lower (=lc)<br>
+      &nbsp; su: special upper (=uc)<br>
+      &nbsp; st: special title (=su)<br>
+      &nbsp; sc: special case condition</p>
+      <h4>CaseFolding:</h4>
+      <p>&nbsp; fc: foldcase (=sl)</span></td>
+    <td width="50%" valign="top"><span class="cb" id style="DISPLAY: block">
+      <h4>CompositionExclusions:</h4>
+      <p>&nbsp; ce: composition exclusion (N)</p>
+      <h4>EastAsianWidth:</h4>
+      <p>&nbsp; ea: east asian width (N)</p>
+      <h4>Jamo:</h4>
+      <p>&nbsp; jn: jamo name</p>
+      <h4>LineBreak:</h4>
+      <p>&nbsp; lb: line break class (AL)</p>
+      <h4>ArabicShaping:</h4>
+      <p>&nbsp; jt: joining type<br>
+      &nbsp; jg: joining group</p>
+      <h4>BidiMirroring:</h4>
+      <p>&nbsp; bg: bidi mirroring glyph (=c)</p>
+      <p><b>PropList:</b></p>
+      <p>&nbsp; xs: space-delimited list of properties from the file</p>
+      <p><b><i>WARNING: these values are likely to change!</i></b></span></td>
+  </tr>
+</table>
+<br>
+<h3><a name="DataModifications">Data Modifications</a></h3>
+</span>
+<p>The XML format is generated from the original semicolon-delimited UCD files.
+In general, all fields and values are direct copies. However, there are some
+changes, detailed below.</p>
+<h4>1. Some redundant or annotational fields are omitted</h4>
+<table border="1" width="100%">
+  <tr>
+    <td width="50%" valign="top"><b>UnicodeData<br>
+      </b>1.0 Name<br>
+      10646 comment<br>
+      <br>
+      <b>CaseFolding<br>
+      </b>Type (since it is computable from whether the fold equals the normal
+      lowercase)
+      <p><b>ArabicShaping<br>
+      </b>Name<br>
+      <br>
+      <b>EastAsianWidth<br>
+      </b>Name<br>
+      <br>
+      <b>LineBreak<br>
+      </b>Name</p>
+    </td>
+    <td width="50%" valign="top"><b>PropList</b><font face="Times New Roman"
+      color="#000000">
+      <p>The fields are based on the proposed PropList.alpha, which changes the
+      fields considerably.</p>
+      </font>
+      <p><span class="cb" id style="display: block"><b><i>WARNING: other values
+      are also likely to change!</i></b></span></p>
+    </td>
+  </tr>
+</table>
+<h4>2. Some fields are broken into several fields; others may be combined into a
+single field</h4>
+<ul>
+  <li><b>dt: </b>decomposition tag
+    <ul>
+      <li>the 'tag' field extracted from the decomposition mapping. If there is
+        no tag, the value is &quot;canonical&quot;. Only has meaning if there is
+        a decomposition (<b>dm</b>).</li>
+    </ul>
+  </li>
+  <li><b>nt: </b>numeric type
+    <ul>
+      <li>an enumeration [decimal, digit, numeric] for the type of number. It
+        replaces having duplicate field values for numbers</li>
+    </ul>
+  </li>
+  <li><b>rg: </b>range
+    <ul>
+      <li>used for ranges of values that share characteristics, instead of
+        having to do a substring check.<br>
+        &quot;START&quot; corresponds to &quot;&lt;..., First&gt;&quot;<br>
+        &quot;END&quot; corresponds to &quot;&lt;..., Last&gt;&quot;</li>
+    </ul>
+  </li>
+  <li><b>nc: </b>name computed
+    <ul>
+      <li>if &quot;COMPUTED&quot;, indicates that the name must be computed:
+        e.g. Hangul Syllables, Ideographs</li>
+    </ul>
+  </li>
+  <li><b>na: </b>name annotation
+    <ul>
+      <li>used for code points that do not really have associated names, like
+        control characters and private use characters. The data in that case is
+        either extracted from the &quot;&lt;...&gt;&quot; style name in the old
+        format, or gotten from the &quot;1.0 Unicode name&quot;.</li>
+    </ul>
+  </li>
+</ul>
+
+</body>
+
+</html>
--- a/tools/unicodetools/com/ibm/text/UCD/UCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java
@ -0,0 +1,974 @@
+package com.ibm.text.UCD;
+
+import java.util.HashMap;
+import java.util.BitSet;
+import java.util.Map;
+import java.io.IOException;
+import java.io.DataInputStream;
+import java.io.BufferedInputStream;
+import java.io.FileInputStream;
+
+import com.ibm.text.utility.*;
+
+
+public final class UCD implements UCD_Types {
+    /**
+     * Used for the default version.
+     */
+    public static final String latestVersion = "3.1.1";
+    
+    /**
+     * Create singleton instance for default (latest) version
+     */
+    public static UCD make() {
+        return make("");
+    }
+    
+    /**
+     * Returns the UCD instance for the given version, loading it from file and
+     * caching it on first request. A null or empty version selects {@link #latestVersion}.
+     */
+    public static UCD make(String version) {
+        String key = version;
+        if (key == null || key.length() == 0) {
+            key = latestVersion;
+        }
+        UCD cached = (UCD) versionCache.get(key);
+        if (cached != null) {
+            return cached;
+        }
+        UCD loaded = new UCD();
+        loaded.fillFromFile(key);
+        versionCache.put(key, loaded);
+        return loaded;
+    }
+    
+    /**
+     * Get the version of the UCD
+     */
+    public String getVersion() {
+        return version;
+    }
+    
+    /**
+     * Get the date that the data was parsed
+     */
+    public long getDate() {
+        return date;
+    }
+    
+    /**
+     * Is the code point allocated?
+     */
+    public boolean isAllocated(int codePoint) {
+        if (getCategory(codePoint) != Cn) return true;
+        if ((codePoint & 0xFFFE) == 0xFFFE) {
+            if (major < 2 && codePoint > 0xFFFF) return false;
+            return true;         // Noncharacter
+        }
+        if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF && major >= 3 && minor >= 1) return true;
+        return false;
+    }
+    
+    /**
+     * Is the code point assigned to a character (or surrogate)
+     */
+    public boolean isAssigned(int codePoint) {
+        return getCategory(codePoint) != Cn;
+    }
+    
+    /**
+     * Fast range test for private use code points: E000..F8FF,
+     * F0000..FFFFD and 100000..10FFFD.
+     */
+    public boolean isPUA(int codePoint) {
+        if (codePoint >= 0xE000 && codePoint <= 0xF8FF) return true;
+        if (codePoint >= 0xF0000 && codePoint <= 0xFFFFD) return true;
+        return codePoint >= 0x100000 && codePoint <= 0x10FFFD;
+    }
+    
+    /**
+     * Many ranges are elided in the UCD. All but the first are not actually
+     * represented in the data internally. This detects such cases.
+     */
+    public boolean isRepresented(int codePoint) {
+        return getRaw(codePoint) != null;
+    }
+    
+    /**
+     * Return XML version of the data associated with the code point.
+     */
+    public String toString(int codePoint) {
+        return get(codePoint, true).toString(FULL);
+    }    
+    
+    /**
+     * Get the character name.
+     */
+    public String getName(int codePoint) {
+        return get(codePoint, true).name;
+    }
+    
+    /**
+     * Get the character names for the code points in a string, separated by ", "
+     */
+    public String getName(String s) {
+        if (s.length() == 1) return get(s.charAt(0), true).name;
+        StringBuffer result = new StringBuffer();
+        int cp;
+        for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
+            cp = UTF32.char32At(s, i);
+            if (i > 0) result.append(", ");
+            result.append(getName(cp));
+        }
+        return result.toString();
+    }
+    
+    /**
+     * Get the code in U+ notation
+     */
+    public static String getCode(int codePoint) {
+        return "U+" + Utility.hex(codePoint);
+    }
+    
+    /**
+     * Get the code in U+ notation
+     */
+    public static String getCode(String s) {
+        if (s.length() == 1) return getCode(s.charAt(0)); // fast path
+        StringBuffer result = new StringBuffer();
+        int cp;
+        for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
+            cp = UTF32.char32At(s, i);
+            if (i > 0) result.append(", ");
+            result.append(getCode(cp));
+        }
+        return result.toString();
+    }
+    
+    /**
+     * Get the name and number (U+xxxx NAME) for a code point
+     */
+    public String getCodeAndName(int codePoint) {
+        return getCode(codePoint) + " " + getName(codePoint);
+    }
+    
+    /**
+     * Get the name and number (U+xxxx NAME) for the code points in a string,
+     * separated by ", "
+     */
+    public String getCodeAndName(String s) {
+        if (s == null || s.length() == 0) return "NULL";
+        if (s.length() == 1) return getCodeAndName(s.charAt(0)); // fast path
+        StringBuffer result = new StringBuffer();
+        int cp;
+        for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
+            cp = UTF32.char32At(s, i);
+            if (i > 0) result.append(", ");
+            result.append(getCodeAndName(cp));
+        }
+        return result.toString();
+    }
+    
+    /**
+     * Get the general category
+     */
+    public byte getCategory(int codePoint) {
+        return get(codePoint, false).generalCategory;
+    }
+    
+    /**
+     * Maps a general category value to the mask of its main category.
+     *
+     * @throws IllegalArgumentException if the value is not one of the known categories
+     */
+    public static int mainCategoryMask(byte cat) {
+        switch (cat) {
+          case Lu:
+          case Ll:
+          case Lt:
+          case Lm:
+          case Lo:
+            return LETTER_MASK;
+          case Mn:
+          case Me:
+          case Mc:
+            return MARK_MASK;
+          case Nd:
+          case Nl:
+          case No:
+            return NUMBER_MASK;
+          case Zs:
+          case Zl:
+          case Zp:
+            return SEPARATOR_MASK;
+          case Cc:
+          case Cf:
+          case Cs:
+          case Co:
+            return CONTROL_MASK;
+          case Pc:
+          case Pd:
+          case Ps:
+          case Pe:
+          case Po:
+          case Pi:
+          case Pf:
+            return PUNCTUATION_MASK;
+          case Sm:
+          case Sc:
+          case Sk:
+          case So:
+            return SYMBOL_MASK;
+          case Cn:
+            return UNASSIGNED_MASK;
+          default:
+            throw new IllegalArgumentException ("Illegal General Category " + cat);
+        }
+    }
+    
+    /**
+     * Get the combining class, a number between zero and 255. Returned
+     * as a short to avoid the signed-byte problem in Java
+     */
+    public short getCombiningClass(int codePoint) {
+        return (short)(get(codePoint, false).combiningClass & 0xFF);
+    }
+    
+    /**
+     * Does this combining class actually occur in this version of the data.
+     */
+    public boolean isCombiningClassUsed(byte value) {
+        return combiningClassSet.get(0xFF & value);
+    }
+    
+    /**
+     * Get the bidi class
+     */
+    public byte getBidiClass(int codePoint) {
+        return get(codePoint, false).bidiClass;
+    }
+    
+    /**
+     * Get the RAW decomposition mapping. Must be used recursively for the full mapping!
+     */
+    public String getDecompositionMapping(int codePoint) {
+        return get(codePoint, true).decompositionMapping;
+    }
+    
+    /**
+     * Get BIDI mirroring character, if there is one.
+     */
+    public String getBidiMirror(int codePoint) {
+        return get(codePoint, true).bidiMirror;
+    }
+    
+    /**
+     * Get the RAW decomposition type: the <...> field in the UCD data.
+     */
+    public byte getDecompositionType(int codePoint) {
+        return get(codePoint, false).decompositionType;
+    }
+    
+    public float getNumericValue(int codePoint) {
+        return get(codePoint, false).numericValue;
+    }
+    
+    public byte getNumericType(int codePoint) {
+        return get(codePoint, false).numericType;
+    }
+
+    public String getCase(int codePoint, byte simpleVsFull, byte caseType) {
+        return getCase(codePoint, simpleVsFull, caseType, "");
+    }
+    
+    public String getCase(String s, byte simpleVsFull, byte caseType) {
+        return getCase(s, simpleVsFull, caseType, "");
+    }
+    
+    /**
+     * Returns one of the stored case mappings of a code point.
+     *
+     * @param codePoint    code point to map
+     * @param simpleVsFull SIMPLE or FULL
+     * @param caseType     a value from LOWER through FOLD
+     * @param condition    special-casing condition; for non-fold mappings the full
+     *                     mapping of a character with special casing is used only when
+     *                     this is non-empty and occurs in the character's specialCasing string
+     * @throws IllegalArgumentException if caseType or simpleVsFull is out of bounds
+     */
+    public String getCase(int codePoint, byte simpleVsFull, byte caseType, String condition) {
+        UData udata = get(codePoint, true);
+        if (caseType < LOWER || caseType > FOLD
+          || (simpleVsFull != SIMPLE && simpleVsFull != FULL)) {
+            throw new IllegalArgumentException("simpleVsFull or caseType out of bounds");
+        }
+        if (caseType < FOLD) {
+            // A conditional special casing applies only if the caller's condition matches;
+            // otherwise fall back to the simple mapping.
+            if (simpleVsFull == FULL && udata.specialCasing.length() != 0) {
+                if (condition.length() == 0 
+                || udata.specialCasing.indexOf(condition) < 0) {
+                    simpleVsFull = SIMPLE;
+                }
+            }
+        } else {
+            // special case. For these characters alone, use "I" as option meaning collapse to "i"
+            //if (codePoint == 0x0131 || codePoint == 0x0130) { // special case turkish i
+            // NOTE(review): the test below is against udata.specialCasing, not the
+            // condition argument, although the comment describes "I" as an option; confirm.
+            if (getBinaryProperty(codePoint, CaseFoldTurkishI)) {
+                if (!udata.specialCasing.equals("I")) simpleVsFull = SIMPLE;
+                else simpleVsFull = FULL;
+            }
+        }
+        
+        // Dispatch on the combined (caseType, simpleVsFull) value.
+        switch (caseType + simpleVsFull) {
+            case SIMPLE + UPPER: return udata.simpleUppercase;
+            case SIMPLE + LOWER: return udata.simpleLowercase;
+            case SIMPLE + TITLE: return udata.simpleTitlecase;
+            case SIMPLE + FOLD: return udata.simpleCaseFolding;
+            case FULL + UPPER: return udata.fullUppercase;
+            case FULL + LOWER: return udata.fullLowercase;
+            case FULL + TITLE: return udata.fullTitlecase;
+            case FULL + FOLD: return udata.fullCaseFolding;
+        }
+        throw new IllegalArgumentException("getCase: " + caseType + ", " + simpleVsFull);
+    }
+    
+    /**
+     * Applies a case mapping to every code point of a string and concatenates the results.
+     * For TITLE, the first code point is titlecased; after each cased letter the following
+     * code points are lowercased, after any other non-mark code point titlecasing resumes,
+     * and marks leave the current state unchanged.
+     *
+     * @param s            string to map
+     * @param simpleVsFull SIMPLE or FULL
+     * @param caseType     a value from LOWER through FOLD
+     * @param condition    special-casing condition, passed through to the per-code-point mapping
+     * @return the mapped string
+     */
+    public String getCase(String s, byte simpleVsFull, byte caseType, String condition) {
+        // Fast path for a single code point. The condition must be forwarded here too,
+        // otherwise one-character strings would silently ignore it.
+        if (UTF32.length32(s) == 1) {
+            return getCase(UTF32.char32At(s, 0), simpleVsFull, caseType, condition);
+        }
+        StringBuffer result = new StringBuffer();
+        int cp;
+        byte currentCaseType = caseType;
+        for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
+            cp = UTF32.char32At(s, i);
+            String mappedVersion = getCase(cp, simpleVsFull, currentCaseType, condition);
+            result.append(mappedVersion);
+            if (caseType == TITLE) {
+                // if letter is cased, change to lowercase, otherwise change to TITLE
+                byte cat = getCategory(cp);
+                if (cat == Mn || cat == Me || cat == Mc) {
+                    // marks do not change the current state
+                } else if (cat == Lu || cat == Ll || cat == Lt
+                  || getBinaryProperty(cp, Other_Lowercase)
+                  || getBinaryProperty(cp, Other_Uppercase)) {
+                    currentCaseType = LOWER;
+                } else {
+                    currentCaseType = TITLE;
+                }
+            }
+        }
+        return result.toString();
+    }
+    
+    /*
+    public String getSimpleLowercase(int codePoint) {
+        return get(codePoint, true).simpleLowercase;
+    }
+    
+    public String getSimpleUppercase(int codePoint) {
+        return get(codePoint, true).simpleUppercase;
+    }
+    
+    public String getSimpleTitlecase(int codePoint) {
+        return get(codePoint, true).simpleTitlecase;
+    }
+    
+    public String getSimpleCaseFolding(int codePoint) {
+        return get(codePoint, true).simpleCaseFolding;
+    }
+    
+    public String getFullLowercase(int codePoint) {
+        return get(codePoint, true).fullLowercase;
+    }
+    
+    public String getFullUppercase(int codePoint) {
+        return get(codePoint, true).fullUppercase;
+    }
+    
+    public String getFullTitlecase(int codePoint) {
+        return get(codePoint, true).fullTitlecase;
+    }
+    
+    public String getFullCaseFolding(int codePoint) {
+        return get(codePoint, true).simpleCaseFolding;
+    }
+    
+    public String getLowercase(int codePoint, boolean full) {
+        if (full) return getFullLowercase(codePoint);
+        return getSimpleLowercase(codePoint);
+    }
+    
+    public String getUppercase(int codePoint, boolean full) {
+        if (full) return getFullUppercase(codePoint);
+        return getSimpleLowercase(codePoint);
+    }
+    
+    public String getTitlecase(int codePoint, boolean full) {
+        if (full) return getFullTitlecase(codePoint);
+        return getSimpleTitlecase(codePoint);
+    }
+    
+    public String getCaseFolding(int codePoint, boolean full) {
+        if (full) return getFullCaseFolding(codePoint);
+        return getSimpleCaseFolding(codePoint);
+    }
+    
+    public String getLowercase(String s, boolean full) {
+        if (s.length() == 1) return getLowercase(s.charAt(0), true);
+        StringBuffer result = new StringBuffer();
+        int cp;
+        for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
+            cp = UTF32.char32At(s, i);
+            if (i > 0) result.append(", ");
+            result.append(getLowercase(cp, true));
+        }
+        return result.toString();
+    }
+    
+    public String getUppercase(String s, boolean full) {
+        if (s.length() == 1) return getUppercase(s.charAt(0), true);
+        StringBuffer result = new StringBuffer();
+        int cp;
+        for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
+            cp = UTF32.char32At(s, i);
+            if (i > 0) result.append(", ");
+            result.append(getUppercase(cp, true));
+        }
+        return result.toString();
+    }
+    
+    public String getTitlecase(String s, boolean full) {
+        if (s.length() == 1) return getTitlecase(s.charAt(0), true);
+        StringBuffer result = new StringBuffer();
+        int cp;
+        for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
+            cp = UTF32.char32At(s, i);
+            if (i > 0) result.append(", ");
+            result.append(getTitlecase(cp, true));
+        }
+        return result.toString();
+    }
+    
+    public String getCaseFolding(String s, boolean full) {
+        if (s.length() == 1) return getCaseFolding(s.charAt(0), true);
+        StringBuffer result = new StringBuffer();
+        int cp;
+        for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
+            cp = UTF32.char32At(s, i);
+            if (i > 0) result.append(", ");
+            result.append(getCaseFolding(cp, true));
+        }
+        return result.toString();
+    }
+    */
+    
+    public String getSpecialCase(int codePoint) {
+        return get(codePoint, true).specialCasing;
+    }
+    
+    public byte getEastAsianWidth(int codePoint) {
+        return get(codePoint, false).eastAsianWidth;
+    }
+    
+    public byte getLineBreak(int codePoint) {
+        return get(codePoint, false).lineBreak;
+    }
+    
+    public byte getScript(int codePoint) {
+        return get(codePoint, false).script;
+    }
+    
+    public byte getAge(int codePoint) {
+        return get(codePoint, false).age;
+    }
+    
+    public byte getJoiningType(int codePoint) {
+        return get(codePoint, false).joiningType;
+    }
+    
+    public byte getJoiningGroup(int codePoint) {
+        return get(codePoint, false).joiningGroup;
+    }
+    
+    public int getBinaryProperties(int codePoint) {
+        return get(codePoint, false).binaryProperties;
+    }    
+    
+    /**
+     * Tests a single bit of the code point's binaryProperties word.
+     *
+     * @param bit index of the bit to test; used as a shift count on an int
+     * @return true if that bit is set
+     */
+    public boolean getBinaryProperty(int codePoint, int bit) {
+        return (get(codePoint, false).binaryProperties & (1<<bit)) != 0;
+    }    
+    
+    // ENUM Mask Utilities
+    
+    public int getCategoryMask(int codePoint) {
+        return 1<<get(codePoint, false).generalCategory;
+    }
+    
+    public int getBidiClassMask(int codePoint) {
+        return 1<<get(codePoint, false).bidiClass;
+    }
+    
+    public int getNumericTypeMask(int codePoint) {
+        return 1<<get(codePoint, false).numericType;
+    }
+    
+    public int getDecompositionTypeMask(int codePoint) {
+        return 1<<get(codePoint, false).decompositionType;
+    }
+    
+    public int getEastAsianWidthMask(int codePoint) {
+        return 1<<get(codePoint, false).eastAsianWidth;
+    }
+    
+    public int getLineBreakMask(int codePoint) {
+        return 1<<get(codePoint, false).lineBreak;
+    }
+    
+    public int getScriptMask(int codePoint) {
+        return 1<<get(codePoint, false).script;
+    }
+    
+    public int getAgeMask(int codePoint) {
+        return 1<<get(codePoint, false).age;
+    }
+    
+    public int getJoiningTypeMask(int codePoint) {
+        return 1<<get(codePoint, false).joiningType;
+    }
+    
+    public int getJoiningGroupMask(int codePoint) {
+        return 1<<get(codePoint, false).joiningGroup;
+    }
+    
+
+    // VERSIONS WITH NAMES
+    
+    public String getCategoryID(int codePoint) {
+        return getCategoryID_fromIndex(getCategory(codePoint));
+    }
+    
+    public static String getCategoryID_fromIndex(byte prop) {
+        return UCD_Names.GC[prop];
+    }
+    
+    public String getBidiClassID(int codePoint) {
+        return getBidiClassID_fromIndex(getBidiClass(codePoint));
+    }
+    
+    public static String getBidiClassID_fromIndex(byte prop) {
+        return UCD_Names.BC[prop];
+    }
+    
+    public String getCombiningClassID(int codePoint) {
+        return getCombiningClassID_fromIndex(getCombiningClass(codePoint));
+    }
+    
+    /**
+     * Returns the decimal string form of a combining class value.
+     */
+    public static String getCombiningClassID_fromIndex(short cc) {
+        return String.valueOf(cc);
+    }
+    
+    public String getDecompositionTypeID(int codePoint) {
+        return getDecompositionTypeID_fromIndex(getDecompositionType(codePoint));
+    }
+    
+    public static String getDecompositionTypeID_fromIndex(byte prop) {
+        return UCD_Names.DT[prop];
+    }
+    
+    public String getNumericTypeID(int codePoint) {
+        return getNumericTypeID_fromIndex(getNumericType(codePoint));
+    }
+    
+    public static String getNumericTypeID_fromIndex(byte prop) {
+        return UCD_Names.NT[prop];
+    }
+    
+    public String getEastAsianWidthID(int codePoint) {
+        return getEastAsianWidthID_fromIndex(getEastAsianWidth(codePoint));
+    }
+    
+    public static String getEastAsianWidthID_fromIndex(byte prop) {
+        return UCD_Names.EA[prop];
+    }
+    
+    public String getLineBreakID(int codePoint) {
+        return getLineBreakID_fromIndex(getLineBreak(codePoint));
+    }
+    
+    public static String getLineBreakID_fromIndex(byte prop) {
+        return UCD_Names.LB[prop];
+    }
+    
+    public String getJoiningTypeID(int codePoint) {
+        return getJoiningTypeID_fromIndex(getJoiningType(codePoint));
+    }
+    
+    public static String getJoiningTypeID_fromIndex(byte prop) {
+        return UCD_Names.JOINING_TYPE[prop];
+    }
+    
+    public String getJoiningGroupID(int codePoint) {
+        return getJoiningGroupID_fromIndex(getJoiningGroup(codePoint));
+    }
+    
+    public static String getJoiningGroupID_fromIndex(byte prop) {
+        return UCD_Names.JOINING_GROUP[prop];
+    }
+    
+    public String getScriptID(int codePoint) {
+        return getScriptID_fromIndex(getScript(codePoint));
+    }
+    
+    public static String getScriptID_fromIndex(byte prop) {
+        return UCD_Names.SCRIPT[prop];
+    }
+    
+    public String getAgeID(int codePoint) {
+        return getAgeID_fromIndex(getAge(codePoint));
+    }
+    
+    public static String getAgeID_fromIndex(byte prop) {
+        return UCD_Names.AGE[prop];
+    }
+    
+    public String getBinaryPropertiesID(int codePoint, byte bit) {
+        return (getBinaryProperties(codePoint) & (1<<bit)) != 0 ? "Y" : "N";
+    }
+    
+    public static String getBinaryPropertiesID_fromIndex(byte bit) {
+        return UCD_Names.BP[bit];
+    }
+    
+    /**
+     * Collapses a code point that lies inside one of several large ranges to the first
+     * code point of that range, and maps noncharacters to 0xFFFF. Any other value is
+     * returned unchanged.
+     *
+     * @param ch  code point to map
+     * @param old when true, F900..FA2D is also collapsed to F900
+     * @return the representative code point
+     */
+    public static int mapToRepresentative(int ch, boolean old) {
+        if (ch > 0xFFFD) {
+            if ((ch & 0xFFFE) == 0xFFFE) return 0xFFFF;             // Noncharacter (xxFFFE, xxFFFF)
+            if (ch > 0x20000 && ch <= 0x2A6D6) return 0x20000;      // CJK Ideograph Extension B
+            if (ch > 0xF0000 && ch <= 0xFFFFD) return 0xF0000;      // Plane 15 Private Use
+            if (ch > 0x100000 && ch <= 0x10FFFD) return 0x100000;   // Plane 16 Private Use
+            return ch;
+        }
+        if (ch > 0x3400 && ch <= 0x4DB5) return 0x3400;             // CJK Ideograph Extension A
+        if (ch > 0x4E00 && ch <= 0x9FA5) return 0x4E00;             // CJK Ideograph
+        if (ch > 0xAC00 && ch <= 0xD7A3) return 0xAC00;             // Hangul Syllable
+        if (ch > 0xD800 && ch <= 0xDB7F) return 0xD800;             // Non Private Use High Surrogate
+        if (ch > 0xDB80 && ch <= 0xDBFF) return 0xDB80;             // Private Use High Surrogate
+        if (ch > 0xDC00 && ch <= 0xDFFF) return 0xDC00;             // Low Surrogate
+        if (ch > 0xE000 && ch <= 0xF8FF) return 0xE000;             // Private Use
+        if (old && ch > 0xF900 && ch <= 0xFA2D) return 0xF900;      // CJK Compatibility Ideograph
+        if (ch >= 0xFDD0 && ch <= 0xFDEF) return 0xFFFF;            // Noncharacter
+        return ch;
+    }
+    
+    public boolean isIdentifierStart(int cp, boolean extended) {
+        if (extended) {
+            if (cp == 0x0E33 || cp == 0x0EB3 || cp == 0xFF9E || cp == 0xFF9F) return false;
+            if (cp == 0x037A || cp >= 0xFC5E && cp <= 0xFC63 || cp == 0xFDFA || cp == 0xFDFB) return false;
+            if (cp >= 0xFE70 && cp <= 0xFE7E && (cp & 1) == 0) return false;
+        }
+        byte cat = getCategory(cp);
+        if (cat == Lu || cat == Ll || cat == Lt || cat == Lm || cat == Lo || cat == Nl) return true;
+        return false;
+    }
+    
+    public boolean isIdentifierContinue_NO_Cf(int cp, boolean extended) {
+        if (isIdentifierStart(cp, extended)) return true;
+        if (extended) {
+            if (cp == 0x00B7) return true;
+            if (cp == 0x0E33 || cp == 0x0EB3 || cp == 0xFF9E || cp == 0xFF9F) return true;
+        }
+        byte cat = getCategory(cp);
+        if (cat == Mn || cat == Mc || cat == Nd || cat == Pc) return true;
+        return false;
+    }
+    
+    public boolean isIdentifier(String s, boolean extended) {
+        if (s.length() == 0) return false; // at least one!
+        int cp;
+        for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
+            cp = UTF32.char32At(s, i);
+            if (i == 0) {
+                if (!isIdentifierStart(cp, extended)) return false;
+            } else {
+                if (!isIdentifierContinue_NO_Cf(cp, extended)) return false;
+            }
+        }
+        return true;
+    }
+    /*
+Middle Dot. Because most Catalan legacy data will be encoded in Latin-1, U+00B7 MIDDLE DOT needs to be 
+allowed in <identifier_extend>.
+
+In particular, the following four characters should be in <identifier_extend> and not <identifier_start>: 
+0E33 THAI CHARACTER SARA AM 
+0EB3 LAO VOWEL SIGN AM 
+FF9E HALFWIDTH KATAKANA VOICED SOUND MARK 
+FF9F HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK 
+Irregularly decomposing characters. U+037A GREEK YPOGEGRAMMENI and certain Arabic presentation
+forms have irregular compatibility decompositions, and need to be excluded from both <identifier_start>
+and <identifier_extend>. It is recommended that all Arabic presentation forms be excluded from identifiers
+in any event, although only a few of them are required to be excluded for normalization
+to guarantee identifier closure. 
+*/
+    
+    // ******************* 
+    // PRIVATES
+    // ******************* 
+    
+        // cache of singletons
+    private static Map versionCache = new HashMap();
+    
+    private static final int LIMIT_CODE_POINT = 0x110000;
+    private static final UData[] ALL_NULLS = new UData[1024];
+    
+    // main data
+    private UData[][] data = new UData[LIMIT_CODE_POINT>>10][];
+    
+    // extras
+    private BitSet combiningClassSet = new BitSet(256);
+    private String version;
+    private String file;
+    private long date = -1;
+    private byte format = -1;
+    private byte major = -1;
+    private byte minor = -1;
+    private byte update = -1;
+    private int size = -1;
+    
+    // cache last UData
+    private int lastCode = Integer.MIN_VALUE;
+    private UData lastResult = UData.UNASSIGNED;
+    private boolean lastCodeFixed = false;
+    
+    // Private constructor: instances are not created from outside this class.
+    // Every top-level slot starts out pointing at the shared ALL_NULLS page;
+    // add() swaps in a real page the first time a record lands in that slot.
+    private UCD() {
+        java.util.Arrays.fill(data, ALL_NULLS);
+    }
+    
+    private void add(UData uData) {
+        int high = uData.codePoint>>10;
+        if (data[high] == ALL_NULLS) {
+            UData[] temp = new UData[1024];
+            data[high] = temp;
+        }
+        data[high][uData.codePoint & 0x3FF] = uData;
+    }
+    
+    /**
+     * Reports whether the name of a code point is generated by rule rather
+     * than stored. That is the case for F900..FA2D, for any code point whose
+     * range representative (mapToRepresentative) is one of the ranges listed
+     * below, and for any other code point that has no stored record.
+     * @param codePoint code point to test
+     * @return true if the name is computed
+     */
+    public boolean hasComputableName(int codePoint) {
+        if (0xF900 <= codePoint && codePoint <= 0xFA2D) return true;
+
+        final int representative = mapToRepresentative(codePoint, major < 2);
+        switch (representative) {
+          case 0x2800:   // braille
+          case 0x3400:   // CJK Ideograph Extension A
+          case 0x4E00:   // CJK Ideograph
+          case 0xAC00:   // Hangul Syllable
+          case 0xD800:   // Surrogate
+          case 0xDB80:   // Private Use High Surrogate
+          case 0xDC00:   // Low Surrogate
+          case 0xE000:   // Private Use
+          case 0xF900:   // compat ideos
+          case 0xFFFF:   // Noncharacter
+          case 0x20000:  // Extension B
+          case 0x2F800:  // compat ideos
+          case 0xF0000:  // Private Use
+          case 0x100000: // Private Use
+            return true;
+          default:
+            // not in a special range: computable only if nothing is stored
+            return getRaw(codePoint) == null;
+        }
+    }
+    
+    private UData getRaw(int codePoint) {
+        return data[codePoint>>10][codePoint & 0x3FF];
+    }
+    
+    /**
+     * Returns the UData record for a code point.
+     * <p>The code point is first mapped to its range representative with
+     * mapToRepresentative. If the representative is not one of the special
+     * ranges handled in the switch below, the stored record is returned
+     * directly, or UData.UNASSIGNED when nothing is stored.
+     * <p>For the special ranges (CJK ideographs, Hangul syllables, private use,
+     * surrogates, noncharacters, and F900 when major &lt; 2) the record stored
+     * for the range representative is fetched and rewritten in place to
+     * describe the requested code point.
+     * <p>NOTE(review): this method mutates shared objects. It assigns to
+     * UData.UNASSIGNED.name and overwrites fields of the record stored for the
+     * range representative, so a previously returned result can change on a
+     * later call. Callers should not hold on to results across calls.
+     * @param codePoint code point to look up
+     * @param fixStrings if true, also fill in the name and the string-valued
+     *   mappings (decomposition, mirror, case mappings) for computed records
+     * @return the record, never null
+     */
+    UData get(int codePoint, boolean fixStrings) {
+        //if (codePoint == lastCode && fixStrings <= lastCodeFixed) return lastResult;
+        /*
+        // we play some funny tricks for performance
+        // if cp is not represented, it is either in a elided block or missing.
+        // elided blocks are either CONTINUE or FFFF
+            
+        byte cat;
+        if (!ucdData.isRepresented(cp)) {
+            int rep = UCD.mapToRepresentative(cp);
+            if (rep == 0xFFFF) cat = Cn;
+            else if (rep != cp) return CONTINUE;
+            else if (!ucdData.isRepresented(rep)) cat = Cn;
+            else cat = ucdData.getCategory(rep);
+        } else {
+            cat = ucdData.getCategory(cp);
+        }
+        */
+        
+        UData result = null;
+        
+        // do range stuff
+        String constructedName = null;
+        int rangeStart = mapToRepresentative(codePoint, major < 2);
+        boolean isHangul = false;
+        switch (rangeStart) {
+          case 0xF900:
+            // only data with major < 2 treats F900.. as a computed range;
+            // otherwise it is looked up like any ordinary code point
+            if (major < 2) {
+                if (fixStrings) constructedName = "CJK COMPATIBILITY IDEOGRAPH-" + Utility.hex(codePoint, 4);
+                break;
+            }
+            // FALL THROUGH!!!!
+          default:
+            // ordinary code point: return the stored record as is
+            result = getRaw(codePoint);
+            if (result == null) {
+                result = UData.UNASSIGNED;
+                // writes into the shared UNASSIGNED instance
+                if (fixStrings) result.name = "<unassigned-" + Utility.hex(codePoint, 4) + ">";
+            }
+            return result;
+          case 0x3400: // CJK Ideograph Extension A
+          case 0x4E00: // CJK Ideograph
+          case 0x20000: // Extension B
+            if (fixStrings) constructedName = "CJK UNIFIED IDEOGRAPH-" + Utility.hex(codePoint, 4);
+            break;
+          case 0xAC00: // Hangul Syllable
+            isHangul = true;
+            if (fixStrings) {
+                constructedName = "HANGUL SYLLABLE " + getHangulName(codePoint);
+            }
+            break;
+          case   0xE000: // Private Use
+          case  0xF0000: // Private Use
+          case 0x100000: // Private Use
+            if (fixStrings) constructedName = "<private use-" + Utility.hex(codePoint, 4) + ">";
+            break;
+          case 0xD800: // Surrogate
+          case 0xDB80: // Private Use High Surrogate
+          case 0xDC00: // Low Surrogate
+            if (fixStrings) constructedName = "<surrogate-" + Utility.hex(codePoint, 4) + ">";
+            break;
+          case 0xFFFF: // Noncharacter
+            if (fixStrings) constructedName = "<noncharacter-" + Utility.hex(codePoint, 4) + ">";
+            break;
+        }
+        // computed range: start from the record stored for the range representative
+        result = getRaw(rangeStart);
+        if (result == null) {
+            result = UData.UNASSIGNED;
+            if (fixStrings) result.name = "<unassigned-" + Utility.hex(codePoint, 4) + ">";
+            return result;
+        }
+        
+        // rewrite the stored representative record in place for this code point
+        result.codePoint = codePoint;
+        if (fixStrings) {
+            result.name = constructedName;
+            // every string-valued mapping defaults to the code point itself
+            result.decompositionMapping = result.bidiMirror
+            = result.simpleLowercase = result.simpleUppercase = result.simpleTitlecase = result.simpleCaseFolding
+            = result.fullLowercase = result.fullUppercase = result.fullTitlecase = result.fullCaseFolding
+            = UTF32.valueOf32(codePoint);
+        }
+        if (isHangul) {
+            // Hangul syllables decompose canonically; the pair is computed arithmetically
+            if (fixStrings) result.decompositionMapping = getHangulDecompositionPair(codePoint);
+            result.decompositionType = CANONICAL;
+        }
+        return result;
+    }
+    
+    // Hangul constants
+    // Used for arithmetic composition/decomposition of Hangul syllables.
+    // SBase is the first syllable; LBase and VBase are the first leading
+    // consonant and first vowel. TBase is the base for the trailing index:
+    // the decomposition code treats a trailing index of 0 as "no trailing
+    // consonant", so TBase itself is not emitted as a trailing jamo there.
+    // The *Limit values are exclusive upper bounds (hex values in comments).
+    
+    static final int 
+        SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11A7,
+        LCount = 19, VCount = 21, TCount = 28,
+        NCount = VCount * TCount,   // 588
+        SCount = LCount * NCount,   // 11172
+        LLimit = LBase + LCount,    // 0x1113
+        VLimit = VBase + VCount,    // 0x1176
+        TLimit = TBase + TCount,    // 0x11C3
+        SLimit = SBase + SCount;    // 0xD7A4
+
+    private static String getHangulName(int s) {
+        int SIndex = s - SBase;
+        if (0 > SIndex || SIndex >= SCount) {
+            throw new IllegalArgumentException("Not a Hangul Syllable: " + s);
+        }
+        int LIndex = SIndex / NCount;
+        int VIndex = (SIndex % NCount) / TCount;
+        int TIndex = SIndex % TCount;
+        // if (true) return "?";
+        return UCD_Names.JAMO_L_TABLE[LIndex] + UCD_Names.JAMO_V_TABLE[VIndex] + UCD_Names.JAMO_T_TABLE[TIndex];
+    }
+    
+    private static final char[] pair = new char[2];
+    
+    static String getHangulDecompositionPair(int ch) {
+        int SIndex = ch - SBase;
+        if (0 > SIndex || SIndex >= SCount) {
+            return "";
+        }
+        int TIndex = SIndex % TCount;
+        if (TIndex != 0) { // triple
+            pair[0] = (char)(SBase + SIndex - TIndex);
+            pair[1] = (char)(TBase + TIndex);
+        } else {
+            pair[0] = (char)(LBase + SIndex / NCount);
+            pair[1] = (char)(VBase + (SIndex % NCount) / TCount);
+        }
+        return String.valueOf(pair);
+    }
+    
+    /**
+     * Composes two code points into a Hangul syllable when they form a
+     * composable pair.
+     * <ul>
+     * <li>leading consonant + vowel gives the LV syllable</li>
+     * <li>LV syllable + trailing consonant gives the LVT syllable</li>
+     * </ul>
+     * The trailing consonant must be strictly greater than TBase. TBase
+     * corresponds to trailing index 0, which means "no trailing consonant";
+     * accepting it would return char1 unchanged and silently drop char2.
+     * @param char1 first code point (leading jamo or LV syllable)
+     * @param char2 second code point (vowel jamo or trailing jamo)
+     * @return the composed syllable, or 0xFFFF if the pair does not compose
+     */
+    static int composeHangul(int char1, int char2) {
+        // L + V -> LV
+        if (LBase <= char1 && char1 < LLimit && VBase <= char2 && char2 < VLimit) {
+            return (SBase + ((char1 - LBase) * VCount + (char2 - VBase)) * TCount);
+        }
+        // LV + T -> LVT; char1 must be an LV syllable (trailing index 0)
+        if (SBase <= char1 && char1 < SLimit && TBase < char2 && char2 < TLimit
+                && ((char1 - SBase) % TCount) == 0) {
+            return char1 + (char2 - TBase);
+        }
+        return 0xFFFF; // no composition
+    }
+    
+    /**
+     * Reports whether cp lies in the vowel range [VBase, VLimit) or in the
+     * trailing range [TBase, TLimit), i.e. whether it is a jamo that can
+     * follow another Hangul character. Note the trailing range tested here
+     * includes TBase itself.
+     */
+    static boolean isTrailingJamo(int cp) {
+        if (cp >= VBase && cp < VLimit) return true;
+        return cp >= TBase && cp < TLimit;
+    }
+    
+    /**
+     * Loads this instance from the binary data file
+     * BIN_DIR + "UCD_Data" + version + ".bin".
+     * <p>Layout as read here: four header bytes (format, major, minor,
+     * update), a long date, an int record count, then that many UData records
+     * read with UData.readBytes. Each record is stored with add(), and its
+     * combining class is noted in combiningClassSet.
+     * <p>While loading, the joining type of every Mn or Cf character other
+     * than U+200C and U+200D is forced to JT_T; the first record actually
+     * changed is reported on System.out.
+     * @param version version string; must equal "major.minor.update" as found
+     *   in the file header
+     * @throws ChainException if the format byte is not BINARY_FORMAT, the
+     *   version in the file does not match, or the file cannot be read
+     */
+    private void fillFromFile(String version) {
+        DataInputStream dataIn = null;
+        String fileName = BIN_DIR + "UCD_Data" + version + ".bin";
+        int uDataFileCount = 0;
+        try {
+            dataIn = new DataInputStream(
+                new BufferedInputStream(
+                    new FileInputStream(fileName),
+                    128*1024));
+            // header
+            format = dataIn.readByte();
+            major = dataIn.readByte();
+            minor = dataIn.readByte();
+            update = dataIn.readByte();
+            String foundVersion = major + "." + minor + "." + update;
+            if (format != BINARY_FORMAT || !version.equals(foundVersion)) {
+                throw new ChainException("Illegal data file format for {0}: {1}, {2}", 
+                    new Object[]{version, new Byte(format), foundVersion});
+            }
+            date = dataIn.readLong();
+            size = uDataFileCount = dataIn.readInt();
+            
+            // ensures the HACK message below is printed only once
+            boolean didJoiningHack = false;
+            
+
+            // records
+            for (int i = 0; i < uDataFileCount; ++i) {
+                UData uData = new UData();
+                uData.readBytes(dataIn);
+                
+                // debugging aid: dump one known record to the console
+                if (uData.codePoint == 0x2801) {
+                    System.out.println("SPOT-CHECK: " + uData);
+                }
+                
+                //T = Mn + (Cf - ZWNJ - ZWJ)
+                int cp = uData.codePoint;
+                    byte old = uData.joiningType;
+                    byte cat = uData.generalCategory;
+                //if (cp == 0x200D) {
+                  //  uData.joiningType = JT_C;
+                //} else 
+                if (cp != 0x200D && cp != 0x200C && (cat == Mn || cat == Cf)) {
+                    uData.joiningType = JT_T;
+                }
+                if (!didJoiningHack && uData.joiningType != old) {
+                    System.out.println("HACK: Setting " 
+                        + UCD_Names.LONG_JOINING_TYPE[uData.joiningType] 
+                        + ": " + Utility.hex(cp) + " " + uData.name);
+                    didJoiningHack = true;
+                }
+                
+                // mask to treat the combining class byte as unsigned 0..255
+                combiningClassSet.set(uData.combiningClass & 0xFF);
+                add(uData);
+            }
+            /*
+            if (update == -1) {
+                throw new ChainException("Data File truncated for ", 
+                    new Object[]{version}, e);
+            }
+            if (size != fileSize) {
+                throw new ChainException("Counts do not match: file {0}, records {1}", 
+                    new Object[]{new Integer(fileSize), new Integer(size)});
+            }
+            */
+            // everything is ok!
+            this.version = version;
+            this.file = fileName;
+            //+ " " + new File(fileName).lastModified();            
+        } catch (IOException e) {
+            throw new ChainException("Can't read data file for {0}", new Object[]{version}, e);
+        } finally {
+            // best-effort close; a failure to close is deliberately ignored
+            if (dataIn != null) {
+                try {
+                    dataIn.close();
+                } catch (IOException e) {}
+            }
+        }
+    }
+}    
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Names.java
@ -0,0 +1,750 @@
+package com.ibm.text.UCD;
+
+import com.ibm.text.utility.*;
+
+
+final class UCD_Names implements UCD_Types {
+    
+    static final String[] UNIFIED_PROPERTIES = {
+        "General Category (listing UnicodeData.txt, field 2: see UnicodeData.html)",
+        "Combining Class (listing UnicodeData.txt, field 3: see UnicodeData.html)",
+        "Bidi Class (listing UnicodeData.txt, field 4: see UnicodeData.html)",
+        "Decomposition Type (from UnicodeData.txt, field 5: see UnicodeData.html)",
+        "Numeric Type (from UnicodeData.txt, field 6/7/8: see UnicodeData.html)", 
+        "East Asian Width (listing EastAsianWidth.txt, field 1)",
+        "Line Break (listing LineBreak.txt, field 1)",
+        "Joining Type (listing ArabicShaping.txt, field 1).\r\n"
+            + "#\tType T is derived from Mn + Cf - ZWNJ - ZWJ\r\n"
+            + "#\tAll other code points have the type U",
+        "Joining Group (listing ArabicShaping.txt, field 2)",
+        "BidiMirrored (listing UnicodeData.txt, field 9: see UnicodeData.html)",
+        "Script",
+        "Age (from a comparison of UCD versions 1.1 [minus Hangul], 2.0, 2.1, 3.0, 3.1)"
+    };
+    
+    static final String[] SHORT_UNIFIED_PROPERTIES = {
+        "GeneralCategory",
+        "CombiningClass",
+        "BidiClass",
+        "DecompositionType",
+        "NumericType", 
+        "EastAsianWidth",
+        "LineBreak",
+        "JoiningType",
+        "JoiningGroup",
+        "Value",
+        "Script",
+        "Age"
+    };
+    
+    static final String[] ABB_UNIFIED_PROPERTIES = {
+        "gc",
+        "cc",
+        "bc",
+        "dt",
+        "nt", 
+        "ea",
+        "lb",
+        "jt",
+        "jg",
+        "va",
+        "sc",
+        "Ag"
+    };
+    
+    
+    static final String[] BP = {
+	    "BidiMirrored",
+	    "CompositionExclusion", 
+        "White_Space",
+        "NonBreak",
+	    "Bidi_Control",
+        "Join_Control",
+        "Dash",
+        "Hyphen",
+        "Quotation_Mark",
+        "Terminal_Punctuation",
+        "Other_Math",
+        "Hex_Digit",
+        "ASCII_Hex_Digit",
+	    "Other_Alphabetic",
+        "Ideographic",
+        "Diacritic",
+        "Extender",
+        "Other_Lowercase",
+        "Other_Uppercase",
+        "Noncharacter_Code_Point",
+        "CaseFoldTurkishI",
+        "Other_GraphemeExtend",
+        "GraphemeLink",
+        "IDS_BinaryOperator",
+        "IDS_TrinaryOperator",
+        "Radical",
+        "UnifiedIdeograph",
+        "Reserved_Cf_Code_Point",
+        "Deprecated",
+    };
+    
+    static final String[] SHORT_BP = {
+	    "BidiM",
+	    "CExc", 
+        "WhSp",
+        "NBrk",
+	    "BdCon",
+        "JCon",
+        "Dash",
+        "Hyph",
+        "QMark",
+        "TPunc",
+        "OMath",
+        "HexD",
+        "AHexD",
+	    "OAlph",
+        "Ideo",
+        "Diac",
+        "Ext",
+        "OLoc",
+        "OUpc",
+        "NChar",
+        "TurkI",
+        "OGrX",
+        "GrLink",
+        "IDSB",
+        "IDST",
+        "Radical",
+        "UCJK",
+        "RCf",
+        "Dep",
+    };
+    
+    /*
+    static final String[] BP_OLD = {
+	    "BidiMirrored",
+	    "CompositionExclusion", 
+        "White_space",
+        "Non_break",
+	    "Bidi_Control",
+        "Join_Control",
+        "Dash",
+        "Hyphen",
+        "Quotation_Mark",
+        "Terminal_Punctuation",
+        "Math",
+        "Hex_Digit",
+	    "Other_Alphabetic",
+        "Ideographic",
+        "Diacritic",
+        "Extender",
+        "Other_Lowercase",
+        "Other_Uppercase",
+        "Noncharacter_Code_Point",
+        "Other_GraphemeExtend",
+        "GraphemeLink",
+        "IDS_BinaryOperator",
+        "IDS_TrinaryOperator",
+        "Radical",
+        "UnifiedIdeograph"
+    };
+    */
+    
+    static final String[] DeletedProperties = {
+        "Private_Use",
+        "Composite",
+        "Format_Control",
+        "High_Surrogate",
+        "Identifier_Part_Not_Cf",
+        "Low_Surrogate",
+        "Other_Format_Control",
+        "Private_Use_High_Surrogate",
+        "Unassigned_Code_Point"
+    };
+    
+    static final String[] YN_TABLE = {"N", "Y"};
+    
+    static String[] EA = {
+        "N", "A", "H", "W", "F", "Na"
+    };        
+        
+    static String[] SHORT_EA = {
+        "Neutral", "Ambiguous", "Halfwidth", "Wide", "Fullwidth", "Narrow"
+    };        
+        
+    static final String[] LB = {
+        "XX", "OP", "CL", "QU", "GL", "NS", "EX", "SY",
+        "IS", "PR", "PO", "NU", "AL", "ID", "IN", "HY",
+        "CM", "BB", "BA", "SP", "BK", "CR", "LF", "CB",
+        "SA", "AI", "B2", "SG", "ZW"
+    };
+
+    static final String[] LONG_LB = {
+        "Unknown", "OpenPunctuation", "ClosePunctuation", "Quotation", 
+        "Glue", "Nonstarter", "Exclamation", "BreakSymbols",
+        "InfixNumeric", "PrefixNumeric", "PostfixNumeric", 
+        "Numeric", "Alphabetic", "Ideographic", "Inseperable", "Hyphen",
+        "CombiningMark", "BreakBefore", "BreakAfter", "Space", 
+        "MandatoryBreak", "CarriageReturn", "LineFeed", "ContingentBreak",
+        "ComplexContext", "Ambiguous", "BreakBeforeAndAfter", "Surrogate", "ZWSpace"
+    };
+
+	public static final String[] SCRIPT = {
+    "COMMON", // 	COMMON -- NOT A LETTER: NO EXACT CORRESPONDENCE IN 15924
+    "LATIN", // 	LATIN
+    "GREEK", // 	GREEK
+    "CYRILLIC", // 	CYRILLIC
+    "ARMENIAN", // 	ARMENIAN
+    "HEBREW", // 	HEBREW
+    "ARABIC", // 	ARABIC
+    "SYRIAC", // 	SYRIAC
+    "THAANA", // 	THAANA
+    "DEVANAGARI", // 	DEVANAGARI
+    "BENGALI", // 	BENGALI
+    "GURMUKHI", // 	GURMUKHI
+    "GUJARATI", // 	GUJARATI
+    "ORIYA", // 	ORIYA
+    "TAMIL", // 	TAMIL
+    "TELUGU", // 	TELUGU
+    "KANNADA", // 	KANNADA
+    "MALAYALAM", // 	MALAYALAM
+    "SINHALA", // 	SINHALA
+    "THAI", // 	THAI
+    "LAO", // 	LAO
+    "TIBETAN", // 	TIBETAN
+    "MYANMAR", // 	MYANMAR
+    "GEORGIAN", // 	GEORGIAN
+    "<unused>", // 	JAMO -- NOT SEPARATED FROM HANGUL IN 15924
+    "HANGUL", // 	HANGUL
+    "ETHIOPIC", // 	ETHIOPIC
+    "CHEROKEE", // 	CHEROKEE
+    "CANADIAN-ABORIGINAL", // 	ABORIGINAL
+    "OGHAM", // 	OGHAM
+    "RUNIC", // 	RUNIC
+    "KHMER", // 	KHMER
+    "MONGOLIAN", // 	MONGOLIAN
+    "HIRAGANA", // 	HIRAGANA
+    "KATAKANA", // 	KATAKANA
+    "BOPOMOFO", // 	BOPOMOFO
+    "HAN", // 	HAN
+    "YI", // 	YI
+    "OLD-ITALIC",
+    "GOTHIC",
+    "DESERET",
+    "INHERITED",
+  };
+  
+	public static final String[] ABB_SCRIPT = {
+    "Zyyy", // 	COMMON -- NOT A LETTER: NO EXACT CORRESPONDENCE IN 15924
+    "Latn", // 	LATIN
+    "Grek", // 	GREEK
+    "Cyrl", // 	CYRILLIC
+    "Armn", // 	ARMENIAN
+    "Hebr", // 	HEBREW
+    "Arab", // 	ARABIC
+    "Syrc", // 	SYRIAC
+    "Thaa", // 	THAANA
+    "Deva", // 	DEVANAGARI
+    "Beng", // 	BENGALI
+    "Guru", // 	GURMUKHI
+    "Gujr", // 	GUJARATI
+    "Orya", // 	ORIYA
+    "Taml", // 	TAMIL
+    "Telu", // 	TELUGU
+    "Knda", // 	KANNADA
+    "Mlym", // 	MALAYALAM
+    "Sinh", // 	SINHALA
+    "Thai", // 	THAI
+    "Laoo", // 	LAO
+    "Tibt", // 	TIBETAN
+    "Mymr", // 	MYANMAR
+    "Geor", // 	GEORGIAN
+    "<unused>", // 	JAMO -- NOT SEPARATED FROM HANGUL IN 15924
+    "Hang", // 	HANGUL
+    "Ethi", // 	ETHIOPIC
+    "Cher", // 	CHEROKEE
+    "Cans", // 	ABORIGINAL
+    "Ogam", // 	OGHAM
+    "Runr", // 	RUNIC
+    "Khmr", // 	KHMER
+    "Mong", // 	MONGOLIAN
+    "Hira", // 	HIRAGANA
+    "Kana", // 	KATAKANA
+    "Bopo", // 	BOPOMOFO
+    "Hani", // 	HAN
+    "Yiii", // 	YI
+    "Ital",
+    "Goth",
+    "Dsrt",
+    "Qaai",
+  };
+  
+  
+  
+  static final String[] AGE = {
+    "UNSPECIFIED",
+    "1.1",
+    "2.0", "2.1",
+    "3.0", "3.1"
+  };
+    
+    
+    // Two-letter general category abbreviations.
+    // The number in each comment is the entry's position in this array;
+    // the static initializer of this class checks the array length against
+    // LIMIT_CATEGORY.
+    static final String[] GC = {
+        "Cn", // = Other, Not Assigned 0
+
+        "Lu", // = Letter, Uppercase 1
+        "Ll", // = Letter, Lowercase 2
+        "Lt", // = Letter, Titlecase 3
+        "Lm", // = Letter, Modifier 4
+        "Lo", // = Letter, Other 5
+
+        "Mn", // = Mark, Non-Spacing 6
+        "Me", // = Mark, Enclosing 7
+        "Mc", // = Mark, Spacing Combining 8
+
+        "Nd", // = Number, Decimal Digit 9
+        "Nl", // = Number, Letter 10
+        "No", // = Number, Other 11
+
+        "Zs", // = Separator, Space 12
+        "Zl", // = Separator, Line 13
+        "Zp", // = Separator, Paragraph 14
+
+        "Cc", // = Other, Control 15
+        "Cf", // = Other, Format 16
+        "<unused>", // missing (placeholder keeping position 17 occupied)
+        "Co", // = Other, Private Use 18
+        "Cs", // = Other, Surrogate 19
+
+
+        "Pd", // = Punctuation, Dash 20
+        "Ps", // = Punctuation, Open 21
+        "Pe", // = Punctuation, Close 22
+        "Pc", // = Punctuation, Connector 23
+        "Po", // = Punctuation, Other 24
+
+        "Sm", // = Symbol, Math 25
+        "Sc", // = Symbol, Currency 26
+        "Sk", // = Symbol, Modifier 27
+        "So", // = Symbol, Other 28
+
+        "Pi", // = Punctuation, Initial quote 29 (may behave like Ps or Pe depending on usage)
+        "Pf" // = Punctuation, Final quote 30 (may behave like Ps or Pe depending on usage)
+    };
+    
+    // Long general category names.
+    // The number in each comment is the entry's position in this array.
+    static final String[] LONG_GC = {
+        "Unassigned", // = Other, Not Assigned 0
+
+        "UppercaseLetter", // = Letter, Uppercase 1
+        "LowercaseLetter", // = Letter, Lowercase 2
+        "TitlecaseLetter", // = Letter, Titlecase 3
+        "ModifierLetter", // = Letter, Modifier 4
+        "OtherLetter", // = Letter, Other 5
+
+        "NonspacingMark", // = Mark, Non-Spacing 6
+        "EnclosingMark", // = Mark, Enclosing 7
+        "SpacingMark", // = Mark, Spacing Combining 8
+
+        "DecimalNumber", // = Number, Decimal Digit 9
+        "LetterNumber", // = Number, Letter 10
+        "OtherNumber", // = Number, Other 11
+
+        "SpaceSeparator", // = Separator, Space 12
+        "LineSeparator", // = Separator, Line 13
+        "ParagraphSeparator", // = Separator, Paragraph 14
+
+        "Control", // = Other, Control 15
+        "Format", // = Other, Format 16
+        "<unused>", // missing (placeholder keeping position 17 occupied)
+        "PrivateUse", // = Other, Private Use 18
+        "Surrogate", // = Other, Surrogate 19
+
+
+        "DashPunctuation", // = Punctuation, Dash 20
+        "OpenPunctuation", // = Punctuation, Open 21
+        "ClosePunctuation", // = Punctuation, Close 22
+        "ConnectorPunctuation", // = Punctuation, Connector 23
+        "OtherPunctuation", // = Punctuation, Other 24
+
+        "MathSymbol", // = Symbol, Math 25
+        "CurrencySymbol", // = Symbol, Currency 26
+        "ModifierSymbol", // = Symbol, Modifier 27
+        "OtherSymbol", // = Symbol, Other 28
+
+        "InitialPunctuation", // = Punctuation, Initial quote 29 (may behave like Ps or Pe depending on usage)
+        "FinalPunctuation" // = Punctuation, Final quote 30 (may behave like Ps or Pe depending on usage)
+    };
+
+    
+
+    static String[] BC = {
+        "L", //	Left-Right; Most alphabetic, syllabic, and logographic characters (e.g., CJK ideographs)
+        "R", //	Right-Left; Arabic, Hebrew, and punctuation specific to those scripts
+        "EN", //	European Number
+        "ES", //	European Number Separator
+        "ET", //	European Number Terminator
+        "AN", //	Arabic Number
+        "CS", //	Common Number Separator
+        "B", //	Paragraph Separator
+        "S", //	Segment Separator
+        "WS", //	Whitespace
+        "ON", //	Other Neutrals ; All other characters: punctuation, symbols
+        "<unused>", "BN", "NSM", "AL", "LRO", "RLO", "LRE", "RLE", "PDF"
+    };
+        
+    static String[] LONG_BC = {
+        "LeftToRight", //	Left-Right; Most alphabetic, syllabic, and logographic characters (e.g., CJK ideographs)
+        "RightToLeft", //	Right-Left; Arabic, Hebrew, and punctuation specific to those scripts
+        "EuropeanNumber", //	European Number
+        "EuropeanSeparator", //	European Number Separator
+        "EuropeanTerminator", //	European Number Terminator
+        "ArabicNumber", //	Arabic Number
+        "CommonSeparator", //	Common Number Separator
+        "ParagraphSeparator", //	Paragraph Separator
+        "SegmentSeparator", //	Segment Separator
+        "WhiteSpace", //	Whitespace
+        "OtherNeutral", //	Other Neutrals ; All other characters: punctuation, symbols
+        "<unused>", 
+        "BoundaryNeutral", "NonspacingMark", "ArabicLetter", 
+        "LeftToRightOverride", 
+        "RightToLeftOverride", "LeftToRightEmbedding", 
+        "RightToLeftEmbedding", "PopDirectionalFormat"
+    };
+        
+    private static String[] CASE_TABLE = {
+        "LOWER", "TITLE", "UPPER", "UNCASED"
+    };
+
+    static String[] DT = {
+        "", // NONE
+        "canonical", // CANONICAL
+        "compat",	// Otherwise unspecified compatibility character.
+        "font",		// A font variant (e.g. a blackletter form).
+        "noBreak",	// A no-break version of a space or hyphen.
+        "initial",	// // An initial presentation form (Arabic).
+        "medial",	// // A medial presentation form (Arabic).
+        "final",	// // 	A final presentation form (Arabic).
+        "isolated",	// An isolated presentation form (Arabic).
+        "circle",	// An encircled form.
+        "super",	// 	A superscript form.
+        "sub",	// 	A subscript form.
+        "vertical",	// A vertical layout presentation form.
+        "wide",	// 	A wide (or zenkaku) compatibility character.
+        "narrow",	// A narrow (or hankaku) compatibility character.
+        "small",	// 	A small variant form (CNS compatibility).
+        "square",	// A CJK squared font variant.
+        "fraction",	// A vulgar fraction form.
+    };
+    
+    static String[] SHORT_DT = {
+        "", // NONE
+        "ca", // CANONICAL
+        "co",	// Otherwise unspecified compatibility character.
+        "fo",		// A font variant (e.g. a blackletter form).
+        "nb",	// A no-break version of a space or hyphen.
+        "in",	// // An initial presentation form (Arabic).
+        "me",	// // A medial presentation form (Arabic).
+        "fi",	// // 	A final presentation form (Arabic).
+        "is",	// An isolated presentation form (Arabic).
+        "ci",	// An encircled form.
+        "sp",	// 	A superscript form.
+        "sb",	// 	A subscript form.
+        "ve",	// A vertical layout presentation form.
+        "wi",	// 	A wide (or zenkaku) compatibility character.
+        "na",	// A narrow (or hankaku) compatibility character.
+        "sm",	// 	A small variant form (CNS compatibility).
+        "sq",	// A CJK squared font variant.
+        "fr",	// A vulgar fraction form.
+    };
+    
+    static private String[] MIRRORED_TABLE = {
+        "N",
+        "Y"
+    };
+
+    static String[] NT = {
+        "",
+        "numeric",
+        "digit",
+        "decimal",
+    };
+    
+    static String[] SHORT_NT = {
+        "",
+        "nu",
+        "di",
+        "de",
+    };
+    
+    // Sanity checks run once at class load: each name table must have exactly
+    // as many entries as the matching LIMIT_* / *_LIMIT constant, otherwise a
+    // lookup by property value would read the wrong name or run off the end.
+    // A mismatch is reported on System.err (loading continues), and each
+    // message names the table that is out of step.
+    static {
+        if (LIMIT_CATEGORY != GC.length) {
+            System.err.println("!! ERROR !! UnicodeTypes and UInfo out of sync: category");
+        }
+        if (LIMIT_BIDI_CLASS != BC.length) {
+            System.err.println("!! ERROR !! UnicodeTypes and UInfo out of sync: bidi");
+        }
+        if (LIMIT_LINE_BREAK != LB.length) {
+            System.err.println("!! ERROR !! UnicodeTypes and UInfo out of sync: linebreak");
+        }
+        if (LIMIT_DECOMPOSITION_TYPE != DT.length) {
+            System.err.println("!! ERROR !! UnicodeTypes and UInfo out of sync: compat type");
+        }
+        if (MIRRORED_LIMIT != MIRRORED_TABLE.length) {
+            System.err.println("!! ERROR !! UnicodeTypes and UInfo out of sync: mirrored");
+        }
+        if (CASE_LIMIT != CASE_TABLE.length) {
+            System.err.println("!! ERROR !! UnicodeTypes and UInfo out of sync: case");
+        }
+        if (LIMIT_NUMERIC_TYPE != NT.length) {
+            System.err.println("!! ERROR !! UnicodeTypes and UInfo out of sync: numeric type");
+        }
+        if (LIMIT_EAST_ASIAN_WIDTH != EA.length) {
+            System.err.println("!! ERROR !! UnicodeTypes and UInfo out of sync: east Asian Width");
+        }
+        if (LIMIT_BINARY_PROPERTIES != BP.length) {
+            System.err.println("!! ERROR !! UnicodeTypes and UInfo out of sync: binary properties");
+        }
+        if (LIMIT_SCRIPT != SCRIPT.length) {
+            System.err.println("!! ERROR !! UnicodeTypes and UInfo out of sync: script");
+        }
+        if (LIMIT_AGE != AGE.length) {
+            System.err.println("!! ERROR !! UnicodeTypes and UInfo out of sync: age");
+        }
+    }
+    
+    public static byte ON = Utility.lookup("ON", BC);
+    
+    public static String[] JOINING_TYPE = {
+        "C",
+        "D",
+        "R",
+        "U",
+        "L",
+        "T"
+    };
+
+    public static String[] LONG_JOINING_TYPE = {
+        "JoinCausing",
+        "DualJoining",
+        "RightJoining",
+        "NonJoining",
+        "LeftJoining",
+        "Transparent"
+    };
+
+    public static String[] JOINING_GROUP = {
+        "NO_JOINING_GROUP",
+        "AIN",
+        "ALAPH",
+        "ALEF",
+        "BEH",
+        "BETH",
+        "DAL",
+        "DALATH_RISH",
+        "E",
+        "FEH",
+        "FINAL_SEMKATH",
+        "GAF",
+        "GAMAL",
+        "HAH",
+        "HAMZA_ON_HEH_GOAL",
+        "HE",
+        "HEH",
+        "HEH_GOAL",
+        "HETH",
+        "KAF",
+        "KAPH",
+        "KNOTTED_HEH",
+        "LAM",
+        "LAMADH",
+        "MEEM",
+        "MIM",
+        "NOON",
+        "NUN",
+        "PE",
+        "QAF",
+        "QAPH",
+        "REH",
+        "REVERSED_PE",
+        "SAD",
+        "SADHE",
+        "SEEN",
+        "SEMKATH",
+        "SHIN",
+        "SWASH_KAF",
+        "TAH",
+        "TAW",
+        "TEH_MARBUTA",
+        "TETH",
+        "WAW",
+        "YEH",
+        "YEH_BARREE",
+        "YEH_WITH_TAIL",
+        "YUDH",
+        "YUDH_HE",
+        "ZAIN",
+    };
+    
+    /**
+     * Joining-group names in the older spelling: words separated by spaces, and
+     * "&lt;no shaping&gt;" at index 0. Entries are in the same order as the
+     * numeric joining-group codes.
+     */
+    public static String[] OLD_JOINING_GROUP = {
+        "<no shaping>", "AIN", "ALAPH", "ALEF", "BEH",                  //  0 ..  4
+        "BETH", "DAL", "DALATH RISH", "E", "FEH",                       //  5 ..  9
+        "FINAL SEMKATH", "GAF", "GAMAL", "HAH", "HAMZA ON HEH GOAL",    // 10 .. 14
+        "HE", "HEH", "HEH GOAL", "HETH", "KAF",                         // 15 .. 19
+        "KAPH", "KNOTTED HEH", "LAM", "LAMADH", "MEEM",                 // 20 .. 24
+        "MIM", "NOON", "NUN", "PE", "QAF",                              // 25 .. 29
+        "QAPH", "REH", "REVERSED PE", "SAD", "SADHE",                   // 30 .. 34
+        "SEEN", "SEMKATH", "SHIN", "SWASH KAF", "TAH",                  // 35 .. 39
+        "TAW", "TEH MARBUTA", "TETH", "WAW", "YEH",                     // 40 .. 44
+        "YEH BARREE", "YEH WITH TAIL", "YUDH", "YUDH HE", "ZAIN"        // 45 .. 49
+    };
+    
+    
+    
+    // Short names of the 19 Hangul leading consonants (choseong), U+1100..U+1112.
+    // Entry i belongs to code point U+1100 + i, so the order must not change.
+    // The empty string for U+110B (IEUNG) is intentional: it has no short name.
+    static String[] JAMO_L_TABLE = {
+                // Value;  Short Name; Unicode Name
+        "G",    // U+1100; G; HANGUL CHOSEONG KIYEOK
+        "GG",   // U+1101; GG; HANGUL CHOSEONG SSANGKIYEOK
+        "N",    // U+1102; N; HANGUL CHOSEONG NIEUN
+        "D",    // U+1103; D; HANGUL CHOSEONG TIKEUT
+        "DD",   // U+1104; DD; HANGUL CHOSEONG SSANGTIKEUT
+        "R",    // U+1105; R; HANGUL CHOSEONG RIEUL
+        "M",    // U+1106; M; HANGUL CHOSEONG MIEUM
+        "B",    // U+1107; B; HANGUL CHOSEONG PIEUP
+        "BB",   // U+1108; BB; HANGUL CHOSEONG SSANGPIEUP
+        "S",    // U+1109; S; HANGUL CHOSEONG SIOS
+        "SS",   // U+110A; SS; HANGUL CHOSEONG SSANGSIOS
+        "",     // U+110B; ; HANGUL CHOSEONG IEUNG
+        "J",    // U+110C; J; HANGUL CHOSEONG CIEUC
+        "JJ",   // U+110D; JJ; HANGUL CHOSEONG SSANGCIEUC
+        "C",    // U+110E; C; HANGUL CHOSEONG CHIEUCH
+        "K",    // U+110F; K; HANGUL CHOSEONG KHIEUKH
+        "T",    // U+1110; T; HANGUL CHOSEONG THIEUTH
+        "P",    // U+1111; P; HANGUL CHOSEONG PHIEUPH
+        "H"     // U+1112; H; HANGUL CHOSEONG HIEUH
+    };
+    
+    // Short names of the 21 Hangul vowels (jungseong), U+1161..U+1175.
+    // Entry i belongs to code point U+1161 + i, so the order must not change.
+    static String[] JAMO_V_TABLE = {
+                // Value;  Short Name; Unicode Name
+        "A",    // U+1161; A; HANGUL JUNGSEONG A
+        "AE",   // U+1162; AE; HANGUL JUNGSEONG AE
+        "YA",   // U+1163; YA; HANGUL JUNGSEONG YA
+        "YAE",  // U+1164; YAE; HANGUL JUNGSEONG YAE
+        "EO",   // U+1165; EO; HANGUL JUNGSEONG EO
+        "E",    // U+1166; E; HANGUL JUNGSEONG E
+        "YEO",  // U+1167; YEO; HANGUL JUNGSEONG YEO
+        "YE",   // U+1168; YE; HANGUL JUNGSEONG YE
+        "O",    // U+1169; O; HANGUL JUNGSEONG O
+        "WA",   // U+116A; WA; HANGUL JUNGSEONG WA
+        "WAE",  // U+116B; WAE; HANGUL JUNGSEONG WAE
+        "OE",   // U+116C; OE; HANGUL JUNGSEONG OE
+        "YO",   // U+116D; YO; HANGUL JUNGSEONG YO
+        "U",    // U+116E; U; HANGUL JUNGSEONG U
+        "WEO",  // U+116F; WEO; HANGUL JUNGSEONG WEO
+        "WE",   // U+1170; WE; HANGUL JUNGSEONG WE
+        "WI",   // U+1171; WI; HANGUL JUNGSEONG WI
+        "YU",   // U+1172; YU; HANGUL JUNGSEONG YU
+        "EU",   // U+1173; EU; HANGUL JUNGSEONG EU
+        "YI",   // U+1174; YI; HANGUL JUNGSEONG YI
+        "I",    // U+1175; I; HANGUL JUNGSEONG I
+    };
+    
+    // Short names of the Hangul trailing consonants (jongseong), U+11A8..U+11C2.
+    // Index 0 is an empty filler standing for "no trailing consonant" (an LV
+    // syllable), so entry i (i >= 1) belongs to code point U+11A7 + i.
+    static String[] JAMO_T_TABLE = {
+                // Value;  Short Name; Unicode Name
+        "",     // filler, for LV syllable
+        "G",    // U+11A8; G; HANGUL JONGSEONG KIYEOK
+        "GG",   // U+11A9; GG; HANGUL JONGSEONG SSANGKIYEOK
+        "GS",   // U+11AA; GS; HANGUL JONGSEONG KIYEOK-SIOS
+        "N",    // U+11AB; N; HANGUL JONGSEONG NIEUN
+        "NJ",   // U+11AC; NJ; HANGUL JONGSEONG NIEUN-CIEUC
+        "NH",   // U+11AD; NH; HANGUL JONGSEONG NIEUN-HIEUH
+        "D",    // U+11AE; D; HANGUL JONGSEONG TIKEUT
+        "L",    // U+11AF; L; HANGUL JONGSEONG RIEUL
+        "LG",   // U+11B0; LG; HANGUL JONGSEONG RIEUL-KIYEOK
+        "LM",   // U+11B1; LM; HANGUL JONGSEONG RIEUL-MIEUM
+        "LB",   // U+11B2; LB; HANGUL JONGSEONG RIEUL-PIEUP
+        "LS",   // U+11B3; LS; HANGUL JONGSEONG RIEUL-SIOS
+        "LT",   // U+11B4; LT; HANGUL JONGSEONG RIEUL-THIEUTH
+        "LP",   // U+11B5; LP; HANGUL JONGSEONG RIEUL-PHIEUPH
+        "LH",   // U+11B6; LH; HANGUL JONGSEONG RIEUL-HIEUH
+        "M",    // U+11B7; M; HANGUL JONGSEONG MIEUM
+        "B",    // U+11B8; B; HANGUL JONGSEONG PIEUP
+        "BS",   // U+11B9; BS; HANGUL JONGSEONG PIEUP-SIOS
+        "S",    // U+11BA; S; HANGUL JONGSEONG SIOS
+        "SS",   // U+11BB; SS; HANGUL JONGSEONG SSANGSIOS
+        "NG",   // U+11BC; NG; HANGUL JONGSEONG IEUNG
+        "J",    // U+11BD; J; HANGUL JONGSEONG CIEUC
+        "C",    // U+11BE; C; HANGUL JONGSEONG CHIEUCH
+        "K",    // U+11BF; K; HANGUL JONGSEONG KHIEUKH
+        "T",    // U+11C0; T; HANGUL JONGSEONG THIEUTH
+        "P",    // U+11C1; P; HANGUL JONGSEONG PHIEUPH
+        "H",    // U+11C2; H; HANGUL JONGSEONG HIEUH
+    };
+
+
+    
+/*
+    static {
+        UNASSIGNED_INFO.code = '\uFFFF';
+        UNASSIGNED_INFO.name = "<reserved>";
+        UNASSIGNED_INFO.decomposition = "";
+        UNASSIGNED_INFO.fullCanonicalDecomposition = "";
+        UNASSIGNED_INFO.fullCompatibilityDecomposition = "";
+        UNASSIGNED_INFO.name10 = "";
+        UNASSIGNED_INFO.comment = "";
+
+        UNASSIGNED_INFO.numericType = NONE;
+        UNASSIGNED_INFO.decompositionType = NONE;
+
+        UNASSIGNED_INFO.category = lookup("Cn",CATEGORY_TABLE, "PROXY");
+        UNASSIGNED_INFO.canonical = 0;
+
+        UNASSIGNED_INFO.uppercase = "";
+        UNASSIGNED_INFO.lowercase = "";
+        UNASSIGNED_INFO.titlecase = "";
+
+        UNASSIGNED_INFO.bidi = ON;
+
+        UNASSIGNED_INFO.mirrored = NO;
+    }
+        */
+}
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
@ -0,0 +1,374 @@
+package com.ibm.text.UCD;
+
+public interface UCD_Types {
+    public static final String DATA_DIR = "C:\\DATA\\";
+    public static final String BIN_DIR = DATA_DIR + "\\BIN\\";
+    public static final String GEN_DIR = DATA_DIR + "\\GEN\\";
+    
+
+    static final byte BINARY_FORMAT = 5; // bumped if binary format of UCD changes
+    /*
+  0	Code value in 4-digit hexadecimal format.
+  1	Unicode 2.1 Character Name. These names match exactly the
+  2	General Category. This is a useful breakdown into various "character
+  3	Canonical Combining Classes. The classes used for the
+  4	Bidirectional Category. See the list below for an explanation of the
+  5	Character Decomposition. In the Unicode Standard, not all of
+  6	Decimal digit value. This is a numeric field. If the character
+  7	Digit value. This is a numeric field. If the character represents a
+  8	Numeric value. This is a numeric field. If the character has the
+  9	If the characters has been identified as a "mirrored" character in
+ 10	Unicode 1.0 Name. This is the old name as published in Unicode 1.0.
+ 11	10646 Comment field. This field is informative.
+ 12	Upper case equivalent mapping. If a character is part of an
+ 13	Lower case equivalent mapping. Similar to 12. This field is informative.
+ 14	Title case equivalent mapping. Similar to 12. This field is informative.
+    */
+    
+    // Binary ENUM Grouping
+    public static final int
+        CATEGORY = 0,
+        COMBINING_CLASS = 0x100,
+        BIDI_CLASS = 0x200,
+        DECOMPOSITION_TYPE = 0x300,
+        NUMERIC_TYPE = 0x400,
+        EAST_ASIAN_WIDTH = 0x500,
+        LINE_BREAK = 0x600,
+        JOINING_TYPE = 0x700,
+        JOINING_GROUP = 0x800,
+        BINARY_PROPERTIES = 0x900,
+        SCRIPT = 0xA00,
+        AGE = 0xB00,
+        NEXT_ENUM = 0x100,
+        LIMIT_ENUM = AGE + 0x100;
+        
+    public static final int LIMIT_COMBINING_CLASS = 256;
+    
+    // getCategory
+    public static final byte
+	UNASSIGNED		= 0,
+	UPPERCASE_LETTER	= 1,
+	LOWERCASE_LETTER	= 2,
+	TITLECASE_LETTER	= 3,
+	MODIFIER_LETTER		= 4,
+	OTHER_LETTER		= 5,
+	NON_SPACING_MARK	= 6,
+	ENCLOSING_MARK		= 7,
+	COMBINING_SPACING_MARK	= 8,
+	DECIMAL_DIGIT_NUMBER	= 9,
+	LETTER_NUMBER		= 10,
+	OTHER_NUMBER		= 11,
+	SPACE_SEPARATOR		= 12,
+	LINE_SEPARATOR		= 13,
+	PARAGRAPH_SEPARATOR	= 14,
+	CONTROL			= 15,
+	FORMAT			= 16,
+	UNUSED_CATEGORY			= 17,
+	PRIVATE_USE		= 18,
+	SURROGATE		= 19,
+	DASH_PUNCTUATION	= 20,
+	START_PUNCTUATION	= 21,
+	END_PUNCTUATION		= 22,
+	CONNECTOR_PUNCTUATION	= 23,
+	OTHER_PUNCTUATION	= 24,
+	MATH_SYMBOL		= 25,
+	CURRENCY_SYMBOL		= 26,
+	MODIFIER_SYMBOL		= 27,
+	OTHER_SYMBOL		= 28,
+	INITIAL_PUNCTUATION	= 29,
+	FINAL_PUNCTUATION		= 30,
+	LIMIT_CATEGORY = FINAL_PUNCTUATION+1,
+	
+	// Unicode abbreviations
+	Lu = UPPERCASE_LETTER,
+	Ll = LOWERCASE_LETTER,
+	Lt = TITLECASE_LETTER,
+    Lm = MODIFIER_LETTER,
+	Lo = OTHER_LETTER,
+	Mn = NON_SPACING_MARK,
+	Me = ENCLOSING_MARK,
+	Mc = COMBINING_SPACING_MARK,
+	Nd = DECIMAL_DIGIT_NUMBER,
+	Nl = LETTER_NUMBER,
+	No = OTHER_NUMBER,
+	Zs = SPACE_SEPARATOR,
+	Zl = LINE_SEPARATOR,
+	Zp = PARAGRAPH_SEPARATOR,
+	Cc = CONTROL,
+	Cf = FORMAT,
+	Cs = SURROGATE,
+	Co = PRIVATE_USE,
+	Cn = UNASSIGNED,
+	Pc = CONNECTOR_PUNCTUATION,
+	Pd = DASH_PUNCTUATION,
+	Ps = START_PUNCTUATION,
+	Pe = END_PUNCTUATION,
+	Po = OTHER_PUNCTUATION,
+	Pi = INITIAL_PUNCTUATION,
+	Pf = FINAL_PUNCTUATION,
+	Sm = MATH_SYMBOL,
+	Sc = CURRENCY_SYMBOL,
+	Sk = MODIFIER_SYMBOL,
+	So = OTHER_SYMBOL;
+	
+    static final int
+        LETTER_MASK = (1<<Lu) | (1<<Ll) | (1<<Lt) | (1<<Lm) | (1 << Lo),
+        MARK_MASK = (1<<Mn) | (1<<Me) | (1<<Mc),
+        NUMBER_MASK = (1<<Nd) | (1<<Nl) | (1<<No),
+        SEPARATOR_MASK = (1<<Zs) | (1<<Zl) | (1<<Zp),
+        CONTROL_MASK = (1<<Cc) | (1<<Cf) | (1<<Cs) | (1<<Co),
+        PUNCTUATION_MASK = (1<<Pc) | (1<<Pd) | (1<<Ps) | (1<<Pe) | (1<<Po) | (1<<Pi) | (1<<Pf),
+        SYMBOL_MASK = (1<<Sm) | (1<<Sc) | (1<<Sk) | (1<<So),
+        UNASSIGNED_MASK = (1<<Cn);
+	
+	// Binary Properties
+	
+	public static final byte
+	    BidiMirrored = 0,
+	    CompositionExclusion = 1, 
+        White_space = 2,
+        Non_break = 3,
+	    Bidi_Control = 4,
+        Join_Control = 5,
+        Dash = 6,
+        Hyphen = 7,
+        Quotation_Mark = 8,
+        Terminal_Punctuation = 9,
+        Math_Property = 10,
+        Hex_Digit = 11,
+        ASCII_Hex_Digit = 12,
+	    Alphabetic = 13,
+        Ideographic = 14,
+        Diacritic = 15,
+        Extender = 16,
+        Other_Lowercase = 17,
+        Other_Uppercase = 18,
+        Noncharacter_Code_Point = 19,
+        CaseFoldTurkishI = 20,
+        Other_GraphemeExtend = 21,
+        GraphemeLink = 22,
+        IDS_BinaryOperator = 23,
+        IDS_TrinaryOperator = 24,
+        Radical = 25,
+        UnifiedIdeograph = 26,
+        Reserved_Cf_Code_Point = 27,
+        Deprecated = 28,
+	    LIMIT_BINARY_PROPERTIES = 29;
+	    
+	/*
+    static final int
+	    BidiMirroredMask = 1<<BidiMirrored,
+	    CompositionExclusionMask = 1<<CompositionExclusion, 
+	    AlphabeticMask = 1<<Alphabetic,
+	    Bidi_ControlMask = 1<<Bidi_Control,
+        DashMask = 1<<Dash,
+        DiacriticMask = 1<<Diacritic,
+        ExtenderMask = 1<<Extender,
+        Hex_DigitMask = 1<<Hex_Digit,
+        HyphenMask = 1<<Hyphen,
+        IdeographicMask = 1<<Ideographic,
+        Join_ControlMask = 1<<Join_Control,
+        Math_PropertyMask = 1<<Math_Property,
+        Non_breakMask = 1<<Non_break,
+        Noncharacter_Code_PointMask = 1<<Noncharacter_Code_Point,
+        Other_LowercaseMask = 1<<Other_Lowercase,
+        Other_UppercaseMask = 1<<Other_Uppercase,
+        Quotation_MarkMask = 1<<Quotation_Mark,
+        Terminal_PunctuationMask = 1<<Terminal_Punctuation,
+        White_spaceMask = 1<<White_space;
+    */
+
+    // line break
+    public static final byte
+        LBXX = 0, LBOP = 1, LBCL = 2, LBQU = 3, LBGL = 4, LBNS = 5, LBEX = 6, LBSY = 7,
+        LBIS = 8, LBPR = 9, LBPO = 10, LBNU = 11, LBAL = 12, LBID = 13, LBIN = 14, LBHY = 15, 
+        LBCM = 16, LBBB = 17, LBBA = 18, LBSP = 19, LBBK = 20, LBCR = 21, LBLF = 22, LBCB = 23, 
+        LBSA = 24, LBAI = 25, LBB2 = 26, LBSG = 27, LBZW = 28, LIMIT_LINE_BREAK = 29;
+    
+    // east asian width
+    public static final byte
+         EAN = 0, EAA = 1, EAH = 2, EAW = 3, EAF = 4, EANa = 5, 
+         LIMIT_EAST_ASIAN_WIDTH = 6;
+         
+	// bidi class
+	static final byte
+	    BIDI_L = 0,     // Left-Right; Most alphabetic, syllabic, and logographic characters (e.g., CJK ideographs)
+	    BIDI_R = 1,     // Right-Left; Arabic, Hebrew, and punctuation specific to those scripts
+	    BIDI_EN = 2,    // European Number
+	    BIDI_ES = 3,    // European Number Separator
+	    BIDI_ET = 4,    // European Number Terminator
+	    BIDI_AN = 5,    // Arabic Number
+	    BIDI_CS = 6,    // Common Number Separator
+	    BIDI_B = 7,     // Block Separator
+	    BIDI_S = 8,     // Segment Separator
+	    BIDI_WS = 9,    // Whitespace
+	    BIDI_ON = 10,   // Other Neutrals ; All other characters: punctuation, symbols
+	    LIMIT_BIDI_2 = 11,
+	    BIDI_UNUSED = 11,
+	    BIDI_BN = 12,
+	    BIDI_NSM = 13,
+	    BIDI_AL = 14,
+	    BIDI_LRO = 15,
+	    BIDI_RLO = 16,
+	    BIDI_LRE = 17,
+	    BIDI_RLE = 18,
+	    BIDI_PDF = 19,
+	    LIMIT_BIDI_CLASS = 20;
+	    
+	// decompositionType
+    static final byte NONE = 0,
+        CANONICAL = 1,
+        COMPATIBILITY = 2,
+        COMPAT_UNSPECIFIED = 2,	// Otherwise unspecified compatibility character.
+        COMPAT_FONT = 3,		// A font variant (e.g. a blackletter form).
+        COMPAT_NOBREAK = 4,	// A no-break version of a space or hyphen.
+        COMPAT_INITIAL = 5,	// // An initial presentation form (Arabic).
+        COMPAT_MEDIAL = 6,	// // A medial presentation form (Arabic).
+        COMPAT_FINAL = 7,	// // 	A final presentation form (Arabic).
+        COMPAT_ISOLATED = 8,	// An isolated presentation form (Arabic).
+        COMPAT_CIRCLE = 9,	// An encircled form.
+        COMPAT_SUPER = 10,	// 	A superscript form.
+        COMPAT_SUB = 11,	// 	A subscript form.
+        COMPAT_VERTICAL = 12,	// A vertical layout presentation form.
+        COMPAT_WIDE = 13,	// 	A wide (or zenkaku) compatibility character.
+        COMPAT_NARROW = 14,	// A narrow (or hankaku) compatibility character.
+        COMPAT_SMALL = 15,	// 	A small variant form (CNS compatibility).
+        COMPAT_SQUARE = 16,	// A CJK squared font variant.
+        COMPAT_FRACTION = 17,	// A vulgar fraction form.
+        LIMIT_DECOMPOSITION_TYPE = 18;
+
+    // mirrored type
+    static final byte NO = 0, YES = 1, MIRRORED_LIMIT = 2;
+    
+    // for QuickCheck
+    static final byte QNO = 0, QMAYBE = 1, QYES = 2;
+
+    // case type
+    static final byte LOWER = 0, TITLE = 1, UPPER = 2, UNCASED = 3, FOLD = 3, CASE_LIMIT = 4;
+    static final byte SIMPLE = 0, FULL = 8;
+
+    // normalization type
+    static final byte UNNORMALIZED = 0, C = 1, KC = 2, D = 3, KD = 4, FORM_LIMIT = 5;
+
+    // numericType
+    static final byte NUMERIC_NONE = 0, NUMERIC = 1, DIGIT = 2, DECIMAL = 3, 
+        LIMIT_NUMERIC_TYPE = 4;
+
+    public static final byte // SCRIPT CODE
+        COMMON_SCRIPT = 0,
+        LATIN_SCRIPT = 1,
+        GREEK_SCRIPT = 2,
+        CYRILLIC_SCRIPT = 3,
+        ARMENIAN_SCRIPT = 4,
+        HEBREW_SCRIPT = 5,
+        ARABIC_SCRIPT = 6,
+        SYRIAC_SCRIPT = 7,
+        THAANA_SCRIPT = 8, 
+        DEVANAGARI_SCRIPT = 9,
+        BENGALI_SCRIPT = 10,
+        GURMUKHI_SCRIPT = 11,
+        GUJARATI_SCRIPT = 12,
+        ORIYA_SCRIPT = 13,
+        TAMIL_SCRIPT = 14,
+        TELUGU_SCRIPT = 15,
+        KANNADA_SCRIPT = 16,
+        MALAYALAM_SCRIPT = 17,
+        SINHALA_SCRIPT = 18,
+        THAI_SCRIPT = 19,
+        LAO_SCRIPT = 20,
+        TIBETAN_SCRIPT = 21,
+        MYANMAR_SCRIPT = 22,
+        GEORGIAN_SCRIPT = 23,
+        UNUSED_SCRIPT = 24,
+        HANGUL_SCRIPT = 25,
+        ETHIOPIC_SCRIPT = 26,
+        CHEROKEE_SCRIPT = 27,
+        ABORIGINAL_SCRIPT = 28,
+        OGHAM_SCRIPT = 29,
+        RUNIC_SCRIPT = 30,
+        KHMER_SCRIPT = 31,
+        MONGOLIAN_SCRIPT = 32,
+        HIRAGANA_SCRIPT = 33,
+        KATAKANA_SCRIPT = 34,
+        BOPOMOFO_SCRIPT = 35,
+        HAN_SCRIPT = 36,
+        YI_SCRIPT = 37,
+        OLD_ITALIC_SCRIPT = 38,
+        GOTHIC_SCRIPT = 39,
+        DESERET_SCRIPT = 40,
+        INHERITED_SCRIPT = 41,
+        LIMIT_SCRIPT = 42;
+        
+  static final int 
+    UNKNOWN = 0,
+    AGE10 = 1,
+    AGE20 = 2,
+    AGE21 = 3,
+    AGE30 = 4,
+    AGE31 = 5,
+    LIMIT_AGE = 6;
+        
+        
+        
+public static byte
+    JT_C = 0,
+    JT_D = 1,
+    JT_R = 2,
+    JT_U = 3,
+    JT_L = 4,
+    JT_T = 5,
+    LIMIT_JOINING_TYPE = 6;
+
+public static byte
+    NO_SHAPING = 0,
+    AIN = 1,
+    ALAPH = 2,
+    ALEF = 3,
+    BEH = 4,
+    BETH = 5,
+    DAL = 6,
+    DALATH_RISH = 7,
+    E = 8,
+    FEH = 9,
+    FINAL_SEMKATH = 10,
+    GAF = 11,
+    GAMAL = 12,
+    HAH = 13,
+    HAMZA_ON_HEH_GOAL = 14,
+    HE = 15,
+    HEH = 16,
+    HEH_GOAL = 17,
+    HETH = 18,
+    KAF = 19,
+    KAPH = 20,
+    KNOTTED_HEH = 21,
+    LAM = 22,
+    LAMADH = 23,
+    MEEM = 24,
+    MIM = 25,
+    NOON = 26,
+    NUN = 27,
+    PE = 28,
+    QAF = 29,
+    QAPH = 30,
+    REH = 31,
+    REVERSED_PE = 32,
+    SAD = 33,
+    SADHE = 34,
+    SEEN = 35,
+    SEMKATH = 36,
+    SHIN = 37,
+    SWASH_KAF = 38,
+    TAH = 39,
+    TAW = 40,
+    TEH_MARBUTA = 41,
+    TETH = 42,
+    WAW = 43,
+    YEH = 44,
+    YEH_BARREE = 45,
+    YEH_WITH_TAIL = 46,
+    YUDH = 47,
+    YUDH_HE = 48,
+    ZAIN = 49,
+    LIMIT_JOINING_GROUP = 50;
+}
--- a/tools/unicodetools/com/ibm/text/UCD/UData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UData.java
@ -0,0 +1,317 @@
+package com.ibm.text.UCD;
+
+import java.io.*;
+import com.ibm.text.utility.*;
+
+/**
+ * Property record for a single code point: string-valued mappings, numeric
+ * value, binary-property bit set and the byte-coded enumerated properties.
+ *
+ * String fields have two states. After {@link #fleshOut()} every mapping is
+ * non-null, with defaults filled in; after {@link #compact()} a mapping that
+ * equals its default is null. {@link #writeBytes} leaves the record compacted
+ * and {@link #readBytes} leaves it fleshed out.
+ */
+class UData implements UCD_Types {
+    String name;
+    String decompositionMapping;
+    String simpleUppercase;
+    String simpleLowercase;
+    String simpleTitlecase;
+    String simpleCaseFolding;
+    String fullUppercase;
+    String fullLowercase;
+    String fullTitlecase;
+    String fullCaseFolding;
+    String specialCasing = "";
+    String bidiMirror;
+
+    int codePoint = -1;
+    float numericValue = Float.NaN;     // NaN means "no numeric value"
+    int binaryProperties; // bidiMirroring, compositionExclusions, PropList
+
+    byte generalCategory = Cn;
+    byte combiningClass = 0;            // stored signed; printed with & 0xFF
+    byte bidiClass = BIDI_ON;
+    byte decompositionType = NONE;
+    byte numericType = NUMERIC_NONE;
+
+    byte eastAsianWidth = EAN;
+    byte lineBreak = LBXX;
+    byte joiningType = JT_U;
+    byte joiningGroup = NO_SHAPING;
+    byte script = COMMON_SCRIPT;
+    byte age = 0;
+
+    // Shared record for unassigned code points. This field hides the
+    // byte constant UCD_Types.UNASSIGNED inside this class.
+    static final UData UNASSIGNED = new UData();
+    //static final UData NONCHARACTER = new UData();
+    static {
+        UNASSIGNED.name = "<unassigned>";
+        UNASSIGNED.decompositionMapping = UNASSIGNED.bidiMirror
+        = UNASSIGNED.simpleUppercase
+        = UNASSIGNED.simpleLowercase
+        = UNASSIGNED.simpleTitlecase = "";
+        UNASSIGNED.fleshOut();
+
+        /*NONCHARACTER.name = "<noncharacter>";
+        NONCHARACTER.decompositionMapping = NONCHARACTER.bidiMirror
+        = NONCHARACTER.simpleUppercase
+        = NONCHARACTER.simpleLowercase
+        = NONCHARACTER.simpleTitlecase = "";
+
+        NONCHARACTER.binaryProperties = Noncharacter_Code_PointMask;
+        NONCHARACTER.fleshOut();
+        */
+    }
+
+    /** Creates a record for the given code point with all properties at their defaults. */
+    public UData (int codePoint) {
+        this.codePoint = codePoint;
+    }
+
+    /** Creates a record with code point -1 and all properties at their defaults. */
+    public UData () {
+    }
+
+    /**
+     * Compares every field of two records.
+     *
+     * Returns false (instead of throwing) for null or non-UData arguments,
+     * tolerates null string fields as produced by {@link #compact()}, and
+     * treats two NaN numeric values as equal so that records without a
+     * numeric value can compare equal.
+     */
+    public boolean equals(Object that) {
+        if (this == that) return true;
+        if (!(that instanceof UData)) return false;
+        UData other = (UData) that;
+        return codePoint == other.codePoint
+            && sameString(name, other.name)
+            && sameString(decompositionMapping, other.decompositionMapping)
+            && sameString(simpleUppercase, other.simpleUppercase)
+            && sameString(simpleLowercase, other.simpleLowercase)
+            && sameString(simpleTitlecase, other.simpleTitlecase)
+            && sameString(simpleCaseFolding, other.simpleCaseFolding)
+            && sameString(fullUppercase, other.fullUppercase)
+            && sameString(fullLowercase, other.fullLowercase)
+            && sameString(fullTitlecase, other.fullTitlecase)
+            && sameString(fullCaseFolding, other.fullCaseFolding)
+            && sameString(specialCasing, other.specialCasing)
+            && sameString(bidiMirror, other.bidiMirror)
+            && sameNumber(numericValue, other.numericValue)
+            && binaryProperties == other.binaryProperties
+            && generalCategory == other.generalCategory
+            && combiningClass == other.combiningClass
+            && bidiClass == other.bidiClass
+            && decompositionType == other.decompositionType
+            && numericType == other.numericType
+            && eastAsianWidth == other.eastAsianWidth
+            && lineBreak == other.lineBreak
+            && joiningType == other.joiningType
+            && joiningGroup == other.joiningGroup
+            && script == other.script
+            && age == other.age;
+    }
+
+    /**
+     * Hash code consistent with {@link #equals}: built only from the code point
+     * and name, which fleshOut() and compact() never modify.
+     */
+    public int hashCode() {
+        return 31 * codePoint + (name == null ? 0 : name.hashCode());
+    }
+
+    /** Null-safe string equality. */
+    private static boolean sameString(String a, String b) {
+        return a == null ? b == null : a.equals(b);
+    }
+
+    /** Float equality in which NaN equals NaN (plain == never does). */
+    private static boolean sameNumber(float a, float b) {
+        return a == b || (Float.isNaN(a) && Float.isNaN(b));
+    }
+
+    /**
+     * Replaces every null mapping with its default: the code point itself for
+     * the decomposition, bidi mirror and simple case mappings; the simple
+     * mapping for the corresponding full mapping; and the lowercase mapping
+     * for the case folding. The order of assignments matters because later
+     * defaults are taken from earlier ones.
+     */
+    public void fleshOut() {
+        String codeValue = UTF32.valueOf32(codePoint);
+
+        if (decompositionMapping == null) decompositionMapping = codeValue;
+        if (bidiMirror == null) bidiMirror = codeValue;
+
+        if (simpleLowercase == null) simpleLowercase = codeValue;
+        if (simpleCaseFolding == null) simpleCaseFolding = simpleLowercase;
+        if (fullLowercase == null) fullLowercase = simpleLowercase;
+        if (fullCaseFolding == null) fullCaseFolding = fullLowercase;
+
+        if (simpleUppercase == null) simpleUppercase = codeValue;
+        if (simpleTitlecase == null) simpleTitlecase = codeValue;
+        if (fullUppercase == null) fullUppercase = simpleUppercase;
+
+        if (fullTitlecase == null) fullTitlecase = simpleTitlecase;
+    }
+
+    /**
+     * Inverse of {@link #fleshOut()}: sets each mapping that equals its default
+     * to null. Dependent mappings are cleared before the mappings they are
+     * compared against, so the order of statements matters.
+     */
+    public void compact() {
+        fleshOut();
+        String codeValue = UTF32.valueOf32(codePoint);
+
+        if (fullTitlecase.equals(simpleTitlecase)) fullTitlecase = null;
+
+        if (fullUppercase.equals(simpleUppercase)) fullUppercase = null;
+        if (simpleTitlecase.equals(codeValue)) simpleTitlecase = null;
+        if (simpleUppercase.equals(codeValue)) simpleUppercase = null;
+
+        if (fullCaseFolding.equals(fullLowercase)) fullCaseFolding = null;
+        if (fullLowercase.equals(simpleLowercase)) fullLowercase = null;
+        if (simpleCaseFolding.equals(simpleLowercase)) simpleCaseFolding = null;
+        if (simpleLowercase.equals(codeValue)) simpleLowercase = null;
+
+        if (decompositionMapping.equals(codeValue)) decompositionMapping = null;
+        if (bidiMirror.equals(codeValue)) bidiMirror = null;
+    }
+
+    /** Replaces the whole binary-property bit set. */
+    public void setBinaryProperties(int binaryProperties) {
+        this.binaryProperties = binaryProperties;
+    }
+
+    /** Returns true when the general category is one of Lu, Ll, Lt, Lm, Lo. */
+    public boolean isLetter() {
+        return ((1<<generalCategory) & UCD_Types.LETTER_MASK) != 0;
+    }
+
+    /**
+     * Writes a nullable string: a 0 byte for null, otherwise a 1 byte
+     * followed by the string in DataOutputStream.writeUTF format.
+     */
+    public static void writeString(DataOutputStream os, String s) throws IOException {
+        if (s == null) {
+            os.writeByte(0);
+        } else {
+            os.writeByte(1);
+            os.writeUTF(s);
+        }
+    }
+
+    static final byte[] byteBuffer = new byte[256];
+
+    /** Reads a string written by {@link #writeString}; returns null for the 0 marker. */
+    public static String readString(DataInputStream is) throws IOException {
+        int type = is.readUnsignedByte();
+        if (type == 0) return null;
+        return is.readUTF();
+    }
+
+    // Styles for toString(byte). FULL here (1) hides UCD_Types.FULL (8) inside this class.
+    static final byte ABBREVIATED = 0, FULL = 1;
+
+    /** Same as toString(FULL). */
+    public String toString() {
+        return toString(FULL);
+    }
+
+    /**
+     * Formats the record as an XML-like &lt;e .../&gt; element.
+     *
+     * With FULL every attribute is written. With any other style an attribute
+     * is written only when it differs from the value tested in its condition.
+     * The non-FULL path dereferences the mapping fields, so the record must be
+     * fleshed out first.
+     */
+    public String toString(byte style) {
+        boolean full = style == FULL;
+        StringBuffer result = new StringBuffer();
+        String s = UTF32.valueOf32(codePoint);
+
+        result.append("<e c='").append(Utility.quoteXML(codePoint)).append('\'');
+        result.append(" hx='").append(Utility.hex(codePoint)).append('\'');
+        if (full || script != COMMON_SCRIPT) result.append(" sn='").append(UCD_Names.SCRIPT[script]).append('\'');
+        result.append(" n='").append(Utility.quoteXML(name)).append("'\r\n");
+
+        // lastPos tracks whether the current section produced output, so that
+        // a line break is only emitted after a non-empty section.
+        int lastPos = result.length();
+
+        if (full || generalCategory != Lo) result.append(" gc='").append(UCD_Names.GC[generalCategory]).append('\'');
+        if (full || combiningClass != 0) result.append(" cc='").append(combiningClass & 0xFF).append('\'');
+        if (full || decompositionType != NONE) result.append(" dt='").append(UCD_Names.DT[decompositionType]).append('\'');
+        if (full || !s.equals(decompositionMapping)) result.append(" dm='").append(Utility.quoteXML(decompositionMapping)).append('\'');
+
+        if (full || numericType != NUMERIC_NONE) result.append(" nt='").append(UCD_Names.NT[numericType]).append('\'');
+        if (full || !Double.isNaN(numericValue)) result.append(" nv='").append(numericValue).append('\'');
+
+        if (full || eastAsianWidth != EAN) result.append(" ea='").append(UCD_Names.EA[eastAsianWidth]).append('\'');
+        if (full || lineBreak != LBAL) result.append(" lb='").append(UCD_Names.LB[lineBreak]).append('\'');
+        if (full || joiningType != JT_U) result.append(" jt='").append(UCD_Names.JOINING_TYPE[joiningType]).append('\'');
+        if (full || joiningGroup != NO_SHAPING) result.append(" jg='").append(UCD_Names.JOINING_GROUP[joiningGroup]).append('\'');
+        if (full || age != 0) result.append(" ag='").append(UCD_Names.AGE[age]).append('\'');
+
+        if (full || bidiClass != BIDI_L) result.append(" bc='").append(UCD_Names.BC[bidiClass]).append('\'');
+        if (full || !bidiMirror.equals(s)) result.append(" bmg='").append(Utility.quoteXML(bidiMirror)).append('\'');
+
+        if (lastPos != result.length()) {
+            result.append("\r\n");
+            lastPos = result.length();
+        }
+
+        // One NAME='T' attribute per set bit of the binary-property set.
+        int bprops = binaryProperties;
+        for (int i = 0; i < LIMIT_BINARY_PROPERTIES; ++i) {
+            if ((bprops & (1<<i)) != 0) result.append(UCD_Names.BP[i]).append("='T' ");
+        }
+        if (lastPos != result.length()) {
+            result.append("\r\n");
+            lastPos = result.length();
+        }
+
+        if (full || !fullLowercase.equals(s)) result.append(" lc='").append(Utility.quoteXML(fullLowercase)).append('\'');
+        if (full || !fullUppercase.equals(simpleUppercase)) result.append(" uc='").append(Utility.quoteXML(fullUppercase)).append('\'');
+        if (full || !fullTitlecase.equals(fullUppercase)) result.append(" tc='").append(Utility.quoteXML(fullTitlecase)).append('\'');
+        if (full || !fullCaseFolding.equals(fullLowercase)) result.append(" cf='").append(Utility.quoteXML(fullCaseFolding)).append('\'');
+
+        // The simple lower/upper mappings default to the code point itself (see
+        // fleshOut), so that is what they are compared against. They used to be
+        // compared with themselves, which suppressed slc/suc in abbreviated output.
+        if (full || !simpleLowercase.equals(s)) result.append(" slc='").append(Utility.quoteXML(simpleLowercase)).append('\'');
+        if (full || !simpleUppercase.equals(s)) result.append(" suc='").append(Utility.quoteXML(simpleUppercase)).append('\'');
+        if (full || !simpleTitlecase.equals(simpleUppercase)) result.append(" stc='").append(Utility.quoteXML(simpleTitlecase)).append('\'');
+        if (full || !simpleCaseFolding.equals(simpleLowercase)) result.append(" sfc='").append(Utility.quoteXML(simpleCaseFolding)).append('\'');
+
+        if (full || !specialCasing.equals("")) result.append(" fsc='").append(Utility.quoteXML(specialCasing)).append('\'');
+        result.append("/>");
+        return result.toString();
+    }
+
+    /**
+     * Serializes the record. The field order here must match readBytes exactly.
+     * Side effect: the record is compacted first and stays compacted, so
+     * default-valued mappings are null afterwards.
+     */
+    public void writeBytes(DataOutputStream os) throws IOException {
+        compact();
+        os.writeInt(codePoint);
+
+        writeString(os, name);
+        writeString(os, decompositionMapping);
+        writeString(os, simpleUppercase);
+        writeString(os, simpleLowercase);
+        writeString(os, simpleTitlecase);
+        writeString(os, simpleCaseFolding);
+        writeString(os, fullUppercase);
+        writeString(os, fullLowercase);
+        writeString(os, fullTitlecase);
+        writeString(os, fullCaseFolding);
+        writeString(os, specialCasing);
+        writeString(os, bidiMirror);
+
+        os.writeFloat(numericValue);
+        os.writeInt(binaryProperties);
+
+        os.writeByte(generalCategory);
+        os.writeByte(combiningClass);
+        os.writeByte(bidiClass);
+        os.writeByte(decompositionType);
+        os.writeByte(numericType);
+        os.writeByte(eastAsianWidth);
+        os.writeByte(lineBreak);
+        os.writeByte(joiningType);
+        os.writeByte(joiningGroup);
+        os.writeByte(script);
+        os.writeByte(age);
+    }
+
+    /**
+     * Deserializes a record written by writeBytes, in the same field order,
+     * then fleshes it out so that every mapping is non-null.
+     */
+    public void readBytes(DataInputStream is) throws IOException {
+        codePoint = is.readInt();
+
+        name = readString(is);
+        decompositionMapping = readString(is);
+        simpleUppercase = readString(is);
+        simpleLowercase = readString(is);
+        simpleTitlecase = readString(is);
+        simpleCaseFolding = readString(is);
+        fullUppercase = readString(is);
+        fullLowercase = readString(is);
+        fullTitlecase = readString(is);
+        fullCaseFolding = readString(is);
+        specialCasing = readString(is);
+        bidiMirror = readString(is);
+
+        numericValue = is.readFloat();
+        binaryProperties = is.readInt();
+
+        generalCategory = is.readByte();
+        combiningClass = is.readByte();
+        bidiClass = is.readByte();
+        decompositionType = is.readByte();
+        numericType = is.readByte();
+        eastAsianWidth = is.readByte();
+        lineBreak = is.readByte();
+        joiningType = is.readByte();
+        joiningGroup = is.readByte();
+        script = is.readByte();
+        age = is.readByte();
+        fleshOut();
+
+        // HACK
+        /*
+        int bp = binaryProperties;
+        bp &= ~(1 << CaseFoldTurkishI); // clear bit
+        if (codePoint == 'i' || codePoint == 'I') {
+            bp |= (1 << CaseFoldTurkishI);
+        }
+        if (bp != binaryProperties) {
+            if (!HACK) {
+                System.out.println("\tHACK Resetting CaseFoldTurkishI on U+" + Utility.hex(codePoint) + " " + name + " and others...");
+                HACK = true;
+            }
+            binaryProperties = bp;
+        }
+        */
+
+        /*
+        if (generalCategory == Sm) {
+            if ((binaryProperties & Math_PropertyMask) != 0) {
+                if (!HACK) {
+                    System.out.println("Stripping " + Utility.hex(codePoint) + " " + name + " and others...");
+                    HACK = true;
+                }
+                binaryProperties &= ~Math_PropertyMask;
+            }
+        }
+        */
+    }
+    // Only referenced by the commented-out hacks in readBytes; kept because it is package-visible.
+    static boolean HACK = false;
+}
--- a/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java
--- a/tools/unicodetools/com/ibm/text/UCD/WriteJavaScriptInfo.java
+++ b/tools/unicodetools/com/ibm/text/UCD/WriteJavaScriptInfo.java
@ -0,0 +1,115 @@
+package com.ibm.text.UCD;
+
+import java.util.*;
+import java.io.*;
+//import java.text.*;
+import com.ibm.text.utility.*;
+
+public class WriteJavaScriptInfo {
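+    /* TODO: fix enumeration of compositions.
+     * The whole writeJavascriptInfo() method below is commented out until then:
+     * the composition loop near its end ("normKD.getD getComposition()") is
+     * not valid Java as written.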
+    
+    static public void writeJavascriptInfo() throws IOException {
+        System.err.println("Writing Javascript data");
+        UCD ucd = UCD.make();
+        Normalizer normKD = new Normalizer(Normalizer.NFKD);
+        Normalizer normD = new Normalizer(Normalizer.NFD);
+        PrintWriter log = new PrintWriter(new FileOutputStream("Normalization_data.js"));
+        
+        int count = 0;
+        int datasize = 0;
+        int max = 0;
+        int over7 = 0;
+        log.println("var KD = new Object(); // NFKD compatibility decomposition mappings");
+        log.println("// NOTE: Hangul is done in code!");
+        CompactShortArray csa = new CompactShortArray((short)0);
+        
+        for (char c = 0; c < 0xFFFF; ++c) {
+            if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
+            if (0xAC00 <= c && c <= 0xD7A3) continue;
+            if (normKD.hasDecomposition(c)) {
+                ++count;
+                String decomp = normKD.normalize(c);
+                datasize += decomp.length();
+                if (max < decomp.length()) max = decomp.length();
+                if (decomp.length() > 7) ++over7;
+                csa.setElementAt(c, (short)count);
+                log.println("\t KD[0x" + Utility.hex(c) + "]='\\u" + Utility.hex(decomp,"\\u") + "';");
+            }
+        }
+        csa.compact();
+        log.println("// " + count + " NFKD mappings total");
+        log.println("// " + datasize + " total characters of results");
+        log.println("// " + max + " string length, maximum");
+        log.println("// " + over7 + " result strings with length > 7");
+        log.println("// " + csa.storage() + " trie length (doesn't count string size)");
+        log.println();
+
+        count = 0;
+        datasize = 0;
+        max = 0;
+        log.println("var D = new Object();  // NFD canonical decomposition mappings");
+        log.println("// NOTE: Hangul is done in code!");
+        csa = new CompactShortArray((short)0);
+        
+        for (char c = 0; c < 0xFFFF; ++c) {
+            if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
+            if (0xAC00 <= c && c <= 0xD7A3) continue;
+            if (normD.hasDecomposition(c)) {
+                ++count;
+                String decomp = normD.normalize(c);
+                datasize += decomp.length();
+                if (max < decomp.length()) max = decomp.length();
+                csa.setElementAt(c, (short)count);
+                log.println("\t D[0x" + Utility.hex(c) + "]='\\u" + Utility.hex(decomp,"\\u") + "';");
+            }
+        }
+        csa.compact();
+        
+        log.println("// " + count + " NFD mappings total");
+        log.println("// " + datasize + " total characters of results");
+        log.println("// " + max + " string length, maximum");
+        log.println("// " + csa.storage() + " trie length (doesn't count string size)");
+        log.println();
+
+        count = 0;
+        datasize = 0;
+        log.println("var CC = new Object(); // canonical class mappings");
+        CompactByteArray cba = new CompactByteArray();
+        
+        for (char c = 0; c < 0xFFFF; ++c) {
+            if ((c & 0xFFF) == 0) System.err.println(Utility.hex(c));
+            int canClass = normKD.getCanonicalClass(c);
+            if (canClass != 0) {
+                ++count;
+                
+                log.println("\t CC[0x" + Utility.hex(c) + "]=" + canClass + ";");
+            }
+        }
+        cba.compact();
+        log.println("// " + count + " canonical class mappings total");
+        log.println("// " + cba.storage() + " trie length");
+        log.println();
+        
+        count = 0;
+        datasize = 0;
+        log.println("var C = new Object();  // composition mappings");
+        log.println("// NOTE: Hangul is done in code!");
+
+        IntHashtable.IntEnumeration enum = normKD.getD getComposition();
+        while (enum.hasNext()) {
+            int key = enum.next();
+            char val = (char) enum.value();
+            if (0xAC00 <= val && val <= 0xD7A3) continue;
+            ++count;
+            log.println("\tC[0x" + Utility.hex(key) + "]=0x" + Utility.hex(val) + ";");
+        }
+        log.println("// " + count + " composition mappings total");
+        log.println();
+        
+        log.close();
+        System.err.println("Done writing Javascript data");
+    }
+    
+    */
+  
+}
--- a/tools/unicodetools/com/ibm/text/utility/ChainException.java
+++ b/tools/unicodetools/com/ibm/text/utility/ChainException.java
@ -0,0 +1,38 @@
+package com.ibm.text.utility;
+
+
+import java.text.*;
+import java.io.*;
+/**
+ * Runtime exception that carries a {@link MessageFormat} pattern, the
+ * arguments for that pattern, and optionally another exception that it
+ * wraps. The text of the wrapped exception (class name, message and stack
+ * trace) is appended to the formatted message by {@link #getMessage()}.
+ */
+public class ChainException extends RuntimeException {
+    /** Private copy of the pattern arguments, or null when none were given. */
+    Object[] keyData;
+    /** MessageFormat pattern describing the problem. */
+    String messageFormat;
+    /** Wrapped exception, or null when there is none. */
+    Exception chain;
+
+    /**
+     * Creates an exception without a wrapped exception.
+     * @param messageFormat MessageFormat pattern for the message
+     * @param objects arguments for the pattern; may be null. The array is
+     *        cloned, so later changes by the caller are not seen.
+     */
+    public ChainException (String messageFormat, Object[] objects) {
+        // Delegating keeps the null handling identical to the other constructor
+        // (this one used to throw a NullPointerException for a null array).
+        this(messageFormat, objects, null);
+    }
+
+    /**
+     * Creates an exception that wraps another exception.
+     * @param messageFormat MessageFormat pattern for the message
+     * @param objects arguments for the pattern; may be null. The array is
+     *        cloned, so later changes by the caller are not seen.
+     * @param chainedException the exception being wrapped; may be null
+     */
+    public ChainException (String messageFormat, Object[] objects, Exception chainedException) {
+        this.messageFormat = messageFormat;
+        keyData = objects == null ? null : (Object[]) objects.clone();
+        chain = chainedException;
+    }
+
+    /**
+     * Builds the message text.
+     * @return the formatted pattern (or the raw pattern when no arguments
+     *         were supplied, or "" when there is no pattern), followed - when
+     *         an exception is wrapped - by "; " + its class name + ", " + its
+     *         message + ", " + its stack trace
+     */
+    public String getMessage() {
+        String main = "";
+        if (messageFormat != null) {
+            // Without arguments there is nothing to substitute; keep the text
+            // instead of silently dropping it.
+            main = (keyData == null)
+                ? messageFormat
+                : MessageFormat.format(messageFormat, keyData);
+        }
+        if (chain == null) {
+            return main;
+        }
+        StringWriter trace = new StringWriter();
+        PrintWriter printer = new PrintWriter(trace);
+        chain.printStackTrace(printer);
+        printer.close();
+        return main
+            + "; " + chain.getClass().getName()
+            + ", " + chain.getMessage()
+            + ", " + trace.getBuffer();
+    }
+}
+
--- a/tools/unicodetools/com/ibm/text/utility/CompactByteArray.java
+++ b/tools/unicodetools/com/ibm/text/utility/CompactByteArray.java
@ -0,0 +1,305 @@
+package com.ibm.text.utility;
+
+/*
+ * %W% %E%
+ *
+ * (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
+ * (C) Copyright IBM Corp. 1996 - All Rights Reserved
+ *
+ * Portions copyright (c) 1996 Sun Microsystems, Inc. All Rights Reserved.
+ *
+ *   The original version of this source code and documentation is copyrighted
+ * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
+ * materials are provided under terms of a License Agreement between Taligent
+ * and Sun. This technology is protected by multiple US and International
+ * patents. This notice and attribution to Taligent may not be removed.
+ *   Taligent is a registered trademark of Taligent, Inc.
+ *
+ * Permission to use, copy, modify, and distribute this software
+ * and its documentation for NON-COMMERCIAL purposes and without
+ * fee is hereby granted provided that this copyright notice
+ * appears in all copies. Please refer to the file "copyright.html"
+ * for further important copyright and licensing information.
+ *
+ * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
+ * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+ * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
+ * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
+ * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
+ *
+ */
+
+import java.io.*;
+
+/**
+ * Provides a compact way to store byte values that are indexed by Unicode
+ * (16-bit <code>char</code>) values, such as character properties, types,
+ * keyboard values, etc.
+ * Only for internal use for now. Made public for discussion purposes.
+ *
+ * <p>The data lives in two arrays: <code>values</code> holds the bytes in
+ * blocks of BLOCKCOUNT entries, and <code>indices</code> holds, for each
+ * block of characters, the offset of that block inside <code>values</code>.
+ * Offsets are stored in <code>short</code>s but are always read as unsigned
+ * 16-bit numbers. {@link #compact()} lets identical blocks share storage;
+ * the setters transparently {@link #expand()} the array again.
+ *
+ * @see                CompactIntArray
+ * @see                CompactShortArray
+ * @version            %I% %G%
+ * @author             Helena Shih
+ */
+public final class CompactByteArray implements Serializable {
+
+    /** Number of 16-bit character values, i.e. the size of the expanded array. */
+    public static final int UNICODECOUNT = 65536;
+
+    /** Creates an expanded array in which every character maps to 0. */
+    public CompactByteArray()
+    {
+        this((byte)0);
+    }
+
+    /**
+     * Creates an expanded array in which every character maps to the same value.
+     * @param defaultValue the initial value for every character
+     */
+    public CompactByteArray(byte defaultValue)
+    {
+        values = new byte[UNICODECOUNT];
+        indices = new short[INDEXCOUNT];
+        java.util.Arrays.fill(values, defaultValue);
+        resetIndices();
+        isCompact = false;
+    }
+
+    /**
+     * Creates a compacted array from previously generated tables. The arrays
+     * are stored as given, not copied; note that {@link #expand()} overwrites
+     * the index array in place.
+     * @param indexArray block offsets into newValues, read as unsigned 16-bit
+     *        numbers; must have exactly one entry per block
+     * @param newValues the compacted values
+     * @exception IllegalArgumentException if indexArray has the wrong length
+     *            or an offset leaves less than one whole block in newValues
+     */
+    public CompactByteArray(short indexArray[],
+                            byte newValues[]) throws IllegalArgumentException
+    {
+        if (indexArray.length != INDEXCOUNT) {
+            throw new IllegalArgumentException("Index array must have "
+                + INDEXCOUNT + " entries, not " + indexArray.length);
+        }
+        for (int i = 0; i < INDEXCOUNT; ++i) {
+            // Offsets above 32767 are stored as negative shorts (see compact()),
+            // so they have to be checked as unsigned numbers.
+            int blockStart = indexArray[i] & 0xFFFF;
+            if (blockStart + BLOCKCOUNT > newValues.length) {
+                throw new IllegalArgumentException("Index out of bounds: block "
+                    + i + " starts at " + blockStart);
+            }
+        }
+        indices = indexArray;
+        values = newValues;
+        isCompact = true;
+    }
+
+    /**
+     * Writes the two arrays as the Java source of a class named
+     * GeneralCategory, then closes the writer.
+     * @param output destination; closed before this method returns
+     */
+    public void writeArrays(PrintWriter output)
+    {
+        output.println("package com.ibm.text.unicode;");
+        output.println("import com.ibm.text.collections.*;");
+
+        output.println("public final class GeneralCategory {");
+
+        output.println("    public static byte getCategory (char ch) {");
+        output.println("\t    return compactArray.elementAt(ch);");
+        output.println("    }");
+
+        output.println("    static CompactByteArray compactArray;");
+
+        output.println("    static void init () {");
+        output.println("        short[] index = {");
+        for (int i = 0; i < indices.length; i++) {
+            if (i % 8 == 0) output.println();
+            output.print("(short)" + (indices[i] & 0xFFFF) + ", ");
+        }
+        output.println("    };");
+
+        output.println("        byte[] data = {");
+        for (int i = 0; i < values.length; i++) {
+            if (i % 8 == 0) output.println();
+            output.print("(byte)" + (values[i] & 0xFF) + ", ");
+        }
+        output.println(" };");
+        output.println("\t    compactArray = new CompactByteArray(index, data);");
+        output.println("    }");
+        output.println("}");
+        output.close();
+    }
+
+    /**
+     * Gets the value stored for a character.
+     * @param index the character to look up
+     * @return the value mapped to that character
+     */
+    public byte elementAt(char index) // parameterized on byte
+    {
+        int blockStart = indices[index >>> BLOCKSHIFT] & 0xFFFF;
+        return values[blockStart + (index & BLOCKMASK)];
+    }
+
+    /**
+     * Sets the value for one character.
+     * Automatically expands the array if it is compacted.
+     * @param index the character whose value is set
+     * @param value the new value
+     */
+    public void setElementAt(char index, byte value)
+    {
+        expand();
+        values[index] = value;
+    }
+
+    /**
+     * Sets the value for an inclusive range of characters.
+     * Automatically expands the array if it is compacted. Nothing is
+     * changed when start is greater than end.
+     * @param start first character of the range
+     * @param end last character of the range (inclusive)
+     * @param value the new value
+     */
+    public void setElementAt(char start, char end, byte value)
+    {
+        expand();
+        for (int i = start; i <= end; ++i) {
+            values[i] = value;
+        }
+    }
+
+    /**
+     * Compacts the array: a block whose contents equal a block that is
+     * already stored shares that storage. Candidate positions are tried in
+     * steps of BLOCKCOUNT. Does nothing if the array is already compact.
+     */
+    public void compact()
+    {
+        if (isCompact) {
+            return;
+        }
+        // tempIndex[k] is the position in the expanded array whose value ends
+        // up at position k of the compacted array. Larger than usually needed.
+        char[] tempIndex = new char[UNICODECOUNT];
+
+        // The first block always stays where it is.
+        int tempIndexCount = BLOCKCOUNT;
+        for (int k = 0; k < BLOCKCOUNT; ++k) {
+            tempIndex[k] = (char)k;
+        }
+        indices[0] = (short)0;
+
+        // For each successive block, find its position in the compacted array.
+        // All counters are ints: the compacted size can reach 65536, which
+        // does not fit in a short.
+        for (int iBlock = 1; iBlock < INDEXCOUNT; ++iBlock) {
+            int block = iBlock << BLOCKSHIFT;
+            if (DEBUGSMALL) if (block > DEBUGSMALLLIMIT) break;
+            int firstPosition = findOverlappingPosition(block, tempIndex,
+                                                        tempIndexCount);
+
+            int newCount = firstPosition + BLOCKCOUNT;
+            if (newCount > tempIndexCount) {
+                // No match: append this block to the compacted array.
+                for (int k = tempIndexCount; k < newCount; ++k) {
+                    tempIndex[k] = (char)(k - firstPosition + block);
+                }
+                tempIndexCount = newCount;
+            }
+            // May wrap to a negative short; readers mask with 0xFFFF.
+            indices[iBlock] = (short)firstPosition;
+        }
+
+        // Now allocate and copy the items into the compacted array.
+        byte[] tempArray = new byte[tempIndexCount];
+        for (int k = 0; k < tempIndexCount; ++k) {
+            tempArray[k] = values[tempIndex[k]];
+        }
+        values = tempArray;
+        isCompact = true;
+    }
+
+    /**
+     * Takes the array back to a 65536 element array with one slot per
+     * character. Does nothing if the array is not compact.
+     */
+    public void expand()
+    {
+        if (!isCompact) {
+            return;
+        }
+        byte[] tempArray = new byte[UNICODECOUNT];
+        for (int i = 0; i < UNICODECOUNT; ++i) {
+            tempArray[i] = elementAt((char)i);
+        }
+        resetIndices();
+        values = tempArray;
+        isCompact = false;
+    }
+
+    /**
+     * Prints index entries as unsigned numbers. Debug only.
+     * @param start first index entry to print
+     * @param count despite its name, the exclusive END position: entries
+     *        start .. count-1 are printed
+     */
+    public void printIndex(short start, short count)
+    {
+        for (int i = start; i < count; ++i) {
+            System.out.println(i + " -> : " + (indices[i] & 0xFFFF));
+        }
+        System.out.println();
+    }
+
+    /**
+     * Prints a run of values on one line. Debug only.
+     * @param start first position to print
+     * @param count number of positions to print
+     * @param tempIndex if not null, positions are first mapped through this
+     *        table; if null, they index the value array directly
+     */
+    public void printPlainArray(int start,int count, char[] tempIndex)
+    {
+        int limit = start + count;
+        for (int pos = start; pos < limit; ++pos) {
+            int source = (tempIndex != null) ? tempIndex[pos] : pos;
+            System.out.print(" " + (int)values[source]);
+        }
+        System.out.println("    Range: start " + start + " , count " + count);
+    }
+
+    /**
+     * Number of elements in the value array, narrowed to a short.
+     * NOTE: lengths above 32767 do not fit; an expanded array (65536
+     * entries) reports 0. Use {@link #storage()} for sizing.
+     */
+    public short capacity()
+    {
+        return (short)values.length;
+    }
+
+    /**
+     * @return the size estimate used by this class: one byte per value, two
+     *         bytes per index entry, plus 12
+     */
+    public int storage()
+    {
+        return values.length * 1 + indices.length * 2 + 12;
+    }
+
+    /** Points every index entry at its own block of the expanded array. */
+    private void resetIndices()
+    {
+        for (int i = 0; i < INDEXCOUNT; ++i) {
+            indices[i] = (short)(i << BLOCKSHIFT);
+        }
+    }
+
+    /**
+     * Finds where the block starting at <code>start</code> in the expanded
+     * array can be placed in the compacted array built so far.
+     * @param start position of the block in the expanded value array
+     * @param tempIndex compacted position -> expanded position
+     * @param tempIndexCount number of valid entries in tempIndex
+     * @return the first multiple of BLOCKCOUNT at which the already stored
+     *         values equal the block, or tempIndexCount if there is none
+     */
+    private int
+    findOverlappingPosition(int start, char[] tempIndex, int tempIndexCount)
+    {
+        if (DEBUGOVERLAP && start < DEBUGSHOWOVERLAPLIMIT) {
+            printPlainArray(start, BLOCKCOUNT, null);
+            printPlainArray(0, tempIndexCount, tempIndex);
+        }
+        int i;
+        for (i = 0; i < tempIndexCount; i += BLOCKCOUNT) {
+            int currentCount = Math.min(BLOCKCOUNT, tempIndexCount - i);
+            int j = 0;
+            while (j < currentCount
+                   && values[start + j] == values[tempIndex[i + j]]) {
+                ++j;
+            }
+            if (j == currentCount) break;
+        }
+        if (DEBUGOVERLAP && start < DEBUGSHOWOVERLAPLIMIT) {
+            for (int j = 1; j < i; ++j) {
+                System.out.print(" ");
+            }
+            printPlainArray(start, BLOCKCOUNT, null);
+            System.out.println("    Found At: " + i);
+        }
+        return i;
+    }
+
+    private static final int DEBUGSHOWOVERLAPLIMIT = 100;
+    private static final boolean DEBUGTRACE = false;
+    private static final boolean DEBUGSMALL = false;
+    private static final boolean DEBUGOVERLAP = false;
+    private static final int DEBUGSMALLLIMIT = 30000;
+    private static final int BLOCKSHIFT = 6;
+    private static final int BLOCKCOUNT = (1<<BLOCKSHIFT);
+    private static final int INDEXSHIFT = (16-BLOCKSHIFT);
+    private static final int INDEXCOUNT = (1<<INDEXSHIFT);
+    private static final int BLOCKMASK = BLOCKCOUNT - 1;
+
+    private byte[] values;      // value storage, addressed through indices
+    private short indices[];    // unsigned offset of each block within values
+    private boolean isCompact;
+}
--- a/tools/unicodetools/com/ibm/text/utility/CompactShortArray.java
+++ b/tools/unicodetools/com/ibm/text/utility/CompactShortArray.java
@ -0,0 +1,367 @@
+package com.ibm.text.utility;
+
+
+/*
+ * %W% %E%
+ *
+ * (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
+ * (C) Copyright IBM Corp. 1996 - All Rights Reserved
+ *
+ * Portions copyright (c) 1996 Sun Microsystems, Inc. All Rights Reserved.
+ *
+ *   The original version of this source code and documentation is copyrighted
+ * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
+ * materials are provided under terms of a License Agreement between Taligent
+ * and Sun. This technology is protected by multiple US and International
+ * patents. This notice and attribution to Taligent may not be removed.
+ *   Taligent is a registered trademark of Taligent, Inc.
+ *
+ * Permission to use, copy, modify, and distribute this software
+ * and its documentation for NON-COMMERCIAL purposes and without
+ * fee is hereby granted provided that this copyright notice
+ * appears in all copies. Please refer to the file "copyright.html"
+ * for further important copyright and licensing information.
+ *
+ * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
+ * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+ * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+ * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
+ * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
+ * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
+ *
+ */
+
+import java.io.*;
+import java.lang.*;
+/**
+ * class CompactATypeArray : use only on primitive data types
+ * Provides a compact way to store information that is indexed by Unicode
+ * values, such as character properties, types, keyboard values, etc. This
+ * is very useful when you have a block of Unicode data that contains
+ * significant values while the rest of the Unicode data is unused in the
+ * application or when you have a lot of redundancy, such as where all 21,000
+ * Han ideographs have the same value.  However, lookup is much faster than a
+ * hash table.
+ * A compact array of any primitive data type serves two purposes:
+ * <UL type = round>
+ *     <LI>Fast access of the indexed values.
+ *     <LI>Smaller memory footprint.
+ * </UL>
+ * A compact array is composed of an index array and a value array.  The index
+ * array contains, for each block of Unicode characters, the offset of that
+ * block in the value array. Offsets are stored in <code>short</code>s but
+ * are always read as unsigned 16-bit numbers.
+ * @see                CompactByteArray
+ * @see                CompactIntArray
+ * @see                CompactCharArray
+ * @see                CompactStringArray
+ * @version            %I% %G%
+ * @author             Helena Shih
+ */
+public final class CompactShortArray implements Serializable {
+
+
+    /**
+     * The total number of Unicode characters.
+     */
+    public static final int UNICODECOUNT = 65536;
+
+    /**
+     * Default constructor for CompactShortArray, the default value of the
+     * compact array is 0.
+     */
+    public CompactShortArray()
+    {
+        this((short)0);
+    }
+    /**
+     * Constructor for CompactShortArray. The array starts out expanded.
+     * @param defaultValue the default value of the compact array.
+     */
+    public CompactShortArray(short defaultValue)
+    {
+        values = new short[UNICODECOUNT];
+        indices = new short[INDEXCOUNT];
+        java.util.Arrays.fill(values, defaultValue);
+        resetIndices();
+        isCompact = false;
+    }
+    /**
+     * Constructor for CompactShortArray from previously generated tables.
+     * The arrays are stored as given, not copied; note that a later
+     * setElementAt overwrites the index array in place when it expands.
+     * @param indexArray the block offsets of the compact array, read as
+     *        unsigned 16-bit numbers; must have exactly one entry per block.
+     * @param newValues the values of the compact array.
+     * @exception IllegalArgumentException If indexArray has the wrong length
+     *            or an offset leaves less than one whole block in newValues.
+     */
+    public CompactShortArray(short indexArray[],
+                             short newValues[]) throws IllegalArgumentException
+    {
+        if (indexArray.length != INDEXCOUNT)
+            throw new IllegalArgumentException("Index out of bounds.");
+        for (int i = 0; i < INDEXCOUNT; ++i) {
+            // Offsets above 32767 are stored as negative shorts (see compact()),
+            // so they have to be checked as unsigned numbers.
+            int blockStart = indexArray[i] & 0xFFFF;
+            if (blockStart + BLOCKCOUNT > newValues.length)
+                throw new IllegalArgumentException("Index out of bounds.");
+        }
+        indices = indexArray;
+        values = newValues;
+        // Blocks of these tables may share storage, so the setters must
+        // expand before writing.
+        isCompact = true;
+    }
+    /**
+     * Get the mapped value of a Unicode character.
+     * @param index the character to get the mapped value with
+     * @return the mapped value of the given character
+     */
+    public short elementAt(char index) // parameterized on short
+    {
+        int blockStart = indices[index >> BLOCKSHIFT] & 0xFFFF;
+        return values[blockStart + (index & BLOCKMASK)];
+    }
+    /**
+     * Set a new value for a Unicode character.
+     * Set automatically expands the array if it is compacted.
+     * @param index the character to set the mapped value with
+     * @param value the new mapped value
+     */
+    public void setElementAt(char index, short value)
+    {
+        expand();
+        values[index] = value;
+    }
+    /**
+     * Set new values for a range of Unicode characters.
+     * Set automatically expands the array if it is compacted. Nothing is
+     * changed when start is greater than end.
+     * @param start the starting offset of the range
+     * @param end the ending offset of the range (inclusive)
+     * @param value the new mapped value
+     */
+    public void setElementAt(char start, char end, short value)
+    {
+        expand();
+        for (int i = start; i <= end; ++i) {
+            values[i] = value;
+        }
+    }
+    /**
+     * Compact the array: a block whose contents equal a block that is
+     * already stored shares that storage. Candidate positions are tried in
+     * steps of BLOCKCOUNT. Does nothing if the array is already compact.
+     */
+    public void compact()
+    {
+        if (isCompact) {
+            return;
+        }
+        // tempIndex[k] is the position in the expanded array whose value ends
+        // up at position k of the compacted array. Larger than usually needed.
+        char[] tempIndex = new char[UNICODECOUNT];
+
+        // The first block always stays where it is.
+        int tempIndexCount = BLOCKCOUNT;
+        for (int k = 0; k < BLOCKCOUNT; ++k) {
+            tempIndex[k] = (char)k;
+        }
+        indices[0] = (short)0;
+
+        // For each successive block, find its position in the compacted array.
+        // All counters are ints: the compacted size can reach 65536, which
+        // does not fit in a short.
+        for (int iBlock = 1; iBlock < INDEXCOUNT; ++iBlock) {
+            int block = iBlock << BLOCKSHIFT;
+            if (DEBUGSMALL) if (block > DEBUGSMALLLIMIT) break;
+            int firstPosition = findOverlappingPosition(block, tempIndex,
+                                                        tempIndexCount);
+
+            int newCount = firstPosition + BLOCKCOUNT;
+            if (newCount > tempIndexCount) {
+                // No match: append this block to the compacted array.
+                for (int k = tempIndexCount; k < newCount; ++k) {
+                    tempIndex[k] = (char)(k - firstPosition + block);
+                }
+                tempIndexCount = newCount;
+            }
+            // May wrap to a negative short; readers mask with 0xFFFF.
+            indices[iBlock] = (short)firstPosition;
+        }
+
+        // Now allocate and copy the items into the compacted array.
+        short[] tempArray = new short[tempIndexCount];
+        for (int k = 0; k < tempIndexCount; ++k) {
+            tempArray[k] = values[tempIndex[k]];
+        }
+        values = tempArray;
+        isCompact = true;
+    }
+    /** For internal use only.  Do not modify the result, the behavior of
+      * modified results is undefined.
+      * @return the internal index array itself, not a copy
+      */
+    public short[] getIndexArray()
+    {
+        return indices;
+    }
+    /** For internal use only.  Do not modify the result, the behavior of
+      * modified results is undefined.
+      * @return the internal value array itself, not a copy
+      */
+    public short[] getStringArray()
+    {
+        return values;
+    }
+    /**
+     * @return the size estimate used by this class: two bytes per value, two
+     *         bytes per index entry, plus 12
+     */
+    public int storage()
+    {
+        return values.length * 2 + indices.length * 2 + 12;
+    }
+    // --------------------------------------------------------------
+    // package private
+    // --------------------------------------------------------------
+    /**
+     * Prints both arrays to System.out as brace-delimited lists of
+     * "(short)n" items: index entries as unsigned numbers, values as signed
+     * numbers.
+     */
+    void writeArrays()
+    {
+        int last = INDEXCOUNT - 1;
+        System.out.println("{");
+        for (int i = 0; i < last; i++) {
+            System.out.print("(short)" + (indices[i] & 0xFFFF) + ", ");
+            if (i != 0 && i % 10 == 0) {
+                System.out.println();
+            }
+        }
+        System.out.println("(short)" + (indices[last] & 0xFFFF) + " }");
+
+        int cnt = values.length;
+        System.out.println("{");
+        for (int i = 0; i < cnt - 1; i++) {
+            System.out.print("(short)" + (int)values[i] + ", ");
+            if (i != 0 && i % 10 == 0) {
+                System.out.println();
+            }
+        }
+        System.out.println("(short)" + (int)values[cnt - 1] + " }");
+    }
+    /**
+     * Prints index entries as unsigned numbers. Debug only.
+     * @param start first index entry to print
+     * @param count despite its name, the exclusive END position: entries
+     *        start .. count-1 are printed
+     */
+    void printIndex(short start, short count)
+    {
+        for (int i = start; i < count; ++i) {
+            System.out.println(i + " -> : " + (indices[i] & 0xFFFF));
+        }
+        System.out.println();
+    }
+    /**
+     * Prints a run of values on one line. Debug only.
+     * @param start first position to print
+     * @param count number of positions to print
+     * @param tempIndex if not null, positions are first mapped through this
+     *        table; if null, they index the value array directly
+     */
+    void printPlainArray(int start,int count, char[] tempIndex)
+    {
+        int limit = start + count;
+        for (int pos = start; pos < limit; ++pos) {
+            int source = (tempIndex != null) ? tempIndex[pos] : pos;
+            System.out.print(" " + (int)values[source]);
+        }
+        System.out.println("    Range: start " + start + " , count " + count);
+    }
+    // --------------------------------------------------------------
+    // private
+    // --------------------------------------------------------------
+    /**
+      * Expanding takes the array back to a 65536 element array.
+      * Does nothing if the array is not compact.
+      */
+    private void expand()
+    {
+        if (!isCompact) {
+            return;
+        }
+        short[] tempArray = new short[UNICODECOUNT];
+        for (int i = 0; i < UNICODECOUNT; ++i) {
+            tempArray[i] = elementAt((char)i);
+        }
+        resetIndices();
+        values = tempArray;
+        isCompact = false;
+    }
+    /** Points every index entry at its own block of the expanded array. */
+    private void resetIndices()
+    {
+        for (int i = 0; i < INDEXCOUNT; ++i) {
+            indices[i] = (short)(i << BLOCKSHIFT);
+        }
+    }
+    /**
+     * Finds where the block starting at <code>start</code> in the expanded
+     * array can be placed in the compacted array built so far.
+     * @param start position of the block in the expanded value array
+     * @param tempIndex compacted position -> expanded position
+     * @param tempIndexCount number of valid entries in tempIndex
+     * @return the first multiple of BLOCKCOUNT at which the already stored
+     *         values equal the block, or tempIndexCount if there is none
+     */
+    private int
+    findOverlappingPosition(int start, char[] tempIndex, int tempIndexCount)
+    {
+        if (DEBUGOVERLAP && start < DEBUGSHOWOVERLAPLIMIT) {
+            printPlainArray(start, BLOCKCOUNT, null);
+            printPlainArray(0, tempIndexCount, tempIndex);
+        }
+        int i;
+        for (i = 0; i < tempIndexCount; i += BLOCKCOUNT) {
+            int currentCount = Math.min(BLOCKCOUNT, tempIndexCount - i);
+            int j = 0;
+            while (j < currentCount
+                   && values[start + j] == values[tempIndex[i + j]]) {
+                ++j;
+            }
+            if (j == currentCount) break;
+        }
+        if (DEBUGOVERLAP && start < DEBUGSHOWOVERLAPLIMIT) {
+            for (int j = 1; j < i; ++j) {
+                System.out.print(" ");
+            }
+            printPlainArray(start, BLOCKCOUNT, null);
+            System.out.println("    Found At: " + i);
+        }
+        return i;
+    }
+
+    private static final int DEBUGSHOWOVERLAPLIMIT = 100;
+    private static final boolean DEBUGTRACE = false;
+    private static final boolean DEBUGSMALL = false;
+    private static final boolean DEBUGOVERLAP = false;
+    private static final int DEBUGSMALLLIMIT = 30000;
+    private static final int BLOCKSHIFT = 7;
+    private static final int BLOCKCOUNT = (1<<BLOCKSHIFT);
+    private static final int INDEXSHIFT = (16-BLOCKSHIFT);
+    private static final int INDEXCOUNT = (1<<INDEXSHIFT);
+    private static final int BLOCKMASK = BLOCKCOUNT - 1;
+
+    private short values[];     // value storage, addressed through indices
+    private short indices[];    // unsigned offset of each block within values
+    private boolean isCompact;
+}
--- a/tools/unicodetools/com/ibm/text/utility/Counter.java
+++ b/tools/unicodetools/com/ibm/text/utility/Counter.java
@ -0,0 +1,65 @@
+package com.ibm.text.utility;
+
+
+import java.io.IOException;
+//import com.ibm.text.unicode.UInfo;
+import java.util.*;
+import java.io.*;
+import java.text.*;
+
+/**
+ * Keeps a running tally per string key and can present the keys ordered by tally.
+ */
+public final class Counter {
+    Map map = new HashMap();
+
+    /**
+     * Mutable int holder that sorts from highest value to lowest. Each instance
+     * takes a serial number from <code>uniqueCount</code>, so two distinct
+     * instances never compare as equal even when their values match.
+     */
+    static public final class RWInteger implements Comparable {
+        static int uniqueCount;
+        public int value;
+        private int forceUnique = uniqueCount++;
+
+        /**
+         * Orders by descending value, then by descending serial number.
+         */
+        public int compareTo(Object other) {
+            RWInteger that = (RWInteger) other;
+            if (value != that.value) {
+                return value > that.value ? -1 : 1;
+            }
+            if (forceUnique != that.forceUnique) {
+                return forceUnique > that.forceUnique ? -1 : 1;
+            }
+            return 0;
+        }
+
+        public String toString() {
+            return String.valueOf(value);
+        }
+    }
+
+    /**
+     * Adds the length of <code>obj</code> to the tally kept for <code>obj</code>,
+     * creating the tally on first use.
+     * NOTE(review): the tally grows by the string's length, not by one; confirm
+     * with callers that a length-weighted total is what they expect.
+     */
+    public void add(String obj) {
+        RWInteger tally = (RWInteger)map.get(obj);
+        if (tally == null) {
+            tally = new RWInteger();
+            map.put(obj, tally);
+        }
+        tally.value = tally.value + obj.length();
+    }
+
+    /**
+     * @return a new TreeMap from tally to key; iteration runs from the largest
+     * tally to the smallest
+     */
+    public Map getSortedByCount() {
+        Map sorted = new TreeMap();
+        for (Iterator it = map.entrySet().iterator(); it.hasNext();) {
+            Map.Entry entry = (Map.Entry) it.next();
+            sorted.put(entry.getValue(), entry.getKey());
+        }
+        return sorted;
+    }
+
+    /**
+     * @return a new HashMap that maps every key seen so far to itself
+     */
+    public Map getKeyToKey() {
+        Map identity = new HashMap();
+        for (Iterator it = map.keySet().iterator(); it.hasNext();) {
+            Object key = it.next();
+            identity.put(key, key);
+        }
+        return identity;
+    }
+
+
+}
--- a/tools/unicodetools/com/ibm/text/utility/Differ.java
+++ b/tools/unicodetools/com/ibm/text/utility/Differ.java
@ -0,0 +1,164 @@
+package com.ibm.text.utility;
+
+/** Basic Diff program. Compares two sequences of objects fed into it, and
+ * lets you know where they are different. For a usage example, see DifferTest
+ * <p>Typical use: feed one item to each side with addA/addB, call checkMatch,
+ * and if getACount() or getBCount() is non-zero, read the differing items with
+ * getA/getB (offset -1 gives the item before the difference, offset == count
+ * gives the item after it).
+ * @author Mark Davis
+ * @version 1.0
+ */
+final public class Differ {
+    public static final String copyright =
+      "Copyright (C) 2000, International Business Machines Corporation and others. All Rights Reserved.";
+
+    /**
+     * @param stackSize The size of the largest difference you expect.
+     * @param matchCount The number of items that have to be the same to count as a match
+     */
+    public Differ(int stackSize, int matchCount) {
+        this.STACKSIZE = stackSize;
+        this.EQUALSIZE = matchCount;
+        a = new Object[stackSize+matchCount];
+        b = new Object[stackSize+matchCount];
+    }
+
+    /** Adds one item to each side. */
+    public void add (Object aStr, Object bStr) {
+        addA(aStr);
+        addB(bStr);
+    }
+
+    /** Drops whatever the previous checkMatch resolved, then appends to the A side. */
+    public void addA (Object aStr) {
+        flush();
+        a[aCount++] = aStr;
+    }
+
+    /** Drops whatever the previous checkMatch resolved, then appends to the B side. */
+    public void addB (Object bStr) {
+        flush();
+        b[bCount++] = bStr;
+    }
+
+    /** @return line number of the offset-th differing item on the A side */
+    public int getALine(int offset) {
+        return aLine + maxSame + offset;
+    }
+
+    /**
+     * @param offset index into the current difference on the A side
+     * @return the item just before the difference when offset is negative,
+     * the item just after it when offset is at or past getACount(), otherwise
+     * the offset-th differing item
+     */
+    public Object getA(int offset) {
+        if (offset < 0) return last;
+        if (offset >= aTop-maxSame) return next;
+        // differing items start after the maxSame leading matches,
+        // the same origin getALine() and getACount() use
+        return a[maxSame + offset];
+    }
+
+    /** @return number of differing items currently reported on the A side */
+    public int getACount() {
+        return aTop-maxSame;
+    }
+
+    /** @return number of differing items currently reported on the B side */
+    public int getBCount() {
+        return bTop-maxSame;
+    }
+
+    /** @return line number of the offset-th differing item on the B side */
+    public int getBLine(int offset) {
+        return bLine + maxSame + offset;
+    }
+
+    /**
+     * @param offset index into the current difference on the B side
+     * @return the item just before the difference when offset is negative,
+     * the item just after it when offset is at or past getBCount(), otherwise
+     * the offset-th differing item
+     */
+    public Object getB(int offset) {
+        if (offset < 0) return last;
+        if (offset >= bTop-maxSame) return next;
+        return b[maxSame + offset];
+    }
+
+    /**
+     * Examines the buffered items and decides how much of each side is resolved.
+     * Leading items that are equal on both sides are counted in maxSame. A
+     * difference is reported once the last EQUALSIZE items of one side are found
+     * in the other side, or unconditionally when finalPass is true.
+     * @param finalPass true when no more items will be added; everything still
+     * buffered after the common prefix is then reported as different
+     */
+    public void checkMatch(boolean finalPass) {
+        // find the initial strings that are the same
+        int max = aCount;
+        if (max > bCount) max = bCount;
+        int i;
+        for (i = 0; i < max; ++i) {
+            if (!a[i].equals(b[i])) break;
+        }
+        // at this point, all items up to i are equal
+        maxSame = i;
+        aTop = bTop = maxSame;
+        if (maxSame > 0) last = a[maxSame-1];
+        next = "";
+
+        if (finalPass) {
+            aTop = aCount;
+            bTop = bCount;
+            next = "";
+            return;
+        }
+
+        // not enough trailing items yet to test for a resynchronization
+        if (aCount - maxSame < EQUALSIZE || bCount - maxSame < EQUALSIZE) return;
+
+        // now see if the last few a's occur anywhere in the b's, or vice versa
+        int match = find (a, aCount-EQUALSIZE, aCount, b, maxSame, bCount);
+        if (match != -1) {
+            aTop = aCount-EQUALSIZE;
+            bTop = match;
+            next = a[aTop];
+            return;
+        }
+        match = find (b, bCount-EQUALSIZE, bCount, a, maxSame, aCount);
+        if (match != -1) {
+            bTop = bCount-EQUALSIZE;
+            aTop = match;
+            next = b[bTop];
+            return;
+        }
+        if (aCount >= STACKSIZE || bCount >= STACKSIZE) {
+            // flush some of them
+            aCount = (aCount + maxSame) / 2;
+            bCount = (bCount + maxSame) / 2;
+            next = "";
+        }
+    }
+
+    /** Convenient utility
+     * finds a segment of the first array in the second array.
+     * @return -1 if not found, otherwise start position in b
+     */
+    public int find (Object[] a, int aStart, int aEnd, Object[] b, int bStart, int bEnd) {
+        int len = aEnd - aStart;
+        int bEndMinus = bEnd - len;
+        tryA:
+        for (int i = bStart; i <= bEndMinus; ++i) {
+            for (int j = 0; j < len; ++j) {
+                if (!b[i + j].equals(a[aStart + j])) continue tryA;
+            }
+            return i; // we have a match!
+        }
+        return -1;
+    }
+
+    // ====================== PRIVATES ======================
+
+    /** Discards the first aTop/bTop buffered items and advances the line numbers to match. */
+    private void flush() {
+        if (aTop != 0) {
+            int newCount = aCount-aTop;
+            System.arraycopy(a, aTop, a, 0, newCount);
+            aCount = newCount;
+            aLine += aTop;
+            aTop = 0;
+        }
+
+        if (bTop != 0) {
+            int newCount = bCount-bTop;
+            System.arraycopy(b, bTop, b, 0, newCount);
+            bCount = newCount;
+            bLine += bTop;
+            bTop = 0;
+        }
+    }
+
+    private final int STACKSIZE;
+    private final int EQUALSIZE;
+
+    private Object [] a;
+    private Object [] b;
+    private Object last = "";
+    private Object next = "";
+    private int aCount = 0;
+    private int bCount = 0;
+    private int aLine = 1;
+    private int bLine = 1;
+    private int maxSame = 0, aTop = 0, bTop = 0;
+
+}
--- a/tools/unicodetools/com/ibm/text/utility/DifferTest.java
+++ b/tools/unicodetools/com/ibm/text/utility/DifferTest.java
@ -0,0 +1,37 @@
+package com.ibm.text.utility;
+
+
+/**
+ * Command-line demonstration of Differ: feeds two short string sequences that
+ * differ by one transposition and prints every difference Differ reports.
+ */
+public class DifferTest {
+    public static final String copyright =
+      "Copyright (C) 2000, IBM Corp. and others. All Rights Reserved.";
+
+    /**
+     * Entry point. Declared public so the Java launcher can invoke it.
+     * @param args ignored
+     */
+    public static void main(String[] args) { // for testing
+
+        String[] as = {"a", "b", "20D4", "0344", "20D5", "20D6", "20D7", "20D8", "20D9"};
+        String[] bs = {"a", "b", "20D4", "20D5", "0344", "20D6", "20D7", "20D8", "20D9"};
+        Differ differ = new Differ(50,2);
+        int max = as.length;
+        if (max < bs.length) max = bs.length;
+        // one extra iteration (j == max) adds nothing and runs the final pass
+        for (int j = 0; j <= max; ++j) {
+            if (j < as.length) differ.addA(as[j]);
+            if (j < bs.length) differ.addB(bs[j]);
+            differ.checkMatch(j == max);
+
+            if (differ.getACount() != 0 || differ.getBCount() != 0) {
+                if (differ.getACount() != 0) {
+                    // offsets -1 and getACount() give one item of context on each side
+                    for (int i = -1; i < differ.getACount()+1; ++i) {
+                        System.out.println("a: " + differ.getALine(i) + " " + differ.getA(i));
+                    }
+                }
+                if (differ.getBCount() != 0) {
+                    if (differ.getACount() != 0) System.out.println();
+                    for (int i = -1; i < differ.getBCount()+1; ++i) {
+                        System.out.println("b: " + differ.getBLine(i) + " " + differ.getB(i));
+                    }
+                }
+            }
+            System.out.println("----");
+            //differ.flush();
+        }
+    }
+}
--- a/tools/unicodetools/com/ibm/text/utility/DualWriter.java
+++ b/tools/unicodetools/com/ibm/text/utility/DualWriter.java
@ -0,0 +1,52 @@
+package com.ibm.text.utility;
+
+import java.awt.*;
+import java.io.*;
+
+/**
+ * A Writer that duplicates everything written to it onto two underlying writers.
+ * With auto-flush enabled, both writers are flushed after every write.
+ */
+final public class DualWriter extends Writer {
+    private static final String copyright = "(C) Copyright IBM Corp. 1998 - All Rights Reserved";
+    // Abstract class for writing to character streams.
+    // The only methods that a subclass must implement are
+    // write(char[], int, int), flush(), and close().
+
+    private boolean autoflush ;
+    private Writer a;
+    private Writer b;
+
+    /**
+     * Creates a writer that forwards to both targets, with auto-flush off.
+     */
+    public DualWriter (Writer a, Writer b) {
+        this.a = a;
+        this.b = b;
+    }
+
+    /**
+     * @param autoFlush when true, every write is followed by a flush of both targets
+     */
+    public DualWriter (Writer a, Writer b, boolean autoFlush) {
+        this.a = a;
+        this.b = b;
+        autoflush = autoFlush;
+    }
+
+    public void setAutoFlush(boolean value) {
+        autoflush = value;
+    }
+
+    public boolean getAutoFlush() {
+        return autoflush;
+    }
+
+    /**
+     * Writes the characters to the first target, then to the second.
+     * @throws IOException if either target fails
+     */
+    public void write(char cbuf[],
+                        int off,
+                        int len) throws IOException {
+        a.write(cbuf, off, len);
+        b.write(cbuf, off, len);
+        if (autoflush) flush();
+    }
+
+    /**
+     * Closes both targets. The second target is closed even when closing the
+     * first one throws, so a failure on one side does not leak the other.
+     * If both throw, the exception from the second target propagates.
+     */
+    public void close() throws IOException {
+        try {
+            a.close();
+        } finally {
+            b.close();
+        }
+    }
+
+    /**
+     * Flushes both targets; the second is flushed even when the first throws.
+     */
+    public void flush() throws IOException {
+        try {
+            a.flush();
+        } finally {
+            b.flush();
+        }
+    }
+}
--- a/tools/unicodetools/com/ibm/text/utility/EquivalenceClass.java
+++ b/tools/unicodetools/com/ibm/text/utility/EquivalenceClass.java
@ -0,0 +1,152 @@
+package com.ibm.text.utility;
+
+
+import java.io.*;
+import java.util.*;
+
+/**
+ * Takes a many:many relation between source and value and produces
+ * equivalence classes of sources: two sources land in the same class
+ * whenever they have been added with the same value.
+ */
+public class EquivalenceClass {
+    static final boolean DEBUG = false;
+
+    // source -> the Set of sources it is currently grouped with (the Set is shared
+    // by all of its members)
+    Map sourceToEquiv = new HashMap();
+    // value -> the source recorded for that value the first time it had none
+    Map valueToRepresentativeSource = new HashMap();
+    // source -> TreeSet of the values whose addition merged that source's class
+    Map forcedMerge = new HashMap();
+
+    String itemSeparator;
+    int places;
+    boolean hex;
+
+    /** Uses "," as separator and 4-place hex formatting for numbers. */
+    public EquivalenceClass() {
+        this(",", 4, true);
+    }
+
+    /**
+     * @param itemSeparator text placed between classes by toString()
+     * @param places passed to Utility.hex when formatting numbers
+     * @param hex whether Number items are formatted through Utility.hex
+     */
+    public EquivalenceClass(String itemSeparator, int places, boolean hex) {
+        this.itemSeparator = itemSeparator;
+        this.places = places;
+        this.hex = hex;
+    }
+
+    /**
+     * Records that <code>source</code> maps to <code>value</code>.
+     * @return true if the source joined an existing class or two classes were
+     * merged; false otherwise
+     */
+    public boolean add(Object source, Object value) {
+        Object repSource = valueToRepresentativeSource.get(value);
+        Set equivSet = (Set)sourceToEquiv.get(source);
+        // every source that is ever added gets a forced-merge set
+        if (forcedMerge.get(source) == null) {
+            forcedMerge.put(source, new TreeSet());
+        }
+
+        if (DEBUG) System.out.println("+Source " + source
+            + ", value: " + value);
+
+        boolean changed = false;
+        if (equivSet == null) {
+            if (repSource == null) {
+                // new source, new value: start a singleton class
+                equivSet = new HashSet();
+                equivSet.add(source);
+                sourceToEquiv.put(source, equivSet);
+                valueToRepresentativeSource.put(value, source);
+                repSource = source; // for debugging
+            } else {
+                // new source, known value: join the representative's class
+                equivSet = (Set) sourceToEquiv.get(repSource);
+                equivSet.add(source);
+                sourceToEquiv.put(source, equivSet);
+                changed = true;
+            }
+        } else if (repSource == null) {
+            // known source, new value: the source becomes its representative
+            valueToRepresentativeSource.put(value, source);
+            repSource = source; // for debugging;
+        } else {
+            // both known: merge unless they are already the same class
+            Set repEquiv = (Set) sourceToEquiv.get(repSource);
+            if (!repEquiv.equals(equivSet)) {
+                changed = true;
+                if (DEBUG) System.out.println("Merging (" + repSource + ") " + toString(repEquiv)
+                    + " + (" + source + ") " + toString(equivSet));
+                absorb(repEquiv, equivSet, value);
+                equivSet = repEquiv; // for debugging
+            }
+        }
+        if (DEBUG) System.out.println("--- repSource: " + repSource
+            + ", equivSet: " + equivSet);
+        return changed;
+    }
+
+    /**
+     * Folds <code>absorbed</code> into <code>survivor</code>, notes
+     * <code>value</code> in the forced-merge set of every member, and repoints
+     * every source that referenced <code>absorbed</code>.
+     */
+    private void absorb(Set survivor, Set absorbed, Object value) {
+        survivor.addAll(absorbed);
+
+        for (Iterator members = survivor.iterator(); members.hasNext();) {
+            Set forced = (Set)forcedMerge.get(members.next());
+            forced.add(value);
+        }
+
+        // collect first, replace afterwards, so the map is not changed while iterating
+        Set stale = new HashSet();
+        for (Iterator entries = sourceToEquiv.entrySet().iterator(); entries.hasNext();) {
+            Map.Entry entry = (Map.Entry) entries.next();
+            if (entry.getValue() == absorbed) {
+                stale.add(entry.getKey());
+            }
+        }
+        for (Iterator keys = stale.iterator(); keys.hasNext();) {
+            sourceToEquiv.put(keys.next(), survivor);
+        }
+    }
+
+    /** Lists every distinct class as {item, item, ...}, joined by the item separator. */
+    public String toString () {
+        StringBuffer result = new StringBuffer();
+        // a set of the values skips classes shared by several sources
+        Set distinct = new HashSet(sourceToEquiv.values());
+        for (Iterator it = distinct.iterator(); it.hasNext();) {
+            toString((Set)it.next(), result, forcedMerge);
+        }
+        return result.toString();
+    }
+
+    /** Formats one item: collections recursively, numbers as hex when enabled. */
+    private String toString(Object s) {
+        if (s == null) return "null";
+        if (s instanceof Collection) {
+            StringBuffer nested = new StringBuffer();
+            toString((Collection)s, nested, null);
+            return nested.toString();
+        }
+        boolean asHex = hex && s instanceof Number;
+        return asHex ? Utility.hex(s, places) : s.toString();
+    }
+
+    /**
+     * Appends one collection in braces; the item separator is written first
+     * when the buffer already holds text. <code>valueToRep</code> is not used.
+     */
+    private void toString(Collection s, StringBuffer sb, Map valueToRep) {
+        if (sb.length() != 0) sb.append(itemSeparator);
+        if (s == null) {
+            sb.append("{}");
+            return;
+        }
+        sb.append('{');
+        String lead = "";
+        for (Iterator it = s.iterator(); it.hasNext();) {
+            sb.append(lead);
+            lead = ", ";
+            sb.append(toString(it.next()));
+        }
+        sb.append('}');
+    }
+
+}
--- a/tools/unicodetools/com/ibm/text/utility/IndentWriter.java
+++ b/tools/unicodetools/com/ibm/text/utility/IndentWriter.java
@ -0,0 +1,113 @@
+package com.ibm.text.utility;
+
+import java.io.*;
+
+public class IndentWriter extends Writer {
+    public IndentWriter(Writer writer) {
+        this.writer = writer;
+        this.width = 30000;
+        this.separator = " ";
+    }
+    public IndentWriter(OutputStream writer, String encoding)
+      throws UnsupportedEncodingException{
+        this.writer = new OutputStreamWriter(writer, encoding);
+        this.width = 30000;
+        this.separator = " ";
+    }
+    public void setSeparator(String separator) {
+        this.separator = separator;
+    }
+    public String getSeparator() {
+        return separator;
+    }
+    public void setWidth(int width) {
+        this.width = width;
+    }
+    public int getWidth() {
+        return width;
+    }
+    public void indentBy(int indentDelta) throws IOException {
+        this.indent += indentDelta;
+        flush();
+    }
+    public void setIndent(int indent) {
+        this.indent = indent;
+    }
+    public int getIndent() {
+        return indent;
+    }
+    /*
+    public void write(String cbuf, int off, int len) throws IOException {
+        if (buffer.length() + len > width) {
+            flushLine();
+            buffer.append("                                       ".substring(0,indent));
+            buffer.append("(" + indent + ") ");
+        } else {
+            buffer.append(separator);
+        }
+	    buffer.append(cbuf, off, len);
+    }
+    public void write(String string) throws IOException {
+        write(string,0,string.length());
+    }
+    */
+    public void write(int indent, String string) throws IOException {
+        setIndent(indent);
+        write(string,0,string.length());
+    }
+    public void writeln(int indent, String string) throws IOException {
+        write(indent, string);
+        flushLine();
+    }
+    public void writeln(String string) throws IOException {
+        write(string);
+        flushLine();
+    }
+    public void writeln() throws IOException {
+        flushLine();
+    }
+
+    public void write(char cbuf[], int off, int len) throws IOException {
+        if (buffer.length() == 0) {
+            bufferIndent = indent;
+        } else if (bufferIndent + buffer.length() + separator.length() + len > width) {
+            flushLine();
+        } else {
+            buffer.append(separator);
+        }
+	    buffer.append(cbuf, off, len);
+    }
+
+    public void flushLine() throws IOException {
+        if (buffer.length() != 0) { // indent
+            writer.write("                                       ",0,bufferIndent);
+            writer.write(buffer.toString());
+            writer.write(EOL);
+            buffer.setLength(0);
+        }
+    }
+
+    public void flush() throws IOException {
+        flushLine();
+        writer.flush();
+    }
+
+    public void close() throws IOException {
+        flush();
+        writer.close();
+    }
+    private Writer writer;
+    private StringBuffer buffer = new StringBuffer(200);
+    private int width;
+    private int indent;
+    private int bufferIndent;
+    private String separator;
+    private static String EOL;
+    static { // gets platform-specific eol
+        StringWriter foo = new StringWriter();
+        PrintWriter fii = new PrintWriter(foo);
+        fii.println();
+        fii.flush();
+        EOL = foo.toString();
+    }
+}
--- a/tools/unicodetools/com/ibm/text/utility/IntStack.java
+++ b/tools/unicodetools/com/ibm/text/utility/IntStack.java
@ -0,0 +1,41 @@
+package com.ibm.text.utility;
+
+// =============================================================
+// Simple stack mechanism, with push, pop and access
+// =============================================================
+
+/**
+ * Growable stack of ints with push, pop and indexed read access.
+ */
+public final class IntStack {
+    private int[] values;
+    private int top = 0;
+
+    /**
+     * @param initialSize initial capacity; zero is allowed, the stack grows on demand
+     */
+    public IntStack(int initialSize) {
+        values = new int[initialSize];
+    }
+
+    /** Pushes a value, doubling the capacity when the stack is full. */
+    public void push(int value) {
+        if (top >= values.length) { // must grow?
+            // doubling zero stays zero, so always grow to at least one slot
+            int[] temp = new int[Math.max(1, values.length*2)];
+            System.arraycopy(values,0,temp,0,values.length);
+            values = temp;
+        }
+        values[top++] = value;
+    }
+
+    /**
+     * @return the most recently pushed value, which is removed
+     * @throws IllegalArgumentException if the stack is empty
+     */
+    public int pop() {
+        if (top > 0) return values[--top];
+        throw new IllegalArgumentException("Stack underflow");
+    }
+
+    /**
+     * @param index position counted from the bottom of the stack (0 = oldest)
+     * @throws IllegalArgumentException if index is not in [0, getTop())
+     */
+    public int get(int index) {
+        if (0 <= index && index < top) return values[index];
+        throw new IllegalArgumentException("Stack index out of bounds");
+    }
+
+    /** @return the number of values on the stack */
+    public int getTop() {
+        return top;
+    }
+
+    public boolean isEmpty() {
+        return top == 0;
+    }
+}
--- a/tools/unicodetools/com/ibm/text/utility/LengthFirstComparator.java
+++ b/tools/unicodetools/com/ibm/text/utility/LengthFirstComparator.java
@ -0,0 +1,13 @@
+package com.ibm.text.utility;
+
+import java.util.*;
+
+public final class LengthFirstComparator implements Comparator {
+	public int compare(Object a, Object b) {
+		String as = (String) a;
+		String bs = (String) b;
+		if (as.length() < bs.length()) return -1;
+		if (as.length() > bs.length()) return 1;
+		return as.compareTo(bs);
+	}
+}
--- a/tools/unicodetools/com/ibm/text/utility/Pair.java
+++ b/tools/unicodetools/com/ibm/text/utility/Pair.java
@ -0,0 +1,31 @@
+package com.ibm.text.utility;
+
+/**
+ * An ordered pair of Comparables, compared component by component
+ * (first, then second).
+ */
+public final class Pair implements java.lang.Comparable {
+
+  public Comparable first, second;
+
+  public Pair (Comparable first, Comparable second) {
+    this.first = first;
+    this.second = second;
+  }
+
+  /** Combines the component hash codes; a null component contributes 0. */
+  public int hashCode() {
+    return hash(first) * 37 + hash(second);
+  }
+
+  /**
+   * @return true only when other is a Pair whose components equal this
+   * pair's components (two null components count as equal)
+   */
+  public boolean equals(Object other) {
+    if (this == other) return true;
+    // explicit type test instead of catching the ClassCastException /
+    // NullPointerException, so genuine errors from component equals() surface
+    if (!(other instanceof Pair)) return false;
+    Pair that = (Pair)other;
+    return same(first, that.first) && same(second, that.second);
+  }
+
+  /**
+   * Compares first components, then second components on a tie.
+   * @throws ClassCastException if other is not a Pair
+   */
+  public int compareTo(Object other) {
+    Pair that = (Pair)other;
+    int trial = first.compareTo(that.first);
+    if (trial != 0) return trial;
+    return second.compareTo(that.second);
+  }
+
+  /** Null-tolerant equality of two components. */
+  private static boolean same(Object x, Object y) {
+    return x == null ? y == null : x.equals(y);
+  }
+
+  /** Null-tolerant hash of one component. */
+  private static int hash(Object x) {
+    return x == null ? 0 : x.hashCode();
+  }
+}
--- a/tools/unicodetools/com/ibm/text/utility/UTF16Plus.java
+++ b/tools/unicodetools/com/ibm/text/utility/UTF16Plus.java
@ -0,0 +1,8 @@
+package com.ibm.text.utility;
+
+/**
+ * Convenience front end for UTF32.
+ */
+public class UTF16Plus {
+    /**
+     * Forwards to <code>UTF32.char32At(source, offset16)</code>.
+     * @param source text to read from
+     * @param offset16 UTF-16 offset into source
+     * @return whatever UTF32.char32At returns for that offset
+     */
+    public static int charAt(StringBuffer source, int offset16) {
+        final int codePoint = UTF32.char32At(source, offset16);
+        return codePoint;
+    }
+}
+
--- a/tools/unicodetools/com/ibm/text/utility/UTF32.java
+++ b/tools/unicodetools/com/ibm/text/utility/UTF32.java
@ -0,0 +1,718 @@
+package com.ibm.text.utility;
+
+/**
+* Utility class for demonstrating UTF16 character conversions and indexing conversions.
+* Ideally, these methods would be on existing classes in Java, but they can also be used
+* in a stand-alone utility class like this one.
+* <p>Code that uses strings alone rarely needs modification.
+* By design, UTF-16 does not allow overlap, so searching for strings is a safe operation.
+* Similarly, concatenation is always safe. Substringing is safe if the start and end are both
+* on UTF32 boundaries. In normal code, the values for start and end are on those boundaries,
+* since they arose from operations like searching.
+* If not, the nearest UTF-32 boundaries can be determined using <code>bounds32()</code>.
+* <p>Here is a summary of the methods:
+* <ul><li>
+* <code>char32At()</code>, <code>count32()</code>, and <code>append32()</code>
+* are most important methods for most programs.
+* They are used for iteration, filtering and copying. See the examples below.
+* </li><li>
+* <code>bounds32()</code> is useful for finding the nearest UTF-32 boundaries.
+* However, in most circumstances it is better to use 
+* <a href="http://java.sun.com/products/jdk/1.2/docs/api/java/text/BreakIterator.html#getCharacterInstance(java.util.Locale)">
+* BreakIterator.getCharacterInstance(Locale)</a> to find character boundaries
+* that are closer to end-user expectations.
+* </li><li>
+* <code>valueOf32()</code> is occasionally convenient for producing a string containing a UTF-32 value. 
+* </li><li>
+* <code>findOffset16()</code> and <code>findOffset32()</code> are generally not needed, 
+* except when interfacing to specifications that use UTF-32 indices (such as XSL).
+* </li><li>
+* <code>isLegal()</code> can be used to test whether UTF-16 or UTF-32 values are valid.
+* </li><li>
+* <code>isLeadSurrogate()</code>, <code>isSurrogate()</code>, and <code>isTrailSurrogate()</code>
+* test the type of a char. They are useful for lower-level code.
+* </li><li>
+* <code>getChar32()</code>, <code>getLead()</code>, and <code>getTrail()</code> 
+* are sometimes useful for putting together and taking apart UTF-32 values.
+* </li></ul>
+* <strong>Examples:</strong>
+* <p>The following examples illustrate use of some of these methods. 
+<pre>
+// iteration forwards: Original
+for (int i = 0; i < s.length(); ++i) {
+    char ch = s.charAt(i);
+    doSomethingWith(ch);
+}
+
+// iteration forwards: Changes for UTF-32
+int ch;
+for (int i = 0; i < s.length(); i+=UTF32.count16(ch)) {
+    ch = UTF32.char32At(s,i);
+    doSomethingWith(ch);
+}
+
+// iteration backwards: Original
+for (int i = s.length()-1; i >= 0; --i) {
+    char ch = s.charAt(i);
+    doSomethingWith(ch);
+}
+
+// iteration backwards: Changes for UTF-32
+int ch;
+for (int i = s.length()-1; i >= 0; i-=UTF32.count16(ch)) {
+    ch = UTF32.char32At(s,i);
+    doSomethingWith(ch);
+}
+
+* </pre>
+* <strong>Notes:</strong>
+* <ul><li>
+* <strong>Naming:</strong> For clarity, High and Low surrogates are called <code>Lead</code> and <code>Trail</code> in the API,
+* which gives a better sense of their ordering in a string. <code>offset16</code> and <code>offset32</code> are used to distinguish
+* offsets to UTF-16 boundaries vs offsets to UTF-32 boundaries. 
+* <code>int char32</code> is used to contain UTF-32 characters, as opposed to <code>char</code>, which is a UTF-16 code unit.
+* </li><li>
+* <strong>Roundtripping Offsets:</strong> You can always roundtrip
+* from a UTF-32 offset to a UTF-16 offset and back.
+* Because of the difference in structure, you can roundtrip
+* from a UTF-16 offset to a UTF-32 offset and back if and only if <code>bounds32(string, offset16) != TRAIL</code>.
+* </li><li>
+* <strong>Exceptions:</strong> The error checking will throw an exception if indices are out of bounds.
+* Other than that, all methods will behave reasonably,
+* even if unmatched surrogates or out-of-bounds UTF-32 values are present.
+* <code>isLegal()</code> can be used to check for validity if desired.
+* </li><li>
+* <strong>Unmatched Surrogates:</strong> If the string contains unmatched surrogates, then these are
+* counted as one UTF-32 value. This matches their iteration behavior, which is vital.
+* It also matches common display practice as
+* missing glyphs (see the Unicode Standard Section 5.4, 5.5).
+* </li><li>
+* <strong>Out-of-bounds UTF-32 values:</strong> If a <code>char32</code> contains an out-of-bounds UTF-32 value, 
+* then it is treated as REPLACEMENT_CHAR for consistency across the API.
+* </li><li>
+* <strong>Optimization:</strong> The method implementations may need optimization if the compiler doesn't fold static final methods.
+* Since surrogate pairs will form an exceedingly small percentage of all the text in the world,
+* the singleton case should always be optimized for.
+* </li></ul>
+* @author Mark Davis, with help from Markus Scherer
+*/
+public final class UTF32 {
+    
+    // =========================================================
+    // UTILITIES
+    // =========================================================
+    
+    /**
+     * Unicode value used when translating into Unicode encoding form
+     * and there is no existing character.
+     */
+	public static final char REPLACEMENT_CHAR = '\uFFFD';
+	    
+    /**
+     * Value returned in <code><a href="#bounds32(java.lang.String, int)">bounds32()</a></code>.
+     */
+    public static final int SINGLE = 1, LEAD = 2, TRAIL = 5;
+
+    /**
+    * Determines how many chars this char32 requires.
+    * No validity check is made on the argument.
+    * <p><i>If this were integrated into the Java API, it could be a static method of either Character or String.</i>
+    * @param char32 the input character.
+    * @return 1 if char32 is below MIN_SUPPLEMENTARY, otherwise 2.
+    */
+    public static int count16(int char32) {
+        return (char32 < MIN_SUPPLEMENTARY) ? 1 : 2;
+    }
+    
+   /**
+     * Extract a single UTF-32 value from a string.
+     * Used when iterating forwards or backwards (with <code>count16()</code>), as well as random access.
+     * When the char at offset16 is a lead surrogate followed by a trail surrogate,
+     * or a trail surrogate preceded by a lead surrogate, the pair is combined;
+     * otherwise the char at offset16 is returned unchanged, including an
+     * unmatched surrogate.
+     * <p><i>If this were integrated into the Java API, it could be a method of String, StringBuffer and possibly CharacterIterator.</i>
+     * @return UTF-32 value for the UTF-32 value that contains the char at offset16.
+     * @param source the UTF-16 text
+     * @param offset16 UTF-16 offset of any char of the character.
+     * @throws StringIndexOutOfBoundsException if offset16 is outside source
+     */
+    public static int char32At(String source, int offset16) {
+        char single = source.charAt(offset16);
+        if (!isSurrogate(single)) return single;
+
+        // Convert the UTF-16 surrogate pair if necessary.
+        // For simplicity in usage, and because the frequency of pairs is low,
+        // look both directions. Neighbours are bounds-checked explicitly
+        // rather than by catching an exception.
+        if (isLeadSurrogate(single)) {
+            int trailOffset = offset16 + 1;
+            if (trailOffset < source.length()) {
+                char trail = source.charAt(trailOffset);
+                if (isTrailSurrogate(trail)) {
+                    return ((int)single << SURROGATE_SHIFT) + trail + SURROGATE_OFFSET;
+                }
+            }
+        } else if (offset16 > 0) { // isTrailSurrogate(single), so
+            char lead = source.charAt(offset16 - 1);
+            if (isLeadSurrogate(lead)) {
+                return ((int)lead << SURROGATE_SHIFT) + single + SURROGATE_OFFSET;
+            }
+        }
+        return single; // return unmatched surrogate
+    }
+
+    /**
+     * Extract a single UTF-32 value from a StringBuffer.
+     * When the char at offset16 is a lead surrogate followed by a trail surrogate,
+     * or a trail surrogate preceded by a lead surrogate, the pair is combined;
+     * otherwise the char at offset16 is returned unchanged, including an
+     * unmatched surrogate.
+     * @param source the UTF-16 text
+     * @param offset16 UTF-16 offset of any char of the character.
+     * @return the UTF-32 value containing the char at offset16
+     * @throws IndexOutOfBoundsException if offset16 is outside source
+     */
+    public static int char32At(StringBuffer source, int offset16) {
+        char single = source.charAt(offset16);
+        if (!isSurrogate(single)) return single;
+
+        // Convert the UTF-16 surrogate pair if necessary.
+        // For simplicity in usage, and because the frequency of pairs is low,
+        // look both directions. Neighbours are bounds-checked explicitly
+        // rather than by catching an exception.
+        if (isLeadSurrogate(single)) {
+            int trailOffset = offset16 + 1;
+            if (trailOffset < source.length()) {
+                char trail = source.charAt(trailOffset);
+                if (isTrailSurrogate(trail)) {
+                    return ((int)single << SURROGATE_SHIFT) + trail + SURROGATE_OFFSET;
+                }
+            }
+        } else if (offset16 > 0) { // isTrailSurrogate(single), so
+            char lead = source.charAt(offset16 - 1);
+            if (isLeadSurrogate(lead)) {
+                return ((int)lead << SURROGATE_SHIFT) + single + SURROGATE_OFFSET;
+            }
+        }
+        return single; // return unmatched surrogate
+    }
+    
+    /**
+     * Extract a single UTF-32 value from the range [start16, end16) of a char array.
+     * A surrogate is only paired with a neighbour that lies inside that range
+     * (and inside the array); otherwise the char at offset16 is returned
+     * unchanged, including an unmatched surrogate.
+     * @param source array of UTF-16 chars
+     * @param start16 first offset of the range (inclusive)
+     * @param end16 end of the range (exclusive)
+     * @param offset16 UTF-16 offset of any char of the character.
+     * @return the UTF-32 value containing the char at offset16
+     * @throws ArrayIndexOutOfBoundsException if offset16 is outside the range or the array
+     */
+    public static int char32At(char[] source, int start16, int end16, int offset16) {
+        if (offset16 < start16 || offset16 >= end16) {
+            throw new ArrayIndexOutOfBoundsException(offset16);
+        }
+
+        char single = source[offset16];
+        if (!isSurrogate(single)) return single;
+
+        // Convert the UTF-16 surrogate pair if necessary.
+        // For simplicity in usage, and because the frequency of pairs is low,
+        // look both directions.
+        if (isLeadSurrogate(single)) {
+            int trailOffset = offset16 + 1;
+            if (trailOffset < end16 && trailOffset < source.length) {
+                char trail = source[trailOffset];
+                if (isTrailSurrogate(trail)) {
+                    return ((int)single << SURROGATE_SHIFT) + trail + SURROGATE_OFFSET;
+                }
+            }
+        } else if (offset16 > start16 && offset16 > 0) { // isTrailSurrogate(single), so
+            // never look at a char in front of start16
+            char lead = source[offset16 - 1];
+            if (isLeadSurrogate(lead)) {
+                return ((int)lead << SURROGATE_SHIFT) + single + SURROGATE_OFFSET;
+            }
+        }
+        return single; // return unmatched surrogate
+    }
+    
+    
+    /**
+     * Returns the substring holding the whole code point that contains the code unit at
+     * offset16: one char when bounds32 reports anything other than LEAD or TRAIL, otherwise
+     * both halves of the surrogate pair.
+     * @param s text to extract from
+     * @param offset16 UTF-16 offset of any code unit of the wanted code point
+     * @return a one- or two-char string
+     */
+    public static String getCodePointSubstring(String s, int offset16) {
+        int kind = bounds32(s, offset16);
+        int from = offset16;
+        int to = offset16 + 1;
+        if (kind == LEAD) {
+            to = offset16 + 2;      // pair starts here: include the following trail
+        } else if (kind == TRAIL) {
+            from = offset16 - 1;    // pair started one unit earlier
+        }
+        return s.substring(from, to);
+    }
+
+    /**
+     * StringBuffer variant: returns the substring holding the whole code point that contains
+     * the code unit at offset16 (one char, or both halves of a surrogate pair when bounds32
+     * reports LEAD or TRAIL).
+     * @param s text to extract from
+     * @param offset16 UTF-16 offset of any code unit of the wanted code point
+     * @return a one- or two-char string
+     */
+    public static String getCodePointSubstring(StringBuffer s, int offset16) {
+        int kind = bounds32(s, offset16);
+        int from = offset16;
+        int to = offset16 + 1;
+        if (kind == LEAD) {
+            to = offset16 + 2;      // pair starts here: include the following trail
+        } else if (kind == TRAIL) {
+            from = offset16 - 1;    // pair started one unit earlier
+        }
+        return s.substring(from, to);
+    }
+
+    /**
+     * Writes the UTF-16 form of cp into output starting at oPosition, never writing at or
+     * beyond oEnd.
+     * @param output destination array
+     * @param oPosition index at which to store the first code unit
+     * @param oEnd index just past the last writable position
+     * @param cp code point to store
+     * @return the index just past the last code unit written
+     * @exception ArrayIndexOutOfBoundsException if a needed position is at or beyond oEnd
+     */
+    public static int append32(char[] output, int oPosition, int oEnd, int cp) {
+        int next = oPosition;
+        if (next >= oEnd) throw new ArrayIndexOutOfBoundsException(next);
+        output[next] = UTF32.getLead(cp);
+        ++next;
+        if (UTF32.count16(cp) == 1) {
+            return next;            // single code unit: done
+        }
+        // second code unit needed; the first one has already been stored
+        if (next >= oEnd) throw new ArrayIndexOutOfBoundsException(next);
+        output[next] = UTF32.getTrail(cp);
+        return next + 1;
+    }
+
+    /**
+     * Replaces the code point that contains the code unit at position with codePoint.
+     * When the old and new code points need the same number of code units the chars are
+     * overwritten in place; otherwise the old unit(s) are replaced by valueOf32(codePoint),
+     * which changes the length of the buffer.
+     * @param b buffer to modify
+     * @param position UTF-16 offset of any code unit of the code point to replace
+     * @param codePoint the new code point
+     */
+    public static void setChar32At(StringBuffer b, int position, int codePoint) {
+        int type = bounds32(b, position);
+        // handle simple cases: #chars at position match #chars in codePoint
+        int end = position;     // index of the last code unit to replace in the long form
+        switch (type) {
+          case SINGLE:
+            if (isSupplementary(codePoint)) break;
+            b.setCharAt(position, (char)codePoint);
+            return;
+          case LEAD:
+            if (!isSupplementary(codePoint)) {
+                ++end;          // also remove the trail that follows
+                break;
+            }
+            b.setCharAt(position++, (char)getLead(codePoint));
+            b.setCharAt(position, (char)getTrail(codePoint));
+            return;
+          case TRAIL:
+            // The pair starts one unit before the given offset, so step back first.
+            // (Previously the supplementary case wrote the new lead over the old trail
+            // and the new trail over the following, unrelated char.)
+            --position;
+            if (!isSupplementary(codePoint)) break;
+            b.setCharAt(position++, (char)getLead(codePoint));
+            b.setCharAt(position, (char)getTrail(codePoint));
+            return;
+        }
+        // mismatch, just use long form
+        b.replace(position, end+1, valueOf32(codePoint));
+    }
+        
+    /**
+     * See if a char value is legal. The only values rejected are the two
+     * not-a-characters at the top of the range (\\uFFFE and \\uFFFF);
+     * the datatype char itself prevents out of bounds errors.
+     * Note: legal does not mean that it is assigned in this version of Unicode.
+     * <p><i>If this were integrated into the Java API, it could be a static method of String or Character.</i>
+     * @param char16 UTF-16 code unit to test
+     * @return true iff legal.
+     */
+    public static boolean isLegal(char char16) {
+        // char cannot exceed 0xFFFF, so excluding these two values is the whole test
+        return char16 != 0xFFFE && char16 != 0xFFFF;
+    }
+
+    /**
+     * See if a UTF32 value is legal. It can't be:
+     * <ul>
+     * <li>Out of bounds (less than 0 or greater than MAX_UNICODE)</li>
+     * <li>Not-a-character (of the form xxFFFF or xxFFFE)</li>
+     * </ul>
+     * Surrogate code points (00D800 to 00DFFF) are <b>not</b> rejected by this method: the
+     * check for them is disabled in the code.
+     * Note: legal does not mean that it is assigned in this version of Unicode.
+     * <p><i>If this were integrated into the Java API, it could be a static method of String or Character.</i>
+     * @param char32 UTF-32 value to test
+     * @return true iff legal.
+     */
+    public static boolean isLegal(int char32) {
+        boolean inRange = (char32 >= 0) && (char32 <= MAX_UNICODE);
+        // the last two values of every plane are non-characters
+        boolean nonCharacter = (char32 & PLANE_MASK) >= NON_CHARACTER_BASE;
+        // disabled surrogate test, kept for reference:
+        //if (char32 < SURROGATE_BASE) return true;
+        //if (char32 < SURROGATE_LIMIT) return false;
+        return inRange && !nonCharacter;
+    }
+
+   /**
+    * Determines whether the code unit OR code point is a surrogate, i.e. lies in
+    * [SURROGATE_BASE, SURROGATE_LIMIT).
+    * <p><i>If this were integrated into the Java API, it could be a static method of String or Character.</i>
+    * @return true iff the input value is a surrogate.
+    * @param char32 the input code unit or code point.
+    */
+    public static boolean isSurrogate(int char32) {
+        if (char32 < SURROGATE_BASE) {
+            return false;
+        }
+        return char32 < SURROGATE_LIMIT;
+    }
+    
+   /**
+    * Determines whether the code point is supplementary, i.e. lies in
+    * [MIN_SUPPLEMENTARY, MAX_UNICODE].
+    * <p><i>If this were integrated into the Java API, it could be a static method of String or Character.</i>
+    * @return true iff the input code point is supplementary.
+    * @param char32 the input code point.
+    */
+    public static boolean isSupplementary(int char32) {
+        if (char32 < MIN_SUPPLEMENTARY) {
+            return false;
+        }
+        return char32 <= MAX_UNICODE;
+    }
+    
+   /**
+    * Determines whether the code point is basic, i.e. non-negative and below
+    * MIN_SUPPLEMENTARY.
+    * <p><i>If this were integrated into the Java API, it could be a static method of String or Character.</i>
+    * @return true iff the input code point is in the basic range.
+    * @param char32 the input code point.
+    */
+    public static boolean isBasic(int char32) {
+        if (char32 < 0) {
+            return false;
+        }
+        return char32 < MIN_SUPPLEMENTARY;
+    }
+    
+   /**
+    * Determines whether the character is a trail surrogate, i.e. lies in
+    * [TRAIL_BASE, TRAIL_LIMIT).
+    * <p><i>If this were integrated into the Java API, it could be a static method of String or Character.</i>
+    * @return true iff the input character is a trail surrogate.
+    * @param ch the input character.
+    */
+    public static boolean isTrailSurrogate(char ch) {
+        if (ch < TRAIL_BASE) {
+            return false;
+        }
+        return ch < TRAIL_LIMIT;
+    }
+    
+   /**
+    * Determines whether the character is a lead surrogate, i.e. lies in
+    * [LEAD_BASE, LEAD_LIMIT).
+    * <p><i>If this were integrated into the Java API, it could be a static method of String or Character.</i>
+    * @return true iff the input character is a lead surrogate.
+    * @param ch the input character.
+    */
+    public static boolean isLeadSurrogate(char ch) {
+        if (ch < LEAD_BASE) {
+            return false;
+        }
+        return ch < LEAD_LIMIT;
+    }
+        
+   /**
+    * Returns the first UTF-16 code unit of a code point.
+    * No validation is done: values below MIN_SUPPLEMENTARY are simply narrowed to char, and
+    * everything else is treated as supplementary.
+    * If a validity check is required, use <code><a href="#isLegal(char)">isLegal()</a></code> on char32 before calling.
+    * <p><i>If this were integrated into the Java API, it could be a static method of String or Character.</i>
+    * @return the lead surrogate if char32 is at or above MIN_SUPPLEMENTARY;
+    * <br>otherwise the character itself
+    * @param char32 the input character.
+    */
+    public static char getLead(int char32) {
+        int unit = (char32 < MIN_SUPPLEMENTARY)
+            ? char32
+            : LEAD_BASE_OFFSET + (char32 >> SURROGATE_SHIFT);
+        return (char) unit;
+    }
+    
+   /**
+    * Returns the second UTF-16 code unit of a code point, if it has one.
+    * If a validity check is required, use <code><a href="#isLegal(char)">isLegal()</a></code> on char32 before calling.
+    * <p><i>If this were integrated into the Java API, it could be a static method of String or Character.</i>
+    * @return the trail surrogate if char32 is at or above MIN_SUPPLEMENTARY;
+    * <br>and 0 otherwise (note: 0 is not a valid trail surrogate).
+    * @param char32 the input character.
+    */
+    public static char getTrail(int char32) {
+        if (char32 < MIN_SUPPLEMENTARY) {
+            return '\u0000';    // only one code unit: there is no trail
+        }
+        int lowBits = char32 & TRAIL_MASK;
+        return (char) (TRAIL_BASE + lowBits);
+    }
+    
+   /**
+    * Convenience method corresponding to String.valueOf(char). It returns a one or two char string containing
+    * the UTF-32 value. If the input value is negative or above MAX_UNICODE, it substitutes REPLACEMENT_CHAR.
+    * If a validity check is required, use <code><a href="#isLegal(char)">isLegal()</a></code> before calling.
+    * <p><i>If this were integrated into the Java API, it could be a static method of String.</i>
+    * @return string value of char32
+    * @param char32 the input character.
+    */
+    public static String valueOf32(int char32) {
+        if (char32 >= 0 && char32 < MIN_SUPPLEMENTARY) {
+            return String.valueOf((char)char32);
+        }
+        if (char32 < 0 || char32 > MAX_UNICODE) {
+            return String.valueOf(REPLACEMENT_CHAR);
+        }
+        // supplementary: build the surrogate pair in the shared scratch array;
+        // the lock keeps concurrent callers from interleaving their writes
+        synchronized (buf2) {
+            buf2[0] = getLead(char32);
+            buf2[1] = getTrail(char32);
+            return String.valueOf(buf2);
+        }
+    }
+    private static char[] buf2 = new char[2]; // shared scratch pair for valueOf32
+    
+   /**
+    * Returns the UTF-32 character corresponding to the two chars.
+    * The arguments are checked with <code>isLeadSurrogate()</code> and
+    * <code>isTrailSurrogate()</code>; if either check fails REPLACEMENT_CHAR is returned.
+    * <p><i>If this were integrated into the Java API, it could be a static method of String or Character.</i>
+    * @return the UTF-32 character, or REPLACEMENT_CHAR if invalid.
+    * @param lead the lead char
+    * @param trail the trail char
+    */
+    public static int getChar32(char lead, char trail) {
+        if (isLeadSurrogate(lead) && isTrailSurrogate(trail)) {
+            // Use a plain shift. The former "lead <<= SURROGATE_SHIFT" was a compound
+            // assignment to a char, which narrows the shifted value back to 16 bits and
+            // so threw away the high bits of the result.
+            return (lead << SURROGATE_SHIFT) + trail + SURROGATE_OFFSET;
+        }
+        return REPLACEMENT_CHAR;
+    }
+        
+    /**
+    * Returns the type of the UTF32 boundaries around the char at offset16.
+    * Used for random access.
+    * <p><i>If this were integrated into the Java API, it could be a method of String, StringBuffer and possibly CharacterIterator.</i>
+    * @return SINGLE, LEAD, or TRAIL:
+    * <ul><li>
+    * SINGLE: a single char (including an unmatched surrogate); the bounds are [offset16, offset16+1]
+    * </li><li>
+    * LEAD: a surrogate pair starting at offset16; the bounds are [offset16, offset16+2]
+    * </li><li>
+    * TRAIL: a surrogate pair starting at offset16-1; the bounds are [offset16-1, offset16+1]
+    * </ul>
+    * For bit-twiddlers, the return values for these are chosen so that the boundaries can be gotten by:
+    * [offset16 - (value>>2), offset16 + (value&3)].
+    * @param source text to analyse
+    * @param offset16 UTF-16 offset
+    * @exception StringIndexOutOfBoundsException if offset16 is out of bounds.
+    */
+    public static int bounds32(String source, int offset16) {
+        char ch = source.charAt(offset16);
+        if (!isSurrogate(ch)) {
+            return SINGLE;
+        }
+        if (isLeadSurrogate(ch)) {
+            // a lead only counts if a trail follows it inside the text
+            int next = offset16 + 1;
+            boolean paired = next < source.length() && isTrailSurrogate(source.charAt(next));
+            return paired ? LEAD : SINGLE;
+        }
+        // isTrailSurrogate(ch): only counts if a lead precedes it
+        int prev = offset16 - 1;
+        boolean paired = prev >= 0 && isLeadSurrogate(source.charAt(prev));
+        return paired ? TRAIL : SINGLE;
+    }
+
+    /**
+     * StringBuffer variant: classifies the code unit at offset16 as SINGLE (a lone char or
+     * unmatched surrogate), LEAD (first half of a surrogate pair) or TRAIL (second half of a
+     * surrogate pair).
+     * @param source text to analyse
+     * @param offset16 UTF-16 offset
+     * @return SINGLE, LEAD, or TRAIL
+     */
+    public static int bounds32(StringBuffer source, int offset16) {
+        char ch = source.charAt(offset16);
+        if (!isSurrogate(ch)) {
+            return SINGLE;
+        }
+        if (isLeadSurrogate(ch)) {
+            // a lead only counts if a trail follows it inside the text
+            int next = offset16 + 1;
+            boolean paired = next < source.length() && isTrailSurrogate(source.charAt(next));
+            return paired ? LEAD : SINGLE;
+        }
+        // isTrailSurrogate(ch): only counts if a lead precedes it
+        int prev = offset16 - 1;
+        boolean paired = prev >= 0 && isLeadSurrogate(source.charAt(prev));
+        return paired ? TRAIL : SINGLE;
+    }
+    
+    // should be renamed bounds
+
+    /**
+     * char[] variant: classifies the code unit at offset16 as SINGLE, LEAD or TRAIL, looking
+     * for the other half of a surrogate pair only inside the slice [oStart, oEnd).
+     * @param source text to analyse
+     * @param oStart first valid index of the slice
+     * @param oEnd index just past the last valid index of the slice
+     * @param offset16 UTF-16 offset
+     * @return SINGLE, LEAD, or TRAIL
+     * @exception ArrayIndexOutOfBoundsException if offset16 is outside [oStart, oEnd)
+     */
+    public static int bounds32(char[] source, int oStart, int oEnd, int offset16) {
+        if (offset16 < oStart || offset16 >= oEnd) {
+            throw new ArrayIndexOutOfBoundsException(offset16);
+        }
+        char ch = source[offset16];
+        if (!isSurrogate(ch)) {
+            return SINGLE;
+        }
+        if (isLeadSurrogate(ch)) {
+            // a lead only counts if a trail follows it inside the slice
+            int next = offset16 + 1;
+            boolean paired = next < oEnd && isTrailSurrogate(source[next]);
+            return paired ? LEAD : SINGLE;
+        }
+        // isTrailSurrogate(ch): only counts if a lead precedes it inside the slice
+        int prev = offset16 - 1;
+        boolean paired = prev >= oStart && isLeadSurrogate(source[prev]);
+        return paired ? TRAIL : SINGLE;
+    }
+
+
+
+    /**
+    * Returns the UTF-16 offset that corresponds to a UTF-32 offset.
+    * Used for random access. See the <a name="_top_">class description</a>
+    * for notes on roundtripping.
+    * A lead surrogate followed by a trail surrogate counts as one code point; every other
+    * code unit, including an unmatched surrogate, counts as one by itself.
+    * <p><i>If this were integrated into the Java API, it could be a method of String, StringBuffer and possibly CharacterIterator.</i>
+    * @return UTF-16 offset
+    * @param offset32 UTF-32 offset
+    * @param source text to analyse
+    * @exception StringIndexOutOfBoundsException if offset32 is out of bounds.
+    */
+    public static int findOffset16(String source, int offset32) {
+        int toSkip = offset32;          // code points still to step over
+        boolean afterLead = false;      // was the previous unit a lead surrogate?
+        int index = 0;
+
+        while (toSkip > 0 && index < source.length()) {
+            char unit = source.charAt(index);
+            if (afterLead && isTrailSurrogate(unit)) {
+                afterLead = false;      // completes the pair: already counted with its lead
+            } else {
+                afterLead = isLeadSurrogate(unit);
+                --toSkip;               // everything else counts as one code point
+            }
+            ++index;
+        }
+
+        // anything left over (or a negative request) means offset32 is beyond the bounds
+        if (toSkip != 0) throw new StringIndexOutOfBoundsException(offset32);
+
+        // if we stopped between the halves of a pair, step over the trail as well so the
+        // result is on a code point boundary
+        if (afterLead && index < source.length() && isTrailSurrogate(source.charAt(index))) {
+            ++index;
+        }
+        return index;
+    }
+
+    /**
+    * Returns the UTF-32 offset corresponding to the first UTF-32 boundary at or after the given UTF-16 offset.
+    * Used for random access. See the <a name="_top_">class description</a>
+    * for notes on roundtripping.
+    * <i>Note: If the UTF-16 offset is into the middle of a surrogate pair, then
+    * the UTF-32 offset of the <strong>end</strong> of the pair is returned.</i>
+    * <p>To find the UTF-32 length of a string, use:
+    * <pre>
+    * len32 = findOffset32(source, source.length());
+    * </pre>
+    * <p><i>If this were integrated into the Java API, it could be a methods of String, StringBuffer and possibly CharacterIterator.</i>
+    * @return UTF-32 offset; 0 if offset16 is zero or negative
+    * @param source text to analyse
+    * @param offset16 UTF-16 offset
+    * @exception StringIndexOutOfBoundsException if offset16 is greater than the length of source.
+    */
+    public static int findOffset32(String source, int offset16) {
+        int codePoints = 0;
+        boolean afterLead = false;      // was the previous unit a lead surrogate?
+        int index = 0;
+        while (index < offset16) {
+            char unit = source.charAt(index);
+            if (afterLead && isTrailSurrogate(unit)) {
+                afterLead = false;      // completes the pair: already counted with its lead
+            } else {
+                afterLead = isLeadSurrogate(unit);
+                ++codePoints;           // everything else counts as one code point
+            }
+            ++index;
+        }
+        return codePoints;
+    }
+
+    /**
+     * Returns the length of source counted in code points, i.e. the UTF-32 offset of its end.
+     * @param source text to measure
+     * @return number of code points as counted by findOffset32
+     */
+    public static int length32(String source) {
+        int length16 = source.length();
+        return findOffset32(source, length16);
+    }
+
+    /**
+     * Append a single UTF-32 value to the end of a StringBuffer, as one char or as a
+     * surrogate pair.
+     * If a validity check is required, use <code><a href="#isLegal(char)">isLegal()</a></code> on char32 before calling.
+     * <p><i>If this were integrated into the Java API, it could be a method of StringBuffer.</i>
+     * @param char32 value to append. If out of bounds, substitutes REPLACEMENT_CHAR.
+     * @param target string to add to
+     */
+    public static void append32(StringBuffer target, int char32) {
+        // out-of-range values are replaced rather than rejected
+        int codePoint = (char32 < 0 || char32 > MAX_UNICODE) ? REPLACEMENT_CHAR : char32;
+
+        if (codePoint < MIN_SUPPLEMENTARY) {
+            target.append((char)codePoint);
+            return;
+        }
+
+        // supplementary: write lead then trail
+        char lead = (char)(LEAD_BASE_OFFSET + (codePoint >> SURROGATE_SHIFT));
+        char trail = (char)(TRAIL_BASE + (codePoint & TRAIL_MASK));
+        target.append(lead);
+        target.append(trail);
+    }
+    
+    /**
+     * Compare strings using Unicode code point order, instead of UTF-16 code unit order.
+     * A null argument sorts before any string; non-String arguments cause a ClassCastException.
+     */
+    public static final class StringComparator implements java.util.Comparator {
+        /**
+         * Standard String compare, except that differing code units are remapped before
+         * being compared so that surrogates sort above all other code units.
+         * @return -1, 0 or 1
+         */
+        public int compare(Object a, Object b) {
+            if (a == b) return 0;
+            if (a == null) return -1;
+            if (b == null) return 1;
+
+            String left = (String) a;
+            String right = (String) b;
+            int leftLength = left.length();
+            int rightLength = right.length();
+            int shared = Math.min(leftLength, rightLength);
+
+            for (int index = 0; index < shared; ++index) {
+                char x = left.charAt(index);
+                char y = right.charAt(index);
+                if (x != y) {
+                    // first difference decides; compare in code point order
+                    return (codePointRank(x) < codePointRank(y)) ? -1 : 1;
+                }
+            }
+
+            // one string is a prefix of the other: shorter sorts first
+            if (leftLength == rightLength) return 0;
+            return (leftLength < rightLength) ? -1 : 1;
+        }
+
+        /**
+         * Remaps a code unit so that numeric order equals code point order:
+         * surrogates (D800..DFFF) move up to F800..FFFF, and E000..FFFF move down to D800..F7FF.
+         */
+        private static int codePointRank(char unit) {
+            if (unit < 0xD800) return unit;
+            return (unit < 0xE000) ? unit + 0x2000 : unit - 0x800;
+        }
+    }
+                        
+    // ===========================================================
+    // PRIVATES
+    // ===========================================================
+    
+    /**
+     * Prevent instance from being created.
+     */
+    private UTF32() {}
+    
+   /**
+     * Maximum code point value for UTF-32.
+     */
+    private static final int MAX_UNICODE = 0x10FFFF;
+    
+   /**
+     * Maximum value for Basic code points (BMP).
+     */
+    private static final int MAX_BASIC = 0xFFFF;
+    
+   /**
+     * Minimum value for Supplementary code points (the first code point above MAX_BASIC).
+     */
+    private static final int MIN_SUPPLEMENTARY = 0x10000;
+    
+    /**
+     * Used to mask off single plane in checking for NON_CHARACTER
+     */
+    private static final int PLANE_MASK = 0xFFFF;
+    
+    /**
+     * Range of non-characters in each plane (the last two values of the plane)
+     */
+    private static final int 
+        NON_CHARACTER_BASE = 0xFFFE, 
+        NON_CHARACTER_END = 0xFFFF;
+
+    // useful statics and tables for fast lookup
+    
+	/**
+	 * Bit pattern common to all surrogate code units (same value as SURROGATE_BASE).
+	 * NOTE(review): the test "X & SURROGATE_MASK == SURROGATE_MASK" is not a reliable
+	 * surrogate check with this value: for example 0xF800 passes it although it lies outside
+	 * [SURROGATE_BASE, SURROGATE_LIMIT). Prefer the range comparison used by isSurrogate().
+	 */
+    static final int SURROGATE_MASK = 0xD800;
+    
+    /**
+     * Bottom 10 bits for use in surrogates.
+     */
+	private static final int TRAIL_MASK = 0x3FF;
+	
+    /**
+     * Shift value for surrogates.
+     */
+	private static final int SURROGATE_SHIFT = 10;
+	
+	/** 
+	 * Lead surrogates go from LEAD_BASE up to LEAD_LIMIT-1.
+	 */
+	private static final int LEAD_BASE = 0xD800, LEAD_LIMIT = 0xDC00;
+	
+	/** 
+	 * Trail surrogates go from TRAIL_BASE up to TRAIL_LIMIT-1.
+	 */
+	private static final int TRAIL_BASE = 0xDC00, TRAIL_LIMIT = 0xE000;
+	
+	/** 
+	 * Surrogates go from SURROGATE_BASE up to SURROGATE_LIMIT-1.
+	 */
+	private static final int SURROGATE_BASE = 0xD800, SURROGATE_LIMIT = 0xE000;
+    
+    /**
+     * Any codepoint at or greater than SURROGATE_SPACE_BASE requires 2 16-bit code units.
+     */
+	//private static final int SURROGATE_SPACE_BASE = 0x10000;
+
+    /**
+     * Offset to add to combined surrogate pair to avoid masking:
+     * (lead << SURROGATE_SHIFT) + trail + SURROGATE_OFFSET is the code point of the pair.
+     */
+	private static final int SURROGATE_OFFSET = MIN_SUPPLEMENTARY
+	    - (LEAD_BASE << SURROGATE_SHIFT) - TRAIL_BASE;
+	    
+	/**
+	 * Offset to add to (codePoint >> SURROGATE_SHIFT) to get the lead surrogate of a
+	 * supplementary code point.
+	 */
+	private static final int LEAD_BASE_OFFSET = LEAD_BASE - (MIN_SUPPLEMENTARY >> SURROGATE_SHIFT);
+	
+};
--- a/tools/unicodetools/com/ibm/text/utility/UTF8StreamReader.java
+++ b/tools/unicodetools/com/ibm/text/utility/UTF8StreamReader.java
@ -0,0 +1,177 @@
+package com.ibm.text.utility;
+
+import java.io.Reader;
+import java.io.InputStream;
+import java.io.IOException;
+
+/**
+ * Utility class that reads UTF8 bytes from an InputStream and delivers them as UTF-16 chars.<br>
+ * It is the reading counterpart of UTF8StreamWriter.
+ * <br>
+ * Example of Usage (from within this package; the constructor is package-private):
+ * <pre>
+ * BufferedReader in = new BufferedReader(
+ *   new UTF8StreamReader(new FileInputStream(fileName), 32*1024));
+ * </pre>
+ * Malformed input (illegal lead or trail bytes, non-shortest forms, code points above 0x10FFFF
+ * and, by default, a surrogate pair encoded as two separate 3-byte sequences) is reported with
+ * an IllegalArgumentException. A multi-byte sequence cut short by the end of the stream is
+ * dropped silently.<br>
+ * NB: unsynchronized for simplicity and speed. The same object must NOT be used in multiple threads.
+ */
+ // TODO: Fix case of surrogate pair crossing input buffer boundary
+
+public final class UTF8StreamReader extends Reader {
+
+    private InputStream input;
+    private boolean checkIrregular = true;
+
+    /**
+     * @param stream source of UTF8 bytes
+     * @param buffersize size of the internal byte buffer, at least 1
+     */
+    UTF8StreamReader(InputStream stream, int buffersize) {
+        if (buffersize < 1) {
+            throw new IllegalArgumentException("UTF8StreamReader buffersize must be >= 1");
+        }
+        input = stream;
+        bBuffer = new byte[buffersize];
+    }
+
+    private byte[] bBuffer; // do a bit of buffering ourselves for efficiency
+    private int
+        bIndex = 0,             // next unread byte in bBuffer
+        bEnd = 0,               // number of valid bytes in bBuffer
+        bRemaining = 0,         // trail bytes still expected for the current sequence
+        currentPoint = 0,       // code point being assembled
+        lastPoint,              // previous code point delivered (for the irregular-pair check)
+        shortestFormTest = 0;   // smallest code point allowed for the current sequence length
+    private char cCarry = 0;    // low surrogate that did not fit into the previous output
+
+    /**
+     * Number of trail bytes that follow each possible first byte; -1 marks bytes that cannot
+     * start a sequence. F0..F4 start 4-byte sequences (F4 is needed for U+100000..U+10FFFF;
+     * values above 0x10FFFF are rejected after assembly).
+     */
+    private static final byte[] BYTES_REMAINING = {
+        0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  // 0-
+        0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  // 1-
+        0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  // 2-
+        0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  // 3-
+        0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  // 4-
+        0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  // 5-
+        0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  // 6-
+        0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  // 7-
+       -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,  // 8-
+       -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,  // 9-
+       -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,  // A-
+       -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,  // B-
+       -1,-1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  // C-
+        1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  1, 1, 1, 1,  // D-
+        2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,  2, 2, 2, 2,  // E-
+        3, 3, 3, 3,  3,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1   // F-
+    };
+
+    /**
+     * Resets the decoder to "expecting a first byte" and builds the exception for malformed
+     * input, so that a caller who catches it does not find the reader stuck mid-sequence.
+     */
+    private IllegalArgumentException malformed(String message) {
+        bRemaining = 0;
+        currentPoint = 0;
+        return new IllegalArgumentException(message);
+    }
+
+    /**
+     * Decodes up to len chars into cbuf starting at off.
+     * @return the number of chars stored, 0 if len is not positive, or -1 if the end of the
+     * stream was reached before any char could be stored
+     * @exception IndexOutOfBoundsException if off and len do not describe a region of cbuf
+     * @exception IllegalArgumentException if the input is not well-formed UTF8
+     */
+    public int read(char cbuf[], int off, int len) throws IOException {
+
+        // check input arguments
+
+        if (len <= 0) return 0;
+        if (off < 0 || off > cbuf.length - len) {
+            throw new IndexOutOfBoundsException(
+                "off=" + off + ", len=" + len + ", cbuf.length=" + cbuf.length);
+        }
+
+        int cIndex = off;
+        int cEnd = off + len;
+
+        // if we had a low surrogate from the last call, get it first
+
+        if (cCarry != 0) {
+            cbuf[cIndex++] = cCarry;
+            cCarry = 0;
+        }
+
+        // now loop, filling in the output
+
+        while (cIndex < cEnd) {
+
+            // get more bytes if we run out
+
+            if (bIndex >= bEnd) {
+                bIndex = 0;
+                bEnd = input.read(bBuffer, 0, bBuffer.length);
+                if (bEnd < 0) {
+                    if (cIndex == off) return -1;
+                    return cIndex - off;
+                }
+            }
+
+            // process the current byte (mask because Java doesn't have unsigned byte)
+
+            int b = bBuffer[bIndex++] & 0xFF;
+
+            switch (bRemaining) {
+              // First Byte case
+              case 0:
+                bRemaining = BYTES_REMAINING[b];
+                switch (bRemaining) {
+                  case 0:
+                    cbuf[cIndex++] = (char) (lastPoint = b);
+                    break;
+                  case 1:
+                    currentPoint = b & 0x1F;
+                    shortestFormTest = 0x80;
+                    break;
+                  case 2:
+                    currentPoint = b & 0xF;
+                    shortestFormTest = 0x800;
+                    break;
+                  case 3:
+                    currentPoint = b & 0x7;
+                    shortestFormTest = 0x10000;
+                    break;
+                  default:
+                    throw malformed("illegal lead code unit: " + b);
+                }
+                break;
+
+              // Trailing bytes
+              case 2: case 3:
+                b ^= 0x80;      // a valid trail byte 10xxxxxx becomes 00xxxxxx
+                if (b > 0x3F) {
+                    throw malformed("illegal trail code unit: " + (b ^ 0x80));
+                }
+                currentPoint = (currentPoint << 6) | b;
+                --bRemaining;
+                break;
+
+              // Last trailing byte, time to assemble
+              case 1:
+                b ^= 0x80;
+                if (b > 0x3F) {
+                    throw malformed("illegal trail code unit: " + (b ^ 0x80));
+                }
+                currentPoint = (currentPoint << 6) | b;
+                --bRemaining;
+
+                // we have gotten the code, so check and stash it
+
+                if (currentPoint < shortestFormTest) {
+                    throw malformed("illegal sequence, not shortest form: " + currentPoint);
+                }
+                // a high surrogate (D800..DBFF) directly followed by a low surrogate means the
+                // pair was encoded as two 3-byte sequences instead of one 4-byte sequence
+                if (checkIrregular && 0xD800 <= lastPoint && lastPoint <= 0xDBFF
+                        && 0xDC00 <= currentPoint && currentPoint <= 0xDFFF) {
+                    throw malformed("irregular sequence, surrogate pair: " + currentPoint);
+                }
+                lastPoint = currentPoint;
+                if (currentPoint >= 0x10000) {
+                    if (currentPoint > 0x10FFFF) {
+                        throw malformed("illegal code point, too large: " + currentPoint);
+                    }
+                    currentPoint -= 0x10000;
+                    cbuf[cIndex++] = (char)(0xD800 + (currentPoint >> 10));
+                    currentPoint = 0xDC00 + (currentPoint & 0x3FF);
+                    if (cIndex >= cEnd) {
+                        // no room for the low surrogate: deliver it on the next call
+                        cCarry = (char)currentPoint;
+                        return cIndex - off;
+                    }
+                }
+                cbuf[cIndex++] = (char)currentPoint;
+                currentPoint = 0;
+                break;
+            }
+        }
+        return cIndex - off;
+    }
+
+    /** Closes the underlying stream. */
+    public void close() throws IOException {
+        input.close();
+    }
+}
--- a/tools/unicodetools/com/ibm/text/utility/UTF8StreamWriter.java
+++ b/tools/unicodetools/com/ibm/text/utility/UTF8StreamWriter.java
@ -0,0 +1,147 @@
+package com.ibm.text.utility;
+import java.io.*;
+
+/**
+ * Utility class that writes UTF8.<br>
+ * Main purpose is to supplant OutputStreamWriter(x, "UTF8"), since that has serious errors.
+ * <br>
+ * Example of Usage:
+ * <pre>
+ * PrintWriter log = new PrintWriter(
+ *   new UTF8StreamWriter(new FileOutputStream(fileName), 32*1024));
+ * </pre>
+ * A surrogate pair is written as one 4-byte sequence, even when its two halves arrive in
+ * separate write() calls. An unpaired surrogate is written as a 3-byte sequence. Note that
+ * flush() and close() write out a pending high surrogate as unpaired.<br>
+ * NB: unsynchronized for simplicity and speed. The same object must NOT be used in multiple threads.
+ */
+ // TODO: Fix case of surrogate pair crossing input buffer boundary
+
+public final class UTF8StreamWriter extends Writer {
+
+    private OutputStream output;
+    private byte[] bBuffer; // do a bit of buffering ourselves for efficiency
+    private int bSafeEnd;   // highest bIndex at which a 4-byte sequence still fits
+    private int bIndex = 0; // number of bytes currently buffered
+    private int highSurrogate = 0; // pending high surrogate waiting for its partner, or 0
+
+    /**
+     * @param stream destination for the UTF8 bytes
+     * @param buffersize size of the internal byte buffer, at least 5
+     */
+    public UTF8StreamWriter(OutputStream stream, int buffersize) {
+        if (buffersize < 5) {
+            throw new IllegalArgumentException("UTF8StreamWriter buffersize must be >= 5");
+        }
+        output = stream;
+        bBuffer = new byte[buffersize];
+        bSafeEnd = buffersize - 4;
+    }
+
+    // smallest code points that need 2, 3 and 4 bytes respectively
+    private static final int
+        NEED_2_BYTES = 1<<7,
+        NEED_3_BYTES = 1<<(2*5 + 1),
+        NEED_4_BYTES = 1<<(3*5 + 1);
+
+    private static final int
+        TRAILING_BOTTOM_MASK = 0x3F,
+        TRAILING_TOP = 0x80;
+
+    // (high << 10) + low + MAGIC is the code point of a surrogate pair
+    private static final int MAGIC = 0x10000 + ((0 - 0xD800) << 10) + (0 - 0xDC00);
+
+    /**
+     * Encodes cLength chars of buffer, starting at cStart, into the byte buffer, writing the
+     * byte buffer to the stream whenever it fills up.
+     */
+    public final void write(char[] buffer, int cStart, int cLength) throws IOException {
+        int cEnd = cStart + cLength;
+        while (cStart < cEnd) {
+
+            // get code point
+
+            int utf32 = buffer[cStart++];
+
+            // special check for surrogates
+
+            if (highSurrogate != 0) {
+                if (utf32 >= 0xDC00 && utf32 <= 0xDFFF) {
+                    writeCodePoint((highSurrogate << 10) + utf32 + MAGIC);
+                    highSurrogate = 0;
+                    continue;
+                }
+                // no partner arrived: emit the high surrogate by itself
+                writeCodePoint(highSurrogate);
+                highSurrogate = 0;
+            }
+
+            if (0xD800 <= utf32 && utf32 <= 0xDBFF) {
+                highSurrogate = utf32;
+                continue;
+            }
+
+            // normal case
+
+            writeCodePoint(utf32);
+        }
+    }
+
+    /**
+     * Appends the UTF8 form of one code point to the byte buffer, first emptying the buffer
+     * into the stream if fewer than 4 bytes are guaranteed to be free. Doing the capacity
+     * check here, once per code point, keeps every caller within the buffer (an unpaired
+     * high surrogate followed by another char produces two code points for one input char).
+     */
+    private final void writeCodePoint(int utf32) throws IOException {
+
+        // write if we need to
+
+        if (bIndex > bSafeEnd) {
+            output.write(bBuffer, 0, bIndex);
+            bIndex = 0;
+        }
+
+        // convert to bytes
+
+        if (utf32 < NEED_2_BYTES) {
+            bBuffer[bIndex++] = (byte)utf32;
+            return;
+        }
+
+        // Find out how many bytes we need to write
+        // At this point, it is at least 2.
+        // The bytes are filled in from the last one backwards, 6 bits at a time.
+
+        int backIndex;
+        int firstByteMark;
+        if (utf32 < NEED_3_BYTES) {
+            backIndex = bIndex += 2;
+            firstByteMark = 0xC0;
+        } else if (utf32 < NEED_4_BYTES) {
+            backIndex = bIndex += 3;
+            firstByteMark = 0xE0;
+            bBuffer[--backIndex] = (byte)(TRAILING_TOP | (utf32 & TRAILING_BOTTOM_MASK));
+            utf32 >>= 6;
+        } else {
+            backIndex = bIndex += 4;
+            firstByteMark = 0xF0;
+            bBuffer[--backIndex] = (byte)(TRAILING_TOP | (utf32 & TRAILING_BOTTOM_MASK));
+            utf32 >>= 6;
+            bBuffer[--backIndex] = (byte)(TRAILING_TOP | (utf32 & TRAILING_BOTTOM_MASK));
+            utf32 >>= 6;
+        }
+        bBuffer[--backIndex] = (byte)(TRAILING_TOP | (utf32 & TRAILING_BOTTOM_MASK));
+        utf32 >>= 6;
+        bBuffer[--backIndex] = (byte)(firstByteMark | utf32);
+    }
+
+    /** Writes out any pending high surrogate and then the buffered bytes. */
+    private void internalFlush() throws IOException {
+        if (highSurrogate != 0) {
+            writeCodePoint(highSurrogate);
+            highSurrogate = 0;
+        }
+
+        // write buffer if we need to
+        if (bIndex != 0) {
+            output.write(bBuffer, 0, bIndex);
+            bIndex = 0;
+        }
+    }
+
+    public void close() throws IOException {
+        internalFlush();
+        output.close();
+    }
+
+    public void flush() throws IOException {
+        internalFlush();
+        output.flush();
+    }
+}
--- a/tools/unicodetools/com/ibm/text/utility/Utility.java
+++ b/tools/unicodetools/com/ibm/text/utility/Utility.java
@ -0,0 +1,443 @@
+package com.ibm.text.utility;
+
+import java.util.*;
+import java.text.*;
+import java.io.*;
+
+public final class Utility {    // COMMON UTILITIES
+
+    static final boolean UTF8 = true; // TODO -- make argument
+
+    /**
+     * Looks up a name by index, tolerating bad indexes.
+     *
+     * @param i index into names
+     * @param names table of names; may be null
+     * @return names[i], or "UNKNOWN" when names is null or i is out of range
+     */
+    public static String getName(int i, String[] names) {
+        // Explicit bounds check instead of catching Exception as control flow.
+        if (names == null || i < 0 || i >= names.length) {
+            return "UNKNOWN";
+        }
+        return names[i];
+    }
+
+    private static boolean needCRLF = false;
+
+    /**
+     * Progress indicator: prints a '.' to System.out whenever i is a multiple of 0x7FF,
+     * and records that a line break is owed (see fixDot).
+     */
+    public static void dot(int i) {
+        if ((i % 0x7FF) != 0) {
+            return;
+        }
+        needCRLF = true;
+        System.out.print('.');
+    }
+
+    /**
+     * Ends the current line on System.out if dot() has printed since the last call.
+     */
+    public static void fixDot() {
+        if (!needCRLF) {
+            return;
+        }
+        System.out.println();
+        needCRLF = false;
+    }
+
+    /**
+     * Returns source with every bit in the inclusive range start..end set.
+     * The two positions may be given in either order; bit 0 is the least significant.
+     *
+     * @param source the value to modify
+     * @param start one end of the bit range (0..31)
+     * @param end the other end of the bit range (0..31)
+     * @return source with the selected bits set
+     */
+    public static int setBits(int source, int start, int end) {
+        int high = Math.max(start, end);
+        int low = Math.min(start, end);
+        // (1 << (high+1)) - 1 would wrap to 0 for high == 31 because shift
+        // distances are taken modulo 32; an unsigned right shift of all-ones
+        // yields the same low-bit mask and also handles bit 31.
+        int mask = (-1 >>> (31 - high)) & ~((1 << low) - 1);
+        return source | mask;
+    }
+
+    /**
+     * Returns source with the single bit at position start set
+     * (delegates to setBits with start used for both ends of the range).
+     */
+    public static int setBit(int source, int start) {
+        return setBits(source, start, start);
+    }
+
+    /**
+     * Returns source with every bit in the inclusive range start..end cleared.
+     * The two positions may be given in either order; bit 0 is the least significant.
+     *
+     * @param source the value to modify
+     * @param start one end of the bit range (0..31)
+     * @param end the other end of the bit range (0..31)
+     * @return source with the selected bits cleared
+     */
+    public static int clearBits(int source, int start, int end) {
+        int high = Math.max(start, end);
+        int low = Math.min(start, end);
+        // (1 << (high+1)) - 1 would wrap to 0 for high == 31 because shift
+        // distances are taken modulo 32; an unsigned right shift of all-ones
+        // yields the same low-bit mask and also handles bit 31.
+        int mask = (-1 >>> (31 - high)) & ~((1 << low) - 1);
+        return source & ~mask;
+    }
+
+    /**
+     * Returns source with the single bit at position start cleared
+     * (delegates to clearBits with start used for both ends of the range).
+     */
+    public static int clearBit(int source, int start) {
+        return clearBits(source, start, start);
+    }
+
+    public static int find(String source, String[] target) {
+        for (int i = 0; i < target.length; ++i) {
+            if (source.equalsIgnoreCase(target[i])) return i;
+        }
+        return -1;
+    }
+
+    /**
+     * Like find(), but returns the index narrowed to a byte and treats a miss as an error.
+     *
+     * @throws ChainException if source is not present in target
+     */
+    public static byte lookup(String source, String[] target) {
+        int index = Utility.find(source, target);
+        if (index == -1) {
+            throw new ChainException("Could not find \"{0}\" in table [{1}]", new Object [] {source, target});
+        }
+        return (byte)index;
+    }
+
+    /**
+     * Supplies a zero-padded hex representation of an integer (without 0x).
+     * Digits are upper case. Negative values are rendered as '-' followed by the
+     * padded magnitude. Any number of places is supported.
+     *
+     * @param i the value to format
+     * @param places minimum number of hex digits; shorter results are left-padded with '0'
+     * @return the formatted value
+     */
+    static public String hex(long i, int places) {
+        boolean negative = i < 0;
+        String digits = Long.toString(i, 16).toUpperCase();
+        if (negative) {
+            // Strip the sign rather than negating, so Long.MIN_VALUE needs no special case.
+            digits = digits.substring(1);
+        }
+        if (digits.length() >= places) {
+            return negative ? '-' + digits : digits;
+        }
+        // Pad in a loop: taking a substring of a fixed 16-zero string fails for places > 16.
+        StringBuffer result = new StringBuffer(places + 1);
+        if (negative) {
+            result.append('-');
+        }
+        for (int pad = places - digits.length(); pad > 0; --pad) {
+            result.append('0');
+        }
+        return result.append(digits).toString();
+    }
+
+	/** Formats ch as hex with a minimum of 4 digits. */
+	public static String hex(long ch) {
+	    return hex(ch,4);
+	}
+
+	/** Formats s as hex with 4 places and a single space as separator. */
+	public static String hex(Object s) {
+	    return hex(s, 4, " ");
+	}
+
+	/** Formats s as hex with the given places and a single space as separator. */
+	public static String hex(Object s, int places) {
+	    return hex(s, places, " ");
+	}
+
+	/** Formats s as hex with 4 places and the given separator. */
+	public static String hex(Object s, String separator) {
+	    return hex(s, 4, separator);
+	}
+
+	/**
+	 * Formats an arbitrary object as hex.
+	 * null yields the empty string; a Number is formatted from its long value;
+	 * anything else is converted with toString() and each code point of the
+	 * result is formatted in turn, joined by separator.
+	 *
+	 * @param o the object to format; may be null
+	 * @param places minimum number of hex digits per value
+	 * @param separator text placed between successive code points
+	 * @return the hex rendering
+	 */
+	public static String hex(Object o, int places, String separator) {
+	    if (o == null) return "";
+	    if (o instanceof Number) return hex(((Number)o).longValue(), places);
+
+	    String s = o.toString();
+	    StringBuffer result = new StringBuffer();
+	    int ch;
+	    for (int i = 0; i < s.length(); i += UTF32.count16(ch)) {
+	        if (i != 0) result.append(separator);
+	        ch = UTF32.char32At(s, i);
+	        // honor the requested width (previously always padded to 4)
+	        result.append(hex(ch, places));
+	    }
+	    return result.toString();
+	}
+
+	/**
+	 * Formats the bytes o[start..end) as two-digit unsigned hex values separated by spaces.
+	 *
+	 * @param o the source bytes
+	 * @param start index of the first byte to format
+	 * @param end index one past the last byte to format
+	 * @return the hex rendering, with no leading or trailing separator
+	 */
+	public static String hex(byte[] o, int start, int end) {
+	    StringBuffer result = new StringBuffer();
+	    for (int i = start; i < end; ++i) {
+	        // compare against start, not 0, so a non-zero start gets no leading space
+	        if (i != start) result.append(' ');
+	        result.append(hex(o[i] & 0xFF, 2));
+	    }
+	    return result.toString();
+	}
+
+	/**
+	 * Formats the chars o[start..end) as four-digit hex values separated by spaces.
+	 *
+	 * @param o the source chars
+	 * @param start index of the first char to format
+	 * @param end index one past the last char to format
+	 * @return the hex rendering, with no leading or trailing separator
+	 */
+	public static String hex(char[] o, int start, int end) {
+	    StringBuffer result = new StringBuffer();
+	    for (int i = start; i < end; ++i) {
+	        // compare against start, not 0, so a non-zero start gets no leading space
+	        if (i != start) result.append(' ');
+	        result.append(hex(o[i], 4));
+	    }
+	    return result.toString();
+	}
+
+	public static String repeat(String s, int count) {
+	    if (count <= 0) return "";
+	    if (count == 1) return s;
+	    StringBuffer result = new StringBuffer(count*s.length());
+	    for (int i = 0; i < count; ++i) {
+	        result.append(s);
+	    }
+	    return result.toString();
+	}
+
+    /**
+     * Parses a decimal integer field.
+     *
+     * @return Short.MIN_VALUE for the empty string, otherwise Integer.parseInt(p)
+     */
+    public static int intFrom(String p) {
+        return p.length() == 0 ? Short.MIN_VALUE : Integer.parseInt(p);
+    }
+
+    /**
+     * Parses a numeric field that is either a plain float or a fraction "num/den".
+     * An empty string yields NaN. In the fraction form both parts are parsed as
+     * integers, and an empty part counts as 0.
+     */
+    public static float floatFrom(String p) {
+        if (p.length() == 0) return Float.NaN;
+        int slash = p.indexOf('/');
+        if (slash == -1) return Float.valueOf(p).floatValue();
+
+        String numText = p.substring(0, slash);
+        String denText = p.substring(slash + 1);
+        float num = numText.length() == 0 ? 0 : Integer.parseInt(numText);
+        float den = denText.length() == 0 ? 0 : Integer.parseInt(denText);
+        return num/den;
+    }
+    
+    /**
+     * Parses hex text that must denote exactly one code point.
+     *
+     * @throws ChainException if the text decodes to zero or several code points
+     */
+    public static int codePointFromHex(String p) {
+        String decoded = Utility.fromHex(p);
+        if (UTF32.length32(decoded) == 1) {
+            return UTF32.char32At(decoded, 0);
+        }
+        throw new ChainException("String is not single (UTF32) character: " + p, null);
+    }
+
+    /**
+     * Converts a list of hex numbers into a string, one code point per number.
+     * Numbers are separated by space, comma, semicolon, or any character whose
+     * type is Character.SPACE_SEPARATOR; runs of separators are collapsed.
+     *
+     * @param p the hex text
+     * @return the decoded string
+     * @throws ChainException on any other character, or when a number exceeds 0x10FFFF
+     */
+    public static String fromHex(String p) {
+        StringBuffer output = new StringBuffer();
+        int value = 0;      // number accumulated so far
+        int count = 0;      // digits seen in the current number; 0 means none pending
+        main:
+        for (int i = 0; i < p.length(); ++i) {
+            char ch = p.charAt(i);
+            int digit = 0;
+            switch (ch) {
+                case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+                    digit = ch - 'a' + 10;
+                    break;
+                case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+                    digit = ch - 'A' + 10;
+                    break;
+                case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
+                case '8': case '9':
+                    digit = ch - '0';
+                    break;
+                default:
+                    // anything that is not a space separator is an error
+                    int type = Character.getType(ch);
+                    if (type != Character.SPACE_SEPARATOR) {
+                        throw new ChainException("bad hex value: '{0}' at position {1} in \"{2}\"",
+                            new Object[] {String.valueOf(ch), new Integer(i), p});
+                    }
+                    // fall through!!
+                case ' ': case ',': case ';': // do SPACE here, just for speed
+                    // separator: emit the pending number, if any, and start over
+                    if (count != 0) {
+                        UTF32.append32(output, value);
+                    }
+                    count = 0;
+                    value = 0;
+                    continue main;
+            }
+            // only hex digits reach this point
+            value <<= 4;
+            value += digit;
+            if (value > 0x10FFFF) {
+                throw new ChainException("Character code too large: '{0}' at position {1} in \"{2}\"",
+                    new Object[] {String.valueOf(ch), new Integer(i), p});
+            }
+            count++;
+        }
+        // emit a trailing number that was not followed by a separator
+        if (count != 0) {
+            UTF32.append32(output, value);
+        }
+        return output.toString();
+    }
+
+	/**
+	 * Splits s at every occurrence of divider, storing the pieces in output.
+	 * Slots of output beyond the last piece are filled with "".
+	 *
+	 * @param s the text to split
+	 * @param divider the separating character
+	 * @param output receives the pieces; must have room for all of them
+	 * @return the number of pieces found (always at least 1)
+	 */
+	public static int split(String s, char divider, String[] output) {
+	    int filled = 0;
+	    int fieldStart = 0;
+	    int hit = s.indexOf(divider);
+	    while (hit >= 0) {
+	        output[filled++] = s.substring(fieldStart, hit);
+	        fieldStart = hit + 1;
+	        hit = s.indexOf(divider, fieldStart);
+	    }
+	    output[filled++] = s.substring(fieldStart);
+	    int result = filled;
+	    // blank out the unused tail
+	    while (filled < output.length) {
+	        output[filled++] = "";
+	    }
+	    return result;
+	}
+
+	/**
+	 * Splits s at every occurrence of divider and returns the pieces in a new array.
+	 * The array is sized from the number of dividers in s, so any number of
+	 * pieces is supported.
+	 *
+	 * @param s the text to split
+	 * @param divider the separating character
+	 * @return the pieces, in order; never empty
+	 */
+	public static String[] split(String s, char divider) {
+	    // one piece more than there are dividers
+	    int pieces = 1;
+	    for (int i = 0; i < s.length(); ++i) {
+	        if (s.charAt(i) == divider) ++pieces;
+	    }
+	    String[] result = new String[pieces];
+	    split(s, divider, result);
+	    return result;
+	}
+
+	/**
+	 * Copies source[start..end) into a freshly allocated array.
+	 *
+	 * @param source the array to copy from
+	 * @param start index of the first element to copy
+	 * @param end index one past the last element to copy
+	 * @return a new array of length end - start
+	 */
+	public static String[] extract(String[] source, int start, int end) {
+	    final int length = end - start;
+	    final String[] slice = new String[length];
+	    System.arraycopy(source, start, slice, 0, length);
+	    return slice;
+	}
+
+	/*
+	public static String quoteJava(String s) {
+	    StringBuffer result = new StringBuffer();
+	    for (int i = 0; i < s.length(); ++i) {
+	        result.append(quoteJava(s.charAt(i)));
+	    }
+	    return result.toString();
+	}
+	*/
+	public static String quoteJavaString(String s) {
+	    if (s == null) return "null";
+	    StringBuffer result = new StringBuffer();
+	    result.append('"');
+	    for (int i = 0; i < s.length(); ++i) {
+	        result.append(quoteJava(s.charAt(i)));
+	    }
+	    result.append('"');
+	    return result.toString();
+	}
+
+	/**
+	 * Escapes a single code point for use inside a Java string literal.
+	 * Backslash, double quote, CR and LF get their short escapes; other printable
+	 * ASCII (0x20..0x7E) is returned unchanged; everything else becomes one \\uXXXX
+	 * escape, or two (lead and trail surrogate) for supplementary code points.
+	 */
+	public static String quoteJava(int c) {
+	    if (c == '\\') return "\\\\";
+	    if (c == '"') return "\\\"";
+	    if (c == '\r') return "\\r";
+	    if (c == '\n') return "\\n";
+	    if (c >= 0x20 && c <= 0x7E) {
+	        return String.valueOf((char)c);
+	    }
+	    if (!UTF32.isSupplementary(c)) {
+	        return "\\u" + hex((char)c,4);
+	    }
+	    return "\\u" + hex((char)UTF32.getLead(c),4) + "\\u" + hex((char)UTF32.getTrail(c),4);
+	}
+
+    public static String quoteXML(int c) {
+        switch (c) {
+            case '<': return "&lt;";
+            case '>': return "&gt;";
+            case '&': return "&amp;";
+            case '\'': return "&apos;";
+            case '"': return "&quot;";
+
+            // fix controls, since XML can't handle
+
+            // also do this for 09, 0A, and 0D, so we can see them.
+            case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06: case 0x07:
+            case 0x08: case 0x09: case 0x0A: case 0x0B: case 0x0C: case 0x0D: case 0x0E: case 0x0F:
+            case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17:
+            case 0x18: case 0x19: case 0x1A: case 0x1B: case 0x1C: case 0x1D: case 0x1E: case 0x1F:
+            case 0x7F:
+
+             // fix noncharacters, since XML can't handle
+            case 0xFFFE: case 0xFFFF:
+
+                return "#x" + hex(c,1) + ";";
+        }
+
+        // fix surrogates, since XML can't handle
+        if (UTF32.isSurrogate(c)) {
+            return "#x" + hex(c,1) + ";";
+        }
+
+        if (c <= 0x7E || UTF8) {
+            return UTF32.valueOf32(c);
+        }
+
+        // fix supplementaries & high characters, because of IE bug
+        /*if (UTF32.isSupplementary(c) || 0xFFF9 <= c && c <= 0xFFFD) {
+            return "#x" + hex(c,1) + ";";
+        }
+        */
+
+        return "&#x" + hex(c,1) + ";";
+    }
+
+    /**
+     * Quotes every code point of source with quoteXML(int).
+     *
+     * @return the quoted text, or the bare text null when source is null
+     */
+    public static String quoteXML(String source) {
+        if (source == null) return "null";
+        StringBuffer result = new StringBuffer();
+        int i = 0;
+        while (i < source.length()) {
+            int c = UTF32.char32At(source, i);
+            // a supplementary code point occupies two chars
+            i += UTF32.isSupplementary(c) ? 2 : 1;
+            result.append(quoteXML(c));
+        }
+        return result.toString();
+    }
+
+    /**
+     * Lexicographically compares a[aStart..aEnd) with b[bStart..bEnd).
+     *
+     * @return the difference of the first differing pair of chars; if one range is a
+     *   prefix of the other, the difference of the remaining lengths (0 when equal)
+     */
+    public static int compare(char[] a, int aStart, int aEnd, char[] b, int bStart, int bEnd) {
+        int i = aStart;
+        int j = bStart;
+        for (; i < aEnd && j < bEnd; ++i, ++j) {
+            int diff = a[i] - b[j];
+            if (diff != 0) return diff;
+        }
+        return (aEnd - i) - (bEnd - j);
+    }
+
+    /**
+     * Lexicographically compares a[aStart..aEnd) with b[bStart..bEnd), treating bytes as signed.
+     *
+     * @return the difference of the first differing pair of bytes; if one range is a
+     *   prefix of the other, the difference of the remaining lengths (0 when equal)
+     */
+    public static int compare(byte[] a, int aStart, int aEnd, byte[] b, int bStart, int bEnd) {
+        int i = aStart;
+        int j = bStart;
+        for (; i < aEnd && j < bEnd; ++i, ++j) {
+            int diff = a[i] - b[j];
+            if (diff != 0) return diff;
+        }
+        return (aEnd - i) - (bEnd - j);
+    }
+
+    /**
+     * Lexicographically compares a[aStart..aEnd) with b[bStart..bEnd), treating
+     * each byte as an unsigned value 0..255.
+     *
+     * @return the difference of the first differing pair of bytes; if one range is a
+     *   prefix of the other, the difference of the remaining lengths (0 when equal)
+     */
+    public static int compareUnsigned(byte[] a, int aStart, int aEnd, byte[] b, int bStart, int bEnd) {
+        int i = aStart;
+        int j = bStart;
+        for (; i < aEnd && j < bEnd; ++i, ++j) {
+            int left = a[i] & 0xFF;
+            int right = b[j] & 0xFF;
+            if (left != right) return left - right;
+        }
+        return (aEnd - i) - (bEnd - j);
+    }
+
+    /**
+     * Renders array as "{e0<sep>e1<sep>...}".
+     *
+     * @param array the values to render
+     * @param sep text placed between successive values
+     * @return the values joined by sep and wrapped in braces; "{}" for an empty array
+     */
+    public static String join(int[] array, String sep) {
+        // StringBuffer avoids re-copying the whole result on every append
+        StringBuffer result = new StringBuffer("{");
+        for (int i = 0; i < array.length; ++i) {
+            if (i != 0) result.append(sep);
+            result.append(array[i]);
+        }
+        return result.append('}').toString();
+    }
+
+    /**
+     * Renders array as "{e0<sep>e1<sep>...}".
+     *
+     * @param array the values to render
+     * @param sep text placed between successive values
+     * @return the values joined by sep and wrapped in braces; "{}" for an empty array
+     */
+    public static String join(long[] array, String sep) {
+        // StringBuffer avoids re-copying the whole result on every append
+        StringBuffer result = new StringBuffer("{");
+        for (int i = 0; i < array.length; ++i) {
+            if (i != 0) result.append(sep);
+            result.append(array[i]);
+        }
+        return result.append('}').toString();
+    }
+
+    private static final String[] searchPath = {
+        "EXTRAS",
+        "3.1.1",
+        "3.1.0",
+        "3.0.1",
+        "3.0.0",
+        "2.1.9",
+        "2.0.0",
+        "1.1.0",
+    };
+
+    private static final String DATA_DIR = "C:\\DATA";
+
+    public static PrintWriter openPrintWriter(String filename) throws IOException {
+        return new PrintWriter(
+                    new UTF8StreamWriter(new FileOutputStream(DATA_DIR + File.separator + "GEN" + File.separator + filename),
+                    32*1024));
+    }
+
+    public static BufferedReader openUnicodeFile(String filename, String version) throws IOException {
+        // get all the files in the directory
+
+        for (int i = 0; i < searchPath.length; ++i) {
+            if (version.length() != 0 && version.compareTo(searchPath[i]) < 0) continue;
+
+            String directoryName = DATA_DIR + File.separator + searchPath[i] + "-Update" + File.separator;
+            System.out.println("Trying: '" + directoryName + "'");
+            File directory = new File(directoryName);
+            String[] list = directory.list();
+            for (int j = 0; j < list.length; ++j) {
+                String fn = list[j];
+                if (!fn.endsWith(".txt")) continue;
+                //System.out.print("\t'" + fn + "'");
+                if (!fn.startsWith(filename)) {
+                    //System.out.println(" -- MISS: '" + filename + "'");
+                    continue;
+                }
+                //System.out.println(" -- HIT");
+                System.out.println("\tFound: '" + fn + "'");
+                return new BufferedReader(new FileReader(directoryName + fn),32*1024);
+            }
+        }
+        return null;
+    }
+
+
+
+}
--- a/tools/unicodetools/com/ibm/text/utility/XMLParse.java
+++ b/tools/unicodetools/com/ibm/text/utility/XMLParse.java
@ -0,0 +1,403 @@
+package com.ibm.text.utility;
+
+/**
+ * Very dumb XML parser, designed for restricted environment where transmitter is guaranteed
+ * to limit types of XML files generated.
+ *
+ * RESTRICTIONS
+ *  Requires document to be well-formed. Doesn't properly signal errors if it is not.
+ *  No DTDs, !DOCTYPE, !ATTLIST, !ELEMENT, ![, !NOTATION, !ENTITY, CDATA
+ *  No processing instructions
+ *  Does do character references, lt, gt, amp, apos, quot
+ *  The encoding is specified by the user, by using the right Reader
+ *  On creation, you supply a buffer for the textual elements.  Use a buffer that is as large
+ * as the largest possible piece of text (e.g. attribute value or element text) in the file.
+ *
+ * @author Mark Davis
+ */
+import java.io.*;
+
+public final class XMLParse implements XMLParseTypes {
+
+    /** Create a parser that reads from an existing Reader.
+     * @param stream source of the XML text
+     * @param buffer receives the text of each item; kept by reference, not copied
+     */
+    public XMLParse(Reader stream, char[] buffer) {
+        this.stream = stream;
+        this.buffer = buffer;
+    }
+
+    /** Create a parser that reads from a file, opened with FileReader
+     * wrapped in a 32K BufferedReader.
+     * @param fileName path of the XML file
+     * @param buffer receives the text of each item; kept by reference, not copied
+     * @throws FileNotFoundException if the file cannot be opened
+     */
+    public XMLParse(String fileName, char[] buffer) throws FileNotFoundException {
+        stream = new BufferedReader(new FileReader(fileName),32*1024);
+        this.buffer = buffer;
+    }
+
+    /** Get the textual value associated with this item, as a new String
+     * built from the first getValueCount() chars of the buffer.
+     * Only valid for ELEMENT_TAG*, ATTRIBUTE*, TEXT.
+     */
+    public String getValue() {
+        return String.valueOf(buffer, 0, bufferCount);
+    }
+
+    /** Get length of the textual value associated with this item.
+     * Only valid for ELEMENT_TAG*, ATTRIBUTE*, TEXT.
+     */
+    public int getValueCount() {
+        return bufferCount;
+    }
+
+    /** Get the buffer that was passed in on creation (the array itself, not a copy).
+     * Its first getValueCount() chars hold the current value.
+     */
+    public char[] getValueArray() {
+        return buffer;
+    }
+
+    /** Get the "kind" of the last item returned by next() (see XMLParseTypes)
+     */
+    public int getKind() {
+        return kind;
+    }
+
+    /** Get the next element, returning a "Kind" (see XMLParseTypes).
+     * Characters are read one at a time, classified, and fed to a small state
+     * machine until a complete item has been recognized; its text, if any, is
+     * then in the buffer. Returns DONE once the input is exhausted. Any
+     * exception raised while reading or decoding a character is caught and
+     * treated as end of input.
+     */
+
+    public byte next() {
+
+        char c = '\u0000';
+        char type = c;
+
+        // 0xFFFF is the end marker: it is what (char)-1 from read() yields,
+        // and what the catch block below sets.
+        while (c != 0xFFFF) {
+            try {
+
+                // First read the character. If there is a buffered char, use it instead
+
+                if (bufferChar != 0) {
+                    c = bufferChar;
+                    bufferChar = 0;
+                } else {
+                    c = (char) stream.read();
+                }
+
+                // Now set the right type. Since we assume validity, anything but the syntax chars
+                // can be classed as IDENTIFIER
+
+                switch (c) {
+                    case ' ': case '\r': case '\n': case '\t':
+                        type = ' ';
+                        break;
+                    case '<': case '>':  case '#': case ';': case '/': case '\'': case '"':
+                    case '=': case '?': case '!': case '-':
+                        type = c;
+                        break;
+                    case '&':   // CR, either numerical or lt, gt, quot, amp, apos
+
+                        // gather characters
+                        // (running past the end of crBuffer throws, which lands in the catch below)
+
+                        int crCount = 0;
+                        while (true) {
+                            c = (char) stream.read();
+                            if (c == ';') break;
+                            crBuffer[crCount++] = c;
+                        }
+
+                        // parse it, and break into two pieces if necessary
+
+                        int x = parseCR(crBuffer, crCount);
+                        c = (char)x;
+                        if (x > 0xFFFF) {            // Supplementary
+                            // deliver the lead surrogate now, the trail on the next pass
+                            x -= 0x10000;
+                            c = (char) (0xD800 + (x >> 10));
+                            bufferChar = (char) (0xDC00 + (x & 0x3FF));
+                        }
+
+                        // Since we assume validity, any CRs are not syntax characters
+
+                        type = IDENTIFIER; // everything else
+                        break;
+                    default:
+                        type = IDENTIFIER; // everything else
+                        break;
+                }
+            } catch (Exception e) {
+                c = '\uFFFF';
+            }
+
+            // We now have a character. Throw it at our little state machine
+
+            if (SHOW) System.out.println(c + ", " + type + ", " + stateNames[state]);
+            switch (state) {
+                case IN_TEXT:
+                    if (type == '<') {
+                        state = START_ELEMENT;
+                        // report accumulated text, if any, before starting the tag
+                        if (bufferCount != 0) {
+                            kind = TEXT;
+                            return kind;
+                        }
+                        break;
+                    }
+                    buffer[bufferCount++] = c;
+                    break;
+                case START_ELEMENT: // must be either '/' or more than one ID char
+                    bufferCount = 0;
+                    switch (type) {
+                        case '/':
+                            elementType = ELEMENT_TAG_SLASH;
+                            state = IN_ELEMENT;
+                            break;
+                        case '!':
+                            buffer[bufferCount++] = c;
+                            elementType = ELEMENT_TAG_COMMENT;
+                            state = IN_COMMENT;
+                            break;
+                        case '?':
+                            elementType = ELEMENT_TAG_QUESTION;
+                            state = IN_ELEMENT;
+                            break;
+                        default:
+                            elementType = ELEMENT_TAG;
+                            buffer[bufferCount++] = c;
+                            state = IN_ELEMENT;
+                            break;
+                    }
+                    break;
+                // IN_COMMENT, IN_COMMENT2 and IN_COMMENT3 track how many
+                // consecutive '-' have just been seen (0, 1, 2 or more).
+                case IN_COMMENT:
+                    buffer[bufferCount++] = c;
+                    if (type == '-') state = IN_COMMENT2;
+                    else state = IN_COMMENT;
+                    break;
+                case IN_COMMENT2:
+                    buffer[bufferCount++] = c;
+                    if (type == '-') state = IN_COMMENT3;
+                    else state = IN_COMMENT;
+                    break;
+                case IN_COMMENT3:
+                    if (type == '>') {
+                        kind = ELEMENT_TAG_COMMENT;
+                        // push the '>' back so the next call reports END_ELEMENT_COMMENT
+                        bufferChar = c;
+                        state = IN_ATTRIBUTES;
+                        elementType = END_ELEMENT_COMMENT;
+                        return kind;
+                    } else if (type != '-') {
+                        state = IN_COMMENT;
+                    }
+                    buffer[bufferCount++] = c;
+                    break;
+                case IN_ELEMENT:
+                    if (type != IDENTIFIER) {
+                        // end of the tag name: report it and re-examine this char as attribute syntax
+                        state = IN_ATTRIBUTES;
+                        kind = elementType;
+                        elementType = END_ELEMENT;
+                        bufferChar = c;
+                        return kind;
+                    }
+                    buffer[bufferCount++] = c;
+                    break;
+                case IN_ATTRIBUTES:
+                    bufferCount = 0;
+                    if (type == '/') {
+                        elementType = END_ELEMENT_SLASH;
+                    } else if (type == '?') {
+                        elementType = END_ELEMENT_QUESTION;
+                    } else if (type == '>') {
+                        state = IN_TEXT;
+                        kind = elementType;
+                        return kind;
+                    } else if (type == IDENTIFIER) {
+                        state = IN_ATTR;
+                        buffer[bufferCount++] = c;
+                        break;
+                    }
+                    break;
+                case IN_ATTR:
+                    if (type != IDENTIFIER) {
+                        state = START_VALUE;
+                        kind = ATTRIBUTE_TAG;
+                        return kind;
+                    }
+                    buffer[bufferCount++] = c;
+                    break;
+                case START_VALUE:   // must have <s>* = ( ' | " )
+                    if (type == '\'' || type == '"') {
+                        lastQuote = c;
+                        state = IN_VALUE;
+                        bufferCount = 0;
+                    }
+                    break;
+                case IN_VALUE: // only terminated by lastQuote
+                    if (type == lastQuote) {
+                        state = IN_ATTRIBUTES;
+                        kind = ATTRIBUTE_VALUE;
+                        return kind;
+                    }
+                    buffer[bufferCount++] = c;
+                    break;
+            }
+        }
+        return DONE;
+    }
+
+    /** Utility for doing XML quotes on a single code point, with no flags set
+     * (equivalent to quote(c, 0); see XMLParseTypes for the flag values).
+     */
+
+    public static String quote(int c) {
+        return quote(c, 0);
+    }
+
+    /** Utility for doing XML quotes. Flags control which characters are handled and how.
+     * (see XMLParseTypes for values)
+     */
+
+    public static String quote(int c, int flags) {
+        String result = quoteGuts(c, flags);
+        if (result != null) return result;
+        return String.valueOf((char)c);
+    }
+
+    /** Utility for doing XML quotes on a whole string, with no flags set
+     * (equivalent to quote(source, 0); see XMLParseTypes for the flag values).
+     */
+
+    public static String quote(String source) {
+        return quote(source, 0);
+    }
+
+    /** Utility for doing XML quotes. Flags control which characters are handled and how.
+     * (see XMLParseTypes for values)
+     */
+
+    public static String quote(String source, int flags) {
+        StringBuffer result = new StringBuffer();
+        String temp;
+        for (int i = 0; i < source.length(); ++i) {
+            int c = UTF32.char32At(source, i);
+            if (c > 0xFFFF) ++i;
+            temp = quoteGuts(c, flags);
+            if (temp != null) result.append(temp);
+            else if (c <= 0xFFFF) result.append((char)c);
+            else result.append(source.substring(i-1,i+1)); // surrogates
+        }
+        return result.toString();
+    }
+
+    /** Parses inside of CR. buffer should not contain the initial '&', or final ';'
+     */
+    static int parseCR(char[] crBuffer, int crCount) {
+        int c;
+        int start = 0;
+        if (crCount == 0) return -1;
+        switch (crBuffer[start++]) {
+            case 'l':   c = '<'; break;     // lt
+            case 'g':   c = '>'; break;     // gt
+            case 'q':   c = '"'; break;     // quot
+            case 'a':   // &amp;, &apos;
+                if (crCount > start && crBuffer[start] == 'm') c = '&';
+                else c = '\'';
+                break;
+            case '#':
+                int radix = 10;
+                if (crCount > start && crBuffer[start] == 'x') {
+                    radix = 16;
+                    ++start;
+                }
+                // Simple code for now. Could be sped up.
+                c = Integer.parseInt(String.valueOf(crBuffer,start,crCount-start), radix);
+                break;
+            default:
+                c = -1;
+        }
+        return c;
+    }
+
+    /** Utility for doing hex, padding with zeros
+     */
+
+    static public String hex(long i, int places) {
+        String result = Long.toString(i, 16).toUpperCase();
+        if (result.length() < places) {
+            result = "0000000000000000".substring(result.length(),places) + result;
+        }
+        return result;
+    }
+    // =================== PRIVATES =================================
+
+    private static final char[] buf2 = new char[2];
+
+    private static final boolean SHOW = false;
+
+    private char[] buffer;
+    private int bufferCount;
+    private byte kind = TEXT;
+
+    private Reader stream;
+    private char[] crBuffer = new char[10];
+    private int state = IN_TEXT;
+    private byte elementType;
+    private char lastQuote;
+    private char bufferChar;
+
+    private static final byte IN_TEXT = 0, START_ELEMENT = 1, IN_ELEMENT = 2,
+        IN_ATTR = 3, START_VALUE = 4, IN_VALUE = 5, IN_ATTRIBUTES = 6,
+        IN_COMMENT = 7, IN_COMMENT2 = 8, IN_COMMENT3 = 9;
+
+    private static final String[] stateNames = {"IN_TEXT", "START_ELEMENT", "IN_ELEMENT",
+        "IN_ATTR", "START_VALUE", "IN_VALUE", "IN_ATTRIBUTES",
+        "IN_COMMENT", "IN_COMMENT2", "IN_COMMENT3"};
+
+    private static final char IDENTIFIER = 'a';
+
+
+    /** Shared worker for the quote() methods.
+     * Returns the quoted form of c, or null when c should be emitted unchanged.
+     * The five markup characters always become named entities. Everything else
+     * that is quoted becomes a numeric form, hex by default or decimal with
+     * QUOTE_DECIMAL. When prefix is emptied below, the leading '&' is omitted,
+     * so the result is plain visible text rather than a character reference.
+     */
+    private static String quoteGuts(int c, int flags) {
+        String prefix = "&";
+        switch (c) {
+            case '<': return "&lt;";
+            case '>': return "&gt;";
+            case '&': return "&amp;";
+            case '\'': return "&apos;";
+            case '"': return "&quot;";
+
+            // Optionally fix TAB, CR, LF
+
+            case 0x09: case 0x0A: case 0x0D:
+                if ((flags & QUOTE_TABCRLF) == 0) return null;
+                break;
+
+            // Fix controls, non-characters, since XML can't handle
+
+            case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06: case 0x07:
+            case 0x08: case 0x0B: case 0x0C: case 0x0E: case 0x0F:
+            case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17:
+            case 0x18: case 0x19: case 0x1A: case 0x1B: case 0x1C: case 0x1D: case 0x1E: case 0x1F:
+            case 0x7F:
+            case 0xFFFE: case 0xFFFF:
+                prefix = "";
+                break;
+
+            // Optionally fix IE Bug characters
+            // NOTE(review): 0xFF00..0xFF07 sits oddly next to 0xFFF8..0xFFFD and may
+            // have been meant as 0xFFF0..0xFFF7 -- confirm before relying on it.
+
+            case 0xFF00: case 0xFF01: case 0xFF02: case 0xFF03: case 0xFF04: case 0xFF05: case 0xFF06: case 0xFF07:
+            case 0xFFF8: case 0xFFF9: case 0xFFFA: case 0xFFFB: case 0xFFFC: case 0xFFFD:
+                if ((flags & QUOTE_IEBUG) == 0) return null;
+                prefix = "";
+                break;
+
+            default:
+                if (c <= 0x7E) {    // don't quote other ASCII
+                    if ((flags & QUOTE_ASCII) == 0) return null;
+                } else if (0xD800 <= c && c <= 0xDFFF) {// fix surrogates, since XML can't handle
+                    prefix = "";
+                } else if (c > 0xFFFF && (flags & QUOTE_IEBUG) != 0) {
+                    prefix = "";
+                } else if ((flags & QUOTE_NON_ASCII) == 0) {
+                    return null;
+                }
+                break;
+        }
+        // numeric form: hex unless decimal was requested
+        if ((flags & QUOTE_DECIMAL) == 0) {
+            return prefix + "#x" + hex(c,1) + ";";
+        } else {
+            return prefix + "#" + Integer.toString(c) + ";";
+        }
+    }
+}
--- a/tools/unicodetools/com/ibm/text/utility/XMLParseTypes.java
+++ b/tools/unicodetools/com/ibm/text/utility/XMLParseTypes.java
@ -0,0 +1,35 @@
+package com.ibm.text.utility;
+
+/** Interface of values for use with XMLParse.
+ * Other classes can "implements" this also, to avoid typing XMLParseTypes.XXX
+ */
+public interface XMLParseTypes {
+
+    /** Kind values, for XMLParse.getKind(), next().
+     * The values double as indexes into kindNames.
+     */
+    public static final byte
+        DONE = 0,
+        ELEMENT_TAG = 1, ELEMENT_TAG_SLASH = 2, ELEMENT_TAG_COMMENT = 3, ELEMENT_TAG_QUESTION = 4,
+        END_ELEMENT = 5, END_ELEMENT_SLASH = 6, END_ELEMENT_COMMENT = 7, END_ELEMENT_QUESTION = 8,
+        ATTRIBUTE_TAG = 9, ATTRIBUTE_VALUE = 10,
+        TEXT = 11;
+
+    /** Flag masks for XMLParse.quote(x, flags). Use '|' to combine.
+     * Each mask is a distinct single bit.
+     */
+    public static final byte
+        QUOTE_NON_ASCII = 1,
+        QUOTE_ASCII = 2,
+        QUOTE_IEBUG = 4,
+        QUOTE_TABCRLF = 8,
+        QUOTE_DECIMAL = 16;
+
+    /** For Debugging: printable names of the kind values, in the same order
+     * as the constants above.
+     */
+    static final String[] kindNames = {
+        "DONE",
+        "ELEMENT_TAG", "ELEMENT_TAG_SLASH", "ELEMENT_TAG_COMMENT", "ELEMENT_TAG_QUESTION",
+        "END_ELEMENT", "END_ELEMENT_SLASH", "END_ELEMENT_COMMENT", "END_ELEMENT_QUESTION",
+        "ATTRIBUTE_TAG", "ATTRIBUTE_VALUE",
+        "TEXT",
+        };
+}
--- a/tools/unicodetools/com/ibm/text/utility/testParser.java
+++ b/tools/unicodetools/com/ibm/text/utility/testParser.java
@ -0,0 +1,336 @@
+package com.ibm.text.utility;
+
+/** Simple Test program for XMLParse
+ */
+import java.io.*;
+import java.util.*;
+
+public class testParser implements XMLParseTypes {
+    /** Directory prefix for the files this class reads ("UCD-Main.xml") and writes
+     * ("CaseFoldingDraft3.txt"). Hard-coded to an absolute Windows path.
+     */
+    public static final String BASE_DIR = "C:\\Documents and Settings\\Davis\\My Documents\\UnicodeData\\UNIDATA 3.0.1\\";
+    /** When true, test1() prints one "kind, value, quoted value" line per parser item
+     * instead of re-emitting XML markup.
+     */
+    public static final boolean VERBOSE = false;
+
+    /** Input file parsed by test1(); the trailing comment lists alternatives that were tried. */
+    private static final String testFile = BASE_DIR + "UCD-Main.xml"; // "test.xml"; // BASE_DIR + "UCD-Main.xml";
+
+    /**
+     * Command-line entry point. Only test3() is currently enabled; test1() and
+     * test2() can be run by calling them from here instead.
+     *
+     * @param args ignored
+     * @throws Exception whatever the selected test throws
+     */
+    public static void main(String[] args) throws Exception {
+        test3();
+    }
+
+    public static void test1() throws Exception {
+        XMLParse xml = new XMLParse(testFile, new char[1000]);
+        for (int i = 0; i < 100000; ++i) {
+            byte kind = xml.next();
+            if (kind == DONE) break;
+            String value = xml.getValue();
+            int quoteFlags = QUOTE_IEBUG | QUOTE_NON_ASCII | (kind != TEXT ? QUOTE_TABCRLF : 0);
+            String qValue = XMLParse.quote(value, quoteFlags);
+            if (VERBOSE) System.out.println(kindNames[kind] + ", \"" + value + "\", \"" + qValue + "\"");
+            else {
+                switch (kind) {
+                    case ELEMENT_TAG: System.out.print('<' + qValue); break;
+                    case ELEMENT_TAG_SLASH: System.out.print("</" + qValue); break;
+                    case ELEMENT_TAG_COMMENT: System.out.print("<" + qValue); break;
+                    case ELEMENT_TAG_QUESTION: System.out.print("<?" + qValue); break;
+
+                    case END_ELEMENT: System.out.print(">"); break;
+                    case END_ELEMENT_COMMENT: System.out.print(">"); break;
+                    case END_ELEMENT_SLASH: System.out.print("/>"); break;
+                    case END_ELEMENT_QUESTION: System.out.print("?>"); break;
+
+                    case ATTRIBUTE_TAG: System.out.print(" " + qValue + "="); break;
+                    case ATTRIBUTE_VALUE: System.out.print("\"" + qValue + "\""); break;
+
+                    case TEXT: System.out.print(qValue); break;
+
+                    default: throw new Exception("Unknown KIND");
+                }
+            }
+        }
+    }
+
+    /** Flag combination passed to XMLParse.quote for every table cell written by test2(). */
+    static final int NORMAL_QUOTE = QUOTE_NON_ASCII | QUOTE_IEBUG | QUOTE_TABCRLF;
+
+    /**
+     * Writes "UCD-Extract.html": an HTML listing with one row per "e" element of
+     * UCD-Main.xml, showing the code point, the character, and its "gc", "xs" and
+     * "n" attributes. A new table is started whenever the code point leaves the
+     * current block of 32, and progress is reported on System.out whenever it
+     * leaves the current block of 256.
+     *
+     * Missing attributes are defaulted: name to "&lt;computed&gt;", props to a
+     * no-break space, gc to "Lo". Elements with no "c" attribute are skipped.
+     *
+     * @throws Exception if the input cannot be parsed or the output cannot be opened
+     */
+    static void test2() throws Exception {
+
+        PrintWriter log = Utility.openPrintWriter("UCD-Extract.html");
+
+        try {
+            log.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>");
+            log.println("<style><!--");
+            log.println("th { background-color: #99FFFF; text-align: Left; font-style: italic; font-weight: bold }");
+            log.println("table { page-break-after: always }");
+            log.println("--></style>");
+
+            log.println("<title>Extract from UCD</title>");
+            log.println("</head><body>");
+
+            // Header row is closed exactly once (a stray second </tr> made the markup invalid).
+            String tableHead = "<table border='1' width='100%' cellpadding='4'><tr>"
+                + "<th width='20'>Code</th>"
+                + "<th width='20'>Char</th>"
+                + "<th width='20'>GC</th>"
+                + "<th width='50%'>Props</th>"
+                + "<th width='50%'>Name</th></tr>";
+            log.println(tableHead);
+
+            XMLParse xml = new XMLParse(BASE_DIR + "UCD-Main.xml", new char[1000]);
+            boolean recordingChar = false;  // true while inside an "e" element
+            int topByte = 0;                // code point that opened the current table
+            int printByte = 0;              // code point of the last progress message
+            Map data = new TreeMap();       // attribute name -> value for the current element
+            String lastTag = "";
+
+            while (true) {
+                byte kind = xml.next();
+                if (kind == DONE) break;
+                String value = xml.getValue();
+                switch (kind) {
+                    case ELEMENT_TAG:
+                        recordingChar = value.equals("e");
+                        break;
+
+                    case ATTRIBUTE_TAG:
+                        if (!recordingChar) break;
+                        lastTag = value;
+                        break;
+
+                    case ATTRIBUTE_VALUE:
+                        if (!recordingChar) break;
+                        data.put(lastTag, value);
+                        break;
+
+                    case END_ELEMENT:
+                    case END_ELEMENT_SLASH:
+                        if (!recordingChar) break;
+                        recordingChar = false;
+
+                        // get data
+
+                        String ch = (String)data.get("c");
+                        if (ch == null) {
+                            // No character attribute: nothing to list, and fixHack
+                            // would fail on null. Drop the record and move on.
+                            data.clear();
+                            break;
+                        }
+                        ch = fixHack(ch);
+                        String name = (String)data.get("n");
+                        if (name == null) name = "<computed>";
+                        String props = (String)data.get("xs");
+                        if (props == null) props = "\u00A0";
+                        String gc = (String)data.get("gc");
+                        if (gc == null) gc = "Lo";
+
+                        // split tables: one table per block of 32 code points
+                        int code = UTF32.char32At(ch, 0);
+                        if ((topByte & ~0x1F) != (code & ~0x1F)) {
+                            log.println("</table><br>");
+                            log.println(tableHead);
+                            topByte = code;
+                            if ((printByte & ~0xFF) != (code & ~0xFF)) {
+                                System.out.println("Printing table for " + XMLParse.hex(topByte,2));
+                                printByte = code;
+                            }
+                        }
+
+                        // draw line
+
+                        log.println("<tr><td>" + XMLParse.hex(code,4) +
+                            "</td><td>" + XMLParse.quote(ch,NORMAL_QUOTE) +
+                            "</td><td>" + XMLParse.quote(gc,NORMAL_QUOTE) +
+                            "</td><td>" + XMLParse.quote(props,NORMAL_QUOTE) +
+                            "</td><td>" + XMLParse.quote(name,NORMAL_QUOTE) + "</td></tr>");
+
+                        // clear storage so attributes do not leak into the next element
+                        data.clear();
+                        break;
+
+                }
+            }
+            log.println("</table></body></html>");
+        } finally {
+            log.close();
+        }
+    }
+
+    /**
+     * Opens BASE_DIR + "CaseFoldingDraft3.txt" as a buffered "UTF8" writer and
+     * appends, in order, the ranges that collect() finds for Other_Math,
+     * Other_Alphabetic and Other_Composite. The writer is always closed.
+     *
+     * @throws Exception if the output cannot be opened or collect() fails
+     */
+    static void test3() throws Exception {
+        FileOutputStream fileOut = new FileOutputStream(BASE_DIR + "CaseFoldingDraft3.txt");
+        Writer encoded = new OutputStreamWriter(fileOut, "UTF8");
+        PrintWriter log = new PrintWriter(new BufferedWriter(encoded, 32*1024));
+
+        try {
+            String[] properties = { "Other_Math", "Other_Alphabetic", "Other_Composite" };
+            for (int i = 0; i < properties.length; ++i) {
+                collect(log, properties[i]);
+            }
+        } finally {
+            log.close();
+        }
+    }
+
+        /**
+         * Scans UCD-Main.xml and writes to log one line per run of consecutive
+         * code points whose "xs" attribute value contains prop (substring match).
+         * Each line has the form
+         * "start; end; prop; # category; count; name" where end is blank for a
+         * single-character run and the name gets a "..." suffix for longer runs.
+         * Category and name are those recorded for the first character of the run.
+         * A progress line is written to System.err every 10000 parser items.
+         *
+         * The "c", "n" and "gc" attributes are remembered as they are seen and
+         * used when a matching "xs" attribute arrives.
+         * NOTE(review): lastName/lastCat are never reset between elements, so an
+         * element lacking "n" or "gc" appears to inherit the previous element's
+         * value; confirm against the data file's schema before relying on names.
+         *
+         * @param log  destination for the range lines
+         * @param prop property name to look for inside "xs" values
+         * @throws Exception if the input cannot be parsed
+         */
+        static final void collect(PrintWriter log, String prop)  throws Exception {
+            XMLParse xml = new XMLParse(BASE_DIR + "UCD-Main.xml", new char[1000]);
+            String lastTag = "";
+            String lastChar = "";
+            String lastName = "";
+            String lastCat = "";
+            // endChar starts at -2 so that neither "ch == endChar + 1" nor
+            // "endChar >= 0" can be true before the first match, even for U+0000.
+            int startChar = -1;
+            int endChar = -2;
+            String startName = "";
+            String startCat = "";
+
+            for (int line = 0; ; ++line) {
+                if ((line % 10000) == 0) System.err.println("Item " + line);
+                byte kind = xml.next();
+                if (kind == DONE) break;
+                String value = xml.getValue();
+                switch (kind) {
+                    case ATTRIBUTE_TAG:
+                        lastTag = value;
+                        break;
+
+                    case ATTRIBUTE_VALUE:
+                        if (lastTag.equals("c")) lastChar = value;
+                        else if (lastTag.equals("n")) lastName = value;
+                        else if (lastTag.equals("gc")) lastCat = value;
+                        else if (lastTag.equals("xs") && value.indexOf(prop) >= 0) {
+                            lastChar = fixHack(lastChar);
+                            int ch = UTF32.char32At(lastChar,0);
+                            if (ch == endChar + 1) {
+                                // extends the current run
+                                endChar = ch;
+                            } else {
+                                // flush the finished run (if any) and start a new one
+                                //FDD0; FDEF; Noncharacter_Code_Point; # XX;    32;
+                                printRange(log, prop, startChar, endChar, startCat, startName);
+                                startChar = endChar = ch;
+                                startName = lastName;
+                                startCat = lastCat;
+                            }
+                        }
+                        break;
+                }
+            }
+            // flush the last run
+            printRange(log, prop, startChar, endChar, startCat, startName);
+        }
+
+        /** Writes one range line for collect(); does nothing while no run has been started (endChar &lt; 0). */
+        private static void printRange(PrintWriter log, String prop, int startChar, int endChar,
+                String startCat, String startName) {
+            if (endChar < 0) return;
+            boolean single = (endChar == startChar);
+            log.println(Utility.hex(startChar, 4) + "; "
+                + (single ? "    " : Utility.hex(endChar, 4))
+                + "; " + prop
+                + "; # " + startCat
+                + "; " + (endChar-startChar+1)
+                + "; " + startName
+                + (single ? "" : "..."));
+        }
+
+    /**
+     * Writes BASE_DIR + "CaseFoldingDraft3.txt" from UCD-Main.xml. For every "e"
+     * element whose folded form differs from the character itself, one or two
+     * lines are written:
+     * "code; C; folded; # name" when the folded string has length 1, otherwise
+     * "code; F; folded; # name" followed, if the lowercase differs from the
+     * character, by "code; S; lowercase; # name".
+     *
+     * The folded form is the "fc" attribute, else "sl", else the lowercase; the
+     * lowercase is the "lc" attribute, else the character itself. A missing name
+     * becomes "&lt;computed&gt;". Elements with no "c" attribute are skipped.
+     * A progress line is written to System.err every 10000 parser items.
+     *
+     * NOTE(review): fc.length() counts UTF-16 units, so a single supplementary
+     * code point would be classified as "F"; confirm whether that can occur.
+     *
+     * @throws Exception if the input cannot be parsed or the output cannot be opened
+     */
+    static void test4() throws Exception {
+        PrintWriter log = new PrintWriter(new BufferedWriter(
+            new OutputStreamWriter(
+                new FileOutputStream(BASE_DIR + "CaseFoldingDraft3.txt"),
+                "UTF8"),
+            32*1024));
+
+        try {
+            XMLParse xml = new XMLParse(BASE_DIR + "UCD-Main.xml", new char[1000]);
+            boolean recordingChar = false;  // true while inside an "e" element
+            Map data = new TreeMap();       // attribute name -> value for the current element
+            String lastTag = "";
+
+            for (int line = 0; ; ++line) {
+                if ((line % 10000) == 0) System.err.println("Item " + line);
+                byte kind = xml.next();
+                if (kind == DONE) break;
+                String value = xml.getValue();
+                switch (kind) {
+                    case ELEMENT_TAG:
+                        recordingChar = value.equals("e");
+                        break;
+
+                    case ATTRIBUTE_TAG:
+                        if (!recordingChar) break;
+                        lastTag = value;
+                        break;
+
+                    case ATTRIBUTE_VALUE:
+                        if (!recordingChar) break;
+                        data.put(lastTag, value);
+                        break;
+
+                    case END_ELEMENT:
+                    case END_ELEMENT_SLASH:
+                        if (!recordingChar) break;
+                        recordingChar = false;
+
+                        // get data
+
+                        String ch = (String)data.get("c");
+                        if (ch == null) {
+                            // No character attribute: nothing to fold, and fixHack
+                            // would fail on null. Drop the record and move on.
+                            data.clear();
+                            break;
+                        }
+                        ch = fixHack(ch);
+
+                        String name = (String)data.get("n");
+                        if (name == null) name = "<computed>";
+
+                        String lc = (String)data.get("lc");
+                        if (lc == null) lc = ch;
+
+                        String fc = (String)data.get("fc");
+                        if (fc == null) fc = (String)data.get("sl");
+                        if (fc == null) fc = lc;
+
+                        // Characters that fold to themselves produce no output, but
+                        // must still fall through to data.clear() below; skipping
+                        // it let their attributes (e.g. "n") leak into the next
+                        // element's record.
+                        if (!fc.equals(ch)) {
+                            if (fc.length() == 1) {
+                                log.println(Utility.hex(ch, " ") + "; C; " + Utility.hex(fc, " ") + "; # " + name);
+                            } else {
+                                log.println(Utility.hex(ch, " ") + "; F; " + Utility.hex(fc, " ") + "; # " + name);
+                                if (!lc.equals(ch)) {
+                                    log.println(Utility.hex(ch, " ") + "; S; " + Utility.hex(lc, " ") + "; # " + name);
+                                }
+                            }
+                        }
+
+                        // clear storage
+                        data.clear();
+                        break;
+
+                }
+            }
+        } finally {
+            log.close();
+        }
+    }
+
+    /**
+     * Replaces every "#x" + hex digits + ";" sequence in s with the character
+     * whose code point the hex digits give (via UTF32.valueOf32); all other text
+     * is copied unchanged. Everything between "#x" and the next ';' is treated as
+     * the hex value.
+     *
+     * An escape that is never closed by ';' is copied through literally rather
+     * than being dropped.
+     *
+     * @param s text to convert; must not be null
+     * @return the converted text
+     * @throws NumberFormatException if the text between "#x" and ';' is not a
+     *         valid hexadecimal int (including when it is empty)
+     */
+    static final String fixHack(String s) {
+        StringBuffer result = new StringBuffer();
+        char last = '\u0000';
+        // Index of the first hex digit of the escape being read, or -1 when not
+        // inside one. Always >= 2 when set, since "#x" precedes it.
+        int position = -1;
+        for (int i = 0; i < s.length(); ++i) {
+            char c = s.charAt(i);
+            if (position > 0) {
+                // inside an escape: swallow characters until the terminator
+                if (c == ';') {
+                    int x = Integer.parseInt(s.substring(position,i),16);
+                    result.append(UTF32.valueOf32(x));
+                    position = -1;
+                }
+            } else if (last == '#' && c == 'x') {
+                result.setLength(result.length()-1); // remove '#' already copied
+                position = i+1;
+            } else {
+                result.append(c);
+            }
+            last = c;
+        }
+        if (position > 0) {
+            // Unterminated escape: restore the original "#x..." text, whose '#'
+            // sits two characters before the first hex digit.
+            result.append(s.substring(position - 2));
+        }
+        return result.toString();
+    }
+}