ICU-4700 Misc tools

X-SVN-Rev: 18741
2025-04-14 17:24:01 +00:00 · 2005-11-01 00:10:54 +00:00 · 2005-11-01 00:10:54 +00:00 · b120a3251b
commit b120a3251b
parent ddcee69efa
14 changed files with 853 additions and 196 deletions
--- a/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/ConvertUCD.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $
-* $Date: 2004/11/12 23:17:15 $
-* $Revision: 1.16 $
+* $Date: 2005/11/01 00:10:53 $
+* $Revision: 1.17 $
 *
 *******************************************************************************
 */
@ -396,142 +396,145 @@ public final class ConvertUCD implements UCD_Types {
        try {
    	    String[] parts = new String[20];
            for (int lineNumber = 1; ; ++lineNumber) {
-                line = input.readLine();
-			    if (line == null) break;
-			    if (SHOW && (lineNumber % 500) == 0) System.out.println("//" + lineNumber + ": '" + line + "'");
+                try {
+					line = input.readLine();
+					if (line == null) break;
+					if (SHOW && (lineNumber % 500) == 0) System.out.println("//" + lineNumber + ": '" + line + "'");

-                String original = line;
-			    String comment = "";
-			    int commentPos = line.indexOf('#');
-			    if (commentPos >= 0) {
-			        comment = line.substring(commentPos+1).trim();
-			        line = line.substring(0, commentPos);
-			    }
-			    line = line.trim();
-			    if (line.length() == 0) continue;
+					String original = line;
+					String comment = "";
+					int commentPos = line.indexOf('#');
+					if (commentPos >= 0) {
+					    comment = line.substring(commentPos+1).trim();
+					    line = line.substring(0, commentPos);
+					}
+					line = line.trim();
+					if (line.length() == 0) continue;

-                int count = Utility.split(line,';',parts);
+					int count = Utility.split(line,';',parts);

-                if (false && parts[0].equals("2801")) {
-                    System.out.println("debug?");
-                }
+					if (false && parts[0].equals("2801")) {
+					    System.out.println("debug?");
+					}

-                // fix malformed or simple lists.
+					// fix malformed or simple lists.

-                if (count != labels.length) {
-                    if (count == labels.length + 1 && parts[count-1].equals("")) {
-                        if (!showedSemi) System.out.println("Extra semicolon in: " + original);
-                        showedSemi = true;
-                    } else if (count == 1) { // fix simple list
-                        ++count;
-                        parts[1] = "Y";
-                    } else if (count < labels.length) {
-                        if (!showedShort) System.out.println("Line shorter than labels: " + original);
-                        showedShort = true;
-                        for (int i = count; i < labels.length; ++i) {
-                            parts[i] = "";
-                        }
-                    } else {
-                        throw new ChainException("wrong count: {0}",
-                            new Object[] {new Integer(line), new Integer(count)});
-                    }
-                }
+					if (count != labels.length) {
+					    if (count == labels.length + 1 && parts[count-1].equals("")) {
+					        if (!showedSemi) System.out.println("Extra semicolon in: " + original);
+					        showedSemi = true;
+					    } else if (count == 1) { // fix simple list
+					        ++count;
+					        parts[1] = "Y";
+					    } else if (count < labels.length) {
+					        if (!showedShort) System.out.println("Line shorter than labels: " + original);
+					        showedShort = true;
+					        for (int i = count; i < labels.length; ++i) {
+					            parts[i] = "";
+					        }
+					    } else {
+					        throw new ChainException("wrong count: {0}",
+					            new Object[] {new Integer(line), new Integer(count)});
+					    }
+					}

-                // store char
-                 // first field is always character OR range. May be UTF-32
-                int cpTop;
-                int cpStart;
-                int ddot = parts[0].indexOf(".");
-                if (ddot >= 0) {
-                    cpStart = UTF32.char32At(Utility.fromHex(parts[0].substring(0,ddot)),0);
-                    cpTop = UTF32.char32At(Utility.fromHex(parts[0].substring(ddot+2)),0);
-                    // System.out.println(Utility.hex(cpStart) + " ... " + Utility.hex(cpTop));
-                } else {
-                    cpStart = UTF32.char32At(Utility.fromHex(parts[0]),0);
-                    cpTop = cpStart;
-                    if (labels[1].equals("RANGE")) UTF32.char32At(Utility.fromHex(parts[1]),0);
-                }
+					// store char
+					 // first field is always character OR range. May be UTF-32
+					int cpTop;
+					int cpStart;
+					int ddot = parts[0].indexOf(".");
+					if (ddot >= 0) {
+					    cpStart = UTF32.char32At(Utility.fromHex(parts[0].substring(0,ddot)),0);
+					    cpTop = UTF32.char32At(Utility.fromHex(parts[0].substring(ddot+2)),0);
+					    // System.out.println(Utility.hex(cpStart) + " ... " + Utility.hex(cpTop));
+					} else {
+					    cpStart = UTF32.char32At(Utility.fromHex(parts[0]),0);
+					    cpTop = cpStart;
+					    if (labels[1].equals("RANGE")) UTF32.char32At(Utility.fromHex(parts[1]),0);
+					}

+					// properties first
+					if (labels[1].equals("PROP")) {
+					    String prop = parts[2].trim();
+					    // FIX!!
+					    boolean skipLetters = false;
+					    if (prop.equals("Alphabetic")) {
+					        prop = "Other_Alphabetic";
+					        skipLetters = true;
+					    }
+					    // END FIX!!
+					    properties.add(prop);
+					    if (Utility.find(prop, UCD_Names.DeletedProperties, true) == -1) { // only undeleted
+					        int end = UTF32.char32At(Utility.fromHex(parts[1]),0);
+					        if (end == 0) end = cpStart;

+					        for (int j = cpStart; j <= end; ++j) {
+					            if (j != UCD.mapToRepresentative(j, Integer.MAX_VALUE)) continue;
+					            if (skipLetters && getEntry(cpStart).isLetter()) continue;
+					            appendCharProperties(j, prop);
+					        }
+					    }
+					} else { // not range!
+					    String val = "";
+					    String lastVal;

-                // properties first
-                if (labels[1].equals("PROP")) {
-                    String prop = parts[2].trim();
-                    // FIX!!
-                    boolean skipLetters = false;
-                    if (prop.equals("Alphabetic")) {
-                        prop = "Other_Alphabetic";
-                        skipLetters = true;
-                    }
-                    // END FIX!!
-                    properties.add(prop);
-                    if (Utility.find(prop, UCD_Names.DeletedProperties, true) == -1) { // only undeleted
-                        int end = UTF32.char32At(Utility.fromHex(parts[1]),0);
-                        if (end == 0) end = cpStart;
+					    for (int i = 1; i < labels.length; ++i) {
+					        String key = labels[i];
+					        lastVal = val;
+					        if (isHex.get(key) != null) {
+					            val = Utility.fromHex(parts[i]);
+					        } else {
+					            val = parts[i].trim();
+					        }
+					        if (key.equals("OMIT")) continue; // do after val, so lastVal is correct
+					        if (key.equals("RANGE")) continue; // do after val, so lastVal is correct
+					        if (val.equals("")) continue; // skip empty values, they mean default

-                        for (int j = cpStart; j <= end; ++j) {
-                            if (j != UCD.mapToRepresentative(j, Integer.MAX_VALUE)) continue;
-                            if (skipLetters && getEntry(cpStart).isLetter()) continue;
-                            appendCharProperties(j, prop);
-                        }
-                    }
-                } else { // not range!
-                    String val = "";
-                    String lastVal;
+					        for (int cps = cpStart; cps <= cpTop; ++cps) {
+					            if (UCD.mapToRepresentative(cps, Integer.MAX_VALUE) != cps) continue;    // skip condensed ranges

-                    for (int i = 1; i < labels.length; ++i) {
-                        String key = labels[i];
-                        lastVal = val;
-                        if (isHex.get(key) != null) {
-                            val = Utility.fromHex(parts[i]);
-                        } else {
-                            val = parts[i].trim();
-                        }
-                        if (key.equals("OMIT")) continue; // do after val, so lastVal is correct
-                        if (key.equals("RANGE")) continue; // do after val, so lastVal is correct
-                        if (val.equals("")) continue; // skip empty values, they mean default
-
-                        for (int cps = cpStart; cps <= cpTop; ++cps) {
-                            if (UCD.mapToRepresentative(cps, Integer.MAX_VALUE) != cps) continue;    // skip condensed ranges
-
-                            if (key.equals("binary")) {
-                                appendCharProperties(cps, val);
-                            } else if (key.equals("fc")) {
-                                UData data = getEntry(cps);
-                                String type = parts[i-1].trim();
-                                if (type.equals("F") || type.equals("C") || type.equals("E") || type.equals("L")) {
-                                    data.fullCaseFolding = val;
-                                    //System.out.println("*<" + parts[i-1] + "> Setting " + Utility.hex(cps) + ": " + Utility.hex(val));
-                                }
-                                if (type.equals("S") || type.equals("C") || type.equals("L")) {
-                                    data.simpleCaseFolding = val;
-                                    //System.out.println("<" + parts[i-1] + "> Setting " + Utility.hex(cps) + ": " + Utility.hex(val));
-                                }
-                                if (type.equals("I")) {
-                                    data.simpleCaseFolding = val;
-                                    setBinaryProperty(cps, CaseFoldTurkishI);
-                                    if (DEBUG) System.out.println("SPOT-CHECK: <" + parts[i-1] + "> Setting " 
-                                    	+ Utility.hex(cps) + ": " + Utility.hex(val));
-                                }
-                            } else if (labels[0].equals("SpecialCasing")   // special handling for special casing
-                            			&& labels[4].equals("sc")
-                                		&& parts[4].trim().length() > 0) {
-                                if (i < 4) {
-                                	if (DEBUG) System.out.println("Got special: " + Utility.hex(cps) + ", " 
-                                		+ Utility.hex(key) + ":" + Utility.hex(val));
-                                	addCharData(cps, "sc", parts[4].trim() + ":" + key + ":" + val);
-                                }
-                            } else {
-                                /*if (key.equals("sn")) { // SKIP UNDEFINED!!
-                                    UData data = getEntryIfExists(cps);
-                                    if (data == null || data.generalCategory == Cn) continue;
-                                }
-                                */
-                                addCharData(cps, key, val);
-                            }
-                        }
-                    }
-                }
+					            if (key.equals("binary")) {
+					                appendCharProperties(cps, val);
+					            } else if (key.equals("fc")) {
+					                UData data = getEntry(cps);
+					                String type = parts[i-1].trim();
+					                if (type.equals("F") || type.equals("C") || type.equals("E") || type.equals("L")) {
+					                    data.fullCaseFolding = val;
+					                    //System.out.println("*<" + parts[i-1] + "> Setting " + Utility.hex(cps) + ": " + Utility.hex(val));
+					                }
+					                if (type.equals("S") || type.equals("C") || type.equals("L")) {
+					                    data.simpleCaseFolding = val;
+					                    //System.out.println("<" + parts[i-1] + "> Setting " + Utility.hex(cps) + ": " + Utility.hex(val));
+					                }
+					                if (type.equals("I")) {
+					                    data.simpleCaseFolding = val;
+					                    setBinaryProperty(cps, CaseFoldTurkishI);
+					                    if (DEBUG) System.out.println("SPOT-CHECK: <" + parts[i-1] + "> Setting " 
+					                    	+ Utility.hex(cps) + ": " + Utility.hex(val));
+					                }
+					            } else if (labels[0].equals("SpecialCasing")   // special handling for special casing
+					            			&& labels[4].equals("sc")
+					                		&& parts[4].trim().length() > 0) {
+					                if (i < 4) {
+					                	if (DEBUG) System.out.println("Got special: " + Utility.hex(cps) + ", " 
+					                		+ Utility.hex(key) + ":" + Utility.hex(val));
+					                	addCharData(cps, "sc", parts[4].trim() + ":" + key + ":" + val);
+					                }
+					            } else {
+					                /*if (key.equals("sn")) { // SKIP UNDEFINED!!
+					                    UData data = getEntryIfExists(cps);
+					                    if (data == null || data.generalCategory == Cn) continue;
+					                }
+					                */
+					                addCharData(cps, key, val);
+					            }
+					        }
+					    }
+					}
+				} catch (Exception e) {
+		            System.err.println("*Exception at: " + line + ", " + e.getMessage());
+					//System.err.println(e.getMessage());
+				}
            }
        } catch (Exception e) {
            System.out.println("Exception at: " + line + ", " + e.getMessage());
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateConfusables.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateConfusables.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateConfusables.java,v $
-* $Date: 2005/07/19 17:21:00 $
-* $Revision: 1.7 $
+* $Date: 2005/11/01 00:10:53 $
+* $Revision: 1.8 $
 *
 *******************************************************************************
 */
@ -290,7 +290,7 @@ public class GenerateConfusables {
 			lowerIsBetter.putAll(remainingOutputSet, MARK_ASCII);
 			lowerIsBetter.setMissing(MARK_NOT_NFC);
 			
-			lowerIsBetter.lock();
+			lowerIsBetter.freeze();
 			// add special values:
 			//lowerIsBetter.putAll(new UnicodeSet("["), new Integer(0));
 			
@ -321,11 +321,11 @@ public class GenerateConfusables {
 					PROHIBITED + NOT_IN_XID);
 			removals2.setMissing("future?");
 			
-			additions.lock();
-			remap.lock();
-			removals.lock();
-			reviews.lock();
-			removals2.lock();
+			additions.freeze();
+			remap.freeze();
+			removals.freeze();
+			reviews.freeze();
+			removals2.freeze();
 		}

 		/**
@ -431,8 +431,8 @@ public class GenerateConfusables {
 			//reviews.putAll(UNASSIGNED, "");
 			out.print("\uFEFF");
 			out.println("# Review List for IDN");
-			out.println("# $Revision: 1.7 $");
-			out.println("# $Date: 2005/07/19 17:21:00 $");
+			out.println("# $Revision: 1.8 $");
+			out.println("# $Date: 2005/11/01 00:10:53 $");
 			out.println("");

 			UnicodeSet fullSet = reviews.getSet("").complement();
@ -487,8 +487,8 @@ public class GenerateConfusables {
 			PrintWriter out = BagFormatter.openUTF8Writer(outdir, "idnchars.txt");

 			out.println("# Recommended Identifier Profiles for IDN");
-			out.println("# $Revision: 1.7 $");
-			out.println("# $Date: 2005/07/19 17:21:00 $");
+			out.println("# $Revision: 1.8 $");
+			out.println("# $Date: 2005/11/01 00:10:53 $");

 			out.println("");
 			out.println("# Output Characters");
@ -557,8 +557,8 @@ public class GenerateConfusables {
 					"xidmodifications.txt");

 			out.println("# Security Profile for General Identifiers");
-			out.println("# $Revision: 1.7 $");
-			out.println("# $Date: 2005/07/19 17:21:00 $");
+			out.println("# $Revision: 1.8 $");
+			out.println("# $Date: 2005/11/01 00:10:53 $");
 			out.println("");

 			out.println("# Characters restricted");
@ -614,8 +614,8 @@ public class GenerateConfusables {
 			//someRemovals = removals;
 			out = BagFormatter.openUTF8Writer(outdir, "draft-restrictions.txt");
 			out.println("# Characters restricted in domain names");
-			out.println("# $Revision: 1.7 $");
-			out.println("# $Date: 2005/07/19 17:21:00 $");
+			out.println("# $Revision: 1.8 $");
+			out.println("# $Date: 2005/11/01 00:10:53 $");
 			out.println("#");
 			out.println("# This file contains a draft list of characters for use in");
 			out.println("#     UTR #36: Unicode Security Considerations");
@ -1149,8 +1149,8 @@ public class GenerateConfusables {
 		public void writeSource(String directory, String filename) throws IOException {
 			PrintWriter out = BagFormatter.openUTF8Writer(directory, filename);
 			out.println("# Source File for IDN Confusables");
-			out.println("# $Revision: 1.7 $");
-			out.println("# $Date: 2005/07/19 17:21:00 $");
+			out.println("# $Revision: 1.8 $");
+			out.println("# $Date: 2005/11/01 00:10:53 $");
 			out.println("");
 			dataMixedAnycase.writeSource(out);
 			out.close();
@ -1160,8 +1160,8 @@ public class GenerateConfusables {
 			PrintWriter out = BagFormatter.openUTF8Writer(directory, filename);
 			out.print('\uFEFF');
 			out.println("# Recommended confusable mapping for IDN");
-			out.println("# $Revision: 1.7 $");
-			out.println("# $Date: 2005/07/19 17:21:00 $");
+			out.println("# $Revision: 1.8 $");
+			out.println("# $Date: 2005/11/01 00:10:53 $");
 			out.println("");

 			if (appendFile) {
@ -1369,8 +1369,8 @@ public class GenerateConfusables {
 			UnicodeSet representable = new UnicodeSet();
 			out.print('\uFEFF');
 			out.println("# Summary: Recommended confusable mapping for IDN");
-			out.println("# $Revision: 1.7 $");
-			out.println("# $Date: 2005/07/19 17:21:00 $");
+			out.println("# $Revision: 1.8 $");
+			out.println("# $Date: 2005/11/01 00:10:53 $");
 			out.println("");
 			MyEquivalenceClass data = dataMixedAnycase;
 			Set items = data.getOrderedExplicitItems();
@ -1494,8 +1494,8 @@ public class GenerateConfusables {
 			PrintWriter out = BagFormatter.openUTF8Writer(outdir, filename);
 			out.print('\uFEFF');
 			out.println("# Summary: Whole-Script Confusables");
-			out.println("# $Revision: 1.7 $");
-			out.println("# $Date: 2005/07/19 17:21:00 $");
+			out.println("# $Revision: 1.8 $");
+			out.println("# $Date: 2005/11/01 00:10:53 $");
 			out.println("# This data is used for determining whether a strings is a");
 			out.println("# whole-script or mixed-script confusable.");
 			out.println("# The mappings here ignore common and inherited script characters,");
--- a/tools/unicodetools/com/ibm/text/UCD/GenerateStringPrep.java
+++ b/tools/unicodetools/com/ibm/text/UCD/GenerateStringPrep.java
@ -206,7 +206,7 @@ class GenerateStringPrep implements UCD_Types {
 				return a + "\t" + b;
 			}
 		};
-		UnicodeMap sb = ((UnicodeMap)scripts.clone()).composeWith(blocks, myCompose);
+		UnicodeMap sb = ((UnicodeMap)scripts.cloneAsThawed()).composeWith(blocks, myCompose);
 		for (Iterator it = sb.getAvailableValues(new TreeSet()).iterator(); it.hasNext();) {
 			System.out.println(it.next());
 		}
--- a/tools/unicodetools/com/ibm/text/UCD/MakeNamesChart.java
+++ b/tools/unicodetools/com/ibm/text/UCD/MakeNamesChart.java
@ -0,0 +1,501 @@
+package com.ibm.text.UCD;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import com.ibm.icu.dev.test.util.BagFormatter;
+import com.ibm.icu.dev.test.util.UnicodeMap;
+import com.ibm.icu.dev.test.util.UnicodePropertySource;
+import com.ibm.icu.text.Collator;
+import com.ibm.icu.text.Replaceable;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.text.UnicodeSetIterator;
+import com.ibm.icu.util.ULocale;
+import com.ibm.text.utility.Utility;
+import com.ibm.text.utility.Utility.Encoding;
+
+public class MakeNamesChart {
+	
+	// Code point of the names-list entry currently being written; main sets it when it
+	// parses a character line and finishItem resets it to -1 once the entry is finished.
+	static int lastCodePoint = -1;
+	// NOTE(review): not referenced anywhere in the visible part of this file - confirm it is still needed.
+	static boolean lastCodePointIsOld = false;
+	// Decomposition type of lastCodePoint as reported by the UCD; finishItem prints an
+	// alert when it is still different from UCD.NONE at the end of the entry.
+	static int lastDecompType = UCD.NONE;
+	
+	// File-name prefixes for the per-block chart page and names page, respectively.
+	static final String chartPrefix = "c_";
+	static final String namePrefix = "n_";
+	
+	// The three sets below are assigned in main from a ToolUnicodePropertySource;
+	// the trailing comments show the equivalent UnicodeSet patterns.
+	static UnicodeSet skipChars;// = new UnicodeSet("[[:gc=cn:]-[:noncharactercodepoint:]]");
+	static UnicodeSet rtl;// = new UnicodeSet("[[:bidiclass=r:][:bidiclass=al:]]");
+	static UnicodeSet usePicture;// = new UnicodeSet("[[:whitespace:][:defaultignorablecodepoint:]]");
+	
+	// UCD instance for version 4.1.0 (assigned in main); isNew compares against it.
+	static UCD ucd41;
+
+	/**
+	 * Generates the HTML names-list charts.
+	 * <p>For every block returned by {@code BlockInfo.next} (created for version 5.0.0 and
+	 * NamesList.txt) this writes two files under C:/DATA/GEN/charts/namelist/: a chart page
+	 * ({@code chartPrefix + fileName}) with one cell per collected code point, and a names
+	 * page ({@code namePrefix + fileName}) with one table row per names-list line.
+	 * Afterwards it writes mainList.html, an index of all processed blocks, and prints the
+	 * canonical and compatibility name differences to System.out.
+	 *
+	 * @param args ignored
+	 * @throws Exception propagated from reading the names list or writing the output files
+	 */
+	public static void main(String[] args) throws Exception {
+		//ConvertUCD.main(new String[]{"5.0.0"});
+		BlockInfo blockInfo = new BlockInfo("5.0.0", "NamesList.txt");
+		// http://www.unicode.org/~book/incoming/kenfiles/U50M051010.lst
+		Default.setUCD("5.0.0");
+		ucd41 = UCD.make("4.1.0");
+		ToolUnicodePropertySource up = ToolUnicodePropertySource.make("5.0.0");
+		// Intended set: [[:gc=cn:]-[:noncharactercodepoint:]].
+		// The previous code removed gc=cn from itself, which always produced an empty set.
+		skipChars = new UnicodeSet(up.getSet("gc=cn")).removeAll(up.getSet("noncharactercodepoint=true"));
+		rtl = new UnicodeSet(up.getSet("bidiclass=r")).addAll(up.getSet("bidiclass=al"));// "[[:bidiclass=r:][:bidiclass=al:]]");
+		usePicture = new UnicodeSet(up.getSet("whitespace=true")).addAll(up.getSet("defaultignorablecodepoint=true"));// new UnicodeSet("[[:whitespace:][:defaultignorablecodepoint:]]");
+
+		// First line of every processed block, in order; drives mainList.html.
+		List nameList = new ArrayList();
+		ArrayList lines = new ArrayList();
+		UnicodeSet collectedCodePoints = new UnicodeSet();
+		// Bit i is set when block i of nameList contains at least one new code point.
+		BitSet nameListNew = new BitSet();
+
+		int limit = Integer.MAX_VALUE;
+		for (int count = 0; count < limit; ++count) {
+			if (!blockInfo.next(lines)) break;
+			String firstLine = (String)lines.get(0);
+			if (firstLine.startsWith("@@@")) continue;
+			String[] lineParts = firstLine.split("\t");
+			String fileName = lineParts[1] + ".html";
+			nameList.add(firstLine);
+			System.out.println();
+			System.out.println("file: " + chartPrefix + fileName);
+			PrintWriter out = BagFormatter.openUTF8Writer("C:/DATA/GEN/charts/namelist/", chartPrefix + fileName);
+			out.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'><title>" +
+					BagFormatter.toHTML.transliterate(getHeading(lineParts[2])) +
+					"</title><link rel='stylesheet' type='text/css' href='namelist.css'>" +
+					"<base target='names'></head><body>");
+
+			// header
+			out.println("<table class='headerTable'><tr><td class='headerLeft'>" +
+					lineParts[1] +
+					" <a href='help.html'>help</a></td><td class='headerCenter'>" +
+					getHeading(lineParts[2]) +
+					"</td><td class='headerRight'><a href='mainList.html'>index</a> " +
+					lineParts[3] +
+					"</td></tr></table>");
+
+			if ("Unassigned".equals(lineParts[2])) {
+				System.out.println("debug");
+			}
+			// first pass through and collect all the code points
+			collectedCodePoints.clear();
+			for (int i = 1; i < lines.size(); ++i) {
+				String line = (String)lines.get(i);
+				int cp1 = line.charAt(0);
+				// Lines starting with '@' or a tab are headings/annotations, not character entries.
+				if (cp1 != '@' && cp1 != '\t') {
+					int cp = Integer.parseInt(line.split("\t")[0],16);
+					collectedCodePoints.add(cp);
+				}
+			}
+			collectedCodePoints.removeAll(skipChars);
+			if (collectedCodePoints.size() == 0) {
+				out.println("<p align='center'>No Names List</p>");
+			} else {
+				out.println("<div align='center'><table class='chart'><tr>");
+				int counter = 0;
+				for (UnicodeSetIterator it = new UnicodeSetIterator(collectedCodePoints); it.next();) {
+					// Start a new row after every 16 cells.
+					if ((counter % 16) == 0 && counter != 0) {
+						out.println("</tr><tr>");
+					}
+					String tdclass = "cell";
+					if (counter < 16) tdclass = "cellw";
+					if (it.codepoint == 0x242) {
+						System.out.println("debug");
+					}
+					boolean isNew = isNew(it.codepoint);
+					if (isNew) tdclass += "new";
+					String hexcp = Utility.hex(it.codepoint, 4);
+					String title = "";
+					String name = Default.ucd().getName(it.codepoint);
+					if (name != null) title = " title='" + BagFormatter.toHTML.transliterate(name.toLowerCase()) + "'";
+					out.println("<td class='" + tdclass + "'"
+							+ title
+							+ ">\u00A0"
+							+ showChar(it.codepoint) + "\u00A0<br><tt><a href='" + namePrefix + fileName + "#"+ hexcp + "'>" +
+							hexcp + "</a></tt></td>");
+					counter++;
+				}
+				// Pad a partially filled last row, but only when the chart has more than one row.
+				if (counter > 16) {
+					counter &= 0xF;
+					if (counter != 0) for (; counter < 16; ++counter) out.println("<td class='cell'>\u00A0</td>");
+				}
+				// Always close the table; previously charts with 16 cells or fewer were left unclosed.
+				out.println("</tr></table></div>");
+			}
+			out.close();
+			out = BagFormatter.openUTF8Writer("C:/DATA/GEN/charts/namelist/", namePrefix + fileName);
+			out.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>" +
+					"<link rel='stylesheet' type='text/css' href='namelist.css'></head><body>");
+
+			// now do the characters
+			boolean inTable = false;
+			for (int i = 1; i < lines.size(); ++i) {
+				String line = (String)lines.get(i);
+				try {
+					if (line.startsWith("@")) {
+						// Heading or comment line: finish the pending entry and leave the table.
+						finishItem(out);
+						if (inTable) {
+							out.println("</table>");
+							inTable = false;
+						}
+						if (line.startsWith("@+")) {
+							line = line.substring(2).trim();
+							out.println("<p class='comment'>"
+									+ line
+									+ "</p>");
+						} else {
+							line = line.substring(1).trim();
+							out.println("<h2>"
+									+ line
+									+ "</h2>");
+						}
+					} else {
+						if (!inTable) {
+							out.println("<table>");
+							inTable = true;
+						}
+						//String line2 = lineParts[1];
+						if (line.startsWith("\t")) {
+							// Annotation line belonging to the current entry; its first character selects the kind.
+							String body = line.trim();
+							if (false && line.indexOf(body) != 1) {
+								System.out.println("Format error: too much inital whitespace: <" + line + ">");
+							}
+							char firstChar = body.charAt(0);
+							switch (firstChar) {
+							case '*': body = "\u2022 " + body.substring(2); break;
+							case ':': body = checkCanonical(lastCodePoint, body); break;
+							case '#': body = checkCompatibility(lastCodePoint, body); break;
+							case 'x': body = getOther(body); break;
+							case '=': break;
+							default: throw new IllegalArgumentException("Huh? " + body);
+							}
+							out.println("<tr><td>\u00A0</td><td>\u00A0</td><td>"
+									+ maybeNameStyle(showTextConvertingHex(body, firstChar != '='), firstChar == '=')
+									+ "</td></tr>");
+						} else {
+							// Character line: "<hex code point>\t<name>". Note that lineParts is reused here.
+							finishItem(out);
+							lineParts = line.split("\t");
+							String x = lineParts[0];
+							lastCodePoint = Integer.parseInt(x,16);
+							boolean lastCodePointIsNew = isNew(lastCodePoint);
+							if (lastCodePointIsNew) nameListNew.set(nameList.size()-1, true);
+							out.println("<tr><td"
+									+ (lastCodePointIsNew ? " class='new'" : "")
+									+ "><code><a name='" + x + "'>" + x + "</a></code></td><td>\u00A0"
+									+ showChar(lastCodePoint) + "\u00A0</td><td"
+									+ (lastCodePointIsNew ? " class='new'" : "") + ">"
+									+ nameStyle(showTextConvertingHex(lineParts[1], false)) + "</td></tr>");
+							lastDecompType = Default.ucd().getDecompositionType(lastCodePoint);
+						}
+					}
+				} catch (Exception e) {
+					// Re-throw with the offending line so the bad input can be located.
+					throw (IllegalArgumentException) new IllegalArgumentException("Error on line: " + line)
+					.initCause(e);
+				}
+			}
+			finishItem(out);
+			out.close();
+		}
+		blockInfo.in.close();
+		PrintWriter out = BagFormatter.openUTF8Writer("C:/DATA/GEN/charts/namelist/", "mainList.html");
+		out.println("<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8'>" +
+				"<title>Main List</title><link rel='stylesheet' type='text/css' href='namelist.css'>" +
+				"<base target='chart'></head><body><table>");
+		for (int i = 0; i < nameList.size(); ++i) {
+			String line = (String) nameList.get(i);
+			String[] lineParts = line.split("\t");
+			String fileName = lineParts[1] + ".html";
+			out.println("<tr><td><code>" + lineParts[1] +
+					"</code></td><td"
+					+ (nameListNew.get(i) ? " class='new'" : "")
+					+ "><a href='" + chartPrefix + fileName + "'>" + getHeading(lineParts[2]) + "</a></td><td><code>" +
+					lineParts[3] +"</code></td></tr>");
+		}
+		out.println("</table></body></html>");
+		out.close();
+		BagFormatter bf = new BagFormatter();
+		//System.out.println(bf.showSetDifferences("Has name in decomps", hasName, "Has no name in decomps", hasNoName));
+		System.out.println("Name differences: Canonical");
+		showNameDifferences(hasNameCan, hasNoNameCan);
+		System.out.println("Name differences: Compatibility");
+		showNameDifferences(hasNameComp, hasNoNameComp);
+//		System.out.println("Characters with names in decomps: " + hasName.toPattern(true));
+//		System.out.println("Characters without names in decomps: " + hasNoName.toPattern(true));
+//		System.out.println("Characters sometimes with, sometimes without names in decomps: " + both.toPattern(true));
+		System.out.println("Done");
+	}
+
+	/**
+	 * Tells whether a code point is allocated in the default UCD but not in {@code ucd41}.
+	 *
+	 * @param codepoint the code point to check
+	 * @return true if only the default UCD reports the code point as allocated
+	 */
+	private static boolean isNew(int codepoint) {
+		if (!Default.ucd().isAllocated(codepoint)) {
+			return false;
+		}
+		return !ucd41.isAllocated(codepoint);
+	}
+
+	/**
+	 * Prints every key that occurs in both maps, together with the value each map
+	 * holds for it, followed by the number of such keys.
+	 *
+	 * @param hasName map whose values are printed under "Has name in"
+	 * @param hasNoName map whose values are printed under "Has no name in"
+	 */
+	private static void showNameDifferences(Map hasName, Map hasNoName) {
+		// Sorted intersection of the two key sets.
+		Set shared = new TreeSet(hasNoName.keySet());
+		shared.retainAll(hasName.keySet());
+		Iterator keys = shared.iterator();
+		while (keys.hasNext()) {
+			String key = (String) keys.next();
+			System.out.println();
+			System.out.println("decomp: " + Utility.hex(key));
+			String withName = (String) hasName.get(key);
+			System.out.println("Has name in: " + Utility.hex(withName));
+			String withoutName = (String) hasNoName.get(key);
+			System.out.println("Has no name in: " + Utility.hex(withoutName));
+		}
+		System.out.println("Count: " + shared.size());
+	}
+	
+	// Source of confusable characters; finishItem calls ti.getConfusables on it.
+	static TestIdentifiers ti;
+	static {
+		try {
+			ti = new TestIdentifiers("L");
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			// NOTE(review): the failure is only printed, so ti stays null and the
+			// ti.getConfusables call in finishItem would then throw a NullPointerException.
+			e.printStackTrace();
+		}
+	}
+
+	/**
+	 * Writes the trailing rows for the entry identified by {@code lastCodePoint} and then
+	 * clears {@code lastCodePoint}. Does nothing when no entry is pending.
+	 * <p>Rows are produced through showForm for the full upper-, title- and lowercase forms,
+	 * the case folding, and the NFD and NFKD forms; each row is only written when the form
+	 * differs from the strings it is compared against. If the NFKD form equals the character
+	 * itself, rows for its confusables (from {@code ti}) are written as well.
+	 *
+	 * @param out writer for the names page currently being generated
+	 */
+	private static void finishItem(PrintWriter out) {
+		// Nothing pending (start of file, or the previous entry was already finished).
+		if (lastCodePoint < 0) return;
+		// NOTE(review): presumably checkCanonical/checkCompatibility reset lastDecompType when
+		// the names list supplies the decomposition - those methods are not visible here; confirm.
+		if (lastDecompType != UCD.NONE) {
+			System.out.println("Alert: missing decomp for " + Utility.hex(lastCodePoint));
+		}
+		String str = UTF16.valueOf(lastCodePoint);
+		// Case forms: uppercase vs. the character, titlecase vs. the character and its uppercase,
+		// lowercase vs. the character, case folding vs. the lowercase.
+		String upper = showForm(out, str, null, null, Default.ucd().getCase(str,UCD.FULL,UCD.UPPER), "\u2191");
+		showForm(out, str, upper, null, Default.ucd().getCase(str,UCD.FULL,UCD.TITLE), "\u2195");
+		String lower = showForm(out, str, null, null, Default.ucd().getCase(str,UCD.FULL,UCD.LOWER), "\u2193");
+		showForm(out, lower, null, null, Default.ucd().getCase(str,UCD.FULL,UCD.FOLD), "\u2194");
+		
+		// Normalization forms: shown only when they differ from the raw decomposition mapping,
+		// from the character itself and (for NFKD) from the NFD form.
+		String dc = Default.ucd().getDecompositionMapping(lastCodePoint);
+		String nfd = showForm(out, dc, str, null, Default.nfd().normalize(lastCodePoint), "\u21DB");
+		//String nfc = showForm(out, dc, null, Default.nfc().normalize(lastCodePoint), "\u21DB");
+		String nfkd = showForm(out, dc, str, nfd, Default.nfkd().normalize(lastCodePoint), "\u21DD");
+		
+		// Confusables are listed only for characters that are their own NFKD form.
+		if (nfkd.equals(str)) {
+			Set s = ti.getConfusables(lastCodePoint, "MA");
+			if (s.size() > 1) {
+				// sortedSet is a shared scratch set: refill it with the NFKD form of each confusable.
+				sortedSet.clear();
+				for (Iterator it = s.iterator(); it.hasNext();) {
+					sortedSet.add(Default.nfkd().normalize((String)it.next()));
+				}
+				sortedSet.remove(nfkd); // remove me
+				for (Iterator it = sortedSet.iterator(); it.hasNext();) {
+					String other = (String)it.next();
+					// Skip anything that still normalizes to this character's own NFKD form.
+					if (nfkd.equals(Default.nfkd().normalize(other))) continue;
+					out.println("<tr><td>\u00A0</td><td>\u00A0</td><td class='conf'>\u279F\u00A0"
+							+ showTextConvertingHex(Utility.hex(other, 4, " + "), true)
+							+ " "
+							+ Default.ucd().getName(other, UCD.NORMAL, " + ").toLowerCase()
+							// maybeNameStyle(showTextConvertingHex(upper, firstChar != '='), firstChar == '=')
+							+ "</td></tr>");
+				}
+			}
+		}
+		// Mark the entry as finished so a repeated call is a no-op.
+		lastCodePoint = -1;
+	}
+	
+	// Scratch set that finishItem() clears and refills for each code point. It is ordered by
+	// an English collator, so strings the collator compares as equal collapse to one entry.
+	static Set sortedSet = new TreeSet(Collator.getInstance(ULocale.ENGLISH));
+
+	/**
+	 * Prints one table row for a transformed form of a character, unless that form is
+	 * identical to any of the strings it is compared against.
+	 *
+	 * @param out         destination for the HTML row
+	 * @param str         first string to compare against
+	 * @param str2        second string to compare against; may be null
+	 * @param str3        third string to compare against; may be null
+	 * @param transformed the form to show (hex, glyphs, and the name when it is one code point)
+	 * @param symbol      marker placed at the start of the row's text cell
+	 * @return transformed, whether or not a row was printed
+	 */
+	private static String showForm(PrintWriter out, String str, String str2, String str3, String transformed, String symbol) {
+		if (!transformed.equals(str) && !transformed.equals(str2) && !transformed.equals(str3)) {
+			// Lowercase with a fixed locale: the default-locale form would change the
+			// generated names under locales with special casing rules (e.g. Turkish).
+			out.println("<tr><td>\u00A0</td><td>\u00A0</td><td class='c'>" + symbol + "\u00A0"
+				+ showTextConvertingHex(Utility.hex(transformed, 4, " + "), true)
+				+ (UTF16.countCodePoint(transformed) != 1 ? "" : 
+					" " + Default.ucd().getName(transformed, UCD.NORMAL, " + ").toLowerCase(Locale.ENGLISH))
+				// maybeNameStyle(showTextConvertingHex(upper, firstChar != '='), firstChar == '=')
+				+ "</td></tr>");
+		}
+		return transformed;
+	}
+	
+	/**
+	 * Strips a trailing parenthesized part from a name: everything from the last " (" on is
+	 * dropped. A name without " (" is returned unchanged.
+	 */
+	static public String getHeading(String name) {
+		int parenStart = name.lastIndexOf(" (");
+		return parenStart >= 0 ? name.substring(0, parenStart) : name;
+	}
+	
+	/**
+	 * Applies nameStyle to the string only when b is set and the string is already entirely
+	 * upper case (English casing rules); otherwise returns it untouched.
+	 */
+	private static String maybeNameStyle(String string, boolean b) {
+		boolean alreadyUpper = b && string.equals(string.toUpperCase(Locale.ENGLISH));
+		return alreadyUpper ? nameStyle(string) : string;
+	}
+
+
+	/**
+	 * Title-cases the string and wraps it in &lt;i&gt;...&lt;/i&gt;.
+	 * <p>
+	 * Title-casing also capitalizes HTML entity names (text matched by escapeMatch), so every
+	 * such match is lowercased again afterwards.
+	 *
+	 * @param string text to style
+	 * @return the italicized, title-cased text with entity references restored to lower case
+	 */
+	private static String nameStyle(String string) {
+		String result = "<i>" + Default.ucd().getCase(string, UCD.FULL, UCD.TITLE) + "</i>";
+		// if it has any &xxx;, then restore them.
+		int position = 0;
+		while (escapeMatch.reset(result).find(position)) {
+			int start = escapeMatch.start();
+			position = escapeMatch.end();
+			// Fixed locale: entity names are ASCII and must not be lowercased with
+			// locale-specific rules (a Turkish default locale would turn 'I' into U+0131).
+			result = result.substring(0,start) 
+			+ result.substring(start, position).toLowerCase(Locale.ENGLISH) 
+			+ result.substring(position);
+		}
+		return result;
+	}
+
+	// Matches "&", one upper-case ASCII letter, any number of lower-case ASCII letters, then ";"
+	// (a title-cased entity reference). One shared Matcher is re-targeted with reset(), so it
+	// must not be used from several threads at once.
+	static Matcher escapeMatch = Pattern.compile("\\&[A-Z][a-z]*\\;").matcher("");
+	
+	/**
+	 * Converts the text with BagFormatter.toHTML and, when requested, decorates hex numbers.
+	 * <p>
+	 * Every run of 4 to 6 upper-case hex digits whose value is at most 0x10FFFF is wrapped in
+	 * &lt;code&gt;...&lt;/code&gt; and followed by a no-break space and showChar of that value.
+	 * Other runs are left as they are.
+	 *
+	 * @param body         text to convert
+	 * @param addCharToHex whether hex numbers should be decorated
+	 * @return the converted text
+	 */
+	private static String showTextConvertingHex(String body, boolean addCharToHex) {
+		String html = BagFormatter.toHTML.transliterate(body);
+		if (!addCharToHex) {
+			return html;
+		}
+		int searchFrom = 0;
+		while (searchFrom < html.length() && findHex.reset(html).find(searchFrom)) {
+			int hexStart = findHex.start();
+			int hexEnd = findHex.end();
+			// by default, resume right after this run of digits
+			searchFrom = hexEnd;
+			int digits = hexEnd - hexStart;
+			if (digits < 4 || digits > 6) continue;
+			int cp = Integer.parseInt(findHex.group(),16);
+			if (cp > 0x10FFFF) continue;
+			String decorated = html.substring(0,hexStart)
+				+ "<code>" + html.substring(hexStart, hexEnd) + "</code>"
+				+ "\u00A0" + showChar(cp);
+			html = decorated + html.substring(hexEnd);
+			// skip over the inserted markup so it is not scanned again
+			searchFrom = decorated.length();
+		}
+		return html;
+	}
+	
+	// Shared matchers, re-targeted with reset() before each use (so not safe for concurrent use).
+	// pointer:  a whole string of the form "x (<name> - <HEX>)"; group 1 = name, group 2 = hex digits.
+	static Matcher pointer = Pattern.compile("x \\((.*) - ([0-9A-F]+)\\)").matcher("");
+	// pointer2: a whole string of the form "x <HEX>" with 4 to 6 hex digits; group 1 = hex digits.
+	static Matcher pointer2 = Pattern.compile("x ([0-9A-F]{4,6})").matcher("");
+	// findHex:  any run of upper-case hex digits.
+	static Matcher findHex = Pattern.compile("[0-9A-F]+").matcher("");
+	
+	/**
+	 * Rewrites a cross-reference line into arrow form.
+	 * <p>
+	 * "x (hyphenation point - 2027)" becomes "&#x2192; 2027 hyphenation point"; the short form
+	 * "x 2027" becomes "&#x2192; 2027". For the long form the quoted name is compared
+	 * (ignoring case) with the name from the UCD and a mismatch is reported on System.out.
+	 *
+	 * @param body the cross-reference text
+	 * @return the arrow form
+	 * @throws IllegalArgumentException if body matches neither form
+	 */
+	private static String getOther(String body) {
+		if (pointer.reset(body).matches()) {
+			int target = Integer.parseInt(pointer.group(2),16);
+			String listedName = pointer.group(1);
+			String ucdName = Default.ucd().getName(target);
+			if (ucdName == null) {
+				ucdName = "<not a character>";
+			}
+			if (!listedName.equalsIgnoreCase(ucdName)) {
+				System.out.println("Mismatch in name for " + body + " in " + Utility.hex(lastCodePoint));
+				System.out.println("\tName is: " + ucdName);
+			}
+			return "\u2192 " + Utility.hex(target,4) + " " + listedName;
+		}
+		if (pointer2.reset(body).matches()) {
+			// short form: no name is attached to the result
+			int target = Integer.parseInt(pointer2.group(1),16);
+			return "\u2192 " + Utility.hex(target,4);
+		}
+		throw new IllegalArgumentException("Bad format: " + body);
+	}
+	
+	/**
+	 * Returns HTML that displays the given code point.
+	 * <p>
+	 * Code points in usePicture are shown as a stand-in inside an 'inv' span: U+2400 + cp for
+	 * cp up to 0x20, U+2421 for 0x7F, and U+2588 otherwise. All others are converted with
+	 * BagFormatter.toHTML; Mn/Me marks get a leading U+25CC, and members of rtl are wrapped
+	 * in U+200E on both sides.
+	 */
+	static String showChar(int cp) {
+		if (usePicture.contains(cp)) {
+			int standIn;
+			if (cp <= 0x20) {
+				standIn = 0x2400 + cp;
+			} else if (cp == 0x7F) {
+				standIn = 0x2421;
+			} else {
+				standIn = '\u2588';
+			}
+			return "<span class='inv'>" + (char)standIn + "</span>";
+		}
+		int category = Default.ucd().getCategory(cp);
+		String html = BagFormatter.toHTML.transliterate(UTF16.valueOf(cp));
+		boolean isMark = category == UCD.Mn || category == UCD.Me;
+		if (isMark) {
+			return "\u25CC" + html;
+		}
+		if (rtl.contains(cp)) {
+			return "\u200E" + html + "\u200E";
+		}
+		return html;
+	}
+	
+	//static final UnicodeSet noname = new UnicodeSet("[[:ascii:][:ideographic:]]");
+	// Bookkeeping filled by checkCanonical (…Can) and checkCompatibility (…Comp).
+	// Key: a decomposition mapping string. Value: the code point (as a string) whose entry
+	// listed that mapping as hex only ("hasNoName…") or as hex plus character name ("hasName…").
+	static final Map hasNoNameCan = new TreeMap();
+	static final Map hasNameCan = new TreeMap();
+	static final Map hasNoNameComp = new TreeMap();
+	static final Map hasNameComp = new TreeMap();
+
+	/**
+	 * Checks a canonical-decomposition line against the UCD and returns it in display form.
+	 * <p>
+	 * The first two characters of body are dropped. The rest must equal (ignoring case) the
+	 * hex form of the decomposition mapping of lastCodePoint, optionally followed by the
+	 * lower-cased character name when the mapping is a single code point. Matches are recorded
+	 * in hasNoNameCan / hasNameCan; anything else is reported on System.out. lastDecompType
+	 * is reset to UCD.NONE.
+	 *
+	 * @param codePoint code point used in messages and as the recorded map value
+	 * @param body      the line text, including its two-character prefix
+	 * @return U+2261, a space, and the stripped body
+	 */
+	private static String checkCanonical(int codePoint, String body) {
+		String entry = body.substring(2);
+		if (lastDecompType != UCD.CANONICAL) {
+			System.out.println("Mismatching Decomposition Type: " + entry + " in " + Utility.hex(codePoint));
+		}
+		String mapping = Default.ucd().getDecompositionMapping(lastCodePoint);
+		String hexOnly = Utility.hex(mapping, 4, " ");
+		String hexWithName = UTF16.countCodePoint(mapping) == 1
+			? hexOnly + " " + Default.ucd().getName(mapping).toLowerCase()
+			: hexOnly;
+		// pick the bookkeeping map that corresponds to the format the entry used
+		Map bucket = hexOnly.equalsIgnoreCase(entry) ? hasNoNameCan
+			: hexWithName.equalsIgnoreCase(entry) ? hasNameCan
+			: null;
+		if (bucket != null) {
+			bucket.put(mapping, UTF16.valueOf(codePoint));
+		} else {
+			System.out.println("Mismatching Decomposition: " + entry + " in " + Utility.hex(codePoint));
+			System.out.println("\tShould be: " + hexOnly);
+		}
+		lastDecompType = UCD.NONE;
+		return "\u2261 " + entry;
+	}
+
+	/**
+	 * Checks a compatibility-decomposition line against the UCD and returns it in display form.
+	 * <p>
+	 * The first two characters of body are dropped. The expected text is the hex form of the
+	 * decomposition mapping of lastCodePoint, prefixed with "&lt;type&gt; " unless the type is
+	 * UCD.COMPAT_UNSPECIFIED, and optionally followed by the lower-cased character name when
+	 * the mapping is a single code point. Matches (ignoring case) are recorded in
+	 * hasNoNameComp / hasNameComp; anything else is reported on System.out. lastDecompType
+	 * is reset to UCD.NONE.
+	 *
+	 * @param codePoint code point used in messages and as the recorded map value
+	 * @param body      the line text, including its two-character prefix
+	 * @return U+2248, a space, and the stripped body
+	 */
+	private static String checkCompatibility(int codePoint, String body) {
+		String entry = body.substring(2);
+		if (lastDecompType <= UCD.CANONICAL) {
+			System.out.println("Mismatching Decomposition Type: " + entry + " in " + Utility.hex(codePoint));
+		}
+		String mapping = Default.ucd().getDecompositionMapping(lastCodePoint);
+		String expected = Utility.hex(mapping, 4, " ");
+		if (lastDecompType != UCD.COMPAT_UNSPECIFIED) {
+			expected = "<" + Default.ucd().getDecompositionTypeID(lastCodePoint) + "> " + expected;
+		}
+		String expectedWithName = UTF16.countCodePoint(mapping) == 1
+			? expected + " " + Default.ucd().getName(mapping).toLowerCase()
+			: expected;
+		// pick the bookkeeping map that corresponds to the format the entry used
+		Map bucket = expected.equalsIgnoreCase(entry) ? hasNoNameComp
+			: expectedWithName.equalsIgnoreCase(entry) ? hasNameComp
+			: null;
+		if (bucket != null) {
+			bucket.put(mapping, UTF16.valueOf(codePoint));
+		} else {
+			System.out.println("Mismatching Decomposition: " + entry + " in " + Utility.hex(codePoint));
+			System.out.println("\tShould be: " + expected);
+		}
+		lastDecompType = UCD.NONE;
+		return "\u2248 " + entry;
+	}
+
+	/**
+	 * Reads a Unicode data file one block at a time, where a new block begins at each line
+	 * that starts with "@@" followed by a tab.
+	 */
+	static class BlockInfo {
+		BufferedReader in;
+		// Header line of the next block, read ahead by the previous call to next(); else null.
+		String lastLine;
+
+		/**
+		 * Opens the file through Utility.openUnicodeFile.
+		 *
+		 * @param version  version argument passed to openUnicodeFile
+		 * @param filename name of the file to open
+		 * @throws IOException if the file cannot be opened
+		 */
+		BlockInfo (String version, String filename) throws IOException {
+			in = Utility.openUnicodeFile(filename, version, true, Utility.LATIN1_WINDOWS);
+			//in = BagFormatter.openUTF8Reader(dir, filename);
+		}
+
+		/**
+		 * Replaces the contents of inout with the lines of the next block.
+		 *
+		 * @param inout list that is cleared and then filled with the block's lines
+		 * @return false when no more lines are available
+		 * @throws IOException if reading fails
+		 */
+		boolean next(List inout) throws IOException {
+			inout.clear();
+			if (lastLine != null) {
+				inout.add(lastLine);
+				lastLine = null;
+			}
+			while (true) {
+				String line = in.readLine();
+				if (line == null) break;
+				// A header ends the current block only if that block already has content.
+				// Otherwise (input starts with a header) it opens this block; stopping here
+				// would return an empty block, which callers read as end of input.
+				if (line.startsWith("@@\t") && !inout.isEmpty()) {
+					lastLine = line;
+					break;
+				}
+				inout.add(line);
+			}
+			return inout.size() > 0;
+		}
+
+	}
+}
--- a/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Normalizer.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Normalizer.java,v $
-* $Date: 2004/04/17 18:21:39 $
-* $Revision: 1.16 $
+* $Date: 2005/11/01 00:10:54 $
+* $Revision: 1.17 $
 *
 *******************************************************************************
 */
@ -136,7 +136,7 @@ public final class Normalizer implements UCD_Types {

    /**
    * Normalizes text according to the chosen form
-    * @param   source      the original text, unnormalized
+    * @param   cp      the original code point, unnormalized
    * @return  target      the resulting normalized text
    */
    public String normalize(int cp) {
@ -157,7 +157,7 @@ public final class Normalizer implements UCD_Types {
    /**
     * Does a quick check to see if the string is in the current form. Checks canonical order and
     * isAllowed().
-     * @param   source  source text
+     * @param   newLocaleID  source text
     * @return YES, NO, MAYBE
     */
     /*
--- a/tools/unicodetools/com/ibm/text/UCD/NormalizerSample.java
+++ b/tools/unicodetools/com/ibm/text/UCD/NormalizerSample.java
@ -86,7 +86,7 @@ public class NormalizerSample implements UCD_Types {

    /**
    * Normalizes text according to the chosen form
-    * @param   source      the original text, unnormalized
+    * @param   cp      the original code point, unnormalized
    * @return  target      the resulting normalized text
    */
    public String normalize(int cp) {
--- a/tools/unicodetools/com/ibm/text/UCD/QuickTest.java
+++ b/tools/unicodetools/com/ibm/text/UCD/QuickTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/QuickTest.java,v $
-* $Date: 2005/10/11 19:39:15 $
-* $Revision: 1.7 $
+* $Date: 2005/11/01 00:10:54 $
+* $Revision: 1.8 $
 *
 *******************************************************************************
 */
@ -21,6 +21,7 @@ import com.ibm.icu.dev.test.util.UnicodeMap;
 import com.ibm.icu.dev.test.util.UnicodeProperty;
 import com.ibm.icu.dev.test.util.UnicodePropertySource;
 import com.ibm.icu.dev.test.util.UnicodeMap.MapIterator;
+import com.ibm.icu.impl.Utility;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
@ -30,17 +31,92 @@ import com.ibm.text.utility.*;

 public class QuickTest implements UCD_Types {
 	public static void main(String[] args) throws IOException {
-		getBidiMirrored();
-		if (true) return;
-		getLengths("NFC", Default.nfc());
-		getLengths("NFD", Default.nfd());
-		getLengths("NFKC", Default.nfkc());
-		getLengths("NFKD", Default.nfkd());
-		System.out.println("Done");
+		try {
+			//getBidiMirrored();
+			getCaseFoldingUnstable();
+			if (true) return;
+			getHasAllNormalizations();
+			getLengths("NFC", Default.nfc());
+			getLengths("NFD", Default.nfd());
+			getLengths("NFKC", Default.nfkc());
+			getLengths("NFKD", Default.nfkd());
+		} finally {
+			System.out.println("Done");
+		}
 	}
 	
-	
-	
+	/**
+	 * Reports code points whose full case folding differs between neighbouring UCD versions.
+	 * <p>
+	 * For each adjacent pair of entries in Utility.searchPath, starting at index 3, both
+	 * versions are loaded and every code point assigned in the second (old) entry is compared.
+	 * Memory usage is printed around each load, and all differences are listed on System.out.
+	 */
+	private static void getCaseFoldingUnstable() {
+		for (int i = 3; i < com.ibm.text.utility.Utility.searchPath.length - 1; ++i) {
+			String newName = com.ibm.text.utility.Utility.searchPath[i];
+			String oldName = com.ibm.text.utility.Utility.searchPath[i+1];
+			showMemoryUsage();		
+			UCD ucdNew = UCD.make(newName);
+			showMemoryUsage();
+			UCD ucdOld = UCD.make(oldName);
+			showMemoryUsage();
+			UnicodeMap differences = new UnicodeMap();
+			UnicodeSet differenceSet = new UnicodeSet();
+			// inclusive upper bound: U+10FFFF is the last code point and was skipped before
+			for (int j = 0; j <= 0x10FFFF; ++j) {
+				if (!ucdOld.isAssigned(j)) continue;
+				String oldString = ucdOld.getCase(j, UCD.FULL, UCD.FOLD);
+				String newString = ucdNew.getCase(j, UCD.FULL, UCD.FOLD);
+				if (!oldString.equals(newString)) {
+					differenceSet.add(j);
+					differences.put(j, new String[]{oldString, newString});
+					// progress marker, one per difference found
+					System.out.println(".");
+				}
+			}
+			if (differenceSet.size() != 0) {
+				System.out.println("Differences in " + com.ibm.text.utility.Utility.searchPath[i]);
+				for (UnicodeSetIterator it = new UnicodeSetIterator(differenceSet); it.next();) {
+					System.out.println(ucdNew.getCodeAndName(it.codepoint));
+					String[] strings = (String[]) differences.getValue(it.codepoint);
+					System.out.println("\t" + oldName + ": " + ucdNew.getCodeAndName(strings[0]));
+					System.out.println("\t" + newName + ": " + ucdNew.getCodeAndName(strings[1]));
+				}
+			}
+		}
+	}
+
+	  static public void showMemoryUsage() {
+		    System.gc(); System.gc(); System.gc(); System.gc();
+		    System.gc(); System.gc(); System.gc(); System.gc();
+		    System.gc(); System.gc(); System.gc(); System.gc();
+		    System.gc(); System.gc(); System.gc(); System.gc();
+		    System.out.println("total:\t" + Runtime.getRuntime().totalMemory() + ";\tfree:\t" + 
+		      Runtime.getRuntime().freeMemory());
+		  }
+
+	/**
+	 * Lists assigned code points with a decomposition whose normalization forms differ a lot.
+	 * <p>
+	 * For each such code point the source string and its NFC, NFD, NFKD and NFKC forms are
+	 * collected in a set; a tab-separated line is printed when more than three of those five
+	 * strings are distinct.
+	 */
+	private static void getHasAllNormalizations() {
+		// the unused UnicodeSet local of the earlier version has been dropped
+		Set s = new LinkedHashSet();
+		for (int i = 0; i <= 0x10FFFF; ++i) {
+			if (!Default.ucd().isAssigned(i)) continue;
+			if (Default.ucd().getDecompositionType(i) == UCD.NONE) continue;
+			String source = UTF16.valueOf(i);
+			String nfc = Default.nfc().normalize(source);
+			String nfd = Default.nfd().normalize(source);
+			String nfkd = Default.nfkd().normalize(source);
+			String nfkc = Default.nfkc().normalize(source);
+			// count the distinct strings among the source and its four forms
+			s.clear();
+			s.add(source);
+			s.add(nfc);
+			s.add(nfd);
+			s.add(nfkd);
+			s.add(nfkc);
+			if (s.size() > 3) {
+				System.out.println(Utility.hex(source) + "\t" + Utility.escape(source)
+					+ "\t" + Default.ucd().getName(source)
+					+ "\tnfd\t" + Utility.hex(nfd) + "\t" + Utility.escape(nfd)
+					+ "\tnfc\t" + Utility.hex(nfc) + "\t" + Utility.escape(nfc)
+					+ "\tnfkd\t" + Utility.hex(nfkd) + "\t" + Utility.escape(nfkd)
+					+ "\tnfkc\t" + Utility.hex(nfkc) + "\t" + Utility.escape(nfkc));
+			}
+		}
+	}
+
+
+
 	private static void getBidiMirrored() {
 		ToolUnicodePropertySource foo = ToolUnicodePropertySource.make("");
 		UnicodeMap status = new UnicodeMap();
@ -92,9 +168,10 @@ public class QuickTest implements UCD_Types {
 			UnicodeSet set = status.getSet(value);
 			for (UnicodeSetIterator umi = new UnicodeSetIterator(set); umi.next();) {
 				System.out.println(Utility.hex(umi.codepoint) 
-						+ ";\t" + value
-						+ ";\t" + (x.contains(umi.codepoint) ? "O" : "")
-						+ ";\t" + Default.ucd().getName(umi.codepoint));
+						+ (value.startsWith("*") ? ";\tBidi_Mirrored" : "")
+						+ "\t#\t" + value
+						//+ ";\t" + (x.contains(umi.codepoint) ? "O" : "")
+						+ "\t" + Default.ucd().getName(umi.codepoint));
 			}
 		}
 	}
@ -288,6 +365,6 @@ public class QuickTest implements UCD_Types {
        System.out.println("\tCount:" + set1.size());
        System.out.println("\tSet:" + set1.toPattern(true));
        System.out.println("\tDetails:");
-        Utility.showSetNames("", set1, false, Default.ucd());
+        //Utility.showSetNames("", set1, false, Default.ucd());
    }
 }
--- a/tools/unicodetools/com/ibm/text/UCD/TestIdentifiers.java
+++ b/tools/unicodetools/com/ibm/text/UCD/TestIdentifiers.java
@ -4,10 +4,15 @@ import java.io.BufferedReader;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.BitSet;
+import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
+import java.util.Set;

 import com.ibm.icu.dev.test.util.BagFormatter;
 import com.ibm.icu.dev.test.util.UnicodeMap;
+import com.ibm.icu.dev.test.util.XEquivalenceClass;
 import com.ibm.icu.lang.UScript;
 import com.ibm.icu.text.Normalizer;
 import com.ibm.icu.text.UTF16;
@ -34,6 +39,14 @@ public class TestIdentifiers {
 				System.out.print(folded);
 				ti.testItem(folded);
 			}
+			for (int j = 0; j < tests[i].length(); ++j) {
+				int cp = tests[i].charAt(j);
+				Set s = ti.getConfusables(cp, "MA");
+				System.out.println(Default.ucd().getCodeAndName(cp));
+				for (Iterator it = s.iterator(); it.hasNext();) {
+					System.out.println("\t= " + Default.ucd().getCodeAndName((String)it.next()));
+				}
+			}
 		}
 	}
 	
@ -141,6 +154,49 @@ public class TestIdentifiers {
 		}
 		br.close();
 	}
+	
+	// Filled by loadConfusables(): maps a type string (third field of a confusables.txt line)
+	// to the XEquivalenceClass holding the string pairs of that type. Null until loaded.
+	Map type_equivalences;
+	
+	/**
+	 * Reads confusables.txt from indir and builds one equivalence class per confusable type.
+	 * <p>
+	 * Each data line has ';'-separated fields: the source string in hex, the string it is
+	 * confusable with in hex, and the type. The result is stored in type_equivalences only
+	 * after the whole file has been read, so a failed load is not mistaken for a complete one.
+	 *
+	 * @throws IOException      if the file cannot be opened or closed
+	 * @throws RuntimeException wrapping any failure while reading or parsing a line; the
+	 *                          message names the offending line
+	 */
+	void loadConfusables() throws IOException {
+		BufferedReader br = BagFormatter.openUTF8Reader(indir,
+				"confusables.txt");
+		String line = null;
+		Map equivalences = new HashMap();
+		try {
+			while (true) {
+				line = Utility.readDataLine(br);
+				if (line == null)
+					break;
+				if (line.length() == 0)
+					continue;
+				String[] pieces = Utility.split(line, ';');
+				// part 0 is the source string (hex)
+				String s = Utility.fromHex(pieces[0].trim());
+				// part 1 is the string it is confusable with (hex)
+				String t = Utility.fromHex(pieces[1].trim());
+				// part 2 is the confusable type
+				String type = pieces[2].trim();
+				XEquivalenceClass ec = (XEquivalenceClass) equivalences.get(type);
+				if (ec == null) equivalences.put(type, ec = new XEquivalenceClass(""));
+				ec.add(s, t);
+				//System.out.println(type + ": " + Default.ucd().getCodeAndName(s) + " => " + Default.ucd().getCodeAndName(t));
+			}
+		} catch (Exception e) {
+			// Release the reader, but keep the parse failure as the reported problem.
+			try {
+				br.close();
+			} catch (IOException ignored) {
+				// secondary failure while cleaning up; the original exception matters more
+			}
+			throw (RuntimeException) new RuntimeException("Failure on line "
+					+ line).initCause(e);
+		}
+		br.close();
+		type_equivalences = equivalences;
+	}
+
+	/**
+	 * Returns the strings recorded as confusable with a code point for the given type,
+	 * loading confusables.txt on first use.
+	 *
+	 * @param cp   the code point to look up
+	 * @param type confusable type as it appears in confusables.txt (callers here pass "MA")
+	 * @return the equivalence set reported by the type's XEquivalenceClass, or an empty set
+	 *         when no data exists for that type
+	 * @throws RuntimeException wrapping the IOException if the data file cannot be loaded
+	 */
+	public Set getConfusables(int cp, String type) {
+		if (type_equivalences == null) {
+			try {
+				loadConfusables();
+			} catch (IOException e) {
+				// Previously this returned null, which only moved the failure to the
+				// caller as a NullPointerException without the cause.
+				throw (RuntimeException) new RuntimeException(
+						"Unable to load confusables.txt").initCause(e);
+			}
+		}
+		XEquivalenceClass ec = (XEquivalenceClass) type_equivalences.get(type);
+		if (ec == null) {
+			// no line of this type was read
+			return java.util.Collections.EMPTY_SET;
+		}
+		return ec.getEquivalences(UTF16.valueOf(cp));
+	}

 	void loadWholeScriptConfusables(String filterType) throws IOException {
 		UnicodeSet[][] script_script_set = new UnicodeSet[UScript.CODE_LIMIT][UScript.CODE_LIMIT];
--- a/tools/unicodetools/com/ibm/text/UCD/TestUnicodeInvariants.java
+++ b/tools/unicodetools/com/ibm/text/UCD/TestUnicodeInvariants.java
@ -73,7 +73,7 @@ public class TestUnicodeInvariants {
       int variableCount = 0;
       PrintWriter out = BagFormatter.openUTF8Writer(UCD_Types.GEN_DIR, "UnicodeInvariantResults.txt");
       out.write('\uFEFF'); // BOM
-       BufferedReader in = BagFormatter.openUTF8Reader("", "UnicodeInvariants.txt");
+       BufferedReader in = BagFormatter.openUTF8Reader("com/ibm/text/UCD/", "UnicodeInvariants.txt");
       BagFormatter bf = new BagFormatter();
       bf.setUnicodePropertyFactory(ToolUnicodePropertySource.make(""));
       BagFormatter bf2 = new BagFormatter();
--- a/tools/unicodetools/com/ibm/text/UCD/UCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
-* $Date: 2005/05/02 15:39:53 $
-* $Revision: 1.39 $
+* $Date: 2005/11/01 00:10:54 $
+* $Revision: 1.40 $
 *
 *******************************************************************************
 */
@ -43,7 +43,7 @@ public final class UCD implements UCD_Types {
    /**
     * Used for the default version.
     */
-    public static final String latestVersion = "4.1.0";
+    public static final String latestVersion = "5.1.0";

    /**
     * Create singleton instance for default (latest) version
@ -158,12 +158,16 @@ public final class UCD implements UCD_Types {
     * Get the character names for the code points in a string, separated by ", "
     */
    public String getName(String s, byte style) {
+    	return getName(s, style, ", ");
+    }
+    
+    public String getName(String s, byte style, String separator) {
        if (s.length() == 1) return getName(s.charAt(0), style); // optimize BMP
        StringBuffer result = new StringBuffer();
        int cp;
        for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
            cp = UTF16.charAt(s, i);
-            if (i > 0) result.append(", ");
+            if (i > 0) result.append(separator);
            result.append(getName(cp, style));
        }
        return result.toString();
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
-* $Date: 2005/03/10 02:37:20 $
-* $Revision: 1.31 $
+* $Date: 2005/11/01 00:10:54 $
+* $Revision: 1.32 $
 *
 *******************************************************************************
 */
@ -15,7 +15,7 @@ package com.ibm.text.UCD;

 public interface UCD_Types {
    
-    static final byte BINARY_FORMAT = 16; // bumped if binary format of UCD changes. Forces rebuild   
+    static final byte BINARY_FORMAT = 17; // bumped if binary format of UCD changes. Forces rebuild   
    
    public static final String BASE_DIR = "C:\\DATA\\";
    public static final String UCD_DIR = BASE_DIR + "UCD\\";
--- a/tools/unicodetools/com/ibm/text/UCD/UnicodeInvariants.txt
+++ b/tools/unicodetools/com/ibm/text/UCD/UnicodeInvariants.txt
@ -111,6 +111,21 @@ $XID_Continue ! [$Pattern_Whitespace $Pattern_Syntax]
 $Pattern_Whitespace ! [$XID_Continue $Pattern_Syntax]
 $Pattern_Syntax ! [$XID_Continue $Pattern_Whitespace]

+# Test SA characters
+
+# They are limited to certain scripts:
+Let $SAScripts = [$script:thai $script:lao $script:myanmar $script:khmer]
+$SAScripts ⊇ $LineBreak:SA
+
+# And in those scripts, they are all the alphabetic spacing characters, plus some odd Cf
+[$SAScripts & [$Alphabetic $gc:cf]] = [$SAScripts & [$LineBreak:SA $LineBreak:CM]]
+
+# Try removing M* from alphabetic, and matching to SA
+[$SAScripts & [$Alphabetic $gc:cf - $gcAllMarks]] = $LineBreak:SA
+
+# Try adding M* to alphabetic, and matching to SA
+[$SAScripts & [$Alphabetic $gc:cf $gcAllMarks]] = $LineBreak:SA
+
 # testing
 # [$Pattern_Whitespace $Pattern_Syntax] ! [[^$WB:Format $WB:Other] \u2019 \u0027 \u02BC \u002d \u00ad \u2027 \u058A]
 Let $otherword = [\u2019 \u0027 \u02BC \u002d \u00ad \u2027 \u058A]
--- a/tools/unicodetools/com/ibm/text/utility/ChainException.java
+++ b/tools/unicodetools/com/ibm/text/utility/ChainException.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/ChainException.java,v $
-* $Date: 2001/12/06 00:05:52 $
-* $Revision: 1.3 $
+* $Date: 2005/11/01 00:10:53 $
+* $Revision: 1.4 $
 *
 *******************************************************************************
 */
@ -20,7 +20,7 @@ import java.io.*;
 public class ChainException extends RuntimeException {
    Object[] keyData;
    String messageFormat;
-    Exception chain;
+    //Exception chain;

    public ChainException (String messageFormat, Object[] objects) {
        this.messageFormat = messageFormat;
@ -30,20 +30,20 @@ public class ChainException extends RuntimeException {
    public ChainException (String messageFormat, Object[] objects, Exception chainedException) {
        this.messageFormat = messageFormat;
        keyData = objects == null ? null : (Object[]) objects.clone();
-        chain = chainedException;
+        initCause(chainedException);
    }

    public String getMessage() {
        String chainMsg = "";
-        if (chain != null) {
-            chainMsg = "; " + chain.getClass().getName()
-                + ", " + chain.getMessage();
-            StringWriter w = new StringWriter();
-            PrintWriter p = new PrintWriter(w);
-            chain.printStackTrace(p);
-            chainMsg += ", " + w.getBuffer();
-            p.close();
-        }
+//        if (chain != null) {
+//            chainMsg = "; " + chain.getClass().getName()
+//                + ", " + chain.getMessage();
+//            StringWriter w = new StringWriter();
+//            PrintWriter p = new PrintWriter(w);
+//            chain.printStackTrace(p);
+//            chainMsg += ", " + w.getBuffer();
+//            p.close();
+//        }
        String main = "";
        if (keyData != null) main = MessageFormat.format(messageFormat, keyData);
        return main + chainMsg;
--- a/tools/unicodetools/com/ibm/text/utility/Utility.java
+++ b/tools/unicodetools/com/ibm/text/utility/Utility.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
-* $Date: 2005/06/21 21:28:31 $
-* $Revision: 1.50 $
+* $Date: 2005/11/01 00:10:53 $
+* $Revision: 1.51 $
 *
 *******************************************************************************
 */
@ -700,8 +700,9 @@ public final class Utility implements UCD_Types {    // COMMON UTILITIES
        return result + "}";
    }

-    private static final String[] searchPath = {
+    public static final String[] searchPath = {
        "EXTRAS",
+        "5.0.0",
        "4.1.0",
        "4.0.1",
        "4.0.0",