From 3940ed8c005493e92e020681b2d811443bac3a53 Mon Sep 17 00:00:00 2001
From: Mark Davis <mark@macchiato.com>
Date: Sat, 15 Jun 2002 02:47:14 +0000
Subject: [PATCH] finally did some significant code cleanup on collation. not
 enough, but it's a start

X-SVN-Rev: 8896
---
 .../unicodetools/com/ibm/text/UCA/CEList.java |  13 +-
 tools/unicodetools/com/ibm/text/UCA/Main.java |   7 +-
 tools/unicodetools/com/ibm/text/UCA/UCA.java  | 681 ++++++------------
 .../com/ibm/text/UCA/WriteCollationData.java  | 209 +++---
 .../com/ibm/text/UCA/WriteHTMLCollation.java  |  39 +-
 tools/unicodetools/com/ibm/text/UCD/Main.java |   5 +-
 tools/unicodetools/com/ibm/text/UCD/UCD.java  |  72 +-
 .../com/ibm/text/UCD/UCD_Types.java           |  17 +-
 .../unicodetools/com/ibm/text/UCD/UData.java  |   6 +-
 .../com/ibm/text/UCD/VerifyUCD.java           |  25 +-
 .../com/ibm/text/utility/IntStack.java        |  75 +-
 11 files changed, 522 insertions(+), 627 deletions(-)

diff --git a/tools/unicodetools/com/ibm/text/UCA/CEList.java b/tools/unicodetools/com/ibm/text/UCA/CEList.java
index 0e6ae21874b..81def32e15b 100644
--- a/tools/unicodetools/com/ibm/text/UCA/CEList.java
+++ b/tools/unicodetools/com/ibm/text/UCA/CEList.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/CEList.java,v $ 
-* $Date: 2002/05/31 01:41:03 $ 
-* $Revision: 1.4 $
+* $Date: 2002/06/15 02:47:12 $ 
+* $Revision: 1.5 $
 *
 *******************************************************************************
 */
@@ -165,6 +165,15 @@ public final class CEList implements java.lang.Comparable, UCD_Types {
         return result.toString();
     }
     
+    public static String toString(IntStack ces) {
+        StringBuffer result = new StringBuffer();
+        for (int i = 0; i < ces.length(); ++i) {
+            if (i != 0) result.append(' ');
+            result.append(toString(ces.get(i)));
+        }
+        return result.toString();
+    }
+    
     public static String toString(int ce) {
         return "[" + Utility.hex(UCA.getPrimary(ce)) + "." 
           + Utility.hex(UCA.getSecondary(ce)) + "."
diff --git a/tools/unicodetools/com/ibm/text/UCA/Main.java b/tools/unicodetools/com/ibm/text/UCA/Main.java
index 03bbb3a8649..203bebd66bf 100644
--- a/tools/unicodetools/com/ibm/text/UCA/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCA/Main.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $ 
-* $Date: 2002/06/04 01:59:01 $ 
-* $Revision: 1.5 $
+* $Date: 2002/06/15 02:47:12 $ 
+* $Revision: 1.6 $
 *
 *******************************************************************************
 */
@@ -18,7 +18,8 @@ import com.ibm.text.utility.*;
 
 public class Main {
 	static final String UCDVersion = "";
-	static final String[] ICU_FILES = {"writeCollationValidityLog", "FractionalUCA", "writeconformance", "writeconformanceshifted", 
+	static final String[] ICU_FILES = {"writeCollationValidityLog", "FractionalUCA",
+	    "writeconformance", "writeconformanceshifted", 
 		"WriteRules", "WriteRulesWithNames", "WriteRulesXML"};
 	
 	public static void main(String args[]) throws Exception {
diff --git a/tools/unicodetools/com/ibm/text/UCA/UCA.java b/tools/unicodetools/com/ibm/text/UCA/UCA.java
index 8f3e1cb7953..ffd674516cf 100644
--- a/tools/unicodetools/com/ibm/text/UCA/UCA.java
+++ b/tools/unicodetools/com/ibm/text/UCA/UCA.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA.java,v $ 
-* $Date: 2002/06/04 01:58:56 $ 
-* $Revision: 1.13 $
+* $Date: 2002/06/15 02:47:12 $ 
+* $Revision: 1.14 $
 *
 *******************************************************************************
 */
@@ -24,6 +24,7 @@ import com.ibm.text.UCD.Normalizer;
 import com.ibm.text.UCD.UCD;
 import com.ibm.text.utility.*;
 import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
 
 //import com.ibm.text.CollationData.*;
 
@@ -62,7 +63,7 @@ This is because of shared
 characters between scripts with different directions, like French with Arabic or Greek.
 */
 
-final public class UCA implements Comparator {
+final public class UCA implements Comparator, UCA_Types {
     public static final String copyright = 
       "Copyright (C) 2000, IBM Corp. and others. All Rights Reserved.";
       
@@ -85,19 +86,13 @@ final public class UCA implements Comparator {
     // base directory will change depending on the installation
     public static final String BASE_DIR = "c:\\DATA\\";
     
-    /** Enum for alternate handling */
-    public static final byte SHIFTED = 0, ZEROED = 1, NON_IGNORABLE = 2, SHIFTED_TRIMMED = 3, LAST = 3;
-    
-    /**
-     * Used to terminate a list of CEs
-     */
-    public static final int TERMINATOR = 0xFFFFFFFF;   // CE that marks end of string
-         
     
 // =============================================================
 // Test Settings
 // =============================================================
     static final boolean DEBUG = false;
+    static final boolean DEBUG_SHOW_LINE = false;
+    
     static final boolean SHOW_STATS = true;
     
     static final boolean SHOW_CE = false;
@@ -109,6 +104,7 @@ final public class UCA implements Comparator {
     static final boolean RECORDING_CHARS = true;
     
     private UCD ucd;
+    private UCA_Data ucaData;
     
 // =============================================================
 // Main Methods
@@ -121,11 +117,7 @@ final public class UCA implements Comparator {
      */
     public UCA(BufferedReader source, String unicodeVersion) throws java.io.IOException {
         fullData = source == null;
-
-        // clear some tables
-        for (int i = 0; i < collationElements.length; ++i) {
-            collationElements[i] = UNSUPPORTED;
-        }
+        
         // load the normalizer
         if (toD == null) {
             toD = new Normalizer(Normalizer.NFD, unicodeVersion);
@@ -134,6 +126,8 @@ final public class UCA implements Comparator {
         ucd = UCD.make(unicodeVersion);
         ucdVersion = ucd.getVersion();
         
+        ucaData = new UCA_Data(toD, ucd);
+        
         // either get the full sources, or just a demo set
         if (fullData) {
             for (int i = 0; i < KEYS.length; ++i) {
@@ -234,7 +228,7 @@ final public class UCA implements Comparator {
             }
             if (SHOW_CE) {
                 if (debugList.length() != 0) debugList.append("/");
-                debugList.append(ceToString(ce));
+                debugList.append(CEList.toString(ce));
             }
             
             // add weights
@@ -412,6 +406,35 @@ final public class UCA implements Comparator {
         return target;
     }
     
+    /**
+     * Pushes the collation elements (CEs) for a whole string onto a stack.
+     * @param sourceString string to get the CEs for.
+     * @param decomposition true for UCA, false where the text is guaranteed to be
+     * normalization form C with no combining marks of class 0.
+     * @param output stack that receives each nonzero CE, in order. It is not cleared
+     * on entry, and no TERMINATOR is pushed at the end.
+     */
+    public void getCEs(String sourceString, boolean decomposition, IntStack output) {
+        decompositionBuffer.setLength(0);
+        if (decomposition) {
+            toD.normalize(sourceString, decompositionBuffer);
+        } else {
+            decompositionBuffer.append(sourceString);
+        }
+        rearrangeBuffer = EMPTY;            // clear the rearrange buffer (thai)
+        index = 0;
+
+        // collect CEs until TERMINATOR, skipping zero CEs
+        while (true) {
+            //fixQuaternatiesPosition = quaternaries.length();
+            int ce = getCE();
+            if (ce == 0) continue;
+            if (ce == TERMINATOR) break;
+            output.push(ce);
+        }
+    }
+    
+    
     /**
      * Returns a list of CEs for a unicode character at a position.
      * @param sourceString string to make a sort key for.
@@ -477,14 +500,6 @@ final public class UCA implements Comparator {
         return strength == 1 ? primarySet : strength == 2 ? secondarySet : tertiarySet;
     }
      
-    /**
-     * CE Type
-     */
-    static final byte NORMAL_CE = 0, CONTRACTING_CE = 1, EXPANDING_CE = 2, 
-        CJK_CE = 3, CJK_AB_CE = 4, HANGUL_CE = 5, UNSUPPORTED_CE = 7,
-        FIXED_CE = 3;
-        // SURROGATE_CE = 6, 
-   
     /**
      * Returns the char associated with a FIXED value
      */
@@ -497,28 +512,7 @@ final public class UCA implements Comparator {
      * Return the type of the CE
      */
     public byte getCEType(int ch) {
-        
-        if (ch > 0xFFFF) ch = UTF16.getLeadSurrogate(ch); // first if expands
-        
-        int ce = collationElements[ch];
-        if ((ce & EXCEPTION_CE_MASK) != EXCEPTION_CE_MASK) return NORMAL_CE;
-        if (ce == UNSUPPORTED) {
-            
-            // Special check for Han, Hangul
-            if (isHangul(ch)) return HANGUL_CE;
-            
-            if (isCJK(ch)) return CJK_CE;
-            if (isCJK_AB(ch)) return CJK_AB_CE;
-                        
-            // special check for unsupported surrogate pair, 20 1/8 bits
-            //if (0xD800 <= ch && ch <= 0xDFFF) {
-            //    return SURROGATE_CE;
-            //}
-            return UNSUPPORTED_CE;
-        }
-            
-        if (ce == CONTRACTING) return CONTRACTING_CE;
-        return EXPANDING_CE;
+        return ucaData.getCEType(ch);
     }
 
     /**
@@ -604,19 +598,11 @@ final public class UCA implements Comparator {
         return result.toString();
     }
     
-    /**
-     * Produces a human-readable string for a collation element
-     */
-    static public String ceToString(int ce) {
-        return "[" + Utility.hex(getPrimary(ce)) + "." 
-          + Utility.hex(getSecondary(ce)) + "."
-          + Utility.hex(getTertiary(ce)) + "]";
-    }
-    
     /**
      * Produces a human-readable string for a collation element.
      * value is terminated by -1!
      */
+     /*
     static public String ceToString(int[] ces, int len) {
         StringBuffer result = new StringBuffer();
         for (int i = 0; i < len; ++i) {
@@ -624,11 +610,13 @@ final public class UCA implements Comparator {
         }
         return result.toString();
     }
+    */
     
     /**
      * Produces a human-readable string for a collation element.
      * value is terminated by -1!
      */
+     /*
     static public String ceToString(int[] ces) {
         StringBuffer result = new StringBuffer();
         for (int i = 0; ; ++i) {
@@ -637,7 +625,7 @@ final public class UCA implements Comparator {
         }
         return result.toString();
     }
-    
+    */
     
     static boolean isImplicitLeadCE(int ce) {
     	return isImplicitLeadPrimary(getPrimary(ce));
@@ -670,10 +658,10 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
      * and to get the second part use (x & 0xFFFF)
      */
     
-    static void CodepointToImplicit(int cp, int[] output) {
+    void CodepointToImplicit(int cp, int[] output) {
 		int base = UNSUPPORTED_OTHER_BASE;
-        if (isCJK(cp)) base = UNSUPPORTED_CJK_BASE;
-        else if (isCJK_AB(cp)) base = UNSUPPORTED_CJK_AB_BASE;
+        if (ucd.isCJK_BASE(cp)) base = UNSUPPORTED_CJK_BASE;
+        else if (ucd.isCJK_AB(cp)) base = UNSUPPORTED_CJK_AB_BASE;
         output[0] = base + (cp >>> 15);
         output[1] = (cp & 0x7FFF) | 0x8000;
     }
@@ -768,6 +756,9 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
 // Privates
 // =============================================================
     
+    
+    IntStack expandingStack = new IntStack(10);
+    
     /**
      * Array used to reorder surrogates to top of 16-bit range, and others down.
      * Adds 2000 to D800..DFFF, making them F800..FFFF
@@ -847,77 +838,13 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
 // Collation Element Memory Data Table Formats
 // =============================================================
 
-    /**
-     * Used to composed Hangul and Han characters
-     */
-     
-    static final int NEUTRAL_SECONDARY = 0x20;
-    static final int NEUTRAL_TERTIARY = 0x02;
-    
     /**
      * Temporary buffer used in getSortKey for the decomposed string
      */
     private StringBuffer decompositionBuffer = new StringBuffer();
     
-    /**
-     * The collation element data is stored a couple of different structures.
-     * First is collationElements, which generally contains the 32-bit CE corresponding
-     * to the data. It is directly indexed by character code.<br>
-     * For brevity in the implementation, we just use a flat array.
-     * A real implementation would use a multi-stage table, as described in TUS Section 5.
-     * table of simple collation elements, indexed by char.<br>
-     * Exceptional cases: expanding, contracting, unsupported are handled as described below.
-     */
-    private int[] collationElements = new int[65536];
-    
-    /**
-     * A special bit combination in a CE is used to reserve exception cases. This has the effect
-     * of removing a small number of the primary key values out of the 65536 possible.
-     */
-    private static final int EXCEPTION_CE_MASK = 0xF8000000;
-    
-       
-    /**
-     * Any unsupported characters (those not in the UCA data tables) 
-     * are marked with a exception bit combination
-     * so that they can be treated specially.<br>
-     * There are at least 34 values, so that we can use a range for surrogates
-     * However, we do add to the first weight if we have surrogate pairs!
-     */
-    private static final int UNSUPPORTED_CJK_BASE = 0xFF40;
-    private static final int UNSUPPORTED_CJK_AB_BASE = 0xFF80;
-    private static final int UNSUPPORTED_OTHER_BASE = 0xFFC0;
-    
-    private static final int UNSUPPORTED_BASE = UNSUPPORTED_CJK_BASE;
-    private static final int UNSUPPORTED_LIMIT = UNSUPPORTED_OTHER_BASE + 0x40;
-    
-    private static final int UNSUPPORTED = makeKey(UNSUPPORTED_BASE, NEUTRAL_SECONDARY, NEUTRAL_TERTIARY);
-    
     // was 0xFFC20101;
     
-    /**
-     * Contracting characters are marked with a exception bit combination 
-     * in the collationElement table.
-     * This means that they are the first character of a contraction, and need
-     * to be looked up (with following characters) in the contractingTable.<br>
-     * This isn't a MASK since there is exactly one value.
-     */
-    private static final int CONTRACTING = 0xFA310000;
-
-    /**
-     * Expanding characters are marked with a exception bit combination
-     * in the collationElement table.
-     * This means that they map to more than one CE, which is looked up in
-     * the expansionTable by index. See EXCEPTION_INDEX_MASK
-     */
-    private static final int EXPANDING_MASK = 0xFA300000; // marks expanding range start
-    
-    /**
-     * This mask is used to get the index from an EXPANDING exception.
-     * The contracting characters can also make use of this in a future optimization.
-     */
-    static final int EXCEPTION_INDEX_MASK = 0x0000FFFF;
- 
     /**
      * We take advantage of the variables being in a closed range to save a bit per CE.
      * The low and high values are initially set to be at the opposite ends of the range,
@@ -931,27 +858,18 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
     private int variableLowCE;  // used for testing against
     private int variableHighCE; // used for testing against
     
-    /**
-     * Although a single character can expand into multiple CEs, we don't want to burden
-     * the normal case with the storage. So, they get a special value in the collationElements
-     * array. This value has a distinct primary weight, followed by an index into a separate
-     * table called expandingTable. All of the CEs in that table, up to a TERMINATOR value
-     * will be used for the expansion. The implementation is as a stack; this just makes it
-     * easy to generate.
-     */
-    private IntStack expandingTable = new IntStack(3600); // initial number is from compKeys
-        
-    /**
-     * For now, this is just a simple mapping of strings to collation elements.
-     * The implementation depends on the contracting characters being "completed",
-     * so that it can be efficiently determined when to stop looking.
-     */
-    private Hashtable contractingTable = new Hashtable();
+    /*
     
-    /**
-     *  Special char value that means failed or terminated
-     */
-    private static final char NOT_A_CHAR = '\uFFFF';
+    private void fixSurrogateContraction(char ch) {
+        //if (DEBUGCHAR) System.out.println(Utility.hex(ch) + ": " + line.substring(0, position[0]) + "|" + line.substring(position[0]));            
+        if (ch == NOT_A_CHAR || !UTF16.isLeadSurrogate(ch)) return;
+        String chs = String.valueOf(ch);
+        Object probe = contractingTable.get(chs);
+        if (probe != null) return;
+        contractingTable.put(chs, new Integer(UNSUPPORTED));
+    }
+    
+    */
     
     /**
      * Marks whether we are using the full data set, or an abbreviated version for
@@ -965,11 +883,6 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
 // Made part of the object to avoid reallocating each time.
 // =============================================================
 
-    /**
-     * Stack for expanding characters
-     */
-    private IntStack expandingStack = new IntStack(100);
-    
     /**
      * Temporary buffers used in getSortKey to store weights
      * these are NOT strings of Unicode characters--they are
@@ -990,8 +903,6 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
      * Temporary with requested decomposition
      */
     boolean storedDecomposition;
-    int hangulHackBottom;
-    int hangulHackTop;
     
     /**
      * Used for supporting Thai rearrangement
@@ -1015,7 +926,7 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
      * (normalized) character code.
      */
     private int getCE() {
-        if (!expandingStack.isEmpty()) return expandingStack.pop();
+        if (!expandingStack.isEmpty()) return expandingStack.popFront();
         char ch;
         
         // Fetch next character. Handle rearrangement for Thai, etc.
@@ -1037,190 +948,56 @@ CP => [.AAAA.0020.0002.][.BBBB.0000.0000.]
             }
         }
         
-        int ce = collationElements[ch];
-        
-        // Hangul tailoring hack
-        //if (!storedDecomposition && hangulHackBottom <= ce && ce < hangulHackTop) return fixJamo(ch, ce);   // hard coded fix!!
-
-        // if the CE is not exceptional (unsupported, contracting, expanding) we are done.
-        if ((ce & EXCEPTION_CE_MASK) != EXCEPTION_CE_MASK) return ce;
-        
-        if (ce == UNSUPPORTED) {
-            int bigChar = ch;
+        index = ucaData.get(ch, decompositionBuffer, index, expandingStack);
+        int ce = expandingStack.popFront(); // pop first (guaranteed to exist!)
+        if (ce == UNSUPPORTED_FLAG) {
+            return handleUnsupported(ch);
+        }
+        return ce;
+    }
+    
+    private int handleUnsupported(char ch) {
+        int bigChar = ch;
             
-            // Special check for Hangul
-            if (isHangul(bigChar)) {
-                // MUST DECOMPOSE!!
-                hangulBuffer = new StringBuffer();
-                decomposeHangul(bigChar, hangulBuffer);
-                return getCE();
-                // RECURSIVE!!!
-            }
+        // Special check for Hangul
+        if (ucd.isHangulSyllable(bigChar)) {
+            // MUST DECOMPOSE!!
+            hangulBuffer = new StringBuffer();
+            decomposeHangul(bigChar, hangulBuffer);
+            return getCE();
+            // RECURSIVE!!!
+        }
+        
+        // special check and fix for unsupported surrogate pair, 20 1/8 bits
+        if (0xD800 <= bigChar && bigChar <= 0xDFFF) {
+            // ignore unmatched surrogates (e.g. return zero)
+            if (bigChar >= 0xDC00 || index >= decompositionBuffer.length()) return 0; // unmatched
+            int ch2 = decompositionBuffer.charAt(index);
+            if (ch2 < 0xDC00 || 0xDFFF < ch2) return 0;  // unmatched
+            index++; // skip next char
+            bigChar = 0x10000 + ((ch - 0xD800) << 10) + (ch2 - 0xDC00); // extract value
+        }
+
                         
-            if (ucd.isNoncharacter(bigChar)) { // illegal code value, ignore!!
-                return 0;
-            }
+        if (ucd.isNoncharacter(bigChar)) { // illegal code value, ignore!!
+            return 0;
+        }
             
-            // special check and fix for unsupported surrogate pair, 20 1/8 bits
-            if (0xD800 <= bigChar && bigChar <= 0xDFFF) {
-                // ignore unmatched surrogates (e.g. return zero)
-                if (bigChar >= 0xDC00 || index >= decompositionBuffer.length()) return 0; // unmatched
-                int ch2 = decompositionBuffer.charAt(index);
-                if (ch2 < 0xDC00 || 0xDFFF < ch2) return 0;  // unmatched
-                index++; // skip next char
-                bigChar = 0x10000 + ((ch - 0xD800) << 10) + (ch2 - 0xDC00); // extract value
-            }
-
-			// find the implicit values; returned in 0 and 1
-			int[] implicit = new int[2];
-			CodepointToImplicit(bigChar, implicit);
+		// find the implicit values; returned in 0 and 1
+		int[] implicit = new int[2];
+		CodepointToImplicit(bigChar, implicit);
 			
-            // Now compose the two keys
-            // first push BBBB, which is #1
+        // Now compose the two keys
+            
+        // push BBBB
                         
-            expandingStack.push(makeKey(implicit[1], NEUTRAL_SECONDARY, NEUTRAL_TERTIARY));
+        expandingStack.push(makeKey(implicit[1], NEUTRAL_SECONDARY, NEUTRAL_TERTIARY));
+        
+        // return AAAA
             
-            // now return AAAA, which is #0
-            
-            return makeKey(implicit[0], NEUTRAL_SECONDARY, NEUTRAL_TERTIARY);
+        return makeKey(implicit[0], NEUTRAL_SECONDARY, NEUTRAL_TERTIARY);
+        
 
-        }
-        if (ce == CONTRACTING) {
-            // Contracting is probably the most interesting (read "tricky") part
-            // of the algorithm.
-            // First get longest substring that is in the contracting table.
-            // For simplicity, we use a hash table for contracting.
-            // There are much better optimizations, 
-            // but they take a more complicated build algorithm than we want to show here.
-            // NOTE: We are guaranteed that the character itself is in the contracting table because
-            // of the build process.
-            String probe = String.valueOf(ch);
-            Object value = contractingTable.get(probe);
-            if (value == null) throw new IllegalArgumentException("Missing value for " + Utility.hex(ch));
-            
-            // We loop, trying to add successive characters to the longest substring.
-            while (index < decompositionBuffer.length()) {
-                char ch2 = decompositionBuffer.charAt(index);
-                
-                // see whether the current string plus the next char are in
-                // the contracting table.
-                String newProbe = probe + ch2;
-                Object newValue = contractingTable.get(newProbe);
-                if (newValue == null) break;    // stop if not in table.
-                
-                // We succeeded--so update our new values, and set index
-                // and quaternary to indicate that we swallowed another character.
-                probe = newProbe;
-                value = newValue;
-                index++;
-            }
-            
-            // Now, see if we can add any combining marks
-            short lastCan = 0;
-            for (int i = index; i < decompositionBuffer.length(); ++i) {
-                // We only take certain characters. They have to be accents,
-                // and they have to not be blocked.
-                // Unlike above, if we don't find a match (and it was an accent!)
-                // then we don't stop, we continue looping.
-                char ch2 = decompositionBuffer.charAt(i);
-                short can = toD.getCanonicalClass(ch2);
-                if (can == 0) break;            // stop with any zero (non-accent)
-                if (can == lastCan) continue;   // blocked if same class as last
-                lastCan = can;                  // remember for next time
-                
-                // Now see if we can successfully add it onto our string
-                // and find it in the contracting table.
-                String newProbe = probe + ch2;
-                Object newValue = contractingTable.get(newProbe);
-                if (newValue == null) continue;
-
-                // We succeeded--so update our new values, remove the char, and update
-                // quaternary to indicate that we swallowed another character.
-                probe = newProbe;
-                value = newValue;
-                decompositionBuffer.setCharAt(i,'\u0000');  // zero char
-            }
-            
-            // we are all done, and can extract the CE from the last value set.
-            ce = ((Integer)value).intValue();
-            // if the CE is not exceptional (unsupported expanding) we are done.
-            // BTW we will never have a contracting CE at this point.
-            if ((ce & EXCEPTION_CE_MASK) != EXCEPTION_CE_MASK) return ce;
-            // otherwise fall through to expansion
-        }
-        // expanding, so copy list of items onto stack
-        int index = ce & EXCEPTION_INDEX_MASK; // get index
-        // copy onto stack from index until reach TERMINATOR
-        while (true) {
-            ce = expandingTable.get(index++);
-            if (ce == TERMINATOR) break;
-            expandingStack.push(ce);
-        }
-        return expandingStack.pop(); // pop last (guaranteed to exist!)
-    }
-    
-    // Neither Mapped nor Composite CJK: [\u3400-\u4DB5\u4E00-\u9FA5\U00020000-\U0002A6D6]
-    
-    public static boolean isCJK(int cp) {
-        return (CJK_BASE <= cp && cp < CJK_LIMIT 
-        || cp == 0xFA0E	// compat characters that don't decompose.
-        || cp == 0xFA0F
-        || cp == 0xFA11
-        || cp == 0xFA13
-        || cp == 0xFA14
-        || cp == 0xFA1F
-        || cp == 0xFA21
-        || cp == 0xFA23
-        || cp == 0xFA24
-        || cp == 0xFA27
-        || cp == 0xFA28
-        || cp == 0xFA29
-        || cp == 0xFA2E
-        || cp == 0xFA2F
-        );
-    }
-    
-    public static final int 
-    	CJK_BASE = 0x4E00,
-    	CJK_LIMIT = 0x9FFF+1,
-    	CJK_COMPAT_USED_BASE = 0xFA0E,
-    	CJK_COMPAT_USED_LIMIT = 0xFA2F+1,
-    	CJK_A_BASE = 0x3400,
-    	CJK_A_LIMIT = 0x4DBF+1,
-    	CJK_B_BASE = 0x20000,
-    	CJK_B_LIMIT = 0x2A6DF+1;
-    
-    public static final boolean isCJK_AB(int bigChar) {
-        return (CJK_A_BASE <= bigChar && bigChar < CJK_A_LIMIT
-             || CJK_B_BASE <= bigChar && bigChar < CJK_B_LIMIT);
-    }
-/*
-2E80..2EFF; CJK Radicals Supplement
-2F00..2FDF; Kangxi Radicals
-
-3400..4DBF; CJK Unified Ideographs Extension A
-4E00..9FFF; CJK Unified Ideographs
-F900..FAFF; CJK Compatibility Ideographs
-
-20000..2A6DF; CJK Unified Ideographs Extension B
-2F800..2FA1F; CJK Compatibility Ideographs Supplement
-
-Compat:
-# F900..FA0D     [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
-# FA10                 CJK COMPATIBILITY IDEOGRAPH-FA10
-# FA12                 CJK COMPATIBILITY IDEOGRAPH-FA12
-# FA15..FA1E      [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E
-# FA20                 CJK COMPATIBILITY IDEOGRAPH-FA20
-# FA22                 CJK COMPATIBILITY IDEOGRAPH-FA22
-# FA25..FA26       [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
-# FA2A..FA2D       [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D
-# FA30..FA6A      [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
-# 2F800..2FA1D   [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
-
-*/
-    
-    private final boolean isHangul(int bigChar) {
-        return (0xAC00 <= bigChar && bigChar <= 0xD7A3);
     }
     
     /**
@@ -1287,12 +1064,12 @@ Compat:
      */
     private int count1 = 0, count2 = 0, count3 = 0, max2 = 0, max3 = 0;
     private int oldKey1 = -1, oldKey2 = -1, oldKey3 = -1;
-    Map multiTable = new TreeMap();
-    BitSet found = new BitSet();
+    UnicodeSet found = new UnicodeSet();
     
-    public Hashtable getContracting() {
+    /*public Hashtable getContracting() {
         return new Hashtable(multiTable);
     }
+    */
     
     public UCAContents getContents(byte ceLimit, Normalizer skipDecomps) {
         return new UCAContents(ceLimit, skipDecomps, ucdVersion);
@@ -1317,6 +1094,16 @@ Compat:
             this.ceLimit = ceLimit;
             this.nfd = new Normalizer(Normalizer.NFD, unicodeVersion);
             this.skipDecomps = skipDecomps;
+            
+            // FIX SAMPLES
+            if (SAMPLE_RANGES[0][0] == 0) {
+                for (int i = 0; ; ++i) { // add first unallocated character
+                    if (!ucd.isAssigned(i)) {
+                        SAMPLE_RANGES[0][0] = i;
+                        break;
+                    }
+                }
+            }
         }
         
         /**
@@ -1334,7 +1121,9 @@ Compat:
             
             // normal case
             while (current++ < 0x10FFFF) {
-
+                if (current == 0x406) {
+                    System.out.println("DEBUG");
+                }
                 //char ch = (char)current;
                 byte type = getCEType(current);
                 if (type >= ceLimit || type == CONTRACTING_CE) continue;
@@ -1349,15 +1138,18 @@ Compat:
             }
             
             // contractions
-            if (enum == null) enum = multiTable.keySet().iterator();
-            if (enum.hasNext()) {
+            if (enum == null) enum = ucaData.getContractions();
+            while (enum.hasNext()) {
                 result = (String)enum.next();
+                if (result.length() == 1 && UTF16.isLeadSurrogate(result.charAt(0))) {
+                    //System.out.println("Skipping " + ucd.getCodeAndName(result));
+                    continue; // try again
+                }
                 return result;
             }
             
             // extra samples
             if (currentRange < SAMPLE_RANGES.length) {
-            	System.out.println("*");
                 try {
                     result = UTF16.valueOf(itemInRange);
                 } catch (RuntimeException e) {
@@ -1372,10 +1164,11 @@ Compat:
                         endOfRange = SAMPLE_RANGES[currentRange].length > 1
                             ? SAMPLE_RANGES[currentRange][1]
                             : startOfRange;
-                        skip = ((endOfRange - startOfRange) / 513);
+                        //skip = ((endOfRange - startOfRange) / 3);
                     }
-                } else if (itemInRange > startOfRange + 9 && itemInRange < endOfRange - 9 - skip) {
-                    itemInRange += skip;
+                } else if (itemInRange > startOfRange + 5 && itemInRange < endOfRange - 5 /* - skip*/) {
+                    //itemInRange += skip;
+                    itemInRange = endOfRange - 5;
                 }
             }
             
@@ -1410,14 +1203,16 @@ Compat:
     }
     
     static final int[][] SAMPLE_RANGES = {
-                {0x10000},
-                {0x10FFFF},
-                {0x0220},
+                {0}, // LEAVE EMPTY--Turns into first unassigned character
                 {0xFFF0}, 
                 {0xD800},
                 {0xDFFF},
                 {0xFFFE},
                 {0xFFFF},
+                {0x10000},
+                {0xC0000},
+                {0xD0000},
+                {0x10FFFF},
                 {0x10FFFE},
                 {0x10FFFF},
                 {0x3400, 0x4DB5},
@@ -1426,7 +1221,7 @@ Compat:
                 {0xA000, 0xA48C},
                 {0xE000, 0xF8FF},
                 {0x20000, 0x2A6D6},
-                {0xE0000, 0xE00FF},
+                {0xE0000, 0xE007E},
                 {0xF0000, 0xF00FD},
                 {0xFFF00, 0xFFFFD},
                 {0x100000, 0x1000FD},
@@ -1438,7 +1233,7 @@ Compat:
      * Values will override any previous mappings.
      */
     private void addCollationElements(BufferedReader in) throws java.io.IOException {
-        IntStack tempStack = new IntStack(100); // used for reversal
+        IntStack tempStack = new IntStack(100);
         StringBuffer multiChars = new StringBuffer(); // used for contracting chars
         String inputLine = "";
         boolean[] wasImplicitLeadPrimary = new boolean[1];
@@ -1448,6 +1243,10 @@ Compat:
             if (inputLine == null) break;       // means file is done
             String line = cleanLine(inputLine); // remove comments, extra whitespace
             if (line.length() == 0) continue;   // skip empty lines
+            
+            if (DEBUG_SHOW_LINE) {
+                System.out.println("Processing: " + inputLine);
+            } 
 
             position[0] = 0;                    // start at front of line
             if (line.startsWith("@version")) {
@@ -1464,29 +1263,21 @@ Compat:
             }
             
             // collect characters
-            char value = getChar(line, position);
-            fixSurrogateContraction(value);
-            char value2 = getChar(line, position);
             multiChars.setLength(0);            // clear buffer
-            if (value2 != NOT_A_CHAR) {
-                fixSurrogateContraction(value2);
-                multiChars.append(value);       // append until we get terminator
+            
+            char value = getChar(line, position);
+            multiChars.append(value);
+            
+            //fixSurrogateContraction(value);
+            char value2 = getChar(line, position);
+            // append until we get terminator
+            while (value2 != NOT_A_CHAR) {
                 multiChars.append(value2);
-                while (true) {
-                    value2 = getChar(line, position);
-                    if (value2 == NOT_A_CHAR) break;
-                    fixSurrogateContraction(value2);
-                    multiChars.append(value2);
-                }
+                value2 = getChar(line, position);
             }
+
             if (RECORDING_CHARS) {
-                if (multiChars.length() > 1) {
-                    multiTable.put(multiChars.toString(), "");
-                }
-                found.set(value);
-                for (int i = 1; i < multiChars.length(); ++i) {
-                    found.set(multiChars.charAt(i));
-                }
+                found.addAll(multiChars.toString());
             }
             if (!fullData && RECORDING_DATA) {
                 if (value == 0 || value == '\t' || value == '\n' || value == '\r'
@@ -1522,141 +1313,69 @@ Compat:
                     }
                 }
             }
-            if (ce2 != TERMINATOR) { // have expanding character!
-                // put list into the expanding table
-                // use a temporary stack to get them in reverse order
-                tempStack.push(ce);
-                tempStack.push(ce2);
-                // set collationElement to exception value, plus index
-                ce = EXPANDING_MASK | expandingTable.getTop();
-                while (true) {
-                    ce2 = getCEFromLine(value, line, position, record, wasImplicitLeadPrimary);
-                    if (ce2 == TERMINATOR) break;
-                    tempStack.push(ce2);
-                } 
-                // push onto expanding table, now in reverse order
-                while (!tempStack.isEmpty()) expandingTable.push(tempStack.pop());
-                expandingTable.push(TERMINATOR);
-            }
             
-            //if (value == 0xd801) System.out.print("DEBUG: " + line);
-            	
-            // assign CE(s) to char(s)
-            if (multiChars.length() > 0) {
-                contractingTable.put(multiChars.toString(), new Integer(ce));
-                if (collationElements[value] == UNSUPPORTED) {
-                    collationElements[value] = CONTRACTING; // mark special
-                } else if (collationElements[value] != CONTRACTING) {
-                    // move old value to contracting table!
-                    contractingTable.put(String.valueOf(value), new Integer(collationElements[value]));
-                    collationElements[value] = CONTRACTING; // signal we must look up in table
-                }
-            } else if (collationElements[value] == CONTRACTING) {
-                // must add old value to contracting table!
-                contractingTable.put(String.valueOf(value), new Integer(ce));
-            } else {
-                collationElements[value] = ce; // normal
-            }
-        //} catch (Exception e) {
-          //  throw new IllegalArgumentException("Malformed line: " + inputLine + "\n " 
-            //  + e.getClass().getName() + ": " + e.getMessage());
+            tempStack.clear();
+            tempStack.push(ce);
+            
+            while (ce2 != TERMINATOR) {
+                tempStack.push(ce2);
+                ce2 = getCEFromLine(value, line, position, record, wasImplicitLeadPrimary);
+                if (ce2 == TERMINATOR) break;
+            } 
+            
+            ucaData.add(multiChars, tempStack);
+            
         } catch (RuntimeException e) {
             System.out.println("Error on line: " + inputLine);
             throw e;
         }
     }
     
-    private void fixSurrogateContraction(char ch) {
-        //if (DEBUGCHAR) System.out.println(Utility.hex(ch) + ": " + line.substring(0, position[0]) + "|" + line.substring(position[0]));            
-        if (ch == NOT_A_CHAR || !UTF16.isLeadSurrogate(ch)) return;
-        String chs = String.valueOf(ch);
-        Object probe = contractingTable.get(chs);
-        if (probe != null) return;
-        contractingTable.put(chs, new Integer(0));
-    }
-    
+    /*
     private void concat(int[] ces1, int[] ces2) {
         
     }
-    
-    private void add(String source, int[] ces, int ceLen) {
-        
-        int ce;
-        if (ceLen < 1) {
-            throw new IllegalArgumentException("CE too short: " + ceLen);
-        } else if (ceLen == 1) {
-            ce = ces[0];
-        } else {
-            ce = EXPANDING_MASK | expandingTable.getTop();
-            for (int i = 0; i < ceLen; ++i) {
-                expandingTable.push(ces[i]);
-            }
-        }
-        
-        // assign CE(s) to char(s)
-        int value = source.charAt(0);
-        //if (value == 0x10000) System.out.print("DEBUG2: " + source);
-            	        
-        if (source.length() > 0) {
-            contractingTable.put(source.toString(), new Integer(ce));
-            if (collationElements[value] == UNSUPPORTED) {
-                collationElements[value] = CONTRACTING; // mark special
-            } else if (collationElements[value] != CONTRACTING) {
-                // move old value to contracting table!
-                contractingTable.put(String.valueOf(value), new Integer(collationElements[value]));
-                collationElements[value] = CONTRACTING; // signal we must look up in table
-            }
-        } else if (collationElements[value] == CONTRACTING) {
-            // must add old value to contracting table!
-            contractingTable.put(source, new Integer(ce));
-        } else {
-            collationElements[source.charAt(0)] = ce; // normal
-        }
-    }
+    */
     
     /**
      * Checks the internal tables corresponding to the UCA data.
      */
     private void cleanup() {
         
-        // at this point, we have to guarantee that the contractingTable is CLOSED
-        // e.g. if a substring of length n is in the table, then the first n-1 characters
-        // are also!!
+        ucaData.checkConsistency();
+
+        Map missingStrings = new HashMap();
+        Map tempMap = new HashMap();
         
-        
-/*
-0FB2 0F71 ; [.124E.0020.0002.0FB2][.125F.0020.0002.0F71] # TIBETAN SUBJOINED LETTER RA + TIBETAN VOWEL SIGN AA
-0FB3 0F71 ; [.1250.0020.0002.0FB3][.125F.0020.0002.0F71] # TIBETAN SUBJOINED LETTER LA + TIBETAN VOWEL SIGN AA
-        int[] temp1 = int[20];
-        int[] temp2 = int[20];
-        int[] temp3 = int[20];
-        getCEs("\u0fb2", true, temp1);
-        getCEs("\u0fb3", true, temp2);
-        getCEs("\u0f71", true, temp3);
-        add("\u0FB2\u0F71", concat(temp1, temp3));
-*/
-        
-        Hashtable missingStrings = new Hashtable();
-        
-        int[] temp1 = new int[20];
-        Enumeration enum = contractingTable.keys();
-        while (enum.hasMoreElements()) {
-            String sequence = (String)enum.nextElement();
+        Iterator enum = ucaData.getContractions();
+        while (enum.hasNext()) {
+            String sequence = (String)enum.next();
             //System.out.println("Contraction: " + Utility.hex(sequence));
             for (int i = sequence.length()-1; i > 0; --i) {
                 String shorter = sequence.substring(0,i);
-                Object probe = contractingTable.get(shorter);
-                if (probe == null) {
-                    int len = getCEs(shorter, true, temp1);
-                    if (false) System.out.println("WARNING: CLOSING: " + UCD.make().getCodeAndName(shorter) + " => " + ceToString(temp1, len));
-                    add(shorter, temp1, len);
+                if (!ucaData.contractionTableContains(shorter)) {
+                    IntStack tempStack = new IntStack(1);
+                    getCEs(shorter, true, tempStack);
+                    if (false) System.out.println("WARNING: CLOSING: " + ucd.getCodeAndName(shorter)
+                        + " => " + CEList.toString(tempStack));
+                    tempMap.put(shorter, tempStack);
                     // missingStrings.put(shorter,"");
                     // collationElements[sequence.charAt(0)] = UNSUPPORTED; // nuke all bad values
                 }
             }
         }
         
-        enum = missingStrings.keys();
+        // now add them. We couldn't before because we were iterating over it.
+        
+        enum = tempMap.keySet().iterator();
+        while (enum.hasNext()) {
+            String shorter = (String) enum.next();
+            IntStack tempStack = (IntStack) tempMap.get(shorter);
+            ucaData.add(shorter, tempStack);
+        }
+        
+        
+        enum = missingStrings.keySet().iterator();
         if (missingStrings.size() != 0) {
             /**
             while (enum.hasMoreElements()) {
@@ -1666,26 +1385,30 @@ Compat:
             }
             */
             String errorMessage = "";
-            while (enum.hasMoreElements()) {
-                String missing = (String)enum.nextElement();
+            while (enum.hasNext()) {
+                String missing = (String)enum.next();
                 if (errorMessage.length() != 0) errorMessage += ", ";
                 errorMessage += "\"" + missing + "\"";
             }
             throw new IllegalArgumentException("Contracting table not closed! Missing " + errorMessage);
         }
-        
+
         //fixlater;
         variableLowCE = variableLow << 16;
         variableHighCE = (variableHigh << 16) | 0xFFFF; // turn on bottom bits
         
-        hangulHackBottom = collationElements[0x1100] & 0xFFFF0000; // remove secondaries & tertiaries
-        hangulHackTop = collationElements[0x11F9] | 0xFFFF; // bump up secondaries and tertiaries
-        if (SHOW_STATS) System.out.println("\tHangul Hack: " + Utility.hex(hangulHackBottom) + ", " + Utility.hex(hangulHackTop));
+        //int hangulHackBottom;
+        //int hangulHackTop;
+        
+        //hangulHackBottom = collationElements[0x1100] & 0xFFFF0000; // remove secondaries & tertiaries
+        //hangulHackTop = collationElements[0x11F9] | 0xFFFF; // bump up secondaries and tertiaries
+        //if (SHOW_STATS) System.out.println("\tHangul Hack: " + Utility.hex(hangulHackBottom) + ", " + Utility.hex(hangulHackTop));
         
         // show some statistics
         if (SHOW_STATS) System.out.println("\tcount1: " + count1);
         if (SHOW_STATS) System.out.println("\tcount2: " + max2);
         if (SHOW_STATS) System.out.println("\tcount3: " + max3);
+        if (SHOW_STATS) System.out.println("\tcontractions: " + ucaData.getContractionCount());
         
         if (SHOW_STATS) System.out.println("\tMIN1/MAX1: " + Utility.hex(MIN1) + "/" + Utility.hex(MAX1));
         if (SHOW_STATS) System.out.println("\tMIN2/MAX2: " + Utility.hex(MIN2) + "/" + Utility.hex(MAX2));
@@ -1912,7 +1635,7 @@ Compat:
     /**
      * Used for checking data file integrity
      */
-    private Hashtable uniqueTable = new Hashtable();
+    private Map uniqueTable = new HashMap();
     
     /**
      * Used for checking data file integrity
diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
index 669dda374b0..3a1d8e83d2d 100644
--- a/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteCollationData.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $ 
-* $Date: 2002/06/13 21:14:05 $ 
-* $Revision: 1.18 $
+* $Date: 2002/06/15 02:47:12 $ 
+* $Revision: 1.19 $
 *
 *******************************************************************************
 */
@@ -31,9 +31,12 @@ import com.ibm.text.UCD.UCD_Types;
 import com.ibm.text.utility.*;
 import com.ibm.text.UCD.Normalizer;
 
-public class WriteCollationData implements UCD_Types {
+public class WriteCollationData implements UCD_Types, UCA_Types {
 	
 	static final boolean DEBUG = false;
+	static final boolean DEBUG_SHOW_ITERATION = true;
+	
+	
 	
     public static final String copyright = 
       "Copyright (C) 2000, IBM Corp. and others. All Rights Reserved.";
@@ -289,7 +292,21 @@ public class WriteCollationData implements UCD_Types {
     
     
     static void writeConformance(String filename, byte option, boolean shortPrint)  throws IOException {
-        UCD ucd30 = UCD.make("3.0.0");
+        //UCD ucd30 = UCD.make("3.0.0");
+        
+/*
+U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
+ => U+00DC LATIN CAPITAL LETTER U WITH DIAERESIS, U+0304 COMBINING MACRON
+*/
+        String[] testList = {"\uF934", "U", "U\u0308", "\u00DC", "\u00DC\u0304", "U\u0308\u0304"};
+        for (int jj = 0; jj < testList.length; ++jj) {
+            String t = testList[jj];
+            System.out.println(ucd.getCodeAndName(t));
+            String test = collator.getSortKey(t, UCA.NON_IGNORABLE);
+            System.out.println("Decomp: " + collator.toString(test));
+            test = collator.getSortKey(t, UCA.NON_IGNORABLE, false);
+            System.out.println("No Dec: " + collator.toString(test));
+        }
         
         PrintWriter log = Utility.openPrintWriter(filename + (shortPrint ? "_SHORT" : "") + ".txt", true, false);
         if (!shortPrint) log.write('\uFEFF');
@@ -297,9 +314,39 @@ public class WriteCollationData implements UCD_Types {
         System.out.println("Sorting");
         int counter = 0;
         
-        for (int i = 0; i <= 0x10FFFF; ++i) {
+        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, null);
+        cc.enableSamples();
+        UnicodeSet found2 = new UnicodeSet();
+        
+        while (true) {
+            String s = cc.next();
+            if (s == null) break;
+            
+            found2.addAll(s);
+            
+            if (DEBUG_SHOW_ITERATION) {
+                int cp = UTF16.charAt(s, 0);
+                if (cp == 0x220 || !ucd.isAssigned(cp) || ucd.isCJK_BASE(cp)) {
+                    System.out.println(ucd.getCodeAndName(s));
+                }
+            }
             Utility.dot(counter++);
-            if (!ucd.isRepresented(i)) continue;
+            addStringX(s, option);
+            // TODO: add other accents with Cyrillic
+        }
+        
+        UnicodeSet found = collator.found; // code points the collator recorded (collator.found)
+        if (!found2.containsAll(found)) { // was containsAll(found2): always true, so this check never fired
+            System.out.println("In both: " + new UnicodeSet(found).retainAll(found2).toPattern(true));
+            System.out.println("In UCA but not iteration: " + new UnicodeSet(found).removeAll(found2).toPattern(true));
+            System.out.println("In iteration but not UCA: " + new UnicodeSet(found2).removeAll(found).toPattern(true));
+            throw new IllegalArgumentException("Inconsistent data");
+            
+        }
+        
+        /*
+        for (int i = 0; i <= 0x10FFFF; ++i) {
+            if (!ucd.isAssigned(i)) continue;
             addStringX(UTF32.valueOf32(i), option);
         }
         
@@ -318,15 +365,6 @@ public class WriteCollationData implements UCD_Types {
             addStringX(s, option);
         }
         
-        for (int i = 0; ; ++i) { // add first unallocated character
-            if (!ucd.isAssigned(i)) {
-                String s = UTF32.valueOf32(i);
-                Utility.fixDot();
-                System.out.println("Adding: " + Utility.hex(s));
-                addStringX(s, option);
-                break;
-            }
-        }
         
         
         for (int i = 0; i < extraConformanceRanges.length; ++i) {
@@ -343,6 +381,7 @@ public class WriteCollationData implements UCD_Types {
             addStringX(end-1, option);
             addStringX(end, option);
         }
+        */
         
         Utility.fixDot();
         System.out.println("Total: " + sortedD.size());
@@ -364,12 +403,12 @@ public class WriteCollationData implements UCD_Types {
             //String status = key.equals(lastKey) ? "*" : "";
             //lastKey = key;
             //log.println(source);
+            char extra = source.charAt(source.length()-1);
             String clipped = source.substring(0, source.length()-1);
-            String stren = source.substring(source.length()-1);
             if (!shortPrint) {
                 log.print(Utility.hex(source));
                 log.print(
-                    ";\t#" + ucd.getName(clipped) + "\t" + UCA.toString(key));
+                    ";\t# " + (extra != LOW_ACCENT ? extra : '.') + " " + ucd.getName(clipped, SHORT) + "\t" + UCA.toString(key));
             } else {
                 log.print(source + "\t" + Utility.hex(clipped));
             }
@@ -384,13 +423,15 @@ public class WriteCollationData implements UCD_Types {
     static void addStringX(int x, byte option) {
         addStringX(UTF32.valueOf32(x), option);
     }
+    
+    static final char LOW_ACCENT = '\u0325';
    
     static void addStringX(String s, byte option) {
         addStringY(s + 'a', option);
         addStringY(s + 'A', option);
         addStringY(s + '�', option);
         addStringY(s + 'b', option);
-        addStringY(s + '\u0325', option);
+        addStringY(s + LOW_ACCENT, option);
         addStringY(s + '!', option);
     }
     
@@ -527,7 +568,7 @@ public class WriteCollationData implements UCD_Types {
             
             if (!arraysMatch(kenCes, kenLen, markCes, markLen)) {
                 int kenCLen = fixCompatibilityCE(s, true, kenComp, true);
-                String comp = collator.ceToString(kenComp, kenCLen);
+                String comp = CEList.toString(kenComp, kenCLen);
                 
                 if (arraysMatch(kenCes, kenLen, kenComp, kenCLen)) {
                     forLater.put((char)(COMPRESSED | type) + s, comp);
@@ -567,10 +608,10 @@ public class WriteCollationData implements UCD_Types {
             String comp = (String)forLater.get(key);
             
             int kenLen = collator.getCEs(s, decompType, kenCes);
-            String kenStr = collator.ceToString(kenCes, kenLen);
+            String kenStr = CEList.toString(kenCes, kenLen);
             
             int markLen = fixCompatibilityCE(s, true, markCes, false);
-            String markStr = collator.ceToString(markCes, markLen);
+            String markStr = CEList.toString(markCes, markLen);
             
             if ((type & COMPRESSED) != 0) {
                 log.println("COMPRESSED #" + (++count) + ": " + ucd.getCodeAndName(s));
@@ -589,7 +630,7 @@ public class WriteCollationData implements UCD_Types {
                     log.println("NFD       : " + ucd.getCodeAndName(nfd));
                 }
                 //kenCLen = collator.getCEs(decomp, true, kenComp);
-                //log.println("decomp ce: " + collator.ceToString(kenComp, kenCLen));                   
+                //log.println("decomp ce: " + CEList.toString(kenComp, kenCLen));                   
             }
             log.println();
         }
@@ -785,7 +826,7 @@ public class WriteCollationData implements UCD_Types {
             
             if (s.length() > 1) {
                 diLog.println(Utility.hex(s, " ")
-                    + ";\t #" + collator.ceToString(ces, len)
+                    + ";\t #" + CEList.toString(ces, len)
                     + " ( " + s + " )"
                     + " " + ucd.getName(s));
             }
@@ -859,7 +900,7 @@ public class WriteCollationData implements UCD_Types {
                 ccc = UTF32.char32At(s,kk);
                 byte cat = ucd.getCategory(ccc);
                 if (cat == Cf || cat == Cc || cat == Zs || cat == Zl || cat == Zp) {
-                    sortedCodes.add(UCA.ceToString(ces, lenArray[0]) + "\t" + ucd.getCodeAndName(s));
+                    sortedCodes.add(CEList.toString(ces, lenArray[0]) + "\t" + ucd.getCodeAndName(s));
                     break;
                 }
             }
@@ -882,7 +923,7 @@ public class WriteCollationData implements UCD_Types {
                 if (collator.isVariable(ce)) haveMixture |= 1;
                 else haveMixture |= 2;
                 if (haveMixture == 3) {
-                    mixedCEs.add(UCA.ceToString(ces, len) + "\t" + ucd.getCodeAndName(s));
+                    mixedCEs.add(CEList.toString(ces, len) + "\t" + ucd.getCodeAndName(s));
                 }
             }
         }
@@ -1030,7 +1071,7 @@ public class WriteCollationData implements UCD_Types {
                 ccc = UTF32.char32At(s,kk);
                 byte cat = ucd.getCategory(ccc);
                 if (cat == Cf || cat == Cc || cat == Zs || cat == Zl || cat == Zp) {
-                    sortedCodes.add(UCA.ceToString(ces, lenArray[0]) + "\t" + ucd.getCodeAndName(s));
+                    sortedCodes.add(CEList.toString(ces, lenArray[0]) + "\t" + ucd.getCodeAndName(s));
                     break;
                 }
             }
@@ -1053,7 +1094,7 @@ public class WriteCollationData implements UCD_Types {
                 if (collator.isVariable(ce)) haveMixture |= 1;
                 else haveMixture |= 2;
                 if (haveMixture == 3) {
-                    mixedCEs.add(UCA.ceToString(ces, len) + "\t" + ucd.getCodeAndName(s));
+                    mixedCEs.add(CEList.toString(ces, len) + "\t" + ucd.getCodeAndName(s));
                 }
             }
         }
@@ -1130,8 +1171,8 @@ public class WriteCollationData implements UCD_Types {
             + "\t" + head
             //+ "\t" + Utility.hex(oldWeight)
             //+ " => " + Utility.hex(newWeight)
-            + "\t" + collator.ceToString(ces, len)
-            + (doNew ? " => " + collator.ceToString(newCes, newLen) : "")
+            + "\t" + CEList.toString(ces, len)
+            + (doNew ? " => " + CEList.toString(newCes, newLen) : "")
             + "\t( " + src + " )"
             + "\t" + ucd.getName(src)
             );
@@ -1198,7 +1239,7 @@ public class WriteCollationData implements UCD_Types {
         
         if (false) {
         int len2 = collator.getCEs("\u2474", true, ces);
-        System.out.println(UCA.ceToString(ces, len2));
+        System.out.println(CEList.toString(ces, len2));
 
         String a = collator.getSortKey("a");
         String b = collator.getSortKey("A");
@@ -1442,9 +1483,9 @@ F900..FAFF; CJK Compatibility Ideographs
            
             
             if (false) System.out.println(
-                collator.ceToString(lastCE) + " " 
-                + collator.ceToString(ce) + " " 
-                + collator.ceToString(nextCE) + " " 
+                CEList.toString(lastCE) + " " 
+                + CEList.toString(ce) + " " 
+                + CEList.toString(nextCE) + " " 
                 + ucd.getCodeAndName(chr)
                 );
             
@@ -1513,7 +1554,7 @@ F900..FAFF; CJK Compatibility Ideographs
             */
 
             if (chr.equals("\u2F00")) {
-                System.out.println(UCA.ceToString(ces, len));
+                System.out.println(CEList.toString(ces, len));
             }
             
             // There are double-CEs, so we have to know what the length of the first bit is.
@@ -1561,7 +1602,7 @@ F900..FAFF; CJK Compatibility Ideographs
                 if (expansion.length() > 0) log.print(" / " + quoteOperand(expansion));
                 if (option == WITH_NAMES) {
                     log.print("\t# " 
-                        + collator.ceToString(ces, len) + " " 
+                        + CEList.toString(ces, len) + " " 
                         + ucd.getCodeAndName(chr));
                     if (expansion.length() > 0) log.print(" / " + Utility.hex(expansion));
                 }
@@ -1801,7 +1842,7 @@ F900..FAFF; CJK Compatibility Ideographs
                     
                     // we failed completely. Print error message, and bail
                     
-                    System.out.println("No back map for " + collator.ceToString(ces[i])
+                    System.out.println("No back map for " + CEList.toString(ces[i])
                         + " from " + CEList.toString(ces, len));
                     System.out.println("\t" + ucd.getCodeAndName(chr)
                         + " => " + ucd.getCodeAndName(nfkdNew.normalize(chr))
@@ -2126,6 +2167,7 @@ F900..FAFF; CJK Compatibility Ideographs
                 continue;
             }
             canIt.setSource(key);
+            
             boolean first = true;
             while (true) {
                 String s = canIt.next();
@@ -2134,9 +2176,6 @@ F900..FAFF; CJK Compatibility Ideographs
                 if (contentsForCanonicalIteration.contains(s)) continue;
                 if (additionalSet.contains(s)) continue;
                 
-                if (s.equals("\u01EC")) {
-                    System.out.println("01ec");
-                }
                 
                 // Skip anything that is not FCD.
                 if (!NFD.isFCD(s)) continue;
@@ -2234,7 +2273,7 @@ F900..FAFF; CJK Compatibility Ideographs
         log.println("#  - Differs from previous version in that MAX value was introduced at 1F.");
         log.println("#    All tertiary values are shifted down by 1, filling the gap at 7!");
         
-        int firstImplicit = getImplicitPrimary(UCA.CJK_BASE) >>> 24;
+        int firstImplicit = getImplicitPrimary(CJK_BASE) >>> 24;
         int lastImplicit = getImplicitPrimary(0x10FFFF) >>> 24;
         log.println("[FIRST_IMPLICIT= " + Utility.hex(firstImplicit) + "]");
         log.println("[LAST_IMPLICIT= " + Utility.hex(lastImplicit) + "]");
@@ -2285,13 +2324,15 @@ F900..FAFF; CJK Compatibility Ideographs
                 int sec = UCA.getSecondary(ces[q]); 
                 int ter = UCA.getTertiary(ces[q]);
                 
-                oldStr.append(UCA.ceToString(ces[q]));// + "," + Integer.toString(ces[q],16);
+                oldStr.append(CEList.toString(ces[q]));// + "," + Integer.toString(ces[q],16);
                 
                 // special treatment for unsupported!
                 
                 if (UCA.isImplicitLeadPrimary(pri)) {
+                    System.out.println("DEBUG: " + CEList.toString(ces, len) 
+                        + ", Current: " + q + ", " + ucd.getCodeAndName(chr));
                     ++q;
-                    oldStr.append(UCA.ceToString(ces[q]));// + "," + Integer.toString(ces[q],16);
+                    oldStr.append(CEList.toString(ces[q]));// + "," + Integer.toString(ces[q],16);
                 
                     int pri2 = UCA.getPrimary(ces[q]);
                     // get old code point
@@ -2301,7 +2342,7 @@ F900..FAFF; CJK Compatibility Ideographs
                     // double check results!
                     
                     int[] testImplicit = new int[2];
-                    UCA.CodepointToImplicit(cp, testImplicit);
+                    collator.CodepointToImplicit(cp, testImplicit);
                     boolean gotError = pri != testImplicit[0] || pri2 != testImplicit[1];
                     if (gotError) {
                     	System.out.println("ERROR");
@@ -2360,7 +2401,7 @@ F900..FAFF; CJK Compatibility Ideographs
             }
             if (nonePrinted) {
                 log.print("[,,]");
-                oldStr.append(UCA.ceToString(0));
+                oldStr.append(CEList.toString(0));
             }
             longLog.print("    # " + oldStr + " # " + ucd.getName(UTF16.charAt(chr, 0)));
             log.println();
@@ -2386,7 +2427,7 @@ F900..FAFF; CJK Compatibility Ideographs
         
         boolean lastOne = false;
         for (int i = 0; i < 0x10FFFF; ++i) {
-            boolean thisOne = UCA.isCJK(i) || UCA.isCJK_AB(i);
+            boolean thisOne = ucd.isCJK_BASE(i) || ucd.isCJK_AB(i);
             if (thisOne != lastOne) {
                 summary.println("# Implicit Cusp: CJK=" + lastOne + ": " + Utility.hex(i-1) + " => " + Utility.hex(0xFFFFFFFFL & getImplicitPrimary(i-1)));
                 summary.println("# Implicit Cusp: CJK=" + thisOne + ": " + Utility.hex(i) + " => " + Utility.hex(0xFFFFFFFFL & getImplicitPrimary(i)));
@@ -2425,7 +2466,7 @@ F900..FAFF; CJK Compatibility Ideographs
             summary.print("# " + Utility.hex(i) + ": (" + Utility.hex(newval) + ") "
                 + Utility.hex(sampleEq[i]) + " ");
             for (int q = 0; q < len; ++q) {
-                summary.print(UCA.ceToString(ces[q]));
+                summary.print(CEList.toString(ces[q]));
             }
             summary.println(" " + ucd.getName(sampleEq[i]));
         }
@@ -2499,24 +2540,24 @@ F900..FAFF; CJK Compatibility Ideographs
 	*/
 static int swapCJK(int i) {
     	
-	if (i >= UCA.CJK_BASE) {
-		if (i < UCA.CJK_LIMIT)				return i - UCA.CJK_BASE;
+	if (i >= CJK_BASE) {
+		if (i < CJK_LIMIT)				return i - CJK_BASE;
 			
-		if (i < UCA.CJK_COMPAT_USED_BASE)	return i + NON_CJK_OFFSET;
+		if (i < CJK_COMPAT_USED_BASE)	return i + NON_CJK_OFFSET;
     		
-		if (i < UCA.CJK_COMPAT_USED_LIMIT)	return i - UCA.CJK_COMPAT_USED_BASE
-												+ (UCA.CJK_LIMIT - UCA.CJK_BASE);
-		if (i < UCA.CJK_B_BASE)				return i + NON_CJK_OFFSET;
+		if (i < CJK_COMPAT_USED_LIMIT)	return i - CJK_COMPAT_USED_BASE
+												+ (CJK_LIMIT - CJK_BASE);
+		if (i < CJK_B_BASE)				return i + NON_CJK_OFFSET;
     		
-		if (i < UCA.CJK_B_LIMIT)			return i; // non-BMP-CJK
+		if (i < CJK_B_LIMIT)			return i; // non-BMP-CJK
     		
 		return i + NON_CJK_OFFSET;	// non-CJK
 	}
-	if (i < UCA.CJK_A_BASE)					return i + NON_CJK_OFFSET;
+	if (i < CJK_A_BASE)					return i + NON_CJK_OFFSET;
 		
-	if (i < UCA.CJK_A_LIMIT)				return i - UCA.CJK_A_BASE
-												+ (UCA.CJK_LIMIT - UCA.CJK_BASE) 
-												+ (UCA.CJK_COMPAT_USED_LIMIT - UCA.CJK_COMPAT_USED_BASE);
+	if (i < CJK_A_LIMIT)				return i - CJK_A_BASE
+												+ (CJK_LIMIT - CJK_BASE) 
+												+ (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);
     return i + NON_CJK_OFFSET; // non-CJK
 }
     
@@ -2642,14 +2683,14 @@ static int swapCJK(int i) {
             oldPrimary = newPrimary;
     	}
     	
-        showImplicit("# First CJK", UCA.CJK_BASE);
-        showImplicit("# Last CJK", UCA.CJK_LIMIT-1);
-        showImplicit("# First CJK-compat", UCA.CJK_COMPAT_USED_BASE);
-        showImplicit("# Last CJK-compat", UCA.CJK_COMPAT_USED_LIMIT-1);
-        showImplicit("# First CJK_A", UCA.CJK_A_BASE);
-        showImplicit("# Last CJK_A", UCA.CJK_A_LIMIT-1);
-        showImplicit("# First CJK_B", UCA.CJK_B_BASE);
-        showImplicit("# Last CJK_B", UCA.CJK_B_LIMIT-1);
+        showImplicit("# First CJK", CJK_BASE);
+        showImplicit("# Last CJK", CJK_LIMIT-1);
+        showImplicit("# First CJK-compat", CJK_COMPAT_USED_BASE);
+        showImplicit("# Last CJK-compat", CJK_COMPAT_USED_LIMIT-1);
+        showImplicit("# First CJK_A", CJK_A_BASE);
+        showImplicit("# Last CJK_A", CJK_A_LIMIT-1);
+        showImplicit("# First CJK_B", CJK_B_BASE);
+        showImplicit("# Last CJK_B", CJK_B_LIMIT-1);
         showImplicit("# First Other Implicit", 0);
         showImplicit("# Last Other Implicit", 0x10FFFF);
         
@@ -2667,9 +2708,9 @@ static int swapCJK(int i) {
         		
         		// separate the three groups
         		
-        		if (UCA.isCJK(i) || UCA.CJK_COMPAT_USED_BASE <= i && i < UCA.CJK_COMPAT_USED_LIMIT) {
+        		if (ucd.isCJK_BASE(i) || CJK_COMPAT_USED_BASE <= i && i < CJK_COMPAT_USED_LIMIT) {
         			if (batch != 0) continue;
-        		} else if (UCA.isCJK_AB(i)) {
+        		} else if (ucd.isCJK_AB(i)) {
         			if (batch != 1) continue;
         		} else if (batch != 2) continue;
         		
@@ -2993,7 +3034,7 @@ static int swapCJK(int i) {
          
         for (char ch = 0; ch < 0xFFFF; ++ch) {
             byte type = collator.getCEType(ch);
-            if (type < UCA.FIXED_CE) {
+            if (type < FIXED_CE) {
                 int len = collator.getCEs(String.valueOf(ch), true, ces);
                 int primary = UCA.getPrimary(ces[0]);
                 if (primary < variableHigh) continue;
@@ -3088,36 +3129,22 @@ static int swapCJK(int i) {
         System.out.println("Sorting");
         
         for (int i = 0; i <= 0xFFFF; ++i) {
-            if (EXCLUDE_UNSUPPORTED && !collator.found.get(i)) continue;
+            if (EXCLUDE_UNSUPPORTED && !collator.found.contains(i)) continue;
             if (0xD800 <= i && i <= 0xF8FF) continue; // skip surrogates and private use
             //if (0xA000 <= c && c <= 0xA48F) continue; // skip YI
             addString(UTF32.valueOf32(i), option);
         }
         
-        Hashtable multiTable = collator.getContracting();
-        Enumeration enum = multiTable.keys();
-        while (enum.hasMoreElements()) {
-            addString((String)enum.nextElement(), option);
-        }
+
+        UCA.UCAContents cc = collator.getContents(UCA.FIXED_CE, null);
+        cc.enableSamples();
         
-        for (int i = 0; i < extraConformanceTests.length; ++i) { // put in sample non-characters
-            addString(extraConformanceTests[i], option);
+        while (true) {
+            String s = cc.next();
+            if (s == null) break;
+            addString(s, option);
         }
-        
-        for (int i = 0; i < extraConformanceRanges.length; ++i) {
-            int start = extraConformanceRanges[i][0];
-            int end = extraConformanceRanges[i][1];
-            int increment = ((end - start + 1) / 303) + 1;
-            //System.out.println("Range: " + start + ", " + end + ", " + increment);
-            addString(start, option);
-            for (int j = start+1; j < end-1; j += increment) {
-                addString(j, option);
-                addString(j+1, option);
-            }
-            addString(end-1, option);
-            addString(end, option);
-        }
-        
+                
         System.out.println("Total: " + sortedD.size());
         Iterator it;
         
diff --git a/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java b/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java
index 42de5a19ada..e1bbb9cca26 100644
--- a/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java
+++ b/tools/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java
@@ -5,12 +5,14 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteHTMLCollation.java,v $ 
-* $Date: 2002/05/31 01:41:03 $ 
-* $Revision: 1.7 $
+* $Date: 2002/06/15 02:47:12 $ 
+* $Revision: 1.8 $
 *
 *******************************************************************************
 */
 
+WARNING: OLD FILE. DON'T COMPILE.
+
 package com.ibm.text.UCA;
 
 import java.util.*;
@@ -21,6 +23,7 @@ import com.ibm.text.UCD.*;
 import com.ibm.text.utility.*;
 
 public class WriteHTMLCollation implements UCD_Types {
+
     public static final String copyright = 
       "Copyright (C) 2000, IBM Corp. and others. All Rights Reserved.";
       
@@ -74,8 +77,8 @@ public class WriteHTMLCollation implements UCD_Types {
         */
         
         // DO FOLLOWING
-        writeConformance("CollationTest_NON_IGNORABLE.txt", UCA.NON_IGNORABLE);
-        writeConformance("CollationTest_SHIFTED.txt", UCA.SHIFTED);
+        //writeConformance("CollationTest_NON_IGNORABLE.txt", UCA.NON_IGNORABLE);
+        //writeConformance("CollationTest_SHIFTED.txt", UCA.SHIFTED);
        
         // SKIP BELOW
         if (true) return;
@@ -178,7 +181,7 @@ public class WriteHTMLCollation implements UCD_Types {
         }
         return result.toString();
     }
-    
+    /*
     static void writeConformance(String filename, byte option)  throws IOException {
         PrintWriter log = Utility.openPrintWriter(filename);
 
@@ -193,6 +196,7 @@ public class WriteHTMLCollation implements UCD_Types {
             addStringX(c, option);
         }
         
+
         Hashtable multiTable = collator.getContracting();
         Enumeration enum = multiTable.keys();
         while (enum.hasMoreElements()) {
@@ -248,7 +252,8 @@ public class WriteHTMLCollation implements UCD_Types {
         sortedD.clear();
         System.out.println("Done");
     }
-
+    */
+    
     static void addStringX(int x, byte option) {
         addStringX(String.valueOf((char)x), option);
     }
@@ -382,7 +387,7 @@ public class WriteHTMLCollation implements UCD_Types {
             
             if (!arraysMatch(kenCes, kenLen, markCes, markLen)) {
                 int kenCLen = fixCompatibilityCE(s, true, kenComp, true);
-                String comp = collator.ceToString(kenComp, kenCLen);
+                String comp = CEList.toString(kenComp, kenCLen);
                 
                 if (arraysMatch(kenCes, kenLen, kenComp, kenCLen)) {
                     forLater.put((char)(COMPRESSED | type) + s, comp);
@@ -422,10 +427,10 @@ public class WriteHTMLCollation implements UCD_Types {
             String comp = (String)forLater.get(key);
             
             int kenLen = collator.getCEs(s, decompType, kenCes);
-            String kenStr = collator.ceToString(kenCes, kenLen);
+            String kenStr = CEList.toString(kenCes, kenLen);
             
             int markLen = fixCompatibilityCE(s, true, markCes, false);
-            String markStr = collator.ceToString(markCes, markLen);
+            String markStr = CEList.toString(markCes, markLen);
             
             if ((type & COMPRESSED) != 0) {
                 log.println("COMPRESSED #" + (++count) + ": " + ucd.getCodeAndName(s));
@@ -444,7 +449,7 @@ public class WriteHTMLCollation implements UCD_Types {
                     log.println("NFD       : " + ucd.getCodeAndName(nfdstr));
                 }
                 //kenCLen = collator.getCEs(decomp, true, kenComp);
-                //log.println("decomp ce: " + collator.ceToString(kenComp, kenCLen));                   
+                //log.println("decomp ce: " + CEList.toString(kenComp, kenCLen));                   
             }
             log.println();
         }
@@ -569,7 +574,7 @@ public class WriteHTMLCollation implements UCD_Types {
         
         {
         int len2 = collator.getCEs("\u2474", true, ces);
-        System.out.println(UCA.ceToString(ces, len2));
+        System.out.println(CEList.toString(ces, len2));
 
         String a = collator.getSortKey("a");
         String b = collator.getSortKey("A");
@@ -640,7 +645,7 @@ public class WriteHTMLCollation implements UCD_Types {
             else if (collator.getTertiary(ce) != collator.getTertiary(lastCE)) relation = "    <<<";
             lastCE = ce;
             if (chr.equals("\u2474")) {
-                System.out.println(UCA.ceToString(ces, len));
+                System.out.println(CEList.toString(ces, len));
             }
             
             // check expansions
@@ -653,7 +658,7 @@ public class WriteHTMLCollation implements UCD_Types {
                     int probe = ces[i];
                     String s = getFromBackMap(backMap, probe);
                     if (s == null) {
-                        System.out.println("No back map for " + collator.ceToString(ces[i])
+                        System.out.println("No back map for " + CEList.toString(ces[i])
                             + ": " + ucd.getCodeAndName(chr));
                         expansion += "[" + Utility.hex(ces[i]) + "]";
                     } else {
@@ -943,7 +948,7 @@ public class WriteHTMLCollation implements UCD_Types {
                 }
                 if (sampleEq[sec] == null) sampleEq[sec] = chr;
                 if (sampleEq[ter] == null) sampleEq[ter] = chr;
-                oldStr.append(UCA.ceToString(ces[q]));// + "," + Integer.toString(ces[q],16);
+                oldStr.append(CEList.toString(ces[q]));// + "," + Integer.toString(ces[q],16);
                 int np = primaryDelta[UCA.getPrimary(ces[q])];
                 hexBytes(np, newPrimary);
                 hexBytes(fixSecondary(UCA.getSecondary(ces[q])), newSecondary);
@@ -968,7 +973,7 @@ public class WriteHTMLCollation implements UCD_Types {
             }
             if (nonePrinted) {
                 log.print("[,,]");
-                oldStr.append(UCA.ceToString(0));
+                oldStr.append(CEList.toString(0));
             }
             log.println("    # " + oldStr + " # " + ucd.getName(chr.charAt(0)));
             lastChr = chr;
@@ -1017,7 +1022,7 @@ public class WriteHTMLCollation implements UCD_Types {
             summary.print("# " + Utility.hex(i) + ": (" + Utility.hex(newval) + ") "
                 + Utility.hex(sampleEq[i]) + " ");
             for (int q = 0; q < len; ++q) {
-                summary.print(UCA.ceToString(ces[q]));
+                summary.print(CEList.toString(ces[q]));
             }
             summary.println(" " + ucd.getName(sampleEq[i]));
         }
@@ -1438,7 +1443,7 @@ public class WriteHTMLCollation implements UCD_Types {
         
         for (int i = 0; i <= 0xFFFF; ++i) {
             char c = (char)i;
-            if (EXCLUDE_UNSUPPORTED && !collator.found.get(c)) continue;
+            if (EXCLUDE_UNSUPPORTED && !collator.found.contains(c)) continue;
             if (0xD800 <= i && i <= 0xF8FF) continue; // skip surrogates and private use
             //if (0xA000 <= c && c <= 0xA48F) continue; // skip YI
             addString(String.valueOf(c), option);
diff --git a/tools/unicodetools/com/ibm/text/UCD/Main.java b/tools/unicodetools/com/ibm/text/UCD/Main.java
index 81222011cdd..0ae1b3c835c 100644
--- a/tools/unicodetools/com/ibm/text/UCD/Main.java
+++ b/tools/unicodetools/com/ibm/text/UCD/Main.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/Main.java,v $
-* $Date: 2002/06/13 21:14:05 $
-* $Revision: 1.15 $
+* $Date: 2002/06/15 02:47:14 $
+* $Revision: 1.16 $
 *
 *******************************************************************************
 */
@@ -63,6 +63,7 @@ public final class Main implements UCD_Types {
             else if (arg.equalsIgnoreCase("diffIgnorable")) VerifyUCD.diffIgnorable();
             else if (arg.equalsIgnoreCase("generateXML")) VerifyUCD.generateXML();
             else if (arg.equalsIgnoreCase("checkSpeed")) VerifyUCD.checkSpeed();
+            else if (arg.equalsIgnoreCase("onetime")) VerifyUCD.oneTime();
             else if (arg.equalsIgnoreCase("verifyNormalizationStability")) VerifyUCD.verifyNormalizationStability();
             
             else if (arg.equalsIgnoreCase("definitionTransliterator")) GenerateHanTransliterator.main(0);
diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD.java b/tools/unicodetools/com/ibm/text/UCD/UCD.java
index aa4c04ca3bd..c533c1b313c 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
-* $Date: 2002/06/13 21:14:05 $
-* $Revision: 1.13 $
+* $Date: 2002/06/15 02:47:13 $
+* $Revision: 1.14 $
 *
 *******************************************************************************
 */
@@ -146,7 +146,7 @@ public final class UCD implements UCD_Types {
      * Get the character names for the code points in a string, separated by ", "
      */
     public String getName(String s, byte style) {
-        if (s.length() == 1) return get(s.charAt(0), true).name;
+        if (s.length() == 1) return getName(s.charAt(0), style);
         StringBuffer result = new StringBuffer();
         int cp;
         for (int i = 0; i < s.length(); i += UTF32.count16(cp)) {
@@ -182,15 +182,15 @@ public final class UCD implements UCD_Types {
     /**
      * Get the name and number (U+xxxx NAME) for a code point
      */
-    public String getCodeAndName(int codePoint) {
-        return getCode(codePoint) + " " + getName(codePoint);
+    public String getCodeAndName(int codePoint, byte type) {
+        return getCode(codePoint) + " " + getName(codePoint, type);
     }
 
     /**
      * Get the name and number (U+xxxx NAME) for the code points in a string,
      * separated by ", "
      */
-    public String getCodeAndName(String s) {
+    public String getCodeAndName(String s, byte type) {
         if (s == null || s.length() == 0) return "NULL";
-        if (s.length() == 1) return getCodeAndName(s.charAt(0)); // fast path
+        if (s.length() == 1) return getCodeAndName(s.charAt(0), type); // fast path; must honor the requested name style
         StringBuffer result = new StringBuffer();
@@ -203,6 +203,20 @@ public final class UCD implements UCD_Types {
         return result.toString();
     }
 
+    /**
+     * Get the name and number (U+xxxx NAME) for a code point
+     */
+    public String getCodeAndName(int codePoint) {
+        return getCodeAndName(codePoint, NORMAL);
+    }
+
+    /**
+     * Get the name and number (U+xxxx NAME) for the code points in a string, separated by ", "
+     */
+    public String getCodeAndName(String s) {
+        return getCodeAndName(s, NORMAL);
+    }
+
     /**
      * Get the general category
      */
@@ -990,10 +1004,20 @@ to guarantee identifier closure.
             result = getRaw(codePoint);
             if (result == null) {
                 result = UData.UNASSIGNED;
-                if (fixStrings) result.name = "<unassigned-" + Utility.hex(codePoint, 4) + ">";
+                result.name = null; // clean this up, since we reuse UNASSIGNED
+                result.shortName = null;
+                if (fixStrings) {
+                    result.name = "<unassigned-" + Utility.hex(codePoint, 4) + ">";
+                }
             }
-            if (result.shortName != null && result.shortName.length() == 0) {
-                result.shortName = Utility.replace(result.name, UCD_Names.NAME_ABBREVIATIONS);
+            if (fixStrings) {
+                if (result.name == null) {
+                    result.name = "<unassigned-" + Utility.hex(codePoint, 4) + ">";
+                    System.out.println("Warning: fixing name for " + result.name);
+                }
+                if (result.shortName == null) {
+                    result.shortName = Utility.replace(result.name, UCD_Names.NAME_ABBREVIATIONS);
+                }
             }
             return result;
           case 0x3400: // CJK Ideograph Extension A
@@ -1024,6 +1048,8 @@ to guarantee identifier closure.
         result = getRaw(rangeStart);
         if (result == null) {
             result = UData.UNASSIGNED;
+            result.name = null; // clean this up, since we reuse UNASSIGNED
+            result.shortName = null;
             if (fixStrings) {
                 result.name = "<reserved-" + Utility.hex(codePoint, 4) + ">";
                 result.shortName = Utility.replace(result.name, UCD_Names.NAME_ABBREVIATIONS);
@@ -1047,6 +1073,32 @@ to guarantee identifier closure.
         return result;
     }
     
+    // Neither Mapped nor Composite CJK: [\u3400-\u4DB5\u4E00-\u9FA5\U00020000-\U0002A6D6]; note the CJK_*LIMIT constants in UCD_Types run further, to 4DBF, 9FFF and 2A6DF
+    
+    public static final boolean isCJK_AB(int bigChar) {
+        return (CJK_A_BASE <= bigChar && bigChar < CJK_A_LIMIT
+             || CJK_B_BASE <= bigChar && bigChar < CJK_B_LIMIT);
+    }
+    
+    public static boolean isCJK_BASE(int cp) {
+        return (CJK_BASE <= cp && cp < CJK_LIMIT 
+        || cp == 0xFA0E	// compat characters that don't decompose.
+        || cp == 0xFA0F
+        || cp == 0xFA11
+        || cp == 0xFA13
+        || cp == 0xFA14
+        || cp == 0xFA1F
+        || cp == 0xFA21
+        || cp == 0xFA23
+        || cp == 0xFA24
+        || cp == 0xFA27
+        || cp == 0xFA28
+        || cp == 0xFA29
+        || cp == 0xFA2E
+        || cp == 0xFA2F
+        );
+    }
+    
     // Hangul constants
 
     public static final int
@@ -1108,7 +1160,7 @@ to guarantee identifier closure.
         return 0xFFFF; // no composition
     }
     
-    static boolean isHangulSyllable(int char1) {
+    static public boolean isHangulSyllable(int char1) {
         return SBase <= char1 && char1 < SLimit;
     }
 
diff --git a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
index 05b771caf4e..86fc89bd8f3 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UCD_Types.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
-* $Date: 2002/05/29 02:01:00 $
-* $Revision: 1.12 $
+* $Date: 2002/06/15 02:47:13 $
+* $Revision: 1.13 $
 *
 *******************************************************************************
 */
@@ -21,8 +21,17 @@ public interface UCD_Types {
     public static final String UCD_DIR = BASE_DIR + "UCD\\";
     public static final String BIN_DIR = BASE_DIR + "BIN\\";
     public static final String GEN_DIR = BASE_DIR + "GEN\\";
-
-
+    
+    public static final int 
+    	CJK_BASE = 0x4E00,
+    	CJK_LIMIT = 0x9FFF+1,
+    	CJK_COMPAT_USED_BASE = 0xFA0E,
+    	CJK_COMPAT_USED_LIMIT = 0xFA2F+1,
+    	CJK_A_BASE = 0x3400,
+    	CJK_A_LIMIT = 0x4DBF+1,
+    	CJK_B_BASE = 0x20000,
+    	CJK_B_LIMIT = 0x2A6DF+1;
+    
     static final byte BINARY_FORMAT = 6; // bumped if binary format of UCD changes
     
     // Unicode Property Types
diff --git a/tools/unicodetools/com/ibm/text/UCD/UData.java b/tools/unicodetools/com/ibm/text/UCD/UData.java
index c5aff42663e..ae16a129eda 100644
--- a/tools/unicodetools/com/ibm/text/UCD/UData.java
+++ b/tools/unicodetools/com/ibm/text/UCD/UData.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UData.java,v $
-* $Date: 2002/06/13 21:14:05 $
-* $Revision: 1.4 $
+* $Date: 2002/06/15 02:47:12 $
+* $Revision: 1.5 $
 *
 *******************************************************************************
 */
@@ -18,7 +18,7 @@ import com.ibm.text.utility.*;
 
 class UData implements UCD_Types {
     String name;
-    String shortName = ""; // cache
+    String shortName; // cache
     String decompositionMapping;
     String simpleUppercase;
     String simpleLowercase;
diff --git a/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java b/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java
index 7f2cdd8e8ac..efb7bf12ce0 100644
--- a/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java
+++ b/tools/unicodetools/com/ibm/text/UCD/VerifyUCD.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/VerifyUCD.java,v $
-* $Date: 2002/06/13 21:14:05 $
-* $Revision: 1.15 $
+* $Date: 2002/06/15 02:47:12 $
+* $Revision: 1.16 $
 *
 *******************************************************************************
 */
@@ -27,6 +27,27 @@ import com.ibm.text.utility.*;
 import java.text.NumberFormat;
 
 public class VerifyUCD implements UCD_Types {
+    
+    static void oneTime() {
+        Default.setUCD();
+        int[] testSet = {0x10000, 'a', 0xE0000, '\u0221'}; // 10000
+        for (int i = 0; i < testSet.length; ++i) {
+            int item = testSet[i];
+            System.out.println(Default.ucd.getCode(item));
+            
+            boolean ass = Default.ucd.isAssigned(item);
+            System.out.println(ass ? " assigned" : " unassigned");
+            ass = Default.ucd.isAllocated(item);
+            System.out.println(ass ? " allocated" : " unallocated");
+            
+            String name = Default.ucd.getName(item, SHORT);
+            System.out.println(" " + name);
+            name = Default.ucd.getName(item);
+            System.out.println(" " + name);
+            
+            System.out.println();
+       }
+    }
 	
 	static final byte NC = UNUSED_CATEGORY;
     
diff --git a/tools/unicodetools/com/ibm/text/utility/IntStack.java b/tools/unicodetools/com/ibm/text/utility/IntStack.java
index 5fdca1f1f9f..401a2dd3b85 100644
--- a/tools/unicodetools/com/ibm/text/utility/IntStack.java
+++ b/tools/unicodetools/com/ibm/text/utility/IntStack.java
@@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/IntStack.java,v $
-* $Date: 2001/09/19 23:33:52 $
-* $Revision: 1.3 $
+* $Date: 2002/06/15 02:47:14 $
+* $Revision: 1.4 $
 *
 *******************************************************************************
 */
@@ -17,30 +17,65 @@ package com.ibm.text.utility;
 // Simple stack mechanism, with push, pop and access
 // =============================================================
 
-public final class IntStack implements Comparable {
+public final class IntStack implements Comparable, Cloneable {
     private int[] values;
     private int top = 0;
+    private int first = 0;
 
     public IntStack(int initialSize) {
         values = new int[initialSize];
     }
+    
+    public IntStack append(IntStack other) {
+        // Walk a snapshot of other's live window [first, top): get(i) rejects i < first, and other may be this. TODO bulk-copy
+        for (int i = other.first, end = other.top; i < end; ++i) {
+            push(other.values[i]);
+        }
+        return this;
+    }
 
-    public void push(int value) {
+    public IntStack append(int value) {
+        return push(value);
+    }
+
+    public int length() {
+        return top - first;
+    }
+
+    public IntStack push(int value) {
         if (top >= values.length) { // must grow?
             int[] temp = new int[values.length*2];
             System.arraycopy(values,0,temp,0,values.length);
             values = temp;
         }
         values[top++] = value;
+        return this;
     }
 
     public int pop() {
-        if (top > 0) return values[--top];
+        if (top > first) {
+            int result = values[--top];
+            if (top == first && first > 0) {
+                top = first = 0;
+            }
+            return result;
+        }
+        throw new IllegalArgumentException("Stack underflow");
+    }
+
+    public int popFront() {
+        if (top > first) {
+            int result = values[first++];
+            if (top == first) {
+                top = first = 0;
+            }
+            return result;
+        }
         throw new IllegalArgumentException("Stack underflow");
     }
 
     public int get(int index) {
-        if (0 <= index && index < top) return values[index];
+        if (first <= index && index < top) return values[index];
         throw new IllegalArgumentException("Stack index out of bounds");
     }
 
@@ -49,22 +84,24 @@ public final class IntStack implements Comparable {
     }
 
     public boolean isEmpty() {
-        return top == 0;
+        return top - first == 0;
     }
     
     public void clear() {
-        top = 0;
+        top = first = 0;
     }
     
     public int compareTo(Object other) {
         IntStack that = (IntStack) other;
-        int min = top;
-        if (min < that.top) min = that.top;
-        for (int i = 0; i < min; ++i) {
-            int result = values[i] - that.values[i];
+        int myLen = top - first;
+        int thatLen = that.top - that.first;
+        int limit = first + ((myLen < thatLen) ? myLen : thatLen);
+        int delta = that.first - first;
+        for (int i = first; i < limit; ++i) {
+            int result = values[i] < that.values[i + delta] ? -1 : (values[i] > that.values[i + delta] ? 1 : 0); // three-way compare; subtraction can overflow and flip the sign
             if (result != 0) return result;
         }
-        return top - that.top;
+        return myLen - thatLen;
     }
 
     public boolean equals(Object other) {
@@ -73,9 +110,19 @@ public final class IntStack implements Comparable {
 
     public int hashCode() {
-        int result = top;
-        for (int i = 0; i < top; ++i) {
+        int result = top - first; // seed with the logical length so the hash does not depend on where the window sits in the array
+        for (int i = first; i < top; ++i) {
             result = result * 37 + values[i];
         }
         return result;
     }
+    
+    public Object clone() {
+        try {
+            IntStack result = (IntStack) (super.clone());
+            result.values = (int[]) result.values.clone();
+            return result;
+        } catch (CloneNotSupportedException e) {
+            throw new IllegalArgumentException("Will never happen");
+        }
+    }
 }
\ No newline at end of file