ICU-1763 First cut of Normalizer code port from ICU4C

X-SVN-Rev: 8907
2025-04-07 14:31:31 +00:00 · 2002-06-20 01:21:18 +00:00 · 2002-06-20 01:21:18 +00:00 · c445874382
commit c445874382
parent 7c37ae1353
40 changed files with 25418 additions and 17631 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -72,7 +72,6 @@ icu4j/src/com/ibm/icu/dev/data/ThaiWordFreq.xls -text
 icu4j/src/com/ibm/icu/dev/data/holidays_jp.ucs -text
 icu4j/src/com/ibm/icu/dev/data/rbbi/english.dict -text
 icu4j/src/com/ibm/icu/dev/data/thai6.ucs -text
-icu4j/src/com/ibm/icu/dev/data/unicode/Draft-TestSuite.txt -text
 icu4j/src/com/ibm/icu/impl/data/ICULocaleData.jar -text
 icu4j/src/com/ibm/icu/impl/data/thai_dict -text
 icu4j/src/com/ibm/icu/impl/data/ucadata.dat -text
--- a/icu4j/src/com/ibm/icu/dev/data/unicode/Draft-TestSuite.txt
+++ b/icu4j/src/com/ibm/icu/dev/data/unicode/Draft-TestSuite.txt
--- a/icu4j/src/com/ibm/icu/dev/data/unicode/NormalizationTest.txt
+++ b/icu4j/src/com/ibm/icu/dev/data/unicode/NormalizationTest.txt
--- a/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterIteratorTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterIteratorTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/lang/Attic/UCharacterIteratorTest.java,v $ 
-* $Date: 2002/04/03 00:00:00 $ 
-* $Revision: 1.1 $
+* $Date: 2002/06/20 01:16:00 $ 
+* $Revision: 1.2 $
 *
 *******************************************************************************
 */
@ -15,7 +15,7 @@ package com.ibm.icu.dev.test.lang;


 import com.ibm.icu.dev.test.TestFmwk;
-import com.ibm.icu.impl.UCharacterIterator;
+import com.ibm.icu.impl.UnicodeCharacterIterator;
 import com.ibm.icu.text.UTF16;

 /**
@ -41,10 +41,10 @@ public final class UCharacterIteratorTest extends TestFmwk
  	*/
  	public void TestClone()
  	{
-     	 UCharacterIterator iterator = new UCharacterIterator("testing");
-     	 UCharacterIterator cloned = (UCharacterIterator)iterator.clone();
+     	 UnicodeCharacterIterator iterator = new UnicodeCharacterIterator("testing");
+     	 UnicodeCharacterIterator cloned = (UnicodeCharacterIterator)iterator.clone();
     	 char completed = 0;
-     	 while (completed != UCharacterIterator.DONE) {
+     	 while (completed != UnicodeCharacterIterator.DONE) {
     	 	completed = iterator.next();
     	 	if (completed != cloned.next()) {
     	 		errln("Cloned operation failed");
@ -57,9 +57,9 @@ public final class UCharacterIteratorTest extends TestFmwk
  	 */
  	public void TestIteration()
  	{
-  		UCharacterIterator iterator  = new UCharacterIterator(
+  		UnicodeCharacterIterator iterator  = new UnicodeCharacterIterator(
  		                                               ITERATION_STRING_);
-  		UCharacterIterator iterator2 = new UCharacterIterator(
+  		UnicodeCharacterIterator iterator2 = new UnicodeCharacterIterator(
  		                                               ITERATION_STRING_);
  		if (iterator.first() != ITERATION_STRING_.charAt(0)) {
  			errln("Iterator failed retrieving first character");
@ -75,12 +75,12 @@ public final class UCharacterIteratorTest extends TestFmwk
  		iterator2.setIndex(0);
  		iterator.setIndex(0);
  		int ch = 0;
-  		while (ch != UCharacterIterator.DONE_CODEPOINT) {
+  		while (ch != UnicodeCharacterIterator.DONE_CODEPOINT) {
  			int index = iterator2.getIndex();
  			ch = iterator2.nextCodePoint();
  			if (index != ITERATION_SUPPLEMENTARY_INDEX) {
  				if (ch != (int)iterator.next() && 
-  				    ch != UCharacterIterator.DONE_CODEPOINT) {
+  				    ch != UnicodeCharacterIterator.DONE_CODEPOINT) {
  					errln("Error mismatch in next() and nextCodePoint()"); 
  				}
  			}
@ -94,12 +94,12 @@ public final class UCharacterIteratorTest extends TestFmwk
  		}
  		iterator.setIndex(ITERATION_STRING_.length());
  		iterator2.setIndex(ITERATION_STRING_.length());
-  		while (ch != UCharacterIterator.DONE_CODEPOINT) {
+  		while (ch != UnicodeCharacterIterator.DONE_CODEPOINT) {
  			int index = iterator2.getIndex();
  			ch = iterator2.previousCodePoint();
  			if (index != ITERATION_SUPPLEMENTARY_INDEX) {
  				if (ch != (int)iterator.previous() && 
-  				    ch != UCharacterIterator.DONE_CODEPOINT) {
+  				    ch != UnicodeCharacterIterator.DONE_CODEPOINT) {
  					errln("Error mismatch in previous() and " +
  					      "previousCodePoint()"); 
  				}
--- a/icu4j/src/com/ibm/icu/dev/test/normalizer/BasicTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/normalizer/BasicTest.java
--- a/icu4j/src/com/ibm/icu/dev/test/normalizer/ConformanceTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/normalizer/ConformanceTest.java
@ -1,9 +1,15 @@
 /*
-************************************************************************
-* Copyright (c) 1997-2000, International Business Machines
-* Corporation and others.  All Rights Reserved.
-************************************************************************
-*/
+ *******************************************************************************
+ * Copyright (C) 1996-2000, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ *
+ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/normalizer/ConformanceTest.java,v $ 
+ * $Date: 2002/06/20 01:16:24 $ 
+ * $Revision: 1.9 $
+ *
+ *****************************************************************************************
+ */

 package com.ibm.icu.dev.test.normalizer;

@ -13,6 +19,7 @@ import com.ibm.icu.dev.test.*;
 import com.ibm.icu.lang.*;
 import com.ibm.icu.text.*;
 import com.ibm.icu.impl.Utility;
+import com.ibm.icu.impl.UCharacterProperty;

 public class ConformanceTest extends TestFmwk {

@ -21,28 +28,28 @@ public class ConformanceTest extends TestFmwk {
    public static void main(String[] args) throws Exception {
        new ConformanceTest().run(args);
    }
-
+    
    public ConformanceTest() {
        // Doesn't matter what the string and mode are; we'll change
        // them later as needed.
-        normalizer = new Normalizer("", Normalizer.COMPOSE);
+        normalizer = new Normalizer("", Normalizer.NFC);
    }

    /**
-     * Test the conformance of Normalizer to
+     * Test the conformance of NewNormalizer to
     * http://www.unicode.org/unicode/reports/tr15/conformance/Draft-TestSuite.txt.
     * This file must be located at the path specified as TEST_SUITE_FILE.
     */
-    public void TestConformance() {
-		BufferedReader input = null;
+    public void TestConformance() throws Exception{
+        BufferedReader input = null;
        String line = null;
        String[] fields = new String[5];
        StringBuffer buf = new StringBuffer();
        int passCount = 0;
        int failCount = 0;
-
+        InputStream is = null;
        try {
-			input = TestUtil.getDataReader("unicode/Draft-TestSuite.txt");
+            input = TestUtil.getDataReader("unicode/NormalizationTest.txt");
            for (int count = 0;;++count) {
                line = input.readLine();
                if (line == null) break;
@ -52,7 +59,7 @@ public class ConformanceTest extends TestFmwk {
                // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # <comments>

                // Skip comments
-                if (line.charAt(0) == '#') continue;
+                if (line.charAt(0) == '#'  || line.charAt(0)=='@') continue;

                // Parse out the fields
                hexsplit(line, ';', fields, buf);
@ -101,46 +108,139 @@ public class ConformanceTest extends TestFmwk {
     * @param line the source line from the test suite file
     * @return true if the test passes
     */
-    private boolean checkConformance(String[] field, String line) {
+    private boolean checkConformance(String[] field, String line) throws Exception{
        boolean pass = true;
        StringBuffer buf = new StringBuffer(); // scratch
-        String out;
-        
-        for (int i=0; i<5; ++i) {
+        String out,fcd;
+        int i=0;
+        UTF16.StringComparator comp = new UTF16.StringComparator();
+        for (i=0; i<5; ++i) {
            if (i<3) {
-                out = Normalizer.normalize(field[i], Normalizer.COMPOSE, 0);
+                out = Normalizer.normalize(field[i], Normalizer.NFC);
                pass &= assertEqual("C", field[i], out, field[1], "c2!=C(c" + (i+1));
-                out = iterativeNorm(field[i], Normalizer.COMPOSE, buf, +1);
+                
+                out = iterativeNorm(field[i], Normalizer.NFC, buf, +1);
                pass &= assertEqual("C(+1)", field[i], out, field[1], "c2!=C(c" + (i+1));
-                out = iterativeNorm(field[i], Normalizer.COMPOSE, buf, -1);
+                
+                out = iterativeNorm(field[i], Normalizer.NFC, buf, -1);
                pass &= assertEqual("C(-1)", field[i], out, field[1], "c2!=C(c" + (i+1));

-                out = Normalizer.normalize(field[i], Normalizer.DECOMP, 0);
+                out = iterativeNorm(new StringCharacterIterator(field[i]), Normalizer.NFC, buf, +1);
+                pass &= assertEqual("C(+1)", field[i], out, field[1], "c2!=C(c" + (i+1));
+                
+                out = iterativeNorm(new StringCharacterIterator(field[i]), Normalizer.NFC, buf, -1);
+                pass &= assertEqual("C(-1)", field[i], out, field[1], "c2!=C(c" + (i+1));
+                 
+                out = Normalizer.normalize(field[i], Normalizer.NFD);
                pass &= assertEqual("D", field[i], out, field[2], "c3!=D(c" + (i+1));
-                out = iterativeNorm(field[i], Normalizer.DECOMP, buf, +1);
+                
+                out = iterativeNorm(field[i], Normalizer.NFD, buf, +1);
                pass &= assertEqual("D(+1)", field[i], out, field[2], "c3!=D(c" + (i+1));
-                out = iterativeNorm(field[i], Normalizer.DECOMP, buf, -1);
+                
+                out = iterativeNorm(field[i], Normalizer.NFD, buf, -1);
                pass &= assertEqual("D(-1)", field[i], out, field[2], "c3!=D(c" + (i+1));
+
+                out = iterativeNorm(new StringCharacterIterator(field[i]), Normalizer.NFD, buf, +1);
+                pass &= assertEqual("D(+1)", field[i], out, field[2], "c3!=D(c" + (i+1));
+                
+                out = iterativeNorm(new StringCharacterIterator(field[i]), Normalizer.NFD, buf, -1);
+                pass &= assertEqual("D(-1)", field[i], out, field[2], "c3!=D(c" + (i+1));
+                
            }
-            out = Normalizer.normalize(field[i], Normalizer.COMPOSE_COMPAT, 0);
+            out = Normalizer.normalize(field[i], Normalizer.NFKC);
            pass &= assertEqual("KC", field[i], out, field[3], "c4!=KC(c" + (i+1));
-            out = iterativeNorm(field[i], Normalizer.COMPOSE_COMPAT, buf, +1);
+            
+            out = iterativeNorm(field[i], Normalizer.NFKC, buf, +1);
            pass &= assertEqual("KD(+1)", field[i], out, field[3], "c4!=KC(c" + (i+1));
-            out = iterativeNorm(field[i], Normalizer.COMPOSE_COMPAT, buf, -1);
+            
+            out = iterativeNorm(field[i], Normalizer.NFKC, buf, -1);
            pass &= assertEqual("KD(-1)", field[i], out, field[3], "c4!=KC(c" + (i+1));

-            out = Normalizer.normalize(field[i], Normalizer.DECOMP_COMPAT, 0);
+            out = iterativeNorm(new StringCharacterIterator(field[i]), Normalizer.NFKC, buf, +1);
+            pass &= assertEqual("KD(+1)", field[i], out, field[3], "c4!=KC(c" + (i+1));
+            
+            out = iterativeNorm(new StringCharacterIterator(field[i]), Normalizer.NFKC, buf, -1);
+            pass &= assertEqual("KD(-1)", field[i], out, field[3], "c4!=KC(c" + (i+1));
+              
+
+            out = Normalizer.normalize(field[i], Normalizer.NFKD);
            pass &= assertEqual("KD", field[i], out, field[4], "c5!=KD(c" + (i+1));
-            out = iterativeNorm(field[i], Normalizer.DECOMP_COMPAT, buf, +1);
+            
+            out = iterativeNorm(field[i], Normalizer.NFKD, buf, +1);
            pass &= assertEqual("KD(+1)", field[i], out, field[4], "c5!=KD(c" + (i+1));
-            out = iterativeNorm(field[i], Normalizer.DECOMP_COMPAT, buf, -1);
+            
+            out = iterativeNorm(field[i], Normalizer.NFKD, buf, -1);
            pass &= assertEqual("KD(-1)", field[i], out, field[4], "c5!=KD(c" + (i+1));
+         
+            out = iterativeNorm(new StringCharacterIterator(field[i]), Normalizer.NFKD, buf, +1);
+            pass &= assertEqual("KD(+1)", field[i], out, field[4], "c5!=KD(c" + (i+1));
+            
+            out = iterativeNorm(new StringCharacterIterator(field[i]), Normalizer.NFKD, buf, -1);
+            pass &= assertEqual("KD(-1)", field[i], out, field[4], "c5!=KD(c" + (i+1));
+              
        }
+         // test quick checks
+	    if(Normalizer.NO == Normalizer.quickCheck(field[1], Normalizer.NFC)) {
+	        errln("Normalizer error: quickCheck(NFC(s), NewNormalizer.NFC) is NewNormalizer.NO");
+	        pass = false;
+	    }
+	    if(Normalizer.NO == Normalizer.quickCheck(field[2], Normalizer.NFD)) {
+	        errln("Normalizer error: quickCheck(NFD(s), NewNormalizer.NFD) is NewNormalizer.NO");
+	        pass = false;
+	    }
+	    if(Normalizer.NO == Normalizer.quickCheck(field[3], Normalizer.NFKC)) {
+	        errln("Normalizer error: quickCheck(NFKC(s), NewNormalizer.NFKC) is NewNormalizer.NO");
+	        pass = false;
+	    }
+	    if(Normalizer.NO == Normalizer.quickCheck(field[4], Normalizer.NFKD)) {
+	        errln("Normalizer error: quickCheck(NFKD(s), NewNormalizer.NFKD) is NewNormalizer.NO");
+	        pass = false;
+	    }
+	
+        if(!Normalizer.isNormalized(field[1], Normalizer.NFC)) {
+            errln("Normalizer error: isNormalized(NFC(s), NewNormalizer.NFC) is false");
+            pass = false;
+        }
+        if(!field[0].equals(field[1]) && Normalizer.isNormalized(field[0], Normalizer.NFC)) {
+            errln("Normalizer error: isNormalized(s, NewNormalizer.NFC) is TRUE");
+            pass = false;
+        }
+        if(!Normalizer.isNormalized(field[3], Normalizer.NFKC)) {
+            errln("Normalizer error: isNormalized(NFKC(s), NewNormalizer.NFKC) is false");
+            pass = false;
+        }
+        if(!field[0].equals(field[3]) && Normalizer.isNormalized(field[0], Normalizer.NFKC)) {
+            errln("Normalizer error: isNormalized(s, NewNormalizer.NFKC) is TRUE");
+            pass = false;
+        }
+	
+	    // test FCD quick check and "makeFCD"
+	    fcd=Normalizer.normalize(field[0], Normalizer.FCD);
+	    if(Normalizer.NO == Normalizer.quickCheck(fcd, Normalizer.FCD)) {
+	        errln("Normalizer error: quickCheck(FCD(s), NewNormalizer.FCD) is NewNormalizer.NO");
+	        pass = false;
+	    }
+	    if(Normalizer.NO == Normalizer.quickCheck(field[2], Normalizer.FCD)) {
+	        errln("Normalizer error: quickCheck(NFD(s), NewNormalizer.FCD) is NewNormalizer.NO");
+	        pass = false;
+	    }
+	    if(Normalizer.NO == Normalizer.quickCheck(field[4], Normalizer.FCD)) {
+	        errln("Normalizer error: quickCheck(NFKD(s), NewNormalizer.FCD) is NewNormalizer.NO");
+	        pass = false;
+	    }
+	
+	    out=Normalizer.normalize(fcd, Normalizer.NFD);
+	    if(!out.equals(field[2])) {
+	        errln("Normalizer error: NFD(FCD(s))!=NFD(s)");
+	        pass = false;
+	    }    
        if (!pass) {
            errln("FAIL: " + line);
-        }
+        }     
+       
        return pass;
    }
+    

    /**
     * Do a normalization using the iterative API in the given direction.
@ -148,20 +248,48 @@ public class ConformanceTest extends TestFmwk {
     * @param dir either +1 or -1
     */
    private String iterativeNorm(String str, Normalizer.Mode mode,
-                                 StringBuffer buf, int dir) {
+                                 StringBuffer buf, int dir) throws Exception{
        normalizer.setText(str);
        normalizer.setMode(mode);
        buf.setLength(0);
-        char ch;
+        
+        int ch;
        if (dir > 0) {
            for (ch = normalizer.first(); ch != Normalizer.DONE;
                 ch = normalizer.next()) {
-                buf.append(ch);
+                buf.append(UTF16.toString(ch));
            }
        } else {
            for (ch = normalizer.last(); ch != Normalizer.DONE;
                 ch = normalizer.previous()) {
-                buf.insert(0, ch);
+                buf.insert(0, UTF16.toString(ch));
+            }
+        }
+        return buf.toString();
+    }
+    
+    /**
+     * Do a normalization using the iterative API in the given direction.
+     * @param str a Java StringCharacterIterator
+     * @param buf scratch buffer
+     * @param dir either +1 or -1
+     */
+    private String iterativeNorm(StringCharacterIterator str, Normalizer.Mode mode,
+                                 StringBuffer buf, int dir) throws Exception{
+        normalizer.setText(str);
+        normalizer.setMode(mode);
+        buf.setLength(0);
+        
+        int ch;
+        if (dir > 0) {
+            for (ch = normalizer.first(); ch != Normalizer.DONE;
+                 ch = normalizer.next()) {
+                buf.append(UTF16.toString(ch));
+            }
+        } else {
+            for (ch = normalizer.last(); ch != Normalizer.DONE;
+                 ch = normalizer.previous()) {
+                buf.insert(0, UTF16.toString(ch));
            }
        }
        return buf.toString();
@ -180,8 +308,8 @@ public class ConformanceTest extends TestFmwk {
        if (exp.equals(got)) {
            return true;
        }
-        errln(Utility.escape("      " + msg + ") " + op + "(" + s + ")=" + got +
-                             ", exp. " + exp));
+        errln(("      " + msg + ") " + op + "(" + s + ")=" + hex(got) +
+                             ", exp. " + hex(exp)));
        return false;
    }

@ -207,21 +335,26 @@ public class ConformanceTest extends TestFmwk {
            }
            // Our field is from pos..delim-1.
            buf.setLength(0);
-            while (pos < delim) {
-                if (s.charAt(pos) == ' ') {
-                    ++pos;
-                } else if (pos+4 > delim) {
-                    throw new IllegalArgumentException("Premature eol in " + s);
-                } else {
-                    int hex = Integer.parseInt(s.substring(pos, pos+4), 16);
-                    if (hex < 0 || hex > 0xFFFF) {
-                        throw new IllegalArgumentException("Out of range hex " +
-                                                           hex + " in " + s);
+            
+            String toHex = s.substring(pos,delim);
+            pos = delim;
+            int index = 0;
+            int len = toHex.length();
+            while(index< len){
+                if(toHex.charAt(index)==' '){
+                    index++;
+                }else{
+                    int spacePos = toHex.indexOf(' ', index);
+                    if(spacePos==-1){
+                        appendInt(buf,toHex.substring(index,len),s);
+                        spacePos = len;
+                    }else{
+                        appendInt(buf,toHex.substring(index, spacePos),s);
                    }
-                    buf.append((char) hex);
-                    pos += 4;
+                    index = spacePos+1;
                }
            }
+            
            if (buf.length() < 1) {
                throw new IllegalArgumentException("Empty field " + i + " in " + s);
            }
@ -229,17 +362,29 @@ public class ConformanceTest extends TestFmwk {
            ++pos; // Skip over delim
        }
    }
-
+    public static void appendInt(StringBuffer buf, String strToHex, String s){
+        int hex = Integer.parseInt(strToHex,16);
+        if (hex < 0 ) {
+            throw new IllegalArgumentException("Out of range hex " +
+                                                hex + " in " + s);
+        }else if (hex > 0xFFFF){
+            buf.append((char)((hex>>10)+0xd7c0)); 
+            buf.append((char)((hex&0x3ff)|0xdc00));
+        }else{
+            buf.append((char) hex);
+        }
+    }
+            
    // Specific tests for debugging.  These are generally failures
    // taken from the conformance file, but culled out to make
    // debugging easier.  These can be eliminated without affecting
    // coverage.

-    public void _hideTestCase6() {
+    public void _hideTestCase6() throws Exception{
        _testOneLine("0385;0385;00A8 0301;0020 0308 0301;0020 0308 0301;");
    }

-    public void _testOneLine(String line) {
+    public void _testOneLine(String line) throws Exception{
        String[] fields = new String[5];
        StringBuffer buf = new StringBuffer();
        // Parse out the fields
--- a/icu4j/src/com/ibm/icu/dev/test/normalizer/ExhaustiveTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/normalizer/ExhaustiveTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/normalizer/Attic/ExhaustiveTest.java,v $ 
- * $Date: 2002/03/01 18:48:01 $ 
- * $Revision: 1.10 $
+ * $Date: 2002/06/20 01:16:24 $ 
+ * $Revision: 1.11 $
 *
 *****************************************************************************************
 */
@ -15,46 +15,38 @@ package com.ibm.icu.dev.test.normalizer;
 import com.ibm.icu.dev.test.*;
 import com.ibm.icu.lang.*;
 import com.ibm.icu.text.*;
-import com.ibm.icu.dev.tool.normalizer.UInfo;
+import com.ibm.icu.impl.NormalizerImpl;

 public class ExhaustiveTest extends TestFmwk
 {
-    private UInfo info;
-	
+ 	
    public static void main(String[] args) throws Exception
    {
-    	UInfo tempInfo = null;
-        String[] tempArgs = new String[args.length];
+    	String[] tempArgs = new String[args.length];
        int count = 0;

        // Allow the test to be pointed at a specific version of the Unicode database
-        for (int i = 0; i < args.length; i++)
-        {
-            if (args[i].equals("-data")) {
-                tempInfo = new UInfo(args[++i], args[++i]);
-            } else {
-                tempArgs[count++] = args[i];
-            }
-        }
+        //for (int i = 0; i < args.length; i++)
+        //{
+        //    if (args[i].equals("-data")) {
+        //        tempInfo = new UInfo(args[++i], args[++i]);
+        //    } else {
+        //        tempArgs[count++] = args[i];
+        //    }
+        //}

        args = new String[count];
        System.arraycopy(tempArgs, 0, args, 0, count);


-        if (tempInfo == null) {
-            tempInfo = new UInfo();
-	    }
-        new ExhaustiveTest(tempInfo).run(args);
+
+        new ExhaustiveTest().run(args);
    }
    
    public ExhaustiveTest() {
-    	this.info = new UInfo();
-    }
-
-    public ExhaustiveTest(UInfo info) {
-    	this.info = info;
    }

+ 

    /**
     * Run through all of the characters returned by a composed-char iterator
@ -89,7 +81,7 @@ public class ExhaustiveTest extends TestFmwk
            // make sense
            String chString   = new StringBuffer().append(ch).toString();
            String iterDecomp = iter.decomposition();
-            String normDecomp = Normalizer.decompose(chString, compat, 0);
+            String normDecomp = Normalizer.decompose(chString, compat);

            if (iterDecomp.equals(chString)) {
                errln("ERROR: " + hex(ch) + " has identical decomp");
@ -106,7 +98,7 @@ public class ExhaustiveTest extends TestFmwk
    {
        for (char x = ++start; x < limit; x++) {
            String xString   = new StringBuffer().append(x).toString();
-            String decomp = Normalizer.decompose(xString, compat, options);
+            String decomp = Normalizer.decompose(xString, compat);
            if (!decomp.equals(xString)) {
                errln("ERROR: " + hex(x) + " has decomposition (" + hex(decomp) + ")"
                    + " but was not returned by iterator");
@ -124,26 +116,31 @@ public class ExhaustiveTest extends TestFmwk
            char ch = iter.next();

            String chStr = new StringBuffer().append(ch).toString();
-            String decomp = Normalizer.decompose(chStr, compat, options);
-            String comp = Normalizer.compose(decomp, compat, options);
+            String decomp = Normalizer.decompose(chStr, compat);
+            String comp = Normalizer.compose(decomp, compat);

-            short cClass = info.getCanonicalClass(decomp.charAt(0));
+            int cClass = UCharacter.getCombiningClass(decomp.charAt(0));
            cClass = 0;

-            if (info.isExcludedComposition(ch)) {
-                logln("Skipped excluded char " + hex(ch) + " (" + info.getName(ch,true) + ")" );
+            if (NormalizerImpl.isFullCompositionExclusion(ch)) {
+                logln("Skipped excluded char " + hex(ch) + " (" + UCharacter.getName(ch) + ")" );
                continue;
            }

            // Avoid disparaged characters
-            if (info.getDecomposition(ch).length() == 4) continue;
+            if (getDecomposition(ch,compat).length() == 4) continue;

            if (!comp.equals(chStr)) {
                errln("ERROR: Round trip invalid: " + hex(chStr) + " --> " + hex(decomp)
                    + " --> " + hex(comp));

-                errln("  char decomp is '" + info.getDecomposition(ch) + "'");
+                errln("  char decomp is '" + getDecomposition(ch,compat) + "'");
            }
        }
    }
+    private String getDecomposition(char ch, boolean compat){
+        char[] dest = new char[10];   
+        int length = NormalizerImpl.getDecomposition(ch,compat,dest,0,dest.length);   
+        return new String(dest,0,length);
+    }
 }
--- a/icu4j/src/com/ibm/icu/dev/test/normalizer/TestCanonicalIterator.java
+++ b/icu4j/src/com/ibm/icu/dev/test/normalizer/TestCanonicalIterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/normalizer/TestCanonicalIterator.java,v $ 
- * $Date: 2002/03/19 00:18:44 $ 
- * $Revision: 1.7 $
+ * $Date: 2002/06/20 01:16:24 $ 
+ * $Revision: 1.8 $
 *
 *****************************************************************************************
 */
@ -39,6 +39,7 @@ public class TestCanonicalIterator extends TestFmwk {
        {"x\u0307\u0327", "x\u0307\u0327, x\u0327\u0307, \u1E8B\u0327"},
    };
    
+     
    public void TestExhaustive() {
    	int counter = 0;
    	int mixedCounter = 0;
@ -63,8 +64,8 @@ public class TestCanonicalIterator extends TestFmwk {
    		if ((++counter % 5000) == 0) logln("Testing " + Utility.hex(i,0));
    		
    		String s = UTF16.valueOf(i) + "\u0345";
-    		String decomp = Normalizer.decompose(s, false, 0);
-    		String comp = Normalizer.compose(s, false, 0);
+    		String decomp = Normalizer.decompose(s, false);
+    		String comp = Normalizer.compose(s, false);
    		// skip characters that don't have either decomp.
    		// need quick test for this!
    		if (s.equals(decomp) && s.equals(comp)) continue;
@ -170,14 +171,17 @@ public class TestCanonicalIterator extends TestFmwk {
    }
    
    public void TestBasic() {
-        // check build
-        UnicodeSet ss = CanonicalIterator.getSafeStart();
-        logln("Safe Start: " + ss.toPattern(true));
-        ss = CanonicalIterator.getStarts('a');
-        expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'),
-        	new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB"
-        	+ "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]")
-        		);
+//      This is not interesting anymore as the data is already built 
+//      beforehand
+
+//        check build
+//        UnicodeSet ss = CanonicalIterator.getSafeStart();
+//        logln("Safe Start: " + ss.toPattern(true));
+//        ss = CanonicalIterator.getStarts('a');
+//        expectEqual("Characters with 'a' at the start of their decomposition: ", "", CanonicalIterator.getStarts('a'),
+//        	new UnicodeSet("[\u00E0-\u00E5\u0101\u0103\u0105\u01CE\u01DF\u01E1\u01FB"
+//        	+ "\u0201\u0203\u0227\u1E01\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7]")
+//        		);
        
        // check permute
        // NOTE: we use a TreeSet below to sort the output, which is not guaranteed to be sorted!
--- a/icu4j/src/com/ibm/icu/dev/test/translit/RoundTripTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/RoundTripTest.java
@ -98,14 +98,14 @@ public class RoundTripTest extends TestFmwk {
    public void TestGreekUNGEGN() throws IOException, ParseException {
        new Test("Latin-Greek/UNGEGN")
          .test("[a-zA-Z]", "[\u003B\u00B7[:Greek:]-[\u03D7-\u03EF]]", 
-            "[\u00B5\u037A\u03D0-\uFFFF]", /* roundtrip exclusions */
+            "[\u00B5\u037A\u03D0-\uFFFF{\u039C\u03C0}]", /* roundtrip exclusions */
            this, new LegalGreek(false));
    }

    public void Testel() throws IOException, ParseException {
        new Test("Latin-el")
          .test("[a-zA-Z]", "[\u003B\u00B7[:Greek:]-[\u03D7-\u03EF]]", 
-            "[\u00B5\u037A\u03D0-\uFFFF]", /* roundtrip exclusions */
+            "[\u00B5\u037A\u03D0-\uFFFF{\u039C\u03C0}]", /* roundtrip exclusions */
            this, new LegalGreek(false));
    }

@ -136,7 +136,7 @@ public class RoundTripTest extends TestFmwk {
        String nukta = "\u093c\u09bc\u0a3c\u0abc\u0b3c";
        String virama = "\u094d\u09cd\u0a4d\u0acd\u0b4d\u0bcd\u0c4d\u0ccd\u0d4d";
        String sanskritStressSigns = "\u0951\u0952\u0953\u0954";
-        String chandrabindu = "\u0901\u0981\u0A81\u0b01";
+        String chandrabindu = "\u0901\u0981\u0A81\u0b01\u0c01";
        public boolean is(String sourceString){
            int cp=sourceString.charAt(0);
            
@ -221,7 +221,7 @@ public class RoundTripTest extends TestFmwk {

        new String [] {  "Tamil-DEVANAGARI",
          "[:tamil:]", "[:Devanagari:]", 
-                  "[\u093c\u0943-\u094a\u0951-\u0954\u0962\u0963\u090B\u090C\u090D\u0911\u0916\u0917\u0918\u091B\u091D\u0920\u0921\u0922\u0925\u0926\u0927\u092B\u092C\u092D\u0936\u093d\u0950[\u0958-\u0961]]", /*roundtrip exclusions*/
+                  "[\u0901\u093c\u0943-\u094a\u0951-\u0954\u0962\u0963\u090B\u090C\u090D\u0911\u0916\u0917\u0918\u091B\u091D\u0920\u0921\u0922\u0925\u0926\u0927\u092B\u092C\u092D\u0936\u093d\u0950[\u0958-\u0961]]", /*roundtrip exclusions*/
                  },
        new String [] {  "DEVANAGARI-Tamil",
           "[:Devanagari:]", "[:tamil:]",
@ -239,7 +239,7 @@ public class RoundTripTest extends TestFmwk {

        new String [] {  "KANNADA-DEVANAGARI",
          "[:KANNADA:]", "[:Devanagari:]", 
-                "[\u0946\u093c\u0950\u0945\u0949\u0951-\u0954\u0962\u0963\u0950\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]]", /*roundtrip exclusions*/
+                "[\u0901\u0946\u093c\u0950\u0945\u0949\u0951-\u0954\u0962\u0963\u0950\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]]", /*roundtrip exclusions*/
                },
        new String [] {  "DEVANAGARI-KANNADA",
           "[:Devanagari:]", "[:KANNADA:]",
@ -248,7 +248,7 @@ public class RoundTripTest extends TestFmwk {

        new String [] {  "MALAYALAM-DEVANAGARI",
          "[:MALAYALAM:]", "[:Devanagari:]", 
-                "[\u094a\u094b\u094c\u093c\u0950\u0944\u0945\u0949\u0951-\u0954\u0962\u0963\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]]", /*roundtrip exclusions*/
+                "[\u0901\u094a\u094b\u094c\u093c\u0950\u0944\u0945\u0949\u0951-\u0954\u0962\u0963\u090D\u0911\u093d\u0929\u0934[\u0958-\u095f]]", /*roundtrip exclusions*/
                },
        new String [] {  "DEVANAGARI-MALAYALAM",
           "[:Devanagari:]", "[:MALAYALAM:]",
@ -284,7 +284,7 @@ public class RoundTripTest extends TestFmwk {

        new String [] {  "Tamil-BENGALI",
          "[:tamil:]", "[:BENGALI:]", 
-                  "[\u09bc\u09c3\u09c4\u09e2\u09e3\u09f0\u09f1\u098B\u098C\u0996\u0997\u0998\u099B\u099D\u09A0\u09A1\u09A2\u09A5\u09A6\u09A7\u09AB\u09AC\u09AD\u09B6\u09DC\u09DD\u09DF\u09E0\u09E1]", /*roundtrip exclusions*/
+                  "[\u0981\u09bc\u09c3\u09c4\u09e2\u09e3\u09f0\u09f1\u098B\u098C\u0996\u0997\u0998\u099B\u099D\u09A0\u09A1\u09A2\u09A5\u09A6\u09A7\u09AB\u09AC\u09AD\u09B6\u09DC\u09DD\u09DF\u09E0\u09E1]", /*roundtrip exclusions*/
                  },
        new String [] {  "BENGALI-Tamil",
           "[:BENGALI:]", "[:tamil:]",
@ -302,7 +302,7 @@ public class RoundTripTest extends TestFmwk {

        new String [] {  "KANNADA-BENGALI",
          "[:KANNADA:]", "[:BENGALI:]", 
-                "[\u09e2\u09e3\u09bc\u09d7\u09f0\u09f1\u09dc\u09dd\u09df]", /*roundtrip exclusions*/
+                "[\u0981\u09e2\u09e3\u09bc\u09d7\u09f0\u09f1\u09dc\u09dd\u09df]", /*roundtrip exclusions*/
                },
        new String [] {  "BENGALI-KANNADA",
           "[:BENGALI:]", "[:KANNADA:]",
@ -311,7 +311,7 @@ public class RoundTripTest extends TestFmwk {

        new String [] {  "MALAYALAM-BENGALI",
          "[:MALAYALAM:]", "[:BENGALI:]", 
-                "[\u09e2\u09e3\u09bc\u09c4\u09f0\u09f1\u09dc\u09dd\u09df]", /*roundtrip exclusions*/
+                "[\u0981\u09e2\u09e3\u09bc\u09c4\u09f0\u09f1\u09dc\u09dd\u09df]", /*roundtrip exclusions*/
                },
        new String [] {  "BENGALI-MALAYALAM",
           "[:BENGALI:]", "[:MALAYALAM:]",
@ -382,7 +382,7 @@ public class RoundTripTest extends TestFmwk {

        new String [] {  "TAMIL-GUJARATI",
          "[:TAMIL:]", "[:GUJARATI:]", 
-                "[\u0abc\u0ac3\u0Ac4\u0Ac5\u0Ac9\u0Ac7\u0A8B\u0A8D\u0A91\u0A96\u0A97\u0A98\u0A9B\u0A9D\u0AA0\u0AA1\u0AA2\u0AA5\u0AA6\u0AA7\u0AAB\u0AAC\u0AAD\u0AB6\u0ABD\u0AD0\u0AE0]", /*roundtrip exclusions*/
+                "[\u0A81\u0abc\u0ac3\u0Ac4\u0Ac5\u0Ac9\u0Ac7\u0A8B\u0A8D\u0A91\u0A96\u0A97\u0A98\u0A9B\u0A9D\u0AA0\u0AA1\u0AA2\u0AA5\u0AA6\u0AA7\u0AAB\u0AAC\u0AAD\u0AB6\u0ABD\u0AD0\u0AE0]", /*roundtrip exclusions*/
                },
        new String [] {  "GUJARATI-TAMIL",
           "[:GUJARATI:]", "[:TAMIL:]",
@ -400,7 +400,7 @@ public class RoundTripTest extends TestFmwk {

        new String [] {  "KANNADA-GUJARATI",
          "[:KANNADA:]", "[:GUJARATI:]", 
-                "[\u0abc\u0Ac5\u0Ac9\u0A8D\u0A91\u0ABD\u0Ad0]", /*roundtrip exclusions*/
+                "[\u0A81\u0abc\u0Ac5\u0Ac9\u0A8D\u0A91\u0ABD\u0Ad0]", /*roundtrip exclusions*/
                },
        new String [] {  "GUJARATI-KANNADA",
           "[:GUJARATI:]", "[:KANNADA:]",
@ -409,7 +409,7 @@ public class RoundTripTest extends TestFmwk {

        new String [] {  "MALAYALAM-GUJARATI",
          "[:MALAYALAM:]", "[:GUJARATI:]", 
-                "[\u0ac4\u0acb\u0acc\u0abc\u0Ac5\u0Ac9\u0A8D\u0A91\u0ABD\u0Ad0]", /*roundtrip exclusions*/
+                "[\u0A81\u0ac4\u0acb\u0acc\u0abc\u0Ac5\u0Ac9\u0A8D\u0A91\u0ABD\u0Ad0]", /*roundtrip exclusions*/
                },
        new String [] {  "GUJARATI-MALAYALAM",
           "[:GUJARATI:]", "[:MALAYALAM:]",
@ -418,7 +418,7 @@ public class RoundTripTest extends TestFmwk {

        new String [] {  "TAMIL-ORIYA",
          "[:TAMIL:]", "[:ORIYA:]", 
-                "[\u0b3c\u0b43\u0b56\u0B0B\u0B0C\u0B16\u0B17\u0B18\u0B1B\u0B1D\u0B20\u0B21\u0B22\u0B25\u0B26\u0B27\u0B2B\u0B2C\u0B2D\u0B36\u0B3D\u0B5C\u0B5D\u0B5F\u0B60\u0B61]", /*roundtrip exclusions*/
+                "[\u0B01\u0b3c\u0b43\u0b56\u0B0B\u0B0C\u0B16\u0B17\u0B18\u0B1B\u0B1D\u0B20\u0B21\u0B22\u0B25\u0B26\u0B27\u0B2B\u0B2C\u0B2D\u0B36\u0B3D\u0B5C\u0B5D\u0B5F\u0B60\u0B61]", /*roundtrip exclusions*/
                },
        new String [] {  "ORIYA-TAMIL",
           "[:ORIYA:]", "[:TAMIL:]",
@ -436,7 +436,7 @@ public class RoundTripTest extends TestFmwk {

        new String [] {  "KANNADA-ORIYA",
          "[:KANNADA:]", "[:ORIYA:]", 
-                "[\u0b3c\u0b57\u0B3D\u0B5C\u0B5D\u0B5F]", /*roundtrip exclusions*/
+                "[\u0B01\u0b3c\u0b57\u0B3D\u0B5C\u0B5D\u0B5F]", /*roundtrip exclusions*/
                },
        new String [] {  "ORIYA-KANNADA",
           "[:ORIYA:]", "[:KANNADA:]",
@ -445,7 +445,7 @@ public class RoundTripTest extends TestFmwk {

        new String [] {  "MALAYALAM-ORIYA",
          "[:MALAYALAM:]", "[:ORIYA:]", 
-                "[\u0b3c\u0b56\u0B3D\u0B5C\u0B5D\u0B5F]", /*roundtrip exclusions*/
+                "[\u0B01\u0b3c\u0b56\u0B3D\u0B5C\u0B5D\u0B5F]", /*roundtrip exclusions*/
                },
        new String [] {  "ORIYA-MALAYALAM",
           "[:ORIYA:]", "[:MALAYALAM:]",
@ -458,7 +458,7 @@ public class RoundTripTest extends TestFmwk {
                },
        new String [] {  "TAMIL-TELUGU",
           "[:TAMIL:]", "[:TELUGU:]",
-                  "[\u0c43\u0c44\u0c46\u0c47\u0c55\u0c56\u0c66\u0C0B\u0C0C\u0C16\u0C17\u0C18\u0C1B\u0C1D\u0C20\u0C21\u0C22\u0C25\u0C26\u0C27\u0C2B\u0C2C\u0C2D\u0C36\u0C60\u0C61]", /*roundtrip exclusions*/
+                  "[\u0C01\u0c43\u0c44\u0c46\u0c47\u0c55\u0c56\u0c66\u0C0B\u0C0C\u0C16\u0C17\u0C18\u0C1B\u0C1D\u0C20\u0C21\u0C22\u0C25\u0C26\u0C27\u0C2B\u0C2C\u0C2D\u0C36\u0C60\u0C61]", /*roundtrip exclusions*/
                  },

        new String [] {  "KANNADA-TAMIL",
@ -481,7 +481,7 @@ public class RoundTripTest extends TestFmwk {

        new String [] {  "KANNADA-TELUGU",
          "[:KANNADA:]", "[:TELUGU:]", 
-                "[\u0c3f\u0c46\u0c48\u0c4a]", /*roundtrip exclusions*/
+                "[\u0C01\u0c3f\u0c46\u0c48\u0c4a]", /*roundtrip exclusions*/
                },
        new String [] {  "TELUGU-KANNADA",
           "[:TELUGU:]", "[:KANNADA:]",
@ -490,7 +490,7 @@ public class RoundTripTest extends TestFmwk {

        new String [] {  "MALAYALAM-TELUGU",
          "[:MALAYALAM:]", "[:TELUGU:]", 
-                "[\u0c44\u0c4a\u0c4c\u0c4b\u0c55\u0c56]", /*roundtrip exclusions*/
+                "[\u0C01\u0c44\u0c4a\u0c4c\u0c4b\u0c55\u0c56]", /*roundtrip exclusions*/
                },
        new String [] {  "TELUGU-MALAYALAM",
           "[:TELUGU:]", "[:MALAYALAM:]",
@ -566,7 +566,7 @@ public class RoundTripTest extends TestFmwk {
        public boolean is(String sourceString) {
            try {
                int t;
-                String decomp = Normalizer.normalize(sourceString, Normalizer.DECOMP, 0);
+                String decomp = Normalizer.normalize(sourceString, Normalizer.NFD);
                for (int i = 0; i < decomp.length(); ++i) { // don't worry about surrogates
                    switch (getType(decomp.charAt(i))) {
                    case 0:
@ -619,11 +619,11 @@ public class RoundTripTest extends TestFmwk {
        
        public boolean is(String sourceString) { 
            try {
-                String decomp = Normalizer.normalize(sourceString, Normalizer.DECOMP, 0);
+                String decomp = Normalizer.normalize(sourceString, Normalizer.NFD);
                
                // modern is simpler: don't care about anything but a grave
                if (!full) {
-                    if (sourceString.equals("\u039C\u03C0")) return false;
+                    //if (sourceString.equals("\u039C\u03C0")) return false;
                    for (int i = 0; i < decomp.length(); ++i) {
                        char c = decomp.charAt(i);
                        // exclude all the accents
@ -714,8 +714,8 @@ public class RoundTripTest extends TestFmwk {
        public static boolean isSame(String a, String b) {
            if (a.equals(b)) return true;
            if (a.equalsIgnoreCase(b) && isCamel(a)) return true;
-            a = Normalizer.normalize(a, Normalizer.DECOMP, 0);
-            b = Normalizer.normalize(b, Normalizer.DECOMP, 0);
+            a = Normalizer.normalize(a, Normalizer.NFD);
+            b = Normalizer.normalize(b, Normalizer.NFD);
            if (a.equals(b)) return true;
            if (a.equalsIgnoreCase(b) && isCamel(a)) return true;
            return false;
@ -925,7 +925,7 @@ public class RoundTripTest extends TestFmwk {
                String targ = sourceToTarget.transliterate(cs);
                if (!toTarget.containsAll(targ) 
                        || badCharacters.containsSome(targ)) {
-                    String targD = Normalizer.normalize(targ, Normalizer.DECOMP, 0);
+                    String targD = Normalizer.normalize(targ, Normalizer.NFD);
                    if (!toTarget.containsAll(targD) 
                            || badCharacters.containsSome(targD)) {
                        logWrongScript("Source-Target", cs, targ);
@ -934,7 +934,7 @@ public class RoundTripTest extends TestFmwk {
                    }
                }
                
-                String cs2 = Normalizer.normalize(cs, Normalizer.DECOMP, 0);
+                String cs2 = Normalizer.normalize(cs, Normalizer.NFD);
                String targ2 = sourceToTarget.transliterate(cs2);
                if (!targ.equals(targ2)) {
                    logNotCanonical("Source-Target", cs, targ, cs2, targ2);
@ -978,14 +978,14 @@ public class RoundTripTest extends TestFmwk {
                    String targ = sourceToTarget.transliterate(cs);
                    if (!toTarget.containsAll(targ) 
                            || badCharacters.containsSome(targ)) {
-                        String targD = Normalizer.normalize(targ, Normalizer.DECOMP, 0);
+                        String targD = Normalizer.normalize(targ, Normalizer.NFD);
                        if (!toTarget.containsAll(targD) 
                                || badCharacters.containsSome(targD)) {
                            logWrongScript("Source-Target", cs, targ);
                            continue;
                        }
                    }
-                    String cs2 = Normalizer.normalize(cs, Normalizer.DECOMP, 0);
+                    String cs2 = Normalizer.normalize(cs, Normalizer.NFD);
                    String targ2 = sourceToTarget.transliterate(cs2);
                    if (!targ.equals(targ2)) {
                        logNotCanonical("Source-Target", cs, targ, cs2, targ2);
@ -1005,28 +1005,36 @@ public class RoundTripTest extends TestFmwk {
                    
            usi.reset(targetRange);
            while (usi.next()) {
-                int c = usi.codepoint;
+                String cs;
+                int c;
+                if(usi.codepoint == usi.IS_STRING){
+                    cs = usi.string;
+                    c = UTF16.charAt(cs,0);
+                }else{
+                    c = usi.codepoint;
+                    cs =UTF16.valueOf(c);
+                }
                    
-                String cs = UTF16.valueOf(c);
                String targ = targetToSource.transliterate(cs);
                String reverse = sourceToTarget.transliterate(targ);
                
                if (!toSource.containsAll(targ) 
                        || badCharacters.containsSome(targ)) {
-                    String targD = Normalizer.normalize(targ, Normalizer.DECOMP, 0);
+                    String targD = Normalizer.normalize(targ, Normalizer.NFD);
                    if (!toSource.containsAll(targD) 
                            || badCharacters.containsSome(targD)) {
                        logWrongScript("Target-Source", cs, targ);
-                        failTargSource.add(c);
+                        failTargSource.add(cs);
                        continue;
                    }
                }
-                if (!isSame(cs, reverse) && !roundtripExclusions.contains(c)) {
+                if (!isSame(cs, reverse) && !roundtripExclusions.contains(c)
+                    && !roundtripExclusions.contains(cs)) {
                    logRoundTripFailure(cs,targetToSource.getID(), targ,sourceToTarget.getID(), reverse);
                    failRound.add(c);
                    continue;
                }
-                String targ2 = Normalizer.normalize(targ, Normalizer.DECOMP, 0);
+                String targ2 = Normalizer.normalize(targ, Normalizer.NFD);
                String reverse2 = sourceToTarget.transliterate(targ2);
                if (!reverse.equals(reverse2)) {
                    logNotCanonical("Target-Source", targ, reverse, targ2, reverse2);
@ -1076,7 +1084,7 @@ public class RoundTripTest extends TestFmwk {
                    
                    if (!toSource.containsAll(targ) /*&& !failTargSource.contains(c) && !failTargSource.contains(d)*/
                            || badCharacters.containsSome(targ)) {
-                        String targD = Normalizer.normalize(targ, Normalizer.DECOMP, 0);
+                        String targD = Normalizer.normalize(targ, Normalizer.NFD);
                        if (!toSource.containsAll(targD) /*&& !failTargSource.contains(c) && !failTargSource.contains(d)*/
                                || badCharacters.containsSome(targD)) {
                            logWrongScript("Target-Source", cs, targ);
@ -1084,11 +1092,13 @@ public class RoundTripTest extends TestFmwk {
                        }
                    }
                    if (!isSame(cs, reverse) /*&& !failRound.contains(c) && !failRound.contains(d)*/
-                         && !roundtripExclusions.contains(c) && !roundtripExclusions.contains(d)) {
+                         && !roundtripExclusions.contains(c) 
+                         && !roundtripExclusions.contains(d) 
+                         && !roundtripExclusions.contains(cs)) {
                        logRoundTripFailure(cs,targetToSource.getID(), targ,sourceToTarget.getID(), reverse);
                        continue;
                    }
-                    String targ2 = Normalizer.normalize(targ, Normalizer.DECOMP, 0);
+                    String targ2 = Normalizer.normalize(targ, Normalizer.NFD);
                    String reverse2 = sourceToTarget.transliterate(targ2);
                    if (!reverse.equals(reverse2)) {
                        logNotCanonical("Target-Source", targ, reverse, targ2, reverse2);
--- a/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
- * $Date: 2002/06/12 17:37:10 $
- * $Revision: 1.106 $
+ * $Date: 2002/06/20 01:16:48 $
+ * $Revision: 1.107 $
 *
 *****************************************************************************************
 */
@ -2313,10 +2313,10 @@ public class TransliteratorTest extends TestFmwk {
            // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
            
            if (testCases[i].length > 2)    target = testCases[i][2];
-            else if (id.equalsIgnoreCase("NFD"))    target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.DECOMP,0);
-            else if (id.equalsIgnoreCase("NFC"))    target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.COMPOSE,0);
-            else if (id.equalsIgnoreCase("NFKD"))   target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.DECOMP_COMPAT,0);
-            else if (id.equalsIgnoreCase("NFKC"))   target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.COMPOSE_COMPAT,0);
+            else if (id.equalsIgnoreCase("NFD"))    target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFD);
+            else if (id.equalsIgnoreCase("NFC"))    target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFC);
+            else if (id.equalsIgnoreCase("NFKD"))   target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKD);
+            else if (id.equalsIgnoreCase("NFKC"))   target = com.ibm.icu.text.Normalizer.normalize(source, com.ibm.icu.text.Normalizer.NFKC);
            else if (id.equalsIgnoreCase("Lower"))  target = UCharacter.toLowerCase(Locale.US, source);
            else if (id.equalsIgnoreCase("Upper"))  target = UCharacter.toUpperCase(Locale.US, source);

--- a/icu4j/src/com/ibm/icu/dev/test/translit/WriteCharts.java
+++ b/icu4j/src/com/ibm/icu/dev/test/translit/WriteCharts.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/WriteCharts.java,v $
- * $Date: 2002/03/13 19:52:34 $
- * $Revision: 1.14 $
+ * $Date: 2002/06/20 01:16:48 $
+ * $Revision: 1.15 $
 *
 *****************************************************************************************
 */
@ -198,7 +198,7 @@ public class WriteCharts {
                    group |= 16;
                }
                    
-                map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.DECOMP_COMPAT, 0))
+                map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ss, Normalizer.NFKD))
                        + "\u0000" + ss, 
                    "<td class='s'>" + ss + "<br><tt>" + hex(ss)
                        + "</tt></td><td class='t'>" + ts + "<br><tt>" + hex(ts)
@ -262,7 +262,7 @@ public class WriteCharts {
                    group |= 16;
                }
                    
-                map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ts, Normalizer.DECOMP_COMPAT, 0)) + ts, 
+                map.put(group + UCharacter.toLowerCase(Normalizer.normalize(ts, Normalizer.NFKD)) + ts, 
                    "<td class='s'>-</td><td class='t'>" + ts + "<br><tt>" + hex(ts)
                    + "</tt></td><td class='r'>"
                    + rt + "<br><tt>" + hex(rt) + "</tt></td>");
--- a/icu4j/src/com/ibm/icu/dev/tool/localeconverter/ConvertPOSIXLocale.java
+++ b/icu4j/src/com/ibm/icu/dev/tool/localeconverter/ConvertPOSIXLocale.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/tool/localeconverter/ConvertPOSIXLocale.java,v $ 
- * $Date: 2002/02/16 03:05:27 $ 
- * $Revision: 1.2 $
+ * $Date: 2002/06/20 01:17:11 $ 
+ * $Revision: 1.3 $
 *
 *****************************************************************************************
 */
@ -223,11 +223,11 @@ public class ConvertPOSIXLocale {
        process(args);
            //{{INIT_CONTROLS
 		//}}
-}
+    }
    
    public void process(String args[]) {
        short options = identifyOptions(args);
-        String enc="";
+        String enc=null;
        if ((args.length < 2) || ((options & OPT_UNKNOWN) != 0)) {
            printUsage();
        } else {
@ -249,6 +249,9 @@ public class ConvertPOSIXLocale {
                }
                
            }
+            if(enc==null){
+                enc="Default";
+            }
            if ((fileName == null) || (locale == null) || (options == 0)) {
                printUsage();
            } else {
--- a/icu4j/src/com/ibm/icu/dev/tool/localeconverter/ICU2LocaleWriter.java
+++ b/icu4j/src/com/ibm/icu/dev/tool/localeconverter/ICU2LocaleWriter.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/tool/localeconverter/ICU2LocaleWriter.java,v $ 
- * $Date: 2002/02/16 03:05:28 $ 
- * $Revision: 1.2 $
+ * $Date: 2002/06/20 01:17:12 $ 
+ * $Revision: 1.3 $
 *
 *****************************************************************************************
 */
@ -58,17 +58,20 @@ public class ICU2LocaleWriter extends LocaleWriter {
            super.write(tag, o);
        } else {
            CollationItem[] items = (CollationItem[])o;
-            print("CollationElements");
-            println(" { ");
-            for (int i = 0; i < items.length; i++) {
-                if(items[i]!=null){
-                    printString(items[i].toString());
-                    if (items[i].comment != null) {
-                        tabTo(30);
-                        print("//");
-                        println(items[i].comment);
+            if(items[0]!=null){
+                print("Sequence");
+                println(" { ");
+                for (int i = 0; i < items.length; i++) {
+                    if(items[i]!=null){
+                        printString(items[i].toString());
+                        if (items[i].comment != null) {
+                            tabTo(30);
+                            print("//");
+                            println(items[i].comment);
+                        }
                    }
                }
+                println("}");
            }
        }
    }
--- a/icu4j/src/com/ibm/icu/dev/tool/localeconverter/ICULocaleWriter.java
+++ b/icu4j/src/com/ibm/icu/dev/tool/localeconverter/ICULocaleWriter.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/tool/localeconverter/ICULocaleWriter.java,v $ 
- * $Date: 2002/02/16 03:05:28 $ 
- * $Revision: 1.2 $
+ * $Date: 2002/06/20 01:17:12 $ 
+ * $Revision: 1.3 $
 *
 *****************************************************************************************
 */
@ -38,17 +38,20 @@ public class ICULocaleWriter extends LocaleWriter {
            super.write(tag, o);
        } else {
            CollationItem[] items = (CollationItem[])o;
-            print("CollationElements");
-            println(" { ");
-            for (int i = 0; i < items.length; i++) {
-                if(items[i]!=null){
-                    printString(items[i].toString());
-                    if (items[i].comment != null) {
-                        tabTo(30);
-                        print("//");
-                        println(items[i].comment);
-                    }
-                }
+            if(items[0]!=null){
+	            print("Sequence");
+	            println(" { ");
+	            for (int i = 0; i < items.length; i++) {
+	                if(items[i]!=null){
+	                    printString(items[i].toString());
+	                    if (items[i].comment != null) {
+	                        tabTo(30);
+	                        print("//");
+	                        println(items[i].comment);
+	                    }
+	                }
+	            }
+	            println("}");
            }
        }
    }
--- a/icu4j/src/com/ibm/icu/dev/tool/localeconverter/PosixCharMap.java
+++ b/icu4j/src/com/ibm/icu/dev/tool/localeconverter/PosixCharMap.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/tool/localeconverter/PosixCharMap.java,v $ 
- * $Date: 2002/02/16 03:05:30 $ 
- * $Revision: 1.2 $
+ * $Date: 2002/06/20 01:17:12 $ 
+ * $Revision: 1.3 $
 *
 *****************************************************************************************
 */
@ -62,6 +62,139 @@ public class PosixCharMap {
        encoding =enc;
        load(new BufferedReader(new FileReader(file)));
    }
+    /* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
+    static private final char[] UNESCAPE_MAP = {
+        /*"   0x22, 0x22 */
+        /*'   0x27, 0x27 */
+        /*?   0x3F, 0x3F */
+        /*\   0x5C, 0x5C */
+        /*a*/ 0x61, 0x07,
+        /*b*/ 0x62, 0x08,
+        /*f*/ 0x66, 0x0c,
+        /*n*/ 0x6E, 0x0a,
+        /*r*/ 0x72, 0x0d,
+        /*t*/ 0x74, 0x09,
+        /*v*/ 0x76, 0x0b
+    };
+        /**
+     * Convert an escape to a 32-bit code point value.  We attempt
+     * to parallel the icu4c unesacpeAt() function.
+     * @param offset16 an array containing offset to the character
+     * <em>after</em> the backslash.  Upon return offset16[0] will
+     * be updated to point after the escape sequence.
+     * @return character value from 0 to 10FFFF, or -1 on error.
+     */
+    public static int unescapeAt(String s, int[] offset16) {
+        int c;
+        int result = 0;
+        int n = 0;
+        int minDig = 0;
+        int maxDig = 0;
+        int bitsPerDigit = 4;
+        int dig;
+        int i;
+
+        /* Check that offset is in range */
+        int offset = offset16[0];
+        int length = s.length();
+        if (offset < 0 || offset >= length) {
+            return -1;
+        }
+
+        /* Fetch first UChar after '\\' */
+        c = UTF16.charAt(s, offset);
+        offset += UTF16.getCharCount(c);
+
+        /* Convert hexadecimal and octal escapes */
+        switch (c) {
+        case 'u':
+            minDig = maxDig = 4;
+            break;
+        case 'U':
+            minDig = maxDig = 8;
+            break;
+        case 'x':
+            minDig = 1;
+            maxDig = 2;
+            break;
+        default:
+            dig = UCharacter.digit(c, 8);
+            if (dig >= 0) {
+                minDig = 1;
+                maxDig = 3;
+                n = 1; /* Already have first octal digit */
+                bitsPerDigit = 3;
+                result = dig;
+            }
+            break;
+        }
+        if (minDig != 0) {
+            while (offset < length && n < maxDig) {
+                // TEMPORARY
+                // TODO: Restore the char32-based code when UCharacter.digit
+                // is working (Bug 66).
+
+                //c = UTF16.charAt(s, offset);
+                //dig = UCharacter.digit(c, (bitsPerDigit == 3) ? 8 : 16);
+                c = s.charAt(offset);
+                dig = Character.digit((char)c, (bitsPerDigit == 3) ? 8 : 16);
+                if (dig < 0) {
+                    break;
+                }
+                result = (result << bitsPerDigit) | dig;
+                //offset += UTF16.getCharCount(c);
+                ++offset;
+                ++n;
+            }
+            if (n < minDig) {
+                return -1;
+            }
+            offset16[0] = offset;
+            return result;
+        }
+
+        /* Convert C-style escapes in table */
+        for (i=0; i<UNESCAPE_MAP.length; i+=2) {
+            if (c == UNESCAPE_MAP[i]) {
+                offset16[0] = offset;
+                return UNESCAPE_MAP[i+1];
+            } else if (c < UNESCAPE_MAP[i]) {
+                break;
+            }
+        }
+
+        /* If no special forms are recognized, then consider
+         * the backslash to generically escape the next character. */
+        offset16[0] = offset;
+        return c;
+    }
+
+    /**
+     * Convert all escapes in a given string using unescapeAt().
+     * @exception IllegalArgumentException if an invalid escape is
+     * seen.
+     */
+    public static String unescape(String s) {
+        StringBuffer buf = new StringBuffer();
+        int[] pos = new int[1];
+        for (int i=0; i<s.length(); ) {
+            char c = s.charAt(i++);
+            if (c == '\\') {
+                pos[0] = i;
+                int e = unescapeAt(s, pos);
+                if (e < 0) {
+                    throw new IllegalArgumentException("Invalid escape sequence " +
+                                                       s.substring(i-1, Math.min(i+8, s.length())));
+                }
+                UTF16.append(buf, e);
+                i = pos[0];
+            } else {
+                buf.append(c);
+            }
+        }
+        return buf.toString();
+    }
+
    public void load(Reader inputReader) throws IOException {
        PosixCharMap oldMap = SymbolTransition.getCharMap();
        SymbolTransition.setCharMap(null);
@ -104,14 +237,21 @@ public class PosixCharMap {
                state = p.nextToken();
            } while ((state != EOF) && !p.dataEquals("CHARMAP"));
            p.accept(EOL);
-            if (state != EOF) {
+            if (state != EOF ) {
                p = new Lex(states2, input);
                state = p.nextToken();
-                while (state != EOF) {
+                while (state != EOF ) {
+
                    String key = p.getData();
+                    if(p.dataEquals("ENDCHARMAP")){
+                        break;
+                    }
                    state = p.nextToken();
                    while (state == EOL) {
-                        String data = p.getData();
+                        if(p.dataEquals("ENDCHARMAP")){
+                            break;
+                        }
+                        String data = unescape(p.getData());
                        data.trim();
                        if (data.startsWith("<U") || data.startsWith("#U")) {
                            String numData = data.substring(2,data.length()-1);
@ -154,8 +294,7 @@ public class PosixCharMap {
                        
                        state = p.nextToken();
                        key=p.getData();
-                     }
-                        
+                     }                       
                        
                    //state = p.nextToken();
                }
--- a/icu4j/src/com/ibm/icu/dev/tool/translit/UnicodeSetClosure.java
+++ b/icu4j/src/com/ibm/icu/dev/tool/translit/UnicodeSetClosure.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/tool/translit/UnicodeSetClosure.java,v $
- * $Date: 2002/02/25 22:43:59 $
- * $Revision: 1.6 $
+ * $Date: 2002/06/20 01:17:39 $
+ * $Revision: 1.7 $
 *
 *****************************************************************************************
 */
@ -95,7 +95,7 @@ public class UnicodeSetClosure {
    }
    
    static final Normalizer.Mode[] testModes = {
-        Normalizer.NO_OP, Normalizer.DECOMP, Normalizer.COMPOSE, Normalizer.DECOMP_COMPAT, Normalizer.COMPOSE_COMPAT};
+        Normalizer.NONE, Normalizer.NFD, Normalizer.NFC, Normalizer.NFKD, Normalizer.NFKC};
    static final String[] modeNames = {
        "NoNF", "NFD", "NFC", "NFKD", "NFKC"};
        
@ -197,7 +197,7 @@ public class UnicodeSetClosure {
            String source = UTF16.valueOf(cp);
            String result = source;
            if (lowerFirst) result = UCharacter.toLowerCase(Locale.US, result);
-            result = Normalizer.normalize(result, mode, 0);
+            result = Normalizer.normalize(result, mode);
            if (lowerFirst) result = UCharacter.toLowerCase(Locale.US, result);
            if (result.equals(source)) return null;
            return result;
--- a/icu4j/src/com/ibm/icu/dev/tool/translit/genIndexFilters.java
+++ b/icu4j/src/com/ibm/icu/dev/tool/translit/genIndexFilters.java
@ -31,13 +31,13 @@ import java.io.*;
 public class genIndexFilters {

    public static void main(String[] args) throws IOException {
-        Normalizer.Mode m = Normalizer.NO_OP;
+        Normalizer.Mode m = Normalizer.NONE;
        boolean lowerFirst = false;
        if (args.length >= 2) {
            if (args[1].equalsIgnoreCase("NFD")) {
-                m = Normalizer.DECOMP;
+                m = Normalizer.NFD;
            } else if (args[1].equalsIgnoreCase("NFKD")) {
-                m = Normalizer.DECOMP_COMPAT;
+                m = Normalizer.NFKD;
            } else {
                usage();
            }
@ -59,7 +59,7 @@ public class genIndexFilters {
        Transliterator t = Transliterator.getInstance(ID);
        // TransliteratorUtility gives us access to package private API
        UnicodeSet sourceSet = TransliteratorUtility.getSourceSet(t);
-        if (m != Normalizer.NO_OP || lowerFirst) {
+        if (m != Normalizer.NONE || lowerFirst) {
            UnicodeSetClosure.close(sourceSet, m, lowerFirst);
        }
        System.out.println(sourceSet.toPattern(true));
--- a/icu4j/src/com/ibm/icu/impl/ICUCharacterIterator.java
+++ b/icu4j/src/com/ibm/icu/impl/ICUCharacterIterator.java
@ -0,0 +1,157 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2000, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ *
+ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/Attic/ICUCharacterIterator.java,v $ 
+ * $Date: 2002/06/20 01:18:07 $ 
+ * $Revision: 1.1 $
+ *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import java.text.CharacterIterator;
+
+public class ICUCharacterIterator extends UCharacterIterator {
+    
+    private CharacterIterator iterator;
+    
+    /**
+     * Current index
+     */
+    private int currentIndex;
+    
+    /**
+     * length
+     */
+    private int length;
+
+    /**
+     * cache of begin offset in character iterator
+     */
+    private int beginIndex;
+    
+    public ICUCharacterIterator(CharacterIterator iter){
+        if(iter==null){
+            throw new IllegalArgumentException();
+        }
+        iterator     = iter;
+        currentIndex = 0;
+	    beginIndex   = iter.getBeginIndex();
+        length       = iter.getEndIndex() - beginIndex;	   
+    }
+
+    /**
+     * @see UCharacterIterator#current()
+     */
+    public int current() {
+		if (currentIndex < length) {
+		    return iterator.setIndex(beginIndex + currentIndex);
+		}
+		return DONE;
+    }
+
+    /**
+     * @see UCharacterIterator#getLength()
+     */
+    public int getLength() {
+	    return length;
+    }
+
+    /**
+     * @see UCharacterIterator#getIndex()
+     */
+    public int getIndex() {
+	    return currentIndex;
+    }
+
+    /**
+     * @see UCharacterIterator#next()
+     */
+    public int next() {
+		if(currentIndex < length){
+		    return iterator.setIndex(beginIndex + currentIndex++);
+		}
+		return DONE;
+    }
+
+    /**
+     * @see UCharacterIterator#previous()
+     */
+    public int previous() {
+	    if(currentIndex>0){
+	        return iterator.setIndex(beginIndex + --currentIndex);
+	    }
+	    return DONE;
+    }
+
+    /**
+     * @see UCharacterIterator#setIndex(int)
+     */
+    public void setIndex(int index) {
+		if (index < 0 || index > length) {
+		    throw new IndexOutOfBoundsException();
+		}
+		currentIndex = index;
+    }
+
+    /**
+     * @see UCharacterIterator#setToLimit()
+     */
+    public void setToLimit() {
+		currentIndex = length;
+    }
+
+    /**
+     * @see UCharacterIterator#getText(char[])
+     */
+    public int getText(char[] fillIn, int offset){
+        if(offset < 0 || offset + length > fillIn.length){
+            throw new IndexOutOfBoundsException(Integer.toString(length));
+        }
+	
+        for (char ch = iterator.first(); ch != iterator.DONE; ch = iterator.next()) {
+	        fillIn[offset++] = ch;
+	    }
+	    iterator.setIndex(beginIndex + currentIndex);
+
+        return length;
+    }
+
+    /**
+     * Creates a clone of this iterator.  Clones the underlying character iterator.
+     * @see UCharacterIterator#clone()
+     */
+    public Object clone(){
+		try {
+		    ICUCharacterIterator result = (ICUCharacterIterator) super.clone();
+		    result.iterator = (CharacterIterator)this.iterator.clone();
+		    return result;
+		} catch (CloneNotSupportedException e) {      
+            return null; // only invoked if bad underlying character iterator
+		}
+    }
+    
+    /**
+     * @see UCharacterIterator#moveIndex()
+     */
+    public int moveIndex(int index){
+        currentIndex += index;
+        
+        if(currentIndex < 0) {
+	        currentIndex = 0;
+		} else if(currentIndex > length) {
+		    currentIndex = length;
+		}
+        return currentIndex;
+    }
+    
+    /**
+     * @see UCharacterIterator#getCharacterIterator()
+     */
+    public CharacterIterator getCharacterIterator(){
+        return (CharacterIterator)iterator.clone();
+    } 
+}
--- a/icu4j/src/com/ibm/icu/impl/NormalizerDataReader.java
+++ b/icu4j/src/com/ibm/icu/impl/NormalizerDataReader.java
@ -5,14 +5,15 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/NormalizerDataReader.java,v $
- * $Date: 2002/03/28 01:50:59 $
- * $Revision: 1.3 $
+ * $Date: 2002/06/20 01:18:07 $
+ * $Revision: 1.4 $
 *******************************************************************************
 */
 
 package com.ibm.icu.impl;
 import java.io.*;
-import com.ibm.icu.impl.ICUDebug;	
+import com.ibm.icu.impl.ICUDebug;
+import com.ibm.icu.util.VersionInfo;	
 /**
 * @version 	1.0
 * @author		Ram Viswanadha
@ -288,8 +289,8 @@ final class NormalizerDataReader {
                                        throws IOException{
        if(debug) System.out.println("Bytes in inputStream " + inputStream.available());
        
-        ICUBinary.readHeader(inputStream, DATA_FORMAT_ID_, 
-                             DATA_FORMAT_VERSION_, UNICODE_VERSION_);
+        ICUBinary.readHeader(inputStream, DATA_FORMAT_ID, 
+                             DATA_FORMAT_VERSION, UNICODE_VERSION);
        
        if(debug) System.out.println("Bytes left in inputStream " +inputStream.available());
        
@ -299,61 +300,63 @@ final class NormalizerDataReader {
    }
    
    // protected methods -------------------------------------------------
-      
+    
+    protected int[] readIndexes(int length)throws IOException{
+        int[] indexes = new int[length];
+        //Read the indexes
+        for (int i = 0; i <length ; i++) {
+             indexes[i] = dataInputStream.readInt();
+        }
+        return indexes;
+    } 
    /**
    * <p>Reads uprops.dat, parse it into blocks of data to be stored in
    * NormalizerImpl.</P
-    * @param impl NormalizerImpl instance
+    * @param normBytes
+    * @param fcdBytes
+    * @param auxBytes
+    * @param extraData
+    * @param combiningTable
+    * @param canonStartSets
    * @exception thrown when data reading fails
    * @draft 2.1
    */
-    protected void read(NormalizerImpl impl) 
-    		throws IOException{
-	 
-	 	//Read the indexes
-	 	int[] indexes = new int[NormalizerImpl.INDEX_TOP];
-        for (int i = 0; i <indexes.length ; i++) {
-             indexes[i] = dataInputStream.readInt();
-        }
-	
- 	
-	 	//Read the bytes that make up the normTrie
-	 	byte[] normBytes = new byte[indexes[NormalizerImpl.INDEX_TRIE_SIZE]];
+    protected void read(byte[] normBytes, byte[] fcdBytes, byte[] auxBytes,
+                        char[] extraData, char[] combiningTable, 
+                        Object[] canonStartSets) 
+                        throws IOException{
+
+	 	//Read the bytes that make up the normTrie 	
 	 	dataInputStream.read(normBytes);
-	 	ByteArrayInputStream normTrieStream= new ByteArrayInputStream(normBytes);
+        
+	 	//normTrieStream= new ByteArrayInputStream(normBytes);

 	 	//Read the extra data
-	 	int extraDataTop = indexes[NormalizerImpl.INDEX_CHAR_COUNT];
-	 	char[] extraData = new char[extraDataTop];
-	 	for(int i=0;i<extraDataTop;i++){
+	 	for(int i=0;i<extraData.length;i++){
 	 		extraData[i]=dataInputStream.readChar();
 	 	}
 	 	
 	 	//Read the combining class table
-	 	int combiningTableTop = indexes[NormalizerImpl.INDEX_COMBINE_DATA_COUNT];
-	 	char[] combiningTable = new char[combiningTableTop];
-	 	for(int i=0; i<combiningTableTop; i++){
+	 	for(int i=0; i<combiningTable.length; i++){
 	 		combiningTable[i]=dataInputStream.readChar();
 	 	}
 	 	
 	 	//Read the fcdTrie
-	 	byte[] fcdBytes = new byte[indexes[NormalizerImpl.INDEX_FCD_TRIE_SIZE]];
 	 	dataInputStream.read(fcdBytes);
-	 	ByteArrayInputStream fcdTrieStream= new ByteArrayInputStream(fcdBytes);
 	 	
 	 	
-	 	//Read the AuxTrie
-	 	byte[] auxBytes = new byte[indexes[NormalizerImpl.INDEX_AUX_TRIE_SIZE]];
+	 	//Read the AuxTrie	 	
        dataInputStream.read(auxBytes);
-	 	ByteArrayInputStream auxTrieStream= new ByteArrayInputStream(auxBytes);
 		
 		//Read the canonical start sets
-		Object[] canonStartSets=new Object[NormalizerImpl.CANON_SET_MAX_CANON_SETS];
 		int[] canonStartSetsIndexes = new int[NormalizerImpl.SET_INDEX_TOP];
-		for(int i=0; i<canonStartSetsIndexes.length; i++){
+		
+        for(int i=0; i<canonStartSetsIndexes.length; i++){
 	 		canonStartSetsIndexes[i]=dataInputStream.readChar();
 	 	}
-		char[] startSets = new char[canonStartSetsIndexes[NormalizerImpl.SET_INDEX_CANON_SETS_LENGTH]-NormalizerImpl.SET_INDEX_TOP];
+		
+        char[] startSets = new char[canonStartSetsIndexes[NormalizerImpl.SET_INDEX_CANON_SETS_LENGTH]-NormalizerImpl.SET_INDEX_TOP];
+        
        for(int i=0; i<startSets.length; i++){
 	 		startSets[i]=dataInputStream.readChar();
 	 	}
@ -369,20 +372,11 @@ final class NormalizerDataReader {
 	 	canonStartSets[NormalizerImpl.CANON_SET_START_SETS_INDEX] = startSets;
 	 	canonStartSets[NormalizerImpl.CANON_SET_BMP_TABLE_INDEX	] = bmpTable;
 	 	canonStartSets[NormalizerImpl.CANON_SET_SUPP_TABLE_INDEX] = suppTable;	 	
- 	 	 	
-	 	//Now set the tries 
-	 	impl.normTrieImpl.normTrie  	= new IntTrie( normTrieStream,impl.normTrieImpl	);
-	 	impl.fcdTrieImpl.fcdTrie   		= new CharTrie(fcdTrieStream,impl.fcdTrieImpl	);
-	 	impl.auxTrieImpl.auxTrie		= new CharTrie( auxTrieStream, impl.auxTrieImpl	);
-	 	impl.indexes   					= indexes;
-	 	impl.extraData 					= extraData;
-	 	impl.combiningTable 			= combiningTable;
-	 	impl.isDataLoaded				= true;	
-	 	impl.canonStartSets				= canonStartSets;
-	 	impl.isFormatVersion_2_1		= DATA_FORMAT_VERSION_[0]>2 || (DATA_FORMAT_VERSION_[0]==2 && DATA_FORMAT_VERSION_[1]>=1);
-	 	
    }
-
+    
+    public byte[] getDataFormatVersion(){
+        return DATA_FORMAT_VERSION;
+    }
    // private data members -------------------------------------------------
      

@ -396,13 +390,13 @@ final class NormalizerDataReader {
    * No guarantees are made if a older version is used
    * see store.c of gennorm for more information and values
    */
-    private static final byte DATA_FORMAT_ID_[] = {(byte)0x4E, (byte)0x6F, 
+    private static final byte DATA_FORMAT_ID[] = {(byte)0x4E, (byte)0x6F, 
                                                    (byte)0x72, (byte)0x6D};
-    private static final byte DATA_FORMAT_VERSION_[] = {(byte)0x2, (byte)0x1, 
+    private static final byte DATA_FORMAT_VERSION[] = {(byte)0x2, (byte)0x1, 
                                                        (byte)0x5, (byte)0x2};
 	//TODO: Set the version info after the VersionInfo class is ported
-    private static final byte UNICODE_VERSION_[] = {(byte)0x3, (byte)0x1, 
-                                                    (byte)0x1, (byte)0x0};  
-    private static final String UNICODE_VERSION_STRING_ = "3.1.1.0";	
+    private static final byte UNICODE_VERSION[] = {(byte)0x3, (byte)0x2, 
+                                                    (byte)0x0, (byte)0x0};  
+    private static final String UNICODE_VERSION_STRING = "3.2.0.0";	
 	
 }
--- a/icu4j/src/com/ibm/icu/impl/NormalizerImpl.java
+++ b/icu4j/src/com/ibm/icu/impl/NormalizerImpl.java
--- a/icu4j/src/com/ibm/icu/impl/ReplaceableCharacterIterator.java
+++ b/icu4j/src/com/ibm/icu/impl/ReplaceableCharacterIterator.java
@ -0,0 +1,240 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2000, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ *
+ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/Attic/ReplaceableCharacterIterator.java,v $ 
+ * $Date: 2002/06/20 01:18:09 $ 
+ * $Revision: 1.1 $
+ *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import com.ibm.icu.text.Replaceable;
+import com.ibm.icu.text.ReplaceableString;
+import com.ibm.icu.text.UTF16;    
+
+/**
+ * DLF docs must define behavior when Replaceable is mutated underneath
+ * the iterator.
+ *
+ * This and ICUCharacterIterator share some code, maybe they should share
+ * an implementation, or the common state and implementation should be
+ * moved up into UCharacterIterator.
+ *
+ * What are first, last, and getBeginIndex doing here?!?!?!
+ */
+public class ReplaceableCharacterIterator extends UCharacterIterator {
+
+    // public constructor ------------------------------------------------------
+    
+    /**
+     * Public constructor
+     * @param replacable text which the iterator will be based on
+     */
+    public ReplaceableCharacterIterator(Replaceable replaceable){
+        if(replaceable==null){
+            throw new IllegalArgumentException();
+        }
+        this.replaceable  = replaceable;
+        this.currentIndex = 0;
+        this.length       = replaceable.length();
+    }
+    
+    /**
+     * Public constructor
+     * @param str text which the iterator will be based on
+     */
+    public ReplaceableCharacterIterator(String str){
+        if(str==null){
+            throw new IllegalArgumentException();
+        }
+        this.replaceable  = new ReplaceableString(str);
+        this.currentIndex = 0;
+        this.length       = replaceable.length();
+    }
+    
+    /**
+     * Public constructor
+     * @param src an array of characters on which the iterator will be based
+     */
+    public ReplaceableCharacterIterator(char[] src){
+        if(src==null){
+            throw new IllegalArgumentException();
+        }
+        this.replaceable  = new ReplaceableString(new String(src));
+        this.currentIndex = 0;
+        this.length       = replaceable.length();
+    }
+    
+    /**
+     * Public constructor
+     * @param buf buffer of text on which the iterator will be based
+     */
+    public ReplaceableCharacterIterator(StringBuffer buf){
+        if(buf==null){
+            throw new IllegalArgumentException();
+        }
+        this.replaceable  = new ReplaceableString(buf);
+        this.currentIndex = 0;
+        this.length       = replaceable.length();
+    }
+    
+    // public methods ----------------------------------------------------------
+    
+    /**
+     * Creates a copy of this iterator, does not clone the underlying 
+     * <code>Replaceable</code>object
+     * @return copy of this iterator
+     */
+    public Object clone(){
+		try {
+		  return super.clone();
+		} catch (CloneNotSupportedException e) {
+		    return null; // never invoked
+		}
+    }
+    
+    /**
+     * Returns the current UTF16 character.
+     * @return current UTF16 character
+     */
+    public int current(){
+        if (currentIndex < length) {
+            return replaceable.charAt(currentIndex);
+        }
+        return DONE;
+    }
+    
+    /**
+     * Returns the current codepoint
+     * @return current codepoint
+     */
+    public int currentCodePoint(){
+        // cannot use charAt due to it different 
+        // behaviour when index is pointing at a
+        // trail surrogate, check for surrogates
+         
+        int ch = current();
+        if(UTF16.isLeadSurrogate((char)ch)){
+            // advance the index to get the next code point
+            next();
+            // due to post increment semantics current() after next() 
+            // actually returns the next char which is what we want
+            int ch2 = current();
+            // current should never change the current index so back off
+            previous();
+            
+            if(UTF16.isTrailSurrogate((char)ch2)){
+                // we found a surrogate pair
+                return UCharacterProperty.getRawSupplementary(
+                                                         (char)ch,(char)ch2
+                                                             );
+            }
+        }
+        return ch;
+    }
+    
+    /**
+     * Returns the start of the text.
+     * @return 0
+     */
+    public int getBeginIndex(){
+        return 0;
+    }
+
+    /**
+     * Returns the length of the text
+     * @return length of the text
+     */
+    public int getLength(){
+        return length;
+    }
+    
+    /**
+     * Gets the current currentIndex in text.
+     * @return current currentIndex in text.
+     */
+    public int getIndex(){
+        return currentIndex;
+    }
+        
+    /**
+     * Returns next UTF16 character and increments the iterator's currentIndex by 1. 
+     * If the resulting currentIndex is greater or equal to the text length, the 
+     * currentIndex is reset to the text length and a value of DONECODEPOINT is 
+     * returned. 
+     * @return next UTF16 character in text or DONE if the new currentIndex is off the 
+     *         end of the text range.
+     */
+    public int next(){
+        if (currentIndex < length) {
+            return replaceable.charAt(currentIndex++);
+        }
+        return DONE;
+    }
+    
+                
+    /**
+     * Returns previous UTF16 character and decrements the iterator's currentIndex by 
+     * 1. 
+     * If the resulting currentIndex is less than 0, the currentIndex is reset to 0 and a 
+     * value of DONECODEPOINT is returned. 
+     * @return next UTF16 character in text or DONE if the new currentIndex is off the 
+     *         start of the text range.
+     */
+    public int previous(){
+        if (currentIndex > 0) {
+            return replaceable.charAt(--currentIndex);
+        }
+        return DONE;
+    }
+
+    /**
+     * <p>Sets the currentIndex to the specified currentIndex in the text and returns that 
+     * single UTF16 character at currentIndex. 
+     * This assumes the text is stored as 16-bit code units.</p>
+     * @param currentIndex the currentIndex within the text. 
+     * @exception IllegalArgumentException is thrown if an invalid currentIndex is 
+     *            supplied. i.e. currentIndex is out of bounds.
+     * @return the character at the specified currentIndex or DONE if the specified 
+     *         currentIndex is equal to the end of the text.
+     */
+    public void setIndex(int currentIndex) throws IndexOutOfBoundsException{
+        if (currentIndex < 0 || currentIndex > length) {
+            throw new IndexOutOfBoundsException();
+        }
+        this.currentIndex = currentIndex;
+    }
+    
+    public int getText(char[] fillIn, int offset){
+        if(offset < 0 || offset + length > fillIn.length){
+            throw new IndexOutOfBoundsException(Integer.toString(length));
+        }
+        replaceable.getChars(0,length,fillIn,offset);
+        return length;
+    }       
+    
+    public String getString(){
+        char[] arr = new char[length];
+        replaceable.getChars(0,length,arr,0);
+        return new String(arr);
+    }
+    
+    // private data members ----------------------------------------------------
+    
+    /**
+     * Replacable object
+     */
+    private Replaceable replaceable;
+    /**
+     * Current currentIndex
+     */
+    private int currentIndex;
+    /**
+     * Replaceable text length
+     */
+    private int length;
+}
--- a/icu4j/src/com/ibm/icu/impl/UCharArrayIterator.java
+++ b/icu4j/src/com/ibm/icu/impl/UCharArrayIterator.java
@ -0,0 +1,91 @@
+/**
+*******************************************************************************
+* Copyright (C) 1996-2001, International Business Machines Corporation and    *
+* others. All Rights Reserved.                                                *
+*******************************************************************************
+*
+* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/UCharArrayIterator.java,v $ 
+* $Date: 2002/06/20 01:18:09 $ 
+* $Revision: 1.1 $
+*
+*******************************************************************************
+*/
+
+package com.ibm.icu.impl;
+
+
+/**
+ * @author Doug Felt
+ *
+ */
+
+public final class UCharArrayIterator extends UCharacterIterator {
+    private final char[] text;
+    private final int start;
+    private final int limit;
+    private int pos;
+
+    public UCharArrayIterator(char[] text, int start, int limit) {
+        if (start < 0 || limit > text.length || start > limit) {
+            throw new IllegalArgumentException("start: " + start + " or limit: "
+                                               + limit + " out of range [0, " 
+                                               + text.length + ")");
+        }
+        this.text = text;
+        this.start = start;
+        this.limit = limit;
+
+        this.pos = start;
+    }
+
+    public int current() {
+        return pos < limit ? text[pos] : DONE;
+    }
+
+    public int getLength() {
+        return limit - start;
+    }
+
+    public int getIndex() {
+        return pos - start;
+    }
+
+    public int next() {
+        return pos < limit ? text[pos++] : DONE;
+    }
+
+    public int previous() {
+        return pos > start ? text[--pos] : DONE;
+    }
+
+    public void setIndex(int index) {
+        if (index < 0 || index > limit - start) {
+            throw new IndexOutOfBoundsException("index: " + index + 
+                                                " out of range [0, " 
+                                                + (limit - start) + ")");
+        }
+        pos = start + index;
+    }
+
+    public int getText(char[] fillIn, int offset) {
+        int len = limit - start;
+        System.arraycopy(text, start, fillIn, offset, len);
+        return len;
+    }
+
+    public String getString() {
+        return new String(text, start, limit - start);
+    }
+    /**
+     * Creates a copy of this iterator, does not clone the underlying 
+     * <code>Replaceable</code>object
+     * @return copy of this iterator
+     */
+    public Object clone(){
+        try {
+          return super.clone();
+        } catch (CloneNotSupportedException e) {
+            return null; // never invoked
+        }
+    }
+}
--- a/icu4j/src/com/ibm/icu/impl/UCharacterIterator.java
+++ b/icu4j/src/com/ibm/icu/impl/UCharacterIterator.java
@ -5,335 +5,399 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/Attic/UCharacterIterator.java,v $ 
- * $Date: 2002/05/14 16:48:49 $ 
- * $Revision: 1.5 $
+ * $Date: 2002/06/20 01:18:09 $ 
+ * $Revision: 1.6 $
 *
 *******************************************************************************
 */
 package com.ibm.icu.impl;

 import com.ibm.icu.text.Replaceable;
-import com.ibm.icu.text.ReplaceableString;
+import com.ibm.icu.text.StringCharacterIterator;
 import com.ibm.icu.text.UTF16;
+
 import java.text.CharacterIterator;
+import com.ibm.icu.impl.UCharArrayIterator;

 /**
- * Internal class that iterates through a com.ibm.text.Replacable text object 
- * to return either Unicode characters.
- * @author synwee
- * @version release 2.1, February 2002
+ * DLF- Docs mostly need 1) much more description of iteration behavior,
+ * especially at endpoints and with empty or single character strings,
+ * and 2) need to describe the other major difference with Java
+ * CharacterIterator, which is that this also returns code points as
+ * well as code units.  
+ *
+ * Don't understand why setIndex and moveIndex have different exception behavior.
+ * I expect they shouldn't.
 */
-public final class UCharacterIterator implements CharacterIterator
-{
-	// public data members -----------------------------------------------------
-	
-	/**
-	 * Indicator that we have reached the ends of the UTF16 text when returning
-	 * 16 bit character.
-	 */
-	public static final int DONE = 0xFFFF;
-	/**
-	 * Indicator that we have reached the ends of the UTF16 text when returning
-	 * codepoints.
-	 */
-	public static final int DONE_CODEPOINT = -1;
-	
-	// public constructor ------------------------------------------------------
-	
-	/**
-	 * Public constructor.
-	 * By default the iteration range will be from 0 to the end of the text.
-	 * @param replacable text which the iterator will be based on
-	 */
-	public UCharacterIterator(Replaceable replaceable)
-	{
-		m_replaceable_  = replaceable;
-		m_index_        = 0;
-		m_start_        = 0;
-		m_limit_        = replaceable.length();
-	}
-	
-	/**
-	 * Public constructor
-	 * By default the iteration range will be from 0 to the end of the text.
-	 * @param str text which the iterator will be based on
-	 */
-	public UCharacterIterator(String str)
-	{
-		m_replaceable_  = new ReplaceableString(str);
-		m_index_        = 0;
-		m_start_        = 0;
-		m_limit_        = m_replaceable_.length();
-	}
-	
-	/**
-     * Constructs an iterator over the given range of the given string.
-     * @param  text  text to be iterated over
-     * @param  start offset of the first character to iterate
-     * @param  limit offset of the character following the last character to
-     * 					iterate
-     */
-    public UCharacterIterator(String str, int start, int limit) 
-    {
-    	m_replaceable_  = new ReplaceableString(str);
-		m_start_        = start;
-		m_limit_        = limit;
-		m_index_        = m_start_;
-    }   
+
+/**
+ * Abstract class that defines an API for iteration on text objects.This is an 
+ * interface for forward and backward iteration and random access into a text 
+ * object. Forward iteration is done with post-increment and backward iteration 
+ * is done with pre-decrement semantics, while the 
+ * <code>java.text.CharacterIterator</code> interface methods provided forward 
+ * iteration with "pre-increment" and backward iteration with pre-decrement 
+ * semantics. This API is more efficient for forward iteration over code points.
+ * @author Ram
+ * @version release 2.2, May 2002
+ */
+public abstract class UCharacterIterator 
+                      implements Cloneable,UForwardCharacterIterator {
+
+    
+    // static final methods ----------------------------------------------------
    
    /**
-     * Constructs an iterator over the given range of the given replaceable 
-     * string.
-     * @param  text  text to be iterated over
-     * @param  start offset of the first character to iterate
-     * @param  limit offset of the character following the last character to
-     * 					iterate
+     * Returns a <code>UCharacterIterator</code> object given a 
+     * <code>Replaceable</code> object.
+     * @param source a valid source as a <code>Replaceable</code> object
+     * @return UCharacterIterator object
+     * @exception IllegalArgumentException if the argument is null
     */
-    public UCharacterIterator(Replaceable replaceable, int start, int limit) 
-    {
-    	m_replaceable_  = replaceable;
-		m_start_        = start;
-		m_limit_        = limit;
-		m_index_        = m_start_;
-    }   
-	
-	// public methods ----------------------------------------------------------
-	
-	/**
-     * Creates a copy of this iterator.
-     * Cloning will not duplicate a new Replaceable object.
-     * @return copy of this iterator
-     */
-    public Object clone()
-    {
-        try {
-            return super.clone();
-        }
-        catch (CloneNotSupportedException e) {
-            throw new InternalError(
-            "Cloning by the super class java.text.CharacterIterator is not " +
-            "supported");
-        }
-    }
-    
-	/**
-     * Returns the current UTF16 character.
-     * @return current UTF16 character
-     */
-    public char current()
-    {
-        if (m_index_ >= m_start_ && m_index_ < m_limit_) {
-            return m_replaceable_.charAt(m_index_);
-        }
-        return DONE;
+    public static final UCharacterIterator getInstance(Replaceable source){
+        return new ReplaceableCharacterIterator(source);
    }
    
    /**
-     * Returns the current codepoint
-     * @return current codepoint
+     * Returns a <code>UCharacterIterator</code> object given a 
+     * source string.
+     * @param source a string
+     * @return UCharacterIterator object
+     * @exception IllegalArgumentException if the argument is null
     */
-    public int currentCodePoint()
-    {
-        if (m_index_ >= m_start_ && m_index_ < m_limit_) {
-            return m_replaceable_.char32At(m_index_);
-        }
-        return DONE_CODEPOINT;
+    public static final UCharacterIterator getInstance(String source){
+        return new ReplaceableCharacterIterator(source);
    }
    
    /**
-     * Gets the first UTF16 character in text.
-     * @return the first UTF16 in text.
+     * Returns a <code>UCharacterIterator</code> object given a 
+     * source character array.
+     * @param source an array of UTF-16 code units
+     * @return UCharacterIterator object
+     * @exception IllegalArgumentException if the argument is null
     */
-    public char first()
-    {
-        m_index_ = m_start_;
-        return current();
+    public static final UCharacterIterator getInstance(char[] source){
+        return getInstance(source,0,source.length);
    }
    
    /**
-     * Returns the start of the text to iterate.
-     * @return by default this method will return 0, unless a range for 
-     * iteration had been specified during construction.
+     * Returns a <code>UCharacterIterator</code> object given a 
+     * source character array.
+     * @param source an array of UTF-16 code units
+     * @return UCharacterIterator object
+     * @exception IllegalArgumentException if the argument is null
     */
-    public int getBeginIndex()
-    {
-        return m_start_;
+    public static final UCharacterIterator getInstance(char[] source, int start, int limit){
+        return new UCharArrayIterator(source,start,limit);
+    }
+    /**
+     * Returns a <code>UCharacterIterator</code> object given a 
+     * source StringBuffer.
+     * @param source an string buffer of UTF-16 code units
+     * @return UCharacterIterator object
+     * @exception IllegalArgumentException if the argument is null
+     */
+    public static final UCharacterIterator getInstance(StringBuffer source){
+        return new ReplaceableCharacterIterator(source);
    }

    /**
-     * Returns the limit offset of the text to iterate
-     * @return by default this method returns the length of the text, unless a 
-     * range for iteration had been specified during construction.
-     */
-    public int getEndIndex()
-    {
-        return m_limit_;
+     * Returns a <code>UCharacterIterator</code> object given a 
+     * CharacterIterator.
+     * @param source a valid CharacterIterator object.
+     * @return UCharacterIterator object
+     * @exception IllegalArgumentException if the argument is null
+     */    
+    public static final UCharacterIterator getInstance(CharacterIterator source){
+        return new ICUCharacterIterator(source);
    }
+       
+    // public methods ----------------------------------------------------------
+    /**
+     * Returns a <code>java.text.CharacterIterator</code> object for
+     * the underlying text of this iterator.  The returned iterator is
+     * independent of this iterator.
+     * @return java.text.CharacterIterator object 
+     */
+    public CharacterIterator getCharacterIterator(){
+        return new StringCharacterIterator(this.getText());
+    }    
+   
+    /**
+     * Returns the code unit at the current index.  If index is out
+     * of range, returns DONE.  Index is not changed.
+     * @return current code unit
+     */
+    public abstract int current();
+    
+    /**
+     * Returns the codepoint at the current index.
+     * If the current index is invalid, DONE is returned.
+     * If the current index points to a lead surrogate, and there is a following
+     * trail surrogate, then the code point is returned.  Otherwise, the code
+     * unit at index is returned.  Index is not changed. 
+     * @return current codepoint
+     */
+    public int currentCodePoint(){
+        int ch = current();
+        if(UTF16.isLeadSurrogate((char)ch)){
+            // advance the index to get the
+            // next code point
+            next();
+            // due to post increment semantics
+            // current() after next() actually
+            // returns the char we want
+            int ch2 = current();
+            // current should never change
+            // the current index so back off
+            previous();
+            
+            if(UTF16.isTrailSurrogate((char)ch2)){
+                // we found a surrogate pair 
+                // return the codepoint
+                return UCharacterProperty.getRawSupplementary(
+                                                          (char)ch,(char)ch2
+                                                             );
+            }
+        }
+        return ch;
+    }
+    
+    /**
+     * Returns the length of the text
+     * @return length of the text
+     */
+    public abstract int getLength();
+
    
    /**
     * Gets the current index in text.
     * @return current index in text.
     */
-    public int getIndex()
-    {
-        return m_index_;
+    public abstract int getIndex();
+
+
+    /**
+     * Returns the UTF16 code unit at index, and increments to the next
+     * code unit (post-increment semantics).  If index is out of
+     * range, DONE is returned, and the iterator is reset to the limit
+     * of the text.
+     * @return the next UTF16 code unit, or DONE if the index is at the limit
+     *         of the text.  
+     */
+    public abstract int next();
+
+    /**
+     * Returns the code point at index, and increments to the next code
+     * point (post-increment semantics).  If index does not point to a
+     * valid surrogate pair, the behavior is the same as
+     * <code>next()<code>.  Otherwise the iterator is incremented past
+     * the surrogate pair, and the code point represented by the pair
+     * is returned.
+     * @return the next codepoint in text, or DONE if the index is at
+     *         the limit of the text.  
+     */
+    public int nextCodePoint(){
+        int ch1 = next();
+        if(UTF16.isLeadSurrogate((char)ch1)){
+            int ch2 = next();
+            if(UTF16.isTrailSurrogate((char)ch2)){
+                return UCharacterProperty.getRawSupplementary((char)ch1,
+                                                              (char)ch2);
+            }else{
+                // unmatched surrogate so back out
+                previous();
+            }
+        }
+        return ch1;
+    }
+
+    /**
+     * Decrement to the position of the previous code unit in the
+     * text, and return it (pre-decrement semantics).  If the
+     * resulting index is less than 0, the index is reset to 0 and
+     * DONE is returned.
+     * @return the previous code unit in the text, or DONE if the new
+     *         index is before the start of the text.  
+     */
+    public abstract int previous();
+
+    
+    /**
+     * Retreat to the start of the previous code point in the text,
+     * and return it (pre-decrement semantics).  If the index is not
+     * preceeded by a valid surrogate pair, the behavior is the same
+     * as <code>previous()</code>.  Otherwise the iterator is
+     * decremented to the start of the surrogate pair, and the code
+     * point represented by the pair is returned.
+     * @return the previous code point in the text, or DONE if the new
+     *         index is before the start of the text.  
+     */
+    public int previousCodePoint(){
+        int ch1 = previous();
+        if(UTF16.isTrailSurrogate((char)ch1)){
+            int ch2 = previous();
+            if(UTF16.isLeadSurrogate((char)ch2)){
+                return UCharacterProperty.getRawSupplementary((char)ch2,
+                                                              (char)ch1);
+            }else{
+                //unmatched trail surrogate so back out
+                next();
+            }   
+        }
+        return ch1;
+    }
+
+    /**
+     * Sets the index to the specified index in the text.
+     * @param index the index within the text. 
+     * @exception IndexOutOfBoundsException is thrown if an invalid index is 
+     *            supplied
+     */
+    public abstract void setIndex(int index);
+
+    /**
+     * Sets the current index to the limit.
+     */
+    public void setToLimit() {
+	    setIndex(getLength());
    }
    
    /**
-     * Gets the last UTF16 iterateable character from the text and shifts the 
-     * index to the end of the text accordingly.
-     * @return the last UTF16 iterateable character
+     * Sets the current index to the start.
     */
-    public char last()
-    {
-        if (m_limit_ != m_start_) {
-            m_index_ = m_limit_ - 1;
-            return m_replaceable_.charAt(m_index_);
-        } 
-		m_index_ = m_limit_;
-        return DONE;
+    public void setToStart() {
+	    setIndex(0);
    }
-    
-	/**
-     * Returns next UTF16 character and increments the iterator's index by 1. 
-	 * If the resulting index is greater or equal to the iteration limit, the 
-	 * index is reset to the text iteration limit and a value of DONE_CODEPOINT is 
-	 * returned. 
-	 * @return next UTF16 character in text or DONE if the new index is off the 
-	 *         end of the text iteration limit.
-     */
-    public char next()
-    {
-        if (m_index_ < m_limit_) {
-        	char result = m_replaceable_.charAt(m_index_);
-            m_index_ ++;
-            return result;
-        }
-        return DONE;
-    }
-
-	/**
-	 * Returns next codepoint after current index and increments the iterator's 
-	 * index by a number depending on the returned codepoint. 
-	 * This assumes the text is stored as 16-bit code units
-     * with surrogate pairs intermixed. If the index of a leading or trailing 
-     * code unit of a surrogate pair is given, return the code point after the 
-     * surrogate pair.
-	 * If the resulting index is greater or equal to the text iterateable limit,
-	 * the current index is reset to the text iterateable limit and a value of 
-	 * DONE_CODEPOINT is returned. 
-	 * @return next codepoint in text or DONE_CODEPOINT if the new index is off the 
-	 *         end of the text iterateable limit.
-	 */	
-	public int nextCodePoint()
-	{
-		if (m_index_ < m_limit_) {
-			char ch = m_replaceable_.charAt(m_index_);
-			m_index_ ++;
-			if (ch >= UTF16.LEAD_SURROGATE_MIN_VALUE &&
-			    ch <= UTF16.LEAD_SURROGATE_MAX_VALUE &&
-			    m_index_ < m_limit_) {
-			    char trail = m_replaceable_.charAt(m_index_);
-			    if (trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE &&
-			    	trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
-			    	m_index_ ++;
-			    	return UCharacterProperty.getRawSupplementary(ch, 
-			    	                                              trail);
-				}
-			}
-			return ch;
-        }
-        return DONE_CODEPOINT;
-	}

    /**
-     * Returns previous UTF16 character and decrements the iterator's index by 
-     * 1. 
-	 * If the resulting index is less than the text iterateable limit, the 
-	 * index is reset to the start of the text iteration and a value of 
-	 * DONE_CODEPOINT is returned. 
-	 * @return next UTF16 character in text or DONE if the new index is off the 
-	 *         start of the text iteration range.
+     * Fills the buffer with the underlying text storage of the iterator
+     * If the buffer capacity is not enough a exception is thrown. The capacity
+     * of the fill in buffer should at least be equal to length of text in the 
+     * iterator obtained by calling <code>getLength()</code).
+     * <b>Usage:</b>
+     * 
+     * <code>
+     * <pre>
+     *         UChacterIterator iter = new UCharacterIterator.getInstance(text);
+     *         char[] buf = new char[iter.getLength()];
+     *         iter.getText(buf);
+     *         
+     *         OR
+     *         char[] buf= new char[1];
+     *         int len = 0;
+     *         for(;;){
+     *             try{
+     *                 len = iter.getText(buf);
+     *                 break;
+     *             }catch(IndexOutOfBoundsException e){
+     *                 buf = new char[iter.getLength()];
+     *             }
+     *         }
+     * </pre>
+     * </code>
+     *             
+     * @param fillIn an array of chars to fill with the underlying UTF-16 code 
+     *         units.
+     * @param offset the position within the array to start putting the data.
+     * @return the number of code units added to fillIn, as a convenience
+     * @exception IndexOutOfBounds exception if there is not enough
+     *            room after offset in the array, or if offset < 0.  
     */
-    public char previous()
-    {
-        if (m_index_ > m_start_) {
-            m_index_ --;
-            return m_replaceable_.charAt(m_index_);
-        }
-        return DONE;
-    }
-    
-    /**
-     * Returns previous codepoint before current index and decrements the 
-     * iterator's index by a number depending on the returned codepoint. 
-	 * This assumes the text is stored as 16-bit code units
-     * with surrogate pairs intermixed. If the index of a leading or trailing 
-     * code unit of a surrogate pair is given, return the code point before the 
-     * surrogate pair.
-	 * If the resulting index is less than the text iterateable range, the 
-	 * current index is reset to the start of the range and a value of 
-	 * DONE_CODEPOINT is returned. 
-	 * @return previous codepoint in text or DONE_CODEPOINT if the new index is 
-	 *         off the start of the text iteration range.
-     */
-    public int previousCodePoint()
-    {
-        if (m_index_ > m_start_) {
-            m_index_ --;
-            char ch = m_replaceable_.charAt(m_index_);
-			if (ch >= UTF16.TRAIL_SURROGATE_MIN_VALUE &&
-			    ch <= UTF16.TRAIL_SURROGATE_MAX_VALUE &&
-			    m_index_ > m_start_) {
-			    char lead = m_replaceable_.charAt(m_index_);
-			    if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE &&
-			    	lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
-			    	m_index_ --;
-			    	return UCharacterProperty.getRawSupplementary(ch, 
-			    	                                              lead);
-				}
-			}
-   			return ch;
-        }
-        return DONE_CODEPOINT;
-    }
-
-	/**
-	 * <p>Sets the index to the specified index in the text and returns that 
-	 * single UTF16 character at index. 
-	 * This assumes the text is stored as 16-bit code units.</p>
-	 * @param index the index within the text. 
-	 * @exception IllegalArgumentException is thrown if an invalid index is 
-	 *            supplied. i.e. index is out of bounds.
-	 * @return the character at the specified index or DONE if the specified 
-	 *         index is equal to the limit of the text iteration range.
-	 */
-	public char setIndex(int index)
-	{
-		if (index < m_start_ || index > m_limit_) {
-			throw new IllegalArgumentException("Index index out of bounds");
+    public int getText(char[] fillIn, int offset) {
+		int len = getLength();
+		if (offset < 0 || offset + len > fillIn.length) {
+		    throw new IndexOutOfBoundsException(Integer.toString(offset));
 		}
-		m_index_ = index;
-		return current();
-	}
-	
-	// private data members ----------------------------------------------------
-	
-	/**
-	 * Replacable object
-	 */
-	private Replaceable m_replaceable_;
-	/**
-	 * Current index
-	 */
-	private int m_index_;
-	/**
-	 * Start offset of iterateable range, by default this is 0
-	 */
-	private int m_start_;
-	/**
-	 * Limit offset of iterateable range, by default this is the length of the
-	 * string
-	 */
-	private int m_limit_;
+		int index = getIndex();
+		setToStart();
+		int ch;
+		while ((ch = next())!= DONE) {
+		    fillIn[offset++] = (char)ch;
+		}
+		setIndex(index);
+		return len;
+    }
+
+    /**
+     * Convenience override for <code>getText(char[], int)>/code> that provides
+     * an offset of 0.
+     * @param fillIn an array of chars to fill with the underlying UTF-16 code 
+     *         units.
+     * @return the number of code units added to fillIn, as a convenience
+     * @exception IndexOutOfBounds exception if there is not enough
+     *            room in the array.  
+     */
+    public final int getText(char[] fillIn) {
+		return getText(fillIn, 0);
+    }
+         
+    /**
+     * Convenience method for returning the underlying text storage as as string
+     * @return the underlying text storage in the iterator as a string
+     */
+    public String getText() {
+		char[] text = new char[getLength()];
+		getText(text);
+		return new String(text);
+    }
+       
+    /**
+     * Moves the current position by the number of code units
+     * specified, either forward or backward depending on the sign
+     * of delta (positive or negative respectively).  If the resulting
+     * index would be less than zero, the index is set to zero, and if
+     * the resulting index would be greater than limit, the index is
+     * set to limit.
+     *
+     * @param delta the number of code units to move the current
+     *              index.
+     * @return the new index.
+     * @exception IndexOutOfBoundsException is thrown if an invalid index is 
+     *            supplied  
+     * 
+     */
+    public int moveIndex(int delta) {
+		int x = Math.max(0, Math.min(getIndex() + delta, getLength()));
+		setIndex(x);
+		return x;
+    }
+
+    /**
+     * Moves the current position by the number of code points
+     * specified, either forward or backward depending on the sign of
+     * delta (positive or negative respectively). If the current index
+     * is at a trail surrogate then the first adjustment is by code
+     * unit, and the remaining adjustments are by code points.  If the
+     * resulting index would be less than zero, the index is set to
+     * zero, and if the resulting index would be greater than limit,
+     * the index is set to limit.
+     * @param delta the number of code units to move the current index.
+     * @return the new index  
+     * @exception IndexOutOfBoundsException is thrown if an invalid delta is 
+     *            supplied
+     */
+    public int moveCodePointIndex(int delta){
+        if(delta>0){
+            while(delta-->0 && nextCodePoint() != DONE);
+        }else{
+	        while(delta++<0 && previousCodePoint() != DONE);
+        }
+        if(delta!=0){
+            throw new IndexOutOfBoundsException();
+        }
+          
+        return getIndex();
+    }
+
+    /**
+     * Creates a copy of this iterator, independent from other iterators.
+     * If it is not possible to clone the iterator, returns null.
+     * @return copy of this iterator
+     */
+    public Object clone() throws CloneNotSupportedException{
+	    return super.clone();
+    }   
+    
 }
+
--- a/icu4j/src/com/ibm/icu/impl/UCharacterProperty.java
+++ b/icu4j/src/com/ibm/icu/impl/UCharacterProperty.java
@ -6,8 +6,8 @@
 *
 * $Source: 
 *         /usr/cvs/icu4j/icu4j/src/com/ibm/icu/text/UCharacterPropertyDB.java $ 
-* $Date: 2002/04/04 00:52:27 $ 
-* $Revision: 1.8 $
+* $Date: 2002/06/20 01:18:09 $ 
+* $Revision: 1.9 $
 *
 *******************************************************************************
 */
@ -760,7 +760,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
    * @return size of the lower case character in UTF16 format
    */
    public int getSpecialLowerCase(Locale locale, int index, int ch, 
-                                   UCharacterIterator uchariter,
+                                   UnicodeCharacterIterator uchariter,
                                   StringBuffer buffer)
    {
    	int exception = getException(index, 
@ -874,7 +874,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
     * @return size of the lowercased codepoint in UTF16 format
     */
    public int toLowerCase(Locale locale, int ch, 
-                                   UCharacterIterator uchariter, 
+                                   UnicodeCharacterIterator uchariter, 
                                   StringBuffer buffer)
    {
    	int props = getProperty(ch);
@ -909,7 +909,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
     * @return size oflowercased codepoint in UTF16 format
     */
    public int toLowerCase(Locale locale, int ch, 
-                           UCharacterIterator uchariter, char buffer[])
+                           UnicodeCharacterIterator uchariter, char buffer[])
    {
        int props = getProperty(ch);
        if (!UCharacterProperty.isExceptionIndicator(props)) {
@ -953,7 +953,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
    public void toLowerCase(Locale locale, String str, int start, int limit, 
                            StringBuffer result) 
    {
-        UCharacterIterator ucharIter = new UCharacterIterator(str);
+        UnicodeCharacterIterator ucharIter = new UnicodeCharacterIterator(str);
        int                strIndex  = start;
        
        while (strIndex < limit) { 
@ -980,7 +980,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
    * @return size of uppercased codepoint in UTF16 format
    */
    public int getSpecialUpperOrTitleCase(Locale locale, int index, int ch, 
-                                          UCharacterIterator uchariter, 
+                                          UnicodeCharacterIterator uchariter, 
                                          boolean upperflag, 
                                          StringBuffer buffer)
    {
@ -1041,7 +1041,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
     * @return size of uppercased codepoint in UTF16 format
     */
 	public int toUpperOrTitleCase(Locale locale, int ch, 
-	                              UCharacterIterator uchariter, 
+	                              UnicodeCharacterIterator uchariter, 
 	                              boolean upperflag, StringBuffer buffer) 
    {
        int props = getProperty(ch);
@ -1083,7 +1083,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
     * @return size of uppercased codepoint in UTF16 format
     */
 	public int toUpperOrTitleCase(Locale locale, int ch, 
-	                              UCharacterIterator uchariter, 
+	                              UnicodeCharacterIterator uchariter, 
 	                              boolean upperflag, char buffer[]) 
    {
        int props = getProperty(ch);
@ -1133,7 +1133,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
     */
    public String toUpperCase(Locale locale, String str, int start, int limit) 
    {
-        UCharacterIterator ucharIter = new UCharacterIterator(str);
+        UnicodeCharacterIterator ucharIter = new UnicodeCharacterIterator(str);
        int                strIndex  = start;
        StringBuffer       result    = new StringBuffer(limit - start);
        
@ -1170,7 +1170,7 @@ public final class UCharacterProperty implements Trie.DataManipulate
 	public String toTitleCase(Locale locale, String str, 
 	                          BreakIterator breakiter)
 	{
- 		UCharacterIterator ucharIter = new UCharacterIterator(str);
+ 		UnicodeCharacterIterator ucharIter = new UnicodeCharacterIterator(str);
 		int                length    = str.length();
        StringBuffer       result    = new StringBuffer();
        
@ -1583,13 +1583,13 @@ public final class UCharacterProperty implements Trie.DataManipulate
    *         the set { 'i', 'j', U+012f, U+1e2d, U+1ecb }
    * @see SpecialCasing.txt
    */
-    private static boolean isAFTER_i(UCharacterIterator uchariter, int offset) 
+    private static boolean isAFTER_i(UnicodeCharacterIterator uchariter, int offset) 
    {
    	uchariter.setIndex(offset);
    	
    	int ch = uchariter.previousCodePoint();
    	
-        while (ch != UCharacterIterator.DONE_CODEPOINT) {
+        while (ch != UnicodeCharacterIterator.DONE_CODEPOINT) {
            if (ch == LATIN_SMALL_LETTER_I_ || ch == LATIN_SMALL_LETTER_J_ || 
                ch == LATIN_SMALL_LETTER_I_WITH_OGONEK_ ||
                ch == LATIN_SMALL_LETTER_I_WITH_TILDE_BELOW_ || 
@ -1618,13 +1618,13 @@ public final class UCharacterProperty implements Trie.DataManipulate
    *         character 'I' with no intervening combining class = 230
    * @see SpecialCasing.txt
    */
-    private static boolean isAFTER_I(UCharacterIterator uchariter, int offset) 
+    private static boolean isAFTER_I(UnicodeCharacterIterator uchariter, int offset) 
    {
    	uchariter.setIndex(offset);
    	
    	int ch = uchariter.previousCodePoint();
    	
-        while (ch != UCharacterIterator.DONE_CODEPOINT) {
+        while (ch != UnicodeCharacterIterator.DONE_CODEPOINT) {
            if (ch == LATIN_CAPITAL_LETTER_I_) {
                return true; // preceded by I
            }
@ -1650,14 +1650,14 @@ public final class UCharacterProperty implements Trie.DataManipulate
    * @return false if any character after offset in src is a cased letter
    * @see SpecialCasing.txt
    */
-    private boolean isCFINAL(UCharacterIterator uchariter, int offset) 
+    private boolean isCFINAL(UnicodeCharacterIterator uchariter, int offset) 
    {
    	// iterator should have been determined to be not null by caller
        uchariter.setIndex(offset);
    	uchariter.nextCodePoint(); // rid of current codepoint
        int ch = uchariter.nextCodePoint(); // start checking
    	
-    	while (ch != UCharacterIterator.DONE_CODEPOINT) {
+    	while (ch != UnicodeCharacterIterator.DONE_CODEPOINT) {
            int cat = getType(ch);
            if (cat == UCharacterCategory.LOWERCASE_LETTER || 
                cat == UCharacterCategory.UPPERCASE_LETTER ||
@ -1681,13 +1681,13 @@ public final class UCharacterProperty implements Trie.DataManipulate
    * @return true if any character before index in src is a cased letter
    * @see SpecialCasing.txt
    */
-    private boolean isNotCINITIAL(UCharacterIterator uchariter, 
+    private boolean isNotCINITIAL(UnicodeCharacterIterator uchariter, 
                                         int offset) 
    {
    	uchariter.setIndex(offset);
    	int ch = uchariter.previousCodePoint();
    	
-        while (ch != UCharacterIterator.DONE_CODEPOINT) {
+        while (ch != UnicodeCharacterIterator.DONE_CODEPOINT) {
            int cat = getType(ch);
            if (cat == UCharacterCategory.LOWERCASE_LETTER || 
                cat == UCharacterCategory.UPPERCASE_LETTER ||
@ -1712,14 +1712,14 @@ public final class UCharacterProperty implements Trie.DataManipulate
    *         of combining class = 230.
    * @see SpecialCasing.txt
    */
-    private static boolean isFollowedByMOREABOVE(UCharacterIterator uchariter, 
+    private static boolean isFollowedByMOREABOVE(UnicodeCharacterIterator uchariter, 
                                                 int offset) 
    {
        uchariter.setIndex(offset);
        uchariter.nextCodePoint(); // rid of current codepoint
        int ch = uchariter.nextCodePoint(); // start checking
        
-        while (ch != UCharacterIterator.DONE_CODEPOINT) {
+        while (ch != UnicodeCharacterIterator.DONE_CODEPOINT) {
            int cc = NormalizerImpl.getCombiningClass(ch);
            if (cc == COMBINING_MARK_ABOVE_CLASS_) {
                return true; // at least one cc==230 following 
@ -1742,14 +1742,14 @@ public final class UCharacterProperty implements Trie.DataManipulate
    *         with no characters of combining class == 230 in between
    * @see SpecialCasing.txt
    */
-    private static boolean isFollowedByDotAbove(UCharacterIterator uchariter, 
+    private static boolean isFollowedByDotAbove(UnicodeCharacterIterator uchariter, 
                                                int offset) 
    {
        uchariter.setIndex(offset);
        uchariter.nextCodePoint(); // rid off current character
        int ch = uchariter.nextCodePoint(); // start checking
        
-        while (ch != UCharacterIterator.DONE_CODEPOINT) {
+        while (ch != UnicodeCharacterIterator.DONE_CODEPOINT) {
            if (ch == COMBINING_DOT_ABOVE_) {
                return true;
            }
--- a/icu4j/src/com/ibm/icu/impl/UForwardCharacterIterator.java
+++ b/icu4j/src/com/ibm/icu/impl/UForwardCharacterIterator.java
@ -0,0 +1,93 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2000, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ *
+ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/Attic/UForwardCharacterIterator.java,v $ 
+ * $Date: 2002/06/20 01:18:09 $ 
+ * $Revision: 1.1 $
+ *
+ *****************************************************************************************
+ */
+package com.ibm.icu.impl;
+
+/**
+ * Interface that defines an API for forward-only iteration
+ * on text objects.
+ * This is a minimal interface for iteration without random access
+ * or backwards iteration. It is especially useful for wrapping
+ * streams with converters into an object for collation or
+ * normalization.
+ *
+ * <p>Characters can be accessed in two ways: as code units or as
+ * code points.
+ * Unicode code points are 21-bit integers and are the scalar values
+ * of Unicode characters. ICU uses the type <code>int</code> for them.
+ * Unicode code units are the storage units of a given
+ * Unicode/UCS Transformation Format (a character encoding scheme).
+ * With UTF-16, all code points can be represented with either one
+ * or two code units ("surrogates").
+ * String storage is typically based on code units, while properties
+ * of characters are typically determined using code point values.
+ * Some processes may be designed to work with sequences of code units,
+ * or it may be known that all characters that are important to an
+ * algorithm can be represented with single code units.
+ * Other processes will need to use the code point access functions.</p>
+ *
+ * <p>ForwardCharacterIterator provides next() to access
+ * a code unit and advance an internal position into the text object,
+ * similar to a <code>return text[position++]</code>.<br>
+ * It provides nextCodePoint() to access a code point and advance an internal
+ * position.</p>
+ *
+ * <p>nextCodePoint() assumes that the current position is that of
+ * the beginning of a code point, i.e., of its first code unit.
+ * After nextCodePoint(), this will be true again.
+ * In general, access to code units and code points in the same
+ * iteration loop should not be mixed. In UTF-16, if the current position
+ * is on a second code unit (Low Surrogate), then only that code unit
+ * is returned even by nextCodePoint().</p>
+ *
+ * Usage:
+ * <code> 
+ *  public void function1(UForwardCharacterIterator it) {
+ *     int c;
+ *     while((c=it.next())!=UForwardCharacterIterator.DONE) {
+ *         // use c
+ *      }
+ *  }
+ * </code>
+ * </p>
+ *
+ */
+
+public interface UForwardCharacterIterator {
+      
+    /**
+     * Indicator that we have reached the ends of the UTF16 text.
+     */
+    public static final int DONE = -1;
+    /**
+     * Returns the UTF16 code unit at index, and increments to the next
+     * code unit (post-increment semantics).  If index is out of
+     * range, DONE is returned, and the iterator is reset to the limit
+     * of the text.
+     * @return the next UTF16 code unit, or DONE if the index is at the limit
+     *         of the text.  
+     */
+    public int next();
+
+    /**
+     * Returns the code point at index, and increments to the next code
+     * point (post-increment semantics).  If index does not point to a
+     * valid surrogate pair, the behavior is the same as
+     * <code>next()<code>.  Otherwise the iterator is incremented past
+     * the surrogate pair, and the code point represented by the pair
+     * is returned.
+     * @return the next codepoint in text, or DONE if the index is at
+     *         the limit of the text.  
+     */
+    public int nextCodePoint();
+
+}
--- a/icu4j/src/com/ibm/icu/impl/USerializedSet.java
+++ b/icu4j/src/com/ibm/icu/impl/USerializedSet.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/USerializedSet.java,v $ 
- * $Date: 2002/03/28 01:50:59 $ 
- * $Revision: 1.2 $
+ * $Date: 2002/06/20 01:18:09 $ 
+ * $Revision: 1.3 $
 *
 *****************************************************************************************
 */
@ -31,6 +31,8 @@ public final class USerializedSet {
        arrayOffset=bmpLength=length=0;

        length=src[srcStart++];
+        
+        
        if((length&0x8000) >0) {
            /* there are supplementary values */
            length&=0x7fff;
@ -47,8 +49,9 @@ public final class USerializedSet {
            }
            bmpLength=length;
        }
-        array=src;
-        arrayOffset=srcStart;
+        array = new char[length];
+        System.arraycopy(src,srcStart,array,0,length);
+        //arrayOffset=srcStart;
        return true;
    }

@ -83,9 +86,7 @@ public final class USerializedSet {
        if(rangeIndex<0) {
            return false;
        }
-		if(array==null){
-			array = new char[8];
-		}
+
 		range=new int[2];
 		
        rangeIndex*=2; /* address start/limit pairs */
@ -122,7 +123,7 @@ public final class USerializedSet {
 	    if( 0x10ffff<c) {
 	        return;
 	    }
-	
+
 	    if(c<0xffff) {
 	        bmpLength=length=2;
 	        array[0]=(char)c;
@ -157,7 +158,9 @@ public final class USerializedSet {
 	    if(array==null){
 			array = new char[8];
 		}
-	    range=new int[2];
+        if(range==null || range.length <2){
+            throw new IllegalArgumentException();
+        }
        rangeIndex*=2; /* address start/limit pairs */
 	    if(rangeIndex<bmpLength) {
 	        range[0]=array[rangeIndex++];
@ -168,6 +171,7 @@ public final class USerializedSet {
 	        } else {
 	            range[1]=0x110000;
 	        }
+            range[1]-=1;
 	        return true;
 	    } else {
 	        rangeIndex-=bmpLength;
@ -182,7 +186,8 @@ public final class USerializedSet {
 	            } else {
 	                range[1]=0x110000;
 	            }
-	            return false;
+                range[1]-=1;
+	            return true;
 	        } else {
 	            return false;
 	        }
@ -216,6 +221,6 @@ public final class USerializedSet {
 	    return (bmpLength+(length-bmpLength)/2+1)/2;
 	}

-    private char array[];
+    private char array[] = new char[8];
    private int arrayOffset, bmpLength, length;
 }
--- a/icu4j/src/com/ibm/icu/impl/UnicodeCharacterIterator.java
+++ b/icu4j/src/com/ibm/icu/impl/UnicodeCharacterIterator.java
@ -0,0 +1,339 @@
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2000, International Business Machines Corporation and    *
+ * others. All Rights Reserved.                                                *
+ *******************************************************************************
+ *
+ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/Attic/UnicodeCharacterIterator.java,v $ 
+ * $Date: 2002/06/20 01:18:09 $ 
+ * $Revision: 1.1 $
+ *
+ *******************************************************************************
+ */
+package com.ibm.icu.impl;
+
+import com.ibm.icu.text.Replaceable;
+import com.ibm.icu.text.ReplaceableString;
+import com.ibm.icu.text.UTF16;
+import java.text.CharacterIterator;
+
+/**
+ * Internal class that iterates through a com.ibm.text.Replacable text object 
+ * to return either Unicode characters.
+ * @author synwee
+ * @version release 2.1, February 2002
+ */
+public final class UnicodeCharacterIterator implements CharacterIterator
+{
+	// public data members -----------------------------------------------------
+	
+	/**
+	 * Indicator that we have reached the ends of the UTF16 text when returning
+	 * 16 bit character.
+	 */
+	public static final int DONE = 0xFFFF;
+	/**
+	 * Indicator that we have reached the ends of the UTF16 text when returning
+	 * codepoints.
+	 */
+	public static final int DONE_CODEPOINT = -1;
+	
+	// public constructor ------------------------------------------------------
+	
+	/**
+	 * Public constructor.
+	 * By default the iteration range will be from 0 to the end of the text.
+	 * @param replacable text which the iterator will be based on
+	 */
+	public UnicodeCharacterIterator(Replaceable replaceable)
+	{
+		m_replaceable_  = replaceable;
+		m_index_        = 0;
+		m_start_        = 0;
+		m_limit_        = replaceable.length();
+	}
+	
+	/**
+	 * Public constructor
+	 * By default the iteration range will be from 0 to the end of the text.
+	 * @param str text which the iterator will be based on
+	 */
+	public UnicodeCharacterIterator(String str)
+	{
+		m_replaceable_  = new ReplaceableString(str);
+		m_index_        = 0;
+		m_start_        = 0;
+		m_limit_        = m_replaceable_.length();
+	}
+	
+	/**
+     * Constructs an iterator over the given range of the given string.
+     * @param  text  text to be iterated over
+     * @param  start offset of the first character to iterate
+     * @param  limit offset of the character following the last character to
+     * 					iterate
+     */
+    public UnicodeCharacterIterator(String str, int start, int limit) 
+    {
+    	m_replaceable_  = new ReplaceableString(str);
+		m_start_        = start;
+		m_limit_        = limit;
+		m_index_        = m_start_;
+    }   
+    
+    /**
+     * Constructs an iterator over the given range of the given replaceable 
+     * string.
+     * @param  text  text to be iterated over
+     * @param  start offset of the first character to iterate
+     * @param  limit offset of the character following the last character to
+     * 					iterate
+     */
+    public UnicodeCharacterIterator(Replaceable replaceable, int start, int limit) 
+    {
+    	m_replaceable_  = replaceable;
+		m_start_        = start;
+		m_limit_        = limit;
+		m_index_        = m_start_;
+    }   
+	
+	// public methods ----------------------------------------------------------
+	
+	/**
+     * Creates a copy of this iterator.
+     * Cloning will not duplicate a new Replaceable object.
+     * @return copy of this iterator
+     */
+    public Object clone()
+    {
+        try {
+            return super.clone();
+        }
+        catch (CloneNotSupportedException e) {
+            throw new InternalError(
+            "Cloning by the super class java.text.CharacterIterator is not " +
+            "supported");
+        }
+    }
+    
+	/**
+     * Returns the current UTF16 character.
+     * @return current UTF16 character
+     */
+    public char current()
+    {
+        if (m_index_ >= m_start_ && m_index_ < m_limit_) {
+            return m_replaceable_.charAt(m_index_);
+        }
+        return DONE;
+    }
+    
+    /**
+     * Returns the current codepoint
+     * @return current codepoint
+     */
+    public int currentCodePoint()
+    {
+        if (m_index_ >= m_start_ && m_index_ < m_limit_) {
+            return m_replaceable_.char32At(m_index_);
+        }
+        return DONE_CODEPOINT;
+    }
+    
+    /**
+     * Gets the first UTF16 character in text.
+     * @return the first UTF16 in text.
+     */
+    public char first()
+    {
+        m_index_ = m_start_;
+        return current();
+    }
+    
+    /**
+     * Returns the start of the text to iterate.
+     * @return by default this method will return 0, unless a range for 
+     * iteration had been specified during construction.
+     */
+    public int getBeginIndex()
+    {
+        return m_start_;
+    }
+
+    /**
+     * Returns the limit offset of the text to iterate
+     * @return by default this method returns the length of the text, unless a 
+     * range for iteration had been specified during construction.
+     */
+    public int getEndIndex()
+    {
+        return m_limit_;
+    }
+    
+    /**
+     * Gets the current index in text.
+     * @return current index in text.
+     */
+    public int getIndex()
+    {
+        return m_index_;
+    }
+    
+    /**
+     * Gets the last UTF16 iterateable character from the text and shifts the 
+     * index to the end of the text accordingly.
+     * @return the last UTF16 iterateable character
+     */
+    public char last()
+    {
+        if (m_limit_ != m_start_) {
+            m_index_ = m_limit_ - 1;
+            return m_replaceable_.charAt(m_index_);
+        } 
+		m_index_ = m_limit_;
+        return DONE;
+    }
+    
+	/**
+     * Returns next UTF16 character and increments the iterator's index by 1. 
+	 * If the resulting index is greater or equal to the iteration limit, the 
+	 * index is reset to the text iteration limit and a value of DONE_CODEPOINT is 
+	 * returned. 
+	 * @return next UTF16 character in text or DONE if the new index is off the 
+	 *         end of the text iteration limit.
+     */
+    public char next()
+    {
+        if (m_index_ < m_limit_) {
+        	char result = m_replaceable_.charAt(m_index_);
+            m_index_ ++;
+            return result;
+        }
+        return DONE;
+    }
+
+	/**
+	 * Returns next codepoint after current index and increments the iterator's 
+	 * index by a number depending on the returned codepoint. 
+	 * This assumes the text is stored as 16-bit code units
+     * with surrogate pairs intermixed. If the index of a leading or trailing 
+     * code unit of a surrogate pair is given, return the code point after the 
+     * surrogate pair.
+	 * If the resulting index is greater or equal to the text iterateable limit,
+	 * the current index is reset to the text iterateable limit and a value of 
+	 * DONE_CODEPOINT is returned. 
+	 * @return next codepoint in text or DONE_CODEPOINT if the new index is off the 
+	 *         end of the text iterateable limit.
+	 */	
+	public int nextCodePoint()
+	{
+		if (m_index_ < m_limit_) {
+			char ch = m_replaceable_.charAt(m_index_);
+			m_index_ ++;
+			if (ch >= UTF16.LEAD_SURROGATE_MIN_VALUE &&
+			    ch <= UTF16.LEAD_SURROGATE_MAX_VALUE &&
+			    m_index_ < m_limit_) {
+			    char trail = m_replaceable_.charAt(m_index_);
+			    if (trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE &&
+			    	trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) {
+			    	m_index_ ++;
+			    	return UCharacterProperty.getRawSupplementary(ch, 
+			    	                                              trail);
+				}
+			}
+			return ch;
+        }
+        return DONE_CODEPOINT;
+	}
+
+    /**
+     * Returns previous UTF16 character and decrements the iterator's index by 
+     * 1. 
+	 * If the resulting index is less than the text iterateable limit, the 
+	 * index is reset to the start of the text iteration and a value of 
+	 * DONE_CODEPOINT is returned. 
+	 * @return next UTF16 character in text or DONE if the new index is off the 
+	 *         start of the text iteration range.
+     */
+    public char previous()
+    {
+        if (m_index_ > m_start_) {
+            m_index_ --;
+            return m_replaceable_.charAt(m_index_);
+        }
+        return DONE;
+    }
+    
+    /**
+     * Returns previous codepoint before current index and decrements the 
+     * iterator's index by a number depending on the returned codepoint. 
+	 * This assumes the text is stored as 16-bit code units
+     * with surrogate pairs intermixed. If the index of a leading or trailing 
+     * code unit of a surrogate pair is given, return the code point before the 
+     * surrogate pair.
+	 * If the resulting index is less than the text iterateable range, the 
+	 * current index is reset to the start of the range and a value of 
+	 * DONE_CODEPOINT is returned. 
+	 * @return previous codepoint in text or DONE_CODEPOINT if the new index is 
+	 *         off the start of the text iteration range.
+     */
+    public int previousCodePoint()
+    {
+        if (m_index_ > m_start_) {
+            m_index_ --;
+            char ch = m_replaceable_.charAt(m_index_);
+			if (ch >= UTF16.TRAIL_SURROGATE_MIN_VALUE &&
+			    ch <= UTF16.TRAIL_SURROGATE_MAX_VALUE &&
+			    m_index_ > m_start_) {
+			    char lead = m_replaceable_.charAt(m_index_);
+			    if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE &&
+			    	lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
+			    	m_index_ --;
+			    	return UCharacterProperty.getRawSupplementary(ch, 
+			    	                                              lead);
+				}
+			}
+   			return ch;
+        }
+        return DONE_CODEPOINT;
+    }
+
+	/**
+	 * <p>Sets the index to the specified index in the text and returns that 
+	 * single UTF16 character at index. 
+	 * This assumes the text is stored as 16-bit code units.</p>
+	 * @param index the index within the text. 
+	 * @exception IllegalArgumentException is thrown if an invalid index is 
+	 *            supplied. i.e. index is out of bounds.
+	 * @return the character at the specified index or DONE if the specified 
+	 *         index is equal to the limit of the text iteration range.
+	 */
+	public char setIndex(int index)
+	{
+		if (index < m_start_ || index > m_limit_) {
+			throw new IllegalArgumentException("Index index out of bounds");
+		}
+		m_index_ = index;
+		return current();
+	}
+	
+	// private data members ----------------------------------------------------
+	
+	/**
+	 * Replacable object
+	 */
+	private Replaceable m_replaceable_;
+	/**
+	 * Current index
+	 */
+	private int m_index_;
+	/**
+	 * Start offset of iterateable range, by default this is 0
+	 */
+	private int m_start_;
+	/**
+	 * Limit offset of iterateable range, by default this is the length of the
+	 * string
+	 */
+	private int m_limit_;
+}
--- a/icu4j/src/com/ibm/icu/impl/Utility.java
+++ b/icu4j/src/com/ibm/icu/impl/Utility.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/Utility.java,v $
- * $Date: 2002/02/25 22:43:57 $
- * $Revision: 1.23 $
+ * $Date: 2002/06/20 01:18:09 $
+ * $Revision: 1.24 $
 *
 *****************************************************************************************
 */
@ -91,6 +91,25 @@ public final class Utility {
        return true;
    }

+    /**
+     * Convenience utility to compare two Object[]s
+     * Ought to be in System.
+     * @param len the length to compare.
+     * The start indices and start+len must be valid.
+     */
+    public final static boolean arrayRegionMatches(char[] source, int sourceStart,
+                                            char[] target, int targetStart,
+                                            int len)
+    {
+        int sourceEnd = sourceStart + len;
+        int delta = targetStart - sourceStart;
+        for (int i = sourceStart; i < sourceEnd; i++) {
+            if (source[i]!=target[i + delta])
+            return false;
+        }
+        return true;
+    }
+    
    /**
     * Convenience utility to compare two int[]s.
     * @param len the length to compare.
--- a/icu4j/src/com/ibm/icu/impl/data/unorm.dat
+++ b/icu4j/src/com/ibm/icu/impl/data/unorm.dat
@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:665f02a0fd842a47ca65ecf36c1d301ef5cae01990b68f05695cfc693a783406
-size 106300
+oid sha256:a5b2036d17d077b24f01e187e005a8cd3d84bfd9fea94c505eb24db9ca57492a
+size 108044
--- a/icu4j/src/com/ibm/icu/text/BOSCU.java
+++ b/icu4j/src/com/ibm/icu/text/BOSCU.java
@ -5,14 +5,14 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Attic/BOSCU.java,v $ 
-* $Date: 2002/05/14 16:48:48 $ 
-* $Revision: 1.1 $
+* $Date: 2002/06/20 01:21:18 $ 
+* $Revision: 1.2 $
 *
 *******************************************************************************
 */
 package com.ibm.icu.text;

-import com.ibm.icu.impl.UCharacterIterator;
+import com.ibm.icu.impl.UnicodeCharacterIterator;

 /**
 * <p>Binary Ordered Compression Scheme for Unicode</p>
@ -105,9 +105,9 @@ public class BOSCU
 																int offset) 
 	{
 	    int prev = 0;
-	    UCharacterIterator iterator = new UCharacterIterator(source);
+	    UnicodeCharacterIterator iterator = new UnicodeCharacterIterator(source);
 	    int codepoint = iterator.nextCodePoint();
-	    while (codepoint != UCharacterIterator.DONE_CODEPOINT) {
+	    while (codepoint != UnicodeCharacterIterator.DONE_CODEPOINT) {
 	        if (prev < 0x4e00 || prev >= 0xa000) {
 	            prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
 	        } 
@ -133,9 +133,9 @@ public class BOSCU
 	{
 	    int prev = 0;
 	    int result = 0;
-	    UCharacterIterator iterator = new UCharacterIterator(source);
+	    UnicodeCharacterIterator iterator = new UnicodeCharacterIterator(source);
 	    int codepoint = iterator.nextCodePoint();
-	    while (codepoint != UCharacterIterator.DONE_CODEPOINT) {
+	    while (codepoint != UnicodeCharacterIterator.DONE_CODEPOINT) {
 	        if (prev < 0x4e00 || prev >= 0xa000) {
 	            prev = (prev & ~0x7f) - SLOPE_REACH_NEG_1_;
 	        } 
--- a/icu4j/src/com/ibm/icu/text/CanonicalIterator.java
+++ b/icu4j/src/com/ibm/icu/text/CanonicalIterator.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/CanonicalIterator.java,v $ 
- * $Date: 2002/03/20 22:55:33 $ 
- * $Revision: 1.9 $
+ * $Date: 2002/06/20 01:21:18 $ 
+ * $Revision: 1.10 $
 *
 *****************************************************************************************
 */
@ -17,7 +17,8 @@ import com.ibm.icu.lang.*;
 import java.util.Enumeration;
 import java.util.Vector;
 import java.util.*;
-
+import com.ibm.icu.impl.NormalizerImpl;
+import com.ibm.icu.impl.USerializedSet;
 /**
 * This class allows one to iterate through all the strings that are canonically equivalent to a given
 * string. For example, here are some sample results:
@ -103,7 +104,7 @@ public class CanonicalIterator {
     * while changing the source string, saving object creation.
     */
    public void setSource(String newSource) {
-        source = Normalizer.normalize(newSource, Normalizer.DECOMP, 0);
+        source = Normalizer.normalize(newSource, Normalizer.NFD);
        done = false;
        
        // catch degenerate case
@ -122,9 +123,10 @@ public class CanonicalIterator {
 	    // i should be the end of the first code point
 	    
 	    int i = UTF16.findOffsetFromCodePoint(source, 1);
+        
        for (; i < source.length(); i += UTF16.getCharCount(i)) {
            cp = UTF16.charAt(source, i);
-            if (SAFE_START.contains(cp)) {
+            if (NormalizerImpl.isCanonSafeStart(cp)) {
                list.add(source.substring(start, i)); // add up to i
                start = i;
            }
@ -195,21 +197,21 @@ public class CanonicalIterator {
    /**
     *@return the set of "safe starts", characters that are class zero AND are never non-initial in a decomposition.
     *@internal
-     */
+     *
    public static UnicodeSet getSafeStart() {
        return (UnicodeSet) SAFE_START.clone();
    }
-    
+    */
    /**
     *@return the set of characters whose decompositions start with the given character
     *@internal
-     */
+     *
    public static UnicodeSet getStarts(int cp) {
        UnicodeSet result = AT_START.get(cp);
        if (result == null) result = EMPTY;
        return (UnicodeSet) result.clone();
    }
-    
+    */
    
    // ===================== PRIVATES ==============================
    
@ -253,7 +255,7 @@ public class CanonicalIterator {
 				String attempt = Normalizer.normalize(possible, Normalizer.DECOMP, 0);
 				if (attempt.equals(segment)) {
 */
-                if (Normalizer.isEquivalent(possible, segment, Normalizer.DECOMP, 0)) {
+                if (Normalizer.compare(possible, segment,0)==0) {
             	               	
            		if (PROGRESS) System.out.println("Adding Permutation: " + NAME.transliterate(possible));
                	result.add(possible);
@ -272,6 +274,54 @@ public class CanonicalIterator {
    
     
    private Set getEquivalents2(String segment) {
+        
+        Set result = new HashSet();
+        
+        if (PROGRESS) System.out.println("Adding: " + NAME.transliterate(segment));
+        
+        result.add(segment);
+        StringBuffer workingBuffer = new StringBuffer();
+        
+        // cycle through all the characters
+        int cp=0,end=0;
+	    int[] range = new int[2];
+        for (int i = 0; i < segment.length(); i += UTF16.getCharCount(cp)) {
+            
+	        // see if any character is at the start of some decomposition
+	        cp = UTF16.charAt(segment, i);;
+	        USerializedSet starts = new USerializedSet();
+           
+            if (!NormalizerImpl.getCanonStartSet(cp, starts)) {
+	          continue;
+	        }
+	        int j=0;
+            // if so, see which decompositions match 
+	        for(j = 0, cp = end+1; cp <= end ||starts.getSerializedRange(j++, range); ++cp) {
+                if(cp>end){
+                    cp=range[0];
+                    end=range[1];
+                }
+                
+	            Set remainder = extract(cp, segment, i,workingBuffer);
+	            if (remainder == null) continue;
+	
+	            // there were some matches, so add all the possibilities to the set.
+	            String prefix= segment.substring(0,i);
+	            prefix += UTF16.valueOf(cp);
+	            int el = -1;
+	            Iterator iter = remainder.iterator();
+	            while (iter.hasNext()) {
+	                String item = (String) iter.next();
+	                String toAdd = new String(prefix);
+	                toAdd += item;
+	                result.add(toAdd);		
+	                //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(*toAdd)));
+	            }
+
+            }
+	    }
+	    return result;
+        /*
        Set result = new HashSet();
        if (PROGRESS) System.out.println("Adding: " + NAME.transliterate(segment));
        result.add(segment);
@ -283,6 +333,7 @@ public class CanonicalIterator {
        for (int i = 0; i < segment.length(); i += UTF16.getCharCount(cp)) {
            // see if any character is at the start of some decomposition
            cp = UTF16.charAt(segment, i);
+            NormalizerImpl.getCanonStartSet(c,fillSet)
            UnicodeSet starts = AT_START.get(cp);
            if (starts == null) continue;
            UnicodeSetIterator usi = new UnicodeSetIterator(starts);
@ -305,6 +356,7 @@ public class CanonicalIterator {
            }
        }
        return result;
+        */
    }
    
    /**
@ -317,7 +369,7 @@ public class CanonicalIterator {
            + ", " + NAME.transliterate(segment.substring(segmentPos)));
            
        //String decomp = Normalizer.normalize(UTF16.valueOf(comp), Normalizer.DECOMP, 0);
-        String decomp = Normalizer.normalize(comp, Normalizer.DECOMP, 0);
+        String decomp = Normalizer.normalize(comp, Normalizer.NFD);
        
        // See if it matches the start of segment (at segmentPos)
        boolean ok = false;
@ -369,7 +421,7 @@ public class CanonicalIterator {
        if (!segment.regionMatches(segmentPos, trial, 0, segment.length() - segmentPos)) return null;
        */
        
-        if (!Normalizer.isEquivalent(UTF16.valueOf(comp) + remainder, segment.substring(segmentPos), Normalizer.DECOMP, 0)) return null;
+        if (0!=Normalizer.compare(UTF16.valueOf(comp) + remainder, segment.substring(segmentPos), 0)) return null;
        
        // get the remaining combinations
        return getEquivalents2(remainder);
@ -392,16 +444,18 @@ public class CanonicalIterator {
        SET_WITH_NULL_STRING.add("");
    }
    
-    private static UnicodeSet SAFE_START = new UnicodeSet();
-    private static CharMap AT_START = new CharMap();
+  //  private static UnicodeSet SAFE_START = new UnicodeSet();
+  //  private static CharMap AT_START = new CharMap();
    
        // TODO: WARNING, NORMALIZER doesn't have supplementaries yet !!;
        // Change FFFF to 10FFFF in C, and in Java when normalizer is upgraded.
-    private static int LAST_UNICODE = 0x10FFFF;
+  //  private static int LAST_UNICODE = 0x10FFFF;
+    /*
    static {
        buildData();
    }
-    
+    */
+    /*
    private static void buildData() {

        if (PROGRESS) System.out.println("Getting Safe Start");
@ -417,10 +471,10 @@ public class CanonicalIterator {
        for (int cp = 0; cp <= LAST_UNICODE; ++cp) {
            if (PROGRESS & (cp & 0x7FF) == 0) System.out.print('.');
            
-            if (Normalizer.isNormalized(cp, Normalizer.DECOMP, 0)) continue;
+            if (Normalizer.isNormalized(cp, Normalizer.NFD)) continue;

            //String istr = UTF16.valueOf(cp);
-            String decomp = Normalizer.normalize(cp, Normalizer.DECOMP, 0);
+            String decomp = Normalizer.normalize(cp, Normalizer.NFD);
            //if (decomp.equals(istr)) continue;
            
            // add each character in the decomposition to canBeIn 
@ -437,7 +491,7 @@ public class CanonicalIterator {
        }
        if (PROGRESS) System.out.println();
    }
-    
+    */
    // the following is just for a map from characters to a set of characters
    
    private static class CharMap {
--- a/icu4j/src/com/ibm/icu/text/ComposedCharIter.java
+++ b/icu4j/src/com/ibm/icu/text/ComposedCharIter.java
@ -5,12 +5,14 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/ComposedCharIter.java,v $ 
- * $Date: 2002/02/16 03:06:05 $ 
- * $Revision: 1.3 $
+ * $Date: 2002/06/20 01:21:18 $ 
+ * $Revision: 1.4 $
 *
 *****************************************************************************************
 */
 package com.ibm.icu.text;
+import com.ibm.icu.impl.NormalizerImpl;
+import com.ibm.icu.impl.Utility;

 /**
 * <tt>ComposedCharIter</tt> is an iterator class that returns all
@ -51,6 +53,7 @@ package com.ibm.icu.text;
 * <tt>ComposedCharIter</tt> is currently based on version 2.1.8 of the
 * <a href="http://www.unicode.org" target="unicode">Unicode Standard</a>.
 * It will be updated as later versions of Unicode are released.
+ * @deprecated
 */
 public final class ComposedCharIter {
    
@ -59,7 +62,7 @@ public final class ComposedCharIter {
     * {@link #next} returns this value when there are no more composed characters
     * over which to iterate.
     */
-    public static final char DONE = Normalizer.DONE;
+    public static final  char DONE = (char) Normalizer.DONE;
    
    /**
     * Construct a new <tt>ComposedCharIter</tt>.  The iterator will return
@ -67,8 +70,8 @@ public final class ComposedCharIter {
     * Hangul characters.
     */
    public ComposedCharIter() {
-        minDecomp = DecompData.MAX_COMPAT;
-        hangul = false;
+        compat = false;
+        options =0;
    }
    
    
@ -86,10 +89,8 @@ public final class ComposedCharIter {
     *                  Jamo decompositions.
     */
    public ComposedCharIter(boolean compat, int options) {
-        // Compatibility explosions have lower indices; skip them if necessary
-        minDecomp = compat ? 0 : DecompData.MAX_COMPAT;
-        
-        hangul = (options & Normalizer.IGNORE_HANGUL) == 0;
+        this.compat = compat;
+        this.options = options;
    }
    
    /**
@ -97,10 +98,10 @@ public final class ComposedCharIter {
     * by {@link #next}.
     */
    public boolean hasNext() {
-        if (nextChar == DONE)  {
+        if (nextChar == Normalizer.DONE)  {
            findNextChar();
        }
-        return nextChar != DONE;
+        return nextChar != Normalizer.DONE;
    }
    
    /**
@ -111,12 +112,12 @@ public final class ComposedCharIter {
     * to <tt>next</tt> will return {@link #DONE}.
     */
    public char next() {
-        if (nextChar == DONE)  {
+        if (nextChar == Normalizer.DONE)  {
            findNextChar();
        }
        curChar = nextChar;
-        nextChar = DONE;
-        return curChar;
+        nextChar = Normalizer.DONE;
+        return (char) curChar;
    }
    
    /**
@ -126,42 +127,38 @@ public final class ComposedCharIter {
     * affected by the settings of the options passed to the constructor.
     */
    public String decomposition() {
-        StringBuffer result = new StringBuffer();
-        
-        int pos = (char)(DecompData.offsets.elementAt(curChar) & DecompData.DECOMP_MASK);
-        
-        if (pos > minDecomp) {
-            Normalizer.doAppend(DecompData.contents, pos, result);
-            
-            
-        } else if (hangul && curChar >= HANGUL_BASE && curChar < HANGUL_LIMIT) {
-            Normalizer.hangulToJamo(curChar, result, minDecomp);
-        } else {
-            result.append(curChar);
-        }
-        return result.toString();
+        // the decomposition buffer contains the decomposition of 
+        // current char so just return it
+        return new String(decompBuf,0, bufLen);
    }
    
    private void findNextChar() {
-        if (curChar != DONE) {
-            char ch = curChar;
-            while (++ch < 0xFFFF) {
-                int offset = DecompData.offsets.elementAt(ch) & DecompData.DECOMP_MASK;
-                if (offset > minDecomp
-                    || (hangul && ch >= HANGUL_BASE && ch < HANGUL_LIMIT) ) {
-                    nextChar = ch;
+        int c=curChar+1;
+        for(;;){
+           if(c < 0xFFFF){
+	           bufLen = NormalizerImpl.getDecomposition(c,compat,
+                                                        decompBuf,0,
+	                                                    decompBuf.length);
+	           if(bufLen>0){
+                    // the curChar can be decomposed... so it is a composed char
+	                // cache the result     
                    break;
-                }
-            }
+	           }
+	           c++;
+           }else{
+	           c=Normalizer.DONE;
+	           break;
+           }
        }
+        nextChar=c;  
    }
    
-    private final int minDecomp;
-    private final boolean hangul;
+    private int options;
+    private boolean compat;
+    private char[] decompBuf = new char[100];
+    private int bufLen=0;
+    private int curChar = 0;
+    private int nextChar = Normalizer.DONE;
    
-    private char curChar = 0;
-    private char nextChar = Normalizer.DONE;
-    
-    private static final char HANGUL_BASE = Normalizer.HANGUL_BASE;
-    private static final char HANGUL_LIMIT = Normalizer.HANGUL_LIMIT;
+
 };
--- a/icu4j/src/com/ibm/icu/text/LowercaseTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/LowercaseTransliterator.java
@ -5,15 +5,15 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/LowercaseTransliterator.java,v $ 
- * $Date: 2002/04/03 00:00:00 $ 
- * $Revision: 1.10 $
+ * $Date: 2002/06/20 01:21:18 $ 
+ * $Revision: 1.11 $
 *
 *****************************************************************************************
 */
 package com.ibm.icu.text;
 import java.util.*;
 import com.ibm.icu.impl.UCharacterProperty;
-import com.ibm.icu.impl.UCharacterIterator;
+import com.ibm.icu.impl.UnicodeCharacterIterator;

 /**
 * A transliterator that performs locale-sensitive toLower()
@ -63,7 +63,7 @@ class LowercaseTransliterator extends Transliterator{
        // get string for context
        // TODO: add convenience method to do this, since we do it all over
        
-        UCharacterIterator original = new UCharacterIterator(text);
+        UnicodeCharacterIterator original = new UnicodeCharacterIterator(text);
        
        // Walk through original string
        // If there is a case change, modify corresponding position in replaceable
--- a/icu4j/src/com/ibm/icu/text/NormalizationTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/NormalizationTransliterator.java
@ -14,7 +14,7 @@ import com.ibm.icu.lang.*;

 /**
 * @author Alan Liu
- * @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.17 $ $Date: 2002/02/25 22:43:58 $
+ * @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.18 $ $Date: 2002/06/20 01:21:18 $
 */
 final class NormalizationTransliterator extends Transliterator {
    
@ -57,25 +57,25 @@ final class NormalizationTransliterator extends Transliterator {
        Transliterator.registerFactory("Any-NFC", new Transliterator.Factory() {
            public Transliterator getInstance(String ID) {
                return NormalizationTransliterator.
-                    getInstance(Normalizer.COMPOSE);
+                    getInstance(Normalizer.NFC);
            }
        });
        Transliterator.registerFactory("Any-NFD", new Transliterator.Factory() {
            public Transliterator getInstance(String ID) {
                return NormalizationTransliterator.
-                    getInstance(Normalizer.DECOMP);
+                    getInstance(Normalizer.NFD);
            }
        });
        Transliterator.registerFactory("Any-NFKC", new Transliterator.Factory() {
            public Transliterator getInstance(String ID) {
                return NormalizationTransliterator.
-                    getInstance(Normalizer.COMPOSE_COMPAT);
+                    getInstance(Normalizer.NFKC);
            }
        });
        Transliterator.registerFactory("Any-NFKD", new Transliterator.Factory() {
            public Transliterator getInstance(String ID) {
                return NormalizationTransliterator.
-                    getInstance(Normalizer.DECOMP_COMPAT);
+                    getInstance(Normalizer.NFKD);
            }
        });
        Transliterator.registerSpecialInverse("NFC", "NFD", true);
@ -89,7 +89,21 @@ final class NormalizationTransliterator extends Transliterator {
                                                          int opt) {
        StringBuffer id = new StringBuffer("NF");
        int choice = 0;
-        if (m.compat()) {
+        if(m==Normalizer.NFC){
+            id.append("C");
+            choice |= C;
+        }else if(m==Normalizer.NFKC){
+            id.append("KC");
+            choice |= KC;
+        }else if(m==Normalizer.NFD){
+            id.append("D");
+            choice |= D;
+        }else if(m==Normalizer.NFKD){
+            id.append("KD");
+            choice |= KD;
+        }
+        
+        /*if (m.compat()) {
            id.append('K');
            choice |= KD;
        }
@ -98,7 +112,7 @@ final class NormalizationTransliterator extends Transliterator {
            choice |= C;
        } else {
            id.append('D');
-        }
+        }*/
        return new NormalizationTransliterator(id.toString(), m, choice, opt);
    }

@ -185,7 +199,7 @@ final class NormalizationTransliterator extends Transliterator {
        }
        text.getChars(lastSafe, limit, buffer, 0);
        String input = new String(buffer, 0, len); // TODO: fix normalizer to take char[]
-        String output = Normalizer.normalize(input, mode, options);
+        String output = Normalizer.normalize(input, mode);
        
        // verify OK, if specified
        if (verify != null) {
--- a/icu4j/src/com/ibm/icu/text/Normalizer.java
+++ b/icu4j/src/com/ibm/icu/text/Normalizer.java
--- a/icu4j/src/com/ibm/icu/text/TitlecaseTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/TitlecaseTransliterator.java
@ -3,13 +3,13 @@
 * others. All Rights Reserved.
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TitlecaseTransliterator.java,v $ 
- * $Date: 2002/04/02 23:59:59 $ 
- * $Revision: 1.15 $
+ * $Date: 2002/06/20 01:21:18 $ 
+ * $Revision: 1.16 $
 */
 package com.ibm.icu.text;
 import java.util.*;
 import com.ibm.icu.impl.UCharacterProperty;
-import com.ibm.icu.impl.UCharacterIterator;
+import com.ibm.icu.impl.UnicodeCharacterIterator;

 /**
 * A transliterator that converts all letters (as defined by
@ -92,7 +92,7 @@ class TitlecaseTransliterator extends Transliterator {
        // get string for context
        // TODO: add convenience method to do this, since we do it all over
        
-        UCharacterIterator original = new UCharacterIterator(text);
+        UnicodeCharacterIterator original = new UnicodeCharacterIterator(text);
        
        // Walk through original string
        // If there is a case change, modify corresponding position in replaceable
--- a/icu4j/src/com/ibm/icu/text/TransliteratorParser.java
+++ b/icu4j/src/com/ibm/icu/text/TransliteratorParser.java
@ -4,8 +4,8 @@
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliteratorParser.java,v $
-* $Date: 2002/04/17 16:46:11 $
-* $Revision: 1.21 $
+* $Date: 2002/06/20 01:21:18 $
+* $Revision: 1.22 $
 **********************************************************************
 */
 package com.ibm.icu.text;
@ -1334,13 +1334,13 @@ class TransliteratorParser {

        p = Utility.parsePattern(rule, pos, limit, "~nfd rules~;", null);
        if (p >= 0) {
-            pragmaNormalizeRules(Normalizer.DECOMP);
+            pragmaNormalizeRules(Normalizer.NFD);
            return p;
        }

        p = Utility.parsePattern(rule, pos, limit, "~nfc rules~;", null);
        if (p >= 0) {
-            pragmaNormalizeRules(Normalizer.COMPOSE);
+            pragmaNormalizeRules(Normalizer.NFC);
            return p;
        }

--- a/icu4j/src/com/ibm/icu/text/TransliteratorUtility.java
+++ b/icu4j/src/com/ibm/icu/text/TransliteratorUtility.java
@ -32,17 +32,17 @@ public class TransliteratorUtility {
            // transliterators.
            for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) {
                String ID = (String) e.nextElement();
-                showSourceSet(ID, Normalizer.NO_OP, false);
+                showSourceSet(ID, Normalizer.NONE, false);
            }
        } else {
            // Usage: ID [NFKD | NFD] [lower]
-            Normalizer.Mode m = Normalizer.NO_OP;
+            Normalizer.Mode m = Normalizer.NONE;
            boolean lowerFirst = false;
            if (args.length >= 2) {
                if (args[1].equalsIgnoreCase("NFD")) {
-                    m = Normalizer.DECOMP;
+                    m = Normalizer.NFD;
                } else if (args[1].equalsIgnoreCase("NFKD")) {
-                    m = Normalizer.DECOMP_COMPAT;
+                    m = Normalizer.NFKD;
                } else {
                    usage();
                }
@ -87,7 +87,7 @@ public class TransliteratorUtility {
    
    static void showSourceSetAux(Transliterator t, Normalizer.Mode m, boolean lowerFirst, boolean forward) throws IOException {
        UnicodeSet sourceSet = t.getSourceSet();
-        if (m != Normalizer.NO_OP || lowerFirst) {
+        if (m != Normalizer.NONE || lowerFirst) {
            UnicodeSetClosure.close(sourceSet, m, lowerFirst);
        }
        System.out.println(t.getID() + ": " +
--- a/icu4j/src/com/ibm/icu/text/UTF16.java
+++ b/icu4j/src/com/ibm/icu/text/UTF16.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UTF16.java,v $ 
-* $Date: 2002/05/14 23:45:46 $ 
-* $Revision: 1.20 $
+* $Date: 2002/06/20 01:21:18 $ 
+* $Revision: 1.21 $
 *
 *******************************************************************************
 */
@ -14,6 +14,7 @@
 package com.ibm.icu.text;

 import com.ibm.icu.impl.UCharacterProperty;
+import com.ibm.icu.impl.NormalizerImpl;
 /**
 * Standalone utility class providing UTF16 character conversions and indexing 
 * conversions.
@ -2213,6 +2214,35 @@ public final class UTF16
 	                
 	        return 0;
        }
+        
+        public int caseCompare(Object a, Object b, int options){
+            if (a == b) {
+                return 0;
+            }
+            if (a == null) {
+                return -1;
+            }
+            if (b == null) {
+                return 1;
+            }
+            String sa = (String) a;
+	        String sb = (String) b;
+            int la = sa.length();
+            int lb = sb.length();
+            if( sa != sb ){
+                int result = NormalizerImpl.cmpEquivFold(sa,sb,
+                                         options|Normalizer.COMPARE_IGNORE_CASE);
+                if(result!=0) {
+                  return (int)((byte)(result >> 24 | 1));
+                }
+
+            }else{
+                if(la != lb){
+                    return (int)((byte)((la-lb) >> 24 | 1));
+                }
+            }
+            return 0;
+        } 
    }
    
    // private data members -------------------------------------------------
--- a/icu4j/src/com/ibm/icu/text/UppercaseTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/UppercaseTransliterator.java
@ -5,15 +5,15 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UppercaseTransliterator.java,v $ 
- * $Date: 2002/04/02 23:59:59 $ 
- * $Revision: 1.9 $
+ * $Date: 2002/06/20 01:21:18 $ 
+ * $Revision: 1.10 $
 *
 *****************************************************************************************
 */
 package com.ibm.icu.text;
 import java.util.*;
 import com.ibm.icu.impl.UCharacterProperty;
-import com.ibm.icu.impl.UCharacterIterator;
+import com.ibm.icu.impl.UnicodeCharacterIterator;

 /**
 * A transliterator that performs locale-sensitive toUpper()
@ -59,7 +59,7 @@ class UppercaseTransliterator extends Transliterator {
        // get string for context
        // TODO: add convenience method to do this, since we do it all over
        
-        UCharacterIterator original = new UCharacterIterator(text);
+        UnicodeCharacterIterator original = new UnicodeCharacterIterator(text);
        
        // Walk through original string
        // If there is a case change, modify corresponding position in replaceable