ICU-1373 more fixes to support supplementals

X-SVN-Rev: 7285
2025-04-08 06:53:45 +00:00 · 2001-12-03 21:33:59 +00:00 · 2001-12-03 21:33:59 +00:00 · c7903f1367
commit c7903f1367
parent da0fef51a8
12 changed files with 334 additions and 302 deletions
--- a/icu4j/src/com/ibm/icu/text/NameUnicodeTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/NameUnicodeTransliterator.java
@ -3,8 +3,8 @@
 * others. All Rights Reserved.
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/NameUnicodeTransliterator.java,v $ 
- * $Date: 2001/11/21 20:56:50 $ 
- * $Revision: 1.5 $
+ * $Date: 2001/12/03 21:33:58 $ 
+ * $Revision: 1.6 $
 */
 package com.ibm.text;
 import java.util.*;
@ -73,9 +73,10 @@ class NameUnicodeTransliterator extends Transliterator {
        int mode = 0;
        int ibuf = 0;
        int openPos = offsets.start; // position of openDelimiter
-
-        for (; cursor < limit; ++cursor) {
-            char c = text.charAt(cursor);
+        
+        int c;
+        for (; cursor < limit; cursor+=UTF16.getCharCount(c)) {
+            c = UTF16.charAt(text, cursor);

            switch (mode) {
            case 0: // looking for open delimiter
--- a/icu4j/src/com/ibm/icu/text/StringMatcher.java
+++ b/icu4j/src/com/ibm/icu/text/StringMatcher.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/StringMatcher.java,v $ 
- * $Date: 2001/11/29 22:31:18 $ 
- * $Revision: 1.4 $
+ * $Date: 2001/12/03 21:33:58 $ 
+ * $Revision: 1.5 $
 *
 *****************************************************************************************
 */
@ -48,16 +48,20 @@ class StringMatcher implements UnicodeMatcher {
                       int[] offset,
                       int limit,
                       boolean incremental) {
+        // Note (1): We process text in 16-bit code units, rather than
+        // 32-bit code points.  This works because stand-ins are
+        // always in the BMP and because we are doing a literal match
+        // operation, which can be done 16 bits at a time.
        int i;
        int[] cursor = new int[] { offset[0] };
        if (limit < cursor[0]) {
            // Match in the reverse direction
            for (i=pattern.length()-1; i>=0; --i) {
-                char keyChar = pattern.charAt(i);
+                char keyChar = pattern.charAt(i); // OK; see note (1) above
                UnicodeMatcher subm = data.lookup(keyChar);
                if (subm == null) {
                    if (cursor[0] >= limit &&
-                        keyChar == text.charAt(cursor[0])) {
+                        keyChar == text.charAt(cursor[0])) { // OK; see note (1) above
                        --cursor[0];
                    } else {
                        return U_MISMATCH;
@ -84,14 +88,14 @@ class StringMatcher implements UnicodeMatcher {
                    // without completing our match.
                    return U_PARTIAL_MATCH;
                }
-                char keyChar = pattern.charAt(i);
+                char keyChar = pattern.charAt(i); // OK; see note (1) above
                UnicodeMatcher subm = data.lookup(keyChar);
                if (subm == null) {
                    // Don't need the cursor < limit check if
                    // incremental is true (because it's done above); do need
                    // it otherwise.
                    if (cursor[0] < limit &&
-                        keyChar == text.charAt(cursor[0])) {
+                        keyChar == text.charAt(cursor[0])) { // OK; see note (1) above
                        ++cursor[0];
                    } else {
                        return U_MISMATCH;
@ -123,7 +127,7 @@ class StringMatcher implements UnicodeMatcher {
            result.append('(');
        }
        for (int i=0; i<pattern.length(); ++i) {
-            char keyChar = pattern.charAt(i);
+            char keyChar = pattern.charAt(i); // OK; see note (1) above
            UnicodeMatcher m = data.lookup(keyChar);
            if (m == null) {
                TransliterationRule.appendToRule(result, keyChar, false, escapeUnprintable, quoteBuf);
--- a/icu4j/src/com/ibm/icu/text/TransformTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/TransformTransliterator.java
@ -3,124 +3,128 @@
 * others. All Rights Reserved.
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransformTransliterator.java,v $ 
- * $Date: 2001/11/17 20:45:35 $ 
- * $Revision: 1.3 $
+ * $Date: 2001/12/03 21:33:58 $ 
+ * $Revision: 1.4 $
 */
 package com.ibm.text;
 import java.util.*;

-/**
- * An abstract class for transliterators based on a transform
- * operation.  To create a transliterator that implements a
- * transformation, create a subclass of this class and implement the
- * abstract <code>transform()</code> and <code>hasTransform()</code>
- * methods.
- * @author Alan Liu
- */
-abstract class TransformTransliterator extends Transliterator {
-
-    /**
-     * Constructs a transliterator.  For use by subclasses.
-     */
-    protected TransformTransliterator(String id, UnicodeFilter f) {
-        super(id, f);
-    }
-
-    /**
-     * Implements {@link Transliterator#handleTransliterate}.
-     */
-    protected void handleTransliterate(Replaceable text,
-                                       Position offsets, boolean incremental) {
-
-        int start;
-        for (start = offsets.start; start < offsets.limit; ++start) {
-            // Scan for the first character that is != its transform.
-            // If there are none, we fall out without doing anything.
-            char c = text.charAt(start);
-            if (hasTransform(c)) {
-                // There is a transforming character at start.  Break
-                // up the remaining string, from start to
-                // offsets.limit, into segments of unfiltered and
-                // filtered characters.  Only transform the unfiltered
-                // characters.  As always, minimize the number of
-                // calls to Replaceable.replace().
-
-                int len = offsets.limit - start;
-                // assert(len >= 1);
-                
-                char[] buf = new char[len];
-                text.getChars(start, offsets.limit, buf, 0);
-
-                int segStart = 0;
-                int segLimit;
-                UnicodeFilter filt = getFilter();
-
-                // lenDelta is the accumulated length difference for
-                // all transformed segments.  It is new length - old
-                // length.
-                int lenDelta = 0;
-
-                // Set segStart, segLimit to the unfiltered segment
-                // starting with start.  If the filter is null, then
-                // segStart/Limit will be set to the whole string,
-                // that is, 0/len.
-                do {
-                    // Set segLimit to the first filtered char at or
-                    // after segStart.
-                    segLimit = len;
-                    if (filt != null) {
-                        segLimit = segStart;
-                        while (segLimit < len && filt.contains(buf[segLimit])) {
-                             ++segLimit;
-                        }
-                    }
-
-                    // Transform the unfiltered chars between segStart
-                    // and segLimit.
-                    int segLen = segLimit - segStart;
-                    if (segLen != 0) {
-                        String newStr = transform(
-                            new String(buf, segStart, segLen));
-                        text.replace(start, start + segLen, newStr);
-                        start += newStr.length();
-                        lenDelta += newStr.length() - segLen;
-                    }
-
-                    // Set segStart to the first unfiltered char at or
-                    // after segLimit.
-                    segStart = segLimit;
-                    if (filt != null) {
-                        while (segStart < len && !filt.contains(buf[segStart])) {
-                            ++segStart;
-                        }
-                    }
-                    start += segStart - segLimit;
-
-                } while (segStart < len);
-                
-                offsets.limit += lenDelta;
-                offsets.contextLimit += lenDelta;
-                offsets.start = offsets.limit;
-                return;
-            }
-        }
-        // assert(start == offsets.limit);
-        offsets.start = start;
-    }
-
-    /**
-     * Subclasses must implement this method to determine whether a
-     * given character has a transform that is not equal to itself.
-     * This is approximately equivalent to <code>c !=
-     * transform(String.valueOf(c))</code>, where
-     * <code>String.valueOf(c)</code> returns a String containing the
-     * single character (not integer) <code>c</code>.  Subclasses that
-     * transform all their input can simply return <code>true</code>.
-     */
-    protected abstract boolean hasTransform(int c);
-
-    /**
-     * Subclasses must implement this method to transform a string.
-     */
-    protected abstract String transform(String s);
+abstract class TransformTransliterator {
+    // Currently unused
 }
+
+///**
+// * An abstract class for transliterators based on a transform
+// * operation.  To create a transliterator that implements a
+// * transformation, create a subclass of this class and implement the
+// * abstract <code>transform()</code> and <code>hasTransform()</code>
+// * methods.
+// * @author Alan Liu
+// */
+//abstract class TransformTransliterator extends Transliterator {
+//
+//    /**
+//     * Constructs a transliterator.  For use by subclasses.
+//     */
+//    protected TransformTransliterator(String id, UnicodeFilter f) {
+//        super(id, f);
+//    }
+//
+//    /**
+//     * Implements {@link Transliterator#handleTransliterate}.
+//     */
+//    protected void handleTransliterate(Replaceable text,
+//                                       Position offsets, boolean incremental) {
+//
+//        int start;
+//        for (start = offsets.start; start < offsets.limit; ++start) {
+//            // Scan for the first character that is != its transform.
+//            // If there are none, we fall out without doing anything.
+//            char c = text.charAt(start);
+//            if (hasTransform(c)) {
+//                // There is a transforming character at start.  Break
+//                // up the remaining string, from start to
+//                // offsets.limit, into segments of unfiltered and
+//                // filtered characters.  Only transform the unfiltered
+//                // characters.  As always, minimize the number of
+//                // calls to Replaceable.replace().
+//
+//                int len = offsets.limit - start;
+//                // assert(len >= 1);
+//                
+//                char[] buf = new char[len];
+//                text.getChars(start, offsets.limit, buf, 0);
+//
+//                int segStart = 0;
+//                int segLimit;
+//                UnicodeFilter filt = getFilter();
+//
+//                // lenDelta is the accumulated length difference for
+//                // all transformed segments.  It is new length - old
+//                // length.
+//                int lenDelta = 0;
+//
+//                // Set segStart, segLimit to the unfiltered segment
+//                // starting with start.  If the filter is null, then
+//                // segStart/Limit will be set to the whole string,
+//                // that is, 0/len.
+//                do {
+//                    // Set segLimit to the first filtered char at or
+//                    // after segStart.
+//                    segLimit = len;
+//                    if (filt != null) {
+//                        segLimit = segStart;
+//                        while (segLimit < len && filt.contains(buf[segLimit])) {
+//                             ++segLimit;
+//                        }
+//                    }
+//
+//                    // Transform the unfiltered chars between segStart
+//                    // and segLimit.
+//                    int segLen = segLimit - segStart;
+//                    if (segLen != 0) {
+//                        String newStr = transform(
+//                            new String(buf, segStart, segLen));
+//                        text.replace(start, start + segLen, newStr);
+//                        start += newStr.length();
+//                        lenDelta += newStr.length() - segLen;
+//                    }
+//
+//                    // Set segStart to the first unfiltered char at or
+//                    // after segLimit.
+//                    segStart = segLimit;
+//                    if (filt != null) {
+//                        while (segStart < len && !filt.contains(buf[segStart])) {
+//                            ++segStart;
+//                        }
+//                    }
+//                    start += segStart - segLimit;
+//
+//                } while (segStart < len);
+//                
+//                offsets.limit += lenDelta;
+//                offsets.contextLimit += lenDelta;
+//                offsets.start = offsets.limit;
+//                return;
+//            }
+//        }
+//        // assert(start == offsets.limit);
+//        offsets.start = start;
+//    }
+//
+//    /**
+//     * Subclasses must implement this method to determine whether a
+//     * given character has a transform that is not equal to itself.
+//     * This is approximately equivalent to <code>c !=
+//     * transform(String.valueOf(c))</code>, where
+//     * <code>String.valueOf(c)</code> returns a String containing the
+//     * single character (not integer) <code>c</code>.  Subclasses that
+//     * transform all their input can simply return <code>true</code>.
+//     */
+//    protected abstract boolean hasTransform(int c);
+//
+//    /**
+//     * Subclasses must implement this method to transform a string.
+//     */
+//    protected abstract String transform(String s);
+//}
--- a/icu4j/src/com/ibm/icu/text/TransliterationRule.java
+++ b/icu4j/src/com/ibm/icu/text/TransliterationRule.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliterationRule.java,v $
- * $Date: 2001/11/30 22:27:29 $
- * $Revision: 1.38 $
+ * $Date: 2001/12/03 21:33:58 $
+ * $Revision: 1.39 $
 *
 *****************************************************************************************
 */
@ -46,7 +46,7 @@ import com.ibm.util.Utility;
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.38 $ $Date: 2001/11/30 22:27:29 $
+ * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.39 $ $Date: 2001/12/03 21:33:58 $
 */
 class TransliterationRule {

@ -396,12 +396,17 @@ class TransliterationRule {
        // Backup oText by one
        oText = posBefore(text, pos.start);

+        // Note (1): We process text in 16-bit code units, rather than
+        // 32-bit code points.  This works because stand-ins are
+        // always in the BMP and because we are doing a literal match
+        // operation, which can be done 16 bits at a time.
+
        for (oPattern=anteContextLength-1; oPattern>=0; --oPattern) {
-            char keyChar = pattern.charAt(oPattern);
+            char keyChar = pattern.charAt(oPattern); // See note (1)
            UnicodeMatcher matcher = data.lookup(keyChar);
            if (matcher == null) {
                if (oText >= pos.contextStart &&
-                    keyChar == text.charAt(oText)) {
+                    keyChar == text.charAt(oText)) { // See note (1)
                    --oText;
                } else {
                    return UnicodeMatcher.U_MISMATCH;
@ -457,14 +462,14 @@ class TransliterationRule {
            // can match up to pos.contextLimit.
            int matchLimit = (oPattern < keyLength) ? pos.limit : pos.contextLimit;

-            char keyChar = pattern.charAt(anteContextLength + oPattern++);
+            char keyChar = pattern.charAt(anteContextLength + oPattern++); // See note (1)
            UnicodeMatcher matcher = data.lookup(keyChar);
            if (matcher == null) {
                // Don't need the oText < pos.contextLimit check if
                // incremental is TRUE (because it's done above); do need
                // it otherwise.
                if (oText < matchLimit &&
-                    keyChar == text.charAt(oText)) {
+                    keyChar == text.charAt(oText)) { // See note (1)
                    ++oText;
                } else {
                    return UnicodeMatcher.U_MISMATCH;
@ -716,6 +721,7 @@ class TransliterationRule {
                                   boolean escapeUnprintable,
                                   StringBuffer quoteBuf) {
        for (int i=0; i<text.length(); ++i) {
+            // Okay to process in 16-bit code units here
            appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf);
        }
    }
@ -757,7 +763,7 @@ class TransliterationRule {
                appendToRule(rule, '}', true, escapeUnprintable, quoteBuf);
            }

-            char c = pattern.charAt(i);
+            char c = pattern.charAt(i); // OK to use 16-bit code units here
            UnicodeMatcher matcher = data.lookup(c);
            if (matcher == null) {
                appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
@ -793,7 +799,7 @@ class TransliterationRule {
            if (i == cursor) {
                appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
            }
-            char c = output.charAt(i);
+            char c = output.charAt(i); // OK to use 16-bit code units here
            int seg = data.lookupSegmentReference(c);
            if (seg < 0) {
                appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
@ -872,6 +878,9 @@ class TransliterationRule {

 /**
 * $Log: TransliterationRule.java,v $
+ * Revision 1.39  2001/12/03 21:33:58  alan
+ * jitterbug 1373: more fixes to support supplementals
+ *
 * Revision 1.38  2001/11/30 22:27:29  alan
 * jitterbug 1560: fix double increment bug in getSourceSet
 *
--- a/icu4j/src/com/ibm/icu/text/UnicodeNameTransliterator.java
+++ b/icu4j/src/com/ibm/icu/text/UnicodeNameTransliterator.java
@ -3,8 +3,8 @@
 * others. All Rights Reserved.
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeNameTransliterator.java,v $ 
- * $Date: 2001/11/17 20:45:35 $ 
- * $Revision: 1.4 $
+ * $Date: 2001/12/03 21:33:59 $ 
+ * $Revision: 1.5 $
 */
 package com.ibm.text;
 import java.util.*;
@ -63,16 +63,17 @@ class UnicodeNameTransliterator extends Transliterator {
        String name;
        
        while (cursor < limit) {
-            char c = text.charAt(cursor);
+            int c = UTF16.charAt(text, cursor);
            if ((name=UCharacter.getName(c)) != null) {
                
                str.setLength(1);
                str.append(name).append(closeDelimiter);
-                
-                text.replace(cursor, cursor+1, str.toString());
+
+                int clen = UTF16.getCharCount(c);
+                text.replace(cursor, cursor+clen, str.toString());
                len = str.length();
                cursor += len; // advance cursor by 1 and adjust for new text
-                limit += len-1; // change in length is (len - 1)
+                limit += len-clen; // change in length
            } else {
                ++cursor;
            }
--- a/icu4j/src/com/ibm/icu/text/UnicodeSet.java
+++ b/icu4j/src/com/ibm/icu/text/UnicodeSet.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeSet.java,v $
- * $Date: 2001/12/03 20:26:24 $
- * $Revision: 1.52 $
+ * $Date: 2001/12/03 21:33:59 $
+ * $Revision: 1.53 $
 *
 *****************************************************************************************
 */
@ -204,7 +204,7 @@ import com.ibm.util.Utility;
 * Unicode property
 * </table>
 * @author Alan Liu
- * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.52 $ $Date: 2001/12/03 20:26:24 $
+ * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.53 $ $Date: 2001/12/03 21:33:59 $
 */
 public class UnicodeSet extends UnicodeFilter {

@ -396,16 +396,13 @@ public class UnicodeSet extends UnicodeFilter {
        applyPattern(pattern, pos, null, ignoreWhitespace);

        int i = pos.getIndex();
-        int n = pattern.length();

        // Skip over trailing whitespace
        if (ignoreWhitespace) {
-            while (i < n && Character.isWhitespace(pattern.charAt(i))) {
-                ++i;
-            }
+            i = Utility.skipWhitespace(pattern, i);
        }

-        if (i != n) {
+        if (i != pattern.length()) {
            throw new IllegalArgumentException("Parse of \"" + pattern +
                                               "\" failed at " + i);
        }
--- a/icu4j/src/com/ibm/text/NameUnicodeTransliterator.java
+++ b/icu4j/src/com/ibm/text/NameUnicodeTransliterator.java
@ -3,8 +3,8 @@
 * others. All Rights Reserved.
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/NameUnicodeTransliterator.java,v $ 
- * $Date: 2001/11/21 20:56:50 $ 
- * $Revision: 1.5 $
+ * $Date: 2001/12/03 21:33:58 $ 
+ * $Revision: 1.6 $
 */
 package com.ibm.text;
 import java.util.*;
@ -73,9 +73,10 @@ class NameUnicodeTransliterator extends Transliterator {
        int mode = 0;
        int ibuf = 0;
        int openPos = offsets.start; // position of openDelimiter
-
-        for (; cursor < limit; ++cursor) {
-            char c = text.charAt(cursor);
+        
+        int c;
+        for (; cursor < limit; cursor+=UTF16.getCharCount(c)) {
+            c = UTF16.charAt(text, cursor);

            switch (mode) {
            case 0: // looking for open delimiter
--- a/icu4j/src/com/ibm/text/StringMatcher.java
+++ b/icu4j/src/com/ibm/text/StringMatcher.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/StringMatcher.java,v $ 
- * $Date: 2001/11/29 22:31:18 $ 
- * $Revision: 1.4 $
+ * $Date: 2001/12/03 21:33:58 $ 
+ * $Revision: 1.5 $
 *
 *****************************************************************************************
 */
@ -48,16 +48,20 @@ class StringMatcher implements UnicodeMatcher {
                       int[] offset,
                       int limit,
                       boolean incremental) {
+        // Note (1): We process text in 16-bit code units, rather than
+        // 32-bit code points.  This works because stand-ins are
+        // always in the BMP and because we are doing a literal match
+        // operation, which can be done 16 bits at a time.
        int i;
        int[] cursor = new int[] { offset[0] };
        if (limit < cursor[0]) {
            // Match in the reverse direction
            for (i=pattern.length()-1; i>=0; --i) {
-                char keyChar = pattern.charAt(i);
+                char keyChar = pattern.charAt(i); // OK; see note (1) above
                UnicodeMatcher subm = data.lookup(keyChar);
                if (subm == null) {
                    if (cursor[0] >= limit &&
-                        keyChar == text.charAt(cursor[0])) {
+                        keyChar == text.charAt(cursor[0])) { // OK; see note (1) above
                        --cursor[0];
                    } else {
                        return U_MISMATCH;
@ -84,14 +88,14 @@ class StringMatcher implements UnicodeMatcher {
                    // without completing our match.
                    return U_PARTIAL_MATCH;
                }
-                char keyChar = pattern.charAt(i);
+                char keyChar = pattern.charAt(i); // OK; see note (1) above
                UnicodeMatcher subm = data.lookup(keyChar);
                if (subm == null) {
                    // Don't need the cursor < limit check if
                    // incremental is true (because it's done above); do need
                    // it otherwise.
                    if (cursor[0] < limit &&
-                        keyChar == text.charAt(cursor[0])) {
+                        keyChar == text.charAt(cursor[0])) { // OK; see note (1) above
                        ++cursor[0];
                    } else {
                        return U_MISMATCH;
@ -123,7 +127,7 @@ class StringMatcher implements UnicodeMatcher {
            result.append('(');
        }
        for (int i=0; i<pattern.length(); ++i) {
-            char keyChar = pattern.charAt(i);
+            char keyChar = pattern.charAt(i); // OK; see note (1) above
            UnicodeMatcher m = data.lookup(keyChar);
            if (m == null) {
                TransliterationRule.appendToRule(result, keyChar, false, escapeUnprintable, quoteBuf);
--- a/icu4j/src/com/ibm/text/TransformTransliterator.java
+++ b/icu4j/src/com/ibm/text/TransformTransliterator.java
@ -3,124 +3,128 @@
 * others. All Rights Reserved.
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransformTransliterator.java,v $ 
- * $Date: 2001/11/17 20:45:35 $ 
- * $Revision: 1.3 $
+ * $Date: 2001/12/03 21:33:58 $ 
+ * $Revision: 1.4 $
 */
 package com.ibm.text;
 import java.util.*;

-/**
- * An abstract class for transliterators based on a transform
- * operation.  To create a transliterator that implements a
- * transformation, create a subclass of this class and implement the
- * abstract <code>transform()</code> and <code>hasTransform()</code>
- * methods.
- * @author Alan Liu
- */
-abstract class TransformTransliterator extends Transliterator {
-
-    /**
-     * Constructs a transliterator.  For use by subclasses.
-     */
-    protected TransformTransliterator(String id, UnicodeFilter f) {
-        super(id, f);
-    }
-
-    /**
-     * Implements {@link Transliterator#handleTransliterate}.
-     */
-    protected void handleTransliterate(Replaceable text,
-                                       Position offsets, boolean incremental) {
-
-        int start;
-        for (start = offsets.start; start < offsets.limit; ++start) {
-            // Scan for the first character that is != its transform.
-            // If there are none, we fall out without doing anything.
-            char c = text.charAt(start);
-            if (hasTransform(c)) {
-                // There is a transforming character at start.  Break
-                // up the remaining string, from start to
-                // offsets.limit, into segments of unfiltered and
-                // filtered characters.  Only transform the unfiltered
-                // characters.  As always, minimize the number of
-                // calls to Replaceable.replace().
-
-                int len = offsets.limit - start;
-                // assert(len >= 1);
-                
-                char[] buf = new char[len];
-                text.getChars(start, offsets.limit, buf, 0);
-
-                int segStart = 0;
-                int segLimit;
-                UnicodeFilter filt = getFilter();
-
-                // lenDelta is the accumulated length difference for
-                // all transformed segments.  It is new length - old
-                // length.
-                int lenDelta = 0;
-
-                // Set segStart, segLimit to the unfiltered segment
-                // starting with start.  If the filter is null, then
-                // segStart/Limit will be set to the whole string,
-                // that is, 0/len.
-                do {
-                    // Set segLimit to the first filtered char at or
-                    // after segStart.
-                    segLimit = len;
-                    if (filt != null) {
-                        segLimit = segStart;
-                        while (segLimit < len && filt.contains(buf[segLimit])) {
-                             ++segLimit;
-                        }
-                    }
-
-                    // Transform the unfiltered chars between segStart
-                    // and segLimit.
-                    int segLen = segLimit - segStart;
-                    if (segLen != 0) {
-                        String newStr = transform(
-                            new String(buf, segStart, segLen));
-                        text.replace(start, start + segLen, newStr);
-                        start += newStr.length();
-                        lenDelta += newStr.length() - segLen;
-                    }
-
-                    // Set segStart to the first unfiltered char at or
-                    // after segLimit.
-                    segStart = segLimit;
-                    if (filt != null) {
-                        while (segStart < len && !filt.contains(buf[segStart])) {
-                            ++segStart;
-                        }
-                    }
-                    start += segStart - segLimit;
-
-                } while (segStart < len);
-                
-                offsets.limit += lenDelta;
-                offsets.contextLimit += lenDelta;
-                offsets.start = offsets.limit;
-                return;
-            }
-        }
-        // assert(start == offsets.limit);
-        offsets.start = start;
-    }
-
-    /**
-     * Subclasses must implement this method to determine whether a
-     * given character has a transform that is not equal to itself.
-     * This is approximately equivalent to <code>c !=
-     * transform(String.valueOf(c))</code>, where
-     * <code>String.valueOf(c)</code> returns a String containing the
-     * single character (not integer) <code>c</code>.  Subclasses that
-     * transform all their input can simply return <code>true</code>.
-     */
-    protected abstract boolean hasTransform(int c);
-
-    /**
-     * Subclasses must implement this method to transform a string.
-     */
-    protected abstract String transform(String s);
+abstract class TransformTransliterator {
+    // Currently unused
 }
+
+///**
+// * An abstract class for transliterators based on a transform
+// * operation.  To create a transliterator that implements a
+// * transformation, create a subclass of this class and implement the
+// * abstract <code>transform()</code> and <code>hasTransform()</code>
+// * methods.
+// * @author Alan Liu
+// */
+//abstract class TransformTransliterator extends Transliterator {
+//
+//    /**
+//     * Constructs a transliterator.  For use by subclasses.
+//     */
+//    protected TransformTransliterator(String id, UnicodeFilter f) {
+//        super(id, f);
+//    }
+//
+//    /**
+//     * Implements {@link Transliterator#handleTransliterate}.
+//     */
+//    protected void handleTransliterate(Replaceable text,
+//                                       Position offsets, boolean incremental) {
+//
+//        int start;
+//        for (start = offsets.start; start < offsets.limit; ++start) {
+//            // Scan for the first character that is != its transform.
+//            // If there are none, we fall out without doing anything.
+//            char c = text.charAt(start);
+//            if (hasTransform(c)) {
+//                // There is a transforming character at start.  Break
+//                // up the remaining string, from start to
+//                // offsets.limit, into segments of unfiltered and
+//                // filtered characters.  Only transform the unfiltered
+//                // characters.  As always, minimize the number of
+//                // calls to Replaceable.replace().
+//
+//                int len = offsets.limit - start;
+//                // assert(len >= 1);
+//                
+//                char[] buf = new char[len];
+//                text.getChars(start, offsets.limit, buf, 0);
+//
+//                int segStart = 0;
+//                int segLimit;
+//                UnicodeFilter filt = getFilter();
+//
+//                // lenDelta is the accumulated length difference for
+//                // all transformed segments.  It is new length - old
+//                // length.
+//                int lenDelta = 0;
+//
+//                // Set segStart, segLimit to the unfiltered segment
+//                // starting with start.  If the filter is null, then
+//                // segStart/Limit will be set to the whole string,
+//                // that is, 0/len.
+//                do {
+//                    // Set segLimit to the first filtered char at or
+//                    // after segStart.
+//                    segLimit = len;
+//                    if (filt != null) {
+//                        segLimit = segStart;
+//                        while (segLimit < len && filt.contains(buf[segLimit])) {
+//                             ++segLimit;
+//                        }
+//                    }
+//
+//                    // Transform the unfiltered chars between segStart
+//                    // and segLimit.
+//                    int segLen = segLimit - segStart;
+//                    if (segLen != 0) {
+//                        String newStr = transform(
+//                            new String(buf, segStart, segLen));
+//                        text.replace(start, start + segLen, newStr);
+//                        start += newStr.length();
+//                        lenDelta += newStr.length() - segLen;
+//                    }
+//
+//                    // Set segStart to the first unfiltered char at or
+//                    // after segLimit.
+//                    segStart = segLimit;
+//                    if (filt != null) {
+//                        while (segStart < len && !filt.contains(buf[segStart])) {
+//                            ++segStart;
+//                        }
+//                    }
+//                    start += segStart - segLimit;
+//
+//                } while (segStart < len);
+//                
+//                offsets.limit += lenDelta;
+//                offsets.contextLimit += lenDelta;
+//                offsets.start = offsets.limit;
+//                return;
+//            }
+//        }
+//        // assert(start == offsets.limit);
+//        offsets.start = start;
+//    }
+//
+//    /**
+//     * Subclasses must implement this method to determine whether a
+//     * given character has a transform that is not equal to itself.
+//     * This is approximately equivalent to <code>c !=
+//     * transform(String.valueOf(c))</code>, where
+//     * <code>String.valueOf(c)</code> returns a String containing the
+//     * single character (not integer) <code>c</code>.  Subclasses that
+//     * transform all their input can simply return <code>true</code>.
+//     */
+//    protected abstract boolean hasTransform(int c);
+//
+//    /**
+//     * Subclasses must implement this method to transform a string.
+//     */
+//    protected abstract String transform(String s);
+//}
--- a/icu4j/src/com/ibm/text/TransliterationRule.java
+++ b/icu4j/src/com/ibm/text/TransliterationRule.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliterationRule.java,v $
- * $Date: 2001/11/30 22:27:29 $
- * $Revision: 1.38 $
+ * $Date: 2001/12/03 21:33:58 $
+ * $Revision: 1.39 $
 *
 *****************************************************************************************
 */
@ -46,7 +46,7 @@ import com.ibm.util.Utility;
 * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
 *
 * @author Alan Liu
- * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.38 $ $Date: 2001/11/30 22:27:29 $
+ * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.39 $ $Date: 2001/12/03 21:33:58 $
 */
 class TransliterationRule {

@ -396,12 +396,17 @@ class TransliterationRule {
        // Backup oText by one
        oText = posBefore(text, pos.start);

+        // Note (1): We process text in 16-bit code units, rather than
+        // 32-bit code points.  This works because stand-ins are
+        // always in the BMP and because we are doing a literal match
+        // operation, which can be done 16-bits at a time.
+
        for (oPattern=anteContextLength-1; oPattern>=0; --oPattern) {
-            char keyChar = pattern.charAt(oPattern);
+            char keyChar = pattern.charAt(oPattern); // See note (1)
            UnicodeMatcher matcher = data.lookup(keyChar);
            if (matcher == null) {
                if (oText >= pos.contextStart &&
-                    keyChar == text.charAt(oText)) {
+                    keyChar == text.charAt(oText)) { // See note (1)
                    --oText;
                } else {
                    return UnicodeMatcher.U_MISMATCH;
@ -457,14 +462,14 @@ class TransliterationRule {
            // can match up to pos.contextLimit.
            int matchLimit = (oPattern < keyLength) ? pos.limit : pos.contextLimit;

-            char keyChar = pattern.charAt(anteContextLength + oPattern++);
+            char keyChar = pattern.charAt(anteContextLength + oPattern++); // See note (1)
            UnicodeMatcher matcher = data.lookup(keyChar);
            if (matcher == null) {
                // Don't need the oText < pos.contextLimit check if
                // incremental is TRUE (because it's done above); do need
                // it otherwise.
                if (oText < matchLimit &&
-                    keyChar == text.charAt(oText)) {
+                    keyChar == text.charAt(oText)) { // See note (1)
                    ++oText;
                } else {
                    return UnicodeMatcher.U_MISMATCH;
@ -716,6 +721,7 @@ class TransliterationRule {
                                   boolean escapeUnprintable,
                                   StringBuffer quoteBuf) {
        for (int i=0; i<text.length(); ++i) {
+            // Okay to process in 16-bit code units here
            appendToRule(rule, text.charAt(i), isLiteral, escapeUnprintable, quoteBuf);
        }
    }
@ -757,7 +763,7 @@ class TransliterationRule {
                appendToRule(rule, '}', true, escapeUnprintable, quoteBuf);
            }

-            char c = pattern.charAt(i);
+            char c = pattern.charAt(i); // Ok to use 16-bits here
            UnicodeMatcher matcher = data.lookup(c);
            if (matcher == null) {
                appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
@ -793,7 +799,7 @@ class TransliterationRule {
            if (i == cursor) {
                appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
            }
-            char c = output.charAt(i);
+            char c = output.charAt(i); // Ok to use 16-bits here
            int seg = data.lookupSegmentReference(c);
            if (seg < 0) {
                appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
@ -872,6 +878,9 @@ class TransliterationRule {

 /**
 * $Log: TransliterationRule.java,v $
+ * Revision 1.39  2001/12/03 21:33:58  alan
+ * jitterbug 1373: more fixes to support supplementals
+ *
 * Revision 1.38  2001/11/30 22:27:29  alan
 * jitterbug 1560: fix double increment bug in getSourceSet
 *
--- a/icu4j/src/com/ibm/text/UnicodeNameTransliterator.java
+++ b/icu4j/src/com/ibm/text/UnicodeNameTransliterator.java
@ -3,8 +3,8 @@
 * others. All Rights Reserved.
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UnicodeNameTransliterator.java,v $ 
- * $Date: 2001/11/17 20:45:35 $ 
- * $Revision: 1.4 $
+ * $Date: 2001/12/03 21:33:59 $ 
+ * $Revision: 1.5 $
 */
 package com.ibm.text;
 import java.util.*;
@ -63,16 +63,17 @@ class UnicodeNameTransliterator extends Transliterator {
        String name;
        
        while (cursor < limit) {
-            char c = text.charAt(cursor);
+            int c = UTF16.charAt(text, cursor);
            if ((name=UCharacter.getName(c)) != null) {
                
                str.setLength(1);
                str.append(name).append(closeDelimiter);
-                
-                text.replace(cursor, cursor+1, str.toString());
+
+                int clen = UTF16.getCharCount(c);
+                text.replace(cursor, cursor+clen, str.toString());
                len = str.length();
                cursor += len; // advance cursor by 1 and adjust for new text
-                limit += len-1; // change in length is (len - 1)
+                limit += len-clen; // change in length
            } else {
                ++cursor;
            }
--- a/icu4j/src/com/ibm/text/UnicodeSet.java
+++ b/icu4j/src/com/ibm/text/UnicodeSet.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UnicodeSet.java,v $
- * $Date: 2001/12/03 20:26:24 $
- * $Revision: 1.52 $
+ * $Date: 2001/12/03 21:33:59 $
+ * $Revision: 1.53 $
 *
 *****************************************************************************************
 */
@ -204,7 +204,7 @@ import com.ibm.util.Utility;
 * Unicode property
 * </table>
 * @author Alan Liu
- * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.52 $ $Date: 2001/12/03 20:26:24 $
+ * @version $RCSfile: UnicodeSet.java,v $ $Revision: 1.53 $ $Date: 2001/12/03 21:33:59 $
 */
 public class UnicodeSet extends UnicodeFilter {

@ -396,16 +396,13 @@ public class UnicodeSet extends UnicodeFilter {
        applyPattern(pattern, pos, null, ignoreWhitespace);

        int i = pos.getIndex();
-        int n = pattern.length();

        // Skip over trailing whitespace
        if (ignoreWhitespace) {
-            while (i < n && Character.isWhitespace(pattern.charAt(i))) {
-                ++i;
-            }
+            i = Utility.skipWhitespace(pattern, i);
        }

-        if (i != n) {
+        if (i != pattern.length()) {
            throw new IllegalArgumentException("Parse of \"" + pattern +
                                               "\" failed at " + i);
        }