ICU-1052 redesign of engine to support supplemental characters

X-SVN-Rev: 5341
2025-04-07 14:31:31 +00:00 · 2001-07-25 19:11:02 +00:00 · 2001-07-25 19:11:02 +00:00 · 7edf9d3e80
commit 7edf9d3e80
parent d7c3eebf46
23 changed files with 675 additions and 637 deletions
--- a/icu4c/source/i18n/Makefile.in
+++ b/icu4c/source/i18n/Makefile.in
@ -71,7 +71,8 @@ udat.o umsg.o \
 unifltlg.o unirange.o uniset.o unitohex.o unum.o \
 dbbi.o dbbi_tbl.o rbbi.o rbbi_tbl.o brkdict.o nultrans.o jamohang.o hangjamo.o \
 remtrans.o utrans.o \
-titletrn.o tolowtrn.o toupptrn.o xformtrn.o name2uni.o uni2name.o nortrans.o
+titletrn.o tolowtrn.o toupptrn.o xformtrn.o name2uni.o uni2name.o nortrans.o \
+unifilt.o

 STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O))

--- a/icu4c/source/i18n/i18n.dsp
+++ b/icu4c/source/i18n/i18n.dsp
@ -318,6 +318,10 @@ SOURCE=.\uni2name.cpp
 # End Source File
 # Begin Source File

+SOURCE=.\unifilt.cpp
+# End Source File
+# Begin Source File
+
 SOURCE=.\unifltlg.cpp
 # End Source File
 # Begin Source File
@ -1548,6 +1552,25 @@ InputPath=.\unicode\unifltlg.h
 # End Source File
 # Begin Source File

+SOURCE=.\unicode\unimatch.h
+
+!IF  "$(CFG)" == "i18n - Win32 Release"
+
+!ELSEIF  "$(CFG)" == "i18n - Win32 Debug"
+
+# Begin Custom Build
+InputPath=.\unicode\unimatch.h
+
+"..\..\include\unicode\unimatch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy  unicode\unimatch.h  ..\..\include\unicode
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
 SOURCE=.\unirange.h
 # End Source File
 # Begin Source File
--- a/icu4c/source/i18n/rbt.cpp
+++ b/icu4c/source/i18n/rbt.cpp
@ -89,18 +89,18 @@ RuleBasedTransliterator::clone(void) const {
 void
 RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index,
                                             UBool isIncremental) const {
-    /* We keep start and limit fixed the entire time,
-     * relative to the text -- limit may move numerically if text is
-     * inserted or removed.  The cursor moves from start to limit, with
-     * replacements happening under it.
+    /* We keep contextStart and contextLimit fixed the entire time,
+     * relative to the text -- contextLimit may move numerically if
+     * text is inserted or removed.  The start offset moves toward
+     * limit, with replacements happening under it.
     *
     * Example: rules 1. ab>x|y
     *                2. yc>z
     *
-     * |eabcd   start - no match, advance cursor
-     * e|abcd   match rule 1 - change text & adjust cursor
-     * ex|ycd   match rule 2 - change text & adjust cursor
-     * exz|d    no match, advance cursor
+     * |eabcd   begin - no match, advance start
+     * e|abcd   match rule 1 - change text & adjust start
+     * ex|ycd   match rule 2 - change text & adjust start
+     * exz|d    no match, advance start
     * exzd|    done
     */

@ -121,39 +121,14 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition&
        loopLimit <<= 4;
    }

-    UBool isPartial = FALSE;
-
-    while (index.start < index.limit && loopCount <= loopLimit) {
-        TransliterationRule* r = isIncremental ?
-            data->ruleSet.findIncrementalMatch(text, index, *data, isPartial) :
-            data->ruleSet.findMatch(text, index, *data);
-
-        /* If we match a rule then apply it by replacing the key
-         * with the rule output and repositioning the cursor
-         * appropriately.  If we get a partial match, then we
-         * can't do anything without more text; return with the
-         * cursor at the current position.  If we get null, then
-         * there is no match at this position, and we can advance
-         * the cursor.
-         */
-        if (r == 0) {
-            if (isPartial) { // always FALSE unless isIncremental
-                break;
-            } else {
-                ++index.start;
-            }
-        } else {
-            // Delegate replacement to TransliterationRule object
-            int32_t lenDelta = r->replace(text, index.start, *data);
-            index.limit += lenDelta;
-            index.contextLimit += lenDelta;
-            index.start += r->getCursorPos();
-            ++loopCount;
-        }
+    while (index.start < index.limit &&
+           loopCount <= loopLimit &&
+           data->ruleSet.transliterate(text, index, isIncremental)) {
+        ++loopCount;
    }
 }

 UnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource,
                                                UBool escapeUnprintable) const {
-    return data->ruleSet.toRules(rulesSource, *data, escapeUnprintable);
+    return data->ruleSet.toRules(rulesSource, escapeUnprintable);
 }
--- a/icu4c/source/i18n/rbt_data.cpp
+++ b/icu4c/source/i18n/rbt_data.cpp
@ -64,13 +64,13 @@ TransliterationRuleData::~TransliterationRuleData() {
 }

 const UnicodeSet*
-TransliterationRuleData::lookupSet(UChar standIn) const {
+TransliterationRuleData::lookupSet(UChar32 standIn) const {
    int32_t i = standIn - setVariablesBase;
    return (i >= 0 && i < setVariablesLength) ? setVariables[i] : 0;
 }

 int32_t
-TransliterationRuleData::lookupSegmentReference(UChar c) const {
+TransliterationRuleData::lookupSegmentReference(UChar32 c) const {
    int32_t i = c - segmentBase;
    return (i >= 0 && i < 9) ? i : -1;
 }
--- a/icu4c/source/i18n/rbt_data.h
+++ b/icu4c/source/i18n/rbt_data.h
@ -90,14 +90,14 @@ public:

    ~TransliterationRuleData();

-    const UnicodeSet* lookupSet(UChar standIn) const;
+    const UnicodeSet* lookupSet(UChar32 standIn) const;

    /**
     * Return the zero-based index of the segment represented by the given
     * character, or -1 if none.  Repeat: This is a zero-based return value,
     * 0..8, even though these are notated "$1".."$9".
     */
-    int32_t lookupSegmentReference(UChar c) const;
+    int32_t lookupSegmentReference(UChar32 c) const;

    /**
     * Return the character used to stand for the given segment reference.
--- a/icu4c/source/i18n/rbt_pars.cpp
+++ b/icu4c/source/i18n/rbt_pars.cpp
@ -75,7 +75,7 @@ public:

    virtual const UnicodeString* lookup(const UnicodeString& s) const;

-    virtual const UnicodeSet* lookupSet(UChar ch) const;
+    virtual const UnicodeSet* lookupSet(UChar32 ch) const;

    virtual UnicodeString parseReference(const UnicodeString& text,
                                         ParsePosition& pos, int32_t limit) const;
@ -95,7 +95,7 @@ const UnicodeString* ParseData::lookup(const UnicodeString& name) const {
 /**
 * Implement SymbolTable API.
 */
-const UnicodeSet* ParseData::lookupSet(UChar ch) const {
+const UnicodeSet* ParseData::lookupSet(UChar32 ch) const {
    // Note that we cannot use data.lookupSet() because the
    // set array has not been constructed yet.
    const UnicodeSet* set = NULL;
@ -682,7 +682,7 @@ void TransliteratorParser::parseRules(UnicodeString& idBlockResult,

    // Index the rules
    if (U_SUCCESS(status)) {
-        data->ruleSet.freeze(*data, status);
+        data->ruleSet.freeze(status);
        if (idSplitPointResult < 0) {
            idSplitPointResult = idBlockResult.length();
        }
@ -849,6 +849,7 @@ int32_t TransliteratorParser::parseRule(int32_t pos, int32_t limit) {
                                 right->text, right->cursor, right->cursorOffset,
                                 left->createSegments(),
                                 left->anchorStart, left->anchorEnd,
+                                 *data,
                                 status), status);

    return pos;
--- a/icu4c/source/i18n/rbt_rule.cpp
+++ b/icu4c/source/i18n/rbt_rule.cpp
@ -52,7 +52,9 @@ TransliterationRule::TransliterationRule(const UnicodeString& input,
                                         int32_t cursorPosition, int32_t cursorOffset,
                                         int32_t* adoptedSegs,
                                         UBool anchorStart, UBool anchorEnd,
-                                         UErrorCode& status) {
+                                         const TransliterationRuleData& theData,
+                                         UErrorCode& status) :
+    data(theData) {
    init(input, anteContextPos, postContextPos,
         outputStr, cursorPosition, cursorOffset, adoptedSegs,
         anchorStart, anchorEnd, status);
@ -79,7 +81,9 @@ TransliterationRule::TransliterationRule(const UnicodeString& input,
                                         int32_t anteContextPos, int32_t postContextPos,
                                         const UnicodeString& outputStr,
                                         int32_t cursorPosition,
-                                         UErrorCode& status) {
+                                         const TransliterationRuleData& theData,
+                                         UErrorCode& status) :
+    data(theData) {
    init(input, anteContextPos, postContextPos,
         outputStr, cursorPosition, 0, NULL, FALSE, FALSE, status);
 }
@ -92,7 +96,9 @@ TransliterationRule::TransliterationRule(TransliterationRule& other) :
    output(other.output),
    anteContextLength(other.anteContextLength),
    keyLength(other.keyLength),
-    cursorPos(other.cursorPos) {
+    cursorPos(other.cursorPos),
+    flags(other.flags),
+    data(other.data) {

    segments = 0;
    if (other.segments != 0) {
@ -153,32 +159,27 @@ void TransliterationRule::init(const UnicodeString& input,
    // We don't validate the segments array.  The caller must
    // guarantee that the segments are well-formed.
    this->segments = adoptedSegs;
+    // Find the position of the first segment index that is after the
+    // anteContext (in the key).  Note that this may be a start or a
+    // limit index.
+    firstKeySeg = -1;
+    if (segments != 0) {
+        do {
+            ++firstKeySeg;
+        } while (segments[firstKeySeg] >= 0 &&
+                 segments[firstKeySeg] < anteContextLength);
+        if (segments[firstKeySeg] < 0) {
+            firstKeySeg = -1;
+        }
+    }

-    // Implement anchors by inserting an ETHER character on the
-    // left or right.  If on the left, then the indices must be
-    // incremented.  If on the right, no index change is
-    // necessary.
-    if (anchorStart || anchorEnd) {
-        pattern.truncate(0);
-        if (anchorStart) {
-            pattern.append(ETHER);
-            ++anteContextLength;
-            // Adjust segment offsets
-            if (segments != 0) {
-                int32_t *p = segments;
-                // The end marker is a -1.
-                while (*p != -1) {
-                    ++(*p);
-                    ++p;
-                }
-            }
-        }
-        pattern.append(input);
-        if (anchorEnd) {
-            pattern.append(ETHER);
-        }
-    } else {
-        pattern = input;
+    pattern = input;
+    flags = 0;
+    if (anchorStart) {
+        flags |= ANCHOR_START;
+    }
+    if (anchorEnd) {
+        flags |= ANCHOR_END;
    }
 }

@ -197,10 +198,14 @@ int32_t TransliterationRule::getCursorPos(void) const {
 /**
 * Return the preceding context length.  This method is needed to
 * support the <code>Transliterator</code> method
- * <code>getMaximumContextLength()</code>.
+ * <code>getMaximumContextLength()</code>.  Internally, this is
+ * implemented as the anteContextLength, optionally plus one if
+ * there is a start anchor.  The one character anchor gap is
+ * needed to make repeated incremental transliteration with
+ * anchors work.
 */
-int32_t TransliterationRule::getAnteContextLength(void) const {
-    return anteContextLength;
+int32_t TransliterationRule::getContextLength(void) const {
+    return anteContextLength + ((flags & ANCHOR_START) ? 1 : 0);
 }

 /**
@ -209,81 +214,16 @@ int32_t TransliterationRule::getAnteContextLength(void) const {
 * unless the first character of the key is a set.  If it's a
 * set, or otherwise can match multiple keys, the index value is -1.
 */
-int16_t TransliterationRule::getIndexValue(const TransliterationRuleData& data) const {
+int16_t TransliterationRule::getIndexValue() const {
    if (anteContextLength == pattern.length()) {
        // A pattern with just ante context {such as foo)>bar} can
        // match any key.
        return -1;
    }
-    UChar c = pattern.charAt(anteContextLength);
+    UChar32 c = pattern.char32At(anteContextLength);
    return (int16_t)(data.lookupSet(c) == NULL ? (c & 0xFF) : -1);
 }

-/**
- * Do a replacement of the input pattern with the output text in
- * the given string, at the given offset.  This method assumes
- * that a match has already been found in the given text at the
- * given position.
- * @param text the text containing the substring to be replaced
- * @param offset the offset into the text at which the pattern
- * matches.  This is the offset to the point after the ante
- * context, if any, and before the match string and any post
- * context.
- * @param data the RuleBasedTransliterator.Data object specifying
- * context for this transliterator.
- * @return the change in the length of the text
- */
-int32_t TransliterationRule::replace(Replaceable& text, int32_t offset,
-                                     const TransliterationRuleData& data) const {
-    if (segments == NULL) {
-        text.handleReplaceBetween(offset, offset + keyLength, output);
-        return output.length() - keyLength;
-    } else {
-        /* When there are segments to be copied, use the Replaceable.copy()
-         * API in order to retain out-of-band data.  Copy everything to the
-         * point after the key, then delete the key.  That is, copy things
-         * into offset + keyLength, then replace offset .. offset +
-         * keyLength with the empty string.
-         *
-         * Minimize the number of calls to Replaceable.replace() and
-         * Replaceable.copy().
-         */
-        int32_t textStart = offset - anteContextLength;
-        int32_t dest = offset + keyLength; // copy new text to here
-        UnicodeString buf;
-        for (int32_t i=0; i<output.length(); ++i) {
-            UChar c = output.charAt(i);
-            int32_t b = data.lookupSegmentReference(c);
-            if (b < 0) {
-                // Accumulate straight (non-segment) text.
-                buf.append(c);
-            } else {
-                // Insert any accumulated straight text.
-                if (buf.length() > 0) {
-                    text.handleReplaceBetween(dest, dest, buf);
-                    dest += buf.length();
-                    buf.remove();
-                }
-                // Copy segment with out-of-band data
-                b *= 2;
-                text.copy(textStart + segments[b],
-                          textStart + segments[b+1], dest);
-                dest += segments[b+1] - segments[b];
-            }
-
-        }
-        // Insert any accumulated straight text.
-        if (buf.length() > 0) {
-            text.handleReplaceBetween(dest, dest, buf);
-            dest += buf.length();
-        }
-        // Delete the key
-        buf.remove();
-        text.handleReplaceBetween(offset, offset + keyLength, buf);
-        return dest - (offset + keyLength) - keyLength;
-    }
-}
-
 /**
 * Internal method.  Returns true if this rule matches the given
 * index value.  The index value is an 8-bit integer, 0..255,
@ -294,14 +234,13 @@ int32_t TransliterationRule::replace(Replaceable& text, int32_t offset,
 * value.  If the rule contains only ante context, as in foo)>bar,
 * then it will match any key.
 */
-UBool TransliterationRule::matchesIndexValue(uint8_t v,
-                                   const TransliterationRuleData& data) const {
+UBool TransliterationRule::matchesIndexValue(uint8_t v) const {
    if (anteContextLength == pattern.length()) {
        // A pattern with just ante context {such as foo)>bar} can
        // match any key.
        return TRUE;
    }
-    UChar c = pattern.charAt(anteContextLength);
+    UChar32 c = pattern.char32At(anteContextLength);
    const UnicodeSet* set = data.lookupSet(c);
    return set == NULL ? (uint8_t(c) == v) : set->containsIndexValue(v);
 }
@ -328,6 +267,22 @@ UBool TransliterationRule::masks(const TransliterationRule& r2) const {
     * of) the corresponding characters of r2.  The superset
     * operation should be performed to check for UnicodeSet
     * masking.
+     *
+     * Anchors:  Two patterns that differ only in anchors only
+     * mask one another if they are exactly equal, and r2 has
+     * all the anchors r1 has (optionally, plus some).  Here Y
+     * means the row masks the column, N means it doesn't.
+     *
+     *         ab   ^ab    ab$  ^ab$
+     *   ab    Y     Y     Y     Y
+     *  ^ab    N     Y     N     Y
+     *   ab$   N     N     Y     Y
+     *  ^ab$   N     N     N     Y
+     *
+     * Post context: {a}b masks ab, but not vice versa, since {a}b
+     * matches everything ab matches, and {a}b matches {|a|}b but ab
+     * does not.  Pre context is different (a{b} does not align with
+     * ab).
     */

    /* LIMITATION of the current mask algorithm: Some rule
@ -340,126 +295,242 @@ UBool TransliterationRule::masks(const TransliterationRule& r2) const {
    int32_t left2 = r2.anteContextLength;
    int32_t right = len - left;
    int32_t right2 = r2.pattern.length() - left2;
-    return left <= left2 && right <= right2 &&
+
+    // TODO Clean this up -- some logic might be combinable with the
+    // next statement.
+
+    // Test for anchor masking
+    if (left == left2 && right == right2 &&
+        keyLength <= r2.keyLength &&
+        0 == r2.pattern.compare(0, len, pattern)) {
+        // The following boolean logic implements the table above
+        return (flags == r2.flags) ||
+            (!(flags & ANCHOR_START) && !(flags & ANCHOR_END)) ||
+            ((r2.flags & ANCHOR_START) && (r2.flags & ANCHOR_END));
+    }
+
+    return left <= left2 &&
+        (right < right2 ||
+         (right == right2 && keyLength <= r2.keyLength)) &&
        0 == r2.pattern.compare(left2 - left, len, pattern);
 }

 /**
- * Return true if this rule matches the given text.
- * @param text the text, both translated and untranslated
- * @param start the beginning index, inclusive; <code>0 <= start
- * <= limit</code>.
- * @param limit the ending index, exclusive; <code>start <= limit
- * <= text.length()</code>.
- * @param cursor position at which to translate next, representing offset
- * into text.  This value must be between <code>start</code> and
- * <code>limit</code>.
+ * Attempt a match and replacement at the given position.  Return
+ * the degree of match between this rule and the given text.  The
+ * degree of match may be mismatch, a partial match, or a full
+ * match.  A mismatch means at least one character of the text
+ * does not match the context or key.  A partial match means some
+ * context and key characters match, but the text is not long
+ * enough to match all of them.  A full match means all context
+ * and key characters match.
+ * 
+ * If a full match is obtained, perform a replacement, update pos,
+ * and return U_MATCH.  Otherwise both text and pos are unchanged.
+ * 
+ * @param text the text
+ * @param pos the position indices
+ * @param incremental if TRUE, test for partial matches that may
+ * be completed by additional text inserted at pos.limit.
+ * @return one of <code>U_MISMATCH</code>,
+ * <code>U_PARTIAL_MATCH</code>, or <code>U_MATCH</code>.  If
+ * incremental is FALSE then U_PARTIAL_MATCH will not be returned.
 */
-UBool TransliterationRule::matches(const Replaceable& text,
-                                   const UTransPosition& pos,
-                                   const TransliterationRuleData& data) const {
-    // Match anteContext, key, and postContext
-    int32_t cursor = pos.start - anteContextLength;
-    // Quick length check; this is a performance win for long rules.
-    // Widen by one (on both sides) to allow anchor matching.
-    if (cursor < (pos.contextStart - 1) ||
-        (cursor + pattern.length()) > (pos.contextLimit + 1)) {
-        return FALSE;
-    }
-    for (int32_t i=0; i<pattern.length(); ++i, ++cursor) {
-        if (!charMatches(pattern.charAt(i), text, cursor, pos,
-                         data)) {
-            return FALSE;
-        }
-    }
-    return TRUE;
-}
+UMatchDegree TransliterationRule::matchAndReplace(Replaceable& text,
+                                                  UTransPosition& pos,
+                                                  UBool incremental) const {
+    // Matching and replacing are done in one method because the
+    // replacement operation needs information obtained during the
+    // match.  Another way to do this is to have the match method
+    // create a match result struct with relevant offsets, and to pass
+    // this into the replace method.

-/**
- * Return the degree of match between this rule and the given text.  The
- * degree of match may be mismatch, a partial match, or a full match.  A
- * mismatch means at least one character of the text does not match the
- * context or key.  A partial match means some context and key characters
- * match, but the text is not long enough to match all of them.  A full
- * match means all context and key characters match.
- * @param text the text, both translated and untranslated
- * @param start the beginning index, inclusive; <code>0 <= start
- * <= limit</code>.
- * @param limit the ending index, exclusive; <code>start <= limit
- * <= text.length()</code>.
- * @param cursor position at which to translate next, representing offset
- * into text.  This value must be between <code>start</code> and
- * <code>limit</code>.
- * @return one of <code>MISMATCH</code>, <code>PARTIAL_MATCH</code>, or
- * <code>FULL_MATCH</code>.
- * @see #MISMATCH
- * @see #PARTIAL_MATCH
- * @see #FULL_MATCH
- */
-int32_t TransliterationRule::getMatchDegree(const Replaceable& text,
-                                            const UTransPosition& pos,
-                                            const TransliterationRuleData& data) const {
-    int len = getRegionMatchLength(text, pos, data);
-    return len < anteContextLength ? MISMATCH :
-        (len < pattern.length() ? PARTIAL_MATCH : FULL_MATCH);
-}
+    // ============================ MATCH ===========================

-/**
- * Return the number of characters of the text that match this rule.  If
- * there is a mismatch, return -1.  If the text is not long enough to match
- * any characters, return 0.
- * @param text the text, both translated and untranslated
- * @param start the beginning index, inclusive; <code>0 <= start
- * <= limit</code>.
- * @param limit the ending index, exclusive; <code>start <= limit
- * <= text.length()</code>.
- * @param cursor position at which to translate next, representing offset
- * into text.  This value must be between <code>start</code> and
- * <code>limit</code>.
- * @param data a dictionary of variables mapping <code>Character</code>
- * to <code>UnicodeSet</code>
- * @return -1 if there is a mismatch, 0 if the text is not long enough to
- * match any characters, otherwise the number of characters of text that
- * match this rule.
- */
-int32_t TransliterationRule::getRegionMatchLength(const Replaceable& text,
-                                          const UTransPosition& pos,
-                                          const TransliterationRuleData& data) const {
-    int32_t cursor = pos.start - anteContextLength;
-    // Quick length check; this is a performance win for long rules.
-    // Widen by one to allow anchor matching.
-    if (cursor < (pos.contextStart - 1)) {
-        return -1;
-    }
+    // Record the positions of segments.  We assume the following:
+    // - The maximum number of segments is 9.
+    // - The segment indices occur in ascending order.  That is,
+    //   segment 1 start <= segment 1 limit <= segment 2 start...
+    // - The segments have been validated such that there are no
+    //   references to nonexistent segments.
+    // - The end of the segment array is marked by a start of -1.
+    // Currently, the parser enforces all of these constraints.
+    // In the future, the first two constraints may be lifted,
+    // in which case this method will have to be modified.
+
+    int32_t segPos[18];
+    int32_t iSeg = firstKeySeg - 1;
+    int32_t nextSegPos = (iSeg >= 0) ? segments[iSeg] : -1;
+
+    // ------------------------ Ante Context ------------------------
+
+    // A mismatch in the ante context, or with the start anchor,
+    // is an outright U_MISMATCH regardless of whether we are
+    // incremental or not.
+    int32_t cursor = pos.start - 1;
    int32_t i;
-    for (i=0; i<pattern.length() && cursor<pos.contextLimit; ++i, ++cursor) {
-        if (!charMatches(pattern.charAt(i), text, cursor, pos,
-                         data)) {
-            return -1;
+    for (i=anteContextLength-1; i>=0; --i) {
+        while (i == nextSegPos) {
+            segPos[iSeg] = cursor;
+            nextSegPos = (--iSeg >= 0) ? segments[iSeg] : -1;
+        }
+        UChar keyChar = pattern.charAt(i);
+        const UnicodeSet* set = data.lookupSet(keyChar);
+        if (set == 0) {
+            if (cursor >= pos.contextStart &&
+                keyChar == text.charAt(cursor)) {
+                --cursor;
+            } else {
+                return U_MISMATCH;
+            }
+        } else {
+            // Subtract 1 from contextStart to make it a reverse limit
+            if (set->matches(text, cursor, pos.contextStart-1, FALSE)
+                != U_MATCH) {
+                return U_MISMATCH;
+            }
        }
    }
-    return i;
-}

-/**
- * Return true if the given key matches the given text.  This method
- * accounts for the fact that the key character may represent a character
- * set.  Note that the key and text characters may not be interchanged
- * without altering the results.
- * @param keyChar a character in the match key
- * @param textChar a character in the text being transliterated
- * @param data a dictionary of variables mapping <code>Character</code>
- * to <code>UnicodeSet</code>
- */
-UBool TransliterationRule::charMatches(UChar keyChar, const Replaceable& text,
-                                       int32_t index,
-                                       const UTransPosition& pos,
-                                       const TransliterationRuleData& data) const {
-    const UnicodeSet* set = 0;
-    UChar textChar = (index >= pos.contextStart && index < pos.contextLimit)
-            ? text.charAt(index) : ETHER;
-    return ((set = data.lookupSet(keyChar)) == 0) ?
-            keyChar == textChar : set->contains(textChar);
+    // ------------------------ Start Anchor ------------------------
+
+    if ((flags & ANCHOR_START) && cursor != (pos.contextStart-1)) {
+        return U_MISMATCH;
+    }
+
+    // -------------------- Key and Post Context --------------------
+
+    iSeg = firstKeySeg;
+    nextSegPos = (iSeg >= 0) ? segments[iSeg] : -1;
+
+    i = 0;
+    cursor = pos.start;
+    int32_t keyLimit = 0;
+    while (i < (pattern.length() - anteContextLength)) {
+        if (incremental && cursor == pos.contextLimit) {
+            // We've reached the context limit without a mismatch and
+            // without completing our match.
+            return U_PARTIAL_MATCH;
+        }
+        if (cursor == pos.limit && i < keyLength) {
+            // We're still in the pattern key but we're entering the
+            // post context.
+            return U_MISMATCH;
+        }
+        while (i == nextSegPos) {
+            segPos[iSeg] = cursor;
+            nextSegPos = segments[++iSeg];
+        }
+        if (i == keyLength) {
+            keyLimit = cursor;
+        }
+        UChar keyChar = pattern.charAt(anteContextLength + i++);
+        const UnicodeSet* set = data.lookupSet(keyChar);
+        if (set == 0) {
+            // Don't need the cursor < pos.contextLimit check if
+            // incremental is TRUE (because it's done above); do need
+            // it otherwise.
+            if (cursor < pos.contextLimit &&
+                keyChar == text.charAt(cursor)) {
+                ++cursor;
+            } else {
+                return U_MISMATCH;
+            }
+        } else {
+            UMatchDegree m =
+                set->matches(text, cursor, pos.contextLimit, incremental);
+            if (m != U_MATCH) {
+                return m;
+            }
+        }
+    }
+    while (i == nextSegPos) {
+        segPos[iSeg] = cursor;
+        nextSegPos = segments[++iSeg];
+    }
+	if (i == keyLength) {
+		keyLimit = cursor;
+	}
+
+    // ------------------------- Stop Anchor ------------------------
+
+    if ((flags & ANCHOR_END) != 0) {
+        if (cursor != pos.contextLimit) {
+            return U_MISMATCH;
+        }
+        if (incremental) {
+            return U_PARTIAL_MATCH;
+        }
+    }
+
+    // =========================== REPLACE ==========================
+
+    // We have a full match.  The key is between pos.start and
+    // keyLimit.  Segment indices have been recorded in segPos[].
+    // Perform a replacement.
+
+    int32_t lenDelta = 0;
+
+    if (segments == NULL) {
+        text.handleReplaceBetween(pos.start, keyLimit, output);
+        lenDelta = output.length() - (keyLimit - pos.start);
+        pos.start += cursorPos;
+    } else {
+        /* When there are segments to be copied, use the Replaceable.copy()
+         * API in order to retain out-of-band data.  Copy everything to the
+         * point after the key, then delete the key.  That is, copy things
+         * into keyLimit, then replace pos.start .. keyLimit with the
+         * empty string.
+         *
+         * Minimize the number of calls to Replaceable.replace() and
+         * Replaceable.copy().
+         */
+        int32_t dest = keyLimit; // copy new text to here
+        UnicodeString buf;
+        for (i=0; i<output.length(); ) {
+            if (i == cursorPos) {
+                // Record the position of the cursor
+                cursor = dest;
+            }
+            UChar32 c = output.char32At(i);
+            int32_t b = data.lookupSegmentReference(c);
+            if (b < 0) {
+                // Accumulate straight (non-segment) text.
+                buf.append(c);
+            } else {
+                // Insert any accumulated straight text.
+                if (buf.length() > 0) {
+                    text.handleReplaceBetween(dest, dest, buf);
+                    dest += buf.length();
+                    buf.remove();
+                }
+                // Copy segment with out-of-band data
+                b *= 2;
+                text.copy(segPos[b], segPos[b+1], dest);
+                dest += segPos[b+1] - segPos[b];
+            }
+            i += UTF_CHAR_LENGTH(c);
+        }
+        // Insert any accumulated straight text.
+        if (buf.length() > 0) {
+            text.handleReplaceBetween(dest, dest, buf);
+            dest += buf.length();
+        }
+        if (i == cursorPos) {
+            // Record the position of the cursor
+            cursor = dest;
+        }
+        // Delete the key
+        buf.remove();
+        text.handleReplaceBetween(pos.start, keyLimit, buf);
+        lenDelta = dest - keyLimit - (keyLimit - pos.start);
+        pos.start = cursor - (keyLimit - pos.start);
+    }
+    
+    pos.limit += lenDelta;
+    pos.contextLimit += lenDelta;
+    
+    return U_MATCH;
 }

 /**
@ -570,7 +641,6 @@ void TransliterationRule::_appendToRule(UnicodeString& rule,
 * given string.
 */
 UnicodeString& TransliterationRule::toRule(UnicodeString& rule,
-                                           const TransliterationRuleData& data,
                                           UBool escapeUnprintable) const {
    int32_t i;

@ -674,3 +744,5 @@ UnicodeString& TransliterationRule::toRule(UnicodeString& rule,

    return rule;
 }
+
+//eof
--- a/icu4c/source/i18n/rbt_rule.h
+++ b/icu4c/source/i18n/rbt_rule.h
@ -10,6 +10,7 @@

 #include "unicode/unistr.h"
 #include "unicode/utrans.h"
+#include "unicode/unimatch.h"

 class Replaceable;
 class TransliterationRuleData;
@ -36,39 +37,6 @@ class TransliterationRule {

 public:

-    /**
-     * Constants returned by <code>getMatchDegree()</code> indicating
-     * the degree of match between the text and this rule.
-     * @see #getMatchDegree
-     */
-    enum {
-        /**
-         * Constant returned by <code>getMatchDegree()</code>
-         * indicating a mismatch between the text and this rule.  One
-         * or more characters of the context or key do not match the
-         * text.
-         */
-        MISMATCH,
-
-        /**
-         * Constant returned by <code>getMatchDegree()</code>
-         * indicating a partial match between the text and this rule.
-         * All characters of the text match the corresponding context
-         * or key, but more characters are required for a complete
-         * match.  There are some key or context characters at the end
-         * of the pattern that remain unmatched because the text isn't
-         * long enough.
-         */
-        PARTIAL_MATCH,
-
-        /**
-         * Constant returned by <code>getMatchDegree()</code>
-         * indicating a complete match between the text and this rule.
-         * The text matches all context and key characters.
-         */
-        FULL_MATCH
-    };
-
    /**
     * The character at index i, where i < contextStart || i >= contextLimit,
     * is ETHER.  This allows explicit matching by rules and UnicodeSets
@ -109,6 +77,14 @@ private:
     */
    int32_t* segments;

+    /**
+     * A value we compute from segments.  The first index into segments[]
+     * that is >= anteContextLength.  That is, the first one that is within
+     * the forward scanned part of the pattern -- the key or the postContext.
+     * If there are no segments, this has the value -1.
+     */
+    int32_t firstKeySeg;
+
    /**
     * The length of the string that must match before the key.  If
     * zero, then there is no matching requirement before the key.
@ -130,6 +106,25 @@ private:
     */
    int32_t cursorPos;

+    /**
+     * Miscellaneous attributes.
+     */
+    int8_t flags;
+
+    /**
+     * Flag attributes.  Each value is a distinct bit.
+     */
+    enum {
+        ANCHOR_START = 1,
+        ANCHOR_END   = 2
+    };
+
+    /**
+     * A reference to the data for this rule.  The data provides
+     * lookup services for matchers and segments.
+     */
+    const TransliterationRuleData& data;
+
 public:

    /**
@ -169,6 +164,7 @@ public:
                        int32_t cursorPosition, int32_t cursorOffset,
                        int32_t* adoptedSegs,
                        UBool anchorStart, UBool anchorEnd,
+                        const TransliterationRuleData& data,
                        UErrorCode& status);

    /**
@ -192,6 +188,7 @@ public:
                        int32_t anteContextPos, int32_t postContextPos,
                        const UnicodeString& outputStr,
                        int32_t cursorPosition,
+                        const TransliterationRuleData& data,
                        UErrorCode& status);

    /**
@ -213,9 +210,13 @@ public:
    /**
     * Return the preceding context length.  This method is needed to
     * support the <code>Transliterator</code> method
-     * <code>getMaximumContextLength()</code>.
+     * <code>getMaximumContextLength()</code>.  Internally, this is
+     * implemented as the anteContextLength, optionally plus one if
+     * there is a start anchor.  The one character anchor gap is
+     * needed to make repeated incremental transliteration with
+     * anchors work.
     */
-    virtual int32_t getAnteContextLength(void) const;
+    virtual int32_t getContextLength(void) const;

    /**
     * Internal method.  Returns 8-bit index value for this rule.
@ -223,24 +224,7 @@ public:
     * unless the first character of the key is a set.  If it's a
     * set, or otherwise can match multiple keys, the index value is -1.
     */
-    int16_t getIndexValue(const TransliterationRuleData& data) const;
-
-    /**
-     * Do a replacement of the input pattern with the output text in
-     * the given string, at the given offset.  This method assumes
-     * that a match has already been found in the given text at the
-     * given position.
-     * @param text the text containing the substring to be replaced
-     * @param offset the offset into the text at which the pattern
-     * matches.  This is the offset to the point after the ante
-     * context, if any, and before the match string and any post
-     * context.
-     * @param data the RuleBasedTransliterator.Data object specifying
-     * context for this transliterator.
-     * @return the change in the length of the text
-     */
-    int32_t replace(Replaceable& text, int32_t offset,
-                    const TransliterationRuleData& data) const;
+    int16_t getIndexValue() const;

    /**
     * Internal method.  Returns true if this rule matches the given
@ -252,8 +236,7 @@ public:
     * value.  If the rule contains only ante context, as in foo)>bar,
     * then it will match any key.
     */
-    UBool matchesIndexValue(uint8_t v,
-                             const TransliterationRuleData& data) const;
+    UBool matchesIndexValue(uint8_t v) const;

    /**
     * Return true if this rule masks another rule.  If r1 masks r2 then
@ -264,88 +247,35 @@ public:
    virtual UBool masks(const TransliterationRule& r2) const;

    /**
-     * Return true if this rule matches the given text.
-     * @param text the text, both translated and untranslated
-     * @param start the beginning index, inclusive; <code>0 <= start
-     * <= limit</code>.
-     * @param limit the ending index, exclusive; <code>start <= limit
-     * <= text.length()</code>.
-     * @param cursor position at which to translate next, representing offset
-     * into text.  This value must be between <code>start</code> and
-     * <code>limit</code>.
+     * Attempt a match and replacement at the given position.  Return
+     * the degree of match between this rule and the given text.  The
+     * degree of match may be mismatch, a partial match, or a full
+     * match.  A mismatch means at least one character of the text
+     * does not match the context or key.  A partial match means some
+     * context and key characters match, but the text is not long
+     * enough to match all of them.  A full match means all context
+     * and key characters match.
+     * 
+     * If a full match is obtained, perform a replacement, update pos,
+     * and return U_MATCH.  Otherwise both text and pos are unchanged.
+     * 
+     * @param text the text
+     * @param pos the position indices
+     * @param incremental if TRUE, test for partial matches that may
+     * be completed by additional text inserted at pos.limit.
+     * @return one of <code>U_MISMATCH</code>,
+     * <code>U_PARTIAL_MATCH</code>, or <code>U_MATCH</code>.  If
+     * incremental is FALSE then U_PARTIAL_MATCH will not be returned.
     */
-    virtual UBool matches(const Replaceable& text,
-                          const UTransPosition& pos,
-                          const TransliterationRuleData& data) const;
-
-    /**
-     * Return the degree of match between this rule and the given text.  The
-     * degree of match may be mismatch, a partial match, or a full match.  A
-     * mismatch means at least one character of the text does not match the
-     * context or key.  A partial match means some context and key characters
-     * match, but the text is not long enough to match all of them.  A full
-     * match means all context and key characters match.
-     * @param text the text, both translated and untranslated
-     * @param start the beginning index, inclusive; <code>0 <= start
-     * <= limit</code>.
-     * @param limit the ending index, exclusive; <code>start <= limit
-     * <= text.length()</code>.
-     * @param cursor position at which to translate next, representing offset
-     * into text.  This value must be between <code>start</code> and
-     * <code>limit</code>.
-     * @return one of <code>MISMATCH</code>, <code>PARTIAL_MATCH</code>, or
-     * <code>FULL_MATCH</code>.
-     * @see #MISMATCH
-     * @see #PARTIAL_MATCH
-     * @see #FULL_MATCH
-     */
-    virtual int32_t getMatchDegree(const Replaceable& text,
-                                   const UTransPosition& pos,
-                                   const TransliterationRuleData& data) const;
-
-    /**
-     * Return the number of characters of the text that match this rule.  If
-     * there is a mismatch, return -1.  If the text is not long enough to match
-     * any characters, return 0.
-     * @param text the text, both translated and untranslated
-     * @param start the beginning index, inclusive; <code>0 <= start
-     * <= limit</code>.
-     * @param limit the ending index, exclusive; <code>start <= limit
-     * <= text.length()</code>.
-     * @param cursor position at which to translate next, representing offset
-     * into text.  This value must be between <code>start</code> and
-     * <code>limit</code>.
-     * @param data a dictionary of variables mapping <code>Character</code>
-     * to <code>UnicodeSet</code>
-     * @return -1 if there is a mismatch, 0 if the text is not long enough to
-     * match any characters, otherwise the number of characters of text that
-     * match this rule.
-     */
-    virtual int32_t getRegionMatchLength(const Replaceable& text,
-                                         const UTransPosition& pos,
-                                         const TransliterationRuleData& data) const;
-
-    /**
-     * Return true if the given key matches the given text.  This method
-     * accounts for the fact that the key character may represent a character
-     * set.  Note that the key and text characters may not be interchanged
-     * without altering the results.
-     * @param keyChar a character in the match key
-     * @param textChar a character in the text being transliterated
-     * @param data a dictionary of variables mapping <code>Character</code>
-     * to <code>UnicodeSet</code>
-     */
-    virtual UBool charMatches(UChar keyChar, const Replaceable& textChar,
-                              int32_t index,
-                              const UTransPosition& pos,
-                              const TransliterationRuleData& data) const;
+    UMatchDegree matchAndReplace(Replaceable& text,
+                                 UTransPosition& pos,
+                                 UBool incremental) const;

    /**
     * Create a rule string that represents this rule object.  Append
     * it to the given string.
     */
    virtual UnicodeString& toRule(UnicodeString& pat,
-                                  const TransliterationRuleData& data,
                                  UBool escapeUnprintable) const;
 private:

--- a/icu4c/source/i18n/rbt_set.cpp
+++ b/icu4c/source/i18n/rbt_set.cpp
@ -74,7 +74,7 @@ void TransliterationRuleSet::addRule(TransliterationRule* adoptedRule,
    ruleVector->addElement(adoptedRule);

    int32_t len;
-    if ((len = adoptedRule->getAnteContextLength()) > maxContextLength) {
+    if ((len = adoptedRule->getContextLength()) > maxContextLength) {
        maxContextLength = len;
    }

@ -92,8 +92,7 @@ void TransliterationRuleSet::addRule(TransliterationRule* adoptedRule,
 * That is, <code>freeze()</code> may be called multiple times,
 * although for optimal performance it shouldn't be.
 */
-void TransliterationRuleSet::freeze(const TransliterationRuleData& data,
-                                    UErrorCode& status) {
+void TransliterationRuleSet::freeze(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }
@ -124,7 +123,7 @@ void TransliterationRuleSet::freeze(const TransliterationRuleData& data,
    int16_t* indexValue = new int16_t[n];
    for (j=0; j<n; ++j) {
        TransliterationRule* r = (TransliterationRule*) ruleVector->elementAt(j);
-        indexValue[j] = r->getIndexValue(data);
+        indexValue[j] = r->getIndexValue();
    }
    for (x=0; x<256; ++x) {
        index[x] = v.size();
@ -139,7 +138,7 @@ void TransliterationRuleSet::freeze(const TransliterationRuleData& data,
                // matchesIndexValue check.  In practice this happens
                // rarely, so we seldom tread this code path.
                TransliterationRule* r = (TransliterationRule*) ruleVector->elementAt(j);
-                if (r->matchesIndexValue((uint8_t)x, data)) {
+                if (r->matchesIndexValue((uint8_t)x)) {
                    v.addElement(r);
                }
            }
@ -192,87 +191,40 @@ void TransliterationRuleSet::freeze(const TransliterationRuleData& data,
 }

 /**
- * Attempt to find a matching rule at the specified point in the text.
- * @param text the text, both translated and untranslated
- * @param start the beginning index, inclusive; <code>0 <= start
- * <= limit</code>.
- * @param limit the ending index, exclusive; <code>start <= limit
- * <= text.length()</code>.
- * @param cursor position at which to translate next, representing offset
- * into text.  This value must be between <code>start</code> and
- * <code>limit</code>.
- * @param data a dictionary mapping variables to the sets they
- * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
- * @return the matching rule, or null if none found.
+ * Transliterate the given text with the given UTransPosition
+ * indices.  Return TRUE if the transliteration should continue
+ * or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
+ * Note that FALSE is only ever returned if incremental is TRUE.
+ * @param text the text to be transliterated
+ * @param pos the position indices, which will be updated
+ * @param incremental if TRUE, assume new text may be inserted
+ * at pos.limit, and return FALSE if there is a partial match.
+ * @return TRUE unless a U_PARTIAL_MATCH has been obtained,
+ * indicating that transliteration should stop until more text
+ * arrives.
 */
-TransliterationRule*
-TransliterationRuleSet::findMatch(const Replaceable& text,
-                                  const UTransPosition& pos,
-                                  const TransliterationRuleData& data) const {
-    /* We only need to check our indexed bin of the rule table,
-     * based on the low byte of the first key character.
-     */
-    int16_t x = (int16_t) (text.charAt(pos.start) & 0xFF);
-    for (int32_t i=index[x]; i<index[x+1]; ++i) {
-        if (rules[i]->matches(text, pos, data)) {
-            return rules[i];
+UBool TransliterationRuleSet::transliterate(Replaceable& text,
+                                            UTransPosition& pos,
+                                            UBool incremental) {
+    int16_t indexByte = (int16_t) (text.char32At(pos.start) & 0xFF);
+    for (int32_t i=index[indexByte]; i<index[indexByte+1]; ++i) {
+        UMatchDegree m = rules[i]->matchAndReplace(text, pos, incremental);
+        switch (m) {
+        case U_MATCH:
+            return TRUE;
+        case U_PARTIAL_MATCH:
+            return FALSE;
        }
    }
-    return NULL;
-}
-
-/**
- * Attempt to find a matching rule at the specified point in the text.
- * Unlike <code>findMatch()</code>, this method does an incremental match.
- * An incremental match requires that there be no partial matches that might
- * pre-empt the full match that is found.  If there are partial matches,
- * then null is returned.  A non-null result indicates that a full match has
- * been found, and that it cannot be pre-empted by a partial match
- * regardless of what additional text is added to the translation buffer.
- * @param text the text, both translated and untranslated
- * @param start the beginning index, inclusive; <code>0 <= start
- * <= limit</code>.
- * @param limit the ending index, exclusive; <code>start <= limit
- * <= text.length()</code>.
- * @param cursor position at which to translate next, representing offset
- * into text.  This value must be between <code>start</code> and
- * <code>limit</code>.
- * @param data a dictionary mapping variables to the sets they
- * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
- * @param partial output parameter.  <code>partial[0]</code> is set to
- * true if a partial match is returned.
- * @return the matching rule, or null if none found, or if the text buffer
- * does not have enough text yet to unambiguously match a rule.
- */
-TransliterationRule*
-TransliterationRuleSet::findIncrementalMatch(const Replaceable& text,
-                                             const UTransPosition& pos,
-                                             const TransliterationRuleData& data,
-                                             UBool& isPartial) const {
-
-    /* We only need to check our indexed bin of the rule table,
-     * based on the low byte of the first key character.
-     */
-    isPartial = FALSE;
-    int16_t x = (int16_t) (text.charAt(pos.start) & 0xFF);
-    for (int32_t i=index[x]; i<index[x+1]; ++i) {
-        int32_t match = rules[i]->getMatchDegree(text, pos, data);
-        switch (match) {
-        case TransliterationRule::FULL_MATCH:
-            return rules[i];
-        case TransliterationRule::PARTIAL_MATCH:
-            isPartial = TRUE;
-            return NULL;
-        }
-    }
-    return NULL;
+    // No match or partial match from any rule; skip one whole code point
+    pos.start += UTF_CHAR_LENGTH(text.char32At(pos.start));
+    return TRUE;
 }

 /**
 * Create rule strings that represents this rule set.
 */
 UnicodeString& TransliterationRuleSet::toRules(UnicodeString& ruleSource,
-                                               const TransliterationRuleData& data,
                                               UBool escapeUnprintable) const {
    int32_t i;
    int32_t count = index[256];
@ -281,7 +233,7 @@ UnicodeString& TransliterationRuleSet::toRules(UnicodeString& ruleSource,
        if (i != 0) {
            ruleSource.append((UChar) 0x000A /*\n*/);
        }
-        rules[i]->toRule(ruleSource, data, escapeUnprintable);
+        rules[i]->toRule(ruleSource, escapeUnprintable);
    }
    return ruleSource;
 }
--- a/icu4c/source/i18n/rbt_set.h
+++ b/icu4c/source/i18n/rbt_set.h
@ -18,15 +18,7 @@ class UnicodeFilter;
 class UnicodeString;

 /**
- * A set of rules for a <code>RuleBasedTransliterator</code>.  This set encodes
- * the transliteration in one direction from one set of characters or short
- * strings to another.  A <code>RuleBasedTransliterator</code> consists of up to
- * two such sets, one for the forward direction, and one for the reverse.
- *
- * <p>A <code>TransliterationRuleSet</code> has one important operation, that of
- * finding a matching rule at a given point in the text.  This is accomplished
- * by the <code>findMatch()</code> method.
- *
+ * A set of rules for a <code>RuleBasedTransliterator</code>.
 * @author Alan Liu
 */
 class TransliterationRuleSet {
@ -98,59 +90,24 @@ public:
     * That is, <code>freeze()</code> may be called multiple times,
     * although for optimal performance it shouldn't be.
     */
-    virtual void freeze(const TransliterationRuleData& data,
-                        UErrorCode& status);
-
+    virtual void freeze(UErrorCode& status);
+    
    /**
-     * Attempt to find a matching rule at the specified point in the text.
-     * @param text the text, both translated and untranslated
-     * @param start the beginning index, inclusive; <code>0 <= start
-     * <= limit</code>.
-     * @param limit the ending index, exclusive; <code>start <= limit
-     * <= text.length()</code>.
-     * @param cursor position at which to translate next, representing offset
-     * into text.  This value must be between <code>start</code> and
-     * <code>limit</code>.
-     * @param data a dictionary mapping variables to the sets they
-     * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
-     * <tt>null</tt> then no filtering is applied.
-     * @return the matching rule, or null if none found.
+     * Transliterate the given text with the given UTransPosition
+     * indices.  Return TRUE if the transliteration should continue
+     * or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
+     * Note that FALSE is only ever returned if isIncremental is TRUE.
+     * @param text the text to be transliterated
+     * @param index the position indices, which will be updated
+     * @param isIncremental if TRUE, assume new text may be inserted
+     * at index.limit, and return FALSE if there is a partial match.
+     * @return TRUE unless a U_PARTIAL_MATCH has been obtained,
+     * indicating that transliteration should stop until more text
+     * arrives.
     */
-    virtual TransliterationRule* findMatch(const Replaceable& text,
-                                           const UTransPosition& pos,
-                                           const TransliterationRuleData& data) const;
-
-    /**
-     * Attempt to find a matching rule at the specified point in the text.
-     * Unlike <code>findMatch()</code>, this method does an incremental match.
-     * An incremental match requires that there be no partial matches that might
-     * pre-empt the full match that is found.  If there are partial matches,
-     * then null is returned.  A non-null result indicates that a full match has
-     * been found, and that it cannot be pre-empted by a partial match
-     * regardless of what additional text is added to the translation buffer.
-     * @param text the text, both translated and untranslated
-     * @param start the beginning index, inclusive; <code>0 <= start
-     * <= limit</code>.
-     * @param limit the ending index, exclusive; <code>start <= limit
-     * <= text.length()</code>.
-     * @param cursor position at which to translate next, representing offset
-     * into text.  This value must be between <code>start</code> and
-     * <code>limit</code>.
-     * @param data a dictionary mapping variables to the sets they
-     * represent (maps <code>Character</code> to <code>UnicodeSet</code>)
-     * @param partial output parameter.  <code>partial[0]</code> is set to
-     * true if a partial match is returned.
-     * @param filter the filter.  Any character for which
-     * <tt>filter.isIn()</tt> returns <tt>false</tt> will not be
-     * altered by this transliterator.  If <tt>filter</tt> is
-     * <tt>null</tt> then no filtering is applied.
-     * @return the matching rule, or null if none found, or if the text buffer
-     * does not have enough text yet to unambiguously match a rule.
-     */
-    virtual TransliterationRule* findIncrementalMatch(const Replaceable& text,
-                                              const UTransPosition& pos,
-                                              const TransliterationRuleData& data,
-                                              UBool& isPartial) const;
+    UBool transliterate(Replaceable& text,
+                        UTransPosition& index,
+                        UBool isIncremental);

    /**
     * Create rule strings that represents this rule set.
@ -158,7 +115,6 @@ public:
     * contents will be deleted.
     */
    virtual UnicodeString& toRules(UnicodeString& result,
-                                   const TransliterationRuleData& data,
                                   UBool escapeUnprintable) const;
 };
 #endif
--- a/icu4c/source/i18n/symtable.h
+++ b/icu4c/source/i18n/symtable.h
@ -44,7 +44,7 @@ public:
     * Lookup the UnicodeSet associated with the given character, and
     * return it.  Return <tt>null</tt> if not found.
     */
-    virtual const UnicodeSet* lookupSet(UChar ch) const = 0;
+    virtual const UnicodeSet* lookupSet(UChar32 ch) const = 0;

    /**
     * Parse a symbol reference name from the given string, starting
--- a/icu4c/source/i18n/translit.cpp
+++ b/icu4c/source/i18n/translit.cpp
@ -281,10 +281,20 @@ void Transliterator::transliterate(Replaceable& text,
 */
 void Transliterator::transliterate(Replaceable& text,
                                   UTransPosition& index,
-                                   UChar insertion,
+                                   UChar32 insertion,
                                   UErrorCode& status) const {
    UnicodeString str(insertion);
-    _transliterate(text, index, &str, status);
+    if (U_SUCCESS(status) && UTF_IS_LEAD(insertion)) {
+        // Oops, the caller passed us a single lead surrogate.  We don't
+        // support this in general, but as a favor we buffer it so LEAD
+        // followed by TRAIL insertion works; anything else won't.  This
+        // is skipped on a failed status so the text is not modified here.
+        text.handleReplaceBetween(index.limit, index.limit, str);
+        ++index.limit;
+        ++index.contextLimit;
+    } else {
+        _transliterate(text, index, &str, status);
+    }
 }

 /**
@ -351,8 +361,18 @@ void Transliterator::_transliterate(Replaceable& text,

    filteredTransliterate(text, index, TRUE);

-    index.contextStart = uprv_max(index.start - getMaximumContextLength(),
-                           originalStart);
+    // The purpose of the code below is to keep the context small
+    // while doing incremental transliteration.  When part of the left
+    // context (between contextStart and start) is no longer needed,
+    // we try to advance contextStart past that portion.  We use the
+    // maximum context length to do so.
+    int32_t newCS = index.start;
+    int32_t n = getMaximumContextLength();
+    while (newCS > originalStart && n-- > 0) {
+        --newCS;
+        newCS -= UTF_CHAR_LENGTH(text.char32At(newCS)) - 1;
+    }
+    index.contextStart = uprv_max(newCS, originalStart);
 }

 /**
--- a/icu4c/source/i18n/unifltlg.cpp
+++ b/icu4c/source/i18n/unifltlg.cpp
@ -21,7 +21,7 @@ public:
    NullFilter(UBool r) { result = r; }
    NullFilter(const NullFilter& f) : UnicodeFilter(f) { result = f.result; }
    virtual ~NullFilter() {}
-    virtual UBool contains(UChar /*c*/) const { return result; }
+    virtual UBool contains(UChar32 /*c*/) const { return result; }
    virtual UnicodeFilter* clone() const { return new NullFilter(*this); }
 };

@ -31,7 +31,7 @@ public:
    UnicodeNotFilter(UnicodeFilter* adopted);
    UnicodeNotFilter(const UnicodeNotFilter&);
    virtual ~UnicodeNotFilter();
-    virtual UBool contains(UChar c) const;
+    virtual UBool contains(UChar32 c) const;
    virtual UnicodeFilter* clone() const;
 };

@ -39,7 +39,7 @@ UnicodeNotFilter::UnicodeNotFilter(UnicodeFilter* adopted) : filt(adopted) {}
 UnicodeNotFilter::UnicodeNotFilter(const UnicodeNotFilter& f)
 : UnicodeFilter(f), filt(f.filt->clone()) {}
 UnicodeNotFilter::~UnicodeNotFilter() { delete filt; }
-UBool UnicodeNotFilter::contains(UChar c) const { return !filt->contains(c); }
+UBool UnicodeNotFilter::contains(UChar32 c) const { return !filt->contains(c); }
 UnicodeFilter* UnicodeNotFilter::clone() const { return new UnicodeNotFilter(*this); }

 /**
@ -61,7 +61,7 @@ public:
    UnicodeAndFilter(UnicodeFilter* adopted1, UnicodeFilter* adopted2);
    UnicodeAndFilter(const UnicodeAndFilter&);
    virtual ~UnicodeAndFilter();
-    virtual UBool contains(UChar c) const;
+    virtual UBool contains(UChar32 c) const;
    virtual UnicodeFilter* clone() const;
 };

@ -69,7 +69,7 @@ UnicodeAndFilter::UnicodeAndFilter(UnicodeFilter* f1, UnicodeFilter* f2) : filt1
 UnicodeAndFilter::UnicodeAndFilter(const UnicodeAndFilter& f)
 : UnicodeFilter(f), filt1(f.filt1->clone()), filt2(f.filt2->clone()) {}
 UnicodeAndFilter::~UnicodeAndFilter() { delete filt1; delete filt2; }
-UBool UnicodeAndFilter::contains(UChar c) const { return filt1->contains(c) && filt2->contains(c); }
+UBool UnicodeAndFilter::contains(UChar32 c) const { return filt1->contains(c) && filt2->contains(c); }
 UnicodeFilter* UnicodeAndFilter::clone() const { return new UnicodeAndFilter(*this); }

 /**
@ -99,7 +99,7 @@ public:
    UnicodeOrFilter(UnicodeFilter* adopted1, UnicodeFilter* adopted2);
    UnicodeOrFilter(const UnicodeOrFilter&);
    virtual ~UnicodeOrFilter();
-    virtual UBool contains(UChar c) const;
+    virtual UBool contains(UChar32 c) const;
    virtual UnicodeFilter* clone() const;
 };

@ -107,7 +107,7 @@ UnicodeOrFilter::UnicodeOrFilter(UnicodeFilter* f1, UnicodeFilter* f2) : filt1(f
 UnicodeOrFilter::UnicodeOrFilter(const UnicodeOrFilter& f)
 : UnicodeFilter(f), filt1(f.filt1->clone()), filt2(f.filt2->clone()) {}
 UnicodeOrFilter::~UnicodeOrFilter() { delete filt1; delete filt2; }
-UBool UnicodeOrFilter::contains(UChar c) const { return filt1->contains(c) || filt2->contains(c); }
+UBool UnicodeOrFilter::contains(UChar32 c) const { return filt1->contains(c) || filt2->contains(c); }
 UnicodeFilter* UnicodeOrFilter::clone() const { return new UnicodeOrFilter(*this); }

 /**
--- a/icu4c/source/i18n/uniset.cpp
+++ b/icu4c/source/i18n/uniset.cpp
@ -542,17 +542,6 @@ UBool UnicodeSet::contains(UChar32 c) const {
    return ((i & 1) != 0); // return true if odd
 }

-/**
- * Implement UnicodeFilter:
- * Returns <tt>true</tt> if this set contains the specified char.
- *
- * @return <tt>true</tt> if this set contains the specified char.
- * @draft
- */
-UBool UnicodeSet::contains(UChar c) const {
-    return contains((UChar32) c);
-}
-
 /**
 * Returns <tt>true</tt> if this set contains any character whose low byte
 * is the given value.  This is used by <tt>RuleBasedTransliterator</tt> for
@ -581,6 +570,24 @@ UBool UnicodeSet::containsIndexValue(uint8_t v) const {
    return FALSE;
 }

+/**
+ * Implementation of UnicodeMatcher::matches().  When offset equals limit,
+ * only a set containing ETHER matches; otherwise defer to UnicodeFilter.
+ */
+UMatchDegree UnicodeSet::matches(const Replaceable& text,
+                                 int32_t& offset,
+                                 int32_t limit,
+                                 UBool incremental) const {
+    if (offset != limit) {
+        return UnicodeFilter::matches(text, offset, limit, incremental);
+    }
+    // offset has reached limit: the outcome depends only on ETHER.
+    if (!contains(TransliterationRule::ETHER)) {
+        return U_MISMATCH;
+    }
+    return incremental ? U_PARTIAL_MATCH : U_MATCH;
+}
+
 /**
 * Adds the specified range to this set if it is not already
 * present.  If this set already contains the specified range,
@ -895,7 +902,8 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
    UBool invert = FALSE;
    clear();

-    int32_t lastChar = -1; // This is either a char (0..FFFF) or -1
+    const UChar32 NONE = (UChar32) -1;
+    UChar32 lastChar = NONE; // This is either a char (0..10FFFF) or NONE
    UChar lastOp = 0;

    /* This loop iterates over the characters in the pattern.  We start at
@ -916,8 +924,9 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
    // mode 1: '[' seen; if next is '^' or ':' then special
    // mode 2: '[' '^'? seen; parse pattern and close with ']'
    // mode 3: '[:' seen; parse category and close with ':]'
+    // mode 4: Pattern closed cleanly
    int8_t mode = 0;
-    int32_t openPos = 0; // offset to opening '['
+    int32_t colonPos = 0; // Expected pos of ':' in '[:'
    int32_t i = pos.getIndex();
    int32_t limit = pattern.length();
    UnicodeSet nestedAux;
@ -930,7 +939,8 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
    const UnicodeString* varValueBuffer = NULL;
    int32_t ivarValueBuffer = 0;
    int32_t anchor = 0;
-    for (; i<limit; i+=((varValueBuffer==NULL)?1:0)) {
+    UChar32 c;
+    while (i<limit) {
        /* If the next element is a single character, c will be set to it,
         * and nestedSet will be null.  In this case isLiteral indicates
         * whether the character should assume special meaning if it has
@ -941,23 +951,25 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
         */
        nestedSet = NULL;
        UBool isLiteral = FALSE;
-        UChar c;
        if (varValueBuffer != NULL) {
            if (ivarValueBuffer < varValueBuffer->length()) {
-                c = varValueBuffer->charAt(ivarValueBuffer++);
+                c = varValueBuffer->char32At(ivarValueBuffer);
+                ivarValueBuffer += UTF_CHAR_LENGTH(c);
                nestedSet = symbols->lookupSet(c); // may be NULL
                nestedPatDone = FALSE;
            } else {
                varValueBuffer = NULL;
-                c = pattern.charAt(i);
+                c = pattern.char32At(i);
+                i += UTF_CHAR_LENGTH(c);
            }
        } else {
-            c = pattern.charAt(i);
+            c = pattern.char32At(i);
+            i += UTF_CHAR_LENGTH(c);
        }

        // Ignore whitespace.  This is not Unicode whitespace, but Java
        // whitespace, a subset of Unicode whitespace.
-        if (Unicode::isWhitespace(c)) {
+        if (u_isspace(c)) {
            continue;
        }

@ -971,7 +983,7 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
        case 0:
            if (c == SET_OPEN) {
                mode = 1; // Next look for '^' or ':'
-                openPos = i;
+                colonPos = i; // Expect ':' at next offset
                continue;
            } else {
                // throw new IllegalArgumentException("Missing opening '['");
@ -986,9 +998,10 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
                newPat.append(c);
                continue; // Back to top to fetch next character
            case COLON:
-                if (i == openPos+1) {
-                    // '[:' cannot have whitespace in it
-                    --i;
+                // '[:' cannot have whitespace in it.  'i' has already
+                // been advanced.
+                if (i-1 == colonPos) {
+                    --i; // Backup to the '['
                    c = SET_OPEN;
                    mode = 3;
                    // Fall through and parse category using the same
@ -1018,15 +1031,13 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
             * interpret '\\uxxxx' Unicode escapes here (as literals).
             */
            if (c == BACKSLASH) {
-                ++i; // Advance past '\\'
                UChar32 escaped = pattern.unescapeAt(i);
                if (escaped == (UChar32) -1) {
                    status = U_ILLEGAL_ARGUMENT_ERROR;
                    return;
                }
                isLiteral = TRUE;
-                --i; // Move i back to last parsed character
-                c = (UChar) escaped;
+                c = escaped;
            }

            /* Parse variable references.  These are treated as literals.  If a
@ -1036,7 +1047,7 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
             * Set variables are only looked up if varCharToSet is not null.
             */
            else if (symbols != NULL && !isLiteral && c == SymbolTable::SYMBOL_REF) {
-                pos.setIndex(++i);
+                pos.setIndex(i);
                UnicodeString name = symbols->parseReference(pattern, pos, limit);
                if (name.length() != 0) {
                    varValueBuffer = symbols->lookup(name);
@ -1052,7 +1063,6 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
                    // Got a null; this means we have an isolated $.
                    // Tentatively assume this is an anchor.
                    anchor = 1;
-                    --i; // Back up so loop increment works properly
                }
                continue; // Back to the top to get varValueBuffer[0]
            }
@ -1069,9 +1079,8 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
                nestedPatStart = newPat.length();

                // Handle "[:...:]", representing a character category
-                UChar d = charAfter(pattern, i);
-                if (d == COLON) {
-                    i += 2;
+                if (i < pattern.length() && pattern.charAt(i) == COLON) {
+                    ++i;
                    int32_t j = pattern.indexOf(CATEGORY_CLOSE, i);
                    if (j < 0) {
                        // throw new IllegalArgumentException("Missing \":]\"");
@ -1086,7 +1095,7 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
                    if (U_FAILURE(status)) {
                        return;
                    }
-                    i = j+1; // Make i point to ']' in ":]"
+                    i = j+2; // Advance i past ":]"

                    // Use a rebuilt pattern.  If we are top level,
                    // then there is already a SET_OPEN in newPat, and
@ -1105,11 +1114,13 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
                        // loop.  This is one of 2 ways we leave this
                        // loop if the pattern is well-formed.
                        *this = *nestedSet;
+                        mode = 4;
                        break;
                    }
                } else {
                    // Recurse to get the pairs for this nested set.
-                    pos.setIndex(i);
+                    // Backup i to '['.
+                    pos.setIndex(--i);
                    switch (lastOp) {
                    case HYPHEN:
                    case INTERSECTION:
@ -1122,7 +1133,7 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
                    if (U_FAILURE(status)) {
                        return;
                    }
-                    i = pos.getIndex() - 1; // - 1 to point at ']'
+                    i = pos.getIndex();
                }
            }
        }
@ -1136,7 +1147,7 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
         * ']' have special meanings.
         */
        if (nestedSet != NULL) {
-            if (lastChar >= 0) {
+            if (lastChar != NONE) {
                if (lastOp != 0) {
                    // throw new IllegalArgumentException("Illegal rhs for " + lastChar + lastOp);
                    status = U_ILLEGAL_ARGUMENT_ERROR;
@ -1154,7 +1165,7 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
                } else {
                    _appendToPat(newPat, lastChar, FALSE);
                }
-                lastChar = -1;
+                lastChar = NONE;
            }
            switch (lastOp) {
            case HYPHEN:
@ -1193,9 +1204,11 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
                newPat.append((UChar)SymbolTable::SYMBOL_REF);
                add(TransliterationRule::ETHER);
            }
+            mode = 4;
            break;
        } else if (lastOp == 0 && !isLiteral && (c == HYPHEN || c == INTERSECTION)) {
-            lastOp = c;
+            // assert(c <= 0xFFFF);
+            lastOp = (UChar) c;
        } else if (lastOp == HYPHEN) {
            if (lastChar >= c) {
                // Don't allow redundant (a-a) or empty (b-a) ranges;
@ -1210,14 +1223,14 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
            newPat.append(HYPHEN);
            _appendToPat(newPat, c, FALSE);
            lastOp = 0;
-            lastChar = -1;
+            lastChar = NONE;
        } else if (lastOp != 0) {
            // We have <set>&<char> or <char>&<char>
            // throw new IllegalArgumentException("Unquoted " + lastOp);
            status = U_ILLEGAL_ARGUMENT_ERROR;
            return;
        } else {
-            if (lastChar >= 0) {
+            if (lastChar != NONE) {
                // We have <char><char>
                add(lastChar, lastChar);
                _appendToPat(newPat, lastChar, FALSE);
@ -1226,7 +1239,7 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
        }
    }

-    if (lastChar >= 0) {
+    if (lastChar != NONE) {
        add(lastChar, lastChar);
        _appendToPat(newPat, lastChar, FALSE);
    }
@ -1252,19 +1265,13 @@ void UnicodeSet::_applyPattern(const UnicodeString& pattern,
        complement();
    }

-    /**
-     * i indexes the last character we parsed or is pattern.length().  In
-     * the latter case, we have run off the end without finding a closing
-     * ']'.  Otherwise, we know i < pattern.length(), and we set the
-     * ParsePosition to the next character to be parsed.
-     */
-    if (i == limit) {
+    if (mode != 4) {
        // throw new IllegalArgumentException("Missing ']'");
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

-    pos.setIndex(i+1);
+    pos.setIndex(i);

    // Use the rebuilt pattern (newPat) only if necessary.  Prefer the
    // generated pattern.
@ -1393,14 +1400,6 @@ const UnicodeSet& UnicodeSet::getCategorySet(int8_t cat) {
 // Implementation: Utility methods
 //----------------------------------------------------------------

-/**
- * Returns the character after the given position, or '\uFFFE' if
- * there is none.
- */
-UChar UnicodeSet::charAfter(const UnicodeString& str, int32_t i) {
-    return ((++i) < str.length()) ? str.charAt(i) : (UChar)0xFFFE;
-}
-
 void UnicodeSet::ensureCapacity(int32_t newLen) {
    if (newLen <= capacity) return;
    capacity = newLen + GROW_EXTRA;
--- a/icu4c/source/test/intltest/hajatrts.cpp
+++ b/icu4c/source/test/intltest/hajatrts.cpp
@ -75,7 +75,7 @@ class TestHangulFilter : public UnicodeFilter {
    virtual UnicodeFilter* clone() const {
        return new TestHangulFilter(*this);
    }
-    virtual UBool contains(UChar c) const {
+    virtual UBool contains(UChar32 c) const {
       if(c == 0xae4c )
          return FALSE;
       else
--- a/icu4c/source/test/intltest/hxuntrts.cpp
+++ b/icu4c/source/test/intltest/hxuntrts.cpp
@ -59,7 +59,7 @@ class TestHexFilter : public UnicodeFilter {
    virtual UnicodeFilter* clone() const {
        return new TestHexFilter(*this);
    }
-    virtual UBool contains(UChar c) const {
+    virtual UBool contains(UChar32 c) const {
       if(c == 0x0061 || c == 0x0063 )
          return FALSE;
       else
--- a/icu4c/source/test/intltest/intltest.cpp
+++ b/icu4c/source/test/intltest/intltest.cpp
@ -314,14 +314,20 @@ IntlTest::prettify(const UnicodeString &source,
    target.remove();
    target += "\"";

-    for (i = 0; i < source.length(); i += 1)
+    for (i = 0; i < source.length(); )
    {
-        UChar ch = source[i];
+        UChar32 ch = source.char32At(i);
+        i += UTF_CHAR_LENGTH(ch);

        if (ch < 0x09 || (ch > 0x0A && ch < 0x20)|| ch > 0x7E)
        {
-            target += "\\u";
-            appendHex(ch, 4, target);
+            if (ch <= 0xFFFF) {
+                target += "\\u";
+                appendHex(ch, 4, target);
+            } else {
+                target += "\\U";
+                appendHex(ch, 8, target);
+            }
        }
        else
        {
@ -343,9 +349,10 @@ IntlTest::prettify(const UnicodeString &source, UBool parseBackslash)
    target.remove();
    target += "\"";

-    for (i = 0; i < source.length(); i += 1)
+    for (i = 0; i < source.length();)
    {
-        UChar ch = source[i];
+        UChar32 ch = source.char32At(i);
+        i += UTF_CHAR_LENGTH(ch);

        if (ch < 0x09 || (ch > 0x0A && ch < 0x20)|| ch > 0x7E)
        {
@ -365,8 +372,13 @@ IntlTest::prettify(const UnicodeString &source, UBool parseBackslash)
                    target.truncate(target.length() - 1);
                }
            }
-            target += "\\u";
-            appendHex(ch, 4, target);
+            if (ch <= 0xFFFF) {
+                target += "\\u";
+                appendHex(ch, 4, target);
+            } else {
+                target += "\\U";
+                appendHex(ch, 8, target);
+            }
        }
        else
        {
--- a/icu4c/source/test/intltest/jahatrts.cpp
+++ b/icu4c/source/test/intltest/jahatrts.cpp
@ -73,7 +73,7 @@ class TestJamoFilter : public UnicodeFilter {
    virtual UnicodeFilter* clone() const {
        return new TestJamoFilter(*this);
    }
-    virtual UBool contains(UChar c) const {
+    virtual UBool contains(UChar32 c) const {
       if(c == 0x1101 )
          return FALSE;
       else
--- a/icu4c/source/test/intltest/transapi.cpp
+++ b/icu4c/source/test/intltest/transapi.cpp
@ -618,7 +618,7 @@ class TestFilter1 : public UnicodeFilter {
    virtual UnicodeFilter* clone() const {
        return new TestFilter1(*this);
    }
-    virtual UBool contains(UChar c) const {
+    virtual UBool contains(UChar32 c) const {
       if(c==0x63 || c==0x61 || c==0x43 || c==0x41)
          return FALSE;
       else
@ -629,7 +629,7 @@ class TestFilter2 : public UnicodeFilter {
    virtual UnicodeFilter* clone() const {
        return new TestFilter2(*this);
    }
-    virtual UBool contains(UChar c) const {
+    virtual UBool contains(UChar32 c) const {
        if(c==0x65 || c==0x6c)
           return FALSE;
        else
@ -640,7 +640,7 @@ class TestFilter3 : public UnicodeFilter {
    virtual UnicodeFilter* clone() const {
        return new TestFilter3(*this);
    }
-    virtual UBool contains(UChar c) const {
+    virtual UBool contains(UChar32 c) const {
        if(c==0x6f || c==0x77)
           return FALSE;
        else
--- a/icu4c/source/test/intltest/transtst.cpp
+++ b/icu4c/source/test/intltest/transtst.cpp
@ -66,6 +66,8 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
        TESTCASE(30,TestCompoundFilter);
        TESTCASE(31,TestRemove);
        TESTCASE(32,TestToRules);
+        TESTCASE(33,TestContext);
+        TESTCASE(34,TestSupplemental);
        default: name = ""; break;
    }
 }
@ -152,7 +154,9 @@ void TransliteratorTest::TestSimpleRules(void) {
     */
    expect(UnicodeString("ab>x|y;", "") +
           "yc>z",
-           "eabcd", "exzd");        /* Another set of rules:
+           "eabcd", "exzd");
+
+    /* Another set of rules:
     *    1. ab>x|yzacw
     *    2. za>q
     *    3. qc>r
@ -476,7 +480,7 @@ class TestFilter : public UnicodeFilter {
    virtual UnicodeFilter* clone() const {
        return new TestFilter(*this);
    }
-    virtual UBool contains(UChar c) const {
+    virtual UBool contains(UChar32 c) const {
        return c != (UChar)0x0063 /*c*/;
    }
 };
@ -506,6 +510,12 @@ void TransliteratorTest::TestFiltering(void) {
 * Test anchors
 */
 void TransliteratorTest::TestAnchors(void) {
+    expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
+           "aaa",
+           "012");
+    expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
+           "aaa",
+           "012");
    expect(UnicodeString("^ab  > 01 ;"
           " ab  > |8 ;"
           "  b  > k ;"
@ -1451,18 +1461,44 @@ void TransliteratorTest::TestToRules(void) {
    }
 }

+/**
+ * Runs two rule sets that involve context.  The first is driven with an
+ * explicit UTransPosition; the second uses the default (whole-string)
+ * path of expect().
+ */
+void TransliteratorTest::TestContext() {
+    // Context covers offsets [0, 2); the start/limit range is [0, 1).
+    UTransPosition window;
+    window.contextStart = 0;
+    window.contextLimit = 2;
+    window.start = 0;
+    window.limit = 1;
+    expect("de > x; {d}e > y;", "de", "ye", &window);
+
+    // No explicit position for this one.
+    expect("ab{c} > z;", "xadabdabcy", "xadabdabzy");
+}
+
+/**
+ * Checks rule sets whose variables, sets and input text contain
+ * \U-escaped code points above U+FFFF.
+ */
+void TransliteratorTest::TestSupplemental() {
+    // A variable and a set defined with supplemental code points.
+    const UnicodeString varRules = CharsToUnicodeString(
+        "$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
+        "a > $a; $s > i;");
+    const UnicodeString varInput = CharsToUnicodeString("ab\\U0001030Fx");
+    const UnicodeString varExpected = CharsToUnicodeString("\\U00010300bix");
+    expect(varRules, varInput, varExpected);
+
+    // Two sets mixing BMP and supplemental ranges, matched as segments
+    // and emitted in swapped order ($2 $1).
+    const UnicodeString swapRules = CharsToUnicodeString(
+        "$a=[a-z\\U00010300-\\U00010323];"
+        "$b=[A-Z\\U00010400-\\U0001044D];"
+        "($a)($b) > $2 $1;");
+    const UnicodeString swapInput = CharsToUnicodeString(
+        "aB\\U00010300\\U00010400c\\U00010401\\U00010301D");
+    const UnicodeString swapExpected = CharsToUnicodeString(
+        "Ba\\U00010400\\U00010300\\U00010401cD\\U00010301");
+    expect(swapRules, swapInput, swapExpected);
+}
+
 //======================================================================
 // Support methods
 //======================================================================
 void TransliteratorTest::expect(const UnicodeString& rules,
                                const UnicodeString& source,
-                                const UnicodeString& expectedResult) {
+                                const UnicodeString& expectedResult,
+                                UTransPosition *pos) {
    UErrorCode status = U_ZERO_ERROR;
    Transliterator *t = new RuleBasedTransliterator("<ID>", rules, status);
    if (U_FAILURE(status)) {
        errln("FAIL: Transliterator constructor failed");
    } else {
-        expect(*t, source, expectedResult);
+        expect(*t, source, expectedResult, pos);
    }
    delete t;
 }
@ -1477,34 +1513,49 @@ void TransliteratorTest::expect(const Transliterator& t,

 void TransliteratorTest::expect(const Transliterator& t,
                                const UnicodeString& source,
-                                const UnicodeString& expectedResult) {
-    UnicodeString result(source);
-    t.transliterate(result);
-    expectAux(t.getID() + ":String", source, result, expectedResult);
+                                const UnicodeString& expectedResult,
+                                UTransPosition *pos) {
+    if (pos == 0) {
+        UnicodeString result(source);
+        t.transliterate(result);
+        expectAux(t.getID() + ":String", source, result, expectedResult);
+    }
+
+    UTransPosition index={0, 0, 0, 0};
+    if (pos != 0) {
+        index = *pos;
+    }

    UnicodeString rsource(source);
-    t.transliterate(rsource);
+    if (pos == 0) {
+        t.transliterate(rsource);
+    } else {
+        // Do it all at once -- below we do it incrementally
+        t.finishTransliteration(rsource, *pos);
+    }
    expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);

    // Test keyboard (incremental) transliteration -- this result
    // must be the same after we finalize (see below).
-    rsource.remove();
-    UTransPosition index={0, 0, 0, 0};
    UnicodeString log;
-
-    for (int32_t i=0; i<source.length(); ++i) {
-        if (i != 0) {
-            log.append(" + ");
-        }
-        log.append(source.charAt(i)).append(" -> ");
+    rsource.remove();
+    if (pos != 0) {
+        rsource = source;
+        formatInput(log, rsource, index);
+        log.append(" -> ");
        UErrorCode status = U_ZERO_ERROR;
-        t.transliterate(rsource, index, source.charAt(i), status);
-        // Append the string buffer with a vertical bar '|' where
-        // the committed index is.
-        UnicodeString left, right;
-        rsource.extractBetween(0, index.start, left);
-        rsource.extractBetween(index.start, rsource.length(), right);
-        log.append(left).append((UChar)PIPE).append(right);
+        t.transliterate(rsource, index, status);
+        formatInput(log, rsource, index);
+    } else {
+        for (int32_t i=0; i<source.length(); ++i) {
+            if (i != 0) {
+                log.append(" + ");
+            }
+            log.append(source.charAt(i)).append(" -> ");
+            UErrorCode status = U_ZERO_ERROR;
+            t.transliterate(rsource, index, source.charAt(i), status);
+            formatInput(log, rsource, index);
+        }
    }
    
    // As a final step in keyboard transliteration, we must call
@ -1518,6 +1569,41 @@ void TransliteratorTest::expect(const Transliterator& t,
              expectedResult);
 }

+/**
+ * Appends a readable rendering of a string and its transliteration
+ * position to a log buffer.
+ *
+ * A consistent position is rendered as aaa{bbb|ccc|ddd}eee: the braces
+ * sit at contextStart and contextLimit, the bars at start and limit.
+ * An inconsistent position is rendered as a diagnostic message listing
+ * the four indices followed by the input text.
+ *
+ * @param appendTo buffer that receives the rendering
+ * @param input the string being transliterated
+ * @param pos the index struct to render
+ * @return appendTo
+ */
+UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
+                                               const UnicodeString& input,
+                                               const UTransPosition& pos) {
+    // Reject any position whose indices are not ordered
+    // 0 <= contextStart <= start <= limit <= contextLimit <= length.
+    if (pos.contextStart < 0 ||
+        pos.start < pos.contextStart ||
+        pos.limit < pos.start ||
+        pos.contextLimit < pos.limit ||
+        input.length() < pos.contextLimit) {
+
+        UnicodeString complaint =
+            (UnicodeString)"INVALID UTransPosition {cs=" +
+            pos.contextStart + ", s=" + pos.start + ", l=" +
+            pos.limit + ", cl=" + pos.contextLimit + "} on " +
+            input;
+        appendTo.append(complaint);
+        return appendTo;
+    }
+
+    // Cut the input into its five pieces before touching appendTo.
+    UnicodeString beforeContext;
+    UnicodeString leadingContext;
+    UnicodeString active;
+    UnicodeString trailingContext;
+    UnicodeString afterContext;
+    input.extractBetween(0, pos.contextStart, beforeContext);
+    input.extractBetween(pos.contextStart, pos.start, leadingContext);
+    input.extractBetween(pos.start, pos.limit, active);
+    input.extractBetween(pos.limit, pos.contextLimit, trailingContext);
+    input.extractBetween(pos.contextLimit, input.length(), afterContext);
+
+    appendTo.append(beforeContext);
+    appendTo.append((UChar)123/*{*/);
+    appendTo.append(leadingContext);
+    appendTo.append((UChar)PIPE);
+    appendTo.append(active);
+    appendTo.append((UChar)PIPE);
+    appendTo.append(trailingContext);
+    appendTo.append((UChar)125/*}*/);
+    appendTo.append(afterContext);
+    return appendTo;
+}
+
 void TransliteratorTest::expectAux(const UnicodeString& tag,
                                   const UnicodeString& source,
                                   const UnicodeString& result,
--- a/icu4c/source/test/intltest/transtst.h
+++ b/icu4c/source/test/intltest/transtst.h
@ -11,6 +11,7 @@
 #define TRANSTST_H

 #include "unicode/utypes.h"
+#include "unicode/translit.h"
 #include "intltest.h"

 class Transliterator;
@ -167,13 +168,18 @@ class TransliteratorTest : public IntlTest {

    void TestToRules(void);

+    void TestContext(void);
+
+    void TestSupplemental(void);
+
    //======================================================================
    // Support methods
    //======================================================================
 protected:
    void expect(const UnicodeString& rules,
                const UnicodeString& source,
-                const UnicodeString& expectedResult);
+                const UnicodeString& expectedResult,
+                UTransPosition *pos=0);

    void expect(const Transliterator& t,
                const UnicodeString& source,
@ -182,7 +188,8 @@ class TransliteratorTest : public IntlTest {
    
    void expect(const Transliterator& t,
                const UnicodeString& source,
-                const UnicodeString& expectedResult);
+                const UnicodeString& expectedResult,
+                UTransPosition *pos=0);
    
    void expectAux(const UnicodeString& tag,
                   const UnicodeString& source,
@ -192,6 +199,10 @@ class TransliteratorTest : public IntlTest {
    virtual void expectAux(const UnicodeString& tag,
                   const UnicodeString& summary, UBool pass,
                   const UnicodeString& expectedResult);
+
+    static UnicodeString& formatInput(UnicodeString &appendTo,
+                                      const UnicodeString& input,
+                                      const UTransPosition& pos);
 };

 #endif
--- a/icu4c/source/test/intltest/ufltlgts.cpp
+++ b/icu4c/source/test/intltest/ufltlgts.cpp
@ -39,7 +39,7 @@ class Filter1: public UnicodeFilter{
    virtual UnicodeFilter* clone() const{
        return new Filter1(*this);
    }
-    virtual UBool contains(UChar c) const {
+    virtual UBool contains(UChar32 c) const {
        if(c == 0x0061 || c == 0x0041 || c == 0x0063 || c == 0x0043)
            return FALSE;
        else
@ -50,7 +50,7 @@ class Filter2: public UnicodeFilter{
    virtual UnicodeFilter* clone() const{
        return new Filter2(*this);
    }
-    virtual UBool contains(UChar c) const {
+    virtual UBool contains(UChar32 c) const {
        if(c == 0x0079 || c == 0x0059 || c == 0x007a || c == 0x005a  || c == 0x0061 || c == 0x0063)
            return FALSE;
        else
--- a/icu4c/source/test/intltest/unhxtrts.cpp
+++ b/icu4c/source/test/intltest/unhxtrts.cpp
@ -71,7 +71,7 @@ class TestUniFilter : public UnicodeFilter {
    virtual UnicodeFilter* clone() const {
        return new TestUniFilter(*this);
    }
-    virtual UBool contains(UChar c) const {
+    virtual UBool contains(UChar32 c) const {
       if(c==0x0063 || c==0x0061 || c==0x0043 || c==0x0041)
          return FALSE;
       else