From 16fcadebb01421ffb43a29d7d5a6a7d115db74a5 Mon Sep 17 00:00:00 2001
From: Alan Liu <alansliu@gmail.com>
Date: Tue, 11 Dec 2001 17:45:13 +0000
Subject: [PATCH] ICU-1591 clean up TransliterationRule

X-SVN-Rev: 7342
---
 icu4c/source/i18n/strmatch.cpp                |   2 +-
 icu4j/src/com/ibm/icu/text/StringMatcher.java |   6 +-
 .../com/ibm/icu/text/TransliterationRule.java | 235 ++++++++----------
 icu4j/src/com/ibm/text/StringMatcher.java     |   6 +-
 .../src/com/ibm/text/TransliterationRule.java | 235 ++++++++----------
 5 files changed, 217 insertions(+), 267 deletions(-)
diff --git a/icu4c/source/i18n/strmatch.cpp b/icu4c/source/i18n/strmatch.cpp
index 4884521c2f6..5d4cb9afe38 100644
--- a/icu4c/source/i18n/strmatch.cpp
+++ b/icu4c/source/i18n/strmatch.cpp
@@ -63,7 +63,7 @@ UMatchDegree StringMatcher::matches(const Replaceable& text,
             UChar keyChar = pattern.charAt(i);
             UnicodeMatcher* subm = data.lookup(keyChar);
             if (subm == 0) {
-                if (cursor >= limit &&
+                if (cursor > limit &&
                     keyChar == text.charAt(cursor)) {
                     --cursor;
                 } else {
diff --git a/icu4j/src/com/ibm/icu/text/StringMatcher.java b/icu4j/src/com/ibm/icu/text/StringMatcher.java
index a5faf3d5ff7..c14e4e8e2ee 100755
--- a/icu4j/src/com/ibm/icu/text/StringMatcher.java
+++ b/icu4j/src/com/ibm/icu/text/StringMatcher.java
@@ -5,8 +5,8 @@
  *******************************************************************************
  *
  * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/StringMatcher.java,v $ 
- * $Date: 2001/12/03 21:33:58 $ 
- * $Revision: 1.5 $
+ * $Date: 2001/12/11 17:43:56 $ 
+ * $Revision: 1.6 $
  *
  *****************************************************************************************
  */
@@ -60,7 +60,7 @@ class StringMatcher implements UnicodeMatcher {
                 char keyChar = pattern.charAt(i); // OK; see note (1) above
                 UnicodeMatcher subm = data.lookup(keyChar);
                 if (subm == null) {
-                    if (cursor[0] >= limit &&
+                    if (cursor[0] > limit &&
                         keyChar == text.charAt(cursor[0])) { // OK; see note (1) above
                         --cursor[0];
                     } else {
diff --git a/icu4j/src/com/ibm/icu/text/TransliterationRule.java b/icu4j/src/com/ibm/icu/text/TransliterationRule.java
index 53bff4eb2e4..d51ba209e1b 100755
--- a/icu4j/src/com/ibm/icu/text/TransliterationRule.java
+++ b/icu4j/src/com/ibm/icu/text/TransliterationRule.java
@@ -5,8 +5,8 @@
  *******************************************************************************
  *
  * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliterationRule.java,v $
- * $Date: 2001/12/03 21:33:58 $
- * $Revision: 1.39 $
+ * $Date: 2001/12/11 17:43:57 $
+ * $Revision: 1.40 $
  *
  *****************************************************************************************
  */
@@ -46,10 +46,25 @@ import com.ibm.util.Utility;
  * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
  *
  * @author Alan Liu
- * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.39 $ $Date: 2001/12/03 21:33:58 $
+ * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.40 $ $Date: 2001/12/11 17:43:57 $
  */
 class TransliterationRule {
 
+    /**
+     * The match that must occur before the key, or null if there is no
+     * antecedent constraint.
+     */
+    private StringMatcher anteContext;
+
+    /**
+     * The 
+     */
+    private StringMatcher key;
+
+    private StringMatcher postContext;
+
+
+
     /**
      * The string that must be matched, consisting of the anteContext, key,
      * and postContext, concatenated together, in that order.  Some components
@@ -182,8 +197,7 @@ class TransliterationRule {
         }
         if (cursorPos < 0) {
             cursorPos = output.length();
-        }
-        if (cursorPos > output.length()) {
+        } else if (cursorPos > output.length()) {
             throw new IllegalArgumentException("Invalid cursor position");
         }
         this.cursorPos = cursorPos + cursorOffset;
@@ -202,6 +216,25 @@ class TransliterationRule {
         if (anchorEnd) {
             flags |= ANCHOR_END;
         }
+
+        anteContext = null;
+        if (anteContextLength > 0) {
+            anteContext = new StringMatcher(pattern.substring(0, anteContextLength),
+                                            false, data);
+        }
+
+        key = null;
+        if (keyLength > 0) {
+            key = new StringMatcher(pattern.substring(anteContextLength, anteContextLength + keyLength),
+                                    false, data);
+        }
+
+        int postContextLength = pattern.length() - keyLength - anteContextLength;
+        postContext = null;
+        if (postContextLength > 0) {
+            postContext = new StringMatcher(pattern.substring(anteContextLength + keyLength),
+                                            false, data);
+        }
     }
 
     /**
@@ -227,7 +260,7 @@ class TransliterationRule {
      * unless the first character of the key is a set.  If it's a
      * set, or otherwise can match multiple keys, the index value is -1.
      */
-   final int getIndexValue() {
+    final int getIndexValue() {
         if (anteContextLength == pattern.length()) {
             // A pattern with just ante context {such as foo)>bar} can
             // match any key.
@@ -248,15 +281,10 @@ class TransliterationRule {
      * then it will match any key.
      */
     final boolean matchesIndexValue(int v) {
-        if (anteContextLength == pattern.length()) {
-            // A pattern with just ante context {such as foo)>bar} can
-            // match any key.
-            return true;
-        }
-        int c = UTF16.charAt(pattern, anteContextLength);
-        UnicodeMatcher matcher = data.lookup(c);
-        return matcher == null ? (c & 0xFF) == v :
-            matcher.matchesIndexValue(v);
+        // Delegate to the key, or if there is none, to the postContext.
+        // If there is neither then we match any key; return true.
+        UnicodeMatcher m = (key != null) ? key : postContext;
+        return (m != null) ? m.matchesIndexValue(v) : true;
     }
 
     /**
@@ -391,112 +419,66 @@ class TransliterationRule {
         int oText; // offset into 'text'
         int newStart = 0;
         int minOText;
-        int oPattern; // offset into 'pattern'
-
-        // Backup oText by one
-        oText = posBefore(text, pos.start);
 
         // Note (1): We process text in 16-bit code units, rather than
         // 32-bit code points.  This works because stand-ins are
         // always in the BMP and because we are doing a literal match
         // operation, which can be done 16-bits at a time.
 
-        for (oPattern=anteContextLength-1; oPattern>=0; --oPattern) {
-            char keyChar = pattern.charAt(oPattern); // See note (1)
-            UnicodeMatcher matcher = data.lookup(keyChar);
-            if (matcher == null) {
-                if (oText >= pos.contextStart &&
-                    keyChar == text.charAt(oText)) { // See note (1)
-                    --oText;
-                } else {
-                    return UnicodeMatcher.U_MISMATCH;
-                }
-            } else {
-                // Subtract 1 from contextStart to make it a reverse limit
-                intRef[0] = oText;
-                if (matcher.matches(text, intRef, pos.contextStart-1, false)
-                    != UnicodeMatcher.U_MATCH) {
-                    return UnicodeMatcher.U_MISMATCH;
-                }
-                oText = intRef[0];
+        int anteLimit = posBefore(text, pos.contextStart);
+
+        int match;
+
+        // Start reverse match at char before pos.start
+        intRef[0] = posBefore(text, pos.start);
+
+        if (anteContext != null) {
+            match = anteContext.matches(text, intRef, anteLimit, false);
+            if (match != UnicodeMatcher.U_MATCH) {
+                return UnicodeMatcher.U_MISMATCH;
             }
         }
 
+        oText = intRef[0];
+        
         minOText = posAfter(text, oText);
 
         // ------------------------ Start Anchor ------------------------
 
-        if (((flags & ANCHOR_START) != 0) && oText != posBefore(text, pos.contextStart)) {
+        if (((flags & ANCHOR_START) != 0) && oText != anteLimit) {
             return UnicodeMatcher.U_MISMATCH;
         }
 
         // -------------------- Key and Post Context --------------------
 
-        oPattern = 0;
-        oText = pos.start;
-        keyLimit = 0;
-        while (oPattern < (pattern.length() - anteContextLength)) {
-            if (incremental && oText == pos.limit) {
-                // We've reached the limit without a mismatch and
-                // without completing our match.
+        intRef[0] = pos.start;
+
+        if (key != null) {
+            match = key.matches(text, intRef, pos.limit, incremental);
+            if (match != UnicodeMatcher.U_MATCH) {
+                return match;
+            }
+        }
+
+        keyLimit = intRef[0];
+
+        if (postContext != null) {
+            if (incremental && keyLimit == pos.limit) {
+                // The key matches just before pos.limit, and there is
+                // a postContext.  Since we are in incremental mode,
+                // we must assume more characters may be inserted at
+                // pos.limit -- this is a partial match.
                 return UnicodeMatcher.U_PARTIAL_MATCH;
             }
 
-            // It might seem that we could do a check like this here:
-            //!if (oText == pos.limit && oPattern < keyLength) {
-            //!    // We're still in the pattern key but we're entering the
-            //!    // post context.
-            // but this won't work if the end of the key is a
-            // zero-length matcher, followed by post context: {a b?} c
-            // Instead, what we do is proceed with matching as usual
-            // so zero-length matchers can work, but restrict the
-            // limit to either pos.limit or pos.contextLimit,
-            // depending on whether we're in the key or in the post
-            // context.
-
-            if (oPattern == keyLength) {
-                keyLimit = oText;
+            match = postContext.matches(text, intRef, pos.contextLimit, incremental);
+            if (match != UnicodeMatcher.U_MATCH) {
+                return match;
             }
-
-            // Restrict the key to match up to pos.limit; the post-context
-            // can match up to pos.contextLimit.
-            int matchLimit = (oPattern < keyLength) ? pos.limit : pos.contextLimit;
-
-            char keyChar = pattern.charAt(anteContextLength + oPattern++); // See note (1)
-            UnicodeMatcher matcher = data.lookup(keyChar);
-            if (matcher == null) {
-                // Don't need the oText < pos.contextLimit check if
-                // incremental is TRUE (because it's done above); do need
-                // it otherwise.
-                if (oText < matchLimit &&
-                    keyChar == text.charAt(oText)) { // See note (1)
-                    ++oText;
-                } else {
-                    return UnicodeMatcher.U_MISMATCH;
-                }
-            } else {
-                intRef[0] = oText;
-                int m = matcher.matches(text, intRef, matchLimit, incremental);                
-                if (m != UnicodeMatcher.U_MATCH) {
-                    return m;
-                }
-                oText = intRef[0];
-            }
-            
-            // This check rendered superfluous by above use of
-            // matchLimit, but kept around for documentation.
-            //!if (oText > pos.limit && oPattern < keyLength) {
-            //!    // We're still in the pattern key but we've entering the
-            //!    // post context.  We must do this check _after_ doing the
-            //!    // match in case we have zero-length matchers like /a?/
-            //!    // at the end of the key.
-            //!    return UnicodeMatcher.U_MISMATCH;
-            //!}
-        }
-        if (oPattern == keyLength) {
-            keyLimit = oText;
         }
 
+        oText = intRef[0];
+
         // ------------------------- Stop Anchor ------------------------
 
         if (((flags & ANCHOR_END)) != 0) {
@@ -711,8 +693,6 @@ class TransliterationRule {
         else {
             UTF16.append(rule, c);
         }
-
-        //System.out.println("rule=" + rule.toString() + " qb=" + quoteBuf.toString());
     }
 
     static final void appendToRule(StringBuffer rule,
@@ -726,8 +706,19 @@ class TransliterationRule {
         }
     }
 
-    static private int[] POW10 = {1, 10, 100, 1000, 10000, 100000, 1000000,
-                                  10000000, 100000000, 1000000000};
+    /**
+     * Given a matcher reference, which may be null, append its
+     * pattern as a literal to the given rule.
+     */
+    static final void appendToRule(StringBuffer rule,
+                                   UnicodeMatcher matcher,
+                                   boolean escapeUnprintable,
+                                   StringBuffer quoteBuf) {
+        if (matcher != null) {
+            appendToRule(rule, matcher.toPattern(escapeUnprintable),
+                         true, escapeUnprintable, quoteBuf);
+        }
+    }
 
     /**
      * Create a source string that represents this rule.  Append it to the
@@ -746,7 +737,7 @@ class TransliterationRule {
         // Do not emit the braces '{' '}' around the pattern if there
         // is neither anteContext nor postContext.
         boolean emitBraces =
-            (anteContextLength != 0) || (keyLength != pattern.length());
+            (anteContext != null) || (postContext != null);
 
         // Emit start anchor
         if ((flags & ANCHOR_START) != 0) {
@@ -754,29 +745,20 @@ class TransliterationRule {
         }
 
         // Emit the input pattern
-        for (i=0; i<pattern.length(); ++i) {
-            if (emitBraces && i == anteContextLength) {
-                appendToRule(rule, '{', true, escapeUnprintable, quoteBuf);
-            }
+        appendToRule(rule, anteContext, escapeUnprintable, quoteBuf);
 
-            if (emitBraces && i == (anteContextLength + keyLength)) {
-                appendToRule(rule, '}', true, escapeUnprintable, quoteBuf);
-            }
-
-            char c = pattern.charAt(i); // Ok to use 16-bits here
-            UnicodeMatcher matcher = data.lookup(c);
-            if (matcher == null) {
-                appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
-            } else {
-                appendToRule(rule, matcher.toPattern(escapeUnprintable),
-                              true, escapeUnprintable, quoteBuf);
-            }
+        if (emitBraces) {
+            appendToRule(rule, '{', true, escapeUnprintable, quoteBuf);
         }
 
-        if (emitBraces && i == (anteContextLength + keyLength)) {
+        appendToRule(rule, key, escapeUnprintable, quoteBuf);
+
+        if (emitBraces) {
             appendToRule(rule, '}', true, escapeUnprintable, quoteBuf);
         }
 
+        appendToRule(rule, postContext, escapeUnprintable, quoteBuf);
+
         // Emit end anchor
         if ((flags & ANCHOR_END) != 0) {
             rule.append('$');
@@ -807,17 +789,7 @@ class TransliterationRule {
                 ++seg; // make 1-based
                 appendToRule(rule, 0x20, true, escapeUnprintable, quoteBuf);
                 rule.append('$');
-                boolean show = false; // true if we should display digits
-                for (int p=9; p>=0; --p) {
-                    int d = seg / POW10[p];
-                    seg -= d * POW10[p];
-                    if (d != 0 || p == 0) {
-                        show = true;
-                    }
-                    if (show) {
-                        rule.append((char)(48+d));
-                    }
-                }
+                Utility.appendNumber(rule, seg, 10, 1);
                 rule.append(' ');
             }
         }
@@ -878,6 +850,9 @@ class TransliterationRule {
 
 /**
  * $Log: TransliterationRule.java,v $
+ * Revision 1.40  2001/12/11 17:43:57  alan
+ * jitterbug 1591: clean up TransliterationRule
+ *
  * Revision 1.39  2001/12/03 21:33:58  alan
  * jitterbug 1373: more fixes to support supplementals
  *
diff --git a/icu4j/src/com/ibm/text/StringMatcher.java b/icu4j/src/com/ibm/text/StringMatcher.java
index b68d2a09649..c9ec25994da 100755
--- a/icu4j/src/com/ibm/text/StringMatcher.java
+++ b/icu4j/src/com/ibm/text/StringMatcher.java
@@ -5,8 +5,8 @@
  *******************************************************************************
  *
  * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/StringMatcher.java,v $ 
- * $Date: 2001/12/03 21:33:58 $ 
- * $Revision: 1.5 $
+ * $Date: 2001/12/11 17:43:56 $ 
+ * $Revision: 1.6 $
  *
  *****************************************************************************************
  */
@@ -60,7 +60,7 @@ class StringMatcher implements UnicodeMatcher {
                 char keyChar = pattern.charAt(i); // OK; see note (1) above
                 UnicodeMatcher subm = data.lookup(keyChar);
                 if (subm == null) {
-                    if (cursor[0] >= limit &&
+                    if (cursor[0] > limit &&
                         keyChar == text.charAt(cursor[0])) { // OK; see note (1) above
                         --cursor[0];
                     } else {
diff --git a/icu4j/src/com/ibm/text/TransliterationRule.java b/icu4j/src/com/ibm/text/TransliterationRule.java
index 8008e09a90a..f27c1b613bf 100755
--- a/icu4j/src/com/ibm/text/TransliterationRule.java
+++ b/icu4j/src/com/ibm/text/TransliterationRule.java
@@ -5,8 +5,8 @@
  *******************************************************************************
  *
  * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/TransliterationRule.java,v $
- * $Date: 2001/12/03 21:33:58 $
- * $Revision: 1.39 $
+ * $Date: 2001/12/11 17:43:57 $
+ * $Revision: 1.40 $
  *
  *****************************************************************************************
  */
@@ -46,10 +46,25 @@ import com.ibm.util.Utility;
  * <p>Copyright &copy; IBM Corporation 1999.  All rights reserved.
  *
  * @author Alan Liu
- * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.39 $ $Date: 2001/12/03 21:33:58 $
+ * @version $RCSfile: TransliterationRule.java,v $ $Revision: 1.40 $ $Date: 2001/12/11 17:43:57 $
  */
 class TransliterationRule {
 
+    /**
+     * The match that must occur before the key, or null if there is no
+     * antecedent constraint.
+     */
+    private StringMatcher anteContext;
+
+    /**
+     * The 
+     */
+    private StringMatcher key;
+
+    private StringMatcher postContext;
+
+
+
     /**
      * The string that must be matched, consisting of the anteContext, key,
      * and postContext, concatenated together, in that order.  Some components
@@ -182,8 +197,7 @@ class TransliterationRule {
         }
         if (cursorPos < 0) {
             cursorPos = output.length();
-        }
-        if (cursorPos > output.length()) {
+        } else if (cursorPos > output.length()) {
             throw new IllegalArgumentException("Invalid cursor position");
         }
         this.cursorPos = cursorPos + cursorOffset;
@@ -202,6 +216,25 @@ class TransliterationRule {
         if (anchorEnd) {
             flags |= ANCHOR_END;
         }
+
+        anteContext = null;
+        if (anteContextLength > 0) {
+            anteContext = new StringMatcher(pattern.substring(0, anteContextLength),
+                                            false, data);
+        }
+
+        key = null;
+        if (keyLength > 0) {
+            key = new StringMatcher(pattern.substring(anteContextLength, anteContextLength + keyLength),
+                                    false, data);
+        }
+
+        int postContextLength = pattern.length() - keyLength - anteContextLength;
+        postContext = null;
+        if (postContextLength > 0) {
+            postContext = new StringMatcher(pattern.substring(anteContextLength + keyLength),
+                                            false, data);
+        }
     }
 
     /**
@@ -227,7 +260,7 @@ class TransliterationRule {
      * unless the first character of the key is a set.  If it's a
      * set, or otherwise can match multiple keys, the index value is -1.
      */
-   final int getIndexValue() {
+    final int getIndexValue() {
         if (anteContextLength == pattern.length()) {
             // A pattern with just ante context {such as foo)>bar} can
             // match any key.
@@ -248,15 +281,10 @@ class TransliterationRule {
      * then it will match any key.
      */
     final boolean matchesIndexValue(int v) {
-        if (anteContextLength == pattern.length()) {
-            // A pattern with just ante context {such as foo)>bar} can
-            // match any key.
-            return true;
-        }
-        int c = UTF16.charAt(pattern, anteContextLength);
-        UnicodeMatcher matcher = data.lookup(c);
-        return matcher == null ? (c & 0xFF) == v :
-            matcher.matchesIndexValue(v);
+        // Delegate to the key, or if there is none, to the postContext.
+        // If there is neither then we match any key; return true.
+        UnicodeMatcher m = (key != null) ? key : postContext;
+        return (m != null) ? m.matchesIndexValue(v) : true;
     }
 
     /**
@@ -391,112 +419,66 @@ class TransliterationRule {
         int oText; // offset into 'text'
         int newStart = 0;
         int minOText;
-        int oPattern; // offset into 'pattern'
-
-        // Backup oText by one
-        oText = posBefore(text, pos.start);
 
         // Note (1): We process text in 16-bit code units, rather than
         // 32-bit code points.  This works because stand-ins are
         // always in the BMP and because we are doing a literal match
         // operation, which can be done 16-bits at a time.
 
-        for (oPattern=anteContextLength-1; oPattern>=0; --oPattern) {
-            char keyChar = pattern.charAt(oPattern); // See note (1)
-            UnicodeMatcher matcher = data.lookup(keyChar);
-            if (matcher == null) {
-                if (oText >= pos.contextStart &&
-                    keyChar == text.charAt(oText)) { // See note (1)
-                    --oText;
-                } else {
-                    return UnicodeMatcher.U_MISMATCH;
-                }
-            } else {
-                // Subtract 1 from contextStart to make it a reverse limit
-                intRef[0] = oText;
-                if (matcher.matches(text, intRef, pos.contextStart-1, false)
-                    != UnicodeMatcher.U_MATCH) {
-                    return UnicodeMatcher.U_MISMATCH;
-                }
-                oText = intRef[0];
+        int anteLimit = posBefore(text, pos.contextStart);
+
+        int match;
+
+        // Start reverse match at char before pos.start
+        intRef[0] = posBefore(text, pos.start);
+
+        if (anteContext != null) {
+            match = anteContext.matches(text, intRef, anteLimit, false);
+            if (match != UnicodeMatcher.U_MATCH) {
+                return UnicodeMatcher.U_MISMATCH;
             }
         }
 
+        oText = intRef[0];
+        
         minOText = posAfter(text, oText);
 
         // ------------------------ Start Anchor ------------------------
 
-        if (((flags & ANCHOR_START) != 0) && oText != posBefore(text, pos.contextStart)) {
+        if (((flags & ANCHOR_START) != 0) && oText != anteLimit) {
             return UnicodeMatcher.U_MISMATCH;
         }
 
         // -------------------- Key and Post Context --------------------
 
-        oPattern = 0;
-        oText = pos.start;
-        keyLimit = 0;
-        while (oPattern < (pattern.length() - anteContextLength)) {
-            if (incremental && oText == pos.limit) {
-                // We've reached the limit without a mismatch and
-                // without completing our match.
+        intRef[0] = pos.start;
+
+        if (key != null) {
+            match = key.matches(text, intRef, pos.limit, incremental);
+            if (match != UnicodeMatcher.U_MATCH) {
+                return match;
+            }
+        }
+
+        keyLimit = intRef[0];
+
+        if (postContext != null) {
+            if (incremental && keyLimit == pos.limit) {
+                // The key matches just before pos.limit, and there is
+                // a postContext.  Since we are in incremental mode,
+                // we must assume more characters may be inserted at
+                // pos.limit -- this is a partial match.
                 return UnicodeMatcher.U_PARTIAL_MATCH;
             }
 
-            // It might seem that we could do a check like this here:
-            //!if (oText == pos.limit && oPattern < keyLength) {
-            //!    // We're still in the pattern key but we're entering the
-            //!    // post context.
-            // but this won't work if the end of the key is a
-            // zero-length matcher, followed by post context: {a b?} c
-            // Instead, what we do is proceed with matching as usual
-            // so zero-length matchers can work, but restrict the
-            // limit to either pos.limit or pos.contextLimit,
-            // depending on whether we're in the key or in the post
-            // context.
-
-            if (oPattern == keyLength) {
-                keyLimit = oText;
+            match = postContext.matches(text, intRef, pos.contextLimit, incremental);
+            if (match != UnicodeMatcher.U_MATCH) {
+                return match;
             }
-
-            // Restrict the key to match up to pos.limit; the post-context
-            // can match up to pos.contextLimit.
-            int matchLimit = (oPattern < keyLength) ? pos.limit : pos.contextLimit;
-
-            char keyChar = pattern.charAt(anteContextLength + oPattern++); // See note (1)
-            UnicodeMatcher matcher = data.lookup(keyChar);
-            if (matcher == null) {
-                // Don't need the oText < pos.contextLimit check if
-                // incremental is TRUE (because it's done above); do need
-                // it otherwise.
-                if (oText < matchLimit &&
-                    keyChar == text.charAt(oText)) { // See note (1)
-                    ++oText;
-                } else {
-                    return UnicodeMatcher.U_MISMATCH;
-                }
-            } else {
-                intRef[0] = oText;
-                int m = matcher.matches(text, intRef, matchLimit, incremental);                
-                if (m != UnicodeMatcher.U_MATCH) {
-                    return m;
-                }
-                oText = intRef[0];
-            }
-            
-            // This check rendered superfluous by above use of
-            // matchLimit, but kept around for documentation.
-            //!if (oText > pos.limit && oPattern < keyLength) {
-            //!    // We're still in the pattern key but we've entering the
-            //!    // post context.  We must do this check _after_ doing the
-            //!    // match in case we have zero-length matchers like /a?/
-            //!    // at the end of the key.
-            //!    return UnicodeMatcher.U_MISMATCH;
-            //!}
-        }
-        if (oPattern == keyLength) {
-            keyLimit = oText;
         }
 
+        oText = intRef[0];
+
         // ------------------------- Stop Anchor ------------------------
 
         if (((flags & ANCHOR_END)) != 0) {
@@ -711,8 +693,6 @@ class TransliterationRule {
         else {
             UTF16.append(rule, c);
         }
-
-        //System.out.println("rule=" + rule.toString() + " qb=" + quoteBuf.toString());
     }
 
     static final void appendToRule(StringBuffer rule,
@@ -726,8 +706,19 @@ class TransliterationRule {
         }
     }
 
-    static private int[] POW10 = {1, 10, 100, 1000, 10000, 100000, 1000000,
-                                  10000000, 100000000, 1000000000};
+    /**
+     * Given a matcher reference, which may be null, append its
+     * pattern as a literal to the given rule.
+     */
+    static final void appendToRule(StringBuffer rule,
+                                   UnicodeMatcher matcher,
+                                   boolean escapeUnprintable,
+                                   StringBuffer quoteBuf) {
+        if (matcher != null) {
+            appendToRule(rule, matcher.toPattern(escapeUnprintable),
+                         true, escapeUnprintable, quoteBuf);
+        }
+    }
 
     /**
      * Create a source string that represents this rule.  Append it to the
@@ -746,7 +737,7 @@ class TransliterationRule {
         // Do not emit the braces '{' '}' around the pattern if there
         // is neither anteContext nor postContext.
         boolean emitBraces =
-            (anteContextLength != 0) || (keyLength != pattern.length());
+            (anteContext != null) || (postContext != null);
 
         // Emit start anchor
         if ((flags & ANCHOR_START) != 0) {
@@ -754,29 +745,20 @@ class TransliterationRule {
         }
 
         // Emit the input pattern
-        for (i=0; i<pattern.length(); ++i) {
-            if (emitBraces && i == anteContextLength) {
-                appendToRule(rule, '{', true, escapeUnprintable, quoteBuf);
-            }
+        appendToRule(rule, anteContext, escapeUnprintable, quoteBuf);
 
-            if (emitBraces && i == (anteContextLength + keyLength)) {
-                appendToRule(rule, '}', true, escapeUnprintable, quoteBuf);
-            }
-
-            char c = pattern.charAt(i); // Ok to use 16-bits here
-            UnicodeMatcher matcher = data.lookup(c);
-            if (matcher == null) {
-                appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
-            } else {
-                appendToRule(rule, matcher.toPattern(escapeUnprintable),
-                              true, escapeUnprintable, quoteBuf);
-            }
+        if (emitBraces) {
+            appendToRule(rule, '{', true, escapeUnprintable, quoteBuf);
         }
 
-        if (emitBraces && i == (anteContextLength + keyLength)) {
+        appendToRule(rule, key, escapeUnprintable, quoteBuf);
+
+        if (emitBraces) {
             appendToRule(rule, '}', true, escapeUnprintable, quoteBuf);
         }
 
+        appendToRule(rule, postContext, escapeUnprintable, quoteBuf);
+
         // Emit end anchor
         if ((flags & ANCHOR_END) != 0) {
             rule.append('$');
@@ -807,17 +789,7 @@ class TransliterationRule {
                 ++seg; // make 1-based
                 appendToRule(rule, 0x20, true, escapeUnprintable, quoteBuf);
                 rule.append('$');
-                boolean show = false; // true if we should display digits
-                for (int p=9; p>=0; --p) {
-                    int d = seg / POW10[p];
-                    seg -= d * POW10[p];
-                    if (d != 0 || p == 0) {
-                        show = true;
-                    }
-                    if (show) {
-                        rule.append((char)(48+d));
-                    }
-                }
+                Utility.appendNumber(rule, seg, 10, 1);
                 rule.append(' ');
             }
         }
@@ -878,6 +850,9 @@ class TransliterationRule {
 
 /**
  * $Log: TransliterationRule.java,v $
+ * Revision 1.40  2001/12/11 17:43:57  alan
+ * jitterbug 1591: clean up TransliterationRule
+ *
  * Revision 1.39  2001/12/03 21:33:58  alan
  * jitterbug 1373: more fixes to support supplementals
  *