ICU-474 fix UTransPosition handling

X-SVN-Rev: 1688
2025-04-07 14:31:31 +00:00 · 2000-06-29 00:18:43 +00:00 · 2000-06-29 00:18:43 +00:00 · eff9454c76
commit eff9454c76
parent d7b44985af
5 changed files with 37 additions and 52 deletions
--- a/icu4c/source/i18n/rbt.cpp
+++ b/icu4c/source/i18n/rbt.cpp
@ -88,10 +88,6 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition&
     * exzd|    done
     */

-    int32_t contextStart = index.contextStart;
-    int32_t limit = index.limit;
-    int32_t cursor = index.start;
-
    /* A rule like
     *   a>b|a
     * creates an infinite loop. To prevent that, we put an arbitrary
@ -102,7 +98,7 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition&
     * uint32_t.
     */
    uint32_t loopCount = 0;
-    uint32_t loopLimit = limit - cursor;
+    uint32_t loopLimit = index.limit - index.start;
    if (loopLimit >= 0x10000000) {
        loopLimit = 0xFFFFFFFF;
    } else {
@ -111,13 +107,11 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition&

    UBool isPartial = FALSE;

-    while (cursor < limit && loopCount <= loopLimit) {
+    while (index.start < index.limit && loopCount <= loopLimit) {
        TransliterationRule* r = isIncremental ?
-            data->ruleSet.findIncrementalMatch(text, contextStart, limit, cursor,
-                                               *data, isPartial,
+            data->ruleSet.findIncrementalMatch(text, index, *data, isPartial,
                                               getFilter()) :
-            data->ruleSet.findMatch(text, contextStart, limit,
-                                    cursor, *data,
+            data->ruleSet.findMatch(text, index, *data,
                                    getFilter());

        /* If we match a rule then apply it by replacing the key
@ -132,17 +126,15 @@ RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition&
            if (isPartial) { // always FALSE unless isIncremental
                break;
            } else {
-                ++cursor;
+                ++index.start;
            }
        } else {
            // Delegate replacement to TransliterationRule object
-            limit += r->replace(text, cursor, *data);
-            cursor += r->getCursorPos();
+            int32_t lenDelta = r->replace(text, index.start, *data);
+            index.limit += lenDelta;
+            index.contextLimit += lenDelta;
+            index.start += r->getCursorPos();
            ++loopCount;
        }
    }
-
-    index.contextLimit += limit - index.limit;
-    index.limit = limit;
-    index.start = cursor;
 }
--- a/icu4c/source/i18n/rbt_rule.cpp
+++ b/icu4c/source/i18n/rbt_rule.cpp
@ -297,13 +297,13 @@ UBool TransliterationRule::masks(const TransliterationRule& r2) const {
 * <tt>null</tt> then no filtering is applied.
 */
 UBool TransliterationRule::matches(const Replaceable& text,
-                                    int32_t start, int32_t limit,
-                                    int32_t cursor,
-                                    const TransliterationRuleData& data,
-                                    const UnicodeFilter* filter) const {
+                                   const UTransPosition& pos,
+                                   const TransliterationRuleData& data,
+                                   const UnicodeFilter* filter) const {
    // Match anteContext, key, and postContext
-    cursor -= anteContextLength;
-    if (cursor < start || (cursor + pattern.length()) > limit) {
+    int32_t cursor = pos.start - anteContextLength;
+    if (cursor < pos.contextStart ||
+        (cursor + pattern.length()) > pos.contextLimit) {
        return FALSE;
    }
    for (int32_t i=0; i<pattern.length(); ++i, ++cursor) {
@ -341,12 +341,10 @@ UBool TransliterationRule::matches(const Replaceable& text,
 * @see #FULL_MATCH
 */
 int32_t TransliterationRule::getMatchDegree(const Replaceable& text,
-                                            int32_t start, int32_t limit,
-                                            int32_t cursor,
+                                            const UTransPosition& pos,
                                            const TransliterationRuleData& data,
                                            const UnicodeFilter* filter) const {
-    int len = getRegionMatchLength(text, start, limit, cursor - anteContextLength,
-                                   pattern, data, filter);
+    int len = getRegionMatchLength(text, pos, pattern, data, filter);
    return len < anteContextLength ? MISMATCH :
        (len < pattern.length() ? PARTIAL_MATCH : FULL_MATCH);
 }
@ -375,16 +373,16 @@ int32_t TransliterationRule::getMatchDegree(const Replaceable& text,
 * match this rule.
 */
 int32_t TransliterationRule::getRegionMatchLength(const Replaceable& text,
-                                          int32_t start,
-                                          int32_t limit, int32_t cursor,
+                                          const UTransPosition& pos,
                                          const UnicodeString& templ,
                                          const TransliterationRuleData& data,
                                          const UnicodeFilter* filter) const {
-    if (cursor < start) {
+    int32_t cursor = pos.start - anteContextLength;
+    if (cursor < pos.contextStart) {
        return -1;
    }
    int32_t i;
-    for (i=0; i<templ.length() && cursor<limit; ++i, ++cursor) {
+    for (i=0; i<templ.length() && cursor<pos.contextLimit; ++i, ++cursor) {
        if (!charMatches(templ.charAt(i), text.charAt(cursor),
                         data, filter)) {
            return -1;
--- a/icu4c/source/i18n/rbt_rule.h
+++ b/icu4c/source/i18n/rbt_rule.h
@ -9,6 +9,7 @@
 #define RBT_RULE_H

 #include "unicode/unistr.h"
+#include "unicode/utrans.h"

 class Replaceable;
 class TransliterationRuleData;
@ -256,10 +257,9 @@ public:
     * <tt>null</tt> then no filtering is applied.
     */
    virtual UBool matches(const Replaceable& text,
-                           int32_t start, int32_t limit,
-                           int32_t cursor,
-                           const TransliterationRuleData& data,
-                           const UnicodeFilter* filter) const;
+                          const UTransPosition& pos,
+                          const TransliterationRuleData& data,
+                          const UnicodeFilter* filter) const;

    /**
     * Return the degree of match between this rule and the given text.  The
@ -287,8 +287,7 @@ public:
     * @see #FULL_MATCH
     */
    virtual int32_t getMatchDegree(const Replaceable& text,
-                                   int32_t start, int32_t limit,
-                                   int32_t cursor,
+                                   const UTransPosition& pos,
                                   const TransliterationRuleData& data,
                                   const UnicodeFilter* filter) const;

@ -315,8 +314,8 @@ public:
     * match any characters, otherwise the number of characters of text that
     * match this rule.
     */
-    virtual int32_t getRegionMatchLength(const Replaceable& text, int32_t start,
-                                         int32_t limit, int32_t cursor,
+    virtual int32_t getRegionMatchLength(const Replaceable& text,
+                                         const UTransPosition& pos,
                                         const UnicodeString& templ,
                                         const TransliterationRuleData& data,
                                         const UnicodeFilter* filter) const;
--- a/icu4c/source/i18n/rbt_set.cpp
+++ b/icu4c/source/i18n/rbt_set.cpp
@ -203,16 +203,15 @@ void TransliterationRuleSet::freeze(const TransliterationRuleData& data,
 */
 TransliterationRule*
 TransliterationRuleSet::findMatch(const Replaceable& text,
-                                  int32_t start, int32_t limit,
-                                  int32_t cursor,
+                                  const UTransPosition& pos,
                                  const TransliterationRuleData& data,
                                  const UnicodeFilter* filter) const {
    /* We only need to check our indexed bin of the rule table,
     * based on the low byte of the first key character.
     */
-    int16_t x = text.charAt(cursor) & 0xFF;
+    int16_t x = text.charAt(pos.start) & 0xFF;
    for (int32_t i=index[x]; i<index[x+1]; ++i) {
-        if (rules[i]->matches(text, start, limit, cursor, data, filter)) {
+        if (rules[i]->matches(text, pos, data, filter)) {
            return rules[i];
        }
    }
@ -248,8 +247,7 @@ TransliterationRuleSet::findMatch(const Replaceable& text,
 */
 TransliterationRule*
 TransliterationRuleSet::findIncrementalMatch(const Replaceable& text,
-                                             int32_t start,
-                                             int32_t limit, int32_t cursor,
+                                             const UTransPosition& pos,
                                             const TransliterationRuleData& data,
                                             UBool& isPartial,
                                             const UnicodeFilter* filter) const {
@ -258,10 +256,9 @@ TransliterationRuleSet::findIncrementalMatch(const Replaceable& text,
     * based on the low byte of the first key character.
     */
    isPartial = FALSE;
-    int16_t x = text.charAt(cursor) & 0xFF;
+    int16_t x = text.charAt(pos.start) & 0xFF;
    for (int32_t i=index[x]; i<index[x+1]; ++i) {
-        int32_t match = rules[i]->getMatchDegree(text, start, limit, cursor,
-                                                 data, filter);
+        int32_t match = rules[i]->getMatchDegree(text, pos, data, filter);
        switch (match) {
        case TransliterationRule::FULL_MATCH:
            return rules[i];
--- a/icu4c/source/i18n/rbt_set.h
+++ b/icu4c/source/i18n/rbt_set.h
@ -9,6 +9,7 @@
 #define RBT_SET_H

 #include "uvector.h"
+#include "unicode/utrans.h"

 class Replaceable;
 class TransliterationRule;
@ -110,8 +111,7 @@ public:
     * @return the matching rule, or null if none found.
     */
    virtual TransliterationRule* findMatch(const Replaceable& text,
-                                           int32_t start, int32_t limit,
-                                           int32_t cursor,
+                                           const UTransPosition& pos,
                                           const TransliterationRuleData& data,
                                           const UnicodeFilter* filter) const;
    
@ -143,8 +143,7 @@ public:
     * does not have enough text yet to unambiguously match a rule.
     */
    virtual TransliterationRule* findIncrementalMatch(const Replaceable& text,
-                                              int32_t start,
-                                              int32_t limit, int32_t cursor,
+                                              const UTransPosition& pos,
                                              const TransliterationRuleData& data,
                                              UBool& isPartial,
                                              const UnicodeFilter* filter) const;