ICU-1052 fix TransliterationRuleData copy constructor to reset data pointer in contained RBT objects

X-SVN-Rev: 5766
2025-04-07 14:31:31 +00:00 · 2001-09-18 00:24:14 +00:00 · 2001-09-18 00:24:14 +00:00 · 2aa6b22c0f
commit 2aa6b22c0f
parent 21f946cdd2
6 changed files with 39 additions and 12 deletions
--- a/icu4c/source/i18n/rbt_data.cpp
+++ b/icu4c/source/i18n/rbt_data.cpp
@ -33,6 +33,7 @@ TransliterationRuleData::TransliterationRuleData(const TransliterationRuleData&
    variablesLength(other.variablesLength),
    segmentBase(other.segmentBase)
 {
+    ruleSet.setData(this); // ruleSet must already be frozen

    UErrorCode status = U_ZERO_ERROR;
    variableNames = new Hashtable(status);
--- a/icu4c/source/i18n/rbt_pars.cpp
+++ b/icu4c/source/i18n/rbt_pars.cpp
@ -1169,7 +1169,7 @@ int32_t TransliteratorParser::parseRule(int32_t pos, int32_t limit) {
                                 right->text, right->cursor, right->cursorOffset,
                                 left->createSegments(status),
                                 left->anchorStart, left->anchorEnd,
-                                 *data,
+                                 data,
                                 status), status);

    return pos;
--- a/icu4c/source/i18n/rbt_rule.cpp
+++ b/icu4c/source/i18n/rbt_rule.cpp
@ -70,7 +70,7 @@ TransliterationRule::TransliterationRule(const UnicodeString& input,
                                         int32_t cursorPosition, int32_t cursorOffset,
                                         int32_t* adoptedSegs,
                                         UBool anchorStart, UBool anchorEnd,
-                                         const TransliterationRuleData& theData,
+                                         const TransliterationRuleData* theData,
                                         UErrorCode& status) :
    data(theData) {

@ -199,7 +199,7 @@ int16_t TransliterationRule::getIndexValue() const {
        return -1;
    }
    UChar32 c = pattern.char32At(anteContextLength);
-    return (int16_t)(data.lookup(c) == NULL ? (c & 0xFF) : -1);
+    return (int16_t)(data->lookup(c) == NULL ? (c & 0xFF) : -1);
 }

 /**
@ -219,7 +219,7 @@ UBool TransliterationRule::matchesIndexValue(uint8_t v) const {
        return TRUE;
    }
    UChar32 c = pattern.char32At(anteContextLength);
-    const UnicodeMatcher* matcher = data.lookup(c);
+    const UnicodeMatcher* matcher = data->lookup(c);
    return matcher == NULL ? (uint8_t(c) == v) :
        matcher->matchesIndexValue(v);
 }
@ -367,7 +367,7 @@ UMatchDegree TransliterationRule::matchAndReplace(Replaceable& text,

    for (i=anteContextLength-1; i>=0; --i) {
        UChar keyChar = pattern.charAt(i);
-        const UnicodeMatcher* matcher = data.lookup(keyChar);
+        const UnicodeMatcher* matcher = data->lookup(keyChar);
        if (matcher == 0) {
            if (cursor >= pos.contextStart &&
                keyChar == text.charAt(cursor)) {
@ -433,7 +433,7 @@ UMatchDegree TransliterationRule::matchAndReplace(Replaceable& text,
            keyLimit = cursor;
        }
        UChar keyChar = pattern.charAt(anteContextLength + i++);
-        const UnicodeMatcher* matcher = data.lookup(keyChar);
+        const UnicodeMatcher* matcher = data->lookup(keyChar);
        if (matcher == 0) {
            // Don't need the cursor < pos.contextLimit check if
            // incremental is TRUE (because it's done above); do need
@ -514,7 +514,7 @@ UMatchDegree TransliterationRule::matchAndReplace(Replaceable& text,
                newStart = dest - (keyLimit - pos.start);
            }
            UChar32 c = output.char32At(i);
-            int32_t b = data.lookupSegmentReference(c);
+            int32_t b = data->lookupSegmentReference(c);
            if (b < 0) {
                // Accumulate straight (non-segment) text.
                buf.append(c);
@ -736,7 +736,7 @@ UnicodeString& TransliterationRule::toRule(UnicodeString& rule,
        }

        UChar c = pattern.charAt(i);
-        const UnicodeMatcher *matcher = data.lookup(c);
+        const UnicodeMatcher *matcher = data->lookup(c);
        if (matcher == 0) {
            appendToRule(rule, c, FALSE, escapeUnprintable, quoteBuf);
        } else {
@ -772,7 +772,7 @@ UnicodeString& TransliterationRule::toRule(UnicodeString& rule,
            appendToRule(rule, (UChar) 0x007C /*|*/, TRUE, escapeUnprintable, quoteBuf);
        }
        UChar c = output.charAt(i);
-        int32_t seg = data.lookupSegmentReference(c);
+        int32_t seg = data->lookupSegmentReference(c);
        if (seg < 0) {
            appendToRule(rule, c, FALSE, escapeUnprintable, quoteBuf);
        } else {
--- a/icu4c/source/i18n/rbt_rule.h
+++ b/icu4c/source/i18n/rbt_rule.h
@ -111,10 +111,10 @@ private:
    };

    /**
-     * A reference to the data for this rule.  The data provides
+     * An alias pointer to the data for this rule.  The data provides
     * lookup services for matchers and segments.
     */
-    const TransliterationRuleData& data;
+    const TransliterationRuleData* data;

 public:

@ -155,7 +155,7 @@ public:
                        int32_t cursorPosition, int32_t cursorOffset,
                        int32_t* adoptedSegs,
                        UBool anchorStart, UBool anchorEnd,
-                        const TransliterationRuleData& data,
+                        const TransliterationRuleData* data,
                        UErrorCode& status);

    /**
@ -168,6 +168,12 @@ public:
     */
    virtual ~TransliterationRule();

+    /**
+     * Change the data object that this rule belongs to.  Used
+     * internally by the TransliterationRuleData copy constructor.
+     */
+    inline void setData(const TransliterationRuleData* data);
+
    /**
     * Return the position of the cursor within the output string.
     * @return a value from 0 to <code>getOutput().length()</code>, inclusive.
@ -261,4 +267,8 @@ public:
                             UnicodeString& quoteBuf);
 };

+inline void TransliterationRule::setData(const TransliterationRuleData* d) {
+    data = d; // only the alias pointer is replaced; nothing is deleted here
+}
+
 #endif
--- a/icu4c/source/i18n/rbt_set.cpp
+++ b/icu4c/source/i18n/rbt_set.cpp
@ -78,6 +78,16 @@ TransliterationRuleSet::~TransliterationRuleSet() {
    delete[] rules;
 }

+void TransliterationRuleSet::setData(const TransliterationRuleData* d) {
+    // Hand the data pointer d to every rule in rules[].  We assume the
+    // rule set has already been frozen: index[256] is used below as the
+    // number of entries in rules[].
+    int32_t len = index[256]; // see freeze()
+    for (int32_t i=0; i<len; ++i) {
+        rules[i]->setData(d);
+    }
+}
+
 /**
 * Return the maximum context length.
 * @return the length of the longest preceding context.
--- a/icu4c/source/i18n/rbt_set.h
+++ b/icu4c/source/i18n/rbt_set.h
@ -64,6 +64,12 @@ public:
     */
    virtual ~TransliterationRuleSet();

+    /**
+     * Change the data object that the rules in this set belong to.  Used
+     * internally by the TransliterationRuleData copy constructor.
+     */
+    void setData(const TransliterationRuleData* data);
+
    /**
     * Return the maximum context length.
     * @return the length of the longest preceding context.