ICU-1779 set data object for entire tree of functors under a rule

X-SVN-Rev: 8131
2025-04-07 22:44:49 +00:00 · 2002-03-20 00:42:02 +00:00 · 2002-03-20 00:42:02 +00:00 · c5d4c5ee6c
commit c5d4c5ee6c
parent 1660406201
15 changed files with 121 additions and 21 deletions
--- a/icu4c/source/i18n/funcrepl.cpp
+++ b/icu4c/source/i18n/funcrepl.cpp
@ -91,6 +91,13 @@ UnicodeString& FunctionReplacer::toReplacerPattern(UnicodeString& rule,
    return rule;
 }

+/**
+ * UnicodeFunctor API: forwards the data object d to the replacer member.
+ */
+void FunctionReplacer::setData(const TransliterationRuleData* d) {
+    replacer->setData(d);
+}
+
 U_NAMESPACE_END

 //eof
--- a/icu4c/source/i18n/funcrepl.h
+++ b/icu4c/source/i18n/funcrepl.h
@ -80,6 +80,11 @@ class FunctionReplacer : public UnicodeFunctor, public UnicodeReplacer {
     */
    virtual UnicodeString& toReplacerPattern(UnicodeString& rule,
                                             UBool escapeUnprintable) const;
+
+    /**
+     * UnicodeFunctor API: forwards the given data object to the replacer.
+     */
+    virtual void setData(const TransliterationRuleData*);
 };

 U_NAMESPACE_END
--- a/icu4c/source/i18n/quant.cpp
+++ b/icu4c/source/i18n/quant.cpp
@ -114,6 +114,13 @@ UBool Quantifier::matchesIndexValue(uint8_t v) const {
    return (minCount == 0) || matcher->toMatcher()->matchesIndexValue(v);
 }

+/**
+ * Implement UnicodeFunctor: forwards the data object d to the matcher member.
+ */
+void Quantifier::setData(const TransliterationRuleData* d) {
+    matcher->setData(d);
+}
+
 U_NAMESPACE_END

 //eof
--- a/icu4c/source/i18n/quant.h
+++ b/icu4c/source/i18n/quant.h
@ -56,6 +56,11 @@ class Quantifier : public UnicodeFunctor, public UnicodeMatcher {
     */
    virtual UBool matchesIndexValue(uint8_t v) const;

+    /**
+     * UnicodeFunctor API: forwards the given data object to the matcher.
+     */
+    virtual void setData(const TransliterationRuleData*);
+
 private:

    static void appendNumber(UnicodeString& result, int32_t n);
--- a/icu4c/source/i18n/rbt_data.cpp
+++ b/icu4c/source/i18n/rbt_data.cpp
@ -68,16 +68,22 @@ TransliterationRuleData::~TransliterationRuleData() {
    }
 }

+// Map a stand-in character to its functor; returns 0 when out of range.
+UnicodeFunctor* TransliterationRuleData::lookup(UChar32 standIn) const {
+    const int32_t offset = standIn - variablesBase;
+    return (offset < 0 || offset >= variablesLength) ? 0 : variables[offset];
+}
+
 UnicodeMatcher*
 TransliterationRuleData::lookupMatcher(UChar32 standIn) const {
-    int32_t i = standIn - variablesBase;
-    return (i >= 0 && i < variablesLength) ? variables[i]->toMatcher() : 0;
+    UnicodeFunctor *f = lookup(standIn);  // 0 when standIn is out of range
+    return (f != 0) ? f->toMatcher() : 0;  // never call through a null functor
 }

 UnicodeReplacer*
 TransliterationRuleData::lookupReplacer(UChar32 standIn) const {
-    int32_t i = standIn - variablesBase;
-    return (i >= 0 && i < variablesLength) ? variables[i]->toReplacer() : 0;
+    UnicodeFunctor *f = lookup(standIn);  // 0 when standIn is out of range
+    return (f != 0) ? f->toReplacer() : 0;  // never call through a null functor
 }

 U_NAMESPACE_END
--- a/icu4c/source/i18n/rbt_data.h
+++ b/icu4c/source/i18n/rbt_data.h
@ -35,7 +35,7 @@ class Hashtable;
 * data structure handles this.  See the parsing code for more
 * details.
 */
-class TransliterationRuleData {
+class U_I18N_API TransliterationRuleData {

 public:

@ -88,15 +88,23 @@ public:

    ~TransliterationRuleData();

+    /**
+     * Given a stand-in character, return the UnicodeFunctor that it
+     * represents, or NULL if it is outside this object's stand-in range.
+     */
+    UnicodeFunctor* lookup(UChar32 standIn) const;
+
    /**
     * Given a stand-in character, return the UnicodeMatcher that it
-     * represents, or NULL.
+     * represents, or NULL if it doesn't represent anything or if it
+     * represents something that is not a matcher.
     */
    UnicodeMatcher* lookupMatcher(UChar32 standIn) const;

    /**
     * Given a stand-in character, return the UnicodeReplacer that it
-     * represents, or NULL.
+     * represents, or NULL if it doesn't represent anything or if it
+     * represents something that is not a replacer.
     */
    UnicodeReplacer* lookupReplacer(UChar32 standIn) const;
 };
--- a/icu4c/source/i18n/rbt_rule.cpp
+++ b/icu4c/source/i18n/rbt_rule.cpp
@ -486,6 +486,15 @@ UnicodeString& TransliterationRule::toRule(UnicodeString& rule,
    return rule;
 }

+// Rebind this rule and its whole functor tree to d; absent (NULL) parts are skipped.
+void TransliterationRule::setData(const TransliterationRuleData* d) {
+    data = d;
+    if (anteContext != NULL) anteContext->setData(d);
+    if (postContext != NULL) postContext->setData(d);
+    if (key != NULL) key->setData(d);
+    if (output != NULL) output->setData(d);  // segments are reached via context/key
+}
+
 U_NAMESPACE_END

 //eof
--- a/icu4c/source/i18n/rbt_rule.h
+++ b/icu4c/source/i18n/rbt_rule.h
@ -204,7 +204,7 @@ public:
     * Change the data object that this rule belongs to.  Used
     * internally by the TransliterationRuleData copy constructor.
     */
-    inline void setData(const TransliterationRuleData* data);
+    void setData(const TransliterationRuleData* data);

    /**
     * Return the preceding context length.  This method is needed to
@ -281,10 +281,6 @@ public:
    friend class StringMatcher;
 };

-inline void TransliterationRule::setData(const TransliterationRuleData* d) {
-    data = d;
-}
-
 U_NAMESPACE_END

 #endif
--- a/icu4c/source/i18n/rbt_set.h
+++ b/icu4c/source/i18n/rbt_set.h
@ -23,7 +23,7 @@ class UnicodeString;
 * A set of rules for a <code>RuleBasedTransliterator</code>.
 * @author Alan Liu
 */
-class TransliterationRuleSet {
+class U_I18N_API TransliterationRuleSet {
    /**
     * Vector of rules, in the order added.  This is used while the
     * rule set is getting built.  After that, freeze() reorders and
--- a/icu4c/source/i18n/strmatch.cpp
+++ b/icu4c/source/i18n/strmatch.cpp
@ -19,7 +19,7 @@ StringMatcher::StringMatcher(const UnicodeString& theString,
                             int32_t limit,
                             int32_t segmentNum,
                             const TransliterationRuleData& theData) :
-    data(theData),
+    data(&theData),
    segmentNumber(segmentNum),
    matchStart(-1),
    matchLimit(-1)
@ -79,7 +79,7 @@ UMatchDegree StringMatcher::matches(const Replaceable& text,
        // Match in the reverse direction
        for (i=pattern.length()-1; i>=0; --i) {
            UChar keyChar = pattern.charAt(i);
-            UnicodeMatcher* subm = data.lookupMatcher(keyChar);
+            UnicodeMatcher* subm = data->lookupMatcher(keyChar);
            if (subm == 0) {
                if (cursor > limit &&
                    keyChar == text.charAt(cursor)) {
@ -110,7 +110,7 @@ UMatchDegree StringMatcher::matches(const Replaceable& text,
                return U_PARTIAL_MATCH;
            }
            UChar keyChar = pattern.charAt(i);
-            UnicodeMatcher* subm = data.lookupMatcher(keyChar);
+            UnicodeMatcher* subm = data->lookupMatcher(keyChar);
            if (subm == 0) {
                // Don't need the cursor < limit check if
                // incremental is TRUE (because it's done above); do need
@ -151,7 +151,7 @@ UnicodeString& StringMatcher::toPattern(UnicodeString& result,
    }
    for (int32_t i=0; i<pattern.length(); ++i) {
        UChar keyChar = pattern.charAt(i);
-        const UnicodeMatcher* m = data.lookupMatcher(keyChar);
+        const UnicodeMatcher* m = data->lookupMatcher(keyChar);
        if (m == 0) {
            ICU_Utility::appendToRule(result, keyChar, FALSE, escapeUnprintable, quoteBuf);
        } else {
@ -176,7 +176,7 @@ UBool StringMatcher::matchesIndexValue(uint8_t v) const {
        return TRUE;
    }
    UChar32 c = pattern.char32At(0);
-    const UnicodeMatcher *m = data.lookupMatcher(c);
+    const UnicodeMatcher *m = data->lookupMatcher(c);
    return (m == 0) ? ((c & 0xFF) == v) : m->matchesIndexValue(v);
 }

@ -219,14 +219,29 @@ UnicodeString& StringMatcher::toReplacerPattern(UnicodeString& rule,
 }

 /**
- * Remove any match data.  This must be called before performing a
+ * Remove any match info.  This must be called before performing a
 * set of matches with this segment.
 */
 void StringMatcher::resetMatch() {
    matchStart = matchLimit = -1;
 }

+/**
+ * Implement UnicodeFunctor: store d and pass it to each stand-in's functor.
+ */
+void StringMatcher::setData(const TransliterationRuleData* d) {
+    data = d;
+    for (int32_t pos = 0; pos < pattern.length(); ) {
+        const UChar32 cp = pattern.char32At(pos);
+        pos += UTF_CHAR_LENGTH(cp);  // step over the whole code point
+        UnicodeFunctor* const functor = data->lookup(cp);
+        if (functor == NULL) {
+            continue;  // not a stand-in; nothing to update
+        }
+        functor->setData(data);
+    }
+}
+
 U_NAMESPACE_END

 //eof
-
--- a/icu4c/source/i18n/strmatch.h
+++ b/icu4c/source/i18n/strmatch.h
@ -96,6 +96,11 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico
     */
    virtual UBool matchesIndexValue(uint8_t v) const;

+    /**
+     * Implement UnicodeFunctor: store the data object, update pattern functors.
+     */
+    virtual void setData(const TransliterationRuleData*);
+
    /**
     * Replace characters in 'text' from 'start' to 'limit' with the
     * output text of this object.  Update the 'cursor' parameter to
@ -151,7 +156,7 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico
     * Context object that maps stand-ins to matcher and replacer
     * objects.
     */
-    const TransliterationRuleData& data;
+    const TransliterationRuleData* data;

    /**
     * The segment number, 1-based, or 0 if not a segment.
--- a/icu4c/source/i18n/strrepl.cpp
+++ b/icu4c/source/i18n/strrepl.cpp
@ -257,6 +257,22 @@ UnicodeString& StringReplacer::toReplacerPattern(UnicodeString& rule,
    return rule;
 }

+/**
+ * UnicodeFunctor API: store d and pass it to each stand-in's functor.
+ */
+void StringReplacer::setData(const TransliterationRuleData* d) {
+    data = d;
+    for (int32_t pos = 0; pos < output.length(); ) {
+        const UChar32 cp = output.char32At(pos);
+        pos += UTF_CHAR_LENGTH(cp);  // step over the whole code point
+        UnicodeFunctor* const functor = data->lookup(cp);
+        if (functor == NULL) {
+            continue;  // not a stand-in; nothing to update
+        }
+        functor->setData(data);
+    }
+}
+
 U_NAMESPACE_END

 //eof
--- a/icu4c/source/i18n/strrepl.h
+++ b/icu4c/source/i18n/strrepl.h
@ -126,6 +126,11 @@ class StringReplacer : public UnicodeFunctor, public UnicodeReplacer {
     */
    virtual UnicodeString& toReplacerPattern(UnicodeString& result,
                                             UBool escapeUnprintable) const;
+
+    /**
+     * UnicodeFunctor API: store the data object, update output functors.
+     */
+    virtual void setData(const TransliterationRuleData*);
 };

 U_NAMESPACE_END
--- a/icu4c/source/i18n/unicode/unifilt.h
+++ b/icu4c/source/i18n/unicode/unifilt.h
@ -81,6 +81,11 @@ public:
                                 int32_t limit,
                                 UBool incremental);

+    /**
+     * UnicodeFunctor API.  Nothing to do: the data object is ignored.
+     */
+    virtual void setData(const TransliterationRuleData*) {}
+
 protected:

    UnicodeFilter();
--- a/icu4c/source/i18n/unicode/unifunct.h
+++ b/icu4c/source/i18n/unicode/unifunct.h
@ -16,6 +16,7 @@ U_NAMESPACE_BEGIN

 class UnicodeMatcher;
 class UnicodeReplacer;
+class TransliterationRuleData;

 /**
 * <code>UnicodeFunctor</code> is an abstract base class for objects
@ -97,6 +98,16 @@ class U_I18N_API UnicodeFunctor {
     */
    virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); };

+    /**
+     * Set the data object associated with this functor.  The data
+     * object provides context for stand-in-to-functor lookup.  This
+     * method is required when assigning a functor to a different data
+     * object.  This function MAY GO AWAY later if the architecture is
+     * changed to pass data object pointers through the API.
+     * @draft
+     */
+    virtual void setData(const TransliterationRuleData*) = 0;
+
 protected:

    UnicodeFunctor();