From c5d4c5ee6c0749c13803b725c31a716748d05cf2 Mon Sep 17 00:00:00 2001 From: Alan Liu Date: Wed, 20 Mar 2002 00:42:02 +0000 Subject: [PATCH] ICU-1779 set data object for entire tree of functors under a rule X-SVN-Rev: 8131 --- icu4c/source/i18n/funcrepl.cpp | 7 +++++++ icu4c/source/i18n/funcrepl.h | 5 +++++ icu4c/source/i18n/quant.cpp | 7 +++++++ icu4c/source/i18n/quant.h | 5 +++++ icu4c/source/i18n/rbt_data.cpp | 14 ++++++++++---- icu4c/source/i18n/rbt_data.h | 14 +++++++++++--- icu4c/source/i18n/rbt_rule.cpp | 9 +++++++++ icu4c/source/i18n/rbt_rule.h | 6 +----- icu4c/source/i18n/rbt_set.h | 2 +- icu4c/source/i18n/strmatch.cpp | 29 +++++++++++++++++++++------- icu4c/source/i18n/strmatch.h | 7 ++++++- icu4c/source/i18n/strrepl.cpp | 16 +++++++++++++++ icu4c/source/i18n/strrepl.h | 5 +++++ icu4c/source/i18n/unicode/unifilt.h | 5 +++++ icu4c/source/i18n/unicode/unifunct.h | 11 +++++++++++ 15 files changed, 121 insertions(+), 21 deletions(-) diff --git a/icu4c/source/i18n/funcrepl.cpp b/icu4c/source/i18n/funcrepl.cpp index 937f0e55e72..2b13654903d 100644 --- a/icu4c/source/i18n/funcrepl.cpp +++ b/icu4c/source/i18n/funcrepl.cpp @@ -91,6 +91,13 @@ UnicodeString& FunctionReplacer::toReplacerPattern(UnicodeString& rule, return rule; } +/** + * UnicodeFunctor API + */ +void FunctionReplacer::setData(const TransliterationRuleData* d) { + replacer->setData(d); +} + U_NAMESPACE_END //eof diff --git a/icu4c/source/i18n/funcrepl.h b/icu4c/source/i18n/funcrepl.h index 878b079a488..3a882f64291 100644 --- a/icu4c/source/i18n/funcrepl.h +++ b/icu4c/source/i18n/funcrepl.h @@ -80,6 +80,11 @@ class FunctionReplacer : public UnicodeFunctor, public UnicodeReplacer { */ virtual UnicodeString& toReplacerPattern(UnicodeString& rule, UBool escapeUnprintable) const; + + /** + * UnicodeFunctor API + */ + virtual void setData(const TransliterationRuleData*); }; U_NAMESPACE_END diff --git a/icu4c/source/i18n/quant.cpp b/icu4c/source/i18n/quant.cpp index e345d99930a..5a4702603c7 100644 --- 
a/icu4c/source/i18n/quant.cpp +++ b/icu4c/source/i18n/quant.cpp @@ -114,6 +114,13 @@ UBool Quantifier::matchesIndexValue(uint8_t v) const { return (minCount == 0) || matcher->toMatcher()->matchesIndexValue(v); } +/** + * Implement UnicodeFunctor + */ +void Quantifier::setData(const TransliterationRuleData* d) { + matcher->setData(d); +} + U_NAMESPACE_END //eof diff --git a/icu4c/source/i18n/quant.h b/icu4c/source/i18n/quant.h index bf6b4a6a462..6720f94777f 100644 --- a/icu4c/source/i18n/quant.h +++ b/icu4c/source/i18n/quant.h @@ -56,6 +56,11 @@ class Quantifier : public UnicodeFunctor, public UnicodeMatcher { */ virtual UBool matchesIndexValue(uint8_t v) const; + /** + * UnicodeFunctor API + */ + virtual void setData(const TransliterationRuleData*); + private: static void appendNumber(UnicodeString& result, int32_t n); diff --git a/icu4c/source/i18n/rbt_data.cpp b/icu4c/source/i18n/rbt_data.cpp index 2266c64ad47..1e0f9d6792e 100644 --- a/icu4c/source/i18n/rbt_data.cpp +++ b/icu4c/source/i18n/rbt_data.cpp @@ -68,16 +68,22 @@ TransliterationRuleData::~TransliterationRuleData() { } } +UnicodeFunctor* +TransliterationRuleData::lookup(UChar32 standIn) const { + int32_t i = standIn - variablesBase; + return (i >= 0 && i < variablesLength) ? variables[i] : 0; +} + UnicodeMatcher* TransliterationRuleData::lookupMatcher(UChar32 standIn) const { - int32_t i = standIn - variablesBase; - return (i >= 0 && i < variablesLength) ? variables[i]->toMatcher() : 0; + UnicodeFunctor *f = lookup(standIn); + return (f != 0) ? f->toMatcher() : 0; } UnicodeReplacer* TransliterationRuleData::lookupReplacer(UChar32 standIn) const { - int32_t i = standIn - variablesBase; - return (i >= 0 && i < variablesLength) ? variables[i]->toReplacer() : 0; + UnicodeFunctor *f = lookup(standIn); + return (f != 0) ? 
f->toReplacer() : 0; } U_NAMESPACE_END diff --git a/icu4c/source/i18n/rbt_data.h b/icu4c/source/i18n/rbt_data.h index 2b6c7e35851..57d73c19422 100644 --- a/icu4c/source/i18n/rbt_data.h +++ b/icu4c/source/i18n/rbt_data.h @@ -35,7 +35,7 @@ class Hashtable; * data structure handles this. See the parsing code for more * details. */ -class TransliterationRuleData { +class U_I18N_API TransliterationRuleData { public: @@ -88,15 +88,23 @@ public: ~TransliterationRuleData(); + /** + * Given a stand-in character, return the UnicodeFunctor that it + * represents, or NULL if it doesn't represent anything. + */ + UnicodeFunctor* lookup(UChar32 standIn) const; + /** * Given a stand-in character, return the UnicodeMatcher that it - * represents, or NULL. + * represents, or NULL if it doesn't represent anything or if it + * represents something that is not a matcher. */ UnicodeMatcher* lookupMatcher(UChar32 standIn) const; /** * Given a stand-in character, return the UnicodeReplacer that it - * represents, or NULL. + * represents, or NULL if it doesn't represent anything or if it + * represents something that is not a replacer. 
*/ UnicodeReplacer* lookupReplacer(UChar32 standIn) const; }; diff --git a/icu4c/source/i18n/rbt_rule.cpp b/icu4c/source/i18n/rbt_rule.cpp index 9fabb8b391b..297877beae1 100644 --- a/icu4c/source/i18n/rbt_rule.cpp +++ b/icu4c/source/i18n/rbt_rule.cpp @@ -486,6 +486,15 @@ UnicodeString& TransliterationRule::toRule(UnicodeString& rule, return rule; } +void TransliterationRule::setData(const TransliterationRuleData* d) { + data = d; + anteContext->setData(d); + postContext->setData(d); + key->setData(d); + output->setData(d); + // Don't have to do segments since they are in the context or key +} + U_NAMESPACE_END //eof diff --git a/icu4c/source/i18n/rbt_rule.h b/icu4c/source/i18n/rbt_rule.h index 39bf113e990..290e66b83aa 100644 --- a/icu4c/source/i18n/rbt_rule.h +++ b/icu4c/source/i18n/rbt_rule.h @@ -204,7 +204,7 @@ public: * Change the data object that this rule belongs to. Used * internally by the TransliterationRuleData copy constructor. */ - inline void setData(const TransliterationRuleData* data); + void setData(const TransliterationRuleData* data); /** * Return the preceding context length. This method is needed to @@ -281,10 +281,6 @@ public: friend class StringMatcher; }; -inline void TransliterationRule::setData(const TransliterationRuleData* d) { - data = d; -} - U_NAMESPACE_END #endif diff --git a/icu4c/source/i18n/rbt_set.h b/icu4c/source/i18n/rbt_set.h index 79622366c25..13e7475f49f 100644 --- a/icu4c/source/i18n/rbt_set.h +++ b/icu4c/source/i18n/rbt_set.h @@ -23,7 +23,7 @@ class UnicodeString; * A set of rules for a RuleBasedTransliterator. * @author Alan Liu */ -class TransliterationRuleSet { +class U_I18N_API TransliterationRuleSet { /** * Vector of rules, in the order added. This is used while the * rule set is getting built. 
After that, freeze() reorders and diff --git a/icu4c/source/i18n/strmatch.cpp b/icu4c/source/i18n/strmatch.cpp index 4d21def96a9..8e7bfb46491 100644 --- a/icu4c/source/i18n/strmatch.cpp +++ b/icu4c/source/i18n/strmatch.cpp @@ -19,7 +19,7 @@ StringMatcher::StringMatcher(const UnicodeString& theString, int32_t limit, int32_t segmentNum, const TransliterationRuleData& theData) : - data(theData), + data(&theData), segmentNumber(segmentNum), matchStart(-1), matchLimit(-1) @@ -79,7 +79,7 @@ UMatchDegree StringMatcher::matches(const Replaceable& text, // Match in the reverse direction for (i=pattern.length()-1; i>=0; --i) { UChar keyChar = pattern.charAt(i); - UnicodeMatcher* subm = data.lookupMatcher(keyChar); + UnicodeMatcher* subm = data->lookupMatcher(keyChar); if (subm == 0) { if (cursor > limit && keyChar == text.charAt(cursor)) { @@ -110,7 +110,7 @@ UMatchDegree StringMatcher::matches(const Replaceable& text, return U_PARTIAL_MATCH; } UChar keyChar = pattern.charAt(i); - UnicodeMatcher* subm = data.lookupMatcher(keyChar); + UnicodeMatcher* subm = data->lookupMatcher(keyChar); if (subm == 0) { // Don't need the cursor < limit check if // incremental is TRUE (because it's done above); do need @@ -151,7 +151,7 @@ UnicodeString& StringMatcher::toPattern(UnicodeString& result, } for (int32_t i=0; i<pattern.length(); ++i) { UChar keyChar = pattern.charAt(i); - const UnicodeMatcher* m = data.lookupMatcher(keyChar); + const UnicodeMatcher* m = data->lookupMatcher(keyChar); if (m == 0) { ICU_Utility::appendToRule(result, keyChar, FALSE, escapeUnprintable, quoteBuf); } else { @@ -176,7 +176,7 @@ UBool StringMatcher::matchesIndexValue(uint8_t v) const { return TRUE; } UChar32 c = pattern.char32At(0); - const UnicodeMatcher *m = data.lookupMatcher(c); + const UnicodeMatcher *m = data->lookupMatcher(c); return (m == 0) ? ((c & 0xFF) == v) : m->matchesIndexValue(v); } @@ -219,14 +219,29 @@ UnicodeString& StringMatcher::toReplacerPattern(UnicodeString& rule, } /** - * Remove any match data. This must be called before performing a + * Remove any match info. 
This must be called before performing a * set of matches with this segment. */ void StringMatcher::resetMatch() { matchStart = matchLimit = -1; } +/** + * Implement UnicodeFunctor + */ +void StringMatcher::setData(const TransliterationRuleData* d) { + data = d; + int32_t i = 0; + while (i<pattern.length()) { + UChar32 c = pattern.char32At(i); + UnicodeFunctor* f = data->lookup(c); + if (f != NULL) { + f->setData(data); + } + i += UTF_CHAR_LENGTH(c); + } +} + U_NAMESPACE_END //eof - diff --git a/icu4c/source/i18n/strmatch.h b/icu4c/source/i18n/strmatch.h index 8243b0ef18c..148802323c1 100644 --- a/icu4c/source/i18n/strmatch.h +++ b/icu4c/source/i18n/strmatch.h @@ -96,6 +96,11 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico */ virtual UBool matchesIndexValue(uint8_t v) const; + /** + * Implement UnicodeFunctor + */ + virtual void setData(const TransliterationRuleData*); + /** * Replace characters in 'text' from 'start' to 'limit' with the * output text of this object. Update the 'cursor' parameter to @@ -151,7 +156,7 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico * Context object that maps stand-ins to matcher and replacer * objects. */ - const TransliterationRuleData& data; + const TransliterationRuleData* data; /** * The segment number, 1-based, or 0 if not a segment. 
diff --git a/icu4c/source/i18n/strrepl.cpp b/icu4c/source/i18n/strrepl.cpp index 1ecdb796c40..c38c3b1820d 100644 --- a/icu4c/source/i18n/strrepl.cpp +++ b/icu4c/source/i18n/strrepl.cpp @@ -257,6 +257,22 @@ UnicodeString& StringReplacer::toReplacerPattern(UnicodeString& rule, return rule; } +/** + * UnicodeFunctor API + */ +void StringReplacer::setData(const TransliterationRuleData* d) { + data = d; + int32_t i = 0; + while (i<output.length()) { + UChar32 c = output.char32At(i); + UnicodeFunctor* f = data->lookup(c); + if (f != NULL) { + f->setData(data); + } + i += UTF_CHAR_LENGTH(c); + } +} + U_NAMESPACE_END //eof diff --git a/icu4c/source/i18n/strrepl.h b/icu4c/source/i18n/strrepl.h index 964c3fc6b1f..b8537a01863 100644 --- a/icu4c/source/i18n/strrepl.h +++ b/icu4c/source/i18n/strrepl.h @@ -126,6 +126,11 @@ class StringReplacer : public UnicodeFunctor, public UnicodeReplacer { */ virtual UnicodeString& toReplacerPattern(UnicodeString& result, UBool escapeUnprintable) const; + + /** + * UnicodeFunctor API + */ + virtual void setData(const TransliterationRuleData*); }; U_NAMESPACE_END diff --git a/icu4c/source/i18n/unicode/unifilt.h b/icu4c/source/i18n/unicode/unifilt.h index 0e38264fd1e..f7a61949ee9 100644 --- a/icu4c/source/i18n/unicode/unifilt.h +++ b/icu4c/source/i18n/unicode/unifilt.h @@ -81,6 +81,11 @@ public: int32_t limit, UBool incremental); + /** + * UnicodeFunctor API. Nothing to do. 
+ */ + virtual void setData(const TransliterationRuleData*) {} + protected: UnicodeFilter(); diff --git a/icu4c/source/i18n/unicode/unifunct.h b/icu4c/source/i18n/unicode/unifunct.h index 87c18c07a28..f58dd1be1a8 100644 --- a/icu4c/source/i18n/unicode/unifunct.h +++ b/icu4c/source/i18n/unicode/unifunct.h @@ -16,6 +16,7 @@ U_NAMESPACE_BEGIN class UnicodeMatcher; class UnicodeReplacer; +class TransliterationRuleData; /** * UnicodeFunctor is an abstract base class for objects @@ -97,6 +98,16 @@ class U_I18N_API UnicodeFunctor { */ virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }; + /** + * Set the data object associated with this functor. The data + * object provides context for functor-to-standin mapping. This + * method is required when assigning a functor to a different data + * object. This function MAY GO AWAY later if the architecture is + * changed to pass data object pointers through the API. + * @draft + */ + virtual void setData(const TransliterationRuleData*) = 0; + protected: UnicodeFunctor();