mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-1779 set data object for entire tree of functors under a rule
X-SVN-Rev: 8131
This commit is contained in:
parent
1660406201
commit
c5d4c5ee6c
15 changed files with 121 additions and 21 deletions
|
@ -91,6 +91,13 @@ UnicodeString& FunctionReplacer::toReplacerPattern(UnicodeString& rule,
|
|||
return rule;
|
||||
}
|
||||
|
||||
/**
|
||||
* UnicodeFunctor API
|
||||
*/
|
||||
void FunctionReplacer::setData(const TransliterationRuleData* d) {
|
||||
replacer->setData(d);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
//eof
|
||||
|
|
|
@ -80,6 +80,11 @@ class FunctionReplacer : public UnicodeFunctor, public UnicodeReplacer {
|
|||
*/
|
||||
virtual UnicodeString& toReplacerPattern(UnicodeString& rule,
|
||||
UBool escapeUnprintable) const;
|
||||
|
||||
/**
|
||||
* UnicodeFunctor API: forwards the data object to the wrapped replacer.
|
||||
*/
|
||||
virtual void setData(const TransliterationRuleData*);
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -114,6 +114,13 @@ UBool Quantifier::matchesIndexValue(uint8_t v) const {
|
|||
return (minCount == 0) || matcher->toMatcher()->matchesIndexValue(v);
|
||||
}
|
||||
|
||||
/**
|
||||
* Implement UnicodeFunctor
|
||||
*/
|
||||
void Quantifier::setData(const TransliterationRuleData* d) {
|
||||
matcher->setData(d);
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
//eof
|
||||
|
|
|
@ -56,6 +56,11 @@ class Quantifier : public UnicodeFunctor, public UnicodeMatcher {
|
|||
*/
|
||||
virtual UBool matchesIndexValue(uint8_t v) const;
|
||||
|
||||
/**
|
||||
* UnicodeFunctor API: forwards the data object to the quantified matcher.
|
||||
*/
|
||||
virtual void setData(const TransliterationRuleData*);
|
||||
|
||||
private:
|
||||
|
||||
static void appendNumber(UnicodeString& result, int32_t n);
|
||||
|
|
|
@ -68,16 +68,22 @@ TransliterationRuleData::~TransliterationRuleData() {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Map a stand-in character to the functor stored for it.
 * @param standIn the stand-in character to resolve
 * @return the entry of the variables array for standIn, or 0 when
 * standIn lies outside [variablesBase, variablesBase + variablesLength)
 */
UnicodeFunctor*
TransliterationRuleData::lookup(UChar32 standIn) const {
    // Offset of the stand-in from the first stand-in code point.
    const int32_t slot = standIn - variablesBase;
    if (slot < 0 || slot >= variablesLength) {
        return 0;
    }
    return variables[slot];
}
|
||||
|
||||
/**
 * Given a stand-in character, return the UnicodeMatcher that it
 * represents.
 * @param standIn the stand-in character to resolve
 * @return 0 if lookup() finds no functor for standIn; otherwise the
 * result of that functor's toMatcher()
 */
UnicodeMatcher*
TransliterationRuleData::lookupMatcher(UChar32 standIn) const {
    // Resolve through lookup() so the range check lives in one place and
    // an empty slot is never dereferenced.  The earlier direct
    // variables[i]->toMatcher() form returned before this path could run
    // and had no null check on the slot.
    UnicodeFunctor *f = lookup(standIn);
    return (f != 0) ? f->toMatcher() : 0;
}
|
||||
|
||||
/**
 * Given a stand-in character, return the UnicodeReplacer that it
 * represents.
 * @param standIn the stand-in character to resolve
 * @return 0 if lookup() finds no functor for standIn; otherwise the
 * result of that functor's toReplacer()
 */
UnicodeReplacer*
TransliterationRuleData::lookupReplacer(UChar32 standIn) const {
    // Resolve through lookup() so the range check lives in one place and
    // an empty slot is never dereferenced.  The earlier direct
    // variables[i]->toReplacer() form returned before this path could run
    // and had no null check on the slot.
    UnicodeFunctor *f = lookup(standIn);
    return (f != 0) ? f->toReplacer() : 0;
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -35,7 +35,7 @@ class Hashtable;
|
|||
* data structure handles this. See the parsing code for more
|
||||
* details.
|
||||
*/
|
||||
class TransliterationRuleData {
|
||||
class U_I18N_API TransliterationRuleData {
|
||||
|
||||
public:
|
||||
|
||||
|
@ -88,15 +88,23 @@ public:
|
|||
|
||||
~TransliterationRuleData();
|
||||
|
||||
/**
|
||||
* Given a stand-in character, return the UnicodeFunctor that it
|
||||
* represents, or NULL if it doesn't represent anything.
|
||||
*/
|
||||
UnicodeFunctor* lookup(UChar32 standIn) const;
|
||||
|
||||
/**
|
||||
* Given a stand-in character, return the UnicodeMatcher that it
|
||||
* represents, or NULL.
|
||||
* represents, or NULL if it doesn't represent anything or if it
|
||||
* represents something that is not a matcher.
|
||||
*/
|
||||
UnicodeMatcher* lookupMatcher(UChar32 standIn) const;
|
||||
|
||||
/**
|
||||
* Given a stand-in character, return the UnicodeReplacer that it
|
||||
* represents, or NULL.
|
||||
* represents, or NULL if it doesn't represent anything or if it
|
||||
* represents something that is not a replacer.
|
||||
*/
|
||||
UnicodeReplacer* lookupReplacer(UChar32 standIn) const;
|
||||
};
|
||||
|
|
|
@ -486,6 +486,15 @@ UnicodeString& TransliterationRule::toRule(UnicodeString& rule,
|
|||
return rule;
|
||||
}
|
||||
|
||||
/**
 * Change the data object that this rule belongs to, and pass the new
 * data object on to the functors that make up the rule.
 * @param d the data object the rule and its functors should use
 */
void TransliterationRule::setData(const TransliterationRuleData* d) {
    data = d;
    // NOTE(review): a rule that has no ante context, post context or key
    // presumably leaves the corresponding pointer null (confirm against
    // the constructor), so each part is forwarded to only when present.
    if (anteContext != 0) {
        anteContext->setData(d);
    }
    if (postContext != 0) {
        postContext->setData(d);
    }
    if (key != 0) {
        key->setData(d);
    }
    if (output != 0) {
        output->setData(d);
    }
    // Don't have to do segments since they are in the context or key
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
//eof
|
||||
|
|
|
@ -204,7 +204,7 @@ public:
|
|||
* Change the data object that this rule belongs to. Used
|
||||
* internally by the TransliterationRuleData copy constructor.
* The new data object is also passed on to the rule's context, key,
* and output functors.
|
||||
*/
|
||||
inline void setData(const TransliterationRuleData* data);
|
||||
void setData(const TransliterationRuleData* data);
|
||||
|
||||
/**
|
||||
* Return the preceding context length. This method is needed to
|
||||
|
@ -281,10 +281,6 @@ public:
|
|||
friend class StringMatcher;
|
||||
};
|
||||
|
||||
inline void TransliterationRule::setData(const TransliterationRuleData* d) {
|
||||
data = d;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif
|
||||
|
|
|
@ -23,7 +23,7 @@ class UnicodeString;
|
|||
* A set of rules for a <code>RuleBasedTransliterator</code>.
|
||||
* @author Alan Liu
|
||||
*/
|
||||
class TransliterationRuleSet {
|
||||
class U_I18N_API TransliterationRuleSet {
|
||||
/**
|
||||
* Vector of rules, in the order added. This is used while the
|
||||
* rule set is getting built. After that, freeze() reorders and
|
||||
|
|
|
@ -19,7 +19,7 @@ StringMatcher::StringMatcher(const UnicodeString& theString,
|
|||
int32_t limit,
|
||||
int32_t segmentNum,
|
||||
const TransliterationRuleData& theData) :
|
||||
data(theData),
|
||||
data(&theData),
|
||||
segmentNumber(segmentNum),
|
||||
matchStart(-1),
|
||||
matchLimit(-1)
|
||||
|
@ -79,7 +79,7 @@ UMatchDegree StringMatcher::matches(const Replaceable& text,
|
|||
// Match in the reverse direction
|
||||
for (i=pattern.length()-1; i>=0; --i) {
|
||||
UChar keyChar = pattern.charAt(i);
|
||||
UnicodeMatcher* subm = data.lookupMatcher(keyChar);
|
||||
UnicodeMatcher* subm = data->lookupMatcher(keyChar);
|
||||
if (subm == 0) {
|
||||
if (cursor > limit &&
|
||||
keyChar == text.charAt(cursor)) {
|
||||
|
@ -110,7 +110,7 @@ UMatchDegree StringMatcher::matches(const Replaceable& text,
|
|||
return U_PARTIAL_MATCH;
|
||||
}
|
||||
UChar keyChar = pattern.charAt(i);
|
||||
UnicodeMatcher* subm = data.lookupMatcher(keyChar);
|
||||
UnicodeMatcher* subm = data->lookupMatcher(keyChar);
|
||||
if (subm == 0) {
|
||||
// Don't need the cursor < limit check if
|
||||
// incremental is TRUE (because it's done above); do need
|
||||
|
@ -151,7 +151,7 @@ UnicodeString& StringMatcher::toPattern(UnicodeString& result,
|
|||
}
|
||||
for (int32_t i=0; i<pattern.length(); ++i) {
|
||||
UChar keyChar = pattern.charAt(i);
|
||||
const UnicodeMatcher* m = data.lookupMatcher(keyChar);
|
||||
const UnicodeMatcher* m = data->lookupMatcher(keyChar);
|
||||
if (m == 0) {
|
||||
ICU_Utility::appendToRule(result, keyChar, FALSE, escapeUnprintable, quoteBuf);
|
||||
} else {
|
||||
|
@ -176,7 +176,7 @@ UBool StringMatcher::matchesIndexValue(uint8_t v) const {
|
|||
return TRUE;
|
||||
}
|
||||
UChar32 c = pattern.char32At(0);
|
||||
const UnicodeMatcher *m = data.lookupMatcher(c);
|
||||
const UnicodeMatcher *m = data->lookupMatcher(c);
|
||||
return (m == 0) ? ((c & 0xFF) == v) : m->matchesIndexValue(v);
|
||||
}
|
||||
|
||||
|
@ -219,14 +219,29 @@ UnicodeString& StringMatcher::toReplacerPattern(UnicodeString& rule,
|
|||
}
|
||||
|
||||
/**
|
||||
* Remove any match data. This must be called before performing a
|
||||
* Remove any match info. This must be called before performing a
|
||||
* set of matches with this segment.
|
||||
*/
|
||||
void StringMatcher::resetMatch() {
|
||||
matchStart = matchLimit = -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Implement UnicodeFunctor
|
||||
*/
|
||||
void StringMatcher::setData(const TransliterationRuleData* d) {
|
||||
data = d;
|
||||
int32_t i = 0;
|
||||
while (i<pattern.length()) {
|
||||
UChar32 c = pattern.char32At(i);
|
||||
UnicodeFunctor* f = data->lookup(c);
|
||||
if (f != NULL) {
|
||||
f->setData(data);
|
||||
}
|
||||
i += UTF_CHAR_LENGTH(c);
|
||||
}
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
//eof
|
||||
|
||||
|
|
|
@ -96,6 +96,11 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico
|
|||
*/
|
||||
virtual UBool matchesIndexValue(uint8_t v) const;
|
||||
|
||||
/**
|
||||
* Implement UnicodeFunctor: adopt the data object and pass it on to
* every functor that a stand-in in the pattern refers to.
|
||||
*/
|
||||
virtual void setData(const TransliterationRuleData*);
|
||||
|
||||
/**
|
||||
* Replace characters in 'text' from 'start' to 'limit' with the
|
||||
* output text of this object. Update the 'cursor' parameter to
|
||||
|
@ -151,7 +156,7 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico
|
|||
* Context object that maps stand-ins to matcher and replacer
|
||||
* objects.
|
||||
*/
|
||||
const TransliterationRuleData& data;
|
||||
const TransliterationRuleData* data;
|
||||
|
||||
/**
|
||||
* The segment number, 1-based, or 0 if not a segment.
|
||||
|
|
|
@ -257,6 +257,22 @@ UnicodeString& StringReplacer::toReplacerPattern(UnicodeString& rule,
|
|||
return rule;
|
||||
}
|
||||
|
||||
/**
|
||||
* UnicodeFunctor API
|
||||
*/
|
||||
void StringReplacer::setData(const TransliterationRuleData* d) {
|
||||
data = d;
|
||||
int32_t i = 0;
|
||||
while (i<output.length()) {
|
||||
UChar32 c = output.char32At(i);
|
||||
UnicodeFunctor* f = data->lookup(c);
|
||||
if (f != NULL) {
|
||||
f->setData(data);
|
||||
}
|
||||
i += UTF_CHAR_LENGTH(c);
|
||||
}
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
//eof
|
||||
|
|
|
@ -126,6 +126,11 @@ class StringReplacer : public UnicodeFunctor, public UnicodeReplacer {
|
|||
*/
|
||||
virtual UnicodeString& toReplacerPattern(UnicodeString& result,
|
||||
UBool escapeUnprintable) const;
|
||||
|
||||
/**
|
||||
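* UnicodeFunctor API: adopt the data object and pass it on to every
* functor that a stand-in in the output string refers to.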
|
||||
*/
|
||||
virtual void setData(const TransliterationRuleData*);
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -81,6 +81,11 @@ public:
|
|||
int32_t limit,
|
||||
UBool incremental);
|
||||
|
||||
/**
|
||||
* UnicodeFunctor API. Nothing to do.
|
||||
*/
|
||||
virtual void setData(const TransliterationRuleData*) {}
|
||||
|
||||
protected:
|
||||
|
||||
UnicodeFilter();
|
||||
|
|
|
@ -16,6 +16,7 @@ U_NAMESPACE_BEGIN
|
|||
|
||||
class UnicodeMatcher;
|
||||
class UnicodeReplacer;
|
||||
class TransliterationRuleData;
|
||||
|
||||
/**
|
||||
* <code>UnicodeFunctor</code> is an abstract base class for objects
|
||||
|
@ -97,6 +98,16 @@ class U_I18N_API UnicodeFunctor {
|
|||
*/
|
||||
virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); };
|
||||
|
||||
/**
|
||||
* Set the data object associated with this functor. The data
|
||||
* object provides context for functor-to-standin mapping. This
|
||||
* method is required when assigning a functor to a different data
|
||||
* object. This function MAY GO AWAY later if the architecture is
|
||||
* changed to pass data object pointers through the API.
|
||||
* @draft
|
||||
*/
|
||||
virtual void setData(const TransliterationRuleData*) = 0;
|
||||
|
||||
protected:
|
||||
|
||||
UnicodeFunctor();
|
||||
|
|
Loading…
Add table
Reference in a new issue