From d1773b2571e5f77125041e336d4c74169ca035ec Mon Sep 17 00:00:00 2001 From: Alan Liu Date: Fri, 28 Jun 2002 21:13:54 +0000 Subject: [PATCH] ICU-1434 initial implementation of source/target set API X-SVN-Rev: 8971 --- icu4c/source/common/unicode/unifilt.h | 7 ++++ icu4c/source/common/unicode/unimatch.h | 8 +++++ icu4c/source/common/unicode/uniset.h | 9 +++++ icu4c/source/common/unifilt.cpp | 4 +++ icu4c/source/common/uniset.cpp | 7 ++++ icu4c/source/i18n/cpdtrans.cpp | 35 ++++++++++++++++++ icu4c/source/i18n/funcrepl.cpp | 9 +++++ icu4c/source/i18n/funcrepl.h | 5 +++ icu4c/source/i18n/quant.cpp | 9 +++++ icu4c/source/i18n/quant.h | 5 +++ icu4c/source/i18n/rbt.cpp | 14 ++++++++ icu4c/source/i18n/rbt_rule.cpp | 26 ++++++++++++++ icu4c/source/i18n/rbt_rule.h | 13 +++++++ icu4c/source/i18n/rbt_set.cpp | 21 +++++++++++ icu4c/source/i18n/rbt_set.h | 8 +++++ icu4c/source/i18n/strmatch.cpp | 30 ++++++++++++++++ icu4c/source/i18n/strmatch.h | 12 +++++++ icu4c/source/i18n/strrepl.cpp | 17 +++++++++ icu4c/source/i18n/strrepl.h | 5 +++ icu4c/source/i18n/translit.cpp | 30 ++++++++++++++++ icu4c/source/i18n/unicode/cpdtrans.h | 18 ++++++++++ icu4c/source/i18n/unicode/rbt.h | 14 ++++++++ icu4c/source/i18n/unicode/translit.h | 45 +++++++++++++++++++++++ icu4c/source/i18n/unicode/unirepl.h | 8 +++++ icu4c/source/test/intltest/cpdtrtst.cpp | 10 ++++-- icu4c/source/test/intltest/transtst.cpp | 48 +++++++++++++++++++++++++ icu4c/source/test/intltest/transtst.h | 2 ++ 27 files changed, 416 insertions(+), 3 deletions(-) diff --git a/icu4c/source/common/unicode/unifilt.h b/icu4c/source/common/unicode/unifilt.h index d9311d5ba61..da14e0ff47f 100644 --- a/icu4c/source/common/unicode/unifilt.h +++ b/icu4c/source/common/unicode/unifilt.h @@ -95,6 +95,13 @@ public: */ virtual void setData(const TransliterationRuleData*) {} + /** + * Stubbed out implementation of UnicodeMatcher API. 
+ * @param toUnionTo the set into which to union the source characters + * @return a reference to toUnionTo + */ + virtual void addMatchSetTo(UnicodeSet& toUnionTo) const; + protected: UnicodeFilter(); diff --git a/icu4c/source/common/unicode/unimatch.h b/icu4c/source/common/unicode/unimatch.h index 3bcfcdfa51d..8040456274c 100644 --- a/icu4c/source/common/unicode/unimatch.h +++ b/icu4c/source/common/unicode/unimatch.h @@ -14,6 +14,7 @@ U_NAMESPACE_BEGIN class Replaceable; class UnicodeString; +class UnicodeSet; /** * Constants returned by UnicodeMatcher::matches() @@ -128,6 +129,13 @@ public: * indexing. */ virtual UBool matchesIndexValue(uint8_t v) const = 0; + + /** + * Union the set of all characters that may be matched by this object + * into the given set. + * @param toUnionTo the set into which to union the source characters + */ + virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0; }; U_NAMESPACE_END diff --git a/icu4c/source/common/unicode/uniset.h b/icu4c/source/common/unicode/uniset.h index 87d86989c8a..45ec6d2bd65 100644 --- a/icu4c/source/common/unicode/uniset.h +++ b/icu4c/source/common/unicode/uniset.h @@ -536,6 +536,15 @@ public: const UnicodeString& s); public: + + /** + * Implementation of UnicodeMatcher API. Union the set of all + * characters that may be matched by this object into the given + * set. + * @param toUnionTo the set into which to union the source characters + */ + void addMatchSetTo(UnicodeSet& toUnionTo) const; + /** * Returns the index of the given character within this set, where * the set is ordered by ascending code point. 
If the character diff --git a/icu4c/source/common/unifilt.cpp b/icu4c/source/common/unifilt.cpp index e8aeffbc350..192453b1d9c 100644 --- a/icu4c/source/common/unifilt.cpp +++ b/icu4c/source/common/unifilt.cpp @@ -62,6 +62,10 @@ UBool UnicodeFilter::matchesIndexValue(uint8_t v) const { return FALSE; } +// Stub this out for filters that do not implement this +void UnicodeFilter::addMatchSetTo(UnicodeSet& toUnionTo) const { +} + U_NAMESPACE_END //eof diff --git a/icu4c/source/common/uniset.cpp b/icu4c/source/common/uniset.cpp index ada4cb699e3..910d5d9812a 100644 --- a/icu4c/source/common/uniset.cpp +++ b/icu4c/source/common/uniset.cpp @@ -900,6 +900,13 @@ int32_t UnicodeSet::matchRest(const Replaceable& text, return maxLen; } +/** + * Implementation of UnicodeMatcher + */ +void UnicodeSet::addMatchSetTo(UnicodeSet& toUnionTo) const { + toUnionTo.addAll(*this); +} + /** * Returns the index of the given character within this set, where * the set is ordered by ascending code point. If the character diff --git a/icu4c/source/i18n/cpdtrans.cpp b/icu4c/source/i18n/cpdtrans.cpp index aa6b71c7b3c..dd6bb27c02f 100644 --- a/icu4c/source/i18n/cpdtrans.cpp +++ b/icu4c/source/i18n/cpdtrans.cpp @@ -366,6 +366,41 @@ UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource, return rulesSource; } +/** + * Implement Transliterator framework + */ +void CompoundTransliterator::handleGetSourceSet(UnicodeSet& result) const { + UnicodeSet set; + result.clear(); + for (int32_t i=0; igetSourceSet(set)); + // Take the example of Hiragana-Latin. This is really + // Hiragana-Katakana; Katakana-Latin. The source set of + // these two is roughly [:Hiragana:] and [:Katakana:]. + // But the source set for the entire transliterator is + // actually [:Hiragana:] ONLY -- that is, the first + // non-empty source set. + + // This is a heuristic, and not 100% reliable. 
+ if (!result.isEmpty()) { + break; + } + } +} + +/** + * Override Transliterator framework + */ +UnicodeSet& CompoundTransliterator::getTargetSet(UnicodeSet& result) const { + UnicodeSet set; + result.clear(); + for (int32_t i=0; igetTargetSet(set)); + } + return result; +} + /** * Implements {@link Transliterator#handleTransliterate}. */ diff --git a/icu4c/source/i18n/funcrepl.cpp b/icu4c/source/i18n/funcrepl.cpp index 2b13654903d..aa9f14c8887 100644 --- a/icu4c/source/i18n/funcrepl.cpp +++ b/icu4c/source/i18n/funcrepl.cpp @@ -9,6 +9,7 @@ */ #include "funcrepl.h" #include "unicode/translit.h" +#include "unicode/uniset.h" static const UChar AMPERSAND = 38; // '&' static const UChar OPEN[] = {40,32,0}; // "( " @@ -91,6 +92,14 @@ UnicodeString& FunctionReplacer::toReplacerPattern(UnicodeString& rule, return rule; } +/** + * Implement UnicodeReplacer + */ +void FunctionReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const { + UnicodeSet set; + toUnionTo.addAll(translit->getTargetSet(set)); +} + /** * UnicodeFunctor API */ diff --git a/icu4c/source/i18n/funcrepl.h b/icu4c/source/i18n/funcrepl.h index 3a882f64291..f64d3b184ec 100644 --- a/icu4c/source/i18n/funcrepl.h +++ b/icu4c/source/i18n/funcrepl.h @@ -81,6 +81,11 @@ class FunctionReplacer : public UnicodeFunctor, public UnicodeReplacer { virtual UnicodeString& toReplacerPattern(UnicodeString& rule, UBool escapeUnprintable) const; + /** + * Implement UnicodeReplacer + */ + virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const; + /** * UnicodeFunctor API */ diff --git a/icu4c/source/i18n/quant.cpp b/icu4c/source/i18n/quant.cpp index 5a4702603c7..7d936380c92 100644 --- a/icu4c/source/i18n/quant.cpp +++ b/icu4c/source/i18n/quant.cpp @@ -114,6 +114,15 @@ UBool Quantifier::matchesIndexValue(uint8_t v) const { return (minCount == 0) || matcher->toMatcher()->matchesIndexValue(v); } +/** + * Implement UnicodeMatcher + */ +void Quantifier::addMatchSetTo(UnicodeSet& toUnionTo) const { + if (maxCount > 0) { + 
matcher->toMatcher()->addMatchSetTo(toUnionTo); + } +} + /** * Implement UnicodeFunctor */ diff --git a/icu4c/source/i18n/quant.h b/icu4c/source/i18n/quant.h index 6720f94777f..cb50e4df871 100644 --- a/icu4c/source/i18n/quant.h +++ b/icu4c/source/i18n/quant.h @@ -56,6 +56,11 @@ class Quantifier : public UnicodeFunctor, public UnicodeMatcher { */ virtual UBool matchesIndexValue(uint8_t v) const; + /** + * Implement UnicodeMatcher + */ + virtual void addMatchSetTo(UnicodeSet& toUnionTo) const; + /** * UnicodeFunctor API */ diff --git a/icu4c/source/i18n/rbt.cpp b/icu4c/source/i18n/rbt.cpp index e2ed1b698ac..e40f0fce48e 100644 --- a/icu4c/source/i18n/rbt.cpp +++ b/icu4c/source/i18n/rbt.cpp @@ -151,5 +151,19 @@ UnicodeString& RuleBasedTransliterator::toRules(UnicodeString& rulesSource, return data->ruleSet.toRules(rulesSource, escapeUnprintable); } +/** + * Implement Transliterator framework + */ +void RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const { + data->ruleSet.getSourceTargetSet(result, FALSE); +} + +/** + * Override Transliterator framework + */ +UnicodeSet& RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const { + return data->ruleSet.getSourceTargetSet(result, TRUE); +} + U_NAMESPACE_END diff --git a/icu4c/source/i18n/rbt_rule.cpp b/icu4c/source/i18n/rbt_rule.cpp index 6ee55234ae0..102b41ea99a 100644 --- a/icu4c/source/i18n/rbt_rule.cpp +++ b/icu4c/source/i18n/rbt_rule.cpp @@ -494,6 +494,32 @@ void TransliterationRule::setData(const TransliterationRuleData* d) { // Don't have to do segments since they are in the context or key } +/** + * Union the set of all characters that may be modified by this rule + * into the given set. 
+ */ +void TransliterationRule::addSourceSetTo(UnicodeSet& toUnionTo) const { + int32_t limit = anteContextLength + keyLength; + for (int32_t i=anteContextLength; ilookupMatcher(ch); + if (matcher == NULL) { + toUnionTo.add(ch); + } else { + matcher->addMatchSetTo(toUnionTo); + } + } +} + +/** + * Union the set of all characters that may be emitted by this rule + * into the given set. + */ +void TransliterationRule::addTargetSetTo(UnicodeSet& toUnionTo) const { + output->toReplacer()->addReplacementSetTo(toUnionTo); +} + U_NAMESPACE_END //eof diff --git a/icu4c/source/i18n/rbt_rule.h b/icu4c/source/i18n/rbt_rule.h index ed78836105b..136c848e86c 100644 --- a/icu4c/source/i18n/rbt_rule.h +++ b/icu4c/source/i18n/rbt_rule.h @@ -268,6 +268,19 @@ public: */ virtual UnicodeString& toRule(UnicodeString& pat, UBool escapeUnprintable) const; + + /** + * Union the set of all characters that may be modified by this rule + * into the given set. + */ + void addSourceSetTo(UnicodeSet& toUnionTo) const; + + /** + * Union the set of all characters that may be emitted by this rule + * into the given set. + */ + void addTargetSetTo(UnicodeSet& toUnionTo) const; + private: friend class StringMatcher; diff --git a/icu4c/source/i18n/rbt_set.cpp b/icu4c/source/i18n/rbt_set.cpp index ecdd04fada4..c86dc0e0dac 100644 --- a/icu4c/source/i18n/rbt_set.cpp +++ b/icu4c/source/i18n/rbt_set.cpp @@ -10,6 +10,7 @@ #include "rbt_set.h" #include "rbt_rule.h" #include "unicode/unistr.h" +#include "unicode/uniset.h" #include "cmemory.h" U_CDECL_BEGIN @@ -404,4 +405,24 @@ UnicodeString& TransliterationRuleSet::toRules(UnicodeString& ruleSource, return ruleSource; } +/** + * Return the set of all characters that may be modified + * (getTarget=false) or emitted (getTarget=true) by this set. 
+ */ +UnicodeSet& TransliterationRuleSet::getSourceTargetSet(UnicodeSet& result, + UBool getTarget) const { + result.clear(); + int32_t count = ruleVector->size(); + for (int32_t i=0; ielementAt(i); + if (getTarget) { + r->addTargetSetTo(result); + } else { + r->addSourceSetTo(result); + } + } + return result; +} + U_NAMESPACE_END diff --git a/icu4c/source/i18n/rbt_set.h b/icu4c/source/i18n/rbt_set.h index 9c0f8b7eba7..b3e4e0a153d 100644 --- a/icu4c/source/i18n/rbt_set.h +++ b/icu4c/source/i18n/rbt_set.h @@ -20,6 +20,7 @@ class TransliterationRule; class TransliterationRuleData; class UnicodeFilter; class UnicodeString; +class UnicodeSet; /** * A set of rules for a RuleBasedTransliterator. @@ -132,6 +133,13 @@ public: */ virtual UnicodeString& toRules(UnicodeString& result, UBool escapeUnprintable) const; + + /** + * Return the set of all characters that may be modified + * (getTarget=false) or emitted (getTarget=true) by this set. + */ + UnicodeSet& getSourceTargetSet(UnicodeSet& result, + UBool getTarget) const; }; U_NAMESPACE_END diff --git a/icu4c/source/i18n/strmatch.cpp b/icu4c/source/i18n/strmatch.cpp index 8e7bfb46491..3fceb8dbd25 100644 --- a/icu4c/source/i18n/strmatch.cpp +++ b/icu4c/source/i18n/strmatch.cpp @@ -9,6 +9,7 @@ #include "strmatch.h" #include "rbt_data.h" #include "util.h" +#include "unicode/uniset.h" U_NAMESPACE_BEGIN @@ -180,6 +181,22 @@ UBool StringMatcher::matchesIndexValue(uint8_t v) const { return (m == 0) ? 
((c & 0xFF) == v) : m->matchesIndexValue(v); } +/** + * Implement UnicodeMatcher + */ +void StringMatcher::addMatchSetTo(UnicodeSet& toUnionTo) const { + UChar32 ch; + for (int32_t i=0; ilookupMatcher(ch); + if (matcher == NULL) { + toUnionTo.add(ch); + } else { + matcher->addMatchSetTo(toUnionTo); + } + } +} + /** * UnicodeReplacer API */ @@ -226,6 +243,19 @@ UnicodeString& StringMatcher::toReplacerPattern(UnicodeString& rule, matchStart = matchLimit = -1; } +/** + * Union the set of all characters that may be output by this object + * into the given set. + * @param toUnionTo the set into which to union the output characters + */ +void StringMatcher::addReplacementSetTo(UnicodeSet& toUnionTo) const { + // The output of this replacer varies; it is the source text between + // matchStart and matchLimit. Since this varies depending on the + // input text, we can't compute it here. We can either do nothing + // or we can add ALL characters to the set. It's probably more useful + // to do nothing. +} + /** * Implement UnicodeFunctor */ diff --git a/icu4c/source/i18n/strmatch.h b/icu4c/source/i18n/strmatch.h index 148802323c1..296b9c08d46 100644 --- a/icu4c/source/i18n/strmatch.h +++ b/icu4c/source/i18n/strmatch.h @@ -96,6 +96,11 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico */ virtual UBool matchesIndexValue(uint8_t v) const; + /** + * Implement UnicodeMatcher + */ + virtual void addMatchSetTo(UnicodeSet& toUnionTo) const; + /** * Implement UnicodeFunctor */ @@ -145,6 +150,13 @@ class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public Unico */ void resetMatch(); + /** + * Union the set of all characters that may be output by this object + * into the given set. 
+ * @param toUnionTo the set into which to union the output characters + */ + virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const; + private: /** diff --git a/icu4c/source/i18n/strrepl.cpp b/icu4c/source/i18n/strrepl.cpp index c38c3b1820d..577760ea93a 100644 --- a/icu4c/source/i18n/strrepl.cpp +++ b/icu4c/source/i18n/strrepl.cpp @@ -11,6 +11,7 @@ #include "strrepl.h" #include "rbt_data.h" #include "util.h" +#include "unicode/uniset.h" U_NAMESPACE_BEGIN @@ -257,6 +258,22 @@ UnicodeString& StringReplacer::toReplacerPattern(UnicodeString& rule, return rule; } +/** + * Implement UnicodeReplacer + */ +void StringReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const { + UChar32 ch; + for (int32_t i=0; ilookupReplacer(ch); + if (r == NULL) { + toUnionTo.add(ch); + } else { + r->addReplacementSetTo(toUnionTo); + } + } +} + /** * UnicodeFunctor API */ diff --git a/icu4c/source/i18n/strrepl.h b/icu4c/source/i18n/strrepl.h index b8537a01863..c6b62e3a17a 100644 --- a/icu4c/source/i18n/strrepl.h +++ b/icu4c/source/i18n/strrepl.h @@ -127,6 +127,11 @@ class StringReplacer : public UnicodeFunctor, public UnicodeReplacer { virtual UnicodeString& toReplacerPattern(UnicodeString& result, UBool escapeUnprintable) const; + /** + * Implement UnicodeReplacer + */ + virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const; + /** * UnicodeFunctor API */ diff --git a/icu4c/source/i18n/translit.cpp b/icu4c/source/i18n/translit.cpp index 910701cbff2..92e22ec61ec 100644 --- a/icu4c/source/i18n/translit.cpp +++ b/icu4c/source/i18n/translit.cpp @@ -1057,6 +1057,36 @@ UnicodeString& Transliterator::toRules(UnicodeString& rulesSource, return rulesSource; } +UnicodeSet& Transliterator::getSourceSet(UnicodeSet& result) const { + handleGetSourceSet(result); + if (filter != NULL) { + UnicodeSet* filterSet; + UBool deleteFilterSet = FALSE; + // Most, but not all filters will be UnicodeSets. Optimize for + // the high-runner case. 
+ if (filter->getDynamicClassID() == UnicodeSet::getStaticClassID()) { + filterSet = (UnicodeSet*) filter; + } else { + filterSet = new UnicodeSet(); + deleteFilterSet = TRUE; + filter->addMatchSetTo(*filterSet); + } + result.retainAll(*filterSet); + if (deleteFilterSet) { + delete filterSet; + } + } + return result; +} + +void Transliterator::handleGetSourceSet(UnicodeSet& result) const { + result.clear(); +} + +UnicodeSet& Transliterator::getTargetSet(UnicodeSet& result) const { + return result.clear(); +} + // For public consumption void Transliterator::registerFactory(const UnicodeString& id, Transliterator::Factory factory, diff --git a/icu4c/source/i18n/unicode/cpdtrans.h b/icu4c/source/i18n/unicode/cpdtrans.h index 5e8452b8f25..5104a2643cf 100644 --- a/icu4c/source/i18n/unicode/cpdtrans.h +++ b/icu4c/source/i18n/unicode/cpdtrans.h @@ -159,6 +159,24 @@ public: virtual UnicodeString& toRules(UnicodeString& result, UBool escapeUnprintable) const; + protected: + /** + * Implement Transliterator framework + */ + virtual void handleGetSourceSet(UnicodeSet& result) const; + + public: + /** + * Override Transliterator framework + */ + virtual UnicodeSet& getTargetSet(UnicodeSet& result) const; + +// handleTransliterate should be protected, but was declared public before ICU 2.2. +// We do not have a separate deprecation date for this method since the entire class +// will become internal after 2002-sep-30. +#ifndef U_USE_DEPRECATED_TRANSLITERATOR_API + protected: +#endif /** * Implements {@link Transliterator#handleTransliterate}. * @deprecated To be removed after 2002-sep-30. diff --git a/icu4c/source/i18n/unicode/rbt.h b/icu4c/source/i18n/unicode/rbt.h index 67d11cf21a0..4be13729d70 100644 --- a/icu4c/source/i18n/unicode/rbt.h +++ b/icu4c/source/i18n/unicode/rbt.h @@ -382,6 +382,7 @@ public: */ Transliterator* clone(void) const; + protected: /** * Implements {@link Transliterator#handleTransliterate}. * @deprecated To be removed after 2002-sep-30. 
@@ -389,6 +390,7 @@ public: virtual void handleTransliterate(Replaceable& text, UTransPosition& offsets, UBool isIncremental) const; + public: /** * Return a representation of this transliterator as source rules. * These rules will produce an equivalent transliterator if used @@ -404,6 +406,18 @@ public: virtual UnicodeString& toRules(UnicodeString& result, UBool escapeUnprintable) const; + protected: + /** + * Implement Transliterator framework + */ + virtual void handleGetSourceSet(UnicodeSet& result) const; + + public: + /** + * Override Transliterator framework + */ + virtual UnicodeSet& getTargetSet(UnicodeSet& result) const; + /** * Return the class ID for this class. This is useful only for * comparing to a return value from getDynamicClassID(). For example: diff --git a/icu4c/source/i18n/unicode/translit.h b/icu4c/source/i18n/unicode/translit.h index a23556083d5..31ed5d52be0 100644 --- a/icu4c/source/i18n/unicode/translit.h +++ b/icu4c/source/i18n/unicode/translit.h @@ -780,6 +780,51 @@ public: virtual UnicodeString& toRules(UnicodeString& result, UBool escapeUnprintable) const; + /** + * Returns the set of all characters that may be modified in the + * input text by this Transliterator. This incorporates this + * object's current filter; if the filter is changed, the return + * value of this function will change. The default implementation + * returns an empty set. Some subclasses may override {@link + * #handleGetSourceSet} to return a more precise result. The + * return result is approximate in any case and is intended for + * use by tests, tools, or utilities. + * @param result receives result set; previous contents lost + * @return a reference to result + * @see #getTargetSet + * @see #handleGetSourceSet + */ + UnicodeSet& getSourceSet(UnicodeSet& result) const; + + /** + * Framework method that returns the set of all characters that + * may be modified in the input text by this Transliterator, + * ignoring the effect of this object's filter. 
The base class + * implementation returns the empty set. Subclasses that wish to + * implement this should override this method. + * The characters that this transliterator may modify are stored + * in result; nothing is returned, so subclasses fill in the + * caller-supplied set. + * @param result receives result set; previous contents lost + * @see #getSourceSet + * @see #getTargetSet + */ + virtual void handleGetSourceSet(UnicodeSet& result) const; + + /** + * Returns the set of all characters that may be generated as + * replacement text by this transliterator. The default + * implementation returns the empty set. Some subclasses may + * override this method to return a more precise result. The + * return result is approximate in any case and is intended for + * use by tests, tools, or utilities requiring such + * meta-information. + * @param result receives result set; previous contents lost + * @return a reference to result + * @see #getSourceSet + */ + virtual UnicodeSet& getTargetSet(UnicodeSet& result) const; + public: /** diff --git a/icu4c/source/i18n/unicode/unirepl.h b/icu4c/source/i18n/unicode/unirepl.h index 563bb4daac4..9cc6aba7d34 100644 --- a/icu4c/source/i18n/unicode/unirepl.h +++ b/icu4c/source/i18n/unicode/unirepl.h @@ -16,6 +16,7 @@ U_NAMESPACE_BEGIN class Replaceable; class UnicodeString; +class UnicodeSet; /** * UnicodeReplacer defines a protocol for objects that @@ -67,6 +68,13 @@ class U_I18N_API UnicodeReplacer /* not : public UObject because this is an inte */ virtual UnicodeString& toReplacerPattern(UnicodeString& result, UBool escapeUnprintable) const = 0; + + /** + * Union the set of all characters that may be output by this object + * into the given set. 
+ * @param toUnionTo the set into which to union the output characters + */ + virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const = 0; }; U_NAMESPACE_END diff --git a/icu4c/source/test/intltest/cpdtrtst.cpp b/icu4c/source/test/intltest/cpdtrtst.cpp index e513c3fb152..f0884f40c57 100644 --- a/icu4c/source/test/intltest/cpdtrtst.cpp +++ b/icu4c/source/test/intltest/cpdtrtst.cpp @@ -331,6 +331,9 @@ void CompoundTransliteratorTest::TestTransliterate(){ if(U_FAILURE(status)){ errln("CompoundTransliterator construction failed"); }else { +#if 0 + // handleTransliterate is a protected method that was erroneously made + // public. It is not public API that needs to be tested. UnicodeString s("abcabc"); expect(*ct1, s, s); UTransPosition index = { 0, 0, 0, 0 }; @@ -343,7 +346,7 @@ void CompoundTransliteratorTest::TestTransliterate(){ UnicodeString rsource3(s); ct1->handleTransliterate(rsource3, index, TRUE); expectAux(ct1->getID() + ":String, index(1,2,3), incremental=TRUE", rsource3 + "->" + rsource3, rsource3==expectedResult, expectedResult); - +#endif } delete ct1; UnicodeString Data[]={ @@ -391,7 +394,7 @@ void CompoundTransliteratorTest::expect(const CompoundTransliterator& t, t.transliterate(rsource); expectAux(t.getID() + ":Replaceable", source + "->" + rsource, rsource==expectedResult, expectedResult); - // Test handleTransliterate (incremental) transliteration -- + // Test transliterate (incremental) transliteration -- rsource.remove(); rsource.append(source); UTransPosition index; @@ -399,7 +402,8 @@ void CompoundTransliteratorTest::expect(const CompoundTransliterator& t, index.contextLimit = source.length(); index.start = 0; index.limit = source.length(); - t.handleTransliterate(rsource, index, TRUE); + UErrorCode ec = U_ZERO_ERROR; + t.transliterate(rsource, index, ec); t.finishTransliteration(rsource,index); expectAux(t.getID() + ":handleTransliterate ", source + "->" + rsource, rsource==expectedResult, expectedResult); diff --git 
a/icu4c/source/test/intltest/transtst.cpp b/icu4c/source/test/intltest/transtst.cpp index a0aceaa0c6d..029af3f26a8 100644 --- a/icu4c/source/test/intltest/transtst.cpp +++ b/icu4c/source/test/intltest/transtst.cpp @@ -159,6 +159,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec, TESTCASE(69,TestMulticharStringSet); TESTCASE(70,TestUserFunction); TESTCASE(71,TestAnyX); + TESTCASE(72,TestSourceTargetSet); default: name = ""; break; } @@ -3570,6 +3571,53 @@ void TransliteratorTest::TestAnyX(void) { delete anyLatin; } +/** + * Test the source and target set API. These are only implemented + * for RBT and CompoundTransliterator at this time. + */ +void TransliteratorTest::TestSourceTargetSet() { + UErrorCode ec = U_ZERO_ERROR; + + // Rules + const char* r = + "a > b; " + "r [x{lu}] > q;"; + + // Expected source + UnicodeSet expSrc("[arx{lu}]", ec); + + // Expected target + UnicodeSet expTrg("[bq]", ec); + + UParseError pe; + Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec); + + if (U_FAILURE(ec)) { + delete t; + errln("FAIL: Couldn't set up test"); + return; + } + + UnicodeSet src; t->getSourceSet(src); + UnicodeSet trg; t->getTargetSet(trg); + + if (src == expSrc && trg == expTrg) { + UnicodeString a, b; + logln((UnicodeString)"Ok: " + + r + " => source = " + src.toPattern(a, TRUE) + + ", target = " + trg.toPattern(b, TRUE)); + } else { + UnicodeString a, b, c, d; + errln((UnicodeString)"FAIL: " + + r + " => source = " + src.toPattern(a, TRUE) + + ", expected " + expSrc.toPattern(b, TRUE) + + "; target = " + trg.toPattern(c, TRUE) + + ", expected " + expTrg.toPattern(d, TRUE)); + } + + delete t; +} + //====================================================================== // Support methods //====================================================================== diff --git a/icu4c/source/test/intltest/transtst.h b/icu4c/source/test/intltest/transtst.h index da9dc2be06c..6a3cb7288f4 100644 --- 
a/icu4c/source/test/intltest/transtst.h +++ b/icu4c/source/test/intltest/transtst.h @@ -328,6 +328,8 @@ private: void TestAnyX(void); + void TestSourceTargetSet(void); + //====================================================================== // Support methods //======================================================================