diff --git a/icu4c/source/i18n/Makefile.in b/icu4c/source/i18n/Makefile.in index f80f4a4e3c4..de8ea6b150b 100644 --- a/icu4c/source/i18n/Makefile.in +++ b/icu4c/source/i18n/Makefile.in @@ -72,7 +72,7 @@ unifltlg.o unirange.o uniset.o unitohex.o unum.o \ dbbi.o dbbi_tbl.o rbbi.o rbbi_tbl.o brkdict.o nultrans.o jamohang.o hangjamo.o \ remtrans.o utrans.o \ titletrn.o tolowtrn.o toupptrn.o xformtrn.o name2uni.o uni2name.o nortrans.o \ -unifilt.o +unifilt.o quant.o strmatch.o STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O)) diff --git a/icu4c/source/i18n/i18n.dsp b/icu4c/source/i18n/i18n.dsp index 84e32d02b66..1b90be349a5 100644 --- a/icu4c/source/i18n/i18n.dsp +++ b/icu4c/source/i18n/i18n.dsp @@ -198,6 +198,10 @@ SOURCE=.\numfmt.cpp # End Source File # Begin Source File +SOURCE=.\quant.cpp +# End Source File +# Begin Source File + SOURCE=.\rbbi.cpp # End Source File # Begin Source File @@ -242,6 +246,10 @@ SOURCE=.\sortkey.cpp # End Source File # Begin Source File +SOURCE=.\strmatch.cpp +# End Source File +# Begin Source File + SOURCE=.\tblcoll.cpp # End Source File # Begin Source File @@ -1029,6 +1037,10 @@ InputPath=.\unicode\parsepos.h # End Source File # Begin Source File +SOURCE=.\quant.h +# End Source File +# Begin Source File + SOURCE=.\unicode\rbbi.h !IF "$(CFG)" == "i18n - Win32 Release" @@ -1188,6 +1200,10 @@ InputPath=.\unicode\sortkey.h # End Source File # Begin Source File +SOURCE=.\strmatch.h +# End Source File +# Begin Source File + SOURCE=.\unicode\tblcoll.h !IF "$(CFG)" == "i18n - Win32 Release" diff --git a/icu4c/source/i18n/quant.cpp b/icu4c/source/i18n/quant.cpp new file mode 100644 index 00000000000..0652d4bc0f1 --- /dev/null +++ b/icu4c/source/i18n/quant.cpp @@ -0,0 +1,80 @@ +/* +* Copyright (C) 2001, International Business Machines Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 07/26/01 aliu Creation. +********************************************************************** +*/ + +#include "quant.h" + +Quantifier::Quantifier(UnicodeMatcher *adopted, + uint32_t minCount, uint32_t maxCount) { + // assert(adopted != 0); + // assert(minCount <= maxCount); + matcher = adopted; + this->minCount = minCount; + this->maxCount = maxCount; +} + +Quantifier::Quantifier(const Quantifier& o) : + matcher(o.matcher->clone()), + minCount(o.minCount), + maxCount(o.maxCount) { + delete matcher; +} + +Quantifier::~Quantifier() { + delete matcher; +} + +/** + * Implement UnicodeMatcher + */ +UnicodeMatcher* Quantifier::clone() const { + return new Quantifier(*this); +} + +UMatchDegree Quantifier::matches(const Replaceable& text, + int32_t& offset, + int32_t limit, + UBool incremental) const { + int32_t start = offset; + uint32_t count = 0; + while (count < maxCount) { + UMatchDegree m = matcher->matches(text, offset, limit, incremental); + if (m == U_MATCH) { + ++count; + } else if (incremental && m == U_PARTIAL_MATCH) { + return U_PARTIAL_MATCH; + } else { + break; + } + } + if (incremental && offset == limit) { + return U_PARTIAL_MATCH; + } + if (count >= minCount) { + return U_MATCH; + } + offset = start; + return U_MISMATCH; +} + +/** + * Implement UnicodeMatcher + */ +UnicodeString& Quantifier::toPattern(UnicodeString& result, + UBool escapeUnprintable) const { + // TODO finish this + return result; +} + +/** + * Implement UnicodeMatcher + */ +UBool Quantifier::matchesIndexValue(uint8_t v) const { + return (minCount == 0) || matcher->matchesIndexValue(v); +} + +//eof diff --git a/icu4c/source/i18n/quant.h b/icu4c/source/i18n/quant.h new file mode 100644 index 00000000000..58ffdf2fef4 --- /dev/null +++ b/icu4c/source/i18n/quant.h @@ -0,0 +1,57 @@ +/* +* Copyright (C) 2001, International Business Machines Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 07/26/01 aliu Creation. +********************************************************************** +*/ +#ifndef QUANT_H +#define QUANT_H + +#include "unicode/unimatch.h" + +class Quantifier : public UnicodeMatcher { + + public: + + Quantifier(UnicodeMatcher *adopted, + uint32_t minCount, uint32_t maxCount); + + Quantifier(const Quantifier& o); + + virtual ~Quantifier(); + + /** + * Implement UnicodeMatcher + */ + virtual UnicodeMatcher* clone() const; + + /** + * Implement UnicodeMatcher + */ + virtual UMatchDegree matches(const Replaceable& text, + int32_t& offset, + int32_t limit, + UBool incremental) const; + + /** + * Implement UnicodeMatcher + */ + virtual UnicodeString& toPattern(UnicodeString& result, + UBool escapeUnprintable = FALSE) const; + + /** + * Implement UnicodeMatcher + */ + virtual UBool matchesIndexValue(uint8_t v) const; + + private: + + UnicodeMatcher* matcher; // owned + + uint32_t minCount; + + uint32_t maxCount; +}; + +#endif diff --git a/icu4c/source/i18n/rbt_data.cpp b/icu4c/source/i18n/rbt_data.cpp index 87323237a1c..0cef0ad2e1e 100644 --- a/icu4c/source/i18n/rbt_data.cpp +++ b/icu4c/source/i18n/rbt_data.cpp @@ -13,7 +13,7 @@ #include "unicode/uniset.h" TransliterationRuleData::TransliterationRuleData(UErrorCode& status) : - variableNames(0), setVariables(0) { + variableNames(0), variables(0) { if (U_FAILURE(status)) { return; } @@ -21,14 +21,14 @@ TransliterationRuleData::TransliterationRuleData(UErrorCode& status) : if (U_SUCCESS(status)) { variableNames->setValueDeleter(uhash_deleteUnicodeString); } - setVariables = 0; - setVariablesLength = 0; + variables = 0; + variablesLength = 0; } TransliterationRuleData::TransliterationRuleData(const TransliterationRuleData& other) : ruleSet(other.ruleSet), - setVariablesBase(other.setVariablesBase), - setVariablesLength(other.setVariablesLength), + variablesBase(other.variablesBase), + variablesLength(other.variablesLength), segmentBase(other.segmentBase) { UErrorCode status = U_ZERO_ERROR; @@ -44,29 +44,29 @@ TransliterationRuleData::TransliterationRuleData(const TransliterationRuleData& } } - setVariables = 0; - if (other.setVariables != 0) { - setVariables = new UnicodeSet*[setVariablesLength]; - for (int32_t i=0; iclone(); } } } TransliterationRuleData::~TransliterationRuleData() { delete variableNames; - if (setVariables != 0) { - for (int32_t i=0; i= 0 && i < setVariablesLength) ? setVariables[i] : 0; +const UnicodeMatcher* +TransliterationRuleData::lookup(UChar32 standIn) const { + int32_t i = standIn - variablesBase; + return (i >= 0 && i < variablesLength) ? variables[i] : 0; } int32_t diff --git a/icu4c/source/i18n/rbt_data.h b/icu4c/source/i18n/rbt_data.h index c81ac9ceda0..c60e718c460 100644 --- a/icu4c/source/i18n/rbt_data.h +++ b/icu4c/source/i18n/rbt_data.h @@ -11,7 +11,7 @@ #include "rbt_set.h" class UnicodeString; -class UnicodeSet; +class UnicodeMatcher; class Hashtable; /** @@ -46,35 +46,35 @@ public: * Map variable name (String) to variable (UnicodeString). A variable name * corresponds to zero or more characters, stored in a UnicodeString in * this hash. One or more of these chars may also correspond to a - * UnicodeSet, in which case the character in the UnicodeString in this hash is + * UnicodeMatcher, in which case the character in the UnicodeString in this hash is * a stand-in: it is an index for a secondary lookup in - * data.setVariables. The stand-in also represents the UnicodeSet in + * data.variables. The stand-in also represents the UnicodeMatcher in * the stored rules. */ Hashtable* variableNames; /** - * Map category variable (UChar) to set (UnicodeSet). + * Map category variable (UChar) to set (UnicodeMatcher). * Variables that correspond to a set of characters are mapped * from variable name to a stand-in character in data.variableNames. * The stand-in then serves as a key in this hash to lookup the - * actual UnicodeSet object. In addition, the stand-in is + * actual UnicodeMatcher object. In addition, the stand-in is * stored in the rule text to represent the set of characters. - * setVariables[i] represents character (setVariablesBase + i). + * variables[i] represents character (variablesBase + i). */ - UnicodeSet** setVariables; + UnicodeMatcher** variables; /** - * The character that represents setVariables[0]. Characters - * setVariablesBase through setVariablesBase + - * setVariables.length - 1 represent UnicodeSet objects. + * The character that represents variables[0]. Characters + * variablesBase through variablesBase + + * variablesLength - 1 represent UnicodeMatcher objects. */ - UChar setVariablesBase; + UChar variablesBase; /** - * The length of setVariables. + * The length of variables. */ - int32_t setVariablesLength; + int32_t variablesLength; /** * The character that represents segment 1. Characters segmentBase @@ -90,7 +90,11 @@ public: ~TransliterationRuleData(); - const UnicodeSet* lookupSet(UChar32 standIn) const; + /** + * Given a stand-in character, return the UnicodeMatcher that it + * represents, or NULL. + */ + const UnicodeMatcher* lookup(UChar32 standIn) const; /** * Return the zero-based index of the segment represented by the given diff --git a/icu4c/source/i18n/rbt_pars.cpp b/icu4c/source/i18n/rbt_pars.cpp index 61c3a926a23..f68a410b72d 100644 --- a/icu4c/source/i18n/rbt_pars.cpp +++ b/icu4c/source/i18n/rbt_pars.cpp @@ -7,19 +7,21 @@ * 11/17/99 aliu Creation. ********************************************************************** */ -#include "rbt_pars.h" -#include "unicode/rbt.h" -#include "rbt_rule.h" -#include "unirange.h" -#include "rbt_data.h" -#include "unicode/uniset.h" #include "cstring.h" -#include "unicode/parsepos.h" -#include "symtable.h" -#include "unicode/parseerr.h" #include "hash.h" -#include "unicode/unicode.h" +#include "quant.h" +#include "rbt_data.h" +#include "rbt_pars.h" +#include "rbt_rule.h" +#include "strmatch.h" +#include "symtable.h" +#include "unirange.h" +#include "unicode/parseerr.h" +#include "unicode/parsepos.h" #include "unicode/putil.h" +#include "unicode/rbt.h" +#include "unicode/unicode.h" +#include "unicode/uniset.h" // Operators #define VARIABLE_DEF_OP ((UChar)0x003D) /*=*/ @@ -43,6 +45,8 @@ #define CURSOR_POS ((UChar)0x007C) /*|*/ #define CURSOR_OFFSET ((UChar)0x0040) /*@*/ #define ANCHOR_START ((UChar)0x005E) /*^*/ +#define KLEENE_STAR ((UChar)0x002A) /***/ +#define ONE_OR_MORE ((UChar)0x002B) /*+*/ // By definition, the ANCHOR_END special character is a // trailing SymbolTable.SYMBOL_REF character. @@ -61,17 +65,17 @@ static const UChar ID_TOKEN[] = { 0x3A, 0x3A }; // ':', ':' /** * This class implements the SymbolTable interface. It is used * during parsing to give UnicodeSet access to variables that - * have been defined so far. Note that it uses setVariablesVector, + * have been defined so far. Note that it uses variablesVector, * _not_ data.setVariables. */ class ParseData : public SymbolTable { public: const TransliterationRuleData* data; // alias - const UVector* setVariablesVector; // alias + const UVector* variablesVector; // alias ParseData(const TransliterationRuleData* data = 0, - const UVector* setVariablesVector = 0); + const UVector* variablesVector = 0); virtual const UnicodeString* lookup(const UnicodeString& s) const; @@ -83,7 +87,7 @@ public: ParseData::ParseData(const TransliterationRuleData* d, const UVector* sets) : - data(d), setVariablesVector(sets) {} + data(d), variablesVector(sets) {} /** * Implement SymbolTable API. @@ -99,11 +103,11 @@ const UnicodeSet* ParseData::lookupSet(UChar32 ch) const { // Note that we cannot use data.lookupSet() because the // set array has not been constructed yet. const UnicodeSet* set = NULL; - int32_t i = ch - data->setVariablesBase; - if (i >= 0 && i < setVariablesVector->size()) { - int32_t i = ch - data->setVariablesBase; - set = (i < setVariablesVector->size()) ? - (UnicodeSet*) setVariablesVector->elementAt(i) : 0; + int32_t i = ch - data->variablesBase; + if (i >= 0 && i < variablesVector->size()) { + int32_t i = ch - data->variablesBase; + set = (i < variablesVector->size()) ? + (UnicodeSet*) variablesVector->elementAt(i) : 0; } return set; } @@ -276,7 +280,7 @@ int32_t RuleHalf::parse(const UnicodeString& rule, int32_t pos, int32_t limit) { if (escaped == (UChar32) -1) { return syntaxError(RuleBasedTransliterator::MALFORMED_UNICODE_ESCAPE, rule, start); } - buf.append((UChar) escaped); + buf.append(escaped); continue; } // Handle quoted matter @@ -431,6 +435,40 @@ int32_t RuleHalf::parse(const UnicodeString& rule, int32_t pos, int32_t limit) { } } break; + case KLEENE_STAR: + case ONE_OR_MORE: + // Very limited initial implementation. Note that this + // works strangely for quotes and variables -- + // 'foo'* => fo o* + // $a = foo; $a * => fo o* + // We will fix this later so that + // 'foo'* => (foo) * + // $a = foo; $a * => (foo) * + // Implement with hidden segments, perhaps at # 10+. + { + int32_t start, limit; + if (segments != 0 && + segments->size() >= 2 && + segments->size() % 2 == 0 && + _voidPtr_to_int32(segments->elementAt(segments->size()-1)) == buf.length()) { + // The * immediately follows a segment + int32_t len = segments->size(); + start = _voidPtr_to_int32(segments->elementAt(len - 2)); + limit = _voidPtr_to_int32(segments->elementAt(len - 1)); + segments->setElementAt(_int32_to_voidPtr(start+1), len-1); + } else { + // The * follows an isolated character + // (or quote, or variable reference) + start = buf.length() - 1; + limit = start + 1; + } + UnicodeMatcher *m = + new StringMatcher(buf, start, limit, *parser.data); + m = new Quantifier(m, (c == ONE_OR_MORE)?1:0, 0x7FFFFFFF); + buf.truncate(start); + buf.append(parser.generateStandInFor(m)); + } + break; // case SET_CLOSE: default: // Disallow unquoted characters other than [0-9A-Za-z] @@ -551,7 +589,7 @@ TransliteratorParser::TransliteratorParser( UTransDirection theDirection, UParseError* theParseError) : rules(theRules), direction(theDirection), data(0), parseError(theParseError) { - parseData = new ParseData(0, &setVariablesVector); + parseData = new ParseData(0, &variablesVector); } /** @@ -589,7 +627,7 @@ void TransliteratorParser::parseRules(UnicodeString& idBlockResult, } parseData->data = data; - setVariablesVector.removeAllElements(); + variablesVector.removeAllElements(); if (parseError != 0) { parseError->code = 0; } @@ -668,16 +706,16 @@ void TransliteratorParser::parseRules(UnicodeString& idBlockResult, } // Convert the set vector to an array - data->setVariablesLength = setVariablesVector.size(); - data->setVariables = data->setVariablesLength == 0 ? 0 : new UnicodeSet*[data->setVariablesLength]; + data->variablesLength = variablesVector.size(); + data->variables = data->variablesLength == 0 ? 0 : new UnicodeMatcher*[data->variablesLength]; // orphanElement removes the given element and shifts all other // elements down. For performance (and code clarity) we work from // the end back to index 0. int32_t i; - for (i=data->setVariablesLength; i>0; ) { + for (i=data->variablesLength; i>0; ) { --i; - data->setVariables[i] = - (UnicodeSet*) setVariablesVector.orphanElementAt(i); + data->variables[i] = + (UnicodeSet*) variablesVector.orphanElementAt(i); } // Index the rules @@ -894,14 +932,23 @@ int32_t TransliteratorParser::syntaxError(int32_t parseErrorCode, UChar TransliteratorParser::parseSet(const UnicodeString& rule, ParsePosition& pos) { UnicodeSet* set = new UnicodeSet(rule, pos, *parseData, status); + set->compact(); + return generateStandInFor(set); +} + +/** + * Generate and return a stand-in for a new UnicodeMatcher. Store + * the matcher (adopt it). + */ +UChar TransliteratorParser::generateStandInFor(UnicodeMatcher* adopted) { + // assert(adopted != 0); if (variableNext >= variableLimit) { // throw new RuntimeException("Private use variables exhausted"); - delete set; + delete adopted; status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } - set->compact(); - setVariablesVector.addElement(set); + variablesVector.addElement(adopted); return variableNext++; } @@ -949,12 +996,12 @@ void TransliteratorParser::determineVariableRange(void) { UnicodeRange* r = privateUse.largestUnusedSubrange(rules); - data->setVariablesBase = variableNext = variableLimit = (UChar) 0; + data->variablesBase = variableNext = variableLimit = (UChar) 0; if (r != 0) { // Allocate 9 characters for segment references 1 through 9 data->segmentBase = r->start; - data->setVariablesBase = variableNext = (UChar) (data->segmentBase + 9); + data->variablesBase = variableNext = (UChar) (data->segmentBase + 9); variableLimit = (UChar) (r->start + r->length); delete r; } diff --git a/icu4c/source/i18n/rbt_pars.h b/icu4c/source/i18n/rbt_pars.h index 7490ab89bef..10d412724c8 100644 --- a/icu4c/source/i18n/rbt_pars.h +++ b/icu4c/source/i18n/rbt_pars.h @@ -13,7 +13,7 @@ #include "unicode/parseerr.h" class TransliterationRuleData; -class UnicodeSet; +class UnicodeMatcher; class ParseData; class RuleHalf; class ParsePosition; @@ -48,11 +48,11 @@ class TransliteratorParser { ParseData* parseData; /** - * Temporary vector of set variables. When parsing is complete, this - * is copied into the array data.setVariables. As with data.setVariables, - * element 0 corresponds to character data.setVariablesBase. + * Temporary vector of matcher variables. When parsing is complete, this + * is copied into the array data.variables. As with data.variables, + * element 0 corresponds to character data.variablesBase. */ - UVector setVariablesVector; + UVector variablesVector; /** * The next available stand-in for variables. This starts at some point in @@ -169,6 +169,12 @@ private: UChar parseSet(const UnicodeString& rule, ParsePosition& pos); + /** + * Generate and return a stand-in for a new UnicodeMatcher. Store + * the matcher (adopt it). + */ + UChar generateStandInFor(UnicodeMatcher* adopted); + /** * Append the value of the given variable name to the given * UnicodeString. diff --git a/icu4c/source/i18n/rbt_rule.cpp b/icu4c/source/i18n/rbt_rule.cpp index 58d19fb7d7f..d451a63e992 100644 --- a/icu4c/source/i18n/rbt_rule.cpp +++ b/icu4c/source/i18n/rbt_rule.cpp @@ -161,16 +161,16 @@ void TransliterationRule::init(const UnicodeString& input, this->segments = adoptedSegs; // Find the position of the first segment index that is after the // anteContext (in the key). Note that this may be a start or a - // limit index. + // limit index. If all segments are in the ante context, + // firstKeySeg should point past the last segment -- that is, it + // should point at the end marker, which is -1. This allows the + // code to back up by one to obtain the last ante context segment. firstKeySeg = -1; if (segments != 0) { do { ++firstKeySeg; } while (segments[firstKeySeg] >= 0 && segments[firstKeySeg] < anteContextLength); - if (segments[firstKeySeg] < 0) { - firstKeySeg = -1; - } } pattern = input; @@ -221,7 +221,7 @@ int16_t TransliterationRule::getIndexValue() const { return -1; } UChar32 c = pattern.char32At(anteContextLength); - return (int16_t)(data.lookupSet(c) == NULL ? (c & 0xFF) : -1); + return (int16_t)(data.lookup(c) == NULL ? (c & 0xFF) : -1); } /** @@ -241,8 +241,9 @@ UBool TransliterationRule::matchesIndexValue(uint8_t v) const { return TRUE; } UChar32 c = pattern.char32At(anteContextLength); - const UnicodeSet* set = data.lookupSet(c); - return set == NULL ? (uint8_t(c) == v) : set->containsIndexValue(v); + const UnicodeMatcher* matcher = data.lookup(c); + return matcher == NULL ? (uint8_t(c) == v) : + matcher->matchesIndexValue(v); } /** @@ -367,17 +368,21 @@ UMatchDegree TransliterationRule::matchAndReplace(Replaceable& text, // A mismatch in the ante context, or with the start anchor, // is an outright U_MISMATCH regardless of whether we are // incremental or not. - int32_t cursor = pos.start - UTF_CHAR_LENGTH(text.char32At(pos.start-1)); + int32_t cursor = pos.start; int32_t newStart = 0; int32_t i; + + // Backup cursor by one + if (cursor > 0) { + cursor -= UTF_CHAR_LENGTH(text.char32At(cursor-1)); + } else { + --cursor; + } + for (i=anteContextLength-1; i>=0; --i) { - while (i == nextSegPos) { - segPos[iSeg] = cursor; - nextSegPos == (--iSeg >= 0) ? segments[iSeg] : -1; - } UChar keyChar = pattern.charAt(i); - const UnicodeSet* set = data.lookupSet(keyChar); - if (set == 0) { + const UnicodeMatcher* matcher = data.lookup(keyChar); + if (matcher == 0) { if (cursor >= pos.contextStart && keyChar == text.charAt(cursor)) { --cursor; @@ -386,7 +391,7 @@ UMatchDegree TransliterationRule::matchAndReplace(Replaceable& text, } } else { // Subtract 1 from contextStart to make it a reverse limit - if (set->matches(text, cursor, pos.contextStart-1, FALSE) + if (matcher->matches(text, cursor, pos.contextStart-1, FALSE) != U_MATCH) { return U_MISMATCH; } @@ -395,6 +400,15 @@ UMatchDegree TransliterationRule::matchAndReplace(Replaceable& text, // Record the position of the cursor newStart = cursor; } + while (nextSegPos == i) { + segPos[iSeg] = cursor; + if (cursor >= 0) { + segPos[iSeg] += UTF_CHAR_LENGTH(text.char32At(cursor)); + } else { + ++segPos[iSeg]; + } + nextSegPos = (--iSeg >= 0) ? segments[iSeg] : -1; + } } // ------------------------ Start Anchor ------------------------ @@ -405,8 +419,15 @@ UMatchDegree TransliterationRule::matchAndReplace(Replaceable& text, // -------------------- Key and Post Context -------------------- + // YUCKY OPTIMIZATION. To make things a miniscule amount faster, + // subtract anteContextLength from all segments[i] with i >= + // firstKeySeg. Then we don't have to do so here. I only mention + // this here in order to say DO NOT DO THIS. The gain is + // miniscule (how long does an integer subtraction take?) and the + // increase in confusion isn't worth it. + iSeg = firstKeySeg; - nextSegPos = (iSeg >= 0) ? segments[iSeg] : -1; + nextSegPos = (iSeg >= 0) ? (segments[iSeg] - anteContextLength) : -1; i = 0; cursor = pos.start; @@ -424,14 +445,14 @@ UMatchDegree TransliterationRule::matchAndReplace(Replaceable& text, } while (i == nextSegPos) { segPos[iSeg] = cursor; - nextSegPos = segments[++iSeg]; + nextSegPos = segments[++iSeg] - anteContextLength; } if (i == keyLength) { keyLimit = cursor; } UChar keyChar = pattern.charAt(anteContextLength + i++); - const UnicodeSet* set = data.lookupSet(keyChar); - if (set == 0) { + const UnicodeMatcher* matcher = data.lookup(keyChar); + if (matcher == 0) { // Don't need the cursor < pos.contextLimit check if // incremental is TRUE (because it's done above); do need // it otherwise. @@ -443,7 +464,7 @@ UMatchDegree TransliterationRule::matchAndReplace(Replaceable& text, } } else { UMatchDegree m = - set->matches(text, cursor, pos.contextLimit, incremental); + matcher->matches(text, cursor, pos.contextLimit, incremental); if (m != U_MATCH) { return m; } @@ -451,7 +472,7 @@ UMatchDegree TransliterationRule::matchAndReplace(Replaceable& text, } while (i == nextSegPos) { segPos[iSeg] = cursor; - nextSegPos = segments[++iSeg]; + nextSegPos = segments[++iSeg] - anteContextLength; } if (i == keyLength) { keyLimit = cursor; @@ -686,11 +707,11 @@ UnicodeString& TransliterationRule::toRule(UnicodeString& rule, } UChar c = pattern.charAt(i); - const UnicodeSet *set = data.lookupSet(c); - if (set == 0) { + const UnicodeMatcher *matcher = data.lookup(c); + if (matcher == 0) { _appendToRule(rule, c, FALSE, escapeUnprintable, quoteBuf); } else { - _appendToRule(rule, set->toPattern(str, escapeUnprintable), + _appendToRule(rule, matcher->toPattern(str, escapeUnprintable), TRUE, escapeUnprintable, quoteBuf); } } diff --git a/icu4c/source/i18n/strmatch.cpp b/icu4c/source/i18n/strmatch.cpp new file mode 100644 index 00000000000..27bca30469c --- /dev/null +++ b/icu4c/source/i18n/strmatch.cpp @@ -0,0 +1,128 @@ +/* +* Copyright (C) 2001, International Business Machines Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 07/23/01 aliu Creation. +********************************************************************** +*/ + +#include "strmatch.h" +#include "rbt_data.h" + +StringMatcher::StringMatcher(const UnicodeString& theString, + int32_t start, + int32_t limit, + const TransliterationRuleData& theData) : + data(theData) { + theString.extractBetween(start, limit, pattern); +} + +StringMatcher::StringMatcher(const UnicodeString& theString, + const TransliterationRuleData& theData) : + pattern(theString), + data(theData) { +} + +StringMatcher::StringMatcher(const StringMatcher& o) : + pattern(o.pattern), + data(o.data) { +} + +/** + * Destructor + */ +StringMatcher::~StringMatcher() { +} + +/** + * Implement UnicodeMatcher + */ +UnicodeMatcher* StringMatcher::clone() const { + return new StringMatcher(*this); +} + +/** + * Implement UnicodeMatcher + */ +UMatchDegree StringMatcher::matches(const Replaceable& text, + int32_t& offset, + int32_t limit, + UBool incremental) const { + int32_t i; + int32_t cursor = offset; + if (limit < cursor) { + for (i=pattern.length()-1; i>=0; --i) { + UChar keyChar = pattern.charAt(i); + const UnicodeMatcher* subm = data.lookup(keyChar); + if (subm == 0) { + if (cursor >= limit && + keyChar == text.charAt(cursor)) { + --cursor; + } else { + return U_MISMATCH; + } + } else { + UMatchDegree m = + subm->matches(text, cursor, limit, incremental); + if (m != U_MATCH) { + return m; + } + } + } + } else { + for (i=0; imatches(text, cursor, limit, incremental); + if (m != U_MATCH) { + return m; + } + } + } + } + + offset = cursor; + return U_MATCH; +} + +/** + * Implement UnicodeMatcher + */ +UnicodeString& StringMatcher::toPattern(UnicodeString& result, + UBool escapeUnprintable) const { + for (int32_t i=0; imatchesIndexValue(v); +} + +//eof diff --git a/icu4c/source/i18n/strmatch.h b/icu4c/source/i18n/strmatch.h new file mode 100644 index 00000000000..255f978a8db --- /dev/null +++ b/icu4c/source/i18n/strmatch.h @@ -0,0 +1,69 @@ +/* +* Copyright (C) 2001, International Business Machines Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 07/23/01 aliu Creation. +********************************************************************** +*/ +#ifndef STRMATCH_H +#define STRMATCH_H + +#include "unicode/unistr.h" +#include "unicode/unimatch.h" + +class TransliterationRuleData; + +/** + * An object that matches a string. + */ +class StringMatcher : public UnicodeMatcher { + + public: + + StringMatcher(const UnicodeString& string, + int32_t start, + int32_t limit, + const TransliterationRuleData& data); + + StringMatcher(const UnicodeString& string, + const TransliterationRuleData& data); + + StringMatcher(const StringMatcher& o); + + /** + * Destructor + */ + virtual ~StringMatcher(); + + /** + * Implement UnicodeMatcher + */ + virtual UnicodeMatcher* clone() const; + + /** + * Implement UnicodeMatcher + */ + virtual UMatchDegree matches(const Replaceable& text, + int32_t& offset, + int32_t limit, + UBool incremental) const; + + /** + * Implement UnicodeMatcher + */ + virtual UnicodeString& toPattern(UnicodeString& result, + UBool escapeUnprintable = FALSE) const; + + /** + * Implement UnicodeMatcher + */ + virtual UBool matchesIndexValue(uint8_t v) const; + + private: + + UnicodeString pattern; + + const TransliterationRuleData& data; +}; + +#endif diff --git a/icu4c/source/i18n/translit.cpp b/icu4c/source/i18n/translit.cpp index 03a2f91f9c5..ab2498c443b 100644 --- a/icu4c/source/i18n/translit.cpp +++ b/icu4c/source/i18n/translit.cpp @@ -149,7 +149,7 @@ Transliterator::Transliterator(const Transliterator& other) : maximumContextLength(other.maximumContextLength) { if (other.filter != 0) { // We own the filter, so we must have our own copy - filter = other.filter->clone(); + filter = (UnicodeFilter*) other.filter->clone(); } } @@ -160,7 +160,7 @@ Transliterator& Transliterator::operator=(const Transliterator& other) { ID = other.ID; maximumContextLength = other.maximumContextLength; // MUST go through adoptFilter in case latter is overridden - adoptFilter((other.filter == 0) ? 0 : other.filter->clone()); + adoptFilter((other.filter == 0) ? 0 : (UnicodeFilter*) other.filter->clone()); return *this; } @@ -361,6 +361,25 @@ void Transliterator::_transliterate(Replaceable& text, filteredTransliterate(text, index, TRUE); +#if 0 + // I CAN'T DO what I'm attempting below now that the Kleene star + // operator is supported. For example, in the rule + + // ([:Lu:]+) { x } > $1; + + // what is the maximum context length? getMaximumContextLength() + // will return 1, but this is just the length of the ante context + // part of the pattern string -- 1 character, which is a standin + // for a Quantifier, which contains a StringMatcher, which + // contains a UnicodeSet. + + // There is a complicated way to make this work again, and that's + // to add a "maximum left context" protocol into the + // UnicodeMatcher hierarchy. At present I'm not convinced this is + // worth it. + + // --- + // The purpose of the code below is to keep the context small // while doing incremental transliteration. When part of the left // context (between contextStart and start) is no longer needed, @@ -373,6 +392,7 @@ void Transliterator::_transliterate(Replaceable& text, newCS -= UTF_CHAR_LENGTH(text.char32At(newCS)) - 1; } index.contextStart = uprv_max(newCS, originalStart); +#endif } /** diff --git a/icu4c/source/i18n/unicode/unifilt.h b/icu4c/source/i18n/unicode/unifilt.h index e5568f13574..a18e1dd0e59 100644 --- a/icu4c/source/i18n/unicode/unifilt.h +++ b/icu4c/source/i18n/unicode/unifilt.h @@ -38,12 +38,15 @@ public: virtual UBool contains(UChar32 c) const = 0; /** - * Returns a copy of this object. All UnicodeFilter objects have - * to support cloning in order to allow classes using - * UnicodeFilters, such as Transliterator, to implement cloning. - * @draft + * UnicodeMatcher API. This class stubs this out. */ - virtual UnicodeFilter* clone() const = 0; + UnicodeString& toPattern(UnicodeString& result, + UBool escapeUnprintable) const; + + /** + * UnicodeMatcher API. This class stubs this out. + */ + UBool matchesIndexValue(uint8_t v) const; /** * Implement UnicodeMatcher API. diff --git a/icu4c/source/i18n/unicode/unimatch.h b/icu4c/source/i18n/unicode/unimatch.h index 5710333984d..4bb723d3742 100644 --- a/icu4c/source/i18n/unicode/unimatch.h +++ b/icu4c/source/i18n/unicode/unimatch.h @@ -11,6 +11,7 @@ #include "unicode/utypes.h" class Replaceable; +class UnicodeString; /** * Constants returned by UnicodeMatcher::matches() @@ -59,6 +60,13 @@ public: */ virtual ~UnicodeMatcher(); + /** + * Returns a copy of this object. All UnicodeMatcher objects have + * to support cloning in order to allow classes using + * UnicodeMatchers to implement cloning. + */ + virtual UnicodeMatcher* clone() const = 0; + /** * Return a UMatchDegree value indicating the degree of match for * the given text at the given offset. Zero, one, or more @@ -106,6 +114,28 @@ public: int32_t limit, UBool incremental) const = 0; + /** + * Returns a string representation of this matcher. If the result of + * calling this function is passed to the appropriate parser, it + * will produce another matcher that is equal to this one. + * @param result the string to receive the pattern. Previous + * contents will be deleted. + * @param escapeUnprintable if TRUE then convert unprintable + * character to their hex escape representations, \uxxxx or + * \Uxxxxxxxx. Unprintable characters are those other than + * U+000A, U+0020..U+007E. + */ + virtual UnicodeString& toPattern(UnicodeString& result, + UBool escapeUnprintable = FALSE) const = 0; + + /** + * Returns TRUE if this matcher will match a character c, where c + * & 0xFF == v, at offset, in the forward direction (with limit > + * offset). This is used by RuleBasedTransliterator for + * indexing. + */ + virtual UBool matchesIndexValue(uint8_t v) const = 0; + protected: UnicodeMatcher(); diff --git a/icu4c/source/i18n/unicode/uniset.h b/icu4c/source/i18n/unicode/uniset.h index 326fb451c0c..9b0ed7b242b 100644 --- a/icu4c/source/i18n/unicode/uniset.h +++ b/icu4c/source/i18n/unicode/uniset.h @@ -365,12 +365,12 @@ public: UBool operator!=(const UnicodeSet& o) const; /** - * Returns a copy of this object. All UnicodeFilter objects have + * Returns a copy of this object. All UnicodeMatcher objects have * to support cloning in order to allow classes using - * UnicodeFilters, such as Transliterator, to implement cloning. + * UnicodeMatchers, such as Transliterator, to implement cloning. * @draft */ - virtual UnicodeFilter* clone() const; + virtual UnicodeMatcher* clone() const; /** * Returns the hash code value for this set. @@ -691,7 +691,7 @@ private: * is the given value. This is used by RuleBasedTransliterator for * indexing. */ - UBool containsIndexValue(uint8_t v) const; + virtual UBool matchesIndexValue(uint8_t v) const; private: diff --git a/icu4c/source/i18n/unifilt.cpp b/icu4c/source/i18n/unifilt.cpp index c39805c1e7b..1231f54339c 100644 --- a/icu4c/source/i18n/unifilt.cpp +++ b/icu4c/source/i18n/unifilt.cpp @@ -40,3 +40,16 @@ UMatchDegree UnicodeFilter::matches(const Replaceable& text, } return U_MISMATCH; } + +// Stub this out for filters that do not implement a pattern +UnicodeString& UnicodeFilter::toPattern(UnicodeString& result, + UBool escapeUnprintable) const { + return result; +} + +// Stub this out for filters that do not implement indexing +UBool UnicodeFilter::matchesIndexValue(uint8_t v) const { + return FALSE; +} + +//eof diff --git a/icu4c/source/i18n/unifltlg.cpp b/icu4c/source/i18n/unifltlg.cpp index bfd689e65c4..d4f732d3db3 100644 --- a/icu4c/source/i18n/unifltlg.cpp +++ b/icu4c/source/i18n/unifltlg.cpp @@ -22,7 +22,7 @@ public: NullFilter(const NullFilter& f) : UnicodeFilter(f) { result = f.result; } virtual ~NullFilter() {} virtual UBool contains(UChar32 /*c*/) const { return result; } - virtual UnicodeFilter* clone() const { return new NullFilter(*this); } + virtual UnicodeMatcher* clone() const { return new NullFilter(*this); } }; class UnicodeNotFilter : public UnicodeFilter { @@ -32,15 +32,15 @@ public: UnicodeNotFilter(const UnicodeNotFilter&); virtual ~UnicodeNotFilter(); virtual UBool contains(UChar32 c) const; - virtual UnicodeFilter* clone() const; + virtual UnicodeMatcher* clone() const; }; UnicodeNotFilter::UnicodeNotFilter(UnicodeFilter* adopted) : filt(adopted) {} UnicodeNotFilter::UnicodeNotFilter(const UnicodeNotFilter& f) - : UnicodeFilter(f), filt(f.filt->clone()) {} + : UnicodeFilter(f), filt((UnicodeFilter*) f.filt->clone()) {} UnicodeNotFilter::~UnicodeNotFilter() { delete filt; } UBool UnicodeNotFilter::contains(UChar32 c) const { return !filt->contains(c); } -UnicodeFilter* UnicodeNotFilter::clone() const { return new UnicodeNotFilter(*this); } +UnicodeMatcher* UnicodeNotFilter::clone() const { return new UnicodeNotFilter(*this); } /** * Returns a UnicodeFilter that implements the inverse of @@ -50,7 +50,7 @@ UnicodeFilter* UnicodeFilterLogic::createNot(const UnicodeFilter* f) { if (f == 0) { return new NullFilter(FALSE); } else { - return new UnicodeNotFilter(f->clone()); + return new UnicodeNotFilter((UnicodeFilter*)f->clone()); } } @@ -62,15 +62,15 @@ public: UnicodeAndFilter(const UnicodeAndFilter&); virtual ~UnicodeAndFilter(); virtual UBool contains(UChar32 c) const; - virtual UnicodeFilter* clone() const; + virtual UnicodeMatcher* clone() const; }; UnicodeAndFilter::UnicodeAndFilter(UnicodeFilter* f1, UnicodeFilter* f2) : filt1(f1), filt2(f2) {} UnicodeAndFilter::UnicodeAndFilter(const UnicodeAndFilter& f) - : UnicodeFilter(f), filt1(f.filt1->clone()), filt2(f.filt2->clone()) {} + : UnicodeFilter(f), filt1((UnicodeFilter*)f.filt1->clone()), filt2((UnicodeFilter*)f.filt2->clone()) {} UnicodeAndFilter::~UnicodeAndFilter() { delete filt1; delete filt2; } UBool UnicodeAndFilter::contains(UChar32 c) const { return filt1->contains(c) && filt2->contains(c); } -UnicodeFilter* UnicodeAndFilter::clone() const { return new UnicodeAndFilter(*this); } +UnicodeMatcher* UnicodeAndFilter::clone() const { return new UnicodeAndFilter(*this); } /** * Returns a UnicodeFilter that implements a short @@ -84,12 +84,12 @@ UnicodeFilter* UnicodeFilterLogic::createAnd(const UnicodeFilter* f, if (g == 0) { return NULL; } - return g->clone(); + return (UnicodeFilter*)g->clone(); } if (g == 0) { - return f->clone(); + return (UnicodeFilter*)f->clone(); } - return new UnicodeAndFilter(f->clone(), g->clone()); + return new UnicodeAndFilter((UnicodeFilter*)f->clone(), (UnicodeFilter*)g->clone()); } class UnicodeOrFilter : public UnicodeFilter { @@ -100,15 +100,15 @@ public: UnicodeOrFilter(const UnicodeOrFilter&); virtual ~UnicodeOrFilter(); virtual UBool contains(UChar32 c) const; - virtual UnicodeFilter* clone() const; + virtual UnicodeMatcher* clone() const; }; UnicodeOrFilter::UnicodeOrFilter(UnicodeFilter* f1, UnicodeFilter* f2) : filt1(f1), filt2(f2) {} UnicodeOrFilter::UnicodeOrFilter(const UnicodeOrFilter& f) - : UnicodeFilter(f), filt1(f.filt1->clone()), filt2(f.filt2->clone()) {} + : UnicodeFilter(f), filt1((UnicodeFilter*)f.filt1->clone()), filt2((UnicodeFilter*)f.filt2->clone()) {} UnicodeOrFilter::~UnicodeOrFilter() { delete filt1; delete filt2; } UBool UnicodeOrFilter::contains(UChar32 c) const { return filt1->contains(c) || filt2->contains(c); } -UnicodeFilter* UnicodeOrFilter::clone() const { return new UnicodeOrFilter(*this); } +UnicodeMatcher* UnicodeOrFilter::clone() const { return new UnicodeOrFilter(*this); } /** * Returns a UnicodeFilter that implements a short @@ -122,10 +122,10 @@ UnicodeFilter* UnicodeFilterLogic::createOr(const UnicodeFilter* f, if (g == 0) { return NULL; } - return g->clone(); + return (UnicodeFilter*)g->clone(); } if (g == 0) { - return f->clone(); + return (UnicodeFilter*)f->clone(); } - return new UnicodeOrFilter(f->clone(), g->clone()); + return new UnicodeOrFilter((UnicodeFilter*)f->clone(), (UnicodeFilter*)g->clone()); } diff --git a/icu4c/source/i18n/uniset.cpp b/icu4c/source/i18n/uniset.cpp index d8aaa9971ae..d53a28f6479 100644 --- a/icu4c/source/i18n/uniset.cpp +++ b/icu4c/source/i18n/uniset.cpp @@ -228,11 +228,11 @@ UBool UnicodeSet::operator==(const UnicodeSet& o) const { } /** - * Returns a copy of this object. All UnicodeFilter objects have + * Returns a copy of this object. All UnicodeMatcher objects have * to support cloning in order to allow classes using - * UnicodeFilters, such as Transliterator, to implement cloning. + * UnicodeMatchers, such as Transliterator, to implement cloning. */ -UnicodeFilter* UnicodeSet::clone() const { +UnicodeMatcher* UnicodeSet::clone() const { return new UnicodeSet(*this); } @@ -547,7 +547,7 @@ UBool UnicodeSet::contains(UChar32 c) const { * is the given value. This is used by RuleBasedTransliterator for * indexing. */ -UBool UnicodeSet::containsIndexValue(uint8_t v) const { +UBool UnicodeSet::matchesIndexValue(uint8_t v) const { /* The index value v, in the range [0,255], is contained in this set if * it is contained in any pair of this set. Pairs either have the high * bytes equal, or unequal. If the high bytes are equal, then we have diff --git a/icu4c/source/test/intltest/hajatrts.cpp b/icu4c/source/test/intltest/hajatrts.cpp index 9a3aa17b1f7..cbe8e13dbfa 100644 --- a/icu4c/source/test/intltest/hajatrts.cpp +++ b/icu4c/source/test/intltest/hajatrts.cpp @@ -72,7 +72,7 @@ static void pseudoHandleTransliterate(const Transliterator* t, * Used by TestConstruction() and TestTransliterate. */ class TestHangulFilter : public UnicodeFilter { - virtual UnicodeFilter* clone() const { + virtual UnicodeMatcher* clone() const { return new TestHangulFilter(*this); } virtual UBool contains(UChar32 c) const { diff --git a/icu4c/source/test/intltest/hxuntrts.cpp b/icu4c/source/test/intltest/hxuntrts.cpp index 400d44ed2f3..3f925dc8b53 100644 --- a/icu4c/source/test/intltest/hxuntrts.cpp +++ b/icu4c/source/test/intltest/hxuntrts.cpp @@ -56,7 +56,7 @@ void HexToUniTransliteratorTest::runIndexedTest( int32_t index, UBool exec, cons * Used by TestConstruction() and TestTransliterate. */ class TestHexFilter : public UnicodeFilter { - virtual UnicodeFilter* clone() const { + virtual UnicodeMatcher* clone() const { return new TestHexFilter(*this); } virtual UBool contains(UChar32 c) const { diff --git a/icu4c/source/test/intltest/jahatrts.cpp b/icu4c/source/test/intltest/jahatrts.cpp index e622d5b5006..20915a45de8 100644 --- a/icu4c/source/test/intltest/jahatrts.cpp +++ b/icu4c/source/test/intltest/jahatrts.cpp @@ -70,7 +70,7 @@ static void pseudoHandleTransliterate(const Transliterator* t, * Used by TestConstruction() and TestTransliterate. */ class TestJamoFilter : public UnicodeFilter { - virtual UnicodeFilter* clone() const { + virtual UnicodeMatcher* clone() const { return new TestJamoFilter(*this); } virtual UBool contains(UChar32 c) const { diff --git a/icu4c/source/test/intltest/transapi.cpp b/icu4c/source/test/intltest/transapi.cpp index c6fe5047c45..fcec3bb91a3 100644 --- a/icu4c/source/test/intltest/transapi.cpp +++ b/icu4c/source/test/intltest/transapi.cpp @@ -615,7 +615,7 @@ void TransliteratorAPITest::TestRegisterUnregister(){ * Used by TestFiltering(). */ class TestFilter1 : public UnicodeFilter { - virtual UnicodeFilter* clone() const { + virtual UnicodeMatcher* clone() const { return new TestFilter1(*this); } virtual UBool contains(UChar32 c) const { @@ -626,7 +626,7 @@ class TestFilter1 : public UnicodeFilter { } }; class TestFilter2 : public UnicodeFilter { - virtual UnicodeFilter* clone() const { + virtual UnicodeMatcher* clone() const { return new TestFilter2(*this); } virtual UBool contains(UChar32 c) const { @@ -637,7 +637,7 @@ class TestFilter2 : public UnicodeFilter { } }; class TestFilter3 : public UnicodeFilter { - virtual UnicodeFilter* clone() const { + virtual UnicodeMatcher* clone() const { return new TestFilter3(*this); } virtual UBool contains(UChar32 c) const { diff --git a/icu4c/source/test/intltest/transtst.cpp b/icu4c/source/test/intltest/transtst.cpp index 17c4251e0b8..8cc67680b71 100644 --- a/icu4c/source/test/intltest/transtst.cpp +++ b/icu4c/source/test/intltest/transtst.cpp @@ -68,6 +68,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec, TESTCASE(32,TestToRules); TESTCASE(33,TestContext); TESTCASE(34,TestSupplemental); + TESTCASE(35,TestQuantifier); default: name = ""; break; } } @@ -477,7 +478,7 @@ void TransliteratorTest::TestCompoundHex(void) { * Used by TestFiltering(). */ class TestFilter : public UnicodeFilter { - virtual UnicodeFilter* clone() const { + virtual UnicodeMatcher* clone() const { return new TestFilter(*this); } virtual UBool contains(UChar32 c) const { @@ -1501,6 +1502,36 @@ void TransliteratorTest::TestSupplemental() { CharsToUnicodeString("ky\\U00010400y\\U00010400m")); } +void TransliteratorTest::TestQuantifier() { + + expect("(ab)+ {x} > '(' $1 ')';", + "x abx ababxy", + "x ab(ab) abab(abab)y"); + + expect("b+ > x;", + "ac abc abbc abbbc", + "ac axc axc axc"); + + expect("[abc]+ > x;", + "qac abrc abbcs abtbbc", + "qx xrx xs xtx"); + + expect("q{(ab)+} > x;", + "qa qab qaba qababc qaba", + "qa qx qxa qxc qxa"); + + expect("q(ab)* > x;", + "qa qab qaba qababc", + "xa x xa xc"); + + // Oddity -- "(foo)* > $1" causes $1 to match the run of "foo"s + // In perl, it only matches the first occurrence, so the output + // is "()a (ab) (ab)a (ab)c". + expect("q(ab)* > '(' $1 ')';", + "qa qab qaba qababc", + "()a (ab) (ab)a (abab)c"); +} + //====================================================================== // Support methods //====================================================================== diff --git a/icu4c/source/test/intltest/transtst.h b/icu4c/source/test/intltest/transtst.h index 8f0572e72fd..9b3202f5431 100644 --- a/icu4c/source/test/intltest/transtst.h +++ b/icu4c/source/test/intltest/transtst.h @@ -172,6 +172,8 @@ class TransliteratorTest : public IntlTest { void TestSupplemental(void); + void TestQuantifier(void); + //====================================================================== // Support methods //====================================================================== diff --git a/icu4c/source/test/intltest/ufltlgts.cpp b/icu4c/source/test/intltest/ufltlgts.cpp index 94c1b834f34..aa1fca810e5 100644 --- a/icu4c/source/test/intltest/ufltlgts.cpp +++ b/icu4c/source/test/intltest/ufltlgts.cpp @@ -36,7 +36,7 @@ void UnicodeFilterLogicTest::runIndexedTest( int32_t index, UBool exec, const ch } class Filter1: public UnicodeFilter{ - virtual UnicodeFilter* clone() const{ + virtual UnicodeMatcher* clone() const{ return new Filter1(*this); } virtual UBool contains(UChar32 c) const { @@ -47,7 +47,7 @@ class Filter1: public UnicodeFilter{ } }; class Filter2: public UnicodeFilter{ - virtual UnicodeFilter* clone() const{ + virtual UnicodeMatcher* clone() const{ return new Filter2(*this); } virtual UBool contains(UChar32 c) const { diff --git a/icu4c/source/test/intltest/unhxtrts.cpp b/icu4c/source/test/intltest/unhxtrts.cpp index 539d3e0185b..1be38ac7245 100644 --- a/icu4c/source/test/intltest/unhxtrts.cpp +++ b/icu4c/source/test/intltest/unhxtrts.cpp @@ -68,7 +68,7 @@ static void pseudoHandleTransliterate(const Transliterator* t, * Used by TestConstruction() and TestTransliterate. */ class TestUniFilter : public UnicodeFilter { - virtual UnicodeFilter* clone() const { + virtual UnicodeMatcher* clone() const { return new TestUniFilter(*this); } virtual UBool contains(UChar32 c) const {