From 59893d78c9b292c6e9a92defc57c66007b2ac712 Mon Sep 17 00:00:00 2001 From: Syn Wee Quek Date: Tue, 10 Dec 2002 06:02:50 +0000 Subject: [PATCH] ICU-2294 subclassability test X-SVN-Rev: 10576 --- icu4c/source/i18n/search.cpp | 12 +- icu4c/source/i18n/stsearch.cpp | 14 +- icu4c/source/i18n/usearch.cpp | 73 +++---- icu4c/source/test/intltest/apicoll.cpp | 226 ++++++++++++++++++++ icu4c/source/test/intltest/apicoll.h | 5 + icu4c/source/test/intltest/srchtest.cpp | 265 ++++++++++++------------ icu4c/source/test/intltest/srchtest.h | 4 +- 7 files changed, 421 insertions(+), 178 deletions(-) diff --git a/icu4c/source/i18n/search.cpp b/icu4c/source/i18n/search.cpp index 18a282d0d4f..557eb50491f 100644 --- a/icu4c/source/i18n/search.cpp +++ b/icu4c/source/i18n/search.cpp @@ -233,9 +233,15 @@ int32_t SearchIterator::next(UErrorCode &status) } } - if (matchindex != USEARCH_DONE) { - return handleNext(matchindex + matchlength, status); - } + if (matchlength > 0) { + // if matchlength is 0 we are at the start of the iteration + if (m_search_->isOverlap) { + offset ++; + } + else { + offset += matchlength; + } + } return handleNext(offset, status); } return USEARCH_DONE; diff --git a/icu4c/source/i18n/stsearch.cpp b/icu4c/source/i18n/stsearch.cpp index d7af74b5c65..bd79a56cee7 100644 --- a/icu4c/source/i18n/stsearch.cpp +++ b/icu4c/source/i18n/stsearch.cpp @@ -349,12 +349,12 @@ int32_t StringSearch::handleNext(int32_t position, UErrorCode &status) // looking at usearch.cpp, this part is shifted out to // StringSearch instead of SearchIterator because m_strsrch_ is // not accessible in SearchIterator - if (!m_search_->isOverlap && - position + m_strsrch_->pattern.defaultShiftSize > - m_search_->textLength) { + if (position + m_strsrch_->pattern.defaultShiftSize + > m_search_->textLength) { setMatchNotFound(); return USEARCH_DONE; } + ucol_setOffset(m_strsrch_->textIter, position, &status); while (TRUE) { if (m_search_->isCanonicalMatch) { // can't use exact here since extra 
accents are allowed. @@ -375,6 +375,14 @@ int32_t StringSearch::handleNext(int32_t position, UErrorCode &status) m_search_->matchedLength)) #endif ) { + if (m_search_->matchedIndex == USEARCH_DONE) { + ucol_setOffset(m_strsrch_->textIter, + m_search_->textLength, &status); + } + else { + ucol_setOffset(m_strsrch_->textIter, + m_search_->matchedIndex, &status); + } return m_search_->matchedIndex; } } diff --git a/icu4c/source/i18n/usearch.cpp b/icu4c/source/i18n/usearch.cpp index 02f21d334da..366c566a5ee 100644 --- a/icu4c/source/i18n/usearch.cpp +++ b/icu4c/source/i18n/usearch.cpp @@ -554,28 +554,18 @@ inline int32_t shiftForward(UStringSearch *strsrch, int32_t patternceindex) { UPattern *pattern = &(strsrch->pattern); - if (strsrch->search->isOverlap) { - if (textoffset > 0) { - textoffset ++; - } - else { - textoffset = pattern->defaultShiftSize; + if (ce != UCOL_NULLORDER) { + int32_t shift = pattern->shift[hash(ce)]; + // this is to adjust for characters in the middle of the + // substring for matching that failed. + int32_t adjust = pattern->CELength - patternceindex; + if (adjust > 1 && shift >= adjust) { + shift -= adjust - 1; } + textoffset += shift; } else { - if (ce != UCOL_NULLORDER) { - int32_t shift = pattern->shift[hash(ce)]; - // this is to adjust for characters in the middle of the - // substring for matching that failed. - int32_t adjust = pattern->CELength - patternceindex; - if (adjust > 1 && shift >= adjust) { - shift -= adjust - 1; - } - textoffset += shift; - } - else { - textoffset += pattern->defaultShiftSize; - } + textoffset += pattern->defaultShiftSize; } textoffset = getNextUStringSearchBaseOffset(strsrch, textoffset); @@ -2977,13 +2967,26 @@ U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch, search->matchedIndex = USEARCH_DONE; } } - else if (search->isCanonicalMatch) { - // can't use exact here since extra accents are allowed. 
- usearch_handleNextCanonical(strsrch, status); - } else { - usearch_handleNextExact(strsrch, status); - } + if (search->matchedLength > 0) { + // if matchlength is 0 we are at the start of the iteration + int offset = ucol_getOffset(strsrch->textIter); + if (search->isOverlap) { + ucol_setOffset(strsrch->textIter, offset + 1, status); + } + else { + ucol_setOffset(strsrch->textIter, + offset + search->matchedLength, status); + } + } + if (search->isCanonicalMatch) { + // can't use exact here since extra accents are allowed. + usearch_handleNextCanonical(strsrch, status); + } + else { + usearch_handleNextExact(strsrch, status); + } + } if (U_FAILURE(*status)) { return USEARCH_DONE; @@ -3138,14 +3141,6 @@ UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status) int32_t patterncelength = strsrch->pattern.CELength; int32_t textoffset = ucol_getOffset(coleiter); - // shifting it check for setting offset - // if setOffset is called previously or there was no previous match, we - // leave the offset as it is. 
- if (strsrch->search->matchedIndex != USEARCH_DONE) { - textoffset = strsrch->search->matchedIndex + - strsrch->search->matchedLength; - } - // status used in setting coleiter offset, since offset is checked in // shiftForward before setting the coleiter offset, status never // a failure @@ -3219,7 +3214,7 @@ UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status) if (checkNextExactMatch(strsrch, &textoffset, status)) { // status checked in ucol_setOffset - setColEIterOffset(coleiter, textoffset); + setColEIterOffset(coleiter, strsrch->search->matchedIndex); return TRUE; } } @@ -3241,14 +3236,6 @@ UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status) int32_t textoffset = ucol_getOffset(coleiter); UBool hasPatternAccents = strsrch->pattern.hasSuffixAccents || strsrch->pattern.hasPrefixAccents; - - // shifting it check for setting offset - // if setOffset is called previously or there was no previous match, we - // leave the offset as it is. - if (strsrch->search->matchedIndex != USEARCH_DONE) { - textoffset = strsrch->search->matchedIndex + - strsrch->search->matchedLength; - } textoffset = shiftForward(strsrch, textoffset, UCOL_NULLORDER, patterncelength); @@ -3326,7 +3313,7 @@ UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status) } if (checkNextCanonicalMatch(strsrch, &textoffset, status)) { - setColEIterOffset(coleiter, textoffset); + setColEIterOffset(coleiter, strsrch->search->matchedIndex); return TRUE; } } diff --git a/icu4c/source/test/intltest/apicoll.cpp b/icu4c/source/test/intltest/apicoll.cpp index 2edb1807369..660a171733d 100644 --- a/icu4c/source/test/intltest/apicoll.cpp +++ b/icu4c/source/test/intltest/apicoll.cpp @@ -1743,6 +1743,231 @@ void CollationAPITest::TestUClassID() delete coll; } +class TestCollator: public Collator // minimal concrete Collator used by TestSubclass; public base so it converts to Collator* +{ +public: + virtual Collator* clone(void) const; + virtual EComparisonResult compare(const UnicodeString& source, + const UnicodeString& target) const; + virtual
EComparisonResult compare(const UnicodeString& source, + const UnicodeString& target, + int32_t length) const; + virtual EComparisonResult compare(const UChar* source, + int32_t sourceLength, + const UChar* target, + int32_t targetLength) const; + virtual CollationKey& getCollationKey(const UnicodeString& source, + CollationKey& key, + UErrorCode& status) const; + virtual CollationKey& getCollationKey(const UChar*source, + int32_t sourceLength, + CollationKey& key, + UErrorCode& status) const; + virtual int32_t hashCode(void) const; + virtual const Locale getLocale(ULocDataLocaleType type, + UErrorCode& status) const; + virtual ECollationStrength getStrength(void) const; + virtual void setStrength(ECollationStrength newStrength); + virtual UClassID getDynamicClassID(void) const; + virtual void getVersion(UVersionInfo info) const; + virtual void setAttribute(UColAttribute attr, UColAttributeValue value, + UErrorCode &status); + virtual UColAttributeValue getAttribute(UColAttribute attr, + UErrorCode &status); + virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, + UErrorCode &status); + virtual uint32_t setVariableTop(const UnicodeString varTop, + UErrorCode &status); + virtual void setVariableTop(const uint32_t varTop, UErrorCode &status); + virtual uint32_t getVariableTop(UErrorCode &status) const; + virtual Collator* safeClone(void); + virtual int32_t getSortKey(const UnicodeString& source, + uint8_t* result, + int32_t resultLength) const; + virtual int32_t getSortKey(const UChar*source, int32_t sourceLength, + uint8_t*result, int32_t resultLength) const; + virtual UnicodeSet *getTailoredSet(UErrorCode &status) const; +}; + +#define returnEComparisonResult(data) \ + if (data < 0) return Collator::LESS;\ + if (data > 0) return Collator::GREATER;\ + return Collator::EQUAL; + +Collator* TestCollator::clone() const +{ + return new TestCollator(); +} + +Collator::EComparisonResult TestCollator::compare(const UnicodeString&
source, + const UnicodeString& target) const +{ + returnEComparisonResult(source.compare(target)); +} + +Collator::EComparisonResult TestCollator::compare(const UnicodeString& source, + const UnicodeString& target, + int32_t length) const +{ + returnEComparisonResult(source.compare(0, length, target)); +} + +Collator::EComparisonResult TestCollator::compare(const UChar* source, + int32_t sourceLength, + const UChar* target, + int32_t targetLength) const +{ + UnicodeString s(source, sourceLength); + UnicodeString t(target, targetLength); + return compare(s, t); +} + +CollationKey& TestCollator::getCollationKey(const UnicodeString& source, + CollationKey& key, + UErrorCode& status) const +{ + char temp[100]; + int32_t length = source.extract(temp, (int32_t)sizeof(temp) - 1, NULL, status); // keep one byte for the NUL + if (length >= (int32_t)sizeof(temp)) length = (int32_t)sizeof(temp) - 1; // extract() reports the full length even when it truncates + temp[length] = 0; + CollationKey tempkey((uint8_t*)temp, length); + key = tempkey; + return key; +} + +CollationKey& TestCollator::getCollationKey(const UChar*source, + int32_t sourceLength, + CollationKey& key, + UErrorCode& status) const +{ + // stack allocation used since collationkey does not keep the unicodestring + UnicodeString str(source, sourceLength); + return getCollationKey(str, key, status); +} + +int32_t TestCollator::getSortKey(const UnicodeString& source, uint8_t* result, + int32_t resultLength) const +{ + UErrorCode status = U_ZERO_ERROR; + int32_t length = source.extract((char *)result, resultLength, NULL, + status); + if (result != NULL && length < resultLength) result[length] = 0; // terminate only when the NUL fits in the caller's buffer + return length; +} + +int32_t TestCollator::getSortKey(const UChar*source, int32_t sourceLength, + uint8_t*result, int32_t resultLength) const +{ + UnicodeString str(source, sourceLength); + return getSortKey(str, result, resultLength); +} + +int32_t TestCollator::hashCode() const +{ + return 0; +} + +const Locale TestCollator::getLocale(ULocDataLocaleType type, + UErrorCode& status) const +{ + return NULL; +} + +Collator::ECollationStrength TestCollator::getStrength() const +{ + return TERTIARY; +} + +void
TestCollator::setStrength(Collator::ECollationStrength newStrength) +{ +} + +UClassID TestCollator::getDynamicClassID(void) const +{ + return 0; +} + +void TestCollator::getVersion(UVersionInfo info) const +{ +} + +void TestCollator::setAttribute(UColAttribute attr, UColAttributeValue value, + UErrorCode &status) +{ +} + +UColAttributeValue TestCollator::getAttribute(UColAttribute attr, + UErrorCode &status) +{ + return UCOL_DEFAULT; +} + +uint32_t TestCollator::setVariableTop(const UChar *varTop, int32_t len, + UErrorCode &status) +{ + return 0; +} + +uint32_t TestCollator::setVariableTop(const UnicodeString varTop, + UErrorCode &status) +{ + return 0; +} + +void TestCollator::setVariableTop(const uint32_t varTop, UErrorCode &status) +{ +} + +uint32_t TestCollator::getVariableTop(UErrorCode &status) const +{ + return 0; +} + +Collator* TestCollator::safeClone(void) +{ + return new TestCollator(); +} + +UnicodeSet * TestCollator::getTailoredSet(UErrorCode &status) const +{ + return Collator::getTailoredSet(status); +} + +void CollationAPITest::TestSubclass() +{ + TestCollator col1; + TestCollator col2; + if (&col1 == &col2) { + errln("2 different instance of TestCollator should fail"); + } + if (col1.hashCode() != col2.hashCode()) { + errln("Every TestCollator has the same hashcode"); + } + UnicodeString abc("abc", 3); + UnicodeString bcd("bcd", 3); + if (col1.compare(abc, bcd) != abc.compare(bcd)) { + errln("TestCollator compare should be the same as the default " + "string comparison"); + } + CollationKey key; + UErrorCode status = U_ZERO_ERROR; + col1.getCollationKey(abc, key, status); + int32_t length = 0; + char *bytearray = (char *)key.toByteArray(length); + UnicodeString keyarray(bytearray, length, NULL, status); + if (abc != keyarray) { + errln("TestCollator collationkey API is returning wrong values"); + } + + UnicodeSet expectedset(0, 0x10FFFF); + UnicodeSet *defaultset = col1.getTailoredSet(status); // caller owns the returned set + if (U_FAILURE(status) || defaultset == NULL || !defaultset->containsAll(expectedset) + ||
!expectedset.containsAll(*defaultset)) { + errln("Error: expected default tailoring to be 0 to 0x10ffff"); + } + delete defaultset; +} + void CollationAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */) { if (exec) logln("TestSuite CollationAPITest: "); @@ -1768,6 +1993,7 @@ void CollationAPITest::runIndexedTest( int32_t index, UBool exec, const char* &n case 18: name = "TestBounds"; if (exec) TestBounds(); break; case 19: name = "TestGetTailoredSet"; if (exec) TestGetTailoredSet(); break; case 20: name = "TestUClassID"; if (exec) TestUClassID(); break; + case 21: name = "TestSubclass"; if (exec) TestSubclass(); break; default: name = ""; break; } } diff --git a/icu4c/source/test/intltest/apicoll.h b/icu4c/source/test/intltest/apicoll.h index 79cb3ca0d38..95710e5eaad 100644 --- a/icu4c/source/test/intltest/apicoll.h +++ b/icu4c/source/test/intltest/apicoll.h @@ -149,6 +149,11 @@ public: */ void TestGetTailoredSet(); + /** + * Tests the subclassability + */ + void TestSubclass(); + /** * Tests the dynamic and static ids of collation classes */ diff --git a/icu4c/source/test/intltest/srchtest.cpp b/icu4c/source/test/intltest/srchtest.cpp index 1b6196c069e..23037cfb4a8 100644 --- a/icu4c/source/test/intltest/srchtest.cpp +++ b/icu4c/source/test/intltest/srchtest.cpp @@ -134,8 +134,8 @@ void StringSearchTest::runIndexedTest(int32_t index, UBool exec, CASE(30, TestGetSetOffsetCanonical) CASE(31, TestSupplementaryCanonical) CASE(32, TestContractionCanonical) - CASE(33, TestSearchIterator) - CASE(34, TestUClassID) + CASE(33, TestUClassID) + CASE(34, TestSubclass) default: name = ""; break; } } @@ -2041,131 +2041,6 @@ void StringSearchTest::TestContractionCanonical() delete collator; } -class TempSearch : public SearchIterator -{ -public: - TempSearch(); - TempSearch(TempSearch &search); - ~TempSearch(); - void setOffset(int32_t position, UErrorCode &status); - int32_t getOffset() const; - SearchIterator* safeClone() const; - - /** - * 
ICU "poor man's RTTI", returns a UClassID for the actual class. - * - * @draft ICU 2.2 - */ - virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); } - - /** - * ICU "poor man's RTTI", returns a UClassID for this class. - * - * @draft ICU 2.2 - */ - static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; } - -protected: - int32_t handleNext(int32_t position, UErrorCode &status); - int32_t handlePrev(int32_t position, UErrorCode &status); - -private: - - /** - * The address of this static class variable serves as this class's ID - * for ICU "poor man's RTTI". - */ - static const char fgClassID; -}; - -const char TempSearch::fgClassID=0; - -TempSearch::TempSearch() : SearchIterator() -{ -} - -TempSearch::TempSearch(TempSearch &search) : SearchIterator(search) -{ -} - -TempSearch::~TempSearch() -{ -} - -void TempSearch::setOffset(int32_t /*position*/, UErrorCode &/*status*/) -{ -} - -int32_t TempSearch::getOffset() const -{ - return USEARCH_DONE; -} - -SearchIterator * TempSearch::safeClone() const -{ - return NULL; -} - -int32_t TempSearch::handleNext(int32_t /*position*/, UErrorCode &/*status*/) -{ - return USEARCH_DONE; -} - -int32_t TempSearch::handlePrev(int32_t /*position*/, UErrorCode &/*status*/) -{ - return USEARCH_DONE; -} - -void StringSearchTest::TestSearchIterator() -{ - TempSearch search; - if (search.getBreakIterator() != NULL || - search.getAttribute(USEARCH_OVERLAP) != USEARCH_OFF || - search.getAttribute(USEARCH_CANONICAL_MATCH) != USEARCH_OFF || - search.getMatchedStart() != USEARCH_DONE || - search.getMatchedLength() != 0 || search.getText().length() != 0) { - errln("Error subclassing SearchIterator, default constructor failed"); - return; - } - if (search.getAttribute(USEARCH_ATTRIBUTE_COUNT) != USEARCH_DEFAULT) { - errln("Error getting illegal attribute failed"); - return; - } - UnicodeString text("abc"); - StringCharacterIterator striter(text); - UErrorCode status = U_ZERO_ERROR; - search.setText(text, 
status); - TempSearch search2; - search2.setText(striter, status); - if (U_FAILURE(status) || search != search2) { - errln("Error setting text"); - return; - } - if (search != search) { - errln("Error: search object has to be equals to itself"); - return; - } - TempSearch search3(search); - if (search != search3) { - errln("Error: search object has to be equals to its copy"); - return; - } - search.setAttribute(USEARCH_OVERLAP, USEARCH_ON, status); - if (U_FAILURE(status) || - search.getAttribute(USEARCH_OVERLAP) != USEARCH_ON) { - errln("Error setting overlap attribute"); - } - search.reset(); - if (search.getAttribute(USEARCH_OVERLAP) != USEARCH_OFF) { - errln("Error resetting search"); - } - search2 = search3; - if (search2 != search3) { - errln("Error: search object has to be equals to its assignment copy"); - return; - } -} - void StringSearchTest::TestUClassID() { char id = *((char *)StringSearch::getStaticClassID()); @@ -2184,4 +2059,140 @@ void StringSearchTest::TestUClassID() delete strsrch; } +class TestSearch : public SearchIterator +{ +public: + TestSearch(const UnicodeString &text, + BreakIterator *breakiter, + const UnicodeString &pattern); + ~TestSearch(); + void setOffset(int32_t position, UErrorCode &status); + int32_t getOffset() const; + SearchIterator* safeClone() const; + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @draft ICU 2.2 + */ + virtual inline UClassID getDynamicClassID() const { return getStaticClassID(); } + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @draft ICU 2.2 + */ + static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; } + + UnicodeString m_pattern_; + +protected: + int32_t handleNext(int32_t position, UErrorCode &status); + int32_t handlePrev(int32_t position, UErrorCode &status); + +private: + + /** + * The address of this static class variable serves as this class's ID + * for ICU "poor man's RTTI". 
+ */ + static const char fgClassID; + int32_t m_offset_; // signed, like every API it is exchanged with (getOffset/setOffset/setMatchStart) +}; + +const char TestSearch::fgClassID=0; + +TestSearch::TestSearch(const UnicodeString &text, + BreakIterator *breakiter, + const UnicodeString &pattern) : SearchIterator(text, breakiter) +{ + m_offset_ = 0; + m_pattern_ = pattern; +} + +TestSearch::~TestSearch() +{ +} + +void TestSearch::setOffset(int32_t position, UErrorCode &status) +{ + if (position >= 0 && position <= m_text_.length()) { + m_offset_ = position; + } + else { + status = U_INDEX_OUTOFBOUNDS_ERROR; + } +} + +int32_t TestSearch::getOffset() const +{ + return m_offset_; +} + +SearchIterator * TestSearch::safeClone() const +{ + return new TestSearch(m_text_, m_breakiterator_, m_pattern_); +} + +int32_t TestSearch::handleNext(int32_t start, UErrorCode &status) +{ + int32_t match = m_text_.indexOf(m_pattern_, start); + if (match < 0) { + m_offset_ = m_text_.length(); + setMatchStart(m_offset_); + setMatchLength(0); + return USEARCH_DONE; + } + setMatchStart(match); + m_offset_ = match; + setMatchLength(m_pattern_.length()); + return match; +} + +int32_t TestSearch::handlePrev(int32_t start, UErrorCode &status) +{ + int32_t match = m_text_.lastIndexOf(m_pattern_, 0, start); + if (match < 0) { + m_offset_ = 0; + setMatchStart(m_offset_); + setMatchLength(0); + return USEARCH_DONE; + } + setMatchStart(match); + m_offset_ = match; + setMatchLength(m_pattern_.length()); + return match; +} + +void StringSearchTest::TestSubclass() +{ + UnicodeString text("abc abcd abc"); + UnicodeString pattern("abc"); + TestSearch search(text, NULL, pattern); + int expected[] = {0, 4, 9}; + UErrorCode status = U_ZERO_ERROR; + for (int i = 0; i < (int)(sizeof(expected) / sizeof(expected[0])); i ++) { + if (search.next(status) != expected[i]) { + errln("Error getting next match"); + } + if (search.getMatchedLength() != search.m_pattern_.length()) { + errln("Error getting next match length"); + } + } + if (search.next(status) != USEARCH_DONE) { + errln("Error should have reached the end of
the iteration"); + } + for (int i = (int)(sizeof(expected) / sizeof(expected[0])) - 1; i >= 0; i --) { + if (search.previous(status) != expected[i]) { + errln("Error getting previous match"); + } + if (search.getMatchedLength() != search.m_pattern_.length()) { + errln("Error getting previous match length"); + } + } + if (search.previous(status) != USEARCH_DONE) { + errln("Error should have reached the start of the iteration"); + } +} + #endif /* #if !UCONFIG_NO_COLLATION */ diff --git a/icu4c/source/test/intltest/srchtest.h b/icu4c/source/test/intltest/srchtest.h index c65487f47f2..24331ab8eb9 100644 --- a/icu4c/source/test/intltest/srchtest.h +++ b/icu4c/source/test/intltest/srchtest.h @@ -85,8 +85,8 @@ private: void TestGetSetOffsetCanonical(); void TestSupplementaryCanonical(); void TestContractionCanonical(); - void TestSearchIterator(); - void TestUClassID(); + void TestUClassID(); + void TestSubclass(); }; #endif /* #if !UCONFIG_NO_COLLATION */