diff --git a/icu4c/source/i18n/usearch.cpp b/icu4c/source/i18n/usearch.cpp index 945daf28619..3952d7d0ef7 100644 --- a/icu4c/source/i18n/usearch.cpp +++ b/icu4c/source/i18n/usearch.cpp @@ -443,7 +443,7 @@ void checkBreakBoundary(const UStringSearch *strsrch, int32_t *start, int32_t *end) { #if !UCONFIG_NO_BREAK_ITERATION - UBreakIterator *breakiterator = strsrch->search->breakIter; + UBreakIterator *breakiterator = strsrch->search->_breakIter_; if (breakiterator) { int32_t matchend = *end; int32_t matchstart = *start; @@ -475,7 +475,7 @@ UBool isBreakUnit(const UStringSearch *strsrch, int32_t start, #if !UCONFIG_NO_BREAK_ITERATION UBreakIterator *breakiterator = strsrch->search->breakIter; //TODO: Add here. - if (breakiterator && strsrch->search->breakIterGiven) { + if (breakiterator) { int32_t startindex = ubrk_first(breakiterator); int32_t endindex = ubrk_last(breakiterator); @@ -1161,7 +1161,7 @@ inline UBool checkNextExactMatch(UStringSearch *strsrch, } //Add breakiterator boundary check for primary strength search. - if (!strsrch->search->breakIterGiven && strsrch->strength == UCOL_PRIMARY) { + if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) { checkBreakBoundary(strsrch, &start, textoffset); } @@ -2009,7 +2009,7 @@ inline UBool checkPreviousExactMatch(UStringSearch *strsrch, } //Add breakiterator boundary check for primary strength search. - if (!strsrch->search->breakIterGiven && strsrch->strength == UCOL_PRIMARY) { + if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) { checkBreakBoundary(strsrch, textoffset, &end); } @@ -2600,17 +2600,13 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator( result->pattern.textLength = patternlength; result->pattern.CE = NULL; - // If a breakiterator is given, use that one, otherwise create a character break iterator. - result->search->breakIterGiven = breakiter ? TRUE : FALSE; + result->search->breakIter = breakiter; #if !UCONFIG_NO_BREAK_ITERATION - if (!breakiter && result->strength == UCOL_PRIMARY) { - breakiter = ubrk_open(UBRK_CHARACTER, ucol_getLocale(result->collator, ULOC_VALID_LOCALE, status), NULL, 0, status); - } + result->search->_breakIter_ = ubrk_open(UBRK_CHARACTER, ucol_getLocale(result->collator, ULOC_VALID_LOCALE, status), text, textlength, status); if (breakiter) { - ubrk_setText(breakiter, text, textlength, status); + ubrk_setText(breakiter, text, textlength, status); } #endif - result->search->breakIter = breakiter; result->ownCollator = FALSE; result->search->matchedLength = 0; @@ -2791,7 +2787,7 @@ U_CAPI void U_EXPORT2 usearch_setBreakIterator(UStringSearch *strsrch, UErrorCode *status) { if (U_SUCCESS(*status) && strsrch) { - strsrch->search->breakIter = breakiter; + strsrch->search->breakIter = breakiter; if (breakiter) { ubrk_setText(breakiter, strsrch->search->text, strsrch->search->textLength, status); @@ -2835,6 +2831,7 @@ U_CAPI void U_EXPORT2 usearch_setText( UStringSearch *strsrch, ubrk_setText(strsrch->search->breakIter, text, textlength, status); } + ubrk_setText(strsrch->search->_breakIter_, text, textlength, status); #endif } } @@ -2867,6 +2864,11 @@ U_CAPI void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch, strsrch->collator = collator; strsrch->strength = ucol_getStrength(collator); strsrch->ceMask = getMask(strsrch->strength); +#if !UCONFIG_NO_BREAK_ITERATION + ubrk_close(strsrch->search->_breakIter_); + strsrch->search->_breakIter_ = ubrk_open(UBRK_CHARACTER, ucol_getLocale(collator, ULOC_VALID_LOCALE, status), + strsrch->search->text, strsrch->search->textLength, status); +#endif // if status is a failure, ucol_getAttribute returns UCOL_DEFAULT strsrch->toShift = ucol_getAttribute(collator, UCOL_ALTERNATE_HANDLING, status) == @@ -3469,7 +3471,7 @@ UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status) if (firstce == UCOL_NULLORDER || firstce == UCOL_IGNORABLE) { firstce = targetce; } - if (targetce == UCOL_IGNORABLE) { + if (targetce == UCOL_IGNORABLE && strsrch->strength != UCOL_PRIMARY) { continue; } if (targetce == patternce[0]) { diff --git a/icu4c/source/i18n/usrchimp.h b/icu4c/source/i18n/usrchimp.h index b7a1bfa08ea..4df21427807 100644 --- a/icu4c/source/i18n/usrchimp.h +++ b/icu4c/source/i18n/usrchimp.h @@ -26,7 +26,7 @@ struct USearch { int32_t textLength; // exact length UBool isOverlap; UBool isCanonicalMatch; - UBool breakIterGiven; + UBreakIterator *_breakIter_; //internal character breakiterator UBreakIterator *breakIter; // value USEARCH_DONE is the default value // if we are not at the start of the text or the end of the text, diff --git a/icu4c/source/test/cintltst/usrchdat.c b/icu4c/source/test/cintltst/usrchdat.c index f65b35b808b..af0fdf680b6 100644 --- a/icu4c/source/test/cintltst/usrchdat.c +++ b/icu4c/source/test/cintltst/usrchdat.c @@ -487,6 +487,8 @@ static const SearchData CONTRACTIONCANONICAL[] = { static const SearchData DIACTRICMATCH[] = { {"\\u0061\\u0061\\u00E1", "\\u0061\\u00E1", NULL, UCOL_SECONDARY, NULL, {1, -1}, {2}}, + {"\\u0020\\u00C2\\u0303\\u0020\\u0041\\u0061\\u1EAA\\u0041\\u0302\\u0303\\u00C2\\u0303\\u1EAB\\u0061\\u0302\\u0303\\u00E2\\u0303\\uD806\\uDC01\\u0300\\u0020", + "\\u00C2\\u0303", "LDE_AN_CX_EX_FX_HX_NX_S1", UCOL_PRIMARY, NULL, {1, 4, 5, 6, 7, 10, 12, 13, 16,-1}, {2, 1, 1, 1, 3, 2, 1, 3, 2}}, {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} }; diff --git a/icu4c/source/test/cintltst/usrchtst.c b/icu4c/source/test/cintltst/usrchtst.c index b9fce7f2a9a..9b2c9cf380a 100644 --- a/icu4c/source/test/cintltst/usrchtst.c +++ b/icu4c/source/test/cintltst/usrchtst.c @@ -1542,21 +1542,32 @@ static void TestDiactricMatch(void) UChar pattern[128]; UChar text[128]; UErrorCode status = U_ZERO_ERROR; - UStringSearch *strsrch; - uint32_t count = 0; + UStringSearch *strsrch = NULL; + UCollator *coll = NULL; + uint32_t count = 1; + UBool collatorCreated = FALSE; memset(pattern, 0, 128*sizeof(UChar)); memset(text, 0, 128*sizeof(UChar)); - - strsrch = usearch_open(pattern, 1, text, 1, uloc_getDefault(), NULL, - &status); - if (U_FAILURE(status)) { - log_err("Error opening string search %s\n", u_errorName(status)); - } - - ucol_setStrength(usearch_getCollator(strsrch), DIACTRICMATCH[count].strength); + while (DIACTRICMATCH[count].text != NULL) { + if (DIACTRICMATCH[count].collator != NULL) { + coll = ucol_openFromShortString(DIACTRICMATCH[count].collator, FALSE, NULL, &status); + collatorCreated = TRUE; + } else { + coll = getCollator(DIACTRICMATCH[count].collator); + collatorCreated = FALSE; + } + if (U_FAILURE(status)) { + log_err("Error opening string search collator %s\n", u_errorName(status)); + return; + } + strsrch = usearch_openFromCollator(pattern, 1, text, 1, coll, DIACTRICMATCH[count].breaker, &status); + if (U_FAILURE(status)) { + log_err("Error opening string search %s\n", u_errorName(status)); + return; + } u_unescape(DIACTRICMATCH[count].text, text, 128); u_unescape(DIACTRICMATCH[count].pattern, pattern, 128); usearch_setText(strsrch, text, -1, &status); @@ -1565,8 +1576,11 @@ static void TestDiactricMatch(void) log_err("Error at test number %d\n", count); } count ++; + if (collatorCreated) { + ucol_close(coll); + } + usearch_close(strsrch); } - usearch_close(strsrch); } static void TestCanonical(void) diff --git a/icu4c/source/test/intltest/srchtest.cpp b/icu4c/source/test/intltest/srchtest.cpp index cdec4db7363..cde9da4bdd3 100644 --- a/icu4c/source/test/intltest/srchtest.cpp +++ b/icu4c/source/test/intltest/srchtest.cpp @@ -1551,31 +1551,36 @@ void StringSearchTest::TestDiactricMatch() UChar temp[128]; UErrorCode status = U_ZERO_ERROR; int count = 0; + RuleBasedCollator* coll = NULL; + StringSearch *strsrch = NULL; UnicodeString pattern("pattern"); UnicodeString text("text"); - StringSearch *strsrch = new StringSearch(pattern, text, Locale::getDefault(), NULL, - status); - if (U_FAILURE(status)) { - errln("Error opening string search %s", u_errorName(status)); - return; - } - strsrch->getCollator()->setStrength(getECollationStrength(DIACTRICMATCH[count].strength)); + const SearchData *search; - while (DIACTRICMATCH[count].text != NULL) { - u_unescape(DIACTRICMATCH[count].text, temp, 128); + search = &(DIACTRICMATCH[count]); + while (search->text != NULL) { + coll = getCollator(search->collator); + coll->setStrength(getECollationStrength(search->strength)); + strsrch = new StringSearch(pattern, text, coll, getBreakIterator(search->breaker), status); + if (U_FAILURE(status)) { + errln("Error opening string search %s", u_errorName(status)); + return; + } + u_unescape(search->text, temp, 128); text.setTo(temp, u_strlen(temp)); - u_unescape(DIACTRICMATCH[count].pattern, temp, 128); + u_unescape(search->pattern, temp, 128); pattern.setTo(temp, u_strlen(temp)); strsrch->setText(text, status); strsrch->setPattern(pattern, status); - if (!assertEqualWithStringSearch(strsrch, &DIACTRICMATCH[count])) { + if (!assertEqualWithStringSearch(strsrch, search)) { errln("Error at test number %d", count); } - count ++; + search = &(DIACTRICMATCH[++count]); + delete strsrch; } - delete strsrch; + } void StringSearchTest::TestCanonical()