From 1ba4040cd8c92b6aeb2936ba3198f16cebe506c2 Mon Sep 17 00:00:00 2001 From: Syn Wee Quek Date: Wed, 24 Jul 2002 19:57:46 +0000 Subject: [PATCH] ICU-2018 breakiterator bug terminated X-SVN-Rev: 9318 --- icu4c/source/i18n/usearch.cpp | 12 +++++++++++- icu4c/source/test/cintltst/usrchdat.c | 3 +++ icu4c/source/test/cintltst/usrchtst.c | 5 ++++- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/icu4c/source/i18n/usearch.cpp b/icu4c/source/i18n/usearch.cpp index 964662cb711..77f1f52bbf3 100644 --- a/icu4c/source/i18n/usearch.cpp +++ b/icu4c/source/i18n/usearch.cpp @@ -428,11 +428,21 @@ inline UBool isBreakUnit(const UStringSearch *strsrch, int32_t start, for (int32_t count = 0; count < strsrch->pattern.CELength; count ++) { uint32_t ce = getCE(strsrch, ucol_next(coleiter, &status)); + if (ce == UCOL_IGNORABLE) { + count --; + continue; + } if (U_FAILURE(status) || ce != strsrch->pattern.CE[count]) { return FALSE; } } - if (ucol_next(coleiter, &status) != (int32_t)UCOL_NULLORDER) { + uint32_t nextce = ucol_next(coleiter, &status); + while (ucol_getOffset(coleiter) == (end - start) + && getCE(strsrch, nextce) == UCOL_IGNORABLE) { + nextce = ucol_next(coleiter, &status); + } + if (ucol_getOffset(coleiter) == (end - start) + && nextce != UCOL_NULLORDER) { // extra collation elements at the end of the match return FALSE; } diff --git a/icu4c/source/test/cintltst/usrchdat.c b/icu4c/source/test/cintltst/usrchdat.c index 1f315f0c19a..0935df057cb 100644 --- a/icu4c/source/test/cintltst/usrchdat.c +++ b/icu4c/source/test/cintltst/usrchdat.c @@ -47,6 +47,7 @@ static const SearchData BASIC[] = { {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}}, {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, + {"É", "e", NULL, UCOL_PRIMARY, NULL, {0, -1}, {1}}, {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} }; @@ -65,6 +66,7 @@ static const SearchData BREAKITERATOREXACT[] = { "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}}, {"testing that string ab\\u00e9cd does not match e", "e", NULL, UCOL_TERTIARY, "characterbreaker", {1, 28, 41, -1}, {1, 1, 1}}, + {"É", "e", "fr", UCOL_PRIMARY, "characterbreaker", {0, -1}, {1}}, {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} }; @@ -296,6 +298,7 @@ static const SearchData BREAKITERATORCANONICAL[] = { "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}}, {"testing that string ab\\u00e9cd does not match e", "e", NULL, UCOL_TERTIARY, "characterbreaker", {1, 28, 41, -1}, {1, 1, 1}}, + {"É", "e", "fr", UCOL_PRIMARY, "characterbreaker", {0, -1}, {1}}, {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} }; diff --git a/icu4c/source/test/cintltst/usrchtst.c b/icu4c/source/test/cintltst/usrchtst.c index ff75a67cf71..721b3288322 100644 --- a/icu4c/source/test/cintltst/usrchtst.c +++ b/icu4c/source/test/cintltst/usrchtst.c @@ -1004,12 +1004,14 @@ static void TestGetSetOffset(void) SearchData search = BASIC[index ++]; int32_t matchindex = search.offset[count]; int32_t textlength; - + u_unescape(search.text, text, 128); u_unescape(search.pattern, pattern, 32); status = U_ZERO_ERROR; usearch_setText(strsrch, text, -1, &status); usearch_setPattern(strsrch, pattern, -1, &status); + ucol_setStrength(usearch_getCollator(strsrch), search.strength); + usearch_reset(strsrch); while (U_SUCCESS(status) && matchindex >= 0) { uint32_t matchlength = search.size[count]; usearch_next(strsrch, &status); @@ -1065,6 +1067,7 @@ static void TestGetSetOffset(void) return; } } + ucol_setStrength(usearch_getCollator(strsrch), UCOL_TERTIARY); usearch_close(strsrch); close(); }