From 72601a817c9a46a4a4f6d63c1fee0b3f14c1375a Mon Sep 17 00:00:00 2001 From: Syn Wee Quek Date: Tue, 3 Dec 2002 23:59:06 +0000 Subject: [PATCH] ICU-2135 modified checks for unsafe characters X-SVN-Rev: 10460 --- icu4c/source/i18n/usearch.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/icu4c/source/i18n/usearch.cpp b/icu4c/source/i18n/usearch.cpp index b42455762d7..4cbc6d3e83c 100644 --- a/icu4c/source/i18n/usearch.cpp +++ b/icu4c/source/i18n/usearch.cpp @@ -1003,6 +1003,13 @@ UBool checkNextExactContractionMatch(UStringSearch *strsrch, const UChar *text = strsrch->search->text; // This part checks if either ends of the match contains potential // contraction. If so we'll have to iterate through them + // The start contraction needs to be checked since ucol_previous dumps + // all characters till the first safe character into the buffer. + // *start + 1 is used to test for the unsafe characters instead of *start + // because ucol_previous takes all unsafe characters till the first safe + // character i.e. *start. So by testing *start + 1, we can estimate if + // excess prefix characters have been included in the potential search + // results. if ((*end < textlength && ucol_unsafeCP(text[*end], collator)) || (*start + 1 < textlength && ucol_unsafeCP(text[*start + 1], collator))) { @@ -1832,13 +1839,15 @@ UBool checkPreviousExactContractionMatch(UStringSearch *strsrch, { UCollationElements *coleiter = strsrch->textIter; int32_t textlength = strsrch->search->textLength; - int32_t temp = *end; + int32_t temp = *end; const UCollator *collator = strsrch->collator; const UChar *text = strsrch->search->text; - // This part checks if either ends of the match contains potential + // This part checks if the start of the match contains a potential // contraction. 
If so we'll have to iterate through them - if ((*end < textlength && ucol_unsafeCP(text[*end], collator)) || - (*start < textlength && ucol_unsafeCP(text[*start + 1], collator))) { + // Since we used ucol_next while previously looking for the potential + // match, this guarantees that our end will not be a partial contraction, + // or a partial supplementary character. + if (*start < textlength && ucol_unsafeCP(text[*start], collator)) { int32_t expansion = getExpansionSuffix(coleiter); UBool expandflag = expansion > 0; setColEIterOffset(coleiter, *end);