mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-21 12:40:02 +00:00
ICU-8495 Merging the fix for ticket#8415 (r29681, r29786) from trunk to maint-4-4
X-SVN-Rev: 30118
This commit is contained in:
parent
748c590392
commit
10553035a1
6 changed files with 454 additions and 194 deletions
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2001-2008 IBM and others. All rights reserved.
|
||||
* Copyright (C) 2001-2011 IBM and others. All rights reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 03/22/2000 helena Creation.
|
||||
|
@ -306,12 +306,12 @@ public:
|
|||
int32_t first(UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Returns the first index greater than <tt>position</tt> at which the
|
||||
* Returns the first index equal to or greater than <tt>position</tt> at which the
|
||||
* string text matches the search pattern. The iterator is adjusted so
|
||||
* that its current index (as returned by <tt>getOffset</tt>) is the
|
||||
* match position if one was found. If a match is not found,
|
||||
* <tt>USEARCH_DONE</tt> will be returned and the iterator will be
|
||||
* adjusted to the index USEARCH_DONE
|
||||
* match position if one was found.
|
||||
* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and the
|
||||
* iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
|
||||
* @param position where search is to start from. If position is less
|
||||
* than or greater than the text range for searching,
|
||||
* an U_INDEX_OUTOFBOUNDS_ERROR will be returned
|
||||
|
@ -346,6 +346,12 @@ public:
|
|||
* position if one was found. If a match is not found,
|
||||
* <tt>USEARCH_DONE</tt> will be returned and the iterator will be
|
||||
* adjusted to the index USEARCH_DONE
|
||||
* <p>
|
||||
* When <tt>USEARCH_OVERLAP</tt> option is off, the last index of the
|
||||
* result match is always less than <tt>position</tt>.
|
||||
* When <tt>USEARCH_OVERLAP</tt> is on, the result match may span across
|
||||
* <tt>position</tt>.
|
||||
*
|
||||
* @param position where search is to start from. If position is less
|
||||
* than or greater than the text range for searching,
|
||||
* an U_INDEX_OUTOFBOUNDS_ERROR will be returned
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2001-2010 IBM and others. All rights reserved.
|
||||
* Copyright (C) 2001-2011 IBM and others. All rights reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 06/28/2001 synwee Creation.
|
||||
|
@ -586,8 +586,8 @@ U_STABLE int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch,
|
|||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Returns the first index greater than <tt>position</tt> at which the string
|
||||
* text
|
||||
* Returns the first index equal to or greater than <tt>position</tt> at which
|
||||
* the string text
|
||||
* matches the search pattern. The iterator is adjusted so that its current
|
||||
* index (as returned by <tt>usearch_getOffset</tt>) is the match position if
|
||||
* one was found.
|
||||
|
@ -638,7 +638,12 @@ U_STABLE int32_t U_EXPORT2 usearch_last(UStringSearch *strsrch,
|
|||
* <p>
|
||||
* Search positions that may render incorrect results are highlighted in the
|
||||
* header comments. If position is less than or greater than the text range
|
||||
* for searching, an U_INDEX_OUTOFBOUNDS_ERROR will be returned
|
||||
* for searching, an U_INDEX_OUTOFBOUNDS_ERROR will be returned.
|
||||
* <p>
|
||||
* When <tt>USEARCH_OVERLAP</tt> option is off, the last index of the
|
||||
* result match is always less than <tt>position</tt>.
|
||||
* When <tt>USEARCH_OVERLAP</tt> is on, the result match may span across
|
||||
* <tt>position</tt>.
|
||||
* @param strsrch search iterator data struct
|
||||
* @param position index position the search is to begin at
|
||||
* @param status for errors if it occurs
|
||||
|
|
|
@ -59,9 +59,9 @@ inline void setColEIterOffset(UCollationElements *elems,
|
|||
}
|
||||
ci->fcdPosition = NULL;
|
||||
|
||||
ci->offsetReturn = NULL;
|
||||
ci->offsetReturn = NULL;
|
||||
ci->offsetStore = ci->offsetBuffer;
|
||||
ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
|
||||
ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -453,15 +453,15 @@ inline int16_t initializePattern(UStringSearch *strsrch, UErrorCode *status)
|
|||
|
||||
// Since the strength is primary, accents are ignored in the pattern.
|
||||
if (strsrch->strength == UCOL_PRIMARY) {
|
||||
pattern->hasPrefixAccents = 0;
|
||||
pattern->hasSuffixAccents = 0;
|
||||
pattern->hasPrefixAccents = 0;
|
||||
pattern->hasSuffixAccents = 0;
|
||||
} else {
|
||||
pattern->hasPrefixAccents = getFCD(patterntext, &index, length) >>
|
||||
SECOND_LAST_BYTE_SHIFT_;
|
||||
index = length;
|
||||
UTF_BACK_1(patterntext, 0, index);
|
||||
pattern->hasSuffixAccents = getFCD(patterntext, &index, length) &
|
||||
LAST_BYTE_MASK_;
|
||||
pattern->hasPrefixAccents = getFCD(patterntext, &index, length) >>
|
||||
SECOND_LAST_BYTE_SHIFT_;
|
||||
index = length;
|
||||
UTF_BACK_1(patterntext, 0, index);
|
||||
pattern->hasSuffixAccents = getFCD(patterntext, &index, length) &
|
||||
LAST_BYTE_MASK_;
|
||||
}
|
||||
|
||||
// ** HACK **
|
||||
|
@ -586,18 +586,18 @@ void checkBreakBoundary(const UStringSearch *strsrch, int32_t * /*start*/,
|
|||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
UBreakIterator *breakiterator = strsrch->search->internalBreakIter;
|
||||
if (breakiterator) {
|
||||
int32_t matchend = *end;
|
||||
//int32_t matchstart = *start;
|
||||
int32_t matchend = *end;
|
||||
//int32_t matchstart = *start;
|
||||
|
||||
if (!ubrk_isBoundary(breakiterator, matchend)) {
|
||||
*end = ubrk_following(breakiterator, matchend);
|
||||
if (!ubrk_isBoundary(breakiterator, matchend)) {
|
||||
*end = ubrk_following(breakiterator, matchend);
|
||||
}
|
||||
|
||||
/* Check the start of the matched text to make sure it doesn't have any accents
|
||||
* before it. This code may not be necessary and so it is commented out */
|
||||
/*if (!ubrk_isBoundary(breakiterator, matchstart) && !ubrk_isBoundary(breakiterator, matchstart-1)) {
|
||||
*start = ubrk_preceding(breakiterator, matchstart);
|
||||
}*/
|
||||
/* Check the start of the matched text to make sure it doesn't have any accents
|
||||
* before it. This code may not be necessary and so it is commented out */
|
||||
/*if (!ubrk_isBoundary(breakiterator, matchstart) && !ubrk_isBoundary(breakiterator, matchstart-1)) {
|
||||
*start = ubrk_preceding(breakiterator, matchstart);
|
||||
}*/
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -1020,7 +1020,7 @@ UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start,
|
|||
int32_t firstce = strsrch->pattern.CE[0];
|
||||
UCollationElements *coleiter = strsrch->textIter;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int32_t ce;
|
||||
int32_t ce;
|
||||
setColEIterOffset(coleiter, start);
|
||||
while ((ce = getCE(strsrch, ucol_next(coleiter, &status))) != firstce) {
|
||||
if (U_FAILURE(status) || ce == UCOL_NULLORDER) {
|
||||
|
@ -1040,12 +1040,12 @@ UBool hasAccentsAfterMatch(const UStringSearch *strsrch, int32_t start,
|
|||
count ++;
|
||||
}
|
||||
|
||||
ce = ucol_next(coleiter, &status);
|
||||
ce = ucol_next(coleiter, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
return TRUE;
|
||||
}
|
||||
if (ce != UCOL_NULLORDER && ce != UCOL_IGNORABLE) {
|
||||
ce = getCE(strsrch, ce);
|
||||
ce = getCE(strsrch, ce);
|
||||
}
|
||||
if (ce != UCOL_NULLORDER && ce != UCOL_IGNORABLE) {
|
||||
if (ucol_getOffset(coleiter) <= end) {
|
||||
|
@ -1280,7 +1280,7 @@ inline UBool checkNextExactMatch(UStringSearch *strsrch,
|
|||
|
||||
//Add breakiterator boundary check for primary strength search.
|
||||
if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) {
|
||||
checkBreakBoundary(strsrch, &start, textoffset);
|
||||
checkBreakBoundary(strsrch, &start, textoffset);
|
||||
}
|
||||
|
||||
// totally match, we will get rid of the ending ignorables.
|
||||
|
@ -2128,7 +2128,7 @@ inline UBool checkPreviousExactMatch(UStringSearch *strsrch,
|
|||
|
||||
//Add breakiterator boundary check for primary strength search.
|
||||
if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) {
|
||||
checkBreakBoundary(strsrch, textoffset, &end);
|
||||
checkBreakBoundary(strsrch, textoffset, &end);
|
||||
}
|
||||
|
||||
strsrch->search->matchedIndex = *textoffset;
|
||||
|
@ -2726,7 +2726,7 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
|
|||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
result->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(result->collator, ULOC_VALID_LOCALE, status), text, textlength, status);
|
||||
if (breakiter) {
|
||||
ubrk_setText(breakiter, text, textlength, status);
|
||||
ubrk_setText(breakiter, text, textlength, status);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -2781,7 +2781,7 @@ U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch)
|
|||
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
if (strsrch->search->internalBreakIter) {
|
||||
ubrk_close(strsrch->search->internalBreakIter);
|
||||
ubrk_close(strsrch->search->internalBreakIter);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -2939,7 +2939,7 @@ U_CAPI void U_EXPORT2 usearch_setBreakIterator(UStringSearch *strsrch,
|
|||
UErrorCode *status)
|
||||
{
|
||||
if (U_SUCCESS(*status) && strsrch) {
|
||||
strsrch->search->breakIter = breakiter;
|
||||
strsrch->search->breakIter = breakiter;
|
||||
if (breakiter) {
|
||||
ubrk_setText(breakiter, strsrch->search->text,
|
||||
strsrch->search->textLength, status);
|
||||
|
@ -3018,9 +3018,9 @@ U_CAPI void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch,
|
|||
strsrch->strength = ucol_getStrength(collator);
|
||||
strsrch->ceMask = getMask(strsrch->strength);
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
ubrk_close(strsrch->search->internalBreakIter);
|
||||
strsrch->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(collator, ULOC_VALID_LOCALE, status),
|
||||
strsrch->search->text, strsrch->search->textLength, status);
|
||||
ubrk_close(strsrch->search->internalBreakIter);
|
||||
strsrch->search->internalBreakIter = ubrk_open(UBRK_CHARACTER, ucol_getLocaleByType(collator, ULOC_VALID_LOCALE, status),
|
||||
strsrch->search->text, strsrch->search->textLength, status);
|
||||
#endif
|
||||
// if status is a failure, ucol_getAttribute returns UCOL_DEFAULT
|
||||
strsrch->toShift =
|
||||
|
@ -3348,14 +3348,6 @@ U_CAPI int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch,
|
|||
}
|
||||
}
|
||||
else {
|
||||
#if !BOYER_MOORE
|
||||
if (search->matchedIndex != USEARCH_DONE) {
|
||||
if (search->isOverlap) {
|
||||
ucol_setOffset(strsrch->textIter, search->matchedIndex + search->matchedLength - 2, status);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (strsrch->search->isCanonicalMatch) {
|
||||
// can't use exact here since extra accents are allowed.
|
||||
usearch_handlePreviousCanonical(strsrch, status);
|
||||
|
@ -3642,7 +3634,7 @@ static int32_t nextBoundaryAfter(UStringSearch *strsrch, int32_t startIndex) {
|
|||
}
|
||||
|
||||
if (breakiterator != NULL) {
|
||||
return ubrk_following(breakiterator, startIndex);
|
||||
return ubrk_following(breakiterator, startIndex);
|
||||
}
|
||||
|
||||
return startIndex;
|
||||
|
@ -3925,7 +3917,7 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
|
|||
for ( ; ; ++targetIxOffset ) {
|
||||
nextCEI = ceb.get(targetIx + targetIxOffset);
|
||||
maxLimit = nextCEI->lowIndex;
|
||||
// If we are at the end of the target too, match succeeds
|
||||
// If we are at the end of the target too, match succeeds
|
||||
if ( nextCEI->ce == UCOL_PROCESSED_NULLORDER ) {
|
||||
break;
|
||||
}
|
||||
|
@ -3933,19 +3925,19 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
|
|||
// it is part of the last target element matched by the pattern;
|
||||
// make sure it can be part of a match with the last patCE
|
||||
if ( (((nextCEI->ce) >> 32) & 0xFFFF0000UL) == 0 ) {
|
||||
UCompareCEsResult ceMatch = compareCE64s(nextCEI->ce, patCE, strsrch->search->elementComparisonType);
|
||||
if ( ceMatch == U_CE_NO_MATCH || ceMatch == U_CE_SKIP_PATN ) {
|
||||
found = FALSE;
|
||||
break;
|
||||
}
|
||||
UCompareCEsResult ceMatch = compareCE64s(nextCEI->ce, patCE, strsrch->search->elementComparisonType);
|
||||
if ( ceMatch == U_CE_NO_MATCH || ceMatch == U_CE_SKIP_PATN ) {
|
||||
found = FALSE;
|
||||
break;
|
||||
}
|
||||
// If lowIndex == highIndex, this target CE is part of an expansion of the last matched
|
||||
// target element, but it has non-zero primary weight => match fails
|
||||
} else if ( nextCEI->lowIndex == nextCEI->highIndex ) {
|
||||
found = false;
|
||||
break;
|
||||
found = false;
|
||||
break;
|
||||
// Else the target CE is not part of an expansion of the last matched element, match succeeds
|
||||
} else {
|
||||
break;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4036,7 +4028,6 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
|
|||
return found;
|
||||
}
|
||||
|
||||
|
||||
U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
|
||||
int32_t startIdx,
|
||||
int32_t *matchStart,
|
||||
|
@ -4166,27 +4157,7 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
|
|||
// an acceptable character range.
|
||||
//
|
||||
const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1 + targetIxOffset);
|
||||
const CEI *lastCEI = ceb.getPrevious(targetIx);
|
||||
const CEI *nextCEI = targetIx > 0? ceb.getPrevious(targetIx - 1) : NULL;
|
||||
|
||||
mStart = firstCEI->lowIndex;
|
||||
minLimit = lastCEI->lowIndex;
|
||||
maxLimit = targetIx > 0? nextCEI->lowIndex : lastCEI->highIndex;
|
||||
|
||||
// Look at the CE following the match. If it is UCOL_NULLORDER the match
|
||||
// extended to the end of input, and the match is good.
|
||||
|
||||
// Look at the high and low indices of the CE following the match. If
|
||||
// they are the same it means one of two things:
|
||||
// 1. The match extended to the last CE from the target text, which is OK, or
|
||||
// 2. The last CE that was part of the match is in an expansion that extends
|
||||
// to the first CE after the match. In this case, we reject the match.
|
||||
if (targetIx >= 1) {
|
||||
if (nextCEI->lowIndex == nextCEI->highIndex && nextCEI->ce != UCOL_PROCESSED_NULLORDER) {
|
||||
found = FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Check for the start of the match being within a combining sequence.
|
||||
// This can happen if the pattern itself begins with a combining char, and
|
||||
|
@ -4204,15 +4175,55 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
|
|||
found = FALSE;
|
||||
}
|
||||
|
||||
// Advance the match end position to the first acceptable match boundary.
|
||||
// This advances the index over any combining characters.
|
||||
mLimit = maxLimit;
|
||||
if (/*targetIx > 0 &&*/ minLimit < maxLimit) {
|
||||
int32_t nba = nextBoundaryAfter(strsrch, minLimit);
|
||||
|
||||
if (nba >= lastCEI->highIndex) {
|
||||
mLimit = nba;
|
||||
const CEI *lastCEI = ceb.getPrevious(targetIx);
|
||||
minLimit = lastCEI->lowIndex;
|
||||
|
||||
if (targetIx > 0) {
|
||||
// Look at the CE following the match. If it is UCOL_NULLORDER the match
|
||||
// extended to the end of input, and the match is good.
|
||||
|
||||
// Look at the high and low indices of the CE following the match. If
|
||||
// they are the same it means one of two things:
|
||||
// 1. The match extended to the last CE from the target text, which is OK, or
|
||||
// 2. The last CE that was part of the match is in an expansion that extends
|
||||
// to the first CE after the match. In this case, we reject the match.
|
||||
const CEI *nextCEI = ceb.getPrevious(targetIx - 1);
|
||||
|
||||
if (nextCEI->lowIndex == nextCEI->highIndex && nextCEI->ce != UCOL_PROCESSED_NULLORDER) {
|
||||
found = FALSE;
|
||||
}
|
||||
|
||||
mLimit = maxLimit = nextCEI->lowIndex;
|
||||
|
||||
// Advance the match end position to the first acceptable match boundary.
|
||||
// This advances the index over any combining characters.
|
||||
if (minLimit < maxLimit) {
|
||||
int32_t nba = nextBoundaryAfter(strsrch, minLimit);
|
||||
|
||||
if (nba >= lastCEI->highIndex) {
|
||||
mLimit = nba;
|
||||
}
|
||||
}
|
||||
|
||||
// If advancing to the end of a combining sequence in character indexing space
|
||||
// advanced us beyond the end of the match in CE space, reject this match.
|
||||
if (mLimit > maxLimit) {
|
||||
found = FALSE;
|
||||
}
|
||||
|
||||
// Make sure the end of the match is on a break boundary
|
||||
if (isBreakBoundary(strsrch, mLimit)) {
|
||||
found = FALSE;
|
||||
}
|
||||
|
||||
} else {
|
||||
// No non-ignorable CEs after this point.
|
||||
// The maximum position is detected by boundary after
|
||||
// the last non-ignorable CE. Combining sequence
|
||||
// across the start index will be truncated.
|
||||
int32_t nba = nextBoundaryAfter(strsrch, minLimit);
|
||||
mLimit = maxLimit = (nba > 0) && (startIdx > nba) ? nba : startIdx;
|
||||
}
|
||||
|
||||
#ifdef USEARCH_DEBUG
|
||||
|
@ -4221,16 +4232,6 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
|
|||
}
|
||||
#endif
|
||||
|
||||
// If advancing to the end of a combining sequence in character indexing space
|
||||
// advanced us beyond the end of the match in CE space, reject this match.
|
||||
if (mLimit > maxLimit) {
|
||||
found = FALSE;
|
||||
}
|
||||
|
||||
// Make sure the end of the match is on a break boundary
|
||||
if (isBreakBoundary(strsrch, mLimit)) {
|
||||
found = FALSE;
|
||||
}
|
||||
|
||||
if (! checkIdentical(strsrch, mStart, mLimit)) {
|
||||
found = FALSE;
|
||||
|
@ -4270,9 +4271,6 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
|
|||
return found;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// internal use methods declared in usrchimp.h -----------------------------
|
||||
|
||||
UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status)
|
||||
|
@ -4336,7 +4334,7 @@ UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status)
|
|||
//targetce = lastce;
|
||||
|
||||
while (found && patternceindex > 0) {
|
||||
lastce = targetce;
|
||||
lastce = targetce;
|
||||
targetce = ucol_previous(coleiter, status);
|
||||
if (U_FAILURE(*status) || targetce == UCOL_NULLORDER) {
|
||||
found = FALSE;
|
||||
|
@ -4569,7 +4567,7 @@ UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
|
|||
//targetce = firstce;
|
||||
|
||||
while (found && (patternceindex < patterncelength)) {
|
||||
firstce = targetce;
|
||||
firstce = targetce;
|
||||
targetce = ucol_next(coleiter, status);
|
||||
if (U_FAILURE(*status) || targetce == UCOL_NULLORDER) {
|
||||
found = FALSE;
|
||||
|
@ -4606,7 +4604,31 @@ UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
|
|||
setMatchNotFound(strsrch);
|
||||
return FALSE;
|
||||
#else
|
||||
int32_t textOffset = ucol_getOffset(strsrch->textIter);
|
||||
int32_t textOffset;
|
||||
|
||||
if (strsrch->search->isOverlap) {
|
||||
if (strsrch->search->matchedIndex != USEARCH_DONE) {
|
||||
textOffset = strsrch->search->matchedIndex + strsrch->search->matchedLength - 1;
|
||||
} else {
|
||||
// move the start position at the end of possible match
|
||||
initializePatternPCETable(strsrch, status);
|
||||
for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.PCELength - 1; nPCEs++) {
|
||||
int64_t pce = ucol_nextProcessed(strsrch->textIter, NULL, NULL, status);
|
||||
if (pce == UCOL_PROCESSED_NULLORDER) {
|
||||
// at the end of the text
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (U_FAILURE(*status)) {
|
||||
setMatchNotFound(strsrch);
|
||||
return FALSE;
|
||||
}
|
||||
textOffset = ucol_getOffset(strsrch->textIter);
|
||||
}
|
||||
} else {
|
||||
textOffset = ucol_getOffset(strsrch->textIter);
|
||||
}
|
||||
|
||||
int32_t start = -1;
|
||||
int32_t end = -1;
|
||||
|
||||
|
@ -4731,7 +4753,31 @@ UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
|
|||
setMatchNotFound(strsrch);
|
||||
return FALSE;
|
||||
#else
|
||||
int32_t textOffset = ucol_getOffset(strsrch->textIter);
|
||||
int32_t textOffset;
|
||||
|
||||
if (strsrch->search->isOverlap) {
|
||||
if (strsrch->search->matchedIndex != USEARCH_DONE) {
|
||||
textOffset = strsrch->search->matchedIndex + strsrch->search->matchedLength - 1;
|
||||
} else {
|
||||
// move the start position at the end of possible match
|
||||
initializePatternPCETable(strsrch, status);
|
||||
for (int32_t nPCEs = 0; nPCEs < strsrch->pattern.PCELength - 1; nPCEs++) {
|
||||
int64_t pce = ucol_nextProcessed(strsrch->textIter, NULL, NULL, status);
|
||||
if (pce == UCOL_PROCESSED_NULLORDER) {
|
||||
// at the end of the text
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (U_FAILURE(*status)) {
|
||||
setMatchNotFound(strsrch);
|
||||
return FALSE;
|
||||
}
|
||||
textOffset = ucol_getOffset(strsrch->textIter);
|
||||
}
|
||||
} else {
|
||||
textOffset = ucol_getOffset(strsrch->textIter);
|
||||
}
|
||||
|
||||
int32_t start = -1;
|
||||
int32_t end = -1;
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/********************************************************************
|
||||
* Copyright (c) 2001-2008,2010 International Business Machines
|
||||
* Copyright (c) 2001-2011 International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
********************************************************************
|
||||
* File USRCHDAT.H
|
||||
|
@ -65,6 +65,7 @@ static const SearchData BASIC[] = {
|
|||
#endif
|
||||
|
||||
{"\\u00c9", "e", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
|
||||
{"x a\\u0301", "a\\u0301", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, -1}, {2}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
|
@ -746,11 +747,11 @@ static const SearchData CONTRACTIONCANONICAL[] = {
|
|||
};
|
||||
|
||||
static const SearchData DIACRITICMATCH[] = {
|
||||
{"\\u03BA\\u03B1\\u03B9\\u0300\\u0020\\u03BA\\u03B1\\u1F76", "\\u03BA\\u03B1\\u03B9", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 5,-1}, {4, 3}},
|
||||
{"\\u0061\\u0061\\u00E1", "\\u0061\\u00E1", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}},
|
||||
{"\\u0020\\u00C2\\u0303\\u0020\\u0041\\u0061\\u1EAA\\u0041\\u0302\\u0303\\u00C2\\u0303\\u1EAB\\u0061\\u0302\\u0303\\u00E2\\u0303\\uD806\\uDC01\\u0300\\u0020",
|
||||
"\\u00C2\\u0303", "LDE_AN_CX_EX_FX_HX_NX_S1", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, 4, 5, 6, 7, 10, 12, 13, 16,-1}, {2, 1, 1, 1, 3, 2, 1, 3, 2}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
|
||||
{"\\u03BA\\u03B1\\u03B9\\u0300\\u0020\\u03BA\\u03B1\\u1F76", "\\u03BA\\u03B1\\u03B9", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 5,-1}, {4, 3}},
|
||||
{"\\u0061\\u0061\\u00E1", "\\u0061\\u00E1", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}},
|
||||
{"\\u0020\\u00C2\\u0303\\u0020\\u0041\\u0061\\u1EAA\\u0041\\u0302\\u0303\\u00C2\\u0303\\u1EAB\\u0061\\u0302\\u0303\\u00E2\\u0303\\uD806\\uDC01\\u0300\\u0020",
|
||||
"\\u00C2\\u0303", "LDE_AN_CX_EX_FX_HX_NX_S1", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, 4, 5, 6, 7, 10, 12, 13, 16,-1}, {2, 1, 1, 1, 3, 2, 1, 3, 2}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||
|
|
|
@ -337,25 +337,27 @@ static UBool assertEqualWithUStringSearch( UStringSearch *strsrch,
|
|||
const SearchData search)
|
||||
{
|
||||
int count = 0;
|
||||
int matchlimit = 0;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int32_t matchindex = search.offset[count];
|
||||
int32_t textlength;
|
||||
UChar matchtext[128];
|
||||
int32_t matchlength;
|
||||
int32_t nextStart;
|
||||
UBool isOverlap;
|
||||
|
||||
usearch_setAttribute(strsrch, USEARCH_ELEMENT_COMPARISON, search.elemCompare, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
log_err("Error setting USEARCH_ELEMENT_COMPARISON attribute %s\n", u_errorName(status));
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
if (usearch_getMatchedStart(strsrch) != USEARCH_DONE ||
|
||||
usearch_getMatchedLength(strsrch) != 0) {
|
||||
log_err("Error with the initialization of match start and length\n");
|
||||
}
|
||||
/* start of following matches */
|
||||
/* start of next matches */
|
||||
while (U_SUCCESS(status) && matchindex >= 0) {
|
||||
uint32_t matchlength = search.size[count];
|
||||
matchlength = search.size[count];
|
||||
usearch_next(strsrch, &status);
|
||||
if (matchindex != usearch_getMatchedStart(strsrch) ||
|
||||
matchlength != (uint32_t)usearch_getMatchedLength(strsrch)) {
|
||||
|
@ -363,7 +365,7 @@ static UBool assertEqualWithUStringSearch( UStringSearch *strsrch,
|
|||
log_err("Text: %s\n", str);
|
||||
str = toCharString(usearch_getPattern(strsrch, &textlength));
|
||||
log_err("Pattern: %s\n", str);
|
||||
log_err("Error following match found at idx,len %d,%d; expected %d,%d\n",
|
||||
log_err("Error next match found at idx %d (len:%d); expected %d (len:%d)\n",
|
||||
usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch),
|
||||
matchindex, matchlength);
|
||||
return FALSE;
|
||||
|
@ -375,7 +377,7 @@ static UBool assertEqualWithUStringSearch( UStringSearch *strsrch,
|
|||
memcmp(matchtext,
|
||||
usearch_getText(strsrch, &textlength) + matchindex,
|
||||
matchlength * sizeof(UChar)) != 0) {
|
||||
log_err("Error getting following matched text\n");
|
||||
log_err("Error getting next matched text\n");
|
||||
}
|
||||
|
||||
matchindex = search.offset[count];
|
||||
|
@ -387,18 +389,17 @@ static UBool assertEqualWithUStringSearch( UStringSearch *strsrch,
|
|||
log_err("Text: %s\n", str);
|
||||
str = toCharString(usearch_getPattern(strsrch, &textlength));
|
||||
log_err("Pattern: %s\n", str);
|
||||
log_err("Error following match found at %d %d\n",
|
||||
log_err("Error next match found at %d (len:%d); expected <NO MATCH>\n",
|
||||
usearch_getMatchedStart(strsrch),
|
||||
usearch_getMatchedLength(strsrch));
|
||||
return FALSE;
|
||||
}
|
||||
/* start of preceding matches */
|
||||
/* start of previous matches */
|
||||
count = count == 0 ? 0 : count - 1;
|
||||
matchlimit = count;
|
||||
matchindex = search.offset[count];
|
||||
|
||||
while (U_SUCCESS(status) && matchindex >= 0) {
|
||||
uint32_t matchlength = search.size[count];
|
||||
matchlength = search.size[count];
|
||||
usearch_previous(strsrch, &status);
|
||||
if (matchindex != usearch_getMatchedStart(strsrch) ||
|
||||
matchlength != (uint32_t)usearch_getMatchedLength(strsrch)) {
|
||||
|
@ -406,9 +407,9 @@ static UBool assertEqualWithUStringSearch( UStringSearch *strsrch,
|
|||
log_err("Text: %s\n", str);
|
||||
str = toCharString(usearch_getPattern(strsrch, &textlength));
|
||||
log_err("Pattern: %s\n", str);
|
||||
log_err("Error preceding match found at %d %d\n",
|
||||
usearch_getMatchedStart(strsrch),
|
||||
usearch_getMatchedLength(strsrch));
|
||||
log_err("Error previous match found at %d (len:%d); expected %d (len:%d)\n",
|
||||
usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch),
|
||||
matchindex, matchlength);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
@ -417,7 +418,7 @@ static UBool assertEqualWithUStringSearch( UStringSearch *strsrch,
|
|||
memcmp(matchtext,
|
||||
usearch_getText(strsrch, &textlength) + matchindex,
|
||||
matchlength * sizeof(UChar)) != 0) {
|
||||
log_err("Error getting preceding matched text\n");
|
||||
log_err("Error getting previous matched text\n");
|
||||
}
|
||||
|
||||
matchindex = count > 0 ? search.offset[count - 1] : -1;
|
||||
|
@ -430,12 +431,110 @@ static UBool assertEqualWithUStringSearch( UStringSearch *strsrch,
|
|||
log_err("Text: %s\n", str);
|
||||
str = toCharString(usearch_getPattern(strsrch, &textlength));
|
||||
log_err("Pattern: %s\n", str);
|
||||
log_err("Error preceding match found at %d %d\n",
|
||||
log_err("Error previous match found at %d (len:%d); expected <NO MATCH>\n",
|
||||
usearch_getMatchedStart(strsrch),
|
||||
usearch_getMatchedLength(strsrch));
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
||||
isOverlap = (usearch_getAttribute(strsrch, USEARCH_OVERLAP) == USEARCH_ON);
|
||||
|
||||
/* start of following matches */
|
||||
count = 0;
|
||||
matchindex = search.offset[count];
|
||||
nextStart = 0;
|
||||
|
||||
while (TRUE) {
|
||||
usearch_following(strsrch, nextStart, &status);
|
||||
|
||||
if (matchindex < 0) {
|
||||
if (usearch_getMatchedStart(strsrch) != USEARCH_DONE || usearch_getMatchedLength(strsrch) != 0) {
|
||||
char *str = toCharString(usearch_getText(strsrch, &textlength));
|
||||
log_err("Text: %s\n", str);
|
||||
str = toCharString(usearch_getPattern(strsrch, &textlength));
|
||||
log_err("Pattern: %s\n", str);
|
||||
log_err("Error following match starting at %d (overlap:%d) found at %d (len:%d); expected <NO MATCH>\n",
|
||||
nextStart, isOverlap,
|
||||
usearch_getMatchedStart(strsrch),
|
||||
usearch_getMatchedLength(strsrch));
|
||||
return FALSE;
|
||||
}
|
||||
/* no more matches */
|
||||
break;
|
||||
}
|
||||
|
||||
matchlength = search.size[count];
|
||||
if (usearch_getMatchedStart(strsrch) != matchindex
|
||||
|| usearch_getMatchedLength(strsrch) != matchlength
|
||||
|| U_FAILURE(status)) {
|
||||
char *str = toCharString(usearch_getText(strsrch, &textlength));
|
||||
log_err("Text: %s\n", str);
|
||||
str = toCharString(usearch_getPattern(strsrch, &textlength));
|
||||
log_err("Pattern: %s\n", str);
|
||||
log_err("Error following match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n",
|
||||
nextStart, isOverlap,
|
||||
usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch),
|
||||
matchindex, matchlength);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (isOverlap || usearch_getMatchedLength(strsrch) == 0) {
|
||||
nextStart = usearch_getMatchedStart(strsrch) + 1;
|
||||
} else {
|
||||
nextStart = usearch_getMatchedStart(strsrch) + usearch_getMatchedLength(strsrch);
|
||||
}
|
||||
|
||||
count++;
|
||||
matchindex = search.offset[count];
|
||||
}
|
||||
|
||||
/* start of preceding matches */
|
||||
count = -1; /* last non-negative offset index, could be -1 if no match */
|
||||
while (search.offset[count + 1] >= 0) {
|
||||
count++;
|
||||
}
|
||||
usearch_getText(strsrch, &nextStart);
|
||||
|
||||
while (TRUE) {
|
||||
usearch_preceding(strsrch, nextStart, &status);
|
||||
|
||||
if (count < 0) {
|
||||
if (usearch_getMatchedStart(strsrch) != USEARCH_DONE || usearch_getMatchedLength(strsrch) != 0) {
|
||||
char *str = toCharString(usearch_getText(strsrch, &textlength));
|
||||
log_err("Text: %s\n", str);
|
||||
str = toCharString(usearch_getPattern(strsrch, &textlength));
|
||||
log_err("Pattern: %s\n", str);
|
||||
log_err("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected <NO MATCH>\n",
|
||||
nextStart, isOverlap,
|
||||
usearch_getMatchedStart(strsrch),
|
||||
usearch_getMatchedLength(strsrch));
|
||||
return FALSE;
|
||||
}
|
||||
/* no more matches */
|
||||
break;
|
||||
}
|
||||
|
||||
matchindex = search.offset[count];
|
||||
matchlength = search.size[count];
|
||||
if (usearch_getMatchedStart(strsrch) != matchindex
|
||||
|| usearch_getMatchedLength(strsrch) != matchlength
|
||||
|| U_FAILURE(status)) {
|
||||
char *str = toCharString(usearch_getText(strsrch, &textlength));
|
||||
log_err("Text: %s\n", str);
|
||||
str = toCharString(usearch_getPattern(strsrch, &textlength));
|
||||
log_err("Pattern: %s\n", str);
|
||||
log_err("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n",
|
||||
nextStart, isOverlap,
|
||||
usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch),
|
||||
matchindex, matchlength);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
nextStart = matchindex;
|
||||
count--;
|
||||
}
|
||||
|
||||
usearch_setAttribute(strsrch, USEARCH_ELEMENT_COMPARISON, USEARCH_STANDARD_ELEMENT_COMPARISON, &status);
|
||||
return TRUE;
|
||||
}
|
||||
|
@ -1649,30 +1748,30 @@ static void TestDiacriticMatch(void)
|
|||
memset(text, 0, 128*sizeof(UChar));
|
||||
|
||||
strsrch = usearch_open(pattern, 1, text, 1, uloc_getDefault(), NULL, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
if (U_FAILURE(status)) {
|
||||
log_err_status(status, "Error opening string search %s\n", u_errorName(status));
|
||||
return;
|
||||
}
|
||||
|
||||
search = DIACRITICMATCH[count];
|
||||
while (search.text != NULL) {
|
||||
if (search.collator != NULL) {
|
||||
coll = ucol_openFromShortString(search.collator, FALSE, NULL, &status);
|
||||
} else {
|
||||
if (search.collator != NULL) {
|
||||
coll = ucol_openFromShortString(search.collator, FALSE, NULL, &status);
|
||||
} else {
|
||||
/* Always use "en_US" because some of these tests fail in Danish locales. */
|
||||
coll = ucol_open("en_US"/*uloc_getDefault()*/, &status);
|
||||
ucol_setStrength(coll, search.strength);
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
log_err("Error opening string search collator(\"%s\") %s\n", search.collator, u_errorName(status));
|
||||
return;
|
||||
}
|
||||
|
||||
usearch_setCollator(strsrch, coll, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
log_err("Error setting string search collator %s\n", u_errorName(status));
|
||||
return;
|
||||
}
|
||||
coll = ucol_open("en_US"/*uloc_getDefault()*/, &status);
|
||||
ucol_setStrength(coll, search.strength);
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
log_err("Error opening string search collator(\"%s\") %s\n", search.collator, u_errorName(status));
|
||||
return;
|
||||
}
|
||||
|
||||
usearch_setCollator(strsrch, coll, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
log_err("Error setting string search collator %s\n", u_errorName(status));
|
||||
return;
|
||||
}
|
||||
|
||||
u_unescape(search.text, text, 128);
|
||||
u_unescape(search.pattern, pattern, 128);
|
||||
|
@ -2499,54 +2598,54 @@ static void TestSearchForNull(void) {
|
|||
|
||||
static void TestStrengthIdentical(void)
|
||||
{
|
||||
UCollator *coll;
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
UStringSearch *search;
|
||||
|
||||
UCollator *coll;
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
UStringSearch *search;
|
||||
|
||||
UChar pattern[] = {0x05E9, 0x0591, 0x05E9};
|
||||
UChar text[] = {0x05E9, 0x0592, 0x05E9};
|
||||
int32_t pLen = sizeof (pattern) / sizeof(pattern[0]);
|
||||
int32_t tLen = sizeof(text) / sizeof (text[0]);
|
||||
int32_t expectedPos = 0;
|
||||
int32_t expectedLen = 3;
|
||||
int32_t expectedPos = 0;
|
||||
int32_t expectedLen = 3;
|
||||
|
||||
int32_t pos;
|
||||
int32_t len;
|
||||
int32_t pos;
|
||||
int32_t len;
|
||||
|
||||
/* create a US-English collator */
|
||||
coll = ucol_open ("en_US", &ec);
|
||||
coll = ucol_open ("en_US", &ec);
|
||||
|
||||
/* make sure we didn't fail. */
|
||||
TEST_ASSERT (ec);
|
||||
/* make sure we didn't fail. */
|
||||
TEST_ASSERT (ec);
|
||||
|
||||
ucol_setStrength( coll, UCOL_TERTIARY);
|
||||
|
||||
/* open a search for the pattern within the text, using the collator opened above */
|
||||
search = usearch_openFromCollator (pattern, pLen, text, tLen, coll, NULL, &ec);
|
||||
TEST_ASSERT (ec);
|
||||
/* open a search for the pattern within the text, using the collator opened above */
|
||||
search = usearch_openFromCollator (pattern, pLen, text, tLen, coll, NULL, &ec);
|
||||
TEST_ASSERT (ec);
|
||||
|
||||
if (coll != NULL && search != NULL) {
|
||||
pos = usearch_first(search, &ec);
|
||||
len = usearch_getMatchedLength(search);
|
||||
pos = usearch_first(search, &ec);
|
||||
len = usearch_getMatchedLength(search);
|
||||
|
||||
if(pos != expectedPos) {
|
||||
log_err("Expected search result: %d; Got instead: %d\n", expectedPos, pos);
|
||||
}
|
||||
|
||||
if(len != expectedLen) {
|
||||
log_err("Expected search result length: %d; Got instead: %d\n", expectedLen, len);
|
||||
}
|
||||
|
||||
if(pos != expectedPos) {
|
||||
log_err("Expected search result: %d; Got instead: %d\n", expectedPos, pos);
|
||||
}
|
||||
|
||||
if(len != expectedLen) {
|
||||
log_err("Expected search result length: %d; Got instead: %d\n", expectedLen, len);
|
||||
}
|
||||
|
||||
/* Now try it at strength == UCOL_IDENTICAL */
|
||||
ucol_setStrength(coll, UCOL_IDENTICAL);
|
||||
usearch_reset(search);
|
||||
usearch_reset(search);
|
||||
|
||||
pos = usearch_first(search, &ec);
|
||||
len = usearch_getMatchedLength(search);
|
||||
pos = usearch_first(search, &ec);
|
||||
len = usearch_getMatchedLength(search);
|
||||
|
||||
if(pos != -1) {
|
||||
log_err("Expected failure for strentgh = UCOL_IDENTICAL: got %d instead.\n", pos);
|
||||
}
|
||||
if(pos != -1) {
|
||||
log_err("Expected failure for strentgh = UCOL_IDENTICAL: got %d instead.\n", pos);
|
||||
}
|
||||
}
|
||||
|
||||
usearch_close(search);
|
||||
|
@ -2707,7 +2806,7 @@ void addSearchTest(TestNode** root)
|
|||
addTest(root, &TestNumeric, "tscoll/usrchtst/TestNumeric");
|
||||
addTest(root, &TestDiacriticMatch, "tscoll/usrchtst/TestDiacriticMatch");
|
||||
addTest(root, &TestForwardBackward, "tscoll/usrchtst/TestForwardBackward");
|
||||
addTest(root, &TestSearchForNull, "tscoll/usrchtst/TestSearchForNull");
|
||||
addTest(root, &TestSearchForNull, "tscoll/usrchtst/TestSearchForNull");
|
||||
addTest(root, &TestStrengthIdentical, "tscoll/usrchtst/TestStrengthIdentical");
|
||||
addTest(root, &TestPCEBuffer_100df, "tscoll/usrchtst/TestPCEBuffer/1_00df");
|
||||
addTest(root, &TestPCEBuffer_2surr, "tscoll/usrchtst/TestPCEBuffer/2_dfff");
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2001-2010, International Business Machines Corporation
|
||||
* Copyright (C) 2001-2011, International Business Machines Corporation
|
||||
* and others. All Rights Reserved.
|
||||
****************************************************************************/
|
||||
|
||||
|
@ -252,11 +252,12 @@ Collator::ECollationStrength StringSearchTest::getECollationStrength(
|
|||
UBool StringSearchTest::assertEqualWithStringSearch(StringSearch *strsrch,
|
||||
const SearchData *search)
|
||||
{
|
||||
int count = 0;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int32_t matchindex = search->offset[count];
|
||||
UnicodeString matchtext;
|
||||
|
||||
int32_t count = 0;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int32_t matchindex = search->offset[count];
|
||||
UnicodeString matchtext;
|
||||
int32_t matchlength;
|
||||
|
||||
strsrch->setAttribute(USEARCH_ELEMENT_COMPARISON, search->elemCompare, status);
|
||||
if (U_FAILURE(status)) {
|
||||
errln("Error setting USEARCH_ELEMENT_COMPARISON attribute %s", u_errorName(status));
|
||||
|
@ -267,17 +268,18 @@ UBool StringSearchTest::assertEqualWithStringSearch(StringSearch *strsrch,
|
|||
strsrch->getMatchedLength() != 0) {
|
||||
errln("Error with the initialization of match start and length");
|
||||
}
|
||||
// start of following matches
|
||||
|
||||
// start of next matches
|
||||
while (U_SUCCESS(status) && matchindex >= 0) {
|
||||
int32_t matchlength = search->size[count];
|
||||
matchlength = search->size[count];
|
||||
strsrch->next(status);
|
||||
if (matchindex != strsrch->getMatchedStart() ||
|
||||
matchlength != strsrch->getMatchedLength()) {
|
||||
char *str = toCharString(strsrch->getText());
|
||||
errln("Text: %s", str);
|
||||
str = toCharString(strsrch->getPattern());
|
||||
infoln("Pattern: %s", str);
|
||||
infoln("Error following match found at idx,len %d,%d; expected %d,%d",
|
||||
errln("Pattern: %s", str);
|
||||
errln("Error next match found at %d (len:%d); expected %d (len:%d)",
|
||||
strsrch->getMatchedStart(), strsrch->getMatchedLength(),
|
||||
matchindex, matchlength);
|
||||
return FALSE;
|
||||
|
@ -291,7 +293,7 @@ UBool StringSearchTest::assertEqualWithStringSearch(StringSearch *strsrch,
|
|||
matchindex + matchlength,
|
||||
matchtext, 0,
|
||||
matchtext.length())) {
|
||||
errln("Error getting following matched text");
|
||||
errln("Error getting next matched text");
|
||||
}
|
||||
|
||||
matchindex = search->offset[count];
|
||||
|
@ -303,15 +305,16 @@ UBool StringSearchTest::assertEqualWithStringSearch(StringSearch *strsrch,
|
|||
errln("Text: %s", str);
|
||||
str = toCharString(strsrch->getPattern());
|
||||
errln("Pattern: %s", str);
|
||||
errln("Error following match found at %d %d",
|
||||
errln("Error next match found at %d (len:%d); expected <NO MATCH>",
|
||||
strsrch->getMatchedStart(), strsrch->getMatchedLength());
|
||||
return FALSE;
|
||||
}
|
||||
// start of preceding matches
|
||||
|
||||
// start of previous matches
|
||||
count = count == 0 ? 0 : count - 1;
|
||||
matchindex = search->offset[count];
|
||||
while (U_SUCCESS(status) && matchindex >= 0) {
|
||||
int32_t matchlength = search->size[count];
|
||||
matchlength = search->size[count];
|
||||
strsrch->previous(status);
|
||||
if (matchindex != strsrch->getMatchedStart() ||
|
||||
matchlength != strsrch->getMatchedLength()) {
|
||||
|
@ -319,8 +322,9 @@ UBool StringSearchTest::assertEqualWithStringSearch(StringSearch *strsrch,
|
|||
errln("Text: %s", str);
|
||||
str = toCharString(strsrch->getPattern());
|
||||
errln("Pattern: %s", str);
|
||||
errln("Error following match found at %d %d",
|
||||
strsrch->getMatchedStart(), strsrch->getMatchedLength());
|
||||
errln("Error previous match found at %d (len:%d); expected %d (len:%d)",
|
||||
strsrch->getMatchedStart(), strsrch->getMatchedLength(),
|
||||
matchindex, matchlength);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
@ -331,7 +335,7 @@ UBool StringSearchTest::assertEqualWithStringSearch(StringSearch *strsrch,
|
|||
matchindex + matchlength,
|
||||
matchtext, 0,
|
||||
matchtext.length())) {
|
||||
errln("Error getting following matched text");
|
||||
errln("Error getting previous matched text");
|
||||
}
|
||||
|
||||
matchindex = count > 0 ? search->offset[count - 1] : -1;
|
||||
|
@ -341,13 +345,112 @@ UBool StringSearchTest::assertEqualWithStringSearch(StringSearch *strsrch,
|
|||
if (strsrch->getMatchedStart() != USEARCH_DONE ||
|
||||
strsrch->getMatchedLength() != 0) {
|
||||
char *str = toCharString(strsrch->getText());
|
||||
errln("Text: %s", str);
|
||||
str = toCharString(strsrch->getPattern());
|
||||
errln("Pattern: %s", str);
|
||||
errln("Error following match found at %d %d",
|
||||
strsrch->getMatchedStart(), strsrch->getMatchedLength());
|
||||
return FALSE;
|
||||
errln("Text: %s", str);
|
||||
str = toCharString(strsrch->getPattern());
|
||||
errln("Pattern: %s", str);
|
||||
errln("Error previous match found at %d (len:%d); expected <NO MATCH>",
|
||||
strsrch->getMatchedStart(), strsrch->getMatchedLength());
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
int32_t nextStart;
|
||||
UBool isOverlap = (strsrch->getAttribute(USEARCH_OVERLAP) == USEARCH_ON);
|
||||
|
||||
// start of following matches
|
||||
count = 0;
|
||||
matchindex = search->offset[count];
|
||||
nextStart = 0;
|
||||
|
||||
while (TRUE) {
|
||||
strsrch->following(nextStart, status);
|
||||
|
||||
if (matchindex < 0) {
|
||||
if (strsrch->getMatchedStart() != USEARCH_DONE ||
|
||||
strsrch->getMatchedLength() != 0) {
|
||||
char *str = toCharString(strsrch->getText());
|
||||
errln("Text: %s", str);
|
||||
str = toCharString(strsrch->getPattern());
|
||||
errln("Pattern: %s", str);
|
||||
errln("Error following match starting at %d (overlap:%d) found at %d (len:%d); expected <NO MATCH>",
|
||||
nextStart, isOverlap,
|
||||
strsrch->getMatchedStart(), strsrch->getMatchedLength());
|
||||
return FALSE;
|
||||
}
|
||||
// no more matches
|
||||
break;
|
||||
}
|
||||
|
||||
matchlength = search->size[count];
|
||||
if (strsrch->getMatchedStart() != matchindex
|
||||
|| strsrch->getMatchedLength() != matchlength
|
||||
|| U_FAILURE(status)) {
|
||||
char *str = toCharString(strsrch->getText());
|
||||
errln("Text: %s\n", str);
|
||||
str = toCharString(strsrch->getPattern());
|
||||
errln("Pattern: %s\n", str);
|
||||
errln("Error following match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n",
|
||||
nextStart, isOverlap,
|
||||
strsrch->getMatchedStart(), strsrch->getMatchedLength(),
|
||||
matchindex, matchlength);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (isOverlap || strsrch->getMatchedLength() == 0) {
|
||||
nextStart = strsrch->getMatchedStart() + 1;
|
||||
} else {
|
||||
nextStart = strsrch->getMatchedStart() + strsrch->getMatchedLength();
|
||||
}
|
||||
|
||||
count++;
|
||||
matchindex = search->offset[count];
|
||||
}
|
||||
|
||||
// start preceding matches
|
||||
count = -1; // last non-negative offset index, could be -1 if no match
|
||||
while (search->offset[count + 1] >= 0) {
|
||||
count++;
|
||||
}
|
||||
nextStart = strsrch->getText().length();
|
||||
|
||||
while (TRUE) {
|
||||
strsrch->preceding(nextStart, status);
|
||||
|
||||
if (count < 0) {
|
||||
if (strsrch->getMatchedStart() != USEARCH_DONE || strsrch->getMatchedLength() != 0) {
|
||||
char *str = toCharString(strsrch->getText());
|
||||
errln("Text: %s\n", str);
|
||||
str = toCharString(strsrch->getPattern());
|
||||
errln("Pattern: %s\n", str);
|
||||
errln("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected <NO MATCH>\n",
|
||||
nextStart, isOverlap,
|
||||
strsrch->getMatchedStart(),
|
||||
strsrch->getMatchedLength());
|
||||
return FALSE;
|
||||
}
|
||||
// no more matches
|
||||
break;
|
||||
}
|
||||
|
||||
matchindex = search->offset[count];
|
||||
matchlength = search->size[count];
|
||||
if (strsrch->getMatchedStart() != matchindex
|
||||
|| strsrch->getMatchedLength() != matchlength
|
||||
|| U_FAILURE(status)) {
|
||||
char *str = toCharString(strsrch->getText());
|
||||
errln("Text: %s\n", str);
|
||||
str = toCharString(strsrch->getPattern());
|
||||
errln("Pattern: %s\n", str);
|
||||
errln("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n",
|
||||
nextStart, isOverlap,
|
||||
strsrch->getMatchedStart(), strsrch->getMatchedLength(),
|
||||
matchindex, matchlength);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
nextStart = matchindex;
|
||||
count--;
|
||||
}
|
||||
|
||||
strsrch->setAttribute(USEARCH_ELEMENT_COMPARISON, USEARCH_STANDARD_ELEMENT_COMPARISON, status);
|
||||
return TRUE;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue