ICU-7093 (merge from branch) Implement asymmetric search, add tests, kill some other warnings

X-SVN-Rev: 27714
2025-04-14 17:24:01 +00:00 · 2010-02-27 01:01:45 +00:00 · 2010-02-27 01:01:45 +00:00 · ad87082ae1
commit ad87082ae1
parent cef470f60d
5 changed files with 571 additions and 329 deletions
--- a/icu4c/source/i18n/search.cpp
+++ b/icu4c/source/i18n/search.cpp
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 2001-2008 IBM and others. All rights reserved.
+*   Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
 **********************************************************************
 *   Date        Name        Description
 *  03/22/2000   helena      Creation.
@ -29,6 +29,7 @@ SearchIterator::SearchIterator(const SearchIterator &other)
    m_search_->breakIter        = other.m_search_->breakIter;
    m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
    m_search_->isOverlap        = other.m_search_->isOverlap;
+    m_search_->elementComparisonType = other.m_search_->elementComparisonType;
    m_search_->matchedIndex     = other.m_search_->matchedIndex;
    m_search_->matchedLength    = other.m_search_->matchedLength;
    m_search_->text             = other.m_search_->text;
@ -57,6 +58,13 @@ void SearchIterator::setAttribute(USearchAttribute       attribute,
        case USEARCH_CANONICAL_MATCH :
            m_search_->isCanonicalMatch = (value == USEARCH_ON ? TRUE : FALSE);
            break;
+        case USEARCH_ELEMENT_COMPARISON :
+            if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
+                m_search_->elementComparisonType = (int16_t)value;
+            } else {
+                m_search_->elementComparisonType = 0;
+            }
+            break;
        default:
            status = U_ILLEGAL_ARGUMENT_ERROR;
        }
@ -75,6 +83,15 @@ USearchAttributeValue SearchIterator::getAttribute(
    case USEARCH_CANONICAL_MATCH :
        return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON : 
                                                                USEARCH_OFF);
+    case USEARCH_ELEMENT_COMPARISON :
+        {
+            int16_t value = m_search_->elementComparisonType;
+            if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
+                return (USearchAttributeValue)value;
+            } else {
+                return USEARCH_STANDARD_ELEMENT_COMPARISON;
+            }
+        }
    default :
        return USEARCH_DEFAULT;
    }
@ -167,6 +184,7 @@ UBool SearchIterator::operator==(const SearchIterator &that) const
    return (m_breakiterator_            == that.m_breakiterator_ &&
            m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
            m_search_->isOverlap        == that.m_search_->isOverlap &&
+            m_search_->elementComparisonType == that.m_search_->elementComparisonType &&
            m_search_->matchedIndex     == that.m_search_->matchedIndex &&
            m_search_->matchedLength    == that.m_search_->matchedLength &&
            m_search_->textLength       == that.m_search_->textLength &&
@ -315,6 +333,7 @@ void SearchIterator::reset()
    setOffset(0, status);
    m_search_->isOverlap          = FALSE;
    m_search_->isCanonicalMatch   = FALSE;
+    m_search_->elementComparisonType = 0;
    m_search_->isForwardSearching = TRUE;
    m_search_->reset              = TRUE;
 }
@ -327,6 +346,7 @@ SearchIterator::SearchIterator()
    m_search_->breakIter          = NULL;
    m_search_->isOverlap          = FALSE;
    m_search_->isCanonicalMatch   = FALSE;
+    m_search_->elementComparisonType = 0;
    m_search_->isForwardSearching = TRUE;
    m_search_->reset              = TRUE;
    m_search_->matchedIndex       = USEARCH_DONE;
@ -345,6 +365,7 @@ SearchIterator::SearchIterator(const UnicodeString &text,
    m_search_->breakIter          = NULL;
    m_search_->isOverlap          = FALSE;
    m_search_->isCanonicalMatch   = FALSE;
+    m_search_->elementComparisonType = 0;
    m_search_->isForwardSearching = TRUE;
    m_search_->reset              = TRUE;
    m_search_->matchedIndex       = USEARCH_DONE;
@ -361,6 +382,7 @@ SearchIterator::SearchIterator(CharacterIterator &text,
    m_search_->breakIter          = NULL;
    m_search_->isOverlap          = FALSE;
    m_search_->isCanonicalMatch   = FALSE;
+    m_search_->elementComparisonType = 0;
    m_search_->isForwardSearching = TRUE;
    m_search_->reset              = TRUE;
    m_search_->matchedIndex       = USEARCH_DONE;
@ -381,6 +403,7 @@ SearchIterator & SearchIterator::operator=(const SearchIterator &that)
        m_search_->breakIter        = that.m_search_->breakIter;
        m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
        m_search_->isOverlap        = that.m_search_->isOverlap;
+        m_search_->elementComparisonType = that.m_search_->elementComparisonType;
        m_search_->matchedIndex     = that.m_search_->matchedIndex;
        m_search_->matchedLength    = that.m_search_->matchedLength;
        m_search_->text             = that.m_search_->text;
--- a/icu4c/source/i18n/usearch.cpp
+++ b/icu4c/source/i18n/usearch.cpp
@ -25,6 +25,7 @@
 U_NAMESPACE_USE

 // don't use Boyer-Moore
+// (and if we decide to turn this on again there are several new TODOs that will need to be addressed)
 #define BOYER_MOORE 0

 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
@ -3723,6 +3724,68 @@ static UBool onBreakBoundaries(const UStringSearch *strsrch, int32_t start, int3
 }
 #endif

+typedef enum {                 // Outcome of compareCE64s() for one target CE / pattern CE pair.
+    U_CE_MATCH = -1,           // The CEs are identical or are treated as matching.
+    U_CE_NO_MATCH = 0,         // The CEs do not match. Callers test "> U_CE_NO_MATCH" to detect the two skip results, so keep this ordering.
+    U_CE_SKIP_TARG,            // Skip the target CE: the caller retries the same pattern CE against the next target CE.
+    U_CE_SKIP_PATN             // Skip the pattern CE: the caller retries the same target CE against the next pattern CE.
+} UCompareCEsResult;
+#define U_CE_LEVEL2_BASE 0x00000005 /* level-2 value treated as a wildcard; compared with the low 16 bits of the upper 32-bit half of a CE */
+#define U_CE_LEVEL3_BASE 0x00050000 /* level-3 value treated as a wildcard; compared with bits 16-31 of the lower 32-bit half of a CE */
+// Compares one 64-bit target CE with one pattern CE. compareType 0 requires exact equality; other values enable the skip/wildcard rules below.
+static UCompareCEsResult compareCE64s(int64_t targCE, int64_t patCE, int16_t compareType) {
+    if (targCE == patCE) { // identical CEs match under every compareType
+        return U_CE_MATCH;
+    }
+    if (compareType == 0) { // standard comparison: anything short of equality is a mismatch
+        return U_CE_NO_MATCH;
+    }
+    // From here on the CEs differ and compareType is non-zero: compare them one level at a time.
+    int64_t targCEshifted = targCE >> 32; // upper 32-bit half, which holds levels 1 and 2
+    int64_t patCEshifted = patCE >> 32;
+    int64_t mask;
+    // Level 1: the top 16 bits of the 64-bit CE.
+    mask = 0xFFFF0000;
+    int32_t targLev1 = targCEshifted & mask;
+    int32_t patLev1 = patCEshifted & mask;
+    if ( targLev1 != patLev1 ) {
+        if ( targLev1 == 0 ) { // target CE has no level-1 weight: tell the caller to skip it
+            return U_CE_SKIP_TARG;
+        }
+        if ( patLev1 == 0 && compareType == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD ) { // a pattern CE may be skipped only in this mode
+            return U_CE_SKIP_PATN;
+        }
+        return U_CE_NO_MATCH;
+    }
+    // Level 2: the next 16 bits (low 16 bits of the upper half). Level 1 is known to be equal here.
+    mask = 0x0000FFFF;
+    int32_t targLev2 = targCEshifted & mask;
+    int32_t patLev2 = patCEshifted & mask;
+    if ( targLev2 != patLev2 ) {
+        if ( targLev2 == 0 ) { // target CE has no level-2 weight: tell the caller to skip it
+            return U_CE_SKIP_TARG;
+        }
+        if ( patLev2 == 0 && compareType == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD ) { // a pattern CE may be skipped only in this mode
+            return U_CE_SKIP_PATN;
+        }
+        return (patLev2 == U_CE_LEVEL2_BASE || (compareType == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD && targLev2 == U_CE_LEVEL2_BASE) )?
+            U_CE_MATCH: U_CE_NO_MATCH; // tolerated when the pattern weight is the base value, or the target weight is in ANY_BASE mode; level 3 is not examined
+    }
+    // Level 3: bits 16-31 of the lower 32-bit half. Levels 1 and 2 are known to be equal here.
+    mask = 0xFFFF0000;
+    int32_t targLev3 = targCE & mask;
+    int32_t patLev3 = patCE & mask;
+    if ( targLev3 != patLev3 ) {
+        return (patLev3 == U_CE_LEVEL3_BASE || (compareType == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD && targLev3 == U_CE_LEVEL3_BASE) )?
+            U_CE_MATCH: U_CE_NO_MATCH; // same base-value wildcard rule as level 2
+   }
+    // Levels 1-3 agree, so the CEs differ only in their low 16 bits; that difference is ignored.
+    return U_CE_MATCH;
+}
+
+#if BOYER_MOORE
+// TODO: #if BOYER_MOORE, need 32-bit version of compareCE64s
+#endif

 U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch  *strsrch,
                                       int32_t        startIdx,
@ -3779,22 +3842,47 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch  *strsrch,

    // Outer loop moves over match starting positions in the
    //      target CE space.
+    // Here we see the target as a sequence of collation elements, resulting from the following:
+    // 1. Target characters were decomposed, and (if appropriate) other compressions and expansions are applied
+    //    (for example, digraphs such as IJ may be broken into two characters).
+    // 2. An int64_t CE weight is determined for each resulting unit (high 16 bits are primary strength, next
+    //    16 bits are secondary, next 16 (the high 16 bits of the low 32-bit half) are tertiary). Any of these
+    //    fields that are for strengths below that of the collator are set to 0. If this makes the int64_t
+    //    CE weight 0 (as for a combining diacritic with secondary weight when the collator strength is primary),
+    //    then the CE is deleted, so the following code sees only CEs that are relevant.
+    // For each CE, the lowIndex and highIndex correspond to where this CE begins and ends in the original text.
+    // If lowIndex==highIndex, either the CE resulted from an expansion/decomposition of one of the original text
+    // characters, or the CE marks the limit of the target text (in which case the CE weight is UCOL_PROCESSED_NULLORDER).
+    //
    for(targetIx=0; ; targetIx++)
    {
        found = TRUE;
        //  Inner loop checks for a match beginning at each
        //  position from the outer loop.
+        int32_t targetIxOffset = 0;
+        int64_t patCE = 0;
        for (patIx=0; patIx<strsrch->pattern.PCELength; patIx++) {
-            int64_t patCE = strsrch->pattern.PCE[patIx];
-            targetCEI = ceb.get(targetIx+patIx);
+            patCE = strsrch->pattern.PCE[patIx];
+            targetCEI = ceb.get(targetIx+patIx+targetIxOffset);
            //  Compare CE from target string with CE from the pattern.
-            //    Note that the target CE will be UCOL_NULLORDER if we reach the end of input,
+            //    Note that the target CE will be UCOL_PROCESSED_NULLORDER if we reach the end of input,
            //    which will fail the compare, below.
-            if (targetCEI->ce != patCE) {
+            UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsrch->search->elementComparisonType);
+            if ( ceMatch == U_CE_NO_MATCH ) {
                found = FALSE;
                break;
+            } else if ( ceMatch > U_CE_NO_MATCH ) {
+                if ( ceMatch == U_CE_SKIP_TARG ) {
+                    // redo with same patCE, next targCE
+                    patIx--;
+                    targetIxOffset++;
+                } else { // ceMatch == U_CE_SKIP_PATN
+                    // redo with same targCE, next patCE
+                    targetIxOffset--;
+                }
            }
        }
+        targetIxOffset += strsrch->pattern.PCELength; // this is now the offset in target CE space to end of the match so far

        if (!found && targetCEI->ce != UCOL_PROCESSED_NULLORDER) {
            // No match at this targetIx.  Try again at the next.
@ -3813,12 +3901,10 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch  *strsrch,
        //     an acceptable character range.
        //
        const CEI *firstCEI = ceb.get(targetIx);
-        const CEI *lastCEI  = ceb.get(targetIx + strsrch->pattern.PCELength - 1);
-        const CEI *nextCEI  = ceb.get(targetIx + strsrch->pattern.PCELength);
+        const CEI *lastCEI  = ceb.get(targetIx + targetIxOffset - 1);

        mStart   = firstCEI->lowIndex;
        minLimit = lastCEI->lowIndex;
-        maxLimit = nextCEI->lowIndex;

        // Look at the CE following the match.  If it is UCOL_NULLORDER the match
        //   extended to the end of input, and the match is good.
@ -3828,8 +3914,40 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch  *strsrch,
        //    1. The match extended to the last CE from the target text, which is OK, or
        //    2. The last CE that was part of the match is in an expansion that extends
        //       to the first CE after the match. In this case, we reject the match.
-        if (nextCEI->lowIndex == nextCEI->highIndex && nextCEI->ce != UCOL_PROCESSED_NULLORDER) {
-            found = FALSE;
+        if (strsrch->search->elementComparisonType == 0) {
+            const CEI *nextCEI  = ceb.get(targetIx + targetIxOffset);
+            maxLimit = nextCEI->lowIndex;
+            if (nextCEI->lowIndex == nextCEI->highIndex && nextCEI->ce != UCOL_PROCESSED_NULLORDER) {
+                found = FALSE;
+            }
+        } else {
+            const CEI *nextCEI;
+            for ( ; ; ++targetIxOffset ) {
+                nextCEI = ceb.get(targetIx + targetIxOffset);
+                maxLimit = nextCEI->lowIndex;
+				// If we are at the end of the target too, match succeeds
+                if (  nextCEI->ce == UCOL_PROCESSED_NULLORDER ) {
+                    break;
+                }
+                // As long as the next CE has primary weight of 0,
+                // it is part of the last target element matched by the pattern;
+                // make sure it can be part of a match with the last patCE
+                if ( (((nextCEI->ce) >> 32) & 0xFFFF0000UL) == 0 ) {
+                	UCompareCEsResult ceMatch = compareCE64s(nextCEI->ce, patCE, strsrch->search->elementComparisonType);
+                	if ( ceMatch == U_CE_NO_MATCH || ceMatch == U_CE_SKIP_PATN ) {
+                		found = FALSE;
+                		break;
+                	}
+                // If lowIndex == highIndex, this target CE is part of an expansion of the last matched
+                // target element, but it has non-zero primary weight => match fails
+                } else if ( nextCEI->lowIndex == nextCEI->highIndex ) {
+                	found = false;
+                	break;
+                // Else the target CE is not part of an expansion of the last matched element, match succeeds
+                } else {
+                	break;
+                }
+            }
        }


@ -3998,21 +4116,36 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch  *strsrch,

    // Outer loop moves over match starting positions in the
    //      target CE space.
+    // Here, targetIx values increase toward the beginning of the base text (i.e. we get the text CEs in reverse order).
+    // But patIx is 0 at the beginning of the pattern and increases toward the end.
+    // So this loop performs a comparison starting with the end of pattern, and proceeds toward the beginning of the pattern
+    // and the beginning of the base text.
    for(targetIx = limitIx; ; targetIx += 1)
    {
        found = TRUE;
        //  Inner loop checks for a match beginning at each
        //  position from the outer loop.
+        int32_t targetIxOffset = 0;
        for (patIx = strsrch->pattern.PCELength - 1; patIx >= 0; patIx -= 1) {
            int64_t patCE = strsrch->pattern.PCE[patIx];

-            targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1 - patIx);
+            targetCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1 - patIx + targetIxOffset);
            //  Compare CE from target string with CE from the pattern.
            //    Note that the target CE will be UCOL_NULLORDER if we reach the end of input,
            //    which will fail the compare, below.
-            if (targetCEI->ce != patCE) {
+            UCompareCEsResult ceMatch = compareCE64s(targetCEI->ce, patCE, strsrch->search->elementComparisonType);
+            if ( ceMatch == U_CE_NO_MATCH ) {
                found = FALSE;
                break;
+            } else if ( ceMatch > U_CE_NO_MATCH ) {
+                if ( ceMatch == U_CE_SKIP_TARG ) {
+                    // redo with same patCE, next targCE
+                    patIx++;
+                    targetIxOffset++;
+                } else { // ceMatch == U_CE_SKIP_PATN
+                    // redo with same targCE, next patCE
+                    targetIxOffset--;
+                }
            }
        }

@ -4032,7 +4165,7 @@ U_CAPI UBool U_EXPORT2 usearch_searchBackwards(UStringSearch  *strsrch,
        //  There still is a chance of match failure if the CE range not correspond to
        //     an acceptable character range.
        //
-        const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1);
+        const CEI *firstCEI = ceb.getPrevious(targetIx + strsrch->pattern.PCELength - 1 + targetIxOffset);
        const CEI *lastCEI  = ceb.getPrevious(targetIx);
        const CEI *nextCEI  = targetIx > 0? ceb.getPrevious(targetIx - 1) : NULL;

@ -4188,6 +4321,7 @@ UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status)
            if (lastce == UCOL_NULLORDER || lastce == UCOL_IGNORABLE) {
                lastce = targetce;
            }
+            // TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s
            if (targetce == patternce[patternceindex]) {
                // the first ce can be a contraction
                found = TRUE;
@ -4214,6 +4348,7 @@ UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status)
            }

            patternceindex --;
+            // TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s
            found = found && targetce == patternce[patternceindex];
        }

@ -4297,6 +4432,7 @@ UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status)
            if (lastce == UCOL_NULLORDER || lastce == UCOL_IGNORABLE) {
                lastce = targetce;
            }
+            // TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s
            if (targetce == patternce[patternceindex]) {
                // the first ce can be a contraction
                found = TRUE;
@ -4320,6 +4456,7 @@ UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status)
            }

            patternceindex --;
+            // TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s
            found = found && targetce == patternce[patternceindex];
        }

@ -4417,6 +4554,7 @@ UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
            if (targetce == UCOL_IGNORABLE && strsrch->strength != UCOL_PRIMARY) {
                continue;
            }
+            // TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s
            if (targetce == patternce[0]) {
                found = TRUE;
                break;
@ -4442,6 +4580,7 @@ UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
                continue;
            }

+            // TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s
            found = found && targetce == patternce[patternceindex];
            patternceindex ++;
        }
@ -4533,6 +4672,7 @@ UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
                firstce = targetce;
            }

+            // TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s
            if (targetce == patternce[0]) {
                // the first ce can be a contraction
                found = TRUE;
@ -4558,6 +4698,7 @@ UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
                continue;
            }

+            // TODO: #if BOYER_MOORE, replace with code using 32-bit version of compareCE64s
            found = found && targetce == patternce[patternceindex];
            patternceindex ++;
        }
--- a/icu4c/source/test/cintltst/usrchdat.c
+++ b/icu4c/source/test/cintltst/usrchdat.c
--- a/icu4c/source/test/cintltst/usrchtst.c
+++ b/icu4c/source/test/cintltst/usrchtst.c
@ -1,5 +1,5 @@
 /********************************************************************
- * Copyright (c) 2001-2009 International Business Machines 
+ * Copyright (c) 2001-2010 International Business Machines 
 * Corporation and others. All Rights Reserved.
 ********************************************************************
 * File usrchtst.c
@ -343,6 +343,12 @@ static UBool assertEqualWithUStringSearch(      UStringSearch *strsrch,
    int32_t     textlength;
    UChar       matchtext[128];

+    usearch_setAttribute(strsrch, USEARCH_ELEMENT_COMPARISON, search.elemCompare, &status);
+    if (U_FAILURE(status)) {
+        log_err("Error setting USEARCH_ELEMENT_COMPARISON attribute %s\n", u_errorName(status));
+        return FALSE;
+    }   
+
    if (usearch_getMatchedStart(strsrch) != USEARCH_DONE ||
        usearch_getMatchedLength(strsrch) != 0) {
        log_err("Error with the initialization of match start and length\n");
@ -357,9 +363,9 @@ static UBool assertEqualWithUStringSearch(      UStringSearch *strsrch,
            log_err("Text: %s\n", str);
            str = toCharString(usearch_getPattern(strsrch, &textlength));
            log_err("Pattern: %s\n", str);
-            log_err("Error following match found at %d %d\n", 
-                    usearch_getMatchedStart(strsrch), 
-                    usearch_getMatchedLength(strsrch));
+            log_err("Error following match found at idx,len %d,%d; expected %d,%d\n", 
+                    usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch),
+                    matchindex, matchlength);
            return FALSE;
        }
        count ++;
@ -430,6 +436,7 @@ static UBool assertEqualWithUStringSearch(      UStringSearch *strsrch,
        return FALSE;
    }

+    usearch_setAttribute(strsrch, USEARCH_ELEMENT_COMPARISON, USEARCH_STANDARD_ELEMENT_COMPARISON, &status);
    return TRUE;
 }

@ -1106,7 +1113,7 @@ static void TestCompositeBoundaries(void)

 static void TestGetSetOffset(void)
 {
-    int            index   = 0;
+    int            searchDataIndex   = 0;
    UChar          pattern[32];
    UChar          text[128];
    UErrorCode     status  = U_ZERO_ERROR;
@ -1133,9 +1140,9 @@ static void TestGetSetOffset(void)
    if (U_SUCCESS(status)) {
        log_err("Error expecting set offset error\n");
    }
-    while (BASIC[index].text != NULL) {
+    while (BASIC[searchDataIndex].text != NULL) {
        int         count       = 0;
-        SearchData  search      = BASIC[index ++];
+        SearchData  search      = BASIC[searchDataIndex ++];
        int32_t matchindex  = search.offset[count];
        int32_t     textlength;
        
@ -1404,7 +1411,7 @@ static void TestSetMatch(void)
    while (MATCH[count].text != NULL) {
        SearchData     search = MATCH[count];
        int            size   = 0;
-        int            index = 0;
+        int            offsetIndex = 0;
        UChar          text[128];
        UChar          pattern[32];
        UStringSearch *strsrch;
@ -1440,25 +1447,25 @@ static void TestSetMatch(void)
            log_err("Error getting last match\n");
        }
        
-        while (index < size) {
-            if (index + 2 < size) {
-                if (usearch_following(strsrch, search.offset[index + 2] - 1,
-                                      &status) != search.offset[index + 2] ||
+        while (offsetIndex < size) {
+            if (offsetIndex + 2 < size) {
+                if (usearch_following(strsrch, search.offset[offsetIndex + 2] - 1,
+                                      &status) != search.offset[offsetIndex + 2] ||
                    U_FAILURE(status)) {
                    log_err("Error getting following match at index %d\n", 
-                            search.offset[index + 2] - 1);
+                            search.offset[offsetIndex + 2] - 1);
                }
            }
-            if (index + 1 < size) {
-                if (usearch_preceding(strsrch, search.offset[index + 1] + 
-                                               search.size[index + 1] + 1, 
-                                      &status) != search.offset[index + 1] ||
+            if (offsetIndex + 1 < size) {
+                if (usearch_preceding(strsrch, search.offset[offsetIndex + 1] + 
+                                               search.size[offsetIndex + 1] + 1, 
+                                      &status) != search.offset[offsetIndex + 1] ||
                    U_FAILURE(status)) {
                    log_err("Error getting preceeding match at index %d\n", 
-                            search.offset[index + 1] + 1);
+                            search.offset[offsetIndex + 1] + 1);
                }
            }
-            index += 2;
+            offsetIndex += 2;
        }
        status = U_ZERO_ERROR;
        if (usearch_following(strsrch, u_strlen(text), &status) != 
@ -2155,7 +2162,7 @@ static void TestCompositeBoundariesCanonical(void)

 static void TestGetSetOffsetCanonical(void)
 {
-    int            index   = 0;
+    int            searchDataIndex   = 0;
    UChar          pattern[32];
    UChar          text[128];
    UErrorCode     status  = U_ZERO_ERROR;
@ -2188,13 +2195,13 @@ static void TestGetSetOffsetCanonical(void)
    if (U_SUCCESS(status)) {
        log_err("Error expecting set offset error\n");
    }
-    while (BASICCANONICAL[index].text != NULL) {
+    while (BASICCANONICAL[searchDataIndex].text != NULL) {
        int         count       = 0;
-        SearchData  search      = BASICCANONICAL[index ++];
+        SearchData  search      = BASICCANONICAL[searchDataIndex ++];
        int32_t matchindex  = search.offset[count];
        int32_t     textlength;

-        if (BASICCANONICAL[index].text == NULL) {
+        if (BASICCANONICAL[searchDataIndex].text == NULL) {
            /* skip the last one */
            break;
        }
--- a/icu4c/source/test/intltest/srchtest.cpp
+++ b/icu4c/source/test/intltest/srchtest.cpp
@ -1,6 +1,6 @@
 /*
 *****************************************************************************
-* Copyright (C) 2001-2009, International Business Machines orporation  
+* Copyright (C) 2001-2010, International Business Machines Corporation  
 * and others. All Rights Reserved.
 ****************************************************************************/

@ -257,6 +257,12 @@ UBool StringSearchTest::assertEqualWithStringSearch(StringSearch *strsrch,
    int32_t   matchindex  = search->offset[count];
    UnicodeString matchtext;
    
+    strsrch->setAttribute(USEARCH_ELEMENT_COMPARISON, search->elemCompare, status);
+    if (U_FAILURE(status)) {
+        errln("Error setting USEARCH_ELEMENT_COMPARISON attribute %s", u_errorName(status));
+        return FALSE;
+    }   
+
    if (strsrch->getMatchedStart() != USEARCH_DONE ||
        strsrch->getMatchedLength() != 0) {
        errln("Error with the initialization of match start and length");
@ -271,8 +277,9 @@ UBool StringSearchTest::assertEqualWithStringSearch(StringSearch *strsrch,
            errln("Text: %s", str);
            str = toCharString(strsrch->getPattern());
            infoln("Pattern: %s", str);
-            infoln("Error following match found at %d %d", 
-                    strsrch->getMatchedStart(), strsrch->getMatchedLength());
+            infoln("Error following match found at idx,len %d,%d; expected %d,%d", 
+                    strsrch->getMatchedStart(), strsrch->getMatchedLength(),
+                    matchindex, matchlength);
            return FALSE;
        }
        count ++;
@ -341,6 +348,7 @@ UBool StringSearchTest::assertEqualWithStringSearch(StringSearch *strsrch,
                    strsrch->getMatchedStart(), strsrch->getMatchedLength());
            return FALSE;
    }
+    strsrch->setAttribute(USEARCH_ELEMENT_COMPARISON, USEARCH_STANDARD_ELEMENT_COMPARISON, status);
    return TRUE;
 }