ICU-5950 Added test case and added internal breakiterator to hold the character breakiterator.

X-SVN-Rev: 22854
This commit is contained in:
Michael Ow 2007-10-25 22:02:54 +00:00
parent f8339df718
commit 3b76a8e2fb
5 changed files with 61 additions and 38 deletions

View file

@ -443,7 +443,7 @@ void checkBreakBoundary(const UStringSearch *strsrch, int32_t *start,
int32_t *end)
{
#if !UCONFIG_NO_BREAK_ITERATION
UBreakIterator *breakiterator = strsrch->search->breakIter;
UBreakIterator *breakiterator = strsrch->search->_breakIter_;
if (breakiterator) {
int32_t matchend = *end;
int32_t matchstart = *start;
@ -475,7 +475,7 @@ UBool isBreakUnit(const UStringSearch *strsrch, int32_t start,
#if !UCONFIG_NO_BREAK_ITERATION
UBreakIterator *breakiterator = strsrch->search->breakIter;
//TODO: Add here.
if (breakiterator && strsrch->search->breakIterGiven) {
if (breakiterator) {
int32_t startindex = ubrk_first(breakiterator);
int32_t endindex = ubrk_last(breakiterator);
@ -1161,7 +1161,7 @@ inline UBool checkNextExactMatch(UStringSearch *strsrch,
}
//Add breakiterator boundary check for primary strength search.
if (!strsrch->search->breakIterGiven && strsrch->strength == UCOL_PRIMARY) {
if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) {
checkBreakBoundary(strsrch, &start, textoffset);
}
@ -2009,7 +2009,7 @@ inline UBool checkPreviousExactMatch(UStringSearch *strsrch,
}
//Add breakiterator boundary check for primary strength search.
if (!strsrch->search->breakIterGiven && strsrch->strength == UCOL_PRIMARY) {
if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) {
checkBreakBoundary(strsrch, textoffset, &end);
}
@ -2600,17 +2600,13 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
result->pattern.textLength = patternlength;
result->pattern.CE = NULL;
// If a breakiterator is given, use that one, otherwise create a character break iterator.
result->search->breakIterGiven = breakiter ? TRUE : FALSE;
result->search->breakIter = breakiter;
#if !UCONFIG_NO_BREAK_ITERATION
if (!breakiter && result->strength == UCOL_PRIMARY) {
breakiter = ubrk_open(UBRK_CHARACTER, ucol_getLocale(result->collator, ULOC_VALID_LOCALE, status), NULL, 0, status);
}
result->search->_breakIter_ = ubrk_open(UBRK_CHARACTER, ucol_getLocale(result->collator, ULOC_VALID_LOCALE, status), text, textlength, status);
if (breakiter) {
ubrk_setText(breakiter, text, textlength, status);
ubrk_setText(breakiter, text, textlength, status);
}
#endif
result->search->breakIter = breakiter;
result->ownCollator = FALSE;
result->search->matchedLength = 0;
@ -2791,7 +2787,7 @@ U_CAPI void U_EXPORT2 usearch_setBreakIterator(UStringSearch *strsrch,
UErrorCode *status)
{
if (U_SUCCESS(*status) && strsrch) {
strsrch->search->breakIter = breakiter;
strsrch->search->breakIter = breakiter;
if (breakiter) {
ubrk_setText(breakiter, strsrch->search->text,
strsrch->search->textLength, status);
@ -2835,6 +2831,7 @@ U_CAPI void U_EXPORT2 usearch_setText( UStringSearch *strsrch,
ubrk_setText(strsrch->search->breakIter, text,
textlength, status);
}
ubrk_setText(strsrch->search->_breakIter_, text, textlength, status);
#endif
}
}
@ -2867,6 +2864,11 @@ U_CAPI void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch,
strsrch->collator = collator;
strsrch->strength = ucol_getStrength(collator);
strsrch->ceMask = getMask(strsrch->strength);
#if !UCONFIG_NO_BREAK_ITERATION
ubrk_close(strsrch->search->_breakIter_);
strsrch->search->_breakIter_ = ubrk_open(UBRK_CHARACTER, ucol_getLocale(collator, ULOC_VALID_LOCALE, status),
strsrch->search->text, strsrch->search->textLength, status);
#endif
// if status is a failure, ucol_getAttribute returns UCOL_DEFAULT
strsrch->toShift =
ucol_getAttribute(collator, UCOL_ALTERNATE_HANDLING, status) ==
@ -3469,7 +3471,7 @@ UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
if (firstce == UCOL_NULLORDER || firstce == UCOL_IGNORABLE) {
firstce = targetce;
}
if (targetce == UCOL_IGNORABLE) {
if (targetce == UCOL_IGNORABLE && strsrch->strength != UCOL_PRIMARY) {
continue;
}
if (targetce == patternce[0]) {

View file

@ -26,7 +26,7 @@ struct USearch {
int32_t textLength; // exact length
UBool isOverlap;
UBool isCanonicalMatch;
UBool breakIterGiven;
UBreakIterator *_breakIter_; //internal character breakiterator
UBreakIterator *breakIter;
// value USEARCH_DONE is the default value
// if we are not at the start of the text or the end of the text,

View file

@ -487,6 +487,8 @@ static const SearchData CONTRACTIONCANONICAL[] = {
/*
 * Search cases consumed by the TestDiactricMatch tests. Those tests walk the
 * table until they reach an entry whose text is NULL, so the all-NULL entry
 * must remain the last element.
 * NOTE(review): the initializer order appears to be text, pattern, collator,
 * strength, breaker, match offsets, match lengths -- confirm against the
 * SearchData declaration, which is not visible here.
 */
static const SearchData DIACTRICMATCH[] = {
/* UCOL_SECONDARY case with no collator string and no breaker given. */
{"\\u0061\\u0061\\u00E1", "\\u0061\\u00E1", NULL, UCOL_SECONDARY, NULL, {1, -1}, {2}},
/* UCOL_PRIMARY case with an explicit collator string and no breaker given;
   presumably the offset list is terminated by -1 -- TODO confirm. */
{"\\u0020\\u00C2\\u0303\\u0020\\u0041\\u0061\\u1EAA\\u0041\\u0302\\u0303\\u00C2\\u0303\\u1EAB\\u0061\\u0302\\u0303\\u00E2\\u0303\\uD806\\uDC01\\u0300\\u0020",
"\\u00C2\\u0303", "LDE_AN_CX_EX_FX_HX_NX_S1", UCOL_PRIMARY, NULL, {1, 4, 5, 6, 7, 10, 12, 13, 16,-1}, {2, 1, 1, 1, 3, 2, 1, 3, 2}},
/* Terminating sentinel: a NULL text ends iteration over the table. */
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
};

View file

@ -1542,21 +1542,32 @@ static void TestDiactricMatch(void)
UChar pattern[128];
UChar text[128];
UErrorCode status = U_ZERO_ERROR;
UStringSearch *strsrch;
uint32_t count = 0;
UStringSearch *strsrch = NULL;
UCollator *coll = NULL;
uint32_t count = 1;
UBool collatorCreated = FALSE;
memset(pattern, 0, 128*sizeof(UChar));
memset(text, 0, 128*sizeof(UChar));
strsrch = usearch_open(pattern, 1, text, 1, uloc_getDefault(), NULL,
&status);
if (U_FAILURE(status)) {
log_err("Error opening string search %s\n", u_errorName(status));
}
ucol_setStrength(usearch_getCollator(strsrch), DIACTRICMATCH[count].strength);
while (DIACTRICMATCH[count].text != NULL) {
if (DIACTRICMATCH[count].collator != NULL) {
coll = ucol_openFromShortString(DIACTRICMATCH[count].collator, FALSE, NULL, &status);
collatorCreated = TRUE;
} else {
coll = getCollator(DIACTRICMATCH[count].collator);
collatorCreated = FALSE;
}
if (U_FAILURE(status)) {
log_err("Error opening string search collator %s\n", u_errorName(status));
return;
}
strsrch = usearch_openFromCollator(pattern, 1, text, 1, coll, DIACTRICMATCH[count].breaker, &status);
if (U_FAILURE(status)) {
log_err("Error opening string search %s\n", u_errorName(status));
return;
}
u_unescape(DIACTRICMATCH[count].text, text, 128);
u_unescape(DIACTRICMATCH[count].pattern, pattern, 128);
usearch_setText(strsrch, text, -1, &status);
@ -1565,8 +1576,11 @@ static void TestDiactricMatch(void)
log_err("Error at test number %d\n", count);
}
count ++;
if (collatorCreated) {
ucol_close(coll);
}
usearch_close(strsrch);
}
usearch_close(strsrch);
}
static void TestCanonical(void)

View file

@ -1551,31 +1551,36 @@ void StringSearchTest::TestDiactricMatch()
UChar temp[128];
UErrorCode status = U_ZERO_ERROR;
int count = 0;
RuleBasedCollator* coll = NULL;
StringSearch *strsrch = NULL;
UnicodeString pattern("pattern");
UnicodeString text("text");
StringSearch *strsrch = new StringSearch(pattern, text, Locale::getDefault(), NULL,
status);
if (U_FAILURE(status)) {
errln("Error opening string search %s", u_errorName(status));
return;
}
strsrch->getCollator()->setStrength(getECollationStrength(DIACTRICMATCH[count].strength));
const SearchData *search;
while (DIACTRICMATCH[count].text != NULL) {
u_unescape(DIACTRICMATCH[count].text, temp, 128);
search = &(DIACTRICMATCH[count]);
while (search->text != NULL) {
coll = getCollator(search->collator);
coll->setStrength(getECollationStrength(search->strength));
strsrch = new StringSearch(pattern, text, coll, getBreakIterator(search->breaker), status);
if (U_FAILURE(status)) {
errln("Error opening string search %s", u_errorName(status));
return;
}
u_unescape(search->text, temp, 128);
text.setTo(temp, u_strlen(temp));
u_unescape(DIACTRICMATCH[count].pattern, temp, 128);
u_unescape(search->pattern, temp, 128);
pattern.setTo(temp, u_strlen(temp));
strsrch->setText(text, status);
strsrch->setPattern(pattern, status);
if (!assertEqualWithStringSearch(strsrch, &DIACTRICMATCH[count])) {
if (!assertEqualWithStringSearch(strsrch, search)) {
errln("Error at test number %d", count);
}
count ++;
search = &(DIACTRICMATCH[++count]);
delete strsrch;
}
delete strsrch;
}
void StringSearchTest::TestCanonical()