mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-5950 Added a test case and an internal break iterator to hold the character break iterator.
X-SVN-Rev: 22854
This commit is contained in:
parent
f8339df718
commit
3b76a8e2fb
5 changed files with 61 additions and 38 deletions
|
@ -443,7 +443,7 @@ void checkBreakBoundary(const UStringSearch *strsrch, int32_t *start,
|
|||
int32_t *end)
|
||||
{
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
UBreakIterator *breakiterator = strsrch->search->breakIter;
|
||||
UBreakIterator *breakiterator = strsrch->search->_breakIter_;
|
||||
if (breakiterator) {
|
||||
int32_t matchend = *end;
|
||||
int32_t matchstart = *start;
|
||||
|
@ -475,7 +475,7 @@ UBool isBreakUnit(const UStringSearch *strsrch, int32_t start,
|
|||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
UBreakIterator *breakiterator = strsrch->search->breakIter;
|
||||
//TODO: Add here.
|
||||
if (breakiterator && strsrch->search->breakIterGiven) {
|
||||
if (breakiterator) {
|
||||
int32_t startindex = ubrk_first(breakiterator);
|
||||
int32_t endindex = ubrk_last(breakiterator);
|
||||
|
||||
|
@ -1161,7 +1161,7 @@ inline UBool checkNextExactMatch(UStringSearch *strsrch,
|
|||
}
|
||||
|
||||
//Add breakiterator boundary check for primary strength search.
|
||||
if (!strsrch->search->breakIterGiven && strsrch->strength == UCOL_PRIMARY) {
|
||||
if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) {
|
||||
checkBreakBoundary(strsrch, &start, textoffset);
|
||||
}
|
||||
|
||||
|
@ -2009,7 +2009,7 @@ inline UBool checkPreviousExactMatch(UStringSearch *strsrch,
|
|||
}
|
||||
|
||||
//Add breakiterator boundary check for primary strength search.
|
||||
if (!strsrch->search->breakIterGiven && strsrch->strength == UCOL_PRIMARY) {
|
||||
if (!strsrch->search->breakIter && strsrch->strength == UCOL_PRIMARY) {
|
||||
checkBreakBoundary(strsrch, textoffset, &end);
|
||||
}
|
||||
|
||||
|
@ -2600,17 +2600,13 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
|
|||
result->pattern.textLength = patternlength;
|
||||
result->pattern.CE = NULL;
|
||||
|
||||
// If a breakiterator is given, use that one, otherwise create a character break iterator.
|
||||
result->search->breakIterGiven = breakiter ? TRUE : FALSE;
|
||||
result->search->breakIter = breakiter;
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
if (!breakiter && result->strength == UCOL_PRIMARY) {
|
||||
breakiter = ubrk_open(UBRK_CHARACTER, ucol_getLocale(result->collator, ULOC_VALID_LOCALE, status), NULL, 0, status);
|
||||
}
|
||||
result->search->_breakIter_ = ubrk_open(UBRK_CHARACTER, ucol_getLocale(result->collator, ULOC_VALID_LOCALE, status), text, textlength, status);
|
||||
if (breakiter) {
|
||||
ubrk_setText(breakiter, text, textlength, status);
|
||||
ubrk_setText(breakiter, text, textlength, status);
|
||||
}
|
||||
#endif
|
||||
result->search->breakIter = breakiter;
|
||||
|
||||
result->ownCollator = FALSE;
|
||||
result->search->matchedLength = 0;
|
||||
|
@ -2791,7 +2787,7 @@ U_CAPI void U_EXPORT2 usearch_setBreakIterator(UStringSearch *strsrch,
|
|||
UErrorCode *status)
|
||||
{
|
||||
if (U_SUCCESS(*status) && strsrch) {
|
||||
strsrch->search->breakIter = breakiter;
|
||||
strsrch->search->breakIter = breakiter;
|
||||
if (breakiter) {
|
||||
ubrk_setText(breakiter, strsrch->search->text,
|
||||
strsrch->search->textLength, status);
|
||||
|
@ -2835,6 +2831,7 @@ U_CAPI void U_EXPORT2 usearch_setText( UStringSearch *strsrch,
|
|||
ubrk_setText(strsrch->search->breakIter, text,
|
||||
textlength, status);
|
||||
}
|
||||
ubrk_setText(strsrch->search->_breakIter_, text, textlength, status);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
@ -2867,6 +2864,11 @@ U_CAPI void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch,
|
|||
strsrch->collator = collator;
|
||||
strsrch->strength = ucol_getStrength(collator);
|
||||
strsrch->ceMask = getMask(strsrch->strength);
|
||||
#if !UCONFIG_NO_BREAK_ITERATION
|
||||
ubrk_close(strsrch->search->_breakIter_);
|
||||
strsrch->search->_breakIter_ = ubrk_open(UBRK_CHARACTER, ucol_getLocale(collator, ULOC_VALID_LOCALE, status),
|
||||
strsrch->search->text, strsrch->search->textLength, status);
|
||||
#endif
|
||||
// if status is a failure, ucol_getAttribute returns UCOL_DEFAULT
|
||||
strsrch->toShift =
|
||||
ucol_getAttribute(collator, UCOL_ALTERNATE_HANDLING, status) ==
|
||||
|
@ -3469,7 +3471,7 @@ UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status)
|
|||
if (firstce == UCOL_NULLORDER || firstce == UCOL_IGNORABLE) {
|
||||
firstce = targetce;
|
||||
}
|
||||
if (targetce == UCOL_IGNORABLE) {
|
||||
if (targetce == UCOL_IGNORABLE && strsrch->strength != UCOL_PRIMARY) {
|
||||
continue;
|
||||
}
|
||||
if (targetce == patternce[0]) {
|
||||
|
|
|
@ -26,7 +26,7 @@ struct USearch {
|
|||
int32_t textLength; // exact length
|
||||
UBool isOverlap;
|
||||
UBool isCanonicalMatch;
|
||||
UBool breakIterGiven;
|
||||
UBreakIterator *_breakIter_; //internal character breakiterator
|
||||
UBreakIterator *breakIter;
|
||||
// value USEARCH_DONE is the default value
|
||||
// if we are not at the start of the text or the end of the text,
|
||||
|
|
|
@ -487,6 +487,8 @@ static const SearchData CONTRACTIONCANONICAL[] = {
|
|||
|
||||
static const SearchData DIACTRICMATCH[] = {
|
||||
{"\\u0061\\u0061\\u00E1", "\\u0061\\u00E1", NULL, UCOL_SECONDARY, NULL, {1, -1}, {2}},
|
||||
{"\\u0020\\u00C2\\u0303\\u0020\\u0041\\u0061\\u1EAA\\u0041\\u0302\\u0303\\u00C2\\u0303\\u1EAB\\u0061\\u0302\\u0303\\u00E2\\u0303\\uD806\\uDC01\\u0300\\u0020",
|
||||
"\\u00C2\\u0303", "LDE_AN_CX_EX_FX_HX_NX_S1", UCOL_PRIMARY, NULL, {1, 4, 5, 6, 7, 10, 12, 13, 16,-1}, {2, 1, 1, 1, 3, 2, 1, 3, 2}},
|
||||
{NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
|
||||
};
|
||||
|
||||
|
|
|
@ -1542,21 +1542,32 @@ static void TestDiactricMatch(void)
|
|||
UChar pattern[128];
|
||||
UChar text[128];
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UStringSearch *strsrch;
|
||||
uint32_t count = 0;
|
||||
UStringSearch *strsrch = NULL;
|
||||
UCollator *coll = NULL;
|
||||
uint32_t count = 1;
|
||||
UBool collatorCreated = FALSE;
|
||||
|
||||
memset(pattern, 0, 128*sizeof(UChar));
|
||||
memset(text, 0, 128*sizeof(UChar));
|
||||
|
||||
strsrch = usearch_open(pattern, 1, text, 1, uloc_getDefault(), NULL,
|
||||
&status);
|
||||
if (U_FAILURE(status)) {
|
||||
log_err("Error opening string search %s\n", u_errorName(status));
|
||||
}
|
||||
|
||||
ucol_setStrength(usearch_getCollator(strsrch), DIACTRICMATCH[count].strength);
|
||||
|
||||
|
||||
while (DIACTRICMATCH[count].text != NULL) {
|
||||
if (DIACTRICMATCH[count].collator != NULL) {
|
||||
coll = ucol_openFromShortString(DIACTRICMATCH[count].collator, FALSE, NULL, &status);
|
||||
collatorCreated = TRUE;
|
||||
} else {
|
||||
coll = getCollator(DIACTRICMATCH[count].collator);
|
||||
collatorCreated = FALSE;
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
log_err("Error opening string search collator %s\n", u_errorName(status));
|
||||
return;
|
||||
}
|
||||
strsrch = usearch_openFromCollator(pattern, 1, text, 1, coll, DIACTRICMATCH[count].breaker, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
log_err("Error opening string search %s\n", u_errorName(status));
|
||||
return;
|
||||
}
|
||||
u_unescape(DIACTRICMATCH[count].text, text, 128);
|
||||
u_unescape(DIACTRICMATCH[count].pattern, pattern, 128);
|
||||
usearch_setText(strsrch, text, -1, &status);
|
||||
|
@ -1565,8 +1576,11 @@ static void TestDiactricMatch(void)
|
|||
log_err("Error at test number %d\n", count);
|
||||
}
|
||||
count ++;
|
||||
if (collatorCreated) {
|
||||
ucol_close(coll);
|
||||
}
|
||||
usearch_close(strsrch);
|
||||
}
|
||||
usearch_close(strsrch);
|
||||
}
|
||||
|
||||
static void TestCanonical(void)
|
||||
|
|
|
@ -1551,31 +1551,36 @@ void StringSearchTest::TestDiactricMatch()
|
|||
UChar temp[128];
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int count = 0;
|
||||
RuleBasedCollator* coll = NULL;
|
||||
StringSearch *strsrch = NULL;
|
||||
|
||||
UnicodeString pattern("pattern");
|
||||
UnicodeString text("text");
|
||||
StringSearch *strsrch = new StringSearch(pattern, text, Locale::getDefault(), NULL,
|
||||
status);
|
||||
if (U_FAILURE(status)) {
|
||||
errln("Error opening string search %s", u_errorName(status));
|
||||
return;
|
||||
}
|
||||
|
||||
strsrch->getCollator()->setStrength(getECollationStrength(DIACTRICMATCH[count].strength));
|
||||
const SearchData *search;
|
||||
|
||||
while (DIACTRICMATCH[count].text != NULL) {
|
||||
u_unescape(DIACTRICMATCH[count].text, temp, 128);
|
||||
search = &(DIACTRICMATCH[count]);
|
||||
while (search->text != NULL) {
|
||||
coll = getCollator(search->collator);
|
||||
coll->setStrength(getECollationStrength(search->strength));
|
||||
strsrch = new StringSearch(pattern, text, coll, getBreakIterator(search->breaker), status);
|
||||
if (U_FAILURE(status)) {
|
||||
errln("Error opening string search %s", u_errorName(status));
|
||||
return;
|
||||
}
|
||||
u_unescape(search->text, temp, 128);
|
||||
text.setTo(temp, u_strlen(temp));
|
||||
u_unescape(DIACTRICMATCH[count].pattern, temp, 128);
|
||||
u_unescape(search->pattern, temp, 128);
|
||||
pattern.setTo(temp, u_strlen(temp));
|
||||
strsrch->setText(text, status);
|
||||
strsrch->setPattern(pattern, status);
|
||||
if (!assertEqualWithStringSearch(strsrch, &DIACTRICMATCH[count])) {
|
||||
if (!assertEqualWithStringSearch(strsrch, search)) {
|
||||
errln("Error at test number %d", count);
|
||||
}
|
||||
count ++;
|
||||
search = &(DIACTRICMATCH[++count]);
|
||||
delete strsrch;
|
||||
}
|
||||
delete strsrch;
|
||||
|
||||
}
|
||||
|
||||
void StringSearchTest::TestCanonical()
|
||||
|
|
Loading…
Add table
Reference in a new issue