mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-21 04:29:31 +00:00
ICU-2412 check for identical prefixes at the start of ucol_strcollIter, better handling of Thai.
X-SVN-Rev: 11112
This commit is contained in:
parent
96724d239f
commit
72c8bbe382
1 changed file with 88 additions and 36 deletions
|
@ -2408,8 +2408,12 @@ uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, col
|
|||
//source->writableBuffer[1] = *(source->pos - 1);
|
||||
source->writableBuffer[2] = 0;
|
||||
|
||||
source->fcdPosition = source->pos+1; // Indicate where to continue in main input string
|
||||
// after exhausting the writableBuffer
|
||||
if(source->pos) {
|
||||
source->fcdPosition = source->pos+1; // Indicate where to continue in main input string
|
||||
// after exhausting the writableBuffer
|
||||
} else if(source->iterator) {
|
||||
source->iterator->next(source->iterator);
|
||||
}
|
||||
source->pos = source->writableBuffer;
|
||||
source->origFlags = source->flags;
|
||||
source->flags |= UCOL_ITER_INNORMBUF;
|
||||
|
@ -5282,11 +5286,18 @@ ucol_nextSortKeyPart(UCollator *coll,
|
|||
}
|
||||
}
|
||||
}
|
||||
if(s.CEpos - s.toReturn) {
|
||||
if(s.CEpos - s.toReturn || (s.pos && *s.pos != 0)) {
|
||||
// s.pos != NULL means there is a normalization buffer in effect
|
||||
// in iterative case, this means that we are doing Thai (maybe discontiguous)
|
||||
consumedExpansionCEs++;
|
||||
} else {
|
||||
consumedExpansionCEs = 0;
|
||||
}
|
||||
if(s.pos && *s.pos == 0) {
|
||||
// maybe it is the end of Thai - we have to have
|
||||
// an extra skip
|
||||
iterSkips++;
|
||||
}
|
||||
}
|
||||
/* fall through to next level */
|
||||
case UCOL_PSK_SECONDARY:
|
||||
|
@ -5329,11 +5340,14 @@ ucol_nextSortKeyPart(UCollator *coll,
|
|||
dest[i++]=(uint8_t)CE;
|
||||
}
|
||||
}
|
||||
if(s.CEpos - s.toReturn) {
|
||||
if(s.CEpos - s.toReturn || (s.pos && *s.pos != 0)) {
|
||||
consumedExpansionCEs++;
|
||||
} else {
|
||||
consumedExpansionCEs = 0;
|
||||
}
|
||||
if(s.pos && *s.pos == 0) {
|
||||
iterSkips++;
|
||||
}
|
||||
}
|
||||
} else { // French secondary processing
|
||||
uint8_t frenchBuff[UCOL_MAX_BUFFER];
|
||||
|
@ -5395,11 +5409,14 @@ ucol_nextSortKeyPart(UCollator *coll,
|
|||
}
|
||||
}
|
||||
}
|
||||
if(s.CEpos - s.toReturn) {
|
||||
if(s.CEpos - s.toReturn || (s.pos && *s.pos != 0)) {
|
||||
consumedExpansionCEs++;
|
||||
} else {
|
||||
consumedExpansionCEs = 0;
|
||||
}
|
||||
if(s.pos && *s.pos == 0) {
|
||||
iterSkips++;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -5495,11 +5512,14 @@ ucol_nextSortKeyPart(UCollator *coll,
|
|||
}
|
||||
}
|
||||
// Not sure this is correct for the case level - revisit
|
||||
if(s.CEpos - s.toReturn) {
|
||||
if(s.CEpos - s.toReturn || (s.pos && *s.pos != 0)) {
|
||||
consumedExpansionCEs++;
|
||||
} else {
|
||||
consumedExpansionCEs = 0;
|
||||
}
|
||||
if(s.pos && *s.pos == 0) {
|
||||
iterSkips++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
level = UCOL_PSK_TERTIARY;
|
||||
|
@ -5553,11 +5573,14 @@ ucol_nextSortKeyPart(UCollator *coll,
|
|||
dest[i++]=(uint8_t)CE;
|
||||
}
|
||||
}
|
||||
if(s.CEpos - s.toReturn) {
|
||||
if(s.CEpos - s.toReturn || (s.pos && *s.pos != 0)) {
|
||||
consumedExpansionCEs++;
|
||||
} else {
|
||||
consumedExpansionCEs = 0;
|
||||
}
|
||||
if(s.pos && *s.pos == 0) {
|
||||
iterSkips++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// if we're not doing tertiary
|
||||
|
@ -5625,11 +5648,14 @@ ucol_nextSortKeyPart(UCollator *coll,
|
|||
}
|
||||
}
|
||||
}
|
||||
if(s.CEpos - s.toReturn) {
|
||||
if(s.CEpos - s.toReturn || (s.pos && *s.pos != 0)) {
|
||||
consumedExpansionCEs++;
|
||||
} else {
|
||||
consumedExpansionCEs = 0;
|
||||
}
|
||||
if(s.pos && *s.pos == 0) {
|
||||
iterSkips++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// if we're not doing quaternary
|
||||
|
@ -6718,7 +6744,7 @@ UCollationResult ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBoo
|
|||
sBuf = sColl->string;
|
||||
tLen = (tColl->flags & UCOL_ITER_HASLEN) ? tColl->endp - tColl->string : -1;
|
||||
tBuf = tColl->string;
|
||||
// }
|
||||
|
||||
if (normalize) {
|
||||
*status = U_ZERO_ERROR;
|
||||
if (unorm_quickCheck(sBuf, sLen, UNORM_NFD, status) != UNORM_YES) {
|
||||
|
@ -6798,14 +6824,7 @@ UCollationResult ucol_checkIdent(collIterate *sColl, collIterate *tColl, UBoo
|
|||
}
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
if(freeSBuf) {
|
||||
uprv_free(sBuf);
|
||||
}
|
||||
if(freeTBuf) {
|
||||
uprv_free(tBuf);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (comparison < 0) {
|
||||
return UCOL_LESS;
|
||||
} else if (comparison == 0) {
|
||||
|
@ -7784,11 +7803,15 @@ ucol_strcollIter( const UCollator *coll,
|
|||
UCharIterator *sIter,
|
||||
UCharIterator *tIter,
|
||||
UErrorCode *status) {
|
||||
if(!status || U_FAILURE(*status)) {
|
||||
if(!status || U_FAILURE(*status) || sIter == tIter) {
|
||||
return UCOL_EQUAL;
|
||||
}
|
||||
// Preparing the context objects for iterating over strings
|
||||
|
||||
UCollationResult result = UCOL_EQUAL;
|
||||
|
||||
// Preparing the context objects for iterating over strings
|
||||
collIterate sColl, tColl;
|
||||
UNormIterator *sNormIter = NULL, *tNormIter = NULL;
|
||||
|
||||
IInit_collIterate(coll, NULL, -1, &sColl);
|
||||
sColl.iterator = sIter;
|
||||
|
@ -7798,30 +7821,59 @@ ucol_strcollIter( const UCollator *coll,
|
|||
tColl.iterator = tIter;
|
||||
|
||||
if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON) {
|
||||
UNormIterator *sNormIter = unorm_openIter(status);
|
||||
sNormIter = unorm_openIter(status);
|
||||
sColl.iterator = unorm_setIter(sNormIter, sIter, UNORM_FCD, status);
|
||||
sColl.flags &= ~UCOL_ITER_NORM;
|
||||
|
||||
UNormIterator *tNormIter = unorm_openIter(status);
|
||||
tNormIter = unorm_openIter(status);
|
||||
tColl.iterator = unorm_setIter(tNormIter, tIter, UNORM_FCD, status);
|
||||
tColl.flags &= ~UCOL_ITER_NORM;
|
||||
|
||||
UCollationResult normResult;
|
||||
|
||||
if(U_SUCCESS(*status)) {
|
||||
normResult = ucol_strcollRegular(&sColl, &tColl, status);
|
||||
}
|
||||
|
||||
unorm_closeIter(sNormIter);
|
||||
unorm_closeIter(tNormIter);
|
||||
|
||||
return normResult;
|
||||
} else {
|
||||
return ucol_strcollRegular(&sColl, &tColl, status);
|
||||
}
|
||||
|
||||
//*status = U_UNSUPPORTED_ERROR;
|
||||
//return UCOL_EQUAL;
|
||||
UChar32 sChar = U_SENTINEL, tChar = U_SENTINEL;
|
||||
|
||||
while((sChar = sColl.iterator->next(sColl.iterator)) ==
|
||||
(tChar = tColl.iterator->next(tColl.iterator))) {
|
||||
if(sChar == U_SENTINEL) {
|
||||
return UCOL_EQUAL;
|
||||
}
|
||||
}
|
||||
|
||||
if(sChar == U_SENTINEL) {
|
||||
tChar = tColl.iterator->previous(tColl.iterator);
|
||||
}
|
||||
|
||||
if(tChar == U_SENTINEL) {
|
||||
sChar = sColl.iterator->previous(sColl.iterator);
|
||||
}
|
||||
|
||||
sChar = sColl.iterator->previous(sColl.iterator);
|
||||
tChar = tColl.iterator->previous(tColl.iterator);
|
||||
|
||||
if (ucol_unsafeCP((UChar)sChar, coll) || ucol_unsafeCP((UChar)tChar, coll))
|
||||
{
|
||||
// We are stopped in the middle of a contraction.
|
||||
// Scan backwards through the == part of the string looking for the start of the contraction.
|
||||
// It doesn't matter which string we scan, since they are the same in this region.
|
||||
do
|
||||
{
|
||||
sChar = sColl.iterator->previous(sColl.iterator);
|
||||
tChar = tColl.iterator->previous(tColl.iterator);
|
||||
}
|
||||
while (sChar != U_SENTINEL && ucol_unsafeCP((UChar)sChar, coll));
|
||||
}
|
||||
|
||||
|
||||
if(U_SUCCESS(*status)) {
|
||||
result = ucol_strcollRegular(&sColl, &tColl, status);
|
||||
}
|
||||
|
||||
if(sNormIter || tNormIter) {
|
||||
unorm_closeIter(sNormIter);
|
||||
unorm_closeIter(tNormIter);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue