mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-96 more changes to sortkey generation and CE getting
X-SVN-Rev: 3389
This commit is contained in:
parent
bcda7c7e67
commit
d2ccd0caf7
3 changed files with 266 additions and 189 deletions
|
@ -1,3 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f941c458eacb7f52929b2f8633249bfc4f803feac4be8fb642265c0cff6baa0a
|
||||
oid sha256:4f351caf9c3fa730c95dfe8cdf703657753af6741c88c5a5f16553b6dcc7ea2a
|
||||
size 86832
|
||||
|
|
|
@ -87,19 +87,22 @@ UCollatorNew* ucol_initCollator(const UCATableHeader *image, UCollatorNew *fillI
|
|||
result->contractionIndex = (UChar*)((uint8_t*)result->image+result->image->contractionIndex);
|
||||
result->expansion = (uint32_t*)((uint8_t*)result->image+result->image->expansion);
|
||||
/* set attributes */
|
||||
result->caseFirstDefault = result->image->caseFirst;
|
||||
result->caseLevelDefault = result->image->caseLevel;
|
||||
result->frenchCollationDefault = result->image->frenchCollation;
|
||||
result->normalizationModeDefault = result->image->normalizationMode;
|
||||
result->strengthDefault = result->image->strength;
|
||||
result->variableTopValueDefault = result->image->variableTopValue;
|
||||
result->caseFirst = result->image->caseFirst;
|
||||
result->caseLevel = result->image->caseLevel;
|
||||
result->frenchCollation = result->image->frenchCollation;
|
||||
result->normalizationMode = result->image->normalizationMode;
|
||||
result->strength = result->image->strength;
|
||||
result->variableTopValue = result->image->variableTopValue;
|
||||
|
||||
result->caseFirst = UCOL_DEFAULT;
|
||||
result->caseLevel = UCOL_DEFAULT;
|
||||
result->frenchCollation = UCOL_DEFAULT;
|
||||
result->normalizationMode = UCOL_DEFAULT;
|
||||
result->strength = UCOL_DEFAULT;
|
||||
result->variableTopValue = UCOL_DEFAULT;
|
||||
result->caseFirstisDefault = TRUE;
|
||||
result->caseLevelisDefault = TRUE;
|
||||
result->frenchCollationisDefault = TRUE;
|
||||
result->normalizationModeisDefault = TRUE;
|
||||
result->strengthisDefault = TRUE;
|
||||
result->variableTopValueisDefault = TRUE;
|
||||
|
||||
uint32_t variableMaxCE = ucmp32_get(result->mapping, result->variableTopValue);
|
||||
result->variableMax = (variableMaxCE & 0xFF000000) >> 24;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@ -198,17 +201,7 @@ uint32_t ucol_getNextCENew(const UCollatorNew *coll, collIterate *collationSourc
|
|||
*(collationSource->CEpos) = order; /* prepare the buffer */
|
||||
order = getSpecialCENew(coll, collationSource, status); /* and try to get the special CE */
|
||||
if(order == UCOL_NOT_FOUND) { /* We couldn't find a good CE in the tailoring */
|
||||
if(ch < 0xFF) { /* so we'll try to find it in the UCA */
|
||||
order = UCA->latinOneMapping[ch];
|
||||
} else {
|
||||
order = ucmp32_get(UCA->mapping, ch);
|
||||
}
|
||||
}
|
||||
if(order >= UCOL_NOT_FOUND) { /* UCA also gives us a special CE */
|
||||
order = getSpecialCENew(UCA, collationSource, status);
|
||||
}
|
||||
if(order == UCOL_NOT_FOUND) { /* This is where we have to resort to algorithmical generation */
|
||||
/* Make up an artifical CE from code point as per UCA */
|
||||
ucol_getNextUCA(ch, collationSource, status);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -217,6 +210,22 @@ uint32_t ucol_getNextCENew(const UCollatorNew *coll, collIterate *collationSourc
|
|||
return order; /* return the CE */
|
||||
}
|
||||
|
||||
/**
 * Looks up the collation element (CE) for one code unit in the UCA table.
 *
 * Code units below 0xFF are read straight from UCA->latinOneMapping; all
 * others go through ucmp32_get() on UCA->mapping. A value >= UCOL_NOT_FOUND
 * is treated as a special CE and handed to getSpecialCENew() for resolution.
 *
 * @param ch               code unit to look up
 * @param collationSource  iterator state; its CE buffer slot at CEpos is
 *                         overwritten when a special CE has to be resolved
 * @param status           error code, forwarded to getSpecialCENew()
 * @return the CE that was found, or UCOL_NOT_FOUND when the UCA has no
 *         mapping either (algorithmic generation is not implemented here yet)
 */
uint32_t ucol_getNextUCA(UChar ch, collIterate *collationSource, UErrorCode *status) {
    uint32_t order;
    if(ch < 0xFF) {
      order = UCA->latinOneMapping[ch];
    } else {
      order = ucmp32_get(UCA->mapping, ch);
    }
    if(order >= UCOL_NOT_FOUND) { /* UCA also gives us a special CE */
      /* getSpecialCENew() takes the CE it has to resolve from *CEpos, so the  */
      /* UCA value must be stored there first; otherwise it would work on      */
      /* whatever CE was left in the buffer by the previous lookup.            */
      *(collationSource->CEpos) = order;
      order = getSpecialCENew(UCA, collationSource, status);
    }
    if(order == UCOL_NOT_FOUND) { /* This is where we have to resort to algorithmic generation */
      /* TODO: make up an artificial CE from the code point as per UCA. */
      /* Until then UCOL_NOT_FOUND is passed back to the caller.        */
    }
    return order; /* return the CE */
}
|
||||
|
||||
uint32_t getSpecialCENew(const UCollatorNew *coll, collIterate *source, UErrorCode *status) {
|
||||
int32_t i = 0; /* general counter */
|
||||
uint32_t CE = *source->CEpos;
|
||||
|
@ -271,24 +280,30 @@ uint32_t getSpecialCENew(const UCollatorNew *coll, collIterate *source, UErrorCo
|
|||
/* This should handle contractions */
|
||||
while(true) {
|
||||
/* First we position ourselves at the beginning of the contraction sequence */
|
||||
UCharOffset = (UChar *)coll->image+getContractOffset(CE);
|
||||
const UChar *ContractionStart = UCharOffset = (UChar *)coll->image+getContractOffset(CE);
|
||||
|
||||
/* we need to convey the notion of having a backward search - most probably through the context object */
|
||||
/* if (backwardsSearch) offset += contractionUChars[(int16_t)offset]; else ++offset */
|
||||
/* if (backwardsSearch) offset += contractionUChars[(int16_t)offset]; else UCharOffset++; */
|
||||
UCharOffset++; /* skip the backward offset, see above */
|
||||
if (source->pos>=source->len) { /* this is the end of string */
|
||||
CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex)); /* So we'll pick whatever we have at the point... */
|
||||
source->pos--; /* I think, since we'll advance in the getCE */
|
||||
break;
|
||||
}
|
||||
schar = *(source->pos++);
|
||||
schar = *(++source->pos);
|
||||
while(schar > (tchar = *(UCharOffset++))) ; /* since the contraction codepoints should be ordered, we skip all that are smaller */
|
||||
if(schar != tchar) { /* we didn't find the correct codepoint. We can use either the first or the last CE */
|
||||
UCharOffset--; /* We moved one after the 0xFFFF, so we better back up. */
|
||||
if(tchar == 0xFFFF) {
|
||||
UCharOffset--; /* We moved one after the 0xFFFF, so we better back up. We're gonna use the last CE*/
|
||||
} else {
|
||||
UCharOffset = ContractionStart; /* We're not at the end, bailed out in the middle. Better use starting CE */
|
||||
}
|
||||
source->pos--; /* Spit out the last char of the string, wasn't tasty enough */
|
||||
}
|
||||
CE = *(coll->contractionCEs + (UCharOffset - coll->contractionIndex));
|
||||
if(getCETag(CE) != CONTRACTION_TAG) {
|
||||
source->pos--; /* I think, since we'll advance in the getCE */
|
||||
if(!isContraction(CE)) {
|
||||
/* Maybe not */
|
||||
/*source->pos--;*/ /* I think, since we'll advance in the getCE */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -324,69 +339,6 @@ uint32_t getSpecialCENew(const UCollatorNew *coll, collIterate *source, UErrorCo
|
|||
return CE;
|
||||
}
|
||||
|
||||
int32_t ucol_getSortKeySizeNew(const UCollatorNew *coll, collIterate *s, int32_t currentSize, UColAttributeValue strength, int32_t len) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UBool compareSec = (strength >= UCOL_SECONDARY);
|
||||
UBool compareTer = (strength >= UCOL_TERTIARY);
|
||||
UBool compareQuad = (strength >= UCOL_QUATERNARY);
|
||||
UBool compareIdent = (strength == UCOL_IDENTICAL);
|
||||
int32_t order = UCOL_NULLORDER;
|
||||
uint16_t primary = 0;
|
||||
uint8_t secondary = 0;
|
||||
uint8_t tertiary = 0;
|
||||
|
||||
|
||||
for(;;) {
|
||||
order = ucol_getNextCENew(coll, s, &status);
|
||||
if(order == UCOL_NULLORDER) {
|
||||
break;
|
||||
}
|
||||
|
||||
primary = ((order & UCOL_PRIMARYORDERMASK)>> UCOL_PRIMARYORDERSHIFT);
|
||||
secondary = ((order & UCOL_SECONDARYORDERMASK)>> UCOL_SECONDARYORDERSHIFT);
|
||||
tertiary = (order & UCOL_TERTIARYORDERMASK);
|
||||
|
||||
if(primary != UCOL_PRIMIGNORABLE) {
|
||||
currentSize += 2;
|
||||
if(compareSec) {
|
||||
currentSize++;
|
||||
}
|
||||
if(compareTer) {
|
||||
currentSize++;
|
||||
}
|
||||
} else if(secondary != 0) {
|
||||
if(compareSec) {
|
||||
currentSize++;
|
||||
}
|
||||
if(compareTer) {
|
||||
currentSize++;
|
||||
}
|
||||
} else if(tertiary != 0) {
|
||||
if(compareTer) {
|
||||
currentSize++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(compareIdent) {
|
||||
currentSize += len*sizeof(UChar);
|
||||
UChar *ident = s->string;
|
||||
while(ident<s->len) {
|
||||
if((*(ident) >> 8) + utf16fixup[*(ident) >> 11]<0x02) {
|
||||
|
||||
currentSize++;
|
||||
}
|
||||
if((*(ident) & 0xFF)<0x02) {
|
||||
currentSize++;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return currentSize;
|
||||
|
||||
}
|
||||
|
||||
uint8_t *reallocateBuffer(uint8_t **secondaries, uint8_t *secStart, uint8_t *second, int32_t *secSize, UErrorCode *status) {
|
||||
uint8_t *newStart = NULL;
|
||||
|
||||
|
@ -410,10 +362,6 @@ uint8_t *reallocateBuffer(uint8_t **secondaries, uint8_t *secStart, uint8_t *sec
|
|||
}
|
||||
|
||||
|
||||
#define MIN_VALUE 0x02
|
||||
#define UNMARKED 0x03
|
||||
#define UCOL_VARIABLE_MAX 0x20
|
||||
|
||||
void uprv_ucol_reverse_buffer(uint8_t *start, uint8_t *end) {
|
||||
uint8_t temp;
|
||||
while(start<end) {
|
||||
|
@ -423,6 +371,132 @@ void uprv_ucol_reverse_buffer(uint8_t *start, uint8_t *end) {
|
|||
}
|
||||
}
|
||||
|
||||
#define MIN_VALUE 0x02
|
||||
#define UNMARKED 0x03
|
||||
#define UCOL_VARIABLE_MAX 0x20
|
||||
#define UCOL_NEW_IGNORABLE 0
|
||||
|
||||
int32_t ucol_getSortKeySizeNew(const UCollatorNew *coll, collIterate *s, int32_t currentSize, UColAttributeValue strength, int32_t len) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
uint8_t compareSec = (strength >= UCOL_SECONDARY)?0:0xFF;
|
||||
uint8_t compareTer = (strength >= UCOL_TERTIARY)?0:0xFF;
|
||||
uint8_t compareQuad = (strength >= UCOL_QUATERNARY)?0:0xFF;
|
||||
UBool compareIdent = (strength == UCOL_IDENTICAL);
|
||||
UBool doCase = (coll->caseLevel == UCOL_ON);
|
||||
UBool shifted = (coll->alternateHandling == UCOL_SHIFTED);
|
||||
|
||||
uint8_t variableMax = coll->variableMax;
|
||||
|
||||
int32_t order = UCOL_NULLORDER;
|
||||
uint16_t primary = 0;
|
||||
uint8_t primary1 = 0;
|
||||
uint8_t primary2 = 0;
|
||||
uint8_t primary3 = 0;
|
||||
uint32_t ce = 0;
|
||||
uint8_t secondary = 0;
|
||||
uint8_t tertiary = 0;
|
||||
int32_t caseShift = 0;
|
||||
|
||||
|
||||
for(;;) {
|
||||
/*order = ucol_getNextCENew(coll, s, status);*/
|
||||
UCOL_GETNEXTCENEW(order, coll, *s, &status);
|
||||
|
||||
if(order == UCOL_NULLORDER) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* We're saving order in ce, since we will destroy order in order to get primary, secondary, tertiary in order ;)*/
|
||||
ce = order;
|
||||
|
||||
|
||||
tertiary = (order & UCOL_TERTIARYORDERMASK);
|
||||
secondary = (order >>= 8) & 0xFF;
|
||||
primary3 = 0; /* the third primary */
|
||||
primary2 = (order >>= 8) & 0xFF;;
|
||||
primary1 = order >>= 8;
|
||||
|
||||
if((tertiary & 0xF0) == 0xF0) { /* This indicates a long primary (11110000) */
|
||||
/* Note: long primary can appear both as a normal CE or as a continuation CE (not that it matters much) */
|
||||
primary3 = secondary;
|
||||
secondary = (tertiary & 0x0F) + MIN_VALUE;
|
||||
tertiary = UNMARKED;
|
||||
}
|
||||
|
||||
if(shifted && primary1 < variableMax && primary1 != 0) {
|
||||
currentSize++;
|
||||
if(primary2 != 0) {
|
||||
currentSize++;
|
||||
}
|
||||
} else {
|
||||
/* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */
|
||||
/* Usually, we'll have non-zero primary1 & primary2, except in cases of LatinOne and friends, when primary2 will */
|
||||
/* be zero with non zero primary1. primary3 is different than 0 only for long primaries - see above. */
|
||||
if(primary1 != UCOL_NEW_IGNORABLE) {
|
||||
currentSize++;
|
||||
if(primary2 != UCOL_NEW_IGNORABLE) {
|
||||
currentSize++;
|
||||
if(primary3 != UCOL_NEW_IGNORABLE) {
|
||||
currentSize++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(secondary > compareSec) { /* I think that != 0 test should be != IGNORABLE */
|
||||
/* This thing should also contain the compression logic, as in: */
|
||||
/*
|
||||
if (ws == COMMON2 && COMMON2 <= secondary[-1] && secondary[-1] < COMMON_MAX2)
|
||||
++secondary[-1]; // simply increment!!
|
||||
else *secondary++ = ws;
|
||||
*/
|
||||
|
||||
currentSize++;
|
||||
}
|
||||
|
||||
if(doCase) {
|
||||
if (caseShift == 0) {
|
||||
currentSize++;
|
||||
caseShift = 7;
|
||||
}
|
||||
caseShift--;
|
||||
}
|
||||
|
||||
if(tertiary > compareTer) { /* I think that != 0 test should be != IGNORABLE */
|
||||
/* This thing should also contain the compression logic, as in: */
|
||||
/*
|
||||
if (ws == COMMON2 && COMMON2 <= secondary[-1] && secondary[-1] < COMMON_MAX2)
|
||||
++secondary[-1]; // simply increment!!
|
||||
else *secondary++ = ws;
|
||||
*/
|
||||
currentSize++;
|
||||
}
|
||||
|
||||
if(shifted && primary1 > compareQuad) {
|
||||
currentSize++;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if(compareIdent) {
|
||||
currentSize += len*sizeof(UChar);
|
||||
UChar *ident = s->string;
|
||||
while(ident<s->len) {
|
||||
if((*(ident) >> 8) + utf16fixup[*(ident) >> 11]<0x02) {
|
||||
|
||||
currentSize++;
|
||||
}
|
||||
if((*(ident) & 0xFF)<0x02) {
|
||||
currentSize++;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return currentSize;
|
||||
|
||||
}
|
||||
|
||||
int32_t
|
||||
ucol_calcSortKeyNew(const UCollatorNew *coll,
|
||||
const UChar *source,
|
||||
|
@ -459,17 +533,18 @@ ucol_calcSortKeyNew(const UCollatorNew *coll,
|
|||
|
||||
int32_t len = (sourceLength == -1 ? u_strlen(source) : sourceLength);
|
||||
|
||||
uint8_t variableMax = coll->variableMax;
|
||||
|
||||
UColAttributeValue strength = ucol_getAttributeNew(coll, UCOL_STRENGTH, status);
|
||||
UColAttributeValue strength = coll->strength;
|
||||
|
||||
UBool compareSec = (strength >= UCOL_SECONDARY);
|
||||
UBool compareTer = (strength >= UCOL_TERTIARY);
|
||||
UBool compareQuad = (strength >= UCOL_QUATERNARY);
|
||||
uint8_t compareSec = (strength >= UCOL_SECONDARY)?0:0xFF;
|
||||
uint8_t compareTer = (strength >= UCOL_TERTIARY)?0:0xFF;
|
||||
uint8_t compareQuad = (strength >= UCOL_QUATERNARY)?0:0xFF;
|
||||
UBool compareIdent = (strength == UCOL_IDENTICAL);
|
||||
UBool doCase = (ucol_getAttributeNew(coll, UCOL_CASE_LEVEL, status) == UCOL_ON);
|
||||
UBool upperFirst = (ucol_getAttributeNew(coll, UCOL_CASE_FIRST, status) == UCOL_UPPER_FIRST);
|
||||
UBool shifted = (ucol_getAttributeNew(coll, UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED);
|
||||
UBool isFrenchSec = (ucol_getAttributeNew(coll, UCOL_FRENCH_COLLATION, status) == UCOL_ON);
|
||||
UBool doCase = (coll->caseLevel == UCOL_ON);
|
||||
UBool upperFirst = (coll->caseFirst == UCOL_UPPER_FIRST);
|
||||
UBool shifted = (coll->alternateHandling == UCOL_SHIFTED);
|
||||
UBool isFrenchSec = (coll->frenchCollation == UCOL_ON);
|
||||
|
||||
/* support for special features like caselevel and funky secondaries */
|
||||
uint8_t *frenchStartPtr = NULL;
|
||||
|
@ -482,7 +557,7 @@ ucol_calcSortKeyNew(const UCollatorNew *coll,
|
|||
init_collIterate((UChar *)source, len, &s, FALSE);
|
||||
|
||||
// If we need to normalize, we'll do it all at once at the beginning!
|
||||
UColAttributeValue normMode = ucol_getAttributeNew(coll, UCOL_NORMALIZATION_MODE, status);
|
||||
UColAttributeValue normMode = coll->normalizationMode;
|
||||
if(normMode != UCOL_OFF) {
|
||||
normSourceLen = u_normalize(source, sourceLength, UNORM_NFD, 0, normSource, normSourceLen, status);
|
||||
if(U_FAILURE(*status)) {
|
||||
|
@ -513,7 +588,6 @@ ucol_calcSortKeyNew(const UCollatorNew *coll,
|
|||
uint32_t order = 0;
|
||||
uint32_t ce = 0;
|
||||
|
||||
uint16_t primary = 0;
|
||||
uint8_t primary1 = 0;
|
||||
uint8_t primary2 = 0;
|
||||
uint8_t primary3 = 0;
|
||||
|
@ -528,7 +602,8 @@ ucol_calcSortKeyNew(const UCollatorNew *coll,
|
|||
for(;;) {
|
||||
for(i=prevBuffSize; i<minBufferSize; ++i) {
|
||||
|
||||
order = ucol_getNextCENew(coll, &s, status);
|
||||
/*order = ucol_getNextCENew(coll, &s, status);*/
|
||||
UCOL_GETNEXTCENEW(order, coll, s, status);
|
||||
|
||||
if(order == UCOL_NULLORDER) {
|
||||
finished = TRUE;
|
||||
|
@ -538,7 +613,6 @@ ucol_calcSortKeyNew(const UCollatorNew *coll,
|
|||
/* We're saving order in ce, since we will destroy order in order to get primary, secondary, tertiary in order ;)*/
|
||||
ce = order;
|
||||
|
||||
|
||||
tertiary = (order & UCOL_TERTIARYORDERMASK);
|
||||
secondary = (order >>= 8) & 0xFF;
|
||||
primary3 = 0; /* the third primary */
|
||||
|
@ -563,7 +637,7 @@ ucol_calcSortKeyNew(const UCollatorNew *coll,
|
|||
/* we're using too much space and need to reallocate the primary buffer or easily bail */
|
||||
/* out to ucol_getSortKeySizeNew. */
|
||||
|
||||
if(shifted && primary1 < UCOL_VARIABLE_MAX && primary1 > 0) {
|
||||
if(shifted && primary1 < variableMax && primary1 != 0) {
|
||||
/* We are dealing with a variable and we're treating them as shifted */
|
||||
/* This is a shifted ignorable */
|
||||
*quads++ = primary1;
|
||||
|
@ -576,20 +650,20 @@ ucol_calcSortKeyNew(const UCollatorNew *coll,
|
|||
/* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */
|
||||
/* Usually, we'll have non-zero primary1 & primary2, except in cases of LatinOne and friends, when primary2 will */
|
||||
/* be zero with non zero primary1. primary3 is different than 0 only for long primaries - see above. */
|
||||
if(primary1 != 0) {
|
||||
if(primary1 != UCOL_NEW_IGNORABLE) {
|
||||
*primaries++ = primary1; /* scriptOrder[primary1]; */ /* This is the script ordering thingie */
|
||||
sortKeySize++;
|
||||
if(primary2 != UCOL_NEW_IGNORABLE) {
|
||||
*primaries++ = primary2; /* second part */
|
||||
sortKeySize++;
|
||||
if(primary3 != UCOL_NEW_IGNORABLE) {
|
||||
*primaries++ = primary2; /* third part */
|
||||
sortKeySize++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(primary2 != 0) {
|
||||
*primaries++ = primary2; /* second part */
|
||||
sortKeySize++;
|
||||
}
|
||||
if(primary3 != 0) {
|
||||
*primaries++ = primary2; /* third part */
|
||||
sortKeySize++;
|
||||
}
|
||||
|
||||
if(compareSec && secondary != 0) { /* I think that != 0 test should be != IGNORABLE */
|
||||
if(secondary > compareSec) { /* I think that != 0 test should be != IGNORABLE */
|
||||
/* This thing should also contain the compression logic, as in: */
|
||||
/*
|
||||
if (ws == COMMON2 && COMMON2 <= secondary[-1] && secondary[-1] < COMMON_MAX2)
|
||||
|
@ -625,7 +699,7 @@ ucol_calcSortKeyNew(const UCollatorNew *coll,
|
|||
*(cases-1) |= (tertiary & 0x80) >> (8-caseShift--);
|
||||
}
|
||||
|
||||
if(compareTer && tertiary != 0) { /* I think that != 0 test should be != IGNORABLE */
|
||||
if(tertiary > compareTer) { /* I think that != 0 test should be != IGNORABLE */
|
||||
/* This thing should also contain the compression logic, as in: */
|
||||
/*
|
||||
if (ws == COMMON2 && COMMON2 <= secondary[-1] && secondary[-1] < COMMON_MAX2)
|
||||
|
@ -637,46 +711,13 @@ ucol_calcSortKeyNew(const UCollatorNew *coll,
|
|||
sortKeySize++;
|
||||
}
|
||||
|
||||
if(compareQuad && shifted && primary1 > 0) {
|
||||
if(shifted && primary1 > compareQuad) {
|
||||
*quads++ = 0xFF;
|
||||
sortKeySize++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* This is an old peace of code... I'm leaving it here just for discussion regarding */
|
||||
/* ignorables and situations with primary ignorable vs. variable top and ignorables */
|
||||
#if 0
|
||||
/*
|
||||
if(primary != UCOL_PRIMIGNORABLE) {
|
||||
*(primaries++) = (primary>>8);
|
||||
*(primaries++) = (primary&0xFF);
|
||||
sortKeySize += 2;
|
||||
if(compareSec) {
|
||||
*(secondaries++) = secondary;
|
||||
sortKeySize++;
|
||||
}
|
||||
if(compareTer) {
|
||||
*(tertiaries++) = tertiary;
|
||||
sortKeySize++;
|
||||
}
|
||||
} else if(secondary != UCOL_SECIGNORABLE) {
|
||||
if(compareSec) {
|
||||
*(secondaries++) = secondary;
|
||||
sortKeySize++;
|
||||
}
|
||||
if(compareTer) {
|
||||
*(tertiaries++) = tertiary;
|
||||
sortKeySize++;
|
||||
}
|
||||
} else if(tertiary != UCOL_TERIGNORABLE) {
|
||||
if(compareTer) {
|
||||
*(tertiaries++) = tertiary;
|
||||
sortKeySize++;
|
||||
}
|
||||
}
|
||||
*/
|
||||
#endif
|
||||
if(sortKeySize>resultLength) { /* We have stepped over the primary buffer */
|
||||
if(allocatePrimary == FALSE) { /* need to save our butts if we cannot reallocate */
|
||||
resultOverflow = TRUE;
|
||||
|
@ -712,7 +753,7 @@ ucol_calcSortKeyNew(const UCollatorNew *coll,
|
|||
|
||||
if(U_SUCCESS(*status)) {
|
||||
/* we have done all the CE's, now let's put them together to form a key */
|
||||
if(compareSec) {
|
||||
if(compareSec == 0) {
|
||||
*(primaries++) = UCOL_LEVELTERMINATOR;
|
||||
uint32_t secsize = secondaries-secStart;
|
||||
if(isFrenchSec) { /* do the reverse copy */
|
||||
|
@ -737,14 +778,14 @@ ucol_calcSortKeyNew(const UCollatorNew *coll,
|
|||
primaries += casesize;
|
||||
}
|
||||
|
||||
if(compareTer) {
|
||||
if(compareTer == 0) {
|
||||
*(primaries++) = UCOL_LEVELTERMINATOR;
|
||||
uint32_t tersize = tertiaries - terStart;
|
||||
uprv_memcpy(primaries, terStart, tersize);
|
||||
primaries += tersize;
|
||||
}
|
||||
|
||||
if(compareQuad) {
|
||||
if(compareQuad == 0) {
|
||||
*(primaries++) = UCOL_LEVELTERMINATOR;
|
||||
uint32_t quadsize = quads - quadStart;
|
||||
uprv_memcpy(primaries, quadStart, quadsize);
|
||||
|
@ -804,14 +845,8 @@ ucol_calcSortKeyNew(const UCollatorNew *coll,
|
|||
|
||||
if(terStart != tert) {
|
||||
uprv_free(terStart);
|
||||
}
|
||||
if(secStart != second) {
|
||||
uprv_free(secStart);
|
||||
}
|
||||
if(caseStart != caseB) {
|
||||
uprv_free(caseStart);
|
||||
}
|
||||
if(quadStart != quad) {
|
||||
uprv_free(quadStart);
|
||||
}
|
||||
|
||||
|
@ -839,10 +874,13 @@ U_CAPI void ucol_setAttributeNew(UCollatorNew *coll, UColAttribute attr, UColAtt
|
|||
case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*/
|
||||
if(value == UCOL_ON) {
|
||||
coll->frenchCollation = UCOL_ON;
|
||||
coll->frenchCollationisDefault = FALSE;
|
||||
} else if (value == UCOL_OFF) {
|
||||
coll->frenchCollation = UCOL_OFF;
|
||||
coll->frenchCollationisDefault = FALSE;
|
||||
} else if (value == UCOL_DEFAULT) {
|
||||
coll->frenchCollation = UCOL_DEFAULT;
|
||||
coll->frenchCollationisDefault = TRUE;
|
||||
coll->frenchCollation = coll->image->frenchCollation;
|
||||
} else {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR ;
|
||||
}
|
||||
|
@ -850,10 +888,13 @@ U_CAPI void ucol_setAttributeNew(UCollatorNew *coll, UColAttribute attr, UColAtt
|
|||
case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/
|
||||
if(value == UCOL_SHIFTED) {
|
||||
coll->alternateHandling = UCOL_SHIFTED;
|
||||
coll->alternateHandlingisDefault = FALSE;
|
||||
} else if (value == UCOL_NON_IGNORABLE) {
|
||||
coll->alternateHandling = UCOL_NON_IGNORABLE;
|
||||
coll->alternateHandlingisDefault = FALSE;
|
||||
} else if (value == UCOL_DEFAULT) {
|
||||
coll->alternateHandling = UCOL_DEFAULT;
|
||||
coll->alternateHandlingisDefault = TRUE;
|
||||
coll->alternateHandling = coll->image->alternateHandling ;
|
||||
} else {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR ;
|
||||
}
|
||||
|
@ -861,10 +902,13 @@ U_CAPI void ucol_setAttributeNew(UCollatorNew *coll, UColAttribute attr, UColAtt
|
|||
case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */
|
||||
if(value == UCOL_LOWER_FIRST) {
|
||||
coll->caseFirst = UCOL_LOWER_FIRST;
|
||||
coll->caseFirstisDefault = FALSE;
|
||||
} else if (value == UCOL_UPPER_FIRST) {
|
||||
coll->caseFirst = UCOL_UPPER_FIRST;
|
||||
coll->caseFirstisDefault = FALSE;
|
||||
} else if (value == UCOL_DEFAULT) {
|
||||
coll->caseFirst = UCOL_DEFAULT;
|
||||
coll->caseFirst = coll->image->caseFirst;
|
||||
coll->caseFirstisDefault = TRUE;
|
||||
} else {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR ;
|
||||
}
|
||||
|
@ -872,10 +916,13 @@ U_CAPI void ucol_setAttributeNew(UCollatorNew *coll, UColAttribute attr, UColAtt
|
|||
case UCOL_CASE_LEVEL: /* do we have an extra case level */
|
||||
if(value == UCOL_ON) {
|
||||
coll->caseLevel = UCOL_ON;
|
||||
coll->caseLevelisDefault = FALSE;
|
||||
} else if (value == UCOL_OFF) {
|
||||
coll->caseLevel = UCOL_OFF;
|
||||
coll->caseLevelisDefault = FALSE;
|
||||
} else if (value == UCOL_DEFAULT) {
|
||||
coll->caseLevel = UCOL_DEFAULT;
|
||||
coll->caseLevel = coll->image->caseLevel;
|
||||
coll->caseLevelisDefault = TRUE;
|
||||
} else {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR ;
|
||||
}
|
||||
|
@ -883,20 +930,26 @@ U_CAPI void ucol_setAttributeNew(UCollatorNew *coll, UColAttribute attr, UColAtt
|
|||
case UCOL_NORMALIZATION_MODE: /* attribute for normalization */
|
||||
if(value == UCOL_ON) {
|
||||
coll->normalizationMode = UCOL_ON;
|
||||
coll->normalizationModeisDefault = FALSE;
|
||||
} else if (value == UCOL_OFF) {
|
||||
coll->normalizationMode = UCOL_OFF;
|
||||
coll->normalizationModeisDefault = FALSE;
|
||||
} else if (value == UCOL_ON_WITHOUT_HANGUL) {
|
||||
coll->normalizationMode = UCOL_ON_WITHOUT_HANGUL ;
|
||||
coll->normalizationModeisDefault = FALSE;
|
||||
} else if (value == UCOL_DEFAULT) {
|
||||
coll->normalizationMode = UCOL_DEFAULT;
|
||||
coll->normalizationModeisDefault = TRUE;
|
||||
coll->normalizationMode = coll->image->normalizationMode;
|
||||
} else {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR ;
|
||||
}
|
||||
break;
|
||||
case UCOL_STRENGTH: /* attribute for strength */
|
||||
if (value == UCOL_DEFAULT) {
|
||||
coll->strength = UCOL_DEFAULT;
|
||||
coll->strengthisDefault = TRUE;
|
||||
coll->strength = coll->image->strength;
|
||||
} else if (value <= UCOL_IDENTICAL) {
|
||||
coll->strengthisDefault = FALSE;
|
||||
coll->strength = value;
|
||||
} else {
|
||||
*status = U_ILLEGAL_ARGUMENT_ERROR ;
|
||||
|
@ -912,43 +965,43 @@ U_CAPI void ucol_setAttributeNew(UCollatorNew *coll, UColAttribute attr, UColAtt
|
|||
U_CAPI UColAttributeValue ucol_getAttributeNew(const UCollatorNew *coll, UColAttribute attr, UErrorCode *status) {
|
||||
switch(attr) {
|
||||
case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights*/
|
||||
if(coll->frenchCollation == UCOL_DEFAULT) {
|
||||
return coll->frenchCollationDefault;
|
||||
if(coll->frenchCollationisDefault) {
|
||||
return coll->image->frenchCollation;
|
||||
} else {
|
||||
return coll->frenchCollation;
|
||||
}
|
||||
break;
|
||||
case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/
|
||||
if(coll->alternateHandling == UCOL_DEFAULT) {
|
||||
return coll->alternateHandlingDefault;
|
||||
if(coll->alternateHandlingisDefault) {
|
||||
return coll->image->alternateHandling;
|
||||
} else {
|
||||
return coll->alternateHandling;
|
||||
}
|
||||
break;
|
||||
case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */
|
||||
if(coll->caseFirst == UCOL_DEFAULT) {
|
||||
return coll->caseFirstDefault;
|
||||
if(coll->caseFirstisDefault) {
|
||||
return coll->image->caseFirst;
|
||||
} else {
|
||||
return coll->caseFirst;
|
||||
}
|
||||
break;
|
||||
case UCOL_CASE_LEVEL: /* do we have an extra case level */
|
||||
if(coll->caseLevel == UCOL_DEFAULT) {
|
||||
return coll->caseLevelDefault;
|
||||
if(coll->caseLevelisDefault) {
|
||||
return coll->image->caseLevel;
|
||||
} else {
|
||||
return coll->caseLevel;
|
||||
}
|
||||
break;
|
||||
case UCOL_NORMALIZATION_MODE: /* attribute for normalization */
|
||||
if(coll->normalizationMode == UCOL_DEFAULT) {
|
||||
return coll->normalizationModeDefault;
|
||||
if(coll->normalizationModeisDefault) {
|
||||
return coll->image->normalizationMode;
|
||||
} else {
|
||||
return coll->normalizationMode;
|
||||
}
|
||||
break;
|
||||
case UCOL_STRENGTH: /* attribute for strength */
|
||||
if(coll->strength == UCOL_DEFAULT) {
|
||||
return coll->strengthDefault;
|
||||
if(coll->strengthisDefault) {
|
||||
return coll->image->strength;
|
||||
} else {
|
||||
return coll->strength;
|
||||
}
|
||||
|
|
|
@ -191,6 +191,29 @@ static uint8_t utf16fixup[32] = {
|
|||
}
|
||||
|
||||
|
||||
/*
 * UCOL_GETNEXTCENEW(order, coll, collationSource, status)
 *
 * Inline fast path for fetching the next collation element.
 *
 *   order            lvalue that receives the CE
 *   coll             pointer to the collator (latinOneMapping / mapping are read)
 *   collationSource  the collIterate object itself (not a pointer to it)
 *   status           pointer to a UErrorCode
 *
 * - If status already holds a failure, or the text is exhausted and no
 *   buffered CEs are left, order is set to UCOL_NULLORDER.
 * - If buffered CEs are pending (CEpos > toReturn), the next one is returned.
 * - Otherwise the CE buffer is reset and the current code unit is looked up:
 *   below 0xFF through latinOneMapping, else through ucmp32_get(). A value
 *   >= UCOL_NOT_FOUND is stored at *CEpos and resolved by getSpecialCENew();
 *   if that still yields UCOL_NOT_FOUND, ucol_getNextUCA() is tried.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves like a
 * single statement, e.g. in "if (x) UCOL_GETNEXTCENEW(...); else ...".
 * All arguments are evaluated more than once; do not pass expressions with
 * side effects.
 *
 * NOTE(review): pos is advanced on every path, including the UCOL_NULLORDER
 * and buffered-CE paths. Left as is; confirm that this is intended.
 */
#define UCOL_GETNEXTCENEW(order, coll, collationSource, status) do {                  \
    if (U_FAILURE(*(status)) || ((collationSource).pos>=(collationSource).len         \
        && (collationSource).CEpos <= (collationSource).toReturn)) {                  \
      (order) = UCOL_NULLORDER;                                                       \
    } else if ((collationSource).CEpos > (collationSource).toReturn) {                \
      (order) = *((collationSource).toReturn++);                                      \
    } else {                                                                          \
      UChar ch = *(collationSource).pos;                                              \
      (collationSource).CEpos = (collationSource).toReturn = (collationSource).CEs;   \
      if(ch < 0xFF) { (order) = (coll)->latinOneMapping[ch]; }                        \
      else { (order) = ucmp32_get((coll)->mapping, ch); }                             \
      if((order) >= UCOL_NOT_FOUND) {                                                 \
        *((collationSource).CEpos) = (order);                                         \
        (order) = getSpecialCENew((coll), &(collationSource), (status));              \
        if((order) == UCOL_NOT_FOUND) {                                               \
          (order) = ucol_getNextUCA(ch, &(collationSource), (status));                \
        }                                                                             \
      }                                                                               \
    }                                                                                 \
    (collationSource).pos++;                                                          \
} while (0)
|
||||
|
||||
uint32_t ucol_getNextUCA(UChar ch, collIterate *collationSource, UErrorCode *status);
|
||||
int32_t getComplicatedCE(const UCollator *coll, collIterate *source, UErrorCode *status);
|
||||
void incctx_cleanUpContext(incrementalContext *ctx);
|
||||
UChar incctx_appendChar(incrementalContext *ctx, UChar c);
|
||||
|
@ -287,6 +310,7 @@ struct UCollatorNew {
|
|||
const uint32_t *expansion;
|
||||
const UChar *contractionIndex;
|
||||
const uint32_t *contractionCEs;
|
||||
uint8_t variableMax;
|
||||
UChar variableTopValue;
|
||||
UColAttributeValue frenchCollation;
|
||||
UColAttributeValue alternateHandling; /* attribute for handling variable elements*/
|
||||
|
@ -294,13 +318,13 @@ struct UCollatorNew {
|
|||
UColAttributeValue caseLevel; /* do we have an extra case level */
|
||||
UColAttributeValue normalizationMode; /* attribute for normalization */
|
||||
UColAttributeValue strength; /* attribute for strength */
|
||||
UChar variableTopValueDefault;
|
||||
UColAttributeValue frenchCollationDefault;
|
||||
UColAttributeValue alternateHandlingDefault; /* attribute for handling variable elements*/
|
||||
UColAttributeValue caseFirstDefault; /* who goes first, lower case or uppercase */
|
||||
UColAttributeValue caseLevelDefault; /* do we have an extra case level */
|
||||
UColAttributeValue normalizationModeDefault; /* attribute for normalization */
|
||||
UColAttributeValue strengthDefault; /* attribute for strength */
|
||||
UBool variableTopValueisDefault;
|
||||
UBool frenchCollationisDefault;
|
||||
UBool alternateHandlingisDefault; /* attribute for handling variable elements*/
|
||||
UBool caseFirstisDefault; /* who goes first, lower case or uppercase */
|
||||
UBool caseLevelisDefault; /* do we have an extra case level */
|
||||
UBool normalizationModeisDefault; /* attribute for normalization */
|
||||
UBool strengthisDefault; /* attribute for strength */
|
||||
};
|
||||
|
||||
uint32_t getSpecialCENew(const UCollatorNew *coll, collIterate *source, UErrorCode *status);
|
||||
|
|
Loading…
Add table
Reference in a new issue