mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-96 correct handling of level separator for quad level, sortkeytostring private function, some tweaks for CE generation, rule parser factored out
X-SVN-Rev: 4189
This commit is contained in:
parent
a6265c42b8
commit
cda9dc782f
6 changed files with 868 additions and 248 deletions
|
@ -1431,7 +1431,7 @@ ucol_calcSortKey(const UCollator *coll,
|
|||
uint8_t *frenchEndPtr = NULL;
|
||||
uint32_t caseShift = 0;
|
||||
|
||||
sortKeySize += ((compareSec?0:1) + (compareTer?0:1) + (doCase?1:0) + (compareQuad?0:1) + (compareIdent?1:0));
|
||||
sortKeySize += ((compareSec?0:1) + (compareTer?0:1) + (doCase?1:0) + (qShifted?1:0)/*(compareQuad?0:1)*/ + (compareIdent?1:0));
|
||||
|
||||
collIterate s;
|
||||
init_collIterate(coll, (UChar *)source, len, &s, FALSE);
|
||||
|
@ -1780,7 +1780,7 @@ ucol_calcSortKey(const UCollator *coll,
|
|||
if(sortKeySize <= resultLength) {
|
||||
uprv_memcpy(primaries, terStart, tersize);
|
||||
primaries += tersize;
|
||||
if(compareQuad == 0) {
|
||||
if(/*compareQuad == 0*/qShifted == TRUE) {
|
||||
if(count4 > 0) {
|
||||
while (count4 >= UCOL_BOT_COUNT4) {
|
||||
*quads++ = (uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4);
|
||||
|
@ -2198,6 +2198,65 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
|
|||
return sortKeySize;
|
||||
}
|
||||
|
||||
/* this function makes a string with representation of a sortkey */
|
||||
U_CAPI char U_EXPORT2 *ucol_sortKeyToString(const UCollator *coll, const uint8_t *sortkey, char *buffer, uint32_t *len) {
|
||||
uint32_t strength = UCOL_PRIMARY;
|
||||
uint32_t res_size = 0;
|
||||
UBool doneCase = FALSE;
|
||||
|
||||
char *current = buffer;
|
||||
const uint8_t *currentSk = sortkey;
|
||||
|
||||
sprintf(current, "[");
|
||||
current++;
|
||||
|
||||
while(strength <= UCOL_QUATERNARY && strength <= coll->strength) {
|
||||
if(strength > UCOL_PRIMARY) {
|
||||
sprintf(current, " . ");
|
||||
current += 3;
|
||||
}
|
||||
while(*currentSk != 0x01 && *currentSk != 0x00) { /* print a level */
|
||||
sprintf(current, "%02X ", *currentSk++);
|
||||
current+=3;
|
||||
}
|
||||
if(coll->caseLevel == UCOL_ON && strength == UCOL_SECONDARY && doneCase == FALSE) {
|
||||
doneCase = TRUE;
|
||||
} else if(coll->caseLevel == UCOL_OFF || doneCase == TRUE || strength != UCOL_SECONDARY) {
|
||||
strength ++;
|
||||
}
|
||||
sprintf(current, "%02X", *(currentSk++)); /* This should print '01' */
|
||||
current +=2;
|
||||
if(strength == UCOL_QUATERNARY && coll->alternateHandling == UCOL_NON_IGNORABLE) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(coll->strength == UCOL_IDENTICAL) {
|
||||
sprintf(current, " . ");
|
||||
current += 3;
|
||||
while(*currentSk != 0) {
|
||||
if(*currentSk == 0x01) {
|
||||
sprintf(current, "%02X", *(currentSk++));
|
||||
current +=2;
|
||||
}
|
||||
|
||||
sprintf(current, "%02X%02X ", *currentSk, *(currentSk+1));
|
||||
current +=5;
|
||||
currentSk+=2;
|
||||
}
|
||||
|
||||
sprintf(current, "%02X", *(currentSk++)); /* This should print '00' */
|
||||
current += 2;
|
||||
|
||||
}
|
||||
sprintf(current, "]");
|
||||
current += 3;
|
||||
|
||||
return buffer;
|
||||
|
||||
|
||||
}
|
||||
|
||||
/* This is a trick string compare function that goes in and uses sortkeys to compare */
|
||||
/* It is used when compare gets in trouble and needs to bail out */
|
||||
UCollationResult ucol_compareUsingSortKeys(const UCollator *coll,
|
||||
|
|
|
@ -309,6 +309,10 @@ U_CFUNC uint32_t ucol_getCEGenerator(ucolCEGenerator *g, uint32_t* lows, uint32_
|
|||
}
|
||||
}
|
||||
|
||||
if(low == 0) {
|
||||
low = 0x01000000;
|
||||
}
|
||||
|
||||
if(strength == UCOL_SECONDARY) { /* similar as simple */
|
||||
if(low >= UCOL_COMMON_BOT2<<24 && low < UCOL_COMMON_TOP2<<24) {
|
||||
low = UCOL_COMMON_TOP2<<24;
|
||||
|
|
|
@ -549,6 +549,7 @@ uint32_t ucol_getIncrementalUCA(UChar ch, incrementalContext *collationSource, U
|
|||
int32_t ucol_getIncrementalSpecialCE(const UCollator *coll, uint32_t CE, incrementalContext *ctx, UErrorCode *status);
|
||||
void ucol_updateInternalState(UCollator *coll);
|
||||
uint32_t ucol_getFirstCE(const UCollator *coll, UChar u, UErrorCode *status);
|
||||
/* Writes a printable hex dump of sortkey into buffer, laid out according to the strength, */
/* caseLevel and alternateHandling settings of coll, and returns buffer. */
U_CAPI char U_EXPORT2 *ucol_sortKeyToString(const UCollator *coll, const uint8_t *sortkey, char *buffer, uint32_t *len);
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -312,6 +312,254 @@ UBool ucol_uprv_tok_readAndSetOption(UCATableHeader *image, const UChar* start,
|
|||
#define UCOL_TOK_UNSET 0xFFFFFFFF
|
||||
#define UCOL_TOK_RESET 0xDEADBEEF
|
||||
|
||||
const UChar *ucol_tok_parseNextToken(UColTokenParser *src,
|
||||
uint32_t *strength,
|
||||
uint32_t *chOffset, uint32_t *chLen,
|
||||
uint32_t *exOffset, uint32_t *exLen,
|
||||
UBool *varT, UBool *top_,
|
||||
UBool startOfRules,
|
||||
UErrorCode *status) {
|
||||
/* parsing part */
|
||||
|
||||
UBool variableTop = FALSE;
|
||||
UBool top = FALSE;
|
||||
UBool inChars = TRUE;
|
||||
UBool inQuote = FALSE;
|
||||
UBool wasInQuote = FALSE;
|
||||
UChar *optionEnd = NULL;
|
||||
|
||||
uint32_t newCharsLen = 0, newExtensionLen = 0;
|
||||
uint32_t charsOffset = 0, extensionOffset = 0;
|
||||
uint32_t newStrength = UCOL_TOK_UNSET;
|
||||
|
||||
while (src->current < src->end) {
|
||||
UChar ch = *(src->current);
|
||||
|
||||
if (inQuote) {
|
||||
if (ch == 0x0027/*'\''*/) {
|
||||
inQuote = FALSE;
|
||||
} else {
|
||||
if ((newCharsLen == 0) || inChars) {
|
||||
if(newCharsLen == 0) {
|
||||
charsOffset = src->extraCurrent - src->source;
|
||||
}
|
||||
newCharsLen++;
|
||||
} else {
|
||||
if(newExtensionLen == 0) {
|
||||
extensionOffset = src->extraCurrent - src->source;
|
||||
}
|
||||
newExtensionLen++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Sets the strength for this entry */
|
||||
switch (ch) {
|
||||
case 0x003D/*'='*/ :
|
||||
if (newStrength != UCOL_TOK_UNSET) {
|
||||
goto EndOfLoop;
|
||||
}
|
||||
|
||||
/* if we start with strength, we'll reset to top */
|
||||
if(startOfRules == TRUE) {
|
||||
top = TRUE;
|
||||
newStrength = UCOL_TOK_RESET;
|
||||
goto EndOfLoop;
|
||||
}
|
||||
newStrength = UCOL_IDENTICAL;
|
||||
break;
|
||||
|
||||
case 0x002C/*','*/:
|
||||
if (newStrength != UCOL_TOK_UNSET) {
|
||||
goto EndOfLoop;
|
||||
}
|
||||
|
||||
/* if we start with strength, we'll reset to top */
|
||||
if(startOfRules == TRUE) {
|
||||
top = TRUE;
|
||||
newStrength = UCOL_TOK_RESET;
|
||||
goto EndOfLoop;
|
||||
}
|
||||
newStrength = UCOL_TERTIARY;
|
||||
break;
|
||||
|
||||
case 0x003B/*';'*/:
|
||||
if (newStrength != UCOL_TOK_UNSET) {
|
||||
goto EndOfLoop;
|
||||
}
|
||||
|
||||
/* if we start with strength, we'll reset to top */
|
||||
if(startOfRules == TRUE) {
|
||||
top = TRUE;
|
||||
newStrength = UCOL_TOK_RESET;
|
||||
goto EndOfLoop;
|
||||
}
|
||||
newStrength = UCOL_SECONDARY;
|
||||
break;
|
||||
|
||||
case 0x003C/*'<'*/:
|
||||
if (newStrength != UCOL_TOK_UNSET) {
|
||||
goto EndOfLoop;
|
||||
}
|
||||
|
||||
/* if we start with strength, we'll reset to top */
|
||||
if(startOfRules == TRUE) {
|
||||
top = TRUE;
|
||||
newStrength = UCOL_TOK_RESET;
|
||||
goto EndOfLoop;
|
||||
}
|
||||
/* before this, do a scan to verify whether this is */
|
||||
/* another strength */
|
||||
if(*(src->current+1) == 0x003C) {
|
||||
src->current++;
|
||||
if(*(src->current+1) == 0x003C) {
|
||||
src->current++; /* three in a row! */
|
||||
newStrength = UCOL_TERTIARY;
|
||||
} else { /* two in a row */
|
||||
newStrength = UCOL_SECONDARY;
|
||||
}
|
||||
} else { /* just one */
|
||||
newStrength = UCOL_PRIMARY;
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x0026/*'&'*/:
|
||||
if (newStrength != UCOL_TOK_UNSET) {
|
||||
goto EndOfLoop;
|
||||
}
|
||||
|
||||
newStrength = UCOL_TOK_RESET; /* PatternEntry::RESET = 0 */
|
||||
break;
|
||||
|
||||
case 0x005b/*'['*/:
|
||||
/* options - read an option, analyze it */
|
||||
if((optionEnd = u_strchr(src->current, 0x005d /*']'*/)) != NULL) {
|
||||
ucol_uprv_tok_readAndSetOption(src->image, src->current, optionEnd, &variableTop, &top, status);
|
||||
src->current = optionEnd;
|
||||
if(top == TRUE) {
|
||||
if(newStrength == UCOL_TOK_RESET) {
|
||||
src->current++;
|
||||
goto EndOfLoop;
|
||||
} else {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
}
|
||||
}
|
||||
if(U_FAILURE(*status)) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* Ignore the white spaces */
|
||||
case 0x0009/*'\t'*/:
|
||||
case 0x000C/*'\f'*/:
|
||||
case 0x000D/*'\r'*/:
|
||||
case 0x000A/*'\n'*/:
|
||||
case 0x0020/*' '*/:
|
||||
break; /* skip whitespace TODO use Unicode */
|
||||
|
||||
case 0x002F/*'/'*/:
|
||||
/* This entry has an extension. */
|
||||
inChars = FALSE;
|
||||
break;
|
||||
|
||||
/* found a quote, we're gonna start copying */
|
||||
case 0x0027/*'\''*/:
|
||||
inQuote = TRUE;
|
||||
wasInQuote = TRUE;
|
||||
|
||||
if (newCharsLen == 0) {
|
||||
charsOffset = src->extraCurrent - src->source;
|
||||
newCharsLen++;
|
||||
} else if (inChars) { /* we're reading some chars */
|
||||
charsOffset = src->extraCurrent - src->source;
|
||||
if(newCharsLen != 0) {
|
||||
uprv_memcpy(src->extraCurrent, src->current - newCharsLen, newCharsLen*sizeof(UChar));
|
||||
src->extraCurrent += newCharsLen;
|
||||
}
|
||||
newCharsLen++;
|
||||
} else {
|
||||
if(newExtensionLen != 0) {
|
||||
uprv_memcpy(src->extraCurrent, src->current - newExtensionLen, newExtensionLen*sizeof(UChar));
|
||||
src->extraCurrent += newExtensionLen;
|
||||
}
|
||||
newExtensionLen++;
|
||||
}
|
||||
|
||||
ch = *(++(src->current)); /*pattern[++index]; */
|
||||
break;
|
||||
|
||||
/* '@' is french only if the strength is not currently set */
|
||||
/* if it is, it's just a regular character in collation rules */
|
||||
case 0x0040/*'@'*/:
|
||||
if (newStrength == UCOL_TOK_UNSET) {
|
||||
src->image->frenchCollation = UCOL_ON;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
if (newStrength == UCOL_TOK_UNSET) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (ucol_tok_isSpecialChar(ch) && (inQuote == FALSE)) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (inChars) {
|
||||
if(newCharsLen == 0) {
|
||||
charsOffset = src->current - src->source;
|
||||
}
|
||||
newCharsLen++;
|
||||
} else {
|
||||
if(newExtensionLen == 0) {
|
||||
extensionOffset = src->current - src->source;
|
||||
}
|
||||
newExtensionLen++;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(wasInQuote) {
|
||||
if(ch != 0x27 || newCharsLen == 1) {
|
||||
*src->extraCurrent++ = ch;
|
||||
}
|
||||
if(src->extraCurrent == src->extraEnd) {
|
||||
/* reallocate */
|
||||
}
|
||||
}
|
||||
|
||||
src->current++;
|
||||
}
|
||||
|
||||
EndOfLoop:
|
||||
wasInQuote = FALSE;
|
||||
if (newStrength == UCOL_TOK_UNSET) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (newCharsLen == 0 && top == FALSE) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
*strength = newStrength;
|
||||
|
||||
*chOffset = charsOffset;
|
||||
*chLen = newCharsLen;
|
||||
*exOffset = extensionOffset;
|
||||
*exLen = newExtensionLen;
|
||||
*varT = variableTop;
|
||||
*top_ = top;
|
||||
|
||||
return src->current;
|
||||
}
|
||||
|
||||
/*
|
||||
Processing Description
|
||||
1 Build a ListList. Each list has a header, which contains two lists (positive
|
||||
|
@ -323,14 +571,15 @@ Processing Description
|
|||
|
||||
uint32_t ucol_uprv_tok_assembleTokenList(UColTokenParser *src, UErrorCode *status) {
|
||||
UColToken *lastToken = NULL;
|
||||
uint32_t newCharsLen = 0, newExtensionsLen = 0;
|
||||
uint32_t charsOffset = 0, extensionOffset = 0;
|
||||
const UChar *parseEnd = NULL;
|
||||
uint32_t expandNext = 0;
|
||||
UBool variableTop = FALSE;
|
||||
UBool top = FALSE;
|
||||
|
||||
UColTokListHeader *ListList = NULL;
|
||||
|
||||
uint32_t newCharsLen = 0, newExtensionsLen = 0;
|
||||
uint32_t charsOffset = 0, extensionOffset = 0;
|
||||
uint32_t newStrength = UCOL_TOK_UNSET;
|
||||
|
||||
ucol_tok_initTokenList(src, status);
|
||||
|
@ -340,235 +589,16 @@ uint32_t ucol_uprv_tok_assembleTokenList(UColTokenParser *src, UErrorCode *statu
|
|||
src->image->variableTopValue = 0;
|
||||
|
||||
while(src->current < src->end) {
|
||||
{ /* parsing part */
|
||||
|
||||
parseEnd = ucol_tok_parseNextToken(src,
|
||||
&newStrength,
|
||||
&charsOffset, &newCharsLen,
|
||||
&extensionOffset, &newExtensionsLen,
|
||||
&variableTop, &top,
|
||||
(UBool)(lastToken == NULL),
|
||||
status);
|
||||
|
||||
UBool inChars = TRUE;
|
||||
UBool inQuote = FALSE;
|
||||
UBool wasInQuote = FALSE;
|
||||
UChar *optionEnd = NULL;
|
||||
|
||||
newStrength = UCOL_TOK_UNSET;
|
||||
newCharsLen = 0; newExtensionsLen = 0;
|
||||
charsOffset = 0; extensionOffset = 0;
|
||||
|
||||
while (src->current < src->end) {
|
||||
UChar ch = *(src->current);
|
||||
|
||||
if (inQuote) {
|
||||
if (ch == 0x0027/*'\''*/) {
|
||||
inQuote = FALSE;
|
||||
} else {
|
||||
if ((newCharsLen == 0) || inChars) {
|
||||
if(newCharsLen == 0) {
|
||||
charsOffset = src->extraCurrent - src->source;
|
||||
}
|
||||
newCharsLen++;
|
||||
} else {
|
||||
if(newExtensionsLen == 0) {
|
||||
extensionOffset = src->extraCurrent - src->source;
|
||||
}
|
||||
newExtensionsLen++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Sets the strength for this entry */
|
||||
switch (ch) {
|
||||
case 0x003D/*'='*/ :
|
||||
if (newStrength != UCOL_TOK_UNSET) {
|
||||
goto EndOfLoop;
|
||||
}
|
||||
|
||||
/* if we start with strength, we'll reset to top */
|
||||
if(lastToken == NULL) {
|
||||
top = TRUE;
|
||||
newStrength = UCOL_TOK_RESET;
|
||||
goto EndOfLoop;
|
||||
}
|
||||
newStrength = UCOL_IDENTICAL;
|
||||
break;
|
||||
|
||||
case 0x002C/*','*/:
|
||||
if (newStrength != UCOL_TOK_UNSET) {
|
||||
goto EndOfLoop;
|
||||
}
|
||||
|
||||
/* if we start with strength, we'll reset to top */
|
||||
if(lastToken == NULL) {
|
||||
top = TRUE;
|
||||
newStrength = UCOL_TOK_RESET;
|
||||
goto EndOfLoop;
|
||||
}
|
||||
newStrength = UCOL_TERTIARY;
|
||||
break;
|
||||
|
||||
case 0x003B/*';'*/:
|
||||
if (newStrength != UCOL_TOK_UNSET) {
|
||||
goto EndOfLoop;
|
||||
}
|
||||
|
||||
/* if we start with strength, we'll reset to top */
|
||||
if(lastToken == NULL) {
|
||||
top = TRUE;
|
||||
newStrength = UCOL_TOK_RESET;
|
||||
goto EndOfLoop;
|
||||
}
|
||||
newStrength = UCOL_SECONDARY;
|
||||
break;
|
||||
|
||||
case 0x003C/*'<'*/:
|
||||
if (newStrength != UCOL_TOK_UNSET) {
|
||||
goto EndOfLoop;
|
||||
}
|
||||
|
||||
/* if we start with strength, we'll reset to top */
|
||||
if(lastToken == NULL) {
|
||||
top = TRUE;
|
||||
newStrength = UCOL_TOK_RESET;
|
||||
goto EndOfLoop;
|
||||
}
|
||||
/* before this, do a scan to verify whether this is */
|
||||
/* another strength */
|
||||
if(*(src->current+1) == 0x003C) {
|
||||
src->current++;
|
||||
if(*(src->current+1) == 0x003C) {
|
||||
src->current++; /* three in a row! */
|
||||
newStrength = UCOL_TERTIARY;
|
||||
} else { /* two in a row */
|
||||
newStrength = UCOL_SECONDARY;
|
||||
}
|
||||
} else { /* just one */
|
||||
newStrength = UCOL_PRIMARY;
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x0026/*'&'*/:
|
||||
if (newStrength != UCOL_TOK_UNSET) {
|
||||
goto EndOfLoop;
|
||||
}
|
||||
|
||||
newStrength = UCOL_TOK_RESET; /* PatternEntry::RESET = 0 */
|
||||
break;
|
||||
|
||||
case 0x005b/*'['*/:
|
||||
/* options - read an option, analyze it */
|
||||
if((optionEnd = u_strchr(src->current, 0x005d /*']'*/)) != NULL) {
|
||||
ucol_uprv_tok_readAndSetOption(src->image, src->current, optionEnd, &variableTop, &top, status);
|
||||
src->current = optionEnd;
|
||||
if(top == TRUE) {
|
||||
if(newStrength == UCOL_TOK_RESET) {
|
||||
src->current++;
|
||||
goto EndOfLoop;
|
||||
} else {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
}
|
||||
}
|
||||
if(U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* Ignore the white spaces */
|
||||
case 0x0009/*'\t'*/:
|
||||
case 0x000C/*'\f'*/:
|
||||
case 0x000D/*'\r'*/:
|
||||
case 0x000A/*'\n'*/:
|
||||
case 0x0020/*' '*/:
|
||||
break; /* skip whitespace TODO use Unicode */
|
||||
|
||||
case 0x002F/*'/'*/:
|
||||
/* This entry has an extension. */
|
||||
inChars = FALSE;
|
||||
break;
|
||||
|
||||
/* found a quote, we're gonna start copying */
|
||||
case 0x0027/*'\''*/:
|
||||
inQuote = TRUE;
|
||||
wasInQuote = TRUE;
|
||||
|
||||
if (newCharsLen == 0) {
|
||||
charsOffset = src->extraCurrent - src->source;
|
||||
newCharsLen++;
|
||||
} else if (inChars) { /* we're reading some chars */
|
||||
charsOffset = src->extraCurrent - src->source;
|
||||
if(newCharsLen != 0) {
|
||||
uprv_memcpy(src->extraCurrent, src->current - newCharsLen, newCharsLen*sizeof(UChar));
|
||||
src->extraCurrent += newCharsLen;
|
||||
}
|
||||
newCharsLen++;
|
||||
} else {
|
||||
if(newExtensionsLen != 0) {
|
||||
uprv_memcpy(src->extraCurrent, src->current - newExtensionsLen, newExtensionsLen*sizeof(UChar));
|
||||
src->extraCurrent += newExtensionsLen;
|
||||
}
|
||||
newExtensionsLen++;
|
||||
}
|
||||
|
||||
ch = *(++(src->current)); /*pattern[++index]; */
|
||||
break;
|
||||
|
||||
/* '@' is french only if the strength is not currently set */
|
||||
/* if it is, it's just a regular character in collation rules */
|
||||
case 0x0040/*'@'*/:
|
||||
if (newStrength == UCOL_TOK_UNSET) {
|
||||
src->image->frenchCollation = UCOL_ON;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
if (newStrength == UCOL_TOK_UNSET) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (ucol_tok_isSpecialChar(ch) && (inQuote == FALSE)) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (inChars) {
|
||||
if(newCharsLen == 0) {
|
||||
charsOffset = src->current - src->source;
|
||||
}
|
||||
newCharsLen++;
|
||||
} else {
|
||||
if(newExtensionsLen == 0) {
|
||||
extensionOffset = src->current - src->source;
|
||||
}
|
||||
newExtensionsLen++;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(wasInQuote) {
|
||||
if(ch != 0x27) {
|
||||
*src->extraCurrent++ = ch;
|
||||
}
|
||||
if(src->extraCurrent == src->extraEnd) {
|
||||
/* reallocate */
|
||||
}
|
||||
}
|
||||
|
||||
src->current++;
|
||||
}
|
||||
|
||||
EndOfLoop:
|
||||
wasInQuote = FALSE;
|
||||
if (newStrength == UCOL_TOK_UNSET) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (newCharsLen == 0 && top == FALSE) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
if(U_SUCCESS(*status) && parseEnd != NULL) {
|
||||
UColToken *sourceToken = NULL;
|
||||
UColToken key;
|
||||
|
||||
|
@ -789,7 +819,9 @@ uint32_t ucol_uprv_tok_assembleTokenList(UColTokenParser *src, UErrorCode *statu
|
|||
}
|
||||
/* 7 After all this, set LAST to point to sourceToken, and goto step 3. */
|
||||
lastToken = sourceToken;
|
||||
}
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return src->resultLen;
|
||||
|
|
|
@ -117,6 +117,13 @@ int32_t uhash_hashTokens(const void *k);
|
|||
UBool uhash_compareTokens(const void *key1, const void *key2);
|
||||
void ucol_tok_initTokenList(UColTokenParser *src, UErrorCode *status);
|
||||
uint32_t ucol_uprv_tok_assembleTokenList(UColTokenParser *src, UErrorCode *status);
|
||||
/* Parses the next rule token starting at src->current. */
/* On success stores the relation in *strength (or UCOL_TOK_RESET), the offset and length of the */
/* token characters in *chOffset / *chLen, those of its extension in *exOffset / *exLen, the */
/* option flags in *varT / *top_, and returns src->current. */
/* startOfRules TRUE makes a leading strength character act as a reset to top. */
/* Returns NULL when no token was found or when *status was set to an error. */
U_CAPI const UChar U_EXPORT2 *ucol_tok_parseNextToken(UColTokenParser *src,
|
||||
uint32_t *strength,
|
||||
uint32_t *chOffset, uint32_t *chLen,
|
||||
uint32_t *exOffset, uint32_t *exLen,
|
||||
UBool *varT, UBool *top_,
|
||||
UBool startOfRules,
|
||||
UErrorCode *status);
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "callcoll.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "string.h"
|
||||
#include "ucol_imp.h"
|
||||
|
||||
static UCollator *myCollation;
|
||||
const static UChar rules[MAX_TOKEN_LEN] =
|
||||
|
@ -435,24 +436,55 @@ static void FunkyATest( )
|
|||
ucol_close(myCollation);
|
||||
}
|
||||
|
||||
UColAttributeValue caseFirst[] = {
|
||||
UCOL_OFF,
|
||||
UCOL_LOWER_FIRST,
|
||||
UCOL_UPPER_FIRST
|
||||
};
|
||||
|
||||
|
||||
UColAttributeValue alternateHandling[] = {
|
||||
UCOL_NON_IGNORABLE,
|
||||
UCOL_NON_IGNORABLE,
|
||||
UCOL_SHIFTED
|
||||
};
|
||||
|
||||
UColAttributeValue caseLevel[] = {
|
||||
UCOL_OFF,
|
||||
UCOL_OFF,
|
||||
UCOL_ON
|
||||
};
|
||||
|
||||
UColAttributeValue strengths[] = {
|
||||
UCOL_PRIMARY,
|
||||
UCOL_PRIMARY,
|
||||
UCOL_SECONDARY,
|
||||
UCOL_TERTIARY,
|
||||
UCOL_QUATERNARY,
|
||||
UCOL_IDENTICAL
|
||||
};
|
||||
|
||||
char * caseFirstC[] = {
|
||||
"UCOL_OFF",
|
||||
"UCOL_LOWER_FIRST",
|
||||
"UCOL_UPPER_FIRST"
|
||||
};
|
||||
|
||||
|
||||
char * alternateHandlingC[] = {
|
||||
"UCOL_NON_IGNORABLE",
|
||||
"UCOL_SHIFTED"
|
||||
};
|
||||
|
||||
char * caseLevelC[] = {
|
||||
"UCOL_OFF",
|
||||
"UCOL_ON"
|
||||
};
|
||||
|
||||
char * strengthsC[] = {
|
||||
"UCOL_PRIMARY",
|
||||
"UCOL_SECONDARY",
|
||||
"UCOL_TERTIARY",
|
||||
"UCOL_QUATERNARY",
|
||||
"UCOL_IDENTICAL"
|
||||
};
|
||||
|
||||
|
||||
static void PrintMarkDavis( )
|
||||
|
@ -461,8 +493,10 @@ static void PrintMarkDavis( )
|
|||
UChar m[256];
|
||||
uint8_t sortkey[256];
|
||||
UCollator *coll = ucol_open(NULL, &status);
|
||||
uint32_t i,j,k,l, sortkeysize;
|
||||
uint32_t h,i,j,k, sortkeysize;
|
||||
uint32_t sizem = 0;
|
||||
char buffer[512];
|
||||
uint32_t len = 512;
|
||||
|
||||
u_uastrcpy(m, "Mark Davis");
|
||||
sizem = u_strlen(m);
|
||||
|
@ -475,20 +509,29 @@ static void PrintMarkDavis( )
|
|||
}
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
|
||||
ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
|
||||
for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
|
||||
ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
|
||||
for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
|
||||
ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
|
||||
sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
|
||||
fprintf(stderr, "aH: %i, case: %i, st: %i\nSortkey: ", alternateHandling[i], caseLevel[j], strengths[k]);
|
||||
for(l = 0; l<sortkeysize; l++) {
|
||||
fprintf(stderr, "%02X", sortkey[l]);
|
||||
for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
|
||||
ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[i], &status);
|
||||
fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);
|
||||
|
||||
for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
|
||||
ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
|
||||
fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]);
|
||||
|
||||
for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
|
||||
ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
|
||||
fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]);
|
||||
|
||||
for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
|
||||
ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
|
||||
sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
|
||||
fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]);
|
||||
fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -502,3 +545,477 @@ void addMiscCollTest(TestNode** root)
|
|||
/*addTest(root, &PrintMarkDavis, "tscoll/cmsccoll/PrintMarkDavis");*/
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
||||
/* Ram's rule test */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "unicode\ucol.h"
|
||||
#include "unicode\ustdio.h"
|
||||
#include "unicode\ustring.h"
|
||||
#include "ucol_tok.h"
|
||||
#define AMP '&'
|
||||
#define GREAT '<'
|
||||
#define EQUAL '='
|
||||
#define COMA ','
|
||||
#define SEMIC ';'
|
||||
#define BRACKET '['
|
||||
#define ACCENT '@'
|
||||
#define AMP_STR "&"
|
||||
#define GREAT_STR "<"
|
||||
#define EQUAL_STR "="
|
||||
#define COMA_STR ","
|
||||
#define SEMIC_STR ";"
|
||||
#define DG_STR "<<"
|
||||
#define TG_STR "<<<"
|
||||
|
||||
static FILE* file;
|
||||
|
||||
|
||||
int32_t transformUTF16ToUTF8(uint8_t *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength) {
|
||||
int32_t srcIndex, destIndex;
|
||||
UChar32 c;
|
||||
|
||||
for(srcIndex=destIndex=0; srcIndex<srcLength && destIndex<destCapacity;) {
|
||||
/* get code point from UTF-16 */
|
||||
UTF_NEXT_CHAR(src, srcIndex, srcLength, c);
|
||||
/* write code point in UTF-8 */
|
||||
UTF8_APPEND_CHAR_SAFE(dest, destIndex, destCapacity, c);
|
||||
}
|
||||
|
||||
return destIndex; /* return destination length */
|
||||
}
|
||||
/* Sets the first len units of the buffer *src to zero; *src itself is not changed. */
void resetBuf(UChar** src,int len){
    UChar* cursor = *src;
    int remaining;

    for(remaining = len; remaining > 0; remaining--){
        *cursor++ = '\0';
    }
}
|
||||
|
||||
/*
 * Returns a pointer to the first of '&', '=', ',', ';' or '<' within the
 * first srcLen units of source, or NULL when none of them occurs.
 */
UChar* findDelimiter(UChar* source,int srcLen){
    int pos;

    for(pos = 0; pos < srcLen; pos++){
        UChar unit = source[pos];
        if(unit == AMP || unit == EQUAL || unit == COMA
           || unit == SEMIC || unit == GREAT){
            return source + pos;
        }
    }
    return NULL;
}
|
||||
char *aescstrdup(const UChar* unichars, char* buf,int len){
|
||||
int length;
|
||||
char *newString,*targetLimit,*target;
|
||||
UConverterFromUCallback cb;
|
||||
void *p;
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
UConverter* conv = ucnv_open("US-ASCII",&errorCode);
|
||||
length = u_strlen( unichars);
|
||||
newString = buf;
|
||||
target = newString;
|
||||
targetLimit = newString+len;
|
||||
ucnv_setFromUCallBack(conv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA, &cb, &p, &errorCode);
|
||||
ucnv_fromUnicode(conv,&target,targetLimit, &unichars, (UChar*)(unichars+length),NULL,TRUE,&errorCode);
|
||||
*target = '\0';
|
||||
return newString;
|
||||
}
|
||||
/*
 * Checks a primary difference between p and q under col:
 *  1. p must compare less than q;
 *  2. U+00E0 + p must still compare less than U+0061 + q, i.e. the
 *     difference between p and q outweighs the difference between the
 *     prefixes.
 * Failures are written to the global report stream `file`.
 */
void testPrimary(UCollator* col, const UChar* p,const UChar* q){
    UChar source[256] = { '\0'};
    UChar target[256] = { '\0'};
    char utfSource[256] = {'\0'};
    char utfTarget[256] = {'\0'};
    UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));

    if(result!=UCOL_LESS){
        aescstrdup(p,utfSource,256);
        aescstrdup(q,utfTarget,256);
        fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget);
    }

    /* source/target must hold one extra unit, the operand and the terminator */
    if(u_strlen(p) > 254 || u_strlen(q) > 254){
        fprintf(file,"Primary swamps 2nd skipped: operand too long\n");
        return;
    }

    source[0] = 0x00E0;
    u_strcat(source,p);
    target[0] = 0x0061;
    u_strcat(target,q);
    result = ucol_strcoll(col,source,u_strlen(source),target,u_strlen(target));
    if(result!=UCOL_LESS){
        aescstrdup(source,utfSource,256);
        aescstrdup(target,utfTarget,256);
        fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSource,utfTarget);
    }
}
|
||||
|
||||
/*
 * Checks a secondary difference between p and q under col:
 *  1. p must compare less than q;
 *  2. U+0041 + p must compare less than U+0061 + q (the secondary difference
 *     outweighs the difference between the prefixes);
 *  3. p + 'b' must compare greater than q + 'a' (the difference between the
 *     suffixes outweighs the secondary difference).
 * Failures are written to the global report stream `file`.
 */
void testSecondary(UCollator* col, const UChar* p,const UChar* q){
    UChar source[256] = { '\0'};
    UChar target[256] = { '\0'};
    UChar temp[2] = {'\0'};
    char utfSource[256] = {'\0'};
    char utfTarget[256] = {'\0'};

    UCollationResult result= ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));

    if(result!=UCOL_LESS){
        aescstrdup(p,utfSource,256);
        aescstrdup(q,utfTarget,256);
        fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarget);
    }

    /* source/target must hold one extra unit, the operand and the terminator */
    if(u_strlen(p) > 254 || u_strlen(q) > 254){
        fprintf(file,"secondary swamp checks skipped: operand too long\n");
        return;
    }

    source[0] = 0x0041;
    u_strcat(source,p);
    target[0]= 0x0061;
    u_strcat(target,q);
    result = ucol_strcoll(col,source,u_strlen(source),target,u_strlen(target));
    if(result!=UCOL_LESS){
        aescstrdup(source,utfSource,256);
        aescstrdup(target,utfTarget,256);
        fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSource,utfTarget);
    }

    /* the suffixes are built as real UChar strings; a cast char literal */
    /* is not a valid UTF-16 string                                      */
    source[0] = '\0';
    u_strcat(source,p);
    temp[0] = 0x0062; /* 'b' */
    u_strcat(source,temp);
    target[0] = '\0';
    u_strcat(target,q);
    temp[0] = 0x0061; /* 'a' */
    u_strcat(target,temp);
    result = ucol_strcoll(col,source,u_strlen(source),target,u_strlen(target));
    if(result!=UCOL_GREATER){
        aescstrdup(source,utfSource,256);
        aescstrdup(target,utfTarget,256);
        fprintf(file,"secondary is swamped by 1 failed source: %s target: %s \n",utfSource,utfTarget);
    }
}
|
||||
|
||||
/*
 * Checks a tertiary difference between p and q under col:
 *  1. p must compare less than q;
 *  2. U+0020 + p must compare less than U+002D + q (the tertiary difference
 *     outweighs the difference between the prefixes);
 *  3. p + U+00E0 must compare greater than q + 'a' (the difference between
 *     the suffixes outweighs the tertiary difference).
 * Failures are written to the global report stream `file`.
 */
void testTertiary(UCollator* col, const UChar* p,const UChar* q){
    UChar source[256] = { '\0'};
    UChar target[256] = { '\0'};
    UChar temp[2] = {'\0'};
    char utfSource[256] = {'\0'};
    char utfTarget[256] = {'\0'};
    UCollationResult result= ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));

    if(result!=UCOL_LESS){
        aescstrdup(p,utfSource,256);
        aescstrdup(q,utfTarget,256);
        fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget);
    }

    /* source/target must hold one extra unit, the operand and the terminator */
    if(u_strlen(p) > 254 || u_strlen(q) > 254){
        fprintf(file,"Tertiary swamp checks skipped: operand too long\n");
        return;
    }

    source[0] = 0x0020;
    u_strcat(source,p);
    target[0]= 0x002D;
    u_strcat(target,q);
    result = ucol_strcoll(col,source,u_strlen(source),target,u_strlen(target));
    if(result!=UCOL_LESS){
        aescstrdup(source,utfSource,256);
        aescstrdup(target,utfTarget,256);
        fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSource,utfTarget);
    }

    source[0] = '\0';
    u_strcat(source,p);
    temp[0] = 0x00E0;
    u_strcat(source,temp);
    target[0] = '\0';
    u_strcat(target,q);
    /* built as a real UChar string; a cast char literal is not valid UTF-16 */
    temp[0] = 0x0061; /* 'a' */
    u_strcat(target,temp);
    result = ucol_strcoll(col,source,u_strlen(source),target,u_strlen(target));
    if(result!=UCOL_GREATER){
        aescstrdup(source,utfSource,256);
        aescstrdup(target,utfTarget,256);
        fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",utfSource,utfTarget);
    }
}
|
||||
/*
 * Checks that p and q compare equal under col; a mismatch is written to the
 * global report stream `file`.
 */
void testEquality(UCollator* col, const UChar* p,const UChar* q){
    char utfSource[256] = {'\0'};
    char utfTarget[256] = {'\0'};
    UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));

    if(result!=UCOL_EQUAL){
        aescstrdup(p,utfSource,256);
        aescstrdup(q,utfTarget,256);
        /* label the report after the check that failed */
        fprintf(file,"Equality failed source: %s target: %s \n", utfSource,utfTarget);
    }
}
|
||||
|
||||
/*
 * Runs the check that matches the relation between p and q:
 *   strength 0 - testEquality, 1 - testPrimary,
 *   strength 2 - testSecondary, 3 - testTertiary.
 * Any other strength is ignored.  delimiter is accepted for the callers'
 * convenience and is not used.
 */
void testCollator(UCollator* col, const UChar* p,const UChar* q, UChar* delimiter,int strength){
    (void)delimiter;

    switch(strength){
    case 0:
        testEquality(col,p,q);
        break;
    case 1:
        testPrimary(col,p,q);
        break;
    case 2:
        testSecondary(col,p,q);
        break;
    case 3:
        testTertiary(col,p,q);
        break;
    default:
        break;
    }
}
|
||||
/*ar bg ca cs da el en_BE en_US_POSIX es et fi fr hi hr hu is iw ja ko lt lv mk mt nb nn nn_NO pl ro ru sh sk sl sq sr sv th tr uk vi zh zh_TW*/
|
||||
/*
 * Scans at most srcLen units from *source for the next rule delimiter.
 *
 *   delimiter        *strength   *delimiter
 *   '&'                  1        AMP_STR
 *   '='                  0        EQUAL_STR
 *   '<'                  1        GREAT_STR
 *   "<<"  or ';'         2        DG_STR / SEMIC_STR
 *   "<<<" or ','         3        TG_STR / COMA_STR
 *
 * '&', '=', '<' and '[' directly preceded by an apostrophe are not treated
 * as delimiters.  '&' followed within two units by '[' is stepped over, and
 * an unquoted '[' skips ahead to the next delimiter character found by
 * findDelimiter, moving *source there.
 *
 * Returns:
 *   - a pointer to the (last unit of the) delimiter when it is found after
 *     *source;
 *   - NULL with *source advanced past the delimiter when the delimiter sits
 *     exactly at *source;
 *   - NULL when no delimiter is found.
 *
 * NOTE(review): *(local-1) is read even for the first unit scanned, so
 * *source must not point at the very start of its buffer - confirm callers.
 * NOTE(review): the *_STR values are narrow string literals stored through a
 * UChar* cast; left unchanged because the use made of *delimiter by callers
 * is not visible here.
 */
UChar* consumeDelimiter(UChar** source, int srcLen,int* strength, UChar** delimiter){
    UChar* local = *source;
    UBool foundDelimiter = FALSE;
    int i=0;

    while(i<srcLen){
        switch(*local){
        case AMP:
            *strength=1;
            *delimiter = (UChar*)AMP_STR ;
            if((i+1 < srcLen && *(local+1) == BRACKET)
               || (i+2 < srcLen && *(local+2) == BRACKET)){
                /* keep the count in step with the pointer */
                local++;
                i++;
                continue;
            }
            if(*(local-1)!= 0x0027){
                foundDelimiter = TRUE;
            }
            break;
        case BRACKET:
            if(*(local-1)!= 0x0027){
                UChar* limit = findDelimiter(local,srcLen-i);
                if(limit == NULL){
                    /* nothing but option text is left */
                    return NULL;
                }
                i += (int)(limit - local);
                *source=local=limit;
                continue;
            }
            break;
        case EQUAL :
            *strength=0;
            if(*(local-1)!= 0x0027){
                *delimiter = (UChar*)EQUAL_STR;
                foundDelimiter = TRUE;
            }
            break;
        case COMA :
            *strength = 3;
            *delimiter =(UChar*)COMA_STR ;
            foundDelimiter = TRUE;
            break;
        case SEMIC :
            *delimiter = (UChar*)SEMIC_STR;
            *strength = 2;
            foundDelimiter = TRUE;
            break;
        case GREAT :
            {
                /* look at the unit in front of the first '<' before moving on */
                UBool quoted = (UBool)(*(local-1) == 0x0027);

                if(i+1 < srcLen && *(local+1) == GREAT){
                    if(i+2 < srcLen && *(local+2) == GREAT){
                        /* three in a row: tertiary */
                        *delimiter = (UChar*)TG_STR;
                        *strength = 3;
                        local += 2;
                        i += 2;
                    }
                    else{
                        /* two in a row: secondary */
                        *delimiter = (UChar*)DG_STR;
                        *strength = 2;
                        local++;
                        i++;
                    }
                }
                else{
                    *delimiter = (UChar*)GREAT_STR ;
                    *strength =1;
                }
                if(quoted == FALSE){
                    foundDelimiter =TRUE;
                }
            }
            break;
        default:
            break;
        }

        if(foundDelimiter){
            if(local ==*source){
                /* delimiter in first position: step over it */
                *source = ++local;
                return NULL;
            }
            else{
                return local;
            }
        }
        local++;
        i++;
    }
    return NULL;
}
|
||||
/**
 * Copies up to n UChars from src to dst, dropping every space (U+0020),
 * NUL (U+0000) and apostrophe (U+0027) on the way.
 *
 * Exactly n source elements are examined; a NUL in src does not stop the
 * scan. No terminator is written, so dst is only a terminated string if the
 * caller zero-filled it beforehand and it has room for more than the copied
 * elements.
 *
 * @param dst destination buffer; must have room for up to n UChars
 * @param src source text; at least n UChars must be readable
 * @param n   number of source UChars to examine; values <= 0 copy nothing
 * @return dst
 */
UChar* istrncpy(UChar* dst,const UChar* src,int32_t n){
    UChar *anchor = dst; /* remember the start of dst for the return value */

    while(n-- > 0){
        if(*src != 0x0020 && *src != 0 && *src != 0x0027){
            *dst++ = *src;
        }
        src++; /* was `*src++;`, whose dereference had no effect */
    }

    return anchor;
}
|
||||
|
||||
|
||||
/**
 * Walks a collation rule string, splits it into consecutive tokens and runs
 * testCollator() on every adjacent pair, using the strength reported by
 * consumeDelimiter(). Failures are written by the individual checks to the
 * global stream `file`, which this function opens as "<loc>TestCases.txt"
 * and closes again before returning.
 *
 * Fixes relative to the previous revision:
 *  - the file name buffer no longer overflows for locale names longer than
 *    six characters;
 *  - the output file is closed when the walk is done;
 *  - `strength` starts at -1 (no check) instead of being read uninitialised;
 *  - the initial delimiter is a real UChar string instead of a 2-byte char
 *    literal cast to UChar*, which u_strcmp() read past its end;
 *  - consumeDelimiter() receives the number of UChars left in the rules,
 *    not `length` minus the iteration count;
 *  - token copies are limited so they cannot overrun the token buffers;
 *  - the debug-only conversions that fed commented-out fprintf calls are
 *    gone.
 *
 * @param col    collator under test
 * @param loc    locale name used as file name prefix; may be NULL
 * @param rules  rule text to walk
 * @param length number of UChars in rules
 */
void parseAndPrintRules(UCollator* col,const char* loc, const UChar* rules, int length){
    enum { TOKEN_CAPACITY = 20, MAX_ITERATIONS = 300 };
    static const char suffix[] = "TestCases.txt";
    static const UChar blank[] = { 0x0020, 0 };

    UChar *local = (UChar*)rules;
    UChar current[TOKEN_CAPACITY] = {'\0'};
    UChar previous[TOKEN_CAPACITY] = {'\0'};
    UChar *first = current, *second = previous;
    UChar *delimiter = (UChar*)blank;
    int i = 0;
    int strength = -1; /* testCollator() runs no check for this value */
    char fileName[256] = {'\0'};
    UBool gotBoth = FALSE;

    if(loc){
        /* sizeof(suffix) already counts the terminating NUL */
        if(strlen(loc) + sizeof(suffix) > sizeof(fileName)){
            fprintf(stderr, "locale name too long for output file name: %s\n", loc);
            return;
        }
        strcpy(fileName, loc);
    }
    strcat(fileName, suffix);

    file = fopen(fileName, "wb");
    if(!file){
        return;
    }

    /* MAX_ITERATIONS caps the walk in case the scan stops making progress. */
    while((local-rules < length) && i < MAX_ITERATIONS){
        int32_t tokenLen;
        UChar* limit = consumeDelimiter(&local, length-(int)(local-rules), &strength, &delimiter);

        if(limit == NULL){
            /* AMP starts a new chain, so the previous left operand is dropped */
            if(u_strcmp(delimiter, (UChar*)AMP_STR) == 0){
                resetBuf(&first, TOKEN_CAPACITY);
            }
            limit = findDelimiter(local, length-(int)(local-rules));
            if(limit == NULL){
                limit = (UChar*)rules + length;
            }
        }

        /* istrncpy() writes at most tokenLen elements and no terminator;
           leaving one slot free keeps the zero-filled buffers terminated. */
        tokenLen = (int32_t)(limit - local);
        if(tokenLen > TOKEN_CAPACITY - 1){
            tokenLen = TOKEN_CAPACITY - 1;
        }

        if(*first == '\0'){
            istrncpy(first, local, tokenLen);
            local = limit;
        }
        else{
            if((local-rules) < length){
                istrncpy(second, local, tokenLen);
            }
            local = limit;
            gotBoth = TRUE;
        }

        if(gotBoth){
            testCollator(col, first, second, delimiter, strength);

            /* the right operand becomes the left operand of the next pair */
            resetBuf(&first, TOKEN_CAPACITY);
            u_strcpy(first, second);
            resetBuf(&second, TOKEN_CAPACITY);
            gotBoth = FALSE;
        }
        i++;
    }

    fclose(file);
    file = NULL;
}
|
||||
|
||||
/**
 * Second entry point for the rule walk; currently forwards to
 * parseAndPrintRules().
 *
 * The previous body was a copy of parseAndPrintRules() with the scanning
 * loop removed: it used an undeclared `limit`, never advanced through the
 * rules, and carried one closing brace too many, so it could not compile.
 * Everything that remained was identical to parseAndPrintRules(), hence the
 * delegation.
 *
 * NOTE(review): presumably this variant was meant to switch to a different
 * rule parser -- replace the forwarding call once that is wired in.
 *
 * @param col    collator under test
 * @param loc    locale name used as output file name prefix; may be NULL
 * @param rules  rule text to walk
 * @param length number of UChars in rules
 */
void parseAndPrintRules2(UCollator* col,const char* loc, const UChar* rules, int length){
    parseAndPrintRules(col, loc, rules, length);
}
|
||||
|
||||
void processRules(const char* loc){
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UCollator* col = ucol_open(loc,&status);
|
||||
int length=0;
|
||||
const UChar* rules;
|
||||
if(loc){
|
||||
rules = ucol_getRules(col,&length);
|
||||
}
|
||||
ucol_setAttribute(col,UCOL_STRENGTH,UCOL_QUATERNARY,&status);
|
||||
parseAndPrintRules2(col,loc,rules,length);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Command-line driver: runs processRules() once for every argument.
 *
 * Each argument is passed to processRules() as a locale name, so the usage
 * text now says so; it previously asked for "rpmap/rxmap-filename".
 *
 * @return 0 after all arguments were processed; exits with status 1 when no
 *         argument was given
 */
extern int
main(int argc, const char *argv[]) {
    int arg;

    if(argc < 2) {
        fprintf(stderr,
                "usage: %s { locale }+\n",
                argv[0]);
        exit(1);
    }

    for(arg = 1; arg < argc; ++arg) {
        processRules(argv[arg]);
    }

    return 0;
}
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Add table
Reference in a new issue