From cda9dc782f9d7f0bb3d4ba5d0693554f5d95df4e Mon Sep 17 00:00:00 2001 From: Vladimir Weinstein Date: Tue, 20 Mar 2001 00:56:37 +0000 Subject: [PATCH] ICU-96 correct handling of level separator for quad level, sortkeytostring private function, some tweaks for CE generation, rule parser factored out X-SVN-Rev: 4189 --- icu4c/source/i18n/ucol.cpp | 63 ++- icu4c/source/i18n/ucol_bld.cpp | 4 + icu4c/source/i18n/ucol_imp.h | 1 + icu4c/source/i18n/ucol_tok.c | 494 ++++++++++++----------- icu4c/source/i18n/ucol_tok.h | 7 + icu4c/source/test/cintltst/cmsccoll.c | 547 +++++++++++++++++++++++++- 6 files changed, 868 insertions(+), 248 deletions(-) diff --git a/icu4c/source/i18n/ucol.cpp b/icu4c/source/i18n/ucol.cpp index bc78687cf0e..5bcc9fe5a89 100644 --- a/icu4c/source/i18n/ucol.cpp +++ b/icu4c/source/i18n/ucol.cpp @@ -1431,7 +1431,7 @@ ucol_calcSortKey(const UCollator *coll, uint8_t *frenchEndPtr = NULL; uint32_t caseShift = 0; - sortKeySize += ((compareSec?0:1) + (compareTer?0:1) + (doCase?1:0) + (compareQuad?0:1) + (compareIdent?1:0)); + sortKeySize += ((compareSec?0:1) + (compareTer?0:1) + (doCase?1:0) + (qShifted?1:0)/*(compareQuad?0:1)*/ + (compareIdent?1:0)); collIterate s; init_collIterate(coll, (UChar *)source, len, &s, FALSE); @@ -1780,7 +1780,7 @@ ucol_calcSortKey(const UCollator *coll, if(sortKeySize <= resultLength) { uprv_memcpy(primaries, terStart, tersize); primaries += tersize; - if(compareQuad == 0) { + if(/*compareQuad == 0*/qShifted == TRUE) { if(count4 > 0) { while (count4 >= UCOL_BOT_COUNT4) { *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4); @@ -2198,6 +2198,65 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll, return sortKeySize; } +/* this function makes a string with representation of a sortkey */ +U_CAPI char U_EXPORT2 *ucol_sortKeyToString(const UCollator *coll, const uint8_t *sortkey, char *buffer, uint32_t *len) { + uint32_t strength = UCOL_PRIMARY; + uint32_t res_size = 0; + UBool doneCase = FALSE; + + char *current = buffer; + const uint8_t *currentSk = sortkey; + + sprintf(current, "["); + current++; + + while(strength <= UCOL_QUATERNARY && strength <= coll->strength) { + if(strength > UCOL_PRIMARY) { + sprintf(current, " . "); + current += 3; + } + while(*currentSk != 0x01 && *currentSk != 0x00) { /* print a level */ + sprintf(current, "%02X ", *currentSk++); + current+=3; + } + if(coll->caseLevel == UCOL_ON && strength == UCOL_SECONDARY && doneCase == FALSE) { + doneCase = TRUE; + } else if(coll->caseLevel == UCOL_OFF || doneCase == TRUE || strength != UCOL_SECONDARY) { + strength ++; + } + sprintf(current, "%02X", *(currentSk++)); /* This should print '01' */ + current +=2; + if(strength == UCOL_QUATERNARY && coll->alternateHandling == UCOL_NON_IGNORABLE) { + break; + } + } + + if(coll->strength == UCOL_IDENTICAL) { + sprintf(current, " . "); + current += 3; + while(*currentSk != 0) { + if(*currentSk == 0x01) { + sprintf(current, "%02X", *(currentSk++)); + current +=2; + } + + sprintf(current, "%02X%02X ", *currentSk, *(currentSk+1)); + current +=5; + currentSk+=2; + } + + sprintf(current, "%02X", *(currentSk++)); /* This should print '00' */ + current += 2; + + } + sprintf(current, "]"); + current += 3; + + return buffer; + + +} + /* This is a trick string compare function that goes in and uses sortkeys to compare */ /* It is used when compare gets in trouble and needs to bail out */ UCollationResult ucol_compareUsingSortKeys(const UCollator *coll, diff --git a/icu4c/source/i18n/ucol_bld.cpp b/icu4c/source/i18n/ucol_bld.cpp index 51b25ab769d..ed270e6462c 100644 --- a/icu4c/source/i18n/ucol_bld.cpp +++ b/icu4c/source/i18n/ucol_bld.cpp @@ -309,6 +309,10 @@ U_CFUNC uint32_t ucol_getCEGenerator(ucolCEGenerator *g, uint32_t* lows, uint32_ } } + if(low == 0) { + low = 0x01000000; + } + if(strength == UCOL_SECONDARY) { /* similar as simple */ if(low >= UCOL_COMMON_BOT2<<24 && low < UCOL_COMMON_TOP2<<24) { low = UCOL_COMMON_TOP2<<24; diff --git a/icu4c/source/i18n/ucol_imp.h b/icu4c/source/i18n/ucol_imp.h index 19a6b15aa66..9795ffe1e15 100644 --- a/icu4c/source/i18n/ucol_imp.h +++ b/icu4c/source/i18n/ucol_imp.h @@ -549,6 +549,7 @@ uint32_t ucol_getIncrementalUCA(UChar ch, incrementalContext *collationSource, U int32_t ucol_getIncrementalSpecialCE(const UCollator *coll, uint32_t CE, incrementalContext *ctx, UErrorCode *status); void ucol_updateInternalState(UCollator *coll); uint32_t ucol_getFirstCE(const UCollator *coll, UChar u, UErrorCode *status); +U_CAPI char U_EXPORT2 *ucol_sortKeyToString(const UCollator *coll, const uint8_t *sortkey, char *buffer, uint32_t *len); #endif diff --git a/icu4c/source/i18n/ucol_tok.c b/icu4c/source/i18n/ucol_tok.c index 2407e89f611..8656f46e308 100644 --- a/icu4c/source/i18n/ucol_tok.c +++ b/icu4c/source/i18n/ucol_tok.c @@ -312,6 +312,254 @@ UBool ucol_uprv_tok_readAndSetOption(UCATableHeader *image, const UChar* start, #define UCOL_TOK_UNSET 0xFFFFFFFF #define UCOL_TOK_RESET 0xDEADBEEF +const UChar *ucol_tok_parseNextToken(UColTokenParser *src, + uint32_t *strength, + uint32_t *chOffset, uint32_t *chLen, + uint32_t *exOffset, uint32_t *exLen, + UBool *varT, UBool *top_, + UBool startOfRules, + UErrorCode *status) { +/* parsing part */ + + UBool variableTop = FALSE; + UBool top = FALSE; + UBool inChars = TRUE; + UBool inQuote = FALSE; + UBool wasInQuote = FALSE; + UChar *optionEnd = NULL; + + uint32_t newCharsLen = 0, newExtensionLen = 0; + uint32_t charsOffset = 0, extensionOffset = 0; + uint32_t newStrength = UCOL_TOK_UNSET; + + while (src->current < src->end) { + UChar ch = *(src->current); + + if (inQuote) { + if (ch == 0x0027/*'\''*/) { + inQuote = FALSE; + } else { + if ((newCharsLen == 0) || inChars) { + if(newCharsLen == 0) { + charsOffset = src->extraCurrent - src->source; + } + newCharsLen++; + } else { + if(newExtensionLen == 0) { + extensionOffset = src->extraCurrent - src->source; + } + newExtensionLen++; + } + } + } else { + /* Sets the strength for this entry */ + switch (ch) { + case 0x003D/*'='*/ : + if (newStrength != UCOL_TOK_UNSET) { + goto EndOfLoop; + } + + /* if we start with strength, we'll reset to top */ + if(startOfRules == TRUE) { + top = TRUE; + newStrength = UCOL_TOK_RESET; + goto EndOfLoop; + } + newStrength = UCOL_IDENTICAL; + break; + + case 0x002C/*','*/: + if (newStrength != UCOL_TOK_UNSET) { + goto EndOfLoop; + } + + /* if we start with strength, we'll reset to top */ + if(startOfRules == TRUE) { + top = TRUE; + newStrength = UCOL_TOK_RESET; + goto EndOfLoop; + } + newStrength = UCOL_TERTIARY; + break; + + case 0x003B/*';'*/: + if (newStrength != UCOL_TOK_UNSET) { + goto EndOfLoop; + } + + /* if we start with strength, we'll reset to top */ + if(startOfRules == TRUE) { + top = TRUE; + newStrength = UCOL_TOK_RESET; + goto EndOfLoop; + } + newStrength = UCOL_SECONDARY; + break; + + case 0x003C/*'<'*/: + if (newStrength != UCOL_TOK_UNSET) { + goto EndOfLoop; + } + + /* if we start with strength, we'll reset to top */ + if(startOfRules == TRUE) { + top = TRUE; + newStrength = UCOL_TOK_RESET; + goto EndOfLoop; + } + /* before this, do a scan to verify whether this is */ + /* another strength */ + if(*(src->current+1) == 0x003C) { + src->current++; + if(*(src->current+1) == 0x003C) { + src->current++; /* three in a row! */ + newStrength = UCOL_TERTIARY; + } else { /* two in a row */ + newStrength = UCOL_SECONDARY; + } + } else { /* just one */ + newStrength = UCOL_PRIMARY; + } + break; + + case 0x0026/*'&'*/: + if (newStrength != UCOL_TOK_UNSET) { + goto EndOfLoop; + } + + newStrength = UCOL_TOK_RESET; /* PatternEntry::RESET = 0 */ + break; + + case 0x005b/*'['*/: + /* options - read an option, analyze it */ + if((optionEnd = u_strchr(src->current, 0x005d /*']'*/)) != NULL) { + ucol_uprv_tok_readAndSetOption(src->image, src->current, optionEnd, &variableTop, &top, status); + src->current = optionEnd; + if(top == TRUE) { + if(newStrength == UCOL_TOK_RESET) { + src->current++; + goto EndOfLoop; + } else { + *status = U_INVALID_FORMAT_ERROR; + } + } + if(U_FAILURE(*status)) { + return NULL; + } + } + break; + + /* Ignore the white spaces */ + case 0x0009/*'\t'*/: + case 0x000C/*'\f'*/: + case 0x000D/*'\r'*/: + case 0x000A/*'\n'*/: + case 0x0020/*' '*/: + break; /* skip whitespace TODO use Unicode */ + + case 0x002F/*'/'*/: + /* This entry has an extension. */ + inChars = FALSE; + break; + + /* found a quote, we're gonna start copying */ + case 0x0027/*'\''*/: + inQuote = TRUE; + wasInQuote = TRUE; + + if (newCharsLen == 0) { + charsOffset = src->extraCurrent - src->source; + newCharsLen++; + } else if (inChars) { /* we're reading some chars */ + charsOffset = src->extraCurrent - src->source; + if(newCharsLen != 0) { + uprv_memcpy(src->extraCurrent, src->current - newCharsLen, newCharsLen*sizeof(UChar)); + src->extraCurrent += newCharsLen; + } + newCharsLen++; + } else { + if(newExtensionLen != 0) { + uprv_memcpy(src->extraCurrent, src->current - newExtensionLen, newExtensionLen*sizeof(UChar)); + src->extraCurrent += newExtensionLen; + } + newExtensionLen++; + } + + ch = *(++(src->current)); /*pattern[++index]; */ + break; + + /* '@' is french only if the strength is not currently set */ + /* if it is, it's just a regular character in collation rules */ + case 0x0040/*'@'*/: + if (newStrength == UCOL_TOK_UNSET) { + src->image->frenchCollation = UCOL_ON; + break; + } + + default: + if (newStrength == UCOL_TOK_UNSET) { + *status = U_INVALID_FORMAT_ERROR; + return NULL; + } + + if (ucol_tok_isSpecialChar(ch) && (inQuote == FALSE)) { + *status = U_INVALID_FORMAT_ERROR; + return NULL; + } + + + + if (inChars) { + if(newCharsLen == 0) { + charsOffset = src->current - src->source; + } + newCharsLen++; + } else { + if(newExtensionLen == 0) { + extensionOffset = src->current - src->source; + } + newExtensionLen++; + } + + break; + } + } + + if(wasInQuote) { + if(ch != 0x27 || newCharsLen == 1) { + *src->extraCurrent++ = ch; + } + if(src->extraCurrent == src->extraEnd) { + /* reallocate */ + } + } + + src->current++; + } + + EndOfLoop: + wasInQuote = FALSE; + if (newStrength == UCOL_TOK_UNSET) { + return NULL; + } + + if (newCharsLen == 0 && top == FALSE) { + *status = U_INVALID_FORMAT_ERROR; + return NULL; + } + + *strength = newStrength; + + *chOffset = charsOffset; + *chLen = newCharsLen; + *exOffset = extensionOffset; + *exLen = newExtensionLen; + *varT = variableTop; + *top_ = top; + + return src->current; +} + /* Processing Description 1 Build a ListList. Each list has a header, which contains two lists (positive @@ -323,14 +571,15 @@ Processing Description uint32_t ucol_uprv_tok_assembleTokenList(UColTokenParser *src, UErrorCode *status) { UColToken *lastToken = NULL; - uint32_t newCharsLen = 0, newExtensionsLen = 0; - uint32_t charsOffset = 0, extensionOffset = 0; + const UChar *parseEnd = NULL; uint32_t expandNext = 0; UBool variableTop = FALSE; UBool top = FALSE; UColTokListHeader *ListList = NULL; + uint32_t newCharsLen = 0, newExtensionsLen = 0; + uint32_t charsOffset = 0, extensionOffset = 0; uint32_t newStrength = UCOL_TOK_UNSET; ucol_tok_initTokenList(src, status); @@ -340,235 +589,16 @@ uint32_t ucol_uprv_tok_assembleTokenList(UColTokenParser *src, UErrorCode *statu src->image->variableTopValue = 0; while(src->current < src->end) { - { /* parsing part */ + + parseEnd = ucol_tok_parseNextToken(src, + &newStrength, + &charsOffset, &newCharsLen, + &extensionOffset, &newExtensionsLen, + &variableTop, &top, + (UBool)(lastToken == NULL), + status); - UBool inChars = TRUE; - UBool inQuote = FALSE; - UBool wasInQuote = FALSE; - UChar *optionEnd = NULL; - - newStrength = UCOL_TOK_UNSET; - newCharsLen = 0; newExtensionsLen = 0; - charsOffset = 0; extensionOffset = 0; - - while (src->current < src->end) { - UChar ch = *(src->current); - - if (inQuote) { - if (ch == 0x0027/*'\''*/) { - inQuote = FALSE; - } else { - if ((newCharsLen == 0) || inChars) { - if(newCharsLen == 0) { - charsOffset = src->extraCurrent - src->source; - } - newCharsLen++; - } else { - if(newExtensionsLen == 0) { - extensionOffset = src->extraCurrent - src->source; - } - newExtensionsLen++; - } - } - } else { - /* Sets the strength for this entry */ - switch (ch) { - case 0x003D/*'='*/ : - if (newStrength != UCOL_TOK_UNSET) { - goto EndOfLoop; - } - - /* if we start with strength, we'll reset to top */ - if(lastToken == NULL) { - top = TRUE; - newStrength = UCOL_TOK_RESET; - goto EndOfLoop; - } - newStrength = UCOL_IDENTICAL; - break; - - case 0x002C/*','*/: - if (newStrength != UCOL_TOK_UNSET) { - goto EndOfLoop; - } - - /* if we start with strength, we'll reset to top */ - if(lastToken == NULL) { - top = TRUE; - newStrength = UCOL_TOK_RESET; - goto EndOfLoop; - } - newStrength = UCOL_TERTIARY; - break; - - case 0x003B/*';'*/: - if (newStrength != UCOL_TOK_UNSET) { - goto EndOfLoop; - } - - /* if we start with strength, we'll reset to top */ - if(lastToken == NULL) { - top = TRUE; - newStrength = UCOL_TOK_RESET; - goto EndOfLoop; - } - newStrength = UCOL_SECONDARY; - break; - - case 0x003C/*'<'*/: - if (newStrength != UCOL_TOK_UNSET) { - goto EndOfLoop; - } - - /* if we start with strength, we'll reset to top */ - if(lastToken == NULL) { - top = TRUE; - newStrength = UCOL_TOK_RESET; - goto EndOfLoop; - } - /* before this, do a scan to verify whether this is */ - /* another strength */ - if(*(src->current+1) == 0x003C) { - src->current++; - if(*(src->current+1) == 0x003C) { - src->current++; /* three in a row! */ - newStrength = UCOL_TERTIARY; - } else { /* two in a row */ - newStrength = UCOL_SECONDARY; - } - } else { /* just one */ - newStrength = UCOL_PRIMARY; - } - break; - - case 0x0026/*'&'*/: - if (newStrength != UCOL_TOK_UNSET) { - goto EndOfLoop; - } - - newStrength = UCOL_TOK_RESET; /* PatternEntry::RESET = 0 */ - break; - - case 0x005b/*'['*/: - /* options - read an option, analyze it */ - if((optionEnd = u_strchr(src->current, 0x005d /*']'*/)) != NULL) { - ucol_uprv_tok_readAndSetOption(src->image, src->current, optionEnd, &variableTop, &top, status); - src->current = optionEnd; - if(top == TRUE) { - if(newStrength == UCOL_TOK_RESET) { - src->current++; - goto EndOfLoop; - } else { - *status = U_INVALID_FORMAT_ERROR; - } - } - if(U_FAILURE(*status)) { - return 0; - } - } - break; - - /* Ignore the white spaces */ - case 0x0009/*'\t'*/: - case 0x000C/*'\f'*/: - case 0x000D/*'\r'*/: - case 0x000A/*'\n'*/: - case 0x0020/*' '*/: - break; /* skip whitespace TODO use Unicode */ - - case 0x002F/*'/'*/: - /* This entry has an extension. */ - inChars = FALSE; - break; - - /* found a quote, we're gonna start copying */ - case 0x0027/*'\''*/: - inQuote = TRUE; - wasInQuote = TRUE; - - if (newCharsLen == 0) { - charsOffset = src->extraCurrent - src->source; - newCharsLen++; - } else if (inChars) { /* we're reading some chars */ - charsOffset = src->extraCurrent - src->source; - if(newCharsLen != 0) { - uprv_memcpy(src->extraCurrent, src->current - newCharsLen, newCharsLen*sizeof(UChar)); - src->extraCurrent += newCharsLen; - } - newCharsLen++; - } else { - if(newExtensionsLen != 0) { - uprv_memcpy(src->extraCurrent, src->current - newExtensionsLen, newExtensionsLen*sizeof(UChar)); - src->extraCurrent += newExtensionsLen; - } - newExtensionsLen++; - } - - ch = *(++(src->current)); /*pattern[++index]; */ - break; - - /* '@' is french only if the strength is not currently set */ - /* if it is, it's just a regular character in collation rules */ - case 0x0040/*'@'*/: - if (newStrength == UCOL_TOK_UNSET) { - src->image->frenchCollation = UCOL_ON; - break; - } - - default: - if (newStrength == UCOL_TOK_UNSET) { - *status = U_INVALID_FORMAT_ERROR; - return 0; - } - - if (ucol_tok_isSpecialChar(ch) && (inQuote == FALSE)) { - *status = U_INVALID_FORMAT_ERROR; - return 0; - } - - - - if (inChars) { - if(newCharsLen == 0) { - charsOffset = src->current - src->source; - } - newCharsLen++; - } else { - if(newExtensionsLen == 0) { - extensionOffset = src->current - src->source; - } - newExtensionsLen++; - } - - break; - } - } - - if(wasInQuote) { - if(ch != 0x27) { - *src->extraCurrent++ = ch; - } - if(src->extraCurrent == src->extraEnd) { - /* reallocate */ - } - } - - src->current++; - } - - EndOfLoop: - wasInQuote = FALSE; - if (newStrength == UCOL_TOK_UNSET) { - return 0; - } - - if (newCharsLen == 0 && top == FALSE) { - *status = U_INVALID_FORMAT_ERROR; - return 0; - } - } - - { + if(U_SUCCESS(*status) && parseEnd != NULL) { UColToken *sourceToken = NULL; UColToken key; @@ -789,7 +819,9 @@ uint32_t ucol_uprv_tok_assembleTokenList(UColTokenParser *src, UErrorCode *statu } /* 7 After all this, set LAST to point to sourceToken, and goto step 3. */ lastToken = sourceToken; - } + } else { + return 0; + } } return src->resultLen; diff --git a/icu4c/source/i18n/ucol_tok.h b/icu4c/source/i18n/ucol_tok.h index 3c407a9cbb1..f8c76a80894 100644 --- a/icu4c/source/i18n/ucol_tok.h +++ b/icu4c/source/i18n/ucol_tok.h @@ -117,6 +117,13 @@ int32_t uhash_hashTokens(const void *k); UBool uhash_compareTokens(const void *key1, const void *key2); void ucol_tok_initTokenList(UColTokenParser *src, UErrorCode *status); uint32_t ucol_uprv_tok_assembleTokenList(UColTokenParser *src, UErrorCode *status); +U_CAPI const UChar U_EXPORT2 *ucol_tok_parseNextToken(UColTokenParser *src, + uint32_t *strength, + uint32_t *chOffset, uint32_t *chLen, + uint32_t *exOffset, uint32_t *exLen, + UBool *varT, UBool *top_, + UBool startOfRules, + UErrorCode *status); #endif diff --git a/icu4c/source/test/cintltst/cmsccoll.c b/icu4c/source/test/cintltst/cmsccoll.c index 533d340be6a..01447675aa2 100644 --- a/icu4c/source/test/cintltst/cmsccoll.c +++ b/icu4c/source/test/cintltst/cmsccoll.c @@ -25,6 +25,7 @@ #include "callcoll.h" #include "unicode/ustring.h" #include "string.h" +#include "ucol_imp.h" static UCollator *myCollation; const static UChar rules[MAX_TOKEN_LEN] = @@ -435,24 +436,55 @@ static void FunkyATest( ) ucol_close(myCollation); } +UColAttributeValue caseFirst[] = { + UCOL_OFF, + UCOL_LOWER_FIRST, + UCOL_UPPER_FIRST +}; + + UColAttributeValue alternateHandling[] = { - UCOL_NON_IGNORABLE, + UCOL_NON_IGNORABLE, UCOL_SHIFTED }; UColAttributeValue caseLevel[] = { - UCOL_OFF, + UCOL_OFF, UCOL_ON }; UColAttributeValue strengths[] = { - UCOL_PRIMARY, + UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL }; +char * caseFirstC[] = { + "UCOL_OFF", + "UCOL_LOWER_FIRST", + "UCOL_UPPER_FIRST" +}; + + +char * alternateHandlingC[] = { + "UCOL_NON_IGNORABLE", + "UCOL_SHIFTED" +}; + +char * caseLevelC[] = { + "UCOL_OFF", + "UCOL_ON" +}; + +char * strengthsC[] = { + "UCOL_PRIMARY", + "UCOL_SECONDARY", + "UCOL_TERTIARY", + "UCOL_QUATERNARY", + "UCOL_IDENTICAL" +}; static void PrintMarkDavis( ) @@ -461,8 +493,10 @@ static void PrintMarkDavis( ) UChar m[256]; uint8_t sortkey[256]; UCollator *coll = ucol_open(NULL, &status); - uint32_t i,j,k,l, sortkeysize; + uint32_t h,i,j,k, sortkeysize; uint32_t sizem = 0; + char buffer[512]; + uint32_t len = 512; u_uastrcpy(m, "Mark Davis"); sizem = u_strlen(m); @@ -475,20 +509,29 @@ static void PrintMarkDavis( ) } fprintf(stderr, "\n"); - for(i = 0; i +#include +#include +#include "unicode\ucol.h" +#include "unicode\ustdio.h" +#include "unicode\ustring.h" +#include "ucol_tok.h" +#define AMP '&' +#define GREAT '<' +#define EQUAL '=' +#define COMA ',' +#define SEMIC ';' +#define BRACKET '[' +#define ACCENT '@' +#define AMP_STR "&" +#define GREAT_STR "<" +#define EQUAL_STR "=" +#define COMA_STR "," +#define SEMIC_STR ";" +#define DG_STR "<<" +#define TG_STR "<<<" + +static FILE* file; + + +int32_t transformUTF16ToUTF8(uint8_t *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength) { + int32_t srcIndex, destIndex; + UChar32 c; + + for(srcIndex=destIndex=0; srcIndex 0) ) { /* copy string 2 over */ + if(*src!=0x0020 && *src!=0 && *src!=0x0027){ + *(dst++) = *(src); + } + *src++; + } + + return anchor; + +} + + +void parseAndPrintRules(UCollator* col,const char* loc, const UChar* rules, int length){ + UChar *local = (UChar*)rules; + UChar current[20]={'\0'}; + UChar previous[20]= {'\0'}; + UChar *first =current, *second = previous; + UChar* delimiter = (UChar*)" "; + int i = 0, strength; + char fileName[20] = {'\0'}; + UBool gotBoth = FALSE; + + if(loc){ + strcpy(fileName,loc); + } + strcat(fileName,"TestCases.txt"); + file = fopen(fileName,"wb"); + if(file){ + while((local-rules < length) && i<300){ + UChar* limit =consumeDelimiter(&local,length-i,&strength,&delimiter); + if(limit==NULL ){ + if(u_strcmp(delimiter ,(UChar*) AMP_STR)==0){ + resetBuf(&first,20); + } + limit =findDelimiter(local,length-(local-rules)); + if(limit==NULL){ + limit= (UChar*)rules+length; + } + + } + + if(limit){ + if(*first=='\0'){ + istrncpy(first,local,(int)(limit-local)); + local=limit; + + } + else{ + if((local-rules) < length){ + istrncpy(second,local,(int)(limit-local)); + } + local=limit; + gotBoth=TRUE; + } + } + if(gotBoth){ + unsigned char tempFirst[20] = {'\0'}; + unsigned char tempSecond[20] = {'\0'}; + aescstrdup(first,tempFirst,20); + aescstrdup(second,tempSecond,20); + //fprintf(file,"first:%s second: %s delimiter: %s strength:%i \n ",tempFirst,tempSecond,delimiter,strength); + + testCollator(col,first,second,delimiter,strength); + + //fprintf(file,"first:%s second: %s delimiter: %s strength:%i \n ",tempFirst,tempSecond,delimiter,strength); + resetBuf(&first,20); + u_strcpy(first,second); + resetBuf(&second,20); + gotBoth=FALSE; + } + i++; + + } + + } +} + +void parseAndPrintRules2(UCollator* col,const char* loc, const UChar* rules, int length){ + UChar *local = (UChar*)rules; + UChar current[20]={'\0'}; + UChar previous[20]= {'\0'}; + UChar *first =current, *second = previous; + UChar* delimiter = (UChar*)" "; + int i = 0, strength; + char fileName[20] = {'\0'}; + UBool gotBoth = FALSE; + + if(loc){ + strcpy(fileName,loc); + } + strcat(fileName,"TestCases.txt"); + file = fopen(fileName,"wb"); + if(file){ + if(limit){ + if(*first=='\0'){ + istrncpy(first,local,(int)(limit-local)); + local=limit; + + } + else{ + if((local-rules) < length){ + istrncpy(second,local,(int)(limit-local)); + } + local=limit; + gotBoth=TRUE; + } + } + if(gotBoth){ + unsigned char tempFirst[20] = {'\0'}; + unsigned char tempSecond[20] = {'\0'}; + aescstrdup(first,tempFirst,20); + aescstrdup(second,tempSecond,20); + //fprintf(file,"first:%s second: %s delimiter: %s strength:%i \n ",tempFirst,tempSecond,delimiter,strength); + + testCollator(col,first,second,delimiter,strength); + + //fprintf(file,"first:%s second: %s delimiter: %s strength:%i \n ",tempFirst,tempSecond,delimiter,strength); + resetBuf(&first,20); + u_strcpy(first,second); + resetBuf(&second,20); + gotBoth=FALSE; + } + i++; + + } + + } +} + +void processRules(const char* loc){ + UErrorCode status = U_ZERO_ERROR; + UCollator* col = ucol_open(loc,&status); + int length=0; + const UChar* rules; + if(loc){ + rules = ucol_getRules(col,&length); + } + ucol_setAttribute(col,UCOL_STRENGTH,UCOL_QUATERNARY,&status); + parseAndPrintRules2(col,loc,rules,length); +} + + +extern int +main(int argc, const char *argv[]) { + if(argc<2) { + + fprintf(stderr, + "usage: %s { rpmap/rxmap-filename }+\n", + argv[0]); + exit(1); + } + + while(--argc>0) { + processRules(*++argv); + } + + return 0; +} + +#endif