From f70eda36bfc1c08ae35204fd5715440c62a6765b Mon Sep 17 00:00:00 2001 From: Vladimir Weinstein Date: Thu, 13 Jun 2002 18:29:49 +0000 Subject: [PATCH] ICU-1930 use values from the UCA for CE generation. refactored closure code and moved it to ucol_elm & made it usable from the genUCA X-SVN-Rev: 8875 --- icu4c/source/i18n/ucol_bld.cpp | 128 +++++---------------------------- 1 file changed, 19 insertions(+), 109 deletions(-) diff --git a/icu4c/source/i18n/ucol_bld.cpp b/icu4c/source/i18n/ucol_bld.cpp index 99e633b135c..6008982e4a5 100644 --- a/icu4c/source/i18n/ucol_bld.cpp +++ b/icu4c/source/i18n/ucol_bld.cpp @@ -226,7 +226,7 @@ inline int32_t ucol_inv_getNext(UColTokListHeader *lh, uint32_t strength) { return iCE; } -U_CFUNC void ucol_inv_getGapPositions(/*UColTokenParser *src,*/ UColTokListHeader *lh, UErrorCode *status) { +U_CFUNC void ucol_inv_getGapPositions(UColTokenParser *src, UColTokListHeader *lh, UErrorCode *status) { /* reset all the gaps */ int32_t i = 0; uint32_t *CETable = (uint32_t *)((uint8_t *)invUCA+invUCA->table); @@ -251,7 +251,10 @@ U_CFUNC void ucol_inv_getGapPositions(/*UColTokenParser *src,*/ UColTokListHeade lh->pos[i] = -1; } - if(lh->baseCE >= PRIMARY_IMPLICIT_MIN && lh->baseCE < PRIMARY_IMPLICIT_MAX ) { /* implicits - */ + UCAConstants *consts = (UCAConstants *)((uint8_t *)src->UCA->image + src->UCA->image->UCAConsts); + + if(lh->baseCE >= (consts->UCA_PRIMARY_IMPLICIT_MIN<<24) && lh->baseCE < (consts->UCA_PRIMARY_IMPLICIT_MAX<<24) ) { /* implicits - */ + //if(lh->baseCE >= PRIMARY_IMPLICIT_MIN && lh->baseCE < PRIMARY_IMPLICIT_MAX ) { /* implicits - */ lh->pos[0] = 0; t1 = lh->baseCE; t2 = lh->baseContCE; @@ -488,7 +491,7 @@ U_CFUNC void ucol_doCE(uint32_t *CEparts, UColToken *tok) { #endif } -U_CFUNC void ucol_initBuffers(/*UColTokenParser *src,*/ UColTokListHeader *lh, UErrorCode *status) { +U_CFUNC void ucol_initBuffers(UColTokenParser *src, UColTokListHeader *lh, UErrorCode *status) { ucolCEGenerator Gens[UCOL_CE_STRENGTH_LIMIT]; uint32_t CEparts[UCOL_CE_STRENGTH_LIMIT]; @@ -498,9 +501,11 @@ U_CFUNC void ucol_initBuffers(/*UColTokenParser *src,*/ UColTokListHeader *lh, U UColToken *tok = lh->last; uint32_t t[UCOL_STRENGTH_LIMIT]; - for(i=0; itoInsert = 1; t[tok->strength] = 1; @@ -519,7 +524,7 @@ U_CFUNC void ucol_initBuffers(/*UColTokenParser *src,*/ UColTokListHeader *lh, U } tok->toInsert = t[tok->strength]; - ucol_inv_getGapPositions(lh, status); + ucol_inv_getGapPositions(src, lh, status); #if UCOL_DEBUG fprintf(stderr, "BaseCE: %08X %08X\n", lh->baseCE, lh->baseContCE); @@ -870,77 +875,6 @@ U_CFUNC void ucol_createElements(UColTokenParser *src, tempUCATable *t, UColTokL } } -struct enumStruct { - tempUCATable *t; - UCollator *tempColl; - UCollationElements* colEl; - UErrorCode *status; -}; - -U_CDECL_BEGIN -static UBool U_CALLCONV -_enumCategoryRangeClosureCategory(const void *context, UChar32 start, UChar32 limit, UCharCategory type) { - - UErrorCode *status = ((enumStruct *)context)->status; - tempUCATable *t = ((enumStruct *)context)->t; - UCollator *tempColl = ((enumStruct *)context)->tempColl; - UCollationElements* colEl = ((enumStruct *)context)->colEl; - UCAElements el; - UChar decomp[256] = { 0 }; - uint32_t noOfDec = 0; - - UChar32 u32 = 0; - UChar comp[2]; - uint32_t len = 0; - - if (type > 0) { // if the range is assigned - we might ommit more categories later - for(u32 = start; u32 < limit; u32++) { - len = 0; - UTF_APPEND_CHAR_UNSAFE(comp, len, u32); - if((noOfDec = unorm_normalize(comp, len, UNORM_NFD, 0, decomp, 256, status)) > 1 - || (noOfDec == 1 && *decomp != (UChar)u32)) - { - if(ucol_strcoll(tempColl, comp, len, decomp, noOfDec) != UCOL_EQUAL) { - el.cPoints = decomp; - el.cSize = noOfDec; - el.noOfCEs = 0; - el.prefix = el.prefixChars; - el.prefixSize = 0; - - UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, &el); - if(prefix == NULL) { - el.cPoints = comp; - el.cSize = len; - el.prefix = el.prefixChars; - el.prefixSize = 0; - el.noOfCEs = 0; - ucol_setText(colEl, decomp, noOfDec, status); - while((el.CEs[el.noOfCEs] = ucol_next(colEl, status)) != UCOL_NULLORDER) { - el.noOfCEs++; - } - } else { - el.cPoints = comp; - el.cSize = len; - el.prefix = el.prefixChars; - el.prefixSize = 0; - el.noOfCEs = 1; - el.CEs[0] = prefix->mapCE; - // This character uses a prefix. We have to add it - // to the unsafe table, as it decomposed form is already - // in. In Japanese, this happens for \u309e & \u30fe - // Since unsafeCPSet is static in ucol_elm, we are going - // to wrap it up in the uprv_uca_unsafeCPAddCCNZ function - } - uprv_uca_addAnElement(t, &el, status); - } - } - } - } - return TRUE; -} -U_CDECL_END - - UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *status) { uint32_t i = 0; if(U_FAILURE(*status)) { @@ -993,7 +927,7 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st /* We stuff the initial value in the buffers, and increase the appropriate buffer */ /* According to strength */ if(U_SUCCESS(*status)) { - ucol_initBuffers(&src->lh[i], status); + ucol_initBuffers(src, &src->lh[i], status); } if(U_FAILURE(*status)) { return NULL; @@ -1076,7 +1010,8 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st /* copy contractions from the UCA - this is felt mostly for cyrillic*/ uint32_t tailoredCE = UCOL_NOT_FOUND; - UChar *conts = (UChar *)((uint8_t *)src->UCA->image + src->UCA->image->contractionUCACombos); + UChar *conts = (UChar *)((uint8_t *)src->UCA->image + src->UCA->image->UCAConsts+sizeof(UCAConstants)); + //UChar *conts = (UChar *)((uint8_t *)src->UCA->image + src->UCA->image->contractionUCACombos); UCollationElements *ucaEl = ucol_openElements(src->UCA, NULL, 0, status); while(*conts != 0) { /*tailoredCE = ucmpe32_get(t->mapping, *conts);*/ @@ -1113,37 +1048,12 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st conts+=3; } ucol_closeElements(ucaEl); - - UCollator *tempColl = NULL; - if(U_SUCCESS(*status)) { - tempUCATable *tempTable = uprv_uca_cloneTempTable(t, status); - - UCATableHeader *tempData = uprv_uca_assembleTable(tempTable, status); - tempColl = ucol_initCollator(tempData, 0, status); - - if(U_SUCCESS(*status)) { - tempColl->rb = NULL; - tempColl->binary = NULL; - tempColl->requestedLocale = NULL; - tempColl->hasRealData = TRUE; - } - uprv_uca_closeTempTable(tempTable); - } - - /* produce canonical closure */ - UCollationElements* colEl = ucol_openElements(tempColl, NULL, 0, status); - - enumStruct context; - context.t = t; - context.tempColl = tempColl; - context.colEl = colEl; - context.status = status; - u_enumCharTypes(_enumCategoryRangeClosureCategory, &context); - - ucol_closeElements(colEl); - ucol_close(tempColl); } + // canonical closure + uprv_uca_canonicalClosure(t, status); + + /* still need to produce compatibility closure */ UCATableHeader *myData = uprv_uca_assembleTable(t, status);