From 630b305e23bb5c67a87ba36a3938e4b0cf45597d Mon Sep 17 00:00:00 2001
From: Markus Scherer
Date: Tue, 4 Jan 2005 00:47:51 +0000
Subject: [PATCH] ICU-3881 modularize unorm.icu using build-time switches; build FCD trie at load time if omitted from data file; cope with missing data

X-SVN-Rev: 17083
---
/*
 * Reviewer notes. They sit after the three-dash separator, outside the
 * commit message and outside the diff; the hunks below are unchanged.
 *
 * What the hunks do:
 * - Add the file-static pointer gFCDBlock. unorm_cleanup() frees it with
 *   uprv_free() and resets it to NULL inside the existing
 *   if(normData!=NULL) block.
 * - Remove getFoldingFCDOffset() and the assignment of
 *   _fcdTrie.getFoldingOffset in loadNormData().
 * - Add EnumNormFCDContext, _enumNormFCD() and makeFCDTrie().
 *   _enumNormFCD() computes one value per enumerated range:
 *     + if norm32 has _NORM_QC_NFD set and isNorm32Regular(norm32) holds,
 *       it reads nfd[1] from eData+(norm32>>_NORM_EXTRA_SHIFT), but only
 *       when _NORM_DECOMP_FLAG_LENGTH_HAS_CC is set in *nfd;
 *     + otherwise it takes norm32&_NORM_CC_MASK and, when non-zero, ORs in
 *       the same value shifted right by _NORM_CC_SHIFT.
 *   A non-zero value is stored with utrie_setRange32(); if that call
 *   fails the callback sets ok to FALSE and returns FALSE.
 *   makeFCDTrie() sets fcdLength to 0 first, returns NULL at once when
 *   errorCode already holds a failure, and otherwise returns a
 *   uprv_malloc() block that holds the serialized trie, or NULL with
 *   errorCode set. Every exit after a successful utrie_open() calls
 *   utrie_close(newFCD).
 * - loadNormData(): when p[_NORM_INDEX_FCD_TRIE_SIZE] is 0 the FCD trie is
 *   built by makeFCDTrie() and unserialized from the returned block.
 *   The block is handed to gFCDBlock next to normData=data, and the local
 *   fcdBlock is set to NULL. In the branch guarded by if(data!=NULL) the
 *   local fcdBlock is released with uprv_free().
 * - The aux trie is now unserialized only when
 *   p[_NORM_INDEX_AUX_TRIE_SIZE] is non-zero; in the old lines that code
 *   sat inside if(p[_NORM_INDEX_FCD_TRIE_SIZE]!=0).
 * - unorm_internalIsFullCompositionExclusion(), unorm_isCanonSafeStart(),
 *   u_getFC_NFKC_Closure() and unorm_addPropertyStarts() test
 *   auxTrie.index against NULL instead of formatVersion_2_1.
 *   unorm_isNFSkippable() adds the same NULL test to its
 *   formatVersion_2_2 check.
 *
 * NOTE(review): the failure exits of loadNormData() lie outside these
 * hunks. Confirm that they release fcdBlock when makeFCDTrie() succeeded
 * and a later step failed.
 * NOTE(review): no replacement for the removed _fcdTrie.getFoldingOffset
 * assignment is visible. Presumably utrie_unserialize() installs a
 * default that matches a trie serialized with a NULL getFoldedValue
 * argument; confirm against utrie.h.
 * NOTE(review): the first utrie_serialize() call passes a NULL buffer with
 * capacity 0 and treats any failure code as fatal. Confirm that this
 * sizing call leaves errorCode untouched.
 * NOTE(review): the eData argument in loadNormData() is built with casts
 * that drop the const qualifier of p, although makeFCDTrie() takes a
 * const uint16_t pointer. _enumNormFCD() likewise writes ok through a
 * cast of its const void context.
 */
 icu4c/source/common/unorm.cpp | 136 ++++++++++++++++++++++++++++++----
 1 file changed, 120 insertions(+), 16 deletions(-)

diff --git a/icu4c/source/common/unorm.cpp b/icu4c/source/common/unorm.cpp
index 22cc261977d..64bc2cbfd5f 100644
--- a/icu4c/source/common/unorm.cpp
+++ b/icu4c/source/common/unorm.cpp
@@ -1,6 +1,6 @@
 /*
 ******************************************************************************
-* Copyright (c) 1996-2004, International Business Machines
+* Copyright (c) 1996-2005, International Business Machines
 * Corporation and others. All Rights Reserved.
 ******************************************************************************
 * File unorm.cpp
@@ -173,6 +173,8 @@ static int8_t haveNormData=0;
 static int32_t indexes[_NORM_INDEX_TOP]={ 0 };
 static UTrie normTrie={ 0,0,0,0,0,0,0 }, fcdTrie={ 0,0,0,0,0,0,0 }, auxTrie={ 0,0,0,0,0,0,0 };
 
+static uint8_t *gFCDBlock=NULL;
+
 /*
  * pointers into the memory-mapped unorm.icu
  */
@@ -198,6 +200,9 @@ unorm_cleanup() {
     if(normData!=NULL) {
         udata_close(normData);
         normData=NULL;
+
+        uprv_free(gFCDBlock);
+        gFCDBlock=NULL;
     }
     dataErrorCode=U_ZERO_ERROR;
     haveNormData=0;
@@ -223,12 +228,6 @@ getFoldingNormOffset(uint32_t norm32) {
     }
 }
 
-/* fcdTrie: the folding offset is the lead FCD value itself */
-static int32_t U_CALLCONV
-getFoldingFCDOffset(uint32_t data) {
-    return (int32_t)data;
-}
-
 /* auxTrie: the folding offset is in bits 9..0 of the 16-bit trie result */
 static int32_t U_CALLCONV
 getFoldingAuxOffset(uint32_t data) {
@@ -267,8 +266,99 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*limit*/,
     return TRUE;
 }
 
+struct EnumNormFCDContext {
+    UNewTrie *newFCD;
+    const uint16_t *eData;
+    UBool ok;
+};
+
+static UBool U_CALLCONV
+_enumNormFCD(const void *context, UChar32 start, UChar32 limit, uint32_t norm32) {
+    uint32_t fcd;
+
+    fcd=0;
+
+    if((norm32&_NORM_QC_NFD) && isNorm32Regular(norm32)) {
+        /* get the lead/trail cc from the decomposition data */
+        const uint16_t *nfd=
+            ((EnumNormFCDContext *)context)->eData+
+            (norm32>>_NORM_EXTRA_SHIFT);
+        if(*nfd&_NORM_DECOMP_FLAG_LENGTH_HAS_CC) {
+            fcd=nfd[1];
+        }
+    } else {
+        fcd=norm32&_NORM_CC_MASK;
+        if(fcd!=0) {
+            /* use the code point cc value for both lead and trail cc's */
+            fcd|=fcd>>_NORM_CC_SHIFT; /* assume that the cc is in bits 15..8 */
+        }
+    }
+
+    if(fcd!=0) {
+        if(!utrie_setRange32(((EnumNormFCDContext *)context)->newFCD, start, limit, fcd, TRUE)) {
+            return ((EnumNormFCDContext *)context)->ok=FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
 U_CDECL_END
 
+/* make the FCD trie on the fly if it was not stored in the data file */
+static uint8_t *
+makeFCDTrie(UTrie &nTrie, const uint16_t *eData, int32_t &fcdLength, UErrorCode &errorCode) {
+    UNewTrie *newFCD;
+    uint8_t *fcdBlock;
+
+    fcdLength=0;
+
+    if(U_FAILURE(errorCode)) {
+        return NULL;
+    }
+
+    newFCD=utrie_open(NULL, NULL, 20000, 0, 0, TRUE);
+    if(newFCD==NULL) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+
+    /*
+     * enumerate the just-loaded normalization main data trie,
+     * compute the FCD value for each range,
+     * and store it in newFCD
+     */
+    EnumNormFCDContext context={ newFCD, eData, TRUE };
+    utrie_enum(&nTrie, NULL, _enumNormFCD, &context);
+    if(!context.ok) {
+        errorCode=U_BUFFER_OVERFLOW_ERROR;
+        utrie_close(newFCD);
+        return NULL;
+    }
+
+    fcdLength=utrie_serialize(newFCD, NULL, 0, NULL, TRUE, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        utrie_close(newFCD);
+        return NULL;
+    }
+
+    fcdBlock=(uint8_t *)uprv_malloc(fcdLength);
+    if(fcdBlock==NULL) {
+        errorCode=U_MEMORY_ALLOCATION_ERROR;
+        utrie_close(newFCD);
+        return NULL;
+    }
+
+    fcdLength=utrie_serialize(newFCD, fcdBlock, fcdLength, NULL, TRUE, &errorCode);
+    utrie_close(newFCD);
+    if(U_FAILURE(errorCode)) {
+        uprv_free(fcdBlock);
+        return NULL;
+    }
+
+    return fcdBlock;
+}
+
 static int8_t
 loadNormData(UErrorCode &errorCode) {
     /* load Unicode normalization data from file */
@@ -290,6 +380,8 @@ loadNormData(UErrorCode &errorCode) {
     if(haveNormData==0) {
         UTrie _normTrie={ 0,0,0,0,0,0,0 }, _fcdTrie={ 0,0,0,0,0,0,0 }, _auxTrie={ 0,0,0,0,0,0,0 };
         UDataMemory *data;
+        uint8_t *fcdBlock=NULL;
+
         const int32_t *p=NULL;
         const uint8_t *pb;
 
@@ -310,11 +402,19 @@ loadNormData(UErrorCode &errorCode) {
         _normTrie.getFoldingOffset=getFoldingNormOffset;
 
         pb+=p[_NORM_INDEX_TRIE_SIZE]+p[_NORM_INDEX_UCHAR_COUNT]*2+p[_NORM_INDEX_COMBINE_DATA_COUNT]*2;
-        utrie_unserialize(&_fcdTrie, pb, p[_NORM_INDEX_FCD_TRIE_SIZE], &errorCode);
-        _fcdTrie.getFoldingOffset=getFoldingFCDOffset;
-
         if(p[_NORM_INDEX_FCD_TRIE_SIZE]!=0) {
-            pb+=p[_NORM_INDEX_FCD_TRIE_SIZE];
+            utrie_unserialize(&_fcdTrie, pb, p[_NORM_INDEX_FCD_TRIE_SIZE], &errorCode);
+        } else {
+            /* the FCD trie was not stored, create one on the fly */
+            int32_t fcdLength;
+            fcdBlock=makeFCDTrie(_normTrie,
+                                 (uint16_t *)((uint8_t *)(p+_NORM_INDEX_TOP)+p[_NORM_INDEX_TRIE_SIZE]),
+                                 fcdLength, errorCode);
+            utrie_unserialize(&_fcdTrie, fcdBlock, fcdLength, &errorCode);
+        }
+        pb+=p[_NORM_INDEX_FCD_TRIE_SIZE];
+
+        if(p[_NORM_INDEX_AUX_TRIE_SIZE]!=0) {
             utrie_unserialize(&_auxTrie, pb, p[_NORM_INDEX_AUX_TRIE_SIZE], &errorCode);
             _auxTrie.getFoldingOffset=getFoldingAuxOffset;
         }
@@ -331,6 +431,9 @@ loadNormData(UErrorCode &errorCode) {
             normData=data;
             data=NULL;
 
+            gFCDBlock=fcdBlock;
+            fcdBlock=NULL;
+
             uprv_memcpy(&indexes, p, sizeof(indexes));
             uprv_memcpy(&normTrie, &_normTrie, sizeof(UTrie));
             uprv_memcpy(&fcdTrie, &_fcdTrie, sizeof(UTrie));
@@ -356,6 +459,7 @@ loadNormData(UErrorCode &errorCode) {
         /* if a different thread set it first, then close the extra data */
         if(data!=NULL) {
             udata_close(data); /* NULL if it was set correctly */
+            uprv_free(fcdBlock);
         }
     }
 
@@ -896,7 +1000,7 @@ u_getCombiningClass(UChar32 c) {
 U_CAPI UBool U_EXPORT2
 unorm_internalIsFullCompositionExclusion(UChar32 c) {
     UErrorCode errorCode=U_ZERO_ERROR;
-    if(_haveData(errorCode) && formatVersion_2_1) {
+    if(_haveData(errorCode) && auxTrie.index!=NULL) {
         uint16_t aux;
 
         UTRIE_GET16(&auxTrie, c, aux);
@@ -909,7 +1013,7 @@ unorm_internalIsFullCompositionExclusion(UChar32 c) {
 U_CAPI UBool U_EXPORT2
 unorm_isCanonSafeStart(UChar32 c) {
     UErrorCode errorCode=U_ZERO_ERROR;
-    if(_haveData(errorCode) && formatVersion_2_1) {
+    if(_haveData(errorCode) && auxTrie.index!=NULL) {
         uint16_t aux;
 
         UTRIE_GET16(&auxTrie, c, aux);
@@ -1031,7 +1135,7 @@ u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *p
         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
     }
-    if(!_haveData(*pErrorCode) || !formatVersion_2_1) {
+    if(!_haveData(*pErrorCode) || auxTrie.index==NULL) {
         return 0;
     }
 
@@ -1118,7 +1222,7 @@ unorm_isNFSkippable(UChar32 c, UNormalizationMode mode) {
 
     /* if(mode<=UNORM_NFKC) { -- enable when implementing FCC */
     /* NF*C, test (f) flag */
-    if(!formatVersion_2_2) {
+    if(!formatVersion_2_2 || auxTrie.index==NULL) {
         return FALSE; /* no (f) data, say not skippable to be safe */
     }
 
@@ -1139,7 +1243,7 @@ unorm_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
     /* add the start code point of each same-value range of each trie */
     utrie_enum(&normTrie, NULL, _enumPropertyStartsRange, sa);
     utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, sa);
-    if(formatVersion_2_1) {
+    if(auxTrie.index!=NULL) {
         utrie_enum(&auxTrie, NULL, _enumPropertyStartsRange, sa);
     }
 