mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 14:05:32 +00:00
ICU-1586 handle non-zero initial values
X-SVN-Rev: 7350
This commit is contained in:
parent
a903b84867
commit
e4b419efb7
4 changed files with 118 additions and 73 deletions
|
@ -32,8 +32,11 @@
|
|||
/* Building a trie ----------------------------------------------------------*/
|
||||
|
||||
U_CAPI UNewTrie * U_EXPORT2
|
||||
utrie_open(UNewTrie *fillIn, uint32_t *aliasData, int32_t maxDataLength, UBool latin1Linear) {
|
||||
utrie_open(UNewTrie *fillIn,
|
||||
uint32_t *aliasData, int32_t maxDataLength,
|
||||
uint32_t initialValue, UBool latin1Linear) {
|
||||
UNewTrie *trie;
|
||||
int32_t i, j;
|
||||
|
||||
if( maxDataLength<UTRIE_DATA_BLOCK_LENGTH ||
|
||||
(latin1Linear && maxDataLength<1024)
|
||||
|
@ -64,25 +67,26 @@ utrie_open(UNewTrie *fillIn, uint32_t *aliasData, int32_t maxDataLength, UBool l
|
|||
trie->isDataAllocated=TRUE;
|
||||
}
|
||||
|
||||
/* preallocate and reset the first data block (block index 0, all values 0) */
|
||||
uprv_memset(trie->data, 0, 4*UTRIE_DATA_BLOCK_LENGTH);
|
||||
trie->dataLength=UTRIE_DATA_BLOCK_LENGTH;
|
||||
/* preallocate and reset the first data block (block index 0) */
|
||||
j=UTRIE_DATA_BLOCK_LENGTH;
|
||||
|
||||
/* preallocate and reset Latin-1 (U+0000..U+00ff) after that if requested */
|
||||
if(latin1Linear) {
|
||||
int32_t i, j;
|
||||
|
||||
uprv_memset(trie->data, 0, 4096); /* made sure above that maxDataLength>=1024 */
|
||||
/* preallocate and reset the first block (number 0) and Latin-1 (U+0000..U+00ff) after that */
|
||||
/* made sure above that maxDataLength>=1024 */
|
||||
|
||||
/* set indexes to point to consecutive data blocks */
|
||||
i=0;
|
||||
j=UTRIE_DATA_BLOCK_LENGTH;
|
||||
do {
|
||||
/* do this at least for trie->index[0] even if that block is only partly used for Latin-1 */
|
||||
trie->index[i++]=j;
|
||||
j+=UTRIE_DATA_BLOCK_LENGTH;
|
||||
} while(i<(256>>UTRIE_SHIFT));
|
||||
trie->dataLength=j;
|
||||
}
|
||||
|
||||
/* reset the initially allocated blocks to the initial value */
|
||||
trie->dataLength=j;
|
||||
while(j>0) {
|
||||
trie->data[--j]=initialValue;
|
||||
}
|
||||
|
||||
trie->indexLength=UTRIE_MAX_INDEX_LENGTH;
|
||||
|
@ -114,7 +118,7 @@ utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_
|
|||
isDataAllocated=TRUE;
|
||||
}
|
||||
|
||||
trie=utrie_open(fillIn, aliasData, aliasDataCapacity, other->isLatin1Linear);
|
||||
trie=utrie_open(fillIn, aliasData, aliasDataCapacity, other->data[0], other->isLatin1Linear);
|
||||
if(trie==NULL) {
|
||||
uprv_free(aliasData);
|
||||
} else {
|
||||
|
@ -176,12 +180,8 @@ utrie_getDataBlock(UNewTrie *trie, UChar32 c) {
|
|||
trie->dataLength=newTop;
|
||||
trie->index[c]=newBlock;
|
||||
|
||||
if(indexValue==0) {
|
||||
uprv_memset(trie->data+newBlock, 0, 4*UTRIE_DATA_BLOCK_LENGTH);
|
||||
} else /* indexValue<0 */ {
|
||||
/* copy-on-write for a block from a setRange() */
|
||||
uprv_memcpy(trie->data+newBlock, trie->data-indexValue, 4*UTRIE_DATA_BLOCK_LENGTH);
|
||||
}
|
||||
/* copy-on-write for a block from a setRange() */
|
||||
uprv_memcpy(trie->data+newBlock, trie->data-indexValue, 4*UTRIE_DATA_BLOCK_LENGTH);
|
||||
return newBlock;
|
||||
}
|
||||
|
||||
|
@ -230,7 +230,8 @@ utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero) {
|
|||
* @internal
|
||||
*/
|
||||
static void
|
||||
utrie_fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value, UBool overwrite) {
|
||||
utrie_fillBlock(uint32_t *block, UChar32 start, UChar32 limit,
|
||||
uint32_t value, uint32_t initialValue, UBool overwrite) {
|
||||
uint32_t *pLimit;
|
||||
|
||||
pLimit=block+limit;
|
||||
|
@ -241,7 +242,7 @@ utrie_fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value, U
|
|||
}
|
||||
} else {
|
||||
while(block<pLimit) {
|
||||
if(*block==0) {
|
||||
if(*block==initialValue) {
|
||||
*block=value;
|
||||
}
|
||||
++block;
|
||||
|
@ -256,6 +257,7 @@ utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, U
|
|||
* mark index values for repeat-data blocks by setting bit 31 of the index values
|
||||
* fill around existing values if any, if(!overwrite)
|
||||
*/
|
||||
uint32_t initialValue;
|
||||
int32_t block, rest, repeatBlock;
|
||||
|
||||
/* valid, uncompacted trie and valid indexes? */
|
||||
|
@ -268,6 +270,7 @@ utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, U
|
|||
return TRUE; /* nothing to do */
|
||||
}
|
||||
|
||||
initialValue=trie->data[0];
|
||||
if(start&UTRIE_MASK) {
|
||||
UChar32 nextStart;
|
||||
|
||||
|
@ -279,10 +282,12 @@ utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, U
|
|||
|
||||
nextStart=(start+UTRIE_DATA_BLOCK_LENGTH)&~UTRIE_MASK;
|
||||
if(nextStart<=limit) {
|
||||
utrie_fillBlock(trie->data+block, start&UTRIE_MASK, UTRIE_DATA_BLOCK_LENGTH, value, overwrite);
|
||||
utrie_fillBlock(trie->data+block, start&UTRIE_MASK, UTRIE_DATA_BLOCK_LENGTH,
|
||||
value, initialValue, overwrite);
|
||||
start=nextStart;
|
||||
} else {
|
||||
utrie_fillBlock(trie->data+block, start&UTRIE_MASK, limit&UTRIE_MASK, value, overwrite);
|
||||
utrie_fillBlock(trie->data+block, start&UTRIE_MASK, limit&UTRIE_MASK,
|
||||
value, initialValue, overwrite);
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
@ -294,7 +299,7 @@ utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, U
|
|||
limit&=~UTRIE_MASK;
|
||||
|
||||
/* iterate over all-value blocks */
|
||||
if(value==0) {
|
||||
if(value==initialValue) {
|
||||
repeatBlock=0;
|
||||
} else {
|
||||
repeatBlock=-1;
|
||||
|
@ -304,7 +309,7 @@ utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, U
|
|||
block=trie->index[start>>UTRIE_SHIFT];
|
||||
if(block>0) {
|
||||
/* already allocated, fill in value */
|
||||
utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, value, overwrite);
|
||||
utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, overwrite);
|
||||
} else if(trie->data[-block]!=value && (block==0 || overwrite)) {
|
||||
/* set the repeatBlock instead of the current block 0 or range block */
|
||||
if(repeatBlock>=0) {
|
||||
|
@ -318,7 +323,7 @@ utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, U
|
|||
|
||||
/* set the negative block number to indicate that it is a repeat block */
|
||||
trie->index[start>>UTRIE_SHIFT]=-repeatBlock;
|
||||
utrie_fillBlock(trie->data+repeatBlock, 0, UTRIE_DATA_BLOCK_LENGTH, value, TRUE);
|
||||
utrie_fillBlock(trie->data+repeatBlock, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, TRUE);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -332,7 +337,7 @@ utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, U
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
utrie_fillBlock(trie->data+block, 0, rest, value, overwrite);
|
||||
utrie_fillBlock(trie->data+block, 0, rest, value, initialValue, overwrite);
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
|
@ -476,7 +481,7 @@ utrie_fold(UNewTrie *trie, UNewTrieGetFoldedValue *getFoldedValue, UErrorCode *p
|
|||
* Compact a folded build-time trie.
|
||||
*
|
||||
* The compaction
|
||||
* - removes all-zero blocks
|
||||
* - removes all-initial-value blocks
|
||||
* - maps all blocks that are completely filled with the same values to only one of them
|
||||
* - overlaps adjacent blocks as much as possible
|
||||
*
|
||||
|
@ -513,11 +518,12 @@ utrie_compact(UNewTrie *trie, UErrorCode *pErrorCode) {
|
|||
|
||||
/* compaction */
|
||||
|
||||
/* never move the all-zero block */
|
||||
/* never move the all-initial-value block 0 */
|
||||
trie->map[0]=0;
|
||||
|
||||
/* prime the whole blocks lookup table with the all-zero block */
|
||||
wholeBlockValues[0]=wholeBlockIndexes[0]=0;
|
||||
/* prime the whole blocks lookup table with the all-initial-value block 0 */
|
||||
wholeBlockValues[0]=trie->data[0];
|
||||
wholeBlockIndexes[0]=0;
|
||||
countWholeBlocks=1;
|
||||
|
||||
/* if Latin-1 is preallocated and linear, then do not compact Latin-1 data */
|
||||
|
@ -842,6 +848,7 @@ utrie_unserialize(UTrie *trie, const uint8_t *data, int32_t length, UErrorCode *
|
|||
return -1;
|
||||
}
|
||||
trie->data32=(const uint32_t *)p16;
|
||||
trie->initialValue=trie->data32[0];
|
||||
return sizeof(UTrieHeader)+2*trie->indexLength+4*trie->dataLength;
|
||||
} else {
|
||||
if(length<2*trie->dataLength) {
|
||||
|
@ -851,6 +858,7 @@ utrie_unserialize(UTrie *trie, const uint8_t *data, int32_t length, UErrorCode *
|
|||
|
||||
/* the "data16" data is used via the index pointer */
|
||||
trie->data32=NULL;
|
||||
trie->initialValue=trie->index[trie->indexLength];
|
||||
return sizeof(UTrieHeader)+2*trie->indexLength+2*trie->dataLength;
|
||||
}
|
||||
}
|
||||
|
@ -873,7 +881,7 @@ utrie_enum(UTrie *trie,
|
|||
const uint32_t *data32;
|
||||
const uint16_t *index;
|
||||
|
||||
uint32_t value, prevValue, noValue;
|
||||
uint32_t value, prevValue, initialValue;
|
||||
UChar32 c, prev;
|
||||
int32_t l, i, j, block, prevBlock, offset;
|
||||
|
||||
|
@ -888,13 +896,13 @@ utrie_enum(UTrie *trie,
|
|||
index=trie->index;
|
||||
data32=trie->data32;
|
||||
|
||||
/* get the non-value that corresponds to a trie data entry of 0 */
|
||||
noValue=enumValue(context, 0);
|
||||
/* get the enumeration value that corresponds to an initial-value trie data entry */
|
||||
initialValue=enumValue(context, trie->initialValue);
|
||||
|
||||
/* set variables for previous range */
|
||||
prevBlock=0;
|
||||
prev=0;
|
||||
prevValue=noValue;
|
||||
prevValue=initialValue;
|
||||
|
||||
/* enumerate BMP - the main loop enumerates data blocks */
|
||||
for(i=0, c=0; c<=0xffff; ++i) {
|
||||
|
@ -911,20 +919,20 @@ utrie_enum(UTrie *trie,
|
|||
/* the block is the same as the previous one, and filled with value */
|
||||
c+=UTRIE_DATA_BLOCK_LENGTH;
|
||||
} else if(block==0) {
|
||||
/* this is the all-zero block */
|
||||
if(prevValue!=noValue) {
|
||||
/* this is the all-initial-value block */
|
||||
if(prevValue!=initialValue) {
|
||||
if(prev<c) {
|
||||
enumRange(context, prev, c, prevValue);
|
||||
}
|
||||
prevBlock=0;
|
||||
prev=c;
|
||||
prevValue=noValue;
|
||||
prevValue=initialValue;
|
||||
}
|
||||
c+=UTRIE_DATA_BLOCK_LENGTH;
|
||||
} else {
|
||||
prevBlock=block;
|
||||
for(j=0; j<UTRIE_DATA_BLOCK_LENGTH; ++j) {
|
||||
value= data32!=NULL ? enumValue(context, data32[block+j]) : enumValue(context, index[block+j]);
|
||||
value=enumValue(context, data32!=NULL ? data32[block+j] : index[block+j]);
|
||||
if(value!=prevValue) {
|
||||
if(prev<c) {
|
||||
enumRange(context, prev, c, prevValue);
|
||||
|
@ -966,13 +974,13 @@ utrie_enum(UTrie *trie,
|
|||
offset=trie->getFoldingOffset(value);
|
||||
if(offset<=0) {
|
||||
/* no data for this lead surrogate */
|
||||
if(prevValue!=noValue) {
|
||||
if(prevValue!=initialValue) {
|
||||
if(prev<c) {
|
||||
enumRange(context, prev, c, prevValue);
|
||||
}
|
||||
prevBlock=0;
|
||||
prev=c;
|
||||
prevValue=noValue;
|
||||
prevValue=initialValue;
|
||||
}
|
||||
|
||||
/* nothing else to do for the supplementary code points for this lead surrogate */
|
||||
|
@ -988,20 +996,20 @@ utrie_enum(UTrie *trie,
|
|||
/* the block is the same as the previous one, and filled with value */
|
||||
c+=UTRIE_DATA_BLOCK_LENGTH;
|
||||
} else if(block==0) {
|
||||
/* this is the all-zero block */
|
||||
if(prevValue!=noValue) {
|
||||
/* this is the all-initial-value block */
|
||||
if(prevValue!=initialValue) {
|
||||
if(prev<c) {
|
||||
enumRange(context, prev, c, prevValue);
|
||||
}
|
||||
prevBlock=0;
|
||||
prev=c;
|
||||
prevValue=noValue;
|
||||
prevValue=initialValue;
|
||||
}
|
||||
c+=UTRIE_DATA_BLOCK_LENGTH;
|
||||
} else {
|
||||
prevBlock=block;
|
||||
for(j=0; j<UTRIE_DATA_BLOCK_LENGTH; ++j) {
|
||||
value= data32!=NULL ? enumValue(context, data32[block+j]) : enumValue(context, index[block+j]);
|
||||
value=enumValue(context, data32!=NULL ? data32[block+j] : index[block+j]);
|
||||
if(value!=prevValue) {
|
||||
if(prev<c) {
|
||||
enumRange(context, prev, c, prevValue);
|
||||
|
|
|
@ -40,7 +40,7 @@ U_CDECL_BEGIN
|
|||
* From such a folded value, an offset needs to be extracted to supply
|
||||
* to the _FROM_OFFSET_TRAIL() macros.
|
||||
*
|
||||
* Most of the more complex (and more convenient) functions call a callback function
|
||||
* Most of the more complex (and more convenient) functions/macros call a callback function
|
||||
* to get that offset from the folded value for a lead surrogate unit.
|
||||
*/
|
||||
|
||||
|
@ -104,7 +104,7 @@ enum {
|
|||
/**
|
||||
* Maximum length of the build-time data (stage 2) array.
|
||||
* The maximum length is 0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400.
|
||||
* (Number of Unicode code points + one all-zero block +
|
||||
* (Number of Unicode code points + one all-initial-value block +
|
||||
* possible duplicate entries for 1024 lead surrogates.)
|
||||
*/
|
||||
#define UTRIE_MAX_BUILD_TIME_DATA_LENGTH (0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400)
|
||||
|
@ -114,6 +114,7 @@ enum {
|
|||
* Extract from a lead surrogate's data the
|
||||
* index array offset of the indexes for that lead surrogate.
|
||||
*
|
||||
* @param data data value for a surrogate from the trie, including the folding offset
|
||||
* @return offset>=UTRIE_BMP_INDEX_LENGTH, or 0 if there is no data for the lead surrogate
|
||||
*/
|
||||
typedef int32_t U_CALLCONV
|
||||
|
@ -140,6 +141,7 @@ struct UTrie {
|
|||
UTrieGetFoldingOffset *getFoldingOffset;
|
||||
|
||||
int32_t indexLength, dataLength;
|
||||
uint32_t initialValue;
|
||||
UBool isLatin1Linear;
|
||||
};
|
||||
|
||||
|
@ -153,7 +155,7 @@ typedef struct UTrie UTrie;
|
|||
]
|
||||
|
||||
/** Internal trie getter from a pair of surrogates */
|
||||
#define _UTRIE_GET_FROM_PAIR(trie, data, c, c2, result) { \
|
||||
#define _UTRIE_GET_FROM_PAIR(trie, data, c, c2, result, resultType) { \
|
||||
int32_t __offset; \
|
||||
\
|
||||
/* get data for lead surrogate */ \
|
||||
|
@ -164,7 +166,7 @@ typedef struct UTrie UTrie;
|
|||
if(__offset>0) { \
|
||||
(result)=_UTRIE_GET_RAW((trie), data, __offset, (c2)&0x3ff); \
|
||||
} else { \
|
||||
(result)=0; \
|
||||
(result)=(resultType)((trie)->initialValue); \
|
||||
} \
|
||||
}
|
||||
|
||||
|
@ -177,28 +179,28 @@ typedef struct UTrie UTrie;
|
|||
* Could be faster(?) but longer with
|
||||
* if((c32)<=0xd7ff) { (result)=_UTRIE_GET_RAW(trie, data, 0, c32); }
|
||||
*/
|
||||
#define _UTRIE_GET(trie, data, c32, result) \
|
||||
#define _UTRIE_GET(trie, data, c32, result, resultType) \
|
||||
if((uint32_t)(c32)<=0xffff) { \
|
||||
/* BMP code points */ \
|
||||
(result)=_UTRIE_GET_FROM_BMP(trie, data, c32); \
|
||||
} else if((uint32_t)(c32)<=0x10ffff) { \
|
||||
/* supplementary code point */ \
|
||||
UChar __lead16=UTF16_LEAD(c32); \
|
||||
_UTRIE_GET_FROM_PAIR(trie, data, __lead16, c32, result); \
|
||||
_UTRIE_GET_FROM_PAIR(trie, data, __lead16, c32, result, resultType); \
|
||||
} else { \
|
||||
/* out of range */ \
|
||||
(result)=0; \
|
||||
(result)=(resultType)((trie)->initialValue); \
|
||||
}
|
||||
|
||||
/** Internal next-post-increment: get the next code point (c, c2) and its data */
|
||||
#define _UTRIE_NEXT(trie, data, src, limit, c, c2, result) { \
|
||||
#define _UTRIE_NEXT(trie, data, src, limit, c, c2, result, resultType) { \
|
||||
(c)=*(src)++; \
|
||||
if(!UTF_IS_LEAD(c)) { \
|
||||
(c2)=0; \
|
||||
(result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
|
||||
} else if((src)!=(limit) && UTF_IS_TRAIL((c2)=*(src))) { \
|
||||
++(src); \
|
||||
_UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result)); \
|
||||
_UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \
|
||||
} else { \
|
||||
/* unpaired lead surrogate code point */ \
|
||||
(c2)=0; \
|
||||
|
@ -207,7 +209,7 @@ typedef struct UTrie UTrie;
|
|||
}
|
||||
|
||||
/** Internal previous: get the previous code point (c, c2) and its data */
|
||||
#define _UTRIE_PREVIOUS(trie, data, start, src, c, c2, result) { \
|
||||
#define _UTRIE_PREVIOUS(trie, data, start, src, c, c2, result, resultType) { \
|
||||
(c)=*--(src); \
|
||||
if(!UTF_IS_SURROGATE(c)) { \
|
||||
(c2)=0; \
|
||||
|
@ -217,7 +219,7 @@ typedef struct UTrie UTrie;
|
|||
if((start)!=(src) && UTF_IS_LEAD((c2)=*((src)-1))) { \
|
||||
--(src); \
|
||||
(result)=(c); (c)=(c2); (c2)=(UChar)(result); /* swap c, c2 */ \
|
||||
_UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result)); \
|
||||
_UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \
|
||||
} else { \
|
||||
/* unpaired trail surrogate code point */ \
|
||||
(c2)=0; \
|
||||
|
@ -305,7 +307,7 @@ typedef struct UTrie UTrie;
|
|||
* @param c32 (UChar32, in) the input code point
|
||||
* @param result (uint16_t, out) uint16_t variable for the trie lookup result
|
||||
*/
|
||||
#define UTRIE_GET16(trie, c32, result) _UTRIE_GET(trie, index, c32, result)
|
||||
#define UTRIE_GET16(trie, c32, result) _UTRIE_GET(trie, index, c32, result, uint16_t)
|
||||
|
||||
/**
|
||||
* Get a 32-bit trie value from a code point.
|
||||
|
@ -316,7 +318,7 @@ typedef struct UTrie UTrie;
|
|||
* @param c32 (UChar32, in) the input code point
|
||||
* @param result (uint32_t, out) uint32_t variable for the trie lookup result
|
||||
*/
|
||||
#define UTRIE_GET32(trie, c32, result) _UTRIE_GET(trie, data32, c32, result)
|
||||
#define UTRIE_GET32(trie, c32, result) _UTRIE_GET(trie, data32, c32, result, uint32_t)
|
||||
|
||||
/**
|
||||
* Get the next code point (c, c2), post-increment src,
|
||||
|
@ -329,7 +331,7 @@ typedef struct UTrie UTrie;
|
|||
* @param c2 (UChar, out) variable for 0 or the trail code unit
|
||||
* @param result (uint16_t, out) uint16_t variable for the trie lookup result
|
||||
*/
|
||||
#define UTRIE_NEXT16(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, index, src, limit, c, c2, result)
|
||||
#define UTRIE_NEXT16(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, index, src, limit, c, c2, result, uint16_t)
|
||||
|
||||
/**
|
||||
* Get the next code point (c, c2), post-increment src,
|
||||
|
@ -342,7 +344,7 @@ typedef struct UTrie UTrie;
|
|||
* @param c2 (UChar, out) variable for 0 or the trail code unit
|
||||
* @param result (uint32_t, out) uint32_t variable for the trie lookup result
|
||||
*/
|
||||
#define UTRIE_NEXT32(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, data32, src, limit, c, c2, result)
|
||||
#define UTRIE_NEXT32(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, data32, src, limit, c, c2, result, uint32_t)
|
||||
|
||||
/**
|
||||
* Get the previous code point (c, c2), pre-decrement src,
|
||||
|
@ -355,7 +357,7 @@ typedef struct UTrie UTrie;
|
|||
* @param c2 (UChar, out) variable for 0 or the trail code unit
|
||||
* @param result (uint16_t, out) uint16_t variable for the trie lookup result
|
||||
*/
|
||||
#define UTRIE_PREVIOUS16(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, index, start, src, c, c2, result)
|
||||
#define UTRIE_PREVIOUS16(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, index, start, src, c, c2, result, uint16_t)
|
||||
|
||||
/**
|
||||
* Get the previous code point (c, c2), pre-decrement src,
|
||||
|
@ -368,7 +370,7 @@ typedef struct UTrie UTrie;
|
|||
* @param c2 (UChar, out) variable for 0 or the trail code unit
|
||||
* @param result (uint32_t, out) uint32_t variable for the trie lookup result
|
||||
*/
|
||||
#define UTRIE_PREVIOUS32(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, data32, start, src, c, c2, result)
|
||||
#define UTRIE_PREVIOUS32(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, data32, start, src, c, c2, result, uint32_t)
|
||||
|
||||
/**
|
||||
* Get a 16-bit trie value from a pair of surrogates.
|
||||
|
@ -378,7 +380,7 @@ typedef struct UTrie UTrie;
|
|||
* @param c2 (UChar, in) a trail surrogate
|
||||
* @param result (uint16_t, out) uint16_t variable for the trie lookup result
|
||||
*/
|
||||
#define UTRIE_GET16_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, index, c, c2, result)
|
||||
#define UTRIE_GET16_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, index, c, c2, result, uint16_t)
|
||||
|
||||
/**
|
||||
* Get a 32-bit trie value from a pair of surrogates.
|
||||
|
@ -388,7 +390,7 @@ typedef struct UTrie UTrie;
|
|||
* @param c2 (UChar, in) a trail surrogate
|
||||
* @param result (uint32_t, out) uint32_t variable for the trie lookup result
|
||||
*/
|
||||
#define UTRIE_GET32_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, data32, c, c2, result)
|
||||
#define UTRIE_GET32_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, data32, c, c2, result, uint32_t)
|
||||
|
||||
/**
|
||||
* Get a 16-bit trie value from a folding offset (from the value of a lead surrogate)
|
||||
|
@ -530,7 +532,7 @@ UNewTrieGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset);
|
|||
* utrie_setRange32() is used, the data array could be large during build time.
|
||||
* The maximum length is
|
||||
* UTRIE_MAX_BUILD_TIME_DATA_LENGTH=0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400.
|
||||
* (Number of Unicode code points + one all-zero block +
|
||||
* (Number of Unicode code points + one all-initial-value block +
|
||||
* possible duplicate entries for 1024 lead surrogates.)
|
||||
* (UTRIE_DATA_BLOCK_LENGTH<=0x200 in all cases.)
|
||||
*
|
||||
|
@ -540,12 +542,15 @@ UNewTrieGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset);
|
|||
* NULL if one is to be allocated
|
||||
* @param maxDataLength the capacity of aliasData (if not NULL) or
|
||||
* the length of the data array to be allocated
|
||||
* @param initialValue the initial value that is set for all code points
|
||||
* @param latin1Linear a flag indicating whether the Latin-1 range is to be allocated and
|
||||
* kept in a linear, contiguous part of the data array
|
||||
* @return a pointer to the initialized fillIn or the allocated and initialized new UNewTrie
|
||||
*/
|
||||
U_CAPI UNewTrie * U_EXPORT2
|
||||
utrie_open(UNewTrie *fillIn, uint32_t *aliasData, int32_t maxDataLength, UBool latin1Linear);
|
||||
utrie_open(UNewTrie *fillIn,
|
||||
uint32_t *aliasData, int32_t maxDataLength,
|
||||
uint32_t initialValue, UBool latin1Linear);
|
||||
|
||||
/**
|
||||
* Clone a build-time trie structure with all entries.
|
||||
|
@ -600,7 +605,7 @@ utrie_set32(UNewTrie *trie, UChar32 c, uint32_t value);
|
|||
* @param c the code point
|
||||
* @param pInBlockZero if not NULL, then *pInBlockZero is set to TRUE
|
||||
* iff the value is retrieved from block 0;
|
||||
* block 0 is the all-zero initial block
|
||||
* block 0 is the all-initial-value initial block
|
||||
* @return the value
|
||||
*/
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
|
@ -615,7 +620,7 @@ utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero);
|
|||
* @param start the first code point to get the value
|
||||
* @param limit one past the last code point to get the value
|
||||
* @param value the value
|
||||
* @param overwrite flag for whether old non-zero values are to be overwritten
|
||||
* @param overwrite flag for whether old non-initial values are to be overwritten
|
||||
* @return FALSE if a failure occurred (illegal argument or data array overrun)
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
|
|
|
@ -193,7 +193,7 @@ testTrieIteration(const char *testName,
|
|||
if(offset>0) {
|
||||
value=UTRIE_GET16_FROM_OFFSET_TRAIL(trie, offset, c2);
|
||||
} else {
|
||||
value=0;
|
||||
value=trie->initialValue;
|
||||
}
|
||||
} else {
|
||||
value=UTRIE_GET32_FROM_LEAD(trie, c);
|
||||
|
@ -201,7 +201,7 @@ testTrieIteration(const char *testName,
|
|||
if(offset>0) {
|
||||
value=UTRIE_GET32_FROM_OFFSET_TRAIL(trie, offset, c2);
|
||||
} else {
|
||||
value=0;
|
||||
value=trie->initialValue;
|
||||
}
|
||||
}
|
||||
if(value!=values[i]) {
|
||||
|
@ -266,7 +266,7 @@ testTrieRanges(const char *testName,
|
|||
UBool overwrite, ok;
|
||||
|
||||
log_verbose("\ntesting Trie '%s'\n", testName);
|
||||
newTrie=utrie_open(NULL, NULL, 2000, latin1Linear);
|
||||
newTrie=utrie_open(NULL, NULL, 2000, checkRanges[0].value, latin1Linear);
|
||||
|
||||
/* set values from setRanges[] */
|
||||
ok=TRUE;
|
||||
|
@ -543,14 +543,38 @@ checkRanges2[]={
|
|||
0x110000, 0
|
||||
};
|
||||
|
||||
/* use a non-zero initial value */
|
||||
static const SetRange
|
||||
setRanges3[]={
|
||||
0x31, 0xa4, 1, FALSE,
|
||||
0x3400, 0x6789, 2, FALSE,
|
||||
0x30000,0x34567,9, TRUE,
|
||||
0x45678,0x56789,3, TRUE
|
||||
};
|
||||
|
||||
static const CheckRange
|
||||
checkRanges3[]={
|
||||
0, 9, /* dummy start range, also carries the initial value */
|
||||
0x31, 9,
|
||||
0xa4, 1,
|
||||
0x3400, 9,
|
||||
0x6789, 2,
|
||||
0x45678,9,
|
||||
0x56789,3,
|
||||
0x110000,9
|
||||
};
|
||||
|
||||
static void
|
||||
TrieTest() {
|
||||
testTrieRanges4("set1",
|
||||
setRanges1, ARRAY_LENGTH(setRanges1),
|
||||
checkRanges1, ARRAY_LENGTH(checkRanges1));
|
||||
testTrieRanges4("set2",
|
||||
testTrieRanges4("set2-overlap",
|
||||
setRanges2, ARRAY_LENGTH(setRanges2),
|
||||
checkRanges2, ARRAY_LENGTH(checkRanges2));
|
||||
testTrieRanges4("set3-initial-9",
|
||||
setRanges3, ARRAY_LENGTH(setRanges3),
|
||||
checkRanges3, ARRAY_LENGTH(checkRanges3));
|
||||
}
|
||||
|
||||
#if 1
|
||||
|
|
|
@ -310,7 +310,7 @@ setUnicodeVersion(const char *v) {
|
|||
|
||||
extern void
|
||||
initStore() {
|
||||
pTrie=utrie_open(NULL, NULL, MAX_PROPS_COUNT, FALSE);
|
||||
pTrie=utrie_open(NULL, NULL, MAX_PROPS_COUNT, 0, FALSE);
|
||||
if(pTrie==NULL) {
|
||||
fprintf(stderr, "error: unable to create a UNewTrie\n");
|
||||
exit(U_MEMORY_ALLOCATION_ERROR);
|
||||
|
@ -625,14 +625,20 @@ makeProps(Props *p) {
|
|||
|
||||
extern void
|
||||
addProps(uint32_t c, uint32_t x) {
|
||||
utrie_set32(pTrie, (UChar32)c, x);
|
||||
if(!utrie_set32(pTrie, (UChar32)c, x)) {
|
||||
fprintf(stderr, "error: too many entries for the properties trie\n");
|
||||
exit(U_BUFFER_OVERFLOW_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
/* areas of same properties ------------------------------------------------- */
|
||||
|
||||
extern void
|
||||
repeatProps(uint32_t first, uint32_t last, uint32_t x) {
|
||||
utrie_setRange32(pTrie, (UChar32)first, (UChar32)(last+1), x, FALSE);
|
||||
if(!utrie_setRange32(pTrie, (UChar32)first, (UChar32)(last+1), x, FALSE)) {
|
||||
fprintf(stderr, "error: too many entries for the properties trie\n");
|
||||
exit(U_BUFFER_OVERFLOW_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
/* compacting --------------------------------------------------------------- */
|
||||
|
@ -832,6 +838,8 @@ generateData(const char *dataDir) {
|
|||
dataLength, (unsigned long)size);
|
||||
exit(U_INTERNAL_PROGRAM_ERROR);
|
||||
}
|
||||
|
||||
utrie_close(pTrie);
|
||||
}
|
||||
|
||||
/* helpers ------------------------------------------------------------------ */
|
||||
|
|
Loading…
Add table
Reference in a new issue