ICU-1586 handle non-zero initial values

X-SVN-Rev: 7350
This commit is contained in:
Markus Scherer 2001-12-16 21:15:09 +00:00
parent a903b84867
commit e4b419efb7
4 changed files with 118 additions and 73 deletions

View file

@ -32,8 +32,11 @@
/* Building a trie ----------------------------------------------------------*/
U_CAPI UNewTrie * U_EXPORT2
utrie_open(UNewTrie *fillIn, uint32_t *aliasData, int32_t maxDataLength, UBool latin1Linear) {
utrie_open(UNewTrie *fillIn,
uint32_t *aliasData, int32_t maxDataLength,
uint32_t initialValue, UBool latin1Linear) {
UNewTrie *trie;
int32_t i, j;
if( maxDataLength<UTRIE_DATA_BLOCK_LENGTH ||
(latin1Linear && maxDataLength<1024)
@ -64,25 +67,26 @@ utrie_open(UNewTrie *fillIn, uint32_t *aliasData, int32_t maxDataLength, UBool l
trie->isDataAllocated=TRUE;
}
/* preallocate and reset the first data block (block index 0, all values 0) */
uprv_memset(trie->data, 0, 4*UTRIE_DATA_BLOCK_LENGTH);
trie->dataLength=UTRIE_DATA_BLOCK_LENGTH;
/* preallocate and reset the first data block (block index 0) */
j=UTRIE_DATA_BLOCK_LENGTH;
/* preallocate and reset Latin-1 (U+0000..U+00ff) after that if requested */
if(latin1Linear) {
int32_t i, j;
uprv_memset(trie->data, 0, 4096); /* made sure above that maxDataLength>=1024 */
/* preallocate and reset the first block (number 0) and Latin-1 (U+0000..U+00ff) after that */
/* made sure above that maxDataLength>=1024 */
/* set indexes to point to consecutive data blocks */
i=0;
j=UTRIE_DATA_BLOCK_LENGTH;
do {
/* do this at least for trie->index[0] even if that block is only partly used for Latin-1 */
trie->index[i++]=j;
j+=UTRIE_DATA_BLOCK_LENGTH;
} while(i<(256>>UTRIE_SHIFT));
trie->dataLength=j;
}
/* reset the initially allocated blocks to the initial value */
trie->dataLength=j;
while(j>0) {
trie->data[--j]=initialValue;
}
trie->indexLength=UTRIE_MAX_INDEX_LENGTH;
@ -114,7 +118,7 @@ utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_
isDataAllocated=TRUE;
}
trie=utrie_open(fillIn, aliasData, aliasDataCapacity, other->isLatin1Linear);
trie=utrie_open(fillIn, aliasData, aliasDataCapacity, other->data[0], other->isLatin1Linear);
if(trie==NULL) {
uprv_free(aliasData);
} else {
@ -176,12 +180,8 @@ utrie_getDataBlock(UNewTrie *trie, UChar32 c) {
trie->dataLength=newTop;
trie->index[c]=newBlock;
if(indexValue==0) {
uprv_memset(trie->data+newBlock, 0, 4*UTRIE_DATA_BLOCK_LENGTH);
} else /* indexValue<0 */ {
/* copy-on-write for a block from a setRange() */
uprv_memcpy(trie->data+newBlock, trie->data-indexValue, 4*UTRIE_DATA_BLOCK_LENGTH);
}
/* copy-on-write for a block from a setRange() */
uprv_memcpy(trie->data+newBlock, trie->data-indexValue, 4*UTRIE_DATA_BLOCK_LENGTH);
return newBlock;
}
@ -230,7 +230,8 @@ utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero) {
* @internal
*/
static void
utrie_fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value, UBool overwrite) {
utrie_fillBlock(uint32_t *block, UChar32 start, UChar32 limit,
uint32_t value, uint32_t initialValue, UBool overwrite) {
uint32_t *pLimit;
pLimit=block+limit;
@ -241,7 +242,7 @@ utrie_fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value, U
}
} else {
while(block<pLimit) {
if(*block==0) {
if(*block==initialValue) {
*block=value;
}
++block;
@ -256,6 +257,7 @@ utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, U
* mark index values for repeat-data blocks by storing the negative block number
* fill around existing values if any, if(!overwrite)
*/
uint32_t initialValue;
int32_t block, rest, repeatBlock;
/* valid, uncompacted trie and valid indexes? */
@ -268,6 +270,7 @@ utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, U
return TRUE; /* nothing to do */
}
initialValue=trie->data[0];
if(start&UTRIE_MASK) {
UChar32 nextStart;
@ -279,10 +282,12 @@ utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, U
nextStart=(start+UTRIE_DATA_BLOCK_LENGTH)&~UTRIE_MASK;
if(nextStart<=limit) {
utrie_fillBlock(trie->data+block, start&UTRIE_MASK, UTRIE_DATA_BLOCK_LENGTH, value, overwrite);
utrie_fillBlock(trie->data+block, start&UTRIE_MASK, UTRIE_DATA_BLOCK_LENGTH,
value, initialValue, overwrite);
start=nextStart;
} else {
utrie_fillBlock(trie->data+block, start&UTRIE_MASK, limit&UTRIE_MASK, value, overwrite);
utrie_fillBlock(trie->data+block, start&UTRIE_MASK, limit&UTRIE_MASK,
value, initialValue, overwrite);
return TRUE;
}
}
@ -294,7 +299,7 @@ utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, U
limit&=~UTRIE_MASK;
/* iterate over all-value blocks */
if(value==0) {
if(value==initialValue) {
repeatBlock=0;
} else {
repeatBlock=-1;
@ -304,7 +309,7 @@ utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, U
block=trie->index[start>>UTRIE_SHIFT];
if(block>0) {
/* already allocated, fill in value */
utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, value, overwrite);
utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, overwrite);
} else if(trie->data[-block]!=value && (block==0 || overwrite)) {
/* set the repeatBlock instead of the current block 0 or range block */
if(repeatBlock>=0) {
@ -318,7 +323,7 @@ utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, U
/* set the negative block number to indicate that it is a repeat block */
trie->index[start>>UTRIE_SHIFT]=-repeatBlock;
utrie_fillBlock(trie->data+repeatBlock, 0, UTRIE_DATA_BLOCK_LENGTH, value, TRUE);
utrie_fillBlock(trie->data+repeatBlock, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, TRUE);
}
}
@ -332,7 +337,7 @@ utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, U
return FALSE;
}
utrie_fillBlock(trie->data+block, 0, rest, value, overwrite);
utrie_fillBlock(trie->data+block, 0, rest, value, initialValue, overwrite);
}
return TRUE;
@ -476,7 +481,7 @@ utrie_fold(UNewTrie *trie, UNewTrieGetFoldedValue *getFoldedValue, UErrorCode *p
* Compact a folded build-time trie.
*
* The compaction
* - removes all-zero blocks
* - removes all-initial-value blocks
* - maps all blocks that are completely filled with the same values to only one of them
* - overlaps adjacent blocks as much as possible
*
@ -513,11 +518,12 @@ utrie_compact(UNewTrie *trie, UErrorCode *pErrorCode) {
/* compaction */
/* never move the all-zero block */
/* never move the all-initial-value block 0 */
trie->map[0]=0;
/* prime the whole blocks lookup table with the all-zero block */
wholeBlockValues[0]=wholeBlockIndexes[0]=0;
/* prime the whole blocks lookup table with the all-initial-value block 0 */
wholeBlockValues[0]=trie->data[0];
wholeBlockIndexes[0]=0;
countWholeBlocks=1;
/* if Latin-1 is preallocated and linear, then do not compact Latin-1 data */
@ -842,6 +848,7 @@ utrie_unserialize(UTrie *trie, const uint8_t *data, int32_t length, UErrorCode *
return -1;
}
trie->data32=(const uint32_t *)p16;
trie->initialValue=trie->data32[0];
return sizeof(UTrieHeader)+2*trie->indexLength+4*trie->dataLength;
} else {
if(length<2*trie->dataLength) {
@ -851,6 +858,7 @@ utrie_unserialize(UTrie *trie, const uint8_t *data, int32_t length, UErrorCode *
/* the "data16" data is used via the index pointer */
trie->data32=NULL;
trie->initialValue=trie->index[trie->indexLength];
return sizeof(UTrieHeader)+2*trie->indexLength+2*trie->dataLength;
}
}
@ -873,7 +881,7 @@ utrie_enum(UTrie *trie,
const uint32_t *data32;
const uint16_t *index;
uint32_t value, prevValue, noValue;
uint32_t value, prevValue, initialValue;
UChar32 c, prev;
int32_t l, i, j, block, prevBlock, offset;
@ -888,13 +896,13 @@ utrie_enum(UTrie *trie,
index=trie->index;
data32=trie->data32;
/* get the non-value that corresponds to a trie data entry of 0 */
noValue=enumValue(context, 0);
/* get the enumeration value that corresponds to an initial-value trie data entry */
initialValue=enumValue(context, trie->initialValue);
/* set variables for previous range */
prevBlock=0;
prev=0;
prevValue=noValue;
prevValue=initialValue;
/* enumerate BMP - the main loop enumerates data blocks */
for(i=0, c=0; c<=0xffff; ++i) {
@ -911,20 +919,20 @@ utrie_enum(UTrie *trie,
/* the block is the same as the previous one, and filled with value */
c+=UTRIE_DATA_BLOCK_LENGTH;
} else if(block==0) {
/* this is the all-zero block */
if(prevValue!=noValue) {
/* this is the all-initial-value block */
if(prevValue!=initialValue) {
if(prev<c) {
enumRange(context, prev, c, prevValue);
}
prevBlock=0;
prev=c;
prevValue=noValue;
prevValue=initialValue;
}
c+=UTRIE_DATA_BLOCK_LENGTH;
} else {
prevBlock=block;
for(j=0; j<UTRIE_DATA_BLOCK_LENGTH; ++j) {
value= data32!=NULL ? enumValue(context, data32[block+j]) : enumValue(context, index[block+j]);
value=enumValue(context, data32!=NULL ? data32[block+j] : index[block+j]);
if(value!=prevValue) {
if(prev<c) {
enumRange(context, prev, c, prevValue);
@ -966,13 +974,13 @@ utrie_enum(UTrie *trie,
offset=trie->getFoldingOffset(value);
if(offset<=0) {
/* no data for this lead surrogate */
if(prevValue!=noValue) {
if(prevValue!=initialValue) {
if(prev<c) {
enumRange(context, prev, c, prevValue);
}
prevBlock=0;
prev=c;
prevValue=noValue;
prevValue=initialValue;
}
/* nothing else to do for the supplementary code points for this lead surrogate */
@ -988,20 +996,20 @@ utrie_enum(UTrie *trie,
/* the block is the same as the previous one, and filled with value */
c+=UTRIE_DATA_BLOCK_LENGTH;
} else if(block==0) {
/* this is the all-zero block */
if(prevValue!=noValue) {
/* this is the all-initial-value block */
if(prevValue!=initialValue) {
if(prev<c) {
enumRange(context, prev, c, prevValue);
}
prevBlock=0;
prev=c;
prevValue=noValue;
prevValue=initialValue;
}
c+=UTRIE_DATA_BLOCK_LENGTH;
} else {
prevBlock=block;
for(j=0; j<UTRIE_DATA_BLOCK_LENGTH; ++j) {
value= data32!=NULL ? enumValue(context, data32[block+j]) : enumValue(context, index[block+j]);
value=enumValue(context, data32!=NULL ? data32[block+j] : index[block+j]);
if(value!=prevValue) {
if(prev<c) {
enumRange(context, prev, c, prevValue);

View file

@ -40,7 +40,7 @@ U_CDECL_BEGIN
* From such a folded value, an offset needs to be extracted to supply
* to the _FROM_OFFSET_TRAIL() macros.
*
* Most of the more complex (and more convenient) functions call a callback function
* Most of the more complex (and more convenient) functions/macros call a callback function
* to get that offset from the folded value for a lead surrogate unit.
*/
@ -104,7 +104,7 @@ enum {
/**
* Maximum length of the build-time data (stage 2) array.
* The maximum length is 0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400.
* (Number of Unicode code points + one all-zero block +
* (Number of Unicode code points + one all-initial-value block +
* possible duplicate entries for 1024 lead surrogates.)
*/
#define UTRIE_MAX_BUILD_TIME_DATA_LENGTH (0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400)
@ -114,6 +114,7 @@ enum {
* Extract from a lead surrogate's data the
* index array offset of the indexes for that lead surrogate.
*
* @param data data value for a surrogate from the trie, including the folding offset
* @return offset>=UTRIE_BMP_INDEX_LENGTH, or 0 if there is no data for the lead surrogate
*/
typedef int32_t U_CALLCONV
@ -140,6 +141,7 @@ struct UTrie {
UTrieGetFoldingOffset *getFoldingOffset;
int32_t indexLength, dataLength;
uint32_t initialValue;
UBool isLatin1Linear;
};
@ -153,7 +155,7 @@ typedef struct UTrie UTrie;
]
/** Internal trie getter from a pair of surrogates */
#define _UTRIE_GET_FROM_PAIR(trie, data, c, c2, result) { \
#define _UTRIE_GET_FROM_PAIR(trie, data, c, c2, result, resultType) { \
int32_t __offset; \
\
/* get data for lead surrogate */ \
@ -164,7 +166,7 @@ typedef struct UTrie UTrie;
if(__offset>0) { \
(result)=_UTRIE_GET_RAW((trie), data, __offset, (c2)&0x3ff); \
} else { \
(result)=0; \
(result)=(resultType)((trie)->initialValue); \
} \
}
@ -177,28 +179,28 @@ typedef struct UTrie UTrie;
* Could be faster(?) but longer with
* if((c32)<=0xd7ff) { (result)=_UTRIE_GET_RAW(trie, data, 0, c32); }
*/
#define _UTRIE_GET(trie, data, c32, result) \
#define _UTRIE_GET(trie, data, c32, result, resultType) \
if((uint32_t)(c32)<=0xffff) { \
/* BMP code points */ \
(result)=_UTRIE_GET_FROM_BMP(trie, data, c32); \
} else if((uint32_t)(c32)<=0x10ffff) { \
/* supplementary code point */ \
UChar __lead16=UTF16_LEAD(c32); \
_UTRIE_GET_FROM_PAIR(trie, data, __lead16, c32, result); \
_UTRIE_GET_FROM_PAIR(trie, data, __lead16, c32, result, resultType); \
} else { \
/* out of range */ \
(result)=0; \
(result)=(resultType)((trie)->initialValue); \
}
/** Internal next-post-increment: get the next code point (c, c2) and its data */
#define _UTRIE_NEXT(trie, data, src, limit, c, c2, result) { \
#define _UTRIE_NEXT(trie, data, src, limit, c, c2, result, resultType) { \
(c)=*(src)++; \
if(!UTF_IS_LEAD(c)) { \
(c2)=0; \
(result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
} else if((src)!=(limit) && UTF_IS_TRAIL((c2)=*(src))) { \
++(src); \
_UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result)); \
_UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \
} else { \
/* unpaired lead surrogate code point */ \
(c2)=0; \
@ -207,7 +209,7 @@ typedef struct UTrie UTrie;
}
/** Internal previous: get the previous code point (c, c2) and its data */
#define _UTRIE_PREVIOUS(trie, data, start, src, c, c2, result) { \
#define _UTRIE_PREVIOUS(trie, data, start, src, c, c2, result, resultType) { \
(c)=*--(src); \
if(!UTF_IS_SURROGATE(c)) { \
(c2)=0; \
@ -217,7 +219,7 @@ typedef struct UTrie UTrie;
if((start)!=(src) && UTF_IS_LEAD((c2)=*((src)-1))) { \
--(src); \
(result)=(c); (c)=(c2); (c2)=(UChar)(result); /* swap c, c2 */ \
_UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result)); \
_UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \
} else { \
/* unpaired trail surrogate code point */ \
(c2)=0; \
@ -305,7 +307,7 @@ typedef struct UTrie UTrie;
* @param c32 (UChar32, in) the input code point
* @param result (uint16_t, out) uint16_t variable for the trie lookup result
*/
#define UTRIE_GET16(trie, c32, result) _UTRIE_GET(trie, index, c32, result)
#define UTRIE_GET16(trie, c32, result) _UTRIE_GET(trie, index, c32, result, uint16_t)
/**
* Get a 32-bit trie value from a code point.
@ -316,7 +318,7 @@ typedef struct UTrie UTrie;
* @param c32 (UChar32, in) the input code point
* @param result (uint32_t, out) uint32_t variable for the trie lookup result
*/
#define UTRIE_GET32(trie, c32, result) _UTRIE_GET(trie, data32, c32, result)
#define UTRIE_GET32(trie, c32, result) _UTRIE_GET(trie, data32, c32, result, uint32_t)
/**
* Get the next code point (c, c2), post-increment src,
@ -329,7 +331,7 @@ typedef struct UTrie UTrie;
* @param c2 (UChar, out) variable for 0 or the trail code unit
* @param result (uint16_t, out) uint16_t variable for the trie lookup result
*/
#define UTRIE_NEXT16(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, index, src, limit, c, c2, result)
#define UTRIE_NEXT16(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, index, src, limit, c, c2, result, uint16_t)
/**
* Get the next code point (c, c2), post-increment src,
@ -342,7 +344,7 @@ typedef struct UTrie UTrie;
* @param c2 (UChar, out) variable for 0 or the trail code unit
* @param result (uint32_t, out) uint32_t variable for the trie lookup result
*/
#define UTRIE_NEXT32(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, data32, src, limit, c, c2, result)
#define UTRIE_NEXT32(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, data32, src, limit, c, c2, result, uint32_t)
/**
* Get the previous code point (c, c2), pre-decrement src,
@ -355,7 +357,7 @@ typedef struct UTrie UTrie;
* @param c2 (UChar, out) variable for 0 or the trail code unit
* @param result (uint16_t, out) uint16_t variable for the trie lookup result
*/
#define UTRIE_PREVIOUS16(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, index, start, src, c, c2, result)
#define UTRIE_PREVIOUS16(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, index, start, src, c, c2, result, uint16_t)
/**
* Get the previous code point (c, c2), pre-decrement src,
@ -368,7 +370,7 @@ typedef struct UTrie UTrie;
* @param c2 (UChar, out) variable for 0 or the trail code unit
* @param result (uint32_t, out) uint32_t variable for the trie lookup result
*/
#define UTRIE_PREVIOUS32(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, data32, start, src, c, c2, result)
#define UTRIE_PREVIOUS32(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, data32, start, src, c, c2, result, uint32_t)
/**
* Get a 16-bit trie value from a pair of surrogates.
@ -378,7 +380,7 @@ typedef struct UTrie UTrie;
* @param c2 (UChar, in) a trail surrogate
* @param result (uint16_t, out) uint16_t variable for the trie lookup result
*/
#define UTRIE_GET16_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, index, c, c2, result)
#define UTRIE_GET16_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, index, c, c2, result, uint16_t)
/**
* Get a 32-bit trie value from a pair of surrogates.
@ -388,7 +390,7 @@ typedef struct UTrie UTrie;
* @param c2 (UChar, in) a trail surrogate
* @param result (uint32_t, out) uint32_t variable for the trie lookup result
*/
#define UTRIE_GET32_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, data32, c, c2, result)
#define UTRIE_GET32_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, data32, c, c2, result, uint32_t)
/**
* Get a 16-bit trie value from a folding offset (from the value of a lead surrogate)
@ -530,7 +532,7 @@ UNewTrieGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset);
* utrie_setRange32() is used, the data array could be large during build time.
* The maximum length is
* UTRIE_MAX_BUILD_TIME_DATA_LENGTH=0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400.
* (Number of Unicode code points + one all-zero block +
* (Number of Unicode code points + one all-initial-value block +
* possible duplicate entries for 1024 lead surrogates.)
* (UTRIE_DATA_BLOCK_LENGTH<=0x200 in all cases.)
*
@ -540,12 +542,15 @@ UNewTrieGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset);
* NULL if one is to be allocated
* @param maxDataLength the capacity of aliasData (if not NULL) or
* the length of the data array to be allocated
* @param initialValue the initial value that is set for all code points
* @param latin1Linear a flag indicating whether the Latin-1 range is to be allocated and
* kept in a linear, contiguous part of the data array
* @return a pointer to the initialized fillIn or the allocated and initialized new UNewTrie
*/
U_CAPI UNewTrie * U_EXPORT2
utrie_open(UNewTrie *fillIn, uint32_t *aliasData, int32_t maxDataLength, UBool latin1Linear);
utrie_open(UNewTrie *fillIn,
uint32_t *aliasData, int32_t maxDataLength,
uint32_t initialValue, UBool latin1Linear);
/**
* Clone a build-time trie structure with all entries.
@ -600,7 +605,7 @@ utrie_set32(UNewTrie *trie, UChar32 c, uint32_t value);
* @param c the code point
* @param pInBlockZero if not NULL, then *pInBlockZero is set to TRUE
* iff the value is retrieved from block 0;
* block 0 is the all-zero initial block
* block 0 is the all-initial-value initial block
* @return the value
*/
U_CAPI uint32_t U_EXPORT2
@ -615,7 +620,7 @@ utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero);
* @param start the first code point to get the value
* @param limit one past the last code point to get the value
* @param value the value
* @param overwrite flag for whether old non-zero values are to be overwritten
* @param overwrite flag for whether old non-initial values are to be overwritten
* @return FALSE if a failure occurred (illegal argument or data array overrun)
*/
U_CAPI UBool U_EXPORT2

View file

@ -193,7 +193,7 @@ testTrieIteration(const char *testName,
if(offset>0) {
value=UTRIE_GET16_FROM_OFFSET_TRAIL(trie, offset, c2);
} else {
value=0;
value=trie->initialValue;
}
} else {
value=UTRIE_GET32_FROM_LEAD(trie, c);
@ -201,7 +201,7 @@ testTrieIteration(const char *testName,
if(offset>0) {
value=UTRIE_GET32_FROM_OFFSET_TRAIL(trie, offset, c2);
} else {
value=0;
value=trie->initialValue;
}
}
if(value!=values[i]) {
@ -266,7 +266,7 @@ testTrieRanges(const char *testName,
UBool overwrite, ok;
log_verbose("\ntesting Trie '%s'\n", testName);
newTrie=utrie_open(NULL, NULL, 2000, latin1Linear);
newTrie=utrie_open(NULL, NULL, 2000, checkRanges[0].value, latin1Linear);
/* set values from setRanges[] */
ok=TRUE;
@ -543,14 +543,38 @@ checkRanges2[]={
0x110000, 0
};
/* use a non-zero initial value */
/*
 * Ranges that the "set3-initial-9" test case writes into a freshly opened trie.
 * NOTE(review): each row is presumably { start, limit, value, overwrite }, in the
 * same order as the utrie_setRange32() arguments - confirm against the SetRange declaration.
 * The third row writes 9, the same value that checkRanges3[0] carries as the initial value.
 */
static const SetRange
setRanges3[]={
0x31, 0xa4, 1, FALSE,
0x3400, 0x6789, 2, FALSE,
0x30000,0x34567,9, TRUE,
0x45678,0x56789,3, TRUE
};
/*
 * Expected trie contents after applying setRanges3[].
 * NOTE(review): each row is presumably { limit, value }, i.e. the value expected for
 * every code point from the previous row's limit up to (but excluding) this row's
 * limit - confirm against the CheckRange declaration.
 * The value of entry 0 is what testTrieRanges() hands to utrie_open() as the initial value.
 */
static const CheckRange
checkRanges3[]={
0, 9, /* dummy start range, also carries the initial value */
0x31, 9,
0xa4, 1,
0x3400, 9,
0x6789, 2,
0x45678,9,
0x56789,3,
0x110000,9
};
static void
TrieTest() {
testTrieRanges4("set1",
setRanges1, ARRAY_LENGTH(setRanges1),
checkRanges1, ARRAY_LENGTH(checkRanges1));
testTrieRanges4("set2",
testTrieRanges4("set2-overlap",
setRanges2, ARRAY_LENGTH(setRanges2),
checkRanges2, ARRAY_LENGTH(checkRanges2));
testTrieRanges4("set3-initial-9",
setRanges3, ARRAY_LENGTH(setRanges3),
checkRanges3, ARRAY_LENGTH(checkRanges3));
}
#if 1

View file

@ -310,7 +310,7 @@ setUnicodeVersion(const char *v) {
extern void
initStore() {
pTrie=utrie_open(NULL, NULL, MAX_PROPS_COUNT, FALSE);
pTrie=utrie_open(NULL, NULL, MAX_PROPS_COUNT, 0, FALSE);
if(pTrie==NULL) {
fprintf(stderr, "error: unable to create a UNewTrie\n");
exit(U_MEMORY_ALLOCATION_ERROR);
@ -625,14 +625,20 @@ makeProps(Props *p) {
extern void
addProps(uint32_t c, uint32_t x) {
utrie_set32(pTrie, (UChar32)c, x);
if(!utrie_set32(pTrie, (UChar32)c, x)) {
fprintf(stderr, "error: too many entries for the properties trie\n");
exit(U_BUFFER_OVERFLOW_ERROR);
}
}
/* areas of same properties ------------------------------------------------- */
extern void
repeatProps(uint32_t first, uint32_t last, uint32_t x) {
utrie_setRange32(pTrie, (UChar32)first, (UChar32)(last+1), x, FALSE);
if(!utrie_setRange32(pTrie, (UChar32)first, (UChar32)(last+1), x, FALSE)) {
fprintf(stderr, "error: too many entries for the properties trie\n");
exit(U_BUFFER_OVERFLOW_ERROR);
}
}
/* compacting --------------------------------------------------------------- */
@ -832,6 +838,8 @@ generateData(const char *dataDir) {
dataLength, (unsigned long)size);
exit(U_INTERNAL_PROGRAM_ERROR);
}
utrie_close(pTrie);
}
/* helpers ------------------------------------------------------------------ */