mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-16 10:17:23 +00:00
ICU-6076 Use window failure testing instead of max size testing. This allows you to skip some hash table failures. Hash tables grow by powers of 2.
X-SVN-Rev: 23409
This commit is contained in:
parent
fb5e448868
commit
2bc412e598
8 changed files with 419 additions and 385 deletions
|
@ -7230,7 +7230,7 @@ inline void UCOL_INIT_CEBUF(ucol_CEBuf *b) {
|
|||
}
|
||||
|
||||
static
|
||||
void ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci) {
|
||||
void ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci, UErrorCode *status) {
|
||||
uint32_t oldSize;
|
||||
uint32_t newSize;
|
||||
uint32_t *newBuf;
|
||||
|
@ -7239,7 +7239,10 @@ void ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci) {
|
|||
oldSize = b->pos - b->buf;
|
||||
newSize = oldSize * 2;
|
||||
newBuf = (uint32_t *)uprv_malloc(newSize * sizeof(uint32_t));
|
||||
if(newBuf != NULL) {
|
||||
if(newBuf == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
else {
|
||||
uprv_memcpy(newBuf, b->buf, oldSize * sizeof(uint32_t));
|
||||
if (b->buf != b->localArray) {
|
||||
uprv_free(b->buf);
|
||||
|
@ -7251,11 +7254,13 @@ void ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci) {
|
|||
}
|
||||
|
||||
static
|
||||
inline void UCOL_CEBUF_PUT(ucol_CEBuf *b, uint32_t ce, collIterate *ci) {
|
||||
inline void UCOL_CEBUF_PUT(ucol_CEBuf *b, uint32_t ce, collIterate *ci, UErrorCode *status) {
|
||||
if (b->pos == b->endp) {
|
||||
ucol_CEBuf_Expand(b, ci);
|
||||
ucol_CEBuf_Expand(b, ci, status);
|
||||
}
|
||||
if (U_SUCCESS(*status)) {
|
||||
*(b)->pos++ = ce;
|
||||
}
|
||||
*(b)->pos++ = ce;
|
||||
}
|
||||
|
||||
/* This is a trick string compare function that goes in and uses sortkeys to compare */
|
||||
|
@ -7400,7 +7405,7 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
|
|||
// We get the next CE
|
||||
sOrder = ucol_IGetNextCE(coll, sColl, status);
|
||||
// Stuff it in the buffer
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl);
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
|
||||
// And keep just the primary part.
|
||||
sOrder &= UCOL_PRIMARYMASK;
|
||||
} while(sOrder == 0);
|
||||
|
@ -7408,7 +7413,7 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
|
|||
// see the comments on the above block
|
||||
do {
|
||||
tOrder = ucol_IGetNextCE(coll, tColl, status);
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl);
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
|
||||
tOrder &= UCOL_PRIMARYMASK;
|
||||
} while(tOrder == 0);
|
||||
|
||||
|
@ -7439,7 +7444,7 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
|
|||
for(;;) {
|
||||
sOrder = ucol_IGetNextCE(coll, sColl, status);
|
||||
if(sOrder == UCOL_NO_MORE_CES) {
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl);
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
|
||||
break;
|
||||
} else if(sOrder == 0 || (sInShifted && (sOrder & UCOL_PRIMARYMASK) == 0)) {
|
||||
/* UCA amendment - ignore ignorables that follow shifted code points */
|
||||
|
@ -7448,32 +7453,32 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
|
|||
if((sOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary value */
|
||||
if(sInShifted) {
|
||||
sOrder = (sOrder & UCOL_PRIMARYMASK) | 0xC0; /* preserve interesting continuation */
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl);
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
|
||||
continue;
|
||||
} else {
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl);
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
|
||||
break;
|
||||
}
|
||||
} else { /* Just lower level values */
|
||||
if(sInShifted) {
|
||||
continue;
|
||||
} else {
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl);
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} else { /* regular */
|
||||
if((sOrder & UCOL_PRIMARYMASK) > LVT) {
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl);
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
|
||||
break;
|
||||
} else {
|
||||
if((sOrder & UCOL_PRIMARYMASK) > 0) {
|
||||
sInShifted = TRUE;
|
||||
sOrder &= UCOL_PRIMARYMASK;
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl);
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
|
||||
continue;
|
||||
} else {
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl);
|
||||
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
|
||||
sInShifted = FALSE;
|
||||
continue;
|
||||
}
|
||||
|
@ -7486,7 +7491,7 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
|
|||
for(;;) {
|
||||
tOrder = ucol_IGetNextCE(coll, tColl, status);
|
||||
if(tOrder == UCOL_NO_MORE_CES) {
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl);
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
|
||||
break;
|
||||
} else if(tOrder == 0 || (tInShifted && (tOrder & UCOL_PRIMARYMASK) == 0)) {
|
||||
/* UCA amendment - ignore ignorables that follow shifted code points */
|
||||
|
@ -7495,32 +7500,32 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
|
|||
if((tOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary value */
|
||||
if(tInShifted) {
|
||||
tOrder = (tOrder & UCOL_PRIMARYMASK) | 0xC0; /* preserve interesting continuation */
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl);
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
|
||||
continue;
|
||||
} else {
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl);
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
|
||||
break;
|
||||
}
|
||||
} else { /* Just lower level values */
|
||||
if(tInShifted) {
|
||||
continue;
|
||||
} else {
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl);
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} else { /* regular */
|
||||
if((tOrder & UCOL_PRIMARYMASK) > LVT) {
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl);
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
|
||||
break;
|
||||
} else {
|
||||
if((tOrder & UCOL_PRIMARYMASK) > 0) {
|
||||
tInShifted = TRUE;
|
||||
tOrder &= UCOL_PRIMARYMASK;
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl);
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
|
||||
continue;
|
||||
} else {
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl);
|
||||
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
|
||||
tInShifted = FALSE;
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -159,6 +159,9 @@ uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollat
|
|||
uhash_setValueDeleter(t->prefixLookup, uhash_freeBlock);
|
||||
|
||||
t->contractions = uprv_cnttab_open(t->mapping, status);
|
||||
if (U_FAILURE(*status)) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* copy UCA's maxexpansion and merge as we go along */
|
||||
if (UCA != NULL) {
|
||||
|
@ -213,8 +216,9 @@ uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollat
|
|||
return t;
|
||||
|
||||
allocation_failure:
|
||||
uprv_uca_closeTempTable(t);
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
cleanup:
|
||||
uprv_uca_closeTempTable(t);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
|
@ -274,6 +274,8 @@ ucol_openRules( const UChar *rules,
|
|||
return 0;
|
||||
}
|
||||
|
||||
UCollator *result = NULL;
|
||||
UCATableHeader *table = NULL;
|
||||
UCollator *UCA = ucol_initUCA(status);
|
||||
|
||||
if(U_FAILURE(*status)){
|
||||
|
@ -294,11 +296,8 @@ ucol_openRules( const UChar *rules,
|
|||
fprintf(stderr, "invalid rule just before offset %i\n", src.current-src.source);
|
||||
}
|
||||
#endif
|
||||
ucol_tok_closeTokenList(&src);
|
||||
return NULL;
|
||||
goto cleanup;
|
||||
}
|
||||
UCollator *result = NULL;
|
||||
UCATableHeader *table = NULL;
|
||||
|
||||
if(src.resultLen > 0 || src.removeSet != NULL) { /* we have a set of rules, let's make something of it */
|
||||
/* also, if we wanted to remove some contractions, we should make a tailoring */
|
||||
|
@ -313,10 +312,8 @@ ucol_openRules( const UChar *rules,
|
|||
// set UCA version
|
||||
uprv_memcpy(table->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo));
|
||||
result = ucol_initCollator(table, 0, UCA, status);
|
||||
// Check for null result
|
||||
if (result == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
if (U_FAILURE(*status)) {
|
||||
goto cleanup;
|
||||
}
|
||||
result->hasRealData = TRUE;
|
||||
result->freeImageOnClose = TRUE;
|
||||
|
@ -326,9 +323,8 @@ ucol_openRules( const UChar *rules,
|
|||
// We will init the collator from UCA
|
||||
result = ucol_initCollator(UCA->image, 0, UCA, status);
|
||||
// Check for null result
|
||||
if (result == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
if (U_FAILURE(*status)) {
|
||||
goto cleanup;
|
||||
}
|
||||
// And set only the options
|
||||
UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
|
||||
|
|
|
@ -1435,6 +1435,9 @@ uint32_t ucol_tok_assembleTokenList(UColTokenParser *src, UParseError *parseErro
|
|||
// keep the flags around so that we know about before
|
||||
sourceToken->flags = src->parsedToken.flags;
|
||||
uhash_put(src->tailored, sourceToken, sourceToken, status);
|
||||
if(U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
/* we could have fished out a reset here */
|
||||
if(sourceToken->strength != UCOL_TOK_RESET && lastToken != sourceToken) {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
* Copyright (C) 2001-2006, International Business Machines
|
||||
* Copyright (C) 2001-2008, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
******************************************************************************
|
||||
*
|
||||
|
@ -37,134 +37,137 @@ ucol_openElements(const UCollator *coll,
|
|||
int32_t textLength,
|
||||
UErrorCode *status)
|
||||
{
|
||||
UCollationElements *result;
|
||||
UCollationElements *result;
|
||||
|
||||
if (U_FAILURE(*status)) {
|
||||
return NULL;
|
||||
}
|
||||
if (U_FAILURE(*status)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
result = (UCollationElements *)uprv_malloc(sizeof(UCollationElements));
|
||||
/* test for NULL */
|
||||
if (result == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
result = (UCollationElements *)uprv_malloc(sizeof(UCollationElements));
|
||||
/* test for NULL */
|
||||
if (result == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
result->reset_ = TRUE;
|
||||
result->isWritable = FALSE;
|
||||
result->reset_ = TRUE;
|
||||
result->isWritable = FALSE;
|
||||
|
||||
if (text == NULL) {
|
||||
textLength = 0;
|
||||
}
|
||||
uprv_init_collIterate(coll, text, textLength, &result->iteratordata_);
|
||||
if (text == NULL) {
|
||||
textLength = 0;
|
||||
}
|
||||
uprv_init_collIterate(coll, text, textLength, &result->iteratordata_);
|
||||
|
||||
return result;
|
||||
return result;
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucol_closeElements(UCollationElements *elems)
|
||||
{
|
||||
collIterate *ci = &elems->iteratordata_;
|
||||
if (ci->writableBuffer != ci->stackWritableBuffer) {
|
||||
uprv_free(ci->writableBuffer);
|
||||
}
|
||||
if (elems->isWritable && elems->iteratordata_.string != NULL)
|
||||
{
|
||||
uprv_free(elems->iteratordata_.string);
|
||||
}
|
||||
if (ci->extendCEs) {
|
||||
uprv_free(ci->extendCEs);
|
||||
}
|
||||
uprv_free(elems);
|
||||
if (elems != NULL) {
|
||||
collIterate *ci = &elems->iteratordata_;
|
||||
if (ci != NULL) {
|
||||
if (ci->writableBuffer != ci->stackWritableBuffer) {
|
||||
uprv_free(ci->writableBuffer);
|
||||
}
|
||||
if (ci->extendCEs) {
|
||||
uprv_free(ci->extendCEs);
|
||||
}
|
||||
}
|
||||
if (elems->isWritable && elems->iteratordata_.string != NULL)
|
||||
{
|
||||
uprv_free(elems->iteratordata_.string);
|
||||
}
|
||||
uprv_free(elems);
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucol_reset(UCollationElements *elems)
|
||||
{
|
||||
collIterate *ci = &(elems->iteratordata_);
|
||||
elems->reset_ = TRUE;
|
||||
ci->pos = ci->string;
|
||||
if ((ci->flags & UCOL_ITER_HASLEN) == 0 || ci->endp == NULL) {
|
||||
ci->endp = ci->string + u_strlen(ci->string);
|
||||
}
|
||||
ci->CEpos = ci->toReturn = ci->CEs;
|
||||
ci->flags = UCOL_ITER_HASLEN;
|
||||
if (ci->coll->normalizationMode == UCOL_ON) {
|
||||
ci->flags |= UCOL_ITER_NORM;
|
||||
}
|
||||
|
||||
if (ci->stackWritableBuffer != ci->writableBuffer) {
|
||||
uprv_free(ci->writableBuffer);
|
||||
ci->writableBuffer = ci->stackWritableBuffer;
|
||||
ci->writableBufSize = UCOL_WRITABLE_BUFFER_SIZE;
|
||||
}
|
||||
ci->fcdPosition = NULL;
|
||||
collIterate *ci = &(elems->iteratordata_);
|
||||
elems->reset_ = TRUE;
|
||||
ci->pos = ci->string;
|
||||
if ((ci->flags & UCOL_ITER_HASLEN) == 0 || ci->endp == NULL) {
|
||||
ci->endp = ci->string + u_strlen(ci->string);
|
||||
}
|
||||
ci->CEpos = ci->toReturn = ci->CEs;
|
||||
ci->flags = UCOL_ITER_HASLEN;
|
||||
if (ci->coll->normalizationMode == UCOL_ON) {
|
||||
ci->flags |= UCOL_ITER_NORM;
|
||||
}
|
||||
|
||||
if (ci->stackWritableBuffer != ci->writableBuffer) {
|
||||
uprv_free(ci->writableBuffer);
|
||||
ci->writableBuffer = ci->stackWritableBuffer;
|
||||
ci->writableBufSize = UCOL_WRITABLE_BUFFER_SIZE;
|
||||
}
|
||||
ci->fcdPosition = NULL;
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucol_next(UCollationElements *elems,
|
||||
UErrorCode *status)
|
||||
{
|
||||
int32_t result;
|
||||
if (U_FAILURE(*status)) {
|
||||
return UCOL_NULLORDER;
|
||||
}
|
||||
int32_t result;
|
||||
if (U_FAILURE(*status)) {
|
||||
return UCOL_NULLORDER;
|
||||
}
|
||||
|
||||
elems->reset_ = FALSE;
|
||||
elems->reset_ = FALSE;
|
||||
|
||||
result = (int32_t)ucol_getNextCE(elems->iteratordata_.coll,
|
||||
&elems->iteratordata_,
|
||||
status);
|
||||
|
||||
if (result == UCOL_NO_MORE_CES) {
|
||||
result = UCOL_NULLORDER;
|
||||
}
|
||||
return result;
|
||||
result = (int32_t)ucol_getNextCE(elems->iteratordata_.coll,
|
||||
&elems->iteratordata_,
|
||||
status);
|
||||
|
||||
if (result == UCOL_NO_MORE_CES) {
|
||||
result = UCOL_NULLORDER;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucol_previous(UCollationElements *elems,
|
||||
UErrorCode *status)
|
||||
{
|
||||
if(U_FAILURE(*status)) {
|
||||
return UCOL_NULLORDER;
|
||||
}
|
||||
else
|
||||
{
|
||||
int32_t result;
|
||||
if(U_FAILURE(*status)) {
|
||||
return UCOL_NULLORDER;
|
||||
}
|
||||
else
|
||||
{
|
||||
int32_t result;
|
||||
|
||||
if (elems->reset_ &&
|
||||
(elems->iteratordata_.pos == elems->iteratordata_.string)) {
|
||||
if (elems->iteratordata_.endp == NULL) {
|
||||
elems->iteratordata_.endp = elems->iteratordata_.string +
|
||||
u_strlen(elems->iteratordata_.string);
|
||||
elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
|
||||
if (elems->reset_ && (elems->iteratordata_.pos == elems->iteratordata_.string)) {
|
||||
if (elems->iteratordata_.endp == NULL) {
|
||||
elems->iteratordata_.endp = elems->iteratordata_.string +
|
||||
u_strlen(elems->iteratordata_.string);
|
||||
elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
|
||||
}
|
||||
elems->iteratordata_.pos = elems->iteratordata_.endp;
|
||||
elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
|
||||
}
|
||||
elems->iteratordata_.pos = elems->iteratordata_.endp;
|
||||
elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
|
||||
|
||||
elems->reset_ = FALSE;
|
||||
|
||||
result = (int32_t)ucol_getPrevCE(elems->iteratordata_.coll,
|
||||
&(elems->iteratordata_),
|
||||
status);
|
||||
|
||||
if (result == UCOL_NO_MORE_CES) {
|
||||
result = UCOL_NULLORDER;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
elems->reset_ = FALSE;
|
||||
|
||||
result = (int32_t)ucol_getPrevCE(elems->iteratordata_.coll,
|
||||
&(elems->iteratordata_),
|
||||
status);
|
||||
|
||||
if (result == UCOL_NO_MORE_CES) {
|
||||
result = UCOL_NULLORDER;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucol_getMaxExpansion(const UCollationElements *elems,
|
||||
int32_t order)
|
||||
{
|
||||
uint8_t result;
|
||||
UCOL_GETMAXEXPANSION(elems->iteratordata_.coll, (uint32_t)order, result);
|
||||
return result;
|
||||
uint8_t result;
|
||||
UCOL_GETMAXEXPANSION(elems->iteratordata_.coll, (uint32_t)order, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
|
@ -173,44 +176,44 @@ ucol_setText( UCollationElements *elems,
|
|||
int32_t textLength,
|
||||
UErrorCode *status)
|
||||
{
|
||||
if (U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
if (U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (elems->isWritable && elems->iteratordata_.string != NULL)
|
||||
{
|
||||
uprv_free(elems->iteratordata_.string);
|
||||
}
|
||||
|
||||
if (text == NULL) {
|
||||
textLength = 0;
|
||||
}
|
||||
if (elems->isWritable && elems->iteratordata_.string != NULL)
|
||||
{
|
||||
uprv_free(elems->iteratordata_.string);
|
||||
}
|
||||
|
||||
elems->isWritable = FALSE;
|
||||
uprv_init_collIterate(elems->iteratordata_.coll, text, textLength,
|
||||
&elems->iteratordata_);
|
||||
if (text == NULL) {
|
||||
textLength = 0;
|
||||
}
|
||||
|
||||
elems->reset_ = TRUE;
|
||||
elems->isWritable = FALSE;
|
||||
uprv_init_collIterate(elems->iteratordata_.coll, text, textLength,
|
||||
&elems->iteratordata_);
|
||||
|
||||
elems->reset_ = TRUE;
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucol_getOffset(const UCollationElements *elems)
|
||||
{
|
||||
const collIterate *ci = &(elems->iteratordata_);
|
||||
// while processing characters in normalization buffer getOffset will
|
||||
// return the next non-normalized character.
|
||||
// should be inline with the old implementation since the old codes uses
|
||||
// nextDecomp in normalizer which also decomposes the string till the
|
||||
// first base character is found.
|
||||
if (ci->flags & UCOL_ITER_INNORMBUF) {
|
||||
if (ci->fcdPosition == NULL) {
|
||||
return 0;
|
||||
}
|
||||
return (int32_t)(ci->fcdPosition - ci->string);
|
||||
}
|
||||
else {
|
||||
return (int32_t)(ci->pos - ci->string);
|
||||
}
|
||||
const collIterate *ci = &(elems->iteratordata_);
|
||||
// while processing characters in normalization buffer getOffset will
|
||||
// return the next non-normalized character.
|
||||
// should be inline with the old implementation since the old codes uses
|
||||
// nextDecomp in normalizer which also decomposes the string till the
|
||||
// first base character is found.
|
||||
if (ci->flags & UCOL_ITER_INNORMBUF) {
|
||||
if (ci->fcdPosition == NULL) {
|
||||
return 0;
|
||||
}
|
||||
return (int32_t)(ci->fcdPosition - ci->string);
|
||||
}
|
||||
else {
|
||||
return (int32_t)(ci->pos - ci->string);
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
|
@ -218,44 +221,44 @@ ucol_setOffset(UCollationElements *elems,
|
|||
int32_t offset,
|
||||
UErrorCode *status)
|
||||
{
|
||||
if (U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
if (U_FAILURE(*status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// this methods will clean up any use of the writable buffer and points to
|
||||
// the original string
|
||||
collIterate *ci = &(elems->iteratordata_);
|
||||
ci->pos = ci->string + offset;
|
||||
ci->CEpos = ci->toReturn = ci->CEs;
|
||||
if (ci->flags & UCOL_ITER_INNORMBUF) {
|
||||
ci->flags = ci->origFlags;
|
||||
}
|
||||
if ((ci->flags & UCOL_ITER_HASLEN) == 0) {
|
||||
ci->endp = ci->string + u_strlen(ci->string);
|
||||
ci->flags |= UCOL_ITER_HASLEN;
|
||||
}
|
||||
ci->fcdPosition = NULL;
|
||||
elems->reset_ = FALSE;
|
||||
// this methods will clean up any use of the writable buffer and points to
|
||||
// the original string
|
||||
collIterate *ci = &(elems->iteratordata_);
|
||||
ci->pos = ci->string + offset;
|
||||
ci->CEpos = ci->toReturn = ci->CEs;
|
||||
if (ci->flags & UCOL_ITER_INNORMBUF) {
|
||||
ci->flags = ci->origFlags;
|
||||
}
|
||||
if ((ci->flags & UCOL_ITER_HASLEN) == 0) {
|
||||
ci->endp = ci->string + u_strlen(ci->string);
|
||||
ci->flags |= UCOL_ITER_HASLEN;
|
||||
}
|
||||
ci->fcdPosition = NULL;
|
||||
elems->reset_ = FALSE;
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucol_primaryOrder (int32_t order)
|
||||
{
|
||||
order &= UCOL_PRIMARYMASK;
|
||||
return (order >> UCOL_PRIMARYORDERSHIFT);
|
||||
order &= UCOL_PRIMARYMASK;
|
||||
return (order >> UCOL_PRIMARYORDERSHIFT);
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucol_secondaryOrder (int32_t order)
|
||||
{
|
||||
order &= UCOL_SECONDARYMASK;
|
||||
return (order >> UCOL_SECONDARYORDERSHIFT);
|
||||
order &= UCOL_SECONDARYMASK;
|
||||
return (order >> UCOL_SECONDARYORDERSHIFT);
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucol_tertiaryOrder (int32_t order)
|
||||
{
|
||||
return (order & UCOL_TERTIARYMASK);
|
||||
return (order & UCOL_TERTIARYMASK);
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_COLLATION */
|
||||
|
|
|
@ -2616,6 +2616,7 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
|
|||
result->ownCollator = FALSE;
|
||||
result->search->matchedLength = 0;
|
||||
result->search->matchedIndex = USEARCH_DONE;
|
||||
result->utilIter = NULL;
|
||||
result->textIter = ucol_openElements(collator, text,
|
||||
textlength, status);
|
||||
if (U_FAILURE(*status)) {
|
||||
|
@ -2623,8 +2624,6 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
|
|||
return NULL;
|
||||
}
|
||||
|
||||
result->utilIter = NULL;
|
||||
|
||||
result->search->isOverlap = FALSE;
|
||||
result->search->isCanonicalMatch = FALSE;
|
||||
result->search->isForwardSearching = TRUE;
|
||||
|
|
|
@ -1051,208 +1051,217 @@ typedef struct {
|
|||
static indirectBoundaries ucolIndirectBoundaries[15];
|
||||
static UBool indirectBoundariesSet = FALSE;
|
||||
static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
|
||||
|
||||
/* Set values for the top - TODO: once we have values for all the indirects, we are going */
|
||||
/* to initalize here. */
|
||||
ucolIndirectBoundaries[indexR].startCE = start[0];
|
||||
ucolIndirectBoundaries[indexR].startContCE = start[1];
|
||||
if(end) {
|
||||
ucolIndirectBoundaries[indexR].limitCE = end[0];
|
||||
ucolIndirectBoundaries[indexR].limitContCE = end[1];
|
||||
} else {
|
||||
ucolIndirectBoundaries[indexR].limitCE = 0;
|
||||
ucolIndirectBoundaries[indexR].limitContCE = 0;
|
||||
}
|
||||
/* Set values for the top - TODO: once we have values for all the indirects, we are going */
|
||||
/* to initalize here. */
|
||||
ucolIndirectBoundaries[indexR].startCE = start[0];
|
||||
ucolIndirectBoundaries[indexR].startContCE = start[1];
|
||||
if(end) {
|
||||
ucolIndirectBoundaries[indexR].limitCE = end[0];
|
||||
ucolIndirectBoundaries[indexR].limitContCE = end[1];
|
||||
} else {
|
||||
ucolIndirectBoundaries[indexR].limitCE = 0;
|
||||
ucolIndirectBoundaries[indexR].limitContCE = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void testCEs(UCollator *coll, UErrorCode *status) {
|
||||
const UChar *rules = NULL, *current = NULL;
|
||||
int32_t ruleLen = 0;
|
||||
|
||||
const UChar *rules = NULL, *current = NULL;
|
||||
int32_t ruleLen = 0;
|
||||
uint32_t strength = 0;
|
||||
uint32_t maxStrength = UCOL_IDENTICAL;
|
||||
uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
|
||||
uint32_t lastCE;
|
||||
uint32_t lastContCE;
|
||||
|
||||
uint32_t strength = 0;
|
||||
uint32_t maxStrength = UCOL_IDENTICAL;
|
||||
uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
|
||||
uint32_t lastCE;
|
||||
uint32_t lastContCE;
|
||||
int32_t result = 0;
|
||||
uint32_t chOffset = 0; uint32_t chLen = 0;
|
||||
uint32_t exOffset = 0; uint32_t exLen = 0;
|
||||
uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
|
||||
uint32_t oldOffset = 0;
|
||||
|
||||
int32_t result = 0;
|
||||
uint32_t chOffset = 0; uint32_t chLen = 0;
|
||||
uint32_t exOffset = 0; uint32_t exLen = 0;
|
||||
uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
|
||||
uint32_t oldOffset = 0;
|
||||
/* uint32_t rExpsLen = 0; */
|
||||
/* uint32_t firstLen = 0; */
|
||||
uint16_t specs = 0;
|
||||
UBool varT = FALSE; UBool top_ = TRUE;
|
||||
UBool startOfRules = TRUE;
|
||||
UBool before = FALSE;
|
||||
UColTokenParser src;
|
||||
UColOptionSet opts;
|
||||
UParseError parseError;
|
||||
UChar *rulesCopy = NULL;
|
||||
collIterate c;
|
||||
UCAConstants *consts = NULL;
|
||||
uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
|
||||
UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
|
||||
UCollator *UCA = ucol_open("root", status);
|
||||
|
||||
/* uint32_t rExpsLen = 0; */
|
||||
/* uint32_t firstLen = 0; */
|
||||
uint16_t specs = 0;
|
||||
UBool varT = FALSE; UBool top_ = TRUE;
|
||||
UBool startOfRules = TRUE;
|
||||
UBool before = FALSE;
|
||||
UColTokenParser src;
|
||||
UColOptionSet opts;
|
||||
UParseError parseError;
|
||||
UChar *rulesCopy = NULL;
|
||||
collIterate c;
|
||||
UCollator *UCA = ucol_open("root", status);
|
||||
UCAConstants *consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
|
||||
uint32_t UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0], /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1], */
|
||||
UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0], UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
|
||||
|
||||
baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
|
||||
|
||||
src.opts = &opts;
|
||||
|
||||
rules = ucol_getRules(coll, &ruleLen);
|
||||
|
||||
src.invUCA = ucol_initInverseUCA(status);
|
||||
|
||||
if(indirectBoundariesSet == FALSE) {
|
||||
/* UCOL_RESET_TOP_VALUE */
|
||||
setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
|
||||
/* UCOL_FIRST_PRIMARY_IGNORABLE */
|
||||
setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
|
||||
/* UCOL_LAST_PRIMARY_IGNORABLE */
|
||||
setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
|
||||
/* UCOL_FIRST_SECONDARY_IGNORABLE */
|
||||
setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
|
||||
/* UCOL_LAST_SECONDARY_IGNORABLE */
|
||||
setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
|
||||
/* UCOL_FIRST_TERTIARY_IGNORABLE */
|
||||
setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
|
||||
/* UCOL_LAST_TERTIARY_IGNORABLE */
|
||||
setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
|
||||
/* UCOL_FIRST_VARIABLE */
|
||||
setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
|
||||
/* UCOL_LAST_VARIABLE */
|
||||
setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
|
||||
/* UCOL_FIRST_NON_VARIABLE */
|
||||
setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
|
||||
/* UCOL_LAST_NON_VARIABLE */
|
||||
setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
|
||||
/* UCOL_FIRST_IMPLICIT */
|
||||
setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
|
||||
/* UCOL_LAST_IMPLICIT */
|
||||
setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
|
||||
/* UCOL_FIRST_TRAILING */
|
||||
setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
|
||||
/* UCOL_LAST_TRAILING */
|
||||
setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
|
||||
ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
|
||||
indirectBoundariesSet = TRUE;
|
||||
}
|
||||
|
||||
|
||||
if(U_SUCCESS(*status) && ruleLen > 0) {
|
||||
rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
|
||||
uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
|
||||
src.current = src.source = rulesCopy;
|
||||
src.end = rulesCopy+ruleLen;
|
||||
src.extraCurrent = src.end;
|
||||
src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
|
||||
|
||||
while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
|
||||
strength = src.parsedToken.strength;
|
||||
chOffset = src.parsedToken.charsOffset;
|
||||
chLen = src.parsedToken.charsLen;
|
||||
exOffset = src.parsedToken.extensionOffset;
|
||||
exLen = src.parsedToken.extensionLen;
|
||||
prefixOffset = src.parsedToken.prefixOffset;
|
||||
prefixLen = src.parsedToken.prefixLen;
|
||||
specs = src.parsedToken.flags;
|
||||
|
||||
startOfRules = FALSE;
|
||||
varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
|
||||
top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
|
||||
|
||||
uprv_init_collIterate(coll, rulesCopy+chOffset, chLen, &c);
|
||||
|
||||
currCE = ucol_getNextCE(coll, &c, status);
|
||||
if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(rulesCopy+chOffset))) {
|
||||
log_verbose("Thai prevowel detected. Will pick next CE\n");
|
||||
currCE = ucol_getNextCE(coll, &c, status);
|
||||
}
|
||||
|
||||
currContCE = ucol_getNextCE(coll, &c, status);
|
||||
if(!isContinuation(currContCE)) {
|
||||
currContCE = 0;
|
||||
}
|
||||
|
||||
/* we need to repack CEs here */
|
||||
|
||||
if(strength == UCOL_TOK_RESET) {
|
||||
before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
|
||||
if(top_ == TRUE) {
|
||||
int32_t index = src.parsedToken.indirectIndex;
|
||||
|
||||
nextCE = baseCE = currCE = ucolIndirectBoundaries[index].startCE;
|
||||
nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[index].startContCE;
|
||||
} else {
|
||||
nextCE = baseCE = currCE;
|
||||
nextContCE = baseContCE = currContCE;
|
||||
}
|
||||
maxStrength = UCOL_IDENTICAL;
|
||||
} else {
|
||||
if(strength < maxStrength) {
|
||||
maxStrength = strength;
|
||||
if(baseCE == UCOL_RESET_TOP_VALUE) {
|
||||
log_verbose("Resetting to [top]\n");
|
||||
nextCE = UCOL_NEXT_TOP_VALUE;
|
||||
nextContCE = UCOL_NEXT_TOP_CONT;
|
||||
} else {
|
||||
result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
|
||||
}
|
||||
if(result < 0) {
|
||||
if(ucol_isTailored(coll, *(rulesCopy+oldOffset), status)) {
|
||||
log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(rulesCopy+oldOffset));
|
||||
return;
|
||||
} else {
|
||||
log_err("couldn't find the CE\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
currCE &= 0xFFFFFF3F;
|
||||
currContCE &= 0xFFFFFFBF;
|
||||
|
||||
if(maxStrength == UCOL_IDENTICAL) {
|
||||
if(baseCE != currCE || baseContCE != currContCE) {
|
||||
log_err("current CE (initial strength UCOL_EQUAL)\n");
|
||||
}
|
||||
} else {
|
||||
if(strength == UCOL_IDENTICAL) {
|
||||
if(lastCE != currCE || lastContCE != currContCE) {
|
||||
log_err("current CE (initial strength UCOL_EQUAL)\n");
|
||||
}
|
||||
} else {
|
||||
if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
|
||||
/*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
|
||||
log_err("current CE is not less than base CE\n");
|
||||
}
|
||||
if(!before) {
|
||||
if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
|
||||
/*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
|
||||
log_err("sequence of generated CEs is broken\n");
|
||||
}
|
||||
} else {
|
||||
before = FALSE;
|
||||
if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
|
||||
/*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
|
||||
log_err("sequence of generated CEs is broken\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
oldOffset = chOffset;
|
||||
lastCE = currCE & 0xFFFFFF3F;
|
||||
lastContCE = currContCE & 0xFFFFFFBF;
|
||||
if (U_FAILURE(*status)) {
|
||||
log_err("Could not open root collator %s\n", u_errorName(*status));
|
||||
return;
|
||||
}
|
||||
free(rulesCopy);
|
||||
}
|
||||
ucol_close(UCA);
|
||||
|
||||
consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
|
||||
UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
|
||||
/*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
|
||||
UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
|
||||
UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
|
||||
|
||||
baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
|
||||
|
||||
src.opts = &opts;
|
||||
|
||||
rules = ucol_getRules(coll, &ruleLen);
|
||||
|
||||
src.invUCA = ucol_initInverseUCA(status);
|
||||
|
||||
if(indirectBoundariesSet == FALSE) {
|
||||
/* UCOL_RESET_TOP_VALUE */
|
||||
setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
|
||||
/* UCOL_FIRST_PRIMARY_IGNORABLE */
|
||||
setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
|
||||
/* UCOL_LAST_PRIMARY_IGNORABLE */
|
||||
setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
|
||||
/* UCOL_FIRST_SECONDARY_IGNORABLE */
|
||||
setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
|
||||
/* UCOL_LAST_SECONDARY_IGNORABLE */
|
||||
setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
|
||||
/* UCOL_FIRST_TERTIARY_IGNORABLE */
|
||||
setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
|
||||
/* UCOL_LAST_TERTIARY_IGNORABLE */
|
||||
setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
|
||||
/* UCOL_FIRST_VARIABLE */
|
||||
setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
|
||||
/* UCOL_LAST_VARIABLE */
|
||||
setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
|
||||
/* UCOL_FIRST_NON_VARIABLE */
|
||||
setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
|
||||
/* UCOL_LAST_NON_VARIABLE */
|
||||
setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
|
||||
/* UCOL_FIRST_IMPLICIT */
|
||||
setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
|
||||
/* UCOL_LAST_IMPLICIT */
|
||||
setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
|
||||
/* UCOL_FIRST_TRAILING */
|
||||
setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
|
||||
/* UCOL_LAST_TRAILING */
|
||||
setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
|
||||
ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
|
||||
indirectBoundariesSet = TRUE;
|
||||
}
|
||||
|
||||
|
||||
if(U_SUCCESS(*status) && ruleLen > 0) {
|
||||
rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
|
||||
uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
|
||||
src.current = src.source = rulesCopy;
|
||||
src.end = rulesCopy+ruleLen;
|
||||
src.extraCurrent = src.end;
|
||||
src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
|
||||
|
||||
while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
|
||||
strength = src.parsedToken.strength;
|
||||
chOffset = src.parsedToken.charsOffset;
|
||||
chLen = src.parsedToken.charsLen;
|
||||
exOffset = src.parsedToken.extensionOffset;
|
||||
exLen = src.parsedToken.extensionLen;
|
||||
prefixOffset = src.parsedToken.prefixOffset;
|
||||
prefixLen = src.parsedToken.prefixLen;
|
||||
specs = src.parsedToken.flags;
|
||||
|
||||
startOfRules = FALSE;
|
||||
varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
|
||||
top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
|
||||
|
||||
uprv_init_collIterate(coll, rulesCopy+chOffset, chLen, &c);
|
||||
|
||||
currCE = ucol_getNextCE(coll, &c, status);
|
||||
if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(rulesCopy+chOffset))) {
|
||||
log_verbose("Thai prevowel detected. Will pick next CE\n");
|
||||
currCE = ucol_getNextCE(coll, &c, status);
|
||||
}
|
||||
|
||||
currContCE = ucol_getNextCE(coll, &c, status);
|
||||
if(!isContinuation(currContCE)) {
|
||||
currContCE = 0;
|
||||
}
|
||||
|
||||
/* we need to repack CEs here */
|
||||
|
||||
if(strength == UCOL_TOK_RESET) {
|
||||
before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
|
||||
if(top_ == TRUE) {
|
||||
int32_t index = src.parsedToken.indirectIndex;
|
||||
|
||||
nextCE = baseCE = currCE = ucolIndirectBoundaries[index].startCE;
|
||||
nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[index].startContCE;
|
||||
} else {
|
||||
nextCE = baseCE = currCE;
|
||||
nextContCE = baseContCE = currContCE;
|
||||
}
|
||||
maxStrength = UCOL_IDENTICAL;
|
||||
} else {
|
||||
if(strength < maxStrength) {
|
||||
maxStrength = strength;
|
||||
if(baseCE == UCOL_RESET_TOP_VALUE) {
|
||||
log_verbose("Resetting to [top]\n");
|
||||
nextCE = UCOL_NEXT_TOP_VALUE;
|
||||
nextContCE = UCOL_NEXT_TOP_CONT;
|
||||
} else {
|
||||
result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
|
||||
}
|
||||
if(result < 0) {
|
||||
if(ucol_isTailored(coll, *(rulesCopy+oldOffset), status)) {
|
||||
log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(rulesCopy+oldOffset));
|
||||
return;
|
||||
} else {
|
||||
log_err("couldn't find the CE\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
currCE &= 0xFFFFFF3F;
|
||||
currContCE &= 0xFFFFFFBF;
|
||||
|
||||
if(maxStrength == UCOL_IDENTICAL) {
|
||||
if(baseCE != currCE || baseContCE != currContCE) {
|
||||
log_err("current CE (initial strength UCOL_EQUAL)\n");
|
||||
}
|
||||
} else {
|
||||
if(strength == UCOL_IDENTICAL) {
|
||||
if(lastCE != currCE || lastContCE != currContCE) {
|
||||
log_err("current CE (initial strength UCOL_EQUAL)\n");
|
||||
}
|
||||
} else {
|
||||
if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
|
||||
/*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
|
||||
log_err("current CE is not less than base CE\n");
|
||||
}
|
||||
if(!before) {
|
||||
if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
|
||||
/*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
|
||||
log_err("sequence of generated CEs is broken\n");
|
||||
}
|
||||
} else {
|
||||
before = FALSE;
|
||||
if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
|
||||
/*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
|
||||
log_err("sequence of generated CEs is broken\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
oldOffset = chOffset;
|
||||
lastCE = currCE & 0xFFFFFF3F;
|
||||
lastContCE = currContCE & 0xFFFFFFBF;
|
||||
}
|
||||
free(rulesCopy);
|
||||
}
|
||||
ucol_close(UCA);
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
@ -1608,6 +1617,10 @@ static void TestComposeDecompose(void) {
|
|||
return;
|
||||
}
|
||||
charsToTestSize = uset_size(charsToTest);
|
||||
if (charsToTestSize <= 0) {
|
||||
log_err("Set was zero. Missing data?\n");
|
||||
return;
|
||||
}
|
||||
t = malloc(charsToTestSize * sizeof(tester *));
|
||||
t[0] = (tester *)malloc(sizeof(tester));
|
||||
log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
********************************************************************************
|
||||
*
|
||||
* Copyright (C) 1996-2007, International Business Machines
|
||||
* Copyright (C) 1996-2008, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
********************************************************************************
|
||||
|
@ -89,7 +89,8 @@ int ERR_MSG =1; /* error messages will be displayed by default*/
|
|||
int QUICK = 1; /* Skip some of the slower tests? */
|
||||
int WARN_ON_MISSING_DATA = 0; /* Reduce data errs to warnings? */
|
||||
UTraceLevel ICU_TRACE = UTRACE_OFF; /* ICU tracing level */
|
||||
size_t MAX_MEMORY_ALLOCATION = (size_t)-1; /* Maximum library memory allocation allowed. */
|
||||
size_t MINIMUM_MEMORY_SIZE_FAILURE = (size_t)-1; /* Minimum library memory allocation window that will fail. */
|
||||
size_t MAXIMUM_MEMORY_SIZE_FAILURE = (size_t)-1; /* Maximum library memory allocation window that will fail. */
|
||||
int32_t ALLOCATION_COUNT = 0;
|
||||
/*-------------------------------------------*/
|
||||
|
||||
|
@ -557,7 +558,7 @@ static void *U_CALLCONV ctest_libMalloc(const void *context, size_t size) {
|
|||
/*if (VERBOSITY) {
|
||||
printf("Allocated %ld\n", (long)size);
|
||||
}*/
|
||||
if (size >= MAX_MEMORY_ALLOCATION) {
|
||||
if (MINIMUM_MEMORY_SIZE_FAILURE <= size && size <= MAXIMUM_MEMORY_SIZE_FAILURE) {
|
||||
return NULL;
|
||||
}
|
||||
umtx_atomic_inc(&ALLOCATION_COUNT);
|
||||
|
@ -567,7 +568,7 @@ static void *U_CALLCONV ctest_libRealloc(const void *context, void *mem, size_t
|
|||
/*if (VERBOSITY) {
|
||||
printf("Reallocated %ld\n", (long)size);
|
||||
}*/
|
||||
if (size >= MAX_MEMORY_ALLOCATION) {
|
||||
if (MINIMUM_MEMORY_SIZE_FAILURE <= size && size <= MAXIMUM_MEMORY_SIZE_FAILURE) {
|
||||
/*free(mem);*/ /* Realloc doesn't free on failure. */
|
||||
return NULL;
|
||||
}
|
||||
|
@ -628,15 +629,25 @@ initArgs( int argc, const char* const argv[])
|
|||
if (i+1 < argc) {
|
||||
char *endPtr = NULL;
|
||||
i++;
|
||||
MAX_MEMORY_ALLOCATION = (size_t)strtol(argv[i], &endPtr, 10);
|
||||
MINIMUM_MEMORY_SIZE_FAILURE = (size_t)strtol(argv[i], &endPtr, 10);
|
||||
if (endPtr == argv[i]) {
|
||||
printf("Can't parse %s\n", argv[i]);
|
||||
help( argv[0] );
|
||||
help(argv[0]);
|
||||
return 0;
|
||||
}
|
||||
if (*endPtr == '-') {
|
||||
char *maxPtr = endPtr+1;
|
||||
endPtr = NULL;
|
||||
MAXIMUM_MEMORY_SIZE_FAILURE = (size_t)strtol(maxPtr, &endPtr, 10);
|
||||
if (endPtr == argv[i]) {
|
||||
printf("Can't parse %s\n", argv[i]);
|
||||
help(argv[0]);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Use the default value */
|
||||
u_setMemoryFunctions(&MAX_MEMORY_ALLOCATION, ctest_libMalloc, ctest_libRealloc, ctest_libFree, &errorCode);
|
||||
u_setMemoryFunctions(NULL, ctest_libMalloc, ctest_libRealloc, ctest_libFree, &errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
printf("u_setMemoryFunctions returned %s\n", u_errorName(errorCode));
|
||||
return 0;
|
||||
|
@ -764,8 +775,8 @@ runTestRequest(const TestNode* root,
|
|||
static void help ( const char *argv0 )
|
||||
{
|
||||
printf("Usage: %s [ -l ] [ -v ] [ -verbose] [-a] [ -all] [-n] [ -no_err_msg]\n"
|
||||
" [ -h ] [-t_info | -t_error | -t_warn | -t_oc | -t_verbose]"
|
||||
" [ /path/to/test ]\n",
|
||||
" [ -h ] [-t_info | -t_error | -t_warn | -t_oc | -t_verbose] [-m n[-q] ]\n"
|
||||
" [ /path/to/test ]\n",
|
||||
argv0);
|
||||
printf(" -l To get a list of test names\n");
|
||||
printf(" -e to do exhaustive testing\n");
|
||||
|
@ -777,10 +788,10 @@ static void help ( const char *argv0 )
|
|||
" user has reduced/changed the common set of ICU data \n");
|
||||
printf(" -t_info | -t_error | -t_warn | -t_oc | -t_verbose Enable ICU tracing\n");
|
||||
printf(" -no_err_msg (same as -n) \n");
|
||||
printf(" -m n Maximum size of library allocation allowed.\n");
|
||||
printf(" The default is the maximum value of size_t\n");
|
||||
printf(" -r repeat tests after calling u_cleanup \n");
|
||||
printf(" -[/subtest] To run a subtest \n");
|
||||
printf(" -m n[-q] Min-Max memory size that will cause an allocation failure.\n");
|
||||
printf(" The default is the maximum value of size_t. Max is optional.\n");
|
||||
printf(" -r Repeat tests after calling u_cleanup \n");
|
||||
printf(" [/subtest] To run a subtest \n");
|
||||
printf(" eg: to run just the utility tests type: cintltest /tsutil) \n");
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue