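ICU-6076 Use window failure testing (allocations whose size falls inside a min-max window fail) instead of max size testing (every allocation at or above one size fails). This allows you to skip some hash table failures, because hash tables grow by powers of 2.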

X-SVN-Rev: 23409
This commit is contained in:
George Rhoten 2008-02-10 20:17:14 +00:00
parent fb5e448868
commit 2bc412e598
8 changed files with 419 additions and 385 deletions

View file

@ -7230,7 +7230,7 @@ inline void UCOL_INIT_CEBUF(ucol_CEBuf *b) {
}
static
void ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci) {
void ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci, UErrorCode *status) {
uint32_t oldSize;
uint32_t newSize;
uint32_t *newBuf;
@ -7239,7 +7239,10 @@ void ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci) {
oldSize = b->pos - b->buf;
newSize = oldSize * 2;
newBuf = (uint32_t *)uprv_malloc(newSize * sizeof(uint32_t));
if(newBuf != NULL) {
if(newBuf == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
}
else {
uprv_memcpy(newBuf, b->buf, oldSize * sizeof(uint32_t));
if (b->buf != b->localArray) {
uprv_free(b->buf);
@ -7251,11 +7254,13 @@ void ucol_CEBuf_Expand(ucol_CEBuf *b, collIterate *ci) {
}
static
inline void UCOL_CEBUF_PUT(ucol_CEBuf *b, uint32_t ce, collIterate *ci) {
inline void UCOL_CEBUF_PUT(ucol_CEBuf *b, uint32_t ce, collIterate *ci, UErrorCode *status) {
if (b->pos == b->endp) {
ucol_CEBuf_Expand(b, ci);
ucol_CEBuf_Expand(b, ci, status);
}
if (U_SUCCESS(*status)) {
*(b)->pos++ = ce;
}
*(b)->pos++ = ce;
}
/* This is a trick string compare function that goes in and uses sortkeys to compare */
@ -7400,7 +7405,7 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
// We get the next CE
sOrder = ucol_IGetNextCE(coll, sColl, status);
// Stuff it in the buffer
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl);
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
// And keep just the primary part.
sOrder &= UCOL_PRIMARYMASK;
} while(sOrder == 0);
@ -7408,7 +7413,7 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
// see the comments on the above block
do {
tOrder = ucol_IGetNextCE(coll, tColl, status);
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl);
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
tOrder &= UCOL_PRIMARYMASK;
} while(tOrder == 0);
@ -7439,7 +7444,7 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
for(;;) {
sOrder = ucol_IGetNextCE(coll, sColl, status);
if(sOrder == UCOL_NO_MORE_CES) {
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl);
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
break;
} else if(sOrder == 0 || (sInShifted && (sOrder & UCOL_PRIMARYMASK) == 0)) {
/* UCA amendment - ignore ignorables that follow shifted code points */
@ -7448,32 +7453,32 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
if((sOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary value */
if(sInShifted) {
sOrder = (sOrder & UCOL_PRIMARYMASK) | 0xC0; /* preserve interesting continuation */
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl);
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
continue;
} else {
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl);
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
break;
}
} else { /* Just lower level values */
if(sInShifted) {
continue;
} else {
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl);
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
continue;
}
}
} else { /* regular */
if((sOrder & UCOL_PRIMARYMASK) > LVT) {
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl);
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
break;
} else {
if((sOrder & UCOL_PRIMARYMASK) > 0) {
sInShifted = TRUE;
sOrder &= UCOL_PRIMARYMASK;
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl);
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
continue;
} else {
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl);
UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);
sInShifted = FALSE;
continue;
}
@ -7486,7 +7491,7 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
for(;;) {
tOrder = ucol_IGetNextCE(coll, tColl, status);
if(tOrder == UCOL_NO_MORE_CES) {
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl);
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
break;
} else if(tOrder == 0 || (tInShifted && (tOrder & UCOL_PRIMARYMASK) == 0)) {
/* UCA amendment - ignore ignorables that follow shifted code points */
@ -7495,32 +7500,32 @@ ucol_strcollRegular( collIterate *sColl, collIterate *tColl,
if((tOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary value */
if(tInShifted) {
tOrder = (tOrder & UCOL_PRIMARYMASK) | 0xC0; /* preserve interesting continuation */
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl);
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
continue;
} else {
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl);
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
break;
}
} else { /* Just lower level values */
if(tInShifted) {
continue;
} else {
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl);
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
continue;
}
}
} else { /* regular */
if((tOrder & UCOL_PRIMARYMASK) > LVT) {
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl);
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
break;
} else {
if((tOrder & UCOL_PRIMARYMASK) > 0) {
tInShifted = TRUE;
tOrder &= UCOL_PRIMARYMASK;
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl);
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
continue;
} else {
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl);
UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);
tInShifted = FALSE;
continue;
}

View file

@ -159,6 +159,9 @@ uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollat
uhash_setValueDeleter(t->prefixLookup, uhash_freeBlock);
t->contractions = uprv_cnttab_open(t->mapping, status);
if (U_FAILURE(*status)) {
goto cleanup;
}
/* copy UCA's maxexpansion and merge as we go along */
if (UCA != NULL) {
@ -213,8 +216,9 @@ uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollat
return t;
allocation_failure:
uprv_uca_closeTempTable(t);
*status = U_MEMORY_ALLOCATION_ERROR;
cleanup:
uprv_uca_closeTempTable(t);
return NULL;
}

View file

@ -274,6 +274,8 @@ ucol_openRules( const UChar *rules,
return 0;
}
UCollator *result = NULL;
UCATableHeader *table = NULL;
UCollator *UCA = ucol_initUCA(status);
if(U_FAILURE(*status)){
@ -294,11 +296,8 @@ ucol_openRules( const UChar *rules,
fprintf(stderr, "invalid rule just before offset %i\n", src.current-src.source);
}
#endif
ucol_tok_closeTokenList(&src);
return NULL;
goto cleanup;
}
UCollator *result = NULL;
UCATableHeader *table = NULL;
if(src.resultLen > 0 || src.removeSet != NULL) { /* we have a set of rules, let's make something of it */
/* also, if we wanted to remove some contractions, we should make a tailoring */
@ -313,10 +312,8 @@ ucol_openRules( const UChar *rules,
// set UCA version
uprv_memcpy(table->UCAVersion, UCA->image->UCAVersion, sizeof(UVersionInfo));
result = ucol_initCollator(table, 0, UCA, status);
// Check for null result
if (result == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
if (U_FAILURE(*status)) {
goto cleanup;
}
result->hasRealData = TRUE;
result->freeImageOnClose = TRUE;
@ -326,9 +323,8 @@ ucol_openRules( const UChar *rules,
// We will init the collator from UCA
result = ucol_initCollator(UCA->image, 0, UCA, status);
// Check for null result
if (result == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
if (U_FAILURE(*status)) {
goto cleanup;
}
// And set only the options
UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));

View file

@ -1435,6 +1435,9 @@ uint32_t ucol_tok_assembleTokenList(UColTokenParser *src, UParseError *parseErro
// keep the flags around so that we know about before
sourceToken->flags = src->parsedToken.flags;
uhash_put(src->tailored, sourceToken, sourceToken, status);
if(U_FAILURE(*status)) {
return 0;
}
} else {
/* we could have fished out a reset here */
if(sourceToken->strength != UCOL_TOK_RESET && lastToken != sourceToken) {

View file

@ -1,6 +1,6 @@
/*
******************************************************************************
* Copyright (C) 2001-2006, International Business Machines
* Copyright (C) 2001-2008, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
*
@ -37,134 +37,137 @@ ucol_openElements(const UCollator *coll,
int32_t textLength,
UErrorCode *status)
{
UCollationElements *result;
UCollationElements *result;
if (U_FAILURE(*status)) {
return NULL;
}
if (U_FAILURE(*status)) {
return NULL;
}
result = (UCollationElements *)uprv_malloc(sizeof(UCollationElements));
/* test for NULL */
if (result == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
result = (UCollationElements *)uprv_malloc(sizeof(UCollationElements));
/* test for NULL */
if (result == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
result->reset_ = TRUE;
result->isWritable = FALSE;
result->reset_ = TRUE;
result->isWritable = FALSE;
if (text == NULL) {
textLength = 0;
}
uprv_init_collIterate(coll, text, textLength, &result->iteratordata_);
if (text == NULL) {
textLength = 0;
}
uprv_init_collIterate(coll, text, textLength, &result->iteratordata_);
return result;
return result;
}
U_CAPI void U_EXPORT2
ucol_closeElements(UCollationElements *elems)
{
collIterate *ci = &elems->iteratordata_;
if (ci->writableBuffer != ci->stackWritableBuffer) {
uprv_free(ci->writableBuffer);
}
if (elems->isWritable && elems->iteratordata_.string != NULL)
{
uprv_free(elems->iteratordata_.string);
}
if (ci->extendCEs) {
uprv_free(ci->extendCEs);
}
uprv_free(elems);
if (elems != NULL) {
collIterate *ci = &elems->iteratordata_;
if (ci != NULL) {
if (ci->writableBuffer != ci->stackWritableBuffer) {
uprv_free(ci->writableBuffer);
}
if (ci->extendCEs) {
uprv_free(ci->extendCEs);
}
}
if (elems->isWritable && elems->iteratordata_.string != NULL)
{
uprv_free(elems->iteratordata_.string);
}
uprv_free(elems);
}
}
U_CAPI void U_EXPORT2
ucol_reset(UCollationElements *elems)
{
collIterate *ci = &(elems->iteratordata_);
elems->reset_ = TRUE;
ci->pos = ci->string;
if ((ci->flags & UCOL_ITER_HASLEN) == 0 || ci->endp == NULL) {
ci->endp = ci->string + u_strlen(ci->string);
}
ci->CEpos = ci->toReturn = ci->CEs;
ci->flags = UCOL_ITER_HASLEN;
if (ci->coll->normalizationMode == UCOL_ON) {
ci->flags |= UCOL_ITER_NORM;
}
if (ci->stackWritableBuffer != ci->writableBuffer) {
uprv_free(ci->writableBuffer);
ci->writableBuffer = ci->stackWritableBuffer;
ci->writableBufSize = UCOL_WRITABLE_BUFFER_SIZE;
}
ci->fcdPosition = NULL;
collIterate *ci = &(elems->iteratordata_);
elems->reset_ = TRUE;
ci->pos = ci->string;
if ((ci->flags & UCOL_ITER_HASLEN) == 0 || ci->endp == NULL) {
ci->endp = ci->string + u_strlen(ci->string);
}
ci->CEpos = ci->toReturn = ci->CEs;
ci->flags = UCOL_ITER_HASLEN;
if (ci->coll->normalizationMode == UCOL_ON) {
ci->flags |= UCOL_ITER_NORM;
}
if (ci->stackWritableBuffer != ci->writableBuffer) {
uprv_free(ci->writableBuffer);
ci->writableBuffer = ci->stackWritableBuffer;
ci->writableBufSize = UCOL_WRITABLE_BUFFER_SIZE;
}
ci->fcdPosition = NULL;
}
U_CAPI int32_t U_EXPORT2
ucol_next(UCollationElements *elems,
UErrorCode *status)
{
int32_t result;
if (U_FAILURE(*status)) {
return UCOL_NULLORDER;
}
int32_t result;
if (U_FAILURE(*status)) {
return UCOL_NULLORDER;
}
elems->reset_ = FALSE;
elems->reset_ = FALSE;
result = (int32_t)ucol_getNextCE(elems->iteratordata_.coll,
&elems->iteratordata_,
status);
if (result == UCOL_NO_MORE_CES) {
result = UCOL_NULLORDER;
}
return result;
result = (int32_t)ucol_getNextCE(elems->iteratordata_.coll,
&elems->iteratordata_,
status);
if (result == UCOL_NO_MORE_CES) {
result = UCOL_NULLORDER;
}
return result;
}
U_CAPI int32_t U_EXPORT2
ucol_previous(UCollationElements *elems,
UErrorCode *status)
{
if(U_FAILURE(*status)) {
return UCOL_NULLORDER;
}
else
{
int32_t result;
if(U_FAILURE(*status)) {
return UCOL_NULLORDER;
}
else
{
int32_t result;
if (elems->reset_ &&
(elems->iteratordata_.pos == elems->iteratordata_.string)) {
if (elems->iteratordata_.endp == NULL) {
elems->iteratordata_.endp = elems->iteratordata_.string +
u_strlen(elems->iteratordata_.string);
elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
if (elems->reset_ && (elems->iteratordata_.pos == elems->iteratordata_.string)) {
if (elems->iteratordata_.endp == NULL) {
elems->iteratordata_.endp = elems->iteratordata_.string +
u_strlen(elems->iteratordata_.string);
elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
}
elems->iteratordata_.pos = elems->iteratordata_.endp;
elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
}
elems->iteratordata_.pos = elems->iteratordata_.endp;
elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
elems->reset_ = FALSE;
result = (int32_t)ucol_getPrevCE(elems->iteratordata_.coll,
&(elems->iteratordata_),
status);
if (result == UCOL_NO_MORE_CES) {
result = UCOL_NULLORDER;
}
return result;
}
elems->reset_ = FALSE;
result = (int32_t)ucol_getPrevCE(elems->iteratordata_.coll,
&(elems->iteratordata_),
status);
if (result == UCOL_NO_MORE_CES) {
result = UCOL_NULLORDER;
}
return result;
}
}
U_CAPI int32_t U_EXPORT2
ucol_getMaxExpansion(const UCollationElements *elems,
int32_t order)
{
uint8_t result;
UCOL_GETMAXEXPANSION(elems->iteratordata_.coll, (uint32_t)order, result);
return result;
uint8_t result;
UCOL_GETMAXEXPANSION(elems->iteratordata_.coll, (uint32_t)order, result);
return result;
}
U_CAPI void U_EXPORT2
@ -173,44 +176,44 @@ ucol_setText( UCollationElements *elems,
int32_t textLength,
UErrorCode *status)
{
if (U_FAILURE(*status)) {
return;
}
if (U_FAILURE(*status)) {
return;
}
if (elems->isWritable && elems->iteratordata_.string != NULL)
{
uprv_free(elems->iteratordata_.string);
}
if (text == NULL) {
textLength = 0;
}
if (elems->isWritable && elems->iteratordata_.string != NULL)
{
uprv_free(elems->iteratordata_.string);
}
elems->isWritable = FALSE;
uprv_init_collIterate(elems->iteratordata_.coll, text, textLength,
&elems->iteratordata_);
if (text == NULL) {
textLength = 0;
}
elems->reset_ = TRUE;
elems->isWritable = FALSE;
uprv_init_collIterate(elems->iteratordata_.coll, text, textLength,
&elems->iteratordata_);
elems->reset_ = TRUE;
}
U_CAPI int32_t U_EXPORT2
ucol_getOffset(const UCollationElements *elems)
{
const collIterate *ci = &(elems->iteratordata_);
// While processing characters in the normalization buffer, getOffset
// returns the offset of the next non-normalized character.
// This should be in line with the old implementation, since the old code
// uses nextDecomp in the normalizer, which also decomposes the string until
// the first base character is found.
if (ci->flags & UCOL_ITER_INNORMBUF) {
if (ci->fcdPosition == NULL) {
return 0;
}
return (int32_t)(ci->fcdPosition - ci->string);
}
else {
return (int32_t)(ci->pos - ci->string);
}
const collIterate *ci = &(elems->iteratordata_);
// While processing characters in the normalization buffer, getOffset
// returns the offset of the next non-normalized character.
// This should be in line with the old implementation, since the old code
// uses nextDecomp in the normalizer, which also decomposes the string until
// the first base character is found.
if (ci->flags & UCOL_ITER_INNORMBUF) {
if (ci->fcdPosition == NULL) {
return 0;
}
return (int32_t)(ci->fcdPosition - ci->string);
}
else {
return (int32_t)(ci->pos - ci->string);
}
}
U_CAPI void U_EXPORT2
@ -218,44 +221,44 @@ ucol_setOffset(UCollationElements *elems,
int32_t offset,
UErrorCode *status)
{
if (U_FAILURE(*status)) {
return;
}
if (U_FAILURE(*status)) {
return;
}
// This method cleans up any use of the writable buffer and points back to
// the original string.
collIterate *ci = &(elems->iteratordata_);
ci->pos = ci->string + offset;
ci->CEpos = ci->toReturn = ci->CEs;
if (ci->flags & UCOL_ITER_INNORMBUF) {
ci->flags = ci->origFlags;
}
if ((ci->flags & UCOL_ITER_HASLEN) == 0) {
ci->endp = ci->string + u_strlen(ci->string);
ci->flags |= UCOL_ITER_HASLEN;
}
ci->fcdPosition = NULL;
elems->reset_ = FALSE;
// This method cleans up any use of the writable buffer and points back to
// the original string.
collIterate *ci = &(elems->iteratordata_);
ci->pos = ci->string + offset;
ci->CEpos = ci->toReturn = ci->CEs;
if (ci->flags & UCOL_ITER_INNORMBUF) {
ci->flags = ci->origFlags;
}
if ((ci->flags & UCOL_ITER_HASLEN) == 0) {
ci->endp = ci->string + u_strlen(ci->string);
ci->flags |= UCOL_ITER_HASLEN;
}
ci->fcdPosition = NULL;
elems->reset_ = FALSE;
}
U_CAPI int32_t U_EXPORT2
ucol_primaryOrder (int32_t order)
{
order &= UCOL_PRIMARYMASK;
return (order >> UCOL_PRIMARYORDERSHIFT);
order &= UCOL_PRIMARYMASK;
return (order >> UCOL_PRIMARYORDERSHIFT);
}
U_CAPI int32_t U_EXPORT2
ucol_secondaryOrder (int32_t order)
{
order &= UCOL_SECONDARYMASK;
return (order >> UCOL_SECONDARYORDERSHIFT);
order &= UCOL_SECONDARYMASK;
return (order >> UCOL_SECONDARYORDERSHIFT);
}
U_CAPI int32_t U_EXPORT2
ucol_tertiaryOrder (int32_t order)
{
return (order & UCOL_TERTIARYMASK);
return (order & UCOL_TERTIARYMASK);
}
#endif /* #if !UCONFIG_NO_COLLATION */

View file

@ -2616,6 +2616,7 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
result->ownCollator = FALSE;
result->search->matchedLength = 0;
result->search->matchedIndex = USEARCH_DONE;
result->utilIter = NULL;
result->textIter = ucol_openElements(collator, text,
textlength, status);
if (U_FAILURE(*status)) {
@ -2623,8 +2624,6 @@ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
return NULL;
}
result->utilIter = NULL;
result->search->isOverlap = FALSE;
result->search->isCanonicalMatch = FALSE;
result->search->isForwardSearching = TRUE;

View file

@ -1051,208 +1051,217 @@ typedef struct {
static indirectBoundaries ucolIndirectBoundaries[15];
static UBool indirectBoundariesSet = FALSE;
static void setIndirectBoundaries(uint32_t indexR, uint32_t *start, uint32_t *end) {
/* Set values for the top - TODO: once we have values for all the indirects, we are going */
/* to initialize here. */
ucolIndirectBoundaries[indexR].startCE = start[0];
ucolIndirectBoundaries[indexR].startContCE = start[1];
if(end) {
ucolIndirectBoundaries[indexR].limitCE = end[0];
ucolIndirectBoundaries[indexR].limitContCE = end[1];
} else {
ucolIndirectBoundaries[indexR].limitCE = 0;
ucolIndirectBoundaries[indexR].limitContCE = 0;
}
/* Set values for the top - TODO: once we have values for all the indirects, we are going */
/* to initialize here. */
ucolIndirectBoundaries[indexR].startCE = start[0];
ucolIndirectBoundaries[indexR].startContCE = start[1];
if(end) {
ucolIndirectBoundaries[indexR].limitCE = end[0];
ucolIndirectBoundaries[indexR].limitContCE = end[1];
} else {
ucolIndirectBoundaries[indexR].limitCE = 0;
ucolIndirectBoundaries[indexR].limitContCE = 0;
}
}
static void testCEs(UCollator *coll, UErrorCode *status) {
const UChar *rules = NULL, *current = NULL;
int32_t ruleLen = 0;
const UChar *rules = NULL, *current = NULL;
int32_t ruleLen = 0;
uint32_t strength = 0;
uint32_t maxStrength = UCOL_IDENTICAL;
uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
uint32_t lastCE;
uint32_t lastContCE;
uint32_t strength = 0;
uint32_t maxStrength = UCOL_IDENTICAL;
uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;
uint32_t lastCE;
uint32_t lastContCE;
int32_t result = 0;
uint32_t chOffset = 0; uint32_t chLen = 0;
uint32_t exOffset = 0; uint32_t exLen = 0;
uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
uint32_t oldOffset = 0;
int32_t result = 0;
uint32_t chOffset = 0; uint32_t chLen = 0;
uint32_t exOffset = 0; uint32_t exLen = 0;
uint32_t prefixOffset = 0; uint32_t prefixLen = 0;
uint32_t oldOffset = 0;
/* uint32_t rExpsLen = 0; */
/* uint32_t firstLen = 0; */
uint16_t specs = 0;
UBool varT = FALSE; UBool top_ = TRUE;
UBool startOfRules = TRUE;
UBool before = FALSE;
UColTokenParser src;
UColOptionSet opts;
UParseError parseError;
UChar *rulesCopy = NULL;
collIterate c;
UCAConstants *consts = NULL;
uint32_t UCOL_RESET_TOP_VALUE, /*UCOL_RESET_TOP_CONT, */
UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;
UCollator *UCA = ucol_open("root", status);
/* uint32_t rExpsLen = 0; */
/* uint32_t firstLen = 0; */
uint16_t specs = 0;
UBool varT = FALSE; UBool top_ = TRUE;
UBool startOfRules = TRUE;
UBool before = FALSE;
UColTokenParser src;
UColOptionSet opts;
UParseError parseError;
UChar *rulesCopy = NULL;
collIterate c;
UCollator *UCA = ucol_open("root", status);
UCAConstants *consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
uint32_t UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0], /*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1], */
UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0], UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
src.opts = &opts;
rules = ucol_getRules(coll, &ruleLen);
src.invUCA = ucol_initInverseUCA(status);
if(indirectBoundariesSet == FALSE) {
/* UCOL_RESET_TOP_VALUE */
setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
/* UCOL_FIRST_PRIMARY_IGNORABLE */
setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
/* UCOL_LAST_PRIMARY_IGNORABLE */
setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
/* UCOL_FIRST_SECONDARY_IGNORABLE */
setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
/* UCOL_LAST_SECONDARY_IGNORABLE */
setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
/* UCOL_FIRST_TERTIARY_IGNORABLE */
setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
/* UCOL_LAST_TERTIARY_IGNORABLE */
setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
/* UCOL_FIRST_VARIABLE */
setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
/* UCOL_LAST_VARIABLE */
setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
/* UCOL_FIRST_NON_VARIABLE */
setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
/* UCOL_LAST_NON_VARIABLE */
setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
/* UCOL_FIRST_IMPLICIT */
setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
/* UCOL_LAST_IMPLICIT */
setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
/* UCOL_FIRST_TRAILING */
setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
/* UCOL_LAST_TRAILING */
setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
indirectBoundariesSet = TRUE;
}
if(U_SUCCESS(*status) && ruleLen > 0) {
rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
src.current = src.source = rulesCopy;
src.end = rulesCopy+ruleLen;
src.extraCurrent = src.end;
src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
strength = src.parsedToken.strength;
chOffset = src.parsedToken.charsOffset;
chLen = src.parsedToken.charsLen;
exOffset = src.parsedToken.extensionOffset;
exLen = src.parsedToken.extensionLen;
prefixOffset = src.parsedToken.prefixOffset;
prefixLen = src.parsedToken.prefixLen;
specs = src.parsedToken.flags;
startOfRules = FALSE;
varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
uprv_init_collIterate(coll, rulesCopy+chOffset, chLen, &c);
currCE = ucol_getNextCE(coll, &c, status);
if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(rulesCopy+chOffset))) {
log_verbose("Thai prevowel detected. Will pick next CE\n");
currCE = ucol_getNextCE(coll, &c, status);
}
currContCE = ucol_getNextCE(coll, &c, status);
if(!isContinuation(currContCE)) {
currContCE = 0;
}
/* we need to repack CEs here */
if(strength == UCOL_TOK_RESET) {
before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
if(top_ == TRUE) {
int32_t index = src.parsedToken.indirectIndex;
nextCE = baseCE = currCE = ucolIndirectBoundaries[index].startCE;
nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[index].startContCE;
} else {
nextCE = baseCE = currCE;
nextContCE = baseContCE = currContCE;
}
maxStrength = UCOL_IDENTICAL;
} else {
if(strength < maxStrength) {
maxStrength = strength;
if(baseCE == UCOL_RESET_TOP_VALUE) {
log_verbose("Resetting to [top]\n");
nextCE = UCOL_NEXT_TOP_VALUE;
nextContCE = UCOL_NEXT_TOP_CONT;
} else {
result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
}
if(result < 0) {
if(ucol_isTailored(coll, *(rulesCopy+oldOffset), status)) {
log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(rulesCopy+oldOffset));
return;
} else {
log_err("couldn't find the CE\n");
return;
}
}
}
currCE &= 0xFFFFFF3F;
currContCE &= 0xFFFFFFBF;
if(maxStrength == UCOL_IDENTICAL) {
if(baseCE != currCE || baseContCE != currContCE) {
log_err("current CE (initial strength UCOL_EQUAL)\n");
}
} else {
if(strength == UCOL_IDENTICAL) {
if(lastCE != currCE || lastContCE != currContCE) {
log_err("current CE (initial strength UCOL_EQUAL)\n");
}
} else {
if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
/*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
log_err("current CE is not less than base CE\n");
}
if(!before) {
if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
/*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
log_err("sequence of generated CEs is broken\n");
}
} else {
before = FALSE;
if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
/*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
log_err("sequence of generated CEs is broken\n");
}
}
}
}
}
oldOffset = chOffset;
lastCE = currCE & 0xFFFFFF3F;
lastContCE = currContCE & 0xFFFFFFBF;
if (U_FAILURE(*status)) {
log_err("Could not open root collator %s\n", u_errorName(*status));
return;
}
free(rulesCopy);
}
ucol_close(UCA);
consts = (UCAConstants *)((uint8_t *)UCA->image + UCA->image->UCAConsts);
UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];
/*UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; */
UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];
UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];
baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UCOL_NOT_FOUND;
src.opts = &opts;
rules = ucol_getRules(coll, &ruleLen);
src.invUCA = ucol_initInverseUCA(status);
if(indirectBoundariesSet == FALSE) {
/* UCOL_RESET_TOP_VALUE */
setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
/* UCOL_FIRST_PRIMARY_IGNORABLE */
setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);
/* UCOL_LAST_PRIMARY_IGNORABLE */
setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);
/* UCOL_FIRST_SECONDARY_IGNORABLE */
setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);
/* UCOL_LAST_SECONDARY_IGNORABLE */
setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);
/* UCOL_FIRST_TERTIARY_IGNORABLE */
setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);
/* UCOL_LAST_TERTIARY_IGNORABLE */
setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);
/* UCOL_FIRST_VARIABLE */
setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);
/* UCOL_LAST_VARIABLE */
setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);
/* UCOL_FIRST_NON_VARIABLE */
setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);
/* UCOL_LAST_NON_VARIABLE */
setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRST_IMPLICIT);
/* UCOL_FIRST_IMPLICIT */
setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);
/* UCOL_LAST_IMPLICIT */
setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_TRAILING);
/* UCOL_FIRST_TRAILING */
setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);
/* UCOL_LAST_TRAILING */
setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);
ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<24);
indirectBoundariesSet = TRUE;
}
if(U_SUCCESS(*status) && ruleLen > 0) {
rulesCopy = (UChar *)malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)*sizeof(UChar));
uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));
src.current = src.source = rulesCopy;
src.end = rulesCopy+ruleLen;
src.extraCurrent = src.end;
src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,status)) != NULL) {
strength = src.parsedToken.strength;
chOffset = src.parsedToken.charsOffset;
chLen = src.parsedToken.charsLen;
exOffset = src.parsedToken.extensionOffset;
exLen = src.parsedToken.extensionLen;
prefixOffset = src.parsedToken.prefixOffset;
prefixLen = src.parsedToken.prefixLen;
specs = src.parsedToken.flags;
startOfRules = FALSE;
varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);
top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);
uprv_init_collIterate(coll, rulesCopy+chOffset, chLen, &c);
currCE = ucol_getNextCE(coll, &c, status);
if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(rulesCopy+chOffset))) {
log_verbose("Thai prevowel detected. Will pick next CE\n");
currCE = ucol_getNextCE(coll, &c, status);
}
currContCE = ucol_getNextCE(coll, &c, status);
if(!isContinuation(currContCE)) {
currContCE = 0;
}
/* we need to repack CEs here */
if(strength == UCOL_TOK_RESET) {
before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);
if(top_ == TRUE) {
int32_t index = src.parsedToken.indirectIndex;
nextCE = baseCE = currCE = ucolIndirectBoundaries[index].startCE;
nextContCE = baseContCE = currContCE = ucolIndirectBoundaries[index].startContCE;
} else {
nextCE = baseCE = currCE;
nextContCE = baseContCE = currContCE;
}
maxStrength = UCOL_IDENTICAL;
} else {
if(strength < maxStrength) {
maxStrength = strength;
if(baseCE == UCOL_RESET_TOP_VALUE) {
log_verbose("Resetting to [top]\n");
nextCE = UCOL_NEXT_TOP_VALUE;
nextContCE = UCOL_NEXT_TOP_CONT;
} else {
result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, baseContCE, &nextCE, &nextContCE, maxStrength);
}
if(result < 0) {
if(ucol_isTailored(coll, *(rulesCopy+oldOffset), status)) {
log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(rulesCopy+oldOffset));
return;
} else {
log_err("couldn't find the CE\n");
return;
}
}
}
currCE &= 0xFFFFFF3F;
currContCE &= 0xFFFFFFBF;
if(maxStrength == UCOL_IDENTICAL) {
if(baseCE != currCE || baseContCE != currContCE) {
log_err("current CE (initial strength UCOL_EQUAL)\n");
}
} else {
if(strength == UCOL_IDENTICAL) {
if(lastCE != currCE || lastContCE != currContCE) {
log_err("current CE (initial strength UCOL_EQUAL)\n");
}
} else {
if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {
/*if(currCE > nextCE || (currCE == nextCE && currContCE >= nextContCE)) {*/
log_err("current CE is not less than base CE\n");
}
if(!before) {
if(compareCEs(currCE, currContCE, lastCE, lastContCE) < 0) {
/*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
log_err("sequence of generated CEs is broken\n");
}
} else {
before = FALSE;
if(compareCEs(currCE, currContCE, lastCE, lastContCE) > 0) {
/*if(currCE < lastCE || (currCE == lastCE && currContCE <= lastContCE)) {*/
log_err("sequence of generated CEs is broken\n");
}
}
}
}
}
oldOffset = chOffset;
lastCE = currCE & 0xFFFFFF3F;
lastContCE = currContCE & 0xFFFFFFBF;
}
free(rulesCopy);
}
ucol_close(UCA);
}
#if 0
@ -1608,6 +1617,10 @@ static void TestComposeDecompose(void) {
return;
}
charsToTestSize = uset_size(charsToTest);
if (charsToTestSize <= 0) {
log_err("Set was zero. Missing data?\n");
return;
}
t = malloc(charsToTestSize * sizeof(tester *));
t[0] = (tester *)malloc(sizeof(tester));
log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);

View file

@ -1,7 +1,7 @@
/*
********************************************************************************
*
* Copyright (C) 1996-2007, International Business Machines
* Copyright (C) 1996-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
********************************************************************************
@ -89,7 +89,8 @@ int ERR_MSG =1; /* error messages will be displayed by default*/
int QUICK = 1; /* Skip some of the slower tests? */
int WARN_ON_MISSING_DATA = 0; /* Reduce data errs to warnings? */
UTraceLevel ICU_TRACE = UTRACE_OFF; /* ICU tracing level */
size_t MAX_MEMORY_ALLOCATION = (size_t)-1; /* Maximum library memory allocation allowed. */
size_t MINIMUM_MEMORY_SIZE_FAILURE = (size_t)-1; /* Minimum library memory allocation window that will fail. */
size_t MAXIMUM_MEMORY_SIZE_FAILURE = (size_t)-1; /* Maximum library memory allocation window that will fail. */
int32_t ALLOCATION_COUNT = 0;
/*-------------------------------------------*/
@ -557,7 +558,7 @@ static void *U_CALLCONV ctest_libMalloc(const void *context, size_t size) {
/*if (VERBOSITY) {
printf("Allocated %ld\n", (long)size);
}*/
if (size >= MAX_MEMORY_ALLOCATION) {
if (MINIMUM_MEMORY_SIZE_FAILURE <= size && size <= MAXIMUM_MEMORY_SIZE_FAILURE) {
return NULL;
}
umtx_atomic_inc(&ALLOCATION_COUNT);
@ -567,7 +568,7 @@ static void *U_CALLCONV ctest_libRealloc(const void *context, void *mem, size_t
/*if (VERBOSITY) {
printf("Reallocated %ld\n", (long)size);
}*/
if (size >= MAX_MEMORY_ALLOCATION) {
if (MINIMUM_MEMORY_SIZE_FAILURE <= size && size <= MAXIMUM_MEMORY_SIZE_FAILURE) {
/*free(mem);*/ /* Realloc doesn't free on failure. */
return NULL;
}
@ -628,15 +629,25 @@ initArgs( int argc, const char* const argv[])
if (i+1 < argc) {
char *endPtr = NULL;
i++;
MAX_MEMORY_ALLOCATION = (size_t)strtol(argv[i], &endPtr, 10);
MINIMUM_MEMORY_SIZE_FAILURE = (size_t)strtol(argv[i], &endPtr, 10);
if (endPtr == argv[i]) {
printf("Can't parse %s\n", argv[i]);
help( argv[0] );
help(argv[0]);
return 0;
}
if (*endPtr == '-') {
    /* An optional upper bound of the failure window follows the '-'
     * ("-m min-max"). Parse it starting just past the dash. */
    char *maxPtr = endPtr+1;
    endPtr = NULL;
    MAXIMUM_MEMORY_SIZE_FAILURE = (size_t)strtol(maxPtr, &endPtr, 10);
    /* strtol leaves endPtr at its own start position when it consumes no
     * digits. That start is maxPtr, not argv[i]; comparing against argv[i]
     * could never detect a malformed upper bound such as "16-abc". */
    if (endPtr == maxPtr) {
        printf("Can't parse %s\n", argv[i]);
        help(argv[0]);
        return 0;
    }
}
}
/* Use the default value */
u_setMemoryFunctions(&MAX_MEMORY_ALLOCATION, ctest_libMalloc, ctest_libRealloc, ctest_libFree, &errorCode);
u_setMemoryFunctions(NULL, ctest_libMalloc, ctest_libRealloc, ctest_libFree, &errorCode);
if (U_FAILURE(errorCode)) {
printf("u_setMemoryFunctions returned %s\n", u_errorName(errorCode));
return 0;
@ -764,8 +775,8 @@ runTestRequest(const TestNode* root,
static void help ( const char *argv0 )
{
printf("Usage: %s [ -l ] [ -v ] [ -verbose] [-a] [ -all] [-n] [ -no_err_msg]\n"
" [ -h ] [-t_info | -t_error | -t_warn | -t_oc | -t_verbose]"
" [ /path/to/test ]\n",
" [ -h ] [-t_info | -t_error | -t_warn | -t_oc | -t_verbose] [-m n[-q] ]\n"
" [ /path/to/test ]\n",
argv0);
printf(" -l To get a list of test names\n");
printf(" -e to do exhaustive testing\n");
@ -777,10 +788,10 @@ static void help ( const char *argv0 )
" user has reduced/changed the common set of ICU data \n");
printf(" -t_info | -t_error | -t_warn | -t_oc | -t_verbose Enable ICU tracing\n");
printf(" -no_err_msg (same as -n) \n");
printf(" -m n Maximum size of library allocation allowed.\n");
printf(" The default is the maximum value of size_t\n");
printf(" -r repeat tests after calling u_cleanup \n");
printf(" -[/subtest] To run a subtest \n");
printf(" -m n[-q] Min-Max memory size that will cause an allocation failure.\n");
printf(" The default is the maximum value of size_t. Max is optional.\n");
printf(" -r Repeat tests after calling u_cleanup \n");
printf(" [/subtest] To run a subtest \n");
printf(" eg: to run just the utility tests type: cintltest /tsutil) \n");
}