ICU-7167 reimplement checkCEValidity() for actual byte value ranges

X-SVN-Rev: 28193
This commit is contained in:
Markus Scherer 2010-06-15 05:28:53 +00:00
parent 3e24713df1
commit 2ef6586467

View file

@ -1496,147 +1496,147 @@ static void TestCEBufferOverflow()
}
/**
 * Byte bounds check. Tests whether each of the four bytes in data lies
 * between lower and upper, inclusive.
 *
 * Bytes and bounds are compared as unsigned 8-bit values (0..255), so the
 * result does not depend on whether plain char is signed on the platform.
 *
 * @param data  32-bit value whose four bytes are examined
 * @param upper inclusive upper bound for every byte
 * @param lower inclusive lower bound for every byte
 * @return TRUE if all four bytes are within [lower, upper], FALSE otherwise
 */
static UBool checkByteBounds(uint32_t data, char upper, char lower)
{
    /* Reinterpret the bounds as unsigned so that values >= 0x80 keep their
       intended magnitude instead of turning negative with a signed char. */
    const uint8_t hi = (uint8_t)upper;
    const uint8_t lo = (uint8_t)lower;
    int count;

    for (count = 0; count < 4; ++count) {
        const uint8_t b = (uint8_t)(data & 0xFF);
        if (b > hi || b < lo) {
            return FALSE;
        }
        data >>= 8;
    }
    return TRUE;
}
/**
* Determines the case of a string of code points.
* If it has multiple code points, it has to be treated as a contraction.
*/
#if 0
/**
 * Classifies the case of the first len code units of s.
 *
 * @param s   string to classify
 * @param len number of code units of s to examine
 * @return UCOL_LOWER_CASE for an empty string, or when no uppercase and no
 *         titlecase character was seen;
 *         UCOL_UPPER_CASE when uppercase characters were seen but no
 *         lowercase or titlecase ones;
 *         UCOL_MIXED_CASE when both lowercase and uppercase characters were
 *         seen, or a titlecase character was seen in a string of two or more
 *         code units;
 *         otherwise UCOL_UPPER_CASE or UCOL_LOWER_CASE depending on whether
 *         the first code unit is uppercase.
 */
static uint8_t getCase(const UChar *s, uint32_t len) {
    UBool lower = FALSE;
    UBool upper = FALSE;
    UBool title = FALSE;
    uint32_t i;

    if (len == 0) {
        return UCOL_LOWER_CASE;
    }

    /* Walk the string by index rather than counting len down: len must keep
       the string length for the titlecase test further below. */
    for (i = 0; i < len; ++i) {
        UChar c = s[i];
        if (u_islower(c)) {
            lower = TRUE;
        }
        if (u_isupper(c)) {
            upper = TRUE;
        }
        if (u_istitle(c)) {
            title = TRUE;
        }
    }

    /* Only lowercase characters, or no cased characters at all. */
    if (!upper && !title) {
        return UCOL_LOWER_CASE;
    }
    if (upper && !lower && !title) {
        return UCOL_UPPER_CASE;
    }

    /* Mix of cases from here on. */
    if ((title && len >= 2) || (lower && upper)) {
        return UCOL_MIXED_CASE;
    }
    if (u_isupper(s[0])) {
        return UCOL_UPPER_CASE;
    }
    return UCOL_LOWER_CASE;
}
#endif
/**
* Checks the validity of the collation elements (CEs) that coll produces for
* the given string.
* Opens a collation element iterator over codepoints, fetches the CEs with
* ucol_next() and reports each problem it finds through log_err().
*
* NOTE(review): this listing is a changeset view in which the replaced and the
* replacing lines appear interleaved without markers. Two parameter lists, two
* loops and two return paths are therefore visible below; confirm against the
* committed file which of them are live.
*/
static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
/* Parameter list variant that takes explicit primary/secondary bounds. */
int length, uint32_t primarymax,
uint32_t secondarymax)
/* Parameter list variant without bounds arguments. */
int length)
{
UErrorCode status = U_ZERO_ERROR;
UCollationElements *iter = ucol_openElements(coll, codepoints, length,
&status);
uint32_t ce;
UBool first = TRUE;
/*
UBool upper = FALSE;
UBool lower = FALSE;
*/
/* Stays FALSE unless the for(;;) loop reaches UCOL_NULLORDER. */
UBool result = FALSE;
/* Set once a level's weight is zero; the continuation checks below use
   these flags to report a level that continues after it was done. */
UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;
if (U_FAILURE(status)) {
log_err("Error creating iterator for testing validity\n");
return FALSE;
}
/* First fetch for the while loop variant further below. */
ce = ucol_next(iter, &status);
/* Loop variant that fetches one CE per iteration and leaves via break. */
for (;;) {
uint32_t ce = ucol_next(iter, &status);
uint32_t primary, p1, p2, secondary, tertiary;
if (ce == UCOL_NULLORDER) {
/* No more CEs and no error was reported on the way here. */
result = TRUE;
break;
}
if (ce == 0) {
/* Nothing to check in a CE that is zero. */
continue;
}
/* Split the CE into its weights: p1 is primary >> 8, p2 is the low byte of
   primary; the continuation marker is masked out of the tertiary. */
primary = UCOL_PRIMARYORDER(ce);
p1 = primary >> 8;
p2 = primary & 0xFF;
secondary = UCOL_SECONDARYORDER(ce);
tertiary = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;
/* Loop variant that uses the primarymax/secondarymax bounds and leaves via
   "goto fail" on the first error. */
while (ce != UCOL_NULLORDER) {
if (ce != 0) {
uint32_t primary = UCOL_PRIMARYORDER(ce);
uint32_t secondary = UCOL_SECONDARYORDER(ce);
uint32_t tertiary = UCOL_TERTIARYORDER(ce);
/* uint32_t scasebits = tertiary & 0xC0;*/
if ((tertiary == 0 && secondary != 0) ||
(tertiary < 0xC0 && secondary == 0 && primary != 0)) {
/* n-1th level is not zero when the nth level is
except for continuations, this is wrong */
log_err("Lower level weight not 0 when high level weight is 0\n");
goto fail;
}
else {
/* checks if any byte is illegal ie = 01 02 03. */
if (checkByteBounds(ce, 0x3, 0x1)) {
log_err("Byte range in CE lies in illegal bounds 0x1 - 0x3\n");
goto fail;
}
}
/* Primary weight checks: nonzero but below primarymax, a byte equal to
   0xFF, a nonzero byte <= 2, or >= 0xFE00 on a non-continuation CE. */
if ((primary != 0 && primary < primarymax)
|| ((primary & 0xFF) == 0xFF) || (((primary>>8) & 0xFF) == 0xFF)
|| ((primary & 0xFF) && ((primary & 0xFF) <= 2))
|| (((primary>>8) & 0xFF) && ((primary>>8) & 0xFF) <= 2)
|| (primary >= 0xFE00 && !isContinuation(ce))) {
log_err("UCA primary weight out of bounds: %04X for string starting with %04X\n",
primary, codepoints[0]);
goto fail;
}
/* case matching not done since data generated by ken */
/* The secondary range check is applied to the first nonzero CE only. */
if (first) {
if (secondary >= 6 && secondary <= secondarymax) {
log_err("Secondary weight out of range\n");
goto fail;
}
first = FALSE;
}
}
ce = ucol_next(iter, &status);
/* Checks for a CE that is not a continuation. */
if (!isContinuation(ce)) {
if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
log_err("Empty CE %08lX except for case bits\n", (long)ce);
break;
}
if (p1 == 0) {
if (p2 != 0) {
log_err("Primary 00 xx in %08lX\n", (long)ce);
break;
}
primaryDone = TRUE;
} else {
if (p1 <= 2 || p1 >= 0xF0) {
/* Primary first bytes F0..FF are specials. */
log_err("Primary first byte of %08lX out of range\n", (long)ce);
break;
}
if (p2 == 0) {
primaryDone = TRUE;
} else {
if (p2 <= 3 || p2 >= 0xFF) {
/* Primary second bytes 03 and FF are sort key compression terminators. */
log_err("Primary second byte of %08lX out of range\n", (long)ce);
break;
}
primaryDone = FALSE;
}
}
if (secondary == 0) {
if (primary != 0) {
log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);
break;
}
secondaryDone = TRUE;
} else {
if (secondary <= 2 ||
(UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80))
) {
/* Secondary first bytes common+1..+0x80 are used for sort key compression. */
log_err("Secondary byte of %08lX out of range\n", (long)ce);
break;
}
secondaryDone = FALSE;
}
if (tertiary == 0) {
/* We know that ce != 0. */
log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);
break;
}
if (tertiary <= 2) {
log_err("Tertiary byte of %08lX out of range\n", (long)ce);
break;
}
tertiaryDone = FALSE;
} else {
/* Checks for a continuation CE: a level that was already done must not
   carry a nonzero weight again, and nonzero bytes must be in range. */
if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
log_err("Empty continuation %08lX\n", (long)ce);
break;
}
if (primaryDone && primary != 0) {
log_err("Primary was done but continues in %08lX\n", (long)ce);
break;
}
if (p1 == 0) {
if (p2 != 0) {
log_err("Primary 00 xx in %08lX\n", (long)ce);
break;
}
primaryDone = TRUE;
} else {
if (p1 <= 2) {
log_err("Primary first byte of %08lX out of range\n", (long)ce);
break;
}
if (p2 == 0) {
primaryDone = TRUE;
} else {
if (p2 <= 3) {
log_err("Primary second byte of %08lX out of range\n", (long)ce);
break;
}
}
}
if (secondaryDone && secondary != 0) {
log_err("Secondary was done but continues in %08lX\n", (long)ce);
break;
}
if (secondary == 0) {
secondaryDone = TRUE;
} else {
if (secondary <= 2) {
log_err("Secondary byte of %08lX out of range\n", (long)ce);
break;
}
}
if (tertiaryDone && tertiary != 0) {
log_err("Tertiary was done but continues in %08lX\n", (long)ce);
break;
}
if (tertiary == 0) {
tertiaryDone = TRUE;
} else if (tertiary <= 2) {
log_err("Tertiary byte of %08lX out of range\n", (long)ce);
break;
}
}
}
/* Return paths: "return TRUE" with the "fail" label belong to the goto-based
   variant, "return result" to the break-based variant. Both close the iterator. */
ucol_closeElements(iter);
return TRUE;
fail :
ucol_closeElements(iter);
return FALSE;
return result;
}
static void TestCEValidity()
@ -1674,21 +1674,21 @@ static void TestCEValidity()
}
getCodePoints(line, codepoints, contextCPs);
checkCEValidity(coll, codepoints, u_strlen(codepoints), 5, 86);
checkCEValidity(coll, codepoints, u_strlen(codepoints));
}
log_verbose("Testing UCA elements for the whole range of unicode characters\n");
for (c = 0; c <= 0xffff; ++c) {
if (u_isdefined(c)) {
codepoints[0] = (UChar)c;
checkCEValidity(coll, codepoints, 1, 5, 86);
checkCEValidity(coll, codepoints, 1);
}
}
for (; c <= 0x10ffff; ++c) {
if (u_isdefined(c)) {
int32_t i = 0;
U16_APPEND_UNSAFE(codepoints, i, c);
checkCEValidity(coll, codepoints, i, 5, 86);
checkCEValidity(coll, codepoints, i);
}
}
@ -1765,7 +1765,7 @@ static void TestCEValidity()
uprv_memcpy(codepoints, src.source + chOffset,
chLen * sizeof(UChar));
codepoints[chLen] = 0;
checkCEValidity(coll, codepoints, chLen, 4, 85);
checkCEValidity(coll, codepoints, chLen);
}
free(rulesCopy);
}