mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-15 01:42:37 +00:00
ICU-7167 reimplement checkCEValidity() for actual byte value ranges
X-SVN-Rev: 28193
This commit is contained in:
parent
3e24713df1
commit
2ef6586467
1 changed files with 131 additions and 131 deletions
|
@ -1496,147 +1496,147 @@ static void TestCEBufferOverflow()
|
|||
}
|
||||
|
||||
/**
|
||||
* Byte bounds checks. Checks if each byte in data is between upper and lower
|
||||
* inclusive.
|
||||
*/
|
||||
static UBool checkByteBounds(uint32_t data, char upper, char lower)
|
||||
{
|
||||
int count = 4;
|
||||
while (count > 0) {
|
||||
char b = (char)(data & 0xFF);
|
||||
if (b > upper || b < lower) {
|
||||
return FALSE;
|
||||
}
|
||||
data = data >> 8;
|
||||
count --;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/**
* Determines the case of a string of code units.
* If the string consists of multiple code units it has to be treated as a
* contraction.
*
* The whole block is compiled out with "#if 0".
*
* @param s   the code units to classify
* @param len number of code units in s
* @return UCOL_LOWER_CASE for an empty string, for a string with only
*         lowercase characters, or for a string with no cased characters;
*         UCOL_UPPER_CASE for a string with only uppercase characters;
*         UCOL_MIXED_CASE when lowercase and uppercase are both present or
*         when a titlecase character occurs in a string of two or more code
*         units; otherwise the case of the first code unit decides.
*/
#if 0
static uint8_t getCase(const UChar *s, uint32_t len) {
    UBool    lower = FALSE;
    UBool    upper = FALSE;
    UBool    title = FALSE;
    uint32_t i;

    if (len == 0) {
        return UCOL_LOWER_CASE;
    }

    /*
     * Scan by index rather than counting len down, so that len still holds
     * the string length for the titlecase test further below.
     */
    for (i = 0; i < len; ++i) {
        UChar c = s[i];

        if (u_islower(c)) {
            lower = TRUE;
        }
        if (u_isupper(c)) {
            upper = TRUE;
        }
        if (u_istitle(c)) {
            title = TRUE;
        }
    }

    /* Only lowercase seen, or nothing cased at all. */
    if (!upper && !title) {
        return UCOL_LOWER_CASE;
    }
    /* Only uppercase seen. */
    if (upper && !lower && !title) {
        return UCOL_UPPER_CASE;
    }

    /* mix of cases here */
    /* NOTE: the string is classified as given; no NFKD normalization is applied. */

    if ((title && len >= 2) || (lower && upper)) {
        return UCOL_MIXED_CASE;
    }
    if (u_isupper(s[0])) {
        return UCOL_UPPER_CASE;
    }
    return UCOL_LOWER_CASE;
}
#endif
|
||||
|
||||
/**
|
||||
* Checking collation element validity given the boundary arguments.
|
||||
* Checking collation element validity.
|
||||
*/
|
||||
static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
|
||||
int length, uint32_t primarymax,
|
||||
uint32_t secondarymax)
|
||||
int length)
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UCollationElements *iter = ucol_openElements(coll, codepoints, length,
|
||||
&status);
|
||||
uint32_t ce;
|
||||
UBool first = TRUE;
|
||||
/*
|
||||
UBool upper = FALSE;
|
||||
UBool lower = FALSE;
|
||||
*/
|
||||
UBool result = FALSE;
|
||||
UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
log_err("Error creating iterator for testing validity\n");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
ce = ucol_next(iter, &status);
|
||||
for (;;) {
|
||||
uint32_t ce = ucol_next(iter, &status);
|
||||
uint32_t primary, p1, p2, secondary, tertiary;
|
||||
if (ce == UCOL_NULLORDER) {
|
||||
result = TRUE;
|
||||
break;
|
||||
}
|
||||
if (ce == 0) {
|
||||
continue;
|
||||
}
|
||||
primary = UCOL_PRIMARYORDER(ce);
|
||||
p1 = primary >> 8;
|
||||
p2 = primary & 0xFF;
|
||||
secondary = UCOL_SECONDARYORDER(ce);
|
||||
tertiary = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;
|
||||
|
||||
while (ce != UCOL_NULLORDER) {
|
||||
if (ce != 0) {
|
||||
uint32_t primary = UCOL_PRIMARYORDER(ce);
|
||||
uint32_t secondary = UCOL_SECONDARYORDER(ce);
|
||||
uint32_t tertiary = UCOL_TERTIARYORDER(ce);
|
||||
/* uint32_t scasebits = tertiary & 0xC0;*/
|
||||
|
||||
if ((tertiary == 0 && secondary != 0) ||
|
||||
(tertiary < 0xC0 && secondary == 0 && primary != 0)) {
|
||||
/* n-1th level is not zero when the nth level is
|
||||
except for continuations, this is wrong */
|
||||
log_err("Lower level weight not 0 when high level weight is 0\n");
|
||||
goto fail;
|
||||
}
|
||||
else {
|
||||
/* checks if any byte is illegal ie = 01 02 03. */
|
||||
if (checkByteBounds(ce, 0x3, 0x1)) {
|
||||
log_err("Byte range in CE lies in illegal bounds 0x1 - 0x3\n");
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
if ((primary != 0 && primary < primarymax)
|
||||
|| ((primary & 0xFF) == 0xFF) || (((primary>>8) & 0xFF) == 0xFF)
|
||||
|| ((primary & 0xFF) && ((primary & 0xFF) <= 2))
|
||||
|| (((primary>>8) & 0xFF) && ((primary>>8) & 0xFF) <= 2)
|
||||
|| (primary >= 0xFE00 && !isContinuation(ce))) {
|
||||
log_err("UCA primary weight out of bounds: %04X for string starting with %04X\n",
|
||||
primary, codepoints[0]);
|
||||
goto fail;
|
||||
}
|
||||
/* case matching not done since data generated by ken */
|
||||
if (first) {
|
||||
if (secondary >= 6 && secondary <= secondarymax) {
|
||||
log_err("Secondary weight out of range\n");
|
||||
goto fail;
|
||||
}
|
||||
first = FALSE;
|
||||
}
|
||||
}
|
||||
ce = ucol_next(iter, &status);
|
||||
if (!isContinuation(ce)) {
|
||||
if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
|
||||
log_err("Empty CE %08lX except for case bits\n", (long)ce);
|
||||
break;
|
||||
}
|
||||
if (p1 == 0) {
|
||||
if (p2 != 0) {
|
||||
log_err("Primary 00 xx in %08lX\n", (long)ce);
|
||||
break;
|
||||
}
|
||||
primaryDone = TRUE;
|
||||
} else {
|
||||
if (p1 <= 2 || p1 >= 0xF0) {
|
||||
/* Primary first bytes F0..FF are specials. */
|
||||
log_err("Primary first byte of %08lX out of range\n", (long)ce);
|
||||
break;
|
||||
}
|
||||
if (p2 == 0) {
|
||||
primaryDone = TRUE;
|
||||
} else {
|
||||
if (p2 <= 3 || p2 >= 0xFF) {
|
||||
/* Primary second bytes 03 and FF are sort key compression terminators. */
|
||||
log_err("Primary second byte of %08lX out of range\n", (long)ce);
|
||||
break;
|
||||
}
|
||||
primaryDone = FALSE;
|
||||
}
|
||||
}
|
||||
if (secondary == 0) {
|
||||
if (primary != 0) {
|
||||
log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);
|
||||
break;
|
||||
}
|
||||
secondaryDone = TRUE;
|
||||
} else {
|
||||
if (secondary <= 2 ||
|
||||
(UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COMMON + 0x80))
|
||||
) {
|
||||
/* Secondary first bytes common+1..+0x80 are used for sort key compression. */
|
||||
log_err("Secondary byte of %08lX out of range\n", (long)ce);
|
||||
break;
|
||||
}
|
||||
secondaryDone = FALSE;
|
||||
}
|
||||
if (tertiary == 0) {
|
||||
/* We know that ce != 0. */
|
||||
log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);
|
||||
break;
|
||||
}
|
||||
if (tertiary <= 2) {
|
||||
log_err("Tertiary byte of %08lX out of range\n", (long)ce);
|
||||
break;
|
||||
}
|
||||
tertiaryDone = FALSE;
|
||||
} else {
|
||||
if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {
|
||||
log_err("Empty continuation %08lX\n", (long)ce);
|
||||
break;
|
||||
}
|
||||
if (primaryDone && primary != 0) {
|
||||
log_err("Primary was done but continues in %08lX\n", (long)ce);
|
||||
break;
|
||||
}
|
||||
if (p1 == 0) {
|
||||
if (p2 != 0) {
|
||||
log_err("Primary 00 xx in %08lX\n", (long)ce);
|
||||
break;
|
||||
}
|
||||
primaryDone = TRUE;
|
||||
} else {
|
||||
if (p1 <= 2) {
|
||||
log_err("Primary first byte of %08lX out of range\n", (long)ce);
|
||||
break;
|
||||
}
|
||||
if (p2 == 0) {
|
||||
primaryDone = TRUE;
|
||||
} else {
|
||||
if (p2 <= 3) {
|
||||
log_err("Primary second byte of %08lX out of range\n", (long)ce);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (secondaryDone && secondary != 0) {
|
||||
log_err("Secondary was done but continues in %08lX\n", (long)ce);
|
||||
break;
|
||||
}
|
||||
if (secondary == 0) {
|
||||
secondaryDone = TRUE;
|
||||
} else {
|
||||
if (secondary <= 2) {
|
||||
log_err("Secondary byte of %08lX out of range\n", (long)ce);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (tertiaryDone && tertiary != 0) {
|
||||
log_err("Tertiary was done but continues in %08lX\n", (long)ce);
|
||||
break;
|
||||
}
|
||||
if (tertiary == 0) {
|
||||
tertiaryDone = TRUE;
|
||||
} else if (tertiary <= 2) {
|
||||
log_err("Tertiary byte of %08lX out of range\n", (long)ce);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
ucol_closeElements(iter);
|
||||
return TRUE;
|
||||
fail :
|
||||
ucol_closeElements(iter);
|
||||
return FALSE;
|
||||
return result;
|
||||
}
|
||||
|
||||
static void TestCEValidity()
|
||||
|
@ -1674,21 +1674,21 @@ static void TestCEValidity()
|
|||
}
|
||||
|
||||
getCodePoints(line, codepoints, contextCPs);
|
||||
checkCEValidity(coll, codepoints, u_strlen(codepoints), 5, 86);
|
||||
checkCEValidity(coll, codepoints, u_strlen(codepoints));
|
||||
}
|
||||
|
||||
log_verbose("Testing UCA elements for the whole range of unicode characters\n");
|
||||
for (c = 0; c <= 0xffff; ++c) {
|
||||
if (u_isdefined(c)) {
|
||||
codepoints[0] = (UChar)c;
|
||||
checkCEValidity(coll, codepoints, 1, 5, 86);
|
||||
checkCEValidity(coll, codepoints, 1);
|
||||
}
|
||||
}
|
||||
for (; c <= 0x10ffff; ++c) {
|
||||
if (u_isdefined(c)) {
|
||||
int32_t i = 0;
|
||||
U16_APPEND_UNSAFE(codepoints, i, c);
|
||||
checkCEValidity(coll, codepoints, i, 5, 86);
|
||||
checkCEValidity(coll, codepoints, i);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1765,7 +1765,7 @@ static void TestCEValidity()
|
|||
uprv_memcpy(codepoints, src.source + chOffset,
|
||||
chLen * sizeof(UChar));
|
||||
codepoints[chLen] = 0;
|
||||
checkCEValidity(coll, codepoints, chLen, 4, 85);
|
||||
checkCEValidity(coll, codepoints, chLen);
|
||||
}
|
||||
free(rulesCopy);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue