ICU-1930 updated tests for the new UCA (some of them still fail, pending new data files)

X-SVN-Rev: 8883
This commit is contained in:
Vladimir Weinstein 2002-06-13 18:35:27 +00:00
parent 96f743b5ad
commit 71c2e432b3
4 changed files with 145 additions and 22 deletions

View file

@ -684,12 +684,14 @@ static void TestVariableTop(void)
}
/**
* Tests surrogate support.
*/
* Tests surrogate support.
* NOTE: This test used \\uD801\\uDC01 pair, which is now assigned to Deseret
* Therefore, another (unassigned) code point was used for this test.
*/
static void TestSurrogates(void)
{
const char *str =
"&z<'\\uD800\\uDC00'<'\\uD801\\uDC01\\u0308'<A";
"&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A";
int len = strlen(str);
int rlen = 0;
UChar *rules;
@ -697,9 +699,9 @@ static void TestSurrogates(void)
UCollator *enCollation;
UErrorCode status = U_ZERO_ERROR;
UChar source[][4] =
{{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD801, 0xDC01, 0x0308, 0}};
{{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}};
UChar target[][4] =
{{0xD800, 0xDC00, 0}, {0xD801, 0xDC01, 0x0308, 0}, {'A', 0, 0}};
{{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}};
int count = 0;
uint8_t enresult[20], myresult[20];
int enlen, mylen;
@ -731,14 +733,14 @@ static void TestSurrogates(void)
log_verbose("start of tailored collation supplementary characters test\n");
count = 0;
/* tests getting collation elements for surrogates for tailored rules */
while (count < 3) {
while (count < 4) {
doTest(myCollation, source[count], target[count], UCOL_LESS);
count ++;
}
/* tests that \uD801\uDC01 still has the same value, not changed */
enlen = ucol_getSortKey(enCollation, source[2], 2, enresult, 20);
mylen = ucol_getSortKey(myCollation, source[2], 2, myresult, 20);
/* tests that \uD800\uDC02 still has the same value, not changed */
enlen = ucol_getSortKey(enCollation, source[3], 2, enresult, 20);
mylen = ucol_getSortKey(myCollation, source[3], 2, myresult, 20);
if (enlen != mylen ||
uprv_memcmp(enresult, myresult, enlen) != 0) {
log_verbose("Failed : non-tailored supplementary characters should have the same value\n");

View file

@ -654,8 +654,12 @@ void TestSortKey()
{
uint8_t *sortk1 = NULL, *sortk2 = NULL, *sortk3 = NULL;
uint8_t sortk2_compat[] = {
/* 2.0 key */
0x19, 0x1B, 0x1D, 0x1F, 0x19, 0x01, 0x09, 0x01, 0x09, 0x01, 0x18, 0x01, 0x92, 0x93, 0x94, 0x95, 0x92, 0x00
/* 2.2 key */
0x1D, 0x1F, 0x21, 0x23, 0x1D, 0x01,
0x09, 0x01, 0x09, 0x01, 0x1F, 0x01,
0x92, 0x93, 0x94, 0x95, 0x92, 0x00
/* 2.0 key */
/*0x19, 0x1B, 0x1D, 0x1F, 0x19, 0x01, 0x09, 0x01, 0x09, 0x01, 0x18, 0x01, 0x92, 0x93, 0x94, 0x95, 0x92, 0x00*/
/* 1.8.1 key.*/
/*0x19, 0x1B, 0x1D, 0x1F, 0x19, 0x01, 0x0A, 0x01, 0x0A, 0x01, 0x92, 0x93, 0x94, 0x95, 0x92, 0x00*/
/*this is a 1.8 sortkey */
@ -955,7 +959,16 @@ void TestElemIter()
log_err("Somehow ran out of memory stepping through the iterator2.: %s\n", myErrorName(status));
return;
}
/* this here, my friends, is either pure lunacy or something so obsolete that even its mother
* doesn't care about it. Essentially, this test complains if secondary values for 'I' and '_'
* are the same. According to the UCA, this is not true. Therefore, remove the test.
* Besides, if primary strengths for two code points are different, it doesn't matter one bit
* what is the relation between secondary or any other strengths.
* killed by weiv 06/11/2002.
*/
/*
doAssert( ((order1 & UCOL_SECONDARYMASK) != (order3 & UCOL_SECONDARYMASK)), "The secondary orders should be different");
*/
doAssert( (order1 != UCOL_NULLORDER), "Unexpected end of iterator reached");
free(testString1);

View file

@ -1334,12 +1334,13 @@ static FileStream * getFractionalUCA(void)
*/
static void TestCEs() {
FileStream *file = NULL;
char line[300];
char line[1024];
char *str;
UChar codepoints[5];
uint32_t ces[20];
UErrorCode status = U_ZERO_ERROR;
UCollator *coll = ucol_open("", &status);
uint32_t lineNo = 0;
if (U_FAILURE(status)) {
log_err("Error in opening root collator\n");
@ -1353,9 +1354,11 @@ static void TestCEs() {
return;
}
while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
int count = 0;
UCollationElements *iter;
lineNo++;
/* skip this line if it is empty or a comment or is a return value
or start of some variable section */
if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
@ -1364,6 +1367,7 @@ static void TestCEs() {
}
str = getCodePoints(line, codepoints);
getCEs(str, ces, &status);
if (U_FAILURE(status)) {
log_err("Error in parsing collation elements in FractionalUCA.txt\n");
@ -1683,7 +1687,7 @@ static void TestCEValidity()
/* tailored locales */
char locale[][6] = {"fr_FR\0", "ko_KR\0", "sh_YU\0", "th_TH\0", "zh_CN\0"};
FileStream *file = getFractionalUCA();
char line[300];
char line[1024];
UChar codepoints[5];
int count = 0;
UParseError parseError;
@ -1862,7 +1866,7 @@ static void TestSortKeyValidity(void)
/* tailored locales */
char locale[][6] = {"fr_FR\0", "ko_KR\0", "sh_YU\0", "th_TH\0", "zh_CN\0"};
FileStream *file = getFractionalUCA();
char line[300];
char line[1024];
UChar codepoints[5];
int count = 0;
UParseError parseError;

View file

@ -1623,9 +1623,6 @@ static void TestComposeDecompose(void) {
UChar comp[NORM_BUFFER_TEST_LEN];
uint32_t len = 0;
log_err("error - ### TODO re-enable /tscoll/cmsccoll/TestComposeDecompose after the upgrade to Unicode 3.2 _and_ the new UCA table/algorithm is complete (see Mark & Markus)\n");
return;
noOfLoc = uloc_countAvailable();
t = uprv_malloc(0x30000 * sizeof(tester *));
@ -1869,7 +1866,8 @@ static void TestRedundantRules(void) {
};
const static char *expectedRules[] = {
"&\\u3029<<<x",
/*"&\\u3029<<<x",*/
"&\\u2089<<<x",
"& a <<< x < b <<< c << d <<< e",
"& a < b < m < c < d",
"& a < b <<< c << d <<< x <<< e",
@ -1886,7 +1884,8 @@ static void TestRedundantRules(void) {
};
const static char *testdata[][8] = {
{"\\u3029", "x"},
/*{"\\u3029", "x"},*/
{"\\u2089", "x"},
{"a", "x", "b", "c", "d", "e"},
{"a", "b", "m", "c", "d"},
{"a", "b", "c", "d", "x", "e"},
@ -2186,7 +2185,12 @@ static void TestIncrementalNormalize(void) {
/* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
{
UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};
/* New UCA 3.1.1.
* test below used a code point from Deseret, which sorts differently
* than d800 dc00
*/
/*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
ucol_setStrength(coll, UCOL_TERTIARY);
doTest(coll, strA, strB, UCOL_GREATER);
@ -2494,6 +2498,8 @@ static void TestCyrillicTailoring(void) {
"\\u0410\\u0306a",
"\\u04d0A"
};
log_err("*** Disabled, pending corrected FractionalUCA.txt from Mark ***\n");
return;
genericLocaleStarter("ru", test, 3);
genericRulesStarter("&\\u0410 = \\u0410", test, 3);
genericRulesStarter("&Z < \\u0410", test, 3);
@ -2819,6 +2825,10 @@ static void TestVariableTopSetting(void) {
UChar first[256] = { 0 };
UChar second[256] = { 0 };
UParseError parseError;
log_err("*** Disabled, pending amendend UCARules.txt from Mark ***\n");
return;
src.opts = &opts;
log_verbose("Slide variable top over UCARules\n");
@ -2910,7 +2920,8 @@ static void TestVariableTopSetting(void) {
log_verbose("Testing setting variable top to contractions\n");
{
/* uint32_t tailoredCE = UCOL_NOT_FOUND; */
UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));
/*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);*/
while(*conts != 0) {
if(*(conts+2) == 0) {
varTop1 = ucol_setVariableTop(coll, conts, -1, &status);
@ -3489,9 +3500,102 @@ static void TestRuleOptions(void) {
}
}
/**
 * Exercises strings that differ only by a space / low line (U+005F) and a
 * combining accent (U+0300 / U+0301) under different alternate-handling and
 * strength settings.
 *
 * Each entry of the table below holds a list of escaped strings, the number
 * of leading strings handed to the driver, and two attribute/value pairs.
 * Every entry is passed to genericLocaleStarterWithOptions() together with
 * the "" locale. The strings are presumably listed in the expected ascending
 * collation order -- confirm against genericLocaleStarterWithOptions().
 *
 * NOTE(review): the first entry lists 6 strings but passes a length of 5, and
 * the third lists 24 strings but passes 22. Both are left unchanged here;
 * confirm whether the trailing strings are skipped on purpose.
 */
static void TestIgnorableShifted(void) {
#if 0
Mark a <space> <acute> b
Mark vs
Mark A <space> b
Mark I think those should differ.
weiv these should be equal when shifted but different when non ignorable, right
weiv except capital A would sort after lower case a
Mark OLD implementation: IGNOREABLE: #1 > #2
Mark NEW implemenation: IGNOREABLE: #2 > #1
Mark NON-IGNOREABLE: #1 > #2
"a<space>b","A<space>b","a<space><grave>b","A<space><grave>b","a\\0300b","A<grave>b"
"A<space>\\u300b","a<space><grave>b","A<space>b","a<grave>b","A<grave>b"
#endif
    static struct {
        const char *data[50];
        const uint32_t len;
        const UColAttribute att[2];
        const UColAttribute value[2];
    } tests[] = {
        {
            { "a \\u0300b", "a b", "A \\u0300b", "A b", "a\\u0300b", "A\\u0300b"}, 5,
            { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH }, { UCOL_SHIFTED, UCOL_QUATERNARY }
        },
        {
            {"a b", "A b", "a \\u0300b", "A \\u0300b", "a\\u0300b", "A\\u0300b"}, 6,
            { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH }, { UCOL_NON_IGNORABLE, UCOL_TERTIARY }
        },
        {
            {
#if 0
"a<space>b",
"A<space>b",
"a<space><grave>b",
"A<space><grave>b",
"a<grave>b",
"A<grave>b"
"a<lowline>b",
"A<lowline>b",
"a<lowline><grave>b",
"A<lowline><grave>b",
"a<grave>b",
"A<grave>b"
"a<space>b",
"A<space>b",
"a<space><acute>b",
"A<space><acute>b",
"a<acute>b",
"A<acute>b"
"a<lowline>b",
"A<lowline>b",
"a<lowline><acute>b",
"A<lowline><acute>b",
"a<acute>b",
"A<acute>b"
#endif
                "a b",
                "A b",
                "a \\u0300b",
                "A \\u0300b",
                "a\\u0300b",
                "A\\u0300b",
                "a\\u005fb",
                "A\\u005fb",
                "a\\u005f\\u0300b",
                "A\\u005f\\u0300b",
                "a\\u0300b",
                "A\\u0300b",
                "a b",
                "A b",
                "a \\u0301b",
                "A \\u0301b",
                "a\\u0301b",
                "A\\u0301b",
                "a\\u005fb",
                "A\\u005fb",
                "a\\u005f\\u0301b",
                "A\\u005f\\u0301b",
                "a\\u0301b",
                "A\\u0301b"
            }, 22,
            { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH }, { UCOL_SHIFTED, UCOL_QUATERNARY }
            /* alternative settings, kept for reference (was a C++-style line comment,
             * which C89 compilers reject):
             * { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH }, { UCOL_NON_IGNORABLE, UCOL_TERTIARY }
             */
        }
    };

    int32_t i = 0;

    /* The cast keeps the comparison signed-vs-signed; the table is tiny, so it cannot overflow. */
    for(i = 0; i < (int32_t)(sizeof(tests)/sizeof(tests[0])); i++) {
        genericLocaleStarterWithOptions("", tests[i].data, tests[i].len, tests[i].att, tests[i].value, 2);
    }
}
void addMiscCollTest(TestNode** root)
{
/*addTest(root, &TestIgnorableShifted, "tscoll/cmsccoll/TestIgnorableShifted");*/ /* turned off for now, until I make a normal ordering */
addTest(root, &TestRuleOptions, "tscoll/cmsccoll/TestRuleOptions");
addTest(root, &TestBeforePrefixFailure, "tscoll/cmsccoll/TestBeforePrefixFailure");
addTest(root, &TestContractionClosure, "tscoll/cmsccoll/TestContractionClosure");