mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 14:31:31 +00:00
ICU-1930 updated tests for the new UCA (some of them still fail, pending new data files)
X-SVN-Rev: 8883
This commit is contained in:
parent
96f743b5ad
commit
71c2e432b3
4 changed files with 145 additions and 22 deletions
|
@ -684,12 +684,14 @@ static void TestVariableTop(void)
|
|||
}
|
||||
|
||||
/**
|
||||
* Tests surrogate support.
|
||||
*/
|
||||
* Tests surrogate support.
|
||||
* NOTE: This test used the \\uD801\\uDC01 pair, which is now assigned to Deseret.
|
||||
* Therefore, another (unassigned) code point was used for this test.
|
||||
*/
|
||||
static void TestSurrogates(void)
|
||||
{
|
||||
const char *str =
|
||||
"&z<'\\uD800\\uDC00'<'\\uD801\\uDC01\\u0308'<A";
|
||||
"&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A";
|
||||
int len = strlen(str);
|
||||
int rlen = 0;
|
||||
UChar *rules;
|
||||
|
@ -697,9 +699,9 @@ static void TestSurrogates(void)
|
|||
UCollator *enCollation;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UChar source[][4] =
|
||||
{{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD801, 0xDC01, 0x0308, 0}};
|
||||
{{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}};
|
||||
UChar target[][4] =
|
||||
{{0xD800, 0xDC00, 0}, {0xD801, 0xDC01, 0x0308, 0}, {'A', 0, 0}};
|
||||
{{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}};
|
||||
int count = 0;
|
||||
uint8_t enresult[20], myresult[20];
|
||||
int enlen, mylen;
|
||||
|
@ -731,14 +733,14 @@ static void TestSurrogates(void)
|
|||
log_verbose("start of tailored collation supplementary characters test\n");
|
||||
count = 0;
|
||||
/* tests getting collation elements for surrogates for tailored rules */
|
||||
while (count < 3) {
|
||||
while (count < 4) {
|
||||
doTest(myCollation, source[count], target[count], UCOL_LESS);
|
||||
count ++;
|
||||
}
|
||||
|
||||
/* tests that \uD801\uDC01 still has the same value, not changed */
|
||||
enlen = ucol_getSortKey(enCollation, source[2], 2, enresult, 20);
|
||||
mylen = ucol_getSortKey(myCollation, source[2], 2, myresult, 20);
|
||||
/* tests that \uD800\uDC02 still has the same value, not changed */
|
||||
enlen = ucol_getSortKey(enCollation, source[3], 2, enresult, 20);
|
||||
mylen = ucol_getSortKey(myCollation, source[3], 2, myresult, 20);
|
||||
if (enlen != mylen ||
|
||||
uprv_memcmp(enresult, myresult, enlen) != 0) {
|
||||
log_verbose("Failed : non-tailored supplementary characters should have the same value\n");
|
||||
|
|
|
@ -654,8 +654,12 @@ void TestSortKey()
|
|||
{
|
||||
uint8_t *sortk1 = NULL, *sortk2 = NULL, *sortk3 = NULL;
|
||||
uint8_t sortk2_compat[] = {
|
||||
/* 2.0 key */
|
||||
0x19, 0x1B, 0x1D, 0x1F, 0x19, 0x01, 0x09, 0x01, 0x09, 0x01, 0x18, 0x01, 0x92, 0x93, 0x94, 0x95, 0x92, 0x00
|
||||
/* 2.2 key */
|
||||
0x1D, 0x1F, 0x21, 0x23, 0x1D, 0x01,
|
||||
0x09, 0x01, 0x09, 0x01, 0x1F, 0x01,
|
||||
0x92, 0x93, 0x94, 0x95, 0x92, 0x00
|
||||
/* 2.0 key */
|
||||
/*0x19, 0x1B, 0x1D, 0x1F, 0x19, 0x01, 0x09, 0x01, 0x09, 0x01, 0x18, 0x01, 0x92, 0x93, 0x94, 0x95, 0x92, 0x00*/
|
||||
/* 1.8.1 key.*/
|
||||
/*0x19, 0x1B, 0x1D, 0x1F, 0x19, 0x01, 0x0A, 0x01, 0x0A, 0x01, 0x92, 0x93, 0x94, 0x95, 0x92, 0x00*/
|
||||
/*this is a 1.8 sortkey */
|
||||
|
@ -955,7 +959,16 @@ void TestElemIter()
|
|||
log_err("Somehow ran out of memory stepping through the iterator2.: %s\n", myErrorName(status));
|
||||
return;
|
||||
}
|
||||
/* this here, my friends, is either pure lunacy or something so obsolete that even its mother
|
||||
* doesn't care about it. Essentially, this test complains if secondary values for 'I' and '_'
|
||||
* are the same. According to the UCA, this is not true. Therefore, remove the test.
|
||||
* Besides, if primary strengths for two code points are different, it doesn't matter one bit
|
||||
* what is the relation between secondary or any other strengths.
|
||||
* killed by weiv 06/11/2002.
|
||||
*/
|
||||
/*
|
||||
doAssert( ((order1 & UCOL_SECONDARYMASK) != (order3 & UCOL_SECONDARYMASK)), "The secondary orders should be different");
|
||||
*/
|
||||
doAssert( (order1 != UCOL_NULLORDER), "Unexpected end of iterator reached");
|
||||
|
||||
free(testString1);
|
||||
|
|
|
@ -1334,12 +1334,13 @@ static FileStream * getFractionalUCA(void)
|
|||
*/
|
||||
static void TestCEs() {
|
||||
FileStream *file = NULL;
|
||||
char line[300];
|
||||
char line[1024];
|
||||
char *str;
|
||||
UChar codepoints[5];
|
||||
uint32_t ces[20];
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UCollator *coll = ucol_open("", &status);
|
||||
uint32_t lineNo = 0;
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
log_err("Error in opening root collator\n");
|
||||
|
@ -1353,9 +1354,11 @@ static void TestCEs() {
|
|||
return;
|
||||
}
|
||||
|
||||
|
||||
while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
|
||||
int count = 0;
|
||||
UCollationElements *iter;
|
||||
lineNo++;
|
||||
/* skip this line if it is empty or a comment or is a return value
|
||||
or start of some variable section */
|
||||
if(line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
|
||||
|
@ -1364,6 +1367,7 @@ static void TestCEs() {
|
|||
}
|
||||
|
||||
str = getCodePoints(line, codepoints);
|
||||
|
||||
getCEs(str, ces, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
log_err("Error in parsing collation elements in FractionalUCA.txt\n");
|
||||
|
@ -1683,7 +1687,7 @@ static void TestCEValidity()
|
|||
/* tailored locales */
|
||||
char locale[][6] = {"fr_FR\0", "ko_KR\0", "sh_YU\0", "th_TH\0", "zh_CN\0"};
|
||||
FileStream *file = getFractionalUCA();
|
||||
char line[300];
|
||||
char line[1024];
|
||||
UChar codepoints[5];
|
||||
int count = 0;
|
||||
UParseError parseError;
|
||||
|
@ -1862,7 +1866,7 @@ static void TestSortKeyValidity(void)
|
|||
/* tailored locales */
|
||||
char locale[][6] = {"fr_FR\0", "ko_KR\0", "sh_YU\0", "th_TH\0", "zh_CN\0"};
|
||||
FileStream *file = getFractionalUCA();
|
||||
char line[300];
|
||||
char line[1024];
|
||||
UChar codepoints[5];
|
||||
int count = 0;
|
||||
UParseError parseError;
|
||||
|
|
|
@ -1623,9 +1623,6 @@ static void TestComposeDecompose(void) {
|
|||
UChar comp[NORM_BUFFER_TEST_LEN];
|
||||
uint32_t len = 0;
|
||||
|
||||
log_err("error - ### TODO re-enable /tscoll/cmsccoll/TestComposeDecompose after the upgrade to Unicode 3.2 _and_ the new UCA table/algorithm is complete (see Mark & Markus)\n");
|
||||
return;
|
||||
|
||||
noOfLoc = uloc_countAvailable();
|
||||
|
||||
t = uprv_malloc(0x30000 * sizeof(tester *));
|
||||
|
@ -1869,7 +1866,8 @@ static void TestRedundantRules(void) {
|
|||
};
|
||||
|
||||
const static char *expectedRules[] = {
|
||||
"&\\u3029<<<x",
|
||||
/*"&\\u3029<<<x",*/
|
||||
"&\\u2089<<<x",
|
||||
"& a <<< x < b <<< c << d <<< e",
|
||||
"& a < b < m < c < d",
|
||||
"& a < b <<< c << d <<< x <<< e",
|
||||
|
@ -1886,7 +1884,8 @@ static void TestRedundantRules(void) {
|
|||
};
|
||||
|
||||
const static char *testdata[][8] = {
|
||||
{"\\u3029", "x"},
|
||||
/*{"\\u3029", "x"},*/
|
||||
{"\\u2089", "x"},
|
||||
{"a", "x", "b", "c", "d", "e"},
|
||||
{"a", "b", "m", "c", "d"},
|
||||
{"a", "b", "c", "d", "x", "e"},
|
||||
|
@ -2186,7 +2185,12 @@ static void TestIncrementalNormalize(void) {
|
|||
/* Test 3: Non-normal sequence is terminated by a surrogate pair.*/
|
||||
|
||||
{
|
||||
UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};
|
||||
/* New UCA 3.1.1.
|
||||
* test below used a code point from Deseret, which sorts differently
|
||||
* than d800 dc00
|
||||
*/
|
||||
/*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/
|
||||
UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0};
|
||||
UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};
|
||||
ucol_setStrength(coll, UCOL_TERTIARY);
|
||||
doTest(coll, strA, strB, UCOL_GREATER);
|
||||
|
@ -2494,6 +2498,8 @@ static void TestCyrillicTailoring(void) {
|
|||
"\\u0410\\u0306a",
|
||||
"\\u04d0A"
|
||||
};
|
||||
log_err("*** Disabled, pending corrected FractionalUCA.txt from Mark ***\n");
|
||||
return;
|
||||
genericLocaleStarter("ru", test, 3);
|
||||
genericRulesStarter("&\\u0410 = \\u0410", test, 3);
|
||||
genericRulesStarter("&Z < \\u0410", test, 3);
|
||||
|
@ -2819,6 +2825,10 @@ static void TestVariableTopSetting(void) {
|
|||
UChar first[256] = { 0 };
|
||||
UChar second[256] = { 0 };
|
||||
UParseError parseError;
|
||||
|
||||
log_err("*** Disabled, pending amendend UCARules.txt from Mark ***\n");
|
||||
return;
|
||||
|
||||
src.opts = &opts;
|
||||
|
||||
log_verbose("Slide variable top over UCARules\n");
|
||||
|
@ -2910,7 +2920,8 @@ static void TestVariableTopSetting(void) {
|
|||
log_verbose("Testing setting variable top to contractions\n");
|
||||
{
|
||||
/* uint32_t tailoredCE = UCOL_NOT_FOUND; */
|
||||
UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);
|
||||
UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->UCAConsts+sizeof(UCAConstants));
|
||||
/*UChar *conts = (UChar *)((uint8_t *)coll->image + coll->image->contractionUCACombos);*/
|
||||
while(*conts != 0) {
|
||||
if(*(conts+2) == 0) {
|
||||
varTop1 = ucol_setVariableTop(coll, conts, -1, &status);
|
||||
|
@ -3489,9 +3500,102 @@ static void TestRuleOptions(void) {
|
|||
}
|
||||
}
|
||||
|
||||
static void TestIgnorableShifted(void) {
|
||||
#if 0
|
||||
Mark a <space> <acute> b
|
||||
Mark vs
|
||||
Mark A <space> b
|
||||
Mark I think those should differ.
|
||||
weiv these should be equal when shifted but different when non ignorable, right
|
||||
weiv except capital A would sort after lower case a
|
||||
Mark OLD implementation: IGNOREABLE: #1 > #2
|
||||
Mark NEW implementation: IGNOREABLE: #2 > #1
|
||||
Mark NON-IGNOREABLE: #1 > #2
|
||||
"a<space>b","A<space>b","a<space><grave>b","A<space><grave>b","a\\0300b","A<grave>b"
|
||||
"A<space>\\u300b","a<space><grave>b","A<space>b","a<grave>b","A<grave>b"
|
||||
#endif
|
||||
static struct {
|
||||
const char *data[50];
|
||||
const uint32_t len;
|
||||
const UColAttribute att[2];
|
||||
const UColAttribute value[2];
|
||||
} tests[] = {
|
||||
{
|
||||
{ "a \\u0300b", "a b", "A \\u0300b", "A b", "a\\u0300b", "A\\u0300b"}, 5,
|
||||
{ UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH }, { UCOL_SHIFTED, UCOL_QUATERNARY }
|
||||
},
|
||||
{
|
||||
{"a b", "A b", "a \\u0300b", "A \\u0300b", "a\\u0300b", "A\\u0300b"}, 6,
|
||||
{ UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH }, { UCOL_NON_IGNORABLE, UCOL_TERTIARY }
|
||||
},
|
||||
{
|
||||
{
|
||||
#if 0
|
||||
"a<space>b",
|
||||
"A<space>b",
|
||||
"a<space><grave>b",
|
||||
"A<space><grave>b",
|
||||
"a<grave>b",
|
||||
"A<grave>b"
|
||||
"a<lowline>b",
|
||||
"A<lowline>b",
|
||||
"a<lowline><grave>b",
|
||||
"A<lowline><grave>b",
|
||||
"a<grave>b",
|
||||
"A<grave>b"
|
||||
"a<space>b",
|
||||
"A<space>b",
|
||||
"a<space><acute>b",
|
||||
"A<space><acute>b",
|
||||
"a<acute>b",
|
||||
"A<acute>b"
|
||||
"a<lowline>b",
|
||||
"A<lowline>b",
|
||||
"a<lowline><acute>b",
|
||||
"A<lowline><acute>b",
|
||||
"a<acute>b",
|
||||
"A<acute>b"
|
||||
#endif
|
||||
"a b",
|
||||
"A b",
|
||||
"a \\u0300b",
|
||||
"A \\u0300b",
|
||||
"a\\u0300b",
|
||||
"A\\u0300b",
|
||||
"a\\u005fb",
|
||||
"A\\u005fb",
|
||||
"a\\u005f\\u0300b",
|
||||
"A\\u005f\\u0300b",
|
||||
"a\\u0300b",
|
||||
"A\\u0300b",
|
||||
"a b",
|
||||
"A b",
|
||||
"a \\u0301b",
|
||||
"A \\u0301b",
|
||||
"a\\u0301b",
|
||||
"A\\u0301b",
|
||||
"a\\u005fb",
|
||||
"A\\u005fb",
|
||||
"a\\u005f\\u0301b",
|
||||
"A\\u005f\\u0301b",
|
||||
"a\\u0301b",
|
||||
"A\\u0301b"
|
||||
}, 22,
|
||||
{ UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH }, { UCOL_SHIFTED, UCOL_QUATERNARY }
|
||||
//{ UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH }, { UCOL_NON_IGNORABLE, UCOL_TERTIARY }
|
||||
}
|
||||
};
|
||||
|
||||
int32_t i = 0;
|
||||
|
||||
for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {
|
||||
genericLocaleStarterWithOptions("", tests[i].data, tests[i].len, tests[i].att, tests[i].value, 2);
|
||||
}
|
||||
}
|
||||
|
||||
void addMiscCollTest(TestNode** root)
|
||||
{
|
||||
|
||||
/*addTest(root, &TestIgnorableShifted, "tscoll/cmsccoll/TestIgnorableShifted");*/ /* turned off for now, until I make a normal ordering */
|
||||
addTest(root, &TestRuleOptions, "tscoll/cmsccoll/TestRuleOptions");
|
||||
addTest(root, &TestBeforePrefixFailure, "tscoll/cmsccoll/TestBeforePrefixFailure");
|
||||
addTest(root, &TestContractionClosure, "tscoll/cmsccoll/TestContractionClosure");
|
||||
|
|
Loading…
Add table
Reference in a new issue