diff --git a/icu4c/source/test/intltest/normconf.cpp b/icu4c/source/test/intltest/normconf.cpp index 403658b61c4..85603eaaf77 100644 --- a/icu4c/source/test/intltest/normconf.cpp +++ b/icu4c/source/test/intltest/normconf.cpp @@ -47,7 +47,6 @@ void NormalizerConformanceTest::TestConformance(void) { enum { BUF_SIZE = 1024 }; char lineBuf[BUF_SIZE]; UnicodeString fields[FIELD_COUNT]; - UnicodeString buf; int32_t passCount = 0; int32_t failCount = 0; char newPath[256]; @@ -82,31 +81,36 @@ void NormalizerConformanceTest::TestConformance(void) { } - for (int32_t count = 0;;++count) { + for (int32_t count = 1;;++count) { if (T_FileStream_eof(input)) { break; } T_FileStream_readLine(input, lineBuf, (int32_t)sizeof(lineBuf)); - UnicodeString line(lineBuf, ""); - if (line.length() == 0) continue; + if (lineBuf[0] == 0 || lineBuf[0] == 10 || lineBuf[0] == 13) continue; // Expect 5 columns of this format: // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # // Parse out the comment. - if (line.charAt(0) == 0x0023/*'#'*/) continue; + if (lineBuf[0] == '#') continue; + + // Read separator lines starting with '@' + if (lineBuf[0] == '@') { + logln(lineBuf); + continue; + } // Parse out the fields - if (!hexsplit(line, (UChar)0x003B/*';'*/, fields, FIELD_COUNT, buf)) { + if (!hexsplit(lineBuf, ';', fields, FIELD_COUNT)) { errln((UnicodeString)"Unable to parse line " + count); break; // Syntax error } - if (checkConformance(fields, line)) { + if (checkConformance(fields, UnicodeString(lineBuf, ""))) { ++passCount; } else { ++failCount; } - if ((count % 1000) == 999) { + if ((count % 1000) == 0) { logln((UnicodeString)"Line " + (count+1)); } } @@ -143,36 +147,46 @@ UBool NormalizerConformanceTest::checkConformance(const UnicodeString* field, UnicodeString out; int32_t fieldNum; + /* ### TODO: reenable iterativeNorm() tests!! ### ### ### ### ### ### */ + for (int32_t i=0; i 0) { for (ch = normalizer.first(); ch != Normalizer::DONE; ch = normalizer.next()) { - result.append((UChar)ch); + result.append(ch); } } else { for (ch = normalizer.last(); ch != Normalizer::DONE; ch = normalizer.previous()) { - result.insert(0, (UChar)ch); + result.insert(0, ch); } } } @@ -255,22 +269,6 @@ UBool NormalizerConformanceTest::assertEqual(const char *op, return FALSE; } -/** - * Parse 4 hex digits at pos. - */ -static UChar parseInt(const UnicodeString& s, int32_t pos) { - UChar value = 0; - int32_t limit = pos+4; - while (pos < limit) { - int8_t digit = Unicode::digit(s.charAt(pos++), 16); - if (digit < 0) { - return (UChar) -1; // Bogus hex digit -- shouldn't happen - } - value = (UChar)((value << 4) | digit); - } - return value; -} - /** * Split a string into pieces based on the given delimiter * character. Then, parse the resultant fields from hex into @@ -280,43 +278,56 @@ static UChar parseInt(const UnicodeString& s, int32_t pos) { * fields are parsed. If there are too few an exception is * thrown. If there are too many the extras are ignored. * - * @param buf scratch buffer * @return FALSE upon failure */ -UBool NormalizerConformanceTest::hexsplit(const UnicodeString& s, UChar delimiter, - UnicodeString* output, int32_t outputLength, - UnicodeString& buf) { +UBool NormalizerConformanceTest::hexsplit(const char *s, char delimiter, + UnicodeString output[], int32_t outputLength) { + const char *t = s; + char *end = NULL; + UChar32 c; int32_t i; int32_t pos = 0; for (i=0; i delim) { - errln((UnicodeString)"Premature eol in " + s); + + // read a sequence of code points + output[i].remove(); + for(;;) { + c = (UChar32)uprv_strtoul(t, &end, 16); + + if( (char *)t == end || + (uint32_t)c > 0x10ffff || + (*end != ' ' && *end != '\t' && *end != delimiter) + ) { + errln(UnicodeString("Bad field ", "") + (i + 1) + " in " + UnicodeString(s, "")); return FALSE; - } else { - UChar hex = parseInt(s, pos); - if (hex == 0xFFFF) { - errln((UnicodeString)"Bad field " + i + " in " + s); + } + + output[i].append(c); + + t = (const char *)end; + + // skip whitespace + while(*t == ' ' || *t == '\t') { + ++t; + } + + if(*t == delimiter) { + ++t; + break; + } + if(*t == 0) { + if((i + 1) == outputLength) { + return TRUE; + } else { + errln(UnicodeString("Missing field(s) in ", "") + s + " only " + (i + 1) + " out of " + outputLength); + return FALSE; } - buf.append(hex); - pos += 4; } } - if (buf.length() < 1) { - errln((UnicodeString)"Empty field " + i + " in " + s); - return FALSE; - } - output[i] = buf; - ++pos; // Skip over delim } return TRUE; } @@ -328,10 +339,9 @@ void NormalizerConformanceTest::TestCase6(void) { _testOneLine("0385;0385;00A8 0301;0020 0308 0301;0020 0308 0301;"); } -void NormalizerConformanceTest::_testOneLine(const UnicodeString& line) { +void NormalizerConformanceTest::_testOneLine(const char *line) { UnicodeString fields[FIELD_COUNT]; - UnicodeString buf; - if (!hexsplit(line, (UChar)0x003B/*';'*/, fields, FIELD_COUNT, buf)) { + if (!hexsplit(line, ';', fields, FIELD_COUNT)) { errln((UnicodeString)"Unable to parse line " + line); } else { checkConformance(fields, line); diff --git a/icu4c/source/test/intltest/normconf.h b/icu4c/source/test/intltest/normconf.h index 696c19f83d1..fcd3e2eafdb 100644 --- a/icu4c/source/test/intltest/normconf.h +++ b/icu4c/source/test/intltest/normconf.h @@ -86,11 +86,10 @@ class NormalizerConformanceTest : public IntlTest { * @param buf scratch buffer * @return FALSE upon failure */ - UBool hexsplit(const UnicodeString& s, UChar delimiter, - UnicodeString* output, int32_t outputLength, - UnicodeString& buf); + UBool hexsplit(const char *s, char delimiter, + UnicodeString output[], int32_t outputLength); - void _testOneLine(const UnicodeString& line); + void _testOneLine(const char *line); }; #endif