From d394403080eb3dbd4bf214a384615f47c7d05f8c Mon Sep 17 00:00:00 2001 From: Alan Liu Date: Wed, 26 Jul 2000 16:27:18 +0000 Subject: [PATCH] ICU-89 add UTR15 conformance test X-SVN-Rev: 2046 --- icu4c/source/test/intltest/intltest.dsp | 4 + icu4c/source/test/intltest/normconf.cpp | 293 ++++++++++++++++++++++++ icu4c/source/test/intltest/normconf.h | 95 ++++++++ icu4c/source/test/intltest/tscoll.cpp | 10 + 4 files changed, 402 insertions(+) create mode 100644 icu4c/source/test/intltest/normconf.cpp create mode 100644 icu4c/source/test/intltest/normconf.h diff --git a/icu4c/source/test/intltest/intltest.dsp b/icu4c/source/test/intltest/intltest.dsp index f50d3730d37..d044d96697c 100644 --- a/icu4c/source/test/intltest/intltest.dsp +++ b/icu4c/source/test/intltest/intltest.dsp @@ -268,6 +268,10 @@ SOURCE=.\nmfmtrt.cpp # End Source File # Begin Source File +SOURCE=.\normconf.cpp +# End Source File +# Begin Source File + SOURCE=.\numfmtst.cpp # End Source File # Begin Source File diff --git a/icu4c/source/test/intltest/normconf.cpp b/icu4c/source/test/intltest/normconf.cpp new file mode 100644 index 00000000000..5b68b17a4a2 --- /dev/null +++ b/icu4c/source/test/intltest/normconf.cpp @@ -0,0 +1,293 @@ +/* +************************************************************************ +* Copyright (c) 1997-2000, International Business Machines +* Corporation and others. All Rights Reserved. 
+************************************************************************ +*/ + +#include "normconf.h" +#include "unicode/normlzr.h" +#include "unicode/unicode.h" +#include "cstring.h" +#include "unicode/putil.h" +#include "filestrm.h" + +#define ARRAY_LENGTH(array) (sizeof(array) / sizeof(array[0])) + +#define CASE(id,test) case id: \ + name = #test; \ + if (exec) { \ + logln(#test "---"); \ + logln((UnicodeString)""); \ + test(); \ + } \ + break + +void NormalizerConformanceTest::runIndexedTest(int32_t index, UBool exec, char* &name, char* par) { + switch (index) { + CASE(0,TestConformance); + // CASE(1,TestCase6); + default: name = ""; break; + } +} + +#define FIELD_COUNT 5 + +NormalizerConformanceTest::NormalizerConformanceTest() : + normalizer(UnicodeString("", ""), Normalizer::COMPOSE) {} + +NormalizerConformanceTest::~NormalizerConformanceTest() {} + +/** + * Test the conformance of Normalizer to + * http://www.unicode.org/unicode/reports/tr15/conformance/Draft-TestSuite.txt. + * The file is opened at the path built by concatenating u_getDataDirectory(), + * TEST_SUITE_DIR, U_FILE_SEP_STRING and TEST_SUITE_FILE (no separator is + * inserted after the data directory). If the file cannot be opened an error + * is logged and the test returns. Empty lines and lines starting with the + * # character are skipped; reading stops at the first line that hexsplit() + * rejects. Pass and fail counts are reported once the file is closed. + * NOTE(review): path is a fixed 256-byte buffer filled by uprv_strcpy and + * uprv_strcat with no length passed; confirm the data directory always fits. + */ +void NormalizerConformanceTest::TestConformance(void) { + enum { BUF_SIZE = 1024 }; + char lineBuf[BUF_SIZE]; + UnicodeString fields[FIELD_COUNT]; + UnicodeString buf; + int32_t passCount = 0; + int32_t failCount = 0; + char path[256]; + + // Construct the path to the test suite file + uprv_strcpy(path, u_getDataDirectory()); + uprv_strcat(path, TEST_SUITE_DIR); + uprv_strcat(path, U_FILE_SEP_STRING); + uprv_strcat(path, TEST_SUITE_FILE); + + FileStream *input = T_FileStream_open(path, "r"); + if (input == NULL) { + errln((UnicodeString)"Can't open " TEST_SUITE_FILE); + return; + } + + for (int32_t count = 0;;++count) { + if (T_FileStream_eof(input)) { + break; + } + T_FileStream_readLine(input, lineBuf, sizeof(lineBuf)); + UnicodeString line(lineBuf, ""); + if (line.length() == 0) continue; + + // Expect 5 columns of this format: + // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # + + // Parse out the comment. 
+ if (line.charAt(0) == 0x0023/*'#'*/) continue; + + // Parse out the fields + if (!hexsplit(line, (UChar)0x003B/*';'*/, fields, FIELD_COUNT, buf)) { + errln((UnicodeString)"Unable to parse line " + count); + break; // Syntax error + } + if (checkConformance(fields, line)) { + ++passCount; + } else { + ++failCount; + } + if ((count % 1000) == 999) { + logln((UnicodeString)"Line " + (count+1)); + } + } + + T_FileStream_close(input); + + if (failCount != 0) { + errln((UnicodeString)"Total: " + failCount + " lines failed, " + + passCount + " lines passed"); + } else { + logln((UnicodeString)"Total: " + passCount + " lines passed"); + } +} + +/** + * Verify the conformance of the given line of the Unicode + * normalization (UTR 15) test suite file. For each line, + * there are five columns, corresponding to field[0]..field[4]. + * + * The following invariants must be true for all conformant implementations + * c2 == NFC(c1) == NFC(c2) == NFC(c3) + * c3 == NFD(c1) == NFD(c2) == NFD(c3) + * c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5) + * c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5) + * + * @param field the 5 columns + * @param line the source line from the test suite file + * @return true if the test passes + */ +UBool NormalizerConformanceTest::checkConformance(const UnicodeString* field, + const UnicodeString& line) { + UBool pass = TRUE; + UErrorCode status = U_ZERO_ERROR; + UnicodeString out; + for (int32_t i=0; i 0) { + for (ch = normalizer.first(); ch != Normalizer::DONE; + ch = normalizer.next()) { + result.append((UChar)ch); + } + } else { + for (ch = normalizer.last(); ch != Normalizer::DONE; + ch = normalizer.previous()) { + result.insert(0, (UChar)ch); + } + } +} + +/** + * @param op name of normalization form, e.g., "KC" + * @param s string being normalized + * @param got value received + * @param exp expected value + * @param msg description of this test + * @return TRUE if got == exp; otherwise an error is logged and FALSE is returned + */ +UBool 
NormalizerConformanceTest::assertEqual(const UnicodeString& op, + const UnicodeString& s, + const UnicodeString& got, + const UnicodeString& exp, + const UnicodeString& msg) { + if (exp == got) return TRUE; + errln((UnicodeString)" " + msg + ") " + op + "(" + + prettify(s) + ")=" + prettify(got) + + ", exp. " + prettify(exp)); + return FALSE; +} + +/** + * Parse 4 hex digits of s starting at pos and return their combined value. + * NOTE(review): when a digit is not valid hex this returns -1 converted to + * UChar, and hexsplit appends the result without checking for it; confirm + * that this is the intended handling of malformed input. + */ +static UChar parseInt(const UnicodeString& s, int32_t pos) { + UChar value = 0; + int32_t limit = pos+4; + while (pos < limit) { + int8_t digit = Unicode::digit(s.charAt(pos++), 16); + if (digit < 0) { + return -1; // Bogus hex digit -- shouldn't happen + } + value = (value << 4) | digit; + } + return value; +} + +/** + * Split a string into pieces based on the given delimiter + * character. Then, parse the resultant fields from hex into + * characters. That is, "0040 0400;0C00;0899" yields the fields + * "\u0040\u0400", "\u0C00", "\u0899". The output array is assumed to + * be of the proper length already, and exactly outputLength + * fields are parsed. If there are too few, or a field is empty, an + * error is logged and FALSE is returned. If there are too many the + * extras are ignored. + * + * @param s the line to split + * @param delimiter character separating the fields + * @param output array receiving the parsed fields + * @param outputLength number of fields to parse into output + * @param buf scratch buffer + * @return FALSE upon failure + */ +UBool NormalizerConformanceTest::hexsplit(const UnicodeString& s, UChar delimiter, + UnicodeString* output, int32_t outputLength, + UnicodeString& buf) { + int32_t i; + int32_t pos = 0; + for (i=0; i delim) { + errln((UnicodeString)"Premature eol in " + s); + return FALSE; + } else { + UChar hex = parseInt(s, pos); + buf.append(hex); + pos += 4; + } + } + if (buf.length() < 1) { + errln((UnicodeString)"Empty field " + i + " in " + s); + return FALSE; + } + output[i] = buf; + ++pos; // Skip over delim + } + return TRUE; +} + +// Specific tests for debugging. These are generally failures taken from +// the conformance file, but culled out to make debugging easier. 
+ +void NormalizerConformanceTest::TestCase6(void) { + _testOneLine("0385;0385;00A8 0301;0020 0308 0301;0020 0308 0301;"); +} + +void NormalizerConformanceTest::_testOneLine(const UnicodeString& line) { + UnicodeString fields[FIELD_COUNT]; + UnicodeString buf; + if (!hexsplit(line, (UChar)0x003B/*';'*/, fields, FIELD_COUNT, buf)) { + errln((UnicodeString)"Unable to parse line " + line); + } else { + checkConformance(fields, line); + } +} diff --git a/icu4c/source/test/intltest/normconf.h b/icu4c/source/test/intltest/normconf.h new file mode 100644 index 00000000000..f3806c5df3c --- /dev/null +++ b/icu4c/source/test/intltest/normconf.h @@ -0,0 +1,95 @@ +/* +************************************************************************ +* Copyright (c) 1997-2000, International Business Machines +* Corporation and others. All Rights Reserved. +************************************************************************ +*/ + +#ifndef _NORMCONF +#define _NORMCONF + +#include "unicode/utypes.h" +#include "unicode/normlzr.h" +#include "intltest.h" + +#define TEST_SUITE_DIR "unidata" +#define TEST_SUITE_FILE "Draft-TestSuite.txt" + +class NormalizerConformanceTest : public IntlTest { + Normalizer normalizer; + + public: + NormalizerConformanceTest(); + ~NormalizerConformanceTest(); + + void runIndexedTest(int32_t index, UBool exec, char* &name, char* par=NULL); + + /** + * Test the conformance of Normalizer to + * http://www.unicode.org/unicode/reports/tr15/conformance/Draft-TestSuite.txt. + * The file must be named TEST_SUITE_FILE and be located in the + * TEST_SUITE_DIR subdirectory of the directory returned by + * u_getDataDirectory(). + */ + void TestConformance(void); + + // Specific tests for debugging. These are generally failures taken from + // the conformance file, but culled out to make debugging easier. + void TestCase6(void); + + private: + /** + * Verify the conformance of the given line of the Unicode + * normalization (UTR 15) test suite file. For each line, + * there are five columns, corresponding to field[0]..field[4]. 
+ * + * The following invariants must be true for all conformant implementations + * c2 == NFC(c1) == NFC(c2) == NFC(c3) + * c3 == NFD(c1) == NFD(c2) == NFD(c3) + * c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5) + * c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5) + * + * @param field the 5 columns + * @param line the source line from the test suite file + * @return true if the test passes + */ + UBool checkConformance(const UnicodeString* field, + const UnicodeString& line); + + void iterativeNorm(const UnicodeString& str, + Normalizer::EMode mode, + UnicodeString& result, + int8_t dir); + + /** + * @param op name of normalization form, e.g., "KC" + * @param s string being normalized + * @param got value received + * @param exp expected value + * @param msg description of this test + * @return TRUE if got == exp; otherwise an error is logged and FALSE is returned + */ + UBool assertEqual(const UnicodeString& op, + const UnicodeString& s, + const UnicodeString& got, + const UnicodeString& exp, + const UnicodeString& msg); + + /** + * Split a string into pieces based on the given delimiter + * character. Then, parse the resultant fields from hex into + * characters. That is, "0040 0400;0C00;0899" yields the fields + * "\u0040\u0400", "\u0C00", "\u0899". The output array is assumed to + * be of the proper length already, and exactly outputLength + * fields are parsed. If there are too few, or a field is empty, an + * error is logged and FALSE is returned. If there are too many the + * extras are ignored. 
+ * + * @param s the line to split + * @param delimiter character separating the fields + * @param output array receiving the parsed fields + * @param outputLength number of fields to parse into output + * @param buf scratch buffer + * @return FALSE upon failure + */ + UBool hexsplit(const UnicodeString& s, UChar delimiter, + UnicodeString* output, int32_t outputLength, + UnicodeString& buf); + + void _testOneLine(const UnicodeString& line); +}; + +#endif diff --git a/icu4c/source/test/intltest/tscoll.cpp b/icu4c/source/test/intltest/tscoll.cpp index f4510ade377..728563edcee 100644 --- a/icu4c/source/test/intltest/tscoll.cpp +++ b/icu4c/source/test/intltest/tscoll.cpp @@ -28,6 +28,7 @@ #include "itercoll.h" //#include "capicoll.h" // CollationCAPITest #include "tstnorm.h" +#include "normconf.h" #include "thcoll.h" void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, char* &name, char* par ) @@ -253,6 +254,15 @@ void IntlTestCollator::runIndexedTest( int32_t index, UBool exec, char* &name, c break; case 16: + name = "NormalizerConformanceTest"; + if (exec) { + logln("NormalizerConformanceTest---"); logln(""); + NormalizerConformanceTest test; + callTest( test, par ); + } + break; + + case 17: name = "CollationThaiTest"; if (exec) { logln("CollationThaiTest---"); logln("");