From 62c4c3530d15a54ebde69ae51620f0bdab073c55 Mon Sep 17 00:00:00 2001 From: Michael Ow Date: Wed, 10 Sep 2014 23:23:56 +0000 Subject: [PATCH] ICU-10551 Update test to check non-ignorable code points and fix comments X-SVN-Rev: 36443 --- icu4c/source/common/ucnv_err.c | 32 ++---------- icu4c/source/test/intltest/convtest.cpp | 65 +++++++++++++++++++------ 2 files changed, 52 insertions(+), 45 deletions(-) diff --git a/icu4c/source/common/ucnv_err.c b/icu4c/source/common/ucnv_err.c index 42f31daa0e4..0fb14f06470 100644 --- a/icu4c/source/common/ucnv_err.c +++ b/icu4c/source/common/ucnv_err.c @@ -58,36 +58,10 @@ * To avoid dependency on other code, this list is hard coded here. * When an ignorable code point is found and is unmappable, the default callbacks * will ignore them. - * (c == 0x00AD) || \ (Latin-1 Punctuation and Symbols) - * (c == 0x034F) || \ (Combining Diacritical Marks Grapheme Joiner) - * (c == 0x061C) || \ (Arabic Format Character) - * (c == 0x115F) || \ (Hangul Jamo Old Initial Consonants) - * (c == 0x1160) || \ (Hangul Jamo Medial Vowels) - * (0x17B4 <= c && c <= 0x17B5) || \ (Khmer Inherent Vowels) - * (0x180B <= c && c <= 0x180E) || \ (Mongolian Format Controls) - * (0x200B <= c && c <= 0x200F) || \ (General Punctuation Format Characters) - * (0x202A <= c && c <= 0x202E) || \ (General Punctuation Format Characters) - * (c == 0x2060) || \ (General Punctuation Format Characters) - * (0x2066 <= c && c <= 0x2069) || \ (General Punctuation Format Characters) - * (0x2061 <= c && c <= 0x2064) || \ (General Punctuation Invisible Operators) - * (0x206A <= c && c <= 0x206F) || \ (General Punctuation Deprecated) - * (c == 0x3164) || \ (Hangul Compatibility Jamo) - * (0x0FE00 <= c && c <= 0x0FE0F) || \ (Variation Selectors) - * (c == 0x0FEFF) || \ (Arabic Presentation Forms B) - * (c == 0x0FFA0) || \ (Halfwidth and Fullwidth Forms) - * (0x01BCA0 <= c && c <= 0x01BCA3) || \ (Shorthand Format Controls) - * (0x01D173 <= c && c <= 0x01D17A) || \ (Musical 
Symbols) - * (c == 0x0E0001) || \ (Tag Identifiers) - * (0x0E0020 <= c && c <= 0x0E007F) || \ (Tag Components) - * (0x0E0100 <= c && c <= 0x0E01EF) || \ (Variation Selectors Supplement) - * (c == 0x2065) || \ (Unassigned) - * (0x0FFF0 <= c && c <= 0x0FFF8) || \ (Unassigned) - * (c == 0x0E0000) || \ (Unassigned) - * (0x0E0002 <= c && c <= 0x0E001F) || \ (Unassigned) - * (0x0E0080 <= c && c <= 0x0E00FF) || \ (Unassigned) - * (0x0E01F0 <= c && c <= 0x0E0FFF) \ (Unassigned) + * For a list of the default ignorable code points, use this link: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3ADI%3A]&g= + * + * This list should be kept in sync with the one in CharsetCallback.java */ - #define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\ (c == 0x00AD) || \ (c == 0x034F) || \ diff --git a/icu4c/source/test/intltest/convtest.cpp b/icu4c/source/test/intltest/convtest.cpp index 400ea625ae4..6b6dbfd4e50 100644 --- a/icu4c/source/test/intltest/convtest.cpp +++ b/icu4c/source/test/intltest/convtest.cpp @@ -654,38 +654,71 @@ ConversionTest::TestGetUnicodeSet2() { void ConversionTest::TestDefaultIgnorableCallback() { UErrorCode status = U_ZERO_ERROR; - const char *name = "euc-jp-2007"; - const char *pattern = "[:Default_Ignorable_Code_Point:]"; - UnicodeSet *set = new UnicodeSet(pattern, status); + const char *cnv_name = "euc-jp-2007"; + const char *pattern_ignorable = "[:Default_Ignorable_Code_Point:]"; + const char *pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]"; + + UnicodeSet *set_ignorable = new UnicodeSet(pattern_ignorable, status); if (U_FAILURE(status)) { - dataerrln("Unable to create Unicodeset: %s - %s\n", pattern, u_errorName(status)); + dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u_errorName(status)); return; } - UConverter *cnv = cnv_open(name, status); + + UnicodeSet *set_not_ignorable = new UnicodeSet(pattern_not_ignorable, status); if (U_FAILURE(status)) { - errln("Unable to open converter: %s - %s\n", name, u_errorName(status)); + 
dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorable, u_errorName(status)); return; } - // set callback for the converter - ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status); + + UConverter *cnv = cnv_open(cnv_name, status); + if (U_FAILURE(status)) { + dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(status)); + return; + } + + // set callback for the converter + ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status); UChar32 input[1]; char output[10]; - int size = set->size(); + int32_t outputLength; + + // test default ignorables are ignored + int size = set_ignorable->size(); for (int i = 0; i < size; i++) { status = U_ZERO_ERROR; + outputLength= 0; - input[0] = set->charAt(i); + input[0] = set_ignorable->charAt(i); - ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status); - if (U_FAILURE(status)) { - errln("Callback did not ignore code point: 0x%06X on failed conversion - %s", input[0], u_errorName(status)); + outputLength = ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status); + if (U_FAILURE(status) || outputLength != 0) { + errln("Ignorable code point: 0x%06X not skipped as expected - %s", input[0], u_errorName(status)); } } - delete set; - ucnv_close(cnv); -} + // test non-ignorables are not ignored + size = set_not_ignorable->size(); + for (int i = 0; i < size; i++) { + status = U_ZERO_ERROR; + outputLength= 0; + + input[0] = set_not_ignorable->charAt(i); + + if (input[0] == 0) { + continue; + } + + outputLength = ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status); + if (U_FAILURE(status) || outputLength <= 0) { + errln("Non-ignorable code point: 0x%06X skipped unexpectedly - %s", input[0], u_errorName(status)); + } + } + + ucnv_close(cnv); + delete set_not_ignorable; + delete set_ignorable; +} // 
open testdata or ICU data converter ------------------------------------- ***