ICU-10551 Update test to check non-ignorable code points and fix comments

X-SVN-Rev: 36443
This commit is contained in:
Michael Ow 2014-09-10 23:23:56 +00:00
parent b00b0d552e
commit 62c4c3530d
2 changed files with 52 additions and 45 deletions

View file

@ -58,36 +58,10 @@
* To avoid dependency on other code, this list is hard coded here.
* When an ignorable code point is found and is unmappable, the default callbacks
* will ignore it.
* (c == 0x00AD) || \ (Latin-1 Punctuation and Symbols)
* (c == 0x034F) || \ (Combining Diacritical Marks Grapheme Joiner)
* (c == 0x061C) || \ (Arabic Format Character)
* (c == 0x115F) || \ (Hangul Jamo Old Initial Consonants)
* (c == 0x1160) || \ (Hangul Jamo Medial Vowels)
* (0x17B4 <= c && c <= 0x17B5) || \ (Khmer Inherent Vowels)
* (0x180B <= c && c <= 0x180E) || \ (Mongolian Format Controls)
* (0x200B <= c && c <= 0x200F) || \ (General Punctuation Format Characters)
* (0x202A <= c && c <= 0x202E) || \ (General Punctuation Format Characters)
* (c == 0x2060) || \ (General Punctuation Format Characters)
* (0x2066 <= c && c <= 0x2069) || \ (General Punctuation Format Characters)
* (0x2061 <= c && c <= 0x2064) || \ (General Punctuation Invisible Operators)
* (0x206A <= c && c <= 0x206F) || \ (General Punctuation Deprecated)
* (c == 0x3164) || \ (Hangul Compatibility Jamo)
* (0x0FE00 <= c && c <= 0x0FE0F) || \ (Variation Selectors)
* (c == 0x0FEFF) || \ (Arabic Presentation Forms B)
* (c == 0x0FFA0) || \ (Halfwidth and Fullwidth Forms)
* (0x01BCA0 <= c && c <= 0x01BCA3) || \ (Shorthand Format Controls)
* (0x01D173 <= c && c <= 0x01D17A) || \ (Musical Symbols)
* (c == 0x0E0001) || \ (Tag Identifiers)
* (0x0E0020 <= c && c <= 0x0E007F) || \ (Tag Components)
* (0x0E0100 <= c && c <= 0x0E01EF) || \ (Variation Selectors Supplement)
* (c == 0x2065) || \ (Unassigned)
* (0x0FFF0 <= c && c <= 0x0FFF8) || \ (Unassigned)
* (c == 0x0E0000) || \ (Unassigned)
* (0x0E0002 <= c && c <= 0x0E001F) || \ (Unassigned)
* (0x0E0080 <= c && c <= 0x0E00FF) || \ (Unassigned)
* (0x0E01F0 <= c && c <= 0x0E0FFF) \ (Unassigned)
* For a list of the default ignorable code points, use this link: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3ADI%3A]&g=
*
* This list should be kept in sync with the one in CharsetCallback.java
*/
#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\
(c == 0x00AD) || \
(c == 0x034F) || \

View file

@ -654,38 +654,71 @@ ConversionTest::TestGetUnicodeSet2() {
void
ConversionTest::TestDefaultIgnorableCallback() {
UErrorCode status = U_ZERO_ERROR;
const char *name = "euc-jp-2007";
const char *pattern = "[:Default_Ignorable_Code_Point:]";
UnicodeSet *set = new UnicodeSet(pattern, status);
const char *cnv_name = "euc-jp-2007";
const char *pattern_ignorable = "[:Default_Ignorable_Code_Point:]";
const char *pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]";
UnicodeSet *set_ignorable = new UnicodeSet(pattern_ignorable, status);
if (U_FAILURE(status)) {
dataerrln("Unable to create Unicodeset: %s - %s\n", pattern, u_errorName(status));
dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u_errorName(status));
return;
}
UConverter *cnv = cnv_open(name, status);
UnicodeSet *set_not_ignorable = new UnicodeSet(pattern_not_ignorable, status);
if (U_FAILURE(status)) {
errln("Unable to open converter: %s - %s\n", name, u_errorName(status));
dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorable, u_errorName(status));
return;
}
// set callback for the converter
ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
UConverter *cnv = cnv_open(cnv_name, status);
if (U_FAILURE(status)) {
dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(status));
return;
}
// set callback for the converter
ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status);
UChar32 input[1];
char output[10];
int size = set->size();
int32_t outputLength;
// test default ignorables are ignored
int size = set_ignorable->size();
for (int i = 0; i < size; i++) {
status = U_ZERO_ERROR;
outputLength= 0;
input[0] = set->charAt(i);
input[0] = set_ignorable->charAt(i);
ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
if (U_FAILURE(status)) {
errln("Callback did not ignore code point: 0x%06X on failed conversion - %s", input[0], u_errorName(status));
outputLength = ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
if (U_FAILURE(status) || outputLength != 0) {
errln("Ignorable code point: 0x%06X not skipped as expected - %s", input[0], u_errorName(status));
}
}
delete set;
ucnv_close(cnv);
}
// test non-ignorables are not ignored
size = set_not_ignorable->size();
for (int i = 0; i < size; i++) {
status = U_ZERO_ERROR;
outputLength= 0;
input[0] = set_not_ignorable->charAt(i);
if (input[0] == 0) {
continue;
}
outputLength = ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
if (U_FAILURE(status) || outputLength <= 0) {
errln("Non-ignorable code point: 0x%06X skipped unexpectedly - %s", input[0], u_errorName(status));
}
}
ucnv_close(cnv);
delete set_not_ignorable;
delete set_ignorable;
}
// open testdata or ICU data converter ------------------------------------- ***