mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-15 01:42:37 +00:00
ICU-10551 Update test to check non-ignorable code points and fix comments
X-SVN-Rev: 36443
This commit is contained in:
parent
b00b0d552e
commit
62c4c3530d
2 changed files with 52 additions and 45 deletions
|
@ -58,36 +58,10 @@
|
|||
* To avoid dependency on other code, this list is hard coded here.
|
||||
* When an ignorable code point is found and is unmappable, the default callbacks
|
||||
* will ignore it.
|
||||
* (c == 0x00AD) || \ (Latin-1 Punctuation and Symbols)
|
||||
* (c == 0x034F) || \ (Combining Diacritical Marks Grapheme Joiner)
|
||||
* (c == 0x061C) || \ (Arabic Format Character)
|
||||
* (c == 0x115F) || \ (Hangul Jamo Old Initial Consonants)
|
||||
* (c == 0x1160) || \ (Hangul Jamo Medial Vowels)
|
||||
* (0x17B4 <= c && c <= 0x17B5) || \ (Khmer Inherent Vowels)
|
||||
* (0x180B <= c && c <= 0x180E) || \ (Mongolian Format Controls)
|
||||
* (0x200B <= c && c <= 0x200F) || \ (General Punctuation Format Characters)
|
||||
* (0x202A <= c && c <= 0x202E) || \ (General Punctuation Format Characters)
|
||||
* (c == 0x2060) || \ (General Punctuation Format Characters)
|
||||
* (0x2066 <= c && c <= 0x2069) || \ (General Punctuation Format Characters)
|
||||
* (0x2061 <= c && c <= 0x2064) || \ (General Punctuation Invisible Operators)
|
||||
* (0x206A <= c && c <= 0x206F) || \ (General Punctuation Deprecated)
|
||||
* (c == 0x3164) || \ (Hangul Compatibility Jamo)
|
||||
* (0x0FE00 <= c && c <= 0x0FE0F) || \ (Variation Selectors)
|
||||
* (c == 0x0FEFF) || \ (Arabic Presentation Forms B)
|
||||
* (c == 0x0FFA0) || \ (Halfwidth and Fullwidth Forms)
|
||||
* (0x01BCA0 <= c && c <= 0x01BCA3) || \ (Shorthand Format Controls)
|
||||
* (0x01D173 <= c && c <= 0x01D17A) || \ (Musical Symbols)
|
||||
* (c == 0x0E0001) || \ (Tag Identifiers)
|
||||
* (0x0E0020 <= c && c <= 0x0E007F) || \ (Tag Components)
|
||||
* (0x0E0100 <= c && c <= 0x0E01EF) || \ (Variation Selectors Supplement)
|
||||
* (c == 0x2065) || \ (Unassigned)
|
||||
* (0x0FFF0 <= c && c <= 0x0FFF8) || \ (Unassigned)
|
||||
* (c == 0x0E0000) || \ (Unassigned)
|
||||
* (0x0E0002 <= c && c <= 0x0E001F) || \ (Unassigned)
|
||||
* (0x0E0080 <= c && c <= 0x0E00FF) || \ (Unassigned)
|
||||
* (0x0E01F0 <= c && c <= 0x0E0FFF) \ (Unassigned)
|
||||
* For a list of the default ignorable code points, use this link: http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3ADI%3A]&g=
|
||||
*
|
||||
* This list should be kept in sync with the one in CharsetCallback.java
|
||||
*/
|
||||
|
||||
#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) (\
|
||||
(c == 0x00AD) || \
|
||||
(c == 0x034F) || \
|
||||
|
|
|
@ -654,38 +654,71 @@ ConversionTest::TestGetUnicodeSet2() {
|
|||
void
|
||||
ConversionTest::TestDefaultIgnorableCallback() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
const char *name = "euc-jp-2007";
|
||||
const char *pattern = "[:Default_Ignorable_Code_Point:]";
|
||||
UnicodeSet *set = new UnicodeSet(pattern, status);
|
||||
const char *cnv_name = "euc-jp-2007";
|
||||
const char *pattern_ignorable = "[:Default_Ignorable_Code_Point:]";
|
||||
const char *pattern_not_ignorable = "[:^Default_Ignorable_Code_Point:]";
|
||||
|
||||
UnicodeSet *set_ignorable = new UnicodeSet(pattern_ignorable, status);
|
||||
if (U_FAILURE(status)) {
|
||||
dataerrln("Unable to create Unicodeset: %s - %s\n", pattern, u_errorName(status));
|
||||
dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_ignorable, u_errorName(status));
|
||||
return;
|
||||
}
|
||||
UConverter *cnv = cnv_open(name, status);
|
||||
|
||||
UnicodeSet *set_not_ignorable = new UnicodeSet(pattern_not_ignorable, status);
|
||||
if (U_FAILURE(status)) {
|
||||
errln("Unable to open converter: %s - %s\n", name, u_errorName(status));
|
||||
dataerrln("Unable to create Unicodeset: %s - %s\n", pattern_not_ignorable, u_errorName(status));
|
||||
return;
|
||||
}
|
||||
// set callback for the converter
|
||||
ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
|
||||
|
||||
UConverter *cnv = cnv_open(cnv_name, status);
|
||||
if (U_FAILURE(status)) {
|
||||
dataerrln("Unable to open converter: %s - %s\n", cnv_name, u_errorName(status));
|
||||
return;
|
||||
}
|
||||
|
||||
// set callback for the converter
|
||||
ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL, NULL, NULL, &status);
|
||||
|
||||
UChar32 input[1];
|
||||
char output[10];
|
||||
int size = set->size();
|
||||
int32_t outputLength;
|
||||
|
||||
// test default ignorables are ignored
|
||||
int size = set_ignorable->size();
|
||||
for (int i = 0; i < size; i++) {
|
||||
status = U_ZERO_ERROR;
|
||||
outputLength= 0;
|
||||
|
||||
input[0] = set->charAt(i);
|
||||
input[0] = set_ignorable->charAt(i);
|
||||
|
||||
ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
errln("Callback did not ignore code point: 0x%06X on failed conversion - %s", input[0], u_errorName(status));
|
||||
outputLength = ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
|
||||
if (U_FAILURE(status) || outputLength != 0) {
|
||||
errln("Ignorable code point: 0x%06X not skipped as expected - %s", input[0], u_errorName(status));
|
||||
}
|
||||
}
|
||||
delete set;
|
||||
ucnv_close(cnv);
|
||||
}
|
||||
|
||||
// test non-ignorables are not ignored
|
||||
size = set_not_ignorable->size();
|
||||
for (int i = 0; i < size; i++) {
|
||||
status = U_ZERO_ERROR;
|
||||
outputLength= 0;
|
||||
|
||||
input[0] = set_not_ignorable->charAt(i);
|
||||
|
||||
if (input[0] == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
outputLength = ucnv_fromUChars(cnv, output, 10, UnicodeString::fromUTF32(input, 1).getTerminatedBuffer(), -1, &status);
|
||||
if (U_FAILURE(status) || outputLength <= 0) {
|
||||
errln("Non-ignorable code point: 0x%06X skipped unexpectedly - %s", input[0], u_errorName(status));
|
||||
}
|
||||
}
|
||||
|
||||
ucnv_close(cnv);
|
||||
delete set_not_ignorable;
|
||||
delete set_ignorable;
|
||||
}
|
||||
|
||||
// open testdata or ICU data converter ------------------------------------- ***
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue