mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 22:15:31 +00:00
ICU-13630 switch from IdnaTest.txt to IdnaTestV2.txt (new in Unicode 11); see Unicode PRI 375
X-SVN-Rev: 41294
This commit is contained in:
parent
c9680037cc
commit
a4e66ded6d
8 changed files with 12773 additions and 15782 deletions
|
@ -44,7 +44,7 @@ public:
|
|||
|
||||
void checkIdnaTestResult(const char *line, const char *type,
|
||||
const UnicodeString &expected, const UnicodeString &result,
|
||||
const IDNAInfo &info);
|
||||
const char *status, const IDNAInfo &info);
|
||||
void idnaTestOneLine(char *fields[][2], UErrorCode &errorCode);
|
||||
|
||||
private:
|
||||
|
@ -896,7 +896,7 @@ void UTS46Test::TestSomeCases() {
|
|||
|
||||
namespace {
|
||||
|
||||
const int32_t kNumFields = 4; // Will need 5 when we read NV8 from the optional fifth column.
|
||||
const int32_t kNumFields = 7;
|
||||
|
||||
void U_CALLCONV
|
||||
idnaTestLineFn(void *context,
|
||||
|
@ -905,17 +905,43 @@ idnaTestLineFn(void *context,
|
|||
reinterpret_cast<UTS46Test *>(context)->idnaTestOneLine(fields, *pErrorCode);
|
||||
}
|
||||
|
||||
// Converts one delimited field of a test-data line into a UnicodeString.
// The field is the byte range [field[0], field[1]); it is decoded as UTF-8,
// trimmed, and then passed through UnicodeString::unescape() so that escape
// sequences written in the data file become the characters they denote.
UnicodeString s16FromField(char *(&field)[2]) {
    const char *begin = field[0];
    const int32_t numBytes = static_cast<int32_t>(field[1] - begin);
    UnicodeString text = UnicodeString::fromUTF8(StringPiece(begin, numBytes));
    text.trim();  // trims in place; unescape() below returns a new string
    return text.unescape();
}
|
||||
|
||||
std::string statusFromField(char *(&field)[2]) {
|
||||
const char *start = u_skipWhitespace(field[0]);
|
||||
std::string status;
|
||||
if (start != field[1]) {
|
||||
int32_t length = (int32_t)(field[1] - start);
|
||||
while (length > 0 && (start[length - 1] == u' ' || start[length - 1] == u'\t')) {
|
||||
--length;
|
||||
}
|
||||
status.assign(start, length);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void UTS46Test::checkIdnaTestResult(const char *line, const char *type,
|
||||
const UnicodeString &expected, const UnicodeString &result,
|
||||
const IDNAInfo &info) {
|
||||
const char *status, const IDNAInfo &info) {
|
||||
// An error in toUnicode or toASCII is indicated by a value in square brackets,
|
||||
// such as "[B5 B6]".
|
||||
UBool expectedHasErrors = !expected.isEmpty() && expected[0] == u'[';
|
||||
UBool expectedHasErrors = FALSE;
|
||||
if (*status != 0) {
|
||||
if (*status != u'[') {
|
||||
errln("%s status field does not start with '[': %s\n %s", type, status, line);
|
||||
}
|
||||
if (strcmp(status, u8"[]") != 0) {
|
||||
expectedHasErrors = TRUE;
|
||||
}
|
||||
}
|
||||
if (expectedHasErrors != info.hasErrors()) {
|
||||
errln("%s expected errors %d != %d = actual has errors: %04lx\n %s",
|
||||
type, expectedHasErrors, info.hasErrors(), (long)info.getErrors(), line);
|
||||
errln("%s expected errors %s %d != %d = actual has errors: %04lx\n %s",
|
||||
type, status, expectedHasErrors, info.hasErrors(), (long)info.getErrors(), line);
|
||||
}
|
||||
if (!expectedHasErrors && expected != result) {
|
||||
errln("%s expected != actual\n %s", type, line);
|
||||
|
@ -925,57 +951,68 @@ void UTS46Test::checkIdnaTestResult(const char *line, const char *type,
|
|||
}
|
||||
|
||||
void UTS46Test::idnaTestOneLine(char *fields[][2], UErrorCode &errorCode) {
|
||||
// Column 1: type - T for transitional, N for nontransitional, B for both
|
||||
const char *typePtr = u_skipWhitespace(fields[0][0]);
|
||||
const char *limit;
|
||||
char typeChar;
|
||||
if (typePtr == fields[0][1] ||
|
||||
((typeChar = *typePtr) != 'B' && typeChar != 'N' && typeChar != 'T') ||
|
||||
(limit = u_skipWhitespace(typePtr + 1)) != fields[0][1]) {
|
||||
errln("empty or unknown type field: %s", fields[0][0]);
|
||||
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
// IdnaTestV2.txt (since Unicode 11)
|
||||
// Column 1: source
|
||||
// The source string to be tested
|
||||
UnicodeString source = s16FromField(fields[0]);
|
||||
|
||||
// Column 2: source - the source string to be tested
|
||||
int32_t length = (int32_t)(fields[1][1] - fields[1][0]);
|
||||
UnicodeString source16 = UnicodeString::fromUTF8(StringPiece(fields[1][0], length)).
|
||||
trim().unescape();
|
||||
|
||||
// Column 3: toUnicode - the result of applying toUnicode to the source.
|
||||
// Column 2: toUnicode
|
||||
// The result of applying toUnicode to the source, with Transitional_Processing=false.
|
||||
// A blank value means the same as the source value.
|
||||
length = (int32_t)(fields[2][1] - fields[2][0]);
|
||||
UnicodeString unicode16 = UnicodeString::fromUTF8(StringPiece(fields[2][0], length)).
|
||||
trim().unescape();
|
||||
if (unicode16.isEmpty()) {
|
||||
unicode16 = source16;
|
||||
UnicodeString toUnicode = s16FromField(fields[1]);
|
||||
if (toUnicode.isEmpty()) {
|
||||
toUnicode = source;
|
||||
}
|
||||
|
||||
// Column 4: toASCII - the result of applying toASCII to the source, using the specified type.
|
||||
// Column 3: toUnicodeStatus
|
||||
// A set of status codes, each corresponding to a particular test.
|
||||
// A blank value means [].
|
||||
std::string toUnicodeStatus = statusFromField(fields[2]);
|
||||
|
||||
// Column 4: toAsciiN
|
||||
// The result of applying toASCII to the source, with Transitional_Processing=false.
|
||||
// A blank value means the same as the toUnicode value.
|
||||
length = (int32_t)(fields[3][1] - fields[3][0]);
|
||||
UnicodeString ascii16 = UnicodeString::fromUTF8(StringPiece(fields[3][0], length)).
|
||||
trim().unescape();
|
||||
if (ascii16.isEmpty()) {
|
||||
ascii16 = unicode16;
|
||||
UnicodeString toAsciiN = s16FromField(fields[3]);
|
||||
if (toAsciiN.isEmpty()) {
|
||||
toAsciiN = toUnicode;
|
||||
}
|
||||
|
||||
// Column 5: NV8 - present if the toUnicode value would not be a valid domain name under IDNA2008. Not a normative field.
|
||||
// Ignored as long as we do not implement and test vanilla IDNA2008.
|
||||
// Column 5: toAsciiNStatus
|
||||
// A set of status codes, each corresponding to a particular test.
|
||||
// A blank value means the same as the toUnicodeStatus value.
|
||||
std::string toAsciiNStatus = statusFromField(fields[4]);
|
||||
if (toAsciiNStatus.empty()) {
|
||||
toAsciiNStatus = toUnicodeStatus;
|
||||
}
|
||||
|
||||
// Column 6: toAsciiT
|
||||
// The result of applying toASCII to the source, with Transitional_Processing=true.
|
||||
// A blank value means the same as the toAsciiN value.
|
||||
UnicodeString toAsciiT = s16FromField(fields[5]);
|
||||
if (toAsciiT.isEmpty()) {
|
||||
toAsciiT = toAsciiN;
|
||||
}
|
||||
|
||||
// Column 7: toAsciiTStatus
|
||||
// A set of status codes, each corresponding to a particular test.
|
||||
// A blank value means the same as the toAsciiNStatus value.
|
||||
std::string toAsciiTStatus = statusFromField(fields[6]);
|
||||
if (toAsciiTStatus.empty()) {
|
||||
toAsciiTStatus = toAsciiNStatus;
|
||||
}
|
||||
|
||||
// ToASCII/ToUnicode, transitional/nontransitional
|
||||
UnicodeString uN, aN, aT;
|
||||
IDNAInfo uNInfo, aNInfo, aTInfo;
|
||||
nontrans->nameToUnicode(source16, uN, uNInfo, errorCode);
|
||||
checkIdnaTestResult(fields[0][0], "toUnicodeNontrans", unicode16, uN, uNInfo);
|
||||
if (typeChar == 'T' || typeChar == 'B') {
|
||||
trans->nameToASCII(source16, aT, aTInfo, errorCode);
|
||||
checkIdnaTestResult(fields[0][0], "toASCIITrans", ascii16, aT, aTInfo);
|
||||
}
|
||||
if (typeChar == 'N' || typeChar == 'B') {
|
||||
nontrans->nameToASCII(source16, aN, aNInfo, errorCode);
|
||||
checkIdnaTestResult(fields[0][0], "toASCIINontrans", ascii16, aN, aNInfo);
|
||||
}
|
||||
nontrans->nameToUnicode(source, uN, uNInfo, errorCode);
|
||||
checkIdnaTestResult(fields[0][0], "toUnicodeNontrans", toUnicode, uN,
|
||||
toUnicodeStatus.c_str(), uNInfo);
|
||||
nontrans->nameToASCII(source, aN, aNInfo, errorCode);
|
||||
checkIdnaTestResult(fields[0][0], "toASCIINontrans", toAsciiN, aN,
|
||||
toAsciiNStatus.c_str(), aNInfo);
|
||||
trans->nameToASCII(source, aT, aTInfo, errorCode);
|
||||
checkIdnaTestResult(fields[0][0], "toASCIITrans", toAsciiT, aT,
|
||||
toAsciiTStatus.c_str(), aTInfo);
|
||||
}
|
||||
|
||||
// TODO: de-duplicate
|
||||
|
@ -990,7 +1027,7 @@ void UTS46Test::IdnaTest() {
|
|||
return;
|
||||
}
|
||||
CharString path(sourceTestDataPath, errorCode);
|
||||
path.appendPathPart("IdnaTest.txt", errorCode);
|
||||
path.appendPathPart("IdnaTestV2.txt", errorCode);
|
||||
LocalStdioFilePointer idnaTestFile(fopen(path.data(), "r"));
|
||||
if (idnaTestFile.isNull()) {
|
||||
errln("unable to open %s", path.data());
|
||||
|
|
7848
icu4c/source/test/testdata/IdnaTest.txt
vendored
7848
icu4c/source/test/testdata/IdnaTest.txt
vendored
File diff suppressed because it is too large
Load diff
6310
icu4c/source/test/testdata/IdnaTestV2.txt
vendored
Normal file
6310
icu4c/source/test/testdata/IdnaTestV2.txt
vendored
Normal file
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -728,14 +728,23 @@ public class UTS46Test extends TestFmwk {
|
|||
}
|
||||
|
||||
private void checkIdnaTestResult(String line, String type,
|
||||
String expected, CharSequence result, IDNA.Info info) {
|
||||
String expected, CharSequence result, String status, IDNA.Info info) {
|
||||
// An error in toUnicode or toASCII is indicated by a value in square brackets,
|
||||
// such as "[B5 B6]".
|
||||
boolean expectedHasErrors = !expected.isEmpty() && expected.charAt(0) == '[';
|
||||
boolean expectedHasErrors = false;
|
||||
if (!status.isEmpty()) {
|
||||
if (status.charAt(0) != '[') {
|
||||
errln(String.format("%s status field does not start with '[': %s\n %s",
|
||||
type, status, line));
|
||||
}
|
||||
if (!status.equals("[]")) {
|
||||
expectedHasErrors = true;
|
||||
}
|
||||
}
|
||||
if (expectedHasErrors != info.hasErrors()) {
|
||||
errln(String.format(
|
||||
"%s expected errors %b != %b = actual has errors: %s\n %s",
|
||||
type, expectedHasErrors, info.hasErrors(), info.getErrors(), line));
|
||||
"%s expected errors %s %b != %b = actual has errors: %s\n %s",
|
||||
type, status, expectedHasErrors, info.hasErrors(), info.getErrors(), line));
|
||||
}
|
||||
if (!expectedHasErrors && !UTF16Plus.equal(expected, result)) {
|
||||
errln(String.format("%s expected != actual\n %s", type, line));
|
||||
|
@ -746,7 +755,7 @@ public class UTS46Test extends TestFmwk {
|
|||
|
||||
@Test
|
||||
public void IdnaTest() throws IOException {
|
||||
BufferedReader idnaTestFile = TestUtil.getDataReader("unicode/IdnaTest.txt", "UTF-8");
|
||||
BufferedReader idnaTestFile = TestUtil.getDataReader("unicode/IdnaTestV2.txt", "UTF-8");
|
||||
Pattern semi = Pattern.compile(";");
|
||||
try {
|
||||
String line;
|
||||
|
@ -761,48 +770,65 @@ public class UTS46Test extends TestFmwk {
|
|||
continue; // Skip empty and comment-only lines.
|
||||
}
|
||||
|
||||
// Column 1: type - T for transitional, N for nontransitional, B for both
|
||||
String type = fields[0].trim();
|
||||
char typeChar;
|
||||
if (type.length() != 1 ||
|
||||
((typeChar = type.charAt(0)) != 'B' && typeChar != 'N' && typeChar != 'T')) {
|
||||
errln("empty or unknown type field: " + line);
|
||||
return;
|
||||
}
|
||||
// IdnaTestV2.txt (since Unicode 11)
|
||||
// Column 1: source
|
||||
// The source string to be tested
|
||||
String source = Utility.unescape(fields[0].trim());
|
||||
|
||||
// Column 2: source - the source string to be tested
|
||||
String source16 = Utility.unescape(fields[1].trim());
|
||||
|
||||
// Column 3: toUnicode - the result of applying toUnicode to the source.
|
||||
// Column 2: toUnicode
|
||||
// The result of applying toUnicode to the source, with Transitional_Processing=false.
|
||||
// A blank value means the same as the source value.
|
||||
String unicode16 = Utility.unescape(fields[2].trim());
|
||||
if (unicode16.isEmpty()) {
|
||||
unicode16 = source16;
|
||||
String toUnicode = Utility.unescape(fields[1].trim());
|
||||
if (toUnicode.isEmpty()) {
|
||||
toUnicode = source;
|
||||
}
|
||||
|
||||
// Column 4: toASCII - the result of applying toASCII to the source, using the specified type.
|
||||
// Column 3: toUnicodeStatus
|
||||
// A set of status codes, each corresponding to a particular test.
|
||||
// A blank value means [].
|
||||
String toUnicodeStatus = fields[2].trim();
|
||||
|
||||
// Column 4: toAsciiN
|
||||
// The result of applying toASCII to the source, with Transitional_Processing=false.
|
||||
// A blank value means the same as the toUnicode value.
|
||||
String ascii16 = Utility.unescape(fields[3].trim());
|
||||
if (ascii16.isEmpty()) {
|
||||
ascii16 = unicode16;
|
||||
String toAsciiN = Utility.unescape(fields[3].trim());
|
||||
if (toAsciiN.isEmpty()) {
|
||||
toAsciiN = toUnicode;
|
||||
}
|
||||
|
||||
// Column 5: NV8 - present if the toUnicode value would not be a valid domain name under IDNA2008. Not a normative field.
|
||||
// Ignored as long as we do not implement and test vanilla IDNA2008.
|
||||
// Column 5: toAsciiNStatus
|
||||
// A set of status codes, each corresponding to a particular test.
|
||||
// A blank value means the same as the toUnicodeStatus value.
|
||||
String toAsciiNStatus = fields[4].trim();
|
||||
if (toAsciiNStatus.isEmpty()) {
|
||||
toAsciiNStatus = toUnicodeStatus;
|
||||
}
|
||||
|
||||
// Column 6: toAsciiT
|
||||
// The result of applying toASCII to the source, with Transitional_Processing=true.
|
||||
// A blank value means the same as the toAsciiN value.
|
||||
String toAsciiT = Utility.unescape(fields[5].trim());
|
||||
if (toAsciiT.isEmpty()) {
|
||||
toAsciiT = toAsciiN;
|
||||
}
|
||||
|
||||
// Column 7: toAsciiTStatus
|
||||
// A set of status codes, each corresponding to a particular test.
|
||||
// A blank value means the same as the toAsciiNStatus value.
|
||||
String toAsciiTStatus = fields[6].trim();
|
||||
if (toAsciiTStatus.isEmpty()) {
|
||||
toAsciiTStatus = toAsciiNStatus;
|
||||
}
|
||||
|
||||
// ToASCII/ToUnicode, transitional/nontransitional
|
||||
StringBuilder uN, aN, aT;
|
||||
IDNA.Info uNInfo, aNInfo, aTInfo;
|
||||
nontrans.nameToUnicode(source16, uN = new StringBuilder(), uNInfo = new IDNA.Info());
|
||||
checkIdnaTestResult(line, "toUnicodeNontrans", unicode16, uN, uNInfo);
|
||||
if (typeChar == 'T' || typeChar == 'B') {
|
||||
trans.nameToASCII(source16, aT = new StringBuilder(), aTInfo = new IDNA.Info());
|
||||
checkIdnaTestResult(line, "toASCIITrans", ascii16, aT, aTInfo);
|
||||
}
|
||||
if (typeChar == 'N' || typeChar == 'B') {
|
||||
nontrans.nameToASCII(source16, aN = new StringBuilder(), aNInfo = new IDNA.Info());
|
||||
checkIdnaTestResult(line, "toASCIINontrans", ascii16, aN, aNInfo);
|
||||
}
|
||||
nontrans.nameToUnicode(source, uN = new StringBuilder(), uNInfo = new IDNA.Info());
|
||||
checkIdnaTestResult(line, "toUnicodeNontrans", toUnicode, uN, toUnicodeStatus, uNInfo);
|
||||
nontrans.nameToASCII(source, aN = new StringBuilder(), aNInfo = new IDNA.Info());
|
||||
checkIdnaTestResult(line, "toASCIINontrans", toAsciiN, aN, toAsciiNStatus, aNInfo);
|
||||
trans.nameToASCII(source, aT = new StringBuilder(), aTInfo = new IDNA.Info());
|
||||
checkIdnaTestResult(line, "toASCIITrans", toAsciiT, aT, toAsciiTStatus, aTInfo);
|
||||
}
|
||||
} finally {
|
||||
idnaTestFile.close();
|
||||
|
|
|
@ -26,15 +26,19 @@ public final class TestUtil {
|
|||
* Return an input stream on the data file at path 'name' rooted at the data path
|
||||
*/
|
||||
public static final InputStream getDataStream(String name) throws IOException {
|
||||
String path = DATA_PATH + name;
|
||||
InputStream is = null;
|
||||
try {
|
||||
is = TestUtil.class.getResourceAsStream(DATA_PATH + name);
|
||||
is = TestUtil.class.getResourceAsStream(path);
|
||||
} catch (Throwable t) {
|
||||
IOException ex =
|
||||
new IOException("data resource '" + name + "' not found");
|
||||
new IOException("data resource '" + path + "' not found");
|
||||
ex.initCause(t);
|
||||
throw ex;
|
||||
}
|
||||
if (is == null) {
|
||||
throw new IOException("data resource '" + path + "' not found");
|
||||
}
|
||||
return is;
|
||||
}
|
||||
|
||||
|
|
|
@ -1603,7 +1603,7 @@ _files = {
|
|||
"emoji-data.txt": (DontCopy, ParseNamedProperties),
|
||||
"GraphemeBreakProperty.txt": (DontCopy, ParseGraphemeBreakProperty),
|
||||
"GraphemeBreakTest.txt": (CopyOnly, "testdata"),
|
||||
"IdnaTest.txt": (CopyOnly, "testdata"),
|
||||
"IdnaTestV2.txt": (CopyOnly, "testdata"),
|
||||
"IndicPositionalCategory.txt": (DontCopy, ParseIndicPositionalCategory),
|
||||
"IndicSyllabicCategory.txt": (DontCopy, ParseIndicSyllabicCategory),
|
||||
"LineBreak.txt": (DontCopy, ParseLineBreak),
|
||||
|
|
Loading…
Add table
Reference in a new issue