diff --git a/.ci-builds/.azure-pipelines.yml b/.ci-builds/.azure-pipelines.yml index ed13185c09e..37a4e9c7887 100644 --- a/.ci-builds/.azure-pipelines.yml +++ b/.ci-builds/.azure-pipelines.yml @@ -57,7 +57,11 @@ jobs: vmImage: 'Ubuntu 16.04' steps: - script: | - cd icu4c/source && ICU_DATA_FILTER_FILE=../../.ci-builds/data-filter.json ./runConfigureICU Linux && make -j2 + cd icu4c/source && \ + ICU_DATA_FILTER_FILE=../../.ci-builds/data-filter.json ./runConfigureICU Linux && \ + make -j2 tests && \ + \[ ! -d data/out/build/icudt66l/translit \] && \ + (cd test/intltest && LD_LIBRARY_PATH=../../lib:../../tools/ctestfw ./intltest translit/TransliteratorTest/TestBasicTransliteratorEvenWithoutData) displayName: 'Build with Data Filter' env: CC: clang diff --git a/.ci-builds/data-filter.json b/.ci-builds/data-filter.json index 177478acff2..d2dd74d4bb1 100644 --- a/.ci-builds/data-filter.json +++ b/.ci-builds/data-filter.json @@ -8,16 +8,18 @@ ] }, // Test mixed feature filter and resource filter +// Exclude translit data so we can run test for ICU-20673 "featureFilters": { "misc": { "whitelist": ["supplementalData"] - } + }, + "translit": "exclude" }, "resourceFilters": [ { "categories": ["misc"], "files": { - "whitelist": ["supplementalData"] + "whitelist": ["supplementalData"] }, "rules": ["+/*"] } @@ -27,8 +29,8 @@ "directory": "$SRC", "replacements": [ { - "src": "translit/Zawgyi_my.txt", - "dest": "translit/Zawgyi_my.txt" + "src": "brkitr/rules/line.txt", + "dest": "brkitr/rules/line_normal.txt" }, "misc/dayPeriods.txt" ] diff --git a/icu4c/source/i18n/translit.cpp b/icu4c/source/i18n/translit.cpp index dae87d06d79..ef44f42aa66 100644 --- a/icu4c/source/i18n/translit.cpp +++ b/icu4c/source/i18n/translit.cpp @@ -1508,28 +1508,35 @@ UBool Transliterator::initializeRegistry(UErrorCode &status) { */ //static const char translit_index[] = "translit_index"; + UErrorCode lstatus = U_ZERO_ERROR; UResourceBundle *bundle, *transIDs, *colBund; - bundle = 
ures_open(U_ICUDATA_TRANSLIT, NULL/*open default locale*/, &status); - transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status); + bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open default locale*/, &lstatus); + transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &lstatus); const UnicodeString T_PART = UNICODE_STRING_SIMPLE("-t-"); int32_t row, maxRows; - if (U_SUCCESS(status)) { + if (lstatus == U_MEMORY_ALLOCATION_ERROR) { + delete registry; + registry = nullptr; + status = U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + if (U_SUCCESS(lstatus)) { maxRows = ures_getSize(transIDs); for (row = 0; row < maxRows; row++) { - colBund = ures_getByIndex(transIDs, row, 0, &status); - if (U_SUCCESS(status)) { + colBund = ures_getByIndex(transIDs, row, 0, &lstatus); + if (U_SUCCESS(lstatus)) { UnicodeString id(ures_getKey(colBund), -1, US_INV); if(id.indexOf(T_PART) != -1) { ures_close(colBund); continue; } - UResourceBundle* res = ures_getNextResource(colBund, NULL, &status); + UResourceBundle* res = ures_getNextResource(colBund, NULL, &lstatus); const char* typeStr = ures_getKey(res); UChar type; u_charsToUChars(typeStr, &type, 1); - if (U_SUCCESS(status)) { + if (U_SUCCESS(lstatus)) { int32_t len = 0; const UChar *resString; switch (type) { @@ -1539,19 +1546,19 @@ UBool Transliterator::initializeRegistry(UErrorCode &status) { // row[2]=resource, row[3]=direction { - resString = ures_getStringByKey(res, "resource", &len, &status); + resString = ures_getStringByKey(res, "resource", &len, &lstatus); UBool visible = (type == 0x0066 /*f*/); UTransDirection dir = - (ures_getUnicodeStringByKey(res, "direction", &status).charAt(0) == + (ures_getUnicodeStringByKey(res, "direction", &lstatus).charAt(0) == 0x0046 /*F*/) ? 
UTRANS_FORWARD : UTRANS_REVERSE; - registry->put(id, UnicodeString(TRUE, resString, len), dir, TRUE, visible, status); + registry->put(id, UnicodeString(TRUE, resString, len), dir, TRUE, visible, lstatus); } break; case 0x61: // 'a' // 'alias'; row[2]=createInstance argument - resString = ures_getString(res, &len, &status); - registry->put(id, UnicodeString(TRUE, resString, len), TRUE, TRUE, status); + resString = ures_getString(res, &len, &lstatus); + registry->put(id, UnicodeString(TRUE, resString, len), TRUE, TRUE, lstatus); break; } } diff --git a/icu4c/source/test/intltest/transtst.cpp b/icu4c/source/test/intltest/transtst.cpp index 965fca88ebb..fd7f733a913 100644 --- a/icu4c/source/test/intltest/transtst.cpp +++ b/icu4c/source/test/intltest/transtst.cpp @@ -196,6 +196,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec, TESTCASE(82,TestHalfwidthFullwidth); TESTCASE(83,TestThai); TESTCASE(84,TestAny); + TESTCASE(85,TestBasicTransliteratorEvenWithoutData); default: name = ""; break; } } @@ -1508,6 +1509,81 @@ void TransliteratorTest::TestNormalizationTransliterator() { delete t; } +/** + * Test we can create basic transliterator even without data. 
+ * Regression coverage for ICU-20673: the CI data-filter build excludes the translit data and
+ * then runs this test, so every ID and rule set below must resolve without that data.
+ */
+void TransliteratorTest::TestBasicTransliteratorEvenWithoutData() {
+    const char16_t* TEST_DATA = u"\u0124e\u0301 \uFB01nd x";
+    const char16_t* EXPECTED_RESULTS[] = {
+        u"H\u0302e\u0301 \uFB01nd x",  // NFD
+        u"\u0124\u00E9 \uFB01nd x",  // NFC
+        u"H\u0302e\u0301 find x",  // NFKD
+        u"\u0124\u00E9 find x",  // NFKC
+        u"\u0124e\u0301 \uFB01nd x",  // Hex-Any
+        u"\u0125e\u0301 \uFB01nd x",  // Lower
+        u"\u0124e\uFB01ndx",  // [:^L:]Remove
+        u"H\u0302e\u0301 \uFB01nd ",  // NFD; [x]Remove
+        u"h\u0302e\u0301 find x",  // Lower; NFKD;
+        u"hefindx",  // Lower; NFKD; [:^L:]Remove; NFC;
+        u"\u0124e \uFB01nd x",  // [:Nonspacing Mark:] Remove;
+        u"He \uFB01nd x",  // NFD; [:Nonspacing Mark:] Remove; NFC;
+        0  // end; entries are index-aligned with BASIC_TRANSLITERATOR_ID
+    };
+
+    const char* BASIC_TRANSLITERATOR_ID[] = {
+        "NFD",
+        "NFC",
+        "NFKD",
+        "NFKC",
+        "Hex-Any",
+        "Lower",
+        "[:^L:]Remove",
+        "NFD; [x]Remove",
+        "Lower; NFKD;",
+        "Lower; NFKD; [:^L:]Remove; NFC;",
+        "[:Nonspacing Mark:] Remove;",
+        "NFD; [:Nonspacing Mark:] Remove; NFC;",
+        0  // end; null sentinel terminates the createInstance loop
+    };
+    const char* BASIC_TRANSLITERATOR_RULES[] = {
+        "::Lower; ::NFKD;",
+        "::Lower; ::NFKD; ::[:^L:]Remove; ::NFC;",
+        "::[:Nonspacing Mark:] Remove;",
+        "::NFD; ::[:Nonspacing Mark:] Remove; ::NFC;",
+        0  // end; null sentinel terminates the createFromRules loop
+    };
+    for (int32_t i=0; BASIC_TRANSLITERATOR_ID[i]; i++) {
+        UErrorCode status = U_ZERO_ERROR;
+        UParseError parseError;
+        std::unique_ptr<Transliterator> translit(Transliterator::createInstance(
+            BASIC_TRANSLITERATOR_ID[i], UTRANS_FORWARD, parseError, status));
+        if (translit.get() == nullptr || !U_SUCCESS(status)) {
+            dataerrln("FAIL: createInstance %s failed", BASIC_TRANSLITERATOR_ID[i]);
+            continue;  // translit may be null here; never reach the dereference below
+        }
+        UnicodeString data(TEST_DATA);
+        UnicodeString expected(EXPECTED_RESULTS[i]);
+        translit->transliterate(data);
+        if (data != expected) {
+            dataerrln(UnicodeString("FAIL: expected translit(") +
+                      BASIC_TRANSLITERATOR_ID[i] + ") = '" +
+                      EXPECTED_RESULTS[i] + "' but got '" + data + "'");
+        }
+    }
+    for (int32_t i=0; BASIC_TRANSLITERATOR_RULES[i]; i++) {
+        UErrorCode status = U_ZERO_ERROR;
+        UParseError parseError;
+        std::unique_ptr<Transliterator> translit(Transliterator::createFromRules(
+            "Test",
+            BASIC_TRANSLITERATOR_RULES[i], UTRANS_FORWARD, parseError, status));
+        if (translit.get() == nullptr || !U_SUCCESS(status)) {
+            dataerrln("FAIL: createFromRules %s failed", BASIC_TRANSLITERATOR_RULES[i]);
+        }
+    }
+}
+
 /**
  * Test compound RBT rules.
  */
diff --git a/icu4c/source/test/intltest/transtst.h b/icu4c/source/test/intltest/transtst.h
index 8a2bcc68f69..64246d4add7 100644
--- a/icu4c/source/test/intltest/transtst.h
+++ b/icu4c/source/test/intltest/transtst.h
@@ -369,6 +369,7 @@ private:
      */
     void TestRegisterAlias(void);
 
+    void TestBasicTransliteratorEvenWithoutData(void);
     //======================================================================
     // Support methods
     //======================================================================