diff --git a/icu4c/source/common/locdispnames.cpp b/icu4c/source/common/locdispnames.cpp index 106eb584c71..8fbee32eea9 100644 --- a/icu4c/source/common/locdispnames.cpp +++ b/icu4c/source/common/locdispnames.cpp @@ -26,6 +26,7 @@ #include "unicode/uloc.h" #include "unicode/ures.h" #include "unicode/ustring.h" +#include "charstr.h" #include "cmemory.h" #include "cstring.h" #include "putilimp.h" @@ -506,6 +507,22 @@ uloc_getDisplayName(const char *locale, return 0; } + // For the display name, we treat this as unknown language (ICU-20273). + static const char UND[] = "und"; + CharString und; + if (locale != NULL) { + if (*locale == '\0') { + locale = UND; + } else if (*locale == '_') { + und.append(UND, *pErrorCode); + und.append(locale, *pErrorCode); + if (U_FAILURE(*pErrorCode)) { + return 0; + } + locale = und.data(); + } + } + { UErrorCode status = U_ZERO_ERROR; diff --git a/icu4c/source/common/locid.cpp b/icu4c/source/common/locid.cpp index a6a518201c2..97b6e5aa586 100644 --- a/icu4c/source/common/locid.cpp +++ b/icu4c/source/common/locid.cpp @@ -626,6 +626,19 @@ Locale& Locale::init(const char* localeID, UBool canonicalize) variantBegin = (int32_t)(field[variantField] - fullName); } + if (length == 4 && uprv_stricmp(fullName, "root") == 0) { + length = 0; + variantBegin = 0; + language[0] = '\0'; + fullName[0] = '\0'; + } else if (length >= 3 && uprv_strnicmp(fullName, "und", 3) == 0 && + (length == 3 || fullName[3] == '_' || fullName[3] == '@')) { + length -= 3; + variantBegin -= 3; + language[0] = '\0'; + uprv_memmove(fullName, fullName + 3, length + 1); + } + err = U_ZERO_ERROR; initBaseName(err); if (U_FAILURE(err)) { diff --git a/icu4c/source/common/loclikely.cpp b/icu4c/source/common/loclikely.cpp index d4df914a9bb..107316def17 100644 --- a/icu4c/source/common/loclikely.cpp +++ b/icu4c/source/common/loclikely.cpp @@ -1253,16 +1253,17 @@ uloc_isRightToLeft(const char *locale) { errorCode = U_ZERO_ERROR; char lang[8]; int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode); - if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || - langLength == 0) { + if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) { return FALSE; } - const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang); - if (langPtr != NULL) { - switch (langPtr[langLength]) { - case '-': return FALSE; - case '+': return TRUE; - default: break; // partial match of a longer code + if (langLength > 0) { + const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang); + if (langPtr != NULL) { + switch (langPtr[langLength]) { + case '-': return FALSE; + case '+': return TRUE; + default: break; // partial match of a longer code + } } } // Otherwise, find the likely script. diff --git a/icu4c/source/i18n/ucol_res.cpp b/icu4c/source/i18n/ucol_res.cpp index 56ed5b3c19c..aa4027eb872 100644 --- a/icu4c/source/i18n/ucol_res.cpp +++ b/icu4c/source/i18n/ucol_res.cpp @@ -348,7 +348,7 @@ CollationLoader::loadFromCollations(UErrorCode &errorCode) { const char *actualLocale = ures_getLocaleByType(data, ULOC_ACTUAL_LOCALE, &errorCode); if(U_FAILURE(errorCode)) { return NULL; } const char *vLocale = validLocale.getBaseName(); - UBool actualAndValidLocalesAreDifferent = uprv_strcmp(actualLocale, vLocale) != 0; + UBool actualAndValidLocalesAreDifferent = Locale(actualLocale) != Locale(vLocale); // Set the collation types on the informational locales, // except when they match the default types (for brevity and backwards compatibility). @@ -410,7 +410,7 @@ CollationLoader::loadFromData(UErrorCode &errorCode) { const char *actualLocale = locale.getBaseName(); // without type const char *vLocale = validLocale.getBaseName(); - UBool actualAndValidLocalesAreDifferent = uprv_strcmp(actualLocale, vLocale) != 0; + UBool actualAndValidLocalesAreDifferent = Locale(actualLocale) != Locale(vLocale); // For the actual locale, suppress the default type *according to the actual locale*. // For example, zh has default=pinyin and contains all of the Chinese tailorings. diff --git a/icu4c/source/test/intltest/loctest.cpp b/icu4c/source/test/intltest/loctest.cpp index e9ce47fbaa5..5927f7202b8 100644 --- a/icu4c/source/test/intltest/loctest.cpp +++ b/icu4c/source/test/intltest/loctest.cpp @@ -11,6 +11,7 @@ #include #include "loctest.h" +#include "unicode/localebuilder.h" #include "unicode/localpointer.h" #include "unicode/decimfmt.h" #include "unicode/ucurr.h" @@ -227,6 +228,7 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c TESTCASE_AUTO(TestParallelAPIValues); TESTCASE_AUTO(TestAddLikelySubtags); TESTCASE_AUTO(TestMinimizeSubtags); + TESTCASE_AUTO(TestAddLikelyAndMinimizeSubtags); TESTCASE_AUTO(TestKeywordVariants); TESTCASE_AUTO(TestCreateUnicodeKeywords); TESTCASE_AUTO(TestKeywordVariantParsing); @@ -256,6 +258,9 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c TESTCASE_AUTO(TestBug20407iVariantPreferredValue); TESTCASE_AUTO(TestBug13417VeryLongLanguageTag); TESTCASE_AUTO(TestBug11053UnderlineTimeZone); + TESTCASE_AUTO(TestUnd); + TESTCASE_AUTO(TestUndScript); + TESTCASE_AUTO(TestUndRegion); TESTCASE_AUTO_END; } @@ -1061,29 +1066,29 @@ LocaleTest::TestAtypicalLocales() "Russian (Mexico)", "English (France)", "Spanish (Germany)", - "Croatia", - "Sweden", - "Dominican Republic", - "Belgium" }; + "Unknown language (Croatia)", + "Unknown language (Sweden)", + "Unknown language (Dominican Republic)", + "Unknown language (Belgium)" }; UnicodeString frenchDisplayNames []= { "allemand (Canada)", - "japonais (Afrique du Sud)", - "russe (Mexique)", + "japonais (Afrique du Sud)", + "russe (Mexique)", "anglais (France)", "espagnol (Allemagne)", - "Croatie", - CharsToUnicodeString("Su\\u00E8de"), - CharsToUnicodeString("R\\u00E9publique dominicaine"), - "Belgique" }; + u"langue indéterminée (Croatie)", + u"langue indéterminée (Suède)", + u"langue indéterminée (République dominicaine)", + u"langue indéterminée (Belgique)" }; UnicodeString spanishDisplayNames [] = { - CharsToUnicodeString("alem\\u00E1n (Canad\\u00E1)"), - CharsToUnicodeString("japon\\u00E9s (Sud\\u00E1frica)"), - CharsToUnicodeString("ruso (M\\u00E9xico)"), - CharsToUnicodeString("ingl\\u00E9s (Francia)"), - CharsToUnicodeString("espa\\u00F1ol (Alemania)"), - "Croacia", - "Suecia", - CharsToUnicodeString("Rep\\u00FAblica Dominicana"), - CharsToUnicodeString("B\\u00E9lgica") }; + u"alemán (Canadá)", + u"japonés (Sudáfrica)", + u"ruso (México)", + u"inglés (Francia)", + u"español (Alemania)", + "lengua desconocida (Croacia)", + "lengua desconocida (Suecia)", + u"lengua desconocida (República Dominicana)", + u"lengua desconocida (Bélgica)" }; // De-Anglicizing root required the change from // English display names to ISO Codes - ram 2003/09/26 UnicodeString invDisplayNames [] = { "German (Canada)", @@ -1091,10 +1096,10 @@ LocaleTest::TestAtypicalLocales() "Russian (Mexico)", "English (France)", "Spanish (Germany)", - "Croatia", - "Sweden", - "Dominican Republic", - "Belgium" }; + "Unknown language (Croatia)", + "Unknown language (Sweden)", + "Unknown language (Dominican Republic)", + "Unknown language (Belgium)" }; int32_t i; UErrorCode status = U_ZERO_ERROR; @@ -1644,6 +1649,54 @@ LocaleTest::TestMinimizeSubtags() { } +void +LocaleTest::TestAddLikelyAndMinimizeSubtags() { + IcuTestErrorCode status(*this, "TestAddLikelyAndMinimizeSubtags()"); + + static const struct { + const char* const from; + const char* const add; + const char* const remove; + } full_data[] = { + { + "und_AQ", + "_Latn_AQ", + "_AQ" + }, { + "und_Zzzz_AQ", + "_Latn_AQ", + "_AQ" + }, { + "und_Latn_AQ", + "_Latn_AQ", + "_AQ" + }, { + "und_Moon_AQ", + "_Moon_AQ", + "_Moon_AQ" + }, + }; + + for (const auto& item : full_data) { + const char* const org = item.from; + const char* const exp = item.add; + Locale res(org); + res.addLikelySubtags(status); + status.errIfFailureAndReset("\"%s\"", org); + assertEquals("addLikelySubtags", exp, res.getName()); + } + + for (const auto& item : full_data) { + const char* const org = item.from; + const char* const exp = item.remove; + Locale res(org); + res.minimizeSubtags(status); + status.errIfFailureAndReset("\"%s\"", org); + assertEquals("minimizeSubtags", exp, res.getName()); + } +} + + void LocaleTest::TestKeywordVariants(void) { static const struct { @@ -2037,8 +2090,8 @@ static UBool _loccmp(const char* string, const char* prefix) { plen = (int32_t)strlen(prefix); int32_t c = uprv_strncmp(string, prefix, plen); /* 'root' is "less than" everything */ - if (uprv_strcmp(prefix, "root") == 0) { - return (uprv_strcmp(string, "root") == 0) ? 0 : 1; + if (prefix[0] == '\0') { + return string[0] != '\0'; } if (c) return -1; /* mismatch */ if (slen == plen) return 0; @@ -3341,3 +3394,153 @@ void LocaleTest::TestBug11053UnderlineTimeZone() { Locale l8(locale_str.c_str()); assertTrue((locale_str + " !l8.isBogus()").c_str(), !l8.isBogus()); } + +void LocaleTest::TestUnd() { + IcuTestErrorCode status(*this, "TestUnd()"); + + static const char empty[] = ""; + static const char root[] = "root"; + static const char und[] = "und"; + + Locale empty_ctor(empty); + Locale empty_tag = Locale::forLanguageTag(empty, status); + status.errIfFailureAndReset("\"%s\"", empty); + + Locale root_ctor(root); + Locale root_tag = Locale::forLanguageTag(root, status); + Locale root_build = LocaleBuilder().setLanguageTag(root).build(status); + status.errIfFailureAndReset("\"%s\"", root); + + Locale und_ctor(und); + Locale und_tag = Locale::forLanguageTag(und, status); + Locale und_build = LocaleBuilder().setLanguageTag(und).build(status); + status.errIfFailureAndReset("\"%s\"", und); + + assertEquals("getName()", empty, empty_ctor.getName()); + assertEquals("getName()", empty, root_ctor.getName()); + assertEquals("getName()", empty, und_ctor.getName()); + + assertEquals("getName()", empty, empty_tag.getName()); + assertEquals("getName()", empty, root_tag.getName()); + assertEquals("getName()", empty, und_tag.getName()); + + assertEquals("getName()", empty, root_build.getName()); + assertEquals("getName()", empty, und_build.getName()); + + assertEquals("toLanguageTag()", und, empty_ctor.toLanguageTag(status).c_str()); + assertEquals("toLanguageTag()", und, root_ctor.toLanguageTag(status).c_str()); + assertEquals("toLanguageTag()", und, und_ctor.toLanguageTag(status).c_str()); + status.errIfFailureAndReset(); + + assertEquals("toLanguageTag()", und, empty_tag.toLanguageTag(status).c_str()); + assertEquals("toLanguageTag()", und, root_tag.toLanguageTag(status).c_str()); + assertEquals("toLanguageTag()", und, und_tag.toLanguageTag(status).c_str()); + status.errIfFailureAndReset(); + + assertEquals("toLanguageTag()", und, root_build.toLanguageTag(status).c_str()); + assertEquals("toLanguageTag()", und, und_build.toLanguageTag(status).c_str()); + status.errIfFailureAndReset(); + + assertTrue("empty_ctor == empty_tag", empty_ctor == empty_tag); + + assertTrue("root_ctor == root_tag", root_ctor == root_tag); + assertTrue("root_ctor == root_build", root_ctor == root_build); + assertTrue("root_tag == root_build", root_tag == root_build); + + assertTrue("und_ctor == und_tag", und_ctor == und_tag); + assertTrue("und_ctor == und_build", und_ctor == und_build); + assertTrue("und_tag == und_build", und_tag == und_build); + + assertTrue("empty_ctor == root_ctor", empty_ctor == root_ctor); + assertTrue("empty_ctor == und_ctor", empty_ctor == und_ctor); + assertTrue("root_ctor == und_ctor", root_ctor == und_ctor); + + assertTrue("empty_tag == root_tag", empty_tag == root_tag); + assertTrue("empty_tag == und_tag", empty_tag == und_tag); + assertTrue("root_tag == und_tag", root_tag == und_tag); + + assertTrue("root_build == und_build", root_build == und_build); + + static const Locale& displayLocale = Locale::getEnglish(); + static const UnicodeString displayName("Unknown language"); + UnicodeString tmp; + + assertEquals("getDisplayName()", displayName, empty_ctor.getDisplayName(displayLocale, tmp)); + assertEquals("getDisplayName()", displayName, root_ctor.getDisplayName(displayLocale, tmp)); + assertEquals("getDisplayName()", displayName, und_ctor.getDisplayName(displayLocale, tmp)); + + assertEquals("getDisplayName()", displayName, empty_tag.getDisplayName(displayLocale, tmp)); + assertEquals("getDisplayName()", displayName, root_tag.getDisplayName(displayLocale, tmp)); + assertEquals("getDisplayName()", displayName, und_tag.getDisplayName(displayLocale, tmp)); + + assertEquals("getDisplayName()", displayName, root_build.getDisplayName(displayLocale, tmp)); + assertEquals("getDisplayName()", displayName, und_build.getDisplayName(displayLocale, tmp)); +} + +void LocaleTest::TestUndScript() { + IcuTestErrorCode status(*this, "TestUndScript()"); + + static const char id[] = "_Cyrl"; + static const char tag[] = "und-Cyrl"; + static const char script[] = "Cyrl"; + + Locale locale_ctor(id); + Locale locale_legacy(tag); + Locale locale_tag = Locale::forLanguageTag(tag, status); + Locale locale_build = LocaleBuilder().setScript(script).build(status); + status.errIfFailureAndReset("\"%s\"", tag); + + assertEquals("getName()", id, locale_ctor.getName()); + assertEquals("getName()", id, locale_legacy.getName()); + assertEquals("getName()", id, locale_tag.getName()); + assertEquals("getName()", id, locale_build.getName()); + + assertEquals("toLanguageTag()", tag, locale_ctor.toLanguageTag(status).c_str()); + assertEquals("toLanguageTag()", tag, locale_legacy.toLanguageTag(status).c_str()); + assertEquals("toLanguageTag()", tag, locale_tag.toLanguageTag(status).c_str()); + assertEquals("toLanguageTag()", tag, locale_build.toLanguageTag(status).c_str()); + status.errIfFailureAndReset(); + + static const Locale& displayLocale = Locale::getEnglish(); + static const UnicodeString displayName("Unknown language (Cyrillic)"); + UnicodeString tmp; + + assertEquals("getDisplayName()", displayName, locale_ctor.getDisplayName(displayLocale, tmp)); + assertEquals("getDisplayName()", displayName, locale_legacy.getDisplayName(displayLocale, tmp)); + assertEquals("getDisplayName()", displayName, locale_tag.getDisplayName(displayLocale, tmp)); + assertEquals("getDisplayName()", displayName, locale_build.getDisplayName(displayLocale, tmp)); +} + +void LocaleTest::TestUndRegion() { + IcuTestErrorCode status(*this, "TestUndRegion()"); + + static const char id[] = "_AQ"; + static const char tag[] = "und-AQ"; + static const char region[] = "AQ"; + + Locale locale_ctor(id); + Locale locale_legacy(tag); + Locale locale_tag = Locale::forLanguageTag(tag, status); + Locale locale_build = LocaleBuilder().setRegion(region).build(status); + status.errIfFailureAndReset("\"%s\"", tag); + + assertEquals("getName()", id, locale_ctor.getName()); + assertEquals("getName()", id, locale_legacy.getName()); + assertEquals("getName()", id, locale_tag.getName()); + assertEquals("getName()", id, locale_build.getName()); + + assertEquals("toLanguageTag()", tag, locale_ctor.toLanguageTag(status).c_str()); + assertEquals("toLanguageTag()", tag, locale_legacy.toLanguageTag(status).c_str()); + assertEquals("toLanguageTag()", tag, locale_tag.toLanguageTag(status).c_str()); + assertEquals("toLanguageTag()", tag, locale_build.toLanguageTag(status).c_str()); + status.errIfFailureAndReset(); + + static const Locale& displayLocale = Locale::getEnglish(); + static const UnicodeString displayName("Unknown language (Antarctica)"); + UnicodeString tmp; + + assertEquals("getDisplayName()", displayName, locale_ctor.getDisplayName(displayLocale, tmp)); + assertEquals("getDisplayName()", displayName, locale_legacy.getDisplayName(displayLocale, tmp)); + assertEquals("getDisplayName()", displayName, locale_tag.getDisplayName(displayLocale, tmp)); + assertEquals("getDisplayName()", displayName, locale_build.getDisplayName(displayLocale, tmp)); +} diff --git a/icu4c/source/test/intltest/loctest.h b/icu4c/source/test/intltest/loctest.h index daf3baddc6b..5ccdea641df 100644 --- a/icu4c/source/test/intltest/loctest.h +++ b/icu4c/source/test/intltest/loctest.h @@ -118,6 +118,7 @@ public: void TestAddLikelySubtags(); void TestMinimizeSubtags(); + void TestAddLikelyAndMinimizeSubtags(); void TestForLanguageTag(); void TestToLanguageTag(); @@ -131,6 +132,10 @@ public: void TestBug11053UnderlineTimeZone(); + void TestUnd(); + void TestUndScript(); + void TestUndRegion(); + private: void _checklocs(const char* label, const char* req, diff --git a/icu4c/source/test/intltest/restest.cpp b/icu4c/source/test/intltest/restest.cpp index 5ffcb948226..44a9ac39290 100644 --- a/icu4c/source/test/intltest/restest.cpp +++ b/icu4c/source/test/intltest/restest.cpp @@ -581,8 +581,8 @@ ResourceBundleTest::TestGetLocaleByType(void) } test[] = { { "te_IN_BLAH", "string_only_in_te_IN", "te_IN", "te_IN" }, { "te_IN_BLAH", "string_only_in_te", "te_IN", "te" }, - { "te_IN_BLAH", "string_only_in_Root", "te_IN", "root" }, - { "te_IN_BLAH_01234567890_01234567890_01234567890_01234567890_01234567890_01234567890", "array_2d_only_in_Root", "te_IN", "root" }, + { "te_IN_BLAH", "string_only_in_Root", "te_IN", "" }, + { "te_IN_BLAH_01234567890_01234567890_01234567890_01234567890_01234567890_01234567890", "array_2d_only_in_Root", "te_IN", "" }, { "te_IN_BLAH@currency=euro", "array_2d_only_in_te_IN", "te_IN", "te_IN" }, { "te_IN_BLAH@calendar=thai;collation=phonebook", "array_2d_only_in_te", "te_IN", "te" } }; diff --git a/icu4c/source/test/intltest/svccoll.cpp b/icu4c/source/test/intltest/svccoll.cpp index ecc1d5026d9..8d46c68f37f 100644 --- a/icu4c/source/test/intltest/svccoll.cpp +++ b/icu4c/source/test/intltest/svccoll.cpp @@ -583,7 +583,7 @@ void CollationServiceTest::TestSeparateTree() { Locale::createFromName("de"), isAvailable, ec); assertSuccess("getFunctionalEquivalent", ec); - assertEquals("getFunctionalEquivalent(de)", "root", equiv.getName()); + assertEquals("getFunctionalEquivalent(de)", "", equiv.getName()); assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE", isAvailable == TRUE); @@ -591,7 +591,7 @@ void CollationServiceTest::TestSeparateTree() { Locale::createFromName("de_DE"), isAvailable, ec); assertSuccess("getFunctionalEquivalent", ec); - assertEquals("getFunctionalEquivalent(de_DE)", "root", equiv.getName()); + assertEquals("getFunctionalEquivalent(de_DE)", "", equiv.getName()); assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE", isAvailable == FALSE); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java index 54d7293c4de..60ba36f2bc1 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/LocaleDisplayNamesImpl.java @@ -305,10 +305,9 @@ public class LocaleDisplayNamesImpl extends LocaleDisplayNames { String lang = locale.getLanguage(); // Empty basename indicates root locale (keywords are ignored for this). - // Our data uses 'root' to access display names for the root locale in the - // "Languages" table. - if (locale.getBaseName().length() == 0) { - lang = "root"; + // For the display name, we treat this as unknown language (ICU-20273). + if (lang.isEmpty()) { + lang = "und"; } String script = locale.getScript(); String country = locale.getCountry(); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java index 785e790684f..37d72bf0f4e 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java @@ -22,6 +22,7 @@ import java.util.MissingResourceException; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; +import java.util.regex.Pattern; import com.ibm.icu.impl.CacheBase; import com.ibm.icu.impl.ICUData; @@ -111,6 +112,8 @@ public final class ULocale implements Serializable, Comparable { // using serialver from jdk1.4.2_05 private static final long serialVersionUID = 3715177670352309217L; + private static final Pattern UND_PATTERN = Pattern.compile("^und(?=$|[_-])", Pattern.CASE_INSENSITIVE); + private static CacheBase nameCache = new SoftCache() { @Override protected String createInstance(String tmpLocaleID, Void unused) { @@ -1061,8 +1064,10 @@ public final class ULocale implements Serializable, Comparable { if (tmpLocaleID.length() == 0) { tmpLocaleID = localeID; } + } else if ("root".equalsIgnoreCase(localeID)) { + tmpLocaleID = EMPTY_STRING; } else { - tmpLocaleID = localeID; + tmpLocaleID = UND_PATTERN.matcher(localeID).replaceFirst(EMPTY_STRING); } return nameCache.getInstance(tmpLocaleID, null /* unused */); } @@ -1292,15 +1297,14 @@ public final class ULocale implements Serializable, Comparable { // Fastpath: We know the likely scripts and their writing direction // for some common languages. String lang = getLanguage(); - if (lang.length() == 0) { - return false; - } - int langIndex = LANG_DIR_STRING.indexOf(lang); - if (langIndex >= 0) { - switch (LANG_DIR_STRING.charAt(langIndex + lang.length())) { - case '-': return false; - case '+': return true; - default: break; // partial match of a longer code + if (!lang.isEmpty()) { + int langIndex = LANG_DIR_STRING.indexOf(lang); + if (langIndex >= 0) { + switch (LANG_DIR_STRING.charAt(langIndex + lang.length())) { + case '-': return false; + case '+': return true; + default: break; // partial match of a longer code + } } } // Otherwise, find the likely script. diff --git a/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationServiceTest.java b/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationServiceTest.java index 76377fe417f..4be8e1cd836 100644 --- a/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationServiceTest.java +++ b/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/collator/CollationServiceTest.java @@ -359,7 +359,7 @@ public class CollationServiceTest extends TestFmwk { new ULocale("de"), isAvailable); if (assertTrue("getFunctionalEquivalent(de)!=null", equiv!=null)) { - assertEquals("getFunctionalEquivalent(de)", "root", equiv.toString()); + assertEquals("getFunctionalEquivalent(de)", "", equiv.toString()); } assertTrue("getFunctionalEquivalent(de).isAvailable==true", isAvailable[0] == true); @@ -368,7 +368,7 @@ public class CollationServiceTest extends TestFmwk { new ULocale("de_DE"), isAvailable); if (assertTrue("getFunctionalEquivalent(de_DE)!=null", equiv!=null)) { - assertEquals("getFunctionalEquivalent(de_DE)", "root", equiv.toString()); + assertEquals("getFunctionalEquivalent(de_DE)", "", equiv.toString()); } assertTrue("getFunctionalEquivalent(de_DE).isAvailable==false", isAvailable[0] == false); diff --git a/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/format/GlobalizationPreferencesTest.java b/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/format/GlobalizationPreferencesTest.java index 7bc21bc6365..a3fcb4da0d0 100644 --- a/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/format/GlobalizationPreferencesTest.java +++ b/icu4j/main/tests/collate/src/com/ibm/icu/dev/test/format/GlobalizationPreferencesTest.java @@ -868,8 +868,8 @@ public class GlobalizationPreferencesTest extends TestFmwk { gp.setLocale(new ULocale("aar")); BreakIterator brk = gp.getBreakIterator(GlobalizationPreferences.BI_LINE); String locStr = brk.getLocale(ULocale.VALID_LOCALE).toString(); - if (!locStr.equals("root")) { - errln("FAIL: Line break iterator locale is " + locStr + " Expected: root"); + if (!locStr.isEmpty()) { + errln("FAIL: Line break iterator locale is " + locStr + " Expected: \"\""); } // Set locale - es diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java index 060b28d7035..0b1f7cda5b5 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java @@ -377,7 +377,7 @@ public class LocaleMatcherTest extends TestFmwk { // When it *does* occur in the list, BestMatch returns it, as expected. matcher = newLocaleMatcher("it,und"); - assertEquals("und", matcher.getBestMatch("und").toString()); + assertEquals("", matcher.getBestMatch("und").toString()); // The unusual part: // max("und") = "en_Latn_US", and since matching is based on maximized diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/TestLocaleValidity.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/TestLocaleValidity.java index 7a411b86484..553cbd80212 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/TestLocaleValidity.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/TestLocaleValidity.java @@ -105,8 +105,8 @@ public class TestLocaleValidity extends TestFmwk { {"OK", "en-u-ca-buddhist-ca-islamic-umalqura-cf-account-co-big5han-cu-adp-fw-fri-hc-h11-ka-noignore-kb-false-kc-false-kf-false-kk-false-kn-false-kr-latn-digit-symbol-ks-identic-kv-currency-nu-ahom-sd-usny-tz-adalv-va-posix"}, - // bad case (for language tag) - {"{language, root}", "root"}, + // root is canonicalized to the root locale (ICU-20273) + {"OK", "root"}, // deprecated, but turned into valid by ULocale.Builder() {"OK", "en-u-ca-islamicc"}, // deprecated diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java index 4e42a6d4bb9..56dd2a2d9a1 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java @@ -25,6 +25,7 @@ import java.util.TreeMap; import java.util.TreeSet; import java.util.regex.Pattern; +import org.junit.Assert; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -3424,8 +3425,8 @@ public class ULocaleTest extends TestFmwk { "zh_HK" }, { "und_AQ", - "und_Latn_AQ", - "und_AQ" + "_Latn_AQ", + "_AQ" }, { "und_Zzzz", "en_Latn_US", @@ -3448,8 +3449,8 @@ public class ULocaleTest extends TestFmwk { "zh_HK" }, { "und_Zzzz_AQ", - "und_Latn_AQ", - "und_AQ" + "_Latn_AQ", + "_AQ" }, { "und_Latn", "en_Latn_US", @@ -3472,8 +3473,8 @@ public class ULocaleTest extends TestFmwk { "zh_Latn_HK" }, { "und_Latn_AQ", - "und_Latn_AQ", - "und_AQ" + "_Latn_AQ", + "_AQ" }, { "und_Hans", "zh_Hans_CN", @@ -3544,8 +3545,8 @@ public class ULocaleTest extends TestFmwk { "zh_Moon_HK" }, { "und_Moon_AQ", - "und_Moon_AQ", - "und_Moon_AQ" + "_Moon_AQ", + "_Moon_AQ" }, { "es", "es_Latn_ES", @@ -4272,7 +4273,7 @@ public class ULocaleTest extends TestFmwk { {new ULocale("en__POSIX"), new ULocale("en"), ULocale.ROOT, null}, {new ULocale("de_DE@collation=phonebook"), new ULocale("de@collation=phonebook"), new ULocale("@collation=phonebook"), null}, {new ULocale("_US_POSIX"), new ULocale("_US"), ULocale.ROOT, null}, - {new ULocale("root"), ULocale.ROOT, null}, + {new ULocale("root"), null}, }; for(ULocale[] chain : TESTLOCALES) { @@ -4676,7 +4677,6 @@ public class ULocaleTest extends TestFmwk { "th_TH@calendar=gergorian", "th_TH@numbers=latn", "this is a bogus locale id", - "und", "zh_CN", "zh_TW", "zh_Hans", @@ -4890,4 +4890,148 @@ public class ULocaleTest extends TestFmwk { } } } + + @Test + public void TestUnd() { + final String empty = ""; + final String root = "root"; + final String und = "und"; + + ULocale empty_new = new ULocale(empty); + ULocale empty_tag = ULocale.forLanguageTag(empty); + + ULocale root_new = new ULocale(root); + ULocale root_tag = ULocale.forLanguageTag(root); + ULocale root_build = new Builder().setLanguageTag(root).build(); + + ULocale und_new = new ULocale(und); + ULocale und_tag = ULocale.forLanguageTag(und); + ULocale und_build = new Builder().setLanguageTag(und).build(); + + Assert.assertEquals(empty, empty_new.getName()); + Assert.assertEquals(empty, root_new.getName()); + Assert.assertEquals(empty, und_new.getName()); + + Assert.assertEquals(empty, empty_tag.getName()); + Assert.assertEquals(empty, root_tag.getName()); + Assert.assertEquals(empty, und_tag.getName()); + + Assert.assertEquals(empty, root_build.getName()); + Assert.assertEquals(empty, und_build.getName()); + + Assert.assertEquals(und, empty_new.toLanguageTag()); + Assert.assertEquals(und, root_new.toLanguageTag()); + Assert.assertEquals(und, und_new.toLanguageTag()); + + Assert.assertEquals(und, empty_tag.toLanguageTag()); + Assert.assertEquals(und, root_tag.toLanguageTag()); + Assert.assertEquals(und, und_tag.toLanguageTag()); + + Assert.assertEquals(und, root_build.toLanguageTag()); + Assert.assertEquals(und, und_build.toLanguageTag()); + + Assert.assertEquals(empty_new, empty_tag); + + Assert.assertEquals(root_new, root_tag); + Assert.assertEquals(root_new, root_build); + Assert.assertEquals(root_tag, root_build); + + Assert.assertEquals(und_new, und_tag); + Assert.assertEquals(und_new, und_build); + Assert.assertEquals(und_tag, und_build); + + Assert.assertEquals(empty_new, root_new); + Assert.assertEquals(empty_new, und_new); + Assert.assertEquals(root_new, und_new); + + Assert.assertEquals(empty_tag, root_tag); + Assert.assertEquals(empty_tag, und_tag); + Assert.assertEquals(root_tag, und_tag); + + Assert.assertEquals(root_build, und_build); + + final ULocale displayLocale = ULocale.ENGLISH; + final String displayName = "Unknown language"; + + Assert.assertEquals(displayName, empty_new.getDisplayName(displayLocale)); + Assert.assertEquals(displayName, root_new.getDisplayName(displayLocale)); + Assert.assertEquals(displayName, und_new.getDisplayName(displayLocale)); + + Assert.assertEquals(displayName, empty_tag.getDisplayName(displayLocale)); + Assert.assertEquals(displayName, root_tag.getDisplayName(displayLocale)); + Assert.assertEquals(displayName, und_tag.getDisplayName(displayLocale)); + + Assert.assertEquals(displayName, root_build.getDisplayName(displayLocale)); + Assert.assertEquals(displayName, und_build.getDisplayName(displayLocale)); + } + + @Test + public void TestUndScript() { + final String id = "_Cyrl"; + final String tag = "und-Cyrl"; + final String script = "Cyrl"; + + ULocale locale_new = new ULocale(id); + ULocale locale_legacy = new ULocale(tag); + ULocale locale_tag = ULocale.forLanguageTag(tag); + ULocale locale_build = new Builder().setScript(script).build(); + + Assert.assertEquals(id, locale_new.getName()); + Assert.assertEquals(id, locale_legacy.getName()); + Assert.assertEquals(id, locale_tag.getName()); + Assert.assertEquals(id, locale_build.getName()); + + Assert.assertEquals(tag, locale_new.toLanguageTag()); + Assert.assertEquals(tag, locale_legacy.toLanguageTag()); + Assert.assertEquals(tag, locale_tag.toLanguageTag()); + Assert.assertEquals(tag, locale_build.toLanguageTag()); + + Assert.assertEquals(locale_new, locale_legacy); + Assert.assertEquals(locale_new, locale_tag); + Assert.assertEquals(locale_new, locale_build); + Assert.assertEquals(locale_tag, locale_build); + + final ULocale displayLocale = ULocale.ENGLISH; + final String displayName = "Unknown language (Cyrillic)"; + + Assert.assertEquals(displayName, locale_new.getDisplayName(displayLocale)); + Assert.assertEquals(displayName, locale_legacy.getDisplayName(displayLocale)); + Assert.assertEquals(displayName, locale_tag.getDisplayName(displayLocale)); + Assert.assertEquals(displayName, locale_build.getDisplayName(displayLocale)); + } + + @Test + public void TestUndRegion() { + final String id = "_AQ"; + final String tag = "und-AQ"; + final String region = "AQ"; + + ULocale locale_new = new ULocale(id); + ULocale locale_legacy = new ULocale(tag); + ULocale locale_tag = ULocale.forLanguageTag(tag); + ULocale locale_build = new Builder().setRegion(region).build(); + + Assert.assertEquals(id, locale_new.getName()); + Assert.assertEquals(id, locale_legacy.getName()); + Assert.assertEquals(id, locale_tag.getName()); + Assert.assertEquals(id, locale_build.getName()); + + Assert.assertEquals(tag, locale_new.toLanguageTag()); + Assert.assertEquals(tag, locale_legacy.toLanguageTag()); + Assert.assertEquals(tag, locale_tag.toLanguageTag()); + Assert.assertEquals(tag, locale_build.toLanguageTag()); + + Assert.assertEquals(locale_new, locale_legacy); + Assert.assertEquals(locale_new, locale_tag); + Assert.assertEquals(locale_new, locale_build); + Assert.assertEquals(locale_tag, locale_build); + + final ULocale displayLocale = ULocale.ENGLISH; + final String displayName = "Unknown language (Antarctica)"; + + Assert.assertEquals(displayName, locale_new.getDisplayName(displayLocale)); + Assert.assertEquals(displayName, locale_legacy.getDisplayName(displayLocale)); + Assert.assertEquals(displayName, locale_tag.getDisplayName(displayLocale)); + Assert.assertEquals(displayName, locale_build.getDisplayName(displayLocale)); + } } diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt index 3a149dd7c40..bd653a7a59a 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/data/localeMatcherTest.txt @@ -763,7 +763,7 @@ fr-FR >> fr ja-JP >> fr # For a language that doesn't match anything, return the default. zu >> en-GB -root >> fr +zxx >> fr @distance=script en-GB >> en-GB @@ -771,7 +771,7 @@ en-US >> en fr-FR >> fr ja-JP >> fr zu >> en-GB -root >> en +zxx >> en ** test: TestExactMatch @supported=fr, en-GB, ja, es-ES, es-MX