ICU-20273 Resolve inconsistent behavior of "root", "und", "" in locales

This commit is contained in:
Fredrik Roubert 2019-02-20 13:26:37 +01:00 committed by Fredrik Roubert
parent ff4cdc0f8b
commit 09bf9f2845
16 changed files with 458 additions and 72 deletions

View file

@ -26,6 +26,7 @@
#include "unicode/uloc.h"
#include "unicode/ures.h"
#include "unicode/ustring.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "putilimp.h"
@ -506,6 +507,22 @@ uloc_getDisplayName(const char *locale,
return 0;
}
// For the display name, we treat this as unknown language (ICU-20273).
static const char UND[] = "und";
CharString und;
if (locale != NULL) {
if (*locale == '\0') {
locale = UND;
} else if (*locale == '_') {
und.append(UND, *pErrorCode);
und.append(locale, *pErrorCode);
if (U_FAILURE(*pErrorCode)) {
return 0;
}
locale = und.data();
}
}
{
UErrorCode status = U_ZERO_ERROR;

View file

@ -626,6 +626,19 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
variantBegin = (int32_t)(field[variantField] - fullName);
}
if (length == 4 && uprv_stricmp(fullName, "root") == 0) {
length = 0;
variantBegin = 0;
language[0] = '\0';
fullName[0] = '\0';
} else if (length >= 3 && uprv_strnicmp(fullName, "und", 3) == 0 &&
(length == 3 || fullName[3] == '_' || fullName[3] == '@')) {
length -= 3;
variantBegin -= 3;
language[0] = '\0';
uprv_memmove(fullName, fullName + 3, length + 1);
}
err = U_ZERO_ERROR;
initBaseName(err);
if (U_FAILURE(err)) {

View file

@ -1253,16 +1253,17 @@ uloc_isRightToLeft(const char *locale) {
errorCode = U_ZERO_ERROR;
char lang[8];
int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
langLength == 0) {
if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
return FALSE;
}
const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
if (langPtr != NULL) {
switch (langPtr[langLength]) {
case '-': return FALSE;
case '+': return TRUE;
default: break; // partial match of a longer code
if (langLength > 0) {
const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
if (langPtr != NULL) {
switch (langPtr[langLength]) {
case '-': return FALSE;
case '+': return TRUE;
default: break; // partial match of a longer code
}
}
}
// Otherwise, find the likely script.

View file

@ -348,7 +348,7 @@ CollationLoader::loadFromCollations(UErrorCode &errorCode) {
const char *actualLocale = ures_getLocaleByType(data, ULOC_ACTUAL_LOCALE, &errorCode);
if(U_FAILURE(errorCode)) { return NULL; }
const char *vLocale = validLocale.getBaseName();
UBool actualAndValidLocalesAreDifferent = uprv_strcmp(actualLocale, vLocale) != 0;
UBool actualAndValidLocalesAreDifferent = Locale(actualLocale) != Locale(vLocale);
// Set the collation types on the informational locales,
// except when they match the default types (for brevity and backwards compatibility).
@ -410,7 +410,7 @@ CollationLoader::loadFromData(UErrorCode &errorCode) {
const char *actualLocale = locale.getBaseName(); // without type
const char *vLocale = validLocale.getBaseName();
UBool actualAndValidLocalesAreDifferent = uprv_strcmp(actualLocale, vLocale) != 0;
UBool actualAndValidLocalesAreDifferent = Locale(actualLocale) != Locale(vLocale);
// For the actual locale, suppress the default type *according to the actual locale*.
// For example, zh has default=pinyin and contains all of the Chinese tailorings.

View file

@ -11,6 +11,7 @@
#include <utility>
#include "loctest.h"
#include "unicode/localebuilder.h"
#include "unicode/localpointer.h"
#include "unicode/decimfmt.h"
#include "unicode/ucurr.h"
@ -227,6 +228,7 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
TESTCASE_AUTO(TestParallelAPIValues);
TESTCASE_AUTO(TestAddLikelySubtags);
TESTCASE_AUTO(TestMinimizeSubtags);
TESTCASE_AUTO(TestAddLikelyAndMinimizeSubtags);
TESTCASE_AUTO(TestKeywordVariants);
TESTCASE_AUTO(TestCreateUnicodeKeywords);
TESTCASE_AUTO(TestKeywordVariantParsing);
@ -256,6 +258,9 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
TESTCASE_AUTO(TestBug20407iVariantPreferredValue);
TESTCASE_AUTO(TestBug13417VeryLongLanguageTag);
TESTCASE_AUTO(TestBug11053UnderlineTimeZone);
TESTCASE_AUTO(TestUnd);
TESTCASE_AUTO(TestUndScript);
TESTCASE_AUTO(TestUndRegion);
TESTCASE_AUTO_END;
}
@ -1061,29 +1066,29 @@ LocaleTest::TestAtypicalLocales()
"Russian (Mexico)",
"English (France)",
"Spanish (Germany)",
"Croatia",
"Sweden",
"Dominican Republic",
"Belgium" };
"Unknown language (Croatia)",
"Unknown language (Sweden)",
"Unknown language (Dominican Republic)",
"Unknown language (Belgium)" };
UnicodeString frenchDisplayNames []= { "allemand (Canada)",
"japonais (Afrique du Sud)",
"russe (Mexique)",
"japonais (Afrique du Sud)",
"russe (Mexique)",
"anglais (France)",
"espagnol (Allemagne)",
"Croatie",
CharsToUnicodeString("Su\\u00E8de"),
CharsToUnicodeString("R\\u00E9publique dominicaine"),
"Belgique" };
u"langue indéterminée (Croatie)",
u"langue indéterminée (Suède)",
u"langue indéterminée (République dominicaine)",
u"langue indéterminée (Belgique)" };
UnicodeString spanishDisplayNames [] = {
CharsToUnicodeString("alem\\u00E1n (Canad\\u00E1)"),
CharsToUnicodeString("japon\\u00E9s (Sud\\u00E1frica)"),
CharsToUnicodeString("ruso (M\\u00E9xico)"),
CharsToUnicodeString("ingl\\u00E9s (Francia)"),
CharsToUnicodeString("espa\\u00F1ol (Alemania)"),
"Croacia",
"Suecia",
CharsToUnicodeString("Rep\\u00FAblica Dominicana"),
CharsToUnicodeString("B\\u00E9lgica") };
u"alemán (Canadá)",
u"japonés (Sudáfrica)",
u"ruso (México)",
u"inglés (Francia)",
u"español (Alemania)",
"lengua desconocida (Croacia)",
"lengua desconocida (Suecia)",
u"lengua desconocida (República Dominicana)",
u"lengua desconocida (Bélgica)" };
// De-Anglicizing root required the change from
// English display names to ISO Codes - ram 2003/09/26
UnicodeString invDisplayNames [] = { "German (Canada)",
@ -1091,10 +1096,10 @@ LocaleTest::TestAtypicalLocales()
"Russian (Mexico)",
"English (France)",
"Spanish (Germany)",
"Croatia",
"Sweden",
"Dominican Republic",
"Belgium" };
"Unknown language (Croatia)",
"Unknown language (Sweden)",
"Unknown language (Dominican Republic)",
"Unknown language (Belgium)" };
int32_t i;
UErrorCode status = U_ZERO_ERROR;
@ -1644,6 +1649,54 @@ LocaleTest::TestMinimizeSubtags() {
}
void
LocaleTest::TestAddLikelyAndMinimizeSubtags() {
IcuTestErrorCode status(*this, "TestAddLikelyAndMinimizeSubtags()");
static const struct {
const char* const from;
const char* const add;
const char* const remove;
} full_data[] = {
{
"und_AQ",
"_Latn_AQ",
"_AQ"
}, {
"und_Zzzz_AQ",
"_Latn_AQ",
"_AQ"
}, {
"und_Latn_AQ",
"_Latn_AQ",
"_AQ"
}, {
"und_Moon_AQ",
"_Moon_AQ",
"_Moon_AQ"
},
};
for (const auto& item : full_data) {
const char* const org = item.from;
const char* const exp = item.add;
Locale res(org);
res.addLikelySubtags(status);
status.errIfFailureAndReset("\"%s\"", org);
assertEquals("addLikelySubtags", exp, res.getName());
}
for (const auto& item : full_data) {
const char* const org = item.from;
const char* const exp = item.remove;
Locale res(org);
res.minimizeSubtags(status);
status.errIfFailureAndReset("\"%s\"", org);
assertEquals("minimizeSubtags", exp, res.getName());
}
}
void
LocaleTest::TestKeywordVariants(void) {
static const struct {
@ -2037,8 +2090,8 @@ static UBool _loccmp(const char* string, const char* prefix) {
plen = (int32_t)strlen(prefix);
int32_t c = uprv_strncmp(string, prefix, plen);
/* 'root' is "less than" everything */
if (uprv_strcmp(prefix, "root") == 0) {
return (uprv_strcmp(string, "root") == 0) ? 0 : 1;
if (prefix[0] == '\0') {
return string[0] != '\0';
}
if (c) return -1; /* mismatch */
if (slen == plen) return 0;
@ -3341,3 +3394,153 @@ void LocaleTest::TestBug11053UnderlineTimeZone() {
Locale l8(locale_str.c_str());
assertTrue((locale_str + " !l8.isBogus()").c_str(), !l8.isBogus());
}
void LocaleTest::TestUnd() {
IcuTestErrorCode status(*this, "TestUnd()");
static const char empty[] = "";
static const char root[] = "root";
static const char und[] = "und";
Locale empty_ctor(empty);
Locale empty_tag = Locale::forLanguageTag(empty, status);
status.errIfFailureAndReset("\"%s\"", empty);
Locale root_ctor(root);
Locale root_tag = Locale::forLanguageTag(root, status);
Locale root_build = LocaleBuilder().setLanguageTag(root).build(status);
status.errIfFailureAndReset("\"%s\"", root);
Locale und_ctor(und);
Locale und_tag = Locale::forLanguageTag(und, status);
Locale und_build = LocaleBuilder().setLanguageTag(und).build(status);
status.errIfFailureAndReset("\"%s\"", und);
assertEquals("getName()", empty, empty_ctor.getName());
assertEquals("getName()", empty, root_ctor.getName());
assertEquals("getName()", empty, und_ctor.getName());
assertEquals("getName()", empty, empty_tag.getName());
assertEquals("getName()", empty, root_tag.getName());
assertEquals("getName()", empty, und_tag.getName());
assertEquals("getName()", empty, root_build.getName());
assertEquals("getName()", empty, und_build.getName());
assertEquals("toLanguageTag()", und, empty_ctor.toLanguageTag<std::string>(status).c_str());
assertEquals("toLanguageTag()", und, root_ctor.toLanguageTag<std::string>(status).c_str());
assertEquals("toLanguageTag()", und, und_ctor.toLanguageTag<std::string>(status).c_str());
status.errIfFailureAndReset();
assertEquals("toLanguageTag()", und, empty_tag.toLanguageTag<std::string>(status).c_str());
assertEquals("toLanguageTag()", und, root_tag.toLanguageTag<std::string>(status).c_str());
assertEquals("toLanguageTag()", und, und_tag.toLanguageTag<std::string>(status).c_str());
status.errIfFailureAndReset();
assertEquals("toLanguageTag()", und, root_build.toLanguageTag<std::string>(status).c_str());
assertEquals("toLanguageTag()", und, und_build.toLanguageTag<std::string>(status).c_str());
status.errIfFailureAndReset();
assertTrue("empty_ctor == empty_tag", empty_ctor == empty_tag);
assertTrue("root_ctor == root_tag", root_ctor == root_tag);
assertTrue("root_ctor == root_build", root_ctor == root_build);
assertTrue("root_tag == root_build", root_tag == root_build);
assertTrue("und_ctor == und_tag", und_ctor == und_tag);
assertTrue("und_ctor == und_build", und_ctor == und_build);
assertTrue("und_tag == und_build", und_tag == und_build);
assertTrue("empty_ctor == root_ctor", empty_ctor == root_ctor);
assertTrue("empty_ctor == und_ctor", empty_ctor == und_ctor);
assertTrue("root_ctor == und_ctor", root_ctor == und_ctor);
assertTrue("empty_tag == root_tag", empty_tag == root_tag);
assertTrue("empty_tag == und_tag", empty_tag == und_tag);
assertTrue("root_tag == und_tag", root_tag == und_tag);
assertTrue("root_build == und_build", root_build == und_build);
static const Locale& displayLocale = Locale::getEnglish();
static const UnicodeString displayName("Unknown language");
UnicodeString tmp;
assertEquals("getDisplayName()", displayName, empty_ctor.getDisplayName(displayLocale, tmp));
assertEquals("getDisplayName()", displayName, root_ctor.getDisplayName(displayLocale, tmp));
assertEquals("getDisplayName()", displayName, und_ctor.getDisplayName(displayLocale, tmp));
assertEquals("getDisplayName()", displayName, empty_tag.getDisplayName(displayLocale, tmp));
assertEquals("getDisplayName()", displayName, root_tag.getDisplayName(displayLocale, tmp));
assertEquals("getDisplayName()", displayName, und_tag.getDisplayName(displayLocale, tmp));
assertEquals("getDisplayName()", displayName, root_build.getDisplayName(displayLocale, tmp));
assertEquals("getDisplayName()", displayName, und_build.getDisplayName(displayLocale, tmp));
}
void LocaleTest::TestUndScript() {
IcuTestErrorCode status(*this, "TestUndScript()");
static const char id[] = "_Cyrl";
static const char tag[] = "und-Cyrl";
static const char script[] = "Cyrl";
Locale locale_ctor(id);
Locale locale_legacy(tag);
Locale locale_tag = Locale::forLanguageTag(tag, status);
Locale locale_build = LocaleBuilder().setScript(script).build(status);
status.errIfFailureAndReset("\"%s\"", tag);
assertEquals("getName()", id, locale_ctor.getName());
assertEquals("getName()", id, locale_legacy.getName());
assertEquals("getName()", id, locale_tag.getName());
assertEquals("getName()", id, locale_build.getName());
assertEquals("toLanguageTag()", tag, locale_ctor.toLanguageTag<std::string>(status).c_str());
assertEquals("toLanguageTag()", tag, locale_legacy.toLanguageTag<std::string>(status).c_str());
assertEquals("toLanguageTag()", tag, locale_tag.toLanguageTag<std::string>(status).c_str());
assertEquals("toLanguageTag()", tag, locale_build.toLanguageTag<std::string>(status).c_str());
status.errIfFailureAndReset();
static const Locale& displayLocale = Locale::getEnglish();
static const UnicodeString displayName("Unknown language (Cyrillic)");
UnicodeString tmp;
assertEquals("getDisplayName()", displayName, locale_ctor.getDisplayName(displayLocale, tmp));
assertEquals("getDisplayName()", displayName, locale_legacy.getDisplayName(displayLocale, tmp));
assertEquals("getDisplayName()", displayName, locale_tag.getDisplayName(displayLocale, tmp));
assertEquals("getDisplayName()", displayName, locale_build.getDisplayName(displayLocale, tmp));
}
void LocaleTest::TestUndRegion() {
IcuTestErrorCode status(*this, "TestUndRegion()");
static const char id[] = "_AQ";
static const char tag[] = "und-AQ";
static const char region[] = "AQ";
Locale locale_ctor(id);
Locale locale_legacy(tag);
Locale locale_tag = Locale::forLanguageTag(tag, status);
Locale locale_build = LocaleBuilder().setRegion(region).build(status);
status.errIfFailureAndReset("\"%s\"", tag);
assertEquals("getName()", id, locale_ctor.getName());
assertEquals("getName()", id, locale_legacy.getName());
assertEquals("getName()", id, locale_tag.getName());
assertEquals("getName()", id, locale_build.getName());
assertEquals("toLanguageTag()", tag, locale_ctor.toLanguageTag<std::string>(status).c_str());
assertEquals("toLanguageTag()", tag, locale_legacy.toLanguageTag<std::string>(status).c_str());
assertEquals("toLanguageTag()", tag, locale_tag.toLanguageTag<std::string>(status).c_str());
assertEquals("toLanguageTag()", tag, locale_build.toLanguageTag<std::string>(status).c_str());
status.errIfFailureAndReset();
static const Locale& displayLocale = Locale::getEnglish();
static const UnicodeString displayName("Unknown language (Antarctica)");
UnicodeString tmp;
assertEquals("getDisplayName()", displayName, locale_ctor.getDisplayName(displayLocale, tmp));
assertEquals("getDisplayName()", displayName, locale_legacy.getDisplayName(displayLocale, tmp));
assertEquals("getDisplayName()", displayName, locale_tag.getDisplayName(displayLocale, tmp));
assertEquals("getDisplayName()", displayName, locale_build.getDisplayName(displayLocale, tmp));
}

View file

@ -118,6 +118,7 @@ public:
void TestAddLikelySubtags();
void TestMinimizeSubtags();
void TestAddLikelyAndMinimizeSubtags();
void TestForLanguageTag();
void TestToLanguageTag();
@ -131,6 +132,10 @@ public:
void TestBug11053UnderlineTimeZone();
void TestUnd();
void TestUndScript();
void TestUndRegion();
private:
void _checklocs(const char* label,
const char* req,

View file

@ -581,8 +581,8 @@ ResourceBundleTest::TestGetLocaleByType(void)
} test[] = {
{ "te_IN_BLAH", "string_only_in_te_IN", "te_IN", "te_IN" },
{ "te_IN_BLAH", "string_only_in_te", "te_IN", "te" },
{ "te_IN_BLAH", "string_only_in_Root", "te_IN", "root" },
{ "te_IN_BLAH_01234567890_01234567890_01234567890_01234567890_01234567890_01234567890", "array_2d_only_in_Root", "te_IN", "root" },
{ "te_IN_BLAH", "string_only_in_Root", "te_IN", "" },
{ "te_IN_BLAH_01234567890_01234567890_01234567890_01234567890_01234567890_01234567890", "array_2d_only_in_Root", "te_IN", "" },
{ "te_IN_BLAH@currency=euro", "array_2d_only_in_te_IN", "te_IN", "te_IN" },
{ "te_IN_BLAH@calendar=thai;collation=phonebook", "array_2d_only_in_te", "te_IN", "te" }
};

View file

@ -583,7 +583,7 @@ void CollationServiceTest::TestSeparateTree() {
Locale::createFromName("de"),
isAvailable, ec);
assertSuccess("getFunctionalEquivalent", ec);
assertEquals("getFunctionalEquivalent(de)", "root", equiv.getName());
assertEquals("getFunctionalEquivalent(de)", "", equiv.getName());
assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",
isAvailable == TRUE);
@ -591,7 +591,7 @@ void CollationServiceTest::TestSeparateTree() {
Locale::createFromName("de_DE"),
isAvailable, ec);
assertSuccess("getFunctionalEquivalent", ec);
assertEquals("getFunctionalEquivalent(de_DE)", "root", equiv.getName());
assertEquals("getFunctionalEquivalent(de_DE)", "", equiv.getName());
assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",
isAvailable == FALSE);

View file

@ -305,10 +305,9 @@ public class LocaleDisplayNamesImpl extends LocaleDisplayNames {
String lang = locale.getLanguage();
// Empty basename indicates root locale (keywords are ignored for this).
// Our data uses 'root' to access display names for the root locale in the
// "Languages" table.
if (locale.getBaseName().length() == 0) {
lang = "root";
// For the display name, we treat this as unknown language (ICU-20273).
if (lang.isEmpty()) {
lang = "und";
}
String script = locale.getScript();
String country = locale.getCountry();

View file

@ -22,6 +22,7 @@ import java.util.MissingResourceException;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Pattern;
import com.ibm.icu.impl.CacheBase;
import com.ibm.icu.impl.ICUData;
@ -111,6 +112,8 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
// using serialver from jdk1.4.2_05
private static final long serialVersionUID = 3715177670352309217L;
private static final Pattern UND_PATTERN = Pattern.compile("^und(?=$|[_-])", Pattern.CASE_INSENSITIVE);
private static CacheBase<String, String, Void> nameCache = new SoftCache<String, String, Void>() {
@Override
protected String createInstance(String tmpLocaleID, Void unused) {
@ -1061,8 +1064,10 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
if (tmpLocaleID.length() == 0) {
tmpLocaleID = localeID;
}
} else if ("root".equalsIgnoreCase(localeID)) {
tmpLocaleID = EMPTY_STRING;
} else {
tmpLocaleID = localeID;
tmpLocaleID = UND_PATTERN.matcher(localeID).replaceFirst(EMPTY_STRING);
}
return nameCache.getInstance(tmpLocaleID, null /* unused */);
}
@ -1292,15 +1297,14 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
// Fastpath: We know the likely scripts and their writing direction
// for some common languages.
String lang = getLanguage();
if (lang.length() == 0) {
return false;
}
int langIndex = LANG_DIR_STRING.indexOf(lang);
if (langIndex >= 0) {
switch (LANG_DIR_STRING.charAt(langIndex + lang.length())) {
case '-': return false;
case '+': return true;
default: break; // partial match of a longer code
if (!lang.isEmpty()) {
int langIndex = LANG_DIR_STRING.indexOf(lang);
if (langIndex >= 0) {
switch (LANG_DIR_STRING.charAt(langIndex + lang.length())) {
case '-': return false;
case '+': return true;
default: break; // partial match of a longer code
}
}
}
// Otherwise, find the likely script.

View file

@ -359,7 +359,7 @@ public class CollationServiceTest extends TestFmwk {
new ULocale("de"),
isAvailable);
if (assertTrue("getFunctionalEquivalent(de)!=null", equiv!=null)) {
assertEquals("getFunctionalEquivalent(de)", "root", equiv.toString());
assertEquals("getFunctionalEquivalent(de)", "", equiv.toString());
}
assertTrue("getFunctionalEquivalent(de).isAvailable==true",
isAvailable[0] == true);
@ -368,7 +368,7 @@ public class CollationServiceTest extends TestFmwk {
new ULocale("de_DE"),
isAvailable);
if (assertTrue("getFunctionalEquivalent(de_DE)!=null", equiv!=null)) {
assertEquals("getFunctionalEquivalent(de_DE)", "root", equiv.toString());
assertEquals("getFunctionalEquivalent(de_DE)", "", equiv.toString());
}
assertTrue("getFunctionalEquivalent(de_DE).isAvailable==false",
isAvailable[0] == false);

View file

@ -868,8 +868,8 @@ public class GlobalizationPreferencesTest extends TestFmwk {
gp.setLocale(new ULocale("aar"));
BreakIterator brk = gp.getBreakIterator(GlobalizationPreferences.BI_LINE);
String locStr = brk.getLocale(ULocale.VALID_LOCALE).toString();
if (!locStr.equals("root")) {
errln("FAIL: Line break iterator locale is " + locStr + " Expected: root");
if (!locStr.isEmpty()) {
errln("FAIL: Line break iterator locale is " + locStr + " Expected: \"\"");
}
// Set locale - es

View file

@ -377,7 +377,7 @@ public class LocaleMatcherTest extends TestFmwk {
// When it *does* occur in the list, BestMatch returns it, as expected.
matcher = newLocaleMatcher("it,und");
assertEquals("und", matcher.getBestMatch("und").toString());
assertEquals("", matcher.getBestMatch("und").toString());
// The unusual part:
// max("und") = "en_Latn_US", and since matching is based on maximized

View file

@ -105,8 +105,8 @@ public class TestLocaleValidity extends TestFmwk {
{"OK", "en-u-ca-buddhist-ca-islamic-umalqura-cf-account-co-big5han-cu-adp-fw-fri-hc-h11-ka-noignore-kb-false-kc-false-kf-false-kk-false-kn-false-kr-latn-digit-symbol-ks-identic-kv-currency-nu-ahom-sd-usny-tz-adalv-va-posix"},
// bad case (for language tag)
{"{language, root}", "root"},
// root is canonicalized to the root locale (ICU-20273)
{"OK", "root"},
// deprecated, but turned into valid by ULocale.Builder()
{"OK", "en-u-ca-islamicc"}, // deprecated

View file

@ -25,6 +25,7 @@ import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Pattern;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@ -3424,8 +3425,8 @@ public class ULocaleTest extends TestFmwk {
"zh_HK"
}, {
"und_AQ",
"und_Latn_AQ",
"und_AQ"
"_Latn_AQ",
"_AQ"
}, {
"und_Zzzz",
"en_Latn_US",
@ -3448,8 +3449,8 @@ public class ULocaleTest extends TestFmwk {
"zh_HK"
}, {
"und_Zzzz_AQ",
"und_Latn_AQ",
"und_AQ"
"_Latn_AQ",
"_AQ"
}, {
"und_Latn",
"en_Latn_US",
@ -3472,8 +3473,8 @@ public class ULocaleTest extends TestFmwk {
"zh_Latn_HK"
}, {
"und_Latn_AQ",
"und_Latn_AQ",
"und_AQ"
"_Latn_AQ",
"_AQ"
}, {
"und_Hans",
"zh_Hans_CN",
@ -3544,8 +3545,8 @@ public class ULocaleTest extends TestFmwk {
"zh_Moon_HK"
}, {
"und_Moon_AQ",
"und_Moon_AQ",
"und_Moon_AQ"
"_Moon_AQ",
"_Moon_AQ"
}, {
"es",
"es_Latn_ES",
@ -4272,7 +4273,7 @@ public class ULocaleTest extends TestFmwk {
{new ULocale("en__POSIX"), new ULocale("en"), ULocale.ROOT, null},
{new ULocale("de_DE@collation=phonebook"), new ULocale("de@collation=phonebook"), new ULocale("@collation=phonebook"), null},
{new ULocale("_US_POSIX"), new ULocale("_US"), ULocale.ROOT, null},
{new ULocale("root"), ULocale.ROOT, null},
{new ULocale("root"), null},
};
for(ULocale[] chain : TESTLOCALES) {
@ -4676,7 +4677,6 @@ public class ULocaleTest extends TestFmwk {
"th_TH@calendar=gergorian",
"th_TH@numbers=latn",
"this is a bogus locale id",
"und",
"zh_CN",
"zh_TW",
"zh_Hans",
@ -4890,4 +4890,148 @@ public class ULocaleTest extends TestFmwk {
}
}
}
/**
 * Builds ULocale objects from the IDs "", "root" and "und" via the
 * constructor, forLanguageTag() and Builder, then checks that all of them
 * have an empty name, the language tag "und", are equal to one another,
 * and have the English display name "Unknown language".
 */
@Test
public void TestUnd() {
    final String empty = "";
    final String root = "root";
    final String und = "und";

    ULocale emptyNew = new ULocale(empty);
    ULocale emptyTag = ULocale.forLanguageTag(empty);

    ULocale rootNew = new ULocale(root);
    ULocale rootTag = ULocale.forLanguageTag(root);
    ULocale rootBuild = new Builder().setLanguageTag(root).build();

    ULocale undNew = new ULocale(und);
    ULocale undTag = ULocale.forLanguageTag(und);
    ULocale undBuild = new Builder().setLanguageTag(und).build();

    // Every per-locale check below visits the locales in this order.
    final ULocale[] candidates = {
        emptyNew, rootNew, undNew,
        emptyTag, rootTag, undTag,
        rootBuild, undBuild,
    };

    for (ULocale candidate : candidates) {
        Assert.assertEquals(empty, candidate.getName());
    }

    for (ULocale candidate : candidates) {
        Assert.assertEquals(und, candidate.toLanguageTag());
    }

    // Same ID, different construction path.
    Assert.assertEquals(emptyNew, emptyTag);

    Assert.assertEquals(rootNew, rootTag);
    Assert.assertEquals(rootNew, rootBuild);
    Assert.assertEquals(rootTag, rootBuild);

    Assert.assertEquals(undNew, undTag);
    Assert.assertEquals(undNew, undBuild);
    Assert.assertEquals(undTag, undBuild);

    // Same construction path, different ID.
    Assert.assertEquals(emptyNew, rootNew);
    Assert.assertEquals(emptyNew, undNew);
    Assert.assertEquals(rootNew, undNew);

    Assert.assertEquals(emptyTag, rootTag);
    Assert.assertEquals(emptyTag, undTag);
    Assert.assertEquals(rootTag, undTag);

    Assert.assertEquals(rootBuild, undBuild);

    final ULocale displayLocale = ULocale.ENGLISH;
    final String displayName = "Unknown language";

    for (ULocale candidate : candidates) {
        Assert.assertEquals(displayName, candidate.getDisplayName(displayLocale));
    }
}
/**
 * Creates a script-only locale four ways (ID "_Cyrl", legacy ID "und-Cyrl",
 * language tag "und-Cyrl", Builder with script "Cyrl") and checks that each
 * one reports the name "_Cyrl", the language tag "und-Cyrl" and the English
 * display name "Unknown language (Cyrillic)", and that the locales are equal.
 */
@Test
public void TestUndScript() {
    final String id = "_Cyrl";
    final String tag = "und-Cyrl";
    final String script = "Cyrl";

    ULocale fromId = new ULocale(id);
    ULocale fromLegacy = new ULocale(tag);
    ULocale fromTag = ULocale.forLanguageTag(tag);
    ULocale fromBuilder = new Builder().setScript(script).build();

    // Every per-locale check below visits the locales in this order.
    final ULocale[] candidates = { fromId, fromLegacy, fromTag, fromBuilder };

    for (ULocale candidate : candidates) {
        Assert.assertEquals(id, candidate.getName());
    }

    for (ULocale candidate : candidates) {
        Assert.assertEquals(tag, candidate.toLanguageTag());
    }

    Assert.assertEquals(fromId, fromLegacy);
    Assert.assertEquals(fromId, fromTag);
    Assert.assertEquals(fromId, fromBuilder);
    Assert.assertEquals(fromTag, fromBuilder);

    final ULocale displayLocale = ULocale.ENGLISH;
    final String displayName = "Unknown language (Cyrillic)";

    for (ULocale candidate : candidates) {
        Assert.assertEquals(displayName, candidate.getDisplayName(displayLocale));
    }
}
/**
 * Creates a region-only locale four ways (ID "_AQ", legacy ID "und-AQ",
 * language tag "und-AQ", Builder with region "AQ") and checks that each one
 * reports the name "_AQ", the language tag "und-AQ" and the English display
 * name "Unknown language (Antarctica)", and that the locales are equal.
 */
@Test
public void TestUndRegion() {
    final String id = "_AQ";
    final String tag = "und-AQ";
    final String region = "AQ";

    ULocale fromId = new ULocale(id);
    ULocale fromLegacy = new ULocale(tag);
    ULocale fromTag = ULocale.forLanguageTag(tag);
    ULocale fromBuilder = new Builder().setRegion(region).build();

    // Every per-locale check below visits the locales in this order.
    final ULocale[] candidates = { fromId, fromLegacy, fromTag, fromBuilder };

    for (ULocale candidate : candidates) {
        Assert.assertEquals(id, candidate.getName());
    }

    for (ULocale candidate : candidates) {
        Assert.assertEquals(tag, candidate.toLanguageTag());
    }

    Assert.assertEquals(fromId, fromLegacy);
    Assert.assertEquals(fromId, fromTag);
    Assert.assertEquals(fromId, fromBuilder);
    Assert.assertEquals(fromTag, fromBuilder);

    final ULocale displayLocale = ULocale.ENGLISH;
    final String displayName = "Unknown language (Antarctica)";

    for (ULocale candidate : candidates) {
        Assert.assertEquals(displayName, candidate.getDisplayName(displayLocale));
    }
}
}

View file

@ -763,7 +763,7 @@ fr-FR >> fr
ja-JP >> fr
# For a language that doesn't match anything, return the default.
zu >> en-GB
root >> fr
zxx >> fr
@distance=script
en-GB >> en-GB
@ -771,7 +771,7 @@ en-US >> en
fr-FR >> fr
ja-JP >> fr
zu >> en-GB
root >> en
zxx >> en
** test: TestExactMatch
@supported=fr, en-GB, ja, es-ES, es-MX