mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
parent
e3a43c7fb8
commit
12dc3772b1
11 changed files with 5001 additions and 393 deletions
File diff suppressed because it is too large
Load diff
|
@ -38,6 +38,8 @@ typedef enum ECleanupCommonType {
|
|||
UCLN_COMMON_SERVICE,
|
||||
UCLN_COMMON_LOCALE_KEY_TYPE,
|
||||
UCLN_COMMON_LOCALE,
|
||||
UCLN_COMMON_LOCALE_ALIAS,
|
||||
UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED,
|
||||
UCLN_COMMON_LOCALE_AVAILABLE,
|
||||
UCLN_COMMON_LIKELY_SUBTAGS,
|
||||
UCLN_COMMON_LOCALE_DISTANCE,
|
||||
|
|
|
@ -298,4 +298,10 @@ ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* i
|
|||
U_CFUNC const char*
|
||||
ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
|
||||
|
||||
/* Function for testing purpose */
|
||||
U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length);
|
||||
|
||||
// Return true if the value is already canonicalized.
|
||||
U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -192,6 +192,12 @@ ant -f build-icu-data.xml -DcldrDataDir="$CLDR_TMP_DIR/production" | tee /tmp/cl
|
|||
cd $TOOLS_ROOT/cldr
|
||||
ant copy-cldr-testdata
|
||||
|
||||
# 4d. Copy from CLDR common/testData/localeIdentifiers/localeCanonicalization.txt
|
||||
# into icu4c/source/test/testdata/localeCanonicalization.txt
|
||||
# and icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/localeCanonicalization.txt
|
||||
# and add the following line to the beginning of these two files
|
||||
# # File copied from cldr common/testData/localeIdentifiers/localeCanonicalization.txt
|
||||
|
||||
# 5. Check which data files have modifications, which have been added or removed
|
||||
# (if there are no changes, you may not need to proceed further). Make sure the
|
||||
# list seems reasonable.
|
||||
|
|
|
@ -32,6 +32,8 @@
|
|||
#include "putilimp.h"
|
||||
#include "hash.h"
|
||||
#include "locmap.h"
|
||||
#include "uparse.h"
|
||||
#include "ulocimp.h"
|
||||
|
||||
static const char* const rawData[33][8] = {
|
||||
|
||||
|
@ -257,6 +259,8 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
|
|||
TESTCASE_AUTO(TestBug13554);
|
||||
TESTCASE_AUTO(TestBug20410);
|
||||
TESTCASE_AUTO(TestBug20900);
|
||||
TESTCASE_AUTO(TestLocaleCanonicalizationFromFile);
|
||||
TESTCASE_AUTO(TestKnownCanonicalizedListCorrect);
|
||||
TESTCASE_AUTO(TestConstructorAcceptsBCP47);
|
||||
TESTCASE_AUTO(TestForLanguageTag);
|
||||
TESTCASE_AUTO(TestToLanguageTag);
|
||||
|
@ -4707,10 +4711,10 @@ void LocaleTest::TestCanonicalization(void)
|
|||
} testCases[] = {
|
||||
{ "ca_ES-with-extra-stuff-that really doesn't make any sense-unless-you're trying to increase code coverage",
|
||||
"ca_ES_WITH_EXTRA_STUFF_THAT REALLY DOESN'T MAKE ANY SENSE_UNLESS_YOU'RE TRYING TO INCREASE CODE COVERAGE",
|
||||
"ca_ES_WITH_EXTRA_STUFF_THAT REALLY DOESN'T MAKE ANY SENSE_UNLESS_YOU'RE TRYING TO INCREASE CODE COVERAGE"},
|
||||
"ca_ES_EXTRA_STUFF_THAT REALLY DOESN'T MAKE ANY SENSE_UNLESS_WITH_YOU'RE TRYING TO INCREASE CODE COVERAGE"},
|
||||
{ "zh@collation=pinyin", "zh@collation=pinyin", "zh@collation=pinyin" },
|
||||
{ "zh_CN@collation=pinyin", "zh_CN@collation=pinyin", "zh_Hans_CN@collation=pinyin" },
|
||||
{ "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin", "zh_Hans_CN_CA@collation=pinyin" },
|
||||
{ "zh_CN@collation=pinyin", "zh_CN@collation=pinyin", "zh_CN@collation=pinyin" },
|
||||
{ "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin" },
|
||||
{ "en_US_POSIX", "en_US_POSIX", "en_US_POSIX" },
|
||||
{ "hy_AM_REVISED", "hy_AM_REVISED", "hy_AM_REVISED" },
|
||||
{ "no_NO_NY", "no_NO_NY", "nb_NO_NY" /* not: "nn_NO" [alan ICU3.0] */ },
|
||||
|
@ -4729,13 +4733,17 @@ void LocaleTest::TestCanonicalization(void)
|
|||
{ "x-piglatin_ML.MBE", "x-piglatin_ML.MBE", "x-piglatin_ML" },
|
||||
{ "i-cherokee_US.utf7", "i-cherokee_US.utf7", "i-cherokee_US" },
|
||||
{ "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA" },
|
||||
{ "no-no-ny.utf8@B", "no_NO_NY.utf8@B", "nb_NO_NY_B" /* not: "nn_NO" [alan ICU3.0] */ }, /* @ ignored unless variant is empty */
|
||||
{ "no-no-ny.utf8@B", "no_NO_NY.utf8@B", "nb_NO_B_NY" /* not: "nn_NO" [alan ICU3.0] */ }, /* @ ignored unless variant is empty */
|
||||
|
||||
/* fleshing out canonicalization */
|
||||
/* trim space and sort keywords, ';' is separator so not present at end in canonical form */
|
||||
{ "en_Hant_IL_VALLEY_GIRL@ currency = EUR; calendar = Japanese ;", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR" },
|
||||
{ "en_Hant_IL_VALLEY_GIRL@ currency = EUR; calendar = Japanese ;",
|
||||
"en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR",
|
||||
"en_Hant_IL_GIRL_VALLEY@calendar=Japanese;currency=EUR" },
|
||||
/* already-canonical ids are not changed */
|
||||
{ "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR" },
|
||||
{ "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR",
|
||||
"en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR",
|
||||
"en_Hant_IL_GIRL_VALLEY@calendar=Japanese;currency=EUR" },
|
||||
/* norwegian is just too weird, if we handle things in their full generality */
|
||||
{ "no-Hant-GB_NY@currency=$$$", "no_Hant_GB_NY@currency=$$$", "nb_Hant_GB_NY@currency=$$$" /* not: "nn_Hant_GB@currency=$$$" [alan ICU3.0] */ },
|
||||
|
||||
|
@ -4776,13 +4784,13 @@ void LocaleTest::TestCanonicalization(void)
|
|||
{ "hi__DIRECT", "hi__DIRECT", "hi__DIRECT" },
|
||||
{ "ja_JP_TRADITIONAL", "ja_JP_TRADITIONAL", "ja_JP_TRADITIONAL" },
|
||||
{ "th_TH_TRADITIONAL", "th_TH_TRADITIONAL", "th_TH_TRADITIONAL" },
|
||||
{ "zh_TW_STROKE", "zh_TW_STROKE", "zh_Hant_TW_STROKE" },
|
||||
{ "zh_TW_STROKE", "zh_TW_STROKE", "zh_TW_STROKE" },
|
||||
{ "zh__PINYIN", "zh__PINYIN", "zh__PINYIN" },
|
||||
{ "sr-SP-Cyrl", "sr_SP_CYRL", "sr_SP_CYRL" }, /* .NET name */
|
||||
{ "sr-SP-Latn", "sr_SP_LATN", "sr_SP_LATN" }, /* .NET name */
|
||||
{ "sr_YU_CYRILLIC", "sr_YU_CYRILLIC", "sr_RS_CYRILLIC" }, /* Linux name */
|
||||
{ "uz-UZ-Cyrl", "uz_UZ_CYRL", "uz_Latn_UZ_CYRL" }, /* .NET name */
|
||||
{ "uz-UZ-Latn", "uz_UZ_LATN", "uz_Latn_UZ_LATN" }, /* .NET name */
|
||||
{ "uz-UZ-Cyrl", "uz_UZ_CYRL", "uz_UZ_CYRL" }, /* .NET name */
|
||||
{ "uz-UZ-Latn", "uz_UZ_LATN", "uz_UZ_LATN" }, /* .NET name */
|
||||
{ "zh-CHS", "zh_CHS", "zh_CHS" }, /* .NET name */
|
||||
{ "zh-CHT", "zh_CHT", "zh_CHT" }, /* .NET name This may change back to zh_Hant */
|
||||
/* PRE_EURO and EURO conversions don't affect other keywords */
|
||||
|
@ -4799,13 +4807,6 @@ void LocaleTest::TestCanonicalization(void)
|
|||
|
||||
for (i=0; i < UPRV_LENGTHOF(testCases); i++) {
|
||||
for (j=0; j<3; ++j) {
|
||||
if (j==1 && logKnownIssue("21236", "skip some canonicalization tests until code fixed")) {
|
||||
if (uprv_strncmp(testCases[i].localeID, "zh_CN", 5) == 0 ||
|
||||
uprv_strncmp(testCases[i].localeID, "zh_TW", 5) == 0 ||
|
||||
uprv_strncmp(testCases[i].localeID, "uz-UZ", 5) == 0 ) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
const char* expected = (j==1) ? testCases[i].canonicalID : testCases[i].getNameID;
|
||||
Locale loc = _canonicalize(j, testCases[i].localeID);
|
||||
const char* getName = loc.isBogus() ? "BOGUS" : loc.getName();
|
||||
|
@ -4858,17 +4859,18 @@ void LocaleTest::TestCanonicalize(void)
|
|||
// also test with script, variants and extensions
|
||||
{ "prs-Cyrl-1009-u-ca-roc", "fa-Cyrl-AF-1009-u-ca-roc" },
|
||||
|
||||
// language _ country -> language _ script _ country
|
||||
{ "pa-IN", "pa-Guru-IN" },
|
||||
{ "pa-IN", "pa-IN" },
|
||||
// also test with script
|
||||
{ "pa-Latn-IN", "pa-Latn-IN" },
|
||||
// also test with variants and extensions
|
||||
{ "pa-IN-5678-u-ca-hindi", "pa-Guru-IN-5678-u-ca-hindi" },
|
||||
{ "pa-IN-5678-u-ca-hindi", "pa-IN-5678-u-ca-hindi" },
|
||||
|
||||
// language _ script _ country -> language _ country
|
||||
{ "ky-Cyrl-KG", "ky-KG" },
|
||||
{ "ky-Cyrl-KG", "ky-Cyrl-KG" },
|
||||
// also test with variants and extensions
|
||||
{ "ky-Cyrl-KG-3456-u-ca-roc", "ky-KG-3456-u-ca-roc" },
|
||||
{ "ky-Cyrl-KG-3456-u-ca-roc", "ky-Cyrl-KG-3456-u-ca-roc" },
|
||||
|
||||
// Test replacement of scriptAlias
|
||||
{ "en-Qaai", "en-Zinh" },
|
||||
|
||||
// Test replacement of territoryAlias
|
||||
// 554 has one replacement
|
||||
|
@ -4887,18 +4889,14 @@ void LocaleTest::TestCanonicalize(void)
|
|||
{ "uz-Cyrl-172-5678-u-nu-latn", "uz-Cyrl-UZ-5678-u-nu-latn" },
|
||||
// a language not used in this region
|
||||
{ "fr-172", "fr-RU" },
|
||||
|
||||
// variant
|
||||
{ "ja-Latn-hepburn-heploc", "ja-Latn-alalc97"},
|
||||
|
||||
{ "aaa-Fooo-SU", "aaa-Fooo-RU"},
|
||||
};
|
||||
int32_t i;
|
||||
for (i=0; i < UPRV_LENGTHOF(testCases); i++) {
|
||||
if (logKnownIssue("21236", "skip some canonicalization tests until code fixed")) {
|
||||
if (uprv_strstr(testCases[i].localeID, "-BOKMAL") != 0 ||
|
||||
uprv_strstr(testCases[i].localeID, "-NYNORSK") != 0 ||
|
||||
uprv_strstr(testCases[i].localeID, "-SAAHO") != 0 ||
|
||||
uprv_strncmp(testCases[i].localeID, "pa-IN", 5) == 0 ||
|
||||
uprv_strncmp(testCases[i].localeID, "ky-Cyrl", 7) == 0 ) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
std::string otag = testCases[i].localeID;
|
||||
Locale loc = Locale::forLanguageTag(otag.c_str(), status);
|
||||
|
@ -5351,6 +5349,73 @@ void LocaleTest::TestBug20900() {
|
|||
}
|
||||
}
|
||||
|
||||
U_DEFINE_LOCAL_OPEN_POINTER(LocalStdioFilePointer, FILE, fclose);
|
||||
void LocaleTest::TestLocaleCanonicalizationFromFile()
|
||||
{
|
||||
IcuTestErrorCode status(*this, "TestLocaleCanonicalizationFromFile");
|
||||
const char *sourceTestDataPath=getSourceTestData(status);
|
||||
if(status.errIfFailureAndReset("unable to find the source/test/testdata "
|
||||
"folder (getSourceTestData())")) {
|
||||
return;
|
||||
}
|
||||
char testPath[400];
|
||||
char line[256];
|
||||
strcpy(testPath, sourceTestDataPath);
|
||||
strcat(testPath, "localeCanonicalization.txt");
|
||||
LocalStdioFilePointer testFile(fopen(testPath, "r"));
|
||||
if(testFile.isNull()) {
|
||||
errln("unable to open %s", testPath);
|
||||
return;
|
||||
}
|
||||
// Format:
|
||||
// <source locale identifier> ; <expected canonicalized locale identifier>
|
||||
while (fgets(line, (int)sizeof(line), testFile.getAlias())!=NULL) {
|
||||
if (line[0] == '#') {
|
||||
// ignore any lines start with #
|
||||
continue;
|
||||
}
|
||||
char *semi = strchr(line, ';');
|
||||
if (semi == nullptr) {
|
||||
// ignore any lines without ;
|
||||
continue;
|
||||
}
|
||||
*semi = '\0'; // null terminiate on the spot of semi
|
||||
const char* from = u_skipWhitespace((const char*)line);
|
||||
u_rtrim((char*)from);
|
||||
const char* to = u_skipWhitespace((const char*)semi + 1);
|
||||
u_rtrim((char*)to);
|
||||
std::string expect(to);
|
||||
// Change the _ to -
|
||||
std::transform(expect.begin(), expect.end(), expect.begin(),
|
||||
[](unsigned char c){ return c == '_' ? '-' : c; });
|
||||
|
||||
Locale loc = Locale::createCanonical(from);
|
||||
std::string result = loc.toLanguageTag<std::string>(status);
|
||||
const char* tag = loc.isBogus() ? "BOGUS" : result.c_str();
|
||||
status.errIfFailureAndReset(
|
||||
"FAIL: createCanonical(%s).toLanguageTag() expected \"%s\" locale is %s",
|
||||
from, tag, loc.getName());
|
||||
std::string msg("createCanonical(");
|
||||
msg += from;
|
||||
msg += ") locale = ";
|
||||
msg += loc.getName();
|
||||
assertEquals(msg.c_str(), expect.c_str(), tag);
|
||||
}
|
||||
}
|
||||
|
||||
void LocaleTest::TestKnownCanonicalizedListCorrect()
|
||||
{
|
||||
IcuTestErrorCode status(*this, "TestKnownCanonicalizedListCorrect");
|
||||
int32_t numOfKnownCanonicalized;
|
||||
const char* const* knownCanonicalized =
|
||||
ulocimp_getKnownCanonicalizedLocaleForTest(&numOfKnownCanonicalized);
|
||||
for (int32_t i = 0; i < numOfKnownCanonicalized; i++) {
|
||||
std::string msg("Known Canonicalized Locale is not canonicalized: ");
|
||||
assertTrue((msg + knownCanonicalized[i]).c_str(),
|
||||
ulocimp_isCanonicalizedLocaleForTest(knownCanonicalized[i]));
|
||||
}
|
||||
}
|
||||
|
||||
void LocaleTest::TestConstructorAcceptsBCP47() {
|
||||
IcuTestErrorCode status(*this, "TestConstructorAcceptsBCP47");
|
||||
|
||||
|
|
|
@ -122,6 +122,8 @@ public:
|
|||
void TestBug13554();
|
||||
void TestBug20410();
|
||||
void TestBug20900();
|
||||
void TestLocaleCanonicalizationFromFile();
|
||||
void TestKnownCanonicalizedListCorrect();
|
||||
void TestConstructorAcceptsBCP47();
|
||||
|
||||
void TestAddLikelySubtags();
|
||||
|
|
1648
icu4c/source/test/testdata/localeCanonicalization.txt
vendored
Normal file
1648
icu4c/source/test/testdata/localeCanonicalization.txt
vendored
Normal file
File diff suppressed because it is too large
Load diff
|
@ -78,7 +78,7 @@ public class LanguageTag {
|
|||
final String[][] entries = {
|
||||
//{"tag", "preferred"},
|
||||
{"art-lojban", "jbo"},
|
||||
{"cel-gaulish", "xtg-x-cel-gaulish"}, // fallback
|
||||
{"cel-gaulish", "xtg"}, // fallback
|
||||
{"en-GB-oed", "en-GB-x-oed"}, // fallback
|
||||
{"i-ami", "ami"},
|
||||
{"i-bnn", "bnn"},
|
||||
|
|
|
@ -16,12 +16,15 @@ import java.util.ArrayList;
|
|||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.MissingResourceException;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
@ -1203,6 +1206,396 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
return new LocaleIDParser(localeID).getKeywordValue(keywordName);
|
||||
}
|
||||
|
||||
static private class AliasReplacer {
|
||||
/**
|
||||
* @param language language subtag to be replaced. Cannot be null but could be empty.
|
||||
* @param script script subtag to be replaced. Cannot be null but could be empty.
|
||||
* @param region region subtag to be replaced. Cannot be null but could be empty.
|
||||
* @param variants variant subtags to be replaced. Cannot be null but could be empty.
|
||||
* @param extensions extensions in string to be replaced. Cannot be null but could be empty.
|
||||
*/
|
||||
public AliasReplacer(String language, String script, String region,
|
||||
String variants, String extensions) {
|
||||
|
||||
assert language != null;
|
||||
assert script != null;
|
||||
assert region != null;
|
||||
assert variants != null;
|
||||
assert extensions != null;
|
||||
this.language = language;
|
||||
this.script = script;
|
||||
this.region = region;
|
||||
if (!variants.isEmpty()) {
|
||||
this.variants =
|
||||
new ArrayList<String>(Arrays.asList(variants.split("_")));
|
||||
}
|
||||
this.extensions = extensions;
|
||||
}
|
||||
|
||||
private String language;
|
||||
private String script;
|
||||
private String region;
|
||||
private List<String> variants;
|
||||
private String extensions;
|
||||
|
||||
public String replace() {
|
||||
boolean changed = false;
|
||||
loadAliasData();
|
||||
int count = 0;
|
||||
while (true) {
|
||||
if (count++ > 10) {
|
||||
// Throw exception when we loop through too many time
|
||||
// stop to avoid infinity loop cauesd by incorrect data
|
||||
// in resource.
|
||||
throw new IllegalArgumentException(
|
||||
"Have problem to resolve locale alias of " +
|
||||
lscvToID(language, script, region,
|
||||
((variants == null) ? "" : String.join("_", variants))) +
|
||||
extensions);
|
||||
}
|
||||
// Anytime we replace something, we need to start over again.
|
||||
// lang REGION variant
|
||||
if ( replaceLanguage(true, true, true) ||
|
||||
replaceLanguage(true, true, false) ||
|
||||
replaceLanguage(true, false, true) ||
|
||||
replaceLanguage(true, false, false) ||
|
||||
replaceLanguage(false, false, true) ||
|
||||
replaceRegion() ||
|
||||
replaceScript() ||
|
||||
replaceVariant()) {
|
||||
// Some values in data is changed, try to match from the
|
||||
// beginning again.
|
||||
changed = true;
|
||||
continue;
|
||||
}
|
||||
// Nothing changed in this iteration, break out the loop
|
||||
break;
|
||||
} // while(1)
|
||||
if (changed) {
|
||||
String result = lscvToID(language, script, region,
|
||||
((variants == null) ? "" : String.join("_", variants)));
|
||||
if (extensions != null) {
|
||||
result += extensions;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
// Nothing changed in any iteration of the loop.
|
||||
return null;
|
||||
};
|
||||
|
||||
private static boolean aliasDataIsLoaded = false;
|
||||
private static Map<String, String> languageAliasMap = null;
|
||||
private static Map<String, String> scriptAliasMap = null;
|
||||
private static Map<String, List<String>> territoryAliasMap = null;
|
||||
private static Map<String, String> variantAliasMap = null;
|
||||
|
||||
/*
|
||||
* Initializes the alias data from the ICU resource bundles. The alias
|
||||
* data contains alias of language, country, script and variants.
|
||||
*
|
||||
* If the alias data has already loaded, then this method simply
|
||||
* returns without doing anything meaningful.
|
||||
*
|
||||
*/
|
||||
private static synchronized void loadAliasData() {
|
||||
if (aliasDataIsLoaded) {
|
||||
return;
|
||||
}
|
||||
languageAliasMap = new HashMap<String, String>();
|
||||
scriptAliasMap = new HashMap<String, String>();
|
||||
territoryAliasMap = new HashMap<String, List<String>>();
|
||||
variantAliasMap = new HashMap<String, String>();
|
||||
|
||||
UResourceBundle metadata = UResourceBundle.getBundleInstance(
|
||||
ICUData.ICU_BASE_NAME, "metadata",
|
||||
ICUResourceBundle.ICU_DATA_CLASS_LOADER);
|
||||
UResourceBundle metadataAlias = metadata.get("alias");
|
||||
UResourceBundle languageAlias = metadataAlias.get("language");
|
||||
UResourceBundle scriptAlias = metadataAlias.get("script");
|
||||
UResourceBundle territoryAlias = metadataAlias.get("territory");
|
||||
UResourceBundle variantAlias = metadataAlias.get("variant");
|
||||
|
||||
for (int i = 0 ; i < languageAlias.getSize(); i++) {
|
||||
UResourceBundle res = languageAlias.get(i);
|
||||
String aliasFrom = res.getKey();
|
||||
String aliasTo = res.get("replacement").getString();
|
||||
Locale testLocale = new Locale(aliasFrom);
|
||||
// if there are script in the aliasFrom
|
||||
// or we have both a und as language and a region code.
|
||||
if ( ! testLocale.getScript().isEmpty() ||
|
||||
(aliasFrom.startsWith("und") && ! testLocale.getCountry().isEmpty())) {
|
||||
throw new IllegalArgumentException(
|
||||
"key [" + aliasFrom +
|
||||
"] in alias:language contains unsupported fields combination.");
|
||||
}
|
||||
languageAliasMap.put(aliasFrom, aliasTo);
|
||||
}
|
||||
for (int i = 0 ; i < scriptAlias.getSize(); i++) {
|
||||
UResourceBundle res = scriptAlias.get(i);
|
||||
String aliasFrom = res.getKey();
|
||||
String aliasTo = res.get("replacement").getString();
|
||||
if (aliasFrom.length() != 4) {
|
||||
throw new IllegalArgumentException(
|
||||
"Incorrect key [" + aliasFrom + "] in alias:script.");
|
||||
}
|
||||
scriptAliasMap.put(aliasFrom, aliasTo);
|
||||
}
|
||||
for (int i = 0 ; i < territoryAlias.getSize(); i++) {
|
||||
UResourceBundle res = territoryAlias.get(i);
|
||||
String aliasFrom = res.getKey();
|
||||
String aliasTo = res.get("replacement").getString();
|
||||
if (aliasFrom.length() < 2 || aliasFrom.length() > 3) {
|
||||
throw new IllegalArgumentException(
|
||||
"Incorrect key [" + aliasFrom + "] in alias:territory.");
|
||||
}
|
||||
territoryAliasMap.put(aliasFrom,
|
||||
new ArrayList<String>(Arrays.asList(aliasTo.split(" "))));
|
||||
}
|
||||
for (int i = 0 ; i < variantAlias.getSize(); i++) {
|
||||
UResourceBundle res = variantAlias.get(i);
|
||||
String aliasFrom = res.getKey();
|
||||
String aliasTo = res.get("replacement").getString();
|
||||
if ( aliasFrom.length() < 4 ||
|
||||
aliasFrom.length() > 8 ||
|
||||
(aliasFrom.length() == 4 &&
|
||||
(aliasFrom.charAt(0) < '0' || aliasFrom.charAt(0) > '9'))) {
|
||||
throw new IllegalArgumentException(
|
||||
"Incorrect key [" + aliasFrom + "] in alias:variant.");
|
||||
}
|
||||
if ( aliasTo.length() < 4 ||
|
||||
aliasTo.length() > 8 ||
|
||||
(aliasTo.length() == 4 &&
|
||||
(aliasTo.charAt(0) < '0' || aliasTo.charAt(0) > '9'))) {
|
||||
throw new IllegalArgumentException(
|
||||
"Incorrect variant [" + aliasTo + "] for the key [" + aliasFrom +
|
||||
"] in alias:variant.");
|
||||
}
|
||||
variantAliasMap.put(aliasFrom, aliasTo);
|
||||
}
|
||||
|
||||
aliasDataIsLoaded = true;
|
||||
}
|
||||
|
||||
private static String generateKey(
|
||||
String language, String region, String variant) {
|
||||
assert variant == null || variant.length() >= 4;
|
||||
StringBuilder buf = new StringBuilder();
|
||||
buf.append(language);
|
||||
if (region != null && !region.isEmpty()) {
|
||||
buf.append(UNDERSCORE);
|
||||
buf.append(region);
|
||||
}
|
||||
if (variant != null && !variant.isEmpty()) {
|
||||
buf.append(UNDERSCORE);
|
||||
buf.append(variant);
|
||||
}
|
||||
return buf.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* If replacement is neither null nor empty and input is either null or empty,
|
||||
* return replacement.
|
||||
* If replacement is neither null nor empty but input is not empty, return input.
|
||||
* If replacement is either null or empty and type is either null or empty,
|
||||
* return input.
|
||||
* Otherwise return null.
|
||||
* replacement input type return
|
||||
* AAA "" * AAA
|
||||
* AAA BBB * BBB
|
||||
* "" CCC "" CCC
|
||||
* "" * i DDD ""
|
||||
*/
|
||||
private static String deleteOrReplace(
|
||||
String input, String type, String replacement) {
|
||||
return (replacement != null && !replacement.isEmpty()) ?
|
||||
((input == null || input.isEmpty()) ? replacement : input) :
|
||||
((type == null || type.isEmpty()) ? input : null);
|
||||
}
|
||||
|
||||
private boolean replaceLanguage(boolean checkLanguage,
|
||||
boolean checkRegion, boolean checkVariants) {
|
||||
if ( (checkRegion && (region == null || region.isEmpty())) ||
|
||||
(checkVariants && (variants == null))) {
|
||||
// Nothing to search
|
||||
return false;
|
||||
}
|
||||
int variantSize = checkVariants ? variants.size() : 1;
|
||||
// Since we may have more than one variant, we need to loop through
|
||||
// them.
|
||||
String searchLanguage = checkLanguage ? language : UNDEFINED_LANGUAGE;
|
||||
String searchRegion = checkRegion ? region : null;
|
||||
String searchVariant = null;
|
||||
for (int variantIndex = 0; variantIndex < variantSize; ++variantIndex) {
|
||||
if (checkVariants) {
|
||||
searchVariant = variants.get(variantIndex);
|
||||
}
|
||||
if (searchVariant != null && searchVariant.length() < 4) {
|
||||
// Do not consider ill-formed variant subtag.
|
||||
searchVariant = null;
|
||||
}
|
||||
String typeKey = generateKey(
|
||||
searchLanguage, searchRegion, searchVariant);
|
||||
String replacement = languageAliasMap.get(typeKey);
|
||||
if (replacement == null) {
|
||||
// Found no replacement data.
|
||||
continue;
|
||||
}
|
||||
String replacedScript = null;
|
||||
String replacedRegion = null;
|
||||
String replacedVariant = null;
|
||||
String replacedExtensions = null;
|
||||
String replacedLanguage = null;
|
||||
|
||||
if (replacement.indexOf('_') < 0) {
|
||||
replacedLanguage = replacement.equals(UNDEFINED_LANGUAGE) ?
|
||||
language : replacement;
|
||||
} else {
|
||||
String[] replacementFields = replacement.split("_");
|
||||
replacedLanguage = replacementFields[0];
|
||||
int index = 1;
|
||||
|
||||
if (replacedLanguage.equals(UNDEFINED_LANGUAGE)) {
|
||||
replacedLanguage = language;
|
||||
}
|
||||
int consumed = replacementFields[0].length() + 1;
|
||||
while (replacementFields.length > index) {
|
||||
String field = replacementFields[index];
|
||||
int len = field.length();
|
||||
if (1 == len) {
|
||||
replacedExtensions = replacement.substring(consumed);
|
||||
break;
|
||||
} else if (len >= 2 && len <= 3) {
|
||||
assert replacedRegion == null;
|
||||
replacedRegion = field;
|
||||
} else if (len >= 5 && len <= 8) {
|
||||
assert replacedVariant == null;
|
||||
replacedVariant = field;
|
||||
} else if (len == 4) {
|
||||
if (field.charAt(0) >= '0' && field.charAt(0) <= '9') {
|
||||
assert replacedVariant == null;
|
||||
replacedVariant = field;
|
||||
} else {
|
||||
assert replacedScript == null;
|
||||
replacedScript = field;
|
||||
}
|
||||
}
|
||||
index++;
|
||||
consumed += len + 1;
|
||||
}
|
||||
}
|
||||
|
||||
replacedScript = deleteOrReplace(script, null, replacedScript);
|
||||
replacedRegion = deleteOrReplace(region, searchRegion, replacedRegion);
|
||||
replacedVariant = deleteOrReplace(searchVariant, searchVariant, replacedVariant);
|
||||
|
||||
if ( this.language.equals(replacedLanguage) &&
|
||||
this.script.equals(replacedScript) &&
|
||||
this.region.equals(replacedRegion) &&
|
||||
Objects.equals(searchVariant, replacedVariant) &&
|
||||
replacedExtensions == null) {
|
||||
// Replacement produce no changes on search.
|
||||
// For example, apply pa_IN=> pa_Guru_IN on pa_Guru_IN.
|
||||
continue;
|
||||
}
|
||||
this.language = replacedLanguage;
|
||||
this.script = replacedScript;
|
||||
this.region = replacedRegion;
|
||||
if (searchVariant != null && !searchVariant.isEmpty()) {
|
||||
if (replacedVariant != null && !replacedVariant.isEmpty()) {
|
||||
this.variants.set(variantIndex, replacedVariant);
|
||||
} else {
|
||||
this.variants.remove(variantIndex);
|
||||
if (this.variants.isEmpty()) {
|
||||
this.variants = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (replacedExtensions != null && !replacedExtensions.isEmpty()) {
|
||||
// TODO(ICU-21292)
|
||||
// DO NOTHING
|
||||
// UTS35 does not specifiy what should we do if we have extensions in the
|
||||
// replacement. Currently we know only the following 4 "BCP47 LegacyRules" have
|
||||
// extensions in them languageAlias:
|
||||
// i_default => en_x_i_default
|
||||
// i_enochian => und_x_i_enochian
|
||||
// i_mingo => see_x_i_mingo
|
||||
// zh_min => nan_x_zh_min
|
||||
// But all of them are already changed by code inside LanguageTag before
|
||||
// hitting this code.
|
||||
}
|
||||
// Something in search changed by language alias data.
|
||||
return true;
|
||||
}
|
||||
// Nothing changed in search by language alias data.
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean replaceRegion() {
|
||||
if (region == null || region.isEmpty()) return false;
|
||||
List<String> replacement = territoryAliasMap.get(region);
|
||||
if (replacement == null) {
|
||||
// Found no replacement data for this region.
|
||||
return false;
|
||||
}
|
||||
String replacedRegion;
|
||||
if (replacement.size() > 1) {
|
||||
String regionOfLanguageAndScript =
|
||||
ULocale.addLikelySubtags(
|
||||
new ULocale(this.language, this.script, null))
|
||||
.getCountry();
|
||||
replacedRegion = replacement.contains(regionOfLanguageAndScript) ?
|
||||
regionOfLanguageAndScript : replacement.get(0);
|
||||
} else {
|
||||
replacedRegion = replacement.get(0);
|
||||
}
|
||||
assert this.region != replacedRegion;
|
||||
this.region = replacedRegion;
|
||||
// The region is changed by data in territory alias.
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean replaceScript() {
|
||||
if (script == null || script.isEmpty()) return false;
|
||||
String replacement = scriptAliasMap.get(script);
|
||||
if (replacement == null) {
|
||||
// Found no replacement data for this script.
|
||||
return false;
|
||||
}
|
||||
assert this.script != replacement;
|
||||
this.script = replacement;
|
||||
// The script is changed by data in script alias.
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean replaceVariant() {
|
||||
if (variants == null) return false;
|
||||
for (int i = 0; i < variants.size(); i++) {
|
||||
String variant = variants.get(i);
|
||||
String replacement = variantAliasMap.get(variant);
|
||||
if (replacement == null) {
|
||||
// Found no replacement data for this variant.
|
||||
continue;
|
||||
}
|
||||
assert replacement.length() >= 4;
|
||||
assert replacement.length() <= 8;
|
||||
assert replacement.length() != 4 ||
|
||||
( replacement.charAt(0) >= '0' && replacement.charAt(0) <= '9');
|
||||
if (!variant.equals(replacement)) {
|
||||
variants.set(i, replacement);
|
||||
// Special hack to handle hepburn-heploc => alalc97
|
||||
if (variant.equals("heploc")) {
|
||||
variants.remove("hepburn");
|
||||
if (variants.isEmpty()) {
|
||||
variants = null;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* {@icu} Returns the canonical name according to CLDR for the specified locale ID.
|
||||
* This is used to convert POSIX and other legacy IDs to standard ICU form.
|
||||
|
@ -1239,147 +1632,55 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
}
|
||||
}
|
||||
|
||||
// If the BCP 47 primary language subtag matches the type attribute of a languageAlias
|
||||
// element in Supplemental Data, replace the language subtag with the replacement value.
|
||||
// If there are additional subtags in the replacement value, add them to the result, but
|
||||
// only if there is no corresponding subtag already in the tag.
|
||||
// Five special deprecated codes (such as i-default) are in type attributes, and are also replaced.
|
||||
try {
|
||||
UResourceBundle languageAlias = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,
|
||||
"metadata", ICUResourceBundle.ICU_DATA_CLASS_LOADER)
|
||||
.get("alias")
|
||||
.get("language");
|
||||
// language _ variant
|
||||
if (!parser.getVariant().isEmpty()) {
|
||||
String [] variants = parser.getVariant().split("_");
|
||||
for (String variant : variants) {
|
||||
try {
|
||||
// Note the key in the metadata.txt is formatted as language_variant
|
||||
// instead of language__variant but lscvToID will generate
|
||||
// language__variant so we have to build the string ourselves.
|
||||
ULocale replaceLocale = new ULocale(languageAlias.get(
|
||||
(new StringBuilder(parser.getLanguage().length() + 1 + parser.getVariant().length()))
|
||||
.append(parser.getLanguage())
|
||||
.append("_")
|
||||
.append(variant)
|
||||
.toString())
|
||||
.get("replacement")
|
||||
.getString());
|
||||
StringBuilder replacedVariant = new StringBuilder(parser.getVariant().length());
|
||||
for (String current : variants) {
|
||||
if (current.equals(variant)) continue;
|
||||
if (replacedVariant.length() > 0) replacedVariant.append("_");
|
||||
replacedVariant.append(current);
|
||||
}
|
||||
parser = new LocaleIDParser(
|
||||
(new StringBuilder(localeID.length()))
|
||||
.append(lscvToID(replaceLocale.getLanguage(),
|
||||
!parser.getScript().isEmpty() ? parser.getScript() : replaceLocale.getScript(),
|
||||
!parser.getCountry().isEmpty() ? parser.getCountry() : replaceLocale.getCountry(),
|
||||
replacedVariant.toString()))
|
||||
.append(parser.getName().substring(parser.getBaseName().length()))
|
||||
.toString());
|
||||
} catch (MissingResourceException e) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// language _ script _ country
|
||||
// ug_Arab_CN -> ug_CN
|
||||
if (!parser.getScript().isEmpty() && !parser.getCountry().isEmpty()) {
|
||||
try {
|
||||
ULocale replaceLocale = new ULocale(languageAlias.get(
|
||||
lscvToID(parser.getLanguage(), parser.getScript(), parser.getCountry(), null))
|
||||
.get("replacement")
|
||||
.getString());
|
||||
parser = new LocaleIDParser((new StringBuilder(localeID.length()))
|
||||
.append(lscvToID(replaceLocale.getLanguage(),
|
||||
replaceLocale.getScript(),
|
||||
replaceLocale.getCountry(),
|
||||
parser.getVariant()))
|
||||
.append(parser.getName().substring(parser.getBaseName().length()))
|
||||
.toString());
|
||||
} catch (MissingResourceException e) {
|
||||
}
|
||||
}
|
||||
// language _ country
|
||||
// eg. az_AZ -> az_Latn_AZ
|
||||
if (!parser.getCountry().isEmpty()) {
|
||||
try {
|
||||
ULocale replaceLocale = new ULocale(languageAlias.get(
|
||||
lscvToID(parser.getLanguage(), null, parser.getCountry(), null))
|
||||
.get("replacement")
|
||||
.getString());
|
||||
parser = new LocaleIDParser((new StringBuilder(localeID.length()))
|
||||
.append(lscvToID(replaceLocale.getLanguage(),
|
||||
parser.getScript().isEmpty() ? replaceLocale.getScript() : parser.getScript(),
|
||||
replaceLocale.getCountry(),
|
||||
parser.getVariant()))
|
||||
.append(parser.getName().substring(parser.getBaseName().length()))
|
||||
.toString());
|
||||
} catch (MissingResourceException e) {
|
||||
}
|
||||
}
|
||||
// only language
|
||||
// e.g. twi -> ak
|
||||
try {
|
||||
ULocale replaceLocale = new ULocale(languageAlias.get(parser.getLanguage())
|
||||
.get("replacement")
|
||||
.getString());
|
||||
parser = new LocaleIDParser((new StringBuilder(localeID.length()))
|
||||
.append(lscvToID(replaceLocale.getLanguage(),
|
||||
parser.getScript().isEmpty() ? replaceLocale.getScript() : parser.getScript() ,
|
||||
parser.getCountry().isEmpty() ? replaceLocale.getCountry() : parser.getCountry() ,
|
||||
parser.getVariant()))
|
||||
.append(parser.getName().substring(parser.getBaseName().length()))
|
||||
.toString());
|
||||
} catch (MissingResourceException e) {
|
||||
}
|
||||
} catch (MissingResourceException e) {
|
||||
}
|
||||
|
||||
// If the BCP 47 region subtag matches the type attribute of a
|
||||
// territoryAlias element in Supplemental Data, replace the language
|
||||
// subtag with the replacement value, as follows:
|
||||
if (!parser.getCountry().isEmpty()) {
|
||||
try {
|
||||
String replacements[] = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,
|
||||
"metadata", ICUResourceBundle.ICU_DATA_CLASS_LOADER)
|
||||
.get("alias")
|
||||
.get("territory")
|
||||
.get(parser.getCountry())
|
||||
.get("replacement")
|
||||
.getString()
|
||||
.split(" ");
|
||||
String replacement = replacements[0];
|
||||
// If there is a single territory in the replacement, use it.
|
||||
// If there are multiple territories:
|
||||
// Look up the most likely territory for the base language code (and script, if there is one).
|
||||
// If that likely territory is in the list, use it.
|
||||
// Otherwise, use the first territory in the list.
|
||||
if (replacements.length > 1) {
|
||||
String likelyCountry = ULocale.addLikelySubtags(
|
||||
new ULocale(lscvToID(parser.getLanguage(), parser.getScript(), null, parser.getVariant())))
|
||||
.getCountry();
|
||||
for (String country : replacements) {
|
||||
if (country.equals(likelyCountry)) {
|
||||
replacement = likelyCountry;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
parser = new LocaleIDParser(
|
||||
(new StringBuilder(localeID.length()))
|
||||
.append(lscvToID(parser.getLanguage(), parser.getScript(), replacement, parser.getVariant()))
|
||||
.append(parser.getName().substring(parser.getBaseName().length()))
|
||||
.toString());
|
||||
} catch (MissingResourceException e) {
|
||||
boolean knownCanonicalized = false;
|
||||
String name = parser.getName();
|
||||
if (!isKnownCanonicalizedLocale(name)) {
|
||||
AliasReplacer replacer = new AliasReplacer(
|
||||
parser.getLanguage(), parser.getScript(), parser.getCountry(),
|
||||
AsciiUtil.toLowerString(parser.getVariant()),
|
||||
parser.getName().substring(parser.getBaseName().length()));
|
||||
String replaced = replacer.replace();
|
||||
if (replaced != null) {
|
||||
parser = new LocaleIDParser(replaced);
|
||||
}
|
||||
}
|
||||
|
||||
return parser.getName();
|
||||
}
|
||||
|
||||
private static synchronized boolean isKnownCanonicalizedLocale(String name) {
|
||||
if (name.equals("c") || name.equals("en") || name.equals("en_US")) {
|
||||
return true;
|
||||
}
|
||||
if (gKnownCanonicalizedCases == null) {
|
||||
List<String> items = Arrays.asList(
|
||||
"af", "af_ZA", "am", "am_ET", "ar", "ar_001", "as", "as_IN", "az", "az_AZ",
|
||||
"be", "be_BY", "bg", "bg_BG", "bn", "bn_IN", "bs", "bs_BA", "ca", "ca_ES",
|
||||
"cs", "cs_CZ", "cy", "cy_GB", "da", "da_DK", "de", "de_DE", "el", "el_GR",
|
||||
"en", "en_GB", "en_US", "es", "es_419", "es_ES", "et", "et_EE", "eu",
|
||||
"eu_ES", "fa", "fa_IR", "fi", "fi_FI", "fil", "fil_PH", "fr", "fr_FR",
|
||||
"ga", "ga_IE", "gl", "gl_ES", "gu", "gu_IN", "he", "he_IL", "hi", "hi_IN",
|
||||
"hr", "hr_HR", "hu", "hu_HU", "hy", "hy_AM", "id", "id_ID", "is", "is_IS",
|
||||
"it", "it_IT", "ja", "ja_JP", "jv", "jv_ID", "ka", "ka_GE", "kk", "kk_KZ",
|
||||
"km", "km_KH", "kn", "kn_IN", "ko", "ko_KR", "ky", "ky_KG", "lo", "lo_LA",
|
||||
"lt", "lt_LT", "lv", "lv_LV", "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN",
|
||||
"mr", "mr_IN", "ms", "ms_MY", "my", "my_MM", "nb", "nb_NO", "ne", "ne_NP",
|
||||
"nl", "nl_NL", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF",
|
||||
"pt", "pt_BR", "pt_PT", "ro", "ro_RO", "ru", "ru_RU", "sd", "sd_IN", "si",
|
||||
"si_LK", "sk", "sk_SK", "sl", "sl_SI", "so", "so_SO", "sq", "sq_AL", "sr",
|
||||
"sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ", "ta",
|
||||
"ta_IN", "te", "te_IN", "th", "th_TH", "tk", "tk_TM", "tr", "tr_TR", "uk",
|
||||
"uk_UA", "ur", "ur_PK", "uz", "uz_UZ", "vi", "vi_VN", "yue", "yue_Hant",
|
||||
"yue_Hant_HK", "yue_HK", "zh", "zh_CN", "zh_Hans", "zh_Hans_CN", "zh_Hant",
|
||||
"zh_Hant_TW", "zh_TW", "zu", "zu_ZA");
|
||||
gKnownCanonicalizedCases = new HashSet<String>(items);
|
||||
|
||||
}
|
||||
return gKnownCanonicalizedCases.contains(name);
|
||||
}
|
||||
|
||||
// Cache of locale IDs that isKnownCanonicalizedLocale() reports as already
// canonical. Starts out null and is filled in by that method on first use.
private static Set<String> gKnownCanonicalizedCases = null;
|
||||
|
||||
/**
|
||||
* {@icu} Given a keyword and a value, return a new locale with an updated
|
||||
* keyword and value. If the keyword is null, this removes all keywords from the locale id.
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -14,6 +14,8 @@ package com.ibm.icu.dev.test.util;
|
|||
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
|
@ -892,8 +894,8 @@ public class ULocaleTest extends TestFmwk {
|
|||
public void TestCanonicalization(){
|
||||
final String[][]testCases = new String[][]{
|
||||
{ "zh@collation=pinyin", "zh@collation=pinyin", "zh@collation=pinyin" },
|
||||
{ "zh_CN@collation=pinyin", "zh_CN@collation=pinyin", "zh_Hans_CN@collation=pinyin" },
|
||||
{ "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin", "zh_Hans_CN_CA@collation=pinyin" },
|
||||
{ "zh_CN@collation=pinyin", "zh_CN@collation=pinyin", "zh_CN@collation=pinyin" },
|
||||
{ "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin" },
|
||||
{ "en_US_POSIX", "en_US_POSIX", "en_US_POSIX" },
|
||||
{ "hy_AM_REVISED", "hy_AM_REVISED", "hy_AM_REVISED" },
|
||||
{ "no_NO_NY", "no_NO_NY", "nb_NO_NY" /* not: "nn_NO" [alan ICU3.0] */ },
|
||||
|
@ -956,14 +958,14 @@ public class ULocaleTest extends TestFmwk {
|
|||
{ "hi__DIRECT", "hi__DIRECT", "hi__DIRECT" },
|
||||
{ "ja_JP_TRADITIONAL", "ja_JP_TRADITIONAL", "ja_JP_TRADITIONAL" },
|
||||
{ "th_TH_TRADITIONAL", "th_TH_TRADITIONAL", "th_TH_TRADITIONAL" },
|
||||
{ "zh_TW_STROKE", "zh_TW_STROKE", "zh_Hant_TW_STROKE" },
|
||||
{ "zh_TW_STROKE", "zh_TW_STROKE", "zh_TW_STROKE" },
|
||||
{ "zh__PINYIN", "zh__PINYIN", "zh__PINYIN" },
|
||||
{ "qz-qz@Euro", null, "qz_QZ_EURO" }, /* qz-qz uses private use iso codes */
|
||||
{ "sr-SP-Cyrl", "sr_SP_CYRL", "sr_SP_CYRL" }, /* .NET name */
|
||||
{ "sr-SP-Latn", "sr_SP_LATN", "sr_SP_LATN" }, /* .NET name */
|
||||
{ "sr_YU_CYRILLIC", "sr_YU_CYRILLIC", "sr_RS_CYRILLIC" }, /* Linux name */
|
||||
{ "uz-UZ-Cyrl", "uz_UZ_CYRL", "uz_Latn_UZ_CYRL" }, /* .NET name */
|
||||
{ "uz-UZ-Latn", "uz_UZ_LATN", "uz_Latn_UZ_LATN" }, /* .NET name */
|
||||
{ "uz-UZ-Cyrl", "uz_UZ_CYRL", "uz_UZ_CYRL" }, /* .NET name */
|
||||
{ "uz-UZ-Latn", "uz_UZ_LATN", "uz_UZ_LATN" }, /* .NET name */
|
||||
{ "zh-CHS", "zh_CHS", "zh_CHS" }, /* .NET name */
|
||||
{ "zh-CHT", "zh_CHT", "zh_CHT" }, /* .NET name This may change back to zh_Hant */
|
||||
/* PRE_EURO and EURO conversions don't affect other keywords */
|
||||
|
@ -5175,21 +5177,18 @@ public class ULocaleTest extends TestFmwk {
|
|||
// also test with script, variants and extensions
|
||||
Assert.assertEquals("fa-Cyrl-AF-1009-u-ca-roc", canonicalTag("prs-Cyrl-1009-u-ca-roc"));
|
||||
|
||||
if (!logKnownIssue("21236", "skip some canonicalization tests until code fixed")) {
|
||||
// language _ country -> language _ script _ country
|
||||
Assert.assertEquals("pa-Guru-IN", canonicalTag("pa-IN"));
|
||||
}
|
||||
Assert.assertEquals("pa-IN", canonicalTag("pa-IN"));
|
||||
// also test with script
|
||||
Assert.assertEquals("pa-Latn-IN", canonicalTag("pa-Latn-IN"));
|
||||
if (!logKnownIssue("21236", "skip some canonicalization tests until code fixed")) {
|
||||
// also test with variants and extensions
|
||||
Assert.assertEquals("pa-Guru-IN-5678-u-ca-hindi", canonicalTag("pa-IN-5678-u-ca-hindi"));
|
||||
// also test with variants and extensions
|
||||
Assert.assertEquals("pa-IN-5678-u-ca-hindi", canonicalTag("pa-IN-5678-u-ca-hindi"));
|
||||
|
||||
// language _ script _ country -> language _ country
|
||||
Assert.assertEquals("ky-KG", canonicalTag("ky-Cyrl-KG"));
|
||||
// also test with variants and extensions
|
||||
Assert.assertEquals("ky-KG-3456-u-ca-roc", canonicalTag("ky-Cyrl-KG-3456-u-ca-roc"));
|
||||
}
|
||||
Assert.assertEquals("ky-Cyrl-KG", canonicalTag("ky-Cyrl-KG"));
|
||||
// also test with variants and extensions
|
||||
Assert.assertEquals("ky-Cyrl-KG-3456-u-ca-roc", canonicalTag("ky-Cyrl-KG-3456-u-ca-roc"));
|
||||
|
||||
// Test replacement of scriptAlias
|
||||
Assert.assertEquals("en-Zinh", canonicalTag("en-Qaai"));
|
||||
|
||||
// Test replacement of territoryAlias
|
||||
// 554 has one replacement
|
||||
|
@ -5209,5 +5208,35 @@ public class ULocaleTest extends TestFmwk {
|
|||
Assert.assertEquals("uz-Cyrl-UZ-5678-u-nu-latn", canonicalTag("uz-Cyrl-172-5678-u-nu-latn"));
|
||||
// a language not used in this region
|
||||
Assert.assertEquals("fr-RU", canonicalTag("fr-172"));
|
||||
|
||||
Assert.assertEquals("ja-Latn-alalc97", canonicalTag("ja-Latn-hepburn-heploc"));
|
||||
|
||||
Assert.assertEquals("aaa-Fooo-RU", canonicalTag("aaa-Fooo-SU"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestLocaleCanonicalizationFromFile() throws IOException {
|
||||
BufferedReader testFile = TestUtil.getDataReader("unicode/localeCanonicalization.txt");
|
||||
try {
|
||||
String line;
|
||||
while ((line = testFile.readLine()) != null) {
|
||||
if (line.startsWith("#")) {
|
||||
// ignore any lines start with #
|
||||
continue;
|
||||
}
|
||||
String[] fields = line.split("\t;\t");
|
||||
if (fields.length != 2) {
|
||||
// ignore any lines without TAB ; TAB
|
||||
continue;
|
||||
}
|
||||
String from = fields[0].replace("_", "-");
|
||||
String to = fields[1].replace("_", "-");
|
||||
Assert.assertEquals("canonicalTag(" + from + ")",
|
||||
to, canonicalTag(from));
|
||||
}
|
||||
} finally {
|
||||
testFile.close();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue