mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 05:55:35 +00:00
parent
35e950d35f
commit
1aa229daff
4 changed files with 201 additions and 8 deletions
|
@ -627,6 +627,17 @@ private:
|
|||
LocalMemory<const char*>& types,
|
||||
LocalMemory<int32_t>& replacementIndexes,
|
||||
int32_t &length, UErrorCode &status);
|
||||
|
||||
// Read the subdivisionAlias data from alias to
|
||||
// strings+types+replacementIndexes
|
||||
// Allocate length items for types, to store the type field.
|
||||
// Allocate length items for replacementIndexes,
|
||||
// to store the index in the strings for the replacement subdivision.
|
||||
void readSubdivisionAlias(UResourceBundle* alias,
|
||||
UniqueCharStrings* strings,
|
||||
LocalMemory<const char*>& types,
|
||||
LocalMemory<int32_t>& replacementIndexes,
|
||||
int32_t &length, UErrorCode &status);
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -647,6 +658,7 @@ public:
|
|||
const CharStringMap& scriptMap() const { return script; }
|
||||
const CharStringMap& territoryMap() const { return territory; }
|
||||
const CharStringMap& variantMap() const { return variant; }
|
||||
const CharStringMap& subdivisionMap() const { return subdivision; }
|
||||
|
||||
static void U_CALLCONV loadData(UErrorCode &status);
|
||||
static UBool U_CALLCONV cleanup();
|
||||
|
@ -658,11 +670,13 @@ private:
|
|||
CharStringMap scriptMap,
|
||||
CharStringMap territoryMap,
|
||||
CharStringMap variantMap,
|
||||
CharStringMap subdivisionMap,
|
||||
CharString* strings)
|
||||
: language(std::move(languageMap)),
|
||||
script(std::move(scriptMap)),
|
||||
territory(std::move(territoryMap)),
|
||||
variant(std::move(variantMap)),
|
||||
subdivision(std::move(subdivisionMap)),
|
||||
strings(strings) {
|
||||
}
|
||||
|
||||
|
@ -676,6 +690,7 @@ private:
|
|||
CharStringMap script;
|
||||
CharStringMap territory;
|
||||
CharStringMap variant;
|
||||
CharStringMap subdivision;
|
||||
CharString* strings;
|
||||
|
||||
friend class AliasDataBuilder;
|
||||
|
@ -866,6 +881,34 @@ AliasDataBuilder::readVariantAlias(
|
|||
status);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the subdivisionAlias data from alias to strings+types+replacementIndexes.
|
||||
* Allocate length items for types, to store the type field. Allocate length
|
||||
* items for replacementIndexes, to store the index in the strings for the
|
||||
* replacement regions.
|
||||
*/
|
||||
void
|
||||
AliasDataBuilder::readSubdivisionAlias(
|
||||
UResourceBundle* alias,
|
||||
UniqueCharStrings* strings,
|
||||
LocalMemory<const char*>& types,
|
||||
LocalMemory<int32_t>& replacementIndexes,
|
||||
int32_t &length,
|
||||
UErrorCode &status)
|
||||
{
|
||||
return readAlias(
|
||||
alias, strings, types, replacementIndexes, length,
|
||||
#if U_DEBUG
|
||||
[](const char* type) {
|
||||
U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8);
|
||||
},
|
||||
#else
|
||||
[](const char*) {},
|
||||
#endif
|
||||
[](const UnicodeString&) { },
|
||||
status);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the alias data from the ICU resource bundles. The alias data
|
||||
* contains alias of language, country, script and variants.
|
||||
|
@ -905,12 +948,14 @@ AliasDataBuilder::build(UErrorCode &status) {
|
|||
ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status));
|
||||
LocalUResourceBundlePointer variantAlias(
|
||||
ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status));
|
||||
LocalUResourceBundlePointer subdivisionAlias(
|
||||
ures_getByKey(metadataAlias.getAlias(), "subdivision", nullptr, &status));
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
|
||||
variantLength = 0;
|
||||
variantLength = 0, subdivisionLength = 0;
|
||||
|
||||
// Read the languageAlias into languageTypes, languageReplacementIndexes
|
||||
// and strings
|
||||
|
@ -955,6 +1000,16 @@ AliasDataBuilder::build(UErrorCode &status) {
|
|||
variantReplacementIndexes,
|
||||
variantLength, status);
|
||||
|
||||
// Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
|
||||
// and strings
|
||||
LocalMemory<const char*> subdivisionTypes;
|
||||
LocalMemory<int32_t> subdivisionReplacementIndexes;
|
||||
readSubdivisionAlias(subdivisionAlias.getAlias(),
|
||||
&strings,
|
||||
subdivisionTypes,
|
||||
subdivisionReplacementIndexes,
|
||||
subdivisionLength, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -994,6 +1049,14 @@ AliasDataBuilder::build(UErrorCode &status) {
|
|||
status);
|
||||
}
|
||||
|
||||
// Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
|
||||
CharStringMap subdivisionMap(2, status);
|
||||
for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) {
|
||||
subdivisionMap.put(subdivisionTypes[i],
|
||||
strings.get(subdivisionReplacementIndexes[i]),
|
||||
status);
|
||||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -1004,6 +1067,7 @@ AliasDataBuilder::build(UErrorCode &status) {
|
|||
std::move(scriptMap),
|
||||
std::move(territoryMap),
|
||||
std::move(variantMap),
|
||||
std::move(subdivisionMap),
|
||||
strings.orphanCharStrings());
|
||||
|
||||
if (data == nullptr) {
|
||||
|
@ -1105,6 +1169,9 @@ private:
|
|||
|
||||
// Replace by using variantAlias.
|
||||
bool replaceVariant(UErrorCode& status);
|
||||
|
||||
// Replace by using subdivisionAlias.
|
||||
bool replaceSubdivision(CharString& subdivision, UErrorCode& status);
|
||||
};
|
||||
|
||||
CharString&
|
||||
|
@ -1433,6 +1500,27 @@ AliasReplacer::replaceVariant(UErrorCode& status)
|
|||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
AliasReplacer::replaceSubdivision(CharString& subdivision, UErrorCode& status)
|
||||
{
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
const char *replacement = data->subdivisionMap().get(subdivision.data());
|
||||
if (replacement != nullptr) {
|
||||
const char* firstSpace = uprv_strchr(replacement, ' ');
|
||||
// Found replacement data for this subdivision.
|
||||
size_t len = (firstSpace != nullptr) ?
|
||||
(firstSpace - replacement) : uprv_strlen(replacement);
|
||||
// Ignore len == 2, see CLDR-14312
|
||||
if (3 <= len && len <= 8) {
|
||||
subdivision.clear().append(replacement, (int32_t)len, status);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
CharString&
|
||||
AliasReplacer::outputToString(
|
||||
CharString& out, UErrorCode status)
|
||||
|
@ -1495,7 +1583,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
|
|||
region = nullptr;
|
||||
}
|
||||
const char* variantsStr = locale.getVariant();
|
||||
const char* extensionsStr = locale_getKeywordsStart(locale.getName());
|
||||
CharString variantsBuff(variantsStr, -1, status);
|
||||
if (!variantsBuff.isEmpty()) {
|
||||
if (U_FAILURE(status)) { return false; }
|
||||
|
@ -1559,11 +1646,42 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
|
|||
if (U_FAILURE(status)) { return false; }
|
||||
// Nothing changed and we know the order of the variants has not changed
|
||||
// because we have no variant or only one.
|
||||
if (changed == 0 && variants.size() <= 1) {
|
||||
const char* extensionsStr = locale_getKeywordsStart(locale.getName());
|
||||
if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) {
|
||||
return false;
|
||||
}
|
||||
outputToString(out, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
if (extensionsStr != nullptr) {
|
||||
changed = 0;
|
||||
Locale temp(locale);
|
||||
LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status));
|
||||
if (U_SUCCESS(status) && !iter.isNull()) {
|
||||
const char* key;
|
||||
while ((key = iter->next(nullptr, status)) != nullptr) {
|
||||
if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0) {
|
||||
CharString value;
|
||||
CharStringByteSink valueSink(&value);
|
||||
locale.getKeywordValue(key, valueSink, status);
|
||||
if (U_FAILURE(status)) {
|
||||
status = U_ZERO_ERROR;
|
||||
continue;
|
||||
}
|
||||
if (replaceSubdivision(value, status)) {
|
||||
changed++;
|
||||
}
|
||||
temp.setKeywordValue(key, value.data(), status);
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (changed != 0) {
|
||||
extensionsStr = locale_getKeywordsStart(temp.getName());
|
||||
}
|
||||
out.append(extensionsStr, status);
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
|
@ -1572,7 +1690,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
|
|||
// If the tag is not changed, return.
|
||||
if (uprv_strcmp(out.data(), locale.getName()) == 0) {
|
||||
U_ASSERT(changed == 0);
|
||||
U_ASSERT(variants.size() > 1);
|
||||
out.clear();
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -4916,8 +4916,24 @@ void LocaleTest::TestCanonicalize(void)
|
|||
// ICU-21344
|
||||
{ "ku-Arab-NT", "ku-Arab-IQ"},
|
||||
|
||||
// ICU-21402
|
||||
{ "und-u-rg-no23", "und-u-rg-no50"},
|
||||
{ "und-u-rg-cn11", "und-u-rg-cnbj"},
|
||||
{ "und-u-rg-cz10a", "und-u-rg-cz110"},
|
||||
{ "und-u-rg-fra", "und-u-rg-frges"},
|
||||
{ "und-u-rg-frg", "und-u-rg-frges"},
|
||||
{ "und-u-rg-lud", "und-u-rg-lucl"},
|
||||
|
||||
{ "und-NO-u-sd-no23", "und-NO-u-sd-no50"},
|
||||
{ "und-CN-u-sd-cn11", "und-CN-u-sd-cnbj"},
|
||||
{ "und-CZ-u-sd-cz10a", "und-CZ-u-sd-cz110"},
|
||||
{ "und-FR-u-sd-fra", "und-FR-u-sd-frges"},
|
||||
{ "und-FR-u-sd-frg", "und-FR-u-sd-frges"},
|
||||
{ "und-LU-u-sd-lud", "und-LU-u-sd-lucl"},
|
||||
|
||||
// ICU-21401
|
||||
{ "cel-gaulish", "xtg"},
|
||||
|
||||
};
|
||||
int32_t i;
|
||||
for (i=0; i < UPRV_LENGTHOF(testCases); i++) {
|
||||
|
|
|
@ -1268,12 +1268,33 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
// Nothing changed in this iteration, break out the loop
|
||||
break;
|
||||
} // while(1)
|
||||
if (changed) {
|
||||
String result = lscvToID(language, script, region,
|
||||
if (extensions == null && !changed) {
|
||||
return null;
|
||||
}
|
||||
String result = lscvToID(language, script, region,
|
||||
((variants == null) ? "" : Utility.joinStrings("_", variants)));
|
||||
if (extensions != null) {
|
||||
result += extensions;
|
||||
if (extensions != null) {
|
||||
boolean keywordChanged = false;
|
||||
ULocale temp = new ULocale(result + extensions);
|
||||
Iterator<String> keywords = temp.getKeywords();
|
||||
while (keywords != null && keywords.hasNext()) {
|
||||
String key = keywords.next();
|
||||
if (key.equals("rg") || key.equals("sd")) {
|
||||
String value = temp.getKeywordValue(key);
|
||||
String replacement = replaceSubdivision(value);
|
||||
if (replacement != null) {
|
||||
temp = temp.setKeywordValue(key, replacement);
|
||||
keywordChanged = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (keywordChanged) {
|
||||
extensions = temp.getName().substring(temp.getBaseName().length());
|
||||
changed = true;
|
||||
}
|
||||
result += extensions;
|
||||
}
|
||||
if (changed) {
|
||||
return result;
|
||||
}
|
||||
// Nothing changed in any iteration of the loop.
|
||||
|
@ -1285,6 +1306,7 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
private static Map<String, String> scriptAliasMap = null;
|
||||
private static Map<String, List<String>> territoryAliasMap = null;
|
||||
private static Map<String, String> variantAliasMap = null;
|
||||
private static Map<String, String> subdivisionAliasMap = null;
|
||||
|
||||
/*
|
||||
* Initializes the alias data from the ICU resource bundles. The alias
|
||||
|
@ -1302,6 +1324,7 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
scriptAliasMap = new HashMap<>();
|
||||
territoryAliasMap = new HashMap<>();
|
||||
variantAliasMap = new HashMap<>();
|
||||
subdivisionAliasMap = new HashMap<>();
|
||||
|
||||
UResourceBundle metadata = UResourceBundle.getBundleInstance(
|
||||
ICUData.ICU_BASE_NAME, "metadata",
|
||||
|
@ -1311,6 +1334,7 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
UResourceBundle scriptAlias = metadataAlias.get("script");
|
||||
UResourceBundle territoryAlias = metadataAlias.get("territory");
|
||||
UResourceBundle variantAlias = metadataAlias.get("variant");
|
||||
UResourceBundle subdivisionAlias = metadataAlias.get("subdivision");
|
||||
|
||||
for (int i = 0 ; i < languageAlias.getSize(); i++) {
|
||||
UResourceBundle res = languageAlias.get(i);
|
||||
|
@ -1369,6 +1393,22 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
}
|
||||
variantAliasMap.put(aliasFrom, aliasTo);
|
||||
}
|
||||
for (int i = 0 ; i < subdivisionAlias.getSize(); i++) {
|
||||
UResourceBundle res = subdivisionAlias.get(i);
|
||||
String aliasFrom = res.getKey();
|
||||
String aliasTo = res.get("replacement").getString().split(" ")[0];
|
||||
if (aliasFrom.length() < 3 || aliasFrom.length() > 8) {
|
||||
throw new IllegalArgumentException(
|
||||
"Incorrect key [" + aliasFrom + "] in alias:territory.");
|
||||
}
|
||||
if (aliasTo.length() < 3 || aliasTo.length() > 8) {
|
||||
// Ignore replacement < 3 for now. see CLDR-14312
|
||||
// throw new IllegalArgumentException(
|
||||
// "Incorrect value [" + aliasTo + "] in alias:subdivision.");
|
||||
continue;
|
||||
}
|
||||
subdivisionAliasMap.put(aliasFrom, aliasTo);
|
||||
}
|
||||
|
||||
aliasDataIsLoaded = true;
|
||||
}
|
||||
|
@ -1591,6 +1631,11 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
|
|||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private String replaceSubdivision(String subdivision) {
|
||||
return subdivisionAliasMap.get(subdivision);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -5216,6 +5216,21 @@ public class ULocaleTest extends TestFmwk {
|
|||
// ICU-21344
|
||||
Assert.assertEquals("ku-Arab-IQ", canonicalTag("ku-Arab-NT"));
|
||||
|
||||
// ICU-21402
|
||||
Assert.assertEquals("und-u-rg-no50", canonicalTag("und-u-rg-no23"));
|
||||
Assert.assertEquals("und-u-rg-cnbj", canonicalTag("und-u-rg-cn11"));
|
||||
Assert.assertEquals("und-u-rg-cz110", canonicalTag("und-u-rg-cz10a"));
|
||||
Assert.assertEquals("und-u-rg-frges", canonicalTag("und-u-rg-fra"));
|
||||
Assert.assertEquals("und-u-rg-frges", canonicalTag("und-u-rg-frg"));
|
||||
Assert.assertEquals("und-u-rg-lucl", canonicalTag("und-u-rg-lud"));
|
||||
|
||||
Assert.assertEquals("und-NO-u-sd-no50", canonicalTag("und-NO-u-sd-no23"));
|
||||
Assert.assertEquals("und-CN-u-sd-cnbj", canonicalTag("und-CN-u-sd-cn11"));
|
||||
Assert.assertEquals("und-CZ-u-sd-cz110", canonicalTag("und-CZ-u-sd-cz10a"));
|
||||
Assert.assertEquals("und-FR-u-sd-frges", canonicalTag("und-FR-u-sd-fra"));
|
||||
Assert.assertEquals("und-FR-u-sd-frges", canonicalTag("und-FR-u-sd-frg"));
|
||||
Assert.assertEquals("und-LU-u-sd-lucl", canonicalTag("und-LU-u-sd-lud"));
|
||||
|
||||
// ICU-21401
|
||||
Assert.assertEquals("xtg", canonicalTag("cel-gaulish"));
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue