ICU-8951 Legacy/BCP 47 keyword conversion APIs merged into ICU4J trunk.

X-SVN-Rev: 36261
This commit is contained in:
Yoshito Umaoka 2014-08-28 01:27:49 +00:00
parent f2dfa7422e
commit 0ffd26301d
5 changed files with 818 additions and 163 deletions

1
.gitattributes vendored
View file

@ -268,6 +268,7 @@ icu4j/main/classes/core/.settings/org.eclipse.core.resources.prefs -text
icu4j/main/classes/core/.settings/org.eclipse.jdt.core.prefs -text
icu4j/main/classes/core/manifest.stub -text
icu4j/main/classes/core/src/com/ibm/icu/impl/TZDBTimeZoneNames.java -text
icu4j/main/classes/core/src/com/ibm/icu/impl/locale/KeyTypeData.java -text
icu4j/main/classes/currdata/.externalToolBuilders/copy-data-currdata.launch -text
icu4j/main/classes/currdata/.settings/org.eclipse.core.resources.prefs -text
icu4j/main/classes/currdata/.settings/org.eclipse.jdt.core.prefs -text

View file

@ -0,0 +1,542 @@
/*
*******************************************************************************
* Copyright (C) 2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.impl.locale;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.MissingResourceException;
import java.util.Set;
import java.util.regex.Pattern;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.UResourceBundle;
import com.ibm.icu.util.UResourceBundleIterator;
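/**
 * Lookup tables for converting locale keyword keys and types between their
 * legacy form and their BCP 47 Unicode locale extension form. The tables are
 * built once, during class initialization, from the ICU "keyTypeData" resource
 * bundle.
 */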
public class KeyTypeData {
/**
 * Strategy for keyword types that are accepted by syntax rather than by being
 * listed one by one in a key's type map.
 */
private abstract static class SpecialTypeHandler {
    /** Tells whether the given value has the syntax this handler accepts. */
    abstract boolean isValid(String value);

    /** Produces the canonical spelling of the value; by default its ASCII lower-case form. */
    String canonicalize(String value) {
        String canonical = AsciiUtil.toLowerString(value);
        return canonical;
    }
}
private static class CodepointsTypeHandler extends SpecialTypeHandler {
private static final Pattern pat = Pattern.compile("[0-9a-fA-F]{4,6}(-[0-9a-fA-F]{4,6})*");
boolean isValid(String value) {
return pat.matcher(value).matches();
}
}
private static class ReorderCodeTypeHandler extends SpecialTypeHandler {
private static final Pattern pat = Pattern.compile("[a-zA-Z]{3,8}(-[a-zA-Z]{3,8})*");
boolean isValid(String value) {
return pat.matcher(value).matches();
}
}
/**
 * Kinds of syntax-validated ("special") types. Each constant carries the handler
 * that validates and canonicalizes values of that kind. The constant names are
 * compared against type identifiers found in the type map data, so they must not
 * be renamed.
 */
private enum SpecialType {
    CODEPOINTS(new CodepointsTypeHandler()),
    REORDER_CODE(new ReorderCodeTypeHandler());

    /** Validator/canonicalizer for this kind of special type. */
    final SpecialTypeHandler handler;

    private SpecialType(SpecialTypeHandler handler) {
        this.handler = handler;
    }
}
/**
 * Everything known about one locale keyword key: both spellings of the key,
 * the lookup map for its types, and the special type kinds it allows.
 */
private static class KeyData {
    /** Legacy spelling of the key. */
    final String legacyId;
    /** BCP 47 spelling of the key. */
    final String bcpId;
    /** Lower-cased type spelling (legacy, BCP 47 or alias) to its type pair. */
    final Map<String, Type> typeMap;
    /** Special type kinds accepted for this key, or null when there are none. */
    final EnumSet<SpecialType> specialTypes;

    KeyData(String legacyId, String bcpId, Map<String, Type> typeMap,
            EnumSet<SpecialType> specialTypes) {
        this.specialTypes = specialTypes;
        this.typeMap = typeMap;
        this.bcpId = bcpId;
        this.legacyId = legacyId;
    }
}
/**
 * The two spellings of one keyword type: legacy and BCP 47.
 */
private static class Type {
    /** Legacy spelling of the type. */
    final String legacyId;
    /** BCP 47 spelling of the type. */
    final String bcpId;

    Type(String legacyId, String bcpId) {
        this.bcpId = bcpId;
        this.legacyId = legacyId;
    }
}
/**
 * Looks up the BCP 47 key for a keyword key given in either spelling.
 * The lookup ignores ASCII case.
 *
 * @param key legacy or BCP 47 key
 * @return the BCP 47 key, or null when the key is not in the table
 */
public static String toBcpKey(String key) {
    KeyData entry = KEYMAP.get(AsciiUtil.toLowerString(key));
    return (entry == null) ? null : entry.bcpId;
}
/**
 * Looks up the legacy key for a keyword key given in either spelling.
 * The lookup ignores ASCII case.
 *
 * @param key legacy or BCP 47 key
 * @return the legacy key, or null when the key is not in the table
 */
public static String toLegacyKey(String key) {
    KeyData entry = KEYMAP.get(AsciiUtil.toLowerString(key));
    return (entry == null) ? null : entry.legacyId;
}
/**
 * Converts a keyword type (legacy, BCP 47 or alias spelling) to its BCP 47
 * spelling for the given key. Key and type are matched ignoring ASCII case.
 *
 * @param key           legacy or BCP 47 key
 * @param type          the type value to convert
 * @param isKnownKey    if non-null, receives whether the key is in the table
 * @param isSpecialType if non-null, receives whether the result came from a
 *                      special type handler rather than the type map
 * @return the BCP 47 type; the canonicalized value when a special type handler
 *         of the key accepts it; otherwise null
 */
public static String toBcpType(String key, String type,
        Output<Boolean> isKnownKey, Output<Boolean> isSpecialType) {
    // Report "unknown key / not special" unless proven otherwise below.
    if (isKnownKey != null) {
        isKnownKey.value = Boolean.FALSE;
    }
    if (isSpecialType != null) {
        isSpecialType.value = Boolean.FALSE;
    }

    String lowerKey = AsciiUtil.toLowerString(key);
    String lowerType = AsciiUtil.toLowerString(type);

    KeyData keyData = KEYMAP.get(lowerKey);
    if (keyData == null) {
        return null;
    }
    if (isKnownKey != null) {
        isKnownKey.value = Boolean.TRUE;
    }

    // Enumerated types take precedence over special types.
    Type mapped = keyData.typeMap.get(lowerType);
    if (mapped != null) {
        return mapped.bcpId;
    }
    if (keyData.specialTypes == null) {
        return null;
    }
    for (SpecialType special : keyData.specialTypes) {
        SpecialTypeHandler handler = special.handler;
        if (!handler.isValid(lowerType)) {
            continue;
        }
        if (isSpecialType != null) {
            isSpecialType.value = Boolean.TRUE;
        }
        return handler.canonicalize(lowerType);
    }
    return null;
}
/**
 * Converts a keyword type (legacy, BCP 47 or alias spelling) to its legacy
 * spelling for the given key. Key and type are matched ignoring ASCII case.
 *
 * @param key           legacy or BCP 47 key
 * @param type          the type value to convert
 * @param isKnownKey    if non-null, receives whether the key is in the table
 * @param isSpecialType if non-null, receives whether the result came from a
 *                      special type handler rather than the type map
 * @return the legacy type; the canonicalized value when a special type handler
 *         of the key accepts it; otherwise null
 */
public static String toLegacyType(String key, String type,
        Output<Boolean> isKnownKey, Output<Boolean> isSpecialType) {
    // Report "unknown key / not special" unless proven otherwise below.
    if (isKnownKey != null) {
        isKnownKey.value = Boolean.FALSE;
    }
    if (isSpecialType != null) {
        isSpecialType.value = Boolean.FALSE;
    }

    String lowerKey = AsciiUtil.toLowerString(key);
    String lowerType = AsciiUtil.toLowerString(type);

    KeyData keyData = KEYMAP.get(lowerKey);
    if (keyData == null) {
        return null;
    }
    if (isKnownKey != null) {
        isKnownKey.value = Boolean.TRUE;
    }

    // Enumerated types take precedence over special types.
    Type mapped = keyData.typeMap.get(lowerType);
    if (mapped != null) {
        return mapped.legacyId;
    }
    if (keyData.specialTypes == null) {
        return null;
    }
    for (SpecialType special : keyData.specialTypes) {
        SpecialTypeHandler handler = special.handler;
        if (!handler.isValid(lowerType)) {
            continue;
        }
        if (isSpecialType != null) {
            isSpecialType.value = Boolean.TRUE;
        }
        return handler.canonicalize(lowerType);
    }
    return null;
}
/**
 * Fills KEYMAP from the "keyTypeData" resource bundle.
 * <p>
 * For every entry of the bundle's "keyMap" table this builds one KeyData object
 * and registers it under the lower-cased legacy key and, when different, the
 * lower-cased BCP 47 key. The per-key type lookup map is built from "typeMap"
 * and indexes each type by its lower-cased legacy id, BCP 47 id and any aliases
 * found in the optional "typeAlias" / "bcpTypeAlias" tables.
 */
private static void initFromResourceBundle() {
UResourceBundle keyTypeDataRes = UResourceBundle.getBundleInstance(
ICUResourceBundle.ICU_BASE_NAME,
"keyTypeData",
ICUResourceBundle.ICU_DATA_CLASS_LOADER);
UResourceBundle keyMapRes = keyTypeDataRes.get("keyMap");
UResourceBundle typeMapRes = keyTypeDataRes.get("typeMap");
// alias data is optional
UResourceBundle typeAliasRes = null;
UResourceBundle bcpTypeAliasRes = null;
try {
typeAliasRes = keyTypeDataRes.get("typeAlias");
} catch (MissingResourceException e) {
// fall through
}
try {
bcpTypeAliasRes = keyTypeDataRes.get("bcpTypeAlias");
} catch (MissingResourceException e) {
// fall through
}
// iterate through keyMap resource
UResourceBundleIterator keyMapItr = keyMapRes.getIterator();
while (keyMapItr.hasNext()) {
UResourceBundle keyMapEntry = keyMapItr.next();
String legacyKeyId = keyMapEntry.getKey();
String bcpKeyId = keyMapEntry.getString();
boolean hasSameKey = false;
if (bcpKeyId.length() == 0) {
// Empty value indicates that BCP key is same with the legacy key.
bcpKeyId = legacyKeyId;
hasSameKey = true;
}
// timezone type ids get ':' replaced by '/' below
boolean isTZ = legacyKeyId.equals("timezone");
// reverse type alias map
// (canonical legacy type -> set of its aliases; looked up by legacy key)
Map<String, Set<String>> typeAliasMap = null;
if (typeAliasRes != null) {
UResourceBundle typeAliasResByKey = null;
try {
typeAliasResByKey = typeAliasRes.get(legacyKeyId);
} catch (MissingResourceException e) {
// fall through
}
if (typeAliasResByKey != null) {
typeAliasMap = new HashMap<String, Set<String>>();
UResourceBundleIterator typeAliasResItr = typeAliasResByKey.getIterator();
while (typeAliasResItr.hasNext()) {
UResourceBundle typeAliasDataEntry = typeAliasResItr.next();
String from = typeAliasDataEntry.getKey();
String to = typeAliasDataEntry.getString();
if (isTZ) {
from = from.replace(':', '/');
}
Set<String> aliasSet = typeAliasMap.get(to);
if (aliasSet == null) {
aliasSet = new HashSet<String>();
typeAliasMap.put(to, aliasSet);
}
aliasSet.add(from);
}
}
}
// reverse bcp type alias map
// (preferred BCP type -> set of its aliases; looked up by BCP key)
Map<String, Set<String>> bcpTypeAliasMap = null;
if (bcpTypeAliasRes != null) {
UResourceBundle bcpTypeAliasResByKey = null;
try {
bcpTypeAliasResByKey = bcpTypeAliasRes.get(bcpKeyId);
} catch (MissingResourceException e) {
// fall through
}
if (bcpTypeAliasResByKey != null) {
bcpTypeAliasMap = new HashMap<String, Set<String>>();
UResourceBundleIterator bcpTypeAliasResItr = bcpTypeAliasResByKey.getIterator();
while (bcpTypeAliasResItr.hasNext()) {
UResourceBundle bcpTypeAliasDataEntry = bcpTypeAliasResItr.next();
String from = bcpTypeAliasDataEntry.getKey();
String to = bcpTypeAliasDataEntry.getString();
Set<String> aliasSet = bcpTypeAliasMap.get(to);
if (aliasSet == null) {
aliasSet = new HashSet<String>();
bcpTypeAliasMap.put(to, aliasSet);
}
aliasSet.add(from);
}
}
}
// lower-cased type spelling -> Type pair, and the special type kinds seen for this key
Map<String, Type> typeDataMap = new HashMap<String, Type>();
Set<SpecialType> specialTypeSet = null;
// look up type map for the key, and walk through the mapping data
UResourceBundle typeMapResByKey = null;
try {
typeMapResByKey = typeMapRes.get(legacyKeyId);
} catch (MissingResourceException e) {
// type map for each key must exist
assert false;
}
if (typeMapResByKey != null) {
UResourceBundleIterator typeMapResByKeyItr = typeMapResByKey.getIterator();
while (typeMapResByKeyItr.hasNext()) {
UResourceBundle typeMapEntry = typeMapResByKeyItr.next();
String legacyTypeId = typeMapEntry.getKey();
// special types
// (an entry whose id equals a SpecialType constant name only enables that kind)
boolean isSpecialType = false;
for (SpecialType st : SpecialType.values()) {
if (legacyTypeId.equals(st.toString())) {
isSpecialType = true;
if (specialTypeSet == null) {
specialTypeSet = new HashSet<SpecialType>();
}
specialTypeSet.add(st);
break;
}
}
if (isSpecialType) {
continue;
}
if (isTZ) {
// a timezone key uses a colon instead of a slash in the resource.
// e.g. America:Los_Angeles
legacyTypeId = legacyTypeId.replace(':', '/');
}
String bcpTypeId = typeMapEntry.getString();
boolean hasSameType = false;
if (bcpTypeId.length() == 0) {
// Empty value indicates that BCP type is same with the legacy type.
bcpTypeId = legacyTypeId;
hasSameType = true;
}
// Note: legacy type value should never be
// equivalent to bcp type value of a different
// type under the same key. So we use a single
// map for lookup.
Type t = new Type(legacyTypeId, bcpTypeId);
typeDataMap.put(AsciiUtil.toLowerString(legacyTypeId), t);
if (!hasSameType) {
typeDataMap.put(AsciiUtil.toLowerString(bcpTypeId), t);
}
// Also put aliases in the map
if (typeAliasMap != null) {
Set<String> typeAliasSet = typeAliasMap.get(legacyTypeId);
if (typeAliasSet != null) {
for (String alias : typeAliasSet) {
typeDataMap.put(AsciiUtil.toLowerString(alias), t);
}
}
}
if (bcpTypeAliasMap != null) {
Set<String> bcpTypeAliasSet = bcpTypeAliasMap.get(bcpTypeId);
if (bcpTypeAliasSet != null) {
for (String alias : bcpTypeAliasSet) {
typeDataMap.put(AsciiUtil.toLowerString(alias), t);
}
}
}
}
}
// specialTypes stays null when the key has no special type entries
EnumSet<SpecialType> specialTypes = null;
if (specialTypeSet != null) {
specialTypes = EnumSet.copyOf(specialTypeSet);
}
// register the key data under the legacy key and, if different, the BCP key
KeyData keyData = new KeyData(legacyKeyId, bcpKeyId, typeDataMap, specialTypes);
KEYMAP.put(AsciiUtil.toLowerString(legacyKeyId), keyData);
if (!hasSameKey) {
KEYMAP.put(AsciiUtil.toLowerString(bcpKeyId), keyData);
}
}
}
//
// Note: The key-type data is currently read from ICU resource bundle keyTypeData.res.
// In future, we may import the data into code like below directly from CLDR to
// avoid cyclic dependency between ULocale and UResourceBundle. For now, the code
// below is just for proof of concept, and commented out.
//
// private static final String[][] TYPE_DATA_CA = {
// // {<legacy type>, <bcp type - if different>},
// {"buddhist", null},
// {"chinese", null},
// {"coptic", null},
// {"dangi", null},
// {"ethiopic", null},
// {"ethiopic-amete-alem", "ethioaa"},
// {"gregorian", "gregory"},
// {"hebrew", null},
// {"indian", null},
// {"islamic", null},
// {"islamic-civil", null},
// {"islamic-rgsa", null},
// {"islamic-tbla", null},
// {"islamic-umalqura", null},
// {"iso8601", null},
// {"japanese", null},
// {"persian", null},
// {"roc", null},
// };
//
// private static final String[][] TYPE_DATA_KS = {
// // {<legacy type>, <bcp type - if different>},
// {"identical", "identic"},
// {"primary", "level1"},
// {"quaternary", "level4"},
// {"secondary", "level2"},
// {"tertiary", "level3"},
// };
//
// private static final String[][] TYPE_ALIAS_KS = {
// // {<legacy alias>, <legacy canonical>},
// {"quarternary", "quaternary"},
// };
//
// private static final String[][] BCP_TYPE_ALIAS_CA = {
// // {<bcp deprecated>, <bcp preferred>},
// {"islamicc", "islamic-civil"},
// };
//
// private static final Object[][] KEY_DATA = {
// // {<legacy key>, <bcp key - if different>, <type map>, <type alias>, <bcp type alias>},
// {"calendar", "ca", TYPE_DATA_CA, null, BCP_TYPE_ALIAS_CA},
// {"colstrength", "ks", TYPE_DATA_KS, TYPE_ALIAS_KS, null},
// };
// Hard-coded key/type tables read by initFromTables(). Left empty because the
// data is currently loaded from the resource bundle instead.
private static final Object[][] KEY_DATA = {};

/**
 * Fills KEYMAP from the hard-coded KEY_DATA tables.
 * <p>
 * Each KEY_DATA row is {legacy key, BCP key or null if identical, type table,
 * legacy type alias table or null, BCP type alias table or null}. A row without
 * an alias table is valid, so both alias maps may be null and are checked before
 * use.
 */
@SuppressWarnings("unused")
private static void initFromTables() {
    for (Object[] keyDataEntry : KEY_DATA) {
        String legacyKeyId = (String)keyDataEntry[0];
        String bcpKeyId = (String)keyDataEntry[1];
        String[][] typeData = (String[][])keyDataEntry[2];
        String[][] typeAliasData = (String[][])keyDataEntry[3];
        String[][] bcpTypeAliasData = (String[][])keyDataEntry[4];

        boolean hasSameKey = false;
        if (bcpKeyId == null) {
            // null indicates that the BCP key is the same as the legacy key
            bcpKeyId = legacyKeyId;
            hasSameKey = true;
        }

        // reverse alias maps (canonical -> aliases); null when the row has no alias table
        Map<String, Set<String>> typeAliasMap = reverseAliasTable(typeAliasData);
        Map<String, Set<String>> bcpTypeAliasMap = reverseAliasTable(bcpTypeAliasData);

        // Type map data
        assert typeData != null;
        Map<String, Type> typeDataMap = new HashMap<String, Type>();
        Set<SpecialType> specialTypeSet = null;

        for (String[] typeDataEntry : typeData) {
            String legacyTypeId = typeDataEntry[0];
            String bcpTypeId = typeDataEntry[1];

            // special types: the entry only enables the matching handler for this key
            boolean isSpecialType = false;
            for (SpecialType st : SpecialType.values()) {
                if (legacyTypeId.equals(st.toString())) {
                    isSpecialType = true;
                    if (specialTypeSet == null) {
                        specialTypeSet = new HashSet<SpecialType>();
                    }
                    specialTypeSet.add(st);
                    break;
                }
            }
            if (isSpecialType) {
                continue;
            }

            boolean hasSameType = false;
            if (bcpTypeId == null) {
                // null indicates that the BCP type is the same as the legacy type
                bcpTypeId = legacyTypeId;
                hasSameType = true;
            }

            // Note: legacy type value should never be
            // equivalent to bcp type value of a different
            // type under the same key. So we use a single
            // map for lookup.
            Type t = new Type(legacyTypeId, bcpTypeId);
            typeDataMap.put(AsciiUtil.toLowerString(legacyTypeId), t);
            if (!hasSameType) {
                typeDataMap.put(AsciiUtil.toLowerString(bcpTypeId), t);
            }

            // Also put aliases in the index. The alias maps are null for keys
            // without alias data, so they must be checked first.
            if (typeAliasMap != null) {
                Set<String> typeAliasSet = typeAliasMap.get(legacyTypeId);
                if (typeAliasSet != null) {
                    for (String alias : typeAliasSet) {
                        typeDataMap.put(AsciiUtil.toLowerString(alias), t);
                    }
                }
            }
            if (bcpTypeAliasMap != null) {
                Set<String> bcpTypeAliasSet = bcpTypeAliasMap.get(bcpTypeId);
                if (bcpTypeAliasSet != null) {
                    for (String alias : bcpTypeAliasSet) {
                        typeDataMap.put(AsciiUtil.toLowerString(alias), t);
                    }
                }
            }
        }

        EnumSet<SpecialType> specialTypes = null;
        if (specialTypeSet != null) {
            specialTypes = EnumSet.copyOf(specialTypeSet);
        }

        // register the key data under the legacy key and, if different, the BCP key
        KeyData keyData = new KeyData(legacyKeyId, bcpKeyId, typeDataMap, specialTypes);
        KEYMAP.put(AsciiUtil.toLowerString(legacyKeyId), keyData);
        if (!hasSameKey) {
            KEYMAP.put(AsciiUtil.toLowerString(bcpKeyId), keyData);
        }
    }
}

// Inverts an {alias, canonical} table into canonical -> set of aliases; returns null for a null table.
private static Map<String, Set<String>> reverseAliasTable(String[][] aliasData) {
    if (aliasData == null) {
        return null;
    }
    Map<String, Set<String>> reversed = new HashMap<String, Set<String>>();
    for (String[] aliasDataEntry : aliasData) {
        String from = aliasDataEntry[0];
        String to = aliasDataEntry[1];
        Set<String> aliasSet = reversed.get(to);
        if (aliasSet == null) {
            aliasSet = new HashSet<String>();
            reversed.put(to, aliasSet);
        }
        aliasSet.add(from);
    }
    return reversed;
}
// Lower-cased key (legacy or BCP 47 spelling) -> data for that key.
// Populated once, when the class is initialized.
private static final Map<String, KeyData> KEYMAP = new HashMap<String, KeyData>();

static {
    // initFromTables();
    initFromResourceBundle();
}
}

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2009-2010, International Business Machines Corporation and *
* Copyright (C) 2009-2014, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
@ -99,4 +99,23 @@ public class UnicodeLocaleExtension extends Extension {
// 3*8alphanum
return (s.length() >= 3) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s);
}
/**
 * Tells whether the string is a sequence of type subtags joined by
 * LanguageTag.SEP, each of which satisfies isTypeSubtag.
 */
public static boolean isType(String s) {
    int begin = 0;
    boolean seenSubtag = false;
    int sepPos;
    do {
        sepPos = s.indexOf(LanguageTag.SEP, begin);
        // Take the piece up to the next separator, or the rest of the string.
        String piece = (sepPos >= 0) ? s.substring(begin, sepPos) : s.substring(begin);
        if (!isTypeSubtag(piece)) {
            return false;
        }
        seenSubtag = true;
        if (sepPos >= 0) {
            begin = sepPos + 1;
        }
    } while (sepPos >= 0);
    return seenSubtag && begin < s.length();
}
}

View file

@ -40,6 +40,7 @@ import com.ibm.icu.impl.locale.LocaleExtensions;
import com.ibm.icu.impl.locale.LocaleSyntaxException;
import com.ibm.icu.impl.locale.ParseStatus;
import com.ibm.icu.impl.locale.UnicodeLocaleExtension;
import com.ibm.icu.impl.locale.KeyTypeData;
import com.ibm.icu.text.LocaleDisplayNames;
import com.ibm.icu.text.LocaleDisplayNames.DialectHandling;
@ -3218,6 +3219,149 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
return getInstance(bldr.getBaseLocale(), bldr.getLocaleExtensions());
}
/**
 * Returns the BCP 47 Unicode locale extension key that corresponds to the
 * given keyword, which may be a legacy key or already a BCP 47 key.
 * For example, "co" is returned for the input keyword "collation".
 * <p>
 * A keyword that is not known but has the syntax of a BCP 47 key is
 * returned in lower case. For example,
 * <code>toUnicodeLocaleKey("ZZ")</code> returns "zz".
 *
 * @param keyword       the input locale keyword (either legacy key
 *                      such as "collation" or BCP 47 Unicode locale extension
 *                      key such as "co").
 * @return              the well-formed BCP 47 Unicode locale extension key,
 *                      or null if the specified locale keyword cannot be mapped
 *                      to a well-formed BCP 47 Unicode locale extension key.
 * @see #toLegacyKey(String)
 * @draft ICU 54
 * @provisional This API might change or be removed in a future release.
 */
public static String toUnicodeLocaleKey(String keyword) {
    String mapped = KeyTypeData.toBcpKey(keyword);
    if (mapped != null) {
        return mapped;
    }
    // Not in the key table: accept it only if it already has BCP 47 key syntax.
    if (UnicodeLocaleExtension.isKey(keyword)) {
        return AsciiUtil.toLowerString(keyword);
    }
    return null;
}
/**
 * Returns the BCP 47 Unicode locale extension type that corresponds to the
 * given keyword value (a legacy type or already a BCP 47 type) under the
 * given keyword. For example, "phonebk" is returned for the value
 * "phonebook" with the keyword "collation" (or "co").
 * <p>
 * When no mapping is found but the value has the syntax of a BCP 47
 * Unicode locale extension type, or when the keyword allows a 'variable'
 * type and the value satisfies its syntax, the value is returned in lower
 * case. For example,
 * <code>toUnicodeLocaleType("Foo", "Bar")</code> returns "bar",
 * <code>toUnicodeLocaleType("variableTop", "00A4")</code> returns "00a4".
 *
 * @param keyword       the locale keyword (either legacy key such as
 *                      "collation" or BCP 47 Unicode locale extension
 *                      key such as "co").
 * @param value         the locale keyword value (either legacy type
 *                      such as "phonebook" or BCP 47 Unicode locale extension
 *                      type such as "phonebk").
 * @return              the well-formed BCP47 Unicode locale extension type,
 *                      or null if the locale keyword value cannot be mapped to
 *                      a well-formed BCP 47 Unicode locale extension type.
 * @see #toLegacyType(String, String)
 * @draft ICU 54
 * @provisional This API might change or be removed in a future release.
 */
public static String toUnicodeLocaleType(String keyword, String value) {
    String mapped = KeyTypeData.toBcpType(keyword, value, null, null);
    if (mapped != null) {
        return mapped;
    }
    // No mapping found: accept the value only if it already has BCP 47 type syntax.
    if (UnicodeLocaleExtension.isType(value)) {
        return AsciiUtil.toLowerString(value);
    }
    return null;
}
/**
 * Converts the specified keyword (BCP 47 Unicode locale extension key, or
 * legacy key) to the legacy key. For example, legacy key "collation" is
 * returned for the input BCP 47 Unicode locale extension key "co".
 * <p>
 * A keyword that is not known is returned in lower case when it is a
 * non-empty run of ASCII letters and digits; an empty keyword is not
 * well-formed and yields null.
 *
 * @param keyword       the input locale keyword (either BCP 47 Unicode locale
 *                      extension key or legacy key).
 * @return              the well-formed legacy key, or null if the specified
 *                      keyword cannot be mapped to a well-formed legacy key.
 * @see #toUnicodeLocaleKey(String)
 * @draft ICU 54
 * @provisional This API might change or be removed in a future release.
 */
public static String toLegacyKey(String keyword) {
    String legacyKey = KeyTypeData.toLegacyKey(keyword);
    if (legacyKey == null) {
        // Checks if the specified locale key is well-formed with the legacy locale syntax.
        //
        // Note:
        //  Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
        //  However, a key should not contain '=' obviously. For now, all existing
        //  keys are using ASCII alphabetic letters only. We won't add any new key
        //  that is not compatible with the BCP 47 syntax. Therefore, we assume
        //  a valid key consists of one or more characters from [0-9a-zA-Z], no symbols.
        //  The '+' quantifier (rather than '*') rejects the empty string.
        if (keyword.matches("[0-9a-zA-Z]+")) {
            legacyKey = AsciiUtil.toLowerString(keyword);
        }
    }
    return legacyKey;
}
/**
 * Returns the canonical legacy type that corresponds to the given keyword
 * value (a BCP 47 Unicode locale extension type, a legacy type or a type
 * alias) under the given keyword. For example, "phonebook" is returned for
 * the value "phonebk" with the keyword "collation" (or "co").
 * <p>
 * When no mapping is found but the value satisfies the legacy type syntax,
 * or when the keyword allows a 'variable' type and the value satisfies its
 * syntax, the value is returned in lower case. For example,
 * <code>toLegacyType("Foo", "Bar")</code> returns "bar",
 * <code>toLegacyType("vt", "00A4")</code> returns "00a4".
 *
 * @param keyword       the locale keyword (either legacy keyword such as
 *                      "collation" or BCP 47 Unicode locale extension
 *                      key such as "co").
 * @param value         the locale keyword value (either BCP 47 Unicode locale
 *                      extension type such as "phonebk" or legacy keyword value
 *                      such as "phonebook").
 * @return              the well-formed legacy type, or null if the specified
 *                      keyword value cannot be mapped to a well-formed legacy
 *                      type.
 * @see #toUnicodeLocaleType(String, String)
 * @draft ICU 54
 * @provisional This API might change or be removed in a future release.
 */
public static String toLegacyType(String keyword, String value) {
    String mapped = KeyTypeData.toLegacyType(keyword, value, null, null);
    if (mapped != null) {
        return mapped;
    }
    // No mapping found; fall back to a syntax check of the legacy type.
    //
    // Neither ICU nor LDML/CLDR defines the legacy keyword syntax, but a type
    // clearly cannot contain '='. Existing types use ASCII letters and digits
    // plus a few symbols, and new types will stay BCP 47 compatible except for
    // timezone IDs. So a type is accepted when it consists of [0-9a-zA-Z] runs
    // separated by single '-', '_' or '/' characters.
    boolean wellFormed = value.matches("[0-9a-zA-Z]+([_/\\-][0-9a-zA-Z]+)*");
    return wellFormed ? AsciiUtil.toLowerString(value) : null;
}
/**
* <code>Builder</code> is used to build instances of <code>ULocale</code>
@ -3591,8 +3735,8 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
for (String bcpKey : ukeys) {
String bcpType = uext.getUnicodeLocaleType(bcpKey);
// convert to legacy key/type
String lkey = bcp47ToLDMLKey(bcpKey);
String ltype = bcp47ToLDMLType(lkey, ((bcpType.length() == 0) ? "yes" : bcpType)); // use "yes" as the value of typeless keywords
String lkey = toLegacyKey(bcpKey);
String ltype = toLegacyType(bcpKey, ((bcpType.length() == 0) ? "yes" : bcpType)); // use "yes" as the value of typeless keywords
// special handling for u-va-posix, since this is a variant, not a keyword
if (lkey.equals("va") && ltype.equals("posix") && base.getVariant().length() == 0) {
id = id + "_POSIX";
@ -3675,8 +3819,8 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
}
}
} else if (key.length() >= 2) {
String bcpKey = ldmlKeyToBCP47(key);
String bcpType = ldmlTypeToBCP47(key, getKeywordValue(key));
String bcpKey = toUnicodeLocaleKey(key);
String bcpType = toUnicodeLocaleType(key, getKeywordValue(key));
if (bcpKey != null && bcpType != null) {
try {
intbld.setUnicodeLocaleKeyword(bcpKey, bcpType);
@ -3699,161 +3843,6 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
return extensions;
}
//
// LDML legacy/BCP47 key and type mapping functions
//
/**
 * Maps a legacy keyword key to its BCP 47 key using the "keyMap" table of the
 * "keyTypeData" bundle. A key missing from the table is returned lower-cased
 * when it is a two-character extension subtag; otherwise null is returned.
 */
private static String ldmlKeyToBCP47(String key) {
    UResourceBundle keyTypeData = UResourceBundle.getBundleInstance(
            ICUResourceBundle.ICU_BASE_NAME,
            "keyTypeData",
            ICUResourceBundle.ICU_DATA_CLASS_LOADER);
    UResourceBundle keyMap = keyTypeData.get("keyMap");

    // the table is keyed by lower-case legacy keys
    final String lowerKey = AsciiUtil.toLowerString(key);

    try {
        String mapped = keyMap.getString(lowerKey);
        // an empty value means the BCP47 key equals the legacy key
        return (mapped.length() == 0) ? lowerKey : mapped;
    } catch (MissingResourceException mre) {
        // not in the table; try the syntax check below
    }

    if (lowerKey.length() == 2 && LanguageTag.isExtensionSubtag(lowerKey)) {
        return lowerKey;
    }
    return null;
}
/**
 * Maps a BCP 47 key to its legacy key by scanning the "keyMap" table of the
 * "keyTypeData" bundle. When no entry matches, the lower-cased input is returned.
 */
private static String bcp47ToLDMLKey(String bcpKey) {
    UResourceBundle keyTypeData = UResourceBundle.getBundleInstance(
            ICUResourceBundle.ICU_BASE_NAME,
            "keyTypeData",
            ICUResourceBundle.ICU_DATA_CLASS_LOADER);
    UResourceBundle keyMap = keyTypeData.get("keyMap");

    // table values are lower case, so compare against the lower-cased input
    final String wanted = AsciiUtil.toLowerString(bcpKey);

    for (int i = 0; i < keyMap.getSize(); i++) {
        UResourceBundle entry = keyMap.get(i);
        String candidate = entry.getString();
        if (candidate.length() == 0) {
            // an empty value means the BCP47 key equals the legacy key
            candidate = entry.getKey();
        }
        if (wanted.equals(candidate)) {
            String legacy = entry.getKey();
            return (legacy != null) ? legacy : wanted;
        }
    }
    return wanted;
}
/**
 * Maps a legacy keyword type to its BCP 47 type using the "typeMap" table of
 * the "keyTypeData" bundle, falling back to the "typeAlias" table when the
 * type is not listed directly. For the "timezone" key, '/' in the type is
 * replaced by ':' before the table lookup.
 * <p>
 * The alias table is treated as optional: if it is missing, the lookup falls
 * through to the syntax check instead of throwing MissingResourceException.
 *
 * @param key  the legacy key (matched ignoring ASCII case)
 * @param type the legacy type (case-sensitive)
 * @return the BCP 47 type; the input type itself when it is unmapped but is a
 *         3 to 8 character extension subtag; otherwise null
 */
private static String ldmlTypeToBCP47(String key, String type) {
    UResourceBundle keyTypeData = UResourceBundle.getBundleInstance(
            ICUResourceBundle.ICU_BASE_NAME,
            "keyTypeData",
            ICUResourceBundle.ICU_DATA_CLASS_LOADER);
    UResourceBundle typeMap = keyTypeData.get("typeMap");

    // keys are case-insensitive, while types are case-sensitive
    // TODO: make types case insensitive
    key = AsciiUtil.toLowerString(key);
    UResourceBundle typeMapForKey = null;
    String bcpType = null;
    String typeResKey = key.equals("timezone") ? type.replace('/', ':') : type;
    try {
        typeMapForKey = typeMap.get(key);
        bcpType = typeMapForKey.getString(typeResKey);
        if (bcpType.length() == 0) {
            // empty value indicates the BCP47 type is same with the legacy type
            bcpType = type;
        }
    } catch (MissingResourceException mre) {
        // fall through
    }

    if (bcpType == null && typeMapForKey != null) {
        // is this type alias?
        try {
            // Fetched inside the try so that a bundle without the alias table
            // is handled the same way as a missing alias entry.
            UResourceBundle typeAlias = keyTypeData.get("typeAlias");
            UResourceBundle typeAliasForKey = typeAlias.get(key);
            typeResKey = typeAliasForKey.getString(typeResKey);
            bcpType = typeMapForKey.getString(typeResKey.replace('/', ':'));
            if (bcpType.length() == 0) {
                // empty value indicates the BCP47 type is same with the legacy type
                bcpType = typeResKey;
            }
        } catch (MissingResourceException mre) {
            // fall through
        }
    }

    if (bcpType == null) {
        // unmapped: accept the type only if it already is a 3*8 extension subtag
        int typeLen = type.length();
        if (typeLen >= 3 && typeLen <= 8 && LanguageTag.isExtensionSubtag(type)) {
            return type;
        }
        return null;
    }
    return bcpType;
}
/**
 * Maps a BCP 47 type to its legacy type by scanning the key's table inside
 * "typeMap" of the "keyTypeData" bundle. For the "timezone" key, ':' in the
 * table's type id is replaced by '/'. When the key has no table or no entry
 * matches, the lower-cased input type is returned.
 */
private static String bcp47ToLDMLType(String key, String bcpType) {
    UResourceBundle keyTypeData = UResourceBundle.getBundleInstance(
            ICUResourceBundle.ICU_BASE_NAME,
            "keyTypeData",
            ICUResourceBundle.ICU_DATA_CLASS_LOADER);
    UResourceBundle typeMap = keyTypeData.get("typeMap");

    // both key and type are matched in lower case
    final String lowerKey = AsciiUtil.toLowerString(key);
    final String wanted = AsciiUtil.toLowerString(bcpType);

    try {
        UResourceBundle typeMapForKey = typeMap.get(lowerKey);

        // Note: Linear search for time zone ID might be too slow.
        // ICU services do not use timezone keywords for now.
        // In future, we may need to build the optimized inverse
        // lookup table.
        for (int i = 0; i < typeMapForKey.getSize(); i++) {
            UResourceBundle entry = typeMapForKey.get(i);
            String candidate = entry.getString();
            if (candidate.length() == 0) {
                // an empty value means the BCP47 type equals the legacy type
                candidate = entry.getKey();
            }
            if (!wanted.equals(candidate)) {
                continue;
            }
            String legacy = entry.getKey();
            if (lowerKey.equals("timezone")) {
                legacy = legacy.replace(':', '/');
            }
            return (legacy != null) ? legacy : wanted;
        }
    } catch (MissingResourceException mre) {
        // no type table for this key; return the input below
    }
    return wanted;
}
/*
* JDK Locale Helper
*/
@ -4073,9 +4062,9 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
if (kwKey.length() != 1) {
// Unicode locale key
kwKey = bcp47ToLDMLKey(kwKey);
kwKey = toLegacyKey(kwKey);
// use "yes" as the value of typeless keywords
kwVal = bcp47ToLDMLType(kwKey, ((kwVal.length() == 0) ? "yes" : kwVal));
kwVal = toLegacyType(kwKey, ((kwVal.length() == 0) ? "yes" : kwVal));
}
if (addSep) {

View file

@ -3874,7 +3874,7 @@ public class ULocaleTest extends TestFmwk {
{"en@timezone=America/New_York;calendar=japanese", "en-u-ca-japanese-tz-usnyc"},
{"en@timezone=US/Eastern", "en-u-tz-usnyc"},
{"en@x=x-y-z;a=a-b-c", "en-x-x-y-z"},
{"it@collation=badcollationtype;colStrength=identical;cu=usd-eur", "it-u-ks-identic"},
{"it@collation=badcollationtype;colStrength=identical;cu=usd-eur", "it-u-cu-usd-eur-ks-identic"},
{"en_US_POSIX", "en-US-u-va-posix"},
{"en_US_POSIX@calendar=japanese;currency=EUR","en-US-u-ca-japanese-cu-eur-va-posix"},
{"@x=elmer", "x-elmer"},
@ -4449,4 +4449,108 @@ public class ULocaleTest extends TestFmwk {
}
}
}
/**
 * Checks ULocale.toUnicodeLocaleKey against a table of
 * {input keyword, expected BCP 47 key} rows.
 */
public void TestToUnicodeLocaleKey() {
    final String[][] cases = {
        {"calendar",    "ca"},
        {"CALEndar",    "ca"},  // different casing
        {"ca",          "ca"},  // bcp key itself
        {"kv",          "kv"},  // no difference between legacy and bcp
        {"foo",         null},  // unknown, bcp ill-formed
        {"ZZ",          "zz"},  // unknown, bcp well-formed
    };

    for (int i = 0; i < cases.length; i++) {
        String input = cases[i][0];
        String want = cases[i][1];
        String got = ULocale.toUnicodeLocaleKey(input);
        assertEquals("keyword=" + input, want, got);
    }
}
/**
 * Checks ULocale.toLegacyKey against a table of
 * {input keyword, expected legacy key} rows.
 */
public void TestToLegacyKey() {
    final String[][] cases = {
        {"kb",          "colbackwards"},
        {"kB",          "colbackwards"},    // different casing
        {"Collation",   "collation"},   // keyword itself with different casing
        {"kv",          "kv"},  // no difference between legacy and bcp
        {"foo",         "foo"}, // unknown, bcp ill-formed
        {"ZZ",          "zz"},  // unknown, bcp well-formed
        {"e=mc2",       null},  // unknown, bcp/legacy ill-formed
    };

    for (int i = 0; i < cases.length; i++) {
        String input = cases[i][0];
        String want = cases[i][1];
        String got = ULocale.toLegacyKey(input);
        assertEquals("bcpKey=" + input, want, got);
    }
}
/**
 * Checks ULocale.toUnicodeLocaleType against a table of
 * {keyword, input value, expected BCP 47 type} rows.
 */
public void TestToUnicodeLocaleType() {
    final String[][] cases = {
        {"tz",              "Asia/Kolkata",     "inccu"},
        {"calendar",        "gregorian",        "gregory"},
        {"ca",              "gregorian",        "gregory"},
        {"ca",              "Gregorian",        "gregory"},
        {"ca",              "buddhist",         "buddhist"},
        {"Calendar",        "Japanese",         "japanese"},
        {"calendar",        "Islamic-Civil",    "islamic-civil"},
        {"calendar",        "islamicc",         "islamic-civil"},   // bcp type alias
        {"colalternate",    "NON-IGNORABLE",    "noignore"},
        {"colcaselevel",    "yes",              "true"},
        {"tz",              "america/new_york", "usnyc"},
        {"tz",              "Asia/Kolkata",     "inccu"},
        {"timezone",        "navajo",           "usden"},
        {"ca",              "aaaa",             "aaaa"},    // unknown type, well-formed type
        {"ca",              "gregory-japanese-islamic", "gregory-japanese-islamic"},    // unknown type, well-formed type
        {"zz",              "gregorian",        null},      // unknown key, ill-formed type
        {"co",              "foo-",             null},      // unknown type, ill-formed type
    };

    for (int i = 0; i < cases.length; i++) {
        String key = cases[i][0];
        String input = cases[i][1];
        String want = cases[i][2];
        String got = ULocale.toUnicodeLocaleType(key, input);
        assertEquals("keyword=" + key + ", value=" + input, want, got);
    }
}
/**
 * Checks ULocale.toLegacyType against a table of
 * {keyword, input value, expected legacy type} rows.
 */
public void TestToLegacyType() {
    final String[][] cases = {
        {"calendar",        "gregory",          "gregorian"},
        {"ca",              "gregory",          "gregorian"},
        {"ca",              "Gregory",          "gregorian"},
        {"ca",              "buddhist",         "buddhist"},
        {"Calendar",        "Japanese",         "japanese"},
        {"calendar",        "Islamic-Civil",    "islamic-civil"},
        {"calendar",        "islamicc",         "islamic-civil"},   // bcp type alias
        {"colalternate",    "noignore",         "non-ignorable"},
        {"colcaselevel",    "true",             "yes"},
        {"tz",              "usnyc",            "America/New_York"},
        {"tz",              "inccu",            "Asia/Calcutta"},
        {"timezone",        "usden",            "America/Denver"},
        {"timezone",        "usnavajo",         "America/Denver"},  // bcp type alias
        {"colstrength",     "quarternary",      "quaternary"},  // type alias
        {"ca",              "aaaa",             "aaaa"},    // unknown type
        {"calendar",        "gregory-japanese-islamic", "gregory-japanese-islamic"},    // unknown type, well-formed type
        {"zz",              "gregorian",        "gregorian"},   // unknown key, bcp ill-formed type
        {"ca",              "gregorian-calendar",   "gregorian-calendar"},  // known key, bcp ill-formed type
        {"co",              "e=mc2",            null},  // known key, ill-formed bcp/legacy type
    };

    for (int i = 0; i < cases.length; i++) {
        String key = cases[i][0];
        String input = cases[i][1];
        String want = cases[i][2];
        String got = ULocale.toLegacyType(key, input);
        assertEquals("keyword=" + key + ", value=" + input, want, got);
    }
}
}