mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-12812 Add new implementation, but only expose a limited API as tech preview. Note the XCldrStub class to help migration from CLDR to ICU environment.
X-SVN-Rev: 39849
This commit is contained in:
parent
81579b93df
commit
3a18873d2c
11 changed files with 4195 additions and 25 deletions
|
@ -0,0 +1,390 @@
|
|||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.locale;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.ibm.icu.util.ICUException;
|
||||
import com.ibm.icu.util.ICUUncheckedIOException;
|
||||
|
||||
/**
|
||||
* Stub class to make migration easier until we get either Guava or a higher level of Java.
|
||||
*/
|
||||
public class XCldrStub {
|
||||
|
||||
public static class Multimap<K, V> {
|
||||
private final Map<K,Set<V>> map;
|
||||
private final Class<Set<V>> setClass;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private Multimap(Map<K,Set<V>> map, Class<?> setClass) {
|
||||
this.map = map;
|
||||
this.setClass = (Class<Set<V>>) (setClass != null
|
||||
? setClass
|
||||
: HashSet.class);
|
||||
}
|
||||
public Multimap<K, V> putAll(K key, V... values) {
|
||||
if (values.length != 0) {
|
||||
createSetIfMissing(key).addAll(Arrays.asList(values));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public void putAll(K key, Collection<V> values) {
|
||||
if (!values.isEmpty()) {
|
||||
createSetIfMissing(key).addAll(values);
|
||||
}
|
||||
}
|
||||
public void putAll(Collection<K> keys, V value) {
|
||||
for (K key : keys) {
|
||||
put(key, value);
|
||||
}
|
||||
}
|
||||
public void putAll(Multimap<K, V> source) {
|
||||
for (Entry<K, Set<V>> entry : source.map.entrySet()) {
|
||||
putAll(entry.getKey(), entry.getValue());
|
||||
}
|
||||
}
|
||||
public void put(K key, V value) {
|
||||
createSetIfMissing(key).add(value);
|
||||
}
|
||||
private Set<V> createSetIfMissing(K key) {
|
||||
Set<V> old = map.get(key);
|
||||
if (old == null) {
|
||||
map.put(key, old = getInstance());
|
||||
}
|
||||
return old;
|
||||
}
|
||||
private Set<V> getInstance() {
|
||||
try {
|
||||
return setClass.newInstance();
|
||||
} catch (Exception e) {
|
||||
throw new ICUException(e);
|
||||
}
|
||||
}
|
||||
public Set<V> get(K key) {
|
||||
Set<V> result = map.get(key);
|
||||
return result; // == null ? Collections.<V>emptySet() : result;
|
||||
}
|
||||
public Set<K> keySet() {
|
||||
return map.keySet();
|
||||
}
|
||||
public Map<K, Set<V>> asMap() {
|
||||
return map;
|
||||
}
|
||||
public Set<V> values() {
|
||||
Collection<Set<V>> values = map.values();
|
||||
if (values.size() == 0) {
|
||||
return Collections.<V>emptySet();
|
||||
}
|
||||
Set<V> result = getInstance();
|
||||
for ( Set<V> valueSet : values) {
|
||||
result.addAll(valueSet);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
public int size() {
|
||||
return map.size();
|
||||
}
|
||||
public Iterable<Entry<K, V>> entries() {
|
||||
return new MultimapIterator<K, V>(map);
|
||||
}
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
Multimap<K,V> other = (Multimap) obj;
|
||||
return map.equals(other.map);
|
||||
}
|
||||
}
|
||||
|
||||
public static class Multimaps {
|
||||
public static <K, V, R extends Multimap<K, V>> R invertFrom(Multimap<V, K> source, R target) {
|
||||
for (Entry<V, Set<K>> entry : source.asMap().entrySet()) {
|
||||
target.putAll(entry.getValue(), entry.getKey());
|
||||
}
|
||||
return target;
|
||||
}
|
||||
public static <K, V, R extends Multimap<K, V>> R invertFrom(Map<V, K> source, R target) {
|
||||
for (Entry<V, K> entry : source.entrySet()) {
|
||||
target.put(entry.getValue(), entry.getKey());
|
||||
}
|
||||
return target;
|
||||
}
|
||||
/**
|
||||
* Warning, not functionally the same as Guava; only for use in invertFrom.
|
||||
*/
|
||||
public static <K, V> Map<K,V> forMap(Map<K,V> map) {
|
||||
return map;
|
||||
}
|
||||
}
|
||||
|
||||
private static class MultimapIterator<K,V> implements Iterator<Entry<K,V>>, Iterable<Entry<K,V>> {
|
||||
private final Iterator<Entry<K, Set<V>>> it1;
|
||||
private Iterator<V> it2 = null;
|
||||
private final ReusableEntry<K,V> entry = new ReusableEntry<K,V>();
|
||||
|
||||
private MultimapIterator(Map<K,Set<V>> map) {
|
||||
it1 = map.entrySet().iterator();
|
||||
}
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return it1.hasNext() || it2 != null && it2.hasNext();
|
||||
}
|
||||
@Override
|
||||
public Entry<K, V> next() {
|
||||
if (it2 != null && it2.hasNext()) {
|
||||
entry.value = it2.next();
|
||||
} else {
|
||||
Entry<K, Set<V>> e = it1.next();
|
||||
entry.key = e.getKey();
|
||||
it2 = e.getValue().iterator();
|
||||
}
|
||||
return entry;
|
||||
}
|
||||
@Override
|
||||
public Iterator<Entry<K, V>> iterator() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
private static class ReusableEntry<K,V> implements Entry<K,V> {
|
||||
K key;
|
||||
V value;
|
||||
@Override
|
||||
public K getKey() {
|
||||
return key;
|
||||
}
|
||||
@Override
|
||||
public V getValue() {
|
||||
return value;
|
||||
}
|
||||
@Override
|
||||
public V setValue(V value) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
public static class HashMultimap<K, V> extends Multimap<K, V> {
|
||||
private HashMultimap() {
|
||||
super(new HashMap<K, Set<V>>(), HashSet.class);
|
||||
}
|
||||
public static <K, V> HashMultimap<K, V> create() {
|
||||
return new HashMultimap<K, V>();
|
||||
}
|
||||
}
|
||||
|
||||
public static class TreeMultimap<K, V> extends Multimap<K, V> {
|
||||
private TreeMultimap() {
|
||||
super(new TreeMap<K, Set<V>>(), TreeSet.class);
|
||||
}
|
||||
public static <K, V> TreeMultimap<K, V> create() {
|
||||
return new TreeMultimap<K, V>();
|
||||
}
|
||||
}
|
||||
|
||||
public static class LinkedHashMultimap<K, V> extends Multimap<K, V> {
|
||||
private LinkedHashMultimap() {
|
||||
super(new LinkedHashMap<K, Set<V>>(), LinkedHashSet.class);
|
||||
}
|
||||
public static <K, V> LinkedHashMultimap<K, V> create() {
|
||||
return new LinkedHashMultimap<K, V>();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static class Counter<T> implements Iterable<T>{
|
||||
private Map<T,Long> data;
|
||||
@Override
|
||||
public Iterator<T> iterator() {
|
||||
return data.keySet().iterator();
|
||||
}
|
||||
public long get(T s) {
|
||||
Long result = data.get(s);
|
||||
return result != null ? result : 0L;
|
||||
}
|
||||
public void add(T item, int count) {
|
||||
Long result = data.get(item);
|
||||
data.put(item, result == null ? count : result + count);
|
||||
}
|
||||
}
|
||||
|
||||
public static <T> String join(T[] source, String separator) {
|
||||
StringBuilder result = new StringBuilder();
|
||||
for (int i = 0; i < source.length; ++i) {
|
||||
if (i != 0) result.append(separator);
|
||||
result.append(source[i]);
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
public static <T> String join(Iterable<T> source, String separator) {
|
||||
StringBuilder result = new StringBuilder();
|
||||
boolean first = true;
|
||||
for (T item : source) {
|
||||
if (!first) result.append(separator);
|
||||
else first = false;
|
||||
result.append(item.toString());
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
public static class CollectionUtilities {
|
||||
public static <T, U extends Iterable<T>> String join(U source, String separator) {
|
||||
return XCldrStub.join(source, separator);
|
||||
}
|
||||
}
|
||||
|
||||
public static class Joiner {
|
||||
private final String separator;
|
||||
private Joiner(String separator) {
|
||||
this.separator = separator;
|
||||
}
|
||||
public static final Joiner on(String separator) {
|
||||
return new Joiner(separator);
|
||||
}
|
||||
public <T> String join(T[] source) {
|
||||
return XCldrStub.join(source, separator);
|
||||
}
|
||||
public <T> String join(Iterable<T> source) {
|
||||
return XCldrStub.join(source, separator);
|
||||
}
|
||||
}
|
||||
|
||||
public static class Splitter {
|
||||
Pattern pattern;
|
||||
boolean trimResults = false;
|
||||
public Splitter(char c) {
|
||||
this(Pattern.compile("\\Q" + c + "\\E"));
|
||||
}
|
||||
public Splitter(Pattern p) {
|
||||
pattern = p;
|
||||
}
|
||||
public static Splitter on(char c) {
|
||||
return new Splitter(c);
|
||||
}
|
||||
public static Splitter on(Pattern p) {
|
||||
return new Splitter(p);
|
||||
}
|
||||
public List<String> splitToList(String input) {
|
||||
String[] items = pattern.split(input);
|
||||
if (trimResults) {
|
||||
for (int i = 0; i < items.length; ++i) {
|
||||
items[i] = items[i].trim();
|
||||
}
|
||||
}
|
||||
return Arrays.asList(items);
|
||||
}
|
||||
public Splitter trimResults() {
|
||||
trimResults = true;
|
||||
return this;
|
||||
}
|
||||
public Iterable<String> split(String input) {
|
||||
return splitToList(input);
|
||||
}
|
||||
}
|
||||
|
||||
public static class ImmutableSet {
|
||||
public static <T> Set<T> copyOf(Set<T> values) {
|
||||
return Collections.unmodifiableSet(new LinkedHashSet<T>(values)); // copy set for safety, preserve order
|
||||
}
|
||||
}
|
||||
public static class ImmutableMap {
|
||||
public static <K,V> Map<K,V> copyOf(Map<K,V> values) {
|
||||
return Collections.unmodifiableMap(new LinkedHashMap<K,V>(values)); // copy set for safety, preserve order
|
||||
}
|
||||
}
|
||||
public static class ImmutableMultimap {
|
||||
public static <K,V> Multimap<K,V> copyOf(Multimap<K,V> values) {
|
||||
LinkedHashMap<K, Set<V>> temp = new LinkedHashMap<K,Set<V>>(); // semi-deep copy, preserve order
|
||||
for (Entry<K, Set<V>> entry : values.asMap().entrySet()) {
|
||||
Set<V> value = entry.getValue();
|
||||
temp.put(entry.getKey(), value.size() == 1
|
||||
? Collections.singleton(value.iterator().next())
|
||||
: Collections.unmodifiableSet(new LinkedHashSet<V>(value)));
|
||||
}
|
||||
return new Multimap<K,V>(Collections.unmodifiableMap(temp), null);
|
||||
}
|
||||
}
|
||||
|
||||
public static class FileUtilities {
|
||||
public static final Charset UTF8 = Charset.forName("utf-8");
|
||||
|
||||
public static BufferedReader openFile(Class<?> class1, String file) {
|
||||
return openFile(class1, file, UTF8);
|
||||
}
|
||||
|
||||
public static BufferedReader openFile(Class<?> class1, String file, Charset charset) {
|
||||
// URL path = null;
|
||||
// String externalForm = null;
|
||||
try {
|
||||
final InputStream resourceAsStream = class1.getResourceAsStream(file);
|
||||
if (charset == null) {
|
||||
charset = UTF8;
|
||||
}
|
||||
InputStreamReader reader = new InputStreamReader(resourceAsStream, charset);
|
||||
BufferedReader bufferedReader = new BufferedReader(reader, 1024 * 64);
|
||||
return bufferedReader;
|
||||
} catch (Exception e) {
|
||||
String className = class1 == null ? null : class1.getCanonicalName();
|
||||
String canonicalName = null;
|
||||
try {
|
||||
String relativeFileName = getRelativeFileName(class1, "../util/");
|
||||
canonicalName = new File(relativeFileName).getCanonicalPath();
|
||||
} catch (Exception e1) {
|
||||
throw new ICUUncheckedIOException("Couldn't open file: " + file + "; relative to class: "
|
||||
+ className, e);
|
||||
}
|
||||
throw new ICUUncheckedIOException("Couldn't open file " + file + "; in path " + canonicalName + "; relative to class: "
|
||||
+ className, e);
|
||||
}
|
||||
}
|
||||
public static String getRelativeFileName(Class<?> class1, String filename) {
|
||||
URL resource = class1.getResource(filename);
|
||||
String resourceString = resource.toString();
|
||||
if (resourceString.startsWith("file:")) {
|
||||
return resourceString.substring(5);
|
||||
} else if (resourceString.startsWith("jar:file:")) {
|
||||
return resourceString.substring(9);
|
||||
} else {
|
||||
throw new ICUUncheckedIOException("File not found: " + resourceString);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static public class RegexUtilities {
|
||||
public static int findMismatch(Matcher m, CharSequence s) {
|
||||
int i;
|
||||
for (i = 1; i < s.length(); ++i) {
|
||||
boolean matches = m.reset(s.subSequence(0, i)).matches();
|
||||
if (!matches && !m.hitEnd()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return i - 1;
|
||||
}
|
||||
public static String showMismatch(Matcher m, CharSequence s) {
|
||||
int failPoint = findMismatch(m, s);
|
||||
String show = s.subSequence(0, failPoint) + "☹" + s.subSequence(failPoint, s.length());
|
||||
return show;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,667 @@
|
|||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.locale;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Enumeration;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import com.ibm.icu.impl.ICUData;
|
||||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.HashMultimap;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Multimap;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
|
||||
import com.ibm.icu.util.ICUException;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
import com.ibm.icu.util.ULocale.Minimize;
|
||||
import com.ibm.icu.util.UResourceBundle;
|
||||
|
||||
public class XLikelySubtags {
|
||||
|
||||
/** Shared instance, created with the no-argument constructor when this class is initialized. */
private static final XLikelySubtags DEFAULT = new XLikelySubtags();

/**
 * Returns the shared default instance.
 */
public static final XLikelySubtags getDefault() {
    return DEFAULT;
}
|
||||
|
||||
/**
 * Factory for the nested map levels of the lookup table. The two constants
 * decide whether new levels are HashMaps or TreeMaps.
 */
static abstract class Maker {
    /**
     * Creates a new, empty container. The return type is chosen by the call
     * site, so the result is only as type-safe as the caller's expectation.
     */
    abstract <V> V make();

    /**
     * Returns the value stored under the key, first creating it with make()
     * and storing it when the key has no value yet.
     */
    @SuppressWarnings("unchecked")
    public <K,V> V getSubtable(Map<K, V> langTable, final K language) {
        V scriptTable = langTable.get(language);
        if (scriptTable == null) {
            langTable.put(language, scriptTable = (V) make());
        }
        return scriptTable;
    }

    /** Maker whose containers are HashMaps. */
    static final Maker HASHMAP = new Maker() {
        @Override
        @SuppressWarnings("unchecked")
        public Map<Object,Object> make() {
            return new HashMap<Object,Object>();
        }
    };

    /** Maker whose containers are TreeMaps. */
    static final Maker TREEMAP = new Maker() {
        @Override
        @SuppressWarnings("unchecked")
        public Map<Object,Object> make() {
            return new TreeMap<Object,Object>();
        }
    };
}
|
||||
|
||||
public static class Aliases {
|
||||
final Map<String, String> toCanonical;
|
||||
final Multimap<String, String> toAliases;
|
||||
public String getCanonical(String alias) {
|
||||
String canonical = toCanonical.get(alias);
|
||||
return canonical == null ? alias : canonical;
|
||||
}
|
||||
public Set<String> getAliases(String canonical) {
|
||||
Set<String> aliases = toAliases.get(canonical);
|
||||
return aliases == null ? Collections.singleton(canonical) : aliases;
|
||||
}
|
||||
public Aliases(String key) {
|
||||
UResourceBundle metadata = UResourceBundle.getBundleInstance(ICUData.ICU_BASE_NAME,"metadata",ICUResourceBundle.ICU_DATA_CLASS_LOADER);
|
||||
UResourceBundle metadataAlias = metadata.get("alias");
|
||||
UResourceBundle territoryAlias = metadataAlias.get(key);
|
||||
Map<String, String> toCanonical1 = new HashMap<String, String>();
|
||||
for ( int i = 0 ; i < territoryAlias.getSize(); i++ ) {
|
||||
UResourceBundle res = territoryAlias.get(i);
|
||||
String aliasFrom = res.getKey();
|
||||
if (aliasFrom.contains("_")) {
|
||||
continue; // only simple aliasing
|
||||
}
|
||||
String aliasReason = res.get("reason").getString();
|
||||
if (aliasReason.equals("overlong")) {
|
||||
continue;
|
||||
}
|
||||
String aliasTo = res.get("replacement").getString();
|
||||
int spacePos = aliasTo.indexOf(' ');
|
||||
String aliasFirst = spacePos < 0 ? aliasTo : aliasTo.substring(0, spacePos);
|
||||
if (aliasFirst.contains("_")) {
|
||||
continue; // only simple aliasing
|
||||
}
|
||||
toCanonical1.put(aliasFrom, aliasFirst);
|
||||
}
|
||||
if (key.equals("language")) {
|
||||
toCanonical1.put("mo", "ro"); // special case
|
||||
}
|
||||
toCanonical = Collections.unmodifiableMap(toCanonical1);
|
||||
toAliases = Multimaps.invertFrom(toCanonical1, HashMultimap.<String,String>create());
|
||||
}
|
||||
}
|
||||
|
||||
public static class LSR {
|
||||
public final String language;
|
||||
public final String script;
|
||||
public final String region;
|
||||
|
||||
public static Aliases LANGUAGE_ALIASES = new Aliases("language");
|
||||
public static Aliases REGION_ALIASES = new Aliases("territory");
|
||||
|
||||
public static LSR from(String language, String script, String region) {
|
||||
return new LSR(language, script, region);
|
||||
}
|
||||
|
||||
// from http://unicode.org/reports/tr35/#Unicode_language_identifier
|
||||
// but simplified to requiring language subtag, and nothing beyond region
|
||||
// #1 is language
|
||||
// #2 is script
|
||||
// #3 is region
|
||||
// static final String pat =
|
||||
// "language_id = (unicode_language_subtag)"
|
||||
// + "(?:sep(unicode_script_subtag))?"
|
||||
// + "(?:sep(unicode_region_subtag))?;\n"
|
||||
// + "unicode_language_subtag = alpha{2,3}|alpha{5,8};\n"
|
||||
// + "unicode_script_subtag = alpha{4};\n"
|
||||
// + "unicode_region_subtag = alpha{2}|digit{3};\n"
|
||||
// + "sep = [-_];\n"
|
||||
// + "digit = [0-9];\n"
|
||||
// + "alpha = [A-Za-z];\n"
|
||||
// ;
|
||||
// static {
|
||||
// System.out.println(pat);
|
||||
// System.out.println(new UnicodeRegex().compileBnf(pat));
|
||||
// }
|
||||
// static final Pattern LANGUAGE_PATTERN = Pattern.compile(
|
||||
// "([a-zA-Z0-9]+)" // (?:[-_]([a-zA-Z0-9]+))?(?:[-_]([a-zA-Z0-9]+))?"
|
||||
// //new UnicodeRegex().compileBnf(pat)
|
||||
// );
|
||||
//
|
||||
// TODO: fix this to check for format. Not required, since this is only called internally, but safer for the future.
|
||||
static LSR from(String languageIdentifier) {
|
||||
String[] parts = languageIdentifier.split("[-_]");
|
||||
if (parts.length < 1 || parts.length > 3) {
|
||||
throw new ICUException("too many subtags");
|
||||
}
|
||||
String lang = parts[0].toLowerCase();
|
||||
String p2 = parts.length < 2 ? "": parts[1];
|
||||
String p3 = parts.length < 3 ? "": parts[2];
|
||||
return p2.length() < 4 ? new LSR(lang, "", p2) : new LSR(lang, p2, p3);
|
||||
|
||||
// Matcher matcher = LANGUAGE_PATTERN.matcher(languageIdentifier);
|
||||
// if (!matcher.matches()) {
|
||||
// return new LSR(matcher.group(1), matcher.group(2), matcher.group(3));
|
||||
// }
|
||||
// System.out.println(RegexUtilities.showMismatch(matcher, languageIdentifier));
|
||||
// throw new ICUException("invalid language id");
|
||||
}
|
||||
|
||||
public static LSR from(ULocale locale) {
|
||||
return new LSR(locale.getLanguage(), locale.getScript(), locale.getCountry());
|
||||
}
|
||||
|
||||
public static LSR fromMaximalized(ULocale locale) {
|
||||
return fromMaximalized(locale.getLanguage(), locale.getScript(), locale.getCountry());
|
||||
}
|
||||
|
||||
public static LSR fromMaximalized(String language, String script, String region) {
|
||||
String canonicalLanguage = LANGUAGE_ALIASES.getCanonical(language);
|
||||
// script is ok
|
||||
String canonicalRegion = REGION_ALIASES.getCanonical(region); // getCanonical(REGION_ALIASES.get(region));
|
||||
|
||||
return DEFAULT.maximize(canonicalLanguage, script, canonicalRegion);
|
||||
}
|
||||
|
||||
public LSR(String language, String script, String region) {
|
||||
this.language = language;
|
||||
this.script = script;
|
||||
this.region = region;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder result = new StringBuilder(language);
|
||||
if (!script.isEmpty()) {
|
||||
result.append('-').append(script);
|
||||
}
|
||||
if (!region.isEmpty()) {
|
||||
result.append('-').append(region);
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
public LSR replace(String language2, String script2, String region2) {
|
||||
if (language2 == null && script2 == null && region2 == null) return this;
|
||||
return new LSR(
|
||||
language2 == null ? language: language2,
|
||||
script2 == null ? script : script2,
|
||||
region2 == null ? region : region2);
|
||||
}
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
LSR other = (LSR) obj;
|
||||
return language.equals(other.language)
|
||||
&& script.equals(other.script)
|
||||
&& region.equals(other.region);
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(language, script, region);
|
||||
}
|
||||
}
|
||||
|
||||
/** Lookup table keyed by language, then script, then region, yielding an LSR. */
final Map<String, Map<String, Map<String, LSR>>> langTable;

/**
 * Builds an instance from the data returned by getDefaultRawData(), passing
 * true for skipNoncanonical.
 */
public XLikelySubtags() {
    this(getDefaultRawData(), true);
}
|
||||
|
||||
private static Map<String, String> getDefaultRawData() {
|
||||
Map<String, String> rawData = new TreeMap<String, String>();
|
||||
UResourceBundle bundle = UResourceBundle.getBundleInstance( ICUData.ICU_BASE_NAME, "likelySubtags");
|
||||
for (Enumeration<String> enumer = bundle.getKeys(); enumer.hasMoreElements();) {
|
||||
String key = enumer.nextElement();
|
||||
rawData.put(key, bundle.getString(key));
|
||||
}
|
||||
return rawData;
|
||||
}
|
||||
|
||||
/**
 * Builds the lookup table from raw source-to-target identifier strings.
 *
 * @param rawData maps each source identifier to its target identifier
 * @param skipNoncanonical forwarded to init()
 */
public XLikelySubtags(Map<String, String> rawData, boolean skipNoncanonical) {
    this.langTable = init(rawData, skipNoncanonical);
}
|
||||
|
||||
private Map<String, Map<String, Map<String, LSR>>> init(final Map<String, String> rawData, boolean skipNoncanonical) {
|
||||
// prepare alias info. We want a mapping from the canonical form to all aliases
|
||||
|
||||
//Multimap<String,String> canonicalToAliasLanguage = HashMultimap.create();
|
||||
// getAliasInfo(LANGUAGE_ALIASES, canonicalToAliasLanguage);
|
||||
|
||||
// Don't bother with script; there are none
|
||||
|
||||
//Multimap<String,String> canonicalToAliasRegion = HashMultimap.create();
|
||||
// getAliasInfo(REGION_ALIASES, canonicalToAliasRegion);
|
||||
|
||||
Maker maker = Maker.TREEMAP;
|
||||
Map<String, Map<String, Map<String, LSR>>> result = maker.make();
|
||||
// Splitter bar = Splitter.on('_');
|
||||
// int last = -1;
|
||||
// set the base data
|
||||
Map<LSR,LSR> internCache = new HashMap<LSR,LSR>();
|
||||
for (Entry<String, String> sourceTarget : rawData.entrySet()) {
|
||||
LSR ltp = LSR.from(sourceTarget.getKey());
|
||||
final String language = ltp.language;
|
||||
final String script = ltp.script;
|
||||
final String region = ltp.region;
|
||||
|
||||
ltp = LSR.from(sourceTarget.getValue());
|
||||
String languageTarget = ltp.language;
|
||||
final String scriptTarget = ltp.script;
|
||||
final String regionTarget = ltp.region;
|
||||
|
||||
set(result, language, script, region, languageTarget, scriptTarget, regionTarget, internCache);
|
||||
// now add aliases
|
||||
Collection<String> languageAliases = LSR.LANGUAGE_ALIASES.getAliases(language);
|
||||
// if (languageAliases.isEmpty()) {
|
||||
// languageAliases = Collections.singleton(language);
|
||||
// }
|
||||
Collection<String> regionAliases = LSR.REGION_ALIASES.getAliases(region);
|
||||
// if (regionAliases.isEmpty()) {
|
||||
// regionAliases = Collections.singleton(region);
|
||||
// }
|
||||
for (String languageAlias : languageAliases) {
|
||||
for (String regionAlias : regionAliases) {
|
||||
if (languageAlias.equals(language) && regionAlias.equals(region)) {
|
||||
continue;
|
||||
}
|
||||
set(result, languageAlias, script, regionAlias, languageTarget, scriptTarget, regionTarget, internCache);
|
||||
}
|
||||
}
|
||||
}
|
||||
// hack
|
||||
set(result, "und", "Latn", "", "en", "Latn", "US", internCache);
|
||||
|
||||
// hack, ensure that if und-YY => und-Xxxx-YY, then we add Xxxx=>YY to the table
|
||||
// <likelySubtag from="und_GH" to="ak_Latn_GH"/>
|
||||
|
||||
// so und-Latn-GH => ak-Latn-GH
|
||||
Map<String, Map<String, LSR>> undScriptMap = result.get("und");
|
||||
Map<String, LSR> undEmptyRegionMap = undScriptMap.get("");
|
||||
for (Entry<String, LSR> regionEntry : undEmptyRegionMap.entrySet()) {
|
||||
final LSR value = regionEntry.getValue();
|
||||
set(result, "und", value.script, value.region, value);
|
||||
}
|
||||
//
|
||||
// check that every level has "" (or "und")
|
||||
if (!result.containsKey("und")) {
|
||||
throw new IllegalArgumentException("failure: base");
|
||||
}
|
||||
for (Entry<String, Map<String, Map<String, LSR>>> langEntry : result.entrySet()) {
|
||||
String lang = langEntry.getKey();
|
||||
final Map<String, Map<String, LSR>> scriptMap = langEntry.getValue();
|
||||
if (!scriptMap.containsKey("")) {
|
||||
throw new IllegalArgumentException("failure: " + lang);
|
||||
}
|
||||
for (Entry<String, Map<String, LSR>> scriptEntry : scriptMap.entrySet()) {
|
||||
String script = scriptEntry.getKey();
|
||||
final Map<String, LSR> regionMap = scriptEntry.getValue();
|
||||
if (!regionMap.containsKey("")) {
|
||||
throw new IllegalArgumentException("failure: " + lang + "-" + script);
|
||||
}
|
||||
// for (Entry<String, LSR> regionEntry : regionMap.entrySet()) {
|
||||
// String region = regionEntry.getKey();
|
||||
// LSR value = regionEntry.getValue();
|
||||
// }
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// private void getAliasInfo(Map<String, R2<List<String>, String>> aliasInfo, Multimap<String, String> canonicalToAlias) {
|
||||
// for (Entry<String, R2<List<String>, String>> e : aliasInfo.entrySet()) {
|
||||
// final String alias = e.getKey();
|
||||
// if (alias.contains("_")) {
|
||||
// continue; // only do simple aliasing
|
||||
// }
|
||||
// String canonical = getCanonical(e.getValue());
|
||||
// canonicalToAlias.put(canonical, alias);
|
||||
// }
|
||||
// }
|
||||
|
||||
// private static String getCanonical(R2<List<String>, String> aliasAndReason) {
|
||||
// if (aliasAndReason == null) {
|
||||
// return null;
|
||||
// }
|
||||
// if (aliasAndReason.get1().equals("overlong")) {
|
||||
// return null;
|
||||
// }
|
||||
// List<String> value = aliasAndReason.get0();
|
||||
// if (value.size() != 1) {
|
||||
// return null;
|
||||
// }
|
||||
// final String canonical = value.iterator().next();
|
||||
// if (canonical.contains("_")) {
|
||||
// return null; // only do simple aliasing
|
||||
// }
|
||||
// return canonical;
|
||||
// }
|
||||
|
||||
private void set(Map<String, Map<String, Map<String, LSR>>> langTable, final String language, final String script, final String region,
|
||||
final String languageTarget, final String scriptTarget, final String regionTarget, Map<LSR, LSR> internCache) {
|
||||
LSR newValue = new LSR(languageTarget, scriptTarget, regionTarget);
|
||||
LSR oldValue = internCache.get(newValue);
|
||||
if (oldValue == null) {
|
||||
internCache.put(newValue, newValue);
|
||||
oldValue = newValue;
|
||||
}
|
||||
set(langTable, language, script, region, oldValue);
|
||||
}
|
||||
|
||||
private void set(Map<String, Map<String, Map<String, LSR>>> langTable, final String language, final String script, final String region, LSR newValue) {
|
||||
Map<String, Map<String, LSR>> scriptTable = Maker.TREEMAP.getSubtable(langTable, language);
|
||||
Map<String, LSR> regionTable = Maker.TREEMAP.getSubtable(scriptTable, script);
|
||||
LSR oldValue = regionTable.get(region);
|
||||
if (oldValue != null) {
|
||||
int debug = 0;
|
||||
}
|
||||
regionTable.put(region, newValue);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience methods
|
||||
* @param source
|
||||
* @return
|
||||
*/
|
||||
public LSR maximize(String source) {
|
||||
return maximize(ULocale.forLanguageTag(source));
|
||||
}
|
||||
|
||||
public LSR maximize(ULocale source) {
|
||||
return maximize(source.getLanguage(), source.getScript(), source.getCountry());
|
||||
}
|
||||
|
||||
public LSR maximize(LSR source) {
|
||||
return maximize(source.language, source.script, source.region);
|
||||
}
|
||||
|
||||
// public static ULocale addLikelySubtags(ULocale loc) {
|
||||
//
|
||||
// }
|
||||
|
||||
/**
|
||||
* Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
|
||||
*/
|
||||
public LSR maximize(String language, String script, String region) {
|
||||
int retainOldMask = 0;
|
||||
Map<String, Map<String, LSR>> scriptTable = langTable.get(language);
|
||||
if (scriptTable == null) { // cannot happen if language == "und"
|
||||
retainOldMask |= 4;
|
||||
scriptTable = langTable.get("und");
|
||||
} else if (!language.equals("und")) {
|
||||
retainOldMask |= 4;
|
||||
}
|
||||
|
||||
if (script.equals("Zzzz")) {
|
||||
script = "";
|
||||
}
|
||||
Map<String, LSR> regionTable = scriptTable.get(script);
|
||||
if (regionTable == null) { // cannot happen if script == ""
|
||||
retainOldMask |= 2;
|
||||
regionTable = scriptTable.get("");
|
||||
} else if (!script.isEmpty()) {
|
||||
retainOldMask |= 2;
|
||||
}
|
||||
|
||||
if (region.equals("ZZ")) {
|
||||
region = "";
|
||||
}
|
||||
LSR result = regionTable.get(region);
|
||||
if (result == null) { // cannot happen if region == ""
|
||||
retainOldMask |= 1;
|
||||
result = regionTable.get("");
|
||||
if (result == null) {
|
||||
return null;
|
||||
}
|
||||
} else if (!region.isEmpty()) {
|
||||
retainOldMask |= 1;
|
||||
}
|
||||
|
||||
switch (retainOldMask) {
|
||||
default:
|
||||
case 0: return result;
|
||||
case 1: return result.replace(null, null, region);
|
||||
case 2: return result.replace(null, script, null);
|
||||
case 3: return result.replace(null, script, region);
|
||||
case 4: return result.replace(language, null, null);
|
||||
case 5: return result.replace(language, null, region);
|
||||
case 6: return result.replace(language, script, null);
|
||||
case 7: return result.replace(language, script, region);
|
||||
}
|
||||
}
|
||||
|
||||
private LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn, Minimize fieldToFavor) {
|
||||
LSR result = maximize(languageIn, scriptIn, regionIn);
|
||||
|
||||
// We could try just a series of checks, like:
|
||||
// LSR result2 = addLikelySubtags(languageIn, "", "");
|
||||
// if result.equals(result2) return result2;
|
||||
// However, we can optimize 2 of the cases:
|
||||
// (languageIn, "", "")
|
||||
// (languageIn, "", regionIn)
|
||||
|
||||
Map<String, Map<String, LSR>> scriptTable = langTable.get(result.language);
|
||||
|
||||
Map<String, LSR> regionTable0 = scriptTable.get("");
|
||||
LSR value00 = regionTable0.get("");
|
||||
boolean favorRegionOk = false;
|
||||
if (result.script.equals(value00.script)) { //script is default
|
||||
if (result.region.equals(value00.region)) {
|
||||
return result.replace(null, "", "");
|
||||
} else if (fieldToFavor == fieldToFavor.FAVOR_REGION) {
|
||||
return result.replace(null, "", null);
|
||||
} else {
|
||||
favorRegionOk = true;
|
||||
}
|
||||
}
|
||||
|
||||
// The last case is not as easy to optimize.
|
||||
// Maybe do later, but for now use the straightforward code.
|
||||
LSR result2 = maximize(languageIn, scriptIn, "");
|
||||
if (result2.equals(result)) {
|
||||
return result.replace(null, null, "");
|
||||
} else if (favorRegionOk) {
|
||||
return result.replace(null, "", null);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private static <V> StringBuilder show(Map<String,V> map, String indent, StringBuilder output) {
|
||||
String first = indent.isEmpty() ? "" : "\t";
|
||||
for (Entry<String,V> e : map.entrySet()) {
|
||||
String key = e.getKey();
|
||||
V value = e.getValue();
|
||||
output.append(first + (key.isEmpty() ? "∅" : key));
|
||||
if (value instanceof Map) {
|
||||
show((Map)value, indent+"\t", output);
|
||||
} else {
|
||||
output.append("\t" + Objects.toString(value)).append("\n");
|
||||
}
|
||||
first = indent;
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return show(langTable, "", new StringBuilder()).toString();
|
||||
}
|
||||
|
||||
// public static void main(String[] args) {
|
||||
// System.out.println(LSR.fromMaximalized(ULocale.ENGLISH));
|
||||
//
|
||||
// final Map<String, String> rawData = sdi.getLikelySubtags();
|
||||
// XLikelySubtags ls = XLikelySubtags.getDefault();
|
||||
// System.out.println(ls);
|
||||
// ls.maximize(new ULocale("iw"));
|
||||
// if (true) return;
|
||||
//
|
||||
// LanguageTagParser ltp = new LanguageTagParser();
|
||||
//
|
||||
// // get all the languages, scripts, and regions
|
||||
// Set<String> languages = new TreeSet<String>();
|
||||
// Set<String> scripts = new TreeSet<String>();
|
||||
// Set<String> regions = new TreeSet<String>();
|
||||
// Counter<String> languageCounter = new Counter<String>();
|
||||
// Counter<String> scriptCounter = new Counter<String>();
|
||||
// Counter<String> regionCounter = new Counter<String>();
|
||||
//
|
||||
// for (Entry<String, String> sourceTarget : rawData.entrySet()) {
|
||||
// final String source = sourceTarget.getKey();
|
||||
// ltp.set(source);
|
||||
// languages.add(ltp.getLanguage());
|
||||
// scripts.add(ltp.getScript());
|
||||
// regions.add(ltp.getRegion());
|
||||
// final String target = sourceTarget.getValue();
|
||||
// ltp.set(target);
|
||||
// add(target, languageCounter, ltp.getLanguage(), 1);
|
||||
// add(target, scriptCounter, ltp.getScript(), 1);
|
||||
// add(target, regionCounter, ltp.getRegion(), 1);
|
||||
// }
|
||||
// ltp.set("und-Zzzz-ZZ");
|
||||
// languageCounter.add(ltp.getLanguage(), 1);
|
||||
// scriptCounter.add(ltp.getScript(), 1);
|
||||
// regionCounter.add(ltp.getRegion(), 1);
|
||||
//
|
||||
// if (SHORT) {
|
||||
// removeSingletons(languages, languageCounter);
|
||||
// removeSingletons(scripts, scriptCounter);
|
||||
// removeSingletons(regions, regionCounter);
|
||||
// }
|
||||
//
|
||||
// System.out.println("languages: " + languages.size() + "\n\t" + languages + "\n\t" + languageCounter);
|
||||
// System.out.println("scripts: " + scripts.size() + "\n\t" + scripts + "\n\t" + scriptCounter);
|
||||
// System.out.println("regions: " + regions.size() + "\n\t" + regions + "\n\t" + regionCounter);
|
||||
//
|
||||
// int maxCount = Integer.MAX_VALUE;
|
||||
//
|
||||
// int counter = maxCount;
|
||||
// long tempTime = System.nanoTime();
|
||||
// newMax:
|
||||
// for (String language : languages) {
|
||||
// for (String script : scripts) {
|
||||
// for (String region : regions) {
|
||||
// if (--counter < 0) break newMax;
|
||||
// LSR result = ls.maximize(language, script, region);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// long newMaxTime = System.nanoTime() - tempTime;
|
||||
// System.out.println("newMaxTime: " + newMaxTime);
|
||||
//
|
||||
// counter = maxCount;
|
||||
// tempTime = System.nanoTime();
|
||||
// newMin:
|
||||
// for (String language : languages) {
|
||||
// for (String script : scripts) {
|
||||
// for (String region : regions) {
|
||||
// if (--counter < 0) break newMin;
|
||||
// LSR minNewS = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_SCRIPT);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// long newMinTime = System.nanoTime() - tempTime;
|
||||
// System.out.println("newMinTime: " + newMinTime);
|
||||
//
|
||||
// // *****
|
||||
//
|
||||
// tempTime = System.nanoTime();
|
||||
// counter = maxCount;
|
||||
// oldMax:
|
||||
// for (String language : languages) {
|
||||
// for (String script : scripts) {
|
||||
// for (String region : regions) {
|
||||
// if (--counter < 0) break oldMax;
|
||||
// ULocale tempLocale = new ULocale(language, script, region);
|
||||
// ULocale max = ULocale.addLikelySubtags(tempLocale);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// long oldMaxTime = System.nanoTime() - tempTime;
|
||||
// System.out.println("oldMaxTime: " + oldMaxTime + "\t" + oldMaxTime/newMaxTime + "x");
|
||||
//
|
||||
// counter = maxCount;
|
||||
// tempTime = System.nanoTime();
|
||||
// oldMin:
|
||||
// for (String language : languages) {
|
||||
// for (String script : scripts) {
|
||||
// for (String region : regions) {
|
||||
// if (--counter < 0) break oldMin;
|
||||
// ULocale tempLocale = new ULocale(language, script, region);
|
||||
// ULocale minOldS = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_SCRIPT);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// long oldMinTime = System.nanoTime() - tempTime;
|
||||
// System.out.println("oldMinTime: " + oldMinTime + "\t" + oldMinTime/newMinTime + "x");
|
||||
//
|
||||
// counter = maxCount;
|
||||
// testMain:
|
||||
// for (String language : languages) {
|
||||
// System.out.println(language);
|
||||
// int tests = 0;
|
||||
// for (String script : scripts) {
|
||||
// for (String region : regions) {
|
||||
// ++tests;
|
||||
// if (--counter < 0) break testMain;
|
||||
// LSR maxNew = ls.maximize(language, script, region);
|
||||
// LSR minNewS = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_SCRIPT);
|
||||
// LSR minNewR = ls.minimizeSubtags(language, script, region, Minimize.FAVOR_REGION);
|
||||
//
|
||||
// ULocale tempLocale = new ULocale(language, script, region);
|
||||
// ULocale maxOld = ULocale.addLikelySubtags(tempLocale);
|
||||
// ULocale minOldS = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_SCRIPT);
|
||||
// ULocale minOldR = ULocale.minimizeSubtags(tempLocale, Minimize.FAVOR_REGION);
|
||||
//
|
||||
// // check values
|
||||
// final String maxNewS = String.valueOf(maxNew);
|
||||
// final String maxOldS = maxOld.toLanguageTag();
|
||||
// boolean sameMax = maxOldS.equals(maxNewS);
|
||||
//
|
||||
// final String minNewSS = String.valueOf(minNewS);
|
||||
// final String minOldSS = minOldS.toLanguageTag();
|
||||
// boolean sameMinS = minNewSS.equals(minOldSS);
|
||||
//
|
||||
// final String minNewRS = String.valueOf(minNewR);
|
||||
// final String minOldRS = minOldR.toLanguageTag();
|
||||
// boolean sameMinR = minNewRS.equals(minOldRS);
|
||||
//
|
||||
// if (sameMax && sameMinS && sameMinR) continue;
|
||||
// System.out.println(new LSR(language, script, region)
|
||||
// + "\tmax: " + maxNew
|
||||
// + (sameMax ? "" : "≠" + maxOldS)
|
||||
// + "\tminS: " + minNewS
|
||||
// + (sameMinS ? "" : "≠" + minOldS)
|
||||
// + "\tminR: " + minNewR
|
||||
// + (sameMinR ? "" : "≠" + minOldR)
|
||||
// );
|
||||
// }
|
||||
// }
|
||||
// System.out.println(language + ": " + tests);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// private static void add(String target, Counter<String> languageCounter, String language, int count) {
|
||||
// if (language.equals("aa")) {
|
||||
// int debug = 0;
|
||||
// }
|
||||
// languageCounter.add(language, count);
|
||||
// }
|
||||
//
|
||||
// private static void removeSingletons(Set<String> languages, Counter<String> languageCounter) {
|
||||
// for (String s : languageCounter) {
|
||||
// final long count = languageCounter.get(s);
|
||||
// if (count <= 1) {
|
||||
// languages.remove(s);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
}
|
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,473 @@
|
|||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.locale;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
|
||||
import com.ibm.icu.impl.locale.XCldrStub.ImmutableMultimap;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.ImmutableSet;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.LinkedHashMultimap;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Multimap;
|
||||
import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
|
||||
import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
|
||||
import com.ibm.icu.util.LocalePriorityList;
|
||||
import com.ibm.icu.util.Output;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
/**
|
||||
* Immutable class that picks best match between user's desired locales and application's supported locales.
|
||||
* @author markdavis
|
||||
*/
|
||||
public class XLocaleMatcher {
|
||||
private static final LSR UND = new LSR("und","","");
|
||||
private static final ULocale UND_LOCALE = new ULocale("und");
|
||||
|
||||
// normally the default values, but can be set via constructor
|
||||
|
||||
private final XLocaleDistance localeDistance;
|
||||
private final int thresholdDistance;
|
||||
private final int demotionPerAdditionalDesiredLocale;
|
||||
private final DistanceOption distanceOption;
|
||||
|
||||
// built based on application's supported languages in constructor
|
||||
|
||||
private final Map<LSR, Set<ULocale>> supportedLanguages; // the locales in the collection are ordered!
|
||||
private final Set<ULocale> exactSupportedLocales; // the locales in the collection are ordered!
|
||||
private final ULocale defaultLanguage;
|
||||
|
||||
|
||||
public static class Builder {
|
||||
private Set<ULocale> supportedLanguagesList;
|
||||
private int thresholdDistance = -1;
|
||||
private int demotionPerAdditionalDesiredLocale = -1;;
|
||||
private ULocale defaultLanguage;
|
||||
private XLocaleDistance localeDistance;
|
||||
private DistanceOption distanceOption;
|
||||
/**
|
||||
* @param languagePriorityList the languagePriorityList to set
|
||||
* @return
|
||||
*/
|
||||
public Builder setSupportedLocales(String languagePriorityList) {
|
||||
this.supportedLanguagesList = asSet(LocalePriorityList.add(languagePriorityList).build());
|
||||
return this;
|
||||
}
|
||||
public Builder setSupportedLocales(LocalePriorityList languagePriorityList) {
|
||||
this.supportedLanguagesList = asSet(languagePriorityList);
|
||||
return this;
|
||||
}
|
||||
public Builder setSupportedLocales(Set<ULocale> languagePriorityList) {
|
||||
this.supportedLanguagesList = languagePriorityList;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param thresholdDistance the thresholdDistance to set, with -1 = default
|
||||
* @return
|
||||
*/
|
||||
public Builder setThresholdDistance(int thresholdDistance) {
|
||||
this.thresholdDistance = thresholdDistance;
|
||||
return this;
|
||||
}
|
||||
/**
|
||||
* @param demotionPerAdditionalDesiredLocale the demotionPerAdditionalDesiredLocale to set, with -1 = default
|
||||
* @return
|
||||
*/
|
||||
public Builder setDemotionPerAdditionalDesiredLocale(int demotionPerAdditionalDesiredLocale) {
|
||||
this.demotionPerAdditionalDesiredLocale = demotionPerAdditionalDesiredLocale;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param localeDistance the localeDistance to set, with default = XLocaleDistance.getDefault().
|
||||
* @return
|
||||
*/
|
||||
public Builder setLocaleDistance(XLocaleDistance localeDistance) {
|
||||
this.localeDistance = localeDistance;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the default language, with null = default = first supported language
|
||||
* @param defaultLanguage
|
||||
* @return
|
||||
*/
|
||||
public Builder setDefaultLanguage(ULocale defaultLanguage) {
|
||||
this.defaultLanguage = defaultLanguage;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* If true, then the language differences are smaller than than script differences.
|
||||
* This is used in situations (such as maps) where it is better to fall back to the same script than a similar language.
|
||||
* @param distanceOption
|
||||
* @return
|
||||
*/
|
||||
public Builder setDistanceOption(DistanceOption distanceOption) {
|
||||
this.distanceOption = distanceOption;
|
||||
return this;
|
||||
}
|
||||
|
||||
public XLocaleMatcher build() {
|
||||
return new XLocaleMatcher(this);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a builder used in chaining parameters for building a Locale Matcher.
|
||||
* @return
|
||||
*/
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
/** Convenience method */
|
||||
public XLocaleMatcher(String supportedLocales) {
|
||||
this(builder().setSupportedLocales(supportedLocales));
|
||||
}
|
||||
/** Convenience method */
|
||||
public XLocaleMatcher(LocalePriorityList supportedLocales) {
|
||||
this(builder().setSupportedLocales(supportedLocales));
|
||||
}
|
||||
/** Convenience method */
|
||||
public XLocaleMatcher(Set<ULocale> supportedLocales) {
|
||||
this(builder().setSupportedLocales(supportedLocales));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a locale matcher with the given parameters.
|
||||
* @param supportedLocales
|
||||
* @param thresholdDistance
|
||||
* @param demotionPerAdditionalDesiredLocale
|
||||
* @param localeDistance
|
||||
* @param likelySubtags
|
||||
*/
|
||||
private XLocaleMatcher(Builder builder) {
|
||||
localeDistance = builder.localeDistance == null ? XLocaleDistance.getDefault()
|
||||
: builder.localeDistance;
|
||||
thresholdDistance = builder.thresholdDistance < 0 ? localeDistance.getDefaultScriptDistance()
|
||||
: builder.thresholdDistance;
|
||||
// only do AFTER above are set
|
||||
Set<LSR> paradigms = extractLsrSet(localeDistance.getParadigms());
|
||||
final Multimap<LSR, ULocale> temp2 = extractLsrMap(builder.supportedLanguagesList, paradigms);
|
||||
supportedLanguages = temp2.asMap();
|
||||
exactSupportedLocales = ImmutableSet.copyOf(temp2.values());
|
||||
defaultLanguage = builder.defaultLanguage != null ? builder.defaultLanguage
|
||||
: supportedLanguages.isEmpty() ? null
|
||||
: supportedLanguages.entrySet().iterator().next().getValue().iterator().next(); // first language
|
||||
demotionPerAdditionalDesiredLocale = builder.demotionPerAdditionalDesiredLocale < 0 ? localeDistance.getDefaultRegionDistance()+1
|
||||
: builder.demotionPerAdditionalDesiredLocale;
|
||||
distanceOption = builder.distanceOption;
|
||||
}
|
||||
|
||||
// Result is not immutable!
|
||||
private Set<LSR> extractLsrSet(Set<ULocale> languagePriorityList) {
|
||||
Set<LSR> result = new LinkedHashSet<LSR>();
|
||||
for (ULocale item : languagePriorityList) {
|
||||
final LSR max = item.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(item);
|
||||
result.add(max);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private Multimap<LSR,ULocale> extractLsrMap(Set<ULocale> languagePriorityList, Set<LSR> priorities) {
|
||||
Multimap<LSR, ULocale> builder = LinkedHashMultimap.create();
|
||||
for (ULocale item : languagePriorityList) {
|
||||
final LSR max = item.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(item);
|
||||
builder.put(max, item);
|
||||
}
|
||||
if (builder.size() > 1 && priorities != null) {
|
||||
// for the supported list, we put any priorities before all others, except for the first.
|
||||
Multimap<LSR, ULocale> builder2 = LinkedHashMultimap.create();
|
||||
|
||||
// copy the long way so the priorities are in the same order as in the original
|
||||
boolean first = true;
|
||||
for (Entry<LSR, Set<ULocale>> entry : builder.asMap().entrySet()) {
|
||||
final LSR key = entry.getKey();
|
||||
if (first || priorities.contains(key)) {
|
||||
builder2.putAll(key, entry.getValue());
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
// now copy the rest
|
||||
builder2.putAll(builder);
|
||||
if (!builder2.equals(builder)) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
builder = builder2;
|
||||
}
|
||||
return ImmutableMultimap.copyOf(builder);
|
||||
}
|
||||
|
||||
|
||||
/** Convenience method */
|
||||
public ULocale getBestMatch(ULocale ulocale) {
|
||||
return getBestMatch(ulocale, null);
|
||||
}
|
||||
/** Convenience method */
|
||||
public ULocale getBestMatch(String languageList) {
|
||||
return getBestMatch(LocalePriorityList.add(languageList).build(), null);
|
||||
}
|
||||
/** Convenience method */
|
||||
public ULocale getBestMatch(ULocale... locales) {
|
||||
return getBestMatch(new LinkedHashSet<ULocale>(Arrays.asList(locales)), null);
|
||||
}
|
||||
/** Convenience method */
|
||||
public ULocale getBestMatch(Set<ULocale> desiredLanguages) {
|
||||
return getBestMatch(desiredLanguages, null);
|
||||
}
|
||||
/** Convenience method */
|
||||
public ULocale getBestMatch(LocalePriorityList desiredLanguages) {
|
||||
return getBestMatch(desiredLanguages, null);
|
||||
}
|
||||
/** Convenience method */
|
||||
public ULocale getBestMatch(LocalePriorityList desiredLanguages, Output<ULocale> outputBestDesired) {
|
||||
return getBestMatch(asSet(desiredLanguages), outputBestDesired);
|
||||
}
|
||||
|
||||
// TODO add LocalePriorityList method asSet() for ordered Set view backed by LocalePriorityList
|
||||
private static Set<ULocale> asSet(LocalePriorityList languageList) {
|
||||
Set<ULocale> temp = new LinkedHashSet<ULocale>(); // maintain order
|
||||
for (ULocale locale : languageList) {
|
||||
temp.add(locale);
|
||||
};
|
||||
return temp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the best match between the desired languages and supported languages
|
||||
* @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first.
|
||||
* @param outputBestDesired The one of the desired languages that matched best.
|
||||
* Set to null if the best match was not below the threshold distance.
|
||||
* @return
|
||||
*/
|
||||
public ULocale getBestMatch(Set<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) {
|
||||
// fast path for singleton
|
||||
if (desiredLanguages.size() == 1) {
|
||||
return getBestMatch(desiredLanguages.iterator().next(), outputBestDesired);
|
||||
}
|
||||
// TODO produce optimized version for single desired ULocale
|
||||
Multimap<LSR, ULocale> desiredLSRs = extractLsrMap(desiredLanguages,null);
|
||||
int bestDistance = Integer.MAX_VALUE;
|
||||
ULocale bestDesiredLocale = null;
|
||||
Collection<ULocale> bestSupportedLocales = null;
|
||||
int delta = 0;
|
||||
mainLoop:
|
||||
for (final Entry<LSR, ULocale> desiredLsrAndLocale : desiredLSRs.entries()) {
|
||||
// quick check for exact match
|
||||
ULocale desiredLocale = desiredLsrAndLocale.getValue();
|
||||
LSR desiredLSR = desiredLsrAndLocale.getKey();
|
||||
if (delta < bestDistance) {
|
||||
if (exactSupportedLocales.contains(desiredLocale)) {
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = desiredLocale;
|
||||
}
|
||||
return desiredLocale;
|
||||
}
|
||||
// quick check for maximized locale
|
||||
Collection<ULocale> found = supportedLanguages.get(desiredLSR);
|
||||
if (found != null) {
|
||||
// if we find one in the set, return first (lowest). We already know the exact one isn't there.
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = desiredLocale;
|
||||
}
|
||||
return found.iterator().next();
|
||||
}
|
||||
}
|
||||
for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) {
|
||||
int distance = delta + localeDistance.distanceRaw(desiredLSR, supportedLsrAndLocale.getKey(),
|
||||
thresholdDistance, distanceOption);
|
||||
if (distance < bestDistance) {
|
||||
bestDistance = distance;
|
||||
bestDesiredLocale = desiredLocale;
|
||||
bestSupportedLocales = supportedLsrAndLocale.getValue();
|
||||
if (distance == 0) {
|
||||
break mainLoop;
|
||||
}
|
||||
}
|
||||
}
|
||||
delta += demotionPerAdditionalDesiredLocale;
|
||||
}
|
||||
if (bestDistance >= thresholdDistance) {
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = null;
|
||||
}
|
||||
return defaultLanguage;
|
||||
}
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = bestDesiredLocale;
|
||||
}
|
||||
// pick exact match if there is one
|
||||
if (bestSupportedLocales.contains(bestDesiredLocale)) {
|
||||
return bestDesiredLocale;
|
||||
}
|
||||
// otherwise return first supported, combining variants and extensions from bestDesired
|
||||
return bestSupportedLocales.iterator().next();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the best match between the desired languages and supported languages
|
||||
* @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first.
|
||||
* @param outputBestDesired The one of the desired languages that matched best.
|
||||
* Set to null if the best match was not below the threshold distance.
|
||||
* @return
|
||||
*/
|
||||
public ULocale getBestMatch(ULocale desiredLocale, Output<ULocale> outputBestDesired) {
|
||||
int bestDistance = Integer.MAX_VALUE;
|
||||
ULocale bestDesiredLocale = null;
|
||||
Collection<ULocale> bestSupportedLocales = null;
|
||||
|
||||
// quick check for exact match, with hack for und
|
||||
final LSR desiredLSR = desiredLocale.equals(UND_LOCALE) ? UND : LSR.fromMaximalized(desiredLocale);
|
||||
|
||||
if (exactSupportedLocales.contains(desiredLocale)) {
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = desiredLocale;
|
||||
}
|
||||
return desiredLocale;
|
||||
}
|
||||
// quick check for maximized locale
|
||||
if (distanceOption == DistanceOption.NORMAL) {
|
||||
Collection<ULocale> found = supportedLanguages.get(desiredLSR);
|
||||
if (found != null) {
|
||||
// if we find one in the set, return first (lowest). We already know the exact one isn't there.
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = desiredLocale;
|
||||
}
|
||||
return found.iterator().next();
|
||||
}
|
||||
}
|
||||
for (final Entry<LSR, Set<ULocale>> supportedLsrAndLocale : supportedLanguages.entrySet()) {
|
||||
int distance = localeDistance.distanceRaw(desiredLSR, supportedLsrAndLocale.getKey(),
|
||||
thresholdDistance, distanceOption);
|
||||
if (distance < bestDistance) {
|
||||
bestDistance = distance;
|
||||
bestDesiredLocale = desiredLocale;
|
||||
bestSupportedLocales = supportedLsrAndLocale.getValue();
|
||||
if (distance == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (bestDistance >= thresholdDistance) {
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = null;
|
||||
}
|
||||
return defaultLanguage;
|
||||
}
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = bestDesiredLocale;
|
||||
}
|
||||
// pick exact match if there is one
|
||||
if (bestSupportedLocales.contains(bestDesiredLocale)) {
|
||||
return bestDesiredLocale;
|
||||
}
|
||||
// otherwise return first supported, combining variants and extensions from bestDesired
|
||||
return bestSupportedLocales.iterator().next();
|
||||
}
|
||||
|
||||
/** Combine features of the desired locale into those of the supported, and return result. */
|
||||
public static ULocale combine(ULocale bestSupported, ULocale bestDesired) {
|
||||
// for examples of extensions, variants, see
|
||||
// http://unicode.org/repos/cldr/tags/latest/common/bcp47/
|
||||
// http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
|
||||
|
||||
if (!bestSupported.equals(bestDesired) && bestDesired != null) {
|
||||
// add region, variants, extensions
|
||||
ULocale.Builder b = new ULocale.Builder().setLocale(bestSupported);
|
||||
|
||||
// copy the region from the desired, if there is one
|
||||
String region = bestDesired.getCountry();
|
||||
if (!region.isEmpty()) {
|
||||
b.setRegion(region);
|
||||
}
|
||||
|
||||
// copy the variants from desired, if there is one
|
||||
// note that this will override any subvariants. Eg "sco-ulster-fonipa" + "…-fonupa" => "sco-fonupa" (nuking ulster)
|
||||
String variants = bestDesired.getVariant();
|
||||
if (!variants.isEmpty()) {
|
||||
b.setVariant(variants);
|
||||
}
|
||||
|
||||
// copy the extensions from desired, if there are any
|
||||
// note that this will override any subkeys. Eg "th-u-nu-latn-ca-buddhist" + "…-u-nu-native" => "th-u-nu-native" (nuking calendar)
|
||||
for (char extensionKey : bestDesired.getExtensionKeys()) {
|
||||
b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
|
||||
}
|
||||
bestSupported = b.build();
|
||||
}
|
||||
return bestSupported;
|
||||
}
|
||||
|
||||
/** Returns the distance between the two languages. The values are not necessarily symmetric.
|
||||
* @param desired A locale desired by the user
|
||||
* @param supported A locale supported by a program.
|
||||
* @return A return of 0 is a complete match, and 100 is a failure case (above the thresholdDistance).
|
||||
* A language is first maximized with add likely subtags, then compared.
|
||||
*/
|
||||
public int distance(ULocale desired, ULocale supported) {
|
||||
return localeDistance.distanceRaw(
|
||||
LSR.fromMaximalized(desired),
|
||||
LSR.fromMaximalized(supported), thresholdDistance, distanceOption);
|
||||
}
|
||||
|
||||
/** Convenience method */
|
||||
public int distance(String desiredLanguage, String supportedLanguage) {
|
||||
return localeDistance.distanceRaw(
|
||||
LSR.fromMaximalized(new ULocale(desiredLanguage)),
|
||||
LSR.fromMaximalized(new ULocale(supportedLanguage)),
|
||||
thresholdDistance, distanceOption);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return exactSupportedLocales.toString();
|
||||
}
|
||||
|
||||
/** Return the inverse of the distance: that is, 1-distance(desired, supported) */
|
||||
public double match(ULocale desired, ULocale supported) {
|
||||
return (100-distance(desired, supported))/100.0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a fraction between 0 and 1, where 1 means that the languages are a
|
||||
* perfect match, and 0 means that they are completely different. This is (100-distance(desired, supported))/100.0.
|
||||
* <br>Note that
|
||||
* the precise values may change over time; no code should be made dependent
|
||||
* on the values remaining constant.
|
||||
* @param desired Desired locale
|
||||
* @param desiredMax Maximized locale (using likely subtags)
|
||||
* @param supported Supported locale
|
||||
* @param supportedMax Maximized locale (using likely subtags)
|
||||
* @return value between 0 and 1, inclusive.
|
||||
* @deprecated Use the form with 2 parameters instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
|
||||
return match(desired, supported);
|
||||
}
|
||||
|
||||
/**
|
||||
* Canonicalize a locale (language). Note that for now, it is canonicalizing
|
||||
* according to CLDR conventions (he vs iw, etc), since that is what is needed
|
||||
* for likelySubtags.
|
||||
* @param ulocale language/locale code
|
||||
* @return ULocale with remapped subtags.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
public ULocale canonicalize(ULocale ulocale) {
|
||||
// TODO
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the thresholdDistance. Any distance above this value is treated as a match failure.
|
||||
*/
|
||||
public int getThresholdDistance() {
|
||||
return thresholdDistance;
|
||||
}
|
||||
}
|
|
@ -25,19 +25,22 @@ import com.ibm.icu.impl.Relation;
|
|||
import com.ibm.icu.impl.Row;
|
||||
import com.ibm.icu.impl.Row.R3;
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
|
||||
import com.ibm.icu.impl.locale.XLocaleMatcher;
|
||||
import com.ibm.icu.impl.locale.XLocaleMatcher.Builder;
|
||||
|
||||
/**
|
||||
* Provides a way to match the languages (locales) supported by a product to the
|
||||
* languages (locales) acceptable to a user, and get the best match. For
|
||||
* example:
|
||||
*
|
||||
*
|
||||
* <pre>
|
||||
* LocaleMatcher matcher = new LocaleMatcher("fr, en-GB, en");
|
||||
*
|
||||
*
|
||||
* // afterwards:
|
||||
* matcher.getBestMatch("en-US").toLanguageTag() => "en"
|
||||
* </pre>
|
||||
*
|
||||
*
|
||||
* It takes into account when languages are close to one another, such as fil
|
||||
* and tl, and when language regional variants are close, like en-GB and en-AU.
|
||||
* It also handles scripts, like zh-Hant vs zh-TW. For examples, see the test
|
||||
|
@ -46,7 +49,7 @@ import com.ibm.icu.impl.Utility;
|
|||
* product will just need one static instance, built with the languages
|
||||
* that it supports. However, it may want multiple instances with different
|
||||
* default languages based on additional information, such as the domain.
|
||||
*
|
||||
*
|
||||
* @author markdavis@google.com
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
|
@ -83,7 +86,7 @@ public class LocaleMatcher {
|
|||
* threshold, that default language is chosen. Typically the default is English,
|
||||
* but it could be different based on additional information, such as the domain
|
||||
* of the page.
|
||||
*
|
||||
*
|
||||
* @param languagePriorityList weighted list
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
|
@ -94,7 +97,7 @@ public class LocaleMatcher {
|
|||
/**
|
||||
* Create a new language matcher from a String form. The highest-weighted
|
||||
* language is the default.
|
||||
*
|
||||
*
|
||||
* @param languagePriorityListString String form of LanguagePriorityList
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
|
@ -124,6 +127,7 @@ public class LocaleMatcher {
|
|||
@Deprecated
|
||||
public LocaleMatcher(LocalePriorityList languagePriorityList, LanguageMatcherData matcherData, double threshold) {
|
||||
this.matcherData = matcherData == null ? defaultWritten : matcherData.freeze();
|
||||
this.languagePriorityList = languagePriorityList;
|
||||
for (final ULocale language : languagePriorityList) {
|
||||
add(language, languagePriorityList.getWeight(language));
|
||||
}
|
||||
|
@ -179,7 +183,7 @@ public class LocaleMatcher {
|
|||
|
||||
/**
|
||||
* Get the best match for a LanguagePriorityList
|
||||
*
|
||||
*
|
||||
* @param languageList list to match
|
||||
* @return best matching language code
|
||||
* @stable ICU 4.4
|
||||
|
@ -206,7 +210,7 @@ public class LocaleMatcher {
|
|||
|
||||
/**
|
||||
* Convenience method: Get the best match for a LanguagePriorityList
|
||||
*
|
||||
*
|
||||
* @param languageList String form of language priority list
|
||||
* @return best matching language code
|
||||
* @stable ICU 4.4
|
||||
|
@ -217,7 +221,7 @@ public class LocaleMatcher {
|
|||
|
||||
/**
|
||||
* Get the best match for an individual language code.
|
||||
*
|
||||
*
|
||||
* @param ulocale locale/language code to match
|
||||
* @return best matching language code
|
||||
* @stable ICU 4.4
|
||||
|
@ -241,14 +245,14 @@ public class LocaleMatcher {
|
|||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return "{" + defaultLanguage + ", "
|
||||
return "{" + defaultLanguage + ", "
|
||||
+ localeToMaxLocaleAndWeight + "}";
|
||||
}
|
||||
// ================= Privates =====================
|
||||
|
||||
/**
|
||||
* Get the best match for an individual language code.
|
||||
*
|
||||
*
|
||||
* @param languageCode
|
||||
* @return best matching language code and weight (as per
|
||||
* {@link #match(ULocale, ULocale)})
|
||||
|
@ -291,9 +295,9 @@ public class LocaleMatcher {
|
|||
}
|
||||
return bestTableMatch;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @internal
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
*/
|
||||
@Deprecated
|
||||
|
@ -309,7 +313,7 @@ public class LocaleMatcher {
|
|||
}
|
||||
|
||||
/**
|
||||
* We preprocess the data to get just the possible matches for each desired base language.
|
||||
* We preprocess the data to get just the possible matches for each desired base language.
|
||||
*/
|
||||
private void processMapping() {
|
||||
for (Entry<String, Set<String>> desiredToMatchingLanguages : matcherData.matchingLanguages().keyValuesSet()) {
|
||||
|
@ -343,7 +347,7 @@ public class LocaleMatcher {
|
|||
}
|
||||
|
||||
Set<Row.R3<ULocale, ULocale, Double>> localeToMaxLocaleAndWeight = new LinkedHashSet<Row.R3<ULocale, ULocale, Double>>();
|
||||
Map<String,Set<Row.R3<ULocale, ULocale, Double>>> desiredLanguageToPossibleLocalesToMaxLocaleToData
|
||||
Map<String,Set<Row.R3<ULocale, ULocale, Double>>> desiredLanguageToPossibleLocalesToMaxLocaleToData
|
||||
= new LinkedHashMap<String,Set<Row.R3<ULocale, ULocale, Double>>>();
|
||||
|
||||
// =============== Special Mapping Information ==============
|
||||
|
@ -444,6 +448,7 @@ public class LocaleMatcher {
|
|||
return (region == null ? "*" : region);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
String result = getLanguage();
|
||||
if (level != Level.language) {
|
||||
|
@ -487,7 +492,7 @@ public class LocaleMatcher {
|
|||
|
||||
enum Level {
|
||||
language(0.99),
|
||||
script(0.2),
|
||||
script(0.2),
|
||||
region(0.04);
|
||||
|
||||
final double worst;
|
||||
|
@ -527,7 +532,7 @@ public class LocaleMatcher {
|
|||
}
|
||||
}
|
||||
|
||||
double getScore(ULocale dMax, String desiredRaw, String desiredMax,
|
||||
double getScore(ULocale dMax, String desiredRaw, String desiredMax,
|
||||
ULocale sMax, String supportedRaw, String supportedMax) {
|
||||
double distance = 0;
|
||||
if (!desiredMax.equals(supportedMax)) {
|
||||
|
@ -543,7 +548,7 @@ public class LocaleMatcher {
|
|||
System.out.println("\t\t\t" + level + " Raw Score:\t" + desiredLocale + ";\t" + supportedLocale);
|
||||
}
|
||||
for (R3<LocalePatternMatcher,LocalePatternMatcher,Double> datum : scores) { // : result
|
||||
if (datum.get0().matches(desiredLocale)
|
||||
if (datum.get0().matches(desiredLocale)
|
||||
&& datum.get1().matches(supportedLocale)) {
|
||||
if (DEBUG) {
|
||||
System.out.println("\t\t\t\tFOUND\t" + datum);
|
||||
|
@ -557,6 +562,7 @@ public class LocaleMatcher {
|
|||
return level.worst;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder result = new StringBuilder().append(level);
|
||||
for (R3<LocalePatternMatcher, LocalePatternMatcher, Double> score : scores) {
|
||||
|
@ -566,6 +572,7 @@ public class LocaleMatcher {
|
|||
}
|
||||
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public ScoreData cloneAsThawed() {
|
||||
try {
|
||||
|
@ -581,10 +588,12 @@ public class LocaleMatcher {
|
|||
|
||||
private volatile boolean frozen = false;
|
||||
|
||||
/**
 * Returns this instance unchanged.
 * NOTE(review): this method does not modify the {@code frozen} field, so as far as this
 * method is concerned {@link #isFrozen()} keeps reporting the field's previous value --
 * confirm whether that is intentional.
 */
@Override
|
||||
public ScoreData freeze() {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Reports the current value of the {@code frozen} field.
 */
@Override
|
||||
public boolean isFrozen() {
|
||||
return frozen;
|
||||
}
|
||||
|
@ -638,6 +647,7 @@ public class LocaleMatcher {
|
|||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
*/
|
||||
@Override
|
||||
@Deprecated
|
||||
public String toString() {
|
||||
return languageScores + "\n\t" + scriptScores + "\n\t" + regionScores;
|
||||
|
@ -746,11 +756,12 @@ public class LocaleMatcher {
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
*/
|
||||
@Override
|
||||
@Deprecated
|
||||
public LanguageMatcherData cloneAsThawed() {
|
||||
LanguageMatcherData result;
|
||||
|
@ -766,11 +777,12 @@ public class LocaleMatcher {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
*/
|
||||
@Override
|
||||
@Deprecated
|
||||
public LanguageMatcherData freeze() {
|
||||
languageScores.freeze();
|
||||
|
@ -781,11 +793,12 @@ public class LocaleMatcher {
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
*/
|
||||
@Override
|
||||
@Deprecated
|
||||
public boolean isFrozen() {
|
||||
return frozen;
|
||||
|
@ -793,6 +806,7 @@ public class LocaleMatcher {
|
|||
}
|
||||
|
||||
LanguageMatcherData matcherData;
|
||||
LocalePriorityList languagePriorityList;
|
||||
|
||||
private static final LanguageMatcherData defaultWritten;
|
||||
|
||||
|
@ -845,4 +859,84 @@ public class LocaleMatcher {
|
|||
final LocaleMatcher matcher = new LocaleMatcher("");
|
||||
return matcher.match(a, matcher.addLikelySubtags(a), b, matcher.addLikelySubtags(b));
|
||||
}
|
||||
|
||||
transient XLocaleMatcher xLocaleMatcher = null;
|
||||
transient ULocale xDefaultLanguage = null;
|
||||
transient boolean xFavorScript = false;
|
||||
|
||||
/*
|
||||
* Returns the distance between the two languages, using the new CLDR syntax (see getBestMatch).
|
||||
* The values are not necessarily symmetric.
|
||||
* @param desired A locale desired by the user
|
||||
* @param supported A locale supported by a program.
|
||||
* @return A return of 0 is a complete match, and 100 is a complete mismatch (above the thresholdDistance).
|
||||
* A language is first maximized with add likely subtags, then compared.
|
||||
* @internal
|
||||
* @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
|
||||
*/
|
||||
@Deprecated
|
||||
public int distance(ULocale desired, ULocale supported) {
|
||||
return getLocaleMatcher().distance(desired, supported);
|
||||
}
|
||||
|
||||
private synchronized XLocaleMatcher getLocaleMatcher() {
|
||||
if (xLocaleMatcher == null) {
|
||||
Builder builder = XLocaleMatcher.builder();
|
||||
builder.setSupportedLocales(languagePriorityList);
|
||||
if (xDefaultLanguage != null) {
|
||||
builder.setDefaultLanguage(xDefaultLanguage);
|
||||
}
|
||||
if (xFavorScript) {
|
||||
builder.setDistanceOption(DistanceOption.SCRIPT_FIRST);
|
||||
}
|
||||
xLocaleMatcher = builder.build();
|
||||
}
|
||||
return xLocaleMatcher;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the best match between the desired languages and supported languages
|
||||
* This supports the new CLDR syntax to provide for better matches within
|
||||
* regional clusters (such as maghreb Arabic vs non-maghreb Arabic, or regions that use en-GB vs en-US)
|
||||
* and also matching between regions and macroregions, such as comparing es-419 to es-AR).
|
||||
* @param desiredLanguages Typically the supplied user's languages, in order of preference, with best first.
|
||||
* @param outputBestDesired The one of the desired languages that matched best.
|
||||
* Set to null if the best match was not below the threshold distance.
|
||||
* @return best-match supported language
|
||||
* @internal
|
||||
* @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
|
||||
*/
|
||||
@Deprecated
|
||||
public ULocale getBestMatch(LinkedHashSet<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) {
|
||||
return getLocaleMatcher().getBestMatch(desiredLanguages, outputBestDesired);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the default language, with null = default = first supported language
|
||||
* @param defaultLanguage Language to use in case the threshold for distance is exceeded.
|
||||
* @return this, for chaining
|
||||
* @internal
|
||||
* @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
|
||||
*/
|
||||
@Deprecated
|
||||
public synchronized LocaleMatcher setDefaultLanguage(ULocale defaultLanguage) {
|
||||
this.xDefaultLanguage = defaultLanguage;
|
||||
xLocaleMatcher = null;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* If true, then the language differences are smaller than than script differences.
|
||||
* This is used in situations (such as maps) where it is better to fall back to the same script than a similar language.
|
||||
* @param favorScript Set to true to treat script as most important.
|
||||
* @return this, for chaining.
|
||||
* @internal
|
||||
* @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
|
||||
*/
|
||||
@Deprecated
|
||||
public synchronized LocaleMatcher setFavorScript(boolean favorScript) {
|
||||
this.xFavorScript = favorScript;
|
||||
xLocaleMatcher = null;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,187 @@
|
|||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import com.ibm.icu.dev.test.AbstractTestLog;
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.dev.util.CollectionUtilities;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Splitter;
|
||||
import com.ibm.icu.util.ICUUncheckedIOException;
|
||||
|
||||
abstract public class DataDrivenTestHelper {
|
||||
|
||||
public static final List<String> DEBUG_LINE = Collections.singletonList("@debug");
|
||||
public static final Splitter SEMICOLON = Splitter.on(';').trimResults();
|
||||
public static final Splitter EQUAL_SPLIT = Splitter.on('=').trimResults();
|
||||
public static final String SEPARATOR = " ; \t";
|
||||
|
||||
protected TestFmwk framework = null;
|
||||
protected int minArgumentCount = 3;
|
||||
protected int maxArgumentCount = 4;
|
||||
private List<List<String>> lines = new ArrayList<List<String>>();
|
||||
private List<String> comments = new ArrayList<String>();
|
||||
|
||||
public DataDrivenTestHelper setFramework(TestFmwk testFramework) {
|
||||
this.framework = testFramework;
|
||||
return this;
|
||||
}
|
||||
|
||||
public <T extends Appendable> T appendLines(T out) {
|
||||
try {
|
||||
for (int i = 0; i < lines.size(); ++i) {
|
||||
List<String> components = lines.get(i);
|
||||
String comment = comments.get(i);
|
||||
if (components.isEmpty()) {
|
||||
if(!comment.isEmpty()) {
|
||||
out.append("# ").append(comment);
|
||||
}
|
||||
} else {
|
||||
String first = components.iterator().next();
|
||||
String sep = first.startsWith("@") ? "=" : SEPARATOR;
|
||||
out.append(CollectionUtilities.join(components, sep));
|
||||
if (!comment.isEmpty()) {
|
||||
out.append("\t# ").append(comment);
|
||||
}
|
||||
}
|
||||
out.append('\n');
|
||||
}
|
||||
return out;
|
||||
} catch (IOException e) {
|
||||
throw new ICUUncheckedIOException(e);
|
||||
}
|
||||
}
|
||||
|
||||
protected DataDrivenTestHelper addLine(List<String> arguments, String commentBase) {
|
||||
lines.add(Collections.unmodifiableList(arguments));
|
||||
comments.add(commentBase);
|
||||
return this;
|
||||
}
|
||||
|
||||
public DataDrivenTestHelper run(Class<?> classFileIsRelativeTo, String file) {
|
||||
return load(classFileIsRelativeTo, file)
|
||||
.test();
|
||||
}
|
||||
|
||||
public boolean isTestLine(List<String> arguments) {
|
||||
return !arguments.isEmpty() && !arguments.equals(DEBUG_LINE);
|
||||
}
|
||||
|
||||
public DataDrivenTestHelper test() {
|
||||
boolean breakpoint = false;
|
||||
for (int i = 0; i < lines.size(); ++i) {
|
||||
List<String> arguments = lines.get(i);
|
||||
String comment = comments.get(i);
|
||||
if (arguments.isEmpty()) {
|
||||
if (!comment.isEmpty()) {
|
||||
AbstractTestLog.logln(comment);
|
||||
}
|
||||
continue;
|
||||
} else if (arguments.equals(DEBUG_LINE)) {
|
||||
breakpoint = true;
|
||||
continue;
|
||||
} else {
|
||||
String first = arguments.get(0);
|
||||
if (first.startsWith("@")) {
|
||||
handleParams(comment, arguments);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
try {
|
||||
handle(i, breakpoint, comment, arguments);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
AbstractTestLog.errln("Illegal data test file entry (" + i + "): " + arguments + " # " + comment);
|
||||
}
|
||||
breakpoint = false;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public DataDrivenTestHelper load(Class<?> classFileIsRelativeTo, String file) {
|
||||
BufferedReader in = null;
|
||||
try {
|
||||
in = FileUtilities.openFile(classFileIsRelativeTo, file);
|
||||
//boolean breakpoint = false;
|
||||
|
||||
while (true) {
|
||||
String line = in.readLine();
|
||||
if (line == null) {
|
||||
break;
|
||||
}
|
||||
line = line.trim();
|
||||
if (line.isEmpty()) {
|
||||
addLine(Collections.<String>emptyList(), "");
|
||||
continue;
|
||||
}
|
||||
int hash = line.indexOf('#');
|
||||
String comment = "";
|
||||
String commentBase = "";
|
||||
if (hash >= 0) {
|
||||
commentBase = line.substring(hash+1).trim();
|
||||
line = line.substring(0,hash).trim();
|
||||
comment = "# " + commentBase;
|
||||
if (!line.isEmpty()) {
|
||||
comment = "\t" + comment;
|
||||
}
|
||||
}
|
||||
if (line.isEmpty()) {
|
||||
addLine(Collections.<String>emptyList(), commentBase);
|
||||
continue;
|
||||
}
|
||||
if (line.startsWith("@")) {
|
||||
List<String> keyValue = EQUAL_SPLIT.splitToList(line);
|
||||
addLine(keyValue, comment);
|
||||
continue;
|
||||
}
|
||||
List<String> arguments = SEMICOLON.splitToList(line);
|
||||
if (arguments.size() < minArgumentCount || arguments.size() > maxArgumentCount) {
|
||||
AbstractTestLog.errln("Malformed data line:" + line + comment);
|
||||
continue;
|
||||
}
|
||||
addLine(arguments, commentBase);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new ICUUncheckedIOException(e);
|
||||
} finally {
|
||||
if (in != null) {
|
||||
try {
|
||||
in.close();
|
||||
} catch (IOException e) {
|
||||
throw new ICUUncheckedIOException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
lines = Collections.unmodifiableList(lines); // should do deep unmodifiable...
|
||||
comments = Collections.unmodifiableList(comments);
|
||||
return this;
|
||||
}
|
||||
|
||||
protected boolean assertEquals(String message, Object expected, Object actual) {
|
||||
return TestFmwk.handleAssert(Objects.equals(expected, actual), message, stringFor(expected), stringFor(actual), null, false);
|
||||
}
|
||||
|
||||
private final String stringFor(Object obj) {
|
||||
return obj == null ? "null"
|
||||
: obj instanceof String ? "\"" + obj + '"'
|
||||
: obj instanceof Number ? String.valueOf(obj)
|
||||
: obj.getClass().getName() + "<" + obj + ">";
|
||||
}
|
||||
|
||||
abstract public void handle(int lineNumber, boolean breakpoint, String commentBase, List<String> arguments);
|
||||
|
||||
public void handleParams(String comment, List<String> arguments) {
|
||||
throw new IllegalArgumentException("Unrecognized parameter: " + arguments);
|
||||
}
|
||||
|
||||
public List<List<String>> getLines() {
|
||||
return lines;
|
||||
}
|
||||
}
|
|
@ -9,6 +9,8 @@
|
|||
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
|
@ -18,11 +20,12 @@ import com.ibm.icu.dev.test.TestFmwk;
|
|||
import com.ibm.icu.util.LocaleMatcher;
|
||||
import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData;
|
||||
import com.ibm.icu.util.LocalePriorityList;
|
||||
import com.ibm.icu.util.Output;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
/**
|
||||
* Test the LocaleMatcher.
|
||||
*
|
||||
*
|
||||
* @author markdavis
|
||||
*/
|
||||
@SuppressWarnings("deprecation")
|
||||
|
@ -490,7 +493,7 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
LocaleMatcher matcher;
|
||||
matcher = new LocaleMatcher("mul, nl");
|
||||
assertEquals("nl", matcher.getBestMatch("af").toString()); // af => nl
|
||||
|
||||
|
||||
matcher = new LocaleMatcher("mul, af");
|
||||
assertEquals("mul", matcher.getBestMatch("nl").toString()); // but nl !=> af
|
||||
}
|
||||
|
@ -618,7 +621,7 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
}
|
||||
}
|
||||
|
||||
private long timeLocaleMatcher(String title, String desired, LocaleMatcher matcher,
|
||||
private long timeLocaleMatcher(String title, String desired, LocaleMatcher matcher,
|
||||
boolean showmessage, int iterations, long comparisonTime) {
|
||||
long start = System.nanoTime();
|
||||
for (int i = iterations; i > 0; --i) {
|
||||
|
@ -629,11 +632,36 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
+ (comparisonTime > 0 ? (delta * 100 / comparisonTime - 100) + "% longer" : ""));
|
||||
return delta;
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void Test8288() {
|
||||
final LocaleMatcher matcher = newLocaleMatcher("it, en");
|
||||
assertEquals("it", matcher.getBestMatch("und").toString());
|
||||
assertEquals("en", matcher.getBestMatch("und, en").toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestTechPreview() {
|
||||
final LocaleMatcher matcher = newLocaleMatcher("it, en, ru");
|
||||
ULocale und = new ULocale("und");
|
||||
ULocale bulgarian = new ULocale("bg");
|
||||
ULocale russian = new ULocale("ru");
|
||||
|
||||
assertEquals("es-419/MX", 4, matcher.distance(new ULocale("es","419"), new ULocale("es","MX")));
|
||||
assertEquals("es-ES/DE", 4, matcher.distance(new ULocale("es","DE"), new ULocale("es","ES")));
|
||||
|
||||
Output<ULocale> outputBestDesired = new Output<ULocale>();
|
||||
|
||||
ULocale best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired);
|
||||
assertEquals(ULocale.ITALIAN, best);
|
||||
assertEquals(null, outputBestDesired.value);
|
||||
|
||||
matcher.setDefaultLanguage(ULocale.JAPANESE);
|
||||
best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired);
|
||||
assertEquals(ULocale.JAPANESE, best);
|
||||
|
||||
matcher.setFavorScript(true);
|
||||
best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, bulgarian)), outputBestDesired);
|
||||
assertEquals(russian, best);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,206 @@
|
|||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.impl.locale.XLikelySubtags.LSR;
|
||||
import com.ibm.icu.impl.locale.XLocaleDistance;
|
||||
import com.ibm.icu.impl.locale.XLocaleDistance.DistanceNode;
|
||||
import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
|
||||
import com.ibm.icu.impl.locale.XLocaleDistance.DistanceTable;
|
||||
import com.ibm.icu.util.LocaleMatcher;
|
||||
import com.ibm.icu.util.Output;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
/**
|
||||
* Test the XLocaleDistance.
|
||||
*
|
||||
* @author markdavis
|
||||
*/
|
||||
public class XLocaleDistanceTest extends TestFmwk {
|
||||
private static final boolean REFORMAT = false; // set to true to get a reformatted data file listed
|
||||
|
||||
public static final int FAIL = XLocaleDistance.ABOVE_THRESHOLD;
|
||||
|
||||
private XLocaleDistance localeMatcher = XLocaleDistance.getDefault();
|
||||
DataDrivenTestHelper tfh = new MyTestFileHandler()
|
||||
.setFramework(this)
|
||||
.load(XLocaleDistanceTest.class, "data/localeDistanceTest.txt");
|
||||
|
||||
static class Arguments {
|
||||
final ULocale desired;
|
||||
final ULocale supported;
|
||||
final int desiredToSupported;
|
||||
final int supportedToDesired;
|
||||
|
||||
public Arguments(List<String> args) {
|
||||
this.desired = new ULocale.Builder().setLanguageTag(args.get(0)).build(); // use more complicated expression to check syntax
|
||||
this.supported = new ULocale.Builder().setLanguageTag(args.get(1)).build();
|
||||
this.desiredToSupported = Integer.parseInt(args.get(2));
|
||||
this.supportedToDesired = args.size() > 3 ? Integer.parseInt(args.get(3)) : this.desiredToSupported;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTiming() {
|
||||
List<Arguments> testArgs = new ArrayList<Arguments>();
|
||||
for (List<String> line : tfh.getLines()) {
|
||||
if (tfh.isTestLine(line)) {
|
||||
testArgs.add(new Arguments(line));
|
||||
}
|
||||
}
|
||||
Arguments[] tests = testArgs.toArray(new Arguments[testArgs.size()]);
|
||||
|
||||
final LocaleMatcher oldLocaleMatcher = new LocaleMatcher("");
|
||||
|
||||
long likelyTime = 0;
|
||||
long newLikelyTime = 0;
|
||||
long newTimeMinusLikely = 0;
|
||||
//long intTime = 0;
|
||||
long oldTimeMinusLikely = 0;
|
||||
final int maxIterations = 1000;
|
||||
|
||||
for (int iterations = maxIterations; iterations > 0; --iterations) {
|
||||
// int count=0;
|
||||
for (Arguments test : tests) {
|
||||
final ULocale desired = test.desired;
|
||||
final ULocale supported = test.supported;
|
||||
//final int desiredToSupported = test.desiredToSupported;
|
||||
//final int supportedToDesired = test.supportedToDesired;
|
||||
|
||||
long temp = System.nanoTime();
|
||||
final ULocale desiredMax = ULocale.addLikelySubtags(desired);
|
||||
final ULocale supportedMax = ULocale.addLikelySubtags(supported);
|
||||
likelyTime += System.nanoTime()-temp;
|
||||
|
||||
temp = System.nanoTime();
|
||||
//double distOld1 = oldLocaleMatcher.match(desired, desiredMax, supported, supportedMax);
|
||||
//double distOld2 = oldLocaleMatcher.match(supported, supportedMax, desired, desiredMax);
|
||||
oldTimeMinusLikely += System.nanoTime()-temp;
|
||||
|
||||
temp = System.nanoTime();
|
||||
final LSR desiredLSR = LSR.fromMaximalized(desired);
|
||||
final LSR supportedLSR = LSR.fromMaximalized(supported);
|
||||
newLikelyTime += System.nanoTime()-temp;
|
||||
|
||||
temp = System.nanoTime();
|
||||
int dist1 = localeMatcher.distanceRaw(desiredLSR, supportedLSR, 1000, DistanceOption.NORMAL);
|
||||
int dist2 = localeMatcher.distanceRaw(supportedLSR, desiredLSR, 1000, DistanceOption.NORMAL);
|
||||
newTimeMinusLikely += System.nanoTime()-temp;
|
||||
}
|
||||
}
|
||||
final long oldTime = oldTimeMinusLikely+likelyTime;
|
||||
final long newTime = newLikelyTime+newTimeMinusLikely;
|
||||
logln("\n");
|
||||
logln("\tlikelyTime:\t" + likelyTime/maxIterations);
|
||||
logln("\toldTime-likelyTime:\t" + oldTimeMinusLikely/maxIterations);
|
||||
logln("totalOld:\t" + oldTime/maxIterations);
|
||||
logln("\tnewLikelyTime:\t" + newLikelyTime/maxIterations);
|
||||
logln("totalNew:\t" + newTime/maxIterations);
|
||||
assertTrue("newTime < 20% of oldTime", newTime * 5 < oldTime);
|
||||
//logln("\tnewIntTime-newLikelyTime-extractTime:\t" + intTime/maxIterations);
|
||||
//logln("totalInt:\t" + (intTime)/maxIterations);
|
||||
}
|
||||
|
||||
@Test
|
||||
@SuppressWarnings("deprecation")
|
||||
public void testInternalTable() {
|
||||
checkTables(localeMatcher.internalGetDistanceTable(), "", 1);
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation")
|
||||
private void checkTables(DistanceTable internalGetDistanceTable, String title, int depth) {
|
||||
// Check that ANY, ANY is always present, and that the table has a depth of exactly 3 everyplace.
|
||||
Map<String, Set<String>> matches = internalGetDistanceTable.getInternalMatches();
|
||||
|
||||
// must have ANY,ANY
|
||||
boolean haveANYANY = false;
|
||||
for (Entry<String, Set<String>> entry : matches.entrySet()) {
|
||||
String first = entry.getKey();
|
||||
boolean haveANYfirst = first.equals(XLocaleDistance.ANY);
|
||||
for (String second : entry.getValue()) {
|
||||
haveANYANY |= haveANYfirst && second.equals(XLocaleDistance.ANY);
|
||||
DistanceNode distanceNode = internalGetDistanceTable.getInternalNode(first, second);
|
||||
DistanceTable subDistanceTable = distanceNode.getDistanceTable();
|
||||
if (subDistanceTable == null || subDistanceTable.isEmpty()) {
|
||||
if (depth != 3) {
|
||||
logln("depth should be 3");
|
||||
}
|
||||
if (distanceNode.getClass() != DistanceNode.class) {
|
||||
logln("should be plain DistanceNode");
|
||||
}
|
||||
} else {
|
||||
if (depth >= 3) {
|
||||
logln("depth should be ≤ 3");
|
||||
}
|
||||
if (distanceNode.getClass() == DistanceNode.class) {
|
||||
logln("should NOT be plain DistanceNode");
|
||||
}
|
||||
checkTables(subDistanceTable, first + "," + second + ",", depth+1);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!haveANYANY) {
|
||||
logln("ANY-ANY not in" + matches);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testShowDistanceTable() {
|
||||
if (isVerbose()) {
|
||||
System.out.println(XLocaleDistance.getDefault().toString(false));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDataDriven() throws IOException {
|
||||
tfh.test();
|
||||
if (REFORMAT) {
|
||||
System.out.println(tfh.appendLines(new StringBuffer()));
|
||||
}
|
||||
}
|
||||
|
||||
class MyTestFileHandler extends DataDrivenTestHelper {
|
||||
final XLocaleDistance distance = XLocaleDistance.getDefault();
|
||||
Output<ULocale> bestDesired = new Output<ULocale>();
|
||||
private DistanceOption distanceOption = DistanceOption.NORMAL;
|
||||
private Integer threshold = distance.getDefaultScriptDistance();
|
||||
|
||||
@Override
|
||||
public void handle(int lineNumber, boolean breakpoint, String commentBase, List<String> arguments) {
|
||||
if (breakpoint) {
|
||||
breakpoint = false; // put debugger breakpoint here to break at @debug in test file
|
||||
}
|
||||
Arguments args = new Arguments(arguments);
|
||||
int supportedToDesiredActual = distance.distance(args.supported, args.desired, threshold, distanceOption);
|
||||
int desiredToSupportedActual = distance.distance(args.desired, args.supported, threshold, distanceOption);
|
||||
String desiredTag = args.desired.toLanguageTag();
|
||||
String supportedTag = args.supported.toLanguageTag();
|
||||
final String comment = commentBase.isEmpty() ? "" : "\t# " + commentBase;
|
||||
if (assertEquals("(" + lineNumber + ") " + desiredTag + " to " + supportedTag + comment, args.desiredToSupported, desiredToSupportedActual)) {
|
||||
assertEquals("(" + lineNumber + ") " + supportedTag + " to " + desiredTag + comment, args.supportedToDesired, supportedToDesiredActual);
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public void handleParams(String comment, List<String> arguments) {
|
||||
String switchArg = arguments.get(0);
|
||||
if (switchArg.equals("@DistanceOption")) {
|
||||
distanceOption = DistanceOption.valueOf(arguments.get(1));
|
||||
} else if (switchArg.equals("@Threshold")) {
|
||||
threshold = Integer.valueOf(arguments.get(1));
|
||||
} else {
|
||||
super.handleParams(comment, arguments);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,334 @@
|
|||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Joiner;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Splitter;
|
||||
import com.ibm.icu.impl.locale.XLocaleDistance;
|
||||
import com.ibm.icu.impl.locale.XLocaleDistance.DistanceOption;
|
||||
import com.ibm.icu.impl.locale.XLocaleMatcher;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.util.LocaleMatcher;
|
||||
import com.ibm.icu.util.LocalePriorityList;
|
||||
import com.ibm.icu.util.Output;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
/**
|
||||
* Test the XLocaleMatcher.
|
||||
*
|
||||
* @author markdavis
|
||||
*/
|
||||
public class XLocaleMatcherTest extends TestFmwk {
|
||||
private static final boolean REFORMAT = false; // set to true to get a reformatted data file listed
|
||||
|
||||
private static final int REGION_DISTANCE = 4;
|
||||
|
||||
private static final XLocaleDistance LANGUAGE_MATCHER_DATA = XLocaleDistance.getDefault();
|
||||
|
||||
private XLocaleMatcher newXLocaleMatcher() {
|
||||
return new XLocaleMatcher("");
|
||||
}
|
||||
|
||||
private XLocaleMatcher newXLocaleMatcher(LocalePriorityList build) {
|
||||
return new XLocaleMatcher(build);
|
||||
}
|
||||
|
||||
private XLocaleMatcher newXLocaleMatcher(String string) {
|
||||
return new XLocaleMatcher(LocalePriorityList.add(string).build());
|
||||
}
|
||||
|
||||
private XLocaleMatcher newXLocaleMatcher(LocalePriorityList string, int d) {
|
||||
return XLocaleMatcher.builder().setSupportedLocales(string).setThresholdDistance(d).build();
|
||||
}
|
||||
|
||||
private XLocaleMatcher newXLocaleMatcher(LocalePriorityList string, int d, DistanceOption distanceOption) {
|
||||
return XLocaleMatcher
|
||||
.builder()
|
||||
.setSupportedLocales(string)
|
||||
.setThresholdDistance(d)
|
||||
.setDistanceOption(distanceOption)
|
||||
.build();
|
||||
}
|
||||
|
||||
// public void testParentLocales() {
|
||||
// // find all the regions that have a closer relation because of an explicit parent
|
||||
// Set<String> explicitParents = new HashSet<>(INFO.getExplicitParents());
|
||||
// explicitParents.remove("root");
|
||||
// Set<String> otherParents = new HashSet<>(INFO.getExplicitParents());
|
||||
// for (String locale : explicitParents) {
|
||||
// while (true) {
|
||||
// locale = LocaleIDParser.getParent(locale);
|
||||
// if (locale == null || locale.equals("root")) {
|
||||
// break;
|
||||
// }
|
||||
// otherParents.add(locale);
|
||||
// }
|
||||
// }
|
||||
// otherParents.remove("root");
|
||||
//
|
||||
// for (String locale : CONFIG.getCldrFactory().getAvailable()) {
|
||||
// String parentId = LocaleIDParser.getParent(locale);
|
||||
// String parentIdSimple = LocaleIDParser.getSimpleParent(locale);
|
||||
// if (!explicitParents.contains(parentId) && !otherParents.contains(parentIdSimple)) {
|
||||
// continue;
|
||||
// }
|
||||
// System.out.println(locale + "\t" + CONFIG.getEnglish().getName(locale) + "\t" + parentId + "\t" + parentIdSimple);
|
||||
// }
|
||||
// }
|
||||
|
||||
|
||||
// TBD reenable with override data
|
||||
// public void testOverrideData() {
|
||||
// double threshold = 0.05;
|
||||
// XLocaleDistance XLocaleMatcherData = new XLocaleDistance()
|
||||
// .addDistance("br", "fr", 10, true)
|
||||
// .addDistance("es", "cy", 10, true);
|
||||
// logln(XLocaleMatcherData.toString());
|
||||
//
|
||||
// final XLocaleMatcher matcher = newXLocaleMatcher(
|
||||
// LocalePriorityList
|
||||
// .add(ULocale.ENGLISH)
|
||||
// .add(ULocale.FRENCH)
|
||||
// .add(ULocale.UK)
|
||||
// .build(), XLocaleMatcherData, threshold);
|
||||
// logln(matcher.toString());
|
||||
//
|
||||
// assertEquals(ULocale.FRENCH, matcher.getBestMatch(new ULocale("br")));
|
||||
// assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es"))); // one
|
||||
// // way
|
||||
// }
|
||||
|
||||
|
||||
private void assertEquals(Object expected, Object string) {
|
||||
assertEquals("", expected, string);
|
||||
}
|
||||
|
||||
/**
|
||||
* If all the base languages are the same, then each sublocale matches
|
||||
* itself most closely
|
||||
*/
|
||||
@Test
|
||||
public void testExactMatches() {
|
||||
String lastBase = "";
|
||||
TreeSet<ULocale> sorted = new TreeSet<ULocale>();
|
||||
for (ULocale loc : ULocale.getAvailableLocales()) {
|
||||
String language = loc.getLanguage();
|
||||
if (!lastBase.equals(language)) {
|
||||
check(sorted);
|
||||
sorted.clear();
|
||||
lastBase = language;
|
||||
}
|
||||
sorted.add(loc);
|
||||
}
|
||||
check(sorted);
|
||||
}
|
||||
|
||||
private void check(Set<ULocale> sorted) {
|
||||
if (sorted.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
check2(sorted);
|
||||
ULocale first = sorted.iterator().next();
|
||||
ULocale max = ULocale.addLikelySubtags(first);
|
||||
sorted.add(max);
|
||||
check2(sorted);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param sorted
|
||||
*/
|
||||
private void check2(Set<ULocale> sorted) {
|
||||
// TODO Auto-generated method stub
|
||||
logln("Checking: " + sorted);
|
||||
XLocaleMatcher matcher = newXLocaleMatcher(
|
||||
LocalePriorityList.add(
|
||||
sorted.toArray(new ULocale[sorted.size()]))
|
||||
.build());
|
||||
for (ULocale loc : sorted) {
|
||||
String stringLoc = loc.toString();
|
||||
assertEquals(stringLoc, matcher.getBestMatch(stringLoc).toString());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComputeDistance_monkeyTest() {
|
||||
String[] codes = ULocale.getISOCountries();
|
||||
Random random = new Random();
|
||||
XLocaleMatcher lm = newXLocaleMatcher();
|
||||
for (int i = 0; i < 1000; ++i) {
|
||||
String x = codes[random.nextInt(codes.length)];
|
||||
String y = codes[random.nextInt(codes.length)];
|
||||
double d = lm.distance(ULocale.forLanguageTag("xx-Xxxx-"+x), ULocale.forLanguageTag("xx-Xxxx-"+y));
|
||||
if (x.equals("ZZ") || y.equals("ZZ")) {
|
||||
assertEquals("dist(regionDistance," + x + ") = 0", REGION_DISTANCE, d);
|
||||
} else if (x.equals(y)) {
|
||||
assertEquals("dist(x,x) = 0", 0.0, d);
|
||||
} else {
|
||||
assertTrue("dist(" + x + "," + y + ") > 0", d > 0);
|
||||
assertTrue("dist(" + x + "," + y + ") ≤ " + REGION_DISTANCE, d <= REGION_DISTANCE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testPerf() {
|
||||
if (LANGUAGE_MATCHER_DATA == null) {
|
||||
return; // skip except when testing data
|
||||
}
|
||||
final ULocale desired = new ULocale("sv");
|
||||
|
||||
final String shortList = "en, sv";
|
||||
final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu";
|
||||
final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_001, en_150, en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, fr_BL, fr_CA, fr_CD, 
fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, 
so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA";
|
||||
|
||||
final XLocaleMatcher matcherShort = newXLocaleMatcher(shortList);
|
||||
final XLocaleMatcher matcherLong = newXLocaleMatcher(longList);
|
||||
final XLocaleMatcher matcherVeryLong = newXLocaleMatcher(veryLongList);
|
||||
|
||||
final LocaleMatcher matcherShortOld = new LocaleMatcher(shortList);
|
||||
final LocaleMatcher matcherLongOld = new LocaleMatcher(longList);
|
||||
final LocaleMatcher matcherVeryLongOld = new LocaleMatcher(veryLongList);
|
||||
|
||||
//XLocaleMatcher.DEBUG = true;
|
||||
ULocale expected = new ULocale("sv");
|
||||
assertEquals(expected, matcherShort.getBestMatch(desired));
|
||||
assertEquals(expected, matcherLong.getBestMatch(desired));
|
||||
assertEquals(expected, matcherVeryLong.getBestMatch(desired));
|
||||
//XLocaleMatcher.DEBUG = false;
|
||||
|
||||
long timeShortNew=0;
|
||||
long timeMediumNew=0;
|
||||
long timeLongNew=0;
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
int iterations = i == 0 ? 1000 : 1000000;
|
||||
boolean showMessage = i != 0;
|
||||
timeShortNew = timeXLocaleMatcher("Duration (few supported):\t", desired, matcherShort, showMessage, iterations);
|
||||
timeMediumNew = timeXLocaleMatcher("Duration (med. supported):\t", desired, matcherLong, showMessage, iterations);
|
||||
timeLongNew = timeXLocaleMatcher("Duration (many supported):\t", desired, matcherVeryLong, showMessage, iterations);
|
||||
}
|
||||
|
||||
long timeShortOld=0;
|
||||
long timeMediumOld=0;
|
||||
long timeLongOld=0;
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
int iterations = i == 0 ? 1000 : 100000;
|
||||
boolean showMessage = i != 0;
|
||||
timeShortOld = timeLocaleMatcher("Old Duration (few supported):\t", desired, matcherShortOld, showMessage, iterations);
|
||||
timeMediumOld = timeLocaleMatcher("Old Duration (med. supported):\t", desired, matcherLongOld, showMessage, iterations);
|
||||
timeLongOld = timeLocaleMatcher("Old Duration (many supported):\t", desired, matcherVeryLongOld, showMessage, iterations);
|
||||
}
|
||||
|
||||
assertTrue("timeShortNew (=" + timeShortNew + ") < 25% of timeShortOld (=" + timeShortOld + ")", timeShortNew * 4 < timeShortOld);
|
||||
assertTrue("timeMediumNew (=" + timeMediumNew + ") < 25% of timeMediumOld (=" + timeMediumOld + ")", timeMediumNew * 4 < timeMediumOld);
|
||||
assertTrue("timeLongNew (=" + timeLongNew + ") < 25% of timeLongOld (=" + timeLongOld + ")", timeLongNew * 4 < timeLongOld);
|
||||
|
||||
}
|
||||
|
||||
private long timeXLocaleMatcher(String title, ULocale desired, XLocaleMatcher matcher,
|
||||
boolean showmessage, int iterations) {
|
||||
long start = System.nanoTime();
|
||||
for (int i = iterations; i > 0; --i) {
|
||||
matcher.getBestMatch(desired);
|
||||
}
|
||||
long delta = System.nanoTime() - start;
|
||||
if (showmessage) logln(title + (delta / iterations) + " nanos");
|
||||
return (delta / iterations);
|
||||
}
|
||||
|
||||
private long timeLocaleMatcher(String title, ULocale desired, LocaleMatcher matcher,
|
||||
boolean showmessage, int iterations) {
|
||||
long start = System.nanoTime();
|
||||
for (int i = iterations; i > 0; --i) {
|
||||
matcher.getBestMatch(desired);
|
||||
}
|
||||
long delta = System.nanoTime() - start;
|
||||
if (showmessage) logln(title + (delta / iterations) + " nanos");
|
||||
return (delta / iterations);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDataDriven() throws IOException {
|
||||
DataDrivenTestHelper tfh = new MyTestFileHandler()
|
||||
.setFramework(this)
|
||||
.run(XLocaleMatcherTest.class, "data/localeMatcherTest.txt");
|
||||
if (REFORMAT) {
|
||||
System.out.println(tfh.appendLines(new StringBuilder()));
|
||||
}
|
||||
}
|
||||
|
||||
// Splits a list cell on either a comma followed by optional whitespace or a run of
// whitespace, trimming each piece; used below to tokenize the supported/desired columns.
private static final Splitter COMMA_SPACE = Splitter.on(Pattern.compile(",\\s*|\\s+")).trimResults();
|
||||
// Re-joins the tokens with ", " so that lists have one normalized form.
private static final Joiner JOIN_COMMA_SPACE = Joiner.on(", ");
|
||||
// Frozen set of the ASCII digits 0-9. NOTE(review): not referenced in the visible part of this file.
private static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze();
|
||||
|
||||
class MyTestFileHandler extends DataDrivenTestHelper {
|
||||
|
||||
Output<ULocale> bestDesired = new Output<ULocale>();
|
||||
DistanceOption distanceOption = DistanceOption.NORMAL;
|
||||
int threshold = -1;
|
||||
|
||||
@Override
|
||||
public void handle(int lineNumber, boolean breakpoint, String commentBase, List<String> arguments) {
|
||||
List<String> supported = COMMA_SPACE.splitToList(arguments.get(0));
|
||||
final String supportedReformatted = JOIN_COMMA_SPACE.join(supported);
|
||||
LocalePriorityList supportedList = LocalePriorityList.add(supportedReformatted).build();
|
||||
|
||||
Iterable<String> desired = COMMA_SPACE.split(arguments.get(1));
|
||||
final String desiredReformatted = JOIN_COMMA_SPACE.join(desired);
|
||||
LocalePriorityList desiredList = LocalePriorityList.add(desiredReformatted).build();
|
||||
|
||||
String expected = arguments.get(2);
|
||||
String expectedLanguageTag = expected.equals("null") ? null : new ULocale(expected).toLanguageTag();
|
||||
|
||||
String expectedUi = arguments.size() < 4 ? null : arguments.get(3);
|
||||
String expectedUiLanguageTag = expectedUi == null || expectedUi.equals("null") ? null
|
||||
: new ULocale(expectedUi).toLanguageTag();
|
||||
|
||||
if (breakpoint) {
|
||||
breakpoint = false; // put debugger breakpoint here to break at @debug in test file
|
||||
}
|
||||
|
||||
XLocaleMatcher matcher = threshold < 0 && distanceOption == DistanceOption.NORMAL
|
||||
? newXLocaleMatcher(supportedList)
|
||||
: newXLocaleMatcher(supportedList, threshold, distanceOption);
|
||||
commentBase = "(" + lineNumber + ") " + commentBase;
|
||||
|
||||
ULocale bestSupported;
|
||||
if (expectedUi != null) {
|
||||
bestSupported = matcher.getBestMatch(desiredList, bestDesired);
|
||||
ULocale bestUI = XLocaleMatcher.combine(bestSupported, bestDesired.value);
|
||||
assertEquals(commentBase + " (UI)", expectedUiLanguageTag, bestUI == null ? null : bestUI.toLanguageTag());
|
||||
} else {
|
||||
bestSupported = matcher.getBestMatch(desiredList);
|
||||
}
|
||||
String bestMatchLanguageTag = bestSupported == null ? null : bestSupported.toLanguageTag();
|
||||
assertEquals(commentBase, expectedLanguageTag, bestMatchLanguageTag);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleParams(String comment, List<String> arguments) {
|
||||
String switchItem = arguments.get(0);
|
||||
if (switchItem.equals("@DistanceOption")) {
|
||||
distanceOption = DistanceOption.valueOf(arguments.get(1));
|
||||
} else if (switchItem.equals("@Threshold")) {
|
||||
threshold = Integer.valueOf(arguments.get(1));
|
||||
} else {
|
||||
super.handleParams(comment, arguments);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,66 @@
|
|||
# © 2017 and later: Unicode, Inc. and others.
|
||||
# License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
#
|
||||
# Data-driven test for XLocaleDistance.
|
||||
# Format
|
||||
# • supported ; desired ; dist(s,d) ; dist(d,s)
|
||||
# • argument 4 only used when different
|
||||
# • 100 = fail
|
||||
# A line starting with @debug will reach a statement in the test code where you can put a breakpoint for debugging
|
||||
# The test code also supports reformatting this file, by setting the REFORMAT flag.
|
||||
|
||||
en-CA ; en-CA ; 0
|
||||
ar-MK ; en-CA ; 100
|
||||
|
||||
iw ; he ; 0
|
||||
zh ; cmn ; 0
|
||||
|
||||
# fallback languages get closer distances, between script (40) and region (4)
|
||||
|
||||
@debug
|
||||
to ; en ; 14 ; 100
|
||||
no ; no-DE ; 4
|
||||
nn ; no ; 10
|
||||
no-DE ; nn ; 14
|
||||
no ; no ; 0
|
||||
no ; da ; 12
|
||||
da ; zh-Hant ; 100
|
||||
zh-Hant ; zh-Hans ; 23 ; 19
|
||||
zh-Hans ; en ; 100
|
||||
|
||||
en-US ; en-AU ; 5 # across clusters
|
||||
en-VI ; en-GU ; 4 # within cluster
|
||||
en-AU ; en-CA ; 4 # within cluster
|
||||
|
||||
# testScript
|
||||
en-CA ; en-Cyrl ; 100
|
||||
en-Cyrl ; es-MX ; 100
|
||||
|
||||
hr ; sr ; 100
|
||||
#hr ; sr-Latn ; 8
|
||||
sr ; sr-Latn ; 5
|
||||
|
||||
# test419
|
||||
# Should be as good as any in cluster
|
||||
es-MX ; es-AR ; 4
|
||||
@debug
|
||||
es-MX ; es-419 ; 4
|
||||
es-MX ; es-MX ; 0
|
||||
es-MX ; es-ES ; 5
|
||||
es-MX ; es-PT ; 5
|
||||
es-MX ; es-150 ; 5
|
||||
es-419 ; es-AR ; 4
|
||||
es-419 ; es-419 ; 0
|
||||
es-419 ; es-MX ; 4
|
||||
es-419 ; es-ES ; 5
|
||||
es-419 ; es-PT ; 5
|
||||
es-419 ; es-150 ; 5
|
||||
es-ES ; es-AR ; 5
|
||||
es-ES ; es-419 ; 5
|
||||
es-ES ; es-MX ; 5
|
||||
es-ES ; es-ES ; 0
|
||||
es-ES ; es-PT ; 4
|
||||
es-419 ; es-150 ; 5
|
||||
|
||||
# testEuEc
|
||||
xx-Xxxx-EC; xx-Xxxx-EU; 4
|
|
@ -0,0 +1,387 @@
|
|||
# © 2017 and later: Unicode, Inc. and others.
|
||||
# License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
#
|
||||
# Data-driven test for the XLocaleMatcher.
|
||||
# Format
|
||||
# • Everything after "#" is a comment
|
||||
# • Arguments are separated by ";". They are:
|
||||
|
||||
# supported ; desired ; expected
|
||||
|
||||
# • The supported may have the threshold distance reset as a first item, eg 50, en, fr
|
||||
# A line starting with @debug will reach a statement in the test code where you can put a breakpoint for debugging
|
||||
# The test code also supports reformatting this file, by setting the REFORMAT flag.
|
||||
|
||||
##################################################
|
||||
# testParentLocales
|
||||
|
||||
# es-419, es-AR, and es-MX are in a cluster; es is in a different one
|
||||
|
||||
@debug
|
||||
es-419, es-ES ; es-AR ; es-419
|
||||
es-ES, es-419 ; es-AR ; es-419
|
||||
|
||||
es-419, es ; es-AR ; es-419
|
||||
es, es-419 ; es-AR ; es-419
|
||||
|
||||
es-MX, es ; es-AR ; es-MX
|
||||
es, es-MX ; es-AR ; es-MX
|
||||
|
||||
# en-GB, en-AU, and en-NZ are in a cluster; en in a different one
|
||||
|
||||
en-GB, en-US ; en-AU ; en-GB
|
||||
en-US, en-GB ; en-AU ; en-GB
|
||||
|
||||
en-GB, en ; en-AU ; en-GB
|
||||
en, en-GB ; en-AU ; en-GB
|
||||
|
||||
en-NZ, en-US ; en-AU ; en-NZ
|
||||
en-US, en-NZ ; en-AU ; en-NZ
|
||||
|
||||
en-NZ, en ; en-AU ; en-NZ
|
||||
en, en-NZ ; en-AU ; en-NZ
|
||||
|
||||
# pt-AO and pt-PT in one cluster; pt-BR in another
|
||||
|
||||
pt-PT, pt-BR ; pt-AO ; pt-PT
|
||||
pt-BR, pt-PT ; pt-AO ; pt-PT
|
||||
|
||||
pt-PT, pt ; pt-AO ; pt-PT
|
||||
pt, pt-PT ; pt-AO ; pt-PT
|
||||
|
||||
zh-MO, zh-TW ; zh-HK ; zh-MO
|
||||
zh-TW, zh-MO ; zh-HK ; zh-MO
|
||||
|
||||
zh-MO, zh-TW ; zh-HK ; zh-MO
|
||||
zh-TW, zh-MO ; zh-HK ; zh-MO
|
||||
|
||||
zh-MO, zh-CN ; zh-HK ; zh-MO
|
||||
zh-CN, zh-MO ; zh-HK ; zh-MO
|
||||
|
||||
zh-MO, zh ; zh-HK ; zh-MO
|
||||
zh, zh-MO ; zh-HK ; zh-MO
|
||||
|
||||
##################################################
|
||||
# testChinese
|
||||
|
||||
zh-CN, zh-TW, iw ; zh-Hant-TW ; zh-TW
|
||||
zh-CN, zh-TW, iw ; zh-Hant ; zh-TW
|
||||
zh-CN, zh-TW, iw ; zh-TW ; zh-TW
|
||||
zh-CN, zh-TW, iw ; zh-Hans-CN ; zh-CN
|
||||
zh-CN, zh-TW, iw ; zh-CN ; zh-CN
|
||||
zh-CN, zh-TW, iw ; zh ; zh-CN
|
||||
|
||||
##################################################
|
||||
# testenGB
|
||||
|
||||
fr, en, en-GB, es-419, es-MX, es ; en-NZ ; en-GB
|
||||
fr, en, en-GB, es-419, es-MX, es ; es-ES ; es
|
||||
fr, en, en-GB, es-419, es-MX, es ; es-AR ; es-419
|
||||
fr, en, en-GB, es-419, es-MX, es ; es-MX ; es-MX
|
||||
|
||||
##################################################
|
||||
# testFallbacks
|
||||
|
||||
91, en, hi ; sa ; hi
|
||||
|
||||
##################################################
|
||||
# testBasics
|
||||
|
||||
fr, en-GB, en ; en-GB ; en-GB
|
||||
fr, en-GB, en ; en ; en
|
||||
fr, en-GB, en ; fr ; fr
|
||||
fr, en-GB, en ; ja ; fr # return first if no match
|
||||
|
||||
##################################################
|
||||
# testFallback
|
||||
|
||||
# check that script fallbacks are handled right
|
||||
|
||||
zh-CN, zh-TW, iw ; zh-Hant ; zh-TW
|
||||
zh-CN, zh-TW, iw ; zh ; zh-CN
|
||||
zh-CN, zh-TW, iw ; zh-Hans-CN ; zh-CN
|
||||
zh-CN, zh-TW, iw ; zh-Hant-HK ; zh-TW
|
||||
zh-CN, zh-TW, iw ; he-IT ; iw
|
||||
|
||||
##################################################
|
||||
# testSpecials
|
||||
|
||||
# check that nearby languages are handled
|
||||
|
||||
en, fil, ro, nn ; tl ; fil
|
||||
en, fil, ro, nn ; mo ; ro
|
||||
en, fil, ro, nn ; nb ; nn
|
||||
|
||||
# make sure default works
|
||||
|
||||
en, fil, ro, nn ; ja ; en
|
||||
|
||||
##################################################
|
||||
# testRegionalSpecials
|
||||
|
||||
# verify that en-AU is closer to en-GB than to en (which is en-US)
|
||||
|
||||
en, en-GB, es, es-419 ; es-MX ; es-419
|
||||
en, en-GB, es, es-419 ; en-AU ; en-GB
|
||||
en, en-GB, es, es-419 ; es-ES ; es
|
||||
|
||||
##################################################
|
||||
# testHK
|
||||
|
||||
# HK and MO are closer to each other for Hant than to TW
|
||||
|
||||
zh, zh-TW, zh-MO ; zh-HK ; zh-MO
|
||||
zh, zh-TW, zh-HK ; zh-MO ; zh-HK
|
||||
|
||||
##################################################
|
||||
# testMatch-exact
|
||||
|
||||
# see localeDistance.txt
|
||||
|
||||
##################################################
|
||||
# testMatch-none
|
||||
|
||||
# see localeDistance.txt
|
||||
|
||||
##################################################
|
||||
# testMatch-matchOnMaximized
|
||||
|
||||
zh, zh-Hant ; und-TW ; zh-Hant # und-TW should be closer to zh-Hant than to zh
|
||||
en-Hant-TW, und-TW ; zh-Hant ; und-TW # zh-Hant should be closer to und-TW than to en-Hant-TW
|
||||
en-Hant-TW, und-TW ; zh ; und-TW # zh should be closer to und-TW than to en-Hant-TW
|
||||
|
||||
##################################################
|
||||
# testMatchGrandfatheredCode
|
||||
|
||||
fr, i-klingon, en-Latn-US ; en-GB-oed ; en-Latn-US
|
||||
|
||||
##################################################
|
||||
# testGetBestMatchForList-exactMatch
|
||||
fr, en-GB, ja, es-ES, es-MX ; ja, de ; ja
|
||||
|
||||
##################################################
|
||||
# testGetBestMatchForList-simpleVariantMatch
|
||||
fr, en-GB, ja, es-ES, es-MX ; de, en-US ; en-GB # Intentionally avoiding a perfect-match or two candidates for variant matches.
|
||||
|
||||
# Fallback.
|
||||
|
||||
fr, en-GB, ja, es-ES, es-MX ; de, zh ; fr
|
||||
|
||||
##################################################
|
||||
# testGetBestMatchForList-matchOnMaximized
|
||||
# Check that if the preference is maximized already, it works as well.
|
||||
|
||||
en, ja ; ja-Jpan-JP, en-AU ; ja # Match for ja-Jpan-JP (maximized already)
|
||||
|
||||
# ja-JP matches ja on likely subtags, and it's listed first, thus it wins over the second preference en-US.
|
||||
|
||||
en, ja ; ja-JP, en-US ; ja # Match for ja-Jpan-JP (maximized already)
|
||||
|
||||
# Check that if the preference is maximized already, it works as well.
|
||||
|
||||
en, ja ; ja-Jpan-JP, en-US ; ja # Match for ja-Jpan-JP (maximized already)
|
||||
|
||||
##################################################
|
||||
# testGetBestMatchForList-noMatchOnMaximized
|
||||
# Regression test for http://b/5714572 .
|
||||
# de maximizes to de-DE. Pick the exact match for the secondary language instead.
|
||||
en, de, fr, ja ; de-CH, fr ; de
|
||||
|
||||
##################################################
|
||||
# testBestMatchForTraditionalChinese
|
||||
|
||||
# Scenario: An application that only supports Simplified Chinese (and some other languages),
|
||||
# but does not support Traditional Chinese. zh-Hans-CN could be replaced with zh-CN, zh, or
|
||||
# zh-Hans, it wouldn't make much of a difference.
|
||||
|
||||
# The script distance (simplified vs. traditional Han) is considered small enough
|
||||
# to be an acceptable match. The regional difference is considered almost insignificant.
|
||||
|
||||
fr, zh-Hans-CN, en-US ; zh-TW ; zh-Hans-CN
|
||||
fr, zh-Hans-CN, en-US ; zh-Hant ; zh-Hans-CN
|
||||
|
||||
# For geo-political reasons, you might want to avoid a zh-Hant -> zh-Hans match.
|
||||
# In this case, if zh-TW, zh-HK or a tag starting with zh-Hant is requested, you can
|
||||
# change your call to getBestMatch to include a 2nd language preference.
|
||||
# "en" is a better match since its distance to "en-US" is closer than the distance
|
||||
# from "zh-TW" to "zh-CN" (script distance).
|
||||
|
||||
fr, zh-Hans-CN, en-US ; zh-TW, en ; en-US
|
||||
fr, zh-Hans-CN, en-US ; zh-Hant-CN, en, en ; en-US
|
||||
fr, zh-Hans-CN, en-US ; zh-Hans, en ; zh-Hans-CN
|
||||
|
||||
##################################################
|
||||
# testUndefined
|
||||
# When the undefined language doesn't match anything in the list,
|
||||
# getBestMatch returns the default, as usual.
|
||||
|
||||
it, fr ; und ; it
|
||||
|
||||
# When it *does* occur in the list, bestMatch returns it, as expected.
|
||||
it, und ; und ; und
|
||||
|
||||
# The unusual part: max("und") = "en-Latn-US", and since matching is based on maximized
|
||||
# tags, the undefined language would normally match English. But that would produce the
|
||||
# counterintuitive results that getBestMatch("und", XLocaleMatcher("it,en")) would be "en", and
|
||||
# getBestMatch("en", XLocaleMatcher("it,und")) would be "und".
|
||||
|
||||
# To avoid that, we change the matcher's definitions of max
|
||||
# so that max("und")="und". That produces the following, more desirable
|
||||
# results:
|
||||
|
||||
it, en ; und ; it
|
||||
it, und ; en ; it
|
||||
|
||||
##################################################
|
||||
# testGetBestMatch-regionDistance
|
||||
|
||||
es-AR, es ; es-MX ; es-AR
|
||||
fr, en, en-GB ; en-CA ; en-GB
|
||||
de-AT, de-DE, de-CH ; de ; de-DE
|
||||
|
||||
##################################################
|
||||
# testAsymmetry
|
||||
|
||||
mul, nl ; af ; nl # af => nl
|
||||
mul, af ; nl ; mul # but nl !=> af
|
||||
|
||||
##################################################
|
||||
# testGetBestMatchForList-matchOnMaximized2
|
||||
|
||||
# ja-JP matches ja on likely subtags, and it's listed first, thus it wins over the second preference en-GB.
|
||||
|
||||
fr, en-GB, ja, es-ES, es-MX ; ja-JP, en-GB ; ja # Match for ja-JP, with likely region subtag
|
||||
|
||||
# Check that if the preference is maximized already, it works as well.
|
||||
|
||||
fr, en-GB, ja, es-ES, es-MX ; ja-Jpan-JP, en-GB ; ja # Match for ja-Jpan-JP (maximized already)
|
||||
|
||||
##################################################
|
||||
# testGetBestMatchForList-closeEnoughMatchOnMaximized
|
||||
|
||||
en-GB, en, de, fr, ja ; de-CH, fr ; de
|
||||
en-GB, en, de, fr, ja ; en-US, ar, nl, de, ja ; en
|
||||
|
||||
##################################################
|
||||
# testGetBestMatchForPortuguese
|
||||
|
||||
# pt might be supported and not pt-PT
|
||||
|
||||
# European user who prefers Spanish over Brazilian Portuguese as a fallback.
|
||||
|
||||
pt-PT, pt-BR, es, es-419 ; pt-PT, es, pt ; pt-PT
|
||||
pt-PT, pt, es, es-419 ; pt-PT, es, pt ; pt-PT # pt implicit
|
||||
|
||||
# Brazilian user who prefers South American Spanish over European Portuguese as a fallback.
|
||||
# The asymmetry between this case and above is because it's "pt-PT" that's missing between the
|
||||
# matchers as "pt-BR" is a much more common language.
|
||||
|
||||
pt-PT, pt-BR, es, es-419 ; pt, es-419, pt-PT ; pt-BR
|
||||
pt-PT, pt-BR, es, es-419 ; pt-PT, es, pt ; pt-PT
|
||||
pt-PT, pt, es, es-419 ; pt-PT, es, pt ; pt-PT
|
||||
pt-PT, pt, es, es-419 ; pt, es-419, pt-PT ; pt
|
||||
|
||||
pt-BR, es, es-419 ; pt, es-419, pt-PT ; pt-BR
|
||||
|
||||
# Code that adds the user's country can get "pt-US" for a user's language.
|
||||
# That should fall back to "pt-BR".
|
||||
|
||||
pt-PT, pt-BR, es, es-419 ; pt-US, pt-PT ; pt-BR
|
||||
pt-PT, pt, es, es-419 ; pt-US, pt-PT, pt ; pt # pt-BR implicit
|
||||
|
||||
##################################################
|
||||
# testVariantWithScriptMatch 1 and 2
|
||||
|
||||
fr, en, sv ; en-GB ; en
|
||||
fr, en, sv ; en-GB ; en
|
||||
en, sv ; en-GB, sv ; en
|
||||
|
||||
##################################################
|
||||
# testLongLists
|
||||
|
||||
en, sv ; sv ; sv
|
||||
af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu ; sv ; sv
|
||||
af, af-NA, af-ZA, agq, agq-CM, ak, ak-GH, am, am-ET, ar, ar-001, ar-AE, ar-BH, ar-DJ, ar-DZ, ar-EG, ar-EH, ar-ER, ar-IL, ar-IQ, ar-JO, ar-KM, ar-KW, ar-LB, ar-LY, ar-MA, ar-MR, ar-OM, ar-PS, ar-QA, ar-SA, ar-SD, ar-SO, ar-SS, ar-SY, ar-TD, ar-TN, ar-YE, as, as-IN, asa, asa-TZ, ast, ast-ES, az, az-Cyrl, az-Cyrl-AZ, az-Latn, az-Latn-AZ, bas, bas-CM, be, be-BY, bem, bem-ZM, bez, bez-TZ, bg, bg-BG, bm, bm-ML, bn, bn-BD, bn-IN, bo, bo-CN, bo-IN, br, br-FR, brx, brx-IN, bs, bs-Cyrl, bs-Cyrl-BA, bs-Latn, bs-Latn-BA, ca, ca-AD, ca-ES, ca-ES-VALENCIA, ca-FR, ca-IT, ce, ce-RU, cgg, cgg-UG, chr, chr-US, ckb, ckb-IQ, ckb-IR, cs, cs-CZ, cu, cu-RU, cy, cy-GB, da, da-DK, da-GL, dav, dav-KE, de, de-AT, de-BE, de-CH, de-DE, de-LI, de-LU, dje, dje-NE, dsb, dsb-DE, dua, dua-CM, dyo, dyo-SN, dz, dz-BT, ebu, ebu-KE, ee, ee-GH, ee-TG, el, el-CY, el-GR, en, en-001, en-150, en-AG, en-AI, en-AS, en-AT, en-AU, en-BB, en-BE, en-BI, en-BM, en-BS, en-BW, en-BZ, en-CA, en-CC, en-CH, en-CK, en-CM, en-CX, en-CY, en-DE, en-DG, en-DK, en-DM, en-ER, en-FI, en-FJ, en-FK, en-FM, en-GB, en-GD, en-GG, en-GH, en-GI, en-GM, en-GU, en-GY, en-HK, en-IE, en-IL, en-IM, en-IN, en-IO, en-JE, en-JM, en-KE, en-KI, en-KN, en-KY, en-LC, en-LR, en-LS, en-MG, en-MH, en-MO, en-MP, en-MS, en-MT, en-MU, en-MW, en-MY, en-NA, en-NF, en-NG, en-NL, en-NR, en-NU, en-NZ, en-PG, en-PH, en-PK, en-PN, en-PR, en-PW, en-RW, en-SB, en-SC, en-SD, en-SE, en-SG, en-SH, en-SI, en-SL, en-SS, en-SX, en-SZ, en-TC, en-TK, en-TO, en-TT, en-TV, en-TZ, en-UG, en-UM, en-US, en-US-POSIX, en-VC, en-VG, en-VI, en-VU, en-WS, en-ZA, en-ZM, en-ZW, eo, eo-001, es, es-419, es-AR, es-BO, es-CL, es-CO, es-CR, es-CU, es-DO, es-EA, es-EC, es-ES, es-GQ, es-GT, es-HN, es-IC, es-MX, es-NI, es-PA, es-PE, es-PH, es-PR, es-PY, es-SV, es-US, es-UY, es-VE, et, et-EE, eu, eu-ES, ewo, ewo-CM, fa, fa-AF, fa-IR, ff, ff-CM, ff-GN, ff-MR, ff-SN, fi, fi-FI, fil, fil-PH, fo, fo-DK, fo-FO, fr, fr-BE, fr-BF, fr-BI, fr-BJ, fr-BL, fr-CA, fr-CD, fr-CF, fr-CG, fr-CH, fr-CI, 
fr-CM, fr-DJ, fr-DZ, fr-FR, fr-GA, fr-GF, fr-GN, fr-GP, fr-GQ, fr-HT, fr-KM, fr-LU, fr-MA, fr-MC, fr-MF, fr-MG, fr-ML, fr-MQ, fr-MR, fr-MU, fr-NC, fr-NE, fr-PF, fr-PM, fr-RE, fr-RW, fr-SC, fr-SN, fr-SY, fr-TD, fr-TG, fr-TN, fr-VU, fr-WF, fr-YT, fur, fur-IT, fy, fy-NL, ga, ga-IE, gd, gd-GB, gl, gl-ES, gsw, gsw-CH, gsw-FR, gsw-LI, gu, gu-IN, guz, guz-KE, gv, gv-IM, ha, ha-GH, ha-NE, ha-NG, haw, haw-US, he, he-IL, hi, hi-IN, hr, hr-BA, hr-HR, hsb, hsb-DE, hu, hu-HU, hy, hy-AM, id, id-ID, ig, ig-NG, ii, ii-CN, is, is-IS, it, it-CH, it-IT, it-SM, ja, ja-JP, jgo, jgo-CM, jmc, jmc-TZ, ka, ka-GE, kab, kab-DZ, kam, kam-KE, kde, kde-TZ, kea, kea-CV, khq, khq-ML, ki, ki-KE, kk, kk-KZ, kkj, kkj-CM, kl, kl-GL, kln, kln-KE, km, km-KH, kn, kn-IN, ko, ko-KP, ko-KR, kok, kok-IN, ks, ks-IN, ksb, ksb-TZ, ksf, ksf-CM, ksh, ksh-DE, kw, kw-GB, ky, ky-KG, lag, lag-TZ, lb, lb-LU, lg, lg-UG, lkt, lkt-US, ln, ln-AO, ln-CD, ln-CF, ln-CG, lo, lo-LA, lrc, lrc-IQ, lrc-IR, lt, lt-LT, lu, lu-CD, luo, luo-KE, luy, luy-KE, lv, lv-LV, mas, mas-KE, mas-TZ, mer, mer-KE, mfe, mfe-MU, mg, mg-MG, mgh, mgh-MZ, mgo, mgo-CM, mk, mk-MK, ml, ml-IN, mn, mn-MN, mr, mr-IN, ms, ms-BN, ms-MY, ms-SG, mt, mt-MT, mua, mua-CM, my, my-MM, mzn, mzn-IR, naq, naq-NA, nb, nb-NO, nb-SJ, nd, nd-ZW, ne, ne-IN, ne-NP, nl, nl-AW, nl-BE, nl-BQ, nl-CW, nl-NL, nl-SR, nl-SX, nmg, nmg-CM, nn, nn-NO, nnh, nnh-CM, nus, nus-SS, nyn, nyn-UG, om, om-ET, om-KE, or, or-IN, os, os-GE, os-RU, pa, pa-Arab, pa-Arab-PK, pa-Guru, pa-Guru-IN, pl, pl-PL, prg, prg-001, ps, ps-AF, pt, pt-AO, pt-BR, pt-CV, pt-GW, pt-MO, pt-MZ, pt-PT, pt-ST, pt-TL, qu, qu-BO, qu-EC, qu-PE, rm, rm-CH, rn, rn-BI, ro, ro-MD, ro-RO, rof, rof-TZ, root, ru, ru-BY, ru-KG, ru-KZ, ru-MD, ru-RU, ru-UA, rw, rw-RW, rwk, rwk-TZ, sah, sah-RU, saq, saq-KE, sbp, sbp-TZ, se, se-FI, se-NO, se-SE, seh, seh-MZ, ses, ses-ML, sg, sg-CF, shi, shi-Latn, shi-Latn-MA, shi-Tfng, shi-Tfng-MA, si, si-LK, sk, sk-SK, sl, sl-SI, smn, smn-FI, sn, sn-ZW, so, so-DJ, so-ET, so-KE, so-SO, sq, sq-AL, 
sq-MK, sq-XK, sr, sr-Cyrl, sr-Cyrl-BA, sr-Cyrl-ME, sr-Cyrl-RS, sr-Cyrl-XK, sr-Latn, sr-Latn-BA, sr-Latn-ME, sr-Latn-RS, sr-Latn-XK, sv, sv-AX, sv-FI, sv-SE, sw, sw-CD, sw-KE, sw-TZ, sw-UG, ta, ta-IN, ta-LK, ta-MY, ta-SG, te, te-IN, teo, teo-KE, teo-UG, th, th-TH, ti, ti-ER, ti-ET, tk, tk-TM, to, to-TO, tr, tr-CY, tr-TR, twq, twq-NE, tzm, tzm-MA, ug, ug-CN, uk, uk-UA, ur, ur-IN, ur-PK, uz, uz-Arab, uz-Arab-AF, uz-Cyrl, uz-Cyrl-UZ, uz-Latn, uz-Latn-UZ, vai, vai-Latn, vai-Latn-LR, vai-Vaii, vai-Vaii-LR, vi, vi-VN, vo, vo-001, vun, vun-TZ, wae, wae-CH, xog, xog-UG, yav, yav-CM, yi, yi-001, yo, yo-BJ, yo-NG, zgh, zgh-MA, zh, zh-Hans, zh-Hans-CN, zh-Hans-HK, zh-Hans-MO, zh-Hans-SG, zh-Hant, zh-Hant-HK, zh-Hant-MO, zh-Hant-TW, zu, zu-ZA ; sv ; sv
|
||||
|
||||
##################################################
|
||||
# test8288
|
||||
|
||||
it, en ; und ; it
|
||||
it, en ; und, en ; en
|
||||
|
||||
# examples from
|
||||
# http://unicode.org/repos/cldr/tags/latest/common/bcp47/
|
||||
# http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
|
||||
|
||||
##################################################
|
||||
# testUnHack
|
||||
|
||||
en-NZ, en-IT ; en-US ; en-NZ
|
||||
|
||||
##################################################
|
||||
# testEmptySupported => null
|
||||
; en ; null
|
||||
|
||||
##################################################
|
||||
# testVariantsAndExtensions
|
||||
##################################################
|
||||
# tests the .combine() method
|
||||
|
||||
und, fr ; fr-BE-fonipa ; fr ; fr-BE-fonipa
|
||||
und, fr-CA ; fr-BE-fonipa ; fr-CA ; fr-BE-fonipa
|
||||
und, fr-fonupa ; fr-BE-fonipa ; fr-fonupa ; fr-BE-fonipa
|
||||
und, no ; nn-BE-fonipa ; no ; no-BE-fonipa
|
||||
und, en-GB-u-sd-gbsct ; en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin ; en-GB-u-sd-gbsct ; en-GB-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin
|
||||
|
||||
en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ; fr-PSCRACK ; fr-PSCRACK
|
||||
en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ; fr ; fr-PSCRACK
|
||||
en-PSCRACK, de-PSCRACK, fr-PSCRACK, pt-PT-PSCRACK ; de-CH ; de-PSCRACK
|
||||
|
||||
##################################################
|
||||
# testClusters
|
||||
# we favor es-419 over others in cluster. Clusters: es- {ES, MA, EA} {419, AR, MX}
|
||||
|
||||
und, es, es-MA, es-MX, es-419 ; es-AR ; es-419
|
||||
und, es-MA, es, es-419, es-MX ; es-AR ; es-419
|
||||
und, es, es-MA, es-MX, es-419 ; es-EA ; es
|
||||
und, es-MA, es, es-419, es-MX ; es-EA ; es
|
||||
|
||||
# of course, fall back to within cluster
|
||||
|
||||
und, es, es-MA, es-MX ; es-AR ; es-MX
|
||||
und, es-MA, es, es-MX ; es-AR ; es-MX
|
||||
und, es-MA, es-MX, es-419 ; es-EA ; es-MA
|
||||
und, es-MA, es-419, es-MX ; es-EA ; es-MA
|
||||
|
||||
# we favor en-GB over others in cluster. Clusters: en- {US, GU, VI} {GB, IN, ZA}
|
||||
|
||||
und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB
|
||||
und, en-GU, en, en-GB, en-IN ; en-ZA ; en-GB
|
||||
und, en, en-GU, en-IN, en-GB ; en-VI ; en
|
||||
und, en-GU, en, en-GB, en-IN ; en-VI ; en
|
||||
|
||||
# of course, fall back to within cluster
|
||||
|
||||
und, en, en-GU, en-IN ; en-ZA ; en-IN
|
||||
und, en-GU, en, en-IN ; en-ZA ; en-IN
|
||||
und, en-GU, en-IN, en-GB ; en-VI ; en-GU
|
||||
und, en-GU, en-GB, en-IN ; en-VI ; en-GU
|
||||
|
||||
##################################################
|
||||
# testThreshold
|
||||
@Threshold=60
|
||||
|
||||
50, und, fr-CA-fonupa ; fr-BE-fonipa ; fr-CA-fonupa ; fr-BE-fonipa
|
||||
50, und, fr-Cyrl-CA-fonupa ; fr-BE-fonipa ; fr-Cyrl-CA-fonupa ; fr-Cyrl-BE-fonipa
|
||||
|
||||
@Threshold=-1 # restore
|
||||
|
||||
##################################################
|
||||
# testScriptFirst
|
||||
@DistanceOption=SCRIPT_FIRST
|
||||
@debug
|
||||
|
||||
ru, fr ; zh, pl ; fr
|
||||
ru, fr ; zh-Cyrl, pl ; ru
|
||||
#hr, en-Cyrl; sr ; en-Cyrl
|
||||
da, ru, hr; sr ; ru
|
Loading…
Add table
Reference in a new issue