ICU-20467 get XLocaleMatcher ready for drop-in

Get XLocaleMatcher ready for replacing the LocaleMatcher code.
More simplifications beyond ICU-20330 PR #409, smaller data, some more optimizations.
New API ready to be moved over.

- less work for region partitions distance lookup:
  - encode each array of single-character partition strings as one string
  - look up each desired partition only once, not for each (desired, supported) pair
  - look up the * fallback region distance only for the first mismatch, not for each non-matching pair
- skip region distance lookup if minRegionDistance>=remainingThreshold
- locale distance table: remove subtables that contain only *-* with default script/region distance
- mark intermediate subtag matches via last-character bit 7, not also with a match value
- likely subtags data: prune trailing *-only levels, and skip *-only script levels; likely subtags perf test
- likely subtags: skip_script=1; LSR.indexForRegion(ill-formed)=0 not negative
- likely subtags small optimization: array lookup for first letter of language subtag
- defaultDemotionPerDesiredLocale=distance(en, en-GB)
- favor=script: still reject a script mismatch
- if an explicit default locale is given, prefer that (by LSR), not the first supported locale
- XLocaleMatcher.Builder: copy supported locales into a List not a Set to preserve input indexes; duplicates are harmless
- match by LSR only, not exact locale match; results consistent with no fastpath, simpler, sometimes a little slower
- internal getBestMatch() returns just the suppIndex
- store the best desired locale & index in an LSR iterator
- make an LSR from Locale without ULocale detour
- adjust the XLocaleMatcher API as proposed; remove unused internal methods; clean up LocalePriorityList docs
This commit is contained in:
Markus Scherer 2019-02-09 14:20:56 -08:00
parent a3c5d7e0e4
commit 5182ad7d98
12 changed files with 1519 additions and 1092 deletions

View file

@ -5,7 +5,9 @@ package com.ibm.icu.impl.locale;
import java.util.Objects;
final class LSR {
static final int REGION_INDEX_LIMIT = 1000 + 26 * 26;
static final int REGION_INDEX_LIMIT = 1001 + 26 * 26;
static final boolean DEBUG_OUTPUT = false;
final String language;
final String script;
@ -21,27 +23,27 @@ final class LSR {
}
/**
* Returns a non-negative index for a well-formed region code.
* Returns a positive index (>0) for a well-formed region code.
* Do not rely on a particular region->index mapping; it may change.
* Returns -1 for ill-formed strings.
* Returns 0 for ill-formed strings.
*/
static final int indexForRegion(String region) {
if (region.length() == 2) {
int a = region.charAt(0) - 'A';
if (a < 0 || 25 < a) { return -1; }
if (a < 0 || 25 < a) { return 0; }
int b = region.charAt(1) - 'A';
if (b < 0 || 25 < b) { return -1; }
return 26 * a + b + 1000;
if (b < 0 || 25 < b) { return 0; }
return 26 * a + b + 1001;
} else if (region.length() == 3) {
int a = region.charAt(0) - '0';
if (a < 0 || 9 < a) { return -1; }
if (a < 0 || 9 < a) { return 0; }
int b = region.charAt(1) - '0';
if (b < 0 || 9 < b) { return -1; }
if (b < 0 || 9 < b) { return 0; }
int c = region.charAt(2) - '0';
if (c < 0 || 9 < c) { return -1; }
return (10 * a + b) * 10 + c;
if (c < 0 || 9 < c) { return 0; }
return (10 * a + b) * 10 + c + 1;
}
return -1;
return 0;
}
@Override

View file

@ -26,7 +26,7 @@ import com.ibm.icu.util.ICUException;
* Reads source data from ICU resource bundles.
*/
class LikelySubtagsBuilder {
private static final boolean DEBUG_OUTPUT = false;
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
private static ICUResourceBundle getSupplementalDataBundle(String name) {
return ICUResourceBundle.getBundleInstance(
@ -84,12 +84,33 @@ class LikelySubtagsBuilder {
private static final class TrieBuilder {
byte[] bytes = new byte[24];
int length = 0;
BytesTrieBuilder tb = new BytesTrieBuilder();
void addMapping(String s, int value) {
// s contains only ASCII characters.
s.getBytes(0, s.length(), bytes, 0);
tb.add(bytes, s.length(), value);
void addValue(int value) {
assert value >= 0;
tb.add(bytes, length, value);
}
/**
 * Appends a single '*' byte to the key bytes under construction and advances {@code length}.
 * The table-building loop calls this in place of addSubtag() for the "und" language
 * and for an empty script or region key.
 * Unlike addSubtag(), the byte is stored as is, without bit 7 set.
 */
void addStar() {
bytes[length++] = '*';
}
/**
 * Appends one subtag to the key bytes under construction and advances {@code length}.
 * Every character but the last is stored unchanged; the last one is stored with bit 7 set.
 *
 * @param s the subtag; must be non-empty, must not be "*", and must contain only ASCII
 *          characters (all three conditions are checked with assertions only)
 */
void addSubtag(String s) {
    assert !s.isEmpty();
    assert !s.equals("*");
    final int last = s.length() - 1;
    // Leading characters are copied as plain ASCII bytes.
    for (int i = 0; i < last; ++i) {
        char c = s.charAt(i);
        assert c <= 0x7f;
        bytes[length++] = (byte) c;
    }
    // Mark the last character as a terminator to avoid overlap matches.
    char tail = s.charAt(last);
    assert tail <= 0x7f;
    bytes[length++] = (byte) (tail | 0x80);
}
BytesTrie build() {
@ -114,44 +135,70 @@ class LikelySubtagsBuilder {
TrieBuilder trieBuilder = new TrieBuilder();
Map<LSR, Integer> lsrIndexes = new LinkedHashMap<>();
// Bogus LSR at index 0 for some code to easily distinguish between
// intermediate match points and real result values.
LSR bogus = new LSR("", "", "");
lsrIndexes.put(bogus, 0);
// Reserve index 0 as "no value":
// The runtime lookup returns 0 for an intermediate match with no value.
lsrIndexes.put(new LSR("", "", ""), 0); // arbitrary LSR
// Reserve index 1 for SKIP_SCRIPT:
// The runtime lookup returns 1 for an intermediate match with a value.
lsrIndexes.put(new LSR("skip", "script", ""), 1); // looks good when printing the data
// We could prefill the lsrList with common locales to give them small indexes,
// and see if that improves performance a little.
for (Map.Entry<String, Map<String, Map<String, LSR>>> ls : langTable.entrySet()) {
trieBuilder.length = 0;
String lang = ls.getKey();
if (lang.equals("und")) {
lang = "*";
trieBuilder.addStar();
} else {
trieBuilder.addSubtag(lang);
}
// Create a match point for the language.
trieBuilder.addMapping(lang, 0);
Map<String, Map<String, LSR>> scriptTable = ls.getValue();
for (Map.Entry<String, Map<String, LSR>> sr : scriptTable.entrySet()) {
String script = sr.getKey();
if (script.isEmpty()) {
script = "*";
boolean skipScript = false;
if (scriptTable.size() == 1) {
Map<String, LSR> regionTable = scriptTable.get("");
if (regionTable.size() == 1) {
// Prune the script and region levels from language with
// only * for scripts and regions.
int i = uniqueIdForLsr(lsrIndexes, regionTable.get(""));
trieBuilder.addValue(i);
continue;
} else {
// Prune the script level from language with only * for scripts
// but with real regions.
// Set an intermediate value as a signal to the lookup code.
trieBuilder.addValue(XLikelySubtags.SKIP_SCRIPT);
skipScript = true;
}
// Match point for lang+script.
trieBuilder.addMapping(lang + script, 0);
Map<String, LSR> regionTable = sr.getValue();
for (Map.Entry<String, LSR> r2lsr : regionTable.entrySet()) {
String region = r2lsr.getKey();
if (region.isEmpty()) {
region = "*";
}
// Map the whole lang+script+region to a unique, dense index of the LSR.
LSR lsr = r2lsr.getValue();
Integer index = lsrIndexes.get(lsr);
int i;
if (index != null) {
i = index.intValue();
}
int scriptStartLength = trieBuilder.length;
for (Map.Entry<String, Map<String, LSR>> sr : scriptTable.entrySet()) {
trieBuilder.length = scriptStartLength;
if (!skipScript) {
String script = sr.getKey();
if (script.isEmpty()) {
trieBuilder.addStar();
} else {
i = lsrIndexes.size();
lsrIndexes.put(lsr, i);
trieBuilder.addSubtag(script);
}
trieBuilder.addMapping(lang + script + region, i);
}
Map<String, LSR> regionTable = sr.getValue();
if (regionTable.size() == 1) {
// Prune the region level from language+script with only * for regions.
int i = uniqueIdForLsr(lsrIndexes, regionTable.get(""));
trieBuilder.addValue(i);
continue;
}
int regionStartLength = trieBuilder.length;
for (Map.Entry<String, LSR> r2lsr : regionTable.entrySet()) {
trieBuilder.length = regionStartLength;
String region = r2lsr.getKey();
// Map the whole lang+script+region to a unique, dense index of the LSR.
if (region.isEmpty()) {
trieBuilder.addStar();
} else {
trieBuilder.addSubtag(region);
}
int i = uniqueIdForLsr(lsrIndexes, r2lsr.getValue());
trieBuilder.addValue(i);
}
}
}
@ -161,6 +208,17 @@ class LikelySubtagsBuilder {
languageAliasesBuilder.toCanonical, regionAliasesBuilder.toCanonical, trie, lsrs);
}
/**
 * Returns the index registered for {@code lsr}, registering it first if necessary.
 * A new LSR gets the map's current size as its index, so indexes are handed out densely
 * in order of first appearance.
 *
 * @param lsrIndexes map from LSR to its index; modified when {@code lsr} is not yet a key
 * @param lsr the value to look up or register
 * @return the existing or newly assigned index
 */
private static int uniqueIdForLsr(Map<LSR, Integer> lsrIndexes, LSR lsr) {
    Integer known = lsrIndexes.get(lsr);
    if (known == null) {
        int next = lsrIndexes.size();
        lsrIndexes.put(lsr, next);
        return next;
    }
    return known.intValue();
}
private static Map<String, Map<String, Map<String, LSR>>> makeTable(
AliasesBuilder languageAliasesBuilder, AliasesBuilder regionAliasesBuilder) {
Map<String, Map<String, Map<String, LSR>>> result = new TreeMap<>();
@ -176,11 +234,8 @@ class LikelySubtagsBuilder {
final String region = ltp.region;
ltp = lsrFromLocaleID(value.getString()); // target
String languageTarget = ltp.language;
final String scriptTarget = ltp.script;
final String regionTarget = ltp.region;
set(result, language, script, region, ltp);
set(result, language, script, region, languageTarget, scriptTarget, regionTarget);
// now add aliases
Collection<String> languageAliases = languageAliasesBuilder.getAliases(language);
Collection<String> regionAliases = regionAliasesBuilder.getAliases(region);
@ -189,13 +244,12 @@ class LikelySubtagsBuilder {
if (languageAlias.equals(language) && regionAlias.equals(region)) {
continue;
}
set(result, languageAlias, script, regionAlias,
languageTarget, scriptTarget, regionTarget);
set(result, languageAlias, script, regionAlias, ltp);
}
}
}
// hack
set(result, "und", "Latn", "", "en", "Latn", "US");
set(result, "und", "Latn", "", new LSR("en", "Latn", "US"));
// hack, ensure that if und-YY => und-Xxxx-YY, then we add Xxxx=>YY to the table
// <likelySubtag from="und_GH" to="ak_Latn_GH"/>
@ -241,13 +295,6 @@ class LikelySubtagsBuilder {
return p2.length() < 4 ? new LSR(lang, "", p2) : new LSR(lang, p2, p3);
}
/**
 * Convenience overload: builds the target LSR from its three subtags and then
 * delegates to the overload that takes the target as an LSR.
 * NOTE(review): the call sites visible in this view pass an LSR directly,
 * so this overload looks like the pre-change side of the diff -- confirm.
 */
private static void set(Map<String, Map<String, Map<String, LSR>>> langTable,
final String language, final String script, final String region,
final String languageTarget, final String scriptTarget, final String regionTarget) {
LSR target = new LSR(languageTarget, scriptTarget, regionTarget);
set(langTable, language, script, region, target);
}
private static void set(Map<String, Map<String, Map<String, LSR>>> langTable,
final String language, final String script, final String region, LSR newValue) {
Map<String, Map<String, LSR>> scriptTable = getSubtable(langTable, language);
@ -255,10 +302,10 @@ class LikelySubtagsBuilder {
regionTable.put(region, newValue);
}
private static <K, V, T> Map<V, T> getSubtable(Map<K, Map<V, T>> table, final K language) {
Map<V, T> subTable = table.get(language);
private static <K, V, T> Map<V, T> getSubtable(Map<K, Map<V, T>> table, final K subtag) {
Map<V, T> subTable = table.get(subtag);
if (subTable == null) {
table.put(language, subTable = new TreeMap<>());
table.put(subtag, subTable = new TreeMap<>());
}
return subTable;
}

View file

@ -2,10 +2,11 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
import com.ibm.icu.util.BytesTrie;
import com.ibm.icu.util.ULocale;
@ -14,9 +15,21 @@ import com.ibm.icu.util.ULocale;
* Mostly but not only the data for mapping locales to their maximized forms.
*/
public class LocaleDistance {
/** Distance value bit flag, set by the builder. */
static final int DISTANCE_SKIP_SCRIPT = 0x80;
/** Distance value bit flag, set by trieNext(). */
private static final int DISTANCE_IS_FINAL = 0x100;
private static final int DISTANCE_IS_FINAL_OR_SKIP_SCRIPT =
DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT;
// Indexes into array of distances.
static final int IX_DEF_LANG_DISTANCE = 0;
static final int IX_DEF_SCRIPT_DISTANCE = 1;
static final int IX_DEF_REGION_DISTANCE = 2;
static final int IX_MIN_REGION_DISTANCE = 3;
static final int IX_LIMIT = 4;
private static final int ABOVE_THRESHOLD = 100;
private static final boolean DEBUG_OUTPUT = false;
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
// The trie maps each dlang+slang+dscript+sscript+dregion+sregion
// (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
@ -28,7 +41,7 @@ public class LocaleDistance {
* Maps each region to zero or more single-character partitions.
*/
private final byte[] regionToPartitionsIndex;
private final String[][] partitionArrays;
private final String[] partitionArrays;
/**
* Used to get the paradigm region for a cluster, if there is one.
@ -38,6 +51,8 @@ public class LocaleDistance {
private final int defaultLanguageDistance;
private final int defaultScriptDistance;
private final int defaultRegionDistance;
private final int minRegionDistance;
private final int defaultDemotionPerDesiredLocale;
// TODO: Load prebuilt data from a resource bundle
// to avoid the dependency on the builder code.
@ -45,42 +60,40 @@ public class LocaleDistance {
public static final LocaleDistance INSTANCE = LocaleDistanceBuilder.build();
LocaleDistance(BytesTrie trie,
byte[] regionToPartitionsIndex, String[][] partitionArrays,
Set<LSR> paradigmLSRs) {
byte[] regionToPartitionsIndex, String[] partitionArrays,
Set<LSR> paradigmLSRs, int[] distances) {
this.trie = trie;
if (DEBUG_OUTPUT) {
System.out.println("*** locale distance");
testOnlyPrintDistanceTable();
}
this.regionToPartitionsIndex = regionToPartitionsIndex;
this.partitionArrays = partitionArrays;
this.paradigmLSRs = paradigmLSRs;
defaultLanguageDistance = distances[IX_DEF_LANG_DISTANCE];
defaultScriptDistance = distances[IX_DEF_SCRIPT_DISTANCE];
defaultRegionDistance = distances[IX_DEF_REGION_DISTANCE];
this.minRegionDistance = distances[IX_MIN_REGION_DISTANCE];
BytesTrie iter = new BytesTrie(trie);
BytesTrie.Result result = iter.next('*');
assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
defaultLanguageDistance = iter.getValue();
result = iter.next('*');
assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
defaultScriptDistance = iter.getValue();
result = iter.next('*');
assert result.hasValue();
defaultRegionDistance = iter.getValue();
LSR en = new LSR("en", "Latn", "US");
LSR enGB = new LSR("en", "Latn", "GB");
defaultDemotionPerDesiredLocale = getBestIndexAndDistance(en, new LSR[] { enGB },
50, FavorSubtag.LANGUAGE) & 0xff;
if (DEBUG_OUTPUT) {
System.out.println("*** locale distance");
System.out.println("defaultLanguageDistance=" + defaultLanguageDistance);
System.out.println("defaultScriptDistance=" + defaultScriptDistance);
System.out.println("defaultRegionDistance=" + defaultRegionDistance);
testOnlyPrintDistanceTable();
}
}
// VisibleForTesting
public int testOnlyDistance(ULocale desired, ULocale supported,
int threshold, DistanceOption distanceOption) {
int threshold, FavorSubtag favorSubtag) {
LSR supportedLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported);
LSR desiredLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired);
return getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR },
threshold, distanceOption) & 0xff;
threshold, favorSubtag) & 0xff;
}
public enum DistanceOption {REGION_FIRST, SCRIPT_FIRST}
// NOTE: Replaced "NORMAL" with "REGION_FIRST". By default, scripts have greater weight
// than regions, so they might be considered the "normal" case.
/**
* Finds the supported LSR with the smallest distance from the desired one.
* Equivalent LSR subtags must be normalized into a canonical form.
@ -90,13 +103,12 @@ public class LocaleDistance {
* and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
*/
int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
int threshold, DistanceOption distanceOption) {
int threshold, FavorSubtag favorSubtag) {
BytesTrie iter = new BytesTrie(trie);
// Look up the desired language only once for all supported LSRs.
// Its "distance" is either a match point value of 0, or a non-match negative value.
// Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
// Set wantValue=true so that iter reads & skips the match point value.
int desLangDistance = trieNext(iter, desired.language, true, true);
int desLangDistance = trieNext(iter, desired.language, false);
long desLangState = desLangDistance >= 0 && supportedLsrs.length > 1 ? iter.getState64() : 0;
// Index of the supported LSR with the lowest distance.
int bestIndex = -1;
@ -105,26 +117,31 @@ public class LocaleDistance {
boolean star = false;
int distance = desLangDistance;
if (distance >= 0) {
assert (distance & DISTANCE_IS_FINAL) == 0;
if (slIndex != 0) {
iter.resetToState64(desLangState);
}
distance = trieNext(iter, supported.language, true, true);
distance = trieNext(iter, supported.language, true);
}
// Note: The data builder verifies that there are no rules with "any" (*) language and
// real (non *) script or region subtags.
// This means that if the lookup for either language fails we can use
// the default distances without further lookups.
if (distance < 0) { // <*, *>
int flags;
if (distance >= 0) {
flags = distance & DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
distance &= ~DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
} else { // <*, *>
if (desired.language.equals(supported.language)) {
distance = 0;
} else {
distance = defaultLanguageDistance;
}
flags = 0;
star = true;
}
assert 0 <= distance && distance <= 100;
boolean scriptFirst = distanceOption == DistanceOption.SCRIPT_FIRST;
if (scriptFirst) {
if (favorSubtag == FavorSubtag.SCRIPT) {
distance >>= 2;
}
if (distance >= threshold) {
@ -132,18 +149,17 @@ public class LocaleDistance {
}
int scriptDistance;
if (star) {
if (star || flags != 0) {
if (desired.script.equals(supported.script)) {
scriptDistance = 0;
} else {
scriptDistance = defaultScriptDistance;
}
} else {
scriptDistance = getDesSuppDistance(iter, iter.getState64(),
desired.script, supported.script, false);
}
if (scriptFirst) {
scriptDistance >>= 1;
scriptDistance = getDesSuppScriptDistance(iter, iter.getState64(),
desired.script, supported.script);
flags = scriptDistance & DISTANCE_IS_FINAL;
scriptDistance &= ~DISTANCE_IS_FINAL;
}
distance += scriptDistance;
if (distance >= threshold) {
@ -152,27 +168,24 @@ public class LocaleDistance {
if (desired.region.equals(supported.region)) {
// regionDistance = 0
} else if (star) {
} else if (star || (flags & DISTANCE_IS_FINAL) != 0) {
distance += defaultRegionDistance;
} else {
long startState = iter.getState64();
int remainingThreshold = threshold - distance;
if (minRegionDistance >= remainingThreshold) {
continue;
}
// From here on we know the regions are not equal.
// Map each region to zero or more partitions. (zero = one empty string)
// Map each region to zero or more partitions. (zero = one non-matching string)
// (Each array of single-character partition strings is encoded as one string.)
// If either side has more than one, then we find the maximum distance.
// This could be optimized by adding some more structure, but probably not worth it.
final String[] desiredPartitions = partitionsForRegion(desired);
final String[] supportedPartitions = partitionsForRegion(supported);
int regionDistance;
if (desiredPartitions.length > 1 || supportedPartitions.length > 1) {
regionDistance = getRegionPartitionsDistance(iter, startState,
desiredPartitions, supportedPartitions, threshold - distance);
} else {
regionDistance = getDesSuppDistance(iter, startState,
desiredPartitions[0], supportedPartitions[0], true);
}
distance += regionDistance;
distance += getRegionPartitionsDistance(
iter, iter.getState64(),
partitionsForRegion(desired),
partitionsForRegion(supported),
remainingThreshold);
}
if (distance < threshold) {
if (distance == 0) {
@ -185,101 +198,140 @@ public class LocaleDistance {
return bestIndex >= 0 ? (bestIndex << 8) | threshold : 0xffffff00 | ABOVE_THRESHOLD;
}
/**
 * Array-based variant: looks up every (desired, supported) partition pair via
 * getDesSuppDistance() and returns the largest distance found.
 * Returns early with the first pair distance that is a new maximum and reaches
 * {@code threshold}.
 * NOTE(review): a String-based method with the same name also appears in this view;
 * this array-based one looks like the pre-change side of the diff -- confirm.
 *
 * @param iter trie iterator; reset to {@code startState} before every pair after the first
 * @param startState trie state from which each pair lookup starts
 * @param desiredPartitions partition strings of the desired region
 * @param supportedPartitions partition strings of the supported region
 * @param threshold distance at or above which the search stops early
 * @return the maximum pair distance, or the first one that is &gt;= threshold
 */
private int getRegionPartitionsDistance(BytesTrie iter, long startState,
String[] desiredPartitions, String[] supportedPartitions, int threshold) {
// -1 doubles as the "no pair looked up yet" marker.
int regionDistance = -1;
for (String dp : desiredPartitions) {
for (String sp : supportedPartitions) {
if (regionDistance >= 0) { // no need to reset in first iteration
iter.resetToState64(startState);
}
int d = getDesSuppDistance(iter, startState, dp, sp, true);
if (regionDistance < d) {
if (d >= threshold) {
return d;
}
regionDistance = d;
}
}
}
assert regionDistance >= 0;
return regionDistance;
}
// Modified from
// DistanceTable#getDistance(desired, supported, Output distanceTable, starEquals).
private static final int getDesSuppDistance(BytesTrie iter, long startState,
String desired, String supported, boolean finalSubtag) {
private static final int getDesSuppScriptDistance(BytesTrie iter, long startState,
String desired, String supported) {
// Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
int distance = trieNext(iter, desired, false, true);
int distance = trieNext(iter, desired, false);
if (distance >= 0) {
distance = trieNext(iter, supported, true, !finalSubtag);
distance = trieNext(iter, supported, true);
}
if (distance < 0) {
BytesTrie.Result result = iter.resetToState64(startState).next('*'); // <*, *>
assert finalSubtag ? result.hasValue() : result == BytesTrie.Result.INTERMEDIATE_VALUE;
if (!finalSubtag && desired.equals(supported)) {
distance = 0; // same language or script
assert result.hasValue();
if (desired.equals(supported)) {
distance = 0; // same script
} else {
distance = iter.getValue();
assert distance >= 0;
}
if (result == BytesTrie.Result.FINAL_VALUE) {
distance |= DISTANCE_IS_FINAL;
}
}
return distance;
}
private static final int trieNext(BytesTrie iter, String s, boolean wantValue, boolean wantNext) {
/**
 * Computes the region distance between two sets of region partitions.
 * Each character of a partitions string is one partition; it is looked up in the trie
 * with bit 7 set, first the desired partition and then the supported one.
 * The result is the maximum distance over all (desired, supported) pairs, except that
 * the method returns early with the first distance that reaches {@code threshold}.
 * Pairs without a trie entry use the fallback distance from getFallbackRegionDistance().
 *
 * @param iter trie iterator, expected to be positioned at {@code startState} on entry;
 *             its position on return depends on the last lookup performed
 * @param startState trie state from which each desired-partition lookup starts
 * @param desiredPartitions partitions of the desired region, one character each
 * @param supportedPartitions partitions of the supported region, one character each
 * @param threshold distance at or above which the search stops early
 * @return the maximum pair distance, or the first one that is &gt;= threshold
 */
private static final int getRegionPartitionsDistance(BytesTrie iter, long startState,
String desiredPartitions, String supportedPartitions, int threshold) {
int desLength = desiredPartitions.length();
int suppLength = supportedPartitions.length();
// Fast path: exactly one partition on each side, so no state saving or looping.
if (desLength == 1 && suppLength == 1) {
BytesTrie.Result result = iter.next(desiredPartitions.charAt(0) | 0x80);
if (result.hasNext()) {
result = iter.next(supportedPartitions.charAt(0) | 0x80);
if (result.hasValue()) {
return iter.getValue();
}
}
return getFallbackRegionDistance(iter, startState);
}
int regionDistance = 0;
// Fall back to * only once, not for each pair of partition strings.
boolean star = false;
for (int di = 0;;) {
// Look up each desired-partition string only once,
// not for each (desired, supported) pair.
BytesTrie.Result result = iter.next(desiredPartitions.charAt(di++) | 0x80);
if (result.hasNext()) {
// The saved state is only needed when the inner loop runs more than once.
long desState = suppLength > 1 ? iter.getState64() : 0;
for (int si = 0;;) {
result = iter.next(supportedPartitions.charAt(si++) | 0x80);
int d;
if (result.hasValue()) {
d = iter.getValue();
} else if (star) {
// The fallback distance was already folded into regionDistance
// (or returned), so a further miss cannot raise the maximum.
d = 0;
} else {
d = getFallbackRegionDistance(iter, startState);
star = true;
}
if (d >= threshold) {
return d;
} else if (regionDistance < d) {
regionDistance = d;
}
if (si < suppLength) {
iter.resetToState64(desState);
} else {
break;
}
}
} else if (!star) {
// No entries at all for this desired partition: use the fallback once.
int d = getFallbackRegionDistance(iter, startState);
if (d >= threshold) {
return d;
} else if (regionDistance < d) {
regionDistance = d;
}
star = true;
}
if (di < desLength) {
iter.resetToState64(startState);
} else {
break;
}
}
return regionDistance;
}
/**
 * Reads the region distance stored under the '*' entry that follows {@code startState}.
 * The iterator is rewound to {@code startState} and then advanced by '*',
 * so it is left positioned after that entry.
 *
 * @param iter trie iterator to rewind and advance
 * @param startState trie state at which the region level starts
 * @return the value stored for '*'; asserted to be non-negative
 */
private static final int getFallbackRegionDistance(BytesTrie iter, long startState) {
    // <*, *>
    iter.resetToState64(startState);
    BytesTrie.Result starResult = iter.next('*');
    assert starResult.hasValue();
    final int fallback = iter.getValue();
    assert fallback >= 0;
    return fallback;
}
private static final int trieNext(BytesTrie iter, String s, boolean wantValue) {
if (s.isEmpty()) {
return -1; // no empty subtags in the distance data
}
BytesTrie.Result result;
int end = s.length() - 1;
for (int i = 0;; ++i) {
for (int i = 0, end = s.length() - 1;; ++i) {
int c = s.charAt(i);
assert c <= 0x7f;
if (i < end) {
result = iter.next(c);
if (!result.hasNext()) {
if (!iter.next(c).hasNext()) {
return -1;
}
} else {
// last character of this subtag
result = iter.next(c | 0x80);
break;
BytesTrie.Result result = iter.next(c | 0x80);
if (wantValue) {
if (result.hasValue()) {
int value = iter.getValue();
if (result == BytesTrie.Result.FINAL_VALUE) {
value |= DISTANCE_IS_FINAL;
}
return value;
}
} else {
if (result.hasNext()) {
return 0;
}
}
return -1;
}
}
if (wantValue) {
if (wantNext) {
if (result == BytesTrie.Result.INTERMEDIATE_VALUE) {
return iter.getValue();
}
} else {
if (result.hasValue()) {
return iter.getValue();
}
}
} else {
if (wantNext) {
if (result == BytesTrie.Result.INTERMEDIATE_VALUE) {
return 0;
}
} else {
if (result.hasValue()) {
return 0;
}
}
}
return -1;
}
@Override
public String toString() {
return testOnlyGetDistanceTable(true).toString();
return testOnlyGetDistanceTable().toString();
}
private String[] partitionsForRegion(LSR lsr) {
// ill-formed region -> one empty string
int pIndex = lsr.regionIndex >= 0 ? regionToPartitionsIndex[lsr.regionIndex] : 0;
private String partitionsForRegion(LSR lsr) {
// ill-formed region -> one non-matching string
int pIndex = regionToPartitionsIndex[lsr.regionIndex];
return partitionArrays[pIndex];
}
@ -296,48 +348,50 @@ public class LocaleDistance {
return defaultRegionDistance;
}
/**
 * Returns the default demotion per desired locale.
 * The constructor sets this to the distance between en-Latn-US and en-Latn-GB.
 */
int getDefaultDemotionPerDesiredLocale() {
return defaultDemotionPerDesiredLocale;
}
// TODO: When we build data offline,
// write test code to compare the loaded table with the builder output.
// Fail if different, with instructions for how to update the data file.
// VisibleForTesting
public Map<String, Integer> testOnlyGetDistanceTable(boolean skipIntermediateMatchPoints) {
Map<String, Integer> map = new LinkedHashMap<>();
public Map<String, Integer> testOnlyGetDistanceTable() {
Map<String, Integer> map = new TreeMap<>();
StringBuilder sb = new StringBuilder();
for (BytesTrie.Entry entry : trie) {
sb.setLength(0);
int numSubtags = 0;
int length = entry.bytesLength();
for (int i = 0; i < length; ++i) {
byte b = entry.byteAt(i);
if (b == '*') {
// One * represents a (desired, supported) = (ANY, ANY) pair.
sb.append("*-*-");
numSubtags += 2;
} else {
if (b >= 0) {
sb.append((char) b);
} else { // end of subtag
sb.append((char) (b & 0x7f)).append('-');
++numSubtags;
}
}
}
assert sb.length() > 0 && sb.charAt(sb.length() - 1) == '-';
if (!skipIntermediateMatchPoints || (numSubtags & 1) == 0) {
sb.setLength(sb.length() - 1);
String s = sb.toString();
if (!skipIntermediateMatchPoints && s.endsWith("*-*")) {
// Re-insert single-ANY match points to show consistent structure
// for the test code.
map.put(s.substring(0, s.length() - 2), 0);
}
map.put(s, entry.value);
}
sb.setLength(sb.length() - 1);
map.put(sb.toString(), entry.value);
}
return map;
}
// VisibleForTesting
public void testOnlyPrintDistanceTable() {
for (Map.Entry<String, Integer> mapping : testOnlyGetDistanceTable(true).entrySet()) {
System.out.println(mapping);
for (Map.Entry<String, Integer> mapping : testOnlyGetDistanceTable().entrySet()) {
String suffix = "";
int value = mapping.getValue();
if ((value & DISTANCE_SKIP_SCRIPT) != 0) {
value &= ~DISTANCE_SKIP_SCRIPT;
suffix = " skip script";
}
System.out.println(mapping.getKey() + '=' + value + suffix);
}
}
}

View file

@ -29,7 +29,7 @@ import com.ibm.icu.util.ULocale;
public final class LocaleDistanceBuilder {
private static final String ANY = "\uFFFD"; // matches any character. Uses value above any subtag.
private static final boolean DEBUG_OUTPUT = false;
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
private static String fixAny(String string) {
return "*".equals(string) ? ANY : string;
@ -135,7 +135,6 @@ public final class LocaleDistanceBuilder {
void addSubtag(String s, int value) {
assert !s.isEmpty();
assert value >= 0;
assert !s.equals(ANY);
int end = s.length() - 1;
for (int i = 0;; ++i) {
@ -149,7 +148,9 @@ public final class LocaleDistanceBuilder {
break;
}
}
tb.add(bytes, length, value);
if (value >= 0) {
tb.add(bytes, length, value);
}
}
BytesTrie build() {
@ -166,7 +167,7 @@ public final class LocaleDistanceBuilder {
}
private static final class DistanceTable {
final int nodeDistance; // distance for the lookup so far
int nodeDistance; // distance for the lookup so far
final Map<String, Map<String, DistanceTable>> subtables;
DistanceTable(int distance) {
@ -188,7 +189,8 @@ public final class LocaleDistanceBuilder {
return nodeDistance ^ subtables.hashCode();
}
public int getDistance(String desired, String supported, Output<DistanceTable> distanceTable, boolean starEquals) {
private int getDistance(String desired, String supported,
Output<DistanceTable> distanceTable, boolean starEquals) {
boolean star = false;
Map<String, DistanceTable> sub2 = subtables.get(desired);
if (sub2 == null) {
@ -214,6 +216,10 @@ public final class LocaleDistanceBuilder {
return result;
}
/**
 * Returns the subtable stored under (desired=ANY, supported=ANY).
 * Throws a NullPointerException if this table has no row for desired=ANY;
 * returns null if that row exists but has no supported=ANY entry.
 */
private DistanceTable getAnyAnyNode() {
return subtables.get(ANY).get(ANY);
}
void copy(DistanceTable other) {
for (Map.Entry<String, Map<String, DistanceTable>> e1 : other.subtables.entrySet()) {
for (Map.Entry<String, DistanceTable> e2 : e1.getValue().entrySet()) {
@ -330,6 +336,34 @@ public final class LocaleDistanceBuilder {
addSubtables(desiredLang, supportedLang, r);
}
/**
 * Removes subtables that carry no information beyond the default distances.
 * Children are pruned first (with {@code level + 1}); then, if this table has exactly
 * one desired-subtag row, its (ANY, ANY) child is compared with the default distance
 * for the level.
 *
 * @param level depth of this table; the script (1) and region (2) levels are candidates
 *              for pruning, other levels only recurse
 * @param distances default distances, indexed by the LocaleDistance.IX_* constants
 */
void prune(int level, int[] distances) {
    // Depth first: prune everything below this table before looking at it.
    for (Map<String, DistanceTable> supportedRow : subtables.values()) {
        for (DistanceTable child : supportedRow.values()) {
            child.prune(level + 1, distances);
        }
    }
    if (subtables.size() != 1) {
        return;
    }
    DistanceTable anyAny = getAnyAnyNode();
    if (level == 1) {
        // Remove script table -*-*-50 where there are no other script rules
        // and no following region rules.
        // If there are region rules, then mark this table for skipping.
        if (anyAny.nodeDistance != distances[LocaleDistance.IX_DEF_SCRIPT_DISTANCE]) {
            return;
        }
        if (anyAny.subtables.isEmpty()) {
            subtables.clear();
        } else {
            nodeDistance |= LocaleDistance.DISTANCE_SKIP_SCRIPT;
        }
    } else if (level == 2
            && anyAny.nodeDistance == distances[LocaleDistance.IX_DEF_REGION_DISTANCE]) {
        // Remove region table -*-*-4 where there are no other region rules.
        subtables.clear();
    }
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder("distance: ").append(nodeDistance).append('\n');
@ -356,6 +390,10 @@ public final class LocaleDistanceBuilder {
}
void toTrie(TrieBuilder builder) {
if (nodeDistance >= 0 && (nodeDistance & LocaleDistance.DISTANCE_SKIP_SCRIPT) != 0) {
getAnyAnyNode().toTrie(builder);
return;
}
int startLength = builder.length;
for (Map.Entry<String, Map<String, DistanceTable>> desSuppNode : subtables.entrySet()) {
String desired = desSuppNode.getKey();
@ -367,7 +405,7 @@ public final class LocaleDistanceBuilder {
builder.addStar(node.nodeDistance);
node.toTrie(builder);
} else {
builder.addSubtag(desired, 0);
builder.addSubtag(desired, -1);
int desiredLength = builder.length;
for (Map.Entry<String, DistanceTable> suppNode : suppNodeMap.entrySet()) {
String supported = suppNode.getKey();
@ -508,6 +546,7 @@ public final class LocaleDistanceBuilder {
final Multimap<String, String> variableToPartition = rmb.variableToPartitions;
final DistanceTable defaultDistanceTable = new DistanceTable(-1);
int minRegionDistance = 100;
for (Rule rule : rules) {
List<String> desired = rule.desired;
List<String> supported = rule.supported;
@ -519,6 +558,9 @@ public final class LocaleDistanceBuilder {
}
} else {
// language-script-region
if (rule.distance < minRegionDistance) {
minRegionDistance = rule.distance;
}
Collection<String> desiredRegions = getIdsFromVariable(variableToPartition, desired.get(2));
Collection<String> supportedRegions = getIdsFromVariable(variableToPartition, supported.get(2));
for (String desiredRegion2 : desiredRegions) {
@ -534,11 +576,25 @@ public final class LocaleDistanceBuilder {
}
}
int[] distances = new int[LocaleDistance.IX_LIMIT];
DistanceTable node = defaultDistanceTable.getAnyAnyNode();
distances[LocaleDistance.IX_DEF_LANG_DISTANCE] = node.nodeDistance;
node = node.getAnyAnyNode();
distances[LocaleDistance.IX_DEF_SCRIPT_DISTANCE] = node.nodeDistance;
node = node.getAnyAnyNode();
distances[LocaleDistance.IX_DEF_REGION_DISTANCE] = node.nodeDistance;
distances[LocaleDistance.IX_MIN_REGION_DISTANCE] = minRegionDistance;
defaultDistanceTable.prune(0, distances);
assert defaultDistanceTable.getAnyAnyNode().subtables.isEmpty();
defaultDistanceTable.subtables.remove(ANY);
TrieBuilder trieBuilder = new TrieBuilder();
defaultDistanceTable.toTrie(trieBuilder);
BytesTrie trie = trieBuilder.build();
return new LocaleDistance(
trie, rmb.regionToPartitionsIndex, rmb.partitionArrays, paradigmLSRs);
trie, rmb.regionToPartitionsIndex, rmb.partitionArrays,
paradigmLSRs, distances);
}
private static int checkStars(String desired, String supported, boolean allStars) {
@ -587,7 +643,7 @@ public final class LocaleDistanceBuilder {
// build() output
Multimap<String, String> variableToPartitions;
private byte[] regionToPartitionsIndex;
private String[][] partitionArrays;
private String[] partitionArrays;
RegionMapperBuilder(TerritoryContainment tc) {
regionSet = new RegionSet(tc);
@ -623,7 +679,7 @@ public final class LocaleDistanceBuilder {
void ensureRegionIsVariable(List<String> lsrList) {
String region = lsrList.get(2);
if (!isKnownVariable(region)) {
assert LSR.indexForRegion(region) >= 0; // well-formed region subtag
assert LSR.indexForRegion(region) > 0; // well-formed region subtag
String variable = "$" + region;
add(variable, region);
lsrList.set(2, variable);
@ -639,7 +695,7 @@ public final class LocaleDistanceBuilder {
// Example: {"1", "5"}
Map<Collection<String>, Integer> partitionStrings = new LinkedHashMap<>();
// pIndex 0: default value in regionToPartitionsIndex
Collection<String> noPartitions = Collections.singleton("");
Collection<String> noPartitions = Collections.singleton(".");
makeUniqueIndex(partitionStrings, noPartitions);
// Example: "$americas" -> {"1", "5"}
@ -697,13 +753,24 @@ public final class LocaleDistanceBuilder {
regionToPartitionsIndex[regionIndex] = (byte) pIndex;
}
}
// LSR.indexForRegion(ill-formed region) returns 0.
// Its regionToPartitionsIndex must also be 0 for the noPartitions value.
assert regionToPartitionsIndex[0] == 0;
// Turn the Collection of Collections into an array of arrays.
// Turn the Collection of Collections of single-character strings
// into an array of strings.
Collection<Collection<String>> list = partitionStrings.keySet();
partitionArrays = new String[list.size()][];
partitionArrays = new String[list.size()];
StringBuilder sb = new StringBuilder();
int i = 0;
for (Collection<String> partitions : list) {
partitionArrays[i++] = partitions.toArray(new String[partitions.size()]);
assert !partitions.isEmpty();
sb.setLength(0);
for (String p : partitions) {
assert p.length() == 1;
sb.append(p);
}
partitionArrays[i++] = sb.toString();
}
}
}

View file

@ -2,10 +2,9 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import com.ibm.icu.util.BytesTrie;
import com.ibm.icu.util.ULocale;
@ -15,11 +14,14 @@ public final class XLikelySubtags {
private static final String PSEUDO_BIDI_PREFIX = "+"; // -XB, -PSBIDI
private static final String PSEUDO_CRACKED_PREFIX = ","; // -XC, -PSCRACK
private static final boolean DEBUG_OUTPUT = false;
static final int SKIP_SCRIPT = 1;
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
// TODO: Load prebuilt data from a resource bundle
// to avoid the dependency on the builder code.
static final XLikelySubtags INSTANCE = new XLikelySubtags(LikelySubtagsBuilder.build());
// VisibleForTesting
public static final XLikelySubtags INSTANCE = new XLikelySubtags(LikelySubtagsBuilder.build());
static final class Data {
private final Map<String, String> languageAliases;
@ -46,6 +48,7 @@ public final class XLikelySubtags {
private final long trieUndState;
private final long trieUndZzzzState;
private final int defaultLsrIndex;
private final long[] trieFirstLetterStates = new long[26];
private final LSR[] lsrs;
private XLikelySubtags(XLikelySubtags.Data data) {
@ -56,20 +59,24 @@ public final class XLikelySubtags {
// Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
BytesTrie.Result result = trie.next('*');
assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
int value = trie.getValue();
assert value == 0;
assert result.hasNext();
trieUndState = trie.getState64();
result = trie.next('*');
assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
value = trie.getValue();
assert value == 0;
assert result.hasNext();
trieUndZzzzState = trie.getState64();
result = trie.next('*');
assert result.hasValue();
defaultLsrIndex = trie.getValue();
trie.reset();
for (char c = 'a'; c <= 'z'; ++c) {
result = trie.next(c);
if (result == BytesTrie.Result.NO_VALUE) {
trieFirstLetterStates[c - 'a'] = trie.getState64();
}
trie.reset();
}
if (DEBUG_OUTPUT) {
System.out.println("*** likely subtags");
for (Map.Entry<String, LSR> mapping : getTable().entrySet()) {
@ -83,19 +90,31 @@ public final class XLikelySubtags {
return canonical == null ? alias : canonical;
}
LSR makeMaximizedLsrFrom(ULocale locale) {
// VisibleForTesting
public LSR makeMaximizedLsrFrom(ULocale locale) {
String name = locale.getName();
if (name.startsWith("@x=")) {
// Private use language tag x-subtag-subtag...
return new LSR(name, "", "");
}
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
locale.getVariant());
}
/**
 * Builds the maximized LSR (language, script, region) for a {@link Locale}.
 *
 * <p>If the locale's BCP 47 language tag starts with "x-", it is treated as a
 * private use tag: the whole tag becomes the language, with empty script and region.
 * Otherwise the locale's language, script, country and variant are handed to
 * makeMaximizedLsr().
 *
 * @param locale the locale to maximize
 * @return the LSR for the locale
 */
LSR makeMaximizedLsrFrom(Locale locale) {
    final String languageTag = locale.toLanguageTag();
    final boolean isPrivateUse = languageTag.startsWith("x-");
    if (!isPrivateUse) {
        return makeMaximizedLsr(
                locale.getLanguage(), locale.getScript(), locale.getCountry(),
                locale.getVariant());
    }
    // Private use language tag x-subtag-subtag...
    return new LSR(languageTag, "", "");
}
private LSR makeMaximizedLsr(String language, String script, String region, String variant) {
// Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
// They should match only themselves,
// not other locales with what looks like the same language and script subtags.
String language = locale.getLanguage();
String script = locale.getScript();
String region = locale.getCountry();
if (region.length() == 2 && region.charAt(0) == 'X') {
switch (region.charAt(1)) {
case 'A':
@ -112,7 +131,6 @@ public final class XLikelySubtags {
}
}
String variant = locale.getVariant();
if (variant.startsWith("PS")) {
switch (variant) {
case "PSACCENT":
@ -130,7 +148,7 @@ public final class XLikelySubtags {
}
language = getCanonical(languageAliases, language);
// script is ok
// (We have no script mappings.)
region = getCanonical(regionAliases, region);
return INSTANCE.maximize(language, script, region);
}
@ -139,14 +157,31 @@ public final class XLikelySubtags {
* Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
*/
private LSR maximize(String language, String script, String region) {
int retainOldMask = 0;
BytesTrie iter = new BytesTrie(trie);
// language lookup
if (language.equals("und")) {
language = "";
}
if (script.equals("Zzzz")) {
script = "";
}
if (region.equals("ZZ")) {
region = "";
}
if (!script.isEmpty() && !region.isEmpty() && !language.isEmpty()) {
return new LSR(language, script, region); // already maximized
}
int retainOldMask = 0;
BytesTrie iter = new BytesTrie(trie);
long state;
int value = trieNext(iter, language, false);
int value;
// Small optimization: Array lookup for first language letter.
int c0;
if (language.length() >= 2 && 0 <= (c0 = language.charAt(0) - 'a') && c0 <= 25 &&
(state = trieFirstLetterStates[c0]) != 0) {
value = trieNext(iter.resetToState64(state), language, 1);
} else {
value = trieNext(iter, language, 0);
}
if (value >= 0) {
if (!language.isEmpty()) {
retainOldMask |= 4;
@ -157,45 +192,54 @@ public final class XLikelySubtags {
iter.resetToState64(trieUndState); // "und" ("*")
state = 0;
}
// script lookup
if (script.equals("Zzzz")) {
script = "";
}
value = trieNext(iter, script, false);
if (value >= 0) {
if (value > 0) {
// Intermediate or final value from just language.
if (value == SKIP_SCRIPT) {
value = 0;
}
if (!script.isEmpty()) {
retainOldMask |= 2;
}
state = iter.getState64();
} else {
retainOldMask |= 2;
if (state == 0) {
iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
} else {
iter.resetToState64(state);
value = trieNext(iter, "", false);
assert value == 0;
value = trieNext(iter, script, 0);
if (value >= 0) {
if (!script.isEmpty()) {
retainOldMask |= 2;
}
state = iter.getState64();
} else {
retainOldMask |= 2;
if (state == 0) {
iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
} else {
iter.resetToState64(state);
value = trieNext(iter, "", 0);
assert value >= 0;
state = iter.getState64();
}
}
}
// region lookup
if (region.equals("ZZ")) {
region = "";
}
value = trieNext(iter, region, true);
if (value >= 0) {
if (value > 0) {
// Final value from just language or language+script.
if (!region.isEmpty()) {
retainOldMask |= 1;
}
} else {
retainOldMask |= 1;
if (state == 0) {
value = defaultLsrIndex;
value = trieNext(iter, region, 0);
if (value >= 0) {
if (!region.isEmpty()) {
retainOldMask |= 1;
}
} else {
iter.resetToState64(state);
value = trieNext(iter, "", true);
if (value < 0) { // TODO: should never happen?! just assert value >= 0?
return null;
retainOldMask |= 1;
if (state == 0) {
value = defaultLsrIndex;
} else {
iter.resetToState64(state);
value = trieNext(iter, "", 0);
assert value > 0;
}
}
}
@ -220,34 +264,34 @@ public final class XLikelySubtags {
return new LSR(language, script, region);
}
private static final int trieNext(BytesTrie iter, String s, boolean finalSubtag) {
private static final int trieNext(BytesTrie iter, String s, int i) {
BytesTrie.Result result;
if (s.isEmpty()) {
result = iter.next('*');
} else {
int end = s.length() - 1;
for (int i = 0;; ++i) {
result = iter.next(s.charAt(i));
for (;; ++i) {
int c = s.charAt(i);
if (i < end) {
if (!result.hasNext()) {
if (!iter.next(c).hasNext()) {
return -1;
}
} else {
// last character of this subtag
result = iter.next(c | 0x80);
break;
}
}
}
if (!finalSubtag) {
if (result == BytesTrie.Result.INTERMEDIATE_VALUE) {
return 0; // value should be 0, don't care
}
} else {
if (result.hasValue()) {
return iter.getValue();
}
switch (result) {
case NO_MATCH: return -1;
case NO_VALUE: return 0;
case INTERMEDIATE_VALUE:
assert iter.getValue() == SKIP_SCRIPT;
return SKIP_SCRIPT;
case FINAL_VALUE: return iter.getValue();
default: return -1;
}
return -1;
}
LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn,
@ -263,11 +307,16 @@ public final class XLikelySubtags {
// value00 = lookup(result.language, "", "")
BytesTrie iter = new BytesTrie(trie);
int value = trieNext(iter, result.language, false);
int value = trieNext(iter, result.language, 0);
assert value >= 0;
value = trieNext(iter, "", false);
assert value >= 0;
value = trieNext(iter, "", true);
if (value == 0) {
value = trieNext(iter, "", 0);
assert value >= 0;
if (value == 0) {
value = trieNext(iter, "", 0);
}
}
assert value > 0;
LSR value00 = lsrs[value];
boolean favorRegionOk = false;
if (result.script.equals(value00.script)) { //script is default
@ -292,26 +341,24 @@ public final class XLikelySubtags {
}
private Map<String, LSR> getTable() {
Map<String, LSR> map = new LinkedHashMap<>();
Set<String> prefixes = new HashSet<>();
Map<String, LSR> map = new TreeMap<>();
StringBuilder sb = new StringBuilder();
for (BytesTrie.Entry entry : trie) {
sb.setLength(0);
int length = entry.bytesLength();
for (int i = 0; i < length;) {
byte b = entry.byteAt(i++);
sb.append((char) b);
if (i < length && prefixes.contains(sb.toString())) {
sb.append('-');
if (b == '*') {
sb.append("*-");
} else if (b >= 0) {
sb.append((char) b);
} else { // end of subtag
sb.append((char) (b & 0x7f)).append('-');
}
}
String s = sb.toString();
if (entry.value == 0) {
// intermediate match point
prefixes.add(s);
} else {
map.put(s, lsrs[entry.value]);
}
assert sb.length() > 0 && sb.charAt(sb.length() - 1) == '-';
sb.setLength(sb.length() - 1);
map.put(sb.toString(), lsrs[entry.value]);
}
return map;
}

View file

@ -25,9 +25,9 @@ import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R3;
import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
import com.ibm.icu.impl.locale.XLocaleMatcher;
import com.ibm.icu.impl.locale.XLocaleMatcher.Builder;
import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
/**
* Provides a way to match the languages (locales) supported by a product to the
@ -864,30 +864,15 @@ public class LocaleMatcher {
transient ULocale xDefaultLanguage = null;
transient boolean xFavorScript = false;
/**
 * Computes the distance from a desired locale to a supported locale,
 * using the new CLDR syntax (see getBestMatch).
 * The result is not necessarily symmetric in its two arguments.
 * Each language is first maximized with add likely subtags, then compared.
 *
 * <p>This method delegates to the XLocaleMatcher returned by getLocaleMatcher().
 *
 * @param desired A locale desired by the user
 * @param supported A locale supported by a program.
 * @return 0 for a complete match, up to 100 for a complete mismatch
 *     (above the thresholdDistance).
 * @internal
 * @deprecated ICU 59: This API is a technical preview. It may change in an upcoming release.
 */
@Deprecated
public int distance(ULocale desired, ULocale supported) {
    final XLocaleMatcher matcher = getLocaleMatcher();
    return matcher.distance(desired, supported);
}
private synchronized XLocaleMatcher getLocaleMatcher() {
if (xLocaleMatcher == null) {
Builder builder = XLocaleMatcher.builder();
builder.setSupportedLocales(languagePriorityList);
builder.setSupportedULocales(languagePriorityList.getULocales());
if (xDefaultLanguage != null) {
builder.setDefaultLanguage(xDefaultLanguage);
builder.setDefaultULocale(xDefaultLanguage);
}
if (xFavorScript) {
builder.setDistanceOption(DistanceOption.SCRIPT_FIRST);
builder.setFavorSubtag(FavorSubtag.SCRIPT);
}
xLocaleMatcher = builder.build();
}
@ -908,7 +893,13 @@ public class LocaleMatcher {
*/
@Deprecated
public ULocale getBestMatch(LinkedHashSet<ULocale> desiredLanguages, Output<ULocale> outputBestDesired) {
return getLocaleMatcher().getBestMatch(desiredLanguages, outputBestDesired);
if (outputBestDesired == null) {
return getLocaleMatcher().getBestMatch(desiredLanguages);
} else {
XLocaleMatcher.Result result = getLocaleMatcher().getBestMatchResult(desiredLanguages);
outputBestDesired.value = result.getDesiredULocale();
return result.getSupportedULocale();
}
}
/**

View file

@ -22,43 +22,45 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Provides an immutable list of languages (locales) in priority order.
* The string format is based on the Accept-Language format
* Provides an immutable list of languages/locales in priority order.
* The string format is based on the Accept-Language format
* <a href="http://www.ietf.org/rfc/rfc2616.txt">http://www.ietf.org/rfc/rfc2616.txt</a>, such as
* "af, en, fr;q=0.9". Syntactically it is slightly
* more lenient, in allowing extra whitespace between elements, extra commas,
* and more than 3 decimals (on input), and pins between 0 and 1.
*
* <p>In theory, Accept-Language indicates the relative 'quality' of each item,
* but in practice, all of the browsers just take an ordered list, like
* but in practice, all of the browsers just take an ordered list, like
* "en, fr, de", and synthesize arbitrary quality values that put these in the
* right order, like: "en, fr;q=0.7, de;q=0.3". The quality values in these de facto
* semantics thus have <b>nothing</b> to do with the relative qualities of the
* original. Accept-Language also doesn't
* specify the interpretation of multiple instances, eg what "en, fr, en;q=.5"
* means.
* <p>There are various ways to build a LanguagePriorityList, such
* <p>There are various ways to build a LocalePriorityList, such
* as using the following equivalent patterns:
*
*
* <pre>
* list = LanguagePriorityList.add(&quot;af, en, fr;q=0.9&quot;).build();
*
* list2 = LanguagePriorityList
* list = LocalePriorityList.add(&quot;af, en, fr;q=0.9&quot;).build();
*
* list2 = LocalePriorityList
.add(ULocale.forLanguageTag(&quot;af&quot;))
* .add(ULocale.ENGLISH)
* .add(ULocale.FRENCH, 0.9d)
* .build();
* </pre>
* When the list is built, the internal values are sorted in descending order by
* weight, and then by input order. That is, if two languages have the same weight, the first one in the original order
* comes first. If exactly the same language tag appears multiple times,
* the last one wins.
*
* There are two options when building. If preserveWeights are on, then "de;q=0.3, ja;q=0.3, en, fr;q=0.7, de " would result in the following:
* When the list is built, the internal values are sorted in descending order by weight,
* and then by input order.
* That is, if two languages/locales have the same weight, the first one in the original order comes first.
* If exactly the same language tag appears multiple times, the last one wins.
*
* <p>There are two options when building.
* If preserveWeights is on, then "de;q=0.3, ja;q=0.3, en, fr;q=0.7, de " would result in the following:
* <pre> en;q=1.0
* de;q=1.0
* fr;q=0.7
* ja;q=0.3</pre>
* If it is off (the default), then all weights are reset to 1.0 after reordering.
* If it is off (the default), then all weights are reset to 1.0 after reordering.
* This is to match the effect of the Accept-Language semantics as used in browsers, and results in the following:
* <pre> en;q=1.0
* de;q=1.0
@ -73,49 +75,48 @@ public class LocalePriorityList implements Iterable<ULocale> {
private static final Pattern languageSplitter = Pattern.compile("\\s*,\\s*");
private static final Pattern weightSplitter = Pattern
.compile("\\s*(\\S*)\\s*;\\s*q\\s*=\\s*(\\S*)");
.compile("\\s*(\\S*)\\s*;\\s*q\\s*=\\s*(\\S*)");
private final Map<ULocale, Double> languagesAndWeights;
/**
* Add a language code to the list being built, with weight 1.0.
*
* @param languageCode locale/language to be added
* @return internal builder, for chaining
* Creates a Builder and adds locales, each with weight 1.0.
*
* @param locales locales/languages to be added
* @return a new builder with these locales, for chaining
* @stable ICU 4.4
*/
public static Builder add(ULocale... languageCode) {
return new Builder().add(languageCode);
public static Builder add(ULocale... locales) {
return new Builder().add(locales);
}
/**
* Add a language code to the list being built, with specified weight.
*
* @param languageCode locale/language to be added
* Creates a Builder and adds a locale with a specified weight.
*
* @param locale locale/language to be added
* @param weight value from 0.0 to 1.0
* @return internal builder, for chaining
* @return a new builder with this locale, for chaining
* @stable ICU 4.4
*/
public static Builder add(ULocale languageCode, final double weight) {
return new Builder().add(languageCode, weight);
public static Builder add(ULocale locale, final double weight) {
return new Builder().add(locale, weight);
}
/**
* Add a language priority list.
*
* @param languagePriorityList list to add all the members of
* @return internal builder, for chaining
* Creates a Builder and adds locales with weights.
*
* @param list list of locales with weights
* @return a new builder with these locales, for chaining
* @stable ICU 4.4
*/
public static Builder add(LocalePriorityList languagePriorityList) {
return new Builder().add(languagePriorityList);
public static Builder add(LocalePriorityList list) {
return new Builder().add(list);
}
/**
* Add language codes to the list being built, using a string in rfc2616
* (lenient) format, where each language is a valid {@link ULocale}.
*
* @param acceptLanguageString String in rfc2616 format (but leniently parsed)
* @return internal builder, for chaining
* Creates a Builder, parses the RFC 2616 string, and adds locales with weights accordingly.
*
* @param acceptLanguageString String in RFC 2616 format (leniently parsed)
* @return a new builder with these locales, for chaining
* @stable ICU 4.4
*/
public static Builder add(String acceptLanguageString) {
@ -123,15 +124,27 @@ public class LocalePriorityList implements Iterable<ULocale> {
}
/**
* Return the weight for a given language, or null if there is none. Note that
* the weights may be adjusted from those used to build the list.
*
* @param language to get weight of
* Returns the weight for a given language/locale, or null if there is none.
* Note that the weights may be adjusted from those used to build the list.
*
* @param locale to get weight of
* @return weight
* @stable ICU 4.4
*/
public Double getWeight(ULocale language) {
return languagesAndWeights.get(language);
public Double getWeight(ULocale locale) {
return languagesAndWeights.get(locale);
}
/**
 * Returns the locales of this list as a Set view: the key set of the
 * internal locale-to-weight map.
 * The set iterates in the same order as this object itself,
 * because {@link #iterator()} walks the same key set.
 *
 * <p>The set is documented as immutable.
 * NOTE(review): that relies on the backing map being unmodifiable,
 * which is set up outside this method; confirm.
 *
 * @return the locales
 * @draft ICU 65
 * @provisional This API might change or be removed in a future release.
 */
public Set<ULocale> getULocales() {
    final Set<ULocale> localesInPriorityOrder = languagesAndWeights.keySet();
    return localesInPriorityOrder;
}
/**
@ -158,6 +171,7 @@ public class LocalePriorityList implements Iterable<ULocale> {
* {@inheritDoc}
* @stable ICU 4.4
*/
@Override
public Iterator<ULocale> iterator() {
    // Iterate over the keys of the locale-to-weight map, in the map's own order.
    final Set<ULocale> localesInPriorityOrder = languagesAndWeights.keySet();
    return localesInPriorityOrder.iterator();
}
@ -199,7 +213,7 @@ public class LocalePriorityList implements Iterable<ULocale> {
}
/**
* Class used for building LanguagePriorityLists
* Class used for building LocalePriorityLists.
* @stable ICU 4.4
*/
public static class Builder {
@ -207,8 +221,8 @@ public class LocalePriorityList implements Iterable<ULocale> {
* These store the input languages and weights, in chronological order,
* where later additions override previous ones.
*/
private final Map<ULocale, Double> languageToWeight
= new LinkedHashMap<ULocale, Double>();
private final Map<ULocale, Double> languageToWeight
= new LinkedHashMap<>();
/*
* Private constructor, only used by LocalePriorityList
@ -219,7 +233,7 @@ public class LocalePriorityList implements Iterable<ULocale> {
/**
* Creates a LocalePriorityList. This is equivalent to
* {@link Builder#build(boolean) Builder.build(false)}.
*
*
* @return A LocalePriorityList
* @stable ICU 4.4
*/
@ -229,27 +243,26 @@ public class LocalePriorityList implements Iterable<ULocale> {
/**
* Creates a LocalePriorityList.
*
* @param preserveWeights when true, the weights originally came
* from a language priority list specified by add() are preserved.
*
* @param preserveWeights when true, each locale's given weight is preserved.
* @return A LocalePriorityList
* @stable ICU 4.4
*/
public LocalePriorityList build(boolean preserveWeights) {
// Walk through the input list, collecting the items with the same weights.
final Map<Double, Set<ULocale>> doubleCheck = new TreeMap<Double, Set<ULocale>>(
final Map<Double, Set<ULocale>> doubleCheck = new TreeMap<>(
myDescendingDouble);
for (final ULocale lang : languageToWeight.keySet()) {
Double weight = languageToWeight.get(lang);
Set<ULocale> s = doubleCheck.get(weight);
if (s == null) {
doubleCheck.put(weight, s = new LinkedHashSet<ULocale>());
doubleCheck.put(weight, s = new LinkedHashSet<>());
}
s.add(lang);
}
// We now have a bunch of items sorted by weight, then chronologically.
// We can now create a list in the right order
final Map<ULocale, Double> temp = new LinkedHashMap<ULocale, Double>();
final Map<ULocale, Double> temp = new LinkedHashMap<>();
for (Entry<Double, Set<ULocale>> langEntry : doubleCheck.entrySet()) {
final Double weight = langEntry.getKey();
for (final ULocale lang : langEntry.getValue()) {
@ -260,73 +273,72 @@ public class LocalePriorityList implements Iterable<ULocale> {
}
/**
* Adds a LocalePriorityList
*
* @param languagePriorityList a LocalePriorityList
* Adds locales with weights.
*
* @param list list of locales with weights
* @return this, for chaining
* @stable ICU 4.4
*/
public Builder add(
final LocalePriorityList languagePriorityList) {
for (final ULocale language : languagePriorityList.languagesAndWeights
public Builder add(final LocalePriorityList list) {
for (final ULocale language : list.languagesAndWeights
.keySet()) {
add(language, languagePriorityList.languagesAndWeights.get(language));
add(language, list.languagesAndWeights.get(language));
}
return this;
}
/**
* Adds a new language code, with weight = 1.0.
*
* @param languageCode to add with weight 1.0
* Adds a locale with weight 1.0.
*
* @param locale to add with weight 1.0
* @return this, for chaining
* @stable ICU 4.4
*/
public Builder add(final ULocale languageCode) {
return add(languageCode, D1);
public Builder add(final ULocale locale) {
return add(locale, D1);
}
/**
* Adds language codes, with each having weight = 1.0.
*
* @param languageCodes List of language codes.
* Adds locales, each with weight 1.0.
*
* @param locales locales/languages to be added
* @return this, for chaining.
* @stable ICU 4.4
*/
public Builder add(ULocale... languageCodes) {
for (final ULocale languageCode : languageCodes) {
public Builder add(ULocale... locales) {
for (final ULocale languageCode : locales) {
add(languageCode, D1);
}
return this;
}
/**
* Adds a new supported languageCode, with specified weight. Overrides any
* previous weight for the language.
*
* @param languageCode language/locale to add
* Adds a locale with a specified weight.
* Overrides any previous weight for the locale.
* Removes a locale if the weight is zero.
*
* @param locale language/locale to add
* @param weight value between 0.0 and 1.0
* @return this, for chaining.
* @stable ICU 4.4
*/
public Builder add(final ULocale languageCode,
double weight) {
if (languageToWeight.containsKey(languageCode)) {
languageToWeight.remove(languageCode);
public Builder add(final ULocale locale, double weight) {
if (languageToWeight.containsKey(locale)) {
languageToWeight.remove(locale);
}
if (weight <= D0) {
return this; // skip zeros
} else if (weight > D1) {
weight = D1;
}
languageToWeight.put(languageCode, weight);
languageToWeight.put(locale, weight);
return this;
}
/**
* Adds rfc2616 list.
*
* @param acceptLanguageList in rfc2616 format
* Parses the RFC 2616 string, and adds locales with weights accordingly.
*
* @param acceptLanguageList in RFC 2616 format (leniently parsed)
* @return this, for chaining.
* @stable ICU 4.4
*/
@ -351,6 +363,7 @@ public class LocalePriorityList implements Iterable<ULocale> {
}
private static Comparator<Double> myDescendingDouble = new Comparator<Double>() {
@Override
public int compare(Double o1, Double o2) {
int result = o1.compareTo(o2);
return result > 0 ? -1 : result < 0 ? 1 : 0; // Reverse the order.

View file

@ -451,7 +451,7 @@ public class LocaleMatcherTest extends TestFmwk {
@Test
public void testExactMatches() {
String lastBase = "";
TreeSet<ULocale> sorted = new TreeSet<ULocale>();
TreeSet<ULocale> sorted = new TreeSet<>();
for (ULocale loc : ULocale.getAvailableLocales()) {
String language = loc.getLanguage();
if (!lastBase.equals(language)) {
@ -650,10 +650,7 @@ public class LocaleMatcherTest extends TestFmwk {
ULocale bulgarian = new ULocale("bg");
ULocale russian = new ULocale("ru");
assertEquals("es-419/MX", 4, matcher.distance(new ULocale("es","419"), new ULocale("es","MX")));
assertEquals("es-ES/DE", 4, matcher.distance(new ULocale("es","DE"), new ULocale("es","ES")));
Output<ULocale> outputBestDesired = new Output<ULocale>();
Output<ULocale> outputBestDesired = new Output<>();
ULocale best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired);
assertEquals(ULocale.ITALIAN, best);

View file

@ -4,9 +4,7 @@ package com.ibm.icu.dev.test.util;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.junit.Ignore;
import org.junit.Test;
@ -15,7 +13,7 @@ import org.junit.runners.JUnit4;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.locale.LocaleDistance;
import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
import com.ibm.icu.util.LocaleMatcher;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
@ -94,8 +92,8 @@ public class XLocaleDistanceTest extends TestFmwk {
newLikelyTime += System.nanoTime()-temp;
temp = System.nanoTime();
int dist1 = localeDistance.testOnlyDistance(desired, supported, 1000, DistanceOption.REGION_FIRST);
int dist2 = localeDistance.testOnlyDistance(supported, desired, 1000, DistanceOption.REGION_FIRST);
int dist1 = localeDistance.testOnlyDistance(desired, supported, 1000, FavorSubtag.LANGUAGE);
int dist2 = localeDistance.testOnlyDistance(supported, desired, 1000, FavorSubtag.LANGUAGE);
newTimeMinusLikely += System.nanoTime()-temp;
}
}
@ -112,50 +110,6 @@ public class XLocaleDistanceTest extends TestFmwk {
//logln("totalInt:\t" + (intTime)/maxIterations);
}
/**
 * Checks the structure of the distance table's key strings:
 * every key has 1 to 6 subtags, every key's parent prefix is also a key,
 * keys with 2 or 4 subtags have a "-*-*" continuation,
 * and every key is a prefix of (or equal to) some 6-subtag key.
 */
@Test
public void testInternalTable() {
    final Set<String> tableKeys = localeDistance.testOnlyGetDistanceTable(false).keySet();
    // To verify that the table is exactly 3 (desired, supported) pairs deep everywhere,
    // start with a copy of all keys and remove every prefix of each 6-subtag key.
    // Whatever is left over is not a prefix of any full-depth key.
    final Set<String> unreached = new HashSet<>(tableKeys);
    // ANY, ANY must always be present.
    assertTrue("*-*", tableKeys.contains("*-*"));
    for (final String key : tableKeys) {
        final int subtagCount = countSubtags(key);
        assertTrue(key, subtagCount >= 1 && subtagCount <= 6);
        if (subtagCount > 1) {
            final String parent = removeLastSubtag(key);
            assertTrue(parent, tableKeys.contains(parent));
        }
        switch (subtagCount) {
        case 2:
        case 4: {
            final String withAnyAny = key + "-*-*";
            assertTrue(withAnyAny, tableKeys.contains(withAnyAny));
            break;
        }
        case 6: {
            // Remove the full-depth key and each of its five shorter prefixes.
            String prefix = key;
            unreached.remove(prefix);
            for (int subtagsLeft = subtagCount; subtagsLeft > 1; --subtagsLeft) {
                prefix = removeLastSubtag(prefix);
                unreached.remove(prefix);
            }
            break;
        }
        default:
            break;
        }
    }
    assertTrue("strings that do not lead to 6-subtag matches", unreached.isEmpty());
}
/**
 * Counts the hyphen-separated subtags in a string.
 *
 * @param s the string to inspect
 * @return 0 for an empty string, otherwise the number of '-' characters plus one
 */
private static final int countSubtags(String s) {
    final int length = s.length();
    if (length == 0) {
        return 0;
    }
    int subtags = 1;
    for (int i = 0; i < length; ++i) {
        if (s.charAt(i) == '-') {
            ++subtags;
        }
    }
    return subtags;
}
/**
 * Returns the part of the string before its last '-'.
 * The input is expected to contain at least one '-';
 * without one, {@code substring} is called with an end index of -1.
 *
 * @param s a string with at least two hyphen-separated subtags
 * @return s without its last subtag and without the hyphen preceding it
 */
private static final String removeLastSubtag(String s) {
    return s.substring(0, s.lastIndexOf('-'));
}
@Test
public void testShowDistanceTable() {
if (isVerbose()) {
@ -173,7 +127,7 @@ public class XLocaleDistanceTest extends TestFmwk {
class MyTestFileHandler extends DataDrivenTestHelper {
Output<ULocale> bestDesired = new Output<>();
private DistanceOption distanceOption = DistanceOption.REGION_FIRST;
private FavorSubtag favorSubtag = FavorSubtag.LANGUAGE;
private Integer threshold = localeDistance.getDefaultScriptDistance();
@Override
@ -182,20 +136,21 @@ public class XLocaleDistanceTest extends TestFmwk {
breakpoint = false; // put debugger breakpoint here to break at @debug in test file
}
Arguments args = new Arguments(arguments);
int supportedToDesiredActual = localeDistance.testOnlyDistance(args.supported, args.desired, threshold, distanceOption);
int desiredToSupportedActual = localeDistance.testOnlyDistance(args.desired, args.supported, threshold, distanceOption);
String desiredTag = args.desired.toLanguageTag();
String supportedTag = args.supported.toLanguageTag();
final String comment = commentBase.isEmpty() ? "" : "\t# " + commentBase;
if (assertEquals("(" + lineNumber + ") " + desiredTag + " to " + supportedTag + comment, args.desiredToSupported, desiredToSupportedActual)) {
assertEquals("(" + lineNumber + ") " + supportedTag + " to " + desiredTag + comment, args.supportedToDesired, supportedToDesiredActual);
}
int supportedToDesiredActual = localeDistance.testOnlyDistance(args.supported, args.desired, threshold, favorSubtag);
assertEquals("(" + lineNumber + ") " + supportedTag + " to " + desiredTag + comment,
args.supportedToDesired, supportedToDesiredActual);
int desiredToSupportedActual = localeDistance.testOnlyDistance(args.desired, args.supported, threshold, favorSubtag);
assertEquals("(" + lineNumber + ") " + desiredTag + " to " + supportedTag + comment,
args.desiredToSupported, desiredToSupportedActual);
}
@Override
public void handleParams(String comment, List<String> arguments) {
String switchArg = arguments.get(0);
if (switchArg.equals("@DistanceOption")) {
distanceOption = DistanceOption.valueOf(arguments.get(1));
if (switchArg.equals("@FavorSubtag")) {
favorSubtag = FavorSubtag.valueOf(arguments.get(1));
} else if (switchArg.equals("@Threshold")) {
threshold = Integer.valueOf(arguments.get(1));
} else {

View file

@ -7,7 +7,6 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
@ -16,12 +15,12 @@ import org.junit.runner.RunWith;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.locale.LocaleDistance;
import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
import com.ibm.icu.impl.locale.XLikelySubtags;
import com.ibm.icu.impl.locale.XLocaleMatcher;
import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
import com.ibm.icu.util.LocaleMatcher;
import com.ibm.icu.util.LocalePriorityList;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
import junitparams.JUnitParamsRunner;
@ -51,8 +50,9 @@ public class XLocaleMatcherTest extends TestFmwk {
}
// Test helper: builds an XLocaleMatcher whose supported locales come from the given
// priority list and whose threshold distance is set to d.
@SuppressWarnings("unused")
private XLocaleMatcher newXLocaleMatcher(LocalePriorityList string, int d) {
return XLocaleMatcher.builder().setSupportedLocales(string).setThresholdDistance(d).build();
private XLocaleMatcher newXLocaleMatcher(LocalePriorityList list, int d) {
return XLocaleMatcher.builder().setSupportedULocales(list.getULocales()).
internalSetThresholdDistance(d).build();
}
// public void testParentLocales() {
@ -104,10 +104,6 @@ public class XLocaleMatcherTest extends TestFmwk {
// }
// Convenience overload: forwards to the three-argument assertEquals with an empty message.
// Despite its name, the second parameter "string" is passed through unchanged as the
// value to compare against "expected".
private void assertEquals(Object expected, Object string) {
assertEquals("", expected, string);
}
/**
* If all the base languages are the same, then each sublocale matches
* itself most closely
@ -139,40 +135,41 @@ public class XLocaleMatcherTest extends TestFmwk {
check2(sorted);
}
private static final ULocale posix = new ULocale("en_US_POSIX");
/**
 * Builds a matcher whose supported locales are exactly the given set, then checks
 * every locale of that set against the best match the matcher returns for it.
 *
 * @param sorted the locales used both as the supported set and as the desired inputs
 */
private void check2(Set<ULocale> sorted) {
// Log the set under test before building the matcher.
logln("Checking: " + sorted);
XLocaleMatcher matcher = newXLocaleMatcher(
LocalePriorityList.add(
sorted.toArray(new ULocale[sorted.size()]))
.build());
for (ULocale loc : sorted) {
String stringLoc = loc.toString();
assertEquals(stringLoc, matcher.getBestMatch(stringLoc).toString());
// The result may not be the exact same locale, but it must be equivalent.
// Variants and extensions are ignored.
// en_US_POSIX (the "posix" constant) is skipped entirely.
if (loc.equals(posix)) { continue; }
// Compare after adding likely subtags on both sides, so that equivalent
// locales (e.g. with and without explicit script/region) count as equal.
ULocale max = ULocale.addLikelySubtags(loc);
ULocale best = matcher.getBestMatch(loc);
ULocale maxBest = ULocale.addLikelySubtags(best);
assertEquals(loc.toString(), max, maxBest);
}
}
@Test
public void testComputeDistance_monkeyTest() {
String[] codes = ULocale.getISOCountries();
Random random = new Random();
XLocaleMatcher lm = newXLocaleMatcher();
for (int i = 0; i < 1000; ++i) {
String x = codes[random.nextInt(codes.length)];
String y = codes[random.nextInt(codes.length)];
double d = lm.distance(ULocale.forLanguageTag("xx-Xxxx-"+x), ULocale.forLanguageTag("xx-Xxxx-"+y));
if (x.equals("ZZ") || y.equals("ZZ")) {
assertEquals("dist(regionDistance," + x + ") = 0", REGION_DISTANCE, d);
} else if (x.equals(y)) {
assertEquals("dist(x,x) = 0", 0.0, d);
} else {
assertTrue("dist(" + x + "," + y + ") > 0", d > 0);
assertTrue("dist(" + x + "," + y + ") ≤ " + REGION_DISTANCE, d <= REGION_DISTANCE);
}
}
/**
 * Checks how the demotion-per-desired-locale setting changes the best match for the same
 * supported ("fr, de-CH, it") and desired ("fr-CH, de-CH, it") lists:
 * Demotion.NONE is expected to yield de-CH, Demotion.REGION is expected to yield fr.
 */
public void testDemotion() {
    LocalePriorityList supportedList = LocalePriorityList.add("fr, de-CH, it").build();
    LocalePriorityList desiredList = LocalePriorityList.add("fr-CH, de-CH, it").build();

    // Expected winners for the two matcher configurations below.
    final ULocale expectedWithoutDemotion = new ULocale("de-CH");
    final ULocale expectedWithRegionDemotion = ULocale.FRENCH;

    // Matcher that does not demote later desired locales.
    XLocaleMatcher withoutDemotion = XLocaleMatcher.builder()
            .setSupportedULocales(supportedList.getULocales())
            .setDemotionPerDesiredLocale(XLocaleMatcher.Demotion.NONE)
            .build();
    assertEquals("no demotion",
            expectedWithoutDemotion, withoutDemotion.getBestMatch(desiredList));

    // Same supported locales, but with region-level demotion per desired locale.
    XLocaleMatcher withRegionDemotion = XLocaleMatcher.builder()
            .setSupportedULocales(supportedList.getULocales())
            .setDemotionPerDesiredLocale(XLocaleMatcher.Demotion.REGION)
            .build();
    assertEquals("region demotion",
            expectedWithRegionDemotion, withRegionDemotion.getBestMatch(desiredList));
}
private static final class PerfCase {
@ -304,9 +301,9 @@ public class XLocaleMatcherTest extends TestFmwk {
for (PerfCase pc : pcs) {
final ULocale desired = pc.desired;
assertEquals(pc.expectedShort, matcherShort.getBestMatch(desired));
assertEquals(pc.expectedLong, matcherLong.getBestMatch(desired));
assertEquals(pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired));
assertEquals(desired.toString(), pc.expectedShort, matcherShort.getBestMatch(desired));
assertEquals(desired.toString(), pc.expectedLong, matcherLong.getBestMatch(desired));
assertEquals(desired.toString(), pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired));
timeXLocaleMatcher(desired, matcherShort, WARM_UP_ITERATIONS);
timeXLocaleMatcher(desired, matcherLong, WARM_UP_ITERATIONS);
@ -350,9 +347,11 @@ public class XLocaleMatcherTest extends TestFmwk {
String.format("timeLongNew=%d < %d%% of timeLongOld=%d",
timeLongNew, AVG_PCT_LONG_NEW_OLD, timeLongOld),
timeLongNew * 100 < timeLongOld * AVG_PCT_LONG_NEW_OLD);
maximizePerf();
}
private long timeXLocaleMatcher(ULocale desired, XLocaleMatcher matcher, int iterations) {
private static long timeXLocaleMatcher(ULocale desired, XLocaleMatcher matcher, int iterations) {
long start = System.nanoTime();
for (int i = iterations; i > 0; --i) {
matcher.getBestMatch(desired);
@ -361,7 +360,7 @@ public class XLocaleMatcherTest extends TestFmwk {
return (delta / iterations);
}
private long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) {
private static long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) {
long start = System.nanoTime();
for (int i = iterations; i > 0; --i) {
matcher.getBestMatch(desired);
@ -370,6 +369,37 @@ public class XLocaleMatcherTest extends TestFmwk {
return (delta / iterations);
}
/**
 * Informational timing of likely-subtags maximization over a fixed list of language tags.
 * Runs timeMaximize() once to warm up and once to calibrate, derives an iteration count
 * that targets roughly 0.1s of work, measures again, and prints the average time per
 * locale to System.out. Nothing is asserted.
 */
private void maximizePerf() {
    // Note: "cy" appears twice in this list as written.
    final String tags = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
            "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
            "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
            "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
            "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
            "zh-CN, zh-TW, zu";
    LocalePriorityList list = LocalePriorityList.add(tags).build();
    int few = 1000;
    long t = timeMaximize(list, few);  // warm up
    t = timeMaximize(list, few);  // measure for scale
    long targetTime = 100000000L;  // 10^8 ns = 0.1s
    // Guard the calibration: a zero measurement (coarse timer) would divide by zero,
    // a very slow calibration run would round the count down to 0 (and then divide by
    // zero in the report below), and a very fast one could overflow the int cast.
    long scaledIterations = (targetTime * few) / Math.max(t, 1L);
    int iterations = (int) Math.min(Math.max(scaledIterations, 1L), Integer.MAX_VALUE);
    t = timeMaximize(list, iterations);
    // Count the locales by iterating over the priority list.
    int length = 0;
    for (@SuppressWarnings("unused") ULocale locale : list) { ++length; }
    System.out.println("maximize: " + (t / iterations / length) + " ns/locale: " +
            t + " ns / " + iterations + " iterations / " + length + " locales");
}
/**
 * Calls XLikelySubtags.INSTANCE.makeMaximizedLsrFrom() on every locale in the list,
 * repeating the whole pass {@code iterations} times. The results are discarded.
 *
 * @return the total elapsed time in nanoseconds (not divided by the iteration count)
 */
private static long timeMaximize(Iterable<ULocale> list, int iterations) {
    final long startNanos = System.nanoTime();
    int remaining = iterations;
    while (remaining > 0) {
        for (ULocale each : list) {
            XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(each);
        }
        --remaining;
    }
    final long elapsedNanos = System.nanoTime() - startNanos;
    return elapsedNanos;
}
private static final class TestCase implements Cloneable {
private static final String ENDL = System.getProperties().getProperty("line.separator");
@ -384,7 +414,7 @@ public class XLocaleMatcherTest extends TestFmwk {
String supported = "";
String def = "";
String distance = "";
String favor = "";
String threshold = "";
String desired = "";
String expMatch = "";
@ -405,12 +435,12 @@ public class XLocaleMatcherTest extends TestFmwk {
supported = "";
def = "";
distance = "";
favor = "";
threshold = "";
}
// Returns a key built only from the test inputs: the matcher parameters and the
// desired locales, joined with '+'. Expected results are not part of the key.
String toInputsKey() {
return supported + '+' + def + '+' + distance + '+' + threshold + '+' + desired;
return supported + '+' + def + '+' + favor + '+' + threshold + '+' + desired;
}
private static void appendLine(StringBuilder sb, String line) {
@ -471,9 +501,9 @@ public class XLocaleMatcherTest extends TestFmwk {
} else if ((suffix = getSuffixAfterPrefix(line, limit, "@default=")) != null) {
test.defaultLine = line;
test.def = suffix;
} else if ((suffix = getSuffixAfterPrefix(line, limit, "@distance=")) != null) {
} else if ((suffix = getSuffixAfterPrefix(line, limit, "@favor=")) != null) {
test.distanceLine = line;
test.distance = suffix;
test.favor = suffix;
} else if ((suffix = getSuffixAfterPrefix(line, limit, "@threshold=")) != null) {
test.thresholdLine = line;
test.threshold = suffix;
@ -531,31 +561,31 @@ public class XLocaleMatcherTest extends TestFmwk {
@Parameters(method = "readTestCases")
public void dataDriven(TestCase test) {
XLocaleMatcher matcher;
if (test.def.isEmpty() && test.distance.isEmpty() && test.threshold.isEmpty()) {
if (test.def.isEmpty() && test.favor.isEmpty() && test.threshold.isEmpty()) {
matcher = new XLocaleMatcher(test.supported);
} else {
XLocaleMatcher.Builder builder = XLocaleMatcher.builder();
builder.setSupportedLocales(test.supported);
if (!test.def.isEmpty()) {
builder.setDefaultLanguage(new ULocale(test.def));
builder.setDefaultULocale(new ULocale(test.def));
}
if (!test.distance.isEmpty()) {
DistanceOption distance;
switch (test.distance) {
if (!test.favor.isEmpty()) {
FavorSubtag favor;
switch (test.favor) {
case "normal":
distance = DistanceOption.REGION_FIRST;
favor = FavorSubtag.LANGUAGE;
break;
case "script":
distance = DistanceOption.SCRIPT_FIRST;
favor = FavorSubtag.SCRIPT;
break;
default:
throw new IllegalArgumentException("unsupported distance value " + test.distance);
throw new IllegalArgumentException("unsupported FavorSubtag value " + test.favor);
}
builder.setDistanceOption(distance);
builder.setFavorSubtag(favor);
}
if (!test.threshold.isEmpty()) {
int threshold = Integer.valueOf(test.threshold);
builder.setThresholdDistance(threshold);
builder.internalSetThresholdDistance(threshold);
}
matcher = builder.build();
}
@ -566,16 +596,15 @@ public class XLocaleMatcherTest extends TestFmwk {
assertEquals("bestSupported", expMatch, bestSupported);
} else {
LocalePriorityList desired = LocalePriorityList.add(test.desired).build();
Output<ULocale> bestDesired = new Output<>();
ULocale bestSupported = matcher.getBestMatch(desired, bestDesired);
assertEquals("bestSupported", expMatch, bestSupported);
XLocaleMatcher.Result result = matcher.getBestMatchResult(desired);
assertEquals("bestSupported", expMatch, result.getSupportedULocale());
if (!test.expDesired.isEmpty()) {
ULocale expDesired = getULocaleOrNull(test.expDesired);
assertEquals("bestDesired", expDesired, bestDesired.value);
assertEquals("bestDesired", expDesired, result.getDesiredULocale());
}
if (!test.expCombined.isEmpty()) {
ULocale expCombined = getULocaleOrNull(test.expCombined);
ULocale combined = XLocaleMatcher.combine(bestSupported, bestDesired.value);
ULocale combined = result.makeServiceULocale();
assertEquals("combined", expCombined, combined);
}
}

View file

@ -10,7 +10,7 @@
# Lines starting with an '@' sign provide matcher parameters.
# @supported=<comma-separated supported languages>
# @default=<default language> # no value = no explicit default
# @distance=[normal|script] # no value = no explicit setting
# @favor=[normal|script] # no value = no explicit setting
# @threshold=<number 0..100> # no value = no explicit setting
#
# A line with ">>" is a getBestMatch() test case:
@ -93,7 +93,7 @@ zh-HK >> zh-MO
@supported=zh, zh-MO
zh-HK >> zh-MO
@distance=script
@favor=script
@supported=es-419, es-ES
es-AR >> es-419
@supported=es-ES, es-419
@ -153,7 +153,7 @@ zh-Hans-CN >> zh-CN
zh-CN >> zh-CN
zh >> zh-CN
@distance=script
@favor=script
zh-Hant-TW >> zh-TW
zh-Hant >> zh-TW
zh-TW >> zh-TW
@ -169,7 +169,7 @@ es-ES >> es
es-AR >> es-419
es-MX >> es-MX
@distance=script
@favor=script
en-NZ >> en-GB
es-ES >> es
es-AR >> es-419
@ -180,7 +180,7 @@ es-MX >> es-MX
@supported=91, en, hi
sa >> hi
@distance=script
@favor=script
sa >> hi
** test: testBasics
@ -191,7 +191,7 @@ en >> en
fr >> fr
ja >> fr # return first if no match
@distance=script
@favor=script
en-GB >> en-GB
en >> en
fr >> fr
@ -208,7 +208,7 @@ zh-Hans-CN >> zh-CN
zh-Hant-HK >> zh-TW
he-IT >> iw
@distance=script
@favor=script
zh-Hant >> zh-TW
zh >> zh-CN
zh-Hans-CN >> zh-CN
@ -228,7 +228,7 @@ nb >> nn
ja >> en
@distance=script
@favor=script
tl >> fil
mo >> ro
nb >> nn
@ -243,7 +243,7 @@ es-MX >> es-419
en-AU >> en-GB
es-ES >> es
@distance=script
@favor=script
es-MX >> es-419
en-AU >> en-GB
es-ES >> es
@ -257,7 +257,7 @@ zh-HK >> zh-MO
@supported=zh, zh-TW, zh-HK
zh-MO >> zh-HK
@distance=script
@favor=script
@supported=zh, zh-TW, zh-MO
zh-HK >> zh-MO
@supported=zh, zh-TW, zh-HK
@ -272,7 +272,7 @@ und-TW >> zh-Hant # und-TW should be closer to zh-Hant than to zh
zh-Hant >> und-TW # zh-Hant should be closer to und-TW than to en-Hant-TW
zh >> und-TW # zh should be closer to und-TW than to en-Hant-TW
@distance=script
@favor=script
@supported=zh, zh-Hant
und-TW >> zh-Hant
@supported=en-Hant-TW, und-TW
@ -284,14 +284,14 @@ zh >> und-TW
@supported=fr, i-klingon, en-Latn-US
en-GB-oed >> en-Latn-US
@distance=script
@favor=script
en-GB-oed >> en-Latn-US
** test: testGetBestMatchForList-exactMatch
@supported=fr, en-GB, ja, es-ES, es-MX
ja, de >> ja
@distance=script
@favor=script
ja, de >> ja
** test: testGetBestMatchForList-simpleVariantMatch
@ -302,7 +302,7 @@ de, en-US >> en-GB # Intentionally avoiding a perfect-match or two candidates fo
de, zh >> fr
@distance=script
@favor=script
de, en-US >> en-GB
de, zh >> fr
@ -320,7 +320,7 @@ ja-JP, en-US >> ja # Match for ja-Jpan-JP (maximized already)
ja-Jpan-JP, en-US >> ja # Match for ja-Jpan-JP (maximized already)
@distance=script
@favor=script
ja-Jpan-JP, en-AU >> ja
ja-JP, en-US >> ja
ja-Jpan-JP, en-US >> ja
@ -331,7 +331,7 @@ ja-Jpan-JP, en-US >> ja
@supported=en, de, fr, ja
de-CH, fr >> de
@distance=script
@favor=script
de-CH, fr >> de
** test: testBestMatchForTraditionalChinese
@ -357,7 +357,7 @@ zh-TW, en >> en-US
zh-Hant-CN, en >> en-US
zh-Hans, en >> zh-Hans-CN
@distance=script
@favor=script
zh-TW >> zh-Hans-CN
zh-Hant >> zh-Hans-CN
zh-TW, en >> en-US
@ -389,7 +389,7 @@ und >> it
@supported=it, und
en >> it
@distance=script
@favor=script
@supported=it, fr
und >> it
@supported=it, und
@ -408,7 +408,7 @@ en-CA >> en-GB
@supported=de-AT, de-DE, de-CH
de >> de-DE
@distance=script
@favor=script
@supported=es-AR, es
es-MX >> es-AR
@supported=fr, en, en-GB
@ -423,7 +423,7 @@ af >> nl # af => nl
@supported=mul, af
nl >> mul # but nl !=> af
@distance=script
@favor=script
@supported=mul, nl
af >> nl
@supported=mul, af
@ -440,7 +440,7 @@ ja-JP, en-GB >> ja # Match for ja-JP, with likely region subtag
ja-Jpan-JP, en-GB >> ja # Match for ja-Jpan-JP (maximized already)
@distance=script
@favor=script
ja-JP, en-GB >> ja
ja-Jpan-JP, en-GB >> ja
@ -450,7 +450,7 @@ ja-Jpan-JP, en-GB >> ja
de-CH, fr >> de
en-US, ar, nl, de, ja >> en
@distance=script
@favor=script
de-CH, fr >> de
en-US, ar, nl, de, ja >> en
@ -487,7 +487,7 @@ pt-US, pt-PT >> pt-BR
@supported=pt-PT, pt, es, es-419
pt-US, pt-PT, pt >> pt # pt-BR implicit
@distance=script
@favor=script
@supported=pt-PT, pt-BR, es, es-419
pt-PT, es, pt >> pt-PT
@supported=pt-PT, pt, es, es-419
@ -515,7 +515,7 @@ en-GB >> en
@supported=en, sv
en-GB, sv >> en
@distance=script
@favor=script
@supported=fr, en, sv
en-GB >> en
@supported=en, sv
@ -532,7 +532,7 @@ sv >> sv
@supported=af, af-NA, af-ZA, agq, agq-CM, ak, ak-GH, am, am-ET, ar, ar-001, ar-AE, ar-BH, ar-DJ, ar-DZ, ar-EG, ar-EH, ar-ER, ar-IL, ar-IQ, ar-JO, ar-KM, ar-KW, ar-LB, ar-LY, ar-MA, ar-MR, ar-OM, ar-PS, ar-QA, ar-SA, ar-SD, ar-SO, ar-SS, ar-SY, ar-TD, ar-TN, ar-YE, as, as-IN, asa, asa-TZ, ast, ast-ES, az, az-Cyrl, az-Cyrl-AZ, az-Latn, az-Latn-AZ, bas, bas-CM, be, be-BY, bem, bem-ZM, bez, bez-TZ, bg, bg-BG, bm, bm-ML, bn, bn-BD, bn-IN, bo, bo-CN, bo-IN, br, br-FR, brx, brx-IN, bs, bs-Cyrl, bs-Cyrl-BA, bs-Latn, bs-Latn-BA, ca, ca-AD, ca-ES, ca-ES-VALENCIA, ca-FR, ca-IT, ce, ce-RU, cgg, cgg-UG, chr, chr-US, ckb, ckb-IQ, ckb-IR, cs, cs-CZ, cu, cu-RU, cy, cy-GB, da, da-DK, da-GL, dav, dav-KE, de, de-AT, de-BE, de-CH, de-DE, de-LI, de-LU, dje, dje-NE, dsb, dsb-DE, dua, dua-CM, dyo, dyo-SN, dz, dz-BT, ebu, ebu-KE, ee, ee-GH, ee-TG, el, el-CY, el-GR, en, en-001, en-150, en-AG, en-AI, en-AS, en-AT, en-AU, en-BB, en-BE, en-BI, en-BM, en-BS, en-BW, en-BZ, en-CA, en-CC, en-CH, en-CK, en-CM, en-CX, en-CY, en-DE, en-DG, en-DK, en-DM, en-ER, en-FI, en-FJ, en-FK, en-FM, en-GB, en-GD, en-GG, en-GH, en-GI, en-GM, en-GU, en-GY, en-HK, en-IE, en-IL, en-IM, en-IN, en-IO, en-JE, en-JM, en-KE, en-KI, en-KN, en-KY, en-LC, en-LR, en-LS, en-MG, en-MH, en-MO, en-MP, en-MS, en-MT, en-MU, en-MW, en-MY, en-NA, en-NF, en-NG, en-NL, en-NR, en-NU, en-NZ, en-PG, en-PH, en-PK, en-PN, en-PR, en-PW, en-RW, en-SB, en-SC, en-SD, en-SE, en-SG, en-SH, en-SI, en-SL, en-SS, en-SX, en-SZ, en-TC, en-TK, en-TO, en-TT, en-TV, en-TZ, en-UG, en-UM, en-US, en-US-POSIX, en-VC, en-VG, en-VI, en-VU, en-WS, en-ZA, en-ZM, en-ZW, eo, eo-001, es, es-419, es-AR, es-BO, es-CL, es-CO, es-CR, es-CU, es-DO, es-EA, es-EC, es-ES, es-GQ, es-GT, es-HN, es-IC, es-MX, es-NI, es-PA, es-PE, es-PH, es-PR, es-PY, es-SV, es-US, es-UY, es-VE, et, et-EE, eu, eu-ES, ewo, ewo-CM, fa, fa-AF, fa-IR, ff, ff-CM, ff-GN, ff-MR, ff-SN, fi, fi-FI, fil, fil-PH, fo, fo-DK, fo-FO, fr, fr-BE, fr-BF, fr-BI, fr-BJ, fr-BL, fr-CA, fr-CD, fr-CF, fr-CG, 
fr-CH, fr-CI, fr-CM, fr-DJ, fr-DZ, fr-FR, fr-GA, fr-GF, fr-GN, fr-GP, fr-GQ, fr-HT, fr-KM, fr-LU, fr-MA, fr-MC, fr-MF, fr-MG, fr-ML, fr-MQ, fr-MR, fr-MU, fr-NC, fr-NE, fr-PF, fr-PM, fr-RE, fr-RW, fr-SC, fr-SN, fr-SY, fr-TD, fr-TG, fr-TN, fr-VU, fr-WF, fr-YT, fur, fur-IT, fy, fy-NL, ga, ga-IE, gd, gd-GB, gl, gl-ES, gsw, gsw-CH, gsw-FR, gsw-LI, gu, gu-IN, guz, guz-KE, gv, gv-IM, ha, ha-GH, ha-NE, ha-NG, haw, haw-US, he, he-IL, hi, hi-IN, hr, hr-BA, hr-HR, hsb, hsb-DE, hu, hu-HU, hy, hy-AM, id, id-ID, ig, ig-NG, ii, ii-CN, is, is-IS, it, it-CH, it-IT, it-SM, ja, ja-JP, jgo, jgo-CM, jmc, jmc-TZ, ka, ka-GE, kab, kab-DZ, kam, kam-KE, kde, kde-TZ, kea, kea-CV, khq, khq-ML, ki, ki-KE, kk, kk-KZ, kkj, kkj-CM, kl, kl-GL, kln, kln-KE, km, km-KH, kn, kn-IN, ko, ko-KP, ko-KR, kok, kok-IN, ks, ks-IN, ksb, ksb-TZ, ksf, ksf-CM, ksh, ksh-DE, kw, kw-GB, ky, ky-KG, lag, lag-TZ, lb, lb-LU, lg, lg-UG, lkt, lkt-US, ln, ln-AO, ln-CD, ln-CF, ln-CG, lo, lo-LA, lrc, lrc-IQ, lrc-IR, lt, lt-LT, lu, lu-CD, luo, luo-KE, luy, luy-KE, lv, lv-LV, mas, mas-KE, mas-TZ, mer, mer-KE, mfe, mfe-MU, mg, mg-MG, mgh, mgh-MZ, mgo, mgo-CM, mk, mk-MK, ml, ml-IN, mn, mn-MN, mr, mr-IN, ms, ms-BN, ms-MY, ms-SG, mt, mt-MT, mua, mua-CM, my, my-MM, mzn, mzn-IR, naq, naq-NA, nb, nb-NO, nb-SJ, nd, nd-ZW, ne, ne-IN, ne-NP, nl, nl-AW, nl-BE, nl-BQ, nl-CW, nl-NL, nl-SR, nl-SX, nmg, nmg-CM, nn, nn-NO, nnh, nnh-CM, nus, nus-SS, nyn, nyn-UG, om, om-ET, om-KE, or, or-IN, os, os-GE, os-RU, pa, pa-Arab, pa-Arab-PK, pa-Guru, pa-Guru-IN, pl, pl-PL, prg, prg-001, ps, ps-AF, pt, pt-AO, pt-BR, pt-CV, pt-GW, pt-MO, pt-MZ, pt-PT, pt-ST, pt-TL, qu, qu-BO, qu-EC, qu-PE, rm, rm-CH, rn, rn-BI, ro, ro-MD, ro-RO, rof, rof-TZ, root, ru, ru-BY, ru-KG, ru-KZ, ru-MD, ru-RU, ru-UA, rw, rw-RW, rwk, rwk-TZ, sah, sah-RU, saq, saq-KE, sbp, sbp-TZ, se, se-FI, se-NO, se-SE, seh, seh-MZ, ses, ses-ML, sg, sg-CF, shi, shi-Latn, shi-Latn-MA, shi-Tfng, shi-Tfng-MA, si, si-LK, sk, sk-SK, sl, sl-SI, smn, smn-FI, sn, sn-ZW, so, so-DJ, so-ET, so-KE, so-SO, 
sq, sq-AL, sq-MK, sq-XK, sr, sr-Cyrl, sr-Cyrl-BA, sr-Cyrl-ME, sr-Cyrl-RS, sr-Cyrl-XK, sr-Latn, sr-Latn-BA, sr-Latn-ME, sr-Latn-RS, sr-Latn-XK, sv, sv-AX, sv-FI, sv-SE, sw, sw-CD, sw-KE, sw-TZ, sw-UG, ta, ta-IN, ta-LK, ta-MY, ta-SG, te, te-IN, teo, teo-KE, teo-UG, th, th-TH, ti, ti-ER, ti-ET, tk, tk-TM, to, to-TO, tr, tr-CY, tr-TR, twq, twq-NE, tzm, tzm-MA, ug, ug-CN, uk, uk-UA, ur, ur-IN, ur-PK, uz, uz-Arab, uz-Arab-AF, uz-Cyrl, uz-Cyrl-UZ, uz-Latn, uz-Latn-UZ, vai, vai-Latn, vai-Latn-LR, vai-Vaii, vai-Vaii-LR, vi, vi-VN, vo, vo-001, vun, vun-TZ, wae, wae-CH, xog, xog-UG, yav, yav-CM, yi, yi-001, yo, yo-BJ, yo-NG, zgh, zgh-MA, zh, zh-Hans, zh-Hans-CN, zh-Hans-HK, zh-Hans-MO, zh-Hans-SG, zh-Hant, zh-Hant-HK, zh-Hant-MO, zh-Hant-TW, zu, zu-ZA
sv >> sv
@distance=script
@favor=script
@supported=en, sv
sv >> sv
@ -552,7 +552,7 @@ und, en >> en
# http://unicode.org/repos/cldr/tags/latest/common/bcp47/
# http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
@distance=script
@favor=script
und >> it
und, en >> en
@ -561,7 +561,7 @@ und, en >> en
@supported=en-NZ, en-IT
en-US >> en-NZ
@distance=script
@favor=script
en-US >> en-NZ
** test: testEmptySupported => null
@ -587,7 +587,7 @@ fr-PSCRACK >> fr-PSCRACK
fr >> en-PSCRACK
de-CH >> en-PSCRACK
@distance=script
@favor=script
@supported=und, fr
fr-BE-fonipa >> fr
@supported=und, fr-CA
@ -649,7 +649,7 @@ en-VI >> en-GU
@supported=und, en-GU, en-GB, en-IN
en-VI >> en-GU
@distance=script
@favor=script
@supported=und, es, es-MA, es-MX, es-419
es-AR >> es-419
@supported=und, es-MA, es, es-419, es-MX
@ -695,12 +695,12 @@ fr-BE-fonipa >> fr-Cyrl-CA-fonupa | | fr-Cyrl-BE-fonipa
@threshold=50
fr-BE-fonipa >> und
@distance=script
@favor=script
@supported=50, und, fr-CA-fonupa
@threshold=
fr-BE-fonipa >> fr-CA-fonupa | | fr-BE-fonipa
@supported=und, fr-Cyrl-CA-fonupa
fr-BE-fonipa >> fr-Cyrl-CA-fonupa | fr-BE-fonipa
fr-BE-fonipa >> und
** test: testScriptFirst
@supported=ru, fr
@ -711,7 +711,7 @@ sr >> hr
@supported=da, ru, hr
sr >> da
@distance=script
@favor=script
@supported=ru, fr
zh, pl >> fr
zh-Cyrl, pl >> ru
@ -730,11 +730,11 @@ en-US >> en
fr >> fr
ja >> fr
@distance=script
@favor=script
en-GB >> en-GB
en-US >> en
fr >> en-GB
ja >> en-GB
ja >> fr
** test: testEmptyWithDefault
@default=en
@ -765,7 +765,7 @@ ja-JP >> fr
zu >> en-GB
zxx >> fr
@distance=script
@favor=script
en-GB >> en-GB
en-US >> en
fr-FR >> fr
@ -792,7 +792,7 @@ ja-Jpan-JP, en-GB >> ja
@supported=fr, zh-Hant, en
zh, en >> en
@distance=script
@favor=script
zh, en >> en
** test: TestCloseEnoughMatchOnMaximized
@ -829,7 +829,7 @@ pt-US, pt-PT >> pt-BR
@supported=pt-PT, pt, es, es-419
pt-US, pt-PT >> pt
@distance=script
@favor=script
@supported=pt-BR, es, es-419
pt-PT, es, pt >> pt-BR
@supported=pt-PT, pt, es, es-419
@ -844,7 +844,7 @@ fr-CA, en-CA >> fr
@supported=zh-Hant, zh-TW
zh-HK >> zh-Hant
@distance=script
@favor=script
@supported=en-GB, en
en-CA >> en-GB
@supported=fr, en-GB, en
@ -871,7 +871,7 @@ zh-Hans-CN >> zh-CN
zh-Hant-HK >> zh-TW
he-IT >> iw
@distance=script
@favor=script
zh-Hant >> zh-TW
zh >> zh-CN
zh-Hans-CN >> zh-CN
@ -894,7 +894,7 @@ en-AU >> en-GB
es-MX >> es-419
es-PT >> es-ES
@distance=script
@favor=script
en-AU >> en-GB
es-MX >> es-419
es-PT >> es-ES
@ -930,7 +930,7 @@ en >> it
en-GB >> en
en-GB, sv >> en
@distance=script
@favor=script
en-GB, sv >> en
** test: Serbian
@ -951,7 +951,7 @@ sr >> sr-Latn
@supported=und, sr
sr-Latn >> sr
@distance=script
@favor=script
sr-ME >> sr
@supported=und, sr-ME
sr >> sr-ME
@ -976,7 +976,7 @@ x-bork >> x-bork
x-piglatin >> fr
x-bork >> x-bork
@distance=script
@favor=script
@supported=fr, x-bork, en-Latn-US
x-piglatin >> x-bork
x-bork >> x-bork
@ -989,7 +989,7 @@ x-bork >> x-bork
en-GB-oed >> en-Latn-US
i-klingon >> tlh
@distance=script
@favor=script
en-GB-oed >> en-Latn-US
i-klingon >> tlh
@ -1007,7 +1007,7 @@ pt-BR >> pt
pt-PT-PSCRACK >> pt-PT-PSCRACK
zh-Hans-PSCRACK >> zh-Hans-PSCRACK
@distance=script
@favor=script
de >> fr
en-US >> fr
en >> fr
@ -1030,7 +1030,7 @@ en-XC >> en-XC
pt-BR >> pt
zh-Hans-XC >> zh-Hans-XC
@distance=script
@favor=script
de >> fr
en-US >> fr
en >> fr
@ -1052,20 +1052,20 @@ en >> en-DE
ar-EG >> ar-SY
pt-BR >> pt
ar-XB >> ar-XB
ar-PSBIDI >> ar-PSBIDI
ar-PSBIDI >> ar-XB # These are equivalent.
en-XA >> en-XA
en-PSACCENT >> en-PSACCENT
en-PSACCENT >> en-XA # These are equivalent.
ar-PSCRACK >> ar-PSCRACK
@distance=script
@favor=script
de >> en-DE
en >> en-DE
ar-EG >> ar-SY
pt-BR >> pt
ar-XB >> ar-XB
ar-PSBIDI >> ar-PSBIDI
ar-PSBIDI >> ar-XB # These are equivalent.
en-XA >> en-XA
en-PSACCENT >> en-PSACCENT
en-PSACCENT >> en-XA # These are equivalent.
ar-PSCRACK >> ar-PSCRACK
** test: BestMatchForTraditionalChinese
@ -1095,7 +1095,7 @@ zh-Hans, en >> zh-Hans-CN
@supported=en, fr-CA
en-US, fr-CA >> en
@distance=script
@favor=script
en-US, fr-CA >> en
** test: SiblingDefaultRegion
@ -1111,15 +1111,15 @@ de >> und
@default=und
hi >> und
@distance=script
hi >> de
@favor=script
hi >> und
** test: MatchedLanguageIgnoresDefault
@supported=de, en, fr
@default=und
fr >> fr
@distance=script
@favor=script
fr >> fr
## GenX
@ -1168,9 +1168,9 @@ es-US >> es-MX
es-UY >> es-MX
es-VE >> es-MX
@distance=script
@favor=script
es-001 >> es
und >> es
und >> und
ca >> es
gl-ES >> es
es >> es
@ -1254,9 +1254,9 @@ es-US >> es-419
es-UY >> es-419
es-VE >> es-419
@distance=script
@favor=script
es-001 >> es
und >> es
und >> und
ca >> es
gl-ES >> es
es >> es
@ -1319,9 +1319,9 @@ en-ZA >> en-GB
en-US >> en-US
en >> en-US
@distance=script
und >> en-GB
ja >> en-GB
@favor=script
und >> und
ja >> und
fr-CA >> en-GB
en-AU >> en-GB
en-BZ >> en-GB
@ -1355,10 +1355,10 @@ fr >> und
@supported=pl, ja, ca
fr >> und
@distance=script
@favor=script
@supported=en-GB, en-US, en, en-AU
und >> en-GB
ja >> en-GB
und >> und
ja >> und
fr-CA >> en-GB
fr >> en-GB
@supported=en-AU, ja, ca
@ -1384,7 +1384,7 @@ zh-Hant-HK >> zh-TW
@default=iw
he-IT >> iw
@distance=script
@favor=script
he-IT >> iw
** test: language-specific script fallbacks 1
@ -1395,7 +1395,7 @@ hr >> en
bs >> en
nl-Cyrl >> en # Mark: Expected value should be en not sr. Script difference exceeds threshold, so can't be nl
@distance=script
@favor=script
sr-Latn >> sr
hr >> en
bs >> en
@ -1408,7 +1408,7 @@ sr-Cyrl >> sr-Latn
@default=und
hr >> und
@distance=script
@favor=script
@default=
sr >> sr-Latn
sr-Cyrl >> sr-Latn
@ -1419,45 +1419,45 @@ hr >> en
@supported=en, sr-Latn
hr >> en
@distance=script
@favor=script
hr >> en
** test: both deprecated and not
@supported=fil, tl, iw, he
he-IT >> iw
he >> he
he >> iw
iw >> iw
fil-IT >> fil
fil >> fil
tl >> tl
tl >> fil
@distance=script
@favor=script
he-IT >> iw
he >> he
he >> iw
iw >> iw
fil-IT >> fil
fil >> fil
tl >> tl
tl >> fil
** test: nearby languages: Nynorsk to Bokmål
@supported=en, nb
nn >> nb
@distance=script
@favor=script
nn >> nb
** test: nearby languages: Danish does not match nn
@supported=en, nn
da >> en
@distance=script
@favor=script
da >> en
** test: nearby languages: Danish matches no
@supported=en, no
da >> no
@distance=script
@favor=script
da >> no
** test: nearby languages: Danish matches nb
@ -1469,7 +1469,7 @@ da >> nb
no, en-US >> nn
nb, en-US >> nn
@distance=script
@favor=script
no, en-US >> nn
nb, en-US >> nn
@ -1477,7 +1477,7 @@ nb, en-US >> nn
@supported=nl, he, en-GB
iw, en-US >> he
@distance=script
@favor=script
iw, en-US >> he
** test: macro equivalent is closer than same language with other differences
@ -1485,7 +1485,7 @@ iw, en-US >> he
cmn, en-US >> zh
nb, en-US >> no
@distance=script
@favor=script
cmn, en-US >> zh
nb, en-US >> no
@ -1493,18 +1493,18 @@ nb, en-US >> no
@supported=nl, fil, en-GB
tl, en-US >> fil
@distance=script
@favor=script
tl, en-US >> fil
** test: distinguish near equivalents
@supported=en, ro, mo, ro-MD
ro >> ro
mo >> mo
mo >> ro # ro=mo for the locale matcher
ro-MD >> ro-MD
@distance=script
@favor=script
ro >> ro
mo >> mo
mo >> ro # ro=mo for the locale matcher
ro-MD >> ro-MD
** test: maximization of legacy
@ -1512,7 +1512,7 @@ ro-MD >> ro-MD
sh >> sr-Latn
mo >> ro
@distance=script
@favor=script
sh >> sr-Latn
mo >> ro
@ -1544,31 +1544,50 @@ zh-TW, en >> en-US
zh-Hant-CN, en >> en-US
zh-Hans, en >> zh-Hans-CN
** test: more specific script should win in case regions are identical
** test: return first among likely-subtags equivalent locales
# Was: more specific script should win in case regions are identical
# with some different results.
@supported=af, af-Latn, af-Arab
af >> af
af-ZA >> af
af-Latn-ZA >> af
af-Latn >> af-Latn
af-Latn >> af
@distance=script
@favor=script
af >> af
af-ZA >> af
af-Latn-ZA >> af
af-Latn >> af-Latn
af-Latn >> af
** test: more specific region should win
# Was: more specific region should win
# with some different results.
@supported=nl, nl-NL, nl-BE
@favor=
nl >> nl
nl-Latn >> nl
nl-Latn-NL >> nl
nl-NL >> nl-NL
nl-NL >> nl
@distance=script
@favor=script
nl >> nl
nl-Latn >> nl
nl-Latn-NL >> nl
nl-NL >> nl-NL
nl-NL >> nl
# Was: more specific region wins over more specific script
# with some different results.
@supported=nl, nl-Latn, nl-NL, nl-BE
@favor=
nl >> nl
nl-Latn >> nl
nl-NL >> nl
nl-Latn-NL >> nl
@favor=script
nl >> nl
nl-Latn >> nl
nl-NL >> nl
nl-Latn-NL >> nl
** test: region may replace matched if matched is enclosing
@supported=es-419, es
@ -1577,37 +1596,24 @@ es-MX >> es-419
@default=
es-SG >> es
@distance=script
@favor=script
@default=es-MX
es-MX >> es-419
@default=
es-SG >> es
** test: more specific region wins over more specific script
@supported=nl, nl-Latn, nl-NL, nl-BE
nl >> nl
nl-Latn >> nl-Latn
nl-NL >> nl-NL
nl-Latn-NL >> nl
@distance=script
nl >> nl
nl-Latn >> nl-Latn
nl-NL >> nl-NL
nl-Latn-NL >> nl
** test: region distance Portuguese
@supported=pt, pt-PT
pt-ES >> pt-PT
@distance=script
@favor=script
pt-ES >> pt-PT
** test: if no preferred locale specified, pick top language, not regional
@supported=en, fr, fr-CA, fr-CH
fr-US >> fr
@distance=script
@favor=script
fr-US >> fr
** test: region distance German
@ -1622,7 +1628,7 @@ es-MX >> es-419
@default=
es-PT >> es-ES
@distance=script
@favor=script
en-AU >> en-GB
es-MX >> es-419
@default=
@ -1649,7 +1655,7 @@ und-Hans >> zh
und-Hant >> zh
und-Latn >> it
@distance=script
@favor=script
und-FR >> fr
und-CN >> zh
und-Hans >> zh
@ -1664,22 +1670,22 @@ ja-Jpan-JP, en-GB >> ja
** test: pick best maximized tag
@supported=ja, ja-Jpan-US, ja-JP, en, ru
ja-Jpan, ru >> ja
ja-JP, ru >> ja-JP
ja-JP, ru >> ja
ja-US, ru >> ja-Jpan-US
@distance=script
@favor=script
ja-Jpan, ru >> ja
ja-JP, ru >> ja-JP
ja-JP, ru >> ja
ja-US, ru >> ja-Jpan-US
** test: termination: pick best maximized match
@supported=ja, ja-Jpan, ja-JP, en, ru
ja-Jpan-JP, ru >> ja
ja-Jpan, ru >> ja-Jpan
ja-Jpan, ru >> ja
@distance=script
@favor=script
ja-Jpan-JP, ru >> ja
ja-Jpan, ru >> ja-Jpan
ja-Jpan, ru >> ja
** test: same language over exact, but distinguish when user is explicit
@supported=fr, en-GB, ja, es-ES, es-MX
@ -1690,7 +1696,7 @@ de-CH, fr >> de
en, nl >> en-GB
en, nl, en-GB >> en-GB
@distance=script
@favor=script
@supported=fr, en-GB, ja, es-ES, es-MX
ja, de >> ja
@supported=en, de, fr, ja
@ -1767,7 +1773,7 @@ pt-MZ >> pt-PT
pt-ST >> pt-PT
pt-TL >> pt-PT
@distance=script
@favor=script
en-150 >> en-GB
en-AU >> en-GB
en-BE >> en-GB
@ -1845,7 +1851,7 @@ sl-HR-NEDIS-u-cu-eur >> sl-NEDIS
@default=de-t-m0-iso-i0-pinyin
de-t-m0-iso-i0-pinyin >> de
@distance=script
@favor=script
@default=de-u-co-phonebk
de-FR-u-co-phonebk >> de
@default=sl-NEDIS-u-cu-eur
@ -1865,28 +1871,28 @@ de-t-m0-iso-i0-pinyin >> de
@supported=de
fr >> de
@distance=script
@favor=script
fr >> de
** test: testLooseMatchForGeneral_getBestMatches
@supported=es-419
es-MX >> es-419
@distance=script
@favor=script
es-MX >> es-419
** test: testLooseMatchForEnglish_getBestMatches
@supported=en, en-GB
en-CA >> en-GB
@distance=script
@favor=script
en-CA >> en-GB
** test: testLooseMatchForChinese_getBestMatches
@supported=zh
zh-TW >> zh
@distance=script
@favor=script
zh-TW >> zh
## Geo
@ -1894,7 +1900,7 @@ zh-TW >> zh
** test: testGetBestMatchWithMinMatchScore
@supported=fr-FR, fr, fr-CA, en
@default=und
fr >> fr # Exact match is chosen.
fr >> fr-FR # First likely-subtags equivalent match is chosen.
@supported=en, fr, fr-CA
fr-FR >> fr # Parent match is chosen.
@supported=en, fr-CA
@ -1922,9 +1928,9 @@ zh-CN >> zh-TW
@supported=ja
ru >> und
@distance=script
@favor=script
@supported=fr-FR, fr, fr-CA, en
fr >> fr
fr >> fr-FR
@supported=en, fr, fr-CA
fr-FR >> fr
@supported=en, fr-CA
@ -1935,19 +1941,19 @@ fr-SN >> fr-CA
@supported=en, fr-FR
fr >> fr-FR
@supported=de, en, it
fr >> de
fr >> en
@supported=iw, en
iw-Latn >> en
@supported=iw, no
ru >> iw
ru >> und
@supported=iw-Latn, iw-Cyrl, iw
ru >> iw-Cyrl
@supported=iw, iw-Latn
ru >> iw
ru >> und
en >> iw-Latn
@supported=en, uk
ru >> uk
@supported=zh-TW, en
zh-CN >> zh-TW
@supported=ja
ru >> ja
ru >> und