ICU-20467 replace the LocaleMatcher implementation, load data from new bundle

- remove the old LocaleMatcher implementation code
- move the XLocaleMatcher code into LocaleMatcher, same for test
- remove unused internal methods
- stop comparing old vs. new performance
- generate langInfo.txt resource bundle file with precomputed likely-subtags and matcher data
- make genrb handle multi-line binary values
- load likely-subtags & distance data from new langInfo.res bundle
- test that built data == loaded data
- move data builders to tools, no more runtime dependency on builder code
This commit is contained in:
Markus Scherer 2019-03-15 17:13:11 -07:00
parent 93fde1c459
commit 61c4a728cd
23 changed files with 4475 additions and 2775 deletions

View file

@ -191,7 +191,7 @@ summarizes the ICU data files and their corresponding features and categories:
| Confusables | `"confusables"` | unidata/confusables\*.txt | 45 KiB |
| Currencies | `"misc"` <br/> `"curr_supplemental"` <br/> `"curr_tree"` | misc/currencyNumericCodes.txt <br/> curr/supplementalData.txt <br/> curr/\*.txt | 3.1 KiB <br/> 27 KiB <br/> **2.5 MiB** |
| Language Display <br/> Names | `"lang_tree"` | lang/\*.txt | **2.1 MiB** |
| Language Tags | `"misc"` | misc/keyTypeData.txt <br/> misc/likelySubtags.txt <br/> misc/metadata.txt | 6.8 KiB <br/> 53 KiB <br/> 33 KiB |
| Language Tags | `"misc"` | misc/keyTypeData.txt <br/> misc/langInfo.txt <br/> misc/likelySubtags.txt <br/> misc/metadata.txt | 6.8 KiB <br/> 37 KiB <br/> 53 KiB <br/> 33 KiB |
| Normalization | `"normalization"` | in/\*.nrm except in/nfc.nrm | 160 KiB |
| Plural Rules | `"misc"` | misc/pluralRanges.txt <br/> misc/plurals.txt | 3.3 KiB <br/> 33 KiB |
| Region Display <br/> Names | `"region_tree"` | region/\*.txt | **1.1 MiB** |

File diff suppressed because it is too large Load diff

View file

@ -205,10 +205,10 @@ main(int argc,
"\t-c or --copyright include copyright notice\n");
fprintf(stderr,
"\t-e or --encoding encoding of source files\n"
"\t-d of --destdir destination directory, followed by the path, defaults to %s\n"
"\t-s or --sourcedir source directory for files followed by path, defaults to %s\n"
"\t-d or --destdir destination directory, followed by the path, defaults to '%s'\n"
"\t-s or --sourcedir source directory for files followed by path, defaults to '%s'\n"
"\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
"\t followed by path, defaults to %s\n",
"\t followed by path, defaults to '%s'\n",
u_getDataDirectory(), u_getDataDirectory(), u_getDataDirectory());
fprintf(stderr,
"\t-j or --write-java write a Java ListResourceBundle for ICU4J, followed by optional encoding\n"

View file

@ -274,11 +274,11 @@ expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenV
}
}
static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment,
int32_t &stringLength, UErrorCode *status)
{
struct UString *tokenValue;
char *result;
uint32_t count;
expect(state, TOK_STRING, &tokenValue, comment, line, status);
@ -287,14 +287,13 @@ static char *getInvariantString(ParseState* state, uint32_t *line, struct UStrin
return NULL;
}
count = u_strlen(tokenValue->fChars);
if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
if(!uprv_isInvariantUString(tokenValue->fChars, tokenValue->fLength)) {
*status = U_INVALID_FORMAT_ERROR;
error(*line, "invariant characters required for table keys, binary data, etc.");
return NULL;
}
result = static_cast<char *>(uprv_malloc(count+1));
result = static_cast<char *>(uprv_malloc(tokenValue->fLength+1));
if (result == NULL)
{
@ -302,7 +301,8 @@ static char *getInvariantString(ParseState* state, uint32_t *line, struct UStrin
return NULL;
}
u_UCharsToChars(tokenValue->fChars, result, count+1);
u_UCharsToChars(tokenValue->fChars, result, tokenValue->fLength+1);
stringLength = tokenValue->fLength;
return result;
}
@ -1371,7 +1371,6 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US
int32_t value;
UBool readToken = FALSE;
char *stopstring;
uint32_t len;
struct UString memberComments;
IntVectorResource *result = intvector_open(state->bundle, tag, comment, status);
@ -1404,7 +1403,8 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US
return result;
}
string = getInvariantString(state, NULL, NULL, status);
int32_t stringLength;
string = getInvariantString(state, NULL, NULL, stringLength, status);
if (U_FAILURE(*status))
{
@ -1414,9 +1414,9 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US
/* For handling illegal char in the Intvector */
value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
len=(uint32_t)(stopstring-string);
int32_t len = (int32_t)(stopstring-string);
if(len==uprv_strlen(string))
if(len==stringLength)
{
result->add(value, *status);
uprv_free(string);
@ -1454,7 +1454,8 @@ static struct SResource *
parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
{
uint32_t line;
LocalMemory<char> string(getInvariantString(state, &line, NULL, status));
int32_t stringLength;
LocalMemory<char> string(getInvariantString(state, &line, NULL, stringLength, status));
if (string.isNull() || U_FAILURE(*status))
{
return NULL;
@ -1470,46 +1471,45 @@ parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UStri
printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
uint32_t count = (uint32_t)uprv_strlen(string.getAlias());
if (count > 0){
if((count % 2)==0){
LocalMemory<uint8_t> value;
if (value.allocateInsteadAndCopy(count) == NULL)
{
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
LocalMemory<uint8_t> value;
int32_t count = 0;
if (stringLength > 0 && value.allocateInsteadAndCopy(stringLength) == NULL)
{
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
char toConv[3] = {'\0', '\0', '\0'};
for (uint32_t i = 0; i < count; i += 2)
{
toConv[0] = string[i];
toConv[1] = string[i + 1];
char *stopstring;
value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
uint32_t len=(uint32_t)(stopstring-toConv);
if(len!=2)
{
*status=U_INVALID_CHAR_FOUND;
return NULL;
}
}
return bin_open(state->bundle, tag, count >> 1, value.getAlias(), NULL, comment, status);
char toConv[3] = {'\0', '\0', '\0'};
for (int32_t i = 0; i < stringLength;)
{
// Skip spaces (which may have been line endings).
char c0 = string[i++];
if (c0 == ' ') { continue; }
if (i == stringLength) {
*status=U_INVALID_CHAR_FOUND;
error(line, "Encountered invalid binary value (odd number of hex digits)");
return NULL;
}
else
toConv[0] = c0;
toConv[1] = string[i++];
char *stopstring;
value[count++] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
uint32_t len=(uint32_t)(stopstring-toConv);
if(len!=2)
{
*status = U_INVALID_CHAR_FOUND;
error(line, "Encountered invalid binary value (length is odd)");
*status=U_INVALID_CHAR_FOUND;
error(line, "Encountered invalid binary value (not all pairs of hex digits)");
return NULL;
}
}
else
{
if (count == 0) {
warning(startline, "Encountered empty binary value");
return bin_open(state->bundle, tag, 0, NULL, "", comment, status);
} else {
return bin_open(state->bundle, tag, count, value.getAlias(), NULL, comment, status);
}
}
@ -1520,9 +1520,9 @@ parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UStr
int32_t value;
char *string;
char *stopstring;
uint32_t len;
string = getInvariantString(state, NULL, NULL, status);
int32_t stringLength;
string = getInvariantString(state, NULL, NULL, stringLength, status);
if (string == NULL || U_FAILURE(*status))
{
@ -1541,7 +1541,7 @@ parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UStr
printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
if (uprv_strlen(string) <= 0)
if (stringLength == 0)
{
warning(startline, "Encountered empty integer. Default value is 0.");
}
@ -1549,8 +1549,8 @@ parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UStr
/* Allow integer support for hexadecimal, octal and decimal digits */
/* and handle illegal char in the integer*/
value = uprv_strtoul(string, &stopstring, 0);
len=(uint32_t)(stopstring-string);
if(len==uprv_strlen(string))
int32_t len = (int32_t)(stopstring-string);
if(len==stringLength)
{
result = int_open(state->bundle, tag, value, comment, status);
}
@ -1567,7 +1567,8 @@ static struct SResource *
parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
{
uint32_t line;
LocalMemory<char> filename(getInvariantString(state, &line, NULL, status));
int32_t stringLength;
LocalMemory<char> filename(getInvariantString(state, &line, NULL, stringLength, status));
if (U_FAILURE(*status))
{
return NULL;
@ -1628,12 +1629,11 @@ parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UStr
UCHARBUF *ucbuf;
char *fullname = NULL;
int32_t count = 0;
const char* cp = NULL;
const UChar* uBuffer = NULL;
filename = getInvariantString(state, &line, NULL, status);
count = (int32_t)uprv_strlen(filename);
int32_t stringLength;
filename = getInvariantString(state, &line, NULL, stringLength, status);
if (U_FAILURE(*status))
{
@ -1652,7 +1652,7 @@ parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UStr
printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
fullname = (char *) uprv_malloc(state->inputdirLength + stringLength + 2);
/* test for NULL */
if(fullname == NULL)
{

View file

@ -368,6 +368,7 @@
<pathelement location="${icu4j.regiondata.jar}"/>
<pathelement location="${icu4j.translit.jar}"/>
<pathelement location="${icu4j.test-framework.jar}"/>
<pathelement location="${icu4j.tools.jar}"/>
<pathelement location="${icu4j.core-tests.jar}"/>
<pathelement location="${icu4j.collate-tests.jar}"/>
<pathelement location="${icu4j.charset-tests.jar}"/>
@ -1201,7 +1202,7 @@
</ant>
</target>
<target name="core-tests" depends="core, test-framework" description="Build core tests">
<target name="core-tests" depends="core, test-framework, tools" description="Build core tests">
<ant dir="${icu4j.core-tests.dir}" inheritAll="false">
<reference refid="junit.jars"/>
</ant>
@ -1249,7 +1250,7 @@
<ant dir="${icu4j.build-tools.dir}" inheritAll="false"/>
</target>
<target name="tools" depends="core, core-tests, collate, translit, translit-tests" description="Build tool classes">
<target name="tools" depends="core, collate, translit" description="Build tool classes">
<ant dir="${icu4j.tools.dir}" inheritAll="false"/>
</target>

View file

@ -4,18 +4,18 @@ package com.ibm.icu.impl.locale;
import java.util.Objects;
final class LSR {
static final int REGION_INDEX_LIMIT = 1001 + 26 * 26;
public final class LSR {
public static final int REGION_INDEX_LIMIT = 1001 + 26 * 26;
static final boolean DEBUG_OUTPUT = false;
public static final boolean DEBUG_OUTPUT = false;
final String language;
final String script;
final String region;
public final String language;
public final String script;
public final String region;
/** Index for region, negative if ill-formed. @see indexForRegion */
final int regionIndex;
LSR(String language, String script, String region) {
public LSR(String language, String script, String region) {
this.language = language;
this.script = script;
this.region = region;
@ -27,7 +27,7 @@ final class LSR {
* Do not rely on a particular region->index mapping; it may change.
* Returns 0 for ill-formed strings.
*/
static final int indexForRegion(String region) {
public static final int indexForRegion(String region) {
if (region.length() == 2) {
int a = region.charAt(0) - 'A';
if (a < 0 || 25 < a) { return 0; }

View file

@ -2,12 +2,20 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.MissingResourceException;
import java.util.Set;
import java.util.TreeMap;
import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.UResource;
import com.ibm.icu.util.BytesTrie;
import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
import com.ibm.icu.util.ULocale;
/**
@ -16,17 +24,17 @@ import com.ibm.icu.util.ULocale;
*/
public class LocaleDistance {
/** Distance value bit flag, set by the builder. */
static final int DISTANCE_SKIP_SCRIPT = 0x80;
public static final int DISTANCE_SKIP_SCRIPT = 0x80;
/** Distance value bit flag, set by trieNext(). */
private static final int DISTANCE_IS_FINAL = 0x100;
private static final int DISTANCE_IS_FINAL_OR_SKIP_SCRIPT =
DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT;
// Indexes into array of distances.
static final int IX_DEF_LANG_DISTANCE = 0;
static final int IX_DEF_SCRIPT_DISTANCE = 1;
static final int IX_DEF_REGION_DISTANCE = 2;
static final int IX_MIN_REGION_DISTANCE = 3;
static final int IX_LIMIT = 4;
public static final int IX_DEF_LANG_DISTANCE = 0;
public static final int IX_DEF_SCRIPT_DISTANCE = 1;
public static final int IX_DEF_REGION_DISTANCE = 2;
public static final int IX_MIN_REGION_DISTANCE = 3;
public static final int IX_LIMIT = 4;
private static final int ABOVE_THRESHOLD = 100;
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
@ -54,22 +62,100 @@ public class LocaleDistance {
private final int minRegionDistance;
private final int defaultDemotionPerDesiredLocale;
// TODO: Load prebuilt data from a resource bundle
// to avoid the dependency on the builder code.
// VisibleForTesting
public static final LocaleDistance INSTANCE = LocaleDistanceBuilder.build();
public static final class Data {
public byte[] trie;
public byte[] regionToPartitionsIndex;
public String[] partitionArrays;
public Set<LSR> paradigmLSRs;
public int[] distances;
LocaleDistance(BytesTrie trie,
byte[] regionToPartitionsIndex, String[] partitionArrays,
Set<LSR> paradigmLSRs, int[] distances) {
this.trie = trie;
this.regionToPartitionsIndex = regionToPartitionsIndex;
this.partitionArrays = partitionArrays;
this.paradigmLSRs = paradigmLSRs;
defaultLanguageDistance = distances[IX_DEF_LANG_DISTANCE];
defaultScriptDistance = distances[IX_DEF_SCRIPT_DISTANCE];
defaultRegionDistance = distances[IX_DEF_REGION_DISTANCE];
this.minRegionDistance = distances[IX_MIN_REGION_DISTANCE];
public Data(byte[] trie,
byte[] regionToPartitionsIndex, String[] partitionArrays,
Set<LSR> paradigmLSRs, int[] distances) {
this.trie = trie;
this.regionToPartitionsIndex = regionToPartitionsIndex;
this.partitionArrays = partitionArrays;
this.paradigmLSRs = paradigmLSRs;
this.distances = distances;
}
/**
 * Fetches a required entry of the "match" table.
 *
 * @param table the table to search
 * @param key the key of the required entry
 * @param value receives the entry via table.findValue(); also the return value
 * @return {@code value}, if table.findValue() reported success
 * @throws MissingResourceException if table.findValue() reports that the key is absent
 */
private static UResource.Value getValue(UResource.Table table,
        String key, UResource.Value value) {
    final boolean found = table.findValue(key, value);
    if (found) {
        return value;
    }
    final String resourcePath = "match/" + key;
    throw new MissingResourceException("langInfo.res missing data", "", resourcePath);
}
/**
 * Loads the locale distance data from the "match" table of the langInfo resource bundle.
 *
 * <p>Required entries: "trie" and "regionToPartitions" (binary), "partitions" (string array),
 * "distances" (intvector). The "paradigms" string array is optional;
 * when absent, the set of paradigm LSRs is empty.
 *
 * @return a new Data object holding copies of the binary data and the decoded arrays
 * @throws MissingResourceException if a required entry is missing, too short,
 *     or if "paradigms" is not a sequence of (language, script, region) triples
 */
// VisibleForTesting
public static Data load() throws MissingResourceException {
    ICUResourceBundle langInfo = ICUResourceBundle.getBundleInstance(
            ICUData.ICU_BASE_NAME, "langInfo",
            ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT);
    UResource.Value value = langInfo.getValueWithFallback("match");
    UResource.Table matchTable = value.getTable();

    // Copy the remaining bytes of each buffer into its own array.
    ByteBuffer buffer = getValue(matchTable, "trie", value).getBinary();
    byte[] trie = new byte[buffer.remaining()];
    buffer.get(trie);

    buffer = getValue(matchTable, "regionToPartitions", value).getBinary();
    byte[] regionToPartitions = new byte[buffer.remaining()];
    buffer.get(regionToPartitions);
    if (regionToPartitions.length < LSR.REGION_INDEX_LIMIT) {
        throw new MissingResourceException(
                "langInfo.res binary data too short", "", "match/regionToPartitions");
    }

    String[] partitions = getValue(matchTable, "partitions", value).getStringArray();

    Set<LSR> paradigmLSRs;
    if (matchTable.findValue("paradigms", value)) {
        String[] paradigms = value.getStringArray();
        // The loop below reads three strings per LSR. Reject truncated data with the
        // documented exception rather than an ArrayIndexOutOfBoundsException.
        if (paradigms.length % 3 != 0) {
            throw new MissingResourceException(
                    "langInfo.res string array length not a multiple of 3",
                    "", "match/paradigms");
        }
        paradigmLSRs = new HashSet<>(paradigms.length / 3);
        for (int i = 0; i < paradigms.length; i += 3) {
            paradigmLSRs.add(new LSR(paradigms[i], paradigms[i + 1], paradigms[i + 2]));
        }
    } else {
        paradigmLSRs = Collections.emptySet();
    }

    int[] distances = getValue(matchTable, "distances", value).getIntVector();
    if (distances.length < IX_LIMIT) {
        throw new MissingResourceException(
                "langInfo.res intvector too short", "", "match/distances");
    }

    return new Data(trie, regionToPartitions, partitions, paradigmLSRs, distances);
}
/**
 * Compares all five data fields by content.
 *
 * @param other the object to compare with; may be null
 * @return true if other is a Data object of the same class with equal
 *     trie, regionToPartitionsIndex, partitionArrays, paradigmLSRs and distances
 */
@Override
public boolean equals(Object other) {
    if (this == other) { return true; }
    // Object.equals() must return false for null rather than throw.
    if (other == null || !getClass().equals(other.getClass())) { return false; }
    Data od = (Data)other;
    return Arrays.equals(trie, od.trie) &&
            Arrays.equals(regionToPartitionsIndex, od.regionToPartitionsIndex) &&
            Arrays.equals(partitionArrays, od.partitionArrays) &&
            paradigmLSRs.equals(od.paradigmLSRs) &&
            Arrays.equals(distances, od.distances);
}

/**
 * Hash code over the same fields that equals() compares,
 * so that equal Data objects have equal hash codes.
 */
@Override
public int hashCode() {
    int h = Arrays.hashCode(trie);
    h = 31 * h + Arrays.hashCode(regionToPartitionsIndex);
    h = 31 * h + Arrays.hashCode(partitionArrays);
    h = 31 * h + paradigmLSRs.hashCode();
    h = 31 * h + Arrays.hashCode(distances);
    return h;
}
}
// VisibleForTesting
public static final LocaleDistance INSTANCE = new LocaleDistance(Data.load());
private LocaleDistance(Data data) {
this.trie = new BytesTrie(data.trie, 0);
this.regionToPartitionsIndex = data.regionToPartitionsIndex;
this.partitionArrays = data.partitionArrays;
this.paradigmLSRs = data.paradigmLSRs;
defaultLanguageDistance = data.distances[IX_DEF_LANG_DISTANCE];
defaultScriptDistance = data.distances[IX_DEF_SCRIPT_DISTANCE];
defaultRegionDistance = data.distances[IX_DEF_REGION_DISTANCE];
this.minRegionDistance = data.distances[IX_MIN_REGION_DISTANCE];
LSR en = new LSR("en", "Latn", "US");
LSR enGB = new LSR("en", "Latn", "GB");
@ -102,7 +188,7 @@ public class LocaleDistance {
* (negative if none has a distance below the threshold),
* and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
*/
int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
public int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
int threshold, FavorSubtag favorSubtag) {
BytesTrie iter = new BytesTrie(trie);
// Look up the desired language only once for all supported LSRs.
@ -335,7 +421,7 @@ public class LocaleDistance {
return partitionArrays[pIndex];
}
boolean isParadigmLSR(LSR lsr) {
public boolean isParadigmLSR(LSR lsr) {
return paradigmLSRs.contains(lsr);
}
@ -348,7 +434,7 @@ public class LocaleDistance {
return defaultRegionDistance;
}
int getDefaultDemotionPerDesiredLocale() {
public int getDefaultDemotionPerDesiredLocale() {
return defaultDemotionPerDesiredLocale;
}

View file

@ -2,10 +2,18 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.MissingResourceException;
import java.util.TreeMap;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.UResource;
import com.ibm.icu.util.BytesTrie;
import com.ibm.icu.util.ULocale;
@ -14,30 +22,93 @@ public final class XLikelySubtags {
private static final String PSEUDO_BIDI_PREFIX = "+"; // -XB, -PSBIDI
private static final String PSEUDO_CRACKED_PREFIX = ","; // -XC, -PSCRACK
static final int SKIP_SCRIPT = 1;
public static final int SKIP_SCRIPT = 1;
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
// TODO: Load prebuilt data from a resource bundle
// to avoid the dependency on the builder code.
// VisibleForTesting
public static final XLikelySubtags INSTANCE = new XLikelySubtags(LikelySubtagsBuilder.build());
public static final class Data {
public final Map<String, String> languageAliases;
public final Map<String, String> regionAliases;
public final byte[] trie;
public final LSR[] lsrs;
static final class Data {
private final Map<String, String> languageAliases;
private final Map<String, String> regionAliases;
private final BytesTrie trie;
private final LSR[] lsrs;
Data(Map<String, String> languageAliases, Map<String, String> regionAliases,
BytesTrie trie, LSR[] lsrs) {
public Data(Map<String, String> languageAliases, Map<String, String> regionAliases,
byte[] trie, LSR[] lsrs) {
this.languageAliases = languageAliases;
this.regionAliases = regionAliases;
this.trie = trie;
this.lsrs = lsrs;
}
/**
 * Fetches a required entry of the "likely" table.
 *
 * @param table the table to search
 * @param key the key of the required entry
 * @param value receives the entry via table.findValue(); also the return value
 * @return {@code value}, if table.findValue() reported success
 * @throws MissingResourceException if table.findValue() reports that the key is absent
 */
private static UResource.Value getValue(UResource.Table table,
        String key, UResource.Value value) {
    final boolean found = table.findValue(key, value);
    if (found) {
        return value;
    }
    final String resourcePath = "likely/" + key;
    throw new MissingResourceException("langInfo.res missing data", "", resourcePath);
}
/**
 * Reads an optional alias list from the "likely" table.
 * The list is a flat string array of (alias, replacement) pairs.
 *
 * @return the alias map, or an empty map if the table has no such key
 * @throws MissingResourceException if the array has an odd number of strings
 */
private static Map<String, String> loadAliases(UResource.Table table,
        String key, UResource.Value value) {
    if (!table.findValue(key, value)) {
        return Collections.emptyMap();
    }
    String[] pairs = value.getStringArray();
    // Two strings are read per iteration. Reject truncated data with the
    // documented exception rather than an ArrayIndexOutOfBoundsException.
    if (pairs.length % 2 != 0) {
        throw new MissingResourceException(
                "langInfo.res string array length not a multiple of 2", "", "likely/" + key);
    }
    Map<String, String> aliases = new HashMap<>(pairs.length / 2);
    for (int i = 0; i < pairs.length; i += 2) {
        aliases.put(pairs[i], pairs[i + 1]);
    }
    return aliases;
}

/**
 * Loads the likely-subtags data from the "likely" table of the langInfo resource bundle.
 *
 * <p>Required entries: "trie" (binary) and "lsrs" (string array of
 * language/script/region triples). "languageAliases" and "regionAliases" are optional;
 * when absent, the corresponding map is empty.
 *
 * @return a new Data object
 * @throws MissingResourceException if a required entry is missing,
 *     or if an alias list or the LSR list has a malformed length
 */
// VisibleForTesting
public static Data load() throws MissingResourceException {
    ICUResourceBundle langInfo = ICUResourceBundle.getBundleInstance(
            ICUData.ICU_BASE_NAME, "langInfo",
            ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT);
    UResource.Value value = langInfo.getValueWithFallback("likely");
    UResource.Table likelyTable = value.getTable();

    Map<String, String> languageAliases = loadAliases(likelyTable, "languageAliases", value);
    Map<String, String> regionAliases = loadAliases(likelyTable, "regionAliases", value);

    // Copy the remaining bytes of the buffer into its own array.
    ByteBuffer buffer = getValue(likelyTable, "trie", value).getBinary();
    byte[] trie = new byte[buffer.remaining()];
    buffer.get(trie);

    String[] lsrSubtags = getValue(likelyTable, "lsrs", value).getStringArray();
    if (lsrSubtags.length % 3 != 0) {
        throw new MissingResourceException(
                "langInfo.res string array length not a multiple of 3", "", "likely/lsrs");
    }
    LSR[] lsrs = new LSR[lsrSubtags.length / 3];
    for (int i = 0, j = 0; i < lsrSubtags.length; i += 3, ++j) {
        lsrs[j] = new LSR(lsrSubtags[i], lsrSubtags[i + 1], lsrSubtags[i + 2]);
    }

    return new Data(languageAliases, regionAliases, trie, lsrs);
}
/**
 * Compares all four data fields by content.
 *
 * @param other the object to compare with; may be null
 * @return true if other is a Data object of the same class with equal
 *     languageAliases, regionAliases, trie and lsrs
 */
@Override
public boolean equals(Object other) {
    if (this == other) { return true; }
    // Object.equals() must return false for null rather than throw.
    if (other == null || !getClass().equals(other.getClass())) { return false; }
    Data od = (Data)other;
    return
            languageAliases.equals(od.languageAliases) &&
            regionAliases.equals(od.regionAliases) &&
            Arrays.equals(trie, od.trie) &&
            Arrays.equals(lsrs, od.lsrs);
}

/**
 * Hash code over the same fields that equals() compares,
 * so that equal Data objects have equal hash codes.
 */
@Override
public int hashCode() {
    int h = languageAliases.hashCode();
    h = 31 * h + regionAliases.hashCode();
    h = 31 * h + Arrays.hashCode(trie);
    h = 31 * h + Arrays.hashCode(lsrs);
    return h;
}
}
// VisibleForTesting
public static final XLikelySubtags INSTANCE = new XLikelySubtags(Data.load());
private final Map<String, String> languageAliases;
private final Map<String, String> regionAliases;
@ -54,7 +125,7 @@ public final class XLikelySubtags {
private XLikelySubtags(XLikelySubtags.Data data) {
languageAliases = data.languageAliases;
regionAliases = data.regionAliases;
trie = data.trie;
trie = new BytesTrie(data.trie, 0);
lsrs = data.lsrs;
// Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
@ -85,6 +156,23 @@ public final class XLikelySubtags {
}
}
/**
 * Implementation of LocaleMatcher.canonicalize(ULocale).
 *
 * <p>Looks up the locale's language in languageAliases and its country in regionAliases.
 * If neither has a replacement, the input object itself is returned. Otherwise a new
 * ULocale is built from the (possibly replaced) language, the original script,
 * and the (possibly replaced) region.
 */
public ULocale canonicalize(ULocale locale) {
    final String language = locale.getLanguage();
    final String languageReplacement = languageAliases.get(language);
    final String country = locale.getCountry();
    final String countryReplacement = regionAliases.get(country);

    final boolean unchanged = languageReplacement == null && countryReplacement == null;
    if (unchanged) {
        return locale;
    }

    String newLanguage = language;
    if (languageReplacement != null) {
        newLanguage = languageReplacement;
    }
    final String script = locale.getScript();
    String newRegion = country;
    if (countryReplacement != null) {
        newRegion = countryReplacement;
    }
    return new ULocale(newLanguage, script, newRegion);
}
private static String getCanonical(Map<String, String> aliases, String alias) {
String canonical = aliases.get(alias);
return canonical == null ? alias : canonical;
@ -101,7 +189,7 @@ public final class XLikelySubtags {
locale.getVariant());
}
LSR makeMaximizedLsrFrom(Locale locale) {
public LSR makeMaximizedLsrFrom(Locale locale) {
String tag = locale.toLanguageTag();
if (tag.startsWith("x-")) {
// Private use language tag x-subtag-subtag...

View file

@ -1,900 +0,0 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import com.ibm.icu.util.LocalePriorityList;
import com.ibm.icu.util.ULocale;
/**
* Immutable class that picks the best match between a user's desired locales and
* an application's supported locales.
*
* <p>If there are multiple supported locales with the same (language, script, region)
* likely subtags, then the current implementation returns the first of those locales.
* It ignores variant subtags (except for pseudolocale variants) and extensions.
* This may change in future versions.
*
* <p>For example, the current implementation does not distinguish between
* de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
*
* <p>If you prefer one equivalent locale over another, then provide only the preferred one,
* or place it earlier in the list of supported locales.
*
* <p>Otherwise, the order of supported locales may have no effect on the best-match results.
* The current implementation compares each desired locale with supported locales
* in the following order:
* 1. Default locale, if supported;
* 2. CLDR "paradigm locales" like en-GB and es-419;
* 3. other supported locales.
* This may change in future versions.
*
* <p>TODO: Migration notes.
*
* @author markdavis
*/
public final class XLocaleMatcher {
private static final LSR UND_LSR = new LSR("und","","");
private static final ULocale UND_ULOCALE = new ULocale("und");
private static final Locale UND_LOCALE = new Locale("und");
// Activates debugging output to stderr with details of GetBestMatch.
private static final boolean TRACE_MATCHER = false;
/**
 * Base class for iterators over LSR objects.
 * Element removal is not supported.
 */
private static abstract class LsrIterator implements Iterator<LSR> {
// Initialized to -1. NOTE(review): presumably means "no best desired locale yet" -- confirm against users.
int bestDesiredIndex = -1;
@Override
public void remove() {
// Always rejects removal.
throw new UnsupportedOperationException();
}
// NOTE(review): presumably remembers the current element together with its index
// in the desired-locales list; confirm against the concrete subclasses.
public abstract void rememberCurrent(int desiredIndex);
}
/**
* Builder option for whether the language subtag or the script subtag is most important.
*
* @see Builder#setFavorSubtag(FavorSubtag)
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
public enum FavorSubtag {
/**
* Language differences are most important, then script differences, then region differences.
* (This is the default behavior.)
*
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
LANGUAGE,
/**
* Makes script differences matter relatively more than language differences.
*
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
SCRIPT
}
/**
* Builder option for whether all desired locales are treated equally or
* earlier ones are preferred.
*
* @see Builder#setDemotionPerDesiredLocale(Demotion)
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
public enum Demotion {
/**
* All desired locales are treated equally.
*
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
NONE,
/**
* Earlier desired locales are preferred.
*
* <p>From each desired locale to the next,
* the distance to any supported locale is increased by an additional amount
* which is at least as large as most region mismatches.
* A later desired locale has to have a better match with some supported locale
* due to more than merely having the same region subtag.
*
* <p>For example: <code>Supported={en, sv} desired=[en-GB, sv]</code>
* yields <code>Result(en-GB, en)</code> because
* with the demotion of sv its perfect match is no better than
* the region distance between the earlier desired locale en-GB and en=en-US.
*
* <p>Notes:
* <ul>
* <li>In some cases, language and/or script differences can be as small as
* the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
* <li>It is possible for certain region differences to be larger than usual,
* and larger than the demotion.
* (As of CLDR 35 there is no such case, but
* this is possible in future versions of the data.)
* </ul>
*
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
REGION
}
/**
* Data for the best-matching pair of a desired and a supported locale.
*
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
public static final class Result {
private final ULocale desiredULocale;
private final ULocale supportedULocale;
private final Locale desiredLocale;
private final Locale supportedLocale;
private final int desiredIndex;
private final int supportedIndex;
private Result(ULocale udesired, ULocale usupported,
Locale desired, Locale supported,
int desIndex, int suppIndex) {
desiredULocale = udesired;
supportedULocale = usupported;
desiredLocale = desired;
supportedLocale = supported;
desiredIndex = desIndex;
supportedIndex = suppIndex;
}
/**
* Returns the best-matching desired locale.
* null if the list of desired locales is empty or if none matched well enough.
*
* @return the best-matching desired locale, or null.
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
public ULocale getDesiredULocale() {
return desiredULocale == null && desiredLocale != null ?
ULocale.forLocale(desiredLocale) : desiredULocale;
}
/**
* Returns the best-matching desired locale.
* null if the list of desired locales is empty or if none matched well enough.
*
* @return the best-matching desired locale, or null.
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
public Locale getDesiredLocale() {
return desiredLocale == null && desiredULocale != null ?
desiredULocale.toLocale() : desiredLocale;
}
/**
* Returns the best-matching supported locale.
* If none matched well enough, this is the default locale.
* The default locale is null if the list of supported locales is empty and
* no explicit default locale is set.
*
* @return the best-matching supported locale, or null.
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
public ULocale getSupportedULocale() { return supportedULocale; }
/**
* Returns the best-matching supported locale.
* If none matched well enough, this is the default locale.
* The default locale is null if the list of supported locales is empty and
* no explicit default locale is set.
*
* @return the best-matching supported locale, or null.
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
public Locale getSupportedLocale() { return supportedLocale; }
/**
* Returns the index of the best-matching desired locale in the input Iterable order.
* -1 if the list of desired locales is empty or if none matched well enough.
*
* @return the index of the best-matching desired locale, or -1.
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
public int getDesiredIndex() { return desiredIndex; }
/**
* Returns the index of the best-matching supported locale in the constructors or builders input order
* (set Collection plus added locales).
* If the matcher was built from a locale list string, then the iteration order is that
* of a LocalePriorityList built from the same string.
* -1 if the list of supported locales is empty or if none matched well enough.
*
* @return the index of the best-matching supported locale, or -1.
* @draft ICU 65
* @provisional This API might change or be removed in a future release.
*/
public int getSupportedIndex() { return supportedIndex; }
/**
 * Takes the best-matching supported locale and adds relevant fields of the
 * best-matching desired locale, such as the -t- and -u- extensions.
 * May replace some fields of the supported locale.
 * The result is the locale that should be used for date and number formatting, collation, etc.
 *
 * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn
 *
 * <p>If there is no supported locale (it is null), then null is returned.
 * If there is no best-matching desired locale, or if it equals the supported locale,
 * then the supported locale is returned unchanged.
 *
 * @return the service locale, combining the best-matching desired and supported locales;
 *         null if the supported locale is null.
 * @draft ICU 65
 * @provisional This API might change or be removed in a future release.
 */
public ULocale makeServiceULocale() {
    ULocale bestDesired = getDesiredULocale();
    // Nothing to merge when either side is missing or when both are the same locale.
    // Checking supportedULocale for null first avoids a NullPointerException:
    // the supported locale is null when there are no supported locales and no default.
    if (supportedULocale == null || bestDesired == null ||
            supportedULocale.equals(bestDesired)) {
        return supportedULocale;
    }
    ULocale.Builder b = new ULocale.Builder().setLocale(supportedULocale);

    // Copy the region from bestDesired, if there is one.
    // TODO: Seems wrong to clobber serviceLocale.getCountry() if that is not empty.
    String region = bestDesired.getCountry();
    if (!region.isEmpty()) {
        b.setRegion(region);
    }

    // Copy the variants from bestDesired, if there are any.
    // Note that this will override any serviceLocale variants.
    // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
    // TODO: Why replace? Why not append?
    String variants = bestDesired.getVariant();
    if (!variants.isEmpty()) {
        b.setVariant(variants);
    }

    // Copy the extensions from bestDesired, if there are any.
    // Note that this will override any serviceLocale extensions.
    // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
    // (replacing calendar).
    // TODO: Maybe enumerate -u- keys to not replace others in the serviceLocale??
    // (Unsure about this one.)
    for (char extensionKey : bestDesired.getExtensionKeys()) {
        b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
    }
    return b.build();
}
/**
 * Takes the best-matching supported locale and adds relevant fields of the
 * best-matching desired locale, such as the -t- and -u- extensions.
 * May replace some fields of the supported locale.
 * The result is the locale that should be used for date and number formatting, collation, etc.
 *
 * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn
 *
 * <p>This is the {@link java.util.Locale} form of {@link #makeServiceULocale()}.
 *
 * @return the service locale, combining the best-matching desired and supported locales;
 *         null if {@link #makeServiceULocale()} returns null.
 * @draft ICU 65
 * @provisional This API might change or be removed in a future release.
 */
public Locale makeServiceLocale() {
    ULocale serviceULocale = makeServiceULocale();
    // Do not dereference a missing service locale.
    return serviceULocale != null ? serviceULocale.toLocale() : null;
}
}
// Distance limit handed to the distance lookup as the initial "best distance".
// The constructor uses LocaleDistance's default script distance
// when the builder value is negative.
private final int thresholdDistance;
// Amount subtracted from the best distance after each desired locale has been examined;
// 0 when the builder's demotion is Demotion.NONE.
private final int demotionPerDesiredLocale;
// Copied from the builder (may be null) and passed through to the distance lookup.
private final FavorSubtag favorSubtag;
// These are in input order.
private final ULocale[] supportedULocales;
private final Locale[] supportedLocales;
// These are in preference order: 1. Default locale 2. paradigm locales 3. others.
// Maps each distinct supported LSR to the input-order index of
// the first supported locale with that LSR.
private final Map<LSR, Integer> supportedLsrToIndex;
// Array versions of the supportedLsrToIndex keys and values.
// The distance lookup loops over the supportedLsrs and returns the index of the best match.
private final LSR[] supportedLsrs;
private final int[] supportedIndexes;
// The default locale, in both locale types; returned when there is no good match.
// Both are null if no default was set and there are no supported locales.
private final ULocale defaultULocale;
private final Locale defaultLocale;
// Input-order index of the first supported locale whose LSR equals the default locale's LSR;
// -1 if there is none.
private final int defaultLocaleIndex;
/**
 * LocaleMatcher Builder.
 * Collects the supported locales, the default locale and the matching options,
 * and creates matcher objects from them via {@link #build()}.
 *
 * @see XLocaleMatcher#builder()
 * @draft ICU 65
 * @provisional This API might change or be removed in a future release.
 */
public static class Builder {
    // Null until supported locales have been set or added.
    private List<ULocale> supportedLocales;
    // -1 = use the default threshold.
    private int thresholdDistance = -1;
    private Demotion demotion;
    private ULocale defaultLocale;
    private FavorSubtag favor;

    /**
     * Parses the string like {@link LocalePriorityList} does and
     * sets the supported locales accordingly.
     * Clears any previously set/added supported locales first.
     *
     * @param locales the languagePriorityList to set
     * @return this Builder object
     * @draft ICU 65
     * @provisional This API might change or be removed in a future release.
     */
    public Builder setSupportedLocales(String locales) {
        return setSupportedULocales(LocalePriorityList.add(locales).build().getULocales());
    }

    /**
     * Copies the supported locales, preserving iteration order.
     * Clears any previously set/added supported locales first.
     * Duplicates are allowed, and are not removed.
     *
     * @param locales the supported locales
     * @return this Builder object
     * @draft ICU 65
     * @provisional This API might change or be removed in a future release.
     */
    public Builder setSupportedULocales(Collection<ULocale> locales) {
        supportedLocales = new ArrayList<>(locales);
        return this;
    }

    /**
     * Copies the supported locales, preserving iteration order.
     * Clears any previously set/added supported locales first.
     * Duplicates are allowed, and are not removed.
     *
     * @param locales the supported locales
     * @return this Builder object
     * @draft ICU 65
     * @provisional This API might change or be removed in a future release.
     */
    public Builder setSupportedLocales(Collection<Locale> locales) {
        supportedLocales = new ArrayList<>(locales.size());
        for (Locale locale : locales) {
            supportedLocales.add(ULocale.forLocale(locale));
        }
        return this;
    }

    /**
     * Adds another supported locale.
     * Duplicates are allowed, and are not removed.
     *
     * @param locale the supported locale to add
     * @return this Builder object
     * @draft ICU 65
     * @provisional This API might change or be removed in a future release.
     */
    public Builder addSupportedULocale(ULocale locale) {
        if (supportedLocales == null) {
            supportedLocales = new ArrayList<>();
        }
        supportedLocales.add(locale);
        return this;
    }

    /**
     * Adds another supported locale.
     * Duplicates are allowed, and are not removed.
     *
     * @param locale the supported locale to add
     * @return this Builder object
     * @draft ICU 65
     * @provisional This API might change or be removed in a future release.
     */
    public Builder addSupportedLocale(Locale locale) {
        return addSupportedULocale(ULocale.forLocale(locale));
    }

    /**
     * Sets the default locale; if null, or if it is not set explicitly,
     * then the first supported locale is used as the default locale.
     *
     * @param defaultLocale the default locale
     * @return this Builder object
     * @draft ICU 65
     * @provisional This API might change or be removed in a future release.
     */
    public Builder setDefaultULocale(ULocale defaultLocale) {
        this.defaultLocale = defaultLocale;
        return this;
    }

    /**
     * Sets the default locale; if null, or if it is not set explicitly,
     * then the first supported locale is used as the default locale.
     *
     * @param defaultLocale the default locale
     * @return this Builder object
     * @draft ICU 65
     * @provisional This API might change or be removed in a future release.
     */
    public Builder setDefaultLocale(Locale defaultLocale) {
        this.defaultLocale = ULocale.forLocale(defaultLocale);
        return this;
    }

    /**
     * If SCRIPT, then the language differences are smaller than script differences.
     * This is used in situations (such as maps) where
     * it is better to fall back to the same script than a similar language.
     *
     * @param subtag the subtag to favor
     * @return this Builder object
     * @draft ICU 65
     * @provisional This API might change or be removed in a future release.
     */
    public Builder setFavorSubtag(FavorSubtag subtag) {
        this.favor = subtag;
        return this;
    }

    /**
     * Option for whether all desired locales are treated equally or
     * earlier ones are preferred (this is the default).
     *
     * @param demotion the demotion per desired locale to set.
     * @return this Builder object
     * @draft ICU 65
     * @provisional This API might change or be removed in a future release.
     */
    public Builder setDemotionPerDesiredLocale(Demotion demotion) {
        this.demotion = demotion;
        return this;
    }

    /**
     * <i>Internal only!</i>
     * Values greater than 100 are pinned to 100.
     *
     * @param thresholdDistance the thresholdDistance to set, with -1 = default
     * @return this Builder object
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public Builder internalSetThresholdDistance(int thresholdDistance) {
        if (thresholdDistance > 100) {
            thresholdDistance = 100;
        }
        this.thresholdDistance = thresholdDistance;
        return this;
    }

    /**
     * Builds and returns a new locale matcher.
     * This builder can continue to be used.
     *
     * @return new XLocaleMatcher.
     * @draft ICU 65
     * @provisional This API might change or be removed in a future release.
     */
    public XLocaleMatcher build() {
        return new XLocaleMatcher(this);
    }

    /**
     * Returns a debug string that lists only the settings that have been set.
     */
    @Override
    public String toString() {
        StringBuilder s = new StringBuilder().append("{XLocaleMatcher.Builder");
        // supportedLocales stays null until locales are set or added.
        if (supportedLocales != null && !supportedLocales.isEmpty()) {
            s.append(" supported={").append(supportedLocales.toString()).append('}');
        }
        if (defaultLocale != null) {
            s.append(" default=").append(defaultLocale.toString());
        }
        if (favor != null) {
            s.append(" distance=").append(favor.toString());
        }
        if (thresholdDistance >= 0) {
            s.append(String.format(" threshold=%d", thresholdDistance));
        }
        if (demotion != null) {
            s.append(" demotion=").append(demotion.toString());
        }
        return s.append('}').toString();
    }
}
/**
 * Creates a fresh {@link Builder}, whose setters can be chained to configure
 * the supported locales and options before building a LocaleMatcher.
 *
 * @return a new Builder object
 * @draft ICU 65
 * @provisional This API might change or be removed in a future release.
 */
public static Builder builder() {
    Builder freshBuilder = new Builder();
    return freshBuilder;
}
/**
 * Convenience constructor: creates a matcher with default options whose supported locales
 * are parsed from the given string via {@link Builder#setSupportedLocales(String)}.
 *
 * @param supportedLocales the supported locales, as a locale priority list string
 */
public XLocaleMatcher(String supportedLocales) {
this(builder().setSupportedLocales(supportedLocales));
}
/**
 * Convenience constructor: creates a matcher with default options whose supported locales
 * are the ULocales of the given priority list, passed to
 * {@link Builder#setSupportedULocales(Collection)}.
 *
 * @param supportedLocales the supported locales
 */
public XLocaleMatcher(LocalePriorityList supportedLocales) {
this(builder().setSupportedULocales(supportedLocales.getULocales()));
}
/**
 * Creates a matcher from the builder's current settings.
 * Stores the supported locales in input order, computes their maximized LSRs,
 * determines the default locale, and builds the LSR lookup structures
 * in preference order (default locale, then paradigm locales, then all others).
 *
 * @param builder the builder whose settings are copied; it is not retained
 */
private XLocaleMatcher(Builder builder) {
    thresholdDistance = builder.thresholdDistance < 0 ?
            LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
    // Store the supported locales in input order,
    // so that when different types are used (e.g., java.util.Locale)
    // we can return those by parallel index.
    // The builder's list is null if no supported locales were set or added;
    // treat that like an empty list.
    List<ULocale> builderLocales = builder.supportedLocales;
    int supportedLocalesLength = builderLocales != null ? builderLocales.size() : 0;
    supportedULocales = new ULocale[supportedLocalesLength];
    supportedLocales = new Locale[supportedLocalesLength];
    // Supported LSRs in input order.
    LSR lsrs[] = new LSR[supportedLocalesLength];
    // Also find the first supported locale whose LSR is
    // the same as that for the default locale.
    ULocale udef = builder.defaultLocale;
    Locale def = null;
    LSR defLSR = null;
    int idef = -1;
    if (udef != null) {
        def = udef.toLocale();
        defLSR = getMaximalLsrOrUnd(udef);
    }
    int i = 0;
    if (builderLocales != null) {
        for (ULocale locale : builderLocales) {
            supportedULocales[i] = locale;
            supportedLocales[i] = locale.toLocale();
            LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale);
            if (idef < 0 && defLSR != null && lsr.equals(defLSR)) {
                idef = i;
            }
            ++i;
        }
    }

    // We need an unordered map from LSR to first supported locale with that LSR,
    // and an ordered list of (LSR, Indexes).
    // We use a LinkedHashMap for both,
    // and insert the supported locales in the following order:
    // 1. Default locale, if it is supported.
    // 2. Priority locales in builder order.
    // 3. Remaining locales in builder order.
    supportedLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
    Map<LSR, Integer> otherLsrToIndex = null;
    if (idef >= 0) {
        supportedLsrToIndex.put(defLSR, idef);
    }
    // Use an indexed loop so that "continue" still advances the index.
    // Otherwise, skipping the default locale would also skip every locale after it.
    for (i = 0; i < supportedLocalesLength; ++i) {
        if (i == idef) { continue; }  // The default locale has already been inserted.
        LSR lsr = lsrs[i];
        if (defLSR == null) {
            // No explicit default locale: The first supported locale becomes the default.
            assert i == 0;
            udef = supportedULocales[0];
            def = supportedLocales[0];
            defLSR = lsr;
            idef = 0;
            supportedLsrToIndex.put(lsr, 0);
        } else if (lsr.equals(defLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
            putIfAbsent(supportedLsrToIndex, lsr, i);
        } else {
            if (otherLsrToIndex == null) {
                otherLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
            }
            putIfAbsent(otherLsrToIndex, lsr, i);
        }
    }
    if (otherLsrToIndex != null) {
        supportedLsrToIndex.putAll(otherLsrToIndex);
    }

    // Flatten the ordered map into parallel arrays for the distance lookup.
    int numSuppLsrs = supportedLsrToIndex.size();
    supportedLsrs = new LSR[numSuppLsrs];
    supportedIndexes = new int[numSuppLsrs];
    i = 0;
    for (Map.Entry<LSR, Integer> entry : supportedLsrToIndex.entrySet()) {
        supportedLsrs[i] = entry.getKey();  // = lsrs[entry.getValue()]
        supportedIndexes[i++] = entry.getValue();
    }

    defaultULocale = udef;
    defaultLocale = def;
    defaultLocaleIndex = idef;
    demotionPerDesiredLocale =
            builder.demotion == Demotion.NONE ? 0 :
                LocaleDistance.INSTANCE.getDefaultDemotionPerDesiredLocale();  // null or REGION
    favorSubtag = builder.favor;
}
/**
 * Stores the index for the LSR unless the map already yields a non-null value for it,
 * so that the first index inserted for an LSR wins.
 *
 * @param lsrToIndex the map to modify
 * @param lsr the key
 * @param i the index to store if there is no value for the key yet
 */
private static final void putIfAbsent(Map<LSR, Integer> lsrToIndex, LSR lsr, int i) {
    if (lsrToIndex.get(lsr) != null) {
        return;  // Keep the earlier index.
    }
    lsrToIndex.put(lsr, i);
}
/**
 * Returns UND_LSR for a locale equal to UND_ULOCALE;
 * otherwise returns the maximized LSR computed by XLikelySubtags.
 *
 * @param locale the locale to convert; must not be null
 * @return the LSR for the locale
 */
private static final LSR getMaximalLsrOrUnd(ULocale locale) {
    return locale.equals(UND_ULOCALE) ?
            UND_LSR : XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
}
/**
 * Returns UND_LSR for a locale equal to UND_LOCALE;
 * otherwise returns the maximized LSR computed by XLikelySubtags.
 *
 * @param locale the locale to convert; must not be null
 * @return the LSR for the locale
 */
private static final LSR getMaximalLsrOrUnd(Locale locale) {
    return locale.equals(UND_LOCALE) ?
            UND_LSR : XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
}
/**
 * Adapts an iterator over ULocales to an LsrIterator:
 * Each locale is converted to its LSR on demand, and the locale most recently returned
 * can be recorded as the best desired locale so far.
 */
private static final class ULocaleLsrIterator extends LsrIterator {
    private Iterator<ULocale> locales;
    // The locale whose LSR was most recently returned by next().
    private ULocale current;
    // The locale that was current at the latest rememberCurrent() call.
    private ULocale remembered;

    ULocaleLsrIterator(Iterator<ULocale> locales) {
        this.locales = locales;
    }

    @Override
    public boolean hasNext() {
        return locales.hasNext();
    }

    @Override
    public LSR next() {
        ULocale nextLocale = locales.next();
        current = nextLocale;
        return getMaximalLsrOrUnd(nextLocale);
    }

    @Override
    public void rememberCurrent(int desiredIndex) {
        remembered = current;
        bestDesiredIndex = desiredIndex;
    }
}
/**
 * Adapts an iterator over java.util.Locale objects to an LsrIterator:
 * Each locale is converted to its LSR on demand, and the locale most recently returned
 * can be recorded as the best desired locale so far.
 */
private static final class LocaleLsrIterator extends LsrIterator {
    private Iterator<Locale> locales;
    // The locale whose LSR was most recently returned by next().
    private Locale current;
    // The locale that was current at the latest rememberCurrent() call.
    private Locale remembered;

    LocaleLsrIterator(Iterator<Locale> locales) {
        this.locales = locales;
    }

    @Override
    public boolean hasNext() {
        return locales.hasNext();
    }

    @Override
    public LSR next() {
        Locale nextLocale = locales.next();
        current = nextLocale;
        return getMaximalLsrOrUnd(nextLocale);
    }

    @Override
    public void rememberCurrent(int desiredIndex) {
        remembered = current;
        bestDesiredIndex = desiredIndex;
    }
}
/**
 * Returns the supported locale which best matches the desired locale.
 *
 * @param desiredLocale the desired locale; must not be null
 * @return the best-matching supported locale, or the default locale if there is no good match
 */
public ULocale getBestMatch(ULocale desiredLocale) {
    int suppIndex = getBestSuppIndex(getMaximalLsrOrUnd(desiredLocale), null);
    if (suppIndex < 0) {
        return defaultULocale;
    }
    return supportedULocales[suppIndex];
}
/**
 * Returns the supported locale which best matches one of the desired locales.
 *
 * @param desiredLocales the desired locales, iterated in the order given
 * @return the best-matching supported locale; the default locale if
 *         there are no desired locales or if there is no good match
 */
public ULocale getBestMatch(Iterable<ULocale> desiredLocales) {
    Iterator<ULocale> desiredIter = desiredLocales.iterator();
    if (desiredIter.hasNext()) {
        ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter);
        LSR firstDesiredLSR = lsrIter.next();
        int suppIndex = getBestSuppIndex(firstDesiredLSR, lsrIter);
        if (suppIndex >= 0) {
            return supportedULocales[suppIndex];
        }
    }
    return defaultULocale;
}
/**
 * Parses the string into a {@link LocalePriorityList} and
 * returns the supported locale which best matches one of the listed locales.
 *
 * @param desiredLocaleList the desired locales, as a locale priority list string
 * @return the result of {@link #getBestMatch(Iterable)} for the parsed list
 */
public ULocale getBestMatch(String desiredLocaleList) {
    LocalePriorityList desiredLocales = LocalePriorityList.add(desiredLocaleList).build();
    return getBestMatch(desiredLocales);
}
/**
 * Returns the supported {@link java.util.Locale} which best matches the desired locale.
 *
 * @param desiredLocale the desired locale; must not be null
 * @return the best-matching supported locale, or the default locale if there is no good match
 */
public Locale getBestLocale(Locale desiredLocale) {
    int suppIndex = getBestSuppIndex(getMaximalLsrOrUnd(desiredLocale), null);
    if (suppIndex < 0) {
        return defaultLocale;
    }
    return supportedLocales[suppIndex];
}
/**
 * Returns the supported {@link java.util.Locale} which best matches one of the desired locales.
 *
 * @param desiredLocales the desired locales, iterated in the order given
 * @return the best-matching supported locale; the default locale if
 *         there are no desired locales or if there is no good match
 */
public Locale getBestLocale(Iterable<Locale> desiredLocales) {
    Iterator<Locale> desiredIter = desiredLocales.iterator();
    if (desiredIter.hasNext()) {
        LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter);
        LSR firstDesiredLSR = lsrIter.next();
        int suppIndex = getBestSuppIndex(firstDesiredLSR, lsrIter);
        if (suppIndex >= 0) {
            return supportedLocales[suppIndex];
        }
    }
    return defaultLocale;
}
/**
 * Packages the outcome of a ULocale-based match into a Result.
 *
 * @param desiredLocale the single desired locale, or null if the desired locales
 *        came from lsrIter
 * @param lsrIter the iterator which remembered the best desired locale and its index;
 *        only used when suppIndex is not negative and desiredLocale is null
 * @param suppIndex the input-order index of the best supported locale, or negative if none
 * @return the Result; for a negative suppIndex it carries the default locale,
 *         no desired locale, and desired index -1
 */
private Result makeResult(ULocale desiredLocale, ULocaleLsrIterator lsrIter, int suppIndex) {
    if (suppIndex < 0) {
        return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
    }
    // A single desired locale is always at index 0;
    // otherwise use what the iterator remembered.
    boolean isSingle = desiredLocale != null;
    ULocale bestDesired = isSingle ? desiredLocale : lsrIter.remembered;
    int bestDesiredIndex = isSingle ? 0 : lsrIter.bestDesiredIndex;
    return new Result(bestDesired, supportedULocales[suppIndex],
            null, supportedLocales[suppIndex], bestDesiredIndex, suppIndex);
}
/**
 * Packages the outcome of a java.util.Locale-based match into a Result.
 *
 * @param desiredLocale the single desired locale, or null if the desired locales
 *        came from lsrIter
 * @param lsrIter the iterator which remembered the best desired locale and its index;
 *        only used when suppIndex is not negative and desiredLocale is null
 * @param suppIndex the input-order index of the best supported locale, or negative if none
 * @return the Result; for a negative suppIndex it carries the default locale,
 *         no desired locale, and desired index -1
 */
private Result makeResult(Locale desiredLocale, LocaleLsrIterator lsrIter, int suppIndex) {
    if (suppIndex < 0) {
        return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
    }
    // A single desired locale is always at index 0;
    // otherwise use what the iterator remembered.
    boolean isSingle = desiredLocale != null;
    Locale bestDesired = isSingle ? desiredLocale : lsrIter.remembered;
    int bestDesiredIndex = isSingle ? 0 : lsrIter.bestDesiredIndex;
    return new Result(null, supportedULocales[suppIndex],
            bestDesired, supportedLocales[suppIndex], bestDesiredIndex, suppIndex);
}
/**
 * Returns the best match between the desired locale and the supported locales.
 *
 * @param desiredLocale the desired locale; must not be null
 * @return the best-matching pair of the desired and a supported locale.
 */
public Result getBestMatchResult(ULocale desiredLocale) {
    int suppIndex = getBestSuppIndex(getMaximalLsrOrUnd(desiredLocale), null);
    return makeResult(desiredLocale, null, suppIndex);
}
/**
 * Finds the best match between the desired and supported locales.
 * If there are no desired locales, then the result is the same as for "no good match".
 *
 * @param desiredLocales Typically a user's languages, in order of preference (descending).
 * @return the best-matching pair of a desired and a supported locale.
 */
public Result getBestMatchResult(Iterable<ULocale> desiredLocales) {
    Iterator<ULocale> desiredIter = desiredLocales.iterator();
    if (desiredIter.hasNext()) {
        ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter);
        LSR firstDesiredLSR = lsrIter.next();
        int suppIndex = getBestSuppIndex(firstDesiredLSR, lsrIter);
        return makeResult(null, lsrIter, suppIndex);
    }
    return makeResult(UND_ULOCALE, null, -1);
}
/**
 * Returns the best match between the desired locale and the supported locales.
 *
 * @param desiredLocale the desired locale; must not be null
 * @return the best-matching pair of the desired and a supported locale.
 */
public Result getBestLocaleResult(Locale desiredLocale) {
    int suppIndex = getBestSuppIndex(getMaximalLsrOrUnd(desiredLocale), null);
    return makeResult(desiredLocale, null, suppIndex);
}
/**
 * Finds the best match between the desired and supported locales.
 * If there are no desired locales, then the result is the same as for "no good match".
 *
 * @param desiredLocales the desired locales, iterated in the order given
 * @return the best-matching pair of a desired and a supported locale.
 */
public Result getBestLocaleResult(Iterable<Locale> desiredLocales) {
    Iterator<Locale> desiredIter = desiredLocales.iterator();
    if (desiredIter.hasNext()) {
        LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter);
        LSR firstDesiredLSR = lsrIter.next();
        int suppIndex = getBestSuppIndex(firstDesiredLSR, lsrIter);
        return makeResult(null, lsrIter, suppIndex);
    }
    return makeResult(UND_LOCALE, null, -1);
}
/**
 * Finds the supported locale that best matches the desired LSRs.
 * For each desired LSR in turn, first looks for an identical supported LSR,
 * then asks LocaleDistance for the closest supported LSR within the current best distance.
 * After each desired LSR, the best distance is reduced by the demotion value,
 * so that later desired locales have to match more closely.
 * Whenever a desired LSR yields a new best match, remainingIter (if not null)
 * is told to remember the current desired locale together with its index.
 *
 * @param desiredLSR The first desired locale's LSR.
 * @param remainingIter Remaining desired LSRs, null or empty if none.
 * @return the index of the best-matching supported locale, or -1 if there is no good match.
 */
private int getBestSuppIndex(LSR desiredLSR, LsrIterator remainingIter) {
    // Index of desiredLSR in the caller's sequence of desired locales.
    int desiredIndex = 0;
    int bestSupportedLsrIndex = -1;
    for (int bestDistance = thresholdDistance;;) {
        // Quick check for exact maximized LSR.
        Integer index = supportedLsrToIndex.get(desiredLSR);
        if (index != null) {
            int suppIndex = index;
            if (TRACE_MATCHER) {
                System.err.printf("Returning %s: desiredLSR=supportedLSR\n",
                        supportedULocales[suppIndex]);
            }
            if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
            return suppIndex;
        }
        int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
                desiredLSR, supportedLsrs, bestDistance, favorSubtag);
        if (bestIndexAndDistance >= 0) {
            // The low 8 bits hold the distance, the bits above hold the supportedLsrs index.
            bestDistance = bestIndexAndDistance & 0xff;
            if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
            bestSupportedLsrIndex = bestIndexAndDistance >> 8;
        }
        if ((bestDistance -= demotionPerDesiredLocale) <= 0) {
            break;
        }
        if (remainingIter == null || !remainingIter.hasNext()) {
            break;
        }
        desiredLSR = remainingIter.next();
        // Keep the index in sync with the iterator,
        // so that rememberCurrent() records the real position of the desired locale.
        ++desiredIndex;
    }
    if (bestSupportedLsrIndex < 0) {
        if (TRACE_MATCHER) {
            System.err.printf("Returning default %s: no good match\n", defaultULocale);
        }
        return -1;
    }
    // Map from the preference-ordered LSR array back to the input-order locale index.
    int suppIndex = supportedIndexes[bestSupportedLsrIndex];
    if (TRACE_MATCHER) {
        System.err.printf("Returning %s: best matching supported locale\n",
                supportedULocales[suppIndex]);
    }
    return suppIndex;
}
/**
 * Returns a debug string with the supported locales (in input order), the default locale,
 * the favored subtag (if set), the threshold distance and the demotion value.
 */
@Override
public String toString() {
    StringBuilder s = new StringBuilder("{XLocaleMatcher");
    if (supportedULocales.length > 0) {
        s.append(" supported={");
        String separator = "";
        for (ULocale supported : supportedULocales) {
            s.append(separator).append(supported.toString());
            separator = ", ";
        }
        s.append('}');
    }
    s.append(" default=").append(Objects.toString(defaultULocale));
    if (favorSubtag != null) {
        s.append(" distance=").append(favorSubtag.toString());
    }
    if (thresholdDistance >= 0) {
        s.append(String.format(" threshold=%d", thresholdDistance));
    }
    s.append(String.format(" demotion=%d", demotionPerDesiredLocale));
    s.append('}');
    return s.toString();
}
/**
 * Returns a fraction between 0 and 1, where 1 means that the languages are a
 * perfect match, and 0 means that they are completely different.
 * This is (100-distance(desired, supported))/100.0.
 * <br>Note that
 * the precise values may change over time; no code should be made dependent
 * on the values remaining constant.
 * <br>This implementation maximizes desired and supported itself;
 * it does not read desiredMax and supportedMax.
 * @param desired Desired locale
 * @param desiredMax Maximized locale (using likely subtags)
 * @param supported Supported locale
 * @param supportedMax Maximized locale (using likely subtags)
 * @return value between 0 and 1, inclusive.
 * @deprecated ICU 65 Build and use a matcher rather than comparing pairs of locales.
 */
@Deprecated
public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
    // Returns the inverse of the distance: That is, 1-distance(desired, supported).
    LSR desiredLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired);
    LSR[] supportedLSRs = { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) };
    int indexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
            desiredLSR, supportedLSRs, thresholdDistance, favorSubtag);
    // The distance is in the low 8 bits.
    int distance = indexAndDistance & 0xff;
    return (100 - distance) / 100.0;
}
/**
 * Canonicalize a locale (language). Note that for now, it is canonicalizing
 * according to CLDR conventions (he vs iw, etc), since that is what is needed
 * for likelySubtags.
 *
 * <p>NOTE(review): This is currently an unimplemented stub: it ignores its argument
 * and always returns null, which contradicts the documented return value.
 *
 * @param ulocale language/locale code
 * @return ULocale with remapped subtags.
 * @stable ICU 4.4
 */
public ULocale canonicalize(ULocale ulocale) {
// TODO: Not implemented yet; callers currently always get null.
return null;
}
}

File diff suppressed because it is too large Load diff

View file

@ -261,9 +261,10 @@
<path id="javac.classpathref.core-tests">
<pathelement location="${icu4j.core.jar}"/>
<pathelement location="${icu4j.test-framework.jar}"/>
<pathelement location="${icu4j.tools.jar}"/>
</path>
<target name="_all.core-tests" depends="_all.core, _all.test-framework">
<target name="_all.core-tests" depends="_all.core, _all.test-framework, _all.tools">
<ant dir="${icu4j.core-tests.dir}" inheritAll="false"/>
</target>
@ -349,11 +350,9 @@
<pathelement location="${icu4j.collate.jar}"/>
<pathelement location="${icu4j.translit.jar}"/>
<pathelement location="${icu4j.test-framework.jar}"/>
<pathelement location="${icu4j.core-tests.jar}"/>
<pathelement location="${icu4j.translit-tests.jar}"/>
</path>
<target name="_all.tools" depends="_all.core, _all.collate, _all.translit, _all.test-framework, _all.core-tests, _all.translit-tests">
<target name="_all.tools" depends="_all.core, _all.collate, _all.translit, _all.test-framework">
<ant dir="${icu4j.tools.dir}" inheritAll="false"/>
</target>

View file

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bd004f5d8064e047cef4f7d31326b39b7fc43fba685fab2f0d23c154f4dbc637
size 12818511
oid sha256:b21585ec768edea7b099bd6a97b0a4130b53966a63e6a10de2f31b22f8b59fbd
size 12840921

View file

@ -18,5 +18,6 @@
<attribute name="javadoc_location" value="jar:platform:/resource/external-libraries/JUnitParams-1.0.5-javadoc.jar!/"/>
</attributes>
</classpathentry>
<classpathentry kind="src" path="/icu4j-tools"/>
<classpathentry kind="output" path="out/bin"/>
</classpath>

View file

@ -9,6 +9,7 @@
<project>icu4j-regiondata</project>
<project>icu4j-shared</project>
<project>icu4j-test-framework</project>
<project>icu4j-tools</project>
</projects>
<buildSpec>
<buildCommand>

View file

@ -8,7 +8,7 @@
*
*/
package com.ibm.icu.dev.tool.serializable;
package com.ibm.icu.dev.test.serializable;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
@ -23,7 +23,6 @@ import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import com.ibm.icu.dev.test.serializable.SerializableTestUtility;
import com.ibm.icu.impl.URLHandler;
/**
@ -31,32 +30,32 @@ import com.ibm.icu.impl.URLHandler;
* and lists all those classes that implement <code>Serializable</code>. It also checks
* to make sure that those classes have the <code>serialVersionUID</code>
* field defined.
*
*
*/
public class SerializableChecker implements URLHandler.URLVisitor
{
private static Class serializable;
//private static Class throwable;
private String path = null;
//private boolean write;
public SerializableChecker(String path)
{
this.path = path;
if (path != null) {
File dir = new File(path);
if (!dir.exists()) {
dir.mkdirs();
}
}
}
static {
try {
try {
serializable = Class.forName("java.io.Serializable");
//throwable = Class.forName("java.lang.Throwable");
} catch (Exception e) {
@ -64,45 +63,43 @@ public class SerializableChecker implements URLHandler.URLVisitor
System.out.println("Woops! Can't get class info for Serializable and Throwable.");
}
}
private void writeFile(String className, byte bytes[])
{
File file = new File(path + File.separator + className + ".dat");
FileOutputStream stream;
try {
stream = new FileOutputStream(file);
try (FileOutputStream stream = new FileOutputStream(file)) {
stream.write(bytes);
stream.close();
} catch (Exception e) {
System.out.print(" - can't write file!");
}
}
@Override
public void visit(String str)
{
int ix = str.lastIndexOf(".class");
if (ix >= 0) {
String className = "com.ibm.icu" + str.substring(0, ix).replace('/', '.');
// Skip things in com.ibm.icu.dev; they're not relevant.
if (className.startsWith("com.ibm.icu.dev.")) {
return;
}
try {
Class c = Class.forName(className);
int m = c.getModifiers();
if (serializable.isAssignableFrom(c) /*&&
(! throwable.isAssignableFrom(c) || c.getDeclaredFields().length > 0)*/) {
//Field uid;
System.out.print(className + " (" + Modifier.toString(m) + ") - ");
if(!Modifier.isInterface(m)){
if(!Modifier.isInterface(m)){
try {
/* uid = */
c.getDeclaredField("serialVersionUID");
@ -110,18 +107,18 @@ public class SerializableChecker implements URLHandler.URLVisitor
System.out.print("no serialVersionUID - ");
}
}
if (Modifier.isPublic(m)) {
SerializableTestUtility.Handler handler = SerializableTestUtility.getHandler(className);
if (!Modifier.isInterface(m) && handler != null) {
Object objectsOut[] = handler.getTestObjects();
Object objectsIn[];
boolean passed = true;
ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
ObjectOutputStream out = new ObjectOutputStream(byteOut);
try {
out.writeObject(objectsOut);
out.close();
@ -130,14 +127,14 @@ public class SerializableChecker implements URLHandler.URLVisitor
System.out.println("Eror writing test objects:" + e.toString());
return;
}
if (path != null) {
writeFile(className, byteOut.toByteArray());
}
ByteArrayInputStream byteIn = new ByteArrayInputStream(byteOut.toByteArray());
ObjectInputStream in = new ObjectInputStream(byteIn);
try {
objectsIn = (Object[]) in.readObject();
in.close();
@ -153,7 +150,7 @@ public class SerializableChecker implements URLHandler.URLVisitor
System.out.println("Object " + i + " failed behavior test.");
}
}
if (passed) {
System.out.print("test passed.");
}
@ -164,7 +161,7 @@ public class SerializableChecker implements URLHandler.URLVisitor
}
}
}
System.out.println();
}
} catch (Exception e) {
@ -177,10 +174,10 @@ public class SerializableChecker implements URLHandler.URLVisitor
{
List argList = Arrays.asList(args);
String path = null;
for (Iterator it = argList.iterator(); it.hasNext(); /*anything?*/) {
String arg = (String) it.next();
if (arg.equals("-w")) {
if (it.hasNext()) {
path = (String) it.next();
@ -188,15 +185,15 @@ public class SerializableChecker implements URLHandler.URLVisitor
System.out.println("Missing directory name on -w command.");
}
} else {
try {
//URL jarURL = new URL("jar:file:/dev/eclipse/workspace/icu4j/icu4j.jar!/com/ibm/icu");
//URL fileURL = new URL("file:/dev/eclipse/workspace/icu4j/classes/com/ibm/icu");
URL url = new URL(arg);
URLHandler handler = URLHandler.get(url);
SerializableChecker checker = new SerializableChecker(path);
System.out.println("Checking classes from " + arg + ":");
handler.guide(checker, true, false);
} catch (Exception e) {

View file

@ -12,9 +12,10 @@ import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.dev.tool.locale.LocaleDistanceBuilder;
import com.ibm.icu.impl.locale.LocaleDistance;
import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
import com.ibm.icu.util.LocaleMatcher;
import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
@ -25,13 +26,13 @@ import com.ibm.icu.util.ULocale;
* @author markdavis
*/
@RunWith(JUnit4.class)
public class XLocaleDistanceTest extends TestFmwk {
public class LocaleDistanceTest extends TestFmwk {
private static final boolean REFORMAT = false; // set to true to get a reformatted data file listed
private LocaleDistance localeDistance = LocaleDistance.INSTANCE;
DataDrivenTestHelper tfh = new MyTestFileHandler()
.setFramework(this)
.load(XLocaleDistanceTest.class, "data/localeDistanceTest.txt");
.load(LocaleDistanceTest.class, "data/localeDistanceTest.txt");
static class Arguments {
final ULocale desired;
@ -47,6 +48,13 @@ public class XLocaleDistanceTest extends TestFmwk {
}
}
@Test
public void testLoadedDataSameAsBuiltFromScratch() {
LocaleDistance.Data built = LocaleDistanceBuilder.build();
LocaleDistance.Data loaded = LocaleDistance.Data.load();
assertEquals("run LocaleDistanceBuilder and update ICU4C langInfo.txt", built, loaded);
}
@SuppressWarnings("unused")
@Ignore("Disabled because of Linux; need to investigate.")
@Test

View file

@ -1,22 +0,0 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
*******************************************************************************
* Copyright (C) 2015, Google, Inc., International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*/
package com.ibm.icu.dev.test.util;
import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData;
/**
* @author markdavis
*
*/
public class LocaleMatcherShim {
public static LanguageMatcherData load() {
// In CLDR, has different value
return null;
}
}

View file

@ -9,55 +9,47 @@
package com.ibm.icu.dev.test.util;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.io.BufferedReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.dev.tool.locale.LikelySubtagsBuilder;
import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
import com.ibm.icu.impl.locale.XLikelySubtags;
import com.ibm.icu.util.LocaleMatcher;
import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData;
import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
import com.ibm.icu.util.LocalePriorityList;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
import junitparams.JUnitParamsRunner;
import junitparams.Parameters;
/**
* Test the LocaleMatcher.
*
* @author markdavis
*/
@SuppressWarnings("deprecation")
@RunWith(JUnit4.class)
@RunWith(JUnitParamsRunner.class)
public class LocaleMatcherTest extends TestFmwk {
private static final ULocale ZH_MO = new ULocale("zh_MO");
private static final ULocale ZH_HK = new ULocale("zh_HK");
static LanguageMatcherData LANGUAGE_MATCHER_DATA = LocaleMatcherShim.load();
private LocaleMatcher newLocaleMatcher(LocalePriorityList build) {
return new LocaleMatcher(build, LANGUAGE_MATCHER_DATA);
}
private LocaleMatcher newLocaleMatcher(LocalePriorityList build, LanguageMatcherData data) {
return new LocaleMatcher(build, data == null ? LANGUAGE_MATCHER_DATA : data);
}
private LocaleMatcher newLocaleMatcher(LocalePriorityList lpl, LanguageMatcherData data, double d) {
return new LocaleMatcher(lpl, data == null ? LANGUAGE_MATCHER_DATA : data, d);
return new LocaleMatcher(build);
}
private LocaleMatcher newLocaleMatcher(String string) {
return new LocaleMatcher(LocalePriorityList.add(string).build(), LANGUAGE_MATCHER_DATA);
return new LocaleMatcher(LocalePriorityList.add(string).build());
}
// public LocaleMatcher(LocalePriorityList languagePriorityList,
// LocaleMatcherData matcherData, double threshold)
@Test
public void testParentLocales() {
assertCloser("es_AR", "es_419", "es_ES");
@ -87,32 +79,6 @@ public class LocaleMatcherTest extends TestFmwk {
assertEquals("test " + a + " is closer to " + closer + " than to " + further, new ULocale(closer), matcher.getBestMatch(a));
}
// public void testParentLocales() {
// // find all the regions that have a closer relation because of an explicit parent
// Set<String> explicitParents = new HashSet<>(INFO.getExplicitParents());
// explicitParents.remove("root");
// Set<String> otherParents = new HashSet<>(INFO.getExplicitParents());
// for (String locale : explicitParents) {
// while (true) {
// locale = LocaleIDParser.getParent(locale);
// if (locale == null || locale.equals("root")) {
// break;
// }
// otherParents.add(locale);
// }
// }
// otherParents.remove("root");
//
// for (String locale : CONFIG.getCldrFactory().getAvailable()) {
// String parentId = LocaleIDParser.getParent(locale);
// String parentIdSimple = LocaleIDParser.getSimpleParent(locale);
// if (!explicitParents.contains(parentId) && !otherParents.contains(parentIdSimple)) {
// continue;
// }
// System.out.println(locale + "\t" + CONFIG.getEnglish().getName(locale) + "\t" + parentId + "\t" + parentIdSimple);
// }
// }
@Test
public void testChinese() {
LocaleMatcher matcher = newLocaleMatcher("zh_CN, zh_TW, iw");
@ -139,31 +105,10 @@ public class LocaleMatcherTest extends TestFmwk {
@Test
public void testFallbacks() {
LocalePriorityList lpl = LocalePriorityList.add("en, hi").build();
final LocaleMatcher matcher = newLocaleMatcher(lpl, null, 0.09);
final LocaleMatcher matcher = newLocaleMatcher(lpl);
assertEquals("hi", matcher.getBestMatch("sa").toString());
}
@Test
public void testOverrideData() {
    // Matcher data with two explicitly added distances: br/fr and es/cy.
    final LanguageMatcherData customData = new LanguageMatcherData()
            .addDistance("br", "fr", 10, true)
            .addDistance("es", "cy", 10, true);
    logln(customData.toString());

    final LocalePriorityList supported = LocalePriorityList
            .add(ULocale.ENGLISH)
            .add(ULocale.FRENCH)
            .add(ULocale.UK)
            .build();
    final double threshold = 0.05;
    final LocaleMatcher matcher = newLocaleMatcher(supported, customData, threshold);
    logln(matcher.toString());

    // "br" is expected to resolve to French.
    assertEquals(ULocale.FRENCH, matcher.getBestMatch(new ULocale("br")));
    // "es" is expected to resolve to English (original note: one way).
    assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es")));
}
@Test
public void testBasics() {
final LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(ULocale.FRENCH).add(ULocale.UK)
@ -184,7 +129,7 @@ public class LocaleMatcherTest extends TestFmwk {
assertEquals(new ULocale("zh_CN"), matcher.getBestMatch("zh"));
assertEquals(new ULocale("zh_CN"), matcher.getBestMatch("zh_Hans_CN"));
assertEquals(new ULocale("zh_TW"), matcher.getBestMatch("zh_Hant_HK"));
assertEquals(new ULocale("he"), matcher.getBestMatch("iw_IT"));
assertEquals(new ULocale("iw"), matcher.getBestMatch("iw_IT"));
}
@Test
@ -219,20 +164,8 @@ public class LocaleMatcherTest extends TestFmwk {
@Test
public void TestLocaleMatcherCoverage() {
// Add tests for better code coverage
LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(null, 0).build(), null);
LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(null, 0).build());
logln(matcher.toString());
LanguageMatcherData data = new LanguageMatcherData();
LanguageMatcherData clone = data.cloneAsThawed();
if (clone.equals(data)) {
errln("Error cloneAsThawed() is equal.");
}
if (data.isFrozen()) {
errln("Error LocaleMatcherData is frozen!");
}
}
private void assertEquals(Object expected, Object string) {
@ -251,17 +184,19 @@ public class LocaleMatcherTest extends TestFmwk {
static final ULocale ENGLISH_CANADA = new ULocale("en_CA");
/**
 * Returns the score reported by {@code LocaleMatcher.match(a, null, b, null)} on a
 * matcher constructed from an empty supported-locales string.
 */
private static double match(ULocale a, ULocale b) {
    return new LocaleMatcher("").match(a, null, b, null);
}
@Test
public void testMatch_exact() {
assertEquals(1.0,
LocaleMatcher.match(ENGLISH_CANADA, ENGLISH_CANADA));
assertEquals(1.0, match(ENGLISH_CANADA, ENGLISH_CANADA));
}
@Test
public void testMatch_none() {
double match = LocaleMatcher.match(
new ULocale("ar_MK"),
ENGLISH_CANADA);
double match = match(new ULocale("ar_MK"), ENGLISH_CANADA);
assertTrue("Actual < 0: " + match, 0 <= match);
assertTrue("Actual > 0.15 (~ language + script distance): " + match, 0.2 > match);
}
@ -270,13 +205,12 @@ public class LocaleMatcherTest extends TestFmwk {
public void testMatch_matchOnMazimized() {
ULocale undTw = new ULocale("und_TW");
ULocale zhHant = new ULocale("zh_Hant");
double matchZh = LocaleMatcher.match(undTw, new ULocale("zh"));
double matchZhHant = LocaleMatcher.match(undTw, zhHant);
double matchZh = match(undTw, new ULocale("zh"));
double matchZhHant = match(undTw, zhHant);
assertTrue("und_TW should be closer to zh_Hant (" + matchZhHant +
") than to zh (" + matchZh + ")",
matchZh < matchZhHant);
double matchEnHantTw = LocaleMatcher.match(new ULocale("en_Hant_TW"),
zhHant);
double matchEnHantTw = match(new ULocale("en_Hant_TW"), zhHant);
assertTrue("zh_Hant should be closer to und_TW (" + matchZhHant +
") than to en_Hant_TW (" + matchEnHantTw + ")",
matchEnHantTw < matchZhHant);
@ -397,16 +331,9 @@ public class LocaleMatcherTest extends TestFmwk {
assertEquals("it", matcher.getBestMatch("en").toString());
}
// public void testGetBestMatch_emptyList() {
// final LocaleMatcher matcher = newLocaleMatcher(
// new LocalePriorityList(new HashMap()));
// assertNull(matcher.getBestMatch(ULocale.ENGLISH));
// }
@Test
public void testGetBestMatch_googlePseudoLocales() {
// Google pseudo locales are primarily based on variant subtags.
// See http://sites/intl_eng/pseudo_locales.
// (See below for the region code based fall back options.)
final LocaleMatcher matcher = newLocaleMatcher(
"fr, pt");
@ -475,19 +402,25 @@ public class LocaleMatcherTest extends TestFmwk {
check2(sorted);
}
private static final ULocale posix = new ULocale("en_US_POSIX");
/**
* @param sorted
*/
private void check2(Set<ULocale> sorted) {
// TODO Auto-generated method stub
logln("Checking: " + sorted);
LocaleMatcher matcher = newLocaleMatcher(
LocalePriorityList.add(
sorted.toArray(new ULocale[sorted.size()]))
.build());
.build());
for (ULocale loc : sorted) {
String stringLoc = loc.toString();
assertEquals(stringLoc, matcher.getBestMatch(stringLoc).toString());
// The result may not be the exact same locale, but it must be equivalent.
// Variants and extensions are ignored.
if (loc.equals(posix)) { continue; }
ULocale max = ULocale.addLikelySubtags(loc);
ULocale best = matcher.getBestMatch(loc);
ULocale maxBest = ULocale.addLikelySubtags(best);
assertEquals(loc.toString(), max, maxBest);
}
}
@ -502,29 +435,8 @@ public class LocaleMatcherTest extends TestFmwk {
}
// public void testComputeDistance_monkeyTest() {
// RegionCode[] codes = RegionCode.values();
// Random random = new Random();
// for (int i = 0; i < 1000; ++i) {
// RegionCode x = codes[random.nextInt(codes.length)];
// RegionCode y = codes[random.nextInt(codes.length)];
// double d = LocaleMatcher.getRegionDistance(x, y, null, null);
// if (x == RegionCode.ZZ || y == RegionCode.ZZ) {
// assertEquals(LocaleMatcher.REGION_DISTANCE, d);
// } else if (x == y) {
// assertEquals(0.0, d);
// } else {
// assertTrue(d > 0);
// assertTrue(d <= LocaleMatcher.REGION_DISTANCE);
// }
// }
// }
@Test
public void testGetBestMatchForList_matchOnMaximized2() {
// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
// return;
// }
final LocaleMatcher matcher = newLocaleMatcher("fr, en-GB, ja, es-ES, es-MX");
// ja-JP matches ja on likely subtags, and it's listed first,
// thus it wins over the second preference en-GB.
@ -537,9 +449,6 @@ public class LocaleMatcherTest extends TestFmwk {
@Test
public void testGetBestMatchForList_closeEnoughMatchOnMaximized() {
// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
// return;
// }
final LocaleMatcher matcher = newLocaleMatcher("en-GB, en, de, fr, ja");
assertEquals("de", matcher.getBestMatch("de-CH, fr").toString());
assertEquals("en", matcher.getBestMatch("en-US, ar, nl, de, ja").toString());
@ -547,23 +456,20 @@ public class LocaleMatcherTest extends TestFmwk {
@Test
public void testGetBestMatchForPortuguese() {
// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
// return;
// }
final LocaleMatcher withPTExplicit = newLocaleMatcher("pt_PT, pt_BR, es, es_419");
final LocaleMatcher withPTImplicit = newLocaleMatcher("pt_PT, pt, es, es_419");
// Could happen because "pt_BR" is a tier_1 language and "pt_PT" is tier_2.
final LocaleMatcher withoutPT = newLocaleMatcher("pt_BR, es, es_419");
// European user who prefers Spanish over Brazillian Portuguese as a fallback.
// European user who prefers Spanish over Brazilian Portuguese as a fallback.
assertEquals("pt_PT", withPTExplicit.getBestMatch("pt_PT, es, pt").toString());
assertEquals("pt_PT", withPTImplicit.getBestMatch("pt_PT, es, pt").toString());
assertEquals("es", withoutPT.getBestMatch("pt_PT, es, pt").toString());
// The earlier pt_PT vs. pt_BR region mismatch is as good as the later es perfect match
// because of the demotion per desired locale.
assertEquals("pt_BR", withoutPT.getBestMatch("pt_PT, es, pt").toString());
// Brazillian user who prefers South American Spanish over European Portuguese as a fallback.
// Brazilian user who prefers South American Spanish over European Portuguese as a fallback.
// The asymmetry between this case and above is because it's "pt_PT" that's missing between the
// matchers as "pt_BR" is a much more common language.
assertEquals("pt_BR", withPTExplicit.getBestMatch("pt, es_419, pt_PT").toString());
@ -578,9 +484,6 @@ public class LocaleMatcherTest extends TestFmwk {
@Test
public void testVariantWithScriptMatch() {
// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
// return;
// }
final LocaleMatcher matcher = newLocaleMatcher("fr, en, sv");
assertEquals("en", matcher.getBestMatch("en-GB").toString());
assertEquals("en", matcher.getBestMatch("en-GB, sv").toString());
@ -588,54 +491,10 @@ public class LocaleMatcherTest extends TestFmwk {
@Test
public void testVariantWithScriptMatch2() {
    // Supported locales are "en, sv"; for the desired list "en-GB, sv" the expected result is "en".
    final LocaleMatcher enSvMatcher = newLocaleMatcher("en, sv");
    final String best = enSvMatcher.getBestMatch("en-GB, sv").toString();
    assertEquals("en", best);
}
@Test
public void testPerf() {
if (LANGUAGE_MATCHER_DATA == null) {
return; // skip except when testing data
}
final String desired = "sv, en";
final LocaleMatcher matcherShort = newLocaleMatcher(desired);
final LocaleMatcher matcherLong = newLocaleMatcher("af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu");
final LocaleMatcher matcherVeryLong = newLocaleMatcher("af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_001, en_150, en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, 
fr_BJ, fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, smn, smn_FI, 
sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA");
//LocaleMatcher.DEBUG = true;
ULocale expected = new ULocale("sv");
assertEquals(expected, matcherShort.getBestMatch(desired));
assertEquals(expected, matcherLong.getBestMatch(desired));
assertEquals(expected, matcherVeryLong.getBestMatch(desired));
//LocaleMatcher.DEBUG = false;
for (int i = 0; i < 2; ++i) {
int iterations = i == 0 ? 1000 : 100000;
boolean showMessage = i != 0;
long timeShort = timeLocaleMatcher("Duration (few supported):\t", desired, matcherShort, showMessage, iterations, 0);
@SuppressWarnings("unused")
long timeMedium = timeLocaleMatcher("Duration (med. supported):\t", desired, matcherLong, showMessage, iterations, timeShort);
@SuppressWarnings("unused")
long timeLong = timeLocaleMatcher("Duration (many supported):\t", desired, matcherVeryLong, showMessage, iterations, timeShort);
}
}
/**
 * Calls {@code matcher.getBestMatch(desired)} {@code iterations} times and returns the
 * total elapsed nanoseconds. When {@code showmessage} is true, a warning line is emitted
 * with the per-iteration time and, if {@code comparisonTime} is positive, the percentage
 * by which the total exceeds {@code comparisonTime}.
 */
private long timeLocaleMatcher(String title, String desired, LocaleMatcher matcher,
        boolean showmessage, int iterations, long comparisonTime) {
    final long begin = System.nanoTime();
    int remaining = iterations;
    while (remaining > 0) {
        matcher.getBestMatch(desired);
        --remaining;
    }
    final long elapsed = System.nanoTime() - begin;
    if (showmessage) {
        String relative = "";
        if (comparisonTime > 0) {
            relative = (elapsed * 100 / comparisonTime - 100) + "% longer";
        }
        warnln(title + (elapsed / iterations) + " nanos, " + relative);
    }
    return elapsed;
}
@Test
public void Test8288() {
final LocaleMatcher matcher = newLocaleMatcher("it, en");
@ -644,24 +503,403 @@ public class LocaleMatcherTest extends TestFmwk {
}
@Test
public void TestTechPreview() {
final LocaleMatcher matcher = newLocaleMatcher("it, en, ru");
ULocale und = new ULocale("und");
ULocale bulgarian = new ULocale("bg");
ULocale russian = new ULocale("ru");
public void testDemotion() {
LocalePriorityList supported = LocalePriorityList.add("fr, de-CH, it").build();
LocalePriorityList desired = LocalePriorityList.add("fr-CH, de-CH, it").build();
LocaleMatcher noDemotion = LocaleMatcher.builder().
setSupportedULocales(supported.getULocales()).
setDemotionPerDesiredLocale(LocaleMatcher.Demotion.NONE).build();
assertEquals("no demotion", new ULocale("de-CH"), noDemotion.getBestMatch(desired));
Output<ULocale> outputBestDesired = new Output<>();
LocaleMatcher regionDemotion = LocaleMatcher.builder().
setSupportedULocales(supported.getULocales()).
setDemotionPerDesiredLocale(LocaleMatcher.Demotion.REGION).build();
assertEquals("region demotion", ULocale.FRENCH, regionDemotion.getBestMatch(desired));
}
ULocale best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired);
assertEquals(ULocale.ITALIAN, best);
assertEquals(null, outputBestDesired.value);
private static final class PerfCase {
ULocale desired;
ULocale expectedShort;
ULocale expectedLong;
ULocale expectedVeryLong;
matcher.setDefaultLanguage(ULocale.JAPANESE);
best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired);
assertEquals(ULocale.JAPANESE, best);
PerfCase(String des, String expShort, String expLong, String expVeryLong) {
desired = new ULocale(des);
expectedShort = new ULocale(expShort);
expectedLong = new ULocale(expLong);
expectedVeryLong = new ULocale(expVeryLong);
}
}
matcher.setFavorScript(true);
best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, bulgarian)), outputBestDesired);
assertEquals(russian, best);
// Number of getBestMatch() calls per matcher whose timing result testPerf() discards.
private static final int WARM_UP_ITERATIONS = 1000;
// Number of getBestMatch() calls per matcher for each timing that testPerf() prints.
private static final int BENCHMARK_ITERATIONS = 20000;
/**
 * Checks the best match for several desired locales against three matchers
 * (few, medium, and many supported locales), then times getBestMatch() on each
 * matcher and prints the per-call nanoseconds to System.out.
 * The timings are printed only; they are not asserted.
 * Finishes by calling maximizePerf().
 */
@Test
public void testPerf() {
// Supported-locale lists of increasing size.
final String shortList = "en, sv";
final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
"el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
"hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
"mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
"si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
"zh-CN, zh-TW, zu";
final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, " +
"ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, " +
"ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, " +
"ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, " +
"ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, " +
"bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, " +
"bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, " +
"bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, " +
"ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, " +
"cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, " +
"de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, " +
// removed en_001 to avoid exact match
"ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_150, " +
"en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, " +
"en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, " +
"en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, " +
"en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, " +
"en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, " +
"en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, " +
"en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, " +
"en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, " +
"en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, " +
"en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, " +
"es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, " +
"es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, " +
"et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, " +
"ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, " +
"fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, " +
"fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, " +
"fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, " +
"fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, " +
"fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, " +
"gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, " +
"he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, " +
"id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, " +
"jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, " +
"kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, " +
"kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, " +
"ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, " +
"lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, " +
"lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, " +
"lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, " +
"mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, " +
"ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, " +
"nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, " +
"nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, " +
"nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, " +
"pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, " +
"pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, " +
"rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, " +
"ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, " +
"sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, " +
"shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, " +
"smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, " +
"sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, " +
"sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, " +
"sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, " +
"teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, " +
"tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, " +
"uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, " +
"vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, " +
"wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, " +
"zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, " +
"zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA";
// One matcher per supported list.
final LocaleMatcher matcherShort = newLocaleMatcher(shortList);
final LocaleMatcher matcherLong = newLocaleMatcher(longList);
final LocaleMatcher matcherVeryLong = newLocaleMatcher(veryLongList);
// Each case: desired locale, then the expected match for the short, long, and very long list.
PerfCase[] pcs = new PerfCase[] {
// Exact match in all matchers.
new PerfCase("sv", "sv", "sv", "sv"),
// Common locale, exact match only in very long list.
new PerfCase("fr_CA", "en", "fr", "fr_CA"),
// Unusual locale, no exact match.
new PerfCase("de_CA", "en", "de", "de"),
// World English maps to several region partitions.
new PerfCase("en_001", "en", "en", "en"),
// Ancient language with interesting subtags.
new PerfCase("egy_Copt_CY", "en", "af", "af")
};
for (PerfCase pc : pcs) {
final ULocale desired = pc.desired;
// Verify the results before measuring anything.
assertEquals(desired.toString(), pc.expectedShort, matcherShort.getBestMatch(desired));
assertEquals(desired.toString(), pc.expectedLong, matcherLong.getBestMatch(desired));
assertEquals(desired.toString(), pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired));
// Warm-up runs; the returned timings are discarded.
timeLocaleMatcher(desired, matcherShort, WARM_UP_ITERATIONS);
timeLocaleMatcher(desired, matcherLong, WARM_UP_ITERATIONS);
timeLocaleMatcher(desired, matcherVeryLong, WARM_UP_ITERATIONS);
// Measured runs; timeLocaleMatcher() returns nanoseconds per iteration.
long tns = timeLocaleMatcher(desired, matcherShort, BENCHMARK_ITERATIONS);
System.out.format("New Duration (few supported):\t%s\t%d\tnanos\n", desired, tns);
long tnl = timeLocaleMatcher(desired, matcherLong, BENCHMARK_ITERATIONS);
System.out.format("New Duration (med. supported):\t%s\t%d\tnanos\n", desired, tnl);
long tnv = timeLocaleMatcher(desired, matcherVeryLong, BENCHMARK_ITERATIONS);
System.out.format("New Duration (many supported):\t%s\t%d\tnanos\n", desired, tnv);
}
// Also report the cost of maximizing locales.
maximizePerf();
}
/**
 * Calls {@code matcher.getBestMatch(desired)} {@code iterations} times and returns
 * the elapsed nanoseconds divided by {@code iterations}.
 */
private static long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) {
    final long begin = System.nanoTime();
    int remaining = iterations;
    while (remaining > 0) {
        matcher.getBestMatch(desired);
        --remaining;
    }
    final long elapsed = System.nanoTime() - begin;
    return elapsed / iterations;
}
/**
 * Times XLikelySubtags maximization over a fixed list of language tags and prints
 * the average nanoseconds per locale to System.out.
 *
 * A first run warms up, a second run is used to scale the iteration count so that
 * the final measurement takes roughly 0.1 seconds.
 */
private void maximizePerf() {
    final String tags = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
            "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
            "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
            "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
            "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
            "zh-CN, zh-TW, zu";
    LocalePriorityList list = LocalePriorityList.add(tags).build();
    final int few = 1000;
    timeMaximize(list, few);  // warm up; result discarded
    // Measure for scale. Clamp to at least 1 ns so the division below cannot
    // fail if the clock reports no elapsed time.
    final long scaleTime = Math.max(1L, timeMaximize(list, few));
    final long targetTime = 100000000L;  // 10^8 ns = 0.1s
    // Keep the iteration count within [1, Integer.MAX_VALUE]: the cast must not wrap,
    // and a zero count would make the per-locale average below divide by zero.
    final long scaled = (targetTime * few) / scaleTime;
    final int iterations = (int) Math.max(1L, Math.min(scaled, (long) Integer.MAX_VALUE));
    final long t = timeMaximize(list, iterations);
    int length = 0;
    for (@SuppressWarnings("unused") ULocale locale : list) { ++length; }
    System.out.println("maximize: " + (t / iterations / length) + " ns/locale: " +
            t + " ns / " + iterations + " iterations / " + length + " locales");
}
/**
 * Calls XLikelySubtags.INSTANCE.makeMaximizedLsrFrom() for every locale in {@code list},
 * repeated {@code iterations} times.
 *
 * @return the total elapsed nanoseconds (not a per-iteration average)
 */
private static long timeMaximize(Iterable<ULocale> list, int iterations) {
    final long begin = System.nanoTime();
    int remaining = iterations;
    while (remaining > 0) {
        for (ULocale candidate : list) {
            XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(candidate);
        }
        --remaining;
    }
    return System.nanoTime() - begin;
}
@Test
public void testLikelySubtagsLoadedDataSameAsBuiltFromScratch() {
    // The data produced by LikelySubtagsBuilder.build() must equal what
    // XLikelySubtags.Data.load() returns; the message tells how to refresh the data.
    final XLikelySubtags.Data fromBuilder = LikelySubtagsBuilder.build();
    final XLikelySubtags.Data fromLoad = XLikelySubtags.Data.load();
    assertEquals("run LocaleDistanceBuilder and update ICU4C langInfo.txt", fromBuilder, fromLoad);
}
/**
 * One data-driven locale matcher test case: the parsed settings and expectations,
 * plus the raw input lines they came from (used by {@link #toString()}).
 */
private static final class TestCase implements Cloneable {
    private static final String ENDL = System.lineSeparator();

    // Line number in the test data file; incremented by the reader for every line read.
    int lineNr = 0;

    // Raw input lines, kept for display in toString().
    String nameLine = "";
    String supportedLine = "";
    String defaultLine = "";
    String distanceLine = "";
    String thresholdLine = "";
    String matchLine = "";

    // Parsed matcher settings; an empty string means "not specified".
    String supported = "";
    String def = "";
    String favor = "";
    String threshold = "";
    // Parsed desired locales and expected results of one match line.
    String desired = "";
    String expMatch = "";
    String expDesired = "";
    String expCombined = "";

    /** Returns a field-by-field copy (all fields are ints or Strings). */
    @Override
    public TestCase clone() throws CloneNotSupportedException {
        return (TestCase) super.clone();
    }

    /**
     * Starts a new test group: stores the new name line and clears the matcher
     * settings and their raw lines. The line number, the match line, and the
     * desired/expected values are left untouched.
     */
    void reset(String newNameLine) {
        nameLine = newNameLine;
        supportedLine = "";
        defaultLine = "";
        distanceLine = "";
        thresholdLine = "";
        supported = "";
        def = "";
        favor = "";
        threshold = "";
    }

    /** Returns a key made of all input values (settings and desired), joined with '+'. */
    String toInputsKey() {
        return supported + '+' + def + '+' + favor + '+' + threshold + '+' + desired;
    }

    /** Appends a line separator and {@code line}, unless {@code line} is empty. */
    private static void appendLine(StringBuilder sb, String line) {
        if (!line.isEmpty()) {
            sb.append(ENDL).append(line);
        }
    }

    /**
     * Returns the name line, the non-empty settings lines, a "line N:" marker,
     * and the match line, each on its own line.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder(nameLine);
        appendLine(sb, supportedLine);
        appendLine(sb, defaultLine);
        appendLine(sb, distanceLine);
        appendLine(sb, thresholdLine);
        sb.append(ENDL).append("line ").append(lineNr).append(':');
        appendLine(sb, matchLine);
        return sb.toString();
    }
}
/**
 * If {@code s} starts with {@code prefix} and the prefix is no longer than {@code limit},
 * returns the part of {@code s} between the end of the prefix and index {@code limit};
 * otherwise returns null.
 */
private static String getSuffixAfterPrefix(String s, int limit, String prefix) {
    final int prefixLength = prefix.length();
    if (prefixLength > limit || !s.startsWith(prefix)) {
        return null;
    }
    return s.substring(prefixLength, limit);
}
// UsedReflectively, not private to avoid unused-warning
/**
 * Reads data/localeMatcherTest.txt and returns one TestCase per match line.
 *
 * Line syntax, as parsed below:
 * - '#' starts a comment; trailing spaces and tabs are ignored; empty lines are skipped.
 * - "** test: ..." starts a new group and clears the current settings.
 * - "@supported=", "@default=", "@favor=", "@threshold=" set the current settings.
 * - Any other line must be "desired >> expMatch [| expDesired [| expCombined]]".
 *
 * Match lines whose inputs repeat an earlier case are reported on System.out
 * but are still added to the result.
 *
 * @throws IllegalArgumentException for a line that fits none of the forms above
 */
static List<TestCase> readTestCases() throws Exception {
List<TestCase> tests = new ArrayList<>();
// Maps an inputs key to the index in tests of the first case with those inputs.
Map<String, Integer> uniqueTests = new HashMap<>();
// Accumulates the current settings; a clone is stored for each match line.
TestCase test = new TestCase();
String filename = "data/localeMatcherTest.txt";
try (BufferedReader in = FileUtilities.openFile(LocaleMatcherTest.class, filename)) {
String line;
while ((line = in.readLine()) != null) {
++test.lineNr;
// Start of comment, or end of line, minus trailing spaces.
int limit = line.indexOf('#');
if (limit < 0) {
limit = line.length();
}
char c;
while (limit > 0 && ((c = line.charAt(limit - 1)) == ' ' || c == '\t')) {
--limit;
}
if (limit == 0) { // empty line
continue;
}
String suffix;
if (line.startsWith("** test: ")) {
test.reset(line);
} else if ((suffix = getSuffixAfterPrefix(line, limit, "@supported=")) != null) {
test.supportedLine = line;
test.supported = suffix;
} else if ((suffix = getSuffixAfterPrefix(line, limit, "@default=")) != null) {
test.defaultLine = line;
test.def = suffix;
} else if ((suffix = getSuffixAfterPrefix(line, limit, "@favor=")) != null) {
// The raw @favor line is kept in distanceLine.
test.distanceLine = line;
test.favor = suffix;
} else if ((suffix = getSuffixAfterPrefix(line, limit, "@threshold=")) != null) {
test.thresholdLine = line;
test.threshold = suffix;
} else {
int matchSep = line.indexOf(">>");
// >> before an inline comment, and followed by more than white space.
if (0 <= matchSep && (matchSep + 2) < limit) {
test.matchLine = line;
test.desired = line.substring(0, matchSep).trim();
// Optional expectations default to empty; up to two '|' separators follow expMatch.
test.expDesired = test.expCombined = "";
int start = matchSep + 2;
int expLimit = line.indexOf('|', start);
if (expLimit < 0) {
test.expMatch = line.substring(start, limit).trim();
} else {
test.expMatch = line.substring(start, expLimit).trim();
start = expLimit + 1;
expLimit = line.indexOf('|', start);
if (expLimit < 0) {
test.expDesired = line.substring(start, limit).trim();
} else {
test.expDesired = line.substring(start, expLimit).trim();
test.expCombined = line.substring(expLimit + 1, limit).trim();
}
}
// Report, but do not drop, cases whose inputs were seen before.
String inputs = test.toInputsKey();
Integer prevIndex = uniqueTests.get(inputs);
if (prevIndex == null) {
uniqueTests.put(inputs, tests.size());
} else {
System.out.println("Locale matcher test case on line " + test.lineNr
+ " is a duplicate of line " + tests.get(prevIndex).lineNr);
}
// Store a snapshot; test itself keeps being updated by later lines.
tests.add(test.clone());
} else {
throw new IllegalArgumentException("test data syntax error on line "
+ test.lineNr + "\n" + line);
}
}
}
}
System.out.println("Number of duplicate locale matcher test cases: " + (tests.size() - uniqueTests.size()));
return tests;
}
/** Returns null for the literal string "null", otherwise a ULocale created from {@code s}. */
private static ULocale getULocaleOrNull(String s) {
    return s.equals("null") ? null : new ULocale(s);
}
/**
 * Runs one test case supplied by readTestCases().
 *
 * Builds a matcher from the case's settings, matches the desired locales, and checks
 * the best supported locale. When the case also specifies an expected desired locale
 * or an expected combined locale, those are checked against the match result as well.
 *
 * @param test the parsed test case
 * @throws IllegalArgumentException if the case has an unsupported @favor value
 */
@Test
@Parameters(method = "readTestCases")
public void dataDriven(TestCase test) {
    LocaleMatcher matcher;
    if (test.def.isEmpty() && test.favor.isEmpty() && test.threshold.isEmpty()) {
        // No optional settings: use the constructor that takes the supported-locales string.
        matcher = new LocaleMatcher(test.supported);
    } else {
        LocaleMatcher.Builder builder = LocaleMatcher.builder();
        builder.setSupportedLocales(test.supported);
        if (!test.def.isEmpty()) {
            builder.setDefaultULocale(new ULocale(test.def));
        }
        if (!test.favor.isEmpty()) {
            FavorSubtag favor;
            switch (test.favor) {
            case "normal":
                favor = FavorSubtag.LANGUAGE;
                break;
            case "script":
                favor = FavorSubtag.SCRIPT;
                break;
            default:
                throw new IllegalArgumentException("unsupported FavorSubtag value " + test.favor);
            }
            builder.setFavorSubtag(favor);
        }
        if (!test.threshold.isEmpty()) {
            // parseInt yields the primitive directly; no Integer boxing/unboxing.
            int threshold = Integer.parseInt(test.threshold);
            builder.internalSetThresholdDistance(threshold);
        }
        matcher = builder.build();
    }

    ULocale expMatch = getULocaleOrNull(test.expMatch);
    if (test.expDesired.isEmpty() && test.expCombined.isEmpty()) {
        // Only the best supported locale is expected.
        ULocale bestSupported = matcher.getBestMatch(test.desired);
        assertEquals("bestSupported", expMatch, bestSupported);
    } else {
        // Additional expectations need the full match result.
        LocalePriorityList desired = LocalePriorityList.add(test.desired).build();
        LocaleMatcher.Result result = matcher.getBestMatchResult(desired);
        assertEquals("bestSupported", expMatch, result.getSupportedULocale());
        if (!test.expDesired.isEmpty()) {
            ULocale expDesired = getULocaleOrNull(test.expDesired);
            assertEquals("bestDesired", expDesired, result.getDesiredULocale());
        }
        if (!test.expCombined.isEmpty()) {
            ULocale expCombined = getULocaleOrNull(test.expCombined);
            ULocale combined = result.makeServiceULocale();
            assertEquals("combined", expCombined, combined);
        }
    }
}
}

View file

@ -1,612 +0,0 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.dev.test.util;
import java.io.BufferedReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.junit.Test;
import org.junit.runner.RunWith;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.locale.LocaleDistance;
import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
import com.ibm.icu.impl.locale.XLikelySubtags;
import com.ibm.icu.impl.locale.XLocaleMatcher;
import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
import com.ibm.icu.util.LocaleMatcher;
import com.ibm.icu.util.LocalePriorityList;
import com.ibm.icu.util.ULocale;
import junitparams.JUnitParamsRunner;
import junitparams.Parameters;
/**
* Test the XLocaleMatcher.
*
* @author markdavis
*/
@RunWith(JUnitParamsRunner.class)
public class XLocaleMatcherTest extends TestFmwk {
// NOTE(review): REGION_DISTANCE is not referenced anywhere else in this class.
private static final int REGION_DISTANCE = 4;
// testPerf() returns immediately (skips the benchmark) when this is null.
private static final LocaleDistance LANGUAGE_MATCHER_DATA = LocaleDistance.INSTANCE;
/** Creates a matcher from an empty supported-locales string. */
private XLocaleMatcher newXLocaleMatcher() {
    final String noLocales = "";
    return new XLocaleMatcher(noLocales);
}

/** Creates a matcher for the given prioritized list of supported locales. */
private XLocaleMatcher newXLocaleMatcher(LocalePriorityList build) {
    XLocaleMatcher matcher = new XLocaleMatcher(build);
    return matcher;
}

/** Creates a matcher for a supported-locales list given in string form. */
private XLocaleMatcher newXLocaleMatcher(String string) {
    LocalePriorityList supported = LocalePriorityList.add(string).build();
    return new XLocaleMatcher(supported);
}

/** Creates a matcher for the given supported locales with an explicit threshold distance. */
@SuppressWarnings("unused")
private XLocaleMatcher newXLocaleMatcher(LocalePriorityList list, int d) {
    return XLocaleMatcher.builder()
            .setSupportedULocales(list.getULocales())
            .internalSetThresholdDistance(d)
            .build();
}
// public void testParentLocales() {
// // find all the regions that have a closer relation because of an explicit parent
// Set<String> explicitParents = new HashSet<>(INFO.getExplicitParents());
// explicitParents.remove("root");
// Set<String> otherParents = new HashSet<>(INFO.getExplicitParents());
// for (String locale : explicitParents) {
// while (true) {
// locale = LocaleIDParser.getParent(locale);
// if (locale == null || locale.equals("root")) {
// break;
// }
// otherParents.add(locale);
// }
// }
// otherParents.remove("root");
//
// for (String locale : CONFIG.getCldrFactory().getAvailable()) {
// String parentId = LocaleIDParser.getParent(locale);
// String parentIdSimple = LocaleIDParser.getSimpleParent(locale);
// if (!explicitParents.contains(parentId) && !otherParents.contains(parentIdSimple)) {
// continue;
// }
// System.out.println(locale + "\t" + CONFIG.getEnglish().getName(locale) + "\t" + parentId + "\t" + parentIdSimple);
// }
// }
// TBD reenable with override data
// public void testOverrideData() {
// double threshold = 0.05;
// XLocaleDistance XLocaleMatcherData = new XLocaleDistance()
// .addDistance("br", "fr", 10, true)
// .addDistance("es", "cy", 10, true);
// logln(XLocaleMatcherData.toString());
//
// final XLocaleMatcher matcher = newXLocaleMatcher(
// LocalePriorityList
// .add(ULocale.ENGLISH)
// .add(ULocale.FRENCH)
// .add(ULocale.UK)
// .build(), XLocaleMatcherData, threshold);
// logln(matcher.toString());
//
// assertEquals(ULocale.FRENCH, matcher.getBestMatch(new ULocale("br")));
// assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es"))); // one
// // way
// }
/**
 * Walks the available locales, collecting consecutive locales that share
 * a base language into one group, and verifies for each group that every
 * member matches itself most closely.
 */
@Test
public void testExactMatches() {
    TreeSet<ULocale> group = new TreeSet<>();
    String groupLanguage = "";
    for (ULocale candidate : ULocale.getAvailableLocales()) {
        String candidateLanguage = candidate.getLanguage();
        boolean sameGroup = groupLanguage.equals(candidateLanguage);
        if (!sameGroup) {
            // The base language changed: verify the finished group, then start a new one.
            check(group);
            group.clear();
            groupLanguage = candidateLanguage;
        }
        group.add(candidate);
    }
    // Verify the final group, which the loop never flushes.
    check(group);
}
/**
 * Verifies the given group of locales twice: once as is, and once more
 * after adding the likely-subtags form of its first element.
 * Does nothing for an empty group. Note that the set is modified.
 */
private void check(Set<ULocale> sorted) {
    if (!sorted.isEmpty()) {
        check2(sorted);
        ULocale maximizedFirst = ULocale.addLikelySubtags(sorted.iterator().next());
        sorted.add(maximizedFirst);
        check2(sorted);
    }
}
// check2() skips this locale when verifying that each locale matches itself.
private static final ULocale posix = new ULocale("en_US_POSIX");
/**
 * Builds a matcher whose supported locales are exactly the given set, then
 * verifies that the best match for each member is equivalent to that member
 * once likely subtags have been added to both.
 *
 * @param sorted the locales used both as supported and as desired locales
 */
private void check2(Set<ULocale> sorted) {
    logln("Checking: " + sorted);
    ULocale[] supported = sorted.toArray(new ULocale[sorted.size()]);
    XLocaleMatcher matcher = newXLocaleMatcher(LocalePriorityList.add(supported).build());
    for (ULocale loc : sorted) {
        // The result may not be the exact same locale, but it must be equivalent.
        // Variants and extensions are ignored.
        if (!loc.equals(posix)) {
            ULocale expected = ULocale.addLikelySubtags(loc);
            ULocale actual = ULocale.addLikelySubtags(matcher.getBestMatch(loc));
            assertEquals(loc.toString(), expected, actual);
        }
    }
}
/**
 * Checks the effect of the per-desired-locale demotion setting: with the
 * same supported and desired lists, Demotion.NONE is expected to select
 * de-CH while Demotion.REGION is expected to select fr.
 */
@Test
public void testDemotion() {
    final LocalePriorityList supported = LocalePriorityList.add("fr, de-CH, it").build();
    final LocalePriorityList desired = LocalePriorityList.add("fr-CH, de-CH, it").build();

    XLocaleMatcher noDemotion = XLocaleMatcher.builder()
            .setSupportedULocales(supported.getULocales())
            .setDemotionPerDesiredLocale(XLocaleMatcher.Demotion.NONE)
            .build();
    ULocale expectedWithoutDemotion = new ULocale("de-CH");
    assertEquals("no demotion", expectedWithoutDemotion, noDemotion.getBestMatch(desired));

    XLocaleMatcher regionDemotion = XLocaleMatcher.builder()
            .setSupportedULocales(supported.getULocales())
            .setDemotionPerDesiredLocale(XLocaleMatcher.Demotion.REGION)
            .build();
    assertEquals("region demotion", ULocale.FRENCH, regionDemotion.getBestMatch(desired));
}
/**
 * One benchmark input: a desired locale together with the best match
 * expected from the short, long and very long supported-locale lists.
 */
private static final class PerfCase {
    ULocale desired, expectedShort, expectedLong, expectedVeryLong;

    PerfCase(String desiredId, String shortId, String longId, String veryLongId) {
        this.desired = new ULocale(desiredId);
        this.expectedShort = new ULocale(shortId);
        this.expectedLong = new ULocale(longId);
        this.expectedVeryLong = new ULocale(veryLongId);
    }
}
// Number of untimed-result calls testPerf() makes per matcher before measuring.
private static final int WARM_UP_ITERATIONS = 1000;
// Number of calls per measurement in testPerf().
private static final int BENCHMARK_ITERATIONS = 20000;
// Upper bounds, in percent, for (new matcher time / old matcher time) asserted by testPerf().
private static final int AVG_PCT_MEDIUM_NEW_OLD = 33;
private static final int AVG_PCT_LONG_NEW_OLD = 80;
/**
 * Benchmarks XLocaleMatcher against the old LocaleMatcher for three
 * supported-locale lists (short, long, very long) and a handful of
 * desired locales. For each desired locale it first checks the new
 * matcher's results, then warms up and times the new matchers, then
 * warms up and times the old matchers, printing each measurement.
 * Finally it asserts that the accumulated new times stay below a fixed
 * percentage of the accumulated old times, and runs maximizePerf().
 */
@Test
public void testPerf() {
if (LANGUAGE_MATCHER_DATA == null) {
return; // skip except when testing data
}
final String shortList = "en, sv";
final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
"el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
"hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
"mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
"si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
"zh-CN, zh-TW, zu";
final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, " +
"ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, " +
"ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, " +
"ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, " +
"ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, " +
"bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, " +
"bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, " +
"bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, " +
"ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, " +
"cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, " +
"de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, " +
// removed en_001 to avoid exact match
"ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_150, " +
"en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, " +
"en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, " +
"en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, " +
"en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, " +
"en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, " +
"en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, " +
"en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, " +
"en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, " +
"en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, " +
"en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, " +
"es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, " +
"es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, " +
"et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, " +
"ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, " +
"fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, " +
"fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, " +
"fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, " +
"fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, " +
"fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, " +
"gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, " +
"he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, " +
"id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, " +
"jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, " +
"kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, " +
"kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, " +
"ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, " +
"lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, " +
"lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, " +
"lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, " +
"mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, " +
"ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, " +
"nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, " +
"nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, " +
"nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, " +
"pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, " +
"pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, " +
"rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, " +
"ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, " +
"sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, " +
"shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, " +
"smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, " +
"sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, " +
"sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, " +
"sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, " +
"teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, " +
"tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, " +
"uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, " +
"vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, " +
"wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, " +
"zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, " +
"zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA";
// One new and one old matcher per supported list, so both implementations see identical inputs.
final XLocaleMatcher matcherShort = newXLocaleMatcher(shortList);
final XLocaleMatcher matcherLong = newXLocaleMatcher(longList);
final XLocaleMatcher matcherVeryLong = newXLocaleMatcher(veryLongList);
final LocaleMatcher matcherShortOld = new LocaleMatcher(shortList);
final LocaleMatcher matcherLongOld = new LocaleMatcher(longList);
final LocaleMatcher matcherVeryLongOld = new LocaleMatcher(veryLongList);
// Accumulated per-call times in ns. Note the naming shift: the "Medium" totals
// collect the longList measurements and the "Long" totals collect the veryLongList ones.
long timeShortNew=0;
long timeMediumNew=0;
long timeLongNew=0;
long timeShortOld=0;
long timeMediumOld=0;
long timeLongOld=0;
PerfCase[] pcs = new PerfCase[] {
// Exact match in all matchers.
new PerfCase("sv", "sv", "sv", "sv"),
// Common locale, exact match only in very long list.
new PerfCase("fr_CA", "en", "fr", "fr_CA"),
// Unusual locale, no exact match.
new PerfCase("de_CA", "en", "de", "de"),
// World English maps to several region partitions.
new PerfCase("en_001", "en", "en", "en"),
// Ancient language with interesting subtags.
new PerfCase("egy_Copt_CY", "en", "af", "af")
};
for (PerfCase pc : pcs) {
final ULocale desired = pc.desired;
// Correctness check of the new matcher before any timing.
assertEquals(desired.toString(), pc.expectedShort, matcherShort.getBestMatch(desired));
assertEquals(desired.toString(), pc.expectedLong, matcherLong.getBestMatch(desired));
assertEquals(desired.toString(), pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired));
// Warm up the new matchers; the returned times are discarded.
timeXLocaleMatcher(desired, matcherShort, WARM_UP_ITERATIONS);
timeXLocaleMatcher(desired, matcherLong, WARM_UP_ITERATIONS);
timeXLocaleMatcher(desired, matcherVeryLong, WARM_UP_ITERATIONS);
long tns = timeXLocaleMatcher(desired, matcherShort, BENCHMARK_ITERATIONS);
System.out.format("New Duration (few supported):\t%s\t%d\tnanos\n", desired, tns);
timeShortNew += tns;
long tnl = timeXLocaleMatcher(desired, matcherLong, BENCHMARK_ITERATIONS);
System.out.format("New Duration (med. supported):\t%s\t%d\tnanos\n", desired, tnl);
timeMediumNew += tnl;
long tnv = timeXLocaleMatcher(desired, matcherVeryLong, BENCHMARK_ITERATIONS);
System.out.format("New Duration (many supported):\t%s\t%d\tnanos\n", desired, tnv);
timeLongNew += tnv;
// Same warm-up and measurement sequence for the old matchers.
timeLocaleMatcher(desired, matcherShortOld, WARM_UP_ITERATIONS);
timeLocaleMatcher(desired, matcherLongOld, WARM_UP_ITERATIONS);
timeLocaleMatcher(desired, matcherVeryLongOld, WARM_UP_ITERATIONS);
// NOTE(review): the new/old ratios below divide by the old per-call time, which is
// an integer quotient; a result of 0 ns would throw ArithmeticException.
long tos = timeLocaleMatcher(desired, matcherShortOld, BENCHMARK_ITERATIONS);
System.out.format("Old Duration (few supported):\t%s\t%d\tnanos new/old=%d%%\n",
desired, tos, (100 * tns) / tos);
timeShortOld += tos;
long tol = timeLocaleMatcher(desired, matcherLongOld, BENCHMARK_ITERATIONS);
System.out.format("Old Duration (med. supported):\t%s\t%d\tnanos new/old=%d%%\n",
desired, tol, (100 * tnl) / tol);
timeMediumOld += tol;
long tov = timeLocaleMatcher(desired, matcherVeryLongOld, BENCHMARK_ITERATIONS);
System.out.format("Old Duration (many supported):\t%s\t%d\tnanos new/old=%d%%\n",
desired, tov, (100 * tnv) / tov);
timeLongOld += tov;
}
// The new totals must be below the given percentage of the old totals.
// NOTE(review): the short-list check reuses AVG_PCT_MEDIUM_NEW_OLD; there is no
// separate short-list constant in this class -- confirm this is intended.
assertTrue(
String.format("timeShortNew=%d < %d%% of timeShortOld=%d",
timeShortNew, AVG_PCT_MEDIUM_NEW_OLD, timeShortOld),
timeShortNew * 100 < timeShortOld * AVG_PCT_MEDIUM_NEW_OLD);
assertTrue(
String.format("timeMediumNew=%d < %d%% of timeMediumOld=%d",
timeMediumNew, AVG_PCT_MEDIUM_NEW_OLD, timeMediumOld),
timeMediumNew * 100 < timeMediumOld * AVG_PCT_MEDIUM_NEW_OLD);
assertTrue(
String.format("timeLongNew=%d < %d%% of timeLongOld=%d",
timeLongNew, AVG_PCT_LONG_NEW_OLD, timeLongOld),
timeLongNew * 100 < timeLongOld * AVG_PCT_LONG_NEW_OLD);
maximizePerf();
}
/**
 * Calls matcher.getBestMatch(desired) the given number of times and
 * returns the elapsed time in ns divided by the number of iterations.
 */
private static long timeXLocaleMatcher(ULocale desired, XLocaleMatcher matcher, int iterations) {
    final long begin = System.nanoTime();
    int remaining = iterations;
    while (remaining > 0) {
        matcher.getBestMatch(desired);
        --remaining;
    }
    final long elapsed = System.nanoTime() - begin;
    return elapsed / iterations;
}
/**
 * Calls matcher.getBestMatch(desired) the given number of times on the
 * old LocaleMatcher and returns the elapsed ns divided by the iteration count.
 */
private static long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) {
    final long startNanos = System.nanoTime();
    for (int done = 0; done < iterations; ++done) {
        matcher.getBestMatch(desired);
    }
    final long totalNanos = System.nanoTime() - startNanos;
    return totalNanos / iterations;
}
/**
 * Measures how long maximizing a fixed list of locales takes and prints
 * the average time per locale. A short calibration run is used to pick an
 * iteration count that should take roughly 0.1 seconds.
 */
private void maximizePerf() {
    final String tags = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
            "el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
            "hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
            "mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
            "si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
            "zh-CN, zh-TW, zu";
    final LocalePriorityList locales = LocalePriorityList.add(tags).build();

    final int calibrationRuns = 1000;
    timeMaximize(locales, calibrationRuns);  // warm up; result discarded
    final long calibrationNanos = timeMaximize(locales, calibrationRuns);  // measure for scale

    final long targetNanos = 100000000L;  // 10^8 ns = 0.1s
    final int iterations = (int) ((targetNanos * calibrationRuns) / calibrationNanos);
    final long totalNanos = timeMaximize(locales, iterations);

    // Count the locales by iterating over the list.
    int count = 0;
    for (@SuppressWarnings("unused") ULocale ignored : locales) {
        count++;
    }

    long perLocale = totalNanos / iterations / count;
    System.out.println("maximize: " + perLocale + " ns/locale: " +
            totalNanos + " ns / " + iterations + " iterations / " + count + " locales");
}
/**
 * Maximizes every locale in the list, repeated for the given number of
 * iterations.
 *
 * @return the total elapsed time in ns (not the time per iteration)
 */
private static long timeMaximize(Iterable<ULocale> list, int iterations) {
    final long begin = System.nanoTime();
    for (int pass = 0; pass < iterations; ++pass) {
        for (ULocale item : list) {
            XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(item);
        }
    }
    final long elapsed = System.nanoTime() - begin;
    return elapsed;
}
/**
 * One test case parsed from the locale matcher test data file.
 * Keeps the raw source lines (used by toString() for reporting) alongside
 * the values extracted from them.
 */
private static final class TestCase implements Cloneable {
    private static final String ENDL = System.getProperties().getProperty("line.separator");

    /** Number of the data file line most recently read into this object. */
    int lineNr = 0;

    // Raw source lines, as read from the data file.
    String nameLine = "";
    String supportedLine = "";
    String defaultLine = "";
    String distanceLine = "";
    String thresholdLine = "";
    String matchLine = "";

    // Matcher settings.
    String supported = "";
    String def = "";
    String favor = "";
    String threshold = "";

    // Desired locales and expected results.
    String desired = "";
    String expMatch = "";
    String expDesired = "";
    String expCombined = "";

    @Override
    public TestCase clone() throws CloneNotSupportedException {
        Object copy = super.clone();
        return (TestCase) copy;
    }

    /** Starts a new named test: stores the name line and clears all matcher settings. */
    void reset(String newNameLine) {
        nameLine = newNameLine;
        supportedLine = defaultLine = distanceLine = thresholdLine = "";
        supported = def = favor = threshold = "";
    }

    /** Returns a key made of all inputs, used to detect duplicate test cases. */
    String toInputsKey() {
        StringBuilder key = new StringBuilder();
        key.append(supported).append('+');
        key.append(def).append('+');
        key.append(favor).append('+');
        key.append(threshold).append('+');
        key.append(desired);
        return key.toString();
    }

    /** Appends a line separator and the line, unless the line is empty. */
    private static void appendLine(StringBuilder sb, String line) {
        if (line.isEmpty()) {
            return;
        }
        sb.append(ENDL).append(line);
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder(nameLine);
        String[] settingLines = { supportedLine, defaultLine, distanceLine, thresholdLine };
        for (String settingLine : settingLines) {
            appendLine(text, settingLine);
        }
        text.append(ENDL).append("line ").append(lineNr).append(':');
        appendLine(text, matchLine);
        return text.toString();
    }
}
/**
 * Returns the part of s between the end of the prefix and limit,
 * or null if s does not start with the prefix or the prefix is
 * longer than limit.
 */
private static String getSuffixAfterPrefix(String s, int limit, String prefix) {
    final int prefixLength = prefix.length();
    final boolean matches = prefixLength <= limit && s.startsWith(prefix);
    return matches ? s.substring(prefixLength, limit) : null;
}
// UsedReflectively, not private to avoid unused-warning
/**
 * Parses data/localeMatcherTest.txt into a list of test cases.
 *
 * Each line is considered up to its '#' comment (if any), minus trailing
 * spaces and tabs; empty lines are skipped. Recognized lines:
 * <ul>
 * <li>"** test: ..." starts a new named test and clears the settings</li>
 * <li>"@supported=", "@default=", "@favor=", "@threshold=" set one setting</li>
 * <li>"desired >> expMatch [| expDesired [| expCombined]]" emits a test case
 *     with the current settings</li>
 * </ul>
 * Duplicate inputs are reported on System.out but are still returned.
 *
 * @return one TestCase per match line, in file order
 * @throws IllegalArgumentException if a line matches none of the forms above
 */
static List<TestCase> readTestCases() throws Exception {
    List<TestCase> tests = new ArrayList<>();
    Map<String, Integer> uniqueTests = new HashMap<>();
    TestCase test = new TestCase();
    String filename = "data/localeMatcherTest.txt";
    try (BufferedReader in = FileUtilities.openFile(XLocaleMatcherTest.class, filename)) {
        String line;
        while ((line = in.readLine()) != null) {
            ++test.lineNr;
            // Start of comment, or end of line, minus trailing spaces.
            int limit = line.indexOf('#');
            if (limit < 0) {
                limit = line.length();
            }
            char c;
            while (limit > 0 && ((c = line.charAt(limit - 1)) == ' ' || c == '\t')) {
                --limit;
            }
            if (limit == 0) {  // empty line
                continue;
            }
            String suffix;
            if (line.startsWith("** test: ")) {
                test.reset(line);
            } else if ((suffix = getSuffixAfterPrefix(line, limit, "@supported=")) != null) {
                test.supportedLine = line;
                test.supported = suffix;
            } else if ((suffix = getSuffixAfterPrefix(line, limit, "@default=")) != null) {
                test.defaultLine = line;
                test.def = suffix;
            } else if ((suffix = getSuffixAfterPrefix(line, limit, "@favor=")) != null) {
                test.distanceLine = line;
                test.favor = suffix;
            } else if ((suffix = getSuffixAfterPrefix(line, limit, "@threshold=")) != null) {
                test.thresholdLine = line;
                test.threshold = suffix;
            } else {
                int matchSep = line.indexOf(">>");
                // >> before an inline comment, and followed by more than white space.
                if (0 <= matchSep && (matchSep + 2) < limit) {
                    test.matchLine = line;
                    test.desired = line.substring(0, matchSep).trim();
                    test.expDesired = test.expCombined = "";
                    int start = matchSep + 2;
                    // Only a '|' before the inline comment separates fields.
                    // An unbounded search would pick up a '|' inside the comment,
                    // pull comment text into the expected values, and make the
                    // following substring(start, limit) throw.
                    int expLimit = indexOfBeforeLimit(line, '|', start, limit);
                    if (expLimit < 0) {
                        test.expMatch = line.substring(start, limit).trim();
                    } else {
                        test.expMatch = line.substring(start, expLimit).trim();
                        start = expLimit + 1;
                        expLimit = indexOfBeforeLimit(line, '|', start, limit);
                        if (expLimit < 0) {
                            test.expDesired = line.substring(start, limit).trim();
                        } else {
                            test.expDesired = line.substring(start, expLimit).trim();
                            test.expCombined = line.substring(expLimit + 1, limit).trim();
                        }
                    }
                    String inputs = test.toInputsKey();
                    Integer prevIndex = uniqueTests.get(inputs);
                    if (prevIndex == null) {
                        uniqueTests.put(inputs, tests.size());
                    } else {
                        System.out.println("Locale matcher test case on line " + test.lineNr
                                + " is a duplicate of line " + tests.get(prevIndex).lineNr);
                    }
                    // Store a snapshot; the working object keeps being reused for following lines.
                    tests.add(test.clone());
                } else {
                    throw new IllegalArgumentException("test data syntax error on line "
                            + test.lineNr + "\n" + line);
                }
            }
        }
    }
    System.out.println("Number of duplicate locale matcher test cases: " + (tests.size() - uniqueTests.size()));
    return tests;
}

/** Returns the index of the first c in s at or after start and before limit, or -1 if there is none. */
private static int indexOfBeforeLimit(String s, char c, int start, int limit) {
    int index = s.indexOf(c, start);
    return index < limit ? index : -1;
}
/**
 * Turns a locale ID from the test data into a ULocale; the literal
 * text "null" is mapped to a null reference instead.
 */
private static ULocale getULocaleOrNull(String s) {
    if (!s.equals("null")) {
        return new ULocale(s);
    }
    return null;
}
/**
 * Runs a single case from the locale matcher test data against XLocaleMatcher.
 * Builds a matcher from the case's supported locales and optional settings,
 * then checks the best supported locale and, where the case specifies them,
 * the best desired locale and the combined service locale.
 */
@Test
@Parameters(method = "readTestCases")
public void dataDriven(TestCase test) {
    final boolean hasDefault = !test.def.isEmpty();
    final boolean hasFavor = !test.favor.isEmpty();
    final boolean hasThreshold = !test.threshold.isEmpty();

    final XLocaleMatcher matcher;
    if (hasDefault || hasFavor || hasThreshold) {
        XLocaleMatcher.Builder builder = XLocaleMatcher.builder();
        builder.setSupportedLocales(test.supported);
        if (hasDefault) {
            builder.setDefaultULocale(new ULocale(test.def));
        }
        if (hasFavor) {
            final FavorSubtag favor;
            if (test.favor.equals("normal")) {
                favor = FavorSubtag.LANGUAGE;
            } else if (test.favor.equals("script")) {
                favor = FavorSubtag.SCRIPT;
            } else {
                throw new IllegalArgumentException("unsupported FavorSubtag value " + test.favor);
            }
            builder.setFavorSubtag(favor);
        }
        if (hasThreshold) {
            builder.internalSetThresholdDistance(Integer.parseInt(test.threshold));
        }
        matcher = builder.build();
    } else {
        // No optional settings on this case: use the plain constructor.
        matcher = new XLocaleMatcher(test.supported);
    }

    final ULocale expMatch = getULocaleOrNull(test.expMatch);
    final boolean wantsDesired = !test.expDesired.isEmpty();
    final boolean wantsCombined = !test.expCombined.isEmpty();
    if (!wantsDesired && !wantsCombined) {
        // Only the best supported locale is specified.
        assertEquals("bestSupported", expMatch, matcher.getBestMatch(test.desired));
        return;
    }

    LocalePriorityList desired = LocalePriorityList.add(test.desired).build();
    XLocaleMatcher.Result result = matcher.getBestMatchResult(desired);
    assertEquals("bestSupported", expMatch, result.getSupportedULocale());
    if (wantsDesired) {
        assertEquals("bestDesired", getULocaleOrNull(test.expDesired), result.getDesiredULocale());
    }
    if (wantsCombined) {
        assertEquals("combined", getULocaleOrNull(test.expCombined), result.makeServiceULocale());
    }
}
}

View file

@ -3,10 +3,8 @@
<classpathentry kind="src" path="src"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-core"/>
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-translit-tests"/>
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-translit"/>
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-collate"/>
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-test-framework"/>
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-core-tests"/>
<classpathentry kind="output" path="out/bin"/>
</classpath>

View file

@ -3,10 +3,6 @@
<name>icu4j-tools</name>
<comment></comment>
<projects>
<project>icu4j-core</project>
<project>icu4j-core-tests</project>
<project>icu4j-shared</project>
<project>icu4j-test-framework</project>
</projects>
<buildSpec>
<buildCommand>

View file

@ -1,6 +1,6 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;
package com.ibm.icu.dev.tool.locale;
import java.nio.ByteBuffer;
import java.util.Collection;
@ -14,10 +14,11 @@ import java.util.TreeMap;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.UResource;
import com.ibm.icu.impl.locale.LSR;
import com.ibm.icu.impl.locale.XCldrStub.HashMultimap;
import com.ibm.icu.impl.locale.XCldrStub.Multimap;
import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
import com.ibm.icu.util.BytesTrie;
import com.ibm.icu.impl.locale.XLikelySubtags;
import com.ibm.icu.util.BytesTrieBuilder;
import com.ibm.icu.util.ICUException;
@ -25,7 +26,7 @@ import com.ibm.icu.util.ICUException;
* Builds data for XLikelySubtags.
* Reads source data from ICU resource bundles.
*/
class LikelySubtagsBuilder {
public class LikelySubtagsBuilder {
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
private static ICUResourceBundle getSupplementalDataBundle(String name) {
@ -50,7 +51,7 @@ class LikelySubtagsBuilder {
UResource.Key key = new UResource.Key();
for (int i = 0; aliases.getKeyAndValue(i, key, value); ++i) {
String aliasFrom = key.toString();
if (aliasFrom.contains("_")) {
if (aliasFrom.contains("_") || aliasFrom.contains("-")) {
continue; // only simple aliasing
}
UResource.Table table = value.getTable();
@ -113,7 +114,7 @@ class LikelySubtagsBuilder {
}
}
BytesTrie build() {
byte[] build() {
ByteBuffer buffer = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL);
// Allocate an array with just the necessary capacity,
// so that we do not hold on to a larger array for a long time.
@ -122,11 +123,12 @@ class LikelySubtagsBuilder {
if (DEBUG_OUTPUT) {
System.out.println("likely subtags trie size: " + bytes.length + " bytes");
}
return new BytesTrie(bytes, 0);
return bytes;
}
}
static XLikelySubtags.Data build() {
// VisibleForTesting
public static XLikelySubtags.Data build() {
AliasesBuilder languageAliasesBuilder = new AliasesBuilder("language");
AliasesBuilder regionAliasesBuilder = new AliasesBuilder("territory");
@ -202,7 +204,7 @@ class LikelySubtagsBuilder {
}
}
}
BytesTrie trie = trieBuilder.build();
byte[] trie = trieBuilder.build();
LSR[] lsrs = lsrIndexes.keySet().toArray(new LSR[lsrIndexes.size()]);
return new XLikelySubtags.Data(
languageAliasesBuilder.toCanonical, regionAliasesBuilder.toCanonical, trie, lsrs);

View file

@ -1,8 +1,15 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;
package com.ibm.icu.dev.tool.locale;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@ -17,11 +24,13 @@ import java.util.TreeSet;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.UResource;
import com.ibm.icu.impl.locale.LSR;
import com.ibm.icu.impl.locale.LocaleDistance;
import com.ibm.icu.impl.locale.XCldrStub.Multimap;
import com.ibm.icu.impl.locale.XCldrStub.Predicate;
import com.ibm.icu.impl.locale.XCldrStub.Splitter;
import com.ibm.icu.impl.locale.XCldrStub.TreeMultimap;
import com.ibm.icu.util.BytesTrie;
import com.ibm.icu.impl.locale.XLikelySubtags;
import com.ibm.icu.util.BytesTrieBuilder;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
@ -153,7 +162,7 @@ public final class LocaleDistanceBuilder {
}
}
BytesTrie build() {
byte[] build() {
ByteBuffer buffer = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL);
// Allocate an array with just the necessary capacity,
// so that we do not hold on to a larger array for a long time.
@ -162,7 +171,7 @@ public final class LocaleDistanceBuilder {
if (DEBUG_OUTPUT) {
System.out.println("distance trie size: " + bytes.length + " bytes");
}
return new BytesTrie(bytes, 0);
return bytes;
}
}
@ -468,7 +477,8 @@ public final class LocaleDistanceBuilder {
return result;
}
static LocaleDistance build() {
// VisibleForTesting
public static LocaleDistance.Data build() {
// From CLDR supplementalData/languageMatching/languageMatches type="written_new"/
// and then paradigmLocales, matchVariable, and the last languageMatch items.
ICUResourceBundle supplementalData = getSupplementalDataBundle("supplementalData");
@ -591,8 +601,8 @@ public final class LocaleDistanceBuilder {
TrieBuilder trieBuilder = new TrieBuilder();
defaultDistanceTable.toTrie(trieBuilder);
BytesTrie trie = trieBuilder.build();
return new LocaleDistance(
byte[] trie = trieBuilder.build();
return new LocaleDistance.Data(
trie, rmb.regionToPartitionsIndex, rmb.partitionArrays,
paradigmLSRs, distances);
}
@ -845,4 +855,112 @@ public final class LocaleDistanceBuilder {
}
}
}
// Directory and file name of the text file that openWriter() creates.
// NOTE(review): the output directory is a hard-coded Unix-style path.
private static final String TXT_PATH = "/tmp";
// Also used by main() as the name of the top-level table in the generated file.
private static final String TXT_FILE_BASE_NAME = "langInfo";
private static final String TXT_FILE_NAME = TXT_FILE_BASE_NAME + ".txt";
/**
 * Opens TXT_PATH/TXT_FILE_NAME for writing as UTF-8 text.
 *
 * @return a buffered PrintWriter on the output file; the caller closes it
 * @throws IOException if the file cannot be opened for writing
 */
private static PrintWriter openWriter() throws IOException {
    File target = new File(TXT_PATH, TXT_FILE_NAME);
    FileOutputStream rawOut = new FileOutputStream(target);
    OutputStreamWriter utf8Out = new OutputStreamWriter(rawOut, StandardCharsets.UTF_8);
    BufferedWriter buffered = new BufferedWriter(utf8Out, 4096);
    return new PrintWriter(buffered);
}
/**
 * Prints the bytes as two-digit lowercase hex values without separators,
 * 16 bytes per line, and always ends with a line break
 * (so an empty array prints just one empty line).
 */
private static void printManyHexBytes(PrintWriter out, byte[] bytes) {
    int column = 0;
    for (byte b : bytes) {
        if (column == 16) {
            // The current line is full: start the next one.
            out.println();
            column = 0;
        }
        out.format("%02x", b & 0xff);
        ++column;
    }
    out.println();
}
/**
 * Builds the likely-subtags data and the locale distance data, and writes both
 * as one resource bundle source file to TXT_PATH/TXT_FILE_NAME.
 *
 * <p>The file contains a top-level table named TXT_FILE_BASE_NAME with two sub-tables:
 * "likely" (languageAliases, regionAliases, trie, lsrs) and
 * "match" (trie, regionToPartitions, partitions, paradigms, distances).
 *
 * @param args unused
 * @throws IOException if the output file cannot be opened, or if writing to it fails
 */
public static final void main(String[] args) throws IOException {
    XLikelySubtags.Data likelyData = LikelySubtagsBuilder.build();
    LocaleDistance.Data distanceData = build();
    System.out.println("Writing LocaleDistance.Data to " + TXT_PATH + '/' + TXT_FILE_NAME);
    try (PrintWriter out = openWriter()) {
        out.println("// © 2019 and later: Unicode, Inc. and others.\n" +
                "// License & terms of use: http://www.unicode.org/copyright.html#License\n" +
                "// Generated by ICU4J LocaleDistanceBuilder.\n" +
                TXT_FILE_BASE_NAME + ":table(nofallback){");

        // Likely-subtags data.
        out.println(" likely{");
        out.println(" languageAliases{ // " + likelyData.languageAliases.size());
        // Copy into a TreeMap so that the pairs are written in sorted key order.
        for (Map.Entry<String, String> entry :
                new TreeMap<>(likelyData.languageAliases).entrySet()) {
            out.println(" \"" + entry.getKey() + "\",\"" + entry.getValue() + "\",");
        }
        out.println(" } // languageAliases");
        out.println(" regionAliases{ // " + likelyData.regionAliases.size());
        for (Map.Entry<String, String> entry :
                new TreeMap<>(likelyData.regionAliases).entrySet()) {
            out.println(" \"" + entry.getKey() + "\",\"" + entry.getValue() + "\",");
        }
        out.println(" } // regionAliases");
        out.println(" trie:bin{ // BytesTrie: " + likelyData.trie.length + " bytes");
        printManyHexBytes(out, likelyData.trie);
        out.println(" } // trie");
        out.println(" lsrs{ // " + likelyData.lsrs.length);
        // Each LSR is written as three consecutive strings: language, script, region.
        for (LSR lsr : likelyData.lsrs) {
            out.println(" \"" + lsr.language + "\",\"" +
                    lsr.script + "\",\"" + lsr.region + "\",");
        }
        out.println(" } // lsrs");
        out.println(" } // likely");

        // Locale distance (matcher) data.
        out.println(" match{");
        out.println(" trie:bin{ // BytesTrie: " + distanceData.trie.length + " bytes");
        printManyHexBytes(out, distanceData.trie);
        out.println(" } // trie");
        out.println(" regionToPartitions:bin{ // " +
                distanceData.regionToPartitionsIndex.length + " bytes");
        printManyHexBytes(out, distanceData.regionToPartitionsIndex);
        out.println(" } // regionToPartitions");
        // Comma-separated quoted strings on a single line.
        out.print(" partitions{");
        boolean first = true;
        for (String p : distanceData.partitionArrays) {
            if (first) {
                first = false;
            } else {
                out.append(',');
            }
            out.append('"').print(p);
            out.append('"');
        }
        out.println("}");
        out.println(" paradigms{");
        for (LSR lsr : distanceData.paradigmLSRs) {
            out.println(" \"" + lsr.language + "\",\"" +
                    lsr.script + "\",\"" + lsr.region + "\",");
        }
        out.println(" }");
        // Comma-separated integers on a single line.
        out.print(" distances:intvector{");
        first = true;
        for (int d : distanceData.distances) {
            if (first) {
                first = false;
            } else {
                out.append(',');
            }
            out.print(d);
        }
        out.println("}");
        out.println(" } // match");
        out.println("}");

        // PrintWriter does not throw on write errors; it only records them.
        // checkError() flushes and reports that state, so that a truncated
        // output file is not silently treated as a success.
        if (out.checkError()) {
            throw new IOException("Error writing " + TXT_PATH + '/' + TXT_FILE_NAME);
        }
    }
}
}