mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 17:01:16 +00:00
ICU-20467 replace the LocaleMatcher implementation, load data from new bundle
- remove the old LocaleMatcher implementation code - move the XLocaleMatcher code into LocaleMatcher, same for test - remove unused internal methods - stop comparing old vs. new performance - generate langInfo.txt resource bundle file with precomputed likely-subtags and matcher data - make genrb handle multi-line binary values - load likely-subtags & distance data from new langInfo.res bundle - test that built data == loaded data - move data builders to tools, no more runtime dependency on builder code
This commit is contained in:
parent
93fde1c459
commit
61c4a728cd
23 changed files with 4475 additions and 2775 deletions
|
@ -191,7 +191,7 @@ summarizes the ICU data files and their corresponding features and categories:
|
|||
| Confusables | `"confusables"` | unidata/confusables\*.txt | 45 KiB |
|
||||
| Currencies | `"misc"` <br/> `"curr_supplemental"` <br/> `"curr_tree"` | misc/currencyNumericCodes.txt <br/> curr/supplementalData.txt <br/> curr/\*.txt | 3.1 KiB <br/> 27 KiB <br/> **2.5 MiB** |
|
||||
| Language Display <br/> Names | `"lang_tree"` | lang/\*.txt | **2.1 MiB** |
|
||||
| Language Tags | `"misc"` | misc/keyTypeData.txt <br/> misc/likelySubtags.txt <br/> misc/metadata.txt | 6.8 KiB <br/> 53 KiB <br/> 33 KiB |
|
||||
| Language Tags | `"misc"` | misc/keyTypeData.txt <br/> misc/langInfo.txt <br/> misc/likelySubtags.txt <br/> misc/metadata.txt | 6.8 KiB <br/> 37 KiB <br/> 53 KiB <br/> 33 KiB |
|
||||
| Normalization | `"normalization"` | in/\*.nrm except in/nfc.nrm | 160 KiB |
|
||||
| Plural Rules | `"misc"` | misc/pluralRanges.txt <br/> misc/plurals.txt | 3.3 KiB <br/> 33 KiB |
|
||||
| Region Display <br/> Names | `"region_tree"` | region/\*.txt | **1.1 MiB** |
|
||||
|
|
2614
icu4c/source/data/misc/langInfo.txt
Normal file
2614
icu4c/source/data/misc/langInfo.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -205,10 +205,10 @@ main(int argc,
|
|||
"\t-c or --copyright include copyright notice\n");
|
||||
fprintf(stderr,
|
||||
"\t-e or --encoding encoding of source files\n"
|
||||
"\t-d of --destdir destination directory, followed by the path, defaults to %s\n"
|
||||
"\t-s or --sourcedir source directory for files followed by path, defaults to %s\n"
|
||||
"\t-d or --destdir destination directory, followed by the path, defaults to '%s'\n"
|
||||
"\t-s or --sourcedir source directory for files followed by path, defaults to '%s'\n"
|
||||
"\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
|
||||
"\t followed by path, defaults to %s\n",
|
||||
"\t followed by path, defaults to '%s'\n",
|
||||
u_getDataDirectory(), u_getDataDirectory(), u_getDataDirectory());
|
||||
fprintf(stderr,
|
||||
"\t-j or --write-java write a Java ListResourceBundle for ICU4J, followed by optional encoding\n"
|
||||
|
|
|
@ -274,11 +274,11 @@ expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenV
|
|||
}
|
||||
}
|
||||
|
||||
static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
|
||||
static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment,
|
||||
int32_t &stringLength, UErrorCode *status)
|
||||
{
|
||||
struct UString *tokenValue;
|
||||
char *result;
|
||||
uint32_t count;
|
||||
|
||||
expect(state, TOK_STRING, &tokenValue, comment, line, status);
|
||||
|
||||
|
@ -287,14 +287,13 @@ static char *getInvariantString(ParseState* state, uint32_t *line, struct UStrin
|
|||
return NULL;
|
||||
}
|
||||
|
||||
count = u_strlen(tokenValue->fChars);
|
||||
if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
|
||||
if(!uprv_isInvariantUString(tokenValue->fChars, tokenValue->fLength)) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
error(*line, "invariant characters required for table keys, binary data, etc.");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
result = static_cast<char *>(uprv_malloc(count+1));
|
||||
result = static_cast<char *>(uprv_malloc(tokenValue->fLength+1));
|
||||
|
||||
if (result == NULL)
|
||||
{
|
||||
|
@ -302,7 +301,8 @@ static char *getInvariantString(ParseState* state, uint32_t *line, struct UStrin
|
|||
return NULL;
|
||||
}
|
||||
|
||||
u_UCharsToChars(tokenValue->fChars, result, count+1);
|
||||
u_UCharsToChars(tokenValue->fChars, result, tokenValue->fLength+1);
|
||||
stringLength = tokenValue->fLength;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -1371,7 +1371,6 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US
|
|||
int32_t value;
|
||||
UBool readToken = FALSE;
|
||||
char *stopstring;
|
||||
uint32_t len;
|
||||
struct UString memberComments;
|
||||
|
||||
IntVectorResource *result = intvector_open(state->bundle, tag, comment, status);
|
||||
|
@ -1404,7 +1403,8 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US
|
|||
return result;
|
||||
}
|
||||
|
||||
string = getInvariantString(state, NULL, NULL, status);
|
||||
int32_t stringLength;
|
||||
string = getInvariantString(state, NULL, NULL, stringLength, status);
|
||||
|
||||
if (U_FAILURE(*status))
|
||||
{
|
||||
|
@ -1414,9 +1414,9 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US
|
|||
|
||||
/* For handling illegal char in the Intvector */
|
||||
value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
|
||||
len=(uint32_t)(stopstring-string);
|
||||
int32_t len = (int32_t)(stopstring-string);
|
||||
|
||||
if(len==uprv_strlen(string))
|
||||
if(len==stringLength)
|
||||
{
|
||||
result->add(value, *status);
|
||||
uprv_free(string);
|
||||
|
@ -1454,7 +1454,8 @@ static struct SResource *
|
|||
parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
|
||||
{
|
||||
uint32_t line;
|
||||
LocalMemory<char> string(getInvariantString(state, &line, NULL, status));
|
||||
int32_t stringLength;
|
||||
LocalMemory<char> string(getInvariantString(state, &line, NULL, stringLength, status));
|
||||
if (string.isNull() || U_FAILURE(*status))
|
||||
{
|
||||
return NULL;
|
||||
|
@ -1470,46 +1471,45 @@ parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UStri
|
|||
printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
|
||||
}
|
||||
|
||||
uint32_t count = (uint32_t)uprv_strlen(string.getAlias());
|
||||
if (count > 0){
|
||||
if((count % 2)==0){
|
||||
LocalMemory<uint8_t> value;
|
||||
if (value.allocateInsteadAndCopy(count) == NULL)
|
||||
{
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
LocalMemory<uint8_t> value;
|
||||
int32_t count = 0;
|
||||
if (stringLength > 0 && value.allocateInsteadAndCopy(stringLength) == NULL)
|
||||
{
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char toConv[3] = {'\0', '\0', '\0'};
|
||||
for (uint32_t i = 0; i < count; i += 2)
|
||||
{
|
||||
toConv[0] = string[i];
|
||||
toConv[1] = string[i + 1];
|
||||
|
||||
char *stopstring;
|
||||
value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
|
||||
uint32_t len=(uint32_t)(stopstring-toConv);
|
||||
|
||||
if(len!=2)
|
||||
{
|
||||
*status=U_INVALID_CHAR_FOUND;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return bin_open(state->bundle, tag, count >> 1, value.getAlias(), NULL, comment, status);
|
||||
char toConv[3] = {'\0', '\0', '\0'};
|
||||
for (int32_t i = 0; i < stringLength;)
|
||||
{
|
||||
// Skip spaces (which may have been line endings).
|
||||
char c0 = string[i++];
|
||||
if (c0 == ' ') { continue; }
|
||||
if (i == stringLength) {
|
||||
*status=U_INVALID_CHAR_FOUND;
|
||||
error(line, "Encountered invalid binary value (odd number of hex digits)");
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
toConv[0] = c0;
|
||||
toConv[1] = string[i++];
|
||||
|
||||
char *stopstring;
|
||||
value[count++] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
|
||||
uint32_t len=(uint32_t)(stopstring-toConv);
|
||||
|
||||
if(len!=2)
|
||||
{
|
||||
*status = U_INVALID_CHAR_FOUND;
|
||||
error(line, "Encountered invalid binary value (length is odd)");
|
||||
*status=U_INVALID_CHAR_FOUND;
|
||||
error(line, "Encountered invalid binary value (not all pairs of hex digits)");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
if (count == 0) {
|
||||
warning(startline, "Encountered empty binary value");
|
||||
return bin_open(state->bundle, tag, 0, NULL, "", comment, status);
|
||||
} else {
|
||||
return bin_open(state->bundle, tag, count, value.getAlias(), NULL, comment, status);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1520,9 +1520,9 @@ parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UStr
|
|||
int32_t value;
|
||||
char *string;
|
||||
char *stopstring;
|
||||
uint32_t len;
|
||||
|
||||
string = getInvariantString(state, NULL, NULL, status);
|
||||
int32_t stringLength;
|
||||
string = getInvariantString(state, NULL, NULL, stringLength, status);
|
||||
|
||||
if (string == NULL || U_FAILURE(*status))
|
||||
{
|
||||
|
@ -1541,7 +1541,7 @@ parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UStr
|
|||
printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
|
||||
}
|
||||
|
||||
if (uprv_strlen(string) <= 0)
|
||||
if (stringLength == 0)
|
||||
{
|
||||
warning(startline, "Encountered empty integer. Default value is 0.");
|
||||
}
|
||||
|
@ -1549,8 +1549,8 @@ parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UStr
|
|||
/* Allow integer support for hexdecimal, octal digit and decimal*/
|
||||
/* and handle illegal char in the integer*/
|
||||
value = uprv_strtoul(string, &stopstring, 0);
|
||||
len=(uint32_t)(stopstring-string);
|
||||
if(len==uprv_strlen(string))
|
||||
int32_t len = (int32_t)(stopstring-string);
|
||||
if(len==stringLength)
|
||||
{
|
||||
result = int_open(state->bundle, tag, value, comment, status);
|
||||
}
|
||||
|
@ -1567,7 +1567,8 @@ static struct SResource *
|
|||
parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
|
||||
{
|
||||
uint32_t line;
|
||||
LocalMemory<char> filename(getInvariantString(state, &line, NULL, status));
|
||||
int32_t stringLength;
|
||||
LocalMemory<char> filename(getInvariantString(state, &line, NULL, stringLength, status));
|
||||
if (U_FAILURE(*status))
|
||||
{
|
||||
return NULL;
|
||||
|
@ -1628,12 +1629,11 @@ parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UStr
|
|||
|
||||
UCHARBUF *ucbuf;
|
||||
char *fullname = NULL;
|
||||
int32_t count = 0;
|
||||
const char* cp = NULL;
|
||||
const UChar* uBuffer = NULL;
|
||||
|
||||
filename = getInvariantString(state, &line, NULL, status);
|
||||
count = (int32_t)uprv_strlen(filename);
|
||||
int32_t stringLength;
|
||||
filename = getInvariantString(state, &line, NULL, stringLength, status);
|
||||
|
||||
if (U_FAILURE(*status))
|
||||
{
|
||||
|
@ -1652,7 +1652,7 @@ parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UStr
|
|||
printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
|
||||
}
|
||||
|
||||
fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
|
||||
fullname = (char *) uprv_malloc(state->inputdirLength + stringLength + 2);
|
||||
/* test for NULL */
|
||||
if(fullname == NULL)
|
||||
{
|
||||
|
|
|
@ -368,6 +368,7 @@
|
|||
<pathelement location="${icu4j.regiondata.jar}"/>
|
||||
<pathelement location="${icu4j.translit.jar}"/>
|
||||
<pathelement location="${icu4j.test-framework.jar}"/>
|
||||
<pathelement location="${icu4j.tools.jar}"/>
|
||||
<pathelement location="${icu4j.core-tests.jar}"/>
|
||||
<pathelement location="${icu4j.collate-tests.jar}"/>
|
||||
<pathelement location="${icu4j.charset-tests.jar}"/>
|
||||
|
@ -1201,7 +1202,7 @@
|
|||
</ant>
|
||||
</target>
|
||||
|
||||
<target name="core-tests" depends="core, test-framework" description="Build core tests">
|
||||
<target name="core-tests" depends="core, test-framework, tools" description="Build core tests">
|
||||
<ant dir="${icu4j.core-tests.dir}" inheritAll="false">
|
||||
<reference refid="junit.jars"/>
|
||||
</ant>
|
||||
|
@ -1249,7 +1250,7 @@
|
|||
<ant dir="${icu4j.build-tools.dir}" inheritAll="false"/>
|
||||
</target>
|
||||
|
||||
<target name="tools" depends="core, core-tests, collate, translit, translit-tests" description="Build tool classes">
|
||||
<target name="tools" depends="core, collate, translit" description="Build tool classes">
|
||||
<ant dir="${icu4j.tools.dir}" inheritAll="false"/>
|
||||
</target>
|
||||
|
||||
|
|
|
@ -4,18 +4,18 @@ package com.ibm.icu.impl.locale;
|
|||
|
||||
import java.util.Objects;
|
||||
|
||||
final class LSR {
|
||||
static final int REGION_INDEX_LIMIT = 1001 + 26 * 26;
|
||||
public final class LSR {
|
||||
public static final int REGION_INDEX_LIMIT = 1001 + 26 * 26;
|
||||
|
||||
static final boolean DEBUG_OUTPUT = false;
|
||||
public static final boolean DEBUG_OUTPUT = false;
|
||||
|
||||
final String language;
|
||||
final String script;
|
||||
final String region;
|
||||
public final String language;
|
||||
public final String script;
|
||||
public final String region;
|
||||
/** Index for region, negative if ill-formed. @see indexForRegion */
|
||||
final int regionIndex;
|
||||
|
||||
LSR(String language, String script, String region) {
|
||||
public LSR(String language, String script, String region) {
|
||||
this.language = language;
|
||||
this.script = script;
|
||||
this.region = region;
|
||||
|
@ -27,7 +27,7 @@ final class LSR {
|
|||
* Do not rely on a particular region->index mapping; it may change.
|
||||
* Returns 0 for ill-formed strings.
|
||||
*/
|
||||
static final int indexForRegion(String region) {
|
||||
public static final int indexForRegion(String region) {
|
||||
if (region.length() == 2) {
|
||||
int a = region.charAt(0) - 'A';
|
||||
if (a < 0 || 25 < a) { return 0; }
|
||||
|
|
|
@ -2,12 +2,20 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.locale;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.MissingResourceException;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
|
||||
import com.ibm.icu.impl.ICUData;
|
||||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.impl.UResource;
|
||||
import com.ibm.icu.util.BytesTrie;
|
||||
import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
/**
|
||||
|
@ -16,17 +24,17 @@ import com.ibm.icu.util.ULocale;
|
|||
*/
|
||||
public class LocaleDistance {
|
||||
/** Distance value bit flag, set by the builder. */
|
||||
static final int DISTANCE_SKIP_SCRIPT = 0x80;
|
||||
public static final int DISTANCE_SKIP_SCRIPT = 0x80;
|
||||
/** Distance value bit flag, set by trieNext(). */
|
||||
private static final int DISTANCE_IS_FINAL = 0x100;
|
||||
private static final int DISTANCE_IS_FINAL_OR_SKIP_SCRIPT =
|
||||
DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT;
|
||||
// Indexes into array of distances.
|
||||
static final int IX_DEF_LANG_DISTANCE = 0;
|
||||
static final int IX_DEF_SCRIPT_DISTANCE = 1;
|
||||
static final int IX_DEF_REGION_DISTANCE = 2;
|
||||
static final int IX_MIN_REGION_DISTANCE = 3;
|
||||
static final int IX_LIMIT = 4;
|
||||
public static final int IX_DEF_LANG_DISTANCE = 0;
|
||||
public static final int IX_DEF_SCRIPT_DISTANCE = 1;
|
||||
public static final int IX_DEF_REGION_DISTANCE = 2;
|
||||
public static final int IX_MIN_REGION_DISTANCE = 3;
|
||||
public static final int IX_LIMIT = 4;
|
||||
private static final int ABOVE_THRESHOLD = 100;
|
||||
|
||||
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
|
||||
|
@ -54,22 +62,100 @@ public class LocaleDistance {
|
|||
private final int minRegionDistance;
|
||||
private final int defaultDemotionPerDesiredLocale;
|
||||
|
||||
// TODO: Load prebuilt data from a resource bundle
|
||||
// to avoid the dependency on the builder code.
|
||||
// VisibleForTesting
|
||||
public static final LocaleDistance INSTANCE = LocaleDistanceBuilder.build();
|
||||
public static final class Data {
|
||||
public byte[] trie;
|
||||
public byte[] regionToPartitionsIndex;
|
||||
public String[] partitionArrays;
|
||||
public Set<LSR> paradigmLSRs;
|
||||
public int[] distances;
|
||||
|
||||
LocaleDistance(BytesTrie trie,
|
||||
byte[] regionToPartitionsIndex, String[] partitionArrays,
|
||||
Set<LSR> paradigmLSRs, int[] distances) {
|
||||
this.trie = trie;
|
||||
this.regionToPartitionsIndex = regionToPartitionsIndex;
|
||||
this.partitionArrays = partitionArrays;
|
||||
this.paradigmLSRs = paradigmLSRs;
|
||||
defaultLanguageDistance = distances[IX_DEF_LANG_DISTANCE];
|
||||
defaultScriptDistance = distances[IX_DEF_SCRIPT_DISTANCE];
|
||||
defaultRegionDistance = distances[IX_DEF_REGION_DISTANCE];
|
||||
this.minRegionDistance = distances[IX_MIN_REGION_DISTANCE];
|
||||
public Data(byte[] trie,
|
||||
byte[] regionToPartitionsIndex, String[] partitionArrays,
|
||||
Set<LSR> paradigmLSRs, int[] distances) {
|
||||
this.trie = trie;
|
||||
this.regionToPartitionsIndex = regionToPartitionsIndex;
|
||||
this.partitionArrays = partitionArrays;
|
||||
this.paradigmLSRs = paradigmLSRs;
|
||||
this.distances = distances;
|
||||
}
|
||||
|
||||
private static UResource.Value getValue(UResource.Table table,
|
||||
String key, UResource.Value value) {
|
||||
if (!table.findValue(key, value)) {
|
||||
throw new MissingResourceException(
|
||||
"langInfo.res missing data", "", "match/" + key);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
public static Data load() throws MissingResourceException {
|
||||
ICUResourceBundle langInfo = ICUResourceBundle.getBundleInstance(
|
||||
ICUData.ICU_BASE_NAME, "langInfo",
|
||||
ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT);
|
||||
UResource.Value value = langInfo.getValueWithFallback("match");
|
||||
UResource.Table matchTable = value.getTable();
|
||||
|
||||
ByteBuffer buffer = getValue(matchTable, "trie", value).getBinary();
|
||||
byte[] trie = new byte[buffer.remaining()];
|
||||
buffer.get(trie);
|
||||
|
||||
buffer = getValue(matchTable, "regionToPartitions", value).getBinary();
|
||||
byte[] regionToPartitions = new byte[buffer.remaining()];
|
||||
buffer.get(regionToPartitions);
|
||||
if (regionToPartitions.length < LSR.REGION_INDEX_LIMIT) {
|
||||
throw new MissingResourceException(
|
||||
"langInfo.res binary data too short", "", "match/regionToPartitions");
|
||||
}
|
||||
|
||||
String[] partitions = getValue(matchTable, "partitions", value).getStringArray();
|
||||
|
||||
Set<LSR> paradigmLSRs;
|
||||
if (matchTable.findValue("paradigms", value)) {
|
||||
String[] paradigms = value.getStringArray();
|
||||
paradigmLSRs = new HashSet<>(paradigms.length / 3);
|
||||
for (int i = 0; i < paradigms.length; i += 3) {
|
||||
paradigmLSRs.add(new LSR(paradigms[i], paradigms[i + 1], paradigms[i + 2]));
|
||||
}
|
||||
} else {
|
||||
paradigmLSRs = Collections.emptySet();
|
||||
}
|
||||
|
||||
int[] distances = getValue(matchTable, "distances", value).getIntVector();
|
||||
if (distances.length < IX_LIMIT) {
|
||||
throw new MissingResourceException(
|
||||
"langInfo.res intvector too short", "", "match/distances");
|
||||
}
|
||||
|
||||
return new Data(trie, regionToPartitions, partitions, paradigmLSRs, distances);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (this == other) { return true; }
|
||||
if (!getClass().equals(other.getClass())) { return false; }
|
||||
Data od = (Data)other;
|
||||
return Arrays.equals(trie, od.trie) &&
|
||||
Arrays.equals(regionToPartitionsIndex, od.regionToPartitionsIndex) &&
|
||||
Arrays.equals(partitionArrays, od.partitionArrays) &&
|
||||
paradigmLSRs.equals(od.paradigmLSRs) &&
|
||||
Arrays.equals(distances, od.distances);
|
||||
}
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
public static final LocaleDistance INSTANCE = new LocaleDistance(Data.load());
|
||||
|
||||
private LocaleDistance(Data data) {
|
||||
this.trie = new BytesTrie(data.trie, 0);
|
||||
this.regionToPartitionsIndex = data.regionToPartitionsIndex;
|
||||
this.partitionArrays = data.partitionArrays;
|
||||
this.paradigmLSRs = data.paradigmLSRs;
|
||||
defaultLanguageDistance = data.distances[IX_DEF_LANG_DISTANCE];
|
||||
defaultScriptDistance = data.distances[IX_DEF_SCRIPT_DISTANCE];
|
||||
defaultRegionDistance = data.distances[IX_DEF_REGION_DISTANCE];
|
||||
this.minRegionDistance = data.distances[IX_MIN_REGION_DISTANCE];
|
||||
|
||||
LSR en = new LSR("en", "Latn", "US");
|
||||
LSR enGB = new LSR("en", "Latn", "GB");
|
||||
|
@ -102,7 +188,7 @@ public class LocaleDistance {
|
|||
* (negative if none has a distance below the threshold),
|
||||
* and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
|
||||
*/
|
||||
int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
|
||||
public int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
|
||||
int threshold, FavorSubtag favorSubtag) {
|
||||
BytesTrie iter = new BytesTrie(trie);
|
||||
// Look up the desired language only once for all supported LSRs.
|
||||
|
@ -335,7 +421,7 @@ public class LocaleDistance {
|
|||
return partitionArrays[pIndex];
|
||||
}
|
||||
|
||||
boolean isParadigmLSR(LSR lsr) {
|
||||
public boolean isParadigmLSR(LSR lsr) {
|
||||
return paradigmLSRs.contains(lsr);
|
||||
}
|
||||
|
||||
|
@ -348,7 +434,7 @@ public class LocaleDistance {
|
|||
return defaultRegionDistance;
|
||||
}
|
||||
|
||||
int getDefaultDemotionPerDesiredLocale() {
|
||||
public int getDefaultDemotionPerDesiredLocale() {
|
||||
return defaultDemotionPerDesiredLocale;
|
||||
}
|
||||
|
||||
|
|
|
@ -2,10 +2,18 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.locale;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.MissingResourceException;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import com.ibm.icu.impl.ICUData;
|
||||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.impl.UResource;
|
||||
import com.ibm.icu.util.BytesTrie;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
|
@ -14,30 +22,93 @@ public final class XLikelySubtags {
|
|||
private static final String PSEUDO_BIDI_PREFIX = "+"; // -XB, -PSBIDI
|
||||
private static final String PSEUDO_CRACKED_PREFIX = ","; // -XC, -PSCRACK
|
||||
|
||||
static final int SKIP_SCRIPT = 1;
|
||||
public static final int SKIP_SCRIPT = 1;
|
||||
|
||||
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
|
||||
|
||||
// TODO: Load prebuilt data from a resource bundle
|
||||
// to avoid the dependency on the builder code.
|
||||
// VisibleForTesting
|
||||
public static final XLikelySubtags INSTANCE = new XLikelySubtags(LikelySubtagsBuilder.build());
|
||||
public static final class Data {
|
||||
public final Map<String, String> languageAliases;
|
||||
public final Map<String, String> regionAliases;
|
||||
public final byte[] trie;
|
||||
public final LSR[] lsrs;
|
||||
|
||||
static final class Data {
|
||||
private final Map<String, String> languageAliases;
|
||||
private final Map<String, String> regionAliases;
|
||||
private final BytesTrie trie;
|
||||
private final LSR[] lsrs;
|
||||
|
||||
Data(Map<String, String> languageAliases, Map<String, String> regionAliases,
|
||||
BytesTrie trie, LSR[] lsrs) {
|
||||
public Data(Map<String, String> languageAliases, Map<String, String> regionAliases,
|
||||
byte[] trie, LSR[] lsrs) {
|
||||
this.languageAliases = languageAliases;
|
||||
this.regionAliases = regionAliases;
|
||||
this.trie = trie;
|
||||
this.lsrs = lsrs;
|
||||
}
|
||||
|
||||
private static UResource.Value getValue(UResource.Table table,
|
||||
String key, UResource.Value value) {
|
||||
if (!table.findValue(key, value)) {
|
||||
throw new MissingResourceException(
|
||||
"langInfo.res missing data", "", "likely/" + key);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
public static Data load() throws MissingResourceException {
|
||||
ICUResourceBundle langInfo = ICUResourceBundle.getBundleInstance(
|
||||
ICUData.ICU_BASE_NAME, "langInfo",
|
||||
ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT);
|
||||
UResource.Value value = langInfo.getValueWithFallback("likely");
|
||||
UResource.Table likelyTable = value.getTable();
|
||||
|
||||
Map<String, String> languageAliases;
|
||||
if (likelyTable.findValue("languageAliases", value)) {
|
||||
String[] pairs = value.getStringArray();
|
||||
languageAliases = new HashMap<>(pairs.length / 2);
|
||||
for (int i = 0; i < pairs.length; i += 2) {
|
||||
languageAliases.put(pairs[i], pairs[i + 1]);
|
||||
}
|
||||
} else {
|
||||
languageAliases = Collections.emptyMap();
|
||||
}
|
||||
|
||||
Map<String, String> regionAliases;
|
||||
if (likelyTable.findValue("regionAliases", value)) {
|
||||
String[] pairs = value.getStringArray();
|
||||
regionAliases = new HashMap<>(pairs.length / 2);
|
||||
for (int i = 0; i < pairs.length; i += 2) {
|
||||
regionAliases.put(pairs[i], pairs[i + 1]);
|
||||
}
|
||||
} else {
|
||||
regionAliases = Collections.emptyMap();
|
||||
}
|
||||
|
||||
ByteBuffer buffer = getValue(likelyTable, "trie", value).getBinary();
|
||||
byte[] trie = new byte[buffer.remaining()];
|
||||
buffer.get(trie);
|
||||
|
||||
String[] lsrSubtags = getValue(likelyTable, "lsrs", value).getStringArray();
|
||||
LSR[] lsrs = new LSR[lsrSubtags.length / 3];
|
||||
for (int i = 0, j = 0; i < lsrSubtags.length; i += 3, ++j) {
|
||||
lsrs[j] = new LSR(lsrSubtags[i], lsrSubtags[i + 1], lsrSubtags[i + 2]);
|
||||
}
|
||||
|
||||
return new Data(languageAliases, regionAliases, trie, lsrs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (this == other) { return true; }
|
||||
if (!getClass().equals(other.getClass())) { return false; }
|
||||
Data od = (Data)other;
|
||||
return
|
||||
languageAliases.equals(od.languageAliases) &&
|
||||
regionAliases.equals(od.regionAliases) &&
|
||||
Arrays.equals(trie, od.trie) &&
|
||||
Arrays.equals(lsrs, od.lsrs);
|
||||
}
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
public static final XLikelySubtags INSTANCE = new XLikelySubtags(Data.load());
|
||||
|
||||
private final Map<String, String> languageAliases;
|
||||
private final Map<String, String> regionAliases;
|
||||
|
||||
|
@ -54,7 +125,7 @@ public final class XLikelySubtags {
|
|||
private XLikelySubtags(XLikelySubtags.Data data) {
|
||||
languageAliases = data.languageAliases;
|
||||
regionAliases = data.regionAliases;
|
||||
trie = data.trie;
|
||||
trie = new BytesTrie(data.trie, 0);
|
||||
lsrs = data.lsrs;
|
||||
|
||||
// Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
|
||||
|
@ -85,6 +156,23 @@ public final class XLikelySubtags {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Implementation of LocaleMatcher.canonicalize(ULocale).
|
||||
*/
|
||||
public ULocale canonicalize(ULocale locale) {
|
||||
String lang = locale.getLanguage();
|
||||
String lang2 = languageAliases.get(lang);
|
||||
String region = locale.getCountry();
|
||||
String region2 = regionAliases.get(region);
|
||||
if (lang2 != null || region2 != null) {
|
||||
return new ULocale(
|
||||
lang2 == null ? lang : lang2,
|
||||
locale.getScript(),
|
||||
region2 == null ? region : region2);
|
||||
}
|
||||
return locale;
|
||||
}
|
||||
|
||||
private static String getCanonical(Map<String, String> aliases, String alias) {
|
||||
String canonical = aliases.get(alias);
|
||||
return canonical == null ? alias : canonical;
|
||||
|
@ -101,7 +189,7 @@ public final class XLikelySubtags {
|
|||
locale.getVariant());
|
||||
}
|
||||
|
||||
LSR makeMaximizedLsrFrom(Locale locale) {
|
||||
public LSR makeMaximizedLsrFrom(Locale locale) {
|
||||
String tag = locale.toLanguageTag();
|
||||
if (tag.startsWith("x-")) {
|
||||
// Private use language tag x-subtag-subtag...
|
||||
|
|
|
@ -1,900 +0,0 @@
|
|||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.locale;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
import com.ibm.icu.util.LocalePriorityList;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
/**
|
||||
* Immutable class that picks the best match between a user's desired locales and
|
||||
* and application's supported locales.
|
||||
*
|
||||
* <p>If there are multiple supported locales with the same (language, script, region)
|
||||
* likely subtags, then the current implementation returns the first of those locales.
|
||||
* It ignores variant subtags (except for pseudolocale variants) and extensions.
|
||||
* This may change in future versions.
|
||||
*
|
||||
* <p>For example, the current implementation does not distinguish between
|
||||
* de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
|
||||
*
|
||||
* <p>If you prefer one equivalent locale over another, then provide only the preferred one,
|
||||
* or place it earlier in the list of supported locales.
|
||||
*
|
||||
* <p>Otherwise, the order of supported locales may have no effect on the best-match results.
|
||||
* The current implementation compares each desired locale with supported locales
|
||||
* in the following order:
|
||||
* 1. Default locale, if supported;
|
||||
* 2. CLDR "paradigm locales" like en-GB and es-419;
|
||||
* 3. other supported locales.
|
||||
* This may change in future versions.
|
||||
*
|
||||
* <p>TODO: Migration notes.
|
||||
*
|
||||
* @author markdavis
|
||||
*/
|
||||
public final class XLocaleMatcher {
|
||||
private static final LSR UND_LSR = new LSR("und","","");
|
||||
private static final ULocale UND_ULOCALE = new ULocale("und");
|
||||
private static final Locale UND_LOCALE = new Locale("und");
|
||||
|
||||
// Activates debugging output to stderr with details of GetBestMatch.
|
||||
private static final boolean TRACE_MATCHER = false;
|
||||
|
||||
private static abstract class LsrIterator implements Iterator<LSR> {
|
||||
int bestDesiredIndex = -1;
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public abstract void rememberCurrent(int desiredIndex);
|
||||
}
|
||||
|
||||
/**
 * Builder option that selects which subtag matters most when matching:
 * the language subtag or the script subtag.
 *
 * @see Builder#setFavorSubtag(FavorSubtag)
 * @draft ICU 65
 * @provisional This API might change or be removed in a future release.
 */
public enum FavorSubtag {
    /**
     * The default behavior: language differences weigh most,
     * followed by script differences and then region differences.
     *
     * @draft ICU 65
     * @provisional This API might change or be removed in a future release.
     */
    LANGUAGE,

    /**
     * Gives script differences relatively more weight than language differences.
     *
     * @draft ICU 65
     * @provisional This API might change or be removed in a future release.
     */
    SCRIPT
}
|
||||
|
||||
/**
 * Builder option that decides whether every desired locale is treated equally
 * or whether earlier desired locales are preferred over later ones.
 *
 * @see Builder#setDemotionPerDesiredLocale(Demotion)
 * @draft ICU 65
 * @provisional This API might change or be removed in a future release.
 */
public enum Demotion {
    /**
     * No demotion: all desired locales are treated equally.
     *
     * @draft ICU 65
     * @provisional This API might change or be removed in a future release.
     */
    NONE,

    /**
     * Earlier desired locales are preferred.
     *
     * <p>With each step from one desired locale to the next,
     * the distance to any supported locale grows by an additional amount
     * that is at least as large as most region mismatches.
     * For a later desired locale to win, it must match some supported locale better
     * by more than merely sharing the same region subtag.
     *
     * <p>For example: <code>Supported={en, sv} desired=[en-GB, sv]</code>
     * yields <code>Result(en-GB, en)</code>: once sv is demoted,
     * its perfect match is no better than the region distance
     * between the earlier desired locale en-GB and en=en-US.
     *
     * <p>Notes:
     * <ul>
     *   <li>Some language and/or script differences can be as small as
     *       the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
     *   <li>Certain region differences could be larger than usual,
     *       and larger than the demotion.
     *       (As of CLDR 35 there is no such case, but
     *       future versions of the data may contain one.)
     * </ul>
     *
     * @draft ICU 65
     * @provisional This API might change or be removed in a future release.
     */
    REGION
}
|
||||
|
||||
/**
|
||||
* Data for the best-matching pair of a desired and a supported locale.
|
||||
*
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static final class Result {
|
||||
private final ULocale desiredULocale;
|
||||
private final ULocale supportedULocale;
|
||||
private final Locale desiredLocale;
|
||||
private final Locale supportedLocale;
|
||||
private final int desiredIndex;
|
||||
private final int supportedIndex;
|
||||
|
||||
private Result(ULocale udesired, ULocale usupported,
|
||||
Locale desired, Locale supported,
|
||||
int desIndex, int suppIndex) {
|
||||
desiredULocale = udesired;
|
||||
supportedULocale = usupported;
|
||||
desiredLocale = desired;
|
||||
supportedLocale = supported;
|
||||
desiredIndex = desIndex;
|
||||
supportedIndex = suppIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the best-matching desired locale.
|
||||
* null if the list of desired locales is empty or if none matched well enough.
|
||||
*
|
||||
* @return the best-matching desired locale, or null.
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public ULocale getDesiredULocale() {
|
||||
return desiredULocale == null && desiredLocale != null ?
|
||||
ULocale.forLocale(desiredLocale) : desiredULocale;
|
||||
}
|
||||
/**
|
||||
* Returns the best-matching desired locale.
|
||||
* null if the list of desired locales is empty or if none matched well enough.
|
||||
*
|
||||
* @return the best-matching desired locale, or null.
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public Locale getDesiredLocale() {
|
||||
return desiredLocale == null && desiredULocale != null ?
|
||||
desiredULocale.toLocale() : desiredLocale;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the best-matching supported locale.
|
||||
* If none matched well enough, this is the default locale.
|
||||
* The default locale is null if the list of supported locales is empty and
|
||||
* no explicit default locale is set.
|
||||
*
|
||||
* @return the best-matching supported locale, or null.
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public ULocale getSupportedULocale() { return supportedULocale; }
|
||||
/**
|
||||
* Returns the best-matching supported locale.
|
||||
* If none matched well enough, this is the default locale.
|
||||
* The default locale is null if the list of supported locales is empty and
|
||||
* no explicit default locale is set.
|
||||
*
|
||||
* @return the best-matching supported locale, or null.
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public Locale getSupportedLocale() { return supportedLocale; }
|
||||
|
||||
/**
|
||||
* Returns the index of the best-matching desired locale in the input Iterable order.
|
||||
* -1 if the list of desired locales is empty or if none matched well enough.
|
||||
*
|
||||
* @return the index of the best-matching desired locale, or -1.
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public int getDesiredIndex() { return desiredIndex; }
|
||||
|
||||
/**
|
||||
* Returns the index of the best-matching supported locale in the constructor’s or builder’s input order
|
||||
* (“set” Collection plus “added” locales).
|
||||
* If the matcher was built from a locale list string, then the iteration order is that
|
||||
* of a LocalePriorityList built from the same string.
|
||||
* -1 if the list of supported locales is empty or if none matched well enough.
|
||||
*
|
||||
* @return the index of the best-matching supported locale, or -1.
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public int getSupportedIndex() { return supportedIndex; }
|
||||
|
||||
/**
|
||||
* Takes the best-matching supported locale and adds relevant fields of the
|
||||
* best-matching desired locale, such as the -t- and -u- extensions.
|
||||
* May replace some fields of the supported locale.
|
||||
* The result is the locale that should be used for date and number formatting, collation, etc.
|
||||
*
|
||||
* <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn
|
||||
*
|
||||
* @return the service locale, combining the best-matching desired and supported locales.
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public ULocale makeServiceULocale() {
|
||||
ULocale bestDesired = getDesiredULocale();
|
||||
ULocale serviceLocale = supportedULocale;
|
||||
if (!serviceLocale.equals(bestDesired) && bestDesired != null) {
|
||||
ULocale.Builder b = new ULocale.Builder().setLocale(serviceLocale);
|
||||
|
||||
// Copy the region from bestDesired, if there is one.
|
||||
// TODO: Seems wrong to clobber serviceLocale.getCountry() if that is not empty.
|
||||
String region = bestDesired.getCountry();
|
||||
if (!region.isEmpty()) {
|
||||
b.setRegion(region);
|
||||
}
|
||||
|
||||
// Copy the variants from bestDesired, if there are any.
|
||||
// Note that this will override any serviceLocale variants.
|
||||
// For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster).
|
||||
// TODO: Why replace? Why not append?
|
||||
String variants = bestDesired.getVariant();
|
||||
if (!variants.isEmpty()) {
|
||||
b.setVariant(variants);
|
||||
}
|
||||
|
||||
// Copy the extensions from bestDesired, if there are any.
|
||||
// Note that this will override any serviceLocale extensions.
|
||||
// For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native"
|
||||
// (replacing calendar).
|
||||
// TODO: Maybe enumerate -u- keys to not replace others in the serviceLocale??
|
||||
// (Unsure about this one.)
|
||||
for (char extensionKey : bestDesired.getExtensionKeys()) {
|
||||
b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
|
||||
}
|
||||
serviceLocale = b.build();
|
||||
}
|
||||
return serviceLocale;
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes the best-matching supported locale and adds relevant fields of the
|
||||
* best-matching desired locale, such as the -t- and -u- extensions.
|
||||
* May replace some fields of the supported locale.
|
||||
* The result is the locale that should be used for date and number formatting, collation, etc.
|
||||
*
|
||||
* <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, service locale=ar-EG-u-nu-latn
|
||||
*
|
||||
* @return the service locale, combining the best-matching desired and supported locales.
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public Locale makeServiceLocale() {
|
||||
return makeServiceULocale().toLocale();
|
||||
}
|
||||
}
|
||||
|
||||
private final int thresholdDistance;
|
||||
private final int demotionPerDesiredLocale;
|
||||
private final FavorSubtag favorSubtag;
|
||||
|
||||
// These are in input order.
|
||||
private final ULocale[] supportedULocales;
|
||||
private final Locale[] supportedLocales;
|
||||
// These are in preference order: 1. Default locale 2. paradigm locales 3. others.
|
||||
private final Map<LSR, Integer> supportedLsrToIndex;
|
||||
// Array versions of the supportedLsrToIndex keys and values.
|
||||
// The distance lookup loops over the supportedLsrs and returns the index of the best match.
|
||||
private final LSR[] supportedLsrs;
|
||||
private final int[] supportedIndexes;
|
||||
private final ULocale defaultULocale;
|
||||
private final Locale defaultLocale;
|
||||
private final int defaultLocaleIndex;
|
||||
|
||||
/**
|
||||
* LocaleMatcher Builder.
|
||||
*
|
||||
* @see XLocaleMatcher#builder()
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static class Builder {
|
||||
private List<ULocale> supportedLocales;
|
||||
private int thresholdDistance = -1;
|
||||
private Demotion demotion;
|
||||
private ULocale defaultLocale;
|
||||
private FavorSubtag favor;
|
||||
|
||||
/**
|
||||
* Parses the string like {@link LocalePriorityList} does and
|
||||
* sets the supported locales accordingly.
|
||||
* Clears any previously set/added supported locales first.
|
||||
*
|
||||
* @param locales the languagePriorityList to set
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public Builder setSupportedLocales(String locales) {
|
||||
return setSupportedULocales(LocalePriorityList.add(locales).build().getULocales());
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies the supported locales, preserving iteration order.
|
||||
* Clears any previously set/added supported locales first.
|
||||
* Duplicates are allowed, and are not removed.
|
||||
*
|
||||
* @param locales the list of locale
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public Builder setSupportedULocales(Collection<ULocale> locales) {
|
||||
supportedLocales = new ArrayList<>(locales);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies the supported locales, preserving iteration order.
|
||||
* Clears any previously set/added supported locales first.
|
||||
* Duplicates are allowed, and are not removed.
|
||||
*
|
||||
* @param locales the list of locale
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public Builder setSupportedLocales(Collection<Locale> locales) {
|
||||
supportedLocales = new ArrayList<>(locales.size());
|
||||
for (Locale locale : locales) {
|
||||
supportedLocales.add(ULocale.forLocale(locale));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds another supported locale.
|
||||
* Duplicates are allowed, and are not removed.
|
||||
*
|
||||
* @param locale the list of locale
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public Builder addSupportedULocale(ULocale locale) {
|
||||
if (supportedLocales == null) {
|
||||
supportedLocales = new ArrayList<>();
|
||||
}
|
||||
supportedLocales.add(locale);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds another supported locale.
|
||||
* Duplicates are allowed, and are not removed.
|
||||
*
|
||||
* @param locale the list of locale
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public Builder addSupportedLocale(Locale locale) {
|
||||
return addSupportedULocale(ULocale.forLocale(locale));
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the default locale; if null, or if it is not set explicitly,
|
||||
* then the first supported locale is used as the default locale.
|
||||
*
|
||||
* @param defaultLocale the default locale
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public Builder setDefaultULocale(ULocale defaultLocale) {
|
||||
this.defaultLocale = defaultLocale;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the default locale; if null, or if it is not set explicitly,
|
||||
* then the first supported locale is used as the default locale.
|
||||
*
|
||||
* @param defaultLocale the default locale
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public Builder setDefaultLocale(Locale defaultLocale) {
|
||||
this.defaultLocale = ULocale.forLocale(defaultLocale);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* If SCRIPT, then the language differences are smaller than script differences.
|
||||
* This is used in situations (such as maps) where
|
||||
* it is better to fall back to the same script than a similar language.
|
||||
*
|
||||
* @param subtag the subtag to favor
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public Builder setFavorSubtag(FavorSubtag subtag) {
|
||||
this.favor = subtag;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Option for whether all desired locales are treated equally or
|
||||
* earlier ones are preferred (this is the default).
|
||||
*
|
||||
* @param demotion the demotion per desired locale to set.
|
||||
* @return this Builder object
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public Builder setDemotionPerDesiredLocale(Demotion demotion) {
|
||||
this.demotion = demotion;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* <i>Internal only!</i>
|
||||
*
|
||||
* @param thresholdDistance the thresholdDistance to set, with -1 = default
|
||||
* @return this Builder object
|
||||
* @internal
|
||||
* @deprecated This API is ICU internal only.
|
||||
*/
|
||||
@Deprecated
|
||||
public Builder internalSetThresholdDistance(int thresholdDistance) {
|
||||
if (thresholdDistance > 100) {
|
||||
thresholdDistance = 100;
|
||||
}
|
||||
this.thresholdDistance = thresholdDistance;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds and returns a new locale matcher.
|
||||
* This builder can continue to be used.
|
||||
*
|
||||
* @return new XLocaleMatcher.
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public XLocaleMatcher build() {
|
||||
return new XLocaleMatcher(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder s = new StringBuilder().append("{XLocaleMatcher.Builder");
|
||||
if (!supportedLocales.isEmpty()) {
|
||||
s.append(" supported={").append(supportedLocales.toString()).append('}');
|
||||
}
|
||||
if (defaultLocale != null) {
|
||||
s.append(" default=").append(defaultLocale.toString());
|
||||
}
|
||||
if (favor != null) {
|
||||
s.append(" distance=").append(favor.toString());
|
||||
}
|
||||
if (thresholdDistance >= 0) {
|
||||
s.append(String.format(" threshold=%d", thresholdDistance));
|
||||
}
|
||||
if (demotion != null) {
|
||||
s.append(" demotion=").append(demotion.toString());
|
||||
}
|
||||
return s.append('}').toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a builder used in chaining parameters for building a LocaleMatcher.
|
||||
*
|
||||
* @return a new Builder object
|
||||
* @draft ICU 65
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
/**
 * Convenience constructor: builds a matcher whose supported locales are parsed
 * from a locale list string; all other options keep their defaults.
 *
 * @param supportedLocales the supported locales, in the LocalePriorityList string syntax
 */
public XLocaleMatcher(String supportedLocales) {
    this(new Builder().setSupportedLocales(supportedLocales));
}
|
||||
/**
 * Convenience constructor: builds a matcher whose supported locales are
 * the ULocales of the given priority list; all other options keep their defaults.
 *
 * @param supportedLocales the supported locales
 */
public XLocaleMatcher(LocalePriorityList supportedLocales) {
    this(new Builder().setSupportedULocales(supportedLocales.getULocales()));
}
|
||||
|
||||
private XLocaleMatcher(Builder builder) {
|
||||
thresholdDistance = builder.thresholdDistance < 0 ?
|
||||
LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
|
||||
// Store the supported locales in input order,
|
||||
// so that when different types are used (e.g., java.util.Locale)
|
||||
// we can return those by parallel index.
|
||||
int supportedLocalesLength = builder.supportedLocales.size();
|
||||
supportedULocales = new ULocale[supportedLocalesLength];
|
||||
supportedLocales = new Locale[supportedLocalesLength];
|
||||
// Supported LRSs in input order.
|
||||
LSR lsrs[] = new LSR[supportedLocalesLength];
|
||||
// Also find the first supported locale whose LSR is
|
||||
// the same as that for the default locale.
|
||||
ULocale udef = builder.defaultLocale;
|
||||
Locale def = null;
|
||||
LSR defLSR = null;
|
||||
int idef = -1;
|
||||
if (udef != null) {
|
||||
def = udef.toLocale();
|
||||
defLSR = getMaximalLsrOrUnd(udef);
|
||||
}
|
||||
int i = 0;
|
||||
for (ULocale locale : builder.supportedLocales) {
|
||||
supportedULocales[i] = locale;
|
||||
supportedLocales[i] = locale.toLocale();
|
||||
LSR lsr = lsrs[i] = getMaximalLsrOrUnd(locale);
|
||||
if (idef < 0 && defLSR != null && lsr.equals(defLSR)) {
|
||||
idef = i;
|
||||
}
|
||||
++i;
|
||||
}
|
||||
|
||||
// We need an unordered map from LSR to first supported locale with that LSR,
|
||||
// and an ordered list of (LSR, Indexes).
|
||||
// We use a LinkedHashMap for both,
|
||||
// and insert the supported locales in the following order:
|
||||
// 1. Default locale, if it is supported.
|
||||
// 2. Priority locales in builder order.
|
||||
// 3. Remaining locales in builder order.
|
||||
supportedLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
|
||||
Map<LSR, Integer> otherLsrToIndex = null;
|
||||
if (idef >= 0) {
|
||||
supportedLsrToIndex.put(defLSR, idef);
|
||||
}
|
||||
i = 0;
|
||||
for (ULocale locale : supportedULocales) {
|
||||
if (i == idef) { continue; }
|
||||
LSR lsr = lsrs[i];
|
||||
if (defLSR == null) {
|
||||
assert i == 0;
|
||||
udef = locale;
|
||||
def = supportedLocales[0];
|
||||
defLSR = lsr;
|
||||
idef = 0;
|
||||
supportedLsrToIndex.put(lsr, 0);
|
||||
} else if (lsr.equals(defLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
|
||||
putIfAbsent(supportedLsrToIndex, lsr, i);
|
||||
} else {
|
||||
if (otherLsrToIndex == null) {
|
||||
otherLsrToIndex = new LinkedHashMap<>(supportedLocalesLength);
|
||||
}
|
||||
putIfAbsent(otherLsrToIndex, lsr, i);
|
||||
}
|
||||
++i;
|
||||
}
|
||||
if (otherLsrToIndex != null) {
|
||||
supportedLsrToIndex.putAll(otherLsrToIndex);
|
||||
}
|
||||
int numSuppLsrs = supportedLsrToIndex.size();
|
||||
supportedLsrs = new LSR[numSuppLsrs];
|
||||
supportedIndexes = new int[numSuppLsrs];
|
||||
i = 0;
|
||||
for (Map.Entry<LSR, Integer> entry : supportedLsrToIndex.entrySet()) {
|
||||
supportedLsrs[i] = entry.getKey(); // = lsrs[entry.getValue()]
|
||||
supportedIndexes[i++] = entry.getValue();
|
||||
}
|
||||
|
||||
defaultULocale = udef;
|
||||
defaultLocale = def;
|
||||
defaultLocaleIndex = idef;
|
||||
demotionPerDesiredLocale =
|
||||
builder.demotion == Demotion.NONE ? 0 :
|
||||
LocaleDistance.INSTANCE.getDefaultDemotionPerDesiredLocale(); // null or REGION
|
||||
favorSubtag = builder.favor;
|
||||
}
|
||||
|
||||
private static final void putIfAbsent(Map<LSR, Integer> lsrToIndex, LSR lsr, int i) {
|
||||
Integer index = lsrToIndex.get(lsr);
|
||||
if (index == null) {
|
||||
lsrToIndex.put(lsr, i);
|
||||
}
|
||||
}
|
||||
|
||||
private static final LSR getMaximalLsrOrUnd(ULocale locale) {
|
||||
if (locale.equals(UND_ULOCALE)) {
|
||||
return UND_LSR;
|
||||
} else {
|
||||
return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
|
||||
}
|
||||
}
|
||||
|
||||
private static final LSR getMaximalLsrOrUnd(Locale locale) {
|
||||
if (locale.equals(UND_LOCALE)) {
|
||||
return UND_LSR;
|
||||
} else {
|
||||
return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
|
||||
}
|
||||
}
|
||||
|
||||
private static final class ULocaleLsrIterator extends LsrIterator {
|
||||
private Iterator<ULocale> locales;
|
||||
private ULocale current, remembered;
|
||||
|
||||
ULocaleLsrIterator(Iterator<ULocale> locales) {
|
||||
this.locales = locales;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return locales.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public LSR next() {
|
||||
current = locales.next();
|
||||
return getMaximalLsrOrUnd(current);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void rememberCurrent(int desiredIndex) {
|
||||
bestDesiredIndex = desiredIndex;
|
||||
remembered = current;
|
||||
}
|
||||
}
|
||||
|
||||
private static final class LocaleLsrIterator extends LsrIterator {
|
||||
private Iterator<Locale> locales;
|
||||
private Locale current, remembered;
|
||||
|
||||
LocaleLsrIterator(Iterator<Locale> locales) {
|
||||
this.locales = locales;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return locales.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public LSR next() {
|
||||
current = locales.next();
|
||||
return getMaximalLsrOrUnd(current);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void rememberCurrent(int desiredIndex) {
|
||||
bestDesiredIndex = desiredIndex;
|
||||
remembered = current;
|
||||
}
|
||||
}
|
||||
|
||||
public ULocale getBestMatch(ULocale desiredLocale) {
|
||||
LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
|
||||
int suppIndex = getBestSuppIndex(desiredLSR, null);
|
||||
return suppIndex >= 0 ? supportedULocales[suppIndex] : defaultULocale;
|
||||
}
|
||||
|
||||
public ULocale getBestMatch(Iterable<ULocale> desiredLocales) {
|
||||
Iterator<ULocale> desiredIter = desiredLocales.iterator();
|
||||
if (!desiredIter.hasNext()) {
|
||||
return defaultULocale;
|
||||
}
|
||||
ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter);
|
||||
LSR desiredLSR = lsrIter.next();
|
||||
int suppIndex = getBestSuppIndex(desiredLSR, lsrIter);
|
||||
return suppIndex >= 0 ? supportedULocales[suppIndex] : defaultULocale;
|
||||
}
|
||||
|
||||
public ULocale getBestMatch(String desiredLocaleList) {
|
||||
return getBestMatch(LocalePriorityList.add(desiredLocaleList).build());
|
||||
}
|
||||
|
||||
public Locale getBestLocale(Locale desiredLocale) {
|
||||
LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
|
||||
int suppIndex = getBestSuppIndex(desiredLSR, null);
|
||||
return suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
|
||||
}
|
||||
|
||||
public Locale getBestLocale(Iterable<Locale> desiredLocales) {
|
||||
Iterator<Locale> desiredIter = desiredLocales.iterator();
|
||||
if (!desiredIter.hasNext()) {
|
||||
return defaultLocale;
|
||||
}
|
||||
LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter);
|
||||
LSR desiredLSR = lsrIter.next();
|
||||
int suppIndex = getBestSuppIndex(desiredLSR, lsrIter);
|
||||
return suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
|
||||
}
|
||||
|
||||
private Result makeResult(ULocale desiredLocale, ULocaleLsrIterator lsrIter, int suppIndex) {
|
||||
if (suppIndex < 0) {
|
||||
return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
|
||||
} else if (desiredLocale != null) {
|
||||
return new Result(desiredLocale, supportedULocales[suppIndex],
|
||||
null, supportedLocales[suppIndex], 0, suppIndex);
|
||||
} else {
|
||||
return new Result(lsrIter.remembered, supportedULocales[suppIndex],
|
||||
null, supportedLocales[suppIndex], lsrIter.bestDesiredIndex, suppIndex);
|
||||
}
|
||||
}
|
||||
|
||||
private Result makeResult(Locale desiredLocale, LocaleLsrIterator lsrIter, int suppIndex) {
|
||||
if (suppIndex < 0) {
|
||||
return new Result(null, defaultULocale, null, defaultLocale, -1, defaultLocaleIndex);
|
||||
} else if (desiredLocale != null) {
|
||||
return new Result(null, supportedULocales[suppIndex],
|
||||
desiredLocale, supportedLocales[suppIndex], 0, suppIndex);
|
||||
} else {
|
||||
return new Result(null, supportedULocales[suppIndex],
|
||||
lsrIter.remembered, supportedLocales[suppIndex],
|
||||
lsrIter.bestDesiredIndex, suppIndex);
|
||||
}
|
||||
}
|
||||
|
||||
public Result getBestMatchResult(ULocale desiredLocale) {
|
||||
LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
|
||||
int suppIndex = getBestSuppIndex(desiredLSR, null);
|
||||
return makeResult(desiredLocale, null, suppIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the best match between the desired and supported locales.
|
||||
*
|
||||
* @param desiredLocales Typically a user's languages, in order of preference (descending).
|
||||
* @return the best-matching pair of a desired and a supported locale.
|
||||
*/
|
||||
public Result getBestMatchResult(Iterable<ULocale> desiredLocales) {
|
||||
Iterator<ULocale> desiredIter = desiredLocales.iterator();
|
||||
if (!desiredIter.hasNext()) {
|
||||
return makeResult(UND_ULOCALE, null, -1);
|
||||
}
|
||||
ULocaleLsrIterator lsrIter = new ULocaleLsrIterator(desiredIter);
|
||||
LSR desiredLSR = lsrIter.next();
|
||||
int suppIndex = getBestSuppIndex(desiredLSR, lsrIter);
|
||||
return makeResult(null, lsrIter, suppIndex);
|
||||
}
|
||||
|
||||
public Result getBestLocaleResult(Locale desiredLocale) {
|
||||
LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
|
||||
int suppIndex = getBestSuppIndex(desiredLSR, null);
|
||||
return makeResult(desiredLocale, null, suppIndex);
|
||||
}
|
||||
|
||||
public Result getBestLocaleResult(Iterable<Locale> desiredLocales) {
|
||||
Iterator<Locale> desiredIter = desiredLocales.iterator();
|
||||
if (!desiredIter.hasNext()) {
|
||||
return makeResult(UND_LOCALE, null, -1);
|
||||
}
|
||||
LocaleLsrIterator lsrIter = new LocaleLsrIterator(desiredIter);
|
||||
LSR desiredLSR = lsrIter.next();
|
||||
int suppIndex = getBestSuppIndex(desiredLSR, lsrIter);
|
||||
return makeResult(null, lsrIter, suppIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param desiredLSR The first desired locale's LSR.
|
||||
* @param remainingIter Remaining desired LSRs, null or empty if none.
|
||||
* @return the index of the best-matching supported locale, or -1 if there is no good match.
|
||||
*/
|
||||
private int getBestSuppIndex(LSR desiredLSR, LsrIterator remainingIter) {
|
||||
int desiredIndex = 0;
|
||||
int bestSupportedLsrIndex = -1;
|
||||
for (int bestDistance = thresholdDistance;;) {
|
||||
// Quick check for exact maximized LSR.
|
||||
Integer index = supportedLsrToIndex.get(desiredLSR);
|
||||
if (index != null) {
|
||||
int suppIndex = index;
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning %s: desiredLSR=supportedLSR\n",
|
||||
supportedULocales[suppIndex]);
|
||||
}
|
||||
if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
|
||||
return suppIndex;
|
||||
}
|
||||
int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
|
||||
desiredLSR, supportedLsrs, bestDistance, favorSubtag);
|
||||
if (bestIndexAndDistance >= 0) {
|
||||
bestDistance = bestIndexAndDistance & 0xff;
|
||||
if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
|
||||
bestSupportedLsrIndex = bestIndexAndDistance >> 8;
|
||||
}
|
||||
if ((bestDistance -= demotionPerDesiredLocale) <= 0) {
|
||||
break;
|
||||
}
|
||||
if (remainingIter == null || !remainingIter.hasNext()) {
|
||||
break;
|
||||
}
|
||||
desiredLSR = remainingIter.next();
|
||||
}
|
||||
if (bestSupportedLsrIndex < 0) {
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning default %s: no good match\n", defaultULocale);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
int suppIndex = supportedIndexes[bestSupportedLsrIndex];
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning %s: best matching supported locale\n",
|
||||
supportedULocales[suppIndex]);
|
||||
}
|
||||
return suppIndex;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder s = new StringBuilder().append("{XLocaleMatcher");
|
||||
if (supportedULocales.length > 0) {
|
||||
s.append(" supported={").append(supportedULocales[0].toString());
|
||||
for (int i = 1; i < supportedULocales.length; ++i) {
|
||||
s.append(", ").append(supportedULocales[i].toString());
|
||||
}
|
||||
s.append('}');
|
||||
}
|
||||
s.append(" default=").append(Objects.toString(defaultULocale));
|
||||
if (favorSubtag != null) {
|
||||
s.append(" distance=").append(favorSubtag.toString());
|
||||
}
|
||||
if (thresholdDistance >= 0) {
|
||||
s.append(String.format(" threshold=%d", thresholdDistance));
|
||||
}
|
||||
s.append(String.format(" demotion=%d", demotionPerDesiredLocale));
|
||||
return s.append('}').toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a fraction between 0 and 1, where 1 means that the languages are a
|
||||
* perfect match, and 0 means that they are completely different. This is (100-distance(desired, supported))/100.0.
|
||||
* <br>Note that
|
||||
* the precise values may change over time; no code should be made dependent
|
||||
* on the values remaining constant.
|
||||
* @param desired Desired locale
|
||||
* @param desiredMax Maximized locale (using likely subtags)
|
||||
* @param supported Supported locale
|
||||
* @param supportedMax Maximized locale (using likely subtags)
|
||||
* @return value between 0 and 1, inclusive.
|
||||
* @deprecated ICU 65 Build and use a matcher rather than comparing pairs of locales.
|
||||
*/
|
||||
@Deprecated
|
||||
public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
|
||||
// Returns the inverse of the distance: That is, 1-distance(desired, supported).
|
||||
int distance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
|
||||
XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired),
|
||||
new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) },
|
||||
thresholdDistance, favorSubtag) & 0xff;
|
||||
return (100 - distance) / 100.0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Canonicalize a locale (language). Note that for now, it is canonicalizing
|
||||
* according to CLDR conventions (he vs iw, etc), since that is what is needed
|
||||
* for likelySubtags.
|
||||
* @param ulocale language/locale code
|
||||
* @return ULocale with remapped subtags.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
public ULocale canonicalize(ULocale ulocale) {
|
||||
// TODO
|
||||
return null;
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load diff
|
@ -261,9 +261,10 @@
|
|||
<path id="javac.classpathref.core-tests">
|
||||
<pathelement location="${icu4j.core.jar}"/>
|
||||
<pathelement location="${icu4j.test-framework.jar}"/>
|
||||
<pathelement location="${icu4j.tools.jar}"/>
|
||||
</path>
|
||||
|
||||
<target name="_all.core-tests" depends="_all.core, _all.test-framework">
|
||||
<target name="_all.core-tests" depends="_all.core, _all.test-framework, _all.tools">
|
||||
<ant dir="${icu4j.core-tests.dir}" inheritAll="false"/>
|
||||
</target>
|
||||
|
||||
|
@ -349,11 +350,9 @@
|
|||
<pathelement location="${icu4j.collate.jar}"/>
|
||||
<pathelement location="${icu4j.translit.jar}"/>
|
||||
<pathelement location="${icu4j.test-framework.jar}"/>
|
||||
<pathelement location="${icu4j.core-tests.jar}"/>
|
||||
<pathelement location="${icu4j.translit-tests.jar}"/>
|
||||
</path>
|
||||
|
||||
<target name="_all.tools" depends="_all.core, _all.collate, _all.translit, _all.test-framework, _all.core-tests, _all.translit-tests">
|
||||
<target name="_all.tools" depends="_all.core, _all.collate, _all.translit, _all.test-framework">
|
||||
<ant dir="${icu4j.tools.dir}" inheritAll="false"/>
|
||||
</target>
|
||||
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:bd004f5d8064e047cef4f7d31326b39b7fc43fba685fab2f0d23c154f4dbc637
|
||||
size 12818511
|
||||
oid sha256:b21585ec768edea7b099bd6a97b0a4130b53966a63e6a10de2f31b22f8b59fbd
|
||||
size 12840921
|
||||
|
|
|
@ -18,5 +18,6 @@
|
|||
<attribute name="javadoc_location" value="jar:platform:/resource/external-libraries/JUnitParams-1.0.5-javadoc.jar!/"/>
|
||||
</attributes>
|
||||
</classpathentry>
|
||||
<classpathentry kind="src" path="/icu4j-tools"/>
|
||||
<classpathentry kind="output" path="out/bin"/>
|
||||
</classpath>
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
<project>icu4j-regiondata</project>
|
||||
<project>icu4j-shared</project>
|
||||
<project>icu4j-test-framework</project>
|
||||
<project>icu4j-tools</project>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
*
|
||||
*/
|
||||
|
||||
package com.ibm.icu.dev.tool.serializable;
|
||||
package com.ibm.icu.dev.test.serializable;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
|
@ -23,7 +23,6 @@ import java.util.Arrays;
|
|||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import com.ibm.icu.dev.test.serializable.SerializableTestUtility;
|
||||
import com.ibm.icu.impl.URLHandler;
|
||||
|
||||
/**
|
||||
|
@ -31,32 +30,32 @@ import com.ibm.icu.impl.URLHandler;
|
|||
* and lists all those classes that implement <code>Serializable</code>. It also checks
|
||||
* to make sure that those classes have the <code>serialVersionUID</code>
|
||||
* field defined.
|
||||
*
|
||||
*
|
||||
*/
|
||||
public class SerializableChecker implements URLHandler.URLVisitor
|
||||
{
|
||||
private static Class serializable;
|
||||
//private static Class throwable;
|
||||
|
||||
|
||||
private String path = null;
|
||||
|
||||
|
||||
//private boolean write;
|
||||
|
||||
|
||||
public SerializableChecker(String path)
|
||||
{
|
||||
this.path = path;
|
||||
|
||||
|
||||
if (path != null) {
|
||||
File dir = new File(path);
|
||||
|
||||
|
||||
if (!dir.exists()) {
|
||||
dir.mkdirs();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static {
|
||||
try {
|
||||
try {
|
||||
serializable = Class.forName("java.io.Serializable");
|
||||
//throwable = Class.forName("java.lang.Throwable");
|
||||
} catch (Exception e) {
|
||||
|
@ -64,45 +63,43 @@ public class SerializableChecker implements URLHandler.URLVisitor
|
|||
System.out.println("Woops! Can't get class info for Serializable and Throwable.");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void writeFile(String className, byte bytes[])
|
||||
{
|
||||
File file = new File(path + File.separator + className + ".dat");
|
||||
FileOutputStream stream;
|
||||
|
||||
try {
|
||||
stream = new FileOutputStream(file);
|
||||
|
||||
|
||||
try (FileOutputStream stream = new FileOutputStream(file)) {
|
||||
stream.write(bytes);
|
||||
stream.close();
|
||||
} catch (Exception e) {
|
||||
System.out.print(" - can't write file!");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void visit(String str)
|
||||
{
|
||||
int ix = str.lastIndexOf(".class");
|
||||
|
||||
|
||||
if (ix >= 0) {
|
||||
String className = "com.ibm.icu" + str.substring(0, ix).replace('/', '.');
|
||||
|
||||
|
||||
// Skip things in com.ibm.icu.dev; they're not relevant.
|
||||
if (className.startsWith("com.ibm.icu.dev.")) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
try {
|
||||
Class c = Class.forName(className);
|
||||
int m = c.getModifiers();
|
||||
|
||||
|
||||
if (serializable.isAssignableFrom(c) /*&&
|
||||
(! throwable.isAssignableFrom(c) || c.getDeclaredFields().length > 0)*/) {
|
||||
//Field uid;
|
||||
|
||||
|
||||
System.out.print(className + " (" + Modifier.toString(m) + ") - ");
|
||||
|
||||
if(!Modifier.isInterface(m)){
|
||||
|
||||
if(!Modifier.isInterface(m)){
|
||||
try {
|
||||
/* uid = */
|
||||
c.getDeclaredField("serialVersionUID");
|
||||
|
@ -110,18 +107,18 @@ public class SerializableChecker implements URLHandler.URLVisitor
|
|||
System.out.print("no serialVersionUID - ");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (Modifier.isPublic(m)) {
|
||||
SerializableTestUtility.Handler handler = SerializableTestUtility.getHandler(className);
|
||||
|
||||
|
||||
if (!Modifier.isInterface(m) && handler != null) {
|
||||
Object objectsOut[] = handler.getTestObjects();
|
||||
Object objectsIn[];
|
||||
boolean passed = true;
|
||||
|
||||
|
||||
ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
|
||||
ObjectOutputStream out = new ObjectOutputStream(byteOut);
|
||||
|
||||
|
||||
try {
|
||||
out.writeObject(objectsOut);
|
||||
out.close();
|
||||
|
@ -130,14 +127,14 @@ public class SerializableChecker implements URLHandler.URLVisitor
|
|||
System.out.println("Eror writing test objects:" + e.toString());
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
if (path != null) {
|
||||
writeFile(className, byteOut.toByteArray());
|
||||
}
|
||||
|
||||
|
||||
ByteArrayInputStream byteIn = new ByteArrayInputStream(byteOut.toByteArray());
|
||||
ObjectInputStream in = new ObjectInputStream(byteIn);
|
||||
|
||||
|
||||
try {
|
||||
objectsIn = (Object[]) in.readObject();
|
||||
in.close();
|
||||
|
@ -153,7 +150,7 @@ public class SerializableChecker implements URLHandler.URLVisitor
|
|||
System.out.println("Object " + i + " failed behavior test.");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (passed) {
|
||||
System.out.print("test passed.");
|
||||
}
|
||||
|
@ -164,7 +161,7 @@ public class SerializableChecker implements URLHandler.URLVisitor
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
System.out.println();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
|
@ -177,10 +174,10 @@ public class SerializableChecker implements URLHandler.URLVisitor
|
|||
{
|
||||
List argList = Arrays.asList(args);
|
||||
String path = null;
|
||||
|
||||
|
||||
for (Iterator it = argList.iterator(); it.hasNext(); /*anything?*/) {
|
||||
String arg = (String) it.next();
|
||||
|
||||
|
||||
if (arg.equals("-w")) {
|
||||
if (it.hasNext()) {
|
||||
path = (String) it.next();
|
||||
|
@ -188,15 +185,15 @@ public class SerializableChecker implements URLHandler.URLVisitor
|
|||
System.out.println("Missing directory name on -w command.");
|
||||
}
|
||||
} else {
|
||||
|
||||
|
||||
|
||||
|
||||
try {
|
||||
//URL jarURL = new URL("jar:file:/dev/eclipse/workspace/icu4j/icu4j.jar!/com/ibm/icu");
|
||||
//URL fileURL = new URL("file:/dev/eclipse/workspace/icu4j/classes/com/ibm/icu");
|
||||
URL url = new URL(arg);
|
||||
URLHandler handler = URLHandler.get(url);
|
||||
SerializableChecker checker = new SerializableChecker(path);
|
||||
|
||||
|
||||
System.out.println("Checking classes from " + arg + ":");
|
||||
handler.guide(checker, true, false);
|
||||
} catch (Exception e) {
|
|
@ -12,9 +12,10 @@ import org.junit.runner.RunWith;
|
|||
import org.junit.runners.JUnit4;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.dev.tool.locale.LocaleDistanceBuilder;
|
||||
import com.ibm.icu.impl.locale.LocaleDistance;
|
||||
import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
|
||||
import com.ibm.icu.util.LocaleMatcher;
|
||||
import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
|
||||
import com.ibm.icu.util.Output;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
|
@ -25,13 +26,13 @@ import com.ibm.icu.util.ULocale;
|
|||
* @author markdavis
|
||||
*/
|
||||
@RunWith(JUnit4.class)
|
||||
public class XLocaleDistanceTest extends TestFmwk {
|
||||
public class LocaleDistanceTest extends TestFmwk {
|
||||
private static final boolean REFORMAT = false; // set to true to get a reformatted data file listed
|
||||
|
||||
private LocaleDistance localeDistance = LocaleDistance.INSTANCE;
|
||||
DataDrivenTestHelper tfh = new MyTestFileHandler()
|
||||
.setFramework(this)
|
||||
.load(XLocaleDistanceTest.class, "data/localeDistanceTest.txt");
|
||||
.load(LocaleDistanceTest.class, "data/localeDistanceTest.txt");
|
||||
|
||||
static class Arguments {
|
||||
final ULocale desired;
|
||||
|
@ -47,6 +48,13 @@ public class XLocaleDistanceTest extends TestFmwk {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLoadedDataSameAsBuiltFromScratch() {
|
||||
LocaleDistance.Data built = LocaleDistanceBuilder.build();
|
||||
LocaleDistance.Data loaded = LocaleDistance.Data.load();
|
||||
assertEquals("run LocaleDistanceBuilder and update ICU4C langInfo.txt", built, loaded);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@Ignore("Disabled because of Linux; need to investigate.")
|
||||
@Test
|
|
@ -1,22 +0,0 @@
|
|||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2015, Google, Inc., International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData;
|
||||
|
||||
/**
|
||||
* @author markdavis
|
||||
*
|
||||
*/
|
||||
public class LocaleMatcherShim {
|
||||
public static LanguageMatcherData load() {
|
||||
// In CLDR, has different value
|
||||
return null;
|
||||
}
|
||||
}
|
|
@ -9,55 +9,47 @@
|
|||
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.io.BufferedReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.junit.runners.JUnit4;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.dev.tool.locale.LikelySubtagsBuilder;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
|
||||
import com.ibm.icu.impl.locale.XLikelySubtags;
|
||||
import com.ibm.icu.util.LocaleMatcher;
|
||||
import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData;
|
||||
import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
|
||||
import com.ibm.icu.util.LocalePriorityList;
|
||||
import com.ibm.icu.util.Output;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
import junitparams.JUnitParamsRunner;
|
||||
import junitparams.Parameters;
|
||||
|
||||
/**
|
||||
* Test the LocaleMatcher.
|
||||
*
|
||||
* @author markdavis
|
||||
*/
|
||||
@SuppressWarnings("deprecation")
|
||||
@RunWith(JUnit4.class)
|
||||
@RunWith(JUnitParamsRunner.class)
|
||||
public class LocaleMatcherTest extends TestFmwk {
|
||||
|
||||
|
||||
private static final ULocale ZH_MO = new ULocale("zh_MO");
|
||||
private static final ULocale ZH_HK = new ULocale("zh_HK");
|
||||
static LanguageMatcherData LANGUAGE_MATCHER_DATA = LocaleMatcherShim.load();
|
||||
|
||||
private LocaleMatcher newLocaleMatcher(LocalePriorityList build) {
|
||||
return new LocaleMatcher(build, LANGUAGE_MATCHER_DATA);
|
||||
}
|
||||
|
||||
private LocaleMatcher newLocaleMatcher(LocalePriorityList build, LanguageMatcherData data) {
|
||||
return new LocaleMatcher(build, data == null ? LANGUAGE_MATCHER_DATA : data);
|
||||
}
|
||||
|
||||
private LocaleMatcher newLocaleMatcher(LocalePriorityList lpl, LanguageMatcherData data, double d) {
|
||||
return new LocaleMatcher(lpl, data == null ? LANGUAGE_MATCHER_DATA : data, d);
|
||||
return new LocaleMatcher(build);
|
||||
}
|
||||
|
||||
private LocaleMatcher newLocaleMatcher(String string) {
|
||||
return new LocaleMatcher(LocalePriorityList.add(string).build(), LANGUAGE_MATCHER_DATA);
|
||||
return new LocaleMatcher(LocalePriorityList.add(string).build());
|
||||
}
|
||||
|
||||
// public LocaleMatcher(LocalePriorityList languagePriorityList,
|
||||
// LocaleMatcherData matcherData, double threshold)
|
||||
|
||||
@Test
|
||||
public void testParentLocales() {
|
||||
assertCloser("es_AR", "es_419", "es_ES");
|
||||
|
@ -87,32 +79,6 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
assertEquals("test " + a + " is closer to " + closer + " than to " + further, new ULocale(closer), matcher.getBestMatch(a));
|
||||
}
|
||||
|
||||
// public void testParentLocales() {
|
||||
// // find all the regions that have a closer relation because of an explicit parent
|
||||
// Set<String> explicitParents = new HashSet<>(INFO.getExplicitParents());
|
||||
// explicitParents.remove("root");
|
||||
// Set<String> otherParents = new HashSet<>(INFO.getExplicitParents());
|
||||
// for (String locale : explicitParents) {
|
||||
// while (true) {
|
||||
// locale = LocaleIDParser.getParent(locale);
|
||||
// if (locale == null || locale.equals("root")) {
|
||||
// break;
|
||||
// }
|
||||
// otherParents.add(locale);
|
||||
// }
|
||||
// }
|
||||
// otherParents.remove("root");
|
||||
//
|
||||
// for (String locale : CONFIG.getCldrFactory().getAvailable()) {
|
||||
// String parentId = LocaleIDParser.getParent(locale);
|
||||
// String parentIdSimple = LocaleIDParser.getSimpleParent(locale);
|
||||
// if (!explicitParents.contains(parentId) && !otherParents.contains(parentIdSimple)) {
|
||||
// continue;
|
||||
// }
|
||||
// System.out.println(locale + "\t" + CONFIG.getEnglish().getName(locale) + "\t" + parentId + "\t" + parentIdSimple);
|
||||
// }
|
||||
// }
|
||||
|
||||
@Test
|
||||
public void testChinese() {
|
||||
LocaleMatcher matcher = newLocaleMatcher("zh_CN, zh_TW, iw");
|
||||
|
@ -139,31 +105,10 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
@Test
|
||||
public void testFallbacks() {
|
||||
LocalePriorityList lpl = LocalePriorityList.add("en, hi").build();
|
||||
final LocaleMatcher matcher = newLocaleMatcher(lpl, null, 0.09);
|
||||
final LocaleMatcher matcher = newLocaleMatcher(lpl);
|
||||
assertEquals("hi", matcher.getBestMatch("sa").toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOverrideData() {
|
||||
double threshold = 0.05;
|
||||
LanguageMatcherData localeMatcherData = new LanguageMatcherData()
|
||||
.addDistance("br", "fr", 10, true)
|
||||
.addDistance("es", "cy", 10, true);
|
||||
logln(localeMatcherData.toString());
|
||||
|
||||
final LocaleMatcher matcher = newLocaleMatcher(
|
||||
LocalePriorityList
|
||||
.add(ULocale.ENGLISH)
|
||||
.add(ULocale.FRENCH)
|
||||
.add(ULocale.UK)
|
||||
.build(), localeMatcherData, threshold);
|
||||
logln(matcher.toString());
|
||||
|
||||
assertEquals(ULocale.FRENCH, matcher.getBestMatch(new ULocale("br")));
|
||||
assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es"))); // one
|
||||
// way
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasics() {
|
||||
final LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(ULocale.FRENCH).add(ULocale.UK)
|
||||
|
@ -184,7 +129,7 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
assertEquals(new ULocale("zh_CN"), matcher.getBestMatch("zh"));
|
||||
assertEquals(new ULocale("zh_CN"), matcher.getBestMatch("zh_Hans_CN"));
|
||||
assertEquals(new ULocale("zh_TW"), matcher.getBestMatch("zh_Hant_HK"));
|
||||
assertEquals(new ULocale("he"), matcher.getBestMatch("iw_IT"));
|
||||
assertEquals(new ULocale("iw"), matcher.getBestMatch("iw_IT"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -219,20 +164,8 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
@Test
|
||||
public void TestLocaleMatcherCoverage() {
|
||||
// Add tests for better code coverage
|
||||
LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(null, 0).build(), null);
|
||||
LocaleMatcher matcher = newLocaleMatcher(LocalePriorityList.add(null, 0).build());
|
||||
logln(matcher.toString());
|
||||
|
||||
LanguageMatcherData data = new LanguageMatcherData();
|
||||
|
||||
LanguageMatcherData clone = data.cloneAsThawed();
|
||||
|
||||
if (clone.equals(data)) {
|
||||
errln("Error cloneAsThawed() is equal.");
|
||||
}
|
||||
|
||||
if (data.isFrozen()) {
|
||||
errln("Error LocaleMatcherData is frozen!");
|
||||
}
|
||||
}
|
||||
|
||||
private void assertEquals(Object expected, Object string) {
|
||||
|
@ -251,17 +184,19 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
|
||||
static final ULocale ENGLISH_CANADA = new ULocale("en_CA");
|
||||
|
||||
private static double match(ULocale a, ULocale b) {
|
||||
final LocaleMatcher matcher = new LocaleMatcher("");
|
||||
return matcher.match(a, null, b, null);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMatch_exact() {
|
||||
assertEquals(1.0,
|
||||
LocaleMatcher.match(ENGLISH_CANADA, ENGLISH_CANADA));
|
||||
assertEquals(1.0, match(ENGLISH_CANADA, ENGLISH_CANADA));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMatch_none() {
|
||||
double match = LocaleMatcher.match(
|
||||
new ULocale("ar_MK"),
|
||||
ENGLISH_CANADA);
|
||||
double match = match(new ULocale("ar_MK"), ENGLISH_CANADA);
|
||||
assertTrue("Actual < 0: " + match, 0 <= match);
|
||||
assertTrue("Actual > 0.15 (~ language + script distance): " + match, 0.2 > match);
|
||||
}
|
||||
|
@ -270,13 +205,12 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
public void testMatch_matchOnMazimized() {
|
||||
ULocale undTw = new ULocale("und_TW");
|
||||
ULocale zhHant = new ULocale("zh_Hant");
|
||||
double matchZh = LocaleMatcher.match(undTw, new ULocale("zh"));
|
||||
double matchZhHant = LocaleMatcher.match(undTw, zhHant);
|
||||
double matchZh = match(undTw, new ULocale("zh"));
|
||||
double matchZhHant = match(undTw, zhHant);
|
||||
assertTrue("und_TW should be closer to zh_Hant (" + matchZhHant +
|
||||
") than to zh (" + matchZh + ")",
|
||||
matchZh < matchZhHant);
|
||||
double matchEnHantTw = LocaleMatcher.match(new ULocale("en_Hant_TW"),
|
||||
zhHant);
|
||||
double matchEnHantTw = match(new ULocale("en_Hant_TW"), zhHant);
|
||||
assertTrue("zh_Hant should be closer to und_TW (" + matchZhHant +
|
||||
") than to en_Hant_TW (" + matchEnHantTw + ")",
|
||||
matchEnHantTw < matchZhHant);
|
||||
|
@ -397,16 +331,9 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
assertEquals("it", matcher.getBestMatch("en").toString());
|
||||
}
|
||||
|
||||
// public void testGetBestMatch_emptyList() {
|
||||
// final LocaleMatcher matcher = newLocaleMatcher(
|
||||
// new LocalePriorityList(new HashMap()));
|
||||
// assertNull(matcher.getBestMatch(ULocale.ENGLISH));
|
||||
// }
|
||||
|
||||
@Test
|
||||
public void testGetBestMatch_googlePseudoLocales() {
|
||||
// Google pseudo locales are primarily based on variant subtags.
|
||||
// See http://sites/intl_eng/pseudo_locales.
|
||||
// (See below for the region code based fall back options.)
|
||||
final LocaleMatcher matcher = newLocaleMatcher(
|
||||
"fr, pt");
|
||||
|
@ -475,19 +402,25 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
check2(sorted);
|
||||
}
|
||||
|
||||
private static final ULocale posix = new ULocale("en_US_POSIX");
|
||||
|
||||
/**
|
||||
* @param sorted
|
||||
*/
|
||||
private void check2(Set<ULocale> sorted) {
|
||||
// TODO Auto-generated method stub
|
||||
logln("Checking: " + sorted);
|
||||
LocaleMatcher matcher = newLocaleMatcher(
|
||||
LocalePriorityList.add(
|
||||
sorted.toArray(new ULocale[sorted.size()]))
|
||||
.build());
|
||||
.build());
|
||||
for (ULocale loc : sorted) {
|
||||
String stringLoc = loc.toString();
|
||||
assertEquals(stringLoc, matcher.getBestMatch(stringLoc).toString());
|
||||
// The result may not be the exact same locale, but it must be equivalent.
|
||||
// Variants and extensions are ignored.
|
||||
if (loc.equals(posix)) { continue; }
|
||||
ULocale max = ULocale.addLikelySubtags(loc);
|
||||
ULocale best = matcher.getBestMatch(loc);
|
||||
ULocale maxBest = ULocale.addLikelySubtags(best);
|
||||
assertEquals(loc.toString(), max, maxBest);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -502,29 +435,8 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
}
|
||||
|
||||
|
||||
// public void testComputeDistance_monkeyTest() {
|
||||
// RegionCode[] codes = RegionCode.values();
|
||||
// Random random = new Random();
|
||||
// for (int i = 0; i < 1000; ++i) {
|
||||
// RegionCode x = codes[random.nextInt(codes.length)];
|
||||
// RegionCode y = codes[random.nextInt(codes.length)];
|
||||
// double d = LocaleMatcher.getRegionDistance(x, y, null, null);
|
||||
// if (x == RegionCode.ZZ || y == RegionCode.ZZ) {
|
||||
// assertEquals(LocaleMatcher.REGION_DISTANCE, d);
|
||||
// } else if (x == y) {
|
||||
// assertEquals(0.0, d);
|
||||
// } else {
|
||||
// assertTrue(d > 0);
|
||||
// assertTrue(d <= LocaleMatcher.REGION_DISTANCE);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
@Test
|
||||
public void testGetBestMatchForList_matchOnMaximized2() {
|
||||
// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
|
||||
// return;
|
||||
// }
|
||||
final LocaleMatcher matcher = newLocaleMatcher("fr, en-GB, ja, es-ES, es-MX");
|
||||
// ja-JP matches ja on likely subtags, and it's listed first,
|
||||
// thus it wins over the second preference en-GB.
|
||||
|
@ -537,9 +449,6 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
|
||||
@Test
|
||||
public void testGetBestMatchForList_closeEnoughMatchOnMaximized() {
|
||||
// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
|
||||
// return;
|
||||
// }
|
||||
final LocaleMatcher matcher = newLocaleMatcher("en-GB, en, de, fr, ja");
|
||||
assertEquals("de", matcher.getBestMatch("de-CH, fr").toString());
|
||||
assertEquals("en", matcher.getBestMatch("en-US, ar, nl, de, ja").toString());
|
||||
|
@ -547,23 +456,20 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
|
||||
@Test
|
||||
public void testGetBestMatchForPortuguese() {
|
||||
|
||||
// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
|
||||
// return;
|
||||
// }
|
||||
|
||||
final LocaleMatcher withPTExplicit = newLocaleMatcher("pt_PT, pt_BR, es, es_419");
|
||||
final LocaleMatcher withPTImplicit = newLocaleMatcher("pt_PT, pt, es, es_419");
|
||||
// Could happen because "pt_BR" is a tier_1 language and "pt_PT" is tier_2.
|
||||
|
||||
final LocaleMatcher withoutPT = newLocaleMatcher("pt_BR, es, es_419");
|
||||
// European user who prefers Spanish over Brazillian Portuguese as a fallback.
|
||||
// European user who prefers Spanish over Brazilian Portuguese as a fallback.
|
||||
|
||||
assertEquals("pt_PT", withPTExplicit.getBestMatch("pt_PT, es, pt").toString());
|
||||
assertEquals("pt_PT", withPTImplicit.getBestMatch("pt_PT, es, pt").toString());
|
||||
assertEquals("es", withoutPT.getBestMatch("pt_PT, es, pt").toString());
|
||||
// The earlier pt_PT vs. pt_BR region mismatch is as good as the later es perfect match
|
||||
// because of the demotion per desired locale.
|
||||
assertEquals("pt_BR", withoutPT.getBestMatch("pt_PT, es, pt").toString());
|
||||
|
||||
// Brazillian user who prefers South American Spanish over European Portuguese as a fallback.
|
||||
// Brazilian user who prefers South American Spanish over European Portuguese as a fallback.
|
||||
// The asymmetry between this case and above is because it's "pt_PT" that's missing between the
|
||||
// matchers as "pt_BR" is a much more common language.
|
||||
assertEquals("pt_BR", withPTExplicit.getBestMatch("pt, es_419, pt_PT").toString());
|
||||
|
@ -578,9 +484,6 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
|
||||
@Test
|
||||
public void testVariantWithScriptMatch() {
|
||||
// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
|
||||
// return;
|
||||
// }
|
||||
final LocaleMatcher matcher = newLocaleMatcher("fr, en, sv");
|
||||
assertEquals("en", matcher.getBestMatch("en-GB").toString());
|
||||
assertEquals("en", matcher.getBestMatch("en-GB, sv").toString());
|
||||
|
@ -588,54 +491,10 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
|
||||
@Test
|
||||
public void testVariantWithScriptMatch2() {
|
||||
// if (logKnownIssue("Cldrbug:8811", "Problems with LocaleMatcher test")) {
|
||||
// return;
|
||||
// }
|
||||
final LocaleMatcher matcher = newLocaleMatcher("en, sv");
|
||||
assertEquals("en", matcher.getBestMatch("en-GB, sv").toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPerf() {
|
||||
if (LANGUAGE_MATCHER_DATA == null) {
|
||||
return; // skip except when testing data
|
||||
}
|
||||
final String desired = "sv, en";
|
||||
|
||||
final LocaleMatcher matcherShort = newLocaleMatcher(desired);
|
||||
final LocaleMatcher matcherLong = newLocaleMatcher("af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, zh-CN, zh-TW, zu");
|
||||
final LocaleMatcher matcherVeryLong = newLocaleMatcher("af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_001, en_150, en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, 
fr_BJ, fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, smn, smn_FI, 
sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA");
|
||||
|
||||
//LocaleMatcher.DEBUG = true;
|
||||
ULocale expected = new ULocale("sv");
|
||||
assertEquals(expected, matcherShort.getBestMatch(desired));
|
||||
assertEquals(expected, matcherLong.getBestMatch(desired));
|
||||
assertEquals(expected, matcherVeryLong.getBestMatch(desired));
|
||||
//LocaleMatcher.DEBUG = false;
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
int iterations = i == 0 ? 1000 : 100000;
|
||||
boolean showMessage = i != 0;
|
||||
long timeShort = timeLocaleMatcher("Duration (few supported):\t", desired, matcherShort, showMessage, iterations, 0);
|
||||
@SuppressWarnings("unused")
|
||||
long timeMedium = timeLocaleMatcher("Duration (med. supported):\t", desired, matcherLong, showMessage, iterations, timeShort);
|
||||
@SuppressWarnings("unused")
|
||||
long timeLong = timeLocaleMatcher("Duration (many supported):\t", desired, matcherVeryLong, showMessage, iterations, timeShort);
|
||||
}
|
||||
}
|
||||
|
||||
private long timeLocaleMatcher(String title, String desired, LocaleMatcher matcher,
|
||||
boolean showmessage, int iterations, long comparisonTime) {
|
||||
long start = System.nanoTime();
|
||||
for (int i = iterations; i > 0; --i) {
|
||||
matcher.getBestMatch(desired);
|
||||
}
|
||||
long delta = System.nanoTime() - start;
|
||||
if (showmessage) warnln(title + (delta / iterations) + " nanos, "
|
||||
+ (comparisonTime > 0 ? (delta * 100 / comparisonTime - 100) + "% longer" : ""));
|
||||
return delta;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void Test8288() {
|
||||
final LocaleMatcher matcher = newLocaleMatcher("it, en");
|
||||
|
@ -644,24 +503,403 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void TestTechPreview() {
|
||||
final LocaleMatcher matcher = newLocaleMatcher("it, en, ru");
|
||||
ULocale und = new ULocale("und");
|
||||
ULocale bulgarian = new ULocale("bg");
|
||||
ULocale russian = new ULocale("ru");
|
||||
public void testDemotion() {
|
||||
LocalePriorityList supported = LocalePriorityList.add("fr, de-CH, it").build();
|
||||
LocalePriorityList desired = LocalePriorityList.add("fr-CH, de-CH, it").build();
|
||||
LocaleMatcher noDemotion = LocaleMatcher.builder().
|
||||
setSupportedULocales(supported.getULocales()).
|
||||
setDemotionPerDesiredLocale(LocaleMatcher.Demotion.NONE).build();
|
||||
assertEquals("no demotion", new ULocale("de-CH"), noDemotion.getBestMatch(desired));
|
||||
|
||||
Output<ULocale> outputBestDesired = new Output<>();
|
||||
LocaleMatcher regionDemotion = LocaleMatcher.builder().
|
||||
setSupportedULocales(supported.getULocales()).
|
||||
setDemotionPerDesiredLocale(LocaleMatcher.Demotion.REGION).build();
|
||||
assertEquals("region demotion", ULocale.FRENCH, regionDemotion.getBestMatch(desired));
|
||||
}
|
||||
|
||||
ULocale best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired);
|
||||
assertEquals(ULocale.ITALIAN, best);
|
||||
assertEquals(null, outputBestDesired.value);
|
||||
private static final class PerfCase {
|
||||
ULocale desired;
|
||||
ULocale expectedShort;
|
||||
ULocale expectedLong;
|
||||
ULocale expectedVeryLong;
|
||||
|
||||
matcher.setDefaultLanguage(ULocale.JAPANESE);
|
||||
best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, ULocale.GERMAN)), outputBestDesired);
|
||||
assertEquals(ULocale.JAPANESE, best);
|
||||
PerfCase(String des, String expShort, String expLong, String expVeryLong) {
|
||||
desired = new ULocale(des);
|
||||
expectedShort = new ULocale(expShort);
|
||||
expectedLong = new ULocale(expLong);
|
||||
expectedVeryLong = new ULocale(expVeryLong);
|
||||
}
|
||||
}
|
||||
|
||||
matcher.setFavorScript(true);
|
||||
best = matcher.getBestMatch(new LinkedHashSet(Arrays.asList(und, bulgarian)), outputBestDesired);
|
||||
assertEquals(russian, best);
|
||||
private static final int WARM_UP_ITERATIONS = 1000;
|
||||
private static final int BENCHMARK_ITERATIONS = 20000;
|
||||
|
||||
@Test
|
||||
public void testPerf() {
|
||||
final String shortList = "en, sv";
|
||||
final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
|
||||
"el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
|
||||
"hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
|
||||
"mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
|
||||
"si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
|
||||
"zh-CN, zh-TW, zu";
|
||||
final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, " +
|
||||
"ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, " +
|
||||
"ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, " +
|
||||
"ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, " +
|
||||
"ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, " +
|
||||
"bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, " +
|
||||
"bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, " +
|
||||
"bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, " +
|
||||
"ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, " +
|
||||
"cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, " +
|
||||
"de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, " +
|
||||
// removed en_001 to avoid exact match
|
||||
"ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_150, " +
|
||||
"en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, " +
|
||||
"en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, " +
|
||||
"en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, " +
|
||||
"en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, " +
|
||||
"en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, " +
|
||||
"en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, " +
|
||||
"en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, " +
|
||||
"en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, " +
|
||||
"en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, " +
|
||||
"en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, " +
|
||||
"es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, " +
|
||||
"es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, " +
|
||||
"et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, " +
|
||||
"ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, " +
|
||||
"fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, " +
|
||||
"fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, " +
|
||||
"fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, " +
|
||||
"fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, " +
|
||||
"fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, " +
|
||||
"gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, " +
|
||||
"he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, " +
|
||||
"id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, " +
|
||||
"jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, " +
|
||||
"kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, " +
|
||||
"kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, " +
|
||||
"ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, " +
|
||||
"lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, " +
|
||||
"lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, " +
|
||||
"lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, " +
|
||||
"mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, " +
|
||||
"ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, " +
|
||||
"nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, " +
|
||||
"nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, " +
|
||||
"nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, " +
|
||||
"pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, " +
|
||||
"pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, " +
|
||||
"rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, " +
|
||||
"ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, " +
|
||||
"sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, " +
|
||||
"shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, " +
|
||||
"smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, " +
|
||||
"sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, " +
|
||||
"sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, " +
|
||||
"sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, " +
|
||||
"teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, " +
|
||||
"tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, " +
|
||||
"uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, " +
|
||||
"vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, " +
|
||||
"wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, " +
|
||||
"zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, " +
|
||||
"zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA";
|
||||
|
||||
final LocaleMatcher matcherShort = newLocaleMatcher(shortList);
|
||||
final LocaleMatcher matcherLong = newLocaleMatcher(longList);
|
||||
final LocaleMatcher matcherVeryLong = newLocaleMatcher(veryLongList);
|
||||
|
||||
PerfCase[] pcs = new PerfCase[] {
|
||||
// Exact match in all matchers.
|
||||
new PerfCase("sv", "sv", "sv", "sv"),
|
||||
// Common locale, exact match only in very long list.
|
||||
new PerfCase("fr_CA", "en", "fr", "fr_CA"),
|
||||
// Unusual locale, no exact match.
|
||||
new PerfCase("de_CA", "en", "de", "de"),
|
||||
// World English maps to several region partitions.
|
||||
new PerfCase("en_001", "en", "en", "en"),
|
||||
// Ancient language with interesting subtags.
|
||||
new PerfCase("egy_Copt_CY", "en", "af", "af")
|
||||
};
|
||||
|
||||
for (PerfCase pc : pcs) {
|
||||
final ULocale desired = pc.desired;
|
||||
|
||||
assertEquals(desired.toString(), pc.expectedShort, matcherShort.getBestMatch(desired));
|
||||
assertEquals(desired.toString(), pc.expectedLong, matcherLong.getBestMatch(desired));
|
||||
assertEquals(desired.toString(), pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired));
|
||||
|
||||
timeLocaleMatcher(desired, matcherShort, WARM_UP_ITERATIONS);
|
||||
timeLocaleMatcher(desired, matcherLong, WARM_UP_ITERATIONS);
|
||||
timeLocaleMatcher(desired, matcherVeryLong, WARM_UP_ITERATIONS);
|
||||
long tns = timeLocaleMatcher(desired, matcherShort, BENCHMARK_ITERATIONS);
|
||||
System.out.format("New Duration (few supported):\t%s\t%d\tnanos\n", desired, tns);
|
||||
long tnl = timeLocaleMatcher(desired, matcherLong, BENCHMARK_ITERATIONS);
|
||||
System.out.format("New Duration (med. supported):\t%s\t%d\tnanos\n", desired, tnl);
|
||||
long tnv = timeLocaleMatcher(desired, matcherVeryLong, BENCHMARK_ITERATIONS);
|
||||
System.out.format("New Duration (many supported):\t%s\t%d\tnanos\n", desired, tnv);
|
||||
}
|
||||
|
||||
maximizePerf();
|
||||
}
|
||||
|
||||
private static long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) {
|
||||
long start = System.nanoTime();
|
||||
for (int i = iterations; i > 0; --i) {
|
||||
matcher.getBestMatch(desired);
|
||||
}
|
||||
long delta = System.nanoTime() - start;
|
||||
return (delta / iterations);
|
||||
}
|
||||
|
||||
private void maximizePerf() {
|
||||
final String tags = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
|
||||
"el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
|
||||
"hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
|
||||
"mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
|
||||
"si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
|
||||
"zh-CN, zh-TW, zu";
|
||||
LocalePriorityList list = LocalePriorityList.add(tags).build();
|
||||
int few = 1000;
|
||||
long t = timeMaximize(list, few); // warm up
|
||||
t = timeMaximize(list, few); // measure for scale
|
||||
long targetTime = 100000000L; // 10^8 ns = 0.1s
|
||||
int iterations = (int)((targetTime * few) / t);
|
||||
t = timeMaximize(list, iterations);
|
||||
int length = 0;
|
||||
for (@SuppressWarnings("unused") ULocale locale : list) { ++length; }
|
||||
System.out.println("maximize: " + (t / iterations / length) + " ns/locale: " +
|
||||
t + " ns / " + iterations + " iterations / " + length + " locales");
|
||||
}
|
||||
|
||||
// returns total ns not per iteration
|
||||
private static long timeMaximize(Iterable<ULocale> list, int iterations) {
|
||||
long start = System.nanoTime();
|
||||
for (int i = iterations; i > 0; --i) {
|
||||
for (ULocale locale : list) {
|
||||
XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
|
||||
}
|
||||
}
|
||||
return System.nanoTime() - start;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLikelySubtagsLoadedDataSameAsBuiltFromScratch() {
|
||||
XLikelySubtags.Data built = LikelySubtagsBuilder.build();
|
||||
XLikelySubtags.Data loaded = XLikelySubtags.Data.load();
|
||||
assertEquals("run LocaleDistanceBuilder and update ICU4C langInfo.txt", built, loaded);
|
||||
}
|
||||
|
||||
private static final class TestCase implements Cloneable {
|
||||
private static final String ENDL = System.getProperties().getProperty("line.separator");
|
||||
|
||||
int lineNr = 0;
|
||||
|
||||
String nameLine = "";
|
||||
String supportedLine = "";
|
||||
String defaultLine = "";
|
||||
String distanceLine = "";
|
||||
String thresholdLine = "";
|
||||
String matchLine = "";
|
||||
|
||||
String supported = "";
|
||||
String def = "";
|
||||
String favor = "";
|
||||
String threshold = "";
|
||||
String desired = "";
|
||||
String expMatch = "";
|
||||
String expDesired = "";
|
||||
String expCombined = "";
|
||||
|
||||
@Override
|
||||
public TestCase clone() throws CloneNotSupportedException {
|
||||
return (TestCase) super.clone();
|
||||
}
|
||||
|
||||
void reset(String newNameLine) {
|
||||
nameLine = newNameLine;
|
||||
supportedLine = "";
|
||||
defaultLine = "";
|
||||
distanceLine = "";
|
||||
thresholdLine = "";
|
||||
|
||||
supported = "";
|
||||
def = "";
|
||||
favor = "";
|
||||
threshold = "";
|
||||
}
|
||||
|
||||
String toInputsKey() {
|
||||
return supported + '+' + def + '+' + favor + '+' + threshold + '+' + desired;
|
||||
}
|
||||
|
||||
private static void appendLine(StringBuilder sb, String line) {
|
||||
if (!line.isEmpty()) {
|
||||
sb.append(ENDL).append(line);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder(nameLine);
|
||||
appendLine(sb, supportedLine);
|
||||
appendLine(sb, defaultLine);
|
||||
appendLine(sb, distanceLine);
|
||||
appendLine(sb, thresholdLine);
|
||||
sb.append(ENDL).append("line ").append(lineNr).append(':');
|
||||
appendLine(sb, matchLine);
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
private static String getSuffixAfterPrefix(String s, int limit, String prefix) {
|
||||
if (prefix.length() <= limit && s.startsWith(prefix)) {
|
||||
return s.substring(prefix.length(), limit);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// UsedReflectively, not private to avoid unused-warning
|
||||
static List<TestCase> readTestCases() throws Exception {
|
||||
List<TestCase> tests = new ArrayList<>();
|
||||
Map<String, Integer> uniqueTests = new HashMap<>();
|
||||
TestCase test = new TestCase();
|
||||
String filename = "data/localeMatcherTest.txt";
|
||||
try (BufferedReader in = FileUtilities.openFile(LocaleMatcherTest.class, filename)) {
|
||||
String line;
|
||||
while ((line = in.readLine()) != null) {
|
||||
++test.lineNr;
|
||||
// Start of comment, or end of line, minus trailing spaces.
|
||||
int limit = line.indexOf('#');
|
||||
if (limit < 0) {
|
||||
limit = line.length();
|
||||
}
|
||||
char c;
|
||||
while (limit > 0 && ((c = line.charAt(limit - 1)) == ' ' || c == '\t')) {
|
||||
--limit;
|
||||
}
|
||||
if (limit == 0) { // empty line
|
||||
continue;
|
||||
}
|
||||
String suffix;
|
||||
if (line.startsWith("** test: ")) {
|
||||
test.reset(line);
|
||||
} else if ((suffix = getSuffixAfterPrefix(line, limit, "@supported=")) != null) {
|
||||
test.supportedLine = line;
|
||||
test.supported = suffix;
|
||||
} else if ((suffix = getSuffixAfterPrefix(line, limit, "@default=")) != null) {
|
||||
test.defaultLine = line;
|
||||
test.def = suffix;
|
||||
} else if ((suffix = getSuffixAfterPrefix(line, limit, "@favor=")) != null) {
|
||||
test.distanceLine = line;
|
||||
test.favor = suffix;
|
||||
} else if ((suffix = getSuffixAfterPrefix(line, limit, "@threshold=")) != null) {
|
||||
test.thresholdLine = line;
|
||||
test.threshold = suffix;
|
||||
} else {
|
||||
int matchSep = line.indexOf(">>");
|
||||
// >> before an inline comment, and followed by more than white space.
|
||||
if (0 <= matchSep && (matchSep + 2) < limit) {
|
||||
test.matchLine = line;
|
||||
test.desired = line.substring(0, matchSep).trim();
|
||||
test.expDesired = test.expCombined = "";
|
||||
int start = matchSep + 2;
|
||||
int expLimit = line.indexOf('|', start);
|
||||
if (expLimit < 0) {
|
||||
test.expMatch = line.substring(start, limit).trim();
|
||||
} else {
|
||||
test.expMatch = line.substring(start, expLimit).trim();
|
||||
start = expLimit + 1;
|
||||
expLimit = line.indexOf('|', start);
|
||||
if (expLimit < 0) {
|
||||
test.expDesired = line.substring(start, limit).trim();
|
||||
} else {
|
||||
test.expDesired = line.substring(start, expLimit).trim();
|
||||
test.expCombined = line.substring(expLimit + 1, limit).trim();
|
||||
}
|
||||
}
|
||||
String inputs = test.toInputsKey();
|
||||
Integer prevIndex = uniqueTests.get(inputs);
|
||||
if (prevIndex == null) {
|
||||
uniqueTests.put(inputs, tests.size());
|
||||
} else {
|
||||
System.out.println("Locale matcher test case on line " + test.lineNr
|
||||
+ " is a duplicate of line " + tests.get(prevIndex).lineNr);
|
||||
}
|
||||
tests.add(test.clone());
|
||||
} else {
|
||||
throw new IllegalArgumentException("test data syntax error on line "
|
||||
+ test.lineNr + "\n" + line);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
System.out.println("Number of duplicate locale matcher test cases: " + (tests.size() - uniqueTests.size()));
|
||||
return tests;
|
||||
}
|
||||
|
||||
private static ULocale getULocaleOrNull(String s) {
|
||||
if (s.equals("null")) {
|
||||
return null;
|
||||
} else {
|
||||
return new ULocale(s);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@Parameters(method = "readTestCases")
|
||||
public void dataDriven(TestCase test) {
|
||||
LocaleMatcher matcher;
|
||||
if (test.def.isEmpty() && test.favor.isEmpty() && test.threshold.isEmpty()) {
|
||||
matcher = new LocaleMatcher(test.supported);
|
||||
} else {
|
||||
LocaleMatcher.Builder builder = LocaleMatcher.builder();
|
||||
builder.setSupportedLocales(test.supported);
|
||||
if (!test.def.isEmpty()) {
|
||||
builder.setDefaultULocale(new ULocale(test.def));
|
||||
}
|
||||
if (!test.favor.isEmpty()) {
|
||||
FavorSubtag favor;
|
||||
switch (test.favor) {
|
||||
case "normal":
|
||||
favor = FavorSubtag.LANGUAGE;
|
||||
break;
|
||||
case "script":
|
||||
favor = FavorSubtag.SCRIPT;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("unsupported FavorSubtag value " + test.favor);
|
||||
}
|
||||
builder.setFavorSubtag(favor);
|
||||
}
|
||||
if (!test.threshold.isEmpty()) {
|
||||
int threshold = Integer.valueOf(test.threshold);
|
||||
builder.internalSetThresholdDistance(threshold);
|
||||
}
|
||||
matcher = builder.build();
|
||||
}
|
||||
|
||||
ULocale expMatch = getULocaleOrNull(test.expMatch);
|
||||
if (test.expDesired.isEmpty() && test.expCombined.isEmpty()) {
|
||||
ULocale bestSupported = matcher.getBestMatch(test.desired);
|
||||
assertEquals("bestSupported", expMatch, bestSupported);
|
||||
} else {
|
||||
LocalePriorityList desired = LocalePriorityList.add(test.desired).build();
|
||||
LocaleMatcher.Result result = matcher.getBestMatchResult(desired);
|
||||
assertEquals("bestSupported", expMatch, result.getSupportedULocale());
|
||||
if (!test.expDesired.isEmpty()) {
|
||||
ULocale expDesired = getULocaleOrNull(test.expDesired);
|
||||
assertEquals("bestDesired", expDesired, result.getDesiredULocale());
|
||||
}
|
||||
if (!test.expCombined.isEmpty()) {
|
||||
ULocale expCombined = getULocaleOrNull(test.expCombined);
|
||||
ULocale combined = result.makeServiceULocale();
|
||||
assertEquals("combined", expCombined, combined);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,612 +0,0 @@
|
|||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.dev.test.util;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.impl.locale.LocaleDistance;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
|
||||
import com.ibm.icu.impl.locale.XLikelySubtags;
|
||||
import com.ibm.icu.impl.locale.XLocaleMatcher;
|
||||
import com.ibm.icu.impl.locale.XLocaleMatcher.FavorSubtag;
|
||||
import com.ibm.icu.util.LocaleMatcher;
|
||||
import com.ibm.icu.util.LocalePriorityList;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
import junitparams.JUnitParamsRunner;
|
||||
import junitparams.Parameters;
|
||||
|
||||
/**
|
||||
* Test the XLocaleMatcher.
|
||||
*
|
||||
* @author markdavis
|
||||
*/
|
||||
@RunWith(JUnitParamsRunner.class)
|
||||
public class XLocaleMatcherTest extends TestFmwk {
|
||||
private static final int REGION_DISTANCE = 4;
|
||||
|
||||
private static final LocaleDistance LANGUAGE_MATCHER_DATA = LocaleDistance.INSTANCE;
|
||||
|
||||
private XLocaleMatcher newXLocaleMatcher() {
|
||||
return new XLocaleMatcher("");
|
||||
}
|
||||
|
||||
private XLocaleMatcher newXLocaleMatcher(LocalePriorityList build) {
|
||||
return new XLocaleMatcher(build);
|
||||
}
|
||||
|
||||
private XLocaleMatcher newXLocaleMatcher(String string) {
|
||||
return new XLocaleMatcher(LocalePriorityList.add(string).build());
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
private XLocaleMatcher newXLocaleMatcher(LocalePriorityList list, int d) {
|
||||
return XLocaleMatcher.builder().setSupportedULocales(list.getULocales()).
|
||||
internalSetThresholdDistance(d).build();
|
||||
}
|
||||
|
||||
// public void testParentLocales() {
|
||||
// // find all the regions that have a closer relation because of an explicit parent
|
||||
// Set<String> explicitParents = new HashSet<>(INFO.getExplicitParents());
|
||||
// explicitParents.remove("root");
|
||||
// Set<String> otherParents = new HashSet<>(INFO.getExplicitParents());
|
||||
// for (String locale : explicitParents) {
|
||||
// while (true) {
|
||||
// locale = LocaleIDParser.getParent(locale);
|
||||
// if (locale == null || locale.equals("root")) {
|
||||
// break;
|
||||
// }
|
||||
// otherParents.add(locale);
|
||||
// }
|
||||
// }
|
||||
// otherParents.remove("root");
|
||||
//
|
||||
// for (String locale : CONFIG.getCldrFactory().getAvailable()) {
|
||||
// String parentId = LocaleIDParser.getParent(locale);
|
||||
// String parentIdSimple = LocaleIDParser.getSimpleParent(locale);
|
||||
// if (!explicitParents.contains(parentId) && !otherParents.contains(parentIdSimple)) {
|
||||
// continue;
|
||||
// }
|
||||
// System.out.println(locale + "\t" + CONFIG.getEnglish().getName(locale) + "\t" + parentId + "\t" + parentIdSimple);
|
||||
// }
|
||||
// }
|
||||
|
||||
|
||||
// TBD reenable with override data
|
||||
// public void testOverrideData() {
|
||||
// double threshold = 0.05;
|
||||
// XLocaleDistance XLocaleMatcherData = new XLocaleDistance()
|
||||
// .addDistance("br", "fr", 10, true)
|
||||
// .addDistance("es", "cy", 10, true);
|
||||
// logln(XLocaleMatcherData.toString());
|
||||
//
|
||||
// final XLocaleMatcher matcher = newXLocaleMatcher(
|
||||
// LocalePriorityList
|
||||
// .add(ULocale.ENGLISH)
|
||||
// .add(ULocale.FRENCH)
|
||||
// .add(ULocale.UK)
|
||||
// .build(), XLocaleMatcherData, threshold);
|
||||
// logln(matcher.toString());
|
||||
//
|
||||
// assertEquals(ULocale.FRENCH, matcher.getBestMatch(new ULocale("br")));
|
||||
// assertEquals(ULocale.ENGLISH, matcher.getBestMatch(new ULocale("es"))); // one
|
||||
// // way
|
||||
// }
|
||||
|
||||
|
||||
/**
|
||||
* If all the base languages are the same, then each sublocale matches
|
||||
* itself most closely
|
||||
*/
|
||||
@Test
|
||||
public void testExactMatches() {
|
||||
String lastBase = "";
|
||||
TreeSet<ULocale> sorted = new TreeSet<>();
|
||||
for (ULocale loc : ULocale.getAvailableLocales()) {
|
||||
String language = loc.getLanguage();
|
||||
if (!lastBase.equals(language)) {
|
||||
check(sorted);
|
||||
sorted.clear();
|
||||
lastBase = language;
|
||||
}
|
||||
sorted.add(loc);
|
||||
}
|
||||
check(sorted);
|
||||
}
|
||||
|
||||
private void check(Set<ULocale> sorted) {
|
||||
if (sorted.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
check2(sorted);
|
||||
ULocale first = sorted.iterator().next();
|
||||
ULocale max = ULocale.addLikelySubtags(first);
|
||||
sorted.add(max);
|
||||
check2(sorted);
|
||||
}
|
||||
|
||||
private static final ULocale posix = new ULocale("en_US_POSIX");
|
||||
|
||||
/**
|
||||
* @param sorted
|
||||
*/
|
||||
private void check2(Set<ULocale> sorted) {
|
||||
logln("Checking: " + sorted);
|
||||
XLocaleMatcher matcher = newXLocaleMatcher(
|
||||
LocalePriorityList.add(
|
||||
sorted.toArray(new ULocale[sorted.size()]))
|
||||
.build());
|
||||
for (ULocale loc : sorted) {
|
||||
// The result may not be the exact same locale, but it must be equivalent.
|
||||
// Variants and extensions are ignored.
|
||||
if (loc.equals(posix)) { continue; }
|
||||
ULocale max = ULocale.addLikelySubtags(loc);
|
||||
ULocale best = matcher.getBestMatch(loc);
|
||||
ULocale maxBest = ULocale.addLikelySubtags(best);
|
||||
assertEquals(loc.toString(), max, maxBest);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDemotion() {
|
||||
LocalePriorityList supported = LocalePriorityList.add("fr, de-CH, it").build();
|
||||
LocalePriorityList desired = LocalePriorityList.add("fr-CH, de-CH, it").build();
|
||||
XLocaleMatcher noDemotion = XLocaleMatcher.builder().
|
||||
setSupportedULocales(supported.getULocales()).
|
||||
setDemotionPerDesiredLocale(XLocaleMatcher.Demotion.NONE).build();
|
||||
assertEquals("no demotion", new ULocale("de-CH"), noDemotion.getBestMatch(desired));
|
||||
|
||||
XLocaleMatcher regionDemotion = XLocaleMatcher.builder().
|
||||
setSupportedULocales(supported.getULocales()).
|
||||
setDemotionPerDesiredLocale(XLocaleMatcher.Demotion.REGION).build();
|
||||
assertEquals("region demotion", ULocale.FRENCH, regionDemotion.getBestMatch(desired));
|
||||
}
|
||||
|
||||
private static final class PerfCase {
|
||||
ULocale desired;
|
||||
ULocale expectedShort;
|
||||
ULocale expectedLong;
|
||||
ULocale expectedVeryLong;
|
||||
|
||||
PerfCase(String des, String expShort, String expLong, String expVeryLong) {
|
||||
desired = new ULocale(des);
|
||||
expectedShort = new ULocale(expShort);
|
||||
expectedLong = new ULocale(expLong);
|
||||
expectedVeryLong = new ULocale(expVeryLong);
|
||||
}
|
||||
}
|
||||
|
||||
private static final int WARM_UP_ITERATIONS = 1000;
|
||||
private static final int BENCHMARK_ITERATIONS = 20000;
|
||||
private static final int AVG_PCT_MEDIUM_NEW_OLD = 33;
|
||||
private static final int AVG_PCT_LONG_NEW_OLD = 80;
|
||||
|
||||
@Test
|
||||
public void testPerf() {
|
||||
if (LANGUAGE_MATCHER_DATA == null) {
|
||||
return; // skip except when testing data
|
||||
}
|
||||
|
||||
final String shortList = "en, sv";
|
||||
final String longList = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
|
||||
"el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
|
||||
"hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
|
||||
"mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
|
||||
"si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
|
||||
"zh-CN, zh-TW, zu";
|
||||
final String veryLongList = "af, af_NA, af_ZA, agq, agq_CM, ak, ak_GH, am, am_ET, " +
|
||||
"ar, ar_001, ar_AE, ar_BH, ar_DJ, ar_DZ, ar_EG, ar_EH, ar_ER, ar_IL, ar_IQ, " +
|
||||
"ar_JO, ar_KM, ar_KW, ar_LB, ar_LY, ar_MA, ar_MR, ar_OM, ar_PS, ar_QA, " +
|
||||
"ar_SA, ar_SD, ar_SO, ar_SS, ar_SY, ar_TD, ar_TN, ar_YE, as, as_IN, asa, asa_TZ, " +
|
||||
"ast, ast_ES, az, az_Cyrl, az_Cyrl_AZ, az_Latn, az_Latn_AZ, " +
|
||||
"bas, bas_CM, be, be_BY, bem, bem_ZM, bez, bez_TZ, bg, bg_BG, bm, bm_ML, " +
|
||||
"bn, bn_BD, bn_IN, bo, bo_CN, bo_IN, br, br_FR, brx, brx_IN, " +
|
||||
"bs, bs_Cyrl, bs_Cyrl_BA, bs_Latn, bs_Latn_BA, ca, ca_AD, ca_ES, ca_ES_VALENCIA, " +
|
||||
"ca_FR, ca_IT, ce, ce_RU, cgg, cgg_UG, chr, chr_US, ckb, ckb_IQ, ckb_IR, cs, cs_CZ, " +
|
||||
"cu, cu_RU, cy, cy_GB, da, da_DK, da_GL, dav, dav_KE, de, de_AT, de_BE, de_CH, " +
|
||||
"de_DE, de_LI, de_LU, dje, dje_NE, dsb, dsb_DE, dua, dua_CM, dyo, dyo_SN, dz, dz_BT, " +
|
||||
// removed en_001 to avoid exact match
|
||||
"ebu, ebu_KE, ee, ee_GH, ee_TG, el, el_CY, el_GR, en, en_150, " +
|
||||
"en_AG, en_AI, en_AS, en_AT, en_AU, en_BB, en_BE, en_BI, en_BM, en_BS, en_BW, " +
|
||||
"en_BZ, en_CA, en_CC, en_CH, en_CK, en_CM, en_CX, en_CY, en_DE, en_DG, en_DK, " +
|
||||
"en_DM, en_ER, en_FI, en_FJ, en_FK, en_FM, en_GB, en_GD, en_GG, en_GH, en_GI, " +
|
||||
"en_GM, en_GU, en_GY, en_HK, en_IE, en_IL, en_IM, en_IN, en_IO, en_JE, en_JM, " +
|
||||
"en_KE, en_KI, en_KN, en_KY, en_LC, en_LR, en_LS, en_MG, en_MH, en_MO, en_MP, " +
|
||||
"en_MS, en_MT, en_MU, en_MW, en_MY, en_NA, en_NF, en_NG, en_NL, en_NR, en_NU, " +
|
||||
"en_NZ, en_PG, en_PH, en_PK, en_PN, en_PR, en_PW, en_RW, en_SB, en_SC, en_SD, " +
|
||||
"en_SE, en_SG, en_SH, en_SI, en_SL, en_SS, en_SX, en_SZ, en_TC, en_TK, en_TO, " +
|
||||
"en_TT, en_TV, en_TZ, en_UG, en_UM, en_US, en_US_POSIX, en_VC, en_VG, en_VI, " +
|
||||
"en_VU, en_WS, en_ZA, en_ZM, en_ZW, eo, eo_001, es, es_419, es_AR, es_BO, es_CL, " +
|
||||
"es_CO, es_CR, es_CU, es_DO, es_EA, es_EC, es_ES, es_GQ, es_GT, es_HN, es_IC, " +
|
||||
"es_MX, es_NI, es_PA, es_PE, es_PH, es_PR, es_PY, es_SV, es_US, es_UY, es_VE, " +
|
||||
"et, et_EE, eu, eu_ES, ewo, ewo_CM, fa, fa_AF, fa_IR, ff, ff_CM, ff_GN, ff_MR, " +
|
||||
"ff_SN, fi, fi_FI, fil, fil_PH, fo, fo_DK, fo_FO, fr, fr_BE, fr_BF, fr_BI, fr_BJ, " +
|
||||
"fr_BL, fr_CA, fr_CD, fr_CF, fr_CG, fr_CH, fr_CI, fr_CM, fr_DJ, fr_DZ, " +
|
||||
"fr_FR, fr_GA, fr_GF, fr_GN, fr_GP, fr_GQ, fr_HT, fr_KM, fr_LU, fr_MA, fr_MC, " +
|
||||
"fr_MF, fr_MG, fr_ML, fr_MQ, fr_MR, fr_MU, fr_NC, fr_NE, fr_PF, fr_PM, fr_RE, " +
|
||||
"fr_RW, fr_SC, fr_SN, fr_SY, fr_TD, fr_TG, fr_TN, fr_VU, fr_WF, fr_YT, " +
|
||||
"fur, fur_IT, fy, fy_NL, ga, ga_IE, gd, gd_GB, gl, gl_ES, gsw, gsw_CH, gsw_FR, " +
|
||||
"gsw_LI, gu, gu_IN, guz, guz_KE, gv, gv_IM, ha, ha_GH, ha_NE, ha_NG, haw, haw_US, " +
|
||||
"he, he_IL, hi, hi_IN, hr, hr_BA, hr_HR, hsb, hsb_DE, hu, hu_HU, hy, hy_AM, " +
|
||||
"id, id_ID, ig, ig_NG, ii, ii_CN, is, is_IS, it, it_CH, it_IT, it_SM, ja, ja_JP, " +
|
||||
"jgo, jgo_CM, jmc, jmc_TZ, ka, ka_GE, kab, kab_DZ, kam, kam_KE, kde, kde_TZ, " +
|
||||
"kea, kea_CV, khq, khq_ML, ki, ki_KE, kk, kk_KZ, kkj, kkj_CM, kl, kl_GL, " +
|
||||
"kln, kln_KE, km, km_KH, kn, kn_IN, ko, ko_KP, ko_KR, kok, kok_IN, " +
|
||||
"ks, ks_IN, ksb, ksb_TZ, ksf, ksf_CM, ksh, ksh_DE, kw, kw_GB, ky, ky_KG, " +
|
||||
"lag, lag_TZ, lb, lb_LU, lg, lg_UG, lkt, lkt_US, ln, ln_AO, ln_CD, ln_CF, ln_CG, " +
|
||||
"lo, lo_LA, lrc, lrc_IQ, lrc_IR, lt, lt_LT, lu, lu_CD, luo, luo_KE, luy, luy_KE, " +
|
||||
"lv, lv_LV, mas, mas_KE, mas_TZ, mer, mer_KE, mfe, mfe_MU, mg, mg_MG, " +
|
||||
"mgh, mgh_MZ, mgo, mgo_CM, mk, mk_MK, ml, ml_IN, mn, mn_MN, mr, mr_IN, ms, ms_BN, " +
|
||||
"ms_MY, ms_SG, mt, mt_MT, mua, mua_CM, my, my_MM, mzn, mzn_IR, naq, naq_NA, " +
|
||||
"nb, nb_NO, nb_SJ, nd, nd_ZW, ne, ne_IN, ne_NP, nl, nl_AW, nl_BE, nl_BQ, nl_CW, " +
|
||||
"nl_NL, nl_SR, nl_SX, nmg, nmg_CM, nn, nn_NO, nnh, nnh_CM, nus, nus_SS, nyn, " +
|
||||
"nyn_UG, om, om_ET, om_KE, or, or_IN, os, os_GE, os_RU, pa, pa_Arab, pa_Arab_PK, " +
|
||||
"pa_Guru, pa_Guru_IN, pl, pl_PL, prg, prg_001, ps, ps_AF, pt, pt_AO, pt_BR, " +
|
||||
"pt_CV, pt_GW, pt_MO, pt_MZ, pt_PT, pt_ST, pt_TL, qu, qu_BO, qu_EC, qu_PE, rm, " +
|
||||
"rm_CH, rn, rn_BI, ro, ro_MD, ro_RO, rof, rof_TZ, root, ru, ru_BY, ru_KG, ru_KZ, " +
|
||||
"ru_MD, ru_RU, ru_UA, rw, rw_RW, rwk, rwk_TZ, sah, sah_RU, saq, saq_KE, sbp, " +
|
||||
"sbp_TZ, se, se_FI, se_NO, se_SE, seh, seh_MZ, ses, ses_ML, sg, sg_CF, shi, " +
|
||||
"shi_Latn, shi_Latn_MA, shi_Tfng, shi_Tfng_MA, si, si_LK, sk, sk_SK, sl, sl_SI, " +
|
||||
"smn, smn_FI, sn, sn_ZW, so, so_DJ, so_ET, so_KE, so_SO, sq, sq_AL, sq_MK, sq_XK, " +
|
||||
"sr, sr_Cyrl, sr_Cyrl_BA, sr_Cyrl_ME, sr_Cyrl_RS, sr_Cyrl_XK, sr_Latn, " +
|
||||
"sr_Latn_BA, sr_Latn_ME, sr_Latn_RS, sr_Latn_XK, sv, sv_AX, sv_FI, sv_SE, sw, " +
|
||||
"sw_CD, sw_KE, sw_TZ, sw_UG, ta, ta_IN, ta_LK, ta_MY, ta_SG, te, te_IN, teo, " +
|
||||
"teo_KE, teo_UG, th, th_TH, ti, ti_ER, ti_ET, tk, tk_TM, to, to_TO, tr, tr_CY, " +
|
||||
"tr_TR, twq, twq_NE, tzm, tzm_MA, ug, ug_CN, uk, uk_UA, ur, ur_IN, ur_PK, uz, " +
|
||||
"uz_Arab, uz_Arab_AF, uz_Cyrl, uz_Cyrl_UZ, uz_Latn, uz_Latn_UZ, vai, vai_Latn, " +
|
||||
"vai_Latn_LR, vai_Vaii, vai_Vaii_LR, vi, vi_VN, vo, vo_001, vun, vun_TZ, wae, " +
|
||||
"wae_CH, xog, xog_UG, yav, yav_CM, yi, yi_001, yo, yo_BJ, yo_NG, zgh, zgh_MA, " +
|
||||
"zh, zh_Hans, zh_Hans_CN, zh_Hans_HK, zh_Hans_MO, zh_Hans_SG, zh_Hant, " +
|
||||
"zh_Hant_HK, zh_Hant_MO, zh_Hant_TW, zu, zu_ZA";
|
||||
|
||||
final XLocaleMatcher matcherShort = newXLocaleMatcher(shortList);
|
||||
final XLocaleMatcher matcherLong = newXLocaleMatcher(longList);
|
||||
final XLocaleMatcher matcherVeryLong = newXLocaleMatcher(veryLongList);
|
||||
|
||||
final LocaleMatcher matcherShortOld = new LocaleMatcher(shortList);
|
||||
final LocaleMatcher matcherLongOld = new LocaleMatcher(longList);
|
||||
final LocaleMatcher matcherVeryLongOld = new LocaleMatcher(veryLongList);
|
||||
|
||||
long timeShortNew=0;
|
||||
long timeMediumNew=0;
|
||||
long timeLongNew=0;
|
||||
|
||||
long timeShortOld=0;
|
||||
long timeMediumOld=0;
|
||||
long timeLongOld=0;
|
||||
|
||||
PerfCase[] pcs = new PerfCase[] {
|
||||
// Exact match in all matchers.
|
||||
new PerfCase("sv", "sv", "sv", "sv"),
|
||||
// Common locale, exact match only in very long list.
|
||||
new PerfCase("fr_CA", "en", "fr", "fr_CA"),
|
||||
// Unusual locale, no exact match.
|
||||
new PerfCase("de_CA", "en", "de", "de"),
|
||||
// World English maps to several region partitions.
|
||||
new PerfCase("en_001", "en", "en", "en"),
|
||||
// Ancient language with interesting subtags.
|
||||
new PerfCase("egy_Copt_CY", "en", "af", "af")
|
||||
};
|
||||
|
||||
for (PerfCase pc : pcs) {
|
||||
final ULocale desired = pc.desired;
|
||||
|
||||
assertEquals(desired.toString(), pc.expectedShort, matcherShort.getBestMatch(desired));
|
||||
assertEquals(desired.toString(), pc.expectedLong, matcherLong.getBestMatch(desired));
|
||||
assertEquals(desired.toString(), pc.expectedVeryLong, matcherVeryLong.getBestMatch(desired));
|
||||
|
||||
timeXLocaleMatcher(desired, matcherShort, WARM_UP_ITERATIONS);
|
||||
timeXLocaleMatcher(desired, matcherLong, WARM_UP_ITERATIONS);
|
||||
timeXLocaleMatcher(desired, matcherVeryLong, WARM_UP_ITERATIONS);
|
||||
long tns = timeXLocaleMatcher(desired, matcherShort, BENCHMARK_ITERATIONS);
|
||||
System.out.format("New Duration (few supported):\t%s\t%d\tnanos\n", desired, tns);
|
||||
timeShortNew += tns;
|
||||
long tnl = timeXLocaleMatcher(desired, matcherLong, BENCHMARK_ITERATIONS);
|
||||
System.out.format("New Duration (med. supported):\t%s\t%d\tnanos\n", desired, tnl);
|
||||
timeMediumNew += tnl;
|
||||
long tnv = timeXLocaleMatcher(desired, matcherVeryLong, BENCHMARK_ITERATIONS);
|
||||
System.out.format("New Duration (many supported):\t%s\t%d\tnanos\n", desired, tnv);
|
||||
timeLongNew += tnv;
|
||||
|
||||
timeLocaleMatcher(desired, matcherShortOld, WARM_UP_ITERATIONS);
|
||||
timeLocaleMatcher(desired, matcherLongOld, WARM_UP_ITERATIONS);
|
||||
timeLocaleMatcher(desired, matcherVeryLongOld, WARM_UP_ITERATIONS);
|
||||
long tos = timeLocaleMatcher(desired, matcherShortOld, BENCHMARK_ITERATIONS);
|
||||
System.out.format("Old Duration (few supported):\t%s\t%d\tnanos new/old=%d%%\n",
|
||||
desired, tos, (100 * tns) / tos);
|
||||
timeShortOld += tos;
|
||||
long tol = timeLocaleMatcher(desired, matcherLongOld, BENCHMARK_ITERATIONS);
|
||||
System.out.format("Old Duration (med. supported):\t%s\t%d\tnanos new/old=%d%%\n",
|
||||
desired, tol, (100 * tnl) / tol);
|
||||
timeMediumOld += tol;
|
||||
long tov = timeLocaleMatcher(desired, matcherVeryLongOld, BENCHMARK_ITERATIONS);
|
||||
System.out.format("Old Duration (many supported):\t%s\t%d\tnanos new/old=%d%%\n",
|
||||
desired, tov, (100 * tnv) / tov);
|
||||
timeLongOld += tov;
|
||||
}
|
||||
|
||||
assertTrue(
|
||||
String.format("timeShortNew=%d < %d%% of timeShortOld=%d",
|
||||
timeShortNew, AVG_PCT_MEDIUM_NEW_OLD, timeShortOld),
|
||||
timeShortNew * 100 < timeShortOld * AVG_PCT_MEDIUM_NEW_OLD);
|
||||
assertTrue(
|
||||
String.format("timeMediumNew=%d < %d%% of timeMediumOld=%d",
|
||||
timeMediumNew, AVG_PCT_MEDIUM_NEW_OLD, timeMediumOld),
|
||||
timeMediumNew * 100 < timeMediumOld * AVG_PCT_MEDIUM_NEW_OLD);
|
||||
assertTrue(
|
||||
String.format("timeLongNew=%d < %d%% of timeLongOld=%d",
|
||||
timeLongNew, AVG_PCT_LONG_NEW_OLD, timeLongOld),
|
||||
timeLongNew * 100 < timeLongOld * AVG_PCT_LONG_NEW_OLD);
|
||||
|
||||
maximizePerf();
|
||||
}
|
||||
|
||||
private static long timeXLocaleMatcher(ULocale desired, XLocaleMatcher matcher, int iterations) {
|
||||
long start = System.nanoTime();
|
||||
for (int i = iterations; i > 0; --i) {
|
||||
matcher.getBestMatch(desired);
|
||||
}
|
||||
long delta = System.nanoTime() - start;
|
||||
return (delta / iterations);
|
||||
}
|
||||
|
||||
private static long timeLocaleMatcher(ULocale desired, LocaleMatcher matcher, int iterations) {
|
||||
long start = System.nanoTime();
|
||||
for (int i = iterations; i > 0; --i) {
|
||||
matcher.getBestMatch(desired);
|
||||
}
|
||||
long delta = System.nanoTime() - start;
|
||||
return (delta / iterations);
|
||||
}
|
||||
|
||||
private void maximizePerf() {
|
||||
final String tags = "af, am, ar, az, be, bg, bn, bs, ca, cs, cy, cy, da, de, " +
|
||||
"el, en, en-GB, es, es-419, et, eu, fa, fi, fil, fr, ga, gl, gu, " +
|
||||
"hi, hr, hu, hy, id, is, it, iw, ja, ka, kk, km, kn, ko, ky, lo, lt, lv, " +
|
||||
"mk, ml, mn, mr, ms, my, ne, nl, no, pa, pl, pt, pt-PT, ro, ru, " +
|
||||
"si, sk, sl, sq, sr, sr-Latn, sv, sw, ta, te, th, tr, uk, ur, uz, vi, " +
|
||||
"zh-CN, zh-TW, zu";
|
||||
LocalePriorityList list = LocalePriorityList.add(tags).build();
|
||||
int few = 1000;
|
||||
long t = timeMaximize(list, few); // warm up
|
||||
t = timeMaximize(list, few); // measure for scale
|
||||
long targetTime = 100000000L; // 10^8 ns = 0.1s
|
||||
int iterations = (int)((targetTime * few) / t);
|
||||
t = timeMaximize(list, iterations);
|
||||
int length = 0;
|
||||
for (@SuppressWarnings("unused") ULocale locale : list) { ++length; }
|
||||
System.out.println("maximize: " + (t / iterations / length) + " ns/locale: " +
|
||||
t + " ns / " + iterations + " iterations / " + length + " locales");
|
||||
}
|
||||
|
||||
// returns total ns not per iteration
|
||||
private static long timeMaximize(Iterable<ULocale> list, int iterations) {
|
||||
long start = System.nanoTime();
|
||||
for (int i = iterations; i > 0; --i) {
|
||||
for (ULocale locale : list) {
|
||||
XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
|
||||
}
|
||||
}
|
||||
return System.nanoTime() - start;
|
||||
}
|
||||
|
||||
private static final class TestCase implements Cloneable {
|
||||
private static final String ENDL = System.getProperties().getProperty("line.separator");
|
||||
|
||||
int lineNr = 0;
|
||||
|
||||
String nameLine = "";
|
||||
String supportedLine = "";
|
||||
String defaultLine = "";
|
||||
String distanceLine = "";
|
||||
String thresholdLine = "";
|
||||
String matchLine = "";
|
||||
|
||||
String supported = "";
|
||||
String def = "";
|
||||
String favor = "";
|
||||
String threshold = "";
|
||||
String desired = "";
|
||||
String expMatch = "";
|
||||
String expDesired = "";
|
||||
String expCombined = "";
|
||||
|
||||
@Override
|
||||
public TestCase clone() throws CloneNotSupportedException {
|
||||
return (TestCase) super.clone();
|
||||
}
|
||||
|
||||
void reset(String newNameLine) {
|
||||
nameLine = newNameLine;
|
||||
supportedLine = "";
|
||||
defaultLine = "";
|
||||
distanceLine = "";
|
||||
thresholdLine = "";
|
||||
|
||||
supported = "";
|
||||
def = "";
|
||||
favor = "";
|
||||
threshold = "";
|
||||
}
|
||||
|
||||
String toInputsKey() {
|
||||
return supported + '+' + def + '+' + favor + '+' + threshold + '+' + desired;
|
||||
}
|
||||
|
||||
private static void appendLine(StringBuilder sb, String line) {
|
||||
if (!line.isEmpty()) {
|
||||
sb.append(ENDL).append(line);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder(nameLine);
|
||||
appendLine(sb, supportedLine);
|
||||
appendLine(sb, defaultLine);
|
||||
appendLine(sb, distanceLine);
|
||||
appendLine(sb, thresholdLine);
|
||||
sb.append(ENDL).append("line ").append(lineNr).append(':');
|
||||
appendLine(sb, matchLine);
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
private static String getSuffixAfterPrefix(String s, int limit, String prefix) {
|
||||
if (prefix.length() <= limit && s.startsWith(prefix)) {
|
||||
return s.substring(prefix.length(), limit);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// UsedReflectively, not private to avoid unused-warning
|
||||
static List<TestCase> readTestCases() throws Exception {
|
||||
List<TestCase> tests = new ArrayList<>();
|
||||
Map<String, Integer> uniqueTests = new HashMap<>();
|
||||
TestCase test = new TestCase();
|
||||
String filename = "data/localeMatcherTest.txt";
|
||||
try (BufferedReader in = FileUtilities.openFile(XLocaleMatcherTest.class, filename)) {
|
||||
String line;
|
||||
while ((line = in.readLine()) != null) {
|
||||
++test.lineNr;
|
||||
// Start of comment, or end of line, minus trailing spaces.
|
||||
int limit = line.indexOf('#');
|
||||
if (limit < 0) {
|
||||
limit = line.length();
|
||||
}
|
||||
char c;
|
||||
while (limit > 0 && ((c = line.charAt(limit - 1)) == ' ' || c == '\t')) {
|
||||
--limit;
|
||||
}
|
||||
if (limit == 0) { // empty line
|
||||
continue;
|
||||
}
|
||||
String suffix;
|
||||
if (line.startsWith("** test: ")) {
|
||||
test.reset(line);
|
||||
} else if ((suffix = getSuffixAfterPrefix(line, limit, "@supported=")) != null) {
|
||||
test.supportedLine = line;
|
||||
test.supported = suffix;
|
||||
} else if ((suffix = getSuffixAfterPrefix(line, limit, "@default=")) != null) {
|
||||
test.defaultLine = line;
|
||||
test.def = suffix;
|
||||
} else if ((suffix = getSuffixAfterPrefix(line, limit, "@favor=")) != null) {
|
||||
test.distanceLine = line;
|
||||
test.favor = suffix;
|
||||
} else if ((suffix = getSuffixAfterPrefix(line, limit, "@threshold=")) != null) {
|
||||
test.thresholdLine = line;
|
||||
test.threshold = suffix;
|
||||
} else {
|
||||
int matchSep = line.indexOf(">>");
|
||||
// >> before an inline comment, and followed by more than white space.
|
||||
if (0 <= matchSep && (matchSep + 2) < limit) {
|
||||
test.matchLine = line;
|
||||
test.desired = line.substring(0, matchSep).trim();
|
||||
test.expDesired = test.expCombined = "";
|
||||
int start = matchSep + 2;
|
||||
int expLimit = line.indexOf('|', start);
|
||||
if (expLimit < 0) {
|
||||
test.expMatch = line.substring(start, limit).trim();
|
||||
} else {
|
||||
test.expMatch = line.substring(start, expLimit).trim();
|
||||
start = expLimit + 1;
|
||||
expLimit = line.indexOf('|', start);
|
||||
if (expLimit < 0) {
|
||||
test.expDesired = line.substring(start, limit).trim();
|
||||
} else {
|
||||
test.expDesired = line.substring(start, expLimit).trim();
|
||||
test.expCombined = line.substring(expLimit + 1, limit).trim();
|
||||
}
|
||||
}
|
||||
String inputs = test.toInputsKey();
|
||||
Integer prevIndex = uniqueTests.get(inputs);
|
||||
if (prevIndex == null) {
|
||||
uniqueTests.put(inputs, tests.size());
|
||||
} else {
|
||||
System.out.println("Locale matcher test case on line " + test.lineNr
|
||||
+ " is a duplicate of line " + tests.get(prevIndex).lineNr);
|
||||
}
|
||||
tests.add(test.clone());
|
||||
} else {
|
||||
throw new IllegalArgumentException("test data syntax error on line "
|
||||
+ test.lineNr + "\n" + line);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
System.out.println("Number of duplicate locale matcher test cases: " + (tests.size() - uniqueTests.size()));
|
||||
return tests;
|
||||
}
|
||||
|
||||
private static ULocale getULocaleOrNull(String s) {
|
||||
if (s.equals("null")) {
|
||||
return null;
|
||||
} else {
|
||||
return new ULocale(s);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@Parameters(method = "readTestCases")
|
||||
public void dataDriven(TestCase test) {
|
||||
XLocaleMatcher matcher;
|
||||
if (test.def.isEmpty() && test.favor.isEmpty() && test.threshold.isEmpty()) {
|
||||
matcher = new XLocaleMatcher(test.supported);
|
||||
} else {
|
||||
XLocaleMatcher.Builder builder = XLocaleMatcher.builder();
|
||||
builder.setSupportedLocales(test.supported);
|
||||
if (!test.def.isEmpty()) {
|
||||
builder.setDefaultULocale(new ULocale(test.def));
|
||||
}
|
||||
if (!test.favor.isEmpty()) {
|
||||
FavorSubtag favor;
|
||||
switch (test.favor) {
|
||||
case "normal":
|
||||
favor = FavorSubtag.LANGUAGE;
|
||||
break;
|
||||
case "script":
|
||||
favor = FavorSubtag.SCRIPT;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("unsupported FavorSubtag value " + test.favor);
|
||||
}
|
||||
builder.setFavorSubtag(favor);
|
||||
}
|
||||
if (!test.threshold.isEmpty()) {
|
||||
int threshold = Integer.valueOf(test.threshold);
|
||||
builder.internalSetThresholdDistance(threshold);
|
||||
}
|
||||
matcher = builder.build();
|
||||
}
|
||||
|
||||
ULocale expMatch = getULocaleOrNull(test.expMatch);
|
||||
if (test.expDesired.isEmpty() && test.expCombined.isEmpty()) {
|
||||
ULocale bestSupported = matcher.getBestMatch(test.desired);
|
||||
assertEquals("bestSupported", expMatch, bestSupported);
|
||||
} else {
|
||||
LocalePriorityList desired = LocalePriorityList.add(test.desired).build();
|
||||
XLocaleMatcher.Result result = matcher.getBestMatchResult(desired);
|
||||
assertEquals("bestSupported", expMatch, result.getSupportedULocale());
|
||||
if (!test.expDesired.isEmpty()) {
|
||||
ULocale expDesired = getULocaleOrNull(test.expDesired);
|
||||
assertEquals("bestDesired", expDesired, result.getDesiredULocale());
|
||||
}
|
||||
if (!test.expCombined.isEmpty()) {
|
||||
ULocale expCombined = getULocaleOrNull(test.expCombined);
|
||||
ULocale combined = result.makeServiceULocale();
|
||||
assertEquals("combined", expCombined, combined);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -3,10 +3,8 @@
|
|||
<classpathentry kind="src" path="src"/>
|
||||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
|
||||
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-core"/>
|
||||
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-translit-tests"/>
|
||||
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-translit"/>
|
||||
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-collate"/>
|
||||
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-test-framework"/>
|
||||
<classpathentry combineaccessrules="false" kind="src" path="/icu4j-core-tests"/>
|
||||
<classpathentry kind="output" path="out/bin"/>
|
||||
</classpath>
|
||||
|
|
|
@ -3,10 +3,6 @@
|
|||
<name>icu4j-tools</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
<project>icu4j-core</project>
|
||||
<project>icu4j-core-tests</project>
|
||||
<project>icu4j-shared</project>
|
||||
<project>icu4j-test-framework</project>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.locale;
|
||||
package com.ibm.icu.dev.tool.locale;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Collection;
|
||||
|
@ -14,10 +14,11 @@ import java.util.TreeMap;
|
|||
import com.ibm.icu.impl.ICUData;
|
||||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.impl.UResource;
|
||||
import com.ibm.icu.impl.locale.LSR;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.HashMultimap;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Multimap;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
|
||||
import com.ibm.icu.util.BytesTrie;
|
||||
import com.ibm.icu.impl.locale.XLikelySubtags;
|
||||
import com.ibm.icu.util.BytesTrieBuilder;
|
||||
import com.ibm.icu.util.ICUException;
|
||||
|
||||
|
@ -25,7 +26,7 @@ import com.ibm.icu.util.ICUException;
|
|||
* Builds data for XLikelySubtags.
|
||||
* Reads source data from ICU resource bundles.
|
||||
*/
|
||||
class LikelySubtagsBuilder {
|
||||
public class LikelySubtagsBuilder {
|
||||
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
|
||||
|
||||
private static ICUResourceBundle getSupplementalDataBundle(String name) {
|
||||
|
@ -50,7 +51,7 @@ class LikelySubtagsBuilder {
|
|||
UResource.Key key = new UResource.Key();
|
||||
for (int i = 0; aliases.getKeyAndValue(i, key, value); ++i) {
|
||||
String aliasFrom = key.toString();
|
||||
if (aliasFrom.contains("_")) {
|
||||
if (aliasFrom.contains("_") || aliasFrom.contains("-")) {
|
||||
continue; // only simple aliasing
|
||||
}
|
||||
UResource.Table table = value.getTable();
|
||||
|
@ -113,7 +114,7 @@ class LikelySubtagsBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
BytesTrie build() {
|
||||
byte[] build() {
|
||||
ByteBuffer buffer = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL);
|
||||
// Allocate an array with just the necessary capacity,
|
||||
// so that we do not hold on to a larger array for a long time.
|
||||
|
@ -122,11 +123,12 @@ class LikelySubtagsBuilder {
|
|||
if (DEBUG_OUTPUT) {
|
||||
System.out.println("likely subtags trie size: " + bytes.length + " bytes");
|
||||
}
|
||||
return new BytesTrie(bytes, 0);
|
||||
return bytes;
|
||||
}
|
||||
}
|
||||
|
||||
static XLikelySubtags.Data build() {
|
||||
// VisibleForTesting
|
||||
public static XLikelySubtags.Data build() {
|
||||
AliasesBuilder languageAliasesBuilder = new AliasesBuilder("language");
|
||||
AliasesBuilder regionAliasesBuilder = new AliasesBuilder("territory");
|
||||
|
||||
|
@ -202,7 +204,7 @@ class LikelySubtagsBuilder {
|
|||
}
|
||||
}
|
||||
}
|
||||
BytesTrie trie = trieBuilder.build();
|
||||
byte[] trie = trieBuilder.build();
|
||||
LSR[] lsrs = lsrIndexes.keySet().toArray(new LSR[lsrIndexes.size()]);
|
||||
return new XLikelySubtags.Data(
|
||||
languageAliasesBuilder.toCanonical, regionAliasesBuilder.toCanonical, trie, lsrs);
|
|
@ -1,8 +1,15 @@
|
|||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.locale;
|
||||
package com.ibm.icu.dev.tool.locale;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.PrintWriter;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
|
@ -17,11 +24,13 @@ import java.util.TreeSet;
|
|||
import com.ibm.icu.impl.ICUData;
|
||||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.impl.UResource;
|
||||
import com.ibm.icu.impl.locale.LSR;
|
||||
import com.ibm.icu.impl.locale.LocaleDistance;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Multimap;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Predicate;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.Splitter;
|
||||
import com.ibm.icu.impl.locale.XCldrStub.TreeMultimap;
|
||||
import com.ibm.icu.util.BytesTrie;
|
||||
import com.ibm.icu.impl.locale.XLikelySubtags;
|
||||
import com.ibm.icu.util.BytesTrieBuilder;
|
||||
import com.ibm.icu.util.Output;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
@ -153,7 +162,7 @@ public final class LocaleDistanceBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
BytesTrie build() {
|
||||
byte[] build() {
|
||||
ByteBuffer buffer = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL);
|
||||
// Allocate an array with just the necessary capacity,
|
||||
// so that we do not hold on to a larger array for a long time.
|
||||
|
@ -162,7 +171,7 @@ public final class LocaleDistanceBuilder {
|
|||
if (DEBUG_OUTPUT) {
|
||||
System.out.println("distance trie size: " + bytes.length + " bytes");
|
||||
}
|
||||
return new BytesTrie(bytes, 0);
|
||||
return bytes;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -468,7 +477,8 @@ public final class LocaleDistanceBuilder {
|
|||
return result;
|
||||
}
|
||||
|
||||
static LocaleDistance build() {
|
||||
// VisibleForTesting
|
||||
public static LocaleDistance.Data build() {
|
||||
// From CLDR supplementalData/languageMatching/languageMatches type="written_new"/
|
||||
// and then paradigmLocales, matchVariable, and the last languageMatch items.
|
||||
ICUResourceBundle supplementalData = getSupplementalDataBundle("supplementalData");
|
||||
|
@ -591,8 +601,8 @@ public final class LocaleDistanceBuilder {
|
|||
|
||||
TrieBuilder trieBuilder = new TrieBuilder();
|
||||
defaultDistanceTable.toTrie(trieBuilder);
|
||||
BytesTrie trie = trieBuilder.build();
|
||||
return new LocaleDistance(
|
||||
byte[] trie = trieBuilder.build();
|
||||
return new LocaleDistance.Data(
|
||||
trie, rmb.regionToPartitionsIndex, rmb.partitionArrays,
|
||||
paradigmLSRs, distances);
|
||||
}
|
||||
|
@ -845,4 +855,112 @@ public final class LocaleDistanceBuilder {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Directory in which openWriter() creates the generated text file.
private static final String TXT_PATH = "/tmp";
|
||||
// Base name of the output file; main() also writes it as the name of the top-level table.
private static final String TXT_FILE_BASE_NAME = "langInfo";
|
||||
// Name of the output file: the base name plus the ".txt" extension.
private static final String TXT_FILE_NAME = TXT_FILE_BASE_NAME + ".txt";
|
||||
|
||||
private static PrintWriter openWriter() throws IOException {
|
||||
File file = new File(TXT_PATH, TXT_FILE_NAME);
|
||||
return new PrintWriter(
|
||||
new BufferedWriter(
|
||||
new OutputStreamWriter(
|
||||
new FileOutputStream(file), StandardCharsets.UTF_8), 4096));
|
||||
}
|
||||
|
||||
private static void printManyHexBytes(PrintWriter out, byte[] bytes) {
|
||||
for (int i = 0;; ++i) {
|
||||
if (i == bytes.length) {
|
||||
out.println();
|
||||
break;
|
||||
}
|
||||
if (i != 0 && (i & 0xf) == 0) {
|
||||
out.println();
|
||||
}
|
||||
out.format("%02x", bytes[i] & 0xff);
|
||||
}
|
||||
}
|
||||
|
||||
public static final void main(String[] args) throws IOException {
|
||||
XLikelySubtags.Data likelyData = LikelySubtagsBuilder.build();
|
||||
LocaleDistance.Data distanceData = build();
|
||||
System.out.println("Writing LocaleDistance.Data to " + TXT_PATH + '/' + TXT_FILE_NAME);
|
||||
try (PrintWriter out = openWriter()) {
|
||||
out.println("// © 2019 and later: Unicode, Inc. and others.\n" +
|
||||
"// License & terms of use: http://www.unicode.org/copyright.html#License\n" +
|
||||
"// Generated by ICU4J LocaleDistanceBuilder.\n" +
|
||||
TXT_FILE_BASE_NAME + ":table(nofallback){");
|
||||
out.println(" likely{");
|
||||
out.println(" languageAliases{ // " + likelyData.languageAliases.size());
|
||||
for (Map.Entry<String, String> entry :
|
||||
new TreeMap<>(likelyData.languageAliases).entrySet()) {
|
||||
out.println(" \"" + entry.getKey() + "\",\"" + entry.getValue() + "\",");
|
||||
}
|
||||
out.println(" } // languageAliases");
|
||||
|
||||
out.println(" regionAliases{ // " + likelyData.regionAliases.size());
|
||||
for (Map.Entry<String, String> entry :
|
||||
new TreeMap<>(likelyData.regionAliases).entrySet()) {
|
||||
out.println(" \"" + entry.getKey() + "\",\"" + entry.getValue() + "\",");
|
||||
}
|
||||
out.println(" } // regionAliases");
|
||||
|
||||
out.println(" trie:bin{ // BytesTrie: " + likelyData.trie.length + " bytes");
|
||||
printManyHexBytes(out, likelyData.trie);
|
||||
out.println(" } // trie");
|
||||
|
||||
out.println(" lsrs{ // " + likelyData.lsrs.length);
|
||||
for (LSR lsr : likelyData.lsrs) {
|
||||
out.println(" \"" + lsr.language + "\",\"" +
|
||||
lsr.script + "\",\"" + lsr.region + "\",");
|
||||
}
|
||||
out.println(" } // lsrs");
|
||||
out.println(" } // likely");
|
||||
|
||||
out.println(" match{");
|
||||
out.println(" trie:bin{ // BytesTrie: " + distanceData.trie.length + " bytes");
|
||||
printManyHexBytes(out, distanceData.trie);
|
||||
out.println(" } // trie");
|
||||
|
||||
out.println(" regionToPartitions:bin{ // " +
|
||||
distanceData.regionToPartitionsIndex.length + " bytes");
|
||||
printManyHexBytes(out, distanceData.regionToPartitionsIndex);
|
||||
out.println(" } // regionToPartitions");
|
||||
|
||||
out.print(" partitions{");
|
||||
boolean first = true;
|
||||
for (String p : distanceData.partitionArrays) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
out.append(',');
|
||||
}
|
||||
out.append('"').print(p);
|
||||
out.append('"');
|
||||
}
|
||||
out.println("}");
|
||||
|
||||
out.println(" paradigms{");
|
||||
for (LSR lsr : distanceData.paradigmLSRs) {
|
||||
out.println(" \"" + lsr.language + "\",\"" +
|
||||
lsr.script + "\",\"" + lsr.region + "\",");
|
||||
}
|
||||
out.println(" }");
|
||||
|
||||
out.print(" distances:intvector{");
|
||||
first = true;
|
||||
for (int d : distanceData.distances) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
out.append(',');
|
||||
}
|
||||
out.print(d);
|
||||
}
|
||||
out.println("}");
|
||||
|
||||
out.println(" } // match");
|
||||
out.println("}");
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue