mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-20916 LocaleMatcher distinguish between equivalent locales
- equivalent but originally unequal
- locale distance shifted left for additional fraction bits with micro distance
- Java more verbose matcher debug output

See #949
This commit is contained in:
parent
2f72a932ac
commit
ad638c274e
14 changed files with 392 additions and 206 deletions
|
@ -22,7 +22,7 @@
|
|||
#include "uhash.h"
|
||||
#include "uvector.h"
|
||||
|
||||
#define UND_LSR LSR("und", "", "")
|
||||
#define UND_LSR LSR("und", "", "", LSR::EXPLICIT_LSR)
|
||||
|
||||
/**
|
||||
* Indicator for the lifetime of desired-locale objects passed into the LocaleMatcher.
|
||||
|
@ -393,26 +393,27 @@ LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
|
|||
// 3. Remaining locales in builder order.
|
||||
// In Java, we use a LinkedHashMap for both map & ordered lists.
|
||||
// In C++, we use separate structures.
|
||||
// We over-allocate arrays of LSRs and indexes for simplicity.
|
||||
// We reserve slots at the array starts for the default and paradigm locales,
|
||||
// plus enough for all supported locales.
|
||||
// If there are few paradigm locales and few duplicate supported LSRs,
|
||||
// then the amount of wasted space is small.
|
||||
//
|
||||
// We allocate arrays of LSRs and indexes,
|
||||
// with as many slots as supported locales, for simplicity.
|
||||
// We write the default and paradigm LSRs starting from the front of the arrays,
|
||||
// and others starting from the back.
|
||||
// At the end we reverse the non-paradigm LSRs.
|
||||
// We end up wasting as many array slots as there are duplicate supported LSRs,
|
||||
// but the amount of wasted space is small as long as there are few duplicates.
|
||||
supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong,
|
||||
supportedLocalesLength, &errorCode);
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
int32_t paradigmLimit = 1 + localeDistance.getParadigmLSRsLength();
|
||||
int32_t suppLSRsCapacity = paradigmLimit + supportedLocalesLength;
|
||||
supportedLSRs = static_cast<const LSR **>(
|
||||
uprv_malloc(suppLSRsCapacity * sizeof(const LSR *)));
|
||||
uprv_malloc(supportedLocalesLength * sizeof(const LSR *)));
|
||||
supportedIndexes = static_cast<int32_t *>(
|
||||
uprv_malloc(suppLSRsCapacity * sizeof(int32_t)));
|
||||
uprv_malloc(supportedLocalesLength * sizeof(int32_t)));
|
||||
if (supportedLSRs == nullptr || supportedIndexes == nullptr) {
|
||||
errorCode = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
int32_t paradigmIndex = 0;
|
||||
int32_t otherIndex = paradigmLimit;
|
||||
int32_t otherIndex = supportedLocalesLength;
|
||||
if (idef >= 0) {
|
||||
uhash_puti(supportedLsrToIndex, const_cast<LSR *>(defLSR), idef + 1, &errorCode);
|
||||
supportedLSRs[0] = defLSR;
|
||||
|
@ -446,21 +447,32 @@ LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
|
|||
supportedLSRs[paradigmIndex] = &lsr;
|
||||
supportedIndexes[paradigmIndex++] = i;
|
||||
} else {
|
||||
supportedLSRs[otherIndex] = &lsr;
|
||||
supportedIndexes[otherIndex++] = i;
|
||||
supportedLSRs[--otherIndex] = &lsr;
|
||||
supportedIndexes[otherIndex] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
}
|
||||
// Squeeze out unused array slots.
|
||||
if (paradigmIndex < paradigmLimit && paradigmLimit < otherIndex) {
|
||||
uprv_memmove(supportedLSRs + paradigmIndex, supportedLSRs + paradigmLimit,
|
||||
(otherIndex - paradigmLimit) * sizeof(const LSR *));
|
||||
uprv_memmove(supportedIndexes + paradigmIndex, supportedIndexes + paradigmLimit,
|
||||
(otherIndex - paradigmLimit) * sizeof(int32_t));
|
||||
// Reverse the non-paradigm LSRs to be in order, right after the paradigm LSRs.
|
||||
// First fill the unused slots between paradigm LSRs and other LSRs.
|
||||
// This gap is as large as the number of locales with duplicate LSRs.
|
||||
int32_t i = paradigmIndex;
|
||||
int32_t j = supportedLocalesLength - 1;
|
||||
while (i < otherIndex && otherIndex <= j) {
|
||||
supportedLSRs[i] = supportedLSRs[j];
|
||||
supportedIndexes[i++] = supportedIndexes[j--];
|
||||
}
|
||||
supportedLSRsLength = otherIndex - (paradigmLimit - paradigmIndex);
|
||||
// Swap remaining non-paradigm LSRs in place.
|
||||
while (i < j) {
|
||||
const LSR *tempLSR = supportedLSRs[i];
|
||||
supportedLSRs[i] = supportedLSRs[j];
|
||||
supportedLSRs[j] = tempLSR;
|
||||
int32_t tempIndex = supportedIndexes[i];
|
||||
supportedIndexes[i++] = supportedIndexes[j];
|
||||
supportedIndexes[j--] = tempIndex;
|
||||
}
|
||||
supportedLSRsLength = supportedLocalesLength - (otherIndex - paradigmIndex);
|
||||
}
|
||||
|
||||
if (def != nullptr && (idef < 0 || def != supportedLocales[idef])) {
|
||||
|
@ -662,7 +674,7 @@ int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remai
|
|||
if (U_FAILURE(errorCode)) { return -1; }
|
||||
int32_t desiredIndex = 0;
|
||||
int32_t bestSupportedLsrIndex = -1;
|
||||
for (int32_t bestDistance = thresholdDistance;;) {
|
||||
for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) {
|
||||
// Quick check for exact maximized LSR.
|
||||
// Returns suppIndex+1 where 0 means not found.
|
||||
if (supportedLsrToIndex != nullptr) {
|
||||
|
@ -677,16 +689,17 @@ int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remai
|
|||
}
|
||||
}
|
||||
int32_t bestIndexAndDistance = localeDistance.getBestIndexAndDistance(
|
||||
desiredLSR, supportedLSRs, supportedLSRsLength, bestDistance, favorSubtag);
|
||||
desiredLSR, supportedLSRs, supportedLSRsLength, bestShiftedDistance, favorSubtag);
|
||||
if (bestIndexAndDistance >= 0) {
|
||||
bestDistance = bestIndexAndDistance & 0xff;
|
||||
bestShiftedDistance = LocaleDistance::getShiftedDistance(bestIndexAndDistance);
|
||||
if (remainingIter != nullptr) {
|
||||
remainingIter->rememberCurrent(desiredIndex, errorCode);
|
||||
if (U_FAILURE(errorCode)) { return -1; }
|
||||
}
|
||||
bestSupportedLsrIndex = bestIndexAndDistance >= 0 ? bestIndexAndDistance >> 8 : -1;
|
||||
bestSupportedLsrIndex = bestIndexAndDistance >= 0 ?
|
||||
LocaleDistance::getIndex(bestIndexAndDistance) : -1;
|
||||
}
|
||||
if ((bestDistance -= demotionPerDesiredLocale) <= 0) {
|
||||
if ((bestShiftedDistance -= LocaleDistance::shiftDistance(demotionPerDesiredLocale)) <= 0) {
|
||||
break;
|
||||
}
|
||||
if (remainingIter == nullptr || !remainingIter->hasNext()) {
|
||||
|
@ -708,11 +721,12 @@ double LocaleMatcher::internalMatch(const Locale &desired, const Locale &support
|
|||
LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
|
||||
if (U_FAILURE(errorCode)) { return 0; }
|
||||
const LSR *pSuppLSR = &suppLSR;
|
||||
int32_t distance = localeDistance.getBestIndexAndDistance(
|
||||
int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
|
||||
getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
|
||||
&pSuppLSR, 1,
|
||||
thresholdDistance, favorSubtag) & 0xff;
|
||||
return (100 - distance) / 100.0;
|
||||
LocaleDistance::shiftDistance(thresholdDistance), favorSubtag);
|
||||
double distance = LocaleDistance::getDistanceDouble(indexAndDistance);
|
||||
return (100.0 - distance) / 100.0;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -97,17 +97,23 @@ LocaleDistance::LocaleDistance(const LocaleDistanceData &data) :
|
|||
// a mere region difference for one desired locale
|
||||
// is as good as a perfect match for the next following desired locale.
|
||||
// As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>.
|
||||
LSR en("en", "Latn", "US");
|
||||
LSR enGB("en", "Latn", "GB");
|
||||
LSR en("en", "Latn", "US", LSR::EXPLICIT_LSR);
|
||||
LSR enGB("en", "Latn", "GB", LSR::EXPLICIT_LSR);
|
||||
const LSR *p_enGB = &enGB;
|
||||
defaultDemotionPerDesiredLocale = getBestIndexAndDistance(en, &p_enGB, 1,
|
||||
50, ULOCMATCH_FAVOR_LANGUAGE) & 0xff;
|
||||
int32_t indexAndDistance = getBestIndexAndDistance(en, &p_enGB, 1,
|
||||
shiftDistance(50), ULOCMATCH_FAVOR_LANGUAGE);
|
||||
defaultDemotionPerDesiredLocale = getDistanceFloor(indexAndDistance);
|
||||
}
|
||||
|
||||
int32_t LocaleDistance::getBestIndexAndDistance(
|
||||
const LSR &desired,
|
||||
const LSR **supportedLSRs, int32_t supportedLSRsLength,
|
||||
int32_t threshold, ULocMatchFavorSubtag favorSubtag) const {
|
||||
int32_t shiftedThreshold, ULocMatchFavorSubtag favorSubtag) const {
|
||||
// Round up the shifted threshold (if fraction bits are not 0)
|
||||
// for comparison with un-shifted distances until we need fraction bits.
|
||||
// (If we simply shifted non-zero fraction bits away, then we might ignore a language
|
||||
// when it's really still a micro distance below the threshold.)
|
||||
int32_t roundedThreshold = (shiftedThreshold + DISTANCE_FRACTION_MASK) >> DISTANCE_SHIFT;
|
||||
BytesTrie iter(trie);
|
||||
// Look up the desired language only once for all supported LSRs.
|
||||
// Its "distance" is either a match point value of 0, or a non-match negative value.
|
||||
|
@ -153,7 +159,7 @@ int32_t LocaleDistance::getBestIndexAndDistance(
|
|||
if (favorSubtag == ULOCMATCH_FAVOR_SCRIPT) {
|
||||
distance >>= 2;
|
||||
}
|
||||
if (distance >= threshold) {
|
||||
if (distance >= roundedThreshold) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -171,7 +177,7 @@ int32_t LocaleDistance::getBestIndexAndDistance(
|
|||
scriptDistance &= ~DISTANCE_IS_FINAL;
|
||||
}
|
||||
distance += scriptDistance;
|
||||
if (distance >= threshold) {
|
||||
if (distance >= roundedThreshold) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -180,7 +186,7 @@ int32_t LocaleDistance::getBestIndexAndDistance(
|
|||
} else if (star || (flags & DISTANCE_IS_FINAL) != 0) {
|
||||
distance += defaultRegionDistance;
|
||||
} else {
|
||||
int32_t remainingThreshold = threshold - distance;
|
||||
int32_t remainingThreshold = roundedThreshold - distance;
|
||||
if (minRegionDistance >= remainingThreshold) {
|
||||
continue;
|
||||
}
|
||||
|
@ -196,15 +202,23 @@ int32_t LocaleDistance::getBestIndexAndDistance(
|
|||
partitionsForRegion(supported),
|
||||
remainingThreshold);
|
||||
}
|
||||
if (distance < threshold) {
|
||||
if (distance == 0) {
|
||||
return slIndex << 8;
|
||||
int32_t shiftedDistance = shiftDistance(distance);
|
||||
if (shiftedDistance == 0) {
|
||||
// Distinguish between equivalent but originally unequal locales via an
|
||||
// additional micro distance.
|
||||
shiftedDistance |= (desired.flags ^ supported.flags);
|
||||
}
|
||||
if (shiftedDistance < shiftedThreshold) {
|
||||
if (shiftedDistance == 0) {
|
||||
return slIndex << INDEX_SHIFT;
|
||||
}
|
||||
bestIndex = slIndex;
|
||||
threshold = distance;
|
||||
shiftedThreshold = shiftedDistance;
|
||||
}
|
||||
}
|
||||
return bestIndex >= 0 ? (bestIndex << 8) | threshold : 0xffffff00 | ABOVE_THRESHOLD;
|
||||
return bestIndex >= 0 ?
|
||||
(bestIndex << INDEX_SHIFT) | shiftedThreshold :
|
||||
INDEX_NEG_1 | shiftDistance(ABOVE_THRESHOLD);
|
||||
}
|
||||
|
||||
int32_t LocaleDistance::getDesSuppScriptDistance(
|
||||
|
@ -352,11 +366,14 @@ int32_t LocaleDistance::trieNext(BytesTrie &iter, const char *s, bool wantValue)
|
|||
}
|
||||
|
||||
UBool LocaleDistance::isParadigmLSR(const LSR &lsr) const {
|
||||
// Linear search for a very short list (length 6 as of 2019).
|
||||
// If there are many paradigm LSRs we should use a hash set.
|
||||
// Linear search for a very short list (length 6 as of 2019),
|
||||
// because we look for equivalence not equality, and
|
||||
// because it's easy.
|
||||
// If there are many paradigm LSRs we should use a hash set
|
||||
// with custom comparator and hasher.
|
||||
U_ASSERT(paradigmLSRsLength <= 15);
|
||||
for (int32_t i = 0; i < paradigmLSRsLength; ++i) {
|
||||
if (lsr == paradigmLSRs[i]) { return true; }
|
||||
if (lsr.isEquivalentTo(paradigmLSRs[i])) { return true; }
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -26,19 +26,36 @@ class LocaleDistance final : public UMemory {
|
|||
public:
|
||||
static const LocaleDistance *getSingleton(UErrorCode &errorCode);
|
||||
|
||||
static int32_t shiftDistance(int32_t distance) {
|
||||
return distance << DISTANCE_SHIFT;
|
||||
}
|
||||
|
||||
static int32_t getShiftedDistance(int32_t indexAndDistance) {
|
||||
return indexAndDistance & DISTANCE_MASK;
|
||||
}
|
||||
|
||||
static double getDistanceDouble(int32_t indexAndDistance) {
|
||||
double shiftedDistance = getShiftedDistance(indexAndDistance);
|
||||
return shiftedDistance / (1 << DISTANCE_SHIFT);
|
||||
}
|
||||
|
||||
static int32_t getIndex(int32_t indexAndDistance) {
|
||||
// assert indexAndDistance >= 0;
|
||||
return indexAndDistance >> INDEX_SHIFT;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the supported LSR with the smallest distance from the desired one.
|
||||
* Equivalent LSR subtags must be normalized into a canonical form.
|
||||
*
|
||||
* <p>Returns the index of the lowest-distance supported LSR in bits 31..8
|
||||
* <p>Returns the index of the lowest-distance supported LSR in the high bits
|
||||
* (negative if none has a distance below the threshold),
|
||||
* and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
|
||||
* and its shifted distance (0..shiftDistance(ABOVE_THRESHOLD), including fraction bits) in the low bits.
|
||||
*/
|
||||
int32_t getBestIndexAndDistance(const LSR &desired,
|
||||
const LSR **supportedLSRs, int32_t supportedLSRsLength,
|
||||
int32_t threshold, ULocMatchFavorSubtag favorSubtag) const;
|
||||
|
||||
int32_t getParadigmLSRsLength() const { return paradigmLSRsLength; }
|
||||
int32_t shiftedThreshold,
|
||||
ULocMatchFavorSubtag favorSubtag) const;
|
||||
|
||||
UBool isParadigmLSR(const LSR &lsr) const;
|
||||
|
||||
|
@ -51,6 +68,20 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
// The distance is shifted left to gain some fraction bits.
|
||||
static constexpr int32_t DISTANCE_SHIFT = 3;
|
||||
static constexpr int32_t DISTANCE_FRACTION_MASK = 7;
|
||||
// 7 bits for 0..100
|
||||
static constexpr int32_t DISTANCE_INT_SHIFT = 7;
|
||||
static constexpr int32_t INDEX_SHIFT = DISTANCE_INT_SHIFT + DISTANCE_SHIFT;
|
||||
static constexpr int32_t DISTANCE_MASK = 0x3ff;
|
||||
// static constexpr int32_t MAX_INDEX = 0x1fffff; // avoids sign bit
|
||||
static constexpr int32_t INDEX_NEG_1 = 0xfffffc00;
|
||||
|
||||
static int32_t getDistanceFloor(int32_t indexAndDistance) {
|
||||
return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT;
|
||||
}
|
||||
|
||||
LocaleDistance(const LocaleDistanceData &data);
|
||||
LocaleDistance(const LocaleDistance &other) = delete;
|
||||
LocaleDistance &operator=(const LocaleDistance &other) = delete;
|
||||
|
|
|
@ -250,7 +250,8 @@ struct XLikelySubtagsData {
|
|||
for (int32_t i = 0, j = 0; i < lsrSubtagsLength; i += 3, ++j) {
|
||||
lsrs[j] = LSR(strings.get(lsrSubtagIndexes[i]),
|
||||
strings.get(lsrSubtagIndexes[i + 1]),
|
||||
strings.get(lsrSubtagIndexes[i + 2]));
|
||||
strings.get(lsrSubtagIndexes[i + 2]),
|
||||
LSR::IMPLICIT_LSR);
|
||||
}
|
||||
|
||||
if (partitionsLength > 0) {
|
||||
|
@ -275,7 +276,8 @@ struct XLikelySubtagsData {
|
|||
for (int32_t i = 0, j = 0; i < paradigmSubtagsLength; i += 3, ++j) {
|
||||
paradigms[j] = LSR(strings.get(paradigmSubtagIndexes[i]),
|
||||
strings.get(paradigmSubtagIndexes[i + 1]),
|
||||
strings.get(paradigmSubtagIndexes[i + 2]));
|
||||
strings.get(paradigmSubtagIndexes[i + 2]),
|
||||
LSR::DONT_CARE_FLAGS);
|
||||
}
|
||||
distanceData.paradigms = paradigms;
|
||||
}
|
||||
|
@ -383,7 +385,7 @@ LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &error
|
|||
const char *name = locale.getName();
|
||||
if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=")
|
||||
// Private use language tag x-subtag-subtag...
|
||||
return LSR(name, "", "");
|
||||
return LSR(name, "", "", LSR::EXPLICIT_LSR);
|
||||
}
|
||||
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
|
||||
locale.getVariant(), errorCode);
|
||||
|
@ -407,26 +409,31 @@ LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, c
|
|||
if (region[0] == 'X' && (c1 = region[1]) != 0 && region[2] == 0) {
|
||||
switch (c1) {
|
||||
case 'A':
|
||||
return LSR(PSEUDO_ACCENTS_PREFIX, language, script, region, errorCode);
|
||||
return LSR(PSEUDO_ACCENTS_PREFIX, language, script, region,
|
||||
LSR::EXPLICIT_LSR, errorCode);
|
||||
case 'B':
|
||||
return LSR(PSEUDO_BIDI_PREFIX, language, script, region, errorCode);
|
||||
return LSR(PSEUDO_BIDI_PREFIX, language, script, region,
|
||||
LSR::EXPLICIT_LSR, errorCode);
|
||||
case 'C':
|
||||
return LSR(PSEUDO_CRACKED_PREFIX, language, script, region, errorCode);
|
||||
return LSR(PSEUDO_CRACKED_PREFIX, language, script, region,
|
||||
LSR::EXPLICIT_LSR, errorCode);
|
||||
default: // normal locale
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (variant[0] == 'P' && variant[1] == 'S') {
|
||||
int32_t lsrFlags = *region == 0 ?
|
||||
LSR::EXPLICIT_LANGUAGE | LSR::EXPLICIT_SCRIPT : LSR::EXPLICIT_LSR;
|
||||
if (uprv_strcmp(variant, "PSACCENT") == 0) {
|
||||
return LSR(PSEUDO_ACCENTS_PREFIX, language, script,
|
||||
*region == 0 ? "XA" : region, errorCode);
|
||||
*region == 0 ? "XA" : region, lsrFlags, errorCode);
|
||||
} else if (uprv_strcmp(variant, "PSBIDI") == 0) {
|
||||
return LSR(PSEUDO_BIDI_PREFIX, language, script,
|
||||
*region == 0 ? "XB" : region, errorCode);
|
||||
*region == 0 ? "XB" : region, lsrFlags, errorCode);
|
||||
} else if (uprv_strcmp(variant, "PSCRACK") == 0) {
|
||||
return LSR(PSEUDO_CRACKED_PREFIX, language, script,
|
||||
*region == 0 ? "XC" : region, errorCode);
|
||||
*region == 0 ? "XC" : region, lsrFlags, errorCode);
|
||||
}
|
||||
// else normal locale
|
||||
}
|
||||
|
@ -448,7 +455,7 @@ LSR XLikelySubtags::maximize(const char *language, const char *script, const cha
|
|||
region = "";
|
||||
}
|
||||
if (*script != 0 && *region != 0 && *language != 0) {
|
||||
return LSR(language, script, region); // already maximized
|
||||
return LSR(language, script, region, LSR::EXPLICIT_LSR); // already maximized
|
||||
}
|
||||
|
||||
uint32_t retainOldMask = 0;
|
||||
|
@ -535,7 +542,7 @@ LSR XLikelySubtags::maximize(const char *language, const char *script, const cha
|
|||
if (retainOldMask == 0) {
|
||||
// Quickly return a copy of the lookup-result LSR
|
||||
// without new allocation of the subtags.
|
||||
return LSR(result.language, result.script, result.region);
|
||||
return LSR(result.language, result.script, result.region, result.flags);
|
||||
}
|
||||
if ((retainOldMask & 4) == 0) {
|
||||
language = result.language;
|
||||
|
@ -546,7 +553,8 @@ LSR XLikelySubtags::maximize(const char *language, const char *script, const cha
|
|||
if ((retainOldMask & 1) == 0) {
|
||||
region = result.region;
|
||||
}
|
||||
return LSR(language, script, region);
|
||||
// retainOldMask flags = LSR explicit-subtag flags
|
||||
return LSR(language, script, region, retainOldMask);
|
||||
}
|
||||
|
||||
int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
|
||||
|
@ -615,9 +623,9 @@ LSR XLikelySubtags::minimizeSubtags(const char *languageIn, const char *scriptIn
|
|||
boolean favorRegionOk = false;
|
||||
if (result.script.equals(value00.script)) { //script is default
|
||||
if (result.region.equals(value00.region)) {
|
||||
return new LSR(result.language, "", "");
|
||||
return new LSR(result.language, "", "", LSR.DONT_CARE_FLAGS);
|
||||
} else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
|
||||
return new LSR(result.language, "", result.region);
|
||||
return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
|
||||
} else {
|
||||
favorRegionOk = true;
|
||||
}
|
||||
|
@ -627,9 +635,9 @@ LSR XLikelySubtags::minimizeSubtags(const char *languageIn, const char *scriptIn
|
|||
// Maybe do later, but for now use the straightforward code.
|
||||
LSR result2 = maximize(languageIn, scriptIn, "");
|
||||
if (result2.equals(result)) {
|
||||
return new LSR(result.language, result.script, "");
|
||||
return new LSR(result.language, result.script, "", LSR.DONT_CARE_FLAGS);
|
||||
} else if (favorRegionOk) {
|
||||
return new LSR(result.language, "", result.region);
|
||||
return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -14,9 +14,10 @@
|
|||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, UErrorCode &errorCode) :
|
||||
LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t f,
|
||||
UErrorCode &errorCode) :
|
||||
language(nullptr), script(nullptr), region(r),
|
||||
regionIndex(indexForRegion(region)) {
|
||||
regionIndex(indexForRegion(region)), flags(f) {
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
CharString langScript;
|
||||
langScript.append(prefix, errorCode).append(lang, errorCode).append('\0', errorCode);
|
||||
|
@ -32,7 +33,8 @@ LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, UErrorCo
|
|||
|
||||
LSR::LSR(LSR &&other) U_NOEXCEPT :
|
||||
language(other.language), script(other.script), region(other.region), owned(other.owned),
|
||||
regionIndex(other.regionIndex), hashCode(other.hashCode) {
|
||||
regionIndex(other.regionIndex), flags(other.flags),
|
||||
hashCode(other.hashCode) {
|
||||
if (owned != nullptr) {
|
||||
other.language = other.script = "";
|
||||
other.owned = nullptr;
|
||||
|
@ -50,6 +52,7 @@ LSR &LSR::operator=(LSR &&other) U_NOEXCEPT {
|
|||
script = other.script;
|
||||
region = other.region;
|
||||
regionIndex = other.regionIndex;
|
||||
flags = other.flags;
|
||||
owned = other.owned;
|
||||
hashCode = other.hashCode;
|
||||
if (owned != nullptr) {
|
||||
|
@ -60,7 +63,7 @@ LSR &LSR::operator=(LSR &&other) U_NOEXCEPT {
|
|||
return *this;
|
||||
}
|
||||
|
||||
UBool LSR::operator==(const LSR &other) const {
|
||||
UBool LSR::isEquivalentTo(const LSR &other) const {
|
||||
return
|
||||
uprv_strcmp(language, other.language) == 0 &&
|
||||
uprv_strcmp(script, other.script) == 0 &&
|
||||
|
@ -69,6 +72,16 @@ UBool LSR::operator==(const LSR &other) const {
|
|||
(regionIndex > 0 || uprv_strcmp(region, other.region) == 0);
|
||||
}
|
||||
|
||||
UBool LSR::operator==(const LSR &other) const {
|
||||
return
|
||||
uprv_strcmp(language, other.language) == 0 &&
|
||||
uprv_strcmp(script, other.script) == 0 &&
|
||||
regionIndex == other.regionIndex &&
|
||||
// Compare regions if both are ill-formed (and their indexes are 0).
|
||||
(regionIndex > 0 || uprv_strcmp(region, other.region) == 0) &&
|
||||
flags == other.flags;
|
||||
}
|
||||
|
||||
int32_t LSR::indexForRegion(const char *region) {
|
||||
int32_t c = region[0];
|
||||
int32_t a = c - '0';
|
||||
|
@ -90,10 +103,10 @@ int32_t LSR::indexForRegion(const char *region) {
|
|||
|
||||
LSR &LSR::setHashCode() {
|
||||
if (hashCode == 0) {
|
||||
hashCode =
|
||||
(ustr_hashCharsN(language, static_cast<int32_t>(uprv_strlen(language))) * 37 +
|
||||
ustr_hashCharsN(script, static_cast<int32_t>(uprv_strlen(script)))) * 37 +
|
||||
regionIndex;
|
||||
int32_t h = ustr_hashCharsN(language, static_cast<int32_t>(uprv_strlen(language)));
|
||||
h = h * 37 + ustr_hashCharsN(script, static_cast<int32_t>(uprv_strlen(script)));
|
||||
h = h * 37 + regionIndex;
|
||||
hashCode = h * 37 + flags;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
|
|
@ -16,26 +16,35 @@ U_NAMESPACE_BEGIN
|
|||
struct LSR final : public UMemory {
|
||||
static constexpr int32_t REGION_INDEX_LIMIT = 1001 + 26 * 26;
|
||||
|
||||
static constexpr int32_t EXPLICIT_LSR = 7;
|
||||
static constexpr int32_t EXPLICIT_LANGUAGE = 4;
|
||||
static constexpr int32_t EXPLICIT_SCRIPT = 2;
|
||||
static constexpr int32_t EXPLICIT_REGION = 1;
|
||||
static constexpr int32_t IMPLICIT_LSR = 0;
|
||||
static constexpr int32_t DONT_CARE_FLAGS = 0;
|
||||
|
||||
const char *language;
|
||||
const char *script;
|
||||
const char *region;
|
||||
char *owned = nullptr;
|
||||
/** Index for region, 0 if ill-formed. @see indexForRegion */
|
||||
int32_t regionIndex = 0;
|
||||
int32_t flags = 0;
|
||||
/** Only set for LSRs that will be used in a hash table. */
|
||||
int32_t hashCode = 0;
|
||||
|
||||
LSR() : language("und"), script(""), region("") {}
|
||||
|
||||
/** Constructor which aliases all subtag pointers. */
|
||||
LSR(const char *lang, const char *scr, const char *r) :
|
||||
LSR(const char *lang, const char *scr, const char *r, int32_t f) :
|
||||
language(lang), script(scr), region(r),
|
||||
regionIndex(indexForRegion(region)) {}
|
||||
regionIndex(indexForRegion(region)), flags(f) {}
|
||||
/**
|
||||
* Constructor which prepends the prefix to the language and script,
|
||||
* copies those into owned memory, and aliases the region.
|
||||
*/
|
||||
LSR(char prefix, const char *lang, const char *scr, const char *r, UErrorCode &errorCode);
|
||||
LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t f,
|
||||
UErrorCode &errorCode);
|
||||
LSR(LSR &&other) U_NOEXCEPT;
|
||||
LSR(const LSR &other) = delete;
|
||||
inline ~LSR() {
|
||||
|
@ -55,6 +64,7 @@ struct LSR final : public UMemory {
|
|||
*/
|
||||
static int32_t indexForRegion(const char *region);
|
||||
|
||||
UBool isEquivalentTo(const LSR &other) const;
|
||||
UBool operator==(const LSR &other) const;
|
||||
|
||||
inline UBool operator!=(const LSR &other) const {
|
||||
|
|
60
icu4c/source/test/testdata/localeMatcherTest.txt
vendored
60
icu4c/source/test/testdata/localeMatcherTest.txt
vendored
|
@ -1052,9 +1052,9 @@ en >> en-DE
|
|||
ar-EG >> ar-SY
|
||||
pt-BR >> pt
|
||||
ar-XB >> ar-XB
|
||||
ar-PSBIDI >> ar-XB # These are equivalent.
|
||||
ar-PSBIDI >> ar-PSBIDI
|
||||
en-XA >> en-XA
|
||||
en-PSACCENT >> en-XA # These are equivalent.
|
||||
en-PSACCENT >> en-PSACCENT
|
||||
ar-PSCRACK >> ar-PSCRACK
|
||||
|
||||
@favor=script
|
||||
|
@ -1063,9 +1063,9 @@ en >> en-DE
|
|||
ar-EG >> ar-SY
|
||||
pt-BR >> pt
|
||||
ar-XB >> ar-XB
|
||||
ar-PSBIDI >> ar-XB # These are equivalent.
|
||||
ar-PSBIDI >> ar-PSBIDI
|
||||
en-XA >> en-XA
|
||||
en-PSACCENT >> en-XA # These are equivalent.
|
||||
en-PSACCENT >> en-PSACCENT
|
||||
ar-PSCRACK >> ar-PSCRACK
|
||||
|
||||
** test: BestMatchForTraditionalChinese
|
||||
|
@ -1544,50 +1544,44 @@ zh-TW, en >> en-US
|
|||
zh-Hant-CN, en >> en-US
|
||||
zh-Hans, en >> zh-Hans-CN
|
||||
|
||||
** test: return first among likely-subtags equivalent locales
|
||||
# Was: more specific script should win in case regions are identical
|
||||
# with some different results.
|
||||
** test: return most originally similar among likely-subtags equivalent locales
|
||||
@supported=af, af-Latn, af-Arab
|
||||
af >> af
|
||||
af-ZA >> af
|
||||
af-Latn-ZA >> af
|
||||
af-Latn >> af
|
||||
af-Latn-ZA >> af-Latn
|
||||
af-Latn >> af-Latn
|
||||
|
||||
@favor=script
|
||||
af >> af
|
||||
af-ZA >> af
|
||||
af-Latn-ZA >> af
|
||||
af-Latn >> af
|
||||
af-Latn-ZA >> af-Latn
|
||||
af-Latn >> af-Latn
|
||||
|
||||
# Was: more specific region should win
|
||||
# with some different results.
|
||||
@supported=nl, nl-NL, nl-BE
|
||||
@favor=
|
||||
nl >> nl
|
||||
nl-Latn >> nl
|
||||
nl-Latn-NL >> nl
|
||||
nl-NL >> nl
|
||||
nl-Latn-NL >> nl-NL
|
||||
nl-NL >> nl-NL
|
||||
|
||||
@favor=script
|
||||
nl >> nl
|
||||
nl-Latn >> nl
|
||||
nl-Latn-NL >> nl
|
||||
nl-NL >> nl
|
||||
nl-Latn-NL >> nl-NL
|
||||
nl-NL >> nl-NL
|
||||
|
||||
# Was: more specific region wins over more specific script
|
||||
# with some different results.
|
||||
@supported=nl, nl-Latn, nl-NL, nl-BE
|
||||
@favor=
|
||||
nl >> nl
|
||||
nl-Latn >> nl
|
||||
nl-NL >> nl
|
||||
nl-Latn-NL >> nl
|
||||
nl-Latn >> nl-Latn
|
||||
nl-NL >> nl-NL
|
||||
nl-Latn-NL >> nl-Latn
|
||||
|
||||
@favor=script
|
||||
nl >> nl
|
||||
nl-Latn >> nl
|
||||
nl-NL >> nl
|
||||
nl-Latn-NL >> nl
|
||||
nl-Latn >> nl-Latn
|
||||
nl-NL >> nl-NL
|
||||
nl-Latn-NL >> nl-Latn
|
||||
|
||||
** test: region may replace matched if matched is enclosing
|
||||
@supported=es-419, es
|
||||
|
@ -1670,22 +1664,22 @@ ja-Jpan-JP, en-GB >> ja
|
|||
** test: pick best maximized tag
|
||||
@supported=ja, ja-Jpan-US, ja-JP, en, ru
|
||||
ja-Jpan, ru >> ja
|
||||
ja-JP, ru >> ja
|
||||
ja-JP, ru >> ja-JP
|
||||
ja-US, ru >> ja-Jpan-US
|
||||
|
||||
@favor=script
|
||||
ja-Jpan, ru >> ja
|
||||
ja-JP, ru >> ja
|
||||
ja-JP, ru >> ja-JP
|
||||
ja-US, ru >> ja-Jpan-US
|
||||
|
||||
** test: termination: pick best maximized match
|
||||
@supported=ja, ja-Jpan, ja-JP, en, ru
|
||||
ja-Jpan-JP, ru >> ja
|
||||
ja-Jpan, ru >> ja
|
||||
ja-Jpan-JP, ru >> ja-Jpan
|
||||
ja-Jpan, ru >> ja-Jpan
|
||||
|
||||
@favor=script
|
||||
ja-Jpan-JP, ru >> ja
|
||||
ja-Jpan, ru >> ja
|
||||
ja-Jpan-JP, ru >> ja-Jpan
|
||||
ja-Jpan, ru >> ja-Jpan
|
||||
|
||||
** test: same language over exact, but distinguish when user is explicit
|
||||
@supported=fr, en-GB, ja, es-ES, es-MX
|
||||
|
@ -1900,7 +1894,7 @@ zh-TW >> zh
|
|||
** test: testGetBestMatchWithMinMatchScore
|
||||
@supported=fr-FR, fr, fr-CA, en
|
||||
@default=und
|
||||
fr >> fr-FR # First likely-subtags equivalent match is chosen.
|
||||
fr >> fr
|
||||
@supported=en, fr, fr-CA
|
||||
fr-FR >> fr # Parent match is chosen.
|
||||
@supported=en, fr-CA
|
||||
|
@ -1930,7 +1924,7 @@ ru >> und
|
|||
|
||||
@favor=script
|
||||
@supported=fr-FR, fr, fr-CA, en
|
||||
fr >> fr-FR
|
||||
fr >> fr
|
||||
@supported=en, fr, fr-CA
|
||||
fr-FR >> fr
|
||||
@supported=en, fr-CA
|
||||
|
|
|
@ -7,6 +7,13 @@ import java.util.Objects;
|
|||
public final class LSR {
|
||||
public static final int REGION_INDEX_LIMIT = 1001 + 26 * 26;
|
||||
|
||||
public static final int EXPLICIT_LSR = 7;
|
||||
public static final int EXPLICIT_LANGUAGE = 4;
|
||||
public static final int EXPLICIT_SCRIPT = 2;
|
||||
public static final int EXPLICIT_REGION = 1;
|
||||
public static final int IMPLICIT_LSR = 0;
|
||||
public static final int DONT_CARE_FLAGS = 0;
|
||||
|
||||
public static final boolean DEBUG_OUTPUT = false;
|
||||
|
||||
public final String language;
|
||||
|
@ -14,12 +21,14 @@ public final class LSR {
|
|||
public final String region;
|
||||
/** Index for region, negative if ill-formed. @see indexForRegion */
|
||||
final int regionIndex;
|
||||
public final int flags;
|
||||
|
||||
public LSR(String language, String script, String region) {
|
||||
public LSR(String language, String script, String region, int flags) {
|
||||
this.language = language;
|
||||
this.script = script;
|
||||
this.region = region;
|
||||
regionIndex = indexForRegion(region);
|
||||
this.flags = flags;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -57,6 +66,13 @@ public final class LSR {
|
|||
}
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
public boolean isEquivalentTo(LSR other) {
|
||||
return language.equals(other.language)
|
||||
&& script.equals(other.script)
|
||||
&& region.equals(other.region);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
LSR other;
|
||||
|
@ -65,10 +81,12 @@ public final class LSR {
|
|||
&& obj.getClass() == this.getClass()
|
||||
&& language.equals((other = (LSR) obj).language)
|
||||
&& script.equals(other.script)
|
||||
&& region.equals(other.region));
|
||||
&& region.equals(other.region)
|
||||
&& flags == other.flags);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(language, script, region);
|
||||
return Objects.hash(language, script, region, flags);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,6 +34,17 @@ public class LocaleDistance {
|
|||
private static final int DISTANCE_IS_FINAL = 0x100;
|
||||
private static final int DISTANCE_IS_FINAL_OR_SKIP_SCRIPT =
|
||||
DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT;
|
||||
|
||||
// The distance is shifted left to gain some fraction bits.
|
||||
private static final int DISTANCE_SHIFT = 3;
|
||||
private static final int DISTANCE_FRACTION_MASK = 7;
|
||||
// 7 bits for 0..100
|
||||
private static final int DISTANCE_INT_SHIFT = 7;
|
||||
private static final int INDEX_SHIFT = DISTANCE_INT_SHIFT + DISTANCE_SHIFT;
|
||||
private static final int DISTANCE_MASK = 0x3ff;
|
||||
// private static final int MAX_INDEX = 0x1fffff; // avoids sign bit
|
||||
private static final int INDEX_NEG_1 = 0xfffffc00;
|
||||
|
||||
// Indexes into array of distances.
|
||||
public static final int IX_DEF_LANG_DISTANCE = 0;
|
||||
public static final int IX_DEF_SCRIPT_DISTANCE = 1;
|
||||
|
@ -67,6 +78,28 @@ public class LocaleDistance {
|
|||
private final int minRegionDistance;
|
||||
private final int defaultDemotionPerDesiredLocale;
|
||||
|
||||
public static final int shiftDistance(int distance) {
|
||||
return distance << DISTANCE_SHIFT;
|
||||
}
|
||||
|
||||
public static final int getShiftedDistance(int indexAndDistance) {
|
||||
return indexAndDistance & DISTANCE_MASK;
|
||||
}
|
||||
|
||||
public static final double getDistanceDouble(int indexAndDistance) {
|
||||
double shiftedDistance = getShiftedDistance(indexAndDistance);
|
||||
return shiftedDistance / (1 << DISTANCE_SHIFT);
|
||||
}
|
||||
|
||||
private static final int getDistanceFloor(int indexAndDistance) {
|
||||
return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT;
|
||||
}
|
||||
|
||||
public static final int getIndex(int indexAndDistance) {
|
||||
assert indexAndDistance >= 0;
|
||||
return indexAndDistance >> INDEX_SHIFT;
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
public static final class Data {
|
||||
public byte[] trie;
|
||||
|
@ -121,7 +154,8 @@ public class LocaleDistance {
|
|||
String[] paradigms = value.getStringArray();
|
||||
paradigmLSRs = new HashSet<>(paradigms.length / 3);
|
||||
for (int i = 0; i < paradigms.length; i += 3) {
|
||||
paradigmLSRs.add(new LSR(paradigms[i], paradigms[i + 1], paradigms[i + 2]));
|
||||
paradigmLSRs.add(new LSR(paradigms[i], paradigms[i + 1], paradigms[i + 2],
|
||||
LSR.DONT_CARE_FLAGS));
|
||||
}
|
||||
} else {
|
||||
paradigmLSRs = Collections.emptySet();
|
||||
|
@ -168,10 +202,11 @@ public class LocaleDistance {
|
|||
// a mere region difference for one desired locale
|
||||
// is as good as a perfect match for the next following desired locale.
|
||||
// As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>.
|
||||
LSR en = new LSR("en", "Latn", "US");
|
||||
LSR enGB = new LSR("en", "Latn", "GB");
|
||||
defaultDemotionPerDesiredLocale = getBestIndexAndDistance(en, new LSR[] { enGB },
|
||||
50, FavorSubtag.LANGUAGE) & 0xff;
|
||||
LSR en = new LSR("en", "Latn", "US", LSR.EXPLICIT_LSR);
|
||||
LSR enGB = new LSR("en", "Latn", "GB", LSR.EXPLICIT_LSR);
|
||||
int indexAndDistance = getBestIndexAndDistance(en, new LSR[] { enGB },
|
||||
shiftDistance(50), FavorSubtag.LANGUAGE);
|
||||
defaultDemotionPerDesiredLocale = getDistanceFloor(indexAndDistance);
|
||||
|
||||
if (DEBUG_OUTPUT) {
|
||||
System.out.println("*** locale distance");
|
||||
|
@ -187,20 +222,26 @@ public class LocaleDistance {
|
|||
int threshold, FavorSubtag favorSubtag) {
|
||||
LSR supportedLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported);
|
||||
LSR desiredLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired);
|
||||
return getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR },
|
||||
threshold, favorSubtag) & 0xff;
|
||||
int indexAndDistance = getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR },
|
||||
shiftDistance(threshold), favorSubtag);
|
||||
return getDistanceFloor(indexAndDistance);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the supported LSR with the smallest distance from the desired one.
|
||||
* Equivalent LSR subtags must be normalized into a canonical form.
|
||||
*
|
||||
* <p>Returns the index of the lowest-distance supported LSR in bits 31..8
|
||||
* <p>Returns the index of the lowest-distance supported LSR in the high bits
|
||||
* (negative if none has a distance below the threshold),
|
||||
* and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
|
||||
* and its distance (0..ABOVE_THRESHOLD) in the low bits.
|
||||
*/
|
||||
public int getBestIndexAndDistance(LSR desired, LSR[] supportedLSRs,
|
||||
int threshold, FavorSubtag favorSubtag) {
|
||||
int shiftedThreshold, FavorSubtag favorSubtag) {
|
||||
// Round up the shifted threshold (if fraction bits are not 0)
|
||||
// for comparison with un-shifted distances until we need fraction bits.
|
||||
// (If we simply shifted non-zero fraction bits away, then we might ignore a language
|
||||
// when it's really still a micro distance below the threshold.)
|
||||
int roundedThreshold = (shiftedThreshold + DISTANCE_FRACTION_MASK) >> DISTANCE_SHIFT;
|
||||
BytesTrie iter = new BytesTrie(trie);
|
||||
// Look up the desired language only once for all supported LSRs.
|
||||
// Its "distance" is either a match point value of 0, or a non-match negative value.
|
||||
|
@ -246,7 +287,7 @@ public class LocaleDistance {
|
|||
if (favorSubtag == FavorSubtag.SCRIPT) {
|
||||
distance >>= 2;
|
||||
}
|
||||
if (distance >= threshold) {
|
||||
if (distance >= roundedThreshold) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -264,7 +305,7 @@ public class LocaleDistance {
|
|||
scriptDistance &= ~DISTANCE_IS_FINAL;
|
||||
}
|
||||
distance += scriptDistance;
|
||||
if (distance >= threshold) {
|
||||
if (distance >= roundedThreshold) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -273,7 +314,7 @@ public class LocaleDistance {
|
|||
} else if (star || (flags & DISTANCE_IS_FINAL) != 0) {
|
||||
distance += defaultRegionDistance;
|
||||
} else {
|
||||
int remainingThreshold = threshold - distance;
|
||||
int remainingThreshold = roundedThreshold - distance;
|
||||
if (minRegionDistance >= remainingThreshold) {
|
||||
continue;
|
||||
}
|
||||
|
@ -289,15 +330,23 @@ public class LocaleDistance {
|
|||
partitionsForRegion(supported),
|
||||
remainingThreshold);
|
||||
}
|
||||
if (distance < threshold) {
|
||||
if (distance == 0) {
|
||||
return slIndex << 8;
|
||||
int shiftedDistance = shiftDistance(distance);
|
||||
if (shiftedDistance == 0) {
|
||||
// Distinguish between equivalent but originally unequal locales via an
|
||||
// additional micro distance.
|
||||
shiftedDistance |= (desired.flags ^ supported.flags);
|
||||
}
|
||||
if (shiftedDistance < shiftedThreshold) {
|
||||
if (shiftedDistance == 0) {
|
||||
return slIndex << INDEX_SHIFT;
|
||||
}
|
||||
bestIndex = slIndex;
|
||||
threshold = distance;
|
||||
shiftedThreshold = shiftedDistance;
|
||||
}
|
||||
}
|
||||
return bestIndex >= 0 ? (bestIndex << 8) | threshold : 0xffffff00 | ABOVE_THRESHOLD;
|
||||
return bestIndex >= 0 ?
|
||||
(bestIndex << INDEX_SHIFT) | shiftedThreshold :
|
||||
INDEX_NEG_1 | shiftDistance(ABOVE_THRESHOLD);
|
||||
}
|
||||
|
||||
private static final int getDesSuppScriptDistance(BytesTrie iter, long startState,
|
||||
|
@ -439,7 +488,17 @@ public class LocaleDistance {
|
|||
}
|
||||
|
||||
public boolean isParadigmLSR(LSR lsr) {
|
||||
return paradigmLSRs.contains(lsr);
|
||||
// Linear search for a very short list (length 6 as of 2019),
|
||||
// because we look for equivalence not equality, and
|
||||
// HashSet does not support customizing equality.
|
||||
// If there are many paradigm LSRs we should revisit this.
|
||||
assert paradigmLSRs.size() <= 15;
|
||||
for (LSR plsr : paradigmLSRs) {
|
||||
if (lsr.isEquivalentTo(plsr)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
|
@ -455,9 +514,6 @@ public class LocaleDistance {
|
|||
return defaultDemotionPerDesiredLocale;
|
||||
}
|
||||
|
||||
// TODO: When we build data offline,
|
||||
// write test code to compare the loaded table with the builder output.
|
||||
// Fail if different, with instructions for how to update the data file.
|
||||
// VisibleForTesting
|
||||
public Map<String, Integer> testOnlyGetDistanceTable() {
|
||||
Map<String, Integer> map = new TreeMap<>();
|
||||
|
|
|
@ -87,7 +87,8 @@ public final class XLikelySubtags {
|
|||
String[] lsrSubtags = getValue(likelyTable, "lsrs", value).getStringArray();
|
||||
LSR[] lsrs = new LSR[lsrSubtags.length / 3];
|
||||
for (int i = 0, j = 0; i < lsrSubtags.length; i += 3, ++j) {
|
||||
lsrs[j] = new LSR(lsrSubtags[i], lsrSubtags[i + 1], lsrSubtags[i + 2]);
|
||||
lsrs[j] = new LSR(lsrSubtags[i], lsrSubtags[i + 1], lsrSubtags[i + 2],
|
||||
LSR.IMPLICIT_LSR);
|
||||
}
|
||||
|
||||
return new Data(languageAliases, regionAliases, trie, lsrs);
|
||||
|
@ -185,7 +186,7 @@ public final class XLikelySubtags {
|
|||
String tag = locale.toLanguageTag();
|
||||
assert tag.startsWith("x-");
|
||||
// Private use language tag x-subtag-subtag...
|
||||
return new LSR(tag, "", "");
|
||||
return new LSR(tag, "", "", LSR.EXPLICIT_LSR);
|
||||
}
|
||||
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
|
||||
locale.getVariant());
|
||||
|
@ -195,7 +196,7 @@ public final class XLikelySubtags {
|
|||
String tag = locale.toLanguageTag();
|
||||
if (tag.startsWith("x-")) {
|
||||
// Private use language tag x-subtag-subtag...
|
||||
return new LSR(tag, "", "");
|
||||
return new LSR(tag, "", "", LSR.EXPLICIT_LSR);
|
||||
}
|
||||
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
|
||||
locale.getVariant());
|
||||
|
@ -209,29 +210,34 @@ public final class XLikelySubtags {
|
|||
switch (region.charAt(1)) {
|
||||
case 'A':
|
||||
return new LSR(PSEUDO_ACCENTS_PREFIX + language,
|
||||
PSEUDO_ACCENTS_PREFIX + script, region);
|
||||
PSEUDO_ACCENTS_PREFIX + script, region, LSR.EXPLICIT_LSR);
|
||||
case 'B':
|
||||
return new LSR(PSEUDO_BIDI_PREFIX + language,
|
||||
PSEUDO_BIDI_PREFIX + script, region);
|
||||
PSEUDO_BIDI_PREFIX + script, region, LSR.EXPLICIT_LSR);
|
||||
case 'C':
|
||||
return new LSR(PSEUDO_CRACKED_PREFIX + language,
|
||||
PSEUDO_CRACKED_PREFIX + script, region);
|
||||
PSEUDO_CRACKED_PREFIX + script, region, LSR.EXPLICIT_LSR);
|
||||
default: // normal locale
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (variant.startsWith("PS")) {
|
||||
int lsrFlags = region.isEmpty() ?
|
||||
LSR.EXPLICIT_LANGUAGE | LSR.EXPLICIT_SCRIPT : LSR.EXPLICIT_LSR;
|
||||
switch (variant) {
|
||||
case "PSACCENT":
|
||||
return new LSR(PSEUDO_ACCENTS_PREFIX + language,
|
||||
PSEUDO_ACCENTS_PREFIX + script, region.isEmpty() ? "XA" : region);
|
||||
PSEUDO_ACCENTS_PREFIX + script,
|
||||
region.isEmpty() ? "XA" : region, lsrFlags);
|
||||
case "PSBIDI":
|
||||
return new LSR(PSEUDO_BIDI_PREFIX + language,
|
||||
PSEUDO_BIDI_PREFIX + script, region.isEmpty() ? "XB" : region);
|
||||
PSEUDO_BIDI_PREFIX + script,
|
||||
region.isEmpty() ? "XB" : region, lsrFlags);
|
||||
case "PSCRACK":
|
||||
return new LSR(PSEUDO_CRACKED_PREFIX + language,
|
||||
PSEUDO_CRACKED_PREFIX + script, region.isEmpty() ? "XC" : region);
|
||||
PSEUDO_CRACKED_PREFIX + script,
|
||||
region.isEmpty() ? "XC" : region, lsrFlags);
|
||||
default: // normal locale
|
||||
break;
|
||||
}
|
||||
|
@ -257,7 +263,7 @@ public final class XLikelySubtags {
|
|||
region = "";
|
||||
}
|
||||
if (!script.isEmpty() && !region.isEmpty() && !language.isEmpty()) {
|
||||
return new LSR(language, script, region); // already maximized
|
||||
return new LSR(language, script, region, LSR.EXPLICIT_LSR); // already maximized
|
||||
}
|
||||
|
||||
int retainOldMask = 0;
|
||||
|
@ -340,6 +346,7 @@ public final class XLikelySubtags {
|
|||
}
|
||||
|
||||
if (retainOldMask == 0) {
|
||||
assert result.flags == LSR.IMPLICIT_LSR;
|
||||
return result;
|
||||
}
|
||||
if ((retainOldMask & 4) == 0) {
|
||||
|
@ -351,7 +358,8 @@ public final class XLikelySubtags {
|
|||
if ((retainOldMask & 1) == 0) {
|
||||
region = result.region;
|
||||
}
|
||||
return new LSR(language, script, region);
|
||||
// retainOldMask flags = LSR explicit-subtag flags
|
||||
return new LSR(language, script, region, retainOldMask);
|
||||
}
|
||||
|
||||
private static final int trieNext(BytesTrie iter, String s, int i) {
|
||||
|
@ -411,9 +419,9 @@ public final class XLikelySubtags {
|
|||
boolean favorRegionOk = false;
|
||||
if (result.script.equals(value00.script)) { //script is default
|
||||
if (result.region.equals(value00.region)) {
|
||||
return new LSR(result.language, "", "");
|
||||
return new LSR(result.language, "", "", LSR.DONT_CARE_FLAGS);
|
||||
} else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
|
||||
return new LSR(result.language, "", result.region);
|
||||
return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
|
||||
} else {
|
||||
favorRegionOk = true;
|
||||
}
|
||||
|
@ -423,9 +431,9 @@ public final class XLikelySubtags {
|
|||
// Maybe do later, but for now use the straightforward code.
|
||||
LSR result2 = maximize(languageIn, scriptIn, "");
|
||||
if (result2.equals(result)) {
|
||||
return new LSR(result.language, result.script, "");
|
||||
return new LSR(result.language, result.script, "", LSR.DONT_CARE_FLAGS);
|
||||
} else if (favorRegionOk) {
|
||||
return new LSR(result.language, "", result.region);
|
||||
return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -64,7 +64,7 @@ import com.ibm.icu.impl.locale.XLikelySubtags;
|
|||
* @stable ICU 4.4
|
||||
*/
|
||||
public final class LocaleMatcher {
|
||||
private static final LSR UND_LSR = new LSR("und","","");
|
||||
private static final LSR UND_LSR = new LSR("und","","", LSR.EXPLICIT_LSR);
|
||||
// In ULocale, "und" and "" make the same object.
|
||||
private static final ULocale UND_ULOCALE = new ULocale("und");
|
||||
// In Locale, "und" and "" make different objects.
|
||||
|
@ -680,6 +680,9 @@ public final class LocaleMatcher {
|
|||
builder.demotion == Demotion.NONE ? 0 :
|
||||
LocaleDistance.INSTANCE.getDefaultDemotionPerDesiredLocale(); // null or REGION
|
||||
favorSubtag = builder.favor;
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("new LocaleMatcher: %s\n", toString());
|
||||
}
|
||||
}
|
||||
|
||||
private static final void putIfAbsent(Map<LSR, Integer> lsrToIndex, LSR lsr, int i) {
|
||||
|
@ -938,26 +941,34 @@ public final class LocaleMatcher {
|
|||
private int getBestSuppIndex(LSR desiredLSR, LsrIterator remainingIter) {
|
||||
int desiredIndex = 0;
|
||||
int bestSupportedLsrIndex = -1;
|
||||
for (int bestDistance = thresholdDistance;;) {
|
||||
StringBuilder sb = null;
|
||||
if (TRACE_MATCHER) {
|
||||
sb = new StringBuilder("LocaleMatcher desired:");
|
||||
}
|
||||
for (int bestShiftedDistance = LocaleDistance.shiftDistance(thresholdDistance);;) {
|
||||
if (TRACE_MATCHER) {
|
||||
sb.append(' ').append(desiredLSR);
|
||||
}
|
||||
// Quick check for exact maximized LSR.
|
||||
Integer index = supportedLsrToIndex.get(desiredLSR);
|
||||
if (index != null) {
|
||||
int suppIndex = index;
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning %s: desiredLSR=supportedLSR\n",
|
||||
supportedULocales[suppIndex]);
|
||||
System.err.printf("%s --> best=%s: desiredLSR=supportedLSR\n",
|
||||
sb, supportedULocales[suppIndex]);
|
||||
}
|
||||
if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
|
||||
return suppIndex;
|
||||
}
|
||||
int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
|
||||
desiredLSR, supportedLSRs, bestDistance, favorSubtag);
|
||||
desiredLSR, supportedLSRs, bestShiftedDistance, favorSubtag);
|
||||
if (bestIndexAndDistance >= 0) {
|
||||
bestDistance = bestIndexAndDistance & 0xff;
|
||||
bestShiftedDistance = LocaleDistance.getShiftedDistance(bestIndexAndDistance);
|
||||
if (remainingIter != null) { remainingIter.rememberCurrent(desiredIndex); }
|
||||
bestSupportedLsrIndex = bestIndexAndDistance >> 8;
|
||||
bestSupportedLsrIndex = LocaleDistance.getIndex(bestIndexAndDistance);
|
||||
}
|
||||
if ((bestDistance -= demotionPerDesiredLocale) <= 0) {
|
||||
if ((bestShiftedDistance -= LocaleDistance.shiftDistance(demotionPerDesiredLocale))
|
||||
<= 0) {
|
||||
break;
|
||||
}
|
||||
if (remainingIter == null || !remainingIter.hasNext()) {
|
||||
|
@ -968,14 +979,14 @@ public final class LocaleMatcher {
|
|||
}
|
||||
if (bestSupportedLsrIndex < 0) {
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning default %s: no good match\n", defaultULocale);
|
||||
System.err.printf("%s --> best=default %s: no good match\n", sb, defaultULocale);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
int suppIndex = supportedIndexes[bestSupportedLsrIndex];
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning %s: best matching supported locale\n",
|
||||
supportedULocales[suppIndex]);
|
||||
System.err.printf("%s --> best=%s: best matching supported locale\n",
|
||||
sb, supportedULocales[suppIndex]);
|
||||
}
|
||||
return suppIndex;
|
||||
}
|
||||
|
@ -1000,11 +1011,16 @@ public final class LocaleMatcher {
|
|||
@Deprecated
|
||||
public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
|
||||
// Returns the inverse of the distance: That is, 1-distance(desired, supported).
|
||||
int distance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
|
||||
int indexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
|
||||
getMaximalLsrOrUnd(desired),
|
||||
new LSR[] { getMaximalLsrOrUnd(supported) },
|
||||
thresholdDistance, favorSubtag) & 0xff;
|
||||
return (100 - distance) / 100.0;
|
||||
LocaleDistance.shiftDistance(thresholdDistance), favorSubtag);
|
||||
double distance = LocaleDistance.getDistanceDouble(indexAndDistance);
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("LocaleMatcher distance(desired=%s, supported=%s)=%g\n",
|
||||
Objects.toString(desired), Objects.toString(supported), distance);
|
||||
}
|
||||
return (100.0 - distance) / 100.0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1032,16 +1048,17 @@ public final class LocaleMatcher {
|
|||
@Override
|
||||
public String toString() {
|
||||
StringBuilder s = new StringBuilder().append("{LocaleMatcher");
|
||||
if (supportedULocales.length > 0) {
|
||||
s.append(" supported={").append(supportedULocales[0].toString());
|
||||
for (int i = 1; i < supportedULocales.length; ++i) {
|
||||
s.append(", ").append(supportedULocales[i].toString());
|
||||
// Supported languages in the order that we try to match them.
|
||||
if (supportedLSRs.length > 0) {
|
||||
s.append(" supportedLSRs={").append(supportedLSRs[0].toString());
|
||||
for (int i = 1; i < supportedLSRs.length; ++i) {
|
||||
s.append(", ").append(supportedLSRs[i].toString());
|
||||
}
|
||||
s.append('}');
|
||||
}
|
||||
s.append(" default=").append(Objects.toString(defaultULocale));
|
||||
if (favorSubtag != null) {
|
||||
s.append(" distance=").append(favorSubtag.toString());
|
||||
s.append(" favor=").append(favorSubtag.toString());
|
||||
}
|
||||
if (thresholdDistance >= 0) {
|
||||
s.append(String.format(" threshold=%d", thresholdDistance));
|
||||
|
|
|
@ -1052,9 +1052,9 @@ en >> en-DE
|
|||
ar-EG >> ar-SY
|
||||
pt-BR >> pt
|
||||
ar-XB >> ar-XB
|
||||
ar-PSBIDI >> ar-XB # These are equivalent.
|
||||
ar-PSBIDI >> ar-PSBIDI
|
||||
en-XA >> en-XA
|
||||
en-PSACCENT >> en-XA # These are equivalent.
|
||||
en-PSACCENT >> en-PSACCENT
|
||||
ar-PSCRACK >> ar-PSCRACK
|
||||
|
||||
@favor=script
|
||||
|
@ -1063,9 +1063,9 @@ en >> en-DE
|
|||
ar-EG >> ar-SY
|
||||
pt-BR >> pt
|
||||
ar-XB >> ar-XB
|
||||
ar-PSBIDI >> ar-XB # These are equivalent.
|
||||
ar-PSBIDI >> ar-PSBIDI
|
||||
en-XA >> en-XA
|
||||
en-PSACCENT >> en-XA # These are equivalent.
|
||||
en-PSACCENT >> en-PSACCENT
|
||||
ar-PSCRACK >> ar-PSCRACK
|
||||
|
||||
** test: BestMatchForTraditionalChinese
|
||||
|
@ -1544,50 +1544,44 @@ zh-TW, en >> en-US
|
|||
zh-Hant-CN, en >> en-US
|
||||
zh-Hans, en >> zh-Hans-CN
|
||||
|
||||
** test: return first among likely-subtags equivalent locales
|
||||
# Was: more specific script should win in case regions are identical
|
||||
# with some different results.
|
||||
** test: return most originally similar among likely-subtags equivalent locales
|
||||
@supported=af, af-Latn, af-Arab
|
||||
af >> af
|
||||
af-ZA >> af
|
||||
af-Latn-ZA >> af
|
||||
af-Latn >> af
|
||||
af-Latn-ZA >> af-Latn
|
||||
af-Latn >> af-Latn
|
||||
|
||||
@favor=script
|
||||
af >> af
|
||||
af-ZA >> af
|
||||
af-Latn-ZA >> af
|
||||
af-Latn >> af
|
||||
af-Latn-ZA >> af-Latn
|
||||
af-Latn >> af-Latn
|
||||
|
||||
# Was: more specific region should win
|
||||
# with some different results.
|
||||
@supported=nl, nl-NL, nl-BE
|
||||
@favor=
|
||||
nl >> nl
|
||||
nl-Latn >> nl
|
||||
nl-Latn-NL >> nl
|
||||
nl-NL >> nl
|
||||
nl-Latn-NL >> nl-NL
|
||||
nl-NL >> nl-NL
|
||||
|
||||
@favor=script
|
||||
nl >> nl
|
||||
nl-Latn >> nl
|
||||
nl-Latn-NL >> nl
|
||||
nl-NL >> nl
|
||||
nl-Latn-NL >> nl-NL
|
||||
nl-NL >> nl-NL
|
||||
|
||||
# Was: more specific region wins over more specific script
|
||||
# with some different results.
|
||||
@supported=nl, nl-Latn, nl-NL, nl-BE
|
||||
@favor=
|
||||
nl >> nl
|
||||
nl-Latn >> nl
|
||||
nl-NL >> nl
|
||||
nl-Latn-NL >> nl
|
||||
nl-Latn >> nl-Latn
|
||||
nl-NL >> nl-NL
|
||||
nl-Latn-NL >> nl-Latn
|
||||
|
||||
@favor=script
|
||||
nl >> nl
|
||||
nl-Latn >> nl
|
||||
nl-NL >> nl
|
||||
nl-Latn-NL >> nl
|
||||
nl-Latn >> nl-Latn
|
||||
nl-NL >> nl-NL
|
||||
nl-Latn-NL >> nl-Latn
|
||||
|
||||
** test: region may replace matched if matched is enclosing
|
||||
@supported=es-419, es
|
||||
|
@ -1670,22 +1664,22 @@ ja-Jpan-JP, en-GB >> ja
|
|||
** test: pick best maximized tag
|
||||
@supported=ja, ja-Jpan-US, ja-JP, en, ru
|
||||
ja-Jpan, ru >> ja
|
||||
ja-JP, ru >> ja
|
||||
ja-JP, ru >> ja-JP
|
||||
ja-US, ru >> ja-Jpan-US
|
||||
|
||||
@favor=script
|
||||
ja-Jpan, ru >> ja
|
||||
ja-JP, ru >> ja
|
||||
ja-JP, ru >> ja-JP
|
||||
ja-US, ru >> ja-Jpan-US
|
||||
|
||||
** test: termination: pick best maximized match
|
||||
@supported=ja, ja-Jpan, ja-JP, en, ru
|
||||
ja-Jpan-JP, ru >> ja
|
||||
ja-Jpan, ru >> ja
|
||||
ja-Jpan-JP, ru >> ja-Jpan
|
||||
ja-Jpan, ru >> ja-Jpan
|
||||
|
||||
@favor=script
|
||||
ja-Jpan-JP, ru >> ja
|
||||
ja-Jpan, ru >> ja
|
||||
ja-Jpan-JP, ru >> ja-Jpan
|
||||
ja-Jpan, ru >> ja-Jpan
|
||||
|
||||
** test: same language over exact, but distinguish when user is explicit
|
||||
@supported=fr, en-GB, ja, es-ES, es-MX
|
||||
|
@ -1900,7 +1894,7 @@ zh-TW >> zh
|
|||
** test: testGetBestMatchWithMinMatchScore
|
||||
@supported=fr-FR, fr, fr-CA, en
|
||||
@default=und
|
||||
fr >> fr-FR # First likely-subtags equivalent match is chosen.
|
||||
fr >> fr
|
||||
@supported=en, fr, fr-CA
|
||||
fr-FR >> fr # Parent match is chosen.
|
||||
@supported=en, fr-CA
|
||||
|
@ -1930,7 +1924,7 @@ ru >> und
|
|||
|
||||
@favor=script
|
||||
@supported=fr-FR, fr, fr-CA, en
|
||||
fr >> fr-FR
|
||||
fr >> fr
|
||||
@supported=en, fr, fr-CA
|
||||
fr-FR >> fr
|
||||
@supported=en, fr-CA
|
||||
|
|
|
@ -139,10 +139,11 @@ public class LikelySubtagsBuilder {
|
|||
Map<LSR, Integer> lsrIndexes = new LinkedHashMap<>();
|
||||
// Reserve index 0 as "no value":
|
||||
// The runtime lookup returns 0 for an intermediate match with no value.
|
||||
lsrIndexes.put(new LSR("", "", ""), 0); // arbitrary LSR
|
||||
lsrIndexes.put(new LSR("", "", "", LSR.DONT_CARE_FLAGS), 0); // arbitrary LSR
|
||||
// Reserve index 1 for SKIP_SCRIPT:
|
||||
// The runtime lookup returns 1 for an intermediate match with a value.
|
||||
lsrIndexes.put(new LSR("skip", "script", ""), 1); // looks good when printing the data
|
||||
// This LSR looks good when printing the data.
|
||||
lsrIndexes.put(new LSR("skip", "script", "", LSR.DONT_CARE_FLAGS), 1);
|
||||
// We could prefill the lsrList with common locales to give them small indexes,
|
||||
// and see if that improves performance a little.
|
||||
for (Map.Entry<String, Map<String, Map<String, LSR>>> ls : langTable.entrySet()) {
|
||||
|
@ -251,7 +252,7 @@ public class LikelySubtagsBuilder {
|
|||
}
|
||||
}
|
||||
// hack
|
||||
set(result, "und", "Latn", "", new LSR("en", "Latn", "US"));
|
||||
set(result, "und", "Latn", "", new LSR("en", "Latn", "US", LSR.DONT_CARE_FLAGS));
|
||||
|
||||
// hack, ensure that if und-YY => und-Xxxx-YY, then we add Xxxx=>YY to the table
|
||||
// <likelySubtag from="und_GH" to="ak_Latn_GH"/>
|
||||
|
@ -294,7 +295,9 @@ public class LikelySubtagsBuilder {
|
|||
String lang = parts[0];
|
||||
String p2 = parts.length < 2 ? "" : parts[1];
|
||||
String p3 = parts.length < 3 ? "" : parts[2];
|
||||
return p2.length() < 4 ? new LSR(lang, "", p2) : new LSR(lang, p2, p3);
|
||||
return p2.length() < 4 ?
|
||||
new LSR(lang, "", p2, LSR.DONT_CARE_FLAGS) :
|
||||
new LSR(lang, p2, p3, LSR.DONT_CARE_FLAGS);
|
||||
}
|
||||
|
||||
private static void set(Map<String, Map<String, Map<String, LSR>>> langTable,
|
||||
|
|
|
@ -487,7 +487,10 @@ public final class LocaleDistanceBuilder {
|
|||
Set<LSR> paradigmLSRs = new HashSet<>(); // could be TreeSet if LSR were Comparable
|
||||
for (String paradigm : paradigms) {
|
||||
ULocale pl = new ULocale(paradigm);
|
||||
paradigmLSRs.add(XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(pl));
|
||||
LSR max = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(pl);
|
||||
// Clear the LSR flags to make the data equality test in
|
||||
// LocaleDistanceTest happy.
|
||||
paradigmLSRs.add(new LSR(max.language, max.script, max.region, LSR.DONT_CARE_FLAGS));
|
||||
}
|
||||
|
||||
TerritoryContainment tc = new TerritoryContainment(supplementalData);
|
||||
|
|
Loading…
Add table
Reference in a new issue