mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 14:05:32 +00:00
ICU-21144 LocaleMatcher setMaxDistance(), isMatch()
This commit is contained in:
parent
17f889bd0e
commit
ef12882fdb
7 changed files with 271 additions and 10 deletions
|
@ -141,6 +141,8 @@ LocaleMatcher::Builder::Builder(LocaleMatcher::Builder &&src) U_NOEXCEPT :
|
|||
// Destructor: frees the heap objects held through the Builder's pointer members.
LocaleMatcher::Builder::~Builder() {
    // delete on a null pointer is a no-op, so options that were never set need no guard.
    delete supportedLocales_;
    delete defaultLocale_;
    // Sample locale pair stored by setMaxDistance().
    delete maxDistanceDesired_;
    delete maxDistanceSupported_;
}
|
||||
|
||||
LocaleMatcher::Builder &LocaleMatcher::Builder::operator=(LocaleMatcher::Builder &&src) U_NOEXCEPT {
|
||||
|
@ -267,6 +269,24 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULoc
|
|||
return *this;
|
||||
}
|
||||
|
||||
// Stores clones of the sample (desired, supported) pair whose distance is later
// used as the maximum acceptable distance. Does nothing if the Builder already
// carries an error; sets U_MEMORY_ALLOCATION_ERROR if either clone fails.
LocaleMatcher::Builder &LocaleMatcher::Builder::setMaxDistance(const Locale &desired,
                                                               const Locale &supported) {
    if (U_SUCCESS(errorCode_)) {
        // Clone both before touching the current members so that a failed
        // allocation leaves the previously stored pair intact.
        Locale *newDesired = desired.clone();
        Locale *newSupported = supported.clone();
        if (newDesired != nullptr && newSupported != nullptr) {
            delete maxDistanceDesired_;
            delete maxDistanceSupported_;
            maxDistanceDesired_ = newDesired;
            maxDistanceSupported_ = newSupported;
        } else {
            // At most one of these is non-null; deleting nullptr is harmless.
            delete newDesired;
            delete newSupported;
            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
        }
    }
    return *this;
}
|
||||
|
||||
#if 0
|
||||
/**
|
||||
* <i>Internal only!</i>
|
||||
|
@ -351,9 +371,6 @@ LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
|
|||
supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0),
|
||||
ownedDefaultLocale(nullptr), defaultLocale(nullptr) {
|
||||
if (U_FAILURE(errorCode)) { return; }
|
||||
if (thresholdDistance < 0) {
|
||||
thresholdDistance = localeDistance.getDefaultScriptDistance();
|
||||
}
|
||||
const Locale *def = builder.defaultLocale_;
|
||||
LSR builderDefaultLSR;
|
||||
const LSR *defLSR = nullptr;
|
||||
|
@ -470,6 +487,25 @@ LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
|
|||
if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) {
|
||||
demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale();
|
||||
}
|
||||
|
||||
if (thresholdDistance >= 0) {
|
||||
// already copied
|
||||
} else if (builder.maxDistanceDesired_ != nullptr) {
|
||||
LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceSupported_, errorCode);
|
||||
const LSR *pSuppLSR = &suppLSR;
|
||||
int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
|
||||
getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceDesired_, errorCode),
|
||||
&pSuppLSR, 1,
|
||||
LocaleDistance::shiftDistance(100), favorSubtag, direction);
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
// +1 for an exclusive threshold from an inclusive max.
|
||||
thresholdDistance = LocaleDistance::getDistanceFloor(indexAndDistance) + 1;
|
||||
} else {
|
||||
thresholdDistance = 0;
|
||||
}
|
||||
} else {
|
||||
thresholdDistance = localeDistance.getDefaultScriptDistance();
|
||||
}
|
||||
}
|
||||
|
||||
LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT :
|
||||
|
@ -695,6 +731,18 @@ int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remai
|
|||
return supportedIndexes[bestSupportedLsrIndex];
|
||||
}
|
||||
|
||||
// Returns true if the (desired, supported) pair is within this matcher's
// threshold distance, taking favorSubtag and direction into account.
// Returns false whenever errorCode indicates a failure on exit.
UBool LocaleMatcher::isMatch(const Locale &desired, const Locale &supported,
                             UErrorCode &errorCode) const {
    LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
    if (U_FAILURE(errorCode)) { return false; }
    // getBestIndexAndDistance() takes an array of LSR pointers; pass a one-element array.
    const LSR *pSuppLSR = &suppLSR;
    int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
            getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
            &pSuppLSR, 1,
            LocaleDistance::shiftDistance(thresholdDistance), favorSubtag, direction);
    // Maximizing the desired locale may also have failed; never report a
    // match that was computed from a failed lookup.
    return U_SUCCESS(errorCode) && indexAndDistance >= 0;
}
|
||||
|
||||
double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const {
|
||||
// Returns the inverse of the distance: That is, 1-distance(desired, supported).
|
||||
LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
|
||||
|
|
|
@ -39,6 +39,10 @@ public:
|
|||
return shiftedDistance / (1 << DISTANCE_SHIFT);
|
||||
}
|
||||
|
||||
// Extracts the distance bits from a combined index-and-distance value
// (mask with DISTANCE_MASK) and shifts them right by DISTANCE_SHIFT.
// NOTE(review): presumably this drops the fraction bits and yields the
// whole-number distance -- confirm against the constant definitions.
static int32_t getDistanceFloor(int32_t indexAndDistance) {
    const auto shiftedDistance = indexAndDistance & DISTANCE_MASK;
    return shiftedDistance >> DISTANCE_SHIFT;
}
|
||||
|
||||
static int32_t getIndex(int32_t indexAndDistance) {
|
||||
// assert indexAndDistance >= 0;
|
||||
return indexAndDistance >> INDEX_SHIFT;
|
||||
|
@ -79,10 +83,6 @@ private:
|
|||
// tic constexpr int32_t MAX_INDEX = 0x1fffff; // avoids sign bit
|
||||
static constexpr int32_t INDEX_NEG_1 = 0xfffffc00;
|
||||
|
||||
// Extracts the distance bits from a combined index-and-distance value
// (mask with DISTANCE_MASK) and shifts them right by DISTANCE_SHIFT.
// NOTE(review): this copy sits in the private section and an identical
// definition also appears in the public section of this diff view; this looks
// like the pre-change location -- confirm which one should remain.
static int32_t getDistanceFloor(int32_t indexAndDistance) {
    const auto shiftedDistance = indexAndDistance & DISTANCE_MASK;
    return shiftedDistance >> DISTANCE_SHIFT;
}
|
||||
|
||||
LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely);
|
||||
LocaleDistance(const LocaleDistance &other) = delete;
|
||||
LocaleDistance &operator=(const LocaleDistance &other) = delete;
|
||||
|
|
|
@ -480,6 +480,31 @@ public:
|
|||
return *this;
|
||||
}
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Sets the maximum distance for an acceptable match.
|
||||
* The matcher will return a match for a pair of locales only if
|
||||
* they match at least as well as the pair given here.
|
||||
*
|
||||
* For example, setMaxDistance(en-US, en-GB) limits matches to ones where the
|
||||
* (desired, supported) locales have a distance no greater than a region subtag difference.
|
||||
* This is much stricter than the CLDR default.
|
||||
*
|
||||
* The details of locale matching are subject to changes in
|
||||
* CLDR data and in the algorithm.
|
||||
* Specifying a maximum distance in relative terms via a sample pair of locales
|
||||
* insulates from changes that affect all distance metrics similarly,
|
||||
* but some changes will necessarily affect relative distances between
|
||||
* different pairs of locales.
|
||||
*
|
||||
* @param desired the desired locale for distance comparison.
|
||||
* @param supported the supported locale for distance comparison.
|
||||
* @return this Builder object
|
||||
* @draft ICU 68
|
||||
*/
|
||||
Builder &setMaxDistance(const Locale &desired, const Locale &supported);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Sets the UErrorCode if an error occurred while setting parameters.
|
||||
* Preserves older error codes in the outErrorCode.
|
||||
|
@ -522,6 +547,8 @@ public:
|
|||
bool withDefault_ = true;
|
||||
ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE;
|
||||
ULocMatchDirection direction_ = ULOCMATCH_DIRECTION_WITH_ONE_WAY;
|
||||
Locale *maxDistanceDesired_ = nullptr;
|
||||
Locale *maxDistanceSupported_ = nullptr;
|
||||
};
|
||||
|
||||
// FYI No public LocaleMatcher constructors in C++; use the Builder.
|
||||
|
@ -620,6 +647,23 @@ public:
|
|||
Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Returns true if the pair of locales matches acceptably.
|
||||
* This is influenced by Builder options such as setDirection(), setFavorSubtag(),
|
||||
* and setMaxDistance().
|
||||
*
|
||||
* @param desired The desired locale.
|
||||
* @param supported The supported locale.
|
||||
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
|
||||
* or else the function returns immediately. Check for U_FAILURE()
|
||||
* on output or use with function chaining. (See User Guide for details.)
|
||||
* @return true if the pair of locales matches acceptably.
|
||||
* @draft ICU 68
|
||||
*/
|
||||
UBool isMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Returns a fraction between 0 and 1, where 1 means that the languages are a
|
||||
|
|
|
@ -61,6 +61,7 @@ public:
|
|||
void testNoDefault();
|
||||
void testDemotion();
|
||||
void testDirection();
|
||||
void testMaxDistanceAndIsMatch();
|
||||
void testMatch();
|
||||
void testResolvedLocale();
|
||||
void testDataDriven();
|
||||
|
@ -86,6 +87,7 @@ void LocaleMatcherTest::runIndexedTest(int32_t index, UBool exec, const char *&n
|
|||
TESTCASE_AUTO(testNoDefault);
|
||||
TESTCASE_AUTO(testDemotion);
|
||||
TESTCASE_AUTO(testDirection);
|
||||
TESTCASE_AUTO(testMaxDistanceAndIsMatch);
|
||||
TESTCASE_AUTO(testMatch);
|
||||
TESTCASE_AUTO(testResolvedLocale);
|
||||
TESTCASE_AUTO(testDataDriven);
|
||||
|
@ -380,6 +382,36 @@ void LocaleMatcherTest::testDirection() {
|
|||
}
|
||||
}
|
||||
|
||||
void LocaleMatcherTest::testMaxDistanceAndIsMatch() {
|
||||
IcuTestErrorCode errorCode(*this, "testMaxDistanceAndIsMatch");
|
||||
LocaleMatcher::Builder builder;
|
||||
LocaleMatcher standard = builder.build(errorCode);
|
||||
Locale germanLux("de-LU");
|
||||
Locale germanPhoenician("de-Phnx-AT");
|
||||
Locale greek("el");
|
||||
assertTrue("standard de-LU / de", standard.isMatch(germanLux, Locale::getGerman(), errorCode));
|
||||
assertFalse("standard de-Phnx-AT / de",
|
||||
standard.isMatch(germanPhoenician, Locale::getGerman(), errorCode));
|
||||
|
||||
// Allow a script difference to still match.
|
||||
LocaleMatcher loose =
|
||||
builder.setMaxDistance(germanPhoenician, Locale::getGerman()).build(errorCode);
|
||||
assertTrue("loose de-LU / de", loose.isMatch(germanLux, Locale::getGerman(), errorCode));
|
||||
assertTrue("loose de-Phnx-AT / de",
|
||||
loose.isMatch(germanPhoenician, Locale::getGerman(), errorCode));
|
||||
assertFalse("loose el / de", loose.isMatch(greek, Locale::getGerman(), errorCode));
|
||||
|
||||
// Allow at most a regional difference.
|
||||
LocaleMatcher regional =
|
||||
builder.setMaxDistance(Locale("de-AT"), Locale::getGerman()).build(errorCode);
|
||||
assertTrue("regional de-LU / de",
|
||||
regional.isMatch(Locale("de-LU"), Locale::getGerman(), errorCode));
|
||||
assertFalse("regional da / no", regional.isMatch(Locale("da"), Locale("no"), errorCode));
|
||||
assertFalse("regional zh-Hant / zh",
|
||||
regional.isMatch(Locale::getChinese(), Locale::getTraditionalChinese(), errorCode));
|
||||
}
|
||||
|
||||
|
||||
void LocaleMatcherTest::testMatch() {
|
||||
IcuTestErrorCode errorCode(*this, "testMatch");
|
||||
LocaleMatcher matcher = LocaleMatcher::Builder().build(errorCode);
|
||||
|
|
|
@ -92,7 +92,7 @@ public class LocaleDistance {
|
|||
return shiftedDistance / (1 << DISTANCE_SHIFT);
|
||||
}
|
||||
|
||||
private static final int getDistanceFloor(int indexAndDistance) {
|
||||
public static final int getDistanceFloor(int indexAndDistance) {
|
||||
return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT;
|
||||
}
|
||||
|
||||
|
|
|
@ -385,6 +385,8 @@ public final class LocaleMatcher {
|
|||
private boolean withDefault = true;
|
||||
private FavorSubtag favor;
|
||||
private Direction direction;
|
||||
private ULocale maxDistanceDesired;
|
||||
private ULocale maxDistanceSupported;
|
||||
|
||||
private Builder() {}
|
||||
|
||||
|
@ -557,6 +559,66 @@ public final class LocaleMatcher {
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the maximum distance for an acceptable match.
|
||||
* The matcher will return a match for a pair of locales only if
|
||||
* they match at least as well as the pair given here.
|
||||
*
|
||||
* <p>For example, setMaxDistance(en-US, en-GB) limits matches to ones where the
|
||||
* (desired, supported) locales have a distance no greater than a region subtag difference.
|
||||
* This is much stricter than the CLDR default.
|
||||
*
|
||||
* <p>The details of locale matching are subject to changes in
|
||||
* CLDR data and in the algorithm.
|
||||
* Specifying a maximum distance in relative terms via a sample pair of locales
|
||||
* insulates from changes that affect all distance metrics similarly,
|
||||
* but some changes will necessarily affect relative distances between
|
||||
* different pairs of locales.
|
||||
*
|
||||
* @param desired the desired locale for distance comparison.
|
||||
* @param supported the supported locale for distance comparison.
|
||||
* @return this Builder object
|
||||
* @draft ICU 68
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public Builder setMaxDistance(Locale desired, Locale supported) {
|
||||
if (desired == null || supported == null) {
|
||||
throw new IllegalArgumentException("desired/supported locales must not be null");
|
||||
}
|
||||
return setMaxDistance(ULocale.forLocale(desired), ULocale.forLocale(supported));
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the maximum distance for an acceptable match.
|
||||
* The matcher will return a match for a pair of locales only if
|
||||
* they match at least as well as the pair given here.
|
||||
*
|
||||
* <p>For example, setMaxDistance(en-US, en-GB) limits matches to ones where the
|
||||
* (desired, supported) locales have a distance no greater than a region subtag difference.
|
||||
* This is much stricter than the CLDR default.
|
||||
*
|
||||
* <p>The details of locale matching are subject to changes in
|
||||
* CLDR data and in the algorithm.
|
||||
* Specifying a maximum distance in relative terms via a sample pair of locales
|
||||
* insulates from changes that affect all distance metrics similarly,
|
||||
* but some changes will necessarily affect relative distances between
|
||||
* different pairs of locales.
|
||||
*
|
||||
* @param desired the desired locale for distance comparison.
|
||||
* @param supported the supported locale for distance comparison.
|
||||
* @return this Builder object
|
||||
* @draft ICU 68
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public Builder setMaxDistance(ULocale desired, ULocale supported) {
|
||||
if (desired == null || supported == null) {
|
||||
throw new IllegalArgumentException("desired/supported locales must not be null");
|
||||
}
|
||||
maxDistanceDesired = desired;
|
||||
maxDistanceSupported = supported;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* <i>Internal only!</i>
|
||||
*
|
||||
|
@ -650,8 +712,6 @@ public final class LocaleMatcher {
|
|||
}
|
||||
|
||||
private LocaleMatcher(Builder builder) {
|
||||
thresholdDistance = builder.thresholdDistance < 0 ?
|
||||
LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
|
||||
ULocale udef = builder.defaultLocale;
|
||||
Locale def = null;
|
||||
LSR defLSR = null;
|
||||
|
@ -737,6 +797,22 @@ public final class LocaleMatcher {
|
|||
LocaleDistance.INSTANCE.getDefaultDemotionPerDesiredLocale(); // null or REGION
|
||||
favorSubtag = builder.favor;
|
||||
direction = builder.direction;
|
||||
|
||||
int threshold;
|
||||
if (builder.thresholdDistance >= 0) {
|
||||
threshold = builder.thresholdDistance;
|
||||
} else if (builder.maxDistanceDesired != null) {
|
||||
int indexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
|
||||
getMaximalLsrOrUnd(builder.maxDistanceDesired),
|
||||
new LSR[] { getMaximalLsrOrUnd(builder.maxDistanceSupported) }, 1,
|
||||
LocaleDistance.shiftDistance(100), favorSubtag, direction);
|
||||
// +1 for an exclusive threshold from an inclusive max.
|
||||
threshold = LocaleDistance.getDistanceFloor(indexAndDistance) + 1;
|
||||
} else {
|
||||
threshold = LocaleDistance.INSTANCE.getDefaultScriptDistance();
|
||||
}
|
||||
thresholdDistance = threshold;
|
||||
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("new LocaleMatcher: %s\n", toString());
|
||||
}
|
||||
|
@ -1051,6 +1127,44 @@ public final class LocaleMatcher {
|
|||
return suppIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the pair of locales matches acceptably.
|
||||
* This is influenced by Builder options such as setDirection(), setFavorSubtag(),
|
||||
* and setMaxDistance().
|
||||
*
|
||||
* @param desired The desired locale.
|
||||
* @param supported The supported locale.
|
||||
* @return true if the pair of locales matches acceptably.
|
||||
* @draft ICU 68
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public boolean isMatch(Locale desired, Locale supported) {
|
||||
int indexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
|
||||
getMaximalLsrOrUnd(desired),
|
||||
new LSR[] { getMaximalLsrOrUnd(supported) }, 1,
|
||||
LocaleDistance.shiftDistance(thresholdDistance), favorSubtag, direction);
|
||||
return indexAndDistance >= 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the pair of locales matches acceptably.
|
||||
* This is influenced by Builder options such as setDirection(), setFavorSubtag(),
|
||||
* and setMaxDistance().
|
||||
*
|
||||
* @param desired The desired locale.
|
||||
* @param supported The supported locale.
|
||||
* @return true if the pair of locales matches acceptably.
|
||||
* @draft ICU 68
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
public boolean isMatch(ULocale desired, ULocale supported) {
|
||||
int indexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
|
||||
getMaximalLsrOrUnd(desired),
|
||||
new LSR[] { getMaximalLsrOrUnd(supported) }, 1,
|
||||
LocaleDistance.shiftDistance(thresholdDistance), favorSubtag, direction);
|
||||
return indexAndDistance >= 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a fraction between 0 and 1, where 1 means that the languages are a
|
||||
* perfect match, and 0 means that they are completely different.
|
||||
|
|
|
@ -677,6 +677,29 @@ public class LocaleMatcherTest extends TestFmwk {
|
|||
assertEquals("only two-way", "nn", onlyTwoWay.getBestMatch(desired).toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMaxDistanceAndIsMatch() {
|
||||
LocaleMatcher.Builder builder = LocaleMatcher.builder();
|
||||
LocaleMatcher standard = builder.build();
|
||||
ULocale germanLux = new ULocale("de-LU");
|
||||
ULocale germanPhoenician = new ULocale("de-Phnx-AT");
|
||||
ULocale greek = new ULocale("el");
|
||||
assertTrue("standard de-LU / de", standard.isMatch(germanLux, ULocale.GERMAN));
|
||||
assertFalse("standard de-Phnx-AT / de", standard.isMatch(germanPhoenician, ULocale.GERMAN));
|
||||
|
||||
// Allow a script difference to still match.
|
||||
LocaleMatcher loose = builder.setMaxDistance(germanPhoenician, ULocale.GERMAN).build();
|
||||
assertTrue("loose de-LU / de", loose.isMatch(germanLux, ULocale.GERMAN));
|
||||
assertTrue("loose de-Phnx-AT / de", loose.isMatch(germanPhoenician, ULocale.GERMAN));
|
||||
assertFalse("loose el / de", loose.isMatch(greek, ULocale.GERMAN));
|
||||
|
||||
// Allow at most a regional difference.
|
||||
LocaleMatcher regional = builder.setMaxDistance(new Locale("de", "AT"), Locale.GERMAN).build();
|
||||
assertTrue("regional de-LU / de", regional.isMatch(new Locale("de", "LU"), Locale.GERMAN));
|
||||
assertFalse("regional da / no", regional.isMatch(new Locale("da"), new Locale("no")));
|
||||
assertFalse("regional zh-Hant / zh", regional.isMatch(Locale.CHINESE, Locale.TRADITIONAL_CHINESE));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCanonicalize() {
|
||||
LocaleMatcher matcher = LocaleMatcher.builder().build();
|
||||
|
|
Loading…
Add table
Reference in a new issue