ICU-21144 LocaleMatcher setMaxDistance(), isMatch()

This commit is contained in:
Markus Scherer 2020-06-22 18:55:04 -07:00
parent 17f889bd0e
commit ef12882fdb
7 changed files with 271 additions and 10 deletions

View file

@ -141,6 +141,8 @@ LocaleMatcher::Builder::Builder(LocaleMatcher::Builder &&src) U_NOEXCEPT :
// Destructor: frees every heap object this Builder deletes on teardown,
// including the two locale clones stored by setMaxDistance().
LocaleMatcher::Builder::~Builder() {
    // Deleting a null pointer is a no-op, so unset members need no guard.
    delete maxDistanceSupported_;
    delete maxDistanceDesired_;
    delete defaultLocale_;
    delete supportedLocales_;
}
LocaleMatcher::Builder &LocaleMatcher::Builder::operator=(LocaleMatcher::Builder &&src) U_NOEXCEPT {
@ -267,6 +269,24 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULoc
return *this;
}
// Stores clones of the sample (desired, supported) pair whose distance is later
// used as the maximum acceptable distance. Any previously stored pair is replaced.
// Does nothing if the Builder already carries an error; records
// U_MEMORY_ALLOCATION_ERROR if either clone cannot be allocated.
LocaleMatcher::Builder &LocaleMatcher::Builder::setMaxDistance(const Locale &desired,
                                                               const Locale &supported) {
    if (U_SUCCESS(errorCode_)) {
        Locale *newDesired = desired.clone();
        Locale *newSupported = supported.clone();
        if (newDesired != nullptr && newSupported != nullptr) {
            // Both copies exist: drop the old pair and adopt the new one.
            delete maxDistanceDesired_;
            delete maxDistanceSupported_;
            maxDistanceDesired_ = newDesired;
            maxDistanceSupported_ = newSupported;
        } else {
            // At most one of the two was allocated; release it and keep the old pair.
            delete newDesired;
            delete newSupported;
            errorCode_ = U_MEMORY_ALLOCATION_ERROR;
        }
    }
    return *this;
}
#if 0
/**
* <i>Internal only!</i>
@ -351,9 +371,6 @@ LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0),
ownedDefaultLocale(nullptr), defaultLocale(nullptr) {
if (U_FAILURE(errorCode)) { return; }
if (thresholdDistance < 0) {
thresholdDistance = localeDistance.getDefaultScriptDistance();
}
const Locale *def = builder.defaultLocale_;
LSR builderDefaultLSR;
const LSR *defLSR = nullptr;
@ -470,6 +487,25 @@ LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) {
demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale();
}
if (thresholdDistance >= 0) {
// already copied
} else if (builder.maxDistanceDesired_ != nullptr) {
LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceSupported_, errorCode);
const LSR *pSuppLSR = &suppLSR;
int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceDesired_, errorCode),
&pSuppLSR, 1,
LocaleDistance::shiftDistance(100), favorSubtag, direction);
if (U_SUCCESS(errorCode)) {
// +1 for an exclusive threshold from an inclusive max.
thresholdDistance = LocaleDistance::getDistanceFloor(indexAndDistance) + 1;
} else {
thresholdDistance = 0;
}
} else {
thresholdDistance = localeDistance.getDefaultScriptDistance();
}
}
LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT :
@ -695,6 +731,18 @@ int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remai
return supportedIndexes[bestSupportedLsrIndex];
}
// Returns true if the single (desired, supported) pair is within this matcher's
// threshold distance, honoring the configured favorSubtag and direction.
// Only the given pair is compared; the supported locales of the matcher are not used.
//
// @param desired   the desired locale
// @param supported the supported locale
// @param errorCode in/out ICU error code; on any failure the function returns false
// @return true if the pair matches acceptably and no error occurred
UBool LocaleMatcher::isMatch(const Locale &desired, const Locale &supported,
                             UErrorCode &errorCode) const {
    LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
    if (U_FAILURE(errorCode)) { return false; }
    const LSR *pSuppLSR = &suppLSR;
    int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
            getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
            &pSuppLSR, 1,
            LocaleDistance::shiftDistance(thresholdDistance), favorSubtag, direction);
    // Maximizing the desired locale may also have failed; in that case the
    // distance was computed from an unreliable LSR and must not be reported as a match.
    return U_SUCCESS(errorCode) && indexAndDistance >= 0;
}
double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const {
// Returns the inverse of the distance: That is, 1-distance(desired, supported).
LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);

View file

@ -39,6 +39,10 @@ public:
return shiftedDistance / (1 << DISTANCE_SHIFT);
}
// Extracts the distance field from a combined index-and-distance value:
// keeps the bits selected by DISTANCE_MASK and shifts them right by DISTANCE_SHIFT.
static int32_t getDistanceFloor(int32_t indexAndDistance) {
    const int32_t distanceBits = indexAndDistance & DISTANCE_MASK;
    return distanceBits >> DISTANCE_SHIFT;
}
static int32_t getIndex(int32_t indexAndDistance) {
// assert indexAndDistance >= 0;
return indexAndDistance >> INDEX_SHIFT;
@ -79,10 +83,6 @@ private:
// tic constexpr int32_t MAX_INDEX = 0x1fffff; // avoids sign bit
static constexpr int32_t INDEX_NEG_1 = 0xfffffc00;
// Extracts the distance field from a combined index-and-distance value.
static int32_t getDistanceFloor(int32_t indexAndDistance) {
// Keep only the bits selected by DISTANCE_MASK, then shift right by DISTANCE_SHIFT.
return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT;
}
LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely);
LocaleDistance(const LocaleDistance &other) = delete;
LocaleDistance &operator=(const LocaleDistance &other) = delete;

View file

@ -480,6 +480,31 @@ public:
return *this;
}
#ifndef U_HIDE_DRAFT_API
/**
* Sets the maximum distance for an acceptable match.
* The matcher will return a match for a pair of locales only if
* they match at least as well as the pair given here.
*
* For example, setMaxDistance(en-US, en-GB) limits matches to ones where the
* (desired, supported) locales have a distance no greater than a region subtag difference.
* This is much stricter than the CLDR default.
*
* The details of locale matching are subject to changes in
* CLDR data and in the algorithm.
* Specifying a maximum distance in relative terms via a sample pair of locales
* insulates from changes that affect all distance metrics similarly,
* but some changes will necessarily affect relative distances between
* different pairs of locales.
*
* Both locales are copied. Calling this function again replaces the
* previously set pair. The call has no effect if this Builder is already
* in an error state.
*
* @param desired the desired locale for distance comparison.
* @param supported the supported locale for distance comparison.
* @return this Builder object
* @draft ICU 68
*/
Builder &setMaxDistance(const Locale &desired, const Locale &supported);
#endif // U_HIDE_DRAFT_API
/**
* Sets the UErrorCode if an error occurred while setting parameters.
* Preserves older error codes in the outErrorCode.
@ -522,6 +547,8 @@ public:
bool withDefault_ = true;
ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE;
ULocMatchDirection direction_ = ULOCMATCH_DIRECTION_WITH_ONE_WAY;
Locale *maxDistanceDesired_ = nullptr;
Locale *maxDistanceSupported_ = nullptr;
};
// FYI No public LocaleMatcher constructors in C++; use the Builder.
@ -620,6 +647,23 @@ public:
Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DRAFT_API
/**
* Returns true if the pair of locales matches acceptably.
* This is influenced by Builder options such as setDirection(), setFavorSubtag(),
* and setMaxDistance().
*
* Only the given pair is compared; the supported locales configured
* on the Builder are not consulted.
*
* @param desired The desired locale.
* @param supported The supported locale.
* @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
* or else the function returns immediately. Check for U_FAILURE()
* on output or use with function chaining. (See User Guide for details.)
* @return true if the pair of locales matches acceptably.
* @draft ICU 68
*/
UBool isMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_INTERNAL_API
/**
* Returns a fraction between 0 and 1, where 1 means that the languages are a

View file

@ -61,6 +61,7 @@ public:
void testNoDefault();
void testDemotion();
void testDirection();
void testMaxDistanceAndIsMatch();
void testMatch();
void testResolvedLocale();
void testDataDriven();
@ -86,6 +87,7 @@ void LocaleMatcherTest::runIndexedTest(int32_t index, UBool exec, const char *&n
TESTCASE_AUTO(testNoDefault);
TESTCASE_AUTO(testDemotion);
TESTCASE_AUTO(testDirection);
TESTCASE_AUTO(testMaxDistanceAndIsMatch);
TESTCASE_AUTO(testMatch);
TESTCASE_AUTO(testResolvedLocale);
TESTCASE_AUTO(testDataDriven);
@ -380,6 +382,36 @@ void LocaleMatcherTest::testDirection() {
}
}
// Exercises Builder::setMaxDistance() together with LocaleMatcher::isMatch().
// Three matchers are built from the same Builder: the default one, a "loose" one
// that tolerates a script difference, and a "regional" one that tolerates at most
// a region difference. Assertion labels are written as "<matcher> desired / supported".
void LocaleMatcherTest::testMaxDistanceAndIsMatch() {
    IcuTestErrorCode errorCode(*this, "testMaxDistanceAndIsMatch");
    LocaleMatcher::Builder builder;
    LocaleMatcher standard = builder.build(errorCode);
    Locale germanLux("de-LU");
    Locale germanPhoenician("de-Phnx-AT");
    Locale greek("el");
    assertTrue("standard de-LU / de", standard.isMatch(germanLux, Locale::getGerman(), errorCode));
    assertFalse("standard de-Phnx-AT / de",
                standard.isMatch(germanPhoenician, Locale::getGerman(), errorCode));

    // Allow a script difference to still match.
    LocaleMatcher loose =
        builder.setMaxDistance(germanPhoenician, Locale::getGerman()).build(errorCode);
    assertTrue("loose de-LU / de", loose.isMatch(germanLux, Locale::getGerman(), errorCode));
    assertTrue("loose de-Phnx-AT / de",
               loose.isMatch(germanPhoenician, Locale::getGerman(), errorCode));
    assertFalse("loose el / de", loose.isMatch(greek, Locale::getGerman(), errorCode));

    // Allow at most a regional difference.
    LocaleMatcher regional =
        builder.setMaxDistance(Locale("de-AT"), Locale::getGerman()).build(errorCode);
    assertTrue("regional de-LU / de",
               regional.isMatch(Locale("de-LU"), Locale::getGerman(), errorCode));
    assertFalse("regional da / no", regional.isMatch(Locale("da"), Locale("no"), errorCode));
    // Label follows the argument order: desired is Chinese, supported is Traditional Chinese.
    assertFalse("regional zh / zh-Hant",
                regional.isMatch(Locale::getChinese(), Locale::getTraditionalChinese(), errorCode));
}
void LocaleMatcherTest::testMatch() {
IcuTestErrorCode errorCode(*this, "testMatch");
LocaleMatcher matcher = LocaleMatcher::Builder().build(errorCode);

View file

@ -92,7 +92,7 @@ public class LocaleDistance {
return shiftedDistance / (1 << DISTANCE_SHIFT);
}
private static final int getDistanceFloor(int indexAndDistance) {
public static final int getDistanceFloor(int indexAndDistance) {
// Keep only the bits selected by DISTANCE_MASK, then shift right by DISTANCE_SHIFT.
return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT;
}

View file

@ -385,6 +385,8 @@ public final class LocaleMatcher {
private boolean withDefault = true;
private FavorSubtag favor;
private Direction direction;
private ULocale maxDistanceDesired;
private ULocale maxDistanceSupported;
private Builder() {}
@ -557,6 +559,66 @@ public final class LocaleMatcher {
return this;
}
/**
 * Sets the maximum distance for an acceptable match, expressed via a sample pair of locales.
 * A (desired, supported) pair is accepted by the matcher only if it matches
 * at least as well as the pair given here.
 *
 * <p>For example, setMaxDistance(en-US, en-GB) limits matches to pairs whose
 * distance is no greater than a region subtag difference,
 * which is much stricter than the CLDR default.
 *
 * <p>Locale matching details depend on CLDR data and on the algorithm, and may change.
 * A relative maximum given as a sample pair is insulated from changes that
 * affect all distance metrics similarly, but some changes will necessarily
 * affect relative distances between different pairs of locales.
 *
 * <p>This overload converts both arguments with ULocale.forLocale() and
 * delegates to {@link #setMaxDistance(ULocale, ULocale)}.
 *
 * @param desired the desired locale for distance comparison.
 * @param supported the supported locale for distance comparison.
 * @return this Builder object
 * @throws IllegalArgumentException if either locale is null
 * @draft ICU 68
 * @provisional This API might change or be removed in a future release.
 */
public Builder setMaxDistance(Locale desired, Locale supported) {
    if (desired != null && supported != null) {
        return setMaxDistance(ULocale.forLocale(desired), ULocale.forLocale(supported));
    }
    throw new IllegalArgumentException("desired/supported locales must not be null");
}
/**
 * Sets the maximum distance for an acceptable match, expressed via a sample pair of locales.
 * A (desired, supported) pair is accepted by the matcher only if it matches
 * at least as well as the pair given here.
 *
 * <p>For example, setMaxDistance(en-US, en-GB) limits matches to pairs whose
 * distance is no greater than a region subtag difference,
 * which is much stricter than the CLDR default.
 *
 * <p>Locale matching details depend on CLDR data and on the algorithm, and may change.
 * A relative maximum given as a sample pair is insulated from changes that
 * affect all distance metrics similarly, but some changes will necessarily
 * affect relative distances between different pairs of locales.
 *
 * <p>The pair is stored on the builder; a later call replaces it.
 *
 * @param desired the desired locale for distance comparison.
 * @param supported the supported locale for distance comparison.
 * @return this Builder object
 * @throws IllegalArgumentException if either locale is null
 * @draft ICU 68
 * @provisional This API might change or be removed in a future release.
 */
public Builder setMaxDistance(ULocale desired, ULocale supported) {
    if (desired != null && supported != null) {
        maxDistanceDesired = desired;
        maxDistanceSupported = supported;
        return this;
    }
    throw new IllegalArgumentException("desired/supported locales must not be null");
}
/**
* <i>Internal only!</i>
*
@ -650,8 +712,6 @@ public final class LocaleMatcher {
}
private LocaleMatcher(Builder builder) {
thresholdDistance = builder.thresholdDistance < 0 ?
LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
ULocale udef = builder.defaultLocale;
Locale def = null;
LSR defLSR = null;
@ -737,6 +797,22 @@ public final class LocaleMatcher {
LocaleDistance.INSTANCE.getDefaultDemotionPerDesiredLocale(); // null or REGION
favorSubtag = builder.favor;
direction = builder.direction;
int threshold;
if (builder.thresholdDistance >= 0) {
threshold = builder.thresholdDistance;
} else if (builder.maxDistanceDesired != null) {
int indexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
getMaximalLsrOrUnd(builder.maxDistanceDesired),
new LSR[] { getMaximalLsrOrUnd(builder.maxDistanceSupported) }, 1,
LocaleDistance.shiftDistance(100), favorSubtag, direction);
// +1 for an exclusive threshold from an inclusive max.
threshold = LocaleDistance.getDistanceFloor(indexAndDistance) + 1;
} else {
threshold = LocaleDistance.INSTANCE.getDefaultScriptDistance();
}
thresholdDistance = threshold;
if (TRACE_MATCHER) {
System.err.printf("new LocaleMatcher: %s\n", toString());
}
@ -1051,6 +1127,44 @@ public final class LocaleMatcher {
return suppIndex;
}
/**
 * Tests whether one (desired, supported) pair of locales is an acceptable match.
 * The outcome depends on Builder options such as setDirection(), setFavorSubtag(),
 * and setMaxDistance(). Only the given pair is compared.
 *
 * @param desired The desired locale.
 * @param supported The supported locale.
 * @return true if the pair of locales matches acceptably.
 * @draft ICU 68
 * @provisional This API might change or be removed in a future release.
 */
public boolean isMatch(Locale desired, Locale supported) {
    LSR desiredLSR = getMaximalLsrOrUnd(desired);
    LSR[] supportedLSRs = { getMaximalLsrOrUnd(supported) };
    int shiftedThreshold = LocaleDistance.shiftDistance(thresholdDistance);
    int indexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
            desiredLSR, supportedLSRs, 1, shiftedThreshold, favorSubtag, direction);
    // A negative result means no candidate was within the threshold.
    return indexAndDistance >= 0;
}
/**
 * Tests whether one (desired, supported) pair of locales is an acceptable match.
 * The outcome depends on Builder options such as setDirection(), setFavorSubtag(),
 * and setMaxDistance(). Only the given pair is compared.
 *
 * @param desired The desired locale.
 * @param supported The supported locale.
 * @return true if the pair of locales matches acceptably.
 * @draft ICU 68
 * @provisional This API might change or be removed in a future release.
 */
public boolean isMatch(ULocale desired, ULocale supported) {
    LSR desiredLSR = getMaximalLsrOrUnd(desired);
    LSR[] supportedLSRs = { getMaximalLsrOrUnd(supported) };
    int shiftedThreshold = LocaleDistance.shiftDistance(thresholdDistance);
    int indexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
            desiredLSR, supportedLSRs, 1, shiftedThreshold, favorSubtag, direction);
    // A negative result means no candidate was within the threshold.
    return indexAndDistance >= 0;
}
/**
* Returns a fraction between 0 and 1, where 1 means that the languages are a
* perfect match, and 0 means that they are completely different.

View file

@ -677,6 +677,29 @@ public class LocaleMatcherTest extends TestFmwk {
assertEquals("only two-way", "nn", onlyTwoWay.getBestMatch(desired).toString());
}
/**
 * Exercises Builder.setMaxDistance() together with LocaleMatcher.isMatch().
 * Three matchers are built from the same Builder: the default one, a "loose" one
 * that tolerates a script difference, and a "regional" one that tolerates at most
 * a region difference. Assertion labels are written as "matcher desired / supported".
 */
@Test
public void testMaxDistanceAndIsMatch() {
    LocaleMatcher.Builder builder = LocaleMatcher.builder();
    LocaleMatcher standard = builder.build();
    ULocale germanLux = new ULocale("de-LU");
    ULocale germanPhoenician = new ULocale("de-Phnx-AT");
    ULocale greek = new ULocale("el");
    assertTrue("standard de-LU / de", standard.isMatch(germanLux, ULocale.GERMAN));
    assertFalse("standard de-Phnx-AT / de", standard.isMatch(germanPhoenician, ULocale.GERMAN));

    // Allow a script difference to still match.
    LocaleMatcher loose = builder.setMaxDistance(germanPhoenician, ULocale.GERMAN).build();
    assertTrue("loose de-LU / de", loose.isMatch(germanLux, ULocale.GERMAN));
    assertTrue("loose de-Phnx-AT / de", loose.isMatch(germanPhoenician, ULocale.GERMAN));
    assertFalse("loose el / de", loose.isMatch(greek, ULocale.GERMAN));

    // Allow at most a regional difference.
    LocaleMatcher regional = builder.setMaxDistance(new Locale("de", "AT"), Locale.GERMAN).build();
    assertTrue("regional de-LU / de", regional.isMatch(new Locale("de", "LU"), Locale.GERMAN));
    assertFalse("regional da / no", regional.isMatch(new Locale("da"), new Locale("no")));
    // Label follows the argument order: desired is Chinese, supported is Traditional Chinese.
    assertFalse("regional zh / zh-Hant", regional.isMatch(Locale.CHINESE, Locale.TRADITIONAL_CHINESE));
}
@Test
public void testCanonicalize() {
LocaleMatcher matcher = LocaleMatcher.builder().build();