ICU-11595 Merging locale matcher's fix from the work branch emmons/t11595 to trunk, with ICU4J data jar files regenerated from the ICU4C trunk.

X-SVN-Rev: 37295
This commit is contained in:
Yoshito Umaoka 2015-03-27 17:57:18 +00:00
parent c9a8c21d51
commit bc3b3a3916
5 changed files with 81 additions and 24 deletions

View file

@ -1,6 +1,6 @@
/*
****************************************************************************************
* Copyright (C) 2009-2014, Google, Inc.; International Business Machines Corporation *
* Copyright (C) 2009-2015, Google, Inc.; International Business Machines Corporation *
* and others. All Rights Reserved. *
****************************************************************************************
*/
@ -11,7 +11,6 @@ import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -19,6 +18,7 @@ import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.impl.Row.R3;
import com.ibm.icu.impl.Utility;
/**
* Provides a way to match the languages (locales) supported by a product to the
@ -382,6 +382,29 @@ public class LocaleMatcher {
}
return result;
}
/**
 * Compares this pattern matcher with another object.
 * <p>
 * Two matchers are considered equal when their {@code level}, {@code lang},
 * {@code script} and {@code region} fields are pairwise equal according to
 * {@code Utility.objectEquals}.
 *
 * @param obj the object to compare with; may be {@code null} or of any type
 * @return {@code true} only if {@code obj} is a {@code LocalePatternMatcher}
 *         whose four fields equal this matcher's; {@code false} otherwise
 * @see java.lang.Object#equals(java.lang.Object)
 */
@Override
public boolean equals(Object obj) {
    if (this == obj) {
        return true;
    }
    // The Object.equals contract requires false (not an exception) for null
    // and for objects of an unrelated type; instanceof rejects both.
    if (!(obj instanceof LocalePatternMatcher)) {
        return false;
    }
    LocalePatternMatcher other = (LocalePatternMatcher) obj;
    return Utility.objectEquals(level, other.level)
            && Utility.objectEquals(lang, other.lang)
            && Utility.objectEquals(script, other.script)
            && Utility.objectEquals(region, other.region);
}
/**
 * Computes a hash code by XOR-ing the ordinal of {@code level} with the
 * hash codes of {@code lang}, {@code script} and {@code region}; a field
 * that is {@code null} contributes 0.
 *
 * @return the combined hash code
 * @see java.lang.Object#hashCode()
 */
@Override
public int hashCode() {
    int hash = level.ordinal();
    if (lang != null) {
        hash ^= lang.hashCode();
    }
    if (script != null) {
        hash ^= script.hashCode();
    }
    if (region != null) {
        hash ^= region.hashCode();
    }
    return hash;
}
}
enum Level {
@ -420,7 +443,10 @@ public class LocaleMatcher {
// lang_result.put(supported, result = new LinkedHashSet());
// }
// result.add(data);
scores.add(data);
boolean added = scores.add(data);
if (!added) {
throw new ICUException("trying to add duplicate data: " + data);
}
}
double getScore(ULocale desiredLocale, ULocale dMax, String desiredRaw, String desiredMax,
@ -605,6 +631,7 @@ public class LocaleMatcher {
* @internal
* @deprecated This API is ICU internal only.
*/
@SuppressWarnings("unused")
@Deprecated
private LanguageMatcherData addDistance(String desired, String supported, int percent) {
return addDistance(desired, supported, percent, false, null);
@ -654,12 +681,13 @@ public class LocaleMatcher {
}
R3<LocalePatternMatcher,LocalePatternMatcher,Double> data = Row.of(desiredMatcher, supportedMatcher, score);
R3<LocalePatternMatcher,LocalePatternMatcher,Double> data2 = oneway ? null : Row.of(supportedMatcher, desiredMatcher, score);
boolean desiredEqualsSupported = desiredMatcher.equals(supportedMatcher);
switch (desiredLen) {
case language:
String dlanguage = desiredMatcher.getLanguage();
String slanguage = supportedMatcher.getLanguage();
languageScores.addDataToScores(dlanguage, slanguage, data);
if (!oneway) {
if (!oneway && !desiredEqualsSupported) {
languageScores.addDataToScores(slanguage, dlanguage, data2);
}
break;
@ -667,7 +695,7 @@ public class LocaleMatcher {
String dscript = desiredMatcher.getScript();
String sscript = supportedMatcher.getScript();
scriptScores.addDataToScores(dscript, sscript, data);
if (!oneway) {
if (!oneway && !desiredEqualsSupported) {
scriptScores.addDataToScores(sscript, dscript, data2);
}
break;
@ -675,7 +703,7 @@ public class LocaleMatcher {
String dregion = desiredMatcher.getRegion();
String sregion = supportedMatcher.getRegion();
regionScores.addDataToScores(dregion, sregion, data);
if (!oneway) {
if (!oneway && !desiredEqualsSupported) {
regionScores.addDataToScores(sregion, dregion, data2);
}
break;
@ -832,12 +860,12 @@ public class LocaleMatcher {
defaultWritten = new LanguageMatcherData();
// HACK
// The data coming from ICU may be old, and badly ordered.
TreeSet<DataHack> hack = new TreeSet<DataHack>();
defaultWritten.addDistance("en_*_US", "en_*_*", 97);
defaultWritten.addDistance("en_*_GB", "en_*_*", 98);
defaultWritten.addDistance("es_*_ES", "es_*_*", 97);
defaultWritten.addDistance("es_*_419", "es_*_*", 99);
defaultWritten.addDistance("es_*_*", "es_*_*", 98);
// TreeSet<DataHack> hack = new TreeSet<DataHack>();
// defaultWritten.addDistance("en_*_US", "en_*_*", 97);
// defaultWritten.addDistance("en_*_GB", "en_*_*", 98);
// defaultWritten.addDistance("es_*_ES", "es_*_*", 97);
// defaultWritten.addDistance("es_*_419", "es_*_*", 99);
// defaultWritten.addDistance("es_*_*", "es_*_*", 98);
for(UResourceBundleIterator iter = written.getIterator(); iter.hasNext();) {
ICUResourceBundle item = (ICUResourceBundle) iter.next();
@ -846,11 +874,14 @@ public class LocaleMatcher {
"*_*_*",
"96",
*/
hack.add(new DataHack(item.getString(0), item.getString(1), Integer.parseInt(item.getString(2))));
}
for (DataHack dataHack : hack) {
defaultWritten.addDistance(dataHack.source, dataHack.target, dataHack.percent);
// <languageMatch desired="gsw" supported="de" percent="96" oneway="true" />
boolean oneway = item.getSize() > 3 && "1".equals(item.getString(3));
//hack.add(new DataHack(item.getString(0), item.getString(1), Integer.parseInt(item.getString(2))));
defaultWritten.addDistance(item.getString(0), item.getString(1), Integer.parseInt(item.getString(2)), oneway);
}
// for (DataHack dataHack : hack) {
// defaultWritten.addDistance(dataHack.source, dataHack.target, dataHack.percent);
// }
defaultWritten.freeze();
}

View file

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:63af7b9299539edde5eda9f5217dfdd3d5a9399aea724369460f0bcf3b325552
size 11844298
oid sha256:ec99f0d76c36f19be3534bffbe91e858af779ead34533072eb365c20f96d4224
size 11844585

View file

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2c942526376c88668628fab01d7788ddecc1d41a52e44c1631cde1ebbfee9339
oid sha256:9c5ed5779c8211dd24a2820a71c0f78224158a464a5e4fb0e599f786a60adbaa
size 90539

View file

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:483fd47da6b6dfc63c5306139c2ac120a820f3b7024c89d5707c66581ab6d69d
size 812499
oid sha256:269b1f98365fd3dcdaf78a0a6c9832f12c262553fb12906067ec7bdec2359c15
size 812503

View file

@ -1,6 +1,6 @@
/*
******************************************************************************************
* Copyright (C) 2009-2014, Google, Inc.; International Business Machines Corporation and *
* Copyright (C) 2009-2015, Google, Inc.; International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************************
*/
@ -285,12 +285,29 @@ public class LocaleMatcherTest extends TestFmwk {
LocaleMatcher matcher = new LocaleMatcher("es_AR, es");
assertEquals("es_AR", matcher.getBestMatch("es_MX").toString());
matcher = new LocaleMatcher("fr, en, en_CA");
assertEquals("en_CA", matcher.getBestMatch("en_GB").toString());
matcher = new LocaleMatcher("fr, en, en_GB");
assertEquals("en_GB", matcher.getBestMatch("en_CA").toString());
matcher = new LocaleMatcher("de_AT, de_DE, de_CH");
assertEquals("de_DE", matcher.getBestMatch("de").toString());
showDistance(matcher, "en", "en_CA");
showDistance(matcher, "en_CA", "en");
showDistance(matcher, "en_US", "en_CA");
showDistance(matcher, "en_CA", "en_US");
showDistance(matcher, "en_GB", "en_CA");
showDistance(matcher, "en_CA", "en_GB");
showDistance(matcher, "en", "en_UM");
showDistance(matcher, "en_UM", "en");
}
/**
 * Logs the value that {@code matcher.match(...)} returns for one
 * desired/supported locale pair.
 * <p>
 * Each locale ID is turned into a {@code ULocale} and passed to the matcher
 * together with the result of {@code ULocale.addLikelySubtags} for it.
 *
 * @param matcher   the matcher whose {@code match} method is invoked
 * @param desired   locale ID used as the desired locale
 * @param supported locale ID used as the supported locale
 */
private void showDistance(LocaleMatcher matcher, String desired, String supported) {
    final ULocale desiredLocale = new ULocale(desired);
    final ULocale desiredMax = ULocale.addLikelySubtags(desiredLocale);
    final ULocale supportedLocale = new ULocale(supported);
    final ULocale supportedMax = ULocale.addLikelySubtags(supportedLocale);

    final double distance = matcher.match(desiredLocale, desiredMax, supportedLocale, supportedMax);
    logln(desired + " to " + supported + " :\t" + distance);
}
/**
* If all the base languages are the same, then each sublocale matches itself most closely
@ -336,6 +353,15 @@ public class LocaleMatcherTest extends TestFmwk {
}
}
/**
 * Verifies that matching between "af" and "nl" is one-directional:
 * a request for "af" is satisfied by a supported "nl", whereas a request
 * for "nl" is not satisfied by a supported "af" and yields the first
 * listed locale ("mul") instead.
 */
public void testAsymmetry() {
    // Desired "af" against supported {mul, nl}: af => nl
    LocaleMatcher dutchSupported = new LocaleMatcher("mul, nl");
    String bestForAfrikaans = dutchSupported.getBestMatch("af").toString();
    assertEquals("nl", bestForAfrikaans);

    // Desired "nl" against supported {mul, af}: but nl !=> af
    LocaleMatcher afrikaansSupported = new LocaleMatcher("mul, af");
    String bestForDutch = afrikaansSupported.getBestMatch("nl").toString();
    assertEquals("mul", bestForDutch);
}
// public void testComputeDistance_monkeyTest() {
// RegionCode[] codes = RegionCode.values();