mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-4229 Enhanced the checking further.
X-SVN-Rev: 38128
This commit is contained in:
parent
bb50b15953
commit
565abe4808
2 changed files with 132 additions and 40 deletions
|
@ -93,7 +93,7 @@ public class LocaleValidityChecker {
|
|||
if (!isValidT(locale.getExtension(c), where)) return false;
|
||||
break;
|
||||
case u:
|
||||
if (!isValidU(locale.getExtension(c), where)) return false;
|
||||
if (!isValidU(locale, locale.getExtension(c), where)) return false;
|
||||
break;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
|
@ -104,12 +104,14 @@ public class LocaleValidityChecker {
|
|||
}
|
||||
|
||||
enum SpecialCase {
|
||||
normal, anything, reorder, codepoints;
|
||||
normal, anything, reorder, codepoints, subdivision;
|
||||
static SpecialCase get(String key) {
|
||||
if (key.equals("kr")) {
|
||||
return SpecialCase.reorder;
|
||||
} else if (key.equals("vt")) {
|
||||
return SpecialCase.codepoints;
|
||||
} else if (key.equals("sd")) {
|
||||
return subdivision;
|
||||
} else if (key.equals("x0")) {
|
||||
return anything;
|
||||
} else {
|
||||
|
@ -118,15 +120,17 @@ public class LocaleValidityChecker {
|
|||
}
|
||||
}
|
||||
/**
|
||||
* @param locale
|
||||
* @param extension
|
||||
* @param where
|
||||
* @return
|
||||
*/
|
||||
private boolean isValidU(String extensionString, Where where) {
|
||||
private boolean isValidU(ULocale locale, String extensionString, Where where) {
|
||||
String key = "";
|
||||
int typeCount = 0;
|
||||
ValueType valueType = null;
|
||||
SpecialCase specialCase = null;
|
||||
StringBuilder prefix = new StringBuilder();
|
||||
// TODO: is empty -u- valid?
|
||||
for (String subtag : SEPARATOR.split(extensionString)) {
|
||||
if (subtag.length() == 2) {
|
||||
|
@ -142,8 +146,20 @@ public class LocaleValidityChecker {
|
|||
typeCount = 0;
|
||||
} else {
|
||||
++typeCount;
|
||||
if (valueType == ValueType.single && typeCount > 1) {
|
||||
return where.set(Datatype.u, key+"-"+subtag);
|
||||
switch (valueType) {
|
||||
case single:
|
||||
if (typeCount > 1) {
|
||||
return where.set(Datatype.u, key+"-"+subtag);
|
||||
}
|
||||
break;
|
||||
case incremental:
|
||||
if (typeCount == 1) {
|
||||
prefix.setLength(0);
|
||||
prefix.append(subtag);
|
||||
} else {
|
||||
prefix.append('-').append(subtag);
|
||||
subtag = prefix.toString();
|
||||
}
|
||||
}
|
||||
switch (specialCase) {
|
||||
case anything:
|
||||
|
@ -162,8 +178,13 @@ public class LocaleValidityChecker {
|
|||
return where.set(Datatype.u, key+"-"+subtag);
|
||||
}
|
||||
continue;
|
||||
case subdivision:
|
||||
if (!isSubdivision(locale, subtag)) {
|
||||
return where.set(Datatype.u, key+"-"+subtag);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// en-u-sd-usca
|
||||
// en-US-u-sd-usca
|
||||
Output<Boolean> isKnownKey = new Output<Boolean>();
|
||||
|
@ -180,6 +201,33 @@ public class LocaleValidityChecker {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param locale
|
||||
* @param subtag
|
||||
* @return
|
||||
*/
|
||||
private boolean isSubdivision(ULocale locale, String subtag) {
|
||||
// First check if the subtag is valid
|
||||
if (subtag.length() < 3) {
|
||||
return false;
|
||||
}
|
||||
String region = subtag.substring(0, subtag.charAt(0) <= '9' ? 3 : 2);
|
||||
String subdivision = subtag.substring(region.length());
|
||||
if (ValidIdentifiers.isValid(Datatype.subdivision, datasubtypes, region, subdivision) == null) {
|
||||
return false;
|
||||
}
|
||||
// Then check for consistency with the locale's region
|
||||
String localeRegion = locale.getCountry();
|
||||
if (localeRegion.isEmpty()) {
|
||||
ULocale max = ULocale.addLikelySubtags(locale);
|
||||
localeRegion = max.getCountry();
|
||||
}
|
||||
if (!region.equalsIgnoreCase(localeRegion)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static final Set<String> REORDERING_INCLUDE = new HashSet<String>(Arrays.asList("space", "punct", "symbol", "currency", "digit", "others"));
|
||||
static final Set<String> REORDERING_EXCLUDE = new HashSet<String>(Arrays.asList("zinh", "zyyy"));
|
||||
/**
|
||||
|
@ -194,12 +242,12 @@ public class LocaleValidityChecker {
|
|||
return false;
|
||||
}
|
||||
return ValidIdentifiers.isValid(Datatype.script, datasubtypes, subtag) != null;
|
||||
// space, punct, symbol, currency, digit - core groups of characters below 'a'
|
||||
// any script code except Common and Inherited.
|
||||
// sc ; Zinh ; Inherited ; Qaai
|
||||
// sc ; Zyyy ; Common
|
||||
// Some pairs of scripts sort primary-equal and always reorder together. For example, Katakana characters are always reordered with Hiragana.
|
||||
// others - where all codes not explicitly mentioned should be ordered. The script code Zzzz (Unknown Script) is a synonym for others. return false;
|
||||
// space, punct, symbol, currency, digit - core groups of characters below 'a'
|
||||
// any script code except Common and Inherited.
|
||||
// sc ; Zinh ; Inherited ; Qaai
|
||||
// sc ; Zyyy ; Common
|
||||
// Some pairs of scripts sort primary-equal and always reorder together. For example, Katakana characters are always reordered with Hiragana.
|
||||
// others - where all codes not explicitly mentioned should be ordered. The script code Zzzz (Unknown Script) is a synonym for others. return false;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -235,14 +283,14 @@ public class LocaleValidityChecker {
|
|||
}
|
||||
|
||||
public enum ValueType {
|
||||
single, multiple, specific;
|
||||
single, multiple, incremental;
|
||||
private static Set<String> multipleValueTypes = new HashSet<String>(Arrays.asList("x0", "kr", "vt"));
|
||||
private static Set<String> specificValueTypes = new HashSet<String>(Arrays.asList("ca"));
|
||||
static ValueType get(String key) {
|
||||
if (multipleValueTypes.contains(key)) {
|
||||
return multiple;
|
||||
} else if (specificValueTypes.contains(key)) {
|
||||
return specific;
|
||||
return incremental;
|
||||
} else {
|
||||
return single;
|
||||
}
|
||||
|
|
|
@ -34,15 +34,11 @@ public class TestLocaleValidity extends TestFmwk {
|
|||
|
||||
public void testBasic() {
|
||||
String[][] tests = {
|
||||
{"OK", "en-u-kr-latn-digit"},
|
||||
{"Incomplete extension 'u' [at index 3]", "en-u"},
|
||||
{"Incomplete extension 't' [at index 3]", "en-t"},
|
||||
{"OK", "en-u-ca-chinese"},
|
||||
{"OK", "en-x-abcdefg"},
|
||||
{"OK", "x-abcdefg"},
|
||||
{"OK", "en-u-sd-usca"},
|
||||
{"OK", "en-US-u-sd-usca"},
|
||||
{"OK", "en-AQ-u-sd-usca"},
|
||||
{"OK", "en-t-it"},
|
||||
{"OK", "und-Cyrl-t-und-latn"},
|
||||
{"OK", "root"},
|
||||
|
@ -53,22 +49,9 @@ public class TestLocaleValidity extends TestFmwk {
|
|||
{"OK", "zh-Hant"},
|
||||
{"OK", "zh-Hant-AQ"},
|
||||
{"OK", "x-abcdefg-g-foobar"},
|
||||
{"Empty subtag [at index 0]", ""},
|
||||
{"{u, ca-chinesx}", "en-u-ca-chinesx"},
|
||||
{"{illegal, q}", "en-q-abcdefg"},
|
||||
{"Incomplete privateuse [at index 0]", "x-abc$defg"},
|
||||
{"{script, Latx}", "und-Cyrl-t-und-latx"},
|
||||
{"{variant, FOOBAR}", "zh-Hant-1606nict-1694acad-foobar"},
|
||||
{"{region, AB}", "zh-Hant-AB"},
|
||||
{"{language, ex}", "ex"},
|
||||
{"{script, Hanx}", "zh-Hanx"},
|
||||
{"{language, qaa}", "qaa"},
|
||||
{"Invalid subtag: $ [at index 3]", "EN-$"},
|
||||
{"Invalid subtag: $ [at index 0]", "$"},
|
||||
// too many items
|
||||
{"{u, cu-usd}", "en-u-cu-adp-usd"},
|
||||
|
||||
{"OK", "en-u-ca-buddhist"},
|
||||
{"OK", "en-u-ca-islamic-umalqura"}, // additive
|
||||
{"OK", "en-u-cf-account"},
|
||||
{"OK", "en-u-co-big5han"},
|
||||
{"OK", "en-u-cu-adp"},
|
||||
|
@ -80,17 +63,80 @@ public class TestLocaleValidity extends TestFmwk {
|
|||
{"OK", "en-u-kf-false"},
|
||||
{"OK", "en-u-kk-false"},
|
||||
{"OK", "en-u-kn-false"},
|
||||
{"OK", "en-u-kr-latn-digit-symbol"},
|
||||
{"OK", "en-u-kr-latn-digit-symbol"}, // reorder codes, multiple
|
||||
{"OK", "en-u-ks-identic"},
|
||||
{"OK", "en-u-kv-currency"},
|
||||
{"OK", "en-u-nu-ahom"},
|
||||
{"OK", "en-u-sd-usny"},
|
||||
{"OK", "en-u-tz-adalv"},
|
||||
{"OK", "en-u-va-posix"},
|
||||
{"{u, ca-civil}", "en-u-ca-islamicc"}, // deprecated
|
||||
|
||||
// really long case
|
||||
|
||||
{"OK", "en-u-ca-buddhist-ca-islamic-umalqura-cf-account-co-big5han-cu-adp-fw-fri-hc-h11-ka-noignore-kb-false-kc-false-kf-false-kk-false-kn-false-kr-latn-digit-symbol-ks-identic-kv-currency-nu-ahom-sd-usny-tz-adalv-va-posix"},
|
||||
|
||||
// deprecated, but turned into valid by ULocale.Builder()
|
||||
{"OK", "en-u-ca-islamicc"}, // deprecated
|
||||
{"OK", "en-u-tz-aqams"}, // deprecated
|
||||
|
||||
// Bad syntax (caught by ULocale.Builder())
|
||||
|
||||
{"Incomplete extension 'u' [at index 3]", "en-u"},
|
||||
{"Incomplete extension 't' [at index 3]", "en-t"},
|
||||
{"Empty subtag [at index 0]", ""},
|
||||
{"Incomplete privateuse [at index 0]", "x-abc$defg"},
|
||||
{"Invalid subtag: $ [at index 3]", "EN-$"},
|
||||
{"Invalid subtag: $ [at index 0]", "$"},
|
||||
|
||||
// bad extension
|
||||
|
||||
{"{illegal, q}", "en-q-abcdefg"},
|
||||
|
||||
// bad subtags
|
||||
|
||||
{"{variant, FOOBAR}", "zh-Hant-1606nict-1694acad-foobar"},
|
||||
{"{region, AB}", "zh-Hant-AB"},
|
||||
{"{language, ex}", "ex"},
|
||||
{"{script, Hanx}", "zh-Hanx"},
|
||||
{"{language, qaa}", "qaa"},
|
||||
|
||||
// bad types for keys
|
||||
|
||||
{"{u, ca-chinesx}", "en-u-ca-chinesx"},
|
||||
{"{script, Latx}", "und-Cyrl-t-und-latx"},
|
||||
{"{u, sd-usca}", "en-AQ-u-sd-usca"},
|
||||
|
||||
{"{u, ca-buddhisx}", "en-u-ca-buddhisx"},
|
||||
{"{u, ca-islamic-umalqurx}", "en-u-ca-islamic-umalqurx"}, // additive
|
||||
{"{u, cf-accounx}", "en-u-cf-accounx"},
|
||||
{"{u, co-big5hax}", "en-u-co-big5hax"},
|
||||
{"{u, cu-adx}", "en-u-cu-adx"},
|
||||
{"{u, fw-frx}", "en-u-fw-frx"},
|
||||
{"{u, hc-h1x}", "en-u-hc-h1x"},
|
||||
{"{u, ka-noignorx}", "en-u-ka-noignorx"},
|
||||
{"{u, kb-falsx}", "en-u-kb-falsx"},
|
||||
{"{u, kc-falsx}", "en-u-kc-falsx"},
|
||||
{"{u, kf-falsx}", "en-u-kf-falsx"},
|
||||
{"{u, kk-falsx}", "en-u-kk-falsx"},
|
||||
{"{u, kn-falsx}", "en-u-kn-falsx"},
|
||||
{"{u, kr-symbox}", "en-u-kr-latn-digit-symbox"}, // reorder codes, multiple
|
||||
{"{u, ks-identix}", "en-u-ks-identix"},
|
||||
{"{u, kv-currencx}", "en-u-kv-currencx"},
|
||||
{"{u, nu-ahox}", "en-u-nu-ahox"},
|
||||
{"{u, sd-usnx}", "en-u-sd-usnx"},
|
||||
{"{u, tz-adalx}", "en-u-tz-adalx"},
|
||||
{"{u, va-posit}", "en-u-va-posit"},
|
||||
|
||||
|
||||
// too many items
|
||||
|
||||
{"{u, cu-usd}", "en-u-cu-adp-usd"},
|
||||
|
||||
// use deprecated subtags. testDeprecated checks if they work when Datasubtype.deprecated is added
|
||||
//{"{u, ca-civil}", "en-u-ca-islamicc"}, // deprecated, but turns into valid
|
||||
{"{u, co-direct}", "en-u-co-direct"}, // deprecated
|
||||
{"{u, kh}", "en-u-kh-false"}, // deprecated
|
||||
{"{u, tz-aqams}", "en-u-tz-aqams"}, // deprecated
|
||||
{"{u, tz-camtr}", "en-u-tz-camtr"}, // deprecated
|
||||
{"{u, vt}", "en-u-vt-0020-0041"}, // deprecated
|
||||
};
|
||||
check(tests, Datasubtype.regular, Datasubtype.unknown);
|
||||
|
@ -120,12 +166,10 @@ public class TestLocaleValidity extends TestFmwk {
|
|||
}
|
||||
|
||||
public void testDeprecated() {
|
||||
LocaleValidityChecker regularAndDeprecated = new LocaleValidityChecker(EnumSet.of(Datasubtype.regular, Datasubtype.deprecated));
|
||||
String[][] tests = {
|
||||
{"OK", "en-u-ca-islamicc"}, // deprecated
|
||||
{"OK", "en-u-co-direct"}, // deprecated
|
||||
{"OK", "en-u-kh-false"}, // deprecated
|
||||
{"OK", "en-u-tz-aqams"}, // deprecated
|
||||
{"OK", "en-u-tz-camtr"}, // deprecated
|
||||
{"OK", "en-u-vt-0020"}, // deprecated
|
||||
};
|
||||
check(tests, Datasubtype.regular, Datasubtype.unknown, Datasubtype.deprecated);
|
||||
|
@ -133,9 +177,9 @@ public class TestLocaleValidity extends TestFmwk {
|
|||
|
||||
/**
 * Runs a table of test rows against a LocaleValidityChecker built from the given
 * datasubtypes.
 * <p>
 * Each row is handed to the four-argument {@code check} overload as
 * {@code (runningCount, checker, row[0], row[1])}. In the tables passed by the
 * visible tests, {@code row[0]} is the expected outcome ("OK" or an error
 * description) and {@code row[1]} is the locale tag to validate.
 *
 * @param tests        rows of two strings each, as described above
 * @param datasubtypes the Datasubtype values forwarded to the LocaleValidityChecker constructor
 */
// NOTE(review): the two checker declarations and the two check(...) calls below look like
// the before/after forms of a local-variable rename shown together in this view; confirm
// against the actual file which pair is live.
private void check(String[][] tests, Datasubtype... datasubtypes) {
|
||||
int count = 0;
|
||||
LocaleValidityChecker regularAndUnknown = new LocaleValidityChecker(datasubtypes);
|
||||
LocaleValidityChecker localeValidityChecker = new LocaleValidityChecker(datasubtypes);
|
||||
for (String[] test : tests) {
|
||||
check(++count, regularAndUnknown, test[0], test[1]);
|
||||
|
||||
check(++count, localeValidityChecker, test[0], test[1]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue