mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-15 17:56:54 +00:00
ICU-13513 Generalizing UnicodeSetStaticCache to cover more locales.
X-SVN-Rev: 40786
This commit is contained in:
parent
d0a75c667b
commit
b28712d52f
7 changed files with 153 additions and 26 deletions
|
@ -33,10 +33,10 @@ public class NanMatcher extends SymbolMatcher {
|
|||
@Override
|
||||
public UnicodeSet getLeadCodePoints() {
|
||||
// Overriding this here to allow use of statically allocated sets
|
||||
if (this == DEFAULT) {
|
||||
return UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.CAPITAL_N);
|
||||
} else if (this == DEFAULT_FOLDED) {
|
||||
return UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.FOLDED_N);
|
||||
int leadCp = string.codePointAt(0);
|
||||
UnicodeSet s = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.NAN_LEAD);
|
||||
if (s.contains(leadCp)) {
|
||||
return s;
|
||||
} else {
|
||||
return super.getLeadCodePoints();
|
||||
}
|
||||
|
|
|
@ -32,7 +32,8 @@ import com.ibm.icu.util.ULocale;
|
|||
*/
|
||||
public class NumberParserImpl {
|
||||
@Deprecated
|
||||
public static NumberParserImpl createParserFromPattern(String pattern, boolean strictGrouping) {
|
||||
public static NumberParserImpl createParserFromPattern(
|
||||
ULocale locale, String pattern, boolean strictGrouping) {
|
||||
// Temporary frontend for testing.
|
||||
|
||||
int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE
|
||||
|
@ -42,7 +43,6 @@ public class NumberParserImpl {
|
|||
}
|
||||
|
||||
NumberParserImpl parser = new NumberParserImpl(parseFlags, true);
|
||||
ULocale locale = new ULocale("en_IN");
|
||||
DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(locale);
|
||||
IgnorablesMatcher ignorables = IgnorablesMatcher.DEFAULT;
|
||||
|
||||
|
@ -54,6 +54,7 @@ public class NumberParserImpl {
|
|||
parser.addMatcher(ignorables);
|
||||
parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
|
||||
parser.addMatcher(MinusSignMatcher.getInstance(symbols));
|
||||
parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
|
||||
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
|
||||
parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
|
||||
parser.addMatcher(new RequireNumberMatcher());
|
||||
|
|
|
@ -35,13 +35,12 @@ public class ParsingUtils {
|
|||
}
|
||||
}
|
||||
|
||||
private static final UnicodeSet LETTERS = new UnicodeSet("[:letter:]").freeze();
|
||||
|
||||
/**
|
||||
* Case-folds the string if IGNORE_CASE flag is set; otherwise, returns the same string.
|
||||
*/
|
||||
public static String maybeFold(String input, int parseFlags) {
|
||||
if (0 != (parseFlags & PARSE_FLAG_IGNORE_CASE) && LETTERS.containsSome(input)) {
|
||||
UnicodeSet cwcf = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.CWCF);
|
||||
if (0 != (parseFlags & PARSE_FLAG_IGNORE_CASE) && cwcf.containsSome(input)) {
|
||||
return UCharacter.foldCase(input, true);
|
||||
} else {
|
||||
return input;
|
||||
|
|
|
@ -84,13 +84,12 @@ public class ScientificMatcher implements NumberParseMatcher {
|
|||
|
||||
@Override
|
||||
public UnicodeSet getLeadCodePoints() {
|
||||
int cp = exponentSeparatorString.codePointAt(0);
|
||||
if (cp == 'E') {
|
||||
return UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.CAPITAL_E);
|
||||
} else if (cp == 'e') {
|
||||
return UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.FOLDED_E);
|
||||
int leadCp = exponentSeparatorString.codePointAt(0);
|
||||
UnicodeSet s = UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.SCIENTIFIC_LEAD);
|
||||
if (s.contains(leadCp)) {
|
||||
return s;
|
||||
} else {
|
||||
return new UnicodeSet().add(cp).freeze();
|
||||
return new UnicodeSet().add(leadCp).freeze();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -48,10 +48,9 @@ public class UnicodeSetStaticCache {
|
|||
|
||||
// Other
|
||||
DIGITS,
|
||||
CAPITAL_N,
|
||||
FOLDED_N,
|
||||
CAPITAL_E,
|
||||
FOLDED_E,
|
||||
NAN_LEAD,
|
||||
SCIENTIFIC_LEAD,
|
||||
CWCF,
|
||||
|
||||
// Combined Separators with Digits (for lead code points)
|
||||
DIGITS_OR_COMMA_OR_OTHER,
|
||||
|
@ -189,10 +188,12 @@ public class UnicodeSetStaticCache {
|
|||
unicodeSets.put(Key.INFINITY, new UnicodeSet("[∞]").freeze());
|
||||
|
||||
unicodeSets.put(Key.DIGITS, new UnicodeSet("[:digit:]").freeze());
|
||||
unicodeSets.put(Key.CAPITAL_N, new UnicodeSet("[N]").freeze());
|
||||
unicodeSets.put(Key.FOLDED_N, new UnicodeSet("[n]").freeze());
|
||||
unicodeSets.put(Key.CAPITAL_E, new UnicodeSet("[E]").freeze());
|
||||
unicodeSets.put(Key.FOLDED_E, new UnicodeSet("[e]").freeze());
|
||||
// Note: locale fi translation of NaN starts with 'e' (conflicts with scientific?)
|
||||
unicodeSets.put(Key.NAN_LEAD,
|
||||
new UnicodeSet("[NnТтmeՈոс¤НнчTtsҳ\u975e\u1002\u0e9a\u10d0\u0f68\u0644\u0646]")
|
||||
.freeze());
|
||||
unicodeSets.put(Key.SCIENTIFIC_LEAD, new UnicodeSet("[Ee×·е\u0627]").freeze());
|
||||
unicodeSets.put(Key.CWCF, new UnicodeSet("[:CWCF:]").freeze());
|
||||
|
||||
unicodeSets.put(Key.DIGITS_OR_COMMA_OR_OTHER, computeUnion(Key.DIGITS, Key.COMMA_OR_OTHER));
|
||||
unicodeSets.put(Key.DIGITS_OR_PERIOD_OR_OTHER, computeUnion(Key.DIGITS, Key.PERIOD_OR_OTHER));
|
||||
|
|
|
@ -4,11 +4,13 @@ package com.ibm.icu.dev.test.number;
|
|||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import com.ibm.icu.impl.number.parse.NumberParserImpl;
|
||||
import com.ibm.icu.impl.number.parse.ParsedNumber;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
/**
|
||||
* @author sffc
|
||||
|
@ -69,7 +71,7 @@ public class NumberParserTest {
|
|||
{ 3, "𝟱.𝟭𝟰𝟮E-𝟯", "0", 13, 0.005142 },
|
||||
{ 3, "𝟱.𝟭𝟰𝟮e-𝟯", "0", 13, 0.005142 },
|
||||
{ 7, "5,142.50 Canadian dollars", "#,##,##0", 25, 5142.5 },
|
||||
// { 3, "a$ b5", "a ¤ b0", 6, 5.0 }, // TODO: Does not work
|
||||
// { 3, "a$ b5", "a ¤ b0", 6, 5.0 }, // TODO: Does not work
|
||||
{ 3, "📺1.23", "📺0;📻0", 6, 1.23 },
|
||||
{ 3, "📻1.23", "📺0;📻0", 6, -1.23 },
|
||||
{ 3, ".00", "0", 3, 0.0 },
|
||||
|
@ -81,7 +83,8 @@ public class NumberParserTest {
|
|||
String pattern = (String) cas[2];
|
||||
int expectedCharsConsumed = (Integer) cas[3];
|
||||
double resultDouble = (Double) cas[4];
|
||||
NumberParserImpl parser = NumberParserImpl.createParserFromPattern(pattern, false);
|
||||
NumberParserImpl parser = NumberParserImpl
|
||||
.createParserFromPattern(ULocale.ENGLISH, pattern, false);
|
||||
String message = "Input <" + input + "> Parser " + parser;
|
||||
|
||||
if (0 != (flags & 0x01)) {
|
||||
|
@ -104,7 +107,7 @@ public class NumberParserTest {
|
|||
|
||||
if (0 != (flags & 0x04)) {
|
||||
// Test with strict separators
|
||||
parser = NumberParserImpl.createParserFromPattern(pattern, true);
|
||||
parser = NumberParserImpl.createParserFromPattern(ULocale.ENGLISH, pattern, true);
|
||||
ParsedNumber resultObject = new ParsedNumber();
|
||||
parser.parse(input, true, resultObject);
|
||||
assertNotNull(message, resultObject.quantity);
|
||||
|
@ -113,4 +116,21 @@ public class NumberParserTest {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLocaleFi() {
|
||||
// This case is interesting because locale fi has NaN starting with 'e', the same as scientific
|
||||
NumberParserImpl parser = NumberParserImpl
|
||||
.createParserFromPattern(new ULocale("fi"), "0", false);
|
||||
|
||||
ParsedNumber resultObject = new ParsedNumber();
|
||||
parser.parse("epäluku", false, resultObject);
|
||||
assertTrue(resultObject.success());
|
||||
assertEquals(Double.NaN, resultObject.getNumber().doubleValue(), 0.0);
|
||||
|
||||
resultObject = new ParsedNumber();
|
||||
parser.parse("1.2e3", false, resultObject);
|
||||
assertTrue(resultObject.success());
|
||||
assertEquals(12000.0, resultObject.getNumber().doubleValue(), 0.0);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,107 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.dev.test.number;
|
||||
|
||||
import static com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.get;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache;
|
||||
import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.text.DecimalFormatSymbols;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
/**
|
||||
* @author sffc
|
||||
*
|
||||
*/
|
||||
public class UnicodeSetStaticCacheTest {
|
||||
|
||||
@Test
|
||||
public void testSetCoverage() {
|
||||
// Lenient comma/period should be supersets of strict comma/period;
|
||||
// it also makes the coverage logic cheaper.
|
||||
assertTrue("COMMA should be superset of STRICT_COMMA",
|
||||
get(Key.COMMA).containsAll(get(Key.STRICT_COMMA)));
|
||||
assertTrue("PERIOD should be superset of STRICT_PERIOD",
|
||||
get(Key.PERIOD).containsAll(get(Key.STRICT_PERIOD)));
|
||||
|
||||
UnicodeSet decimals = get(Key.STRICT_COMMA).cloneAsThawed().addAll(get(Key.STRICT_PERIOD))
|
||||
.freeze();
|
||||
UnicodeSet grouping = decimals.cloneAsThawed().addAll(get(Key.OTHER_GROUPING_SEPARATORS))
|
||||
.freeze();
|
||||
UnicodeSet plusSign = get(Key.PLUS_SIGN);
|
||||
UnicodeSet minusSign = get(Key.MINUS_SIGN);
|
||||
UnicodeSet percent = get(Key.PERCENT_SIGN);
|
||||
UnicodeSet permille = get(Key.PERMILLE_SIGN);
|
||||
UnicodeSet infinity = get(Key.INFINITY);
|
||||
UnicodeSet nanLead = get(Key.NAN_LEAD);
|
||||
UnicodeSet scientificLead = get(Key.SCIENTIFIC_LEAD);
|
||||
|
||||
for (ULocale locale : ULocale.getAvailableLocales()) {
|
||||
DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(locale);
|
||||
|
||||
assertInSet(locale, decimals, dfs.getDecimalSeparatorString());
|
||||
assertInSet(locale, grouping, dfs.getGroupingSeparatorString());
|
||||
assertInSet(locale, plusSign, dfs.getPlusSignString());
|
||||
assertInSet(locale, minusSign, dfs.getMinusSignString());
|
||||
assertInSet(locale, percent, dfs.getPercentString());
|
||||
assertInSet(locale, permille, dfs.getPerMillString());
|
||||
assertInSet(locale, infinity, dfs.getInfinity());
|
||||
assertInSet(locale, nanLead, dfs.getNaN().codePointAt(0));
|
||||
assertInSet(locale, nanLead, UCharacter.foldCase(dfs.getNaN(), true).codePointAt(0));
|
||||
assertInSet(locale,
|
||||
scientificLead,
|
||||
UCharacter.foldCase(dfs.getExponentSeparator(), true).codePointAt(0));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFrozen() {
|
||||
for (Key key : Key.values()) {
|
||||
assertTrue(get(key).isFrozen());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnions() {
|
||||
for (Key key1 : Key.values()) {
|
||||
for (Key key2 : Key.values()) {
|
||||
Key key3 = UnicodeSetStaticCache.unionOf(key1, key2);
|
||||
if (key3 != null) {
|
||||
UnicodeSet s1 = get(key1);
|
||||
UnicodeSet s2 = get(key2);
|
||||
UnicodeSet s3 = get(key3);
|
||||
UnicodeSet s1_s2 = s1.cloneAsThawed().addAll(s2);
|
||||
assertEquals(key1 + "/" + key2 + "/" + key3, s1_s2, s3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void assertInSet(ULocale locale, UnicodeSet set, String str) {
|
||||
if (str.codePointCount(0, str.length()) != 1) {
|
||||
// Ignore locale strings with more than one code point (usually a bidi mark)
|
||||
return;
|
||||
}
|
||||
assertInSet(locale, set, str.codePointAt(0));
|
||||
}
|
||||
|
||||
static void assertInSet(ULocale locale, UnicodeSet set, int cp) {
|
||||
// If this test case fails, add the specified code point to the corresponding set in
|
||||
// UnicodeSetStaticCache.java
|
||||
assertTrue(
|
||||
locale
|
||||
+ " U+"
|
||||
+ Integer.toHexString(cp)
|
||||
+ " ("
|
||||
+ UCharacter.toString(cp)
|
||||
+ ") should be in "
|
||||
+ set,
|
||||
set.contains(cp));
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue