From 7942b58b81a342e686aede260d76752bc3d36505 Mon Sep 17 00:00:00 2001 From: Victor Chang Date: Mon, 13 Aug 2018 16:49:10 +0100 Subject: [PATCH] ICU-20623 Add java compatibility parse mode into DecimalFormat This internal parse mode is introduced for Android libcore. --- .../com/ibm/icu/impl/StaticUnicodeSets.java | 2 + .../impl/number/DecimalFormatProperties.java | 11 +- .../impl/number/parse/IgnorablesMatcher.java | 7 +- .../impl/number/parse/NumberParserImpl.java | 20 +++- .../icu/impl/number/parse/ParsingUtils.java | 1 + .../src/com/ibm/icu/text/DecimalFormat.java | 9 ++ .../NumberFormatJavaCompatilityTest.java | 111 ++++++++++++++++++ 7 files changed, 152 insertions(+), 9 deletions(-) create mode 100644 icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatJavaCompatilityTest.java diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/StaticUnicodeSets.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/StaticUnicodeSets.java index 18c8c9bf10a..f50814b78de 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/StaticUnicodeSets.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/StaticUnicodeSets.java @@ -24,6 +24,7 @@ import com.ibm.icu.util.UResourceBundle; */ public class StaticUnicodeSets { public static enum Key { + EMPTY, // Ignorables DEFAULT_IGNORABLES, STRICT_IGNORABLES, @@ -231,6 +232,7 @@ public class StaticUnicodeSets { } static { + unicodeSets.put(Key.EMPTY, new UnicodeSet("[]").freeze()); // These sets were decided after discussion with icu-design@. See tickets #13084 and #13309. // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property). 
unicodeSets.put(Key.DEFAULT_IGNORABLES, diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/DecimalFormatProperties.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/DecimalFormatProperties.java index 82bd2e0860c..6b6c561d6d8 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/DecimalFormatProperties.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/DecimalFormatProperties.java @@ -61,6 +61,13 @@ public class DecimalFormatProperties implements Cloneable, Serializable { * */ STRICT, + + /** + * Internal parse mode for increased compatibility with java.text.DecimalFormat. + * Used by Android libcore. To enable this feature, java.text.DecimalFormat holds an instance of + * ICU4J's DecimalFormat and enables it by calling setParseStrictMode(ParseMode.JAVA_COMPATIBILITY). + */ + JAVA_COMPATIBILITY, } // The setters in this class should NOT have any side-effects or perform any validation. It is @@ -1396,8 +1403,8 @@ public class DecimalFormatProperties implements Cloneable, Serializable { // Extra int for possible future use oos.writeInt(0); - ArrayList fieldsToSerialize = new ArrayList(); - ArrayList valuesToSerialize = new ArrayList(); + ArrayList fieldsToSerialize = new ArrayList<>(); + ArrayList valuesToSerialize = new ArrayList<>(); Field[] fields = DecimalFormatProperties.class.getDeclaredFields(); for (Field field : fields) { if (Modifier.isStatic(field.getModifiers())) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java index ac2b1e4443e..9bbb2896759 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/IgnorablesMatcher.java @@ -18,8 +18,13 @@ public class IgnorablesMatcher extends SymbolMatcher implements NumberParseMatch private static final IgnorablesMatcher STRICT = new IgnorablesMatcher( 
StaticUnicodeSets.get(StaticUnicodeSets.Key.STRICT_IGNORABLES)); + private static final IgnorablesMatcher JAVA_COMPATIBILITY = new IgnorablesMatcher( + StaticUnicodeSets.get(StaticUnicodeSets.Key.EMPTY)); + public static IgnorablesMatcher getInstance(int parseFlags) { - if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_STRICT_IGNORABLES)) { + if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_JAVA_COMPATIBILITY_IGNORABLES)) { + return JAVA_COMPATIBILITY; + } else if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_STRICT_IGNORABLES)) { return STRICT; } else { return DEFAULT; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java index 7178aa1086c..c347f90b452 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java @@ -145,7 +145,10 @@ public class NumberParserImpl { affixProvider = new CurrencyPluralInfoAffixProvider(properties.getCurrencyPluralInfo(), properties); } Currency currency = CustomSymbolCurrency.resolve(properties.getCurrency(), locale, symbols); - boolean isStrict = properties.getParseMode() == ParseMode.STRICT; + ParseMode parseMode = properties.getParseMode(); + if (parseMode == null) { + parseMode = ParseMode.LENIENT; + } Grouper grouper = Grouper.forProperties(properties); int parseFlags = 0; if (!properties.getParseCaseSensitive()) { @@ -160,7 +163,12 @@ public class NumberParserImpl { if (properties.getSignAlwaysShown()) { parseFlags |= ParsingUtils.PARSE_FLAG_PLUS_SIGN_ALLOWED; } - if (isStrict) { + if (parseMode == ParseMode.JAVA_COMPATIBILITY) { + parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_SEPARATORS; + parseFlags |= ParsingUtils.PARSE_FLAG_USE_FULL_AFFIXES; + parseFlags |= ParsingUtils.PARSE_FLAG_EXACT_AFFIX; + parseFlags |= ParsingUtils.PARSE_FLAG_JAVA_COMPATIBILITY_IGNORABLES; + } else if (parseMode == 
ParseMode.STRICT) { parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE; parseFlags |= ParsingUtils.PARSE_FLAG_STRICT_SEPARATORS; parseFlags |= ParsingUtils.PARSE_FLAG_USE_FULL_AFFIXES; @@ -210,10 +218,10 @@ public class NumberParserImpl { // ICU-TC meeting, April 11, 2018: accept percent/permille only if it is in the pattern, // and to maintain regressive behavior, divide by 100 even if no percent sign is present. - if (!isStrict && affixProvider.containsSymbolType(AffixUtils.TYPE_PERCENT)) { + if (parseMode == ParseMode.LENIENT && affixProvider.containsSymbolType(AffixUtils.TYPE_PERCENT)) { parser.addMatcher(PercentMatcher.getInstance(symbols)); } - if (!isStrict && affixProvider.containsSymbolType(AffixUtils.TYPE_PERMILLE)) { + if (parseMode == ParseMode.LENIENT && affixProvider.containsSymbolType(AffixUtils.TYPE_PERMILLE)) { parser.addMatcher(PermilleMatcher.getInstance(symbols)); } @@ -221,7 +229,7 @@ public class NumberParserImpl { /// OTHER STANDARD MATCHERS /// /////////////////////////////// - if (!isStrict) { + if (parseMode == ParseMode.LENIENT) { parser.addMatcher(PlusSignMatcher.getInstance(symbols, false)); parser.addMatcher(MinusSignMatcher.getInstance(symbols, false)); } @@ -243,7 +251,7 @@ public class NumberParserImpl { ////////////////// parser.addMatcher(new RequireNumberValidator()); - if (isStrict) { + if (parseMode != ParseMode.LENIENT) { parser.addMatcher(new RequireAffixValidator()); } if (parseCurrency) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java index 0240971af1d..c15b02f6ed2 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java @@ -26,6 +26,7 @@ public class ParsingUtils { public static final int PARSE_FLAG_NO_FOREIGN_CURRENCIES = 0x2000; public static final int 
PARSE_FLAG_ALLOW_INFINITE_RECURSION = 0x4000; public static final int PARSE_FLAG_STRICT_IGNORABLES = 0x8000; + public static final int PARSE_FLAG_JAVA_COMPATIBILITY_IGNORABLES = 0x10000; public static void putLeadCodePoints(UnicodeSet input, UnicodeSet output) { for (EntryRange range : input.ranges()) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormat.java index fe7ad50de7d..57e4708184d 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormat.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormat.java @@ -2194,6 +2194,15 @@ public class DecimalFormat extends NumberFormat { refreshFormatter(); } + /** + * Android libcore uses this internal method to set {@link ParseMode#JAVA_COMPATIBILITY}. + * @internal + */ + public synchronized void setParseStrictMode(ParseMode parseMode) { + properties.setParseMode(parseMode); + refreshFormatter(); + } + /** * {@inheritDoc} * diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatJavaCompatilityTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatJavaCompatilityTest.java new file mode 100644 index 00000000000..5b99167cd1c --- /dev/null +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/NumberFormatJavaCompatilityTest.java @@ -0,0 +1,111 @@ +// © 2019 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html#License +package com.ibm.icu.dev.test.format; + +import static org.junit.Assert.assertEquals; + +import java.text.ParsePosition; +import java.util.Locale; +import java.util.Objects; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +import com.ibm.icu.dev.test.TestUtil; +import com.ibm.icu.impl.number.DecimalFormatProperties.ParseMode; +import com.ibm.icu.text.DecimalFormat; +import com.ibm.icu.text.DecimalFormatSymbols; +import com.ibm.icu.util.ULocale; + +/** + * Test for {@link DecimalFormat} in {@link ParseMode#JAVA_COMPATIBILITY} mode. + */ +@RunWith(JUnit4.class) +public class NumberFormatJavaCompatilityTest { + + @Test + public void testIgnoreables() { + // Test bidi characters + assertParseError("0", "\u200e1"); + assertParsed("0", "1\u200e", 1); + assertParseError("0%", "\u200e1%"); + } + + @Test + public void testParseGroupingSeparator() { + // Test that grouping separator is optional when the group separator is specified + assertParsed("#,##0", "9,999", 9999); + assertParsed("#,##0", "9999", 9999); + assertParsed("#,###0", "9,9999", 99999); + + // Test that grouping size doesn't affect parsing at all + assertParsed("#,##0", "9,9999", 99999); + assertParsed("#,###0", "99,999", 99999); + + assertParsed("###0", "9999", 9999); + assertParsed("###0", "99999", 99999); + + // Test that grouping separator must not be present when the group separator is NOT specified + // Only the 1st character in front of separator , should be consumed. + assertParsed("###0", "9,9999", 9); + assertParsed("###0", "9,999", 9); + } + + @Test + public void testParseScienificNotation() { + assertParsed("0.###E0", "1E-3", 0.001); + assertParsed("0.###E0", "1E0", 1); + assertParsed("0.###E0", "1E3", 1000); + assertParsed("0.###E0", "1.111E3", 1111); + assertParsed("0.###E0", "1.1E3", 1100); + + // "0.###E0" is engineering notation, i.e. 
the exponent should be a multiple of 3 + // for formatting. But it shouldn't affect parsing. + assertParsed("0.###E0", "1E1", 10); + + // Test that exponent is not required for parsing + assertParsed("0.###E0", "1.1", 1.1); + assertParsed("0.###E0", "1100", 1100); + + // Test that the max of fraction, integer or significant digits doesn't affect parsing + // Note that the max of significant digits is 4 = min integer digits (1) + // + max fraction digits (3) + assertParsed("0.###E0", "1111.4E3", 1111400); + assertParsed("0.###E0", "1111.9999E3", 1111999.9); + } + + private void assertParseError(String pattern, String input) { + assertParsed(pattern, input, null); + } + + private void assertParsed(String pattern, String input, Number expected) { + assertParsedICU4J(pattern, input, expected); + + // Skip the OpenJDK test if the runtime is not OpenJDK + if (TestUtil.getJavaRuntimeName() != TestUtil.JavaRuntimeName.OpenJDK) { + return; + } + + assertParsedOpenJDK(pattern, input, expected); + } + + private void assertParsedICU4J(String pattern, String input, Number expected) { + DecimalFormat df = new DecimalFormat(pattern, new DecimalFormatSymbols(ULocale.US)); + df.setParseStrictMode(ParseMode.JAVA_COMPATIBILITY); + ParsePosition ppos = new ParsePosition(0); + Number actual = df.parse(input, ppos); + assertEquals(String.format("pattern: %s input: %s", pattern, input), + Objects.toString(expected), Objects.toString(actual)); + } + + private void assertParsedOpenJDK(String pattern, String input, Number expected) { + java.text.DecimalFormat df = new java.text.DecimalFormat(pattern, + new java.text.DecimalFormatSymbols(Locale.US)); + ParsePosition ppos = new ParsePosition(0); + Number actual = df.parse(input, ppos); + assertEquals(String.format("pattern: %s input: %s", pattern, input), + Objects.toString(expected), Objects.toString(actual)); + } + +}