diff --git a/icu4c/source/i18n/numparse_impl.cpp b/icu4c/source/i18n/numparse_impl.cpp index 4348d86c6d6..99d19df455f 100644 --- a/icu4c/source/i18n/numparse_impl.cpp +++ b/icu4c/source/i18n/numparse_impl.cpp @@ -29,8 +29,8 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString& auto* parser = new NumberParserImpl(parseFlags, true); DecimalFormatSymbols symbols(locale, status); -// IgnorablesMatcher* ignorables = IgnorablesMatcher.getDefault(); -// + IgnorablesMatcher* ignorables = new IgnorablesMatcher(unisets::DEFAULT_IGNORABLES); + // MatcherFactory factory = new MatcherFactory(); // factory.currency = Currency.getInstance("USD"); // factory.symbols = symbols; @@ -45,10 +45,13 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString& Grouper grouper = Grouper::forStrategy(UNUM_GROUPING_AUTO); grouper.setLocaleData(patternInfo, locale); -// parser.addMatcher({ignorables, false}); + parser->addAndAdoptMatcher(ignorables); parser->addAndAdoptMatcher(new DecimalMatcher(symbols, grouper, parseFlags)); parser->addAndAdoptMatcher(new MinusSignMatcher(symbols, false)); -// parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags)); + parser->addAndAdoptMatcher(new PlusSignMatcher(symbols, false)); + parser->addAndAdoptMatcher(new PercentMatcher(symbols)); + parser->addAndAdoptMatcher(new PermilleMatcher(symbols)); + parser->addAndAdoptMatcher(new NanMatcher(symbols)); // parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags)); // parser.addMatcher(CurrencyTrieMatcher.getInstance(locale)); // parser.addMatcher(new RequireNumberMatcher()); diff --git a/icu4c/source/i18n/numparse_symbols.cpp b/icu4c/source/i18n/numparse_symbols.cpp index 8d1631256c5..5fabd2fb17f 100644 --- a/icu4c/source/i18n/numparse_symbols.cpp +++ b/icu4c/source/i18n/numparse_symbols.cpp @@ -16,7 +16,6 @@ using namespace icu::numparse::impl; SymbolMatcher::SymbolMatcher(const UnicodeString& symbolString, unisets::Key key) { fUniSet = unisets::get(key); - fOwnsUniSet = false; if (fUniSet->contains(symbolString)) { fString.setToBogus(); } else { @@ -24,13 +23,6 @@ SymbolMatcher::SymbolMatcher(const UnicodeString& symbolString, unisets::Key key } } -SymbolMatcher::~SymbolMatcher() { - if (fOwnsUniSet) { - delete fUniSet; - fUniSet = nullptr; - } -} - const UnicodeSet* SymbolMatcher::getSet() { return fUniSet; } @@ -76,14 +68,30 @@ const UnicodeSet* SymbolMatcher::getLeadCodePoints() const { } -MinusSignMatcher::MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing) : SymbolMatcher( - dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol), - unisets::MINUS_SIGN), fAllowTrailing(allowTrailing) { +IgnorablesMatcher::IgnorablesMatcher(unisets::Key key) + : SymbolMatcher({}, key) { +} + +bool IgnorablesMatcher::isFlexible() const { + return true; +} + +bool IgnorablesMatcher::isDisabled(const ParsedNumber&) const { + return false; +} + +void IgnorablesMatcher::accept(StringSegment&, ParsedNumber&) const { + // No-op +} + + +MinusSignMatcher::MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing) + : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol), unisets::MINUS_SIGN), + fAllowTrailing(allowTrailing) { } bool MinusSignMatcher::isDisabled(const ParsedNumber& result) const { - return 0 != (result.flags & FLAG_NEGATIVE) || - (fAllowTrailing ? false : result.seenNumber()); + return 0 != (result.flags & FLAG_NEGATIVE) || (fAllowTrailing ? false : result.seenNumber()); } void MinusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) const { @@ -92,4 +100,85 @@ void MinusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) cons } +NanMatcher::NanMatcher(const DecimalFormatSymbols& dfs) + : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kNaNSymbol), unisets::EMPTY) { +} + +const UnicodeSet* NanMatcher::getLeadCodePoints() const { + // Overriding this here to allow use of statically allocated sets + int leadCp = fString.char32At(0); + const UnicodeSet* s = unisets::get(unisets::NAN_LEAD); + if (s->contains(leadCp)) { + return new UnicodeSet(*s); + } else { + return SymbolMatcher::getLeadCodePoints(); + } +} + +bool NanMatcher::isDisabled(const ParsedNumber& result) const { + return result.seenNumber(); +} + +void NanMatcher::accept(StringSegment& segment, ParsedNumber& result) const { + result.flags |= FLAG_NAN; + result.setCharsConsumed(segment); +} + + +PercentMatcher::PercentMatcher(const DecimalFormatSymbols& dfs) + : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kPercentSymbol), unisets::PERCENT_SIGN) { +} + +void PercentMatcher::postProcess(ParsedNumber& result) const { + SymbolMatcher::postProcess(result); + if (0 != (result.flags & FLAG_PERCENT) && !result.quantity.bogus) { + result.quantity.adjustMagnitude(-2); + } +} + +bool PercentMatcher::isDisabled(const ParsedNumber& result) const { + return 0 != (result.flags & FLAG_PERCENT); +} + +void PercentMatcher::accept(StringSegment& segment, ParsedNumber& result) const { + result.flags |= FLAG_PERCENT; + result.setCharsConsumed(segment); +} + + +PermilleMatcher::PermilleMatcher(const DecimalFormatSymbols& dfs) + : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol), unisets::PERMILLE_SIGN) { +} + +void PermilleMatcher::postProcess(ParsedNumber& result) const { + SymbolMatcher::postProcess(result); + if (0 != (result.flags & FLAG_PERMILLE) && !result.quantity.bogus) { + result.quantity.adjustMagnitude(-3); + } +} + +bool PermilleMatcher::isDisabled(const ParsedNumber& result) const { + return 0 != (result.flags & FLAG_PERMILLE); +} + +void PermilleMatcher::accept(StringSegment& segment, ParsedNumber& result) const { + result.flags |= FLAG_PERMILLE; + result.setCharsConsumed(segment); +} + + +PlusSignMatcher::PlusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing) + : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol), unisets::PLUS_SIGN), + fAllowTrailing(allowTrailing) { +} + +bool PlusSignMatcher::isDisabled(const ParsedNumber& result) const { + return fAllowTrailing ? false : result.seenNumber(); +} + +void PlusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) const { + result.setCharsConsumed(segment); +} + + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/numparse_symbols.h b/icu4c/source/i18n/numparse_symbols.h index d730ef57535..4a17d67a44d 100644 --- a/icu4c/source/i18n/numparse_symbols.h +++ b/icu4c/source/i18n/numparse_symbols.h @@ -17,8 +17,6 @@ namespace impl { class SymbolMatcher : public NumberParseMatcher, public UMemory { public: - ~SymbolMatcher() override; - const UnicodeSet* getSet(); bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; @@ -31,13 +29,25 @@ class SymbolMatcher : public NumberParseMatcher, public UMemory { protected: UnicodeString fString; - const UnicodeSet* fUniSet; - bool fOwnsUniSet; + const UnicodeSet* fUniSet; // a reference from numparse_unisets.h; never owned SymbolMatcher(const UnicodeString& symbolString, unisets::Key key); }; +class IgnorablesMatcher : public SymbolMatcher { + public: + explicit IgnorablesMatcher(unisets::Key key); + + bool isFlexible() const override; + + protected: + bool isDisabled(const ParsedNumber& result) const override; + + void accept(StringSegment& segment, ParsedNumber& result) const override; +}; + + class MinusSignMatcher : public SymbolMatcher { public: MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing); @@ -52,6 +62,59 @@ class MinusSignMatcher : public SymbolMatcher { }; +class NanMatcher : public SymbolMatcher { + public: + explicit NanMatcher(const DecimalFormatSymbols& dfs); + + const UnicodeSet* getLeadCodePoints() const override; + + protected: + bool isDisabled(const ParsedNumber& result) const override; + + void accept(StringSegment& segment, ParsedNumber& result) const override; +}; + + +class PercentMatcher : public SymbolMatcher { + public: + explicit PercentMatcher(const DecimalFormatSymbols& dfs); + + void postProcess(ParsedNumber& result) const override; + + protected: + bool isDisabled(const ParsedNumber& result) const override; + + void accept(StringSegment& segment, ParsedNumber& result) const override; +}; + + +class PermilleMatcher : public SymbolMatcher { + public: + explicit PermilleMatcher(const DecimalFormatSymbols& dfs); + + void postProcess(ParsedNumber& result) const override; + + protected: + bool isDisabled(const ParsedNumber& result) const override; + + void accept(StringSegment& segment, ParsedNumber& result) const override; +}; + + +class PlusSignMatcher : public SymbolMatcher { + public: + PlusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing); + + protected: + bool isDisabled(const ParsedNumber& result) const override; + + void accept(StringSegment& segment, ParsedNumber& result) const override; + + private: + bool fAllowTrailing; +}; + + } // namespace impl } // namespace numparse U_NAMESPACE_END diff --git a/icu4c/source/i18n/numparse_unisets.cpp b/icu4c/source/i18n/numparse_unisets.cpp index 8477870e29d..a7d1fbdba26 100644 --- a/icu4c/source/i18n/numparse_unisets.cpp +++ b/icu4c/source/i18n/numparse_unisets.cpp @@ -58,6 +58,8 @@ void U_CALLCONV initNumberParseUniSets(UErrorCode &status) { ucln_i18n_registerCleanup(UCLN_I18N_NUMPARSE_UNISETS, cleanupNumberParseUnitSets); #define NEW_UNISET(pattern, status) new UnicodeSet(UnicodeString(pattern), status) + gUnicodeSets[EMPTY] = new UnicodeSet(); + // BiDi characters are skipped over and ignored at any point in the string, even in strict mode. gUnicodeSets[BIDI] = NEW_UNISET(u"[[\\u200E\\u200F\\u061C]]", status); diff --git a/icu4c/source/i18n/numparse_unisets.h b/icu4c/source/i18n/numparse_unisets.h index 1d923613e98..27f609dc5d9 100644 --- a/icu4c/source/i18n/numparse_unisets.h +++ b/icu4c/source/i18n/numparse_unisets.h @@ -15,6 +15,8 @@ namespace impl { namespace unisets { enum Key { + EMPTY, + // Ignorables BIDI, WHITESPACE, diff --git a/icu4c/source/test/intltest/intltest.cpp b/icu4c/source/test/intltest/intltest.cpp index c45913796a4..c7d67565b23 100644 --- a/icu4c/source/test/intltest/intltest.cpp +++ b/icu4c/source/test/intltest/intltest.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include "unicode/ctest.h" // for str_timeDelta #include "unicode/curramt.h" @@ -1998,7 +1999,8 @@ UBool IntlTest::assertEquals(const char* message, UBool IntlTest::assertEquals(const char* message, double expected, double actual) { - if (expected != actual) { + bool bothNaN = std::isnan(expected) && std::isnan(actual); + if (expected != actual && !bothNaN) { errln((UnicodeString)"FAIL: " + message + "; got " + actual + "; expected " + expected); diff --git a/icu4c/source/test/intltest/numbertest_parse.cpp b/icu4c/source/test/intltest/numbertest_parse.cpp index c594a493adc..1fcaa1b0c93 100644 --- a/icu4c/source/test/intltest/numbertest_parse.cpp +++ b/icu4c/source/test/intltest/numbertest_parse.cpp @@ -41,6 +41,9 @@ void NumberParserTest::testBasic() { {3, u"๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏx", u"0", 10, 51423.}, {3, u" ๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"0", 11, 51423.}, {3, u"๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ ", u"0", 10, 51423.}, + {7, u"51,423", u"#,##,##0", 6, 51423.}, + {7, u" 51,423", u"#,##,##0", 7, 51423.}, + {7, u"51,423 ", u"#,##,##0", 6, 51423.}, {7, u"๐Ÿฑ๐Ÿญ,๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"#,##,##0", 11, 51423.}, {7, u"๐Ÿณ,๐Ÿด๐Ÿต,๐Ÿฑ๐Ÿญ,๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"#,##,##0", 19, 78951423.}, {7, u"๐Ÿณ๐Ÿด,๐Ÿต๐Ÿฑ๐Ÿญ.๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"#,##,##0", 18, 78951.423}, @@ -48,8 +51,16 @@ void NumberParserTest::testBasic() { {7, u"๐Ÿณ๐Ÿด,๐Ÿฌ๐Ÿฌ๐Ÿฌ.๐Ÿฌ๐Ÿฌ๐Ÿฌ", u"#,##,##0", 18, 78000.}, {7, u"๐Ÿณ๐Ÿด,๐Ÿฌ๐Ÿฌ๐Ÿฌ.๐Ÿฌ๐Ÿฎ๐Ÿฏ", u"#,##,##0", 18, 78000.023}, {7, u"๐Ÿณ๐Ÿด.๐Ÿฌ๐Ÿฌ๐Ÿฌ.๐Ÿฌ๐Ÿฎ๐Ÿฏ", u"#,##,##0", 11, 78.}, - {3, u"-๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ", u"0", 11, -51423.}, - {3, u"-๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ-", u"0", 11, -51423.}, + {3, u"-51423", u"0", 6, -51423.}, + {3, u"51423-", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after + {3, u"+51423", u"0", 6, 51423.}, + {3, u"51423+", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after + {3, u"%51423", u"0", 6, 514.23}, + {3, u"51423%", u"0", 6, 514.23}, + {3, u"51423%%", u"0", 6, 514.23}, + {3, u"โ€ฐ51423", u"0", 6, 51.423}, + {3, u"51423โ€ฐ", u"0", 6, 51.423}, + {3, u"51423โ€ฐโ€ฐ", u"0", 6, 51.423}, // {3, u"a51423US dollars", u"a0ยคยคยค", 16, 51423.}, // {3, u"a 51423 US dollars", u"a0ยคยคยค", 18, 51423.}, // {3, u"514.23 USD", u"ยค0", 10, 514.23}, @@ -77,12 +88,11 @@ void NumberParserTest::testBasic() { // {3, u"a$ b5", u"a ยค b0", 5, 5.0}, // {3, u"๐Ÿ“บ1.23", u"๐Ÿ“บ0;๐Ÿ“ป0", 6, 1.23}, // {3, u"๐Ÿ“ป1.23", u"๐Ÿ“บ0;๐Ÿ“ป0", 6, -1.23}, -// {3, u".00", u"0", 3, 0.0}, -// {3, u" 0", u"a0", 31, 0.0}, // should not hang -// {3, u"NaN", u"0", 3, NAN}, -// {3, u"NaN E5", u"0", 3, NAN}, -// {3, u"0", u"0", 1, 0.0} - }; + {3, u".00", u"0", 3, 0.0}, + {3, u" 1,234", u"a0", 35, 1234.}, // should not hang + {3, u"NaN", u"0", 3, NAN}, + {3, u"NaN E5", u"0", 3, NAN}, + {3, u"0", u"0", 1, 0.0}}; parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES; for (auto cas : cases) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java index 4f9d6c0f325..ae5650c3119 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java @@ -88,6 +88,9 @@ public class NumberParserImpl { parser.addMatcher(ignorables); parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags)); parser.addMatcher(MinusSignMatcher.getInstance(symbols, false)); + parser.addMatcher(PlusSignMatcher.getInstance(symbols, false)); + parser.addMatcher(PercentMatcher.getInstance(symbols)); + parser.addMatcher(PermilleMatcher.getInstance(symbols)); parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags)); parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper)); parser.addMatcher(CurrencyTrieMatcher.getInstance(locale)); diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java index 8c57dd6e1d9..541ead1945a 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java @@ -47,6 +47,9 @@ public class NumberParserTest { { 3, "๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏx", "0", 10, 51423. }, { 3, " ๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ", "0", 11, 51423. }, { 3, "๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ ", "0", 10, 51423. }, + { 7, "51,423", "#,##,##0", 6, 51423. }, + { 7, " 51,423", "#,##,##0", 7, 51423. }, + { 7, "51,423 ", "#,##,##0", 6, 51423. }, { 7, "๐Ÿฑ๐Ÿญ,๐Ÿฐ๐Ÿฎ๐Ÿฏ", "#,##,##0", 11, 51423. }, { 7, "๐Ÿณ,๐Ÿด๐Ÿต,๐Ÿฑ๐Ÿญ,๐Ÿฐ๐Ÿฎ๐Ÿฏ", "#,##,##0", 19, 78951423. }, { 7, "๐Ÿณ๐Ÿด,๐Ÿต๐Ÿฑ๐Ÿญ.๐Ÿฐ๐Ÿฎ๐Ÿฏ", "#,##,##0", 18, 78951.423 }, @@ -54,8 +57,16 @@ public class NumberParserTest { { 7, "๐Ÿณ๐Ÿด,๐Ÿฌ๐Ÿฌ๐Ÿฌ.๐Ÿฌ๐Ÿฌ๐Ÿฌ", "#,##,##0", 18, 78000. }, { 7, "๐Ÿณ๐Ÿด,๐Ÿฌ๐Ÿฌ๐Ÿฌ.๐Ÿฌ๐Ÿฎ๐Ÿฏ", "#,##,##0", 18, 78000.023 }, { 7, "๐Ÿณ๐Ÿด.๐Ÿฌ๐Ÿฌ๐Ÿฌ.๐Ÿฌ๐Ÿฎ๐Ÿฏ", "#,##,##0", 11, 78. }, - { 3, "-๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ", "0", 11, -51423. }, - { 3, "-๐Ÿฑ๐Ÿญ๐Ÿฐ๐Ÿฎ๐Ÿฏ-", "0", 11, -51423. }, + { 3, "-51423", "0", 6, -51423. }, + { 3, "51423-", "0", 5, 51423. }, // plus and minus sign by default do NOT match after + { 3, "+51423", "0", 6, 51423. }, + { 3, "51423+", "0", 5, 51423. }, // plus and minus sign by default do NOT match after + { 3, "%51423", "0", 6, 514.23 }, + { 3, "51423%", "0", 6, 514.23 }, + { 3, "51423%%", "0", 6, 514.23 }, + { 3, "โ€ฐ51423", "0", 6, 51.423 }, + { 3, "51423โ€ฐ", "0", 6, 51.423 }, + { 3, "51423โ€ฐโ€ฐ", "0", 6, 51.423 }, { 3, "a51423US dollars", "a0ยคยคยค", 16, 51423. }, { 3, "a 51423 US dollars", "a0ยคยคยค", 18, 51423. }, { 3, "514.23 USD", "ยค0", 10, 514.23 }, @@ -84,7 +95,7 @@ public class NumberParserTest { { 3, "๐Ÿ“บ1.23", "๐Ÿ“บ0;๐Ÿ“ป0", 6, 1.23 }, { 3, "๐Ÿ“ป1.23", "๐Ÿ“บ0;๐Ÿ“ป0", 6, -1.23 }, { 3, ".00", "0", 3, 0.0 }, - { 3, " 0", "a0", 31, 0.0 }, // should not hang + { 3, " 1,234", "a0", 35, 1234. }, // should not hang { 3, "NaN", "0", 3, Double.NaN }, { 3, "NaN E5", "0", 3, Double.NaN }, { 3, "0", "0", 1, 0.0 } };