From f17e5c0e0ee31b8e97422500c3cf66c92ff77325 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Tue, 25 Mar 2025 19:29:27 -0700 Subject: [PATCH 1/4] ICU-22885 Add parsing of approximately sign --- icu4c/source/common/static_unicode_sets.cpp | 4 +++ icu4c/source/common/static_unicode_sets.h | 1 + icu4c/source/i18n/numparse_affixes.cpp | 8 ++++++ icu4c/source/i18n/numparse_affixes.h | 3 ++ icu4c/source/i18n/numparse_impl.cpp | 1 + icu4c/source/i18n/numparse_impl.h | 1 + icu4c/source/i18n/numparse_symbols.cpp | 14 ++++++++++ icu4c/source/i18n/numparse_symbols.h | 17 +++++++++++ .../source/test/intltest/numbertest_parse.cpp | 8 ++++++ icu4c/source/test/intltest/numfmtst.cpp | 28 +++++++++++++++++-- icu4c/source/test/intltest/numfmtst.h | 1 + 11 files changed, 84 insertions(+), 2 deletions(-) diff --git a/icu4c/source/common/static_unicode_sets.cpp b/icu4c/source/common/static_unicode_sets.cpp index 0db5ea000d4..d1426947596 100644 --- a/icu4c/source/common/static_unicode_sets.cpp +++ b/icu4c/source/common/static_unicode_sets.cpp @@ -187,7 +187,11 @@ void U_CALLCONV initNumberParseUniSets(UErrorCode& status) { U_ASSERT(gUnicodeSets[PERCENT_SIGN] != nullptr); U_ASSERT(gUnicodeSets[PERMILLE_SIGN] != nullptr); + // The following don't currently have parseLenients in data. 
+ U_ASSERT(gUnicodeSets[INFINITY_SIGN] == nullptr); gUnicodeSets[INFINITY_SIGN] = new UnicodeSet(u"[∞]", status); + U_ASSERT(gUnicodeSets[APPROXIMATELY_SIGN] == nullptr); + gUnicodeSets[APPROXIMATELY_SIGN] = new UnicodeSet(u"[∼~≈≃約]", status); // this set was manually curated if (U_FAILURE(status)) { return; } U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr); diff --git a/icu4c/source/common/static_unicode_sets.h b/icu4c/source/common/static_unicode_sets.h index 5d90ce5908d..6441ea56c97 100644 --- a/icu4c/source/common/static_unicode_sets.h +++ b/icu4c/source/common/static_unicode_sets.h @@ -56,6 +56,7 @@ enum Key { PERCENT_SIGN, PERMILLE_SIGN, INFINITY_SIGN, + APPROXIMATELY_SIGN, // Currency Symbols DOLLAR_SIGN, diff --git a/icu4c/source/i18n/numparse_affixes.cpp b/icu4c/source/i18n/numparse_affixes.cpp index 8333600f9e2..94a2513a7f7 100644 --- a/icu4c/source/i18n/numparse_affixes.cpp +++ b/icu4c/source/i18n/numparse_affixes.cpp @@ -86,6 +86,9 @@ void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp, case TYPE_PLUS_SIGN: addMatcher(fWarehouse.plusSign()); break; + case TYPE_APPROXIMATELY_SIGN: + addMatcher(fWarehouse.approximatelySign()); + break; case TYPE_PERCENT: addMatcher(fWarehouse.percent()); break; @@ -97,6 +100,7 @@ void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp, case TYPE_CURRENCY_TRIPLE: case TYPE_CURRENCY_QUAD: case TYPE_CURRENCY_QUINT: + case TYPE_CURRENCY_OVERFLOW: // All currency symbols use the same matcher addMatcher(fWarehouse.currency(status)); break; @@ -142,6 +146,10 @@ NumberParseMatcher& AffixTokenMatcherWarehouse::plusSign() { return fPlusSign = {fSetupData->dfs, true}; } +NumberParseMatcher& AffixTokenMatcherWarehouse::approximatelySign() { + return fApproximatelySign = {fSetupData->dfs, true}; +} + NumberParseMatcher& AffixTokenMatcherWarehouse::percent() { return fPercent = {fSetupData->dfs}; } diff --git a/icu4c/source/i18n/numparse_affixes.h b/icu4c/source/i18n/numparse_affixes.h 
index a9e1c414e8c..c3136042100 100644 --- a/icu4c/source/i18n/numparse_affixes.h +++ b/icu4c/source/i18n/numparse_affixes.h @@ -89,6 +89,8 @@ class U_I18N_API AffixTokenMatcherWarehouse : public UMemory { NumberParseMatcher& plusSign(); + NumberParseMatcher& approximatelySign(); + NumberParseMatcher& percent(); NumberParseMatcher& permille(); @@ -108,6 +110,7 @@ class U_I18N_API AffixTokenMatcherWarehouse : public UMemory { // NOTE: These are default-constructed and should not be used until initialized. MinusSignMatcher fMinusSign; PlusSignMatcher fPlusSign; + ApproximatelySignMatcher fApproximatelySign; PercentMatcher fPercent; PermilleMatcher fPermille; CombinedCurrencyMatcher fCurrency; diff --git a/icu4c/source/i18n/numparse_impl.cpp b/icu4c/source/i18n/numparse_impl.cpp index 91c60747f21..5806e1edc25 100644 --- a/icu4c/source/i18n/numparse_impl.cpp +++ b/icu4c/source/i18n/numparse_impl.cpp @@ -65,6 +65,7 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString& parser->addMatcher(parser->fLocalMatchers.decimal = {symbols, grouper, parseFlags}); parser->addMatcher(parser->fLocalMatchers.minusSign = {symbols, false}); parser->addMatcher(parser->fLocalMatchers.plusSign = {symbols, false}); + parser->addMatcher(parser->fLocalMatchers.approximatelySign = {symbols, false}); parser->addMatcher(parser->fLocalMatchers.percent = {symbols}); parser->addMatcher(parser->fLocalMatchers.permille = {symbols}); parser->addMatcher(parser->fLocalMatchers.nan = {symbols}); diff --git a/icu4c/source/i18n/numparse_impl.h b/icu4c/source/i18n/numparse_impl.h index 4695a624d65..5ba922e83ab 100644 --- a/icu4c/source/i18n/numparse_impl.h +++ b/icu4c/source/i18n/numparse_impl.h @@ -79,6 +79,7 @@ class U_I18N_API NumberParserImpl : public MutableMatcherCollection, public UMem PercentMatcher percent; PermilleMatcher permille; PlusSignMatcher plusSign; + ApproximatelySignMatcher approximatelySign; DecimalMatcher decimal; ScientificMatcher scientific; 
CombinedCurrencyMatcher currency; diff --git a/icu4c/source/i18n/numparse_symbols.cpp b/icu4c/source/i18n/numparse_symbols.cpp index 608f4f5c8b0..4e3d0c5473e 100644 --- a/icu4c/source/i18n/numparse_symbols.cpp +++ b/icu4c/source/i18n/numparse_symbols.cpp @@ -195,4 +195,18 @@ void PlusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) const } +ApproximatelySignMatcher::ApproximatelySignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing) + : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kApproximatelySignSymbol), unisets::APPROXIMATELY_SIGN), + fAllowTrailing(allowTrailing) { +} + +bool ApproximatelySignMatcher::isDisabled(const ParsedNumber& result) const { + return !fAllowTrailing && result.seenNumber(); +} + +void ApproximatelySignMatcher::accept(StringSegment& segment, ParsedNumber& result) const { + result.setCharsConsumed(segment); +} + + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/numparse_symbols.h b/icu4c/source/i18n/numparse_symbols.h index 3bc07b2be39..f7810612bb5 100644 --- a/icu4c/source/i18n/numparse_symbols.h +++ b/icu4c/source/i18n/numparse_symbols.h @@ -163,6 +163,23 @@ class U_I18N_API PlusSignMatcher : public SymbolMatcher { bool fAllowTrailing; }; + +// Exported as U_I18N_API for tests +class U_I18N_API ApproximatelySignMatcher : public SymbolMatcher { + public: + ApproximatelySignMatcher() = default; // WARNING: Leaves the object in an unusable state + + ApproximatelySignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing); + + protected: + bool isDisabled(const ParsedNumber& result) const override; + + void accept(StringSegment& segment, ParsedNumber& result) const override; + + private: + bool fAllowTrailing; +}; + } // namespace numparse::impl U_NAMESPACE_END diff --git a/icu4c/source/test/intltest/numbertest_parse.cpp b/icu4c/source/test/intltest/numbertest_parse.cpp index a0264fc86db..2ce6daa9d76 100644 --- a/icu4c/source/test/intltest/numbertest_parse.cpp +++ 
b/icu4c/source/test/intltest/numbertest_parse.cpp @@ -123,6 +123,10 @@ void NumberParserTest::testBasic() { {3, u" 1,234", u"a0", 35, 1234.}, // should not hang {3, u"NaN", u"0", 3, NAN}, {3, u"NaN E5", u"0", 6, NAN}, + {3, u"~100", u"~0", 4, 100.0}, + {3, u" ~ 100", u"~0", 6, 100.0}, + {3, u"≈100", u"~0", 4, 100.0}, + {3, u"100≈", u"~0", 3, 100.0}, {3, u"0", u"0", 1, 0.0}}; parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES; @@ -180,6 +184,10 @@ void NumberParserTest::testBasic() { assertEquals("Strict Parse failed: " + message, cas.expectedResultDouble, resultObject.getDouble(status)); } + + if (status.errDataIfFailureAndReset("parsing test failed")) { + continue; + } } } diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp index 2c54a696c48..202a9f84f0f 100644 --- a/icu4c/source/test/intltest/numfmtst.cpp +++ b/icu4c/source/test/intltest/numfmtst.cpp @@ -156,6 +156,7 @@ void NumberFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n TESTCASE_AUTO(TestFormatAttributes); TESTCASE_AUTO(TestFieldPositionIterator); TESTCASE_AUTO(TestDecimal); + TESTCASE_AUTO(TestDecimalFormatParse7E); TESTCASE_AUTO(TestCurrencyFractionDigits); TESTCASE_AUTO(TestExponentParse); TESTCASE_AUTO(TestExplicitParents); @@ -6981,6 +6982,29 @@ void NumberFormatTest::TestDecimal() { } +void NumberFormatTest::TestDecimalFormatParse7E() { + UErrorCode status = U_ZERO_ERROR; + UnicodeString testdata = u"~"; + icu::Formattable result; + icu::DecimalFormat dfmt(testdata, status); + if (U_SUCCESS(status)) { + dfmt.parse(testdata, result, status); + } + + // Test basic behavior + status = U_ZERO_ERROR; + dfmt = icu::DecimalFormat(u"~0", status); + ASSERT_SUCCESS(status); + dfmt.parse(u"200", result, status); + ASSERT_EQUALS(status, U_INVALID_FORMAT_ERROR); + status = U_ZERO_ERROR; + dfmt.parse(u"≈200", result, status); + ASSERT_SUCCESS(status); + if (result.getInt64() != 200) { + errln(UnicodeString(u"Got 
unexpected parse result: ") + DoubleToUnicodeString(result.getInt64())); + } +} + void NumberFormatTest::TestCurrencyFractionDigits() { UErrorCode status = U_ZERO_ERROR; UnicodeString text1, text2; @@ -10048,7 +10072,7 @@ void NumberFormatTest::Test13733_StrictAndLenient() { parsedStrictValue = ca_strict->getNumber().getInt64(); } assertEquals("Strict parse of " + inputString + " using " + patternString, - parsedStrictValue, cas.expectedStrictParse); + cas.expectedStrictParse, parsedStrictValue); ppos.setIndex(0); df.setLenient(true); @@ -10058,7 +10082,7 @@ void NumberFormatTest::Test13733_StrictAndLenient() { parsedLenientValue = ca_lenient->getNumber().getInt64(); } assertEquals("Lenient parse of " + inputString + " using " + patternString, - parsedLenientValue, cas.expectedLenientParse); + cas.expectedLenientParse, parsedLenientValue); } } diff --git a/icu4c/source/test/intltest/numfmtst.h b/icu4c/source/test/intltest/numfmtst.h index 634536fff7c..bf2a9a0f945 100644 --- a/icu4c/source/test/intltest/numfmtst.h +++ b/icu4c/source/test/intltest/numfmtst.h @@ -201,6 +201,7 @@ class NumberFormatTest: public CalendarTimeZoneTest { void TestLenientParse(); void TestDecimal(); + void TestDecimalFormatParse7E(); void TestCurrencyFractionDigits(); void TestExponentParse(); From 226cfe18e404bebd49eb2d84b640161cac5878df Mon Sep 17 00:00:00 2001 From: "Shane F. 
Carr" Date: Wed, 26 Mar 2025 15:14:20 -0700 Subject: [PATCH 2/4] Port to Java --- .../icu/dev/test/format/NumberFormatTest.java | 30 ++++++++++- .../icu/dev/test/number/NumberParserTest.java | 4 ++ .../com/ibm/icu/impl/StaticUnicodeSets.java | 4 ++ .../number/parse/AffixPatternMatcher.java | 4 ++ .../parse/AffixTokenMatcherFactory.java | 4 ++ .../parse/ApproximatelySignMatcher.java | 54 +++++++++++++++++++ .../impl/number/parse/NumberParserImpl.java | 1 + 7 files changed, 99 insertions(+), 2 deletions(-) create mode 100644 icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/ApproximatelySignMatcher.java diff --git a/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/NumberFormatTest.java b/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/NumberFormatTest.java index 8b61dadc405..7f37ae37799 100644 --- a/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/NumberFormatTest.java +++ b/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/NumberFormatTest.java @@ -3682,6 +3682,32 @@ public class NumberFormatTest extends CoreTestFmwk { } } + @Test + public void TestDecimalFormatParse7E() { + String testdata = "~"; + DecimalFormat dfmt = new DecimalFormat(testdata); + try { + dfmt.parse(testdata); + errln("parsing ~ should fail with a handled exception"); + } catch (ParseException e) { + } + + // Test basic behavior + dfmt = new DecimalFormat("~0"); + dfmt.setParseStrict(true); + try { + dfmt.parse("200"); + errln("parsing 200 should fail"); + } catch (ParseException e) { + } + try { + Number result = dfmt.parse("≈200"); + assertEquals("parsing with approximately should succeed", 200, result.longValue()); + } catch (ParseException e) { + errln(e.toString()); + } + } + /* * Testing currency driven max/min fraction digits problem * reported by ticket#7282 @@ -6917,7 +6943,7 @@ public class NumberFormatTest extends CoreTestFmwk { parsedStrictValue = ca_strict.getNumber().intValue(); } assertEquals("Strict
parse of " + inputString + " using " + patternString, - parsedStrictValue, expectedStrictParse); + expectedStrictParse, parsedStrictValue); ppos.setIndex(0); df.setParseStrict(false); @@ -6926,7 +6952,7 @@ public class NumberFormatTest extends CoreTestFmwk { parsedLenientValue = ca_lenient.getNumber().intValue(); } assertEquals("Strict parse of " + inputString + " using " + patternString, - parsedLenientValue, expectedLenientParse); + expectedLenientParse, parsedLenientValue); } } diff --git a/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/number/NumberParserTest.java b/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/number/NumberParserTest.java index 1e0d18e8072..6db23f9056c 100644 --- a/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/number/NumberParserTest.java +++ b/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/number/NumberParserTest.java @@ -128,6 +128,10 @@ public class NumberParserTest extends CoreTestFmwk { { 3, " 1,234", "a0", 35, 1234. 
}, // should not hang { 3, "NaN", "0", 3, Double.NaN }, { 3, "NaN E5", "0", 6, Double.NaN }, + { 3, "~100", "~0", 4, 100.0 }, + { 3, " ~ 100", "~0", 6, 100.0 }, + { 3, "≈100", "~0", 4, 100.0 }, + { 3, "100≈", "~0", 3, 100.0 }, { 3, "0", "0", 1, 0.0 } }; int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/impl/StaticUnicodeSets.java b/icu4j/main/core/src/main/java/com/ibm/icu/impl/StaticUnicodeSets.java index ce10abc950a..9e1375a7628 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/impl/StaticUnicodeSets.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/impl/StaticUnicodeSets.java @@ -49,6 +49,7 @@ public class StaticUnicodeSets { PERCENT_SIGN, PERMILLE_SIGN, INFINITY_SIGN, + APPROXIMATELY_SIGN, // Currency Symbols DOLLAR_SIGN, @@ -263,7 +264,10 @@ public class StaticUnicodeSets { assert unicodeSets.containsKey(Key.PERCENT_SIGN); assert unicodeSets.containsKey(Key.PERMILLE_SIGN); + // The following don't currently have parseLenients in data. unicodeSets.put(Key.INFINITY_SIGN, new UnicodeSet("[∞]").freeze()); + // This set of characters was manually curated from the values of the approximatelySign element of CLDR common/main/*.xml files. 
+ unicodeSets.put(Key.APPROXIMATELY_SIGN, new UnicodeSet("[∼~≈≃約]").freeze()); assert unicodeSets.containsKey(Key.DOLLAR_SIGN); assert unicodeSets.containsKey(Key.POUND_SIGN); diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java b/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java index 48bb8673ce5..d0e1980c494 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/AffixPatternMatcher.java @@ -76,6 +76,9 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok case AffixUtils.TYPE_PLUS_SIGN: addMatcher(factory.plusSign()); break; + case AffixUtils.TYPE_APPROXIMATELY_SIGN: + addMatcher(factory.approximatelySign()); + break; case AffixUtils.TYPE_PERCENT: addMatcher(factory.percent()); break; @@ -87,6 +90,7 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok case AffixUtils.TYPE_CURRENCY_TRIPLE: case AffixUtils.TYPE_CURRENCY_QUAD: case AffixUtils.TYPE_CURRENCY_QUINT: + case AffixUtils.TYPE_CURRENCY_OVERFLOW: // All currency symbols use the same matcher addMatcher(factory.currency()); break; diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/AffixTokenMatcherFactory.java b/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/AffixTokenMatcherFactory.java index b4613db67fc..d1819243ca6 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/AffixTokenMatcherFactory.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/AffixTokenMatcherFactory.java @@ -26,6 +26,10 @@ public class AffixTokenMatcherFactory { return PlusSignMatcher.getInstance(symbols, true); } + public ApproximatelySignMatcher approximatelySign() { + return ApproximatelySignMatcher.getInstance(symbols, true); + } + public PercentMatcher percent() { return PercentMatcher.getInstance(symbols); } diff 
--git a/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/ApproximatelySignMatcher.java b/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/ApproximatelySignMatcher.java new file mode 100644 index 00000000000..28f9e75dd00 --- /dev/null +++ b/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/ApproximatelySignMatcher.java @@ -0,0 +1,54 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package com.ibm.icu.impl.number.parse; + +import com.ibm.icu.impl.StaticUnicodeSets; +import com.ibm.icu.impl.StringSegment; +import com.ibm.icu.text.DecimalFormatSymbols; + +/** + * {@link SymbolMatcher} for the approximately sign; when {@code allowTrailing} is false, {@link #isDisabled} reports true once the parsed result has seen a number. + * @author sffc + */ +public class ApproximatelySignMatcher extends SymbolMatcher { + + private static final ApproximatelySignMatcher DEFAULT = new ApproximatelySignMatcher(false); + private static final ApproximatelySignMatcher DEFAULT_ALLOW_TRAILING = new ApproximatelySignMatcher(true); + + public static ApproximatelySignMatcher getInstance(DecimalFormatSymbols symbols, boolean allowTrailing) { + String symbolString = symbols.getApproximatelySignString(); + if (DEFAULT.uniSet.contains(symbolString)) { + return allowTrailing ?
DEFAULT_ALLOW_TRAILING : DEFAULT; + } else { + return new ApproximatelySignMatcher(symbolString, allowTrailing); + } + } + + private final boolean allowTrailing; + + private ApproximatelySignMatcher(String symbolString, boolean allowTrailing) { + super(symbolString, DEFAULT.uniSet); + this.allowTrailing = allowTrailing; + } + + private ApproximatelySignMatcher(boolean allowTrailing) { + super(StaticUnicodeSets.Key.APPROXIMATELY_SIGN); + this.allowTrailing = allowTrailing; + } + + @Override + protected boolean isDisabled(ParsedNumber result) { + return !allowTrailing && result.seenNumber(); + } + + @Override + protected void accept(StringSegment segment, ParsedNumber result) { + result.setCharsConsumed(segment); + } + + @Override + public String toString() { + return "<ApproximatelySignMatcher>"; + } + +} diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/NumberParserImpl.java b/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/NumberParserImpl.java index 2afb04e360d..3964d0b1a4f 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/NumberParserImpl.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/NumberParserImpl.java @@ -59,6 +59,7 @@ public class NumberParserImpl { parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags)); parser.addMatcher(MinusSignMatcher.getInstance(symbols, false)); parser.addMatcher(PlusSignMatcher.getInstance(symbols, false)); + parser.addMatcher(ApproximatelySignMatcher.getInstance(symbols, false)); parser.addMatcher(PercentMatcher.getInstance(symbols)); parser.addMatcher(PermilleMatcher.getInstance(symbols)); parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags)); From 36a388d5bc503db77ccb0f18af891f2970455547 Mon Sep 17 00:00:00 2001 From: "Shane F.
Carr" Date: Wed, 26 Mar 2025 15:18:25 -0700 Subject: [PATCH 3/4] Add it as a standard matcher, C and J --- icu4c/source/i18n/numparse_impl.cpp | 1 + .../java/com/ibm/icu/impl/number/parse/NumberParserImpl.java | 1 + 2 files changed, 2 insertions(+) diff --git a/icu4c/source/i18n/numparse_impl.cpp b/icu4c/source/i18n/numparse_impl.cpp index 5806e1edc25..ee7a2cddb1c 100644 --- a/icu4c/source/i18n/numparse_impl.cpp +++ b/icu4c/source/i18n/numparse_impl.cpp @@ -165,6 +165,7 @@ NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatPr if (!isStrict) { parser->addMatcher(parser->fLocalMatchers.plusSign = {symbols, false}); parser->addMatcher(parser->fLocalMatchers.minusSign = {symbols, false}); + parser->addMatcher(parser->fLocalMatchers.approximatelySign = {symbols, false}); } parser->addMatcher(parser->fLocalMatchers.nan = {symbols}); parser->addMatcher(parser->fLocalMatchers.infinity = {symbols}); diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/NumberParserImpl.java b/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/NumberParserImpl.java index 3964d0b1a4f..e031b10c884 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/NumberParserImpl.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/impl/number/parse/NumberParserImpl.java @@ -227,6 +227,7 @@ public class NumberParserImpl { if (parseMode == ParseMode.LENIENT) { parser.addMatcher(PlusSignMatcher.getInstance(symbols, false)); parser.addMatcher(MinusSignMatcher.getInstance(symbols, false)); + parser.addMatcher(ApproximatelySignMatcher.getInstance(symbols, false)); } parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags)); parser.addMatcher(InfinityMatcher.getInstance(symbols)); From cb354b18e4ac9415b37bebfdc675faa90cddaac0 Mon Sep 17 00:00:00 2001 From: "Shane F. 
Carr" Date: Wed, 26 Mar 2025 15:19:08 -0700 Subject: [PATCH 4/4] Improve comment --- icu4c/source/common/static_unicode_sets.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/icu4c/source/common/static_unicode_sets.cpp b/icu4c/source/common/static_unicode_sets.cpp index d1426947596..716e7ecdccd 100644 --- a/icu4c/source/common/static_unicode_sets.cpp +++ b/icu4c/source/common/static_unicode_sets.cpp @@ -191,7 +191,8 @@ void U_CALLCONV initNumberParseUniSets(UErrorCode& status) { U_ASSERT(gUnicodeSets[INFINITY_SIGN] == nullptr); gUnicodeSets[INFINITY_SIGN] = new UnicodeSet(u"[∞]", status); U_ASSERT(gUnicodeSets[APPROXIMATELY_SIGN] == nullptr); - gUnicodeSets[APPROXIMATELY_SIGN] = new UnicodeSet(u"[∼~≈≃約]", status); // this set was manually curated + // This set of characters was manually curated from the values of the approximatelySign element of CLDR common/main/*.xml files. + gUnicodeSets[APPROXIMATELY_SIGN] = new UnicodeSet(u"[∼~≈≃約]", status); if (U_FAILURE(status)) { return; } U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr);