This commit is contained in:
Shane F. Carr 2025-04-03 23:38:20 +01:00 committed by GitHub
commit 490aced60d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 186 additions and 4 deletions

View file

@ -187,7 +187,12 @@ void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
U_ASSERT(gUnicodeSets[PERCENT_SIGN] != nullptr);
U_ASSERT(gUnicodeSets[PERMILLE_SIGN] != nullptr);
// The following don't currently have parseLenients in data.
U_ASSERT(gUnicodeSets[INFINITY_SIGN] == nullptr);
gUnicodeSets[INFINITY_SIGN] = new UnicodeSet(u"[∞]", status);
U_ASSERT(gUnicodeSets[APPROXIMATELY_SIGN] == nullptr);
// This set of characters was manually curated from the values of the approximatelySign element of CLDR common/main/*.xml files.
gUnicodeSets[APPROXIMATELY_SIGN] = new UnicodeSet(u"[~≈≃約]", status);
if (U_FAILURE(status)) { return; }
U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr);

View file

@ -56,6 +56,7 @@ enum Key {
PERCENT_SIGN,
PERMILLE_SIGN,
INFINITY_SIGN,
APPROXIMATELY_SIGN,
// Currency Symbols
DOLLAR_SIGN,

View file

@ -86,6 +86,9 @@ void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp,
case TYPE_PLUS_SIGN:
addMatcher(fWarehouse.plusSign());
break;
case TYPE_APPROXIMATELY_SIGN:
addMatcher(fWarehouse.approximatelySign());
break;
case TYPE_PERCENT:
addMatcher(fWarehouse.percent());
break;
@ -97,6 +100,7 @@ void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp,
case TYPE_CURRENCY_TRIPLE:
case TYPE_CURRENCY_QUAD:
case TYPE_CURRENCY_QUINT:
case TYPE_CURRENCY_OVERFLOW:
// All currency symbols use the same matcher
addMatcher(fWarehouse.currency(status));
break;
@ -142,6 +146,10 @@ NumberParseMatcher& AffixTokenMatcherWarehouse::plusSign() {
return fPlusSign = {fSetupData->dfs, true};
}
// Rebuilds the warehouse-owned approximately-sign matcher from the setup
// data's symbols and hands back a reference to it.
NumberParseMatcher& AffixTokenMatcherWarehouse::approximatelySign() {
    // The second constructor argument is allowTrailing.
    fApproximatelySign = {fSetupData->dfs, true};
    return fApproximatelySign;
}
// Rebuilds the warehouse-owned percent matcher from fSetupData->dfs and
// hands back a reference to it.
NumberParseMatcher& AffixTokenMatcherWarehouse::percent() {
    fPercent = {fSetupData->dfs};
    return fPercent;
}

View file

@ -89,6 +89,8 @@ class U_I18N_API AffixTokenMatcherWarehouse : public UMemory {
NumberParseMatcher& plusSign();
NumberParseMatcher& approximatelySign();
NumberParseMatcher& percent();
NumberParseMatcher& permille();
@ -108,6 +110,7 @@ class U_I18N_API AffixTokenMatcherWarehouse : public UMemory {
// NOTE: These are default-constructed and should not be used until initialized.
MinusSignMatcher fMinusSign;
PlusSignMatcher fPlusSign;
ApproximatelySignMatcher fApproximatelySign;
PercentMatcher fPercent;
PermilleMatcher fPermille;
CombinedCurrencyMatcher fCurrency;

View file

@ -65,6 +65,7 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
parser->addMatcher(parser->fLocalMatchers.decimal = {symbols, grouper, parseFlags});
parser->addMatcher(parser->fLocalMatchers.minusSign = {symbols, false});
parser->addMatcher(parser->fLocalMatchers.plusSign = {symbols, false});
parser->addMatcher(parser->fLocalMatchers.approximatelySign = {symbols, false});
parser->addMatcher(parser->fLocalMatchers.percent = {symbols});
parser->addMatcher(parser->fLocalMatchers.permille = {symbols});
parser->addMatcher(parser->fLocalMatchers.nan = {symbols});
@ -164,6 +165,7 @@ NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatPr
if (!isStrict) {
parser->addMatcher(parser->fLocalMatchers.plusSign = {symbols, false});
parser->addMatcher(parser->fLocalMatchers.minusSign = {symbols, false});
parser->addMatcher(parser->fLocalMatchers.approximatelySign = {symbols, false});
}
parser->addMatcher(parser->fLocalMatchers.nan = {symbols});
parser->addMatcher(parser->fLocalMatchers.infinity = {symbols});

View file

@ -79,6 +79,7 @@ class U_I18N_API NumberParserImpl : public MutableMatcherCollection, public UMem
PercentMatcher percent;
PermilleMatcher permille;
PlusSignMatcher plusSign;
ApproximatelySignMatcher approximatelySign;
DecimalMatcher decimal;
ScientificMatcher scientific;
CombinedCurrencyMatcher currency;

View file

@ -195,4 +195,18 @@ void PlusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) const
}
ApproximatelySignMatcher::ApproximatelySignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing)
: SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kApproximatelySignSymbol), unisets::APPROXIMATELY_SIGN),
fAllowTrailing(allowTrailing) {
}
// Reports whether this matcher should be skipped for the current parse state.
// A matcher that allows trailing signs is never disabled; otherwise it is
// disabled as soon as the result reports that a number has been seen.
bool ApproximatelySignMatcher::isDisabled(const ParsedNumber& result) const {
    if (fAllowTrailing) {
        return false;
    }
    return result.seenNumber();
}
// Invoked when the approximately sign has been matched. The only effect is
// to record the segment's consumed characters on the result; no other field
// of the result is touched here.
void ApproximatelySignMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
result.setCharsConsumed(segment);
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -163,6 +163,23 @@ class U_I18N_API PlusSignMatcher : public SymbolMatcher {
bool fAllowTrailing;
};
// Exported as U_I18N_API for tests
//
// SymbolMatcher specialization for the "approximately" sign. It is built from
// a DecimalFormatSymbols instance plus a flag saying whether the sign may
// still be matched after a number has been seen.
class U_I18N_API ApproximatelySignMatcher : public SymbolMatcher {
  public:
    ApproximatelySignMatcher() = default;  // WARNING: Leaves the object in an unusable state

    // dfs: source of the approximately-sign symbol.
    // allowTrailing: when false, the matcher disables itself once a number has been seen.
    ApproximatelySignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing);

  protected:
    // True when trailing matches are not allowed and the result has already seen a number.
    bool isDisabled(const ParsedNumber& result) const override;

    // Records the consumed characters of the segment on the result.
    void accept(StringSegment& segment, ParsedNumber& result) const override;

  private:
    // Default-initialized so that a default-constructed matcher never holds
    // an indeterminate bool.
    bool fAllowTrailing = false;
};
} // namespace numparse::impl
U_NAMESPACE_END

View file

@ -123,6 +123,10 @@ void NumberParserTest::testBasic() {
{3, u" 1,234", u"a0", 35, 1234.}, // should not hang
{3, u"NaN", u"0", 3, NAN},
{3, u"NaN E5", u"0", 6, NAN},
{3, u"~100", u"~0", 4, 100.0},
{3, u" ~ 100", u"~0", 6, 100.0},
{3, u"≈100", u"~0", 4, 100.0},
{3, u"100≈", u"~0", 3, 100.0},
{3, u"0", u"0", 1, 0.0}};
parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
@ -180,6 +184,10 @@ void NumberParserTest::testBasic() {
assertEquals("Strict Parse failed: " + message,
cas.expectedResultDouble, resultObject.getDouble(status));
}
if (status.errDataIfFailureAndReset("parsing test failed")) {
continue;
}
}
}

View file

@ -156,6 +156,7 @@ void NumberFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n
TESTCASE_AUTO(TestFormatAttributes);
TESTCASE_AUTO(TestFieldPositionIterator);
TESTCASE_AUTO(TestDecimal);
TESTCASE_AUTO(TestDecimalFormatParse7E);
TESTCASE_AUTO(TestCurrencyFractionDigits);
TESTCASE_AUTO(TestExponentParse);
TESTCASE_AUTO(TestExplicitParents);
@ -6981,6 +6982,29 @@ void NumberFormatTest::TestDecimal() {
}
void NumberFormatTest::TestDecimalFormatParse7E() {
UErrorCode status = U_ZERO_ERROR;
UnicodeString testdata = u"~";
icu::Formattable result;
icu::DecimalFormat dfmt(testdata, status);
if (U_SUCCESS(status)) {
dfmt.parse(testdata, result, status);
}
// Test basic behavior
status = U_ZERO_ERROR;
dfmt = icu::DecimalFormat(u"~0", status);
ASSERT_SUCCESS(status);
dfmt.parse(u"200", result, status);
ASSERT_EQUALS(status, U_INVALID_FORMAT_ERROR);
status = U_ZERO_ERROR;
dfmt.parse(u"≈200", result, status);
ASSERT_SUCCESS(status);
if (result.getInt64() != 200) {
errln(UnicodeString(u"Got unexpected parse result: ") + DoubleToUnicodeString(result.getInt64()));
}
}
void NumberFormatTest::TestCurrencyFractionDigits() {
UErrorCode status = U_ZERO_ERROR;
UnicodeString text1, text2;
@ -10048,7 +10072,7 @@ void NumberFormatTest::Test13733_StrictAndLenient() {
parsedStrictValue = ca_strict->getNumber().getInt64();
}
assertEquals("Strict parse of " + inputString + " using " + patternString,
parsedStrictValue, cas.expectedStrictParse);
cas.expectedStrictParse, parsedStrictValue);
ppos.setIndex(0);
df.setLenient(true);
@ -10058,7 +10082,7 @@ void NumberFormatTest::Test13733_StrictAndLenient() {
parsedLenientValue = ca_lenient->getNumber().getInt64();
}
assertEquals("Lenient parse of " + inputString + " using " + patternString,
parsedLenientValue, cas.expectedLenientParse);
cas.expectedLenientParse, parsedLenientValue);
}
}

View file

@ -201,6 +201,7 @@ class NumberFormatTest: public CalendarTimeZoneTest {
void TestLenientParse();
void TestDecimal();
void TestDecimalFormatParse7E();
void TestCurrencyFractionDigits();
void TestExponentParse();

View file

@ -3682,6 +3682,32 @@ public class NumberFormatTest extends CoreTestFmwk {
}
}
/**
 * Exercises DecimalFormat with the approximately sign ('~', U+007E).
 *
 * First, "~" is used as both pattern and input and must be rejected with a
 * ParseException. Then, with pattern "~0" in strict mode, "200" (no sign)
 * must be rejected while "≈200" must parse to 200.
 */
@Test
public void TestDecimalFormatParse7E() {
    String testdata = "~";
    DecimalFormat dfmt = new DecimalFormat(testdata);
    try {
        dfmt.parse(testdata);
        errln("parsing ~ should fail with a handled exception");
    } catch (ParseException e) {
        // Expected: a lone sign is not a parseable number.
    }

    // Test basic behavior
    dfmt = new DecimalFormat("~0");
    dfmt.setParseStrict(true);
    try {
        dfmt.parse("200");
        errln("parsing 200 should fail");
    } catch (ParseException e) {
        // Expected: the sign required by the pattern is missing.
    }
    try {
        Number result = dfmt.parse("≈200");
        // Expected value first, actual second, matching the other assertions in this file.
        assertEquals("parsing with approximately should succeed", 200, result.longValue());
    } catch (ParseException e) {
        errln(e.toString());
    }
}
/*
* Testing currency driven max/min fraction digits problem
* reported by ticket#7282
@ -6917,7 +6943,7 @@ public class NumberFormatTest extends CoreTestFmwk {
parsedStrictValue = ca_strict.getNumber().intValue();
}
assertEquals("Strict parse of " + inputString + " using " + patternString,
parsedStrictValue, expectedStrictParse);
expectedStrictParse, parsedStrictValue);
ppos.setIndex(0);
df.setParseStrict(false);
@ -6926,7 +6952,7 @@ public class NumberFormatTest extends CoreTestFmwk {
parsedLenientValue = ca_lenient.getNumber().intValue();
}
assertEquals("Strict parse of " + inputString + " using " + patternString,
parsedLenientValue, expectedLenientParse);
expectedLenientParse, parsedLenientValue);
}
}

View file

@ -128,6 +128,10 @@ public class NumberParserTest extends CoreTestFmwk {
{ 3, " 1,234", "a0", 35, 1234. }, // should not hang
{ 3, "NaN", "0", 3, Double.NaN },
{ 3, "NaN E5", "0", 6, Double.NaN },
{ 3, "~100", "~0", 4, 100.0 },
{ 3, " ~ 100", "~0", 6, 100.0 },
{ 3, "≈100", "~0", 4, 100.0 },
{ 3, "100≈", "~0", 3, 100.0 },
{ 3, "0", "0", 1, 0.0 } };
int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE

View file

@ -49,6 +49,7 @@ public class StaticUnicodeSets {
PERCENT_SIGN,
PERMILLE_SIGN,
INFINITY_SIGN,
APPROXIMATELY_SIGN,
// Currency Symbols
DOLLAR_SIGN,
@ -263,7 +264,10 @@ public class StaticUnicodeSets {
assert unicodeSets.containsKey(Key.PERCENT_SIGN);
assert unicodeSets.containsKey(Key.PERMILLE_SIGN);
// The following don't currently have parseLenients in data.
unicodeSets.put(Key.INFINITY_SIGN, new UnicodeSet("[∞]").freeze());
// This set of characters was manually curated from the values of the approximatelySign element of CLDR common/main/*.xml files.
unicodeSets.put(Key.APPROXIMATELY_SIGN, new UnicodeSet("[~≈≃約]").freeze());
assert unicodeSets.containsKey(Key.DOLLAR_SIGN);
assert unicodeSets.containsKey(Key.POUND_SIGN);

View file

@ -76,6 +76,9 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok
case AffixUtils.TYPE_PLUS_SIGN:
addMatcher(factory.plusSign());
break;
case AffixUtils.TYPE_APPROXIMATELY_SIGN:
addMatcher(factory.approximatelySign());
break;
case AffixUtils.TYPE_PERCENT:
addMatcher(factory.percent());
break;
@ -87,6 +90,7 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok
case AffixUtils.TYPE_CURRENCY_TRIPLE:
case AffixUtils.TYPE_CURRENCY_QUAD:
case AffixUtils.TYPE_CURRENCY_QUINT:
case AffixUtils.TYPE_CURRENCY_OVERFLOW:
// All currency symbols use the same matcher
addMatcher(factory.currency());
break;

View file

@ -26,6 +26,10 @@ public class AffixTokenMatcherFactory {
return PlusSignMatcher.getInstance(symbols, true);
}
/**
 * Obtains the approximately-sign matcher for this factory's symbols, with
 * trailing matches allowed.
 */
public ApproximatelySignMatcher approximatelySign() {
    final boolean allowTrailing = true;
    return ApproximatelySignMatcher.getInstance(symbols, allowTrailing);
}
/** Obtains the percent matcher for this factory's symbols. */
public PercentMatcher percent() {
return PercentMatcher.getInstance(symbols);
}

View file

@ -0,0 +1,54 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package com.ibm.icu.impl.number.parse;
import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.DecimalFormatSymbols;
/**
 * Symbol matcher for the "approximately" sign.
 *
 * Two shared instances backed by {@link StaticUnicodeSets.Key#APPROXIMATELY_SIGN}
 * cover the case where the locale's sign is already in that set; any other
 * sign string gets a dedicated instance.
 *
 * @author sffc
 */
public class ApproximatelySignMatcher extends SymbolMatcher {

    // DEFAULT must be initialized first: the String-based constructor reads DEFAULT.uniSet.
    private static final ApproximatelySignMatcher DEFAULT = new ApproximatelySignMatcher(false);
    private static final ApproximatelySignMatcher DEFAULT_ALLOW_TRAILING = new ApproximatelySignMatcher(true);

    /**
     * Returns a matcher for the approximately sign of {@code symbols}.
     *
     * @param symbols
     *            source of the approximately-sign string
     * @param allowTrailing
     *            whether the sign may still match once a number has been seen
     * @return a shared instance when the sign string is contained in the default set, otherwise a
     *         new instance for that string
     */
    public static ApproximatelySignMatcher getInstance(DecimalFormatSymbols symbols, boolean allowTrailing) {
        String approxSign = symbols.getApproximatelySignString();
        if (!DEFAULT.uniSet.contains(approxSign)) {
            return new ApproximatelySignMatcher(approxSign, allowTrailing);
        }
        if (allowTrailing) {
            return DEFAULT_ALLOW_TRAILING;
        }
        return DEFAULT;
    }

    private final boolean trailingAllowed;

    /** Custom sign string, paired with the default instance's set. */
    private ApproximatelySignMatcher(String symbolString, boolean allowTrailing) {
        super(symbolString, DEFAULT.uniSet);
        trailingAllowed = allowTrailing;
    }

    /** Shared-instance constructor keyed on the static approximately-sign set. */
    private ApproximatelySignMatcher(boolean allowTrailing) {
        super(StaticUnicodeSets.Key.APPROXIMATELY_SIGN);
        trailingAllowed = allowTrailing;
    }

    /** Disabled only when trailing matches are not allowed and a number has already been seen. */
    @Override
    protected boolean isDisabled(ParsedNumber result) {
        if (trailingAllowed) {
            return false;
        }
        return result.seenNumber();
    }

    /** Records the consumed characters of the segment on the result. */
    @Override
    protected void accept(StringSegment segment, ParsedNumber result) {
        result.setCharsConsumed(segment);
    }

    @Override
    public String toString() {
        return "<ApproximatelySignMatcher>";
    }
}

View file

@ -59,6 +59,7 @@ public class NumberParserImpl {
parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
parser.addMatcher(MinusSignMatcher.getInstance(symbols, false));
parser.addMatcher(PlusSignMatcher.getInstance(symbols, false));
parser.addMatcher(ApproximatelySignMatcher.getInstance(symbols, false));
parser.addMatcher(PercentMatcher.getInstance(symbols));
parser.addMatcher(PermilleMatcher.getInstance(symbols));
parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
@ -226,6 +227,7 @@ public class NumberParserImpl {
if (parseMode == ParseMode.LENIENT) {
parser.addMatcher(PlusSignMatcher.getInstance(symbols, false));
parser.addMatcher(MinusSignMatcher.getInstance(symbols, false));
parser.addMatcher(ApproximatelySignMatcher.getInstance(symbols, false));
}
parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
parser.addMatcher(InfinityMatcher.getInstance(symbols));