mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 05:25:34 +00:00
Merge cb354b18e4
into 770c4b8042
This commit is contained in:
commit
490aced60d
18 changed files with 186 additions and 4 deletions
|
@ -187,7 +187,12 @@ void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
|
|||
U_ASSERT(gUnicodeSets[PERCENT_SIGN] != nullptr);
|
||||
U_ASSERT(gUnicodeSets[PERMILLE_SIGN] != nullptr);
|
||||
|
||||
// The following don't currently have parseLenients in data.
|
||||
U_ASSERT(gUnicodeSets[INFINITY_SIGN] == nullptr);
|
||||
gUnicodeSets[INFINITY_SIGN] = new UnicodeSet(u"[∞]", status);
|
||||
U_ASSERT(gUnicodeSets[APPROXIMATELY_SIGN] == nullptr);
|
||||
// This set of characters was manually curated from the values of the approximatelySign element of CLDR common/main/*.xml files.
|
||||
gUnicodeSets[APPROXIMATELY_SIGN] = new UnicodeSet(u"[∼~≈≃約]", status);
|
||||
if (U_FAILURE(status)) { return; }
|
||||
|
||||
U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr);
|
||||
|
|
|
@ -56,6 +56,7 @@ enum Key {
|
|||
PERCENT_SIGN,
|
||||
PERMILLE_SIGN,
|
||||
INFINITY_SIGN,
|
||||
APPROXIMATELY_SIGN,
|
||||
|
||||
// Currency Symbols
|
||||
DOLLAR_SIGN,
|
||||
|
|
|
@ -86,6 +86,9 @@ void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp,
|
|||
case TYPE_PLUS_SIGN:
|
||||
addMatcher(fWarehouse.plusSign());
|
||||
break;
|
||||
case TYPE_APPROXIMATELY_SIGN:
|
||||
addMatcher(fWarehouse.approximatelySign());
|
||||
break;
|
||||
case TYPE_PERCENT:
|
||||
addMatcher(fWarehouse.percent());
|
||||
break;
|
||||
|
@ -97,6 +100,7 @@ void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp,
|
|||
case TYPE_CURRENCY_TRIPLE:
|
||||
case TYPE_CURRENCY_QUAD:
|
||||
case TYPE_CURRENCY_QUINT:
|
||||
case TYPE_CURRENCY_OVERFLOW:
|
||||
// All currency symbols use the same matcher
|
||||
addMatcher(fWarehouse.currency(status));
|
||||
break;
|
||||
|
@ -142,6 +146,10 @@ NumberParseMatcher& AffixTokenMatcherWarehouse::plusSign() {
|
|||
return fPlusSign = {fSetupData->dfs, true};
|
||||
}
|
||||
|
||||
// Rebuilds the warehouse-owned approximately-sign matcher from the setup data's
// DecimalFormatSymbols, passing true for allowTrailing, and returns a reference
// to that member.
NumberParseMatcher& AffixTokenMatcherWarehouse::approximatelySign() {
    fApproximatelySign = ApproximatelySignMatcher(fSetupData->dfs, true);
    return fApproximatelySign;
}
|
||||
|
||||
// Rebuilds the warehouse-owned percent matcher from the setup data's
// DecimalFormatSymbols and returns a reference to that member.
NumberParseMatcher& AffixTokenMatcherWarehouse::percent() {
    fPercent = PercentMatcher(fSetupData->dfs);
    return fPercent;
}
|
||||
|
|
|
@ -89,6 +89,8 @@ class U_I18N_API AffixTokenMatcherWarehouse : public UMemory {
|
|||
|
||||
NumberParseMatcher& plusSign();
|
||||
|
||||
NumberParseMatcher& approximatelySign();
|
||||
|
||||
NumberParseMatcher& percent();
|
||||
|
||||
NumberParseMatcher& permille();
|
||||
|
@ -108,6 +110,7 @@ class U_I18N_API AffixTokenMatcherWarehouse : public UMemory {
|
|||
// NOTE: These are default-constructed and should not be used until initialized.
|
||||
MinusSignMatcher fMinusSign;
|
||||
PlusSignMatcher fPlusSign;
|
||||
ApproximatelySignMatcher fApproximatelySign;
|
||||
PercentMatcher fPercent;
|
||||
PermilleMatcher fPermille;
|
||||
CombinedCurrencyMatcher fCurrency;
|
||||
|
|
|
@ -65,6 +65,7 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
|
|||
parser->addMatcher(parser->fLocalMatchers.decimal = {symbols, grouper, parseFlags});
|
||||
parser->addMatcher(parser->fLocalMatchers.minusSign = {symbols, false});
|
||||
parser->addMatcher(parser->fLocalMatchers.plusSign = {symbols, false});
|
||||
parser->addMatcher(parser->fLocalMatchers.approximatelySign = {symbols, false});
|
||||
parser->addMatcher(parser->fLocalMatchers.percent = {symbols});
|
||||
parser->addMatcher(parser->fLocalMatchers.permille = {symbols});
|
||||
parser->addMatcher(parser->fLocalMatchers.nan = {symbols});
|
||||
|
@ -164,6 +165,7 @@ NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatPr
|
|||
if (!isStrict) {
|
||||
parser->addMatcher(parser->fLocalMatchers.plusSign = {symbols, false});
|
||||
parser->addMatcher(parser->fLocalMatchers.minusSign = {symbols, false});
|
||||
parser->addMatcher(parser->fLocalMatchers.approximatelySign = {symbols, false});
|
||||
}
|
||||
parser->addMatcher(parser->fLocalMatchers.nan = {symbols});
|
||||
parser->addMatcher(parser->fLocalMatchers.infinity = {symbols});
|
||||
|
|
|
@ -79,6 +79,7 @@ class U_I18N_API NumberParserImpl : public MutableMatcherCollection, public UMem
|
|||
PercentMatcher percent;
|
||||
PermilleMatcher permille;
|
||||
PlusSignMatcher plusSign;
|
||||
ApproximatelySignMatcher approximatelySign;
|
||||
DecimalMatcher decimal;
|
||||
ScientificMatcher scientific;
|
||||
CombinedCurrencyMatcher currency;
|
||||
|
|
|
@ -195,4 +195,18 @@ void PlusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) const
|
|||
}
|
||||
|
||||
|
||||
// Constructs the matcher by handing SymbolMatcher the approximately-sign string
// taken from `dfs` together with the unisets::APPROXIMATELY_SIGN key.
// `allowTrailing` is stored and consulted by isDisabled().
ApproximatelySignMatcher::ApproximatelySignMatcher(
        const DecimalFormatSymbols& dfs,
        bool allowTrailing)
    : SymbolMatcher(
          dfs.getConstSymbol(DecimalFormatSymbols::kApproximatelySignSymbol),
          unisets::APPROXIMATELY_SIGN),
      fAllowTrailing(allowTrailing) {}
|
||||
|
||||
bool ApproximatelySignMatcher::isDisabled(const ParsedNumber& result) const {
|
||||
return !fAllowTrailing && result.seenNumber();
|
||||
}
|
||||
|
||||
// Records a matched approximately sign on `result`. No flag or numeric value is
// set here; the only update is the consumed-character bookkeeping for `segment`.
void ApproximatelySignMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
    result.setCharsConsumed(segment);
}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
|
|
@ -163,6 +163,23 @@ class U_I18N_API PlusSignMatcher : public SymbolMatcher {
|
|||
bool fAllowTrailing;
|
||||
};
|
||||
|
||||
|
||||
// Exported as U_I18N_API for tests
|
||||
class U_I18N_API ApproximatelySignMatcher : public SymbolMatcher {
|
||||
public:
|
||||
ApproximatelySignMatcher() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
||||
ApproximatelySignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing);
|
||||
|
||||
protected:
|
||||
bool isDisabled(const ParsedNumber& result) const override;
|
||||
|
||||
void accept(StringSegment& segment, ParsedNumber& result) const override;
|
||||
|
||||
private:
|
||||
bool fAllowTrailing;
|
||||
};
|
||||
|
||||
} // namespace numparse::impl
|
||||
U_NAMESPACE_END
|
||||
|
||||
|
|
|
@ -123,6 +123,10 @@ void NumberParserTest::testBasic() {
|
|||
{3, u" 1,234", u"a0", 35, 1234.}, // should not hang
|
||||
{3, u"NaN", u"0", 3, NAN},
|
||||
{3, u"NaN E5", u"0", 6, NAN},
|
||||
{3, u"~100", u"~0", 4, 100.0},
|
||||
{3, u" ~ 100", u"~0", 6, 100.0},
|
||||
{3, u"≈100", u"~0", 4, 100.0},
|
||||
{3, u"100≈", u"~0", 3, 100.0},
|
||||
{3, u"0", u"0", 1, 0.0}};
|
||||
|
||||
parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
|
||||
|
@ -180,6 +184,10 @@ void NumberParserTest::testBasic() {
|
|||
assertEquals("Strict Parse failed: " + message,
|
||||
cas.expectedResultDouble, resultObject.getDouble(status));
|
||||
}
|
||||
|
||||
if (status.errDataIfFailureAndReset("parsing test failed")) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -156,6 +156,7 @@ void NumberFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n
|
|||
TESTCASE_AUTO(TestFormatAttributes);
|
||||
TESTCASE_AUTO(TestFieldPositionIterator);
|
||||
TESTCASE_AUTO(TestDecimal);
|
||||
TESTCASE_AUTO(TestDecimalFormatParse7E);
|
||||
TESTCASE_AUTO(TestCurrencyFractionDigits);
|
||||
TESTCASE_AUTO(TestExponentParse);
|
||||
TESTCASE_AUTO(TestExplicitParents);
|
||||
|
@ -6981,6 +6982,29 @@ void NumberFormatTest::TestDecimal() {
|
|||
|
||||
}
|
||||
|
||||
void NumberFormatTest::TestDecimalFormatParse7E() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeString testdata = u"~";
|
||||
icu::Formattable result;
|
||||
icu::DecimalFormat dfmt(testdata, status);
|
||||
if (U_SUCCESS(status)) {
|
||||
dfmt.parse(testdata, result, status);
|
||||
}
|
||||
|
||||
// Test basic behavior
|
||||
status = U_ZERO_ERROR;
|
||||
dfmt = icu::DecimalFormat(u"~0", status);
|
||||
ASSERT_SUCCESS(status);
|
||||
dfmt.parse(u"200", result, status);
|
||||
ASSERT_EQUALS(status, U_INVALID_FORMAT_ERROR);
|
||||
status = U_ZERO_ERROR;
|
||||
dfmt.parse(u"≈200", result, status);
|
||||
ASSERT_SUCCESS(status);
|
||||
if (result.getInt64() != 200) {
|
||||
errln(UnicodeString(u"Got unexpected parse result: ") + DoubleToUnicodeString(result.getInt64()));
|
||||
}
|
||||
}
|
||||
|
||||
void NumberFormatTest::TestCurrencyFractionDigits() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeString text1, text2;
|
||||
|
@ -10048,7 +10072,7 @@ void NumberFormatTest::Test13733_StrictAndLenient() {
|
|||
parsedStrictValue = ca_strict->getNumber().getInt64();
|
||||
}
|
||||
assertEquals("Strict parse of " + inputString + " using " + patternString,
|
||||
parsedStrictValue, cas.expectedStrictParse);
|
||||
cas.expectedStrictParse, parsedStrictValue);
|
||||
|
||||
ppos.setIndex(0);
|
||||
df.setLenient(true);
|
||||
|
@ -10058,7 +10082,7 @@ void NumberFormatTest::Test13733_StrictAndLenient() {
|
|||
parsedLenientValue = ca_lenient->getNumber().getInt64();
|
||||
}
|
||||
assertEquals("Lenient parse of " + inputString + " using " + patternString,
|
||||
parsedLenientValue, cas.expectedLenientParse);
|
||||
cas.expectedLenientParse, parsedLenientValue);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -201,6 +201,7 @@ class NumberFormatTest: public CalendarTimeZoneTest {
|
|||
void TestLenientParse();
|
||||
|
||||
void TestDecimal();
|
||||
void TestDecimalFormatParse7E();
|
||||
void TestCurrencyFractionDigits();
|
||||
|
||||
void TestExponentParse();
|
||||
|
|
|
@ -3682,6 +3682,32 @@ public class NumberFormatTest extends CoreTestFmwk {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestDecimalFormatParse7E() {
|
||||
String testdata = "~";
|
||||
DecimalFormat dfmt = new DecimalFormat(testdata);
|
||||
try {
|
||||
dfmt.parse(testdata);
|
||||
errln("parsing ~ should fail with a handled exception");
|
||||
} catch (ParseException e) {
|
||||
}
|
||||
|
||||
// Test basic behavior
|
||||
dfmt = new DecimalFormat("~0");
|
||||
dfmt.setParseStrict(true);
|
||||
try {
|
||||
dfmt.parse("200");
|
||||
errln("parsing 200 should fail");
|
||||
} catch (ParseException e) {
|
||||
}
|
||||
try {
|
||||
Number result = dfmt.parse("≈200");
|
||||
assertEquals("parsing with approximately should succeed", result.longValue(), 200);
|
||||
} catch (ParseException e) {
|
||||
errln(e.toString());
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Testing currency driven max/min fraction digits problem
|
||||
* reported by ticket#7282
|
||||
|
@ -6917,7 +6943,7 @@ public class NumberFormatTest extends CoreTestFmwk {
|
|||
parsedStrictValue = ca_strict.getNumber().intValue();
|
||||
}
|
||||
assertEquals("Strict parse of " + inputString + " using " + patternString,
|
||||
parsedStrictValue, expectedStrictParse);
|
||||
expectedStrictParse, parsedStrictValue);
|
||||
|
||||
ppos.setIndex(0);
|
||||
df.setParseStrict(false);
|
||||
|
@ -6926,7 +6952,7 @@ public class NumberFormatTest extends CoreTestFmwk {
|
|||
parsedLenientValue = ca_lenient.getNumber().intValue();
|
||||
}
|
||||
assertEquals("Strict parse of " + inputString + " using " + patternString,
|
||||
parsedLenientValue, expectedLenientParse);
|
||||
expectedLenientParse, parsedLenientValue);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -128,6 +128,10 @@ public class NumberParserTest extends CoreTestFmwk {
|
|||
{ 3, " 1,234", "a0", 35, 1234. }, // should not hang
|
||||
{ 3, "NaN", "0", 3, Double.NaN },
|
||||
{ 3, "NaN E5", "0", 6, Double.NaN },
|
||||
{ 3, "~100", "~0", 4, 100.0 },
|
||||
{ 3, " ~ 100", "~0", 6, 100.0 },
|
||||
{ 3, "≈100", "~0", 4, 100.0 },
|
||||
{ 3, "100≈", "~0", 3, 100.0 },
|
||||
{ 3, "0", "0", 1, 0.0 } };
|
||||
|
||||
int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE
|
||||
|
|
|
@ -49,6 +49,7 @@ public class StaticUnicodeSets {
|
|||
PERCENT_SIGN,
|
||||
PERMILLE_SIGN,
|
||||
INFINITY_SIGN,
|
||||
APPROXIMATELY_SIGN,
|
||||
|
||||
// Currency Symbols
|
||||
DOLLAR_SIGN,
|
||||
|
@ -263,7 +264,10 @@ public class StaticUnicodeSets {
|
|||
assert unicodeSets.containsKey(Key.PERCENT_SIGN);
|
||||
assert unicodeSets.containsKey(Key.PERMILLE_SIGN);
|
||||
|
||||
// The following don't currently have parseLenients in data.
|
||||
unicodeSets.put(Key.INFINITY_SIGN, new UnicodeSet("[∞]").freeze());
|
||||
// This set of characters was manually curated from the values of the approximatelySign element of CLDR common/main/*.xml files.
|
||||
unicodeSets.put(Key.APPROXIMATELY_SIGN, new UnicodeSet("[∼~≈≃約]").freeze());
|
||||
|
||||
assert unicodeSets.containsKey(Key.DOLLAR_SIGN);
|
||||
assert unicodeSets.containsKey(Key.POUND_SIGN);
|
||||
|
|
|
@ -76,6 +76,9 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok
|
|||
case AffixUtils.TYPE_PLUS_SIGN:
|
||||
addMatcher(factory.plusSign());
|
||||
break;
|
||||
case AffixUtils.TYPE_APPROXIMATELY_SIGN:
|
||||
addMatcher(factory.approximatelySign());
|
||||
break;
|
||||
case AffixUtils.TYPE_PERCENT:
|
||||
addMatcher(factory.percent());
|
||||
break;
|
||||
|
@ -87,6 +90,7 @@ public class AffixPatternMatcher extends SeriesMatcher implements AffixUtils.Tok
|
|||
case AffixUtils.TYPE_CURRENCY_TRIPLE:
|
||||
case AffixUtils.TYPE_CURRENCY_QUAD:
|
||||
case AffixUtils.TYPE_CURRENCY_QUINT:
|
||||
case AffixUtils.TYPE_CURRENCY_OVERFLOW:
|
||||
// All currency symbols use the same matcher
|
||||
addMatcher(factory.currency());
|
||||
break;
|
||||
|
|
|
@ -26,6 +26,10 @@ public class AffixTokenMatcherFactory {
|
|||
return PlusSignMatcher.getInstance(symbols, true);
|
||||
}
|
||||
|
||||
public ApproximatelySignMatcher approximatelySign() {
|
||||
return ApproximatelySignMatcher.getInstance(symbols, true);
|
||||
}
|
||||
|
||||
public PercentMatcher percent() {
|
||||
return PercentMatcher.getInstance(symbols);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
package com.ibm.icu.impl.number.parse;
|
||||
|
||||
import com.ibm.icu.impl.StaticUnicodeSets;
|
||||
import com.ibm.icu.impl.StringSegment;
|
||||
import com.ibm.icu.text.DecimalFormatSymbols;
|
||||
|
||||
/**
|
||||
* @author sffc
|
||||
*
|
||||
*/
|
||||
public class ApproximatelySignMatcher extends SymbolMatcher {
|
||||
|
||||
private static final ApproximatelySignMatcher DEFAULT = new ApproximatelySignMatcher(false);
|
||||
private static final ApproximatelySignMatcher DEFAULT_ALLOW_TRAILING = new ApproximatelySignMatcher(true);
|
||||
|
||||
public static ApproximatelySignMatcher getInstance(DecimalFormatSymbols symbols, boolean allowTrailing) {
|
||||
String symbolString = symbols.getApproximatelySignString();
|
||||
if (DEFAULT.uniSet.contains(symbolString)) {
|
||||
return allowTrailing ? DEFAULT_ALLOW_TRAILING : DEFAULT;
|
||||
} else {
|
||||
return new ApproximatelySignMatcher(symbolString, allowTrailing);
|
||||
}
|
||||
}
|
||||
|
||||
private final boolean allowTrailing;
|
||||
|
||||
private ApproximatelySignMatcher(String symbolString, boolean allowTrailing) {
|
||||
super(symbolString, DEFAULT.uniSet);
|
||||
this.allowTrailing = allowTrailing;
|
||||
}
|
||||
|
||||
private ApproximatelySignMatcher(boolean allowTrailing) {
|
||||
super(StaticUnicodeSets.Key.APPROXIMATELY_SIGN);
|
||||
this.allowTrailing = allowTrailing;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean isDisabled(ParsedNumber result) {
|
||||
return !allowTrailing && result.seenNumber();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void accept(StringSegment segment, ParsedNumber result) {
|
||||
result.setCharsConsumed(segment);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "<ApproximatelySignMatcher>";
|
||||
}
|
||||
|
||||
}
|
|
@ -59,6 +59,7 @@ public class NumberParserImpl {
|
|||
parser.addMatcher(DecimalMatcher.getInstance(symbols, grouper, parseFlags));
|
||||
parser.addMatcher(MinusSignMatcher.getInstance(symbols, false));
|
||||
parser.addMatcher(PlusSignMatcher.getInstance(symbols, false));
|
||||
parser.addMatcher(ApproximatelySignMatcher.getInstance(symbols, false));
|
||||
parser.addMatcher(PercentMatcher.getInstance(symbols));
|
||||
parser.addMatcher(PermilleMatcher.getInstance(symbols));
|
||||
parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
|
||||
|
@ -226,6 +227,7 @@ public class NumberParserImpl {
|
|||
if (parseMode == ParseMode.LENIENT) {
|
||||
parser.addMatcher(PlusSignMatcher.getInstance(symbols, false));
|
||||
parser.addMatcher(MinusSignMatcher.getInstance(symbols, false));
|
||||
parser.addMatcher(ApproximatelySignMatcher.getInstance(symbols, false));
|
||||
}
|
||||
parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
|
||||
parser.addMatcher(InfinityMatcher.getInstance(symbols));
|
||||
|
|
Loading…
Add table
Reference in a new issue