ICU-13736 Change number parsing to require locale currency when using non-currency parse function.

X-SVN-Rev: 41450
This commit is contained in:
Shane Carr 2018-05-24 04:35:16 +00:00
parent 09a8f5609d
commit 8e1dc1f3b1
17 changed files with 300 additions and 157 deletions

View file

@ -190,7 +190,7 @@ NumberParseMatcher& AffixTokenMatcherWarehouse::permille() {
}
// Re-initializes fCurrency from the warehouse's setup data (currency symbols,
// decimal format symbols, parse flags) and returns it as a NumberParseMatcher&.
// NOTE(review): this text is a rendered diff, so two return statements appear
// back to back. The first one (without parseFlags) looks like the removed
// pre-change line — confirm against the real file before treating it as live.
NumberParseMatcher& AffixTokenMatcherWarehouse::currency(UErrorCode& status) {
return fCurrency = {fSetupData->currencySymbols, fSetupData->dfs, status};
return fCurrency = {fSetupData->currencySymbols, fSetupData->dfs, fSetupData->parseFlags, status};
}
IgnorablesMatcher& AffixTokenMatcherWarehouse::ignorables() {

View file

@ -87,6 +87,7 @@ struct AffixTokenMatcherSetupData {
const DecimalFormatSymbols& dfs;
IgnorablesMatcher& ignorables;
const Locale& locale;
parse_flags_t parseFlags;
};

View file

@ -20,15 +20,25 @@ using namespace icu::numparse;
using namespace icu::numparse::impl;
CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols,
const DecimalFormatSymbols& dfs, UErrorCode& status)
CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs,
parse_flags_t parseFlags, UErrorCode& status)
: fCurrency1(currencySymbols.getCurrencySymbol(status)),
fCurrency2(currencySymbols.getIntlCurrencySymbol(status)),
fUseFullCurrencyData(0 == (parseFlags & PARSE_FLAG_NO_FOREIGN_CURRENCY)),
afterPrefixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, false, status)),
beforeSuffixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, true, status)),
fLocaleName(dfs.getLocale().getName(), -1, status) {
utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode());
// Pre-load the long names for the current locale and currency
// if we are parsing without the full currency data.
if (!fUseFullCurrencyData) {
for (int32_t i=0; i<StandardPlural::COUNT; i++) {
auto plural = static_cast<StandardPlural::Form>(i);
fLocalLongNames[i] = currencySymbols.getPluralName(plural, status);
}
}
// TODO: Figure out how to make this faster and re-enable.
// Computing the "lead code points" set for fastpathing is too slow to use in production.
// See http://bugs.icu-project.org/trac/ticket/13584
@ -83,47 +93,73 @@ CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UEr
// Attempts to match a currency at the current start of `segment`.
//
// Candidates are tried in this order:
//   1. fCurrency1, by case-sensitive prefix comparison;
//   2. fCurrency2, by case-sensitive prefix comparison;
//   3. when fUseFullCurrencyData is true, whatever uprv_parseCurrency() finds
//      for fLocaleName; otherwise the pre-loaded fLocalLongNames, taking the
//      longest name that is fully present at the start of the segment.
//
// On a match, the currency code is written to result.currencyCode (fCurrencyCode
// for cases 1, 2 and the long-name path; uprv_parseCurrency() fills it in
// directly on the full-data path), the segment offset is advanced past the
// matched text, and result.setCharsConsumed() is called.
//
// Returns maybeMore, which becomes true when a symbol overlap or the partial
// match length equals the remaining segment length, or when a local long name
// overlaps the segment at all.
//
// NOTE(review): this text is a rendered diff, so removed and added lines appear
// side by side without markers (the paired `return` statements, the repeated
// segmentString / uprv_parseCurrency() sequence, the duplicated
// `if (U_SUCCESS(status) ...` line and the trailing
// `return overlap1 == ...`). Confirm against the real file which lines are live.
bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber& result,
UErrorCode& status) const {
bool maybeMore = false;
// Candidate 1: fCurrency1 (case-sensitive).
int32_t overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1);
maybeMore = maybeMore || overlap1 == segment.length();
if (overlap1 == fCurrency1.length()) {
utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
segment.adjustOffset(overlap1);
result.setCharsConsumed(segment);
return segment.length() == 0;
return maybeMore;
}
// Candidate 2: fCurrency2 (case-sensitive).
int32_t overlap2 = segment.getCaseSensitivePrefixLength(fCurrency2);
maybeMore = maybeMore || overlap2 == segment.length();
if (overlap2 == fCurrency2.length()) {
utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
segment.adjustOffset(overlap2);
result.setCharsConsumed(segment);
return segment.length() == 0;
return maybeMore;
}
// NOTE: This call site should be improved with #13584.
const UnicodeString segmentString = segment.toTempUnicodeString();
if (fUseFullCurrencyData) {
// Use the full currency data.
// NOTE: This call site should be improved with #13584.
const UnicodeString segmentString = segment.toTempUnicodeString();
// Try to parse the currency
ParsePosition ppos(0);
int32_t partialMatchLen = 0;
uprv_parseCurrency(
fLocaleName.data(),
segmentString,
ppos,
UCURR_SYMBOL_NAME, // checks for both UCURR_SYMBOL_NAME and UCURR_LONG_NAME
&partialMatchLen,
result.currencyCode,
status);
// Try to parse the currency
ParsePosition ppos(0);
int32_t partialMatchLen = 0;
uprv_parseCurrency(
fLocaleName.data(),
segmentString,
ppos,
UCURR_SYMBOL_NAME, // checks for both UCURR_SYMBOL_NAME and UCURR_LONG_NAME
&partialMatchLen,
result.currencyCode,
status);
maybeMore = maybeMore || partialMatchLen == segment.length();
if (U_SUCCESS(status) && ppos.getIndex() != 0) {
// Complete match.
// NOTE: The currency code should already be saved in the ParsedNumber.
segment.adjustOffset(ppos.getIndex());
result.setCharsConsumed(segment);
if (U_SUCCESS(status) && ppos.getIndex() != 0) {
// Complete match.
// NOTE: The currency code should already be saved in the ParsedNumber.
segment.adjustOffset(ppos.getIndex());
result.setCharsConsumed(segment);
return maybeMore;
}
} else {
// Use the locale long names.
// Only the longest fully matched name is consumed; shorter full matches
// are superseded, and any nonzero overlap sets maybeMore.
int32_t longestFullMatch = 0;
for (int32_t i=0; i<StandardPlural::COUNT; i++) {
const UnicodeString& name = fLocalLongNames[i];
int32_t overlap = segment.getCommonPrefixLength(name);
if (overlap == name.length() && name.length() > longestFullMatch) {
longestFullMatch = name.length();
}
maybeMore = maybeMore || overlap > 0;
}
if (longestFullMatch > 0) {
utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
segment.adjustOffset(longestFullMatch);
result.setCharsConsumed(segment);
return maybeMore;
}
}
return overlap1 == segment.length() || overlap2 == segment.length() ||
partialMatchLen == segment.length();
// No match found.
return maybeMore;
}
bool CombinedCurrencyMatcher::smokeTest(const StringSegment&) const {

View file

@ -34,7 +34,8 @@ class U_I18N_API CombinedCurrencyMatcher : public NumberParseMatcher, public UMe
public:
CombinedCurrencyMatcher() = default; // WARNING: Leaves the object in an unusable state
CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs, UErrorCode& status);
CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs,
parse_flags_t parseFlags, UErrorCode& status);
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
@ -47,6 +48,9 @@ class U_I18N_API CombinedCurrencyMatcher : public NumberParseMatcher, public UMe
UnicodeString fCurrency1;
UnicodeString fCurrency2;
bool fUseFullCurrencyData;
UnicodeString fLocalLongNames[StandardPlural::COUNT];
UnicodeString afterPrefixInsert;
UnicodeString beforeSuffixInsert;

View file

@ -53,7 +53,7 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
// The following statements set up the affix matchers.
AffixTokenMatcherSetupData affixSetupData = {
currencySymbols, symbols, ignorables, locale};
currencySymbols, symbols, ignorables, locale, parseFlags};
parser->fLocalMatchers.affixTokenMatcherWarehouse = {&affixSetupData};
parser->fLocalMatchers.affixMatcherWarehouse = {&parser->fLocalMatchers.affixTokenMatcherWarehouse};
parser->fLocalMatchers.affixMatcherWarehouse.createAffixMatchers(
@ -72,7 +72,7 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
parser->addMatcher(parser->fLocalMatchers.infinity = {symbols});
parser->addMatcher(parser->fLocalMatchers.padding = {u"@"});
parser->addMatcher(parser->fLocalMatchers.scientific = {symbols, grouper});
parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, status});
parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, parseFlags, status});
// parser.addMatcher(new RequireNumberMatcher());
parser->freeze();
@ -124,6 +124,9 @@ NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatPr
if (parseCurrency || affixProvider->hasCurrencySign()) {
parseFlags |= PARSE_FLAG_MONETARY_SEPARATORS;
}
if (!parseCurrency) {
parseFlags |= PARSE_FLAG_NO_FOREIGN_CURRENCY;
}
LocalPointer<NumberParserImpl> parser(new NumberParserImpl(parseFlags));
@ -137,7 +140,7 @@ NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatPr
// The following statements set up the affix matchers.
AffixTokenMatcherSetupData affixSetupData = {
currencySymbols, symbols, ignorables, locale};
currencySymbols, symbols, ignorables, locale, parseFlags};
parser->fLocalMatchers.affixTokenMatcherWarehouse = {&affixSetupData};
parser->fLocalMatchers.affixMatcherWarehouse = {&parser->fLocalMatchers.affixTokenMatcherWarehouse};
parser->fLocalMatchers.affixMatcherWarehouse.createAffixMatchers(
@ -148,7 +151,7 @@ NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatPr
////////////////////////
if (parseCurrency || affixProvider->hasCurrencySign()) {
parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, status});
parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, parseFlags, status});
}
///////////////

View file

@ -48,6 +48,7 @@ enum ParseFlags {
PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400,
// PARSE_FLAG_OPTIMIZE = 0x0800, // no longer used
// PARSE_FLAG_FORCE_BIG_DECIMAL = 0x1000, // not used in ICU4C
PARSE_FLAG_NO_FOREIGN_CURRENCY = 0x2000,
};

View file

@ -1026,12 +1026,9 @@ typedef struct {
const char * descrip;
const UChar * currStr;
const UChar * plurStr;
// ICU 62: currencies are accepted in non-currency mode
/*
UErrorCode parsDoubExpectErr;
int32_t parsDoubExpectPos;
double parsDoubExpectVal;
*/
UErrorCode parsCurrExpectErr;
int32_t parsCurrExpectPos;
double parsCurrExpectVal;
@ -1039,29 +1036,29 @@ typedef struct {
} ParseCurrencyItem;
/*
 * Expectation table read by TestParseCurrency() through ParseCurrencyItem.
 * Columns, in field order:
 *   locale, descrip, currStr, plurStr,
 *   parsDoubExpectErr, parsDoubExpectPos, parsDoubExpectVal
 *       (compared against the unum_parseDouble() result),
 *   parsCurrExpectErr, parsCurrExpectPos, parsCurrExpectVal, parsCurrExpectCurr
 *       (compared against the unum_parseDoubleCurrency() result).
 * The final all-NULL/zero row is presumably the end-of-table sentinel.
 *
 * NOTE(review): this text is a rendered diff. Rows whose parseDouble triple is
 * wrapped in a block comment look like the removed pre-change rows, and each is
 * followed later by a row with the triple spelled out. Confirm against the real
 * file which set is live.
 */
static const ParseCurrencyItem parseCurrencyItems[] = {
{ "en_US", "dollars2", dollars2Sym, NULL, /* U_ZERO_ERROR, 5, 2.0, */ U_ZERO_ERROR, 5, 2.0, "USD" },
{ "en_US", "dollars4", dollars4Sym, dollars4PluEn, /* U_ZERO_ERROR, 2, 4.0, */ U_ZERO_ERROR, 2, 4.0, "USD" },
{ "en_US", "dollars9", dollars9Sym, NULL, /* U_PARSE_ERROR, 1, 0.0, */ U_PARSE_ERROR, 1, 0.0, "" },
{ "en_US", "pounds3", pounds3Sym, NULL, /* U_PARSE_ERROR, 0, 0.0, */ U_ZERO_ERROR, 5, 3.0, "GBP" },
{ "en_US", "pounds5", pounds5Sym, pounds5PluEn, /* U_PARSE_ERROR, 0, 0.0, */ U_ZERO_ERROR, 2, 5.0, "GBP" },
{ "en_US", "pounds7", pounds7Sym, NULL, /* U_PARSE_ERROR, 1, 0.0, */ U_PARSE_ERROR, 1, 0.0, "" },
{ "en_US", "euros8", euros8Sym, euros8PluEn, /* U_PARSE_ERROR, 0, 0.0, */ U_ZERO_ERROR, 2, 8.0, "EUR" },
{ "en_US", "dollars2", dollars2Sym, NULL, U_ZERO_ERROR, 5, 2.0, U_ZERO_ERROR, 5, 2.0, "USD" },
{ "en_US", "dollars4", dollars4Sym, dollars4PluEn, U_ZERO_ERROR, 2, 4.0, U_ZERO_ERROR, 2, 4.0, "USD" },
{ "en_US", "dollars9", dollars9Sym, NULL, U_PARSE_ERROR, 1, 0.0, U_PARSE_ERROR, 1, 0.0, "" },
{ "en_US", "pounds3", pounds3Sym, NULL, U_PARSE_ERROR, 0, 0.0, U_ZERO_ERROR, 5, 3.0, "GBP" },
{ "en_US", "pounds5", pounds5Sym, pounds5PluEn, U_PARSE_ERROR, 0, 0.0, U_ZERO_ERROR, 2, 5.0, "GBP" },
{ "en_US", "pounds7", pounds7Sym, NULL, U_PARSE_ERROR, 1, 0.0, U_PARSE_ERROR, 1, 0.0, "" },
{ "en_US", "euros8", euros8Sym, euros8PluEn, U_PARSE_ERROR, 0, 0.0, U_ZERO_ERROR, 2, 8.0, "EUR" },
{ "en_GB", "pounds3", pounds3Sym, NULL, /* U_ZERO_ERROR, 5, 3.0, */ U_ZERO_ERROR, 5, 3.0, "GBP" },
{ "en_GB", "pounds5", pounds5Sym, pounds5PluEn, /* U_ZERO_ERROR, 2, 5.0, */ U_ZERO_ERROR, 2, 5.0, "GBP" },
{ "en_GB", "pounds7", pounds7Sym, NULL, /* U_PARSE_ERROR, 1, 0.0, */ U_PARSE_ERROR, 1, 0.0, "" },
{ "en_GB", "euros4", euros4Sym, NULL, /* U_PARSE_ERROR, 4, 0.0, */ U_PARSE_ERROR, 0, 0.0, "" },
{ "en_GB", "euros6", euros6Sym, NULL, /* U_PARSE_ERROR, 1, 0.0, */ U_PARSE_ERROR, 1, 0.0, "" },
{ "en_GB", "euros8", euros8Sym, euros8PluEn, /* U_PARSE_ERROR, 0, 0.0, */ U_ZERO_ERROR, 2, 8.0, "EUR" },
{ "en_GB", "dollars4", dollarsUS4Sym,dollars4PluEn, /* U_PARSE_ERROR, 0, 0.0, */ U_ZERO_ERROR, 4, 4.0, "USD" },
{ "en_GB", "pounds3", pounds3Sym, NULL, U_ZERO_ERROR, 5, 3.0, U_ZERO_ERROR, 5, 3.0, "GBP" },
{ "en_GB", "pounds5", pounds5Sym, pounds5PluEn, U_ZERO_ERROR, 2, 5.0, U_ZERO_ERROR, 2, 5.0, "GBP" },
{ "en_GB", "pounds7", pounds7Sym, NULL, U_PARSE_ERROR, 1, 0.0, U_PARSE_ERROR, 1, 0.0, "" },
{ "en_GB", "euros4", euros4Sym, NULL, U_PARSE_ERROR, 0, 0.0, U_PARSE_ERROR, 0, 0.0, "" },
{ "en_GB", "euros6", euros6Sym, NULL, U_PARSE_ERROR, 1, 0.0, U_PARSE_ERROR, 1, 0.0, "" },
{ "en_GB", "euros8", euros8Sym, euros8PluEn, U_PARSE_ERROR, 0, 0.0, U_ZERO_ERROR, 2, 8.0, "EUR" },
{ "en_GB", "dollars4", dollarsUS4Sym,dollars4PluEn, U_PARSE_ERROR, 0, 0.0, U_ZERO_ERROR, 4, 4.0, "USD" },
{ "fr_FR", "euros4", euros4Sym, NULL, /* U_ZERO_ERROR, 6, 4.0, */ U_ZERO_ERROR, 6, 4.0, "EUR" },
{ "fr_FR", "euros6", euros6Sym, euros6PluFr, /* U_ZERO_ERROR, 3, 6.0, */ U_ZERO_ERROR, 3, 6.0, "EUR" },
{ "fr_FR", "euros8", euros8Sym, NULL, /* U_PARSE_ERROR, 0, 0.0, */ U_PARSE_ERROR, 2, 0.0, "" },
{ "fr_FR", "dollars2", dollars2Sym, NULL, /* U_PARSE_ERROR, 0, 0.0, */ U_PARSE_ERROR, 0, 0.0, "" },
{ "fr_FR", "dollars4", dollars4Sym, NULL, /* U_PARSE_ERROR, 0, 0.0, */ U_PARSE_ERROR, 0, 0.0, "" },
{ "fr_FR", "euros4", euros4Sym, NULL, U_ZERO_ERROR, 6, 4.0, U_ZERO_ERROR, 6, 4.0, "EUR" },
{ "fr_FR", "euros6", euros6Sym, euros6PluFr, U_ZERO_ERROR, 3, 6.0, U_ZERO_ERROR, 3, 6.0, "EUR" },
{ "fr_FR", "euros8", euros8Sym, NULL, U_PARSE_ERROR, 2, 0.0, U_PARSE_ERROR, 2, 0.0, "" },
{ "fr_FR", "dollars2", dollars2Sym, NULL, U_PARSE_ERROR, 0, 0.0, U_PARSE_ERROR, 0, 0.0, "" },
{ "fr_FR", "dollars4", dollars4Sym, NULL, U_PARSE_ERROR, 0, 0.0, U_PARSE_ERROR, 0, 0.0, "" },
{ NULL, NULL, NULL, NULL, /* 0, 0, 0.0, */ 0, 0, 0.0, NULL }
{ NULL, NULL, NULL, NULL, 0, 0, 0.0, 0, 0, 0.0, NULL }
};
static void TestParseCurrency()
@ -1079,14 +1076,13 @@ static void TestParseCurrency()
unum = unum_open(UNUM_CURRENCY, NULL, 0, itemPtr->locale, NULL, &status);
if (U_SUCCESS(status)) {
const UChar * currStr = itemPtr->currStr;
int32_t currExpectPos = itemPtr->parsCurrExpectPos;
status = U_ZERO_ERROR;
parsePos = 0;
parseVal = unum_parseDouble(unum, currStr, -1, &parsePos, &status);
if (status != itemPtr->parsCurrExpectErr || parsePos != itemPtr->parsCurrExpectPos || parseVal != itemPtr->parsCurrExpectVal) {
if (status != itemPtr->parsDoubExpectErr || parsePos != itemPtr->parsDoubExpectPos || parseVal != itemPtr->parsDoubExpectVal) {
log_err("UNUM_CURRENCY parseDouble %s/%s, expect %s pos %d val %.1f, get %s pos %d val %.1f\n",
itemPtr->locale, itemPtr->descrip,
u_errorName(itemPtr->parsCurrExpectErr), itemPtr->parsCurrExpectPos, itemPtr->parsCurrExpectVal,
u_errorName(itemPtr->parsDoubExpectErr), itemPtr->parsDoubExpectPos, itemPtr->parsDoubExpectVal,
u_errorName(status), parsePos, parseVal );
}
status = U_ZERO_ERROR;
@ -1094,11 +1090,11 @@ static void TestParseCurrency()
parseCurr[0] = 0;
parseVal = unum_parseDoubleCurrency(unum, currStr, -1, &parsePos, parseCurr, &status);
u_austrncpy(parseCurrB, parseCurr, 4);
if (status != itemPtr->parsCurrExpectErr || parsePos != currExpectPos || parseVal != itemPtr->parsCurrExpectVal ||
if (status != itemPtr->parsCurrExpectErr || parsePos != itemPtr->parsCurrExpectPos || parseVal != itemPtr->parsCurrExpectVal ||
strncmp(parseCurrB, itemPtr->parsCurrExpectCurr, 4) != 0) {
log_err("UNUM_CURRENCY parseDoubleCurrency %s/%s, expect %s pos %d val %.1f cur %s, get %s pos %d val %.1f cur %s\n",
itemPtr->locale, itemPtr->descrip,
u_errorName(itemPtr->parsCurrExpectErr), currExpectPos, itemPtr->parsCurrExpectVal, itemPtr->parsCurrExpectCurr,
u_errorName(itemPtr->parsCurrExpectErr), itemPtr->parsCurrExpectPos, itemPtr->parsCurrExpectVal, itemPtr->parsCurrExpectCurr,
u_errorName(status), parsePos, parseVal, parseCurrB );
}
unum_close(unum);
@ -1113,10 +1109,10 @@ static void TestParseCurrency()
status = U_ZERO_ERROR;
parsePos = 0;
parseVal = unum_parseDouble(unum, itemPtr->plurStr, -1, &parsePos, &status);
if (status != itemPtr->parsCurrExpectErr || parseVal != itemPtr->parsCurrExpectVal) {
log_err("UNUM_CURRENCY parseDouble %s/%s, expect %s val %.1f, get %s val %.1f\n",
if (status != itemPtr->parsDoubExpectErr || parseVal != itemPtr->parsDoubExpectVal) {
log_err("UNUM_CURRENCY parseDouble Plural %s/%s, expect %s val %.1f, get %s val %.1f\n",
itemPtr->locale, itemPtr->descrip,
u_errorName(itemPtr->parsCurrExpectErr), itemPtr->parsCurrExpectVal,
u_errorName(itemPtr->parsDoubExpectErr), itemPtr->parsDoubExpectVal,
u_errorName(status), parseVal );
}
status = U_ZERO_ERROR;
@ -1126,7 +1122,7 @@ static void TestParseCurrency()
u_austrncpy(parseCurrB, parseCurr, 4);
if (status != itemPtr->parsCurrExpectErr || parseVal != itemPtr->parsCurrExpectVal ||
strncmp(parseCurrB, itemPtr->parsCurrExpectCurr, 4) != 0) {
log_err("UNUM_CURRENCY parseDoubleCurrency %s/%s, expect %s val %.1f cur %s, get %s val %.1f cur %s\n",
log_err("UNUM_CURRENCY parseDoubleCurrency Plural %s/%s, expect %s val %.1f cur %s, get %s val %.1f cur %s\n",
itemPtr->locale, itemPtr->descrip,
u_errorName(itemPtr->parsCurrExpectErr), itemPtr->parsCurrExpectVal, itemPtr->parsCurrExpectCurr,
u_errorName(status), parseVal, parseCurrB );

View file

@ -242,33 +242,55 @@ void NumberParserTest::testCombinedCurrencyMatcher() {
CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status);
AffixTokenMatcherSetupData affixSetupData = {
currencySymbols, {"en", status}, ignorables, "en"};
currencySymbols, {"en", status}, ignorables, "en", 0};
AffixTokenMatcherWarehouse warehouse(&affixSetupData);
NumberParseMatcher& matcher = warehouse.currency(status);
affixSetupData.parseFlags = PARSE_FLAG_NO_FOREIGN_CURRENCY;
AffixTokenMatcherWarehouse warehouseNoForeign(&affixSetupData);
NumberParseMatcher& matcherNoForeign = warehouseNoForeign.currency(status);
static const struct TestCase {
const char16_t* input;
const char16_t* expectedCurrencyCode;
} cases[]{{u"", u"\x00"},
{u"FOO", u"\x00"},
{u"USD", u"USD"},
{u"$", u"USD"},
{u"US dollars", u"USD"},
{u"eu", u"\x00"},
{u"euros", u"EUR"},
{u"ICU", u"ICU"},
{u"IU$", u"ICU"}};
const char16_t* expectedNoForeignCurrencyCode;
} cases[]{{u"", u"", u""},
{u"FOO", u"", u""},
{u"USD", u"USD", u""},
{u"$", u"USD", u""},
{u"US dollars", u"USD", u""},
{u"eu", u"", u""},
{u"euros", u"EUR", u""},
{u"ICU", u"ICU", u"ICU"},
{u"IU$", u"ICU", u"ICU"}};
for (auto& cas : cases) {
UnicodeString input(cas.input);
StringSegment segment(input, false);
ParsedNumber result;
matcher.match(segment, result, status);
assertEquals("Parsing " + input, cas.expectedCurrencyCode, result.currencyCode);
assertEquals(
"Whole string on " + input,
cas.expectedCurrencyCode[0] == 0 ? 0 : input.length(),
result.charEnd);
{
StringSegment segment(input, false);
ParsedNumber result;
matcher.match(segment, result, status);
assertEquals(
"Parsing " + input,
cas.expectedCurrencyCode,
result.currencyCode);
assertEquals(
"Whole string on " + input,
cas.expectedCurrencyCode[0] == 0 ? 0 : input.length(),
result.charEnd);
}
{
StringSegment segment(input, false);
ParsedNumber result;
matcherNoForeign.match(segment, result, status);
assertEquals(
"[no foreign] Parsing " + input,
cas.expectedNoForeignCurrencyCode,
result.currencyCode);
assertEquals(
"[no foreign] Whole string on " + input,
cas.expectedNoForeignCurrencyCode[0] == 0 ? 0 : input.length(),
result.charEnd);
}
}
}
@ -283,7 +305,7 @@ void NumberParserTest::testAffixPatternMatcher() {
CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status);
AffixTokenMatcherSetupData affixSetupData = {
currencySymbols, {"en", status}, ignorables, "en"};
currencySymbols, {"en", status}, ignorables, "en", 0};
AffixTokenMatcherWarehouse warehouse(&affixSetupData);
static const struct TestCase {

View file

@ -3750,7 +3750,8 @@ NumberFormatTest::TestCurrencyParsing() {
{"zh_Hant", "1", "CNY", "CN\\u00A51.00", "CNY\\u00A01.00", "1.00 \\u4eba\\u6c11\\u5e63"},
{"zh_Hant", "1", "JPY", "\\u00A51.00", "JPY\\u00A01.00", "1 \\u65E5\\u5713"},
{"ja_JP", "1", "JPY", "\\uFFE51.00", "JPY\\u00A01.00", "1\\u00A0\\u5186"},
{"ja_JP", "1", "JPY", "\\u00A51.00", "JPY\\u00A01.00", "1\\u00A0\\u5186"},
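// ICU 62: variant currency symbols (here U+00A5 rather than the U+FFE5 used in the ja_JP row above)
// are only recognized when parsing via #parseCurrency(), so this case is disabled: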
// {"ja_JP", "1", "JPY", "\\u00A51.00", "JPY\\u00A01.00", "1\\u00A0\\u5186"},
{"ru_RU", "1", "RUB", "1,00\\u00A0\\u00A0\\u20BD", "1,00\\u00A0\\u00A0RUB", "1,00 \\u0440\\u043E\\u0441\\u0441\\u0438\\u0439\\u0441\\u043A\\u043E\\u0433\\u043E \\u0440\\u0443\\u0431\\u043B\\u044F"}
};
static const UNumberFormatStyle currencyStyles[] = {

View file

@ -1304,17 +1304,16 @@ Euros 7.82 7.82 EUR
test parse currency without currency mode
// Should accept a symbol associated with the currency specified by the API,
// but should not traverse the full currency data.
// P always traverses full currency data.
set locale en_US
set pattern \u00a4#,##0.00
begin
parse currency output breaks
$52.41 USD 52.41
USD52.41 USD 52.41 K
\u20ac52.41 USD fail CJP
EUR52.41 USD fail CJP
$52.41 EUR fail CJP
USD52.41 EUR fail CJP
\u20ac52.41 USD fail
EUR52.41 USD fail
$52.41 EUR fail
USD52.41 EUR fail
\u20ac52.41 EUR 52.41 K
EUR52.41 EUR 52.41

View file

@ -16,6 +16,7 @@ public class AffixTokenMatcherFactory {
public DecimalFormatSymbols symbols;
public IgnorablesMatcher ignorables;
public ULocale locale;
public int parseFlags;
public MinusSignMatcher minusSign() {
return MinusSignMatcher.getInstance(symbols, true);
@ -34,7 +35,7 @@ public class AffixTokenMatcherFactory {
}
/**
 * Returns a CombinedCurrencyMatcher obtained from
 * {@link CombinedCurrencyMatcher#getInstance} using this factory's currency, symbols and
 * parse flags.
 *
 * NOTE(review): this text is a rendered diff, so two return statements appear back to back;
 * the first one (without parseFlags) looks like the removed pre-change line. Confirm against
 * the real file.
 */
public CombinedCurrencyMatcher currency() {
return CombinedCurrencyMatcher.getInstance(currency, symbols);
return CombinedCurrencyMatcher.getInstance(currency, symbols, parseFlags);
}
public IgnorablesMatcher ignorables() {

View file

@ -4,6 +4,7 @@ package com.ibm.icu.impl.number.parse;
import java.util.Iterator;
import com.ibm.icu.impl.StandardPlural;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.impl.TextTrieMap;
import com.ibm.icu.text.DecimalFormatSymbols;
@ -27,6 +28,8 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
private final String currency1;
private final String currency2;
private final String[] localLongNames;
private final String afterPrefixInsert;
private final String beforeSuffixInsert;
@ -36,12 +39,12 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
// TODO: See comments in constructor.
// private final UnicodeSet leadCodePoints;
/**
 * Factory method: constructs a new CombinedCurrencyMatcher on every call (no caching yet, see
 * the TODO below).
 *
 * @param currency the currency forwarded to the private constructor
 * @param dfs the decimal format symbols forwarded to the private constructor
 * @param parseFlags the parse flags forwarded to the private constructor
 *
 * NOTE(review): this text is a rendered diff, so the old two-argument signature/return and
 * the new three-argument signature/return appear side by side. Confirm against the real file
 * which pair is live.
 */
public static CombinedCurrencyMatcher getInstance(Currency currency, DecimalFormatSymbols dfs) {
public static CombinedCurrencyMatcher getInstance(Currency currency, DecimalFormatSymbols dfs, int parseFlags) {
// TODO: Cache these instances. They are somewhat expensive.
return new CombinedCurrencyMatcher(currency, dfs);
return new CombinedCurrencyMatcher(currency, dfs, parseFlags);
}
private CombinedCurrencyMatcher(Currency currency, DecimalFormatSymbols dfs) {
private CombinedCurrencyMatcher(Currency currency, DecimalFormatSymbols dfs, int parseFlags) {
this.isoCode = currency.getSubtype();
this.currency1 = currency.getSymbol(dfs.getULocale());
this.currency2 = currency.getCurrencyCode();
@ -51,10 +54,23 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
beforeSuffixInsert = dfs.getPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT,
true);
// TODO: Currency trie does not currently have an option for case folding. It defaults to use
// case folding on long-names but not symbols.
longNameTrie = Currency.getParsingTrie(dfs.getULocale(), Currency.LONG_NAME);
symbolTrie = Currency.getParsingTrie(dfs.getULocale(), Currency.SYMBOL_NAME);
if (0 == (parseFlags & ParsingUtils.PARSE_FLAG_NO_FOREIGN_CURRENCIES)) {
// TODO: Currency trie does not currently have an option for case folding. It defaults to use
// case folding on long-names but not symbols.
longNameTrie = Currency.getParsingTrie(dfs.getULocale(), Currency.LONG_NAME);
symbolTrie = Currency.getParsingTrie(dfs.getULocale(), Currency.SYMBOL_NAME);
localLongNames = null;
} else {
longNameTrie = null;
symbolTrie = null;
localLongNames = new String[StandardPlural.COUNT];
for (int i = 0; i < StandardPlural.COUNT; i++) {
String pluralKeyword = StandardPlural.VALUES.get(i).getKeyword();
localLongNames[i] = currency
.getName(dfs.getLocale(), Currency.PLURAL_LONG_NAME, pluralKeyword, null);
}
}
// TODO: Figure out how to make this faster and re-enable.
// Computing the "lead code points" set for fastpathing is too slow to use in production.
@ -112,34 +128,63 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
/**
 * Matches the currency string without concern for currency spacing.
 *
 * Candidates are tried in this order:
 * 1. currency1, by case-sensitive prefix comparison;
 * 2. currency2, by case-sensitive prefix comparison;
 * 3. when longNameTrie is non-null, a lookup in longNameTrie and then, if that yields
 *    nothing, in symbolTrie; otherwise the entries of localLongNames, taking the longest
 *    name that is fully present at the start of the segment.
 *
 * On a match, result.currencyCode is set (isoCode for cases 1, 2 and the local-long-name
 * path; the ISO code of the first trie entry on the trie path), the segment offset is
 * advanced past the matched text, and result.setCharsConsumed(segment) is called.
 *
 * Returns maybeMore, which becomes true when a symbol overlap equals the remaining segment
 * length, when a trie lookup reports a partial match, or when a local long name overlaps the
 * segment at all.
 *
 * NOTE(review): this text is a rendered diff, so removed and added lines appear side by side
 * without markers (the paired return statements, the repeated trieOutput/values declarations
 * and the trailing "return overlap1 == ..." line). Confirm against the real file which lines
 * are live.
 */
private boolean matchCurrency(StringSegment segment, ParsedNumber result) {
boolean maybeMore = false;
// Candidate 1: currency1 (case-sensitive).
int overlap1 = segment.getCaseSensitivePrefixLength(currency1);
maybeMore = maybeMore || overlap1 == segment.length();
if (overlap1 == currency1.length()) {
result.currencyCode = isoCode;
segment.adjustOffset(overlap1);
result.setCharsConsumed(segment);
return segment.length() == 0;
return maybeMore;
}
// Candidate 2: currency2 (case-sensitive).
int overlap2 = segment.getCaseSensitivePrefixLength(currency2);
maybeMore = maybeMore || overlap2 == segment.length();
if (overlap2 == currency2.length()) {
result.currencyCode = isoCode;
segment.adjustOffset(overlap2);
result.setCharsConsumed(segment);
return segment.length() == 0;
return maybeMore;
}
TextTrieMap.Output trieOutput = new TextTrieMap.Output();
Iterator<CurrencyStringInfo> values = longNameTrie.get(segment, 0, trieOutput);
if (values == null) {
values = symbolTrie.get(segment, 0, trieOutput);
}
if (values != null) {
result.currencyCode = values.next().getISOCode();
segment.adjustOffset(trieOutput.matchLength);
result.setCharsConsumed(segment);
if (longNameTrie != null) {
// Use the full currency data.
TextTrieMap.Output trieOutput = new TextTrieMap.Output();
Iterator<CurrencyStringInfo> values = longNameTrie.get(segment, 0, trieOutput);
maybeMore = maybeMore || trieOutput.partialMatch;
if (values == null) {
values = symbolTrie.get(segment, 0, trieOutput);
maybeMore = maybeMore || trieOutput.partialMatch;
}
if (values != null) {
result.currencyCode = values.next().getISOCode();
segment.adjustOffset(trieOutput.matchLength);
result.setCharsConsumed(segment);
return maybeMore;
}
} else {
// Use the locale long names.
// Only the longest fully matched name is consumed; any nonzero overlap sets maybeMore.
int longestFullMatch = 0;
for (int i=0; i<StandardPlural.COUNT; i++) {
String name = localLongNames[i];
int overlap = segment.getCommonPrefixLength(name);
if (overlap == name.length() && name.length() > longestFullMatch) {
longestFullMatch = name.length();
}
maybeMore = maybeMore || overlap > 0;
}
if (longestFullMatch > 0) {
result.currencyCode = isoCode;
segment.adjustOffset(longestFullMatch);
result.setCharsConsumed(segment);
return maybeMore;
}
}
return overlap1 == segment.length() || overlap2 == segment.length() || trieOutput.partialMatch;
// No match found.
return maybeMore;
}
@Override

View file

@ -49,6 +49,7 @@ public class NumberParserImpl {
factory.symbols = symbols;
factory.ignorables = ignorables;
factory.locale = locale;
factory.parseFlags = parseFlags;
ParsedPatternInfo patternInfo = PatternStringParser.parseToPatternInfo(pattern);
AffixMatcher.createMatchers(patternInfo, parser, factory, ignorables, parseFlags);
@ -65,7 +66,7 @@ public class NumberParserImpl {
parser.addMatcher(InfinityMatcher.getInstance(symbols));
parser.addMatcher(PaddingMatcher.getInstance("@"));
parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper));
parser.addMatcher(CombinedCurrencyMatcher.getInstance(currency, symbols));
parser.addMatcher(CombinedCurrencyMatcher.getInstance(currency, symbols, parseFlags));
parser.addMatcher(new RequireNumberValidator());
parser.freeze();
@ -173,6 +174,9 @@ public class NumberParserImpl {
if (parseCurrency || affixProvider.hasCurrencySign()) {
parseFlags |= ParsingUtils.PARSE_FLAG_MONETARY_SEPARATORS;
}
if (!parseCurrency) {
parseFlags |= ParsingUtils.PARSE_FLAG_NO_FOREIGN_CURRENCIES;
}
IgnorablesMatcher ignorables = isStrict ? IgnorablesMatcher.STRICT : IgnorablesMatcher.DEFAULT;
NumberParserImpl parser = new NumberParserImpl(parseFlags);
@ -182,6 +186,7 @@ public class NumberParserImpl {
factory.symbols = symbols;
factory.ignorables = ignorables;
factory.locale = locale;
factory.parseFlags = parseFlags;
//////////////////////
/// AFFIX MATCHERS ///
@ -195,7 +200,7 @@ public class NumberParserImpl {
////////////////////////
if (parseCurrency || affixProvider.hasCurrencySign()) {
parser.addMatcher(CombinedCurrencyMatcher.getInstance(currency, symbols));
parser.addMatcher(CombinedCurrencyMatcher.getInstance(currency, symbols, parseFlags));
}
///////////////

View file

@ -23,6 +23,7 @@ public class ParsingUtils {
public static final int PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400;
// public static final int PARSE_FLAG_OPTIMIZE = 0x0800; // no longer used
public static final int PARSE_FLAG_FORCE_BIG_DECIMAL = 0x1000;
public static final int PARSE_FLAG_NO_FOREIGN_CURRENCIES = 0x2000;
public static void putLeadCodePoints(UnicodeSet input, UnicodeSet output) {
for (EntryRange range : input.ranges()) {

View file

@ -1304,17 +1304,16 @@ Euros 7.82 7.82 EUR
test parse currency without currency mode
// Should accept a symbol associated with the currency specified by the API,
// but should not traverse the full currency data.
// P always traverses full currency data.
set locale en_US
set pattern \u00a4#,##0.00
begin
parse currency output breaks
$52.41 USD 52.41
USD52.41 USD 52.41 K
\u20ac52.41 USD fail CJP
EUR52.41 USD fail CJP
$52.41 EUR fail CJP
USD52.41 EUR fail CJP
\u20ac52.41 USD fail
EUR52.41 USD fail
$52.41 EUR fail
USD52.41 EUR fail
\u20ac52.41 EUR 52.41 K
EUR52.41 EUR 52.41

View file

@ -817,7 +817,7 @@ public class NumberFormatTest extends TestFmwk {
{"1.00 UAE dirha", "4", "-1", "0", "4"},
{"1.00 us dollar", "14", "-1", "14", "-1"},
{"1.00 US DOLLAR", "14", "-1", "14", "-1"},
{"1.00 usd", "8", "-1", "8", "-1"},
{"1.00 usd", "4", "-1", "8", "-1"},
{"1.00 USD", "8", "-1", "8", "-1"},
};
ULocale locale = new ULocale("en_US");
@ -859,14 +859,18 @@ public class NumberFormatTest extends TestFmwk {
private final String localeString;
private final String descrip;
private final String currStr;
private final int doubExpectPos;
private final int doubExpectVal;
private final int curExpectPos;
private final int curExpectVal;
private final String curExpectCurr;
ParseCurrencyItem(String locStr, String desc, String curr, int curExPos, int curExVal, String curExCurr) {
ParseCurrencyItem(String locStr, String desc, String curr, int doubExPos, int doubExVal, int curExPos, int curExVal, String curExCurr) {
localeString = locStr;
descrip = desc;
currStr = curr;
doubExpectPos = doubExPos;
doubExpectVal = doubExVal;
curExpectPos = curExPos;
curExpectVal = curExVal;
curExpectCurr = curExCurr;
@ -874,6 +878,8 @@ public class NumberFormatTest extends TestFmwk {
// Read-only accessors; each returns the same-named private final field unchanged.
// The doubExpect* pair and the curExpect* triple are separate sets of expected results
// (presumably for non-currency parse() and parseCurrency() respectively — confirm at the call site).
public String getLocaleString() { return localeString; }
public String getDescrip() { return descrip; }
public String getCurrStr() { return currStr; }
public int getDoubExpectPos() { return doubExpectPos; }
public int getDoubExpectVal() { return doubExpectVal; }
public int getCurExpectPos() { return curExpectPos; }
public int getCurExpectVal() { return curExpectVal; }
public String getCurExpectCurr() { return curExpectCurr; }
@ -881,27 +887,27 @@ public class NumberFormatTest extends TestFmwk {
// Note: In cases where the number occurs before the currency sign, non-currency mode will parse the number
// and stop when it reaches the currency symbol.
final ParseCurrencyItem[] parseCurrencyItems = {
new ParseCurrencyItem( "en_US", "dollars2", "$2.00", 5, 2, "USD" ),
new ParseCurrencyItem( "en_US", "dollars4", "$4", 2, 4, "USD" ),
new ParseCurrencyItem( "en_US", "dollars9", "9\u00A0$", 3, 9, "USD" ),
new ParseCurrencyItem( "en_US", "pounds3", "\u00A33.00", 5, 3, "GBP" ),
new ParseCurrencyItem( "en_US", "pounds5", "\u00A35", 2, 5, "GBP" ),
new ParseCurrencyItem( "en_US", "pounds7", "7\u00A0\u00A3", 3, 7, "GBP" ),
new ParseCurrencyItem( "en_US", "euros8", "\u20AC8", 2, 8, "EUR" ),
new ParseCurrencyItem( "en_US", "dollars2", "$2.00", 5, 2, 5, 2, "USD" ),
new ParseCurrencyItem( "en_US", "dollars4", "$4", 2, 4, 2, 4, "USD" ),
new ParseCurrencyItem( "en_US", "dollars9", "9\u00A0$", 3, 9, 3, 9, "USD" ),
new ParseCurrencyItem( "en_US", "pounds3", "\u00A33.00", 0, 0, 5, 3, "GBP" ),
new ParseCurrencyItem( "en_US", "pounds5", "\u00A35", 0, 0, 2, 5, "GBP" ),
new ParseCurrencyItem( "en_US", "pounds7", "7\u00A0\u00A3", 1, 7, 3, 7, "GBP" ),
new ParseCurrencyItem( "en_US", "euros8", "\u20AC8", 0, 0, 2, 8, "EUR" ),
new ParseCurrencyItem( "en_GB", "pounds3", "\u00A33.00", 5, 3, "GBP" ),
new ParseCurrencyItem( "en_GB", "pounds5", "\u00A35", 2, 5, "GBP" ),
new ParseCurrencyItem( "en_GB", "pounds7", "7\u00A0\u00A3", 3, 7, "GBP" ),
new ParseCurrencyItem( "en_GB", "euros4", "4,00\u00A0\u20AC", 6,400, "EUR" ),
new ParseCurrencyItem( "en_GB", "euros6", "6\u00A0\u20AC", 3, 6, "EUR" ),
new ParseCurrencyItem( "en_GB", "euros8", "\u20AC8", 2, 8, "EUR" ),
new ParseCurrencyItem( "en_GB", "dollars4", "US$4", 4, 4, "USD" ),
new ParseCurrencyItem( "en_GB", "pounds3", "\u00A33.00", 5, 3, 5, 3, "GBP" ),
new ParseCurrencyItem( "en_GB", "pounds5", "\u00A35", 2, 5, 2, 5, "GBP" ),
new ParseCurrencyItem( "en_GB", "pounds7", "7\u00A0\u00A3", 3, 7, 3, 7, "GBP" ),
new ParseCurrencyItem( "en_GB", "euros4", "4,00\u00A0\u20AC", 4,400, 6,400, "EUR" ),
new ParseCurrencyItem( "en_GB", "euros6", "6\u00A0\u20AC", 1, 6, 3, 6, "EUR" ),
new ParseCurrencyItem( "en_GB", "euros8", "\u20AC8", 0, 0, 2, 8, "EUR" ),
new ParseCurrencyItem( "en_GB", "dollars4", "US$4", 0, 0, 4, 4, "USD" ),
new ParseCurrencyItem( "fr_FR", "euros4", "4,00\u00A0\u20AC", 6, 4, "EUR" ),
new ParseCurrencyItem( "fr_FR", "euros6", "6\u00A0\u20AC", 3, 6, "EUR" ),
new ParseCurrencyItem( "fr_FR", "euros8", "\u20AC8", 2, 8, "EUR" ),
new ParseCurrencyItem( "fr_FR", "dollars2", "$2.00", 0, 0, "" ),
new ParseCurrencyItem( "fr_FR", "dollars4", "$4", 0, 0, "" ),
new ParseCurrencyItem( "fr_FR", "euros4", "4,00\u00A0\u20AC", 6, 4, 6, 4, "EUR" ),
new ParseCurrencyItem( "fr_FR", "euros6", "6\u00A0\u20AC", 3, 6, 3, 6, "EUR" ),
new ParseCurrencyItem( "fr_FR", "euros8", "\u20AC8", 2, 8, 2, 8, "EUR" ),
new ParseCurrencyItem( "fr_FR", "dollars2", "$2.00", 0, 0, 0, 0, "" ),
new ParseCurrencyItem( "fr_FR", "dollars4", "$4", 0, 0, 0, 0, "" ),
};
for (ParseCurrencyItem item: parseCurrencyItems) {
String localeString = item.getLocaleString();
@ -917,14 +923,14 @@ public class NumberFormatTest extends TestFmwk {
ParsePosition parsePos = new ParsePosition(0);
Number numVal = fmt.parse(currStr, parsePos);
if ( parsePos.getIndex() != item.getCurExpectPos() || (numVal != null && numVal.intValue() != item.getCurExpectVal()) ) {
if ( parsePos.getIndex() != item.getDoubExpectPos() || (numVal != null && numVal.intValue() != item.getDoubExpectVal()) ) {
if (numVal != null) {
errln("NumberFormat.getCurrencyInstance parse " + localeString + "/" + item.getDescrip() +
", expect pos/val " + item.getCurExpectPos() + "/" + item.getCurExpectVal() +
", expect pos/val " + item.getDoubExpectPos() + "/" + item.getDoubExpectVal() +
", get " + parsePos.getIndex() + "/" + numVal.intValue() );
} else {
errln("NumberFormat.getCurrencyInstance parse " + localeString + "/" + item.getDescrip() +
", expect pos/val " + item.getCurExpectPos() + "/" + item.getCurExpectVal() +
", expect pos/val " + item.getDoubExpectPos() + "/" + item.getDoubExpectVal() +
", get " + parsePos.getIndex() + "/(NULL)" );
}
}
@ -6061,12 +6067,15 @@ public class NumberFormatTest extends TestFmwk {
df.setCurrency(Currency.getInstance("USD"));
double input = 514.23;
String formatted = df.format(input);
assertEquals("Should format as expected", "514.23 US dollars", formatted);
String expected = "514.23 US dollars";
assertEquals("Should format as expected", expected, formatted);
ParsePosition ppos = new ParsePosition(0);
CurrencyAmount ca = df.parseCurrency(formatted, ppos);
assertEquals("Should consume whole number", ppos.getIndex(), 17);
assertEquals("Number should round-trip", ca.getNumber().doubleValue(), input);
assertEquals("Should get correct currency", ca.getCurrency().getCurrencyCode(), "USD");
// Should also round-trip in non-currency parsing
expect2(df, input, expected);
}
@Test

View file

@ -251,33 +251,52 @@ public class NumberParserTest {
@Test
public void testCombinedCurrencyMatcher() {
AffixTokenMatcherFactory factory = new AffixTokenMatcherFactory();
factory.locale = ULocale.ENGLISH;
factory.locale = ULocale.US;
CustomSymbolCurrency currency = new CustomSymbolCurrency("ICU", "IU$", "ICU");
factory.currency = currency;
factory.symbols = DecimalFormatSymbols.getInstance(ULocale.ENGLISH);
factory.symbols = DecimalFormatSymbols.getInstance(ULocale.US);
factory.parseFlags = 0;
CombinedCurrencyMatcher matcher = factory.currency();
factory.parseFlags = ParsingUtils.PARSE_FLAG_NO_FOREIGN_CURRENCIES;
CombinedCurrencyMatcher matcherNoForeignCurrencies = factory.currency();
Object[][] cases = new Object[][] {
{ "", null },
{ "FOO", null },
{ "USD", "USD" },
{ "$", "USD" },
{ "US dollars", "USD" },
{ "eu", null },
{ "euros", "EUR" },
{ "ICU", "ICU" },
{ "IU$", "ICU" } };
{ "", null, null },
{ "FOO", null, null },
{ "USD", "USD", null },
{ "$", "USD", null },
{ "US dollars", "USD", null },
{ "eu", null, null },
{ "euros", "EUR", null },
{ "ICU", "ICU", "ICU" },
{ "IU$", "ICU", "ICU" } };
for (Object[] cas : cases) {
String input = (String) cas[0];
String expectedCurrencyCode = (String) cas[1];
String expectedNoForeignCurrencyCode = (String) cas[2];
StringSegment segment = new StringSegment(input, true);
ParsedNumber result = new ParsedNumber();
matcher.match(segment, result);
assertEquals("Parsing " + input, expectedCurrencyCode, result.currencyCode);
assertEquals("Whole string on " + input,
expectedCurrencyCode == null ? 0 : input.length(),
result.charEnd);
{
StringSegment segment = new StringSegment(input, true);
ParsedNumber result = new ParsedNumber();
matcher.match(segment, result);
assertEquals("Parsing " + input,
expectedCurrencyCode,
result.currencyCode);
assertEquals("Whole string on " + input,
expectedCurrencyCode == null ? 0 : input.length(),
result.charEnd);
}
{
StringSegment segment = new StringSegment(input, true);
ParsedNumber result = new ParsedNumber();
matcherNoForeignCurrencies.match(segment, result);
assertEquals("[no foreign] Parsing " + input,
expectedNoForeignCurrencyCode,
result.currencyCode);
assertEquals("[no foreign] Whole string on " + input,
expectedNoForeignCurrencyCode == null ? 0 : input.length(),
result.charEnd);
}
}
}
@ -288,6 +307,7 @@ public class NumberParserTest {
factory.symbols = DecimalFormatSymbols.getInstance(ULocale.ENGLISH);
factory.ignorables = IgnorablesMatcher.DEFAULT;
factory.locale = ULocale.ENGLISH;
factory.parseFlags = 0;
Object[][] cases = {
{ false, "-", 1, "-" },