From 9dc1c020a19a5b950af3e6304b97e7396b92aaaf Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Fri, 16 Sep 2022 00:08:41 -0700 Subject: [PATCH] ICU-20512 Add extra matchers to handle empty currency symbols --- icu4c/source/i18n/number_currencysymbols.cpp | 4 + icu4c/source/i18n/number_currencysymbols.h | 2 + icu4c/source/i18n/number_patternmodifier.cpp | 1 + icu4c/source/i18n/number_patternstring.cpp | 7 +- icu4c/source/i18n/number_patternstring.h | 4 +- icu4c/source/i18n/numparse_affixes.cpp | 20 ++- icu4c/source/i18n/numparse_affixes.h | 12 +- icu4c/source/test/cintltst/cnumtst.c | 145 +++++++++++++++++++ 8 files changed, 185 insertions(+), 10 deletions(-) diff --git a/icu4c/source/i18n/number_currencysymbols.cpp b/icu4c/source/i18n/number_currencysymbols.cpp index da1812f49f0..8d5127556be 100644 --- a/icu4c/source/i18n/number_currencysymbols.cpp +++ b/icu4c/source/i18n/number_currencysymbols.cpp @@ -108,6 +108,10 @@ UnicodeString CurrencySymbols::getPluralName(StandardPlural::Form plural, UError } } +bool CurrencySymbols::hasEmptyCurrencySymbol() const { + return !fCurrencySymbol.isBogus() && fCurrencySymbol.isEmpty(); +} + CurrencyUnit icu::number::impl::resolveCurrency(const DecimalFormatProperties& properties, const Locale& locale, diff --git a/icu4c/source/i18n/number_currencysymbols.h b/icu4c/source/i18n/number_currencysymbols.h index 7e38fdf8287..c2223bd0f0b 100644 --- a/icu4c/source/i18n/number_currencysymbols.h +++ b/icu4c/source/i18n/number_currencysymbols.h @@ -41,6 +41,8 @@ class U_I18N_API CurrencySymbols : public UMemory { UnicodeString getPluralName(StandardPlural::Form plural, UErrorCode& status) const; + bool hasEmptyCurrencySymbol() const; + protected: // Required fields: CurrencyUnit fCurrency; diff --git a/icu4c/source/i18n/number_patternmodifier.cpp b/icu4c/source/i18n/number_patternmodifier.cpp index b6543b262b4..088a30ecd7f 100644 --- a/icu4c/source/i18n/number_patternmodifier.cpp +++ b/icu4c/source/i18n/number_patternmodifier.cpp @@ -284,6 +284,7 @@ void MutablePatternModifier::prepareAffix(bool isPrefix) { fApproximately, fPlural, fPerMilleReplacesPercent, + false, // dropCurrencySymbols currentAffix); } diff --git a/icu4c/source/i18n/number_patternstring.cpp b/icu4c/source/i18n/number_patternstring.cpp index 2738895d8ad..557a7320856 100644 --- a/icu4c/source/i18n/number_patternstring.cpp +++ b/icu4c/source/i18n/number_patternstring.cpp @@ -1056,7 +1056,9 @@ void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& PatternSignType patternSignType, bool approximately, StandardPlural::Form plural, - bool perMilleReplacesPercent, UnicodeString& output) { + bool perMilleReplacesPercent, + bool dropCurrencySymbols, + UnicodeString& output) { // Should the output render '+' where '-' would normally appear in the pattern? bool plusReplacesMinusSign = (patternSignType == PATTERN_SIGN_TYPE_POS_SIGN) @@ -1130,6 +1132,9 @@ void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& if (perMilleReplacesPercent && candidate == u'%') { candidate = u'‰'; } + if (dropCurrencySymbols && candidate == u'\u00A4') { + continue; + } output.append(candidate); } } diff --git a/icu4c/source/i18n/number_patternstring.h b/icu4c/source/i18n/number_patternstring.h index 94afda37229..08696697847 100644 --- a/icu4c/source/i18n/number_patternstring.h +++ b/icu4c/source/i18n/number_patternstring.h @@ -317,7 +317,9 @@ class U_I18N_API PatternStringUtils { static void patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix, PatternSignType patternSignType, bool approximately, - StandardPlural::Form plural, bool perMilleReplacesPercent, + StandardPlural::Form plural, + bool perMilleReplacesPercent, + bool dropCurrencySymbols, UnicodeString& output); static PatternSignType resolveSignDisplay(UNumberSignDisplay signDisplay, Signum signum); diff --git a/icu4c/source/i18n/numparse_affixes.cpp b/icu4c/source/i18n/numparse_affixes.cpp index 14140065983..ad3d48b4731 100644 --- a/icu4c/source/i18n/numparse_affixes.cpp +++ b/icu4c/source/i18n/numparse_affixes.cpp @@ -169,6 +169,10 @@ NumberParseMatcher* AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp, return result; } +bool AffixTokenMatcherWarehouse::hasEmptyCurrencySymbol() const { + return fSetupData->currencySymbols.hasEmptyCurrencySymbol(); +} + CodePointMatcher::CodePointMatcher(UChar32 cp) : fCp(cp) {} @@ -280,8 +284,16 @@ void AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patt AffixPatternMatcher* posSuffix = nullptr; // Pre-process the affix strings to resolve LDML rules like sign display. - for (int8_t typeInt = 0; typeInt < PATTERN_SIGN_TYPE_COUNT; typeInt++) { - auto type = static_cast(typeInt); + for (int8_t typeInt = 0; typeInt < PATTERN_SIGN_TYPE_COUNT * 2; typeInt++) { + auto type = static_cast(typeInt / 2); + bool dropCurrencySymbols = (typeInt % 2) == 1; + + if (dropCurrencySymbols && !patternInfo.hasCurrencySign()) { + continue; + } + if (dropCurrencySymbols && !fTokenWarehouse->hasEmptyCurrencySymbol()) { + continue; + } // Skip affixes in some cases if (type == PATTERN_SIGN_TYPE_POS @@ -297,7 +309,7 @@ void AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patt // TODO: Handle approximately sign? bool hasPrefix = false; PatternStringUtils::patternInfoToStringBuilder( - patternInfo, true, type, false, StandardPlural::OTHER, false, sb); + patternInfo, true, type, false, StandardPlural::OTHER, false, dropCurrencySymbols, sb); fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern( sb, *fTokenWarehouse, parseFlags, &hasPrefix, status); AffixPatternMatcher* prefix = hasPrefix ? &fAffixPatternMatchers[numAffixPatternMatchers++] @@ -307,7 +319,7 @@ void AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patt // TODO: Handle approximately sign? bool hasSuffix = false; PatternStringUtils::patternInfoToStringBuilder( - patternInfo, false, type, false, StandardPlural::OTHER, false, sb); + patternInfo, false, type, false, StandardPlural::OTHER, false, dropCurrencySymbols, sb); fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern( sb, *fTokenWarehouse, parseFlags, &hasSuffix, status); AffixPatternMatcher* suffix = hasSuffix ? &fAffixPatternMatchers[numAffixPatternMatchers++] diff --git a/icu4c/source/i18n/numparse_affixes.h b/icu4c/source/i18n/numparse_affixes.h index a82b731ab5b..ad731ed5d80 100644 --- a/icu4c/source/i18n/numparse_affixes.h +++ b/icu4c/source/i18n/numparse_affixes.h @@ -101,6 +101,8 @@ class U_I18N_API AffixTokenMatcherWarehouse : public UMemory { NumberParseMatcher* nextCodePointMatcher(UChar32 cp, UErrorCode& status); + bool hasEmptyCurrencySymbol() const; + private: // NOTE: The following field may be unsafe to access after construction is done! const AffixTokenMatcherSetupData* fSetupData; @@ -204,10 +206,12 @@ class AffixMatcherWarehouse { UErrorCode& status); private: - // 9 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix - AffixMatcher fAffixMatchers[9]; - // 6 is the limit: positive, zero, and negative, a prefix and a suffix for each - AffixPatternMatcher fAffixPatternMatchers[6]; + // 18 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix, + // and doubled since there may be an empty currency symbol + AffixMatcher fAffixMatchers[18]; + // 6 is the limit: positive, zero, and negative, a prefix and a suffix for each, + // and doubled since there may be an empty currency symbol + AffixPatternMatcher fAffixPatternMatchers[12]; // Reference to the warehouse for tokens used by the AffixPatternMatchers AffixTokenMatcherWarehouse* fTokenWarehouse; diff --git a/icu4c/source/test/cintltst/cnumtst.c b/icu4c/source/test/cintltst/cnumtst.c index 615deacf31f..6448760f2c6 100644 --- a/icu4c/source/test/cintltst/cnumtst.c +++ b/icu4c/source/test/cintltst/cnumtst.c @@ -78,6 +78,7 @@ static void TestSciNotationMaxFracCap(void); static void TestMinIntMinFracZero(void); static void Test21479_ExactCurrency(void); static void Test22088_Ethiopic(void); +static void TestParseWithEmptyCurr(void); #define TESTCASE(x) addTest(root, &x, "tsformat/cnumtst/" #x) @@ -121,6 +122,7 @@ void addNumForTest(TestNode** root) TESTCASE(TestMinIntMinFracZero); TESTCASE(Test21479_ExactCurrency); TESTCASE(Test22088_Ethiopic); + TESTCASE(TestParseWithEmptyCurr); } /* test Parse int 64 */ @@ -3634,4 +3636,147 @@ static void Test22088_Ethiopic(void) { unum_close(nf3); } +static void TestParseWithEmptyCurr(void) { + UErrorCode status = U_ZERO_ERROR; + UNumberFormat* unum = unum_open(UNUM_CURRENCY, NULL, 0, "en_US", NULL, &status); + if (U_FAILURE(status)) { + log_data_err("unum_open UNUM_CURRENCY for \"en_US\" fails with %s\n", u_errorName(status)); + } else { + unum_setSymbol(unum, UNUM_CURRENCY_SYMBOL, u"", 0, &status); + if (U_FAILURE(status)) { + log_err("unum_setSymbol UNUM_CURRENCY_SYMBOL u\"\" fails with %s\n", u_errorName(status)); + } else { + char bbuf[kBBufMax] = { 0 }; + UChar curr[4] = { 0 }; + int32_t ppos, blen; + double val; + const UChar* text = u"3"; + + status = U_ZERO_ERROR; + ppos = 0; + blen = unum_parseDecimal(unum, text, -1, &ppos, bbuf, kBBufMax, &status); + if (U_FAILURE(status)) { + log_err("unum_parseDecimal u\"3\" with empty curr symbol fails with %s, ppos %d\n", u_errorName(status), ppos); + } else if (ppos != 1 || blen != 1 || bbuf[0] != '3') { + log_err("unum_parseDecimal expect ppos 1, blen 1, str 3; get %d, %d, %s\n", ppos, blen, bbuf); + } + + status = U_ZERO_ERROR; + ppos = 0; + val = unum_parseDouble(unum, text, -1, &ppos, &status); + if (U_FAILURE(status)) { + log_err("unum_parseDouble u\"3\" with empty curr symbol fails with %s, ppos %d\n", u_errorName(status), ppos); + } else if (ppos != 1 || val != 3.0) { + log_err("unum_parseDouble expect ppos 1, val 3.0; get %d, %.2f\n", ppos, val); + } + + status = U_ZERO_ERROR; + ppos = 0; + val = unum_parseDoubleCurrency(unum, text, -1, &ppos, curr, &status); + if (U_SUCCESS(status)) { + log_err("unum_parseDoubleCurrency u\"3\" with empty curr symbol succeeds, get ppos %d, val %.2f\n", ppos, val); + } + } + unum_close(unum); + } + + // "¤#,##0.00" "¤ #,##0.00" "#,##0.00 ¤" "#,##,##0.00¤" + static const char* locales[] = {"en_US", "nb_NO", "cs_CZ", "bn_BD", NULL }; + const char ** localesPtr = locales; + const char* locale; + while ((locale = *localesPtr++) != NULL) { + status = U_ZERO_ERROR; + unum = unum_open(UNUM_CURRENCY, NULL, 0, locale, NULL, &status); + if (U_FAILURE(status)) { + log_data_err("locale %s unum_open UNUM_CURRENCY fails with %s\n", locale, u_errorName(status)); + } else { + UChar ubuf[kUBufMax]; + int32_t ppos, ulen; + const double posValToUse = 37.0; + const double negValToUse = -3.0; + double val; + + status = U_ZERO_ERROR; + unum_setSymbol(unum, UNUM_CURRENCY_SYMBOL, u"", 0, &status); + if (U_FAILURE(status)) { + log_err("locale %s unum_setSymbol UNUM_CURRENCY_SYMBOL u\"\" fails with %s, skipping\n", locale, u_errorName(status)); + continue; + } + + status = U_ZERO_ERROR; + ulen = unum_formatDouble(unum, posValToUse, ubuf, kUBufMax, NULL, &status); + if (U_FAILURE(status)) { + log_err("locale %s unum_formatDouble %.1f fails with %s, skipping\n", locale, posValToUse, u_errorName(status)); + continue; + } + + status = U_ZERO_ERROR; + ppos = 0; + val = unum_parseDouble(unum, ubuf, ulen, &ppos, &status); + if (U_FAILURE(status)) { + log_err("locale %s unum_parseDouble fails with %s, ppos %d, expect %.1f\n", locale, u_errorName(status), ppos, posValToUse); + } else if (ppos != ulen || val != posValToUse) { + log_err("locale %s unum_parseDouble expect ppos %d, val %.1f; get %d, %.2f\n", locale, ulen, posValToUse, ppos, val); + } + + status = U_ZERO_ERROR; + ulen = unum_formatDouble(unum, negValToUse, ubuf, kUBufMax, NULL, &status); + if (U_FAILURE(status)) { + log_err("locale %s unum_formatDouble %.1f fails with %s, skipping\n", locale, negValToUse, u_errorName(status)); + continue; + } + + status = U_ZERO_ERROR; + ppos = 0; + val = unum_parseDouble(unum, ubuf, ulen, &ppos, &status); + if (U_FAILURE(status)) { + log_err("locale %s unum_parseDouble fails with %s, ppos %d, expect %.1f\n", locale, u_errorName(status), ppos, negValToUse); + } else if (ppos != ulen || val != negValToUse) { + log_err("locale %s unum_parseDouble expect ppos %d, val %.1f; get %d, %.2f\n", locale, ulen, negValToUse, ppos, val); + } + + status = U_ZERO_ERROR; + unum_applyPattern(unum, false, u"#,##0.00¤", -1, NULL, &status); + if (U_FAILURE(status)) { + log_err("locale %s unum_applyPattern \"#,##0.00¤\" fails with %s, skipping\n", locale, u_errorName(status)); + continue; + } + + status = U_ZERO_ERROR; + ulen = unum_formatDouble(unum, posValToUse, ubuf, kUBufMax, NULL, &status); + if (U_FAILURE(status)) { + log_err("locale %s with \"#,##0.00¤\" unum_formatDouble %.1f fails with %s, skipping\n", locale, posValToUse, u_errorName(status)); + continue; + } + + status = U_ZERO_ERROR; + ppos = 0; + val = unum_parseDouble(unum, ubuf, ulen, &ppos, &status); + if (U_FAILURE(status)) { + log_err("locale %s with \"#,##0.00¤\" unum_parseDouble fails with %s, ppos %d, expect %.1f\n", locale, u_errorName(status), ppos, posValToUse); + } else if (ppos != ulen || val != posValToUse) { + log_err("locale %s with \"#,##0.00¤\" unum_parseDouble expect ppos %d, val %.1f; get %d, %.2f\n", locale, ulen, posValToUse, ppos, val); + } + + status = U_ZERO_ERROR; + ulen = unum_formatDouble(unum, negValToUse, ubuf, kUBufMax, NULL, &status); + if (U_FAILURE(status)) { + log_err("locale %s with \"#,##0.00¤\" unum_formatDouble %.1f fails with %s, skipping\n", locale, negValToUse, u_errorName(status)); + continue; + } + + status = U_ZERO_ERROR; + ppos = 0; + val = unum_parseDouble(unum, ubuf, ulen, &ppos, &status); + if (U_FAILURE(status)) { + log_err("locale %s with \"#,##0.00¤\" unum_parseDouble fails with %s, ppos %d, expect %.1f\n", locale, u_errorName(status), ppos, negValToUse); + } else if (ppos != ulen || val != negValToUse) { + log_err("locale %s with \"#,##0.00¤\" unum_parseDouble expect ppos %d, val %.1f; get %d, %.2f\n", locale, ulen, negValToUse, ppos, val); + } + + unum_close(unum); + } + } +} + #endif /* #if !UCONFIG_NO_FORMATTING */