ICU-20512 Add extra matchers to handle empty currency symbols

This commit is contained in:
Shane F. Carr 2022-09-16 00:08:41 -07:00 committed by Peter Edberg
parent b734048799
commit 9dc1c020a1
8 changed files with 185 additions and 10 deletions

View file

@ -108,6 +108,10 @@ UnicodeString CurrencySymbols::getPluralName(StandardPlural::Form plural, UError
}
}
bool CurrencySymbols::hasEmptyCurrencySymbol() const {
return !fCurrencySymbol.isBogus() && fCurrencySymbol.isEmpty();
}
CurrencyUnit
icu::number::impl::resolveCurrency(const DecimalFormatProperties& properties, const Locale& locale,

View file

@ -41,6 +41,8 @@ class U_I18N_API CurrencySymbols : public UMemory {
UnicodeString getPluralName(StandardPlural::Form plural, UErrorCode& status) const;
bool hasEmptyCurrencySymbol() const;
protected:
// Required fields:
CurrencyUnit fCurrency;

View file

@ -284,6 +284,7 @@ void MutablePatternModifier::prepareAffix(bool isPrefix) {
fApproximately,
fPlural,
fPerMilleReplacesPercent,
false, // dropCurrencySymbols
currentAffix);
}

View file

@ -1056,7 +1056,9 @@ void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider&
PatternSignType patternSignType,
bool approximately,
StandardPlural::Form plural,
bool perMilleReplacesPercent, UnicodeString& output) {
bool perMilleReplacesPercent,
bool dropCurrencySymbols,
UnicodeString& output) {
// Should the output render '+' where '-' would normally appear in the pattern?
bool plusReplacesMinusSign = (patternSignType == PATTERN_SIGN_TYPE_POS_SIGN)
@ -1130,6 +1132,9 @@ void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider&
if (perMilleReplacesPercent && candidate == u'%') {
candidate = u'';
}
if (dropCurrencySymbols && candidate == u'\u00A4') {
continue;
}
output.append(candidate);
}
}

View file

@ -317,7 +317,9 @@ class U_I18N_API PatternStringUtils {
static void patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
PatternSignType patternSignType,
bool approximately,
StandardPlural::Form plural, bool perMilleReplacesPercent,
StandardPlural::Form plural,
bool perMilleReplacesPercent,
bool dropCurrencySymbols,
UnicodeString& output);
static PatternSignType resolveSignDisplay(UNumberSignDisplay signDisplay, Signum signum);

View file

@ -169,6 +169,10 @@ NumberParseMatcher* AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp,
return result;
}
bool AffixTokenMatcherWarehouse::hasEmptyCurrencySymbol() const {
return fSetupData->currencySymbols.hasEmptyCurrencySymbol();
}
CodePointMatcher::CodePointMatcher(UChar32 cp)
: fCp(cp) {}
@ -280,8 +284,16 @@ void AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patt
AffixPatternMatcher* posSuffix = nullptr;
// Pre-process the affix strings to resolve LDML rules like sign display.
for (int8_t typeInt = 0; typeInt < PATTERN_SIGN_TYPE_COUNT; typeInt++) {
auto type = static_cast<PatternSignType>(typeInt);
for (int8_t typeInt = 0; typeInt < PATTERN_SIGN_TYPE_COUNT * 2; typeInt++) {
auto type = static_cast<PatternSignType>(typeInt / 2);
bool dropCurrencySymbols = (typeInt % 2) == 1;
if (dropCurrencySymbols && !patternInfo.hasCurrencySign()) {
continue;
}
if (dropCurrencySymbols && !fTokenWarehouse->hasEmptyCurrencySymbol()) {
continue;
}
// Skip affixes in some cases
if (type == PATTERN_SIGN_TYPE_POS
@ -297,7 +309,7 @@ void AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patt
// TODO: Handle approximately sign?
bool hasPrefix = false;
PatternStringUtils::patternInfoToStringBuilder(
patternInfo, true, type, false, StandardPlural::OTHER, false, sb);
patternInfo, true, type, false, StandardPlural::OTHER, false, dropCurrencySymbols, sb);
fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
sb, *fTokenWarehouse, parseFlags, &hasPrefix, status);
AffixPatternMatcher* prefix = hasPrefix ? &fAffixPatternMatchers[numAffixPatternMatchers++]
@ -307,7 +319,7 @@ void AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patt
// TODO: Handle approximately sign?
bool hasSuffix = false;
PatternStringUtils::patternInfoToStringBuilder(
patternInfo, false, type, false, StandardPlural::OTHER, false, sb);
patternInfo, false, type, false, StandardPlural::OTHER, false, dropCurrencySymbols, sb);
fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
sb, *fTokenWarehouse, parseFlags, &hasSuffix, status);
AffixPatternMatcher* suffix = hasSuffix ? &fAffixPatternMatchers[numAffixPatternMatchers++]

View file

@ -101,6 +101,8 @@ class U_I18N_API AffixTokenMatcherWarehouse : public UMemory {
NumberParseMatcher* nextCodePointMatcher(UChar32 cp, UErrorCode& status);
bool hasEmptyCurrencySymbol() const;
private:
// NOTE: The following field may be unsafe to access after construction is done!
const AffixTokenMatcherSetupData* fSetupData;
@ -204,10 +206,12 @@ class AffixMatcherWarehouse {
UErrorCode& status);
private:
// 9 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix
AffixMatcher fAffixMatchers[9];
// 6 is the limit: positive, zero, and negative, a prefix and a suffix for each
AffixPatternMatcher fAffixPatternMatchers[6];
// 18 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix,
// and doubled since there may be an empty currency symbol
AffixMatcher fAffixMatchers[18];
// 6 is the limit: positive, zero, and negative, a prefix and a suffix for each,
// and doubled since there may be an empty currency symbol
AffixPatternMatcher fAffixPatternMatchers[12];
// Reference to the warehouse for tokens used by the AffixPatternMatchers
AffixTokenMatcherWarehouse* fTokenWarehouse;

View file

@ -78,6 +78,7 @@ static void TestSciNotationMaxFracCap(void);
static void TestMinIntMinFracZero(void);
static void Test21479_ExactCurrency(void);
static void Test22088_Ethiopic(void);
static void TestParseWithEmptyCurr(void);
#define TESTCASE(x) addTest(root, &x, "tsformat/cnumtst/" #x)
@ -121,6 +122,7 @@ void addNumForTest(TestNode** root)
TESTCASE(TestMinIntMinFracZero);
TESTCASE(Test21479_ExactCurrency);
TESTCASE(Test22088_Ethiopic);
TESTCASE(TestParseWithEmptyCurr);
}
/* test Parse int 64 */
@ -3634,4 +3636,147 @@ static void Test22088_Ethiopic(void) {
unum_close(nf3);
}
static void TestParseWithEmptyCurr(void) {
UErrorCode status = U_ZERO_ERROR;
UNumberFormat* unum = unum_open(UNUM_CURRENCY, NULL, 0, "en_US", NULL, &status);
if (U_FAILURE(status)) {
log_data_err("unum_open UNUM_CURRENCY for \"en_US\" fails with %s\n", u_errorName(status));
} else {
unum_setSymbol(unum, UNUM_CURRENCY_SYMBOL, u"", 0, &status);
if (U_FAILURE(status)) {
log_err("unum_setSymbol UNUM_CURRENCY_SYMBOL u\"\" fails with %s\n", u_errorName(status));
} else {
char bbuf[kBBufMax] = { 0 };
UChar curr[4] = { 0 };
int32_t ppos, blen;
double val;
const UChar* text = u"3";
status = U_ZERO_ERROR;
ppos = 0;
blen = unum_parseDecimal(unum, text, -1, &ppos, bbuf, kBBufMax, &status);
if (U_FAILURE(status)) {
log_err("unum_parseDecimal u\"3\" with empty curr symbol fails with %s, ppos %d\n", u_errorName(status), ppos);
} else if (ppos != 1 || blen != 1 || bbuf[0] != '3') {
log_err("unum_parseDecimal expect ppos 1, blen 1, str 3; get %d, %d, %s\n", ppos, blen, bbuf);
}
status = U_ZERO_ERROR;
ppos = 0;
val = unum_parseDouble(unum, text, -1, &ppos, &status);
if (U_FAILURE(status)) {
log_err("unum_parseDouble u\"3\" with empty curr symbol fails with %s, ppos %d\n", u_errorName(status), ppos);
} else if (ppos != 1 || val != 3.0) {
log_err("unum_parseDouble expect ppos 1, val 3.0; get %d, %.2f\n", ppos, val);
}
status = U_ZERO_ERROR;
ppos = 0;
val = unum_parseDoubleCurrency(unum, text, -1, &ppos, curr, &status);
if (U_SUCCESS(status)) {
log_err("unum_parseDoubleCurrency u\"3\" with empty curr symbol succeeds, get ppos %d, val %.2f\n", ppos, val);
}
}
unum_close(unum);
}
// "¤#,##0.00" "¤ #,##0.00" "#,##0.00 ¤" "#,##,##0.00¤"
static const char* locales[] = {"en_US", "nb_NO", "cs_CZ", "bn_BD", NULL };
const char ** localesPtr = locales;
const char* locale;
while ((locale = *localesPtr++) != NULL) {
status = U_ZERO_ERROR;
unum = unum_open(UNUM_CURRENCY, NULL, 0, locale, NULL, &status);
if (U_FAILURE(status)) {
log_data_err("locale %s unum_open UNUM_CURRENCY fails with %s\n", locale, u_errorName(status));
} else {
UChar ubuf[kUBufMax];
int32_t ppos, ulen;
const double posValToUse = 37.0;
const double negValToUse = -3.0;
double val;
status = U_ZERO_ERROR;
unum_setSymbol(unum, UNUM_CURRENCY_SYMBOL, u"", 0, &status);
if (U_FAILURE(status)) {
log_err("locale %s unum_setSymbol UNUM_CURRENCY_SYMBOL u\"\" fails with %s, skipping\n", locale, u_errorName(status));
continue;
}
status = U_ZERO_ERROR;
ulen = unum_formatDouble(unum, posValToUse, ubuf, kUBufMax, NULL, &status);
if (U_FAILURE(status)) {
log_err("locale %s unum_formatDouble %.1f fails with %s, skipping\n", locale, posValToUse, u_errorName(status));
continue;
}
status = U_ZERO_ERROR;
ppos = 0;
val = unum_parseDouble(unum, ubuf, ulen, &ppos, &status);
if (U_FAILURE(status)) {
log_err("locale %s unum_parseDouble fails with %s, ppos %d, expect %.1f\n", locale, u_errorName(status), ppos, posValToUse);
} else if (ppos != ulen || val != posValToUse) {
log_err("locale %s unum_parseDouble expect ppos %d, val %.1f; get %d, %.2f\n", locale, ulen, posValToUse, ppos, val);
}
status = U_ZERO_ERROR;
ulen = unum_formatDouble(unum, negValToUse, ubuf, kUBufMax, NULL, &status);
if (U_FAILURE(status)) {
log_err("locale %s unum_formatDouble %.1f fails with %s, skipping\n", locale, negValToUse, u_errorName(status));
continue;
}
status = U_ZERO_ERROR;
ppos = 0;
val = unum_parseDouble(unum, ubuf, ulen, &ppos, &status);
if (U_FAILURE(status)) {
log_err("locale %s unum_parseDouble fails with %s, ppos %d, expect %.1f\n", locale, u_errorName(status), ppos, negValToUse);
} else if (ppos != ulen || val != negValToUse) {
log_err("locale %s unum_parseDouble expect ppos %d, val %.1f; get %d, %.2f\n", locale, ulen, negValToUse, ppos, val);
}
status = U_ZERO_ERROR;
unum_applyPattern(unum, false, u"#,##0.00¤", -1, NULL, &status);
if (U_FAILURE(status)) {
log_err("locale %s unum_applyPattern \"#,##0.00¤\" fails with %s, skipping\n", locale, u_errorName(status));
continue;
}
status = U_ZERO_ERROR;
ulen = unum_formatDouble(unum, posValToUse, ubuf, kUBufMax, NULL, &status);
if (U_FAILURE(status)) {
log_err("locale %s with \"#,##0.00¤\" unum_formatDouble %.1f fails with %s, skipping\n", locale, posValToUse, u_errorName(status));
continue;
}
status = U_ZERO_ERROR;
ppos = 0;
val = unum_parseDouble(unum, ubuf, ulen, &ppos, &status);
if (U_FAILURE(status)) {
log_err("locale %s with \"#,##0.00¤\" unum_parseDouble fails with %s, ppos %d, expect %.1f\n", locale, u_errorName(status), ppos, posValToUse);
} else if (ppos != ulen || val != posValToUse) {
log_err("locale %s with \"#,##0.00¤\" unum_parseDouble expect ppos %d, val %.1f; get %d, %.2f\n", locale, ulen, posValToUse, ppos, val);
}
status = U_ZERO_ERROR;
ulen = unum_formatDouble(unum, negValToUse, ubuf, kUBufMax, NULL, &status);
if (U_FAILURE(status)) {
log_err("locale %s with \"#,##0.00¤\" unum_formatDouble %.1f fails with %s, skipping\n", locale, negValToUse, u_errorName(status));
continue;
}
status = U_ZERO_ERROR;
ppos = 0;
val = unum_parseDouble(unum, ubuf, ulen, &ppos, &status);
if (U_FAILURE(status)) {
log_err("locale %s with \"#,##0.00¤\" unum_parseDouble fails with %s, ppos %d, expect %.1f\n", locale, u_errorName(status), ppos, negValToUse);
} else if (ppos != ulen || val != negValToUse) {
log_err("locale %s with \"#,##0.00¤\" unum_parseDouble expect ppos %d, val %.1f; get %d, %.2f\n", locale, ulen, negValToUse, ppos, val);
}
unum_close(unum);
}
}
}
#endif /* #if !UCONFIG_NO_FORMATTING */