From 13bf3c8313ff3bee2bfb72185b8615c636782aac Mon Sep 17 00:00:00 2001 From: Rich Gillam Date: Thu, 14 Mar 2024 17:53:38 -0700 Subject: [PATCH] ICU-22611 Fixed the RBNF MultiplierSubstitution to only perform floor() on the value being formatted when the substitution is using a DecimalFormat and its owning rule also has a modulus substitution. Took out a redundant call to floor(). Added a hack to allow the caller to change the rounding behavior with setRoundingMode(). Added appropriate unit tests. Added additional documentation of the behavior to the API docs. --- icu4c/source/i18n/nfrule.cpp | 8 ++ icu4c/source/i18n/nfrule.h | 5 + icu4c/source/i18n/nfsubs.cpp | 50 +++++----- icu4c/source/i18n/unicode/rbnf.h | 11 ++- icu4c/source/test/intltest/itrbnf.cpp | 93 +++++++++++++++++++ icu4c/source/test/intltest/itrbnf.h | 6 ++ .../com/ibm/icu/dev/test/format/RbnfTest.java | 52 +++++++++++ .../main/java/com/ibm/icu/text/NFRule.java | 9 +- .../java/com/ibm/icu/text/NFSubstitution.java | 57 ++++++------ .../ibm/icu/text/RuleBasedNumberFormat.java | 11 ++- 10 files changed, 242 insertions(+), 60 deletions(-) diff --git a/icu4c/source/i18n/nfrule.cpp b/icu4c/source/i18n/nfrule.cpp index c82b6107a83..1c78e6b1a25 100644 --- a/icu4c/source/i18n/nfrule.cpp +++ b/icu4c/source/i18n/nfrule.cpp @@ -721,6 +721,14 @@ int64_t NFRule::getDivisor() const return util64_pow(radix, exponent); } +/** + * Internal function to facilitate numerical rounding. See the explanation in MultiplierSubstitution::transformNumber(). + */ +bool NFRule::hasModulusSubstitution() const +{ + return (sub1 != nullptr && sub1->isModulusSubstitution()) || (sub2 != nullptr && sub2->isModulusSubstitution()); +} + //----------------------------------------------------------------------- // formatting diff --git a/icu4c/source/i18n/nfrule.h b/icu4c/source/i18n/nfrule.h index fda74fabf2c..2ae074cd860 100644 --- a/icu4c/source/i18n/nfrule.h +++ b/icu4c/source/i18n/nfrule.h @@ -66,6 +66,8 @@ public: char16_t getDecimalPoint() const { return decimalPoint; } int64_t getDivisor() const; + + bool hasModulusSubstitution() const; void doFormat(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const; void doFormat(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const; @@ -117,6 +119,9 @@ private: NFRule(const NFRule &other); // forbid copying of this class NFRule &operator=(const NFRule &other); // forbid copying of this class + + // TODO: temporary hack to allow MultiplierSubstitution to get to formatter's rounding mode + friend class MultiplierSubstitution; }; U_NAMESPACE_END diff --git a/icu4c/source/i18n/nfsubs.cpp b/icu4c/source/i18n/nfsubs.cpp index 5256b8a39bb..ecd3b7b68fc 100644 --- a/icu4c/source/i18n/nfsubs.cpp +++ b/icu4c/source/i18n/nfsubs.cpp @@ -73,6 +73,7 @@ SameValueSubstitution::~SameValueSubstitution() {} class MultiplierSubstitution : public NFSubstitution { int64_t divisor; + const NFRule* owningRule; public: MultiplierSubstitution(int32_t _pos, @@ -80,7 +81,7 @@ public: const NFRuleSet* _ruleSet, const UnicodeString& description, UErrorCode& status) - : NFSubstitution(_pos, _ruleSet, description, status), divisor(rule->getDivisor()) + : NFSubstitution(_pos, _ruleSet, description, status), divisor(rule->getDivisor()), owningRule(rule) { if (divisor == 0) { status = U_PARSE_ERROR; @@ -103,25 +104,22 @@ public: } virtual double transformNumber(double number) const override { - bool doFloor = getRuleSet() != nullptr; - if (!doFloor) { - // This is a HACK that partially addresses ICU-22313. The original code wanted us to do - // floor() on the result if we were passing it to another rule set, but not if we were passing - // it to a DecimalFormat. But the DurationRules rule set has multiplier substitutions where - // we DO want to do the floor() operation. What we REALLY want is to do floor() any time - // the owning rule also has a ModulusSubsitution, but we don't have access to that information - // here, so instead we're doing a floor() any time the DecimalFormat has maxFracDigits equal to - // 0. This seems to work with our existing rule sets, but could be a problem in the future, - // but the "real" fix for DurationRules isn't worth doing, since we're deprecating DurationRules - // anyway. This is enough to keep it from being egregiously wrong, without obvious side - // effects. --rtg 8/16/23 - const DecimalFormat* decimalFormat = getNumberFormat(); - if (decimalFormat == nullptr || decimalFormat->getMaximumFractionDigits() == 0) { - doFloor = true; - } - } - - if (doFloor) { + // Most of the time, when a number is handled by an NFSubstitution, we do a floor() on it, but + // if a substitution uses a DecimalFormat to format the number instead of a ruleset, we generally + // don't want to do a floor()-- we want to keep the value intact so that the DecimalFormat can + // either include the fractional part or round properly. The big exception to this is here in + // MultiplierSubstitution. If the rule includes two substitutions, the MultiplierSubstitution + // (which is handling the larger part of the number) really _does_ want to do a floor(), because + // the ModulusSubstitution (which is handling the smaller part of the number) will take + // care of the fractional part. (Consider something like `1/12: <0< feet >0.0> inches;`.) + // But if there is no ModulusSubstitution, we're shortening the number in some way-- the "larger part" + // of the number is the only part we're keeping. Even if the DecimalFormat doesn't include the + // fractional part in its output, we still want it to round. (Consider something like `1/1000: <0hasModulusSubstitution() || owningRule->formatter->getRoundingMode() == NumberFormat::kRoundFloor) { return uprv_floor(number / divisor); } else { return number / divisor; @@ -598,17 +596,11 @@ NFSubstitution::doSubstitution(int64_t number, UnicodeString& toInsertInto, int3 ruleSet->format(transformNumber(number), toInsertInto, _pos + this->pos, recursionCount, status); } else if (numberFormat != nullptr) { if (number <= MAX_INT64_IN_DOUBLE) { - // or perform the transformation on the number (preserving - // the result's fractional part if the formatter it set - // to show it), then use that formatter's format() method + // or perform the transformation on the number, + // then use that formatter's format() method // to format the result - double numberToFormat = transformNumber((double)number); - if (numberFormat->getMaximumFractionDigits() == 0) { - numberToFormat = uprv_floor(numberToFormat); - } - UnicodeString temp; - numberFormat->format(numberToFormat, temp, status); + numberFormat->format(transformNumber((double)number), temp, status); toInsertInto.insert(_pos + this->pos, temp); } else { diff --git a/icu4c/source/i18n/unicode/rbnf.h b/icu4c/source/i18n/unicode/rbnf.h index c762c921360..f42d91d776f 100644 --- a/icu4c/source/i18n/unicode/rbnf.h +++ b/icu4c/source/i18n/unicode/rbnf.h @@ -436,7 +436,16 @@ enum URBNFRuleSetTag { * * << * in normal rule - * Divide the number by the rule's divisor and format the quotient + * Divide the number by the rule's divisor, perform floor() on the quotient, + * and format the resulting value.
+ * If there is a DecimalFormat pattern between the < characters and the + * rule does NOT also contain a >> substitution, we DON'T perform + * floor() on the quotient-- the quotient is passed through to the DecimalFormat + * intact. That is, for the value 1,900:
+ * - "1/1000: << thousand;" will produce "one thousand"
+ * - "1/1000: <0< thousand;" will produce "2 thousand" (NOT "1 thousand")
+ * - "1/1000: <0< seconds >0> milliseconds;" will produce "1 second 900 milliseconds" + * * * * diff --git a/icu4c/source/test/intltest/itrbnf.cpp b/icu4c/source/test/intltest/itrbnf.cpp index 3a2383bcd05..16046864bb5 100644 --- a/icu4c/source/test/intltest/itrbnf.cpp +++ b/icu4c/source/test/intltest/itrbnf.cpp @@ -79,6 +79,7 @@ void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name, TESTCASE(27, TestMinMaxIntegerDigitsIgnored); TESTCASE(28, TestNorwegianSpellout); TESTCASE(29, TestNumberingSystem); + TESTCASE(30, TestDFRounding); #else TESTCASE(0, TestRBNFDisabled); #endif @@ -1272,6 +1273,73 @@ IntlTestRBNF::TestDurations() delete formatter; } +void IntlTestRBNF::TestDFRounding() +{ + // test for ICU-22611 + UParseError parseError; + UErrorCode err = U_ZERO_ERROR; + + // no decimal places + LocalPointer nf0(new RuleBasedNumberFormat(u"1000/1000: <##K<;", parseError, err)); + if (U_FAILURE(err)) { + errcheckln(err, "FAIL: could not construct formatter - %s", u_errorName(err)); + } else { + static const char* const integerTestData[][2] = { + { "-1400", "-1K" }, + { "-1900", "-2K" }, + { "1400", "1K" }, + { "1900", "2K" }, + { nullptr, nullptr } + }; + doTest(nf0.getAlias(), integerTestData, false); + } + + // 1 decimal place + LocalPointer nf1(new RuleBasedNumberFormat(u"1000/1000: <##.0K<;", parseError, err)); + if (U_FAILURE(err)) { + errcheckln(err, "FAIL: could not construct formatter - %s", u_errorName(err)); + } else { + static const char* const oneDecimalPlaceTestData[][2] = { + { "-1440", "-1.4K" }, + { "1890", "1.9K" }, + { nullptr, nullptr } + }; + doTest(nf1.getAlias(), oneDecimalPlaceTestData, false); + } + + // with modulus substitution + LocalPointer nfMod(new RuleBasedNumberFormat(u"1000/1000: <####>; -x: ->>;", parseError, err)); + if (U_FAILURE(err)) { + errcheckln(err, "FAIL: could not construct formatter - %s", u_errorName(err)); + } else { + static const char* const integerTestData[][2] = { + { "-1400", "-1K400" }, + { "-1900", "-1K900" }, + { "1400", "1K400" }, + { "1900", "1K900" }, + { nullptr, nullptr } + }; + doTest(nfMod.getAlias(), integerTestData, false); + } + + // no decimal places, but with rounding mode set to ROUND_FLOOR + LocalPointer nfFloor(new RuleBasedNumberFormat(u"1000/1000: <##K<;", parseError, err)); + nfFloor->setMaximumFractionDigits(0); + nfFloor->setRoundingMode(NumberFormat::kRoundFloor); + if (U_FAILURE(err)) { + errcheckln(err, "FAIL: could not construct formatter - %s", u_errorName(err)); + } else { + static const char* const integerTestData[][2] = { + { "-1400", "-2K" }, + { "-1900", "-2K" }, + { "1400", "1K" }, + { "1900", "1K" }, + { nullptr, nullptr } + }; + doTest(nfFloor.getAlias(), integerTestData, false); + } +} + void IntlTestRBNF::TestSpanishSpellout() { @@ -1565,6 +1633,7 @@ IntlTestRBNF::TestGermanSpellout() { "200", "zwei\\u00ADhundert" }, { "579", "f\\u00fcnf\\u00ADhundert\\u00ADneun\\u00ADund\\u00ADsiebzig" }, { "1,000", "ein\\u00ADtausend" }, + { "1,101", "ein\\u00adtausend\\u00adein\\u00adhundert\\u00adeins" }, { "2,000", "zwei\\u00ADtausend" }, { "3,004", "drei\\u00ADtausend\\u00ADvier" }, { "4,567", "vier\\u00ADtausend\\u00ADf\\u00fcnf\\u00ADhundert\\u00ADsieben\\u00ADund\\u00ADsechzig" }, @@ -1583,6 +1652,30 @@ IntlTestRBNF::TestGermanSpellout() }; doLenientParseTest(formatter, lpTestData); #endif + + static const char* testDataYear[][2] = { + { "101", "ein\\u00adhundert\\u00adeins" }, + { "900", "neun\\u00adhundert" }, + { "1,001", "ein\\u00adtausend\\u00adeins" }, + { "1,100", "elf\\u00adhundert" }, + { "1,101", "elf\\u00adhundert\\u00adeins" }, + { "1,234", "zw\\u00f6lf\\u00adhundert\\u00advier\\u00adund\\u00addrei\\u00dfig" }, + { "2,001", "zwei\\u00adtausend\\u00adeins" }, + { "10,001", "zehn\\u00adtausend\\u00adeins" }, + { "-100", "minus ein\\u00adhundert" }, + { "12.34", "12,3" }, + { nullptr, nullptr } + }; + + status = U_ZERO_ERROR; + formatter->setDefaultRuleSet("%spellout-numbering-year", status); + if (U_SUCCESS(status)) { + logln("testing year rules"); + doTest(formatter, testDataYear, false); + } + else { + errln("Can't test year rules"); + } } delete formatter; } diff --git a/icu4c/source/test/intltest/itrbnf.h b/icu4c/source/test/intltest/itrbnf.h index 96147ea224d..fd6e4107817 100644 --- a/icu4c/source/test/intltest/itrbnf.h +++ b/icu4c/source/test/intltest/itrbnf.h @@ -60,6 +60,12 @@ class IntlTestRBNF : public IntlTest { * Perform a simple spot check on the duration-formatting rules */ void TestDurations(); + + /** + * Test that rounding works correctly on multiplier substitutions that use + * a DecimalFormat. + */ + void TestDFRounding(); /** * Perform a simple spot check on the Spanish spellout rules diff --git a/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/RbnfTest.java b/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/RbnfTest.java index 74a812d3bf1..15d0a2dc2f9 100644 --- a/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/RbnfTest.java +++ b/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/RbnfTest.java @@ -376,6 +376,40 @@ public class RbnfTest extends CoreTestFmwk { doParsingTest(formatter, testDataLenient, true); } + @Test + public void TestDFRounding() { + // test for ICU-22611 + RuleBasedNumberFormat nf; + + // no decimal places + nf = new RuleBasedNumberFormat("1000/1000: <##K<;"); + assertEquals("-1K", nf.format(-1400)); + assertEquals("-2K", nf.format(-1900)); + assertEquals("1K", nf.format(1400)); + assertEquals("2K", nf.format(1900)); + + // 1 decimal place + nf = new RuleBasedNumberFormat("1000/1000: <##.0K<;"); + assertEquals("-1.4K", nf.format(-1440)); + assertEquals("1.9K", nf.format(1890)); + + // with modulus substitution + nf = new RuleBasedNumberFormat("1000/1000: <####>; -x: ->>;"); + assertEquals("-1K400", nf.format(-1400)); + assertEquals("-1K900", nf.format(-1900)); + assertEquals("1K400", nf.format(1400)); + assertEquals("1K900", nf.format(1900)); + + // no decimal places, but with rounding mode set to ROUND_FLOOR + nf = new RuleBasedNumberFormat("1000/1000: <##K<;"); + nf.setMaximumFractionDigits(0); + nf.setRoundingMode(BigDecimal.ROUND_FLOOR); + assertEquals("-2K", nf.format(-1400)); + assertEquals("-2K", nf.format(-1900)); + assertEquals("1K", nf.format(1400)); + assertEquals("1K", nf.format(1900)); + } + /** * Perform a simple spot check on the Spanish spellout rules */ @@ -551,6 +585,7 @@ public class RbnfTest extends CoreTestFmwk { { "200", "zwei\u00ADhundert" }, { "579", "f\u00fcnf\u00ADhundert\u00ADneun\u00ADund\u00ADsiebzig" }, { "1,000", "ein\u00ADtausend" }, + { "1,101", "ein\u00adtausend\u00adein\u00adhundert\u00adeins" }, { "2,000", "zwei\u00ADtausend" }, { "3,004", "drei\u00ADtausend\u00ADvier" }, { "4,567", "vier\u00ADtausend\u00ADf\u00fcnf\u00ADhundert\u00ADsieben\u00ADund\u00ADsechzig" }, @@ -566,6 +601,23 @@ public class RbnfTest extends CoreTestFmwk { }; doParsingTest(formatter, testDataLenient, true); + + String[][] testDataYear = { + { "101", "ein\u00adhundert\u00adeins" }, + { "900", "neun\u00adhundert" }, + { "1,001", "ein\u00adtausend\u00adeins" }, + { "1,100", "elf\u00adhundert" }, + { "1,101", "elf\u00adhundert\u00adeins" }, + { "1,234", "zw\u00f6lf\u00adhundert\u00advier\u00adund\u00addrei\u00dfig" }, + { "2,001", "zwei\u00adtausend\u00adeins" }, + { "10,001", "zehn\u00adtausend\u00adeins" }, + { "-100", "minus ein\u00adhundert" }, + { "12.34", "12,3" }, + }; + + formatter.setDefaultRuleSet("%spellout-numbering-year"); + logln("testing year rules"); + doTest(formatter, testDataYear, false); } /** diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/NFRule.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/NFRule.java index 2530ef5adef..f93b4d95083 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/text/NFRule.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/NFRule.java @@ -112,7 +112,7 @@ final class NFRule { /** * The RuleBasedNumberFormat that owns this rule */ - private final RuleBasedNumberFormat formatter; + final RuleBasedNumberFormat formatter; //----------------------------------------------------------------------- // construction @@ -730,6 +730,13 @@ final class NFRule { return power(radix, exponent); } + /** + * Internal function used by the rounding code in MultiplierSubstitution. + */ + boolean hasModulusSubstitution() { + return (sub1 instanceof ModulusSubstitution) || (sub2 instanceof ModulusSubstitution); + } + //----------------------------------------------------------------------- // formatting //----------------------------------------------------------------------- diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/NFSubstitution.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/NFSubstitution.java index 01444528c47..e9757078621 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/text/NFSubstitution.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/NFSubstitution.java @@ -11,6 +11,7 @@ package com.ibm.icu.text; import java.text.ParsePosition; import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD; +import com.ibm.icu.math.BigDecimal; //=================================================================== // NFSubstitution (abstract base class) @@ -305,16 +306,10 @@ abstract class NFSubstitution { ruleSet.format(numberToFormat, toInsertInto, position + pos, recursionCount); } else { if (number <= MAX_INT64_IN_DOUBLE) { - // or perform the transformation on the number (preserving - // the result's fractional part if the formatter it set - // to show it), then use that formatter's format() method + // or perform the transformation on the number, + // then use that formatter's format() method // to format the result - double numberToFormat = transformNumber((double) number); - if (numberFormat.getMaximumFractionDigits() == 0) { - numberToFormat = Math.floor(numberToFormat); - } - - toInsertInto.insert(position + pos, numberFormat.format(numberToFormat)); + toInsertInto.insert(position + pos, numberFormat.format(transformNumber((double) number))); } else { // We have gone beyond double precision. Something has to give. @@ -666,6 +661,12 @@ class MultiplierSubstitution extends NFSubstitution { */ long divisor; + /** + * A backpointer to the owning rule. Used in the rounding logic to determine + * whether the owning rule also has a modulus substitution. + */ + NFRule owningRule; + //----------------------------------------------------------------------- // construction //----------------------------------------------------------------------- @@ -689,6 +690,7 @@ class MultiplierSubstitution extends NFSubstitution { // substitution. Rather than keeping a back-pointer to the // rule, we keep a copy of the divisor this.divisor = rule.getDivisor(); + this.owningRule = rule; if (divisor == 0) { // this will cause recursion throw new IllegalStateException("Substitution with divisor 0 " + description.substring(0, pos) + @@ -749,26 +751,25 @@ class MultiplierSubstitution extends NFSubstitution { */ @Override public double transformNumber(double number) { - boolean doFloor = ruleSet != null; - if (!doFloor) { - // This is a HACK that partially addresses ICU-22313. The original code wanted us to do - // floor() on the result if we were passing it to another rule set, but not if we were passing - // it to a DecimalFormat. But the DurationRules rule set has multiplier substitutions where - // we DO want to do the floor() operation. What we REALLY want is to do floor() any time - // the owning rule also has a ModulusSubsitution, but we don't have access to that information - // here, so instead we're doing a floor() any time the DecimalFormat has maxFracDigits equal to - // 0. This seems to work with our existing rule sets, but could be a problem in the future, - // but the "real" fix for DurationRules isn't worth doing, since we're deprecating DurationRules - // anyway. This is enough to keep it from being egregiously wrong, without obvious side - // effects. --rtg 8/16/23 - if (numberFormat == null || numberFormat.getMaximumFractionDigits() == 0) { - doFloor = true; - } - } - if (!doFloor) { - return number / divisor; - } else { + // Most of the time, when a number is handled by an NFSubstitution, we do a floor() on it, but + // if a substitution uses a DecimalFormat to format the number instead of a ruleset, we generally + // don't want to do a floor()-- we want to keep the value intact so that the DecimalFormat can + // either include the fractional part or round properly. The big exception to this is here in + // MultiplierSubstitution. If the rule includes two substitutions, the MultiplierSubstitution + // (which is handling the larger part of the number) really _does_ want to do a floor(), because + // the ModulusSubstitution (which is handling the smaller part of the number) will take + // care of the fractional part. (Consider something like `1/12: <0< feet >0.0> inches;`.) + // But if there is no ModulusSubstitution, we're shortening the number in some way-- the "larger part" + // of the number is the only part we're keeping. Even if the DecimalFormat doesn't include the + // fractional part in its output, we still want it to round. (Consider something like `1/1000: <0 * << * in normal rule - * Divide the number by the rule's divisor and format the quotient + * Divide the number by the rule's divisor, perform floor() on the quotient, + * and format the resulting value.
+ * If there is a DecimalFormat pattern between the < characters and the + * rule does NOT also contain a >> substitution, we DON'T perform + * floor() on the quotient-- the quotient is passed through to the DecimalFormat + * intact. That is, for the value 1,900:
+ * - "1/1000: << thousand;" will produce "one thousand"
+ * - "1/1000: <0< thousand;" will produce "2 thousand" (NOT "1 thousand")
+ * - "1/1000: <0< seconds >0> milliseconds;" will produce "1 second 900 milliseconds" + * * * *