From 14ca2b0e6da14ef45aadf5067deb562fc6c70a52 Mon Sep 17 00:00:00 2001 From: Rich Gillam Date: Fri, 18 Aug 2023 18:12:03 -0700 Subject: [PATCH] ICU-22313 Various fixes for duration formatting: - Changed the C++ and Java interfaces to that the URBNF_DURATION ruleset is marked deprecated. - Fixed a bug in RuleBasedNumberFormat in both Java and C++ that caused the existing duration-formatting rules to produce bogus results when used on a non-integral value. (Earlier versions of this PR added code to use a MeasureFormat under the covers when a caller used unum_open(UNUM_DURATION). I took that out because of backward compatibility concerns, so we're still using RBNF in the C API. I'm hoping to add a "real" duration formatter in ICU 75.) --- icu4c/source/i18n/nfsubs.cpp | 20 ++++++++++- icu4c/source/i18n/unicode/rbnf.h | 6 ++-- icu4c/source/i18n/unum.cpp | 18 ++++++---- icu4c/source/test/cintltst/cnumtst.c | 36 ++++++++++++++----- icu4c/source/test/intltest/itrbnf.cpp | 9 ++++- .../com/ibm/icu/dev/test/format/RbnfTest.java | 7 ++++ .../java/com/ibm/icu/text/NFSubstitution.java | 20 +++++++++-- .../ibm/icu/text/RuleBasedNumberFormat.java | 3 +- 8 files changed, 97 insertions(+), 22 deletions(-) diff --git a/icu4c/source/i18n/nfsubs.cpp b/icu4c/source/i18n/nfsubs.cpp index 4f3247ce50d..5256b8a39bb 100644 --- a/icu4c/source/i18n/nfsubs.cpp +++ b/icu4c/source/i18n/nfsubs.cpp @@ -103,7 +103,25 @@ public: } virtual double transformNumber(double number) const override { - if (getRuleSet()) { + bool doFloor = getRuleSet() != nullptr; + if (!doFloor) { + // This is a HACK that partially addresses ICU-22313. The original code wanted us to do + // floor() on the result if we were passing it to another rule set, but not if we were passing + // it to a DecimalFormat. But the DurationRules rule set has multiplier substitutions where + // we DO want to do the floor() operation. What we REALLY want is to do floor() any time + // the owning rule also has a ModulusSubsitution, but we don't have access to that information + // here, so instead we're doing a floor() any time the DecimalFormat has maxFracDigits equal to + // 0. This seems to work with our existing rule sets, but could be a problem in the future, + // but the "real" fix for DurationRules isn't worth doing, since we're deprecating DurationRules + // anyway. This is enough to keep it from being egregiously wrong, without obvious side + // effects. --rtg 8/16/23 + const DecimalFormat* decimalFormat = getNumberFormat(); + if (decimalFormat == nullptr || decimalFormat->getMaximumFractionDigits() == 0) { + doFloor = true; + } + } + + if (doFloor) { return uprv_floor(number / divisor); } else { return number / divisor; diff --git a/icu4c/source/i18n/unicode/rbnf.h b/icu4c/source/i18n/unicode/rbnf.h index 0336ac1f81e..f9a1c8f93a5 100644 --- a/icu4c/source/i18n/unicode/rbnf.h +++ b/icu4c/source/i18n/unicode/rbnf.h @@ -64,18 +64,20 @@ enum URBNFRuleSetTag { * @stable ICU 2.2 */ URBNF_ORDINAL, +#ifndef U_HIDE_DEPRECATED_API /** * Requests predefined ruleset for formatting a value as a duration in hours, minutes, and seconds. - * @stable ICU 2.2 + * @deprecated ICU 74 Use MeasureFormat instead. */ URBNF_DURATION, +#endif // U_HIDE_DERECATED_API /** * Requests predefined ruleset for various non-place-value numbering systems. * WARNING: The same resource contains rule sets for a variety of different numbering systems. * You need to call setDefaultRuleSet() on the formatter to choose the actual numbering system. * @stable ICU 2.2 */ - URBNF_NUMBERING_SYSTEM, + URBNF_NUMBERING_SYSTEM = 3, #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal URBNFRuleSetTag value. diff --git a/icu4c/source/i18n/unum.cpp b/icu4c/source/i18n/unum.cpp index 11a7bcba57d..eb6c86252b7 100644 --- a/icu4c/source/i18n/unum.cpp +++ b/icu4c/source/i18n/unum.cpp @@ -28,17 +28,19 @@ #include "unicode/dcfmtsym.h" #include "unicode/curramt.h" #include "unicode/localpointer.h" +#include "unicode/measfmt.h" #include "unicode/udisplaycontext.h" #include "uassert.h" #include "cpputils.h" #include "cstring.h" +#include "putilimp.h" U_NAMESPACE_USE U_CAPI UNumberFormat* U_EXPORT2 -unum_open( UNumberFormatStyle style, +unum_open( UNumberFormatStyle style, const char16_t* pattern, int32_t patternLength, const char* locale, @@ -671,6 +673,7 @@ unum_getTextAttribute(const UNumberFormat* fmt, const NumberFormat* nf = reinterpret_cast(fmt); const DecimalFormat* df = dynamic_cast(nf); + const RuleBasedNumberFormat* rbnf = nullptr; // cast is below for performance if (df != nullptr) { switch(tag) { case UNUM_POSITIVE_PREFIX: @@ -701,8 +704,7 @@ unum_getTextAttribute(const UNumberFormat* fmt, *status = U_UNSUPPORTED_ERROR; return -1; } - } else { - const RuleBasedNumberFormat* rbnf = dynamic_cast(nf); + } else if ((rbnf = dynamic_cast(nf)) != nullptr) { U_ASSERT(rbnf != nullptr); if (tag == UNUM_DEFAULT_RULESET) { res = rbnf->getDefaultRuleSetName(); @@ -716,6 +718,9 @@ unum_getTextAttribute(const UNumberFormat* fmt, *status = U_UNSUPPORTED_ERROR; return -1; } + } else { + *status = U_UNSUPPORTED_ERROR; + return -1; } return res.extract(result, resultLength, *status); @@ -794,15 +799,16 @@ unum_toPattern( const UNumberFormat* fmt, const NumberFormat* nf = reinterpret_cast(fmt); const DecimalFormat* df = dynamic_cast(nf); + const RuleBasedNumberFormat* rbnf = nullptr; // cast is below for performance if (df != nullptr) { if(isPatternLocalized) df->toLocalizedPattern(pat); else df->toPattern(pat); + } else if ((rbnf = dynamic_cast(nf)) != nullptr) { + pat = rbnf->getRules(); } else { - const RuleBasedNumberFormat* rbnf = dynamic_cast(nf); - U_ASSERT(rbnf != nullptr); - pat = rbnf->getRules(); + // leave `pat` empty } return pat.extract(result, resultLength, *status); } diff --git a/icu4c/source/test/cintltst/cnumtst.c b/icu4c/source/test/cintltst/cnumtst.c index d71edd59112..17bfbde3c40 100644 --- a/icu4c/source/test/cintltst/cnumtst.c +++ b/icu4c/source/test/cintltst/cnumtst.c @@ -80,6 +80,7 @@ static void Test21479_ExactCurrency(void); static void Test22088_Ethiopic(void); static void TestChangingRuleset(void); static void TestParseWithEmptyCurr(void); +static void TestDuration(void); #define TESTCASE(x) addTest(root, &x, "tsformat/cnumtst/" #x) @@ -125,6 +126,7 @@ void addNumForTest(TestNode** root) TESTCASE(Test22088_Ethiopic); TESTCASE(TestChangingRuleset); TESTCASE(TestParseWithEmptyCurr); + TESTCASE(TestDuration); } /* test Parse int 64 */ @@ -1733,7 +1735,7 @@ static void TestRBNFFormat() { UParseError perr; UChar pat[1024]; UChar tempUChars[512]; - UNumberFormat *formats[5]; + UNumberFormat *formats[4]; int COUNT = UPRV_LENGTHOF(formats); int i; @@ -1764,13 +1766,6 @@ static void TestRBNFFormat() { return; } - status = U_ZERO_ERROR; - formats[3] = unum_open(UNUM_DURATION, NULL, 0, "en_US", &perr, &status); - if (U_FAILURE(status)) { - log_err_status(status, "unable to open duration %s\n", u_errorName(status)); - return; - } - status = U_ZERO_ERROR; u_uastrcpy(pat, "%standard:\n" @@ -1809,7 +1804,7 @@ static void TestRBNFFormat() { "100,000,000: some huge number;\n"); /* This is to get around some compiler warnings about char * string length. */ u_strcat(pat, tempUChars); - formats[4] = unum_open(UNUM_PATTERN_RULEBASED, pat, -1, "en_US", &perr, &status); + formats[3] = unum_open(UNUM_PATTERN_RULEBASED, pat, -1, "en_US", &perr, &status); if (U_FAILURE(status)) { log_err_status(status, "unable to open rulebased pattern -> %s\n", u_errorName(status)); } @@ -3835,4 +3830,27 @@ static void TestParseWithEmptyCurr(void) { } } +static void TestDuration(void) { + // NOTE: at the moment, UNUM_DURATION is still backed by a set of RBNF rules, which don't handle + // showing fractional seconds. This test should be updated or replaced + // when https://unicode-org.atlassian.net/browse/ICU-22487 is fixed. + double values[] = { 34, 34.5, 1234, 1234.2, 1234.7, 1235, 8434, 8434.5 }; + const UChar* expectedResults[] = { u"34 sec.", u"34 sec.", u"20:34", u"20:34", u"20:35", u"20:35", u"2:20:34", u"2:20:34" }; + + UErrorCode err = U_ZERO_ERROR; + UNumberFormat* nf = unum_open(UNUM_DURATION, NULL, 0, "en_US", NULL, &err); + + if (assertSuccess("Failed to create duration formatter", &err)) { + UChar actualResult[200]; + + for (int32_t i = 0; i < UPRV_LENGTHOF(values); i++) { + unum_formatDouble(nf, values[i], actualResult, 200, NULL, &err); + if (assertSuccess("Error formatting duration", &err)) { + assertUEquals("Wrong formatting result", expectedResults[i], actualResult); + } + } + unum_close(nf); + } +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/itrbnf.cpp b/icu4c/source/test/intltest/itrbnf.cpp index 1dfefad83bf..3a2383bcd05 100644 --- a/icu4c/source/test/intltest/itrbnf.cpp +++ b/icu4c/source/test/intltest/itrbnf.cpp @@ -1250,9 +1250,16 @@ IntlTestRBNF::TestDurations() { "10,293", "2:51:33" }, { nullptr, nullptr} }; - doTest(formatter, testData, true); + static const char* const fractionalTestData[][2] = { + { "1234", "20:34" }, + { "1234.2", "20:34" }, + { "1234.7", "20:35" }, + { nullptr, nullptr } + }; + doTest(formatter, fractionalTestData, false); + #if !UCONFIG_NO_COLLATION formatter->setLenient(true); static const char* lpTestData[][2] = { diff --git a/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/RbnfTest.java b/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/RbnfTest.java index 2ebafe2b522..8066a8bf4a4 100644 --- a/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/RbnfTest.java +++ b/icu4j/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/RbnfTest.java @@ -362,6 +362,13 @@ public class RbnfTest extends TestFmwk { doTest(formatter, testData, true); + String[][] fractionalTestData = { + { "1234", "20:34" }, + { "1234.2", "20:34" }, + { "1234.7", "20:35" } + }; + doTest(formatter, fractionalTestData, false); + String[][] testDataLenient = { { "2-51-33", "10,293" }, }; diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/NFSubstitution.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/NFSubstitution.java index c701f38a57f..d8f3684c16d 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/text/NFSubstitution.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/NFSubstitution.java @@ -749,7 +749,23 @@ class MultiplierSubstitution extends NFSubstitution { */ @Override public double transformNumber(double number) { - if (ruleSet == null) { + boolean doFloor = ruleSet != null; + if (!doFloor) { + // This is a HACK that partially addresses ICU-22313. The original code wanted us to do + // floor() on the result if we were passing it to another rule set, but not if we were passing + // it to a DecimalFormat. But the DurationRules rule set has multiplier substitutions where + // we DO want to do the floor() operation. What we REALLY want is to do floor() any time + // the owning rule also has a ModulusSubsitution, but we don't have access to that information + // here, so instead we're doing a floor() any time the DecimalFormat has maxFracDigits equal to + // 0. This seems to work with our existing rule sets, but could be a problem in the future, + // but the "real" fix for DurationRules isn't worth doing, since we're deprecating DurationRules + // anyway. This is enough to keep it from being egregiously wrong, without obvious side + // effects. --rtg 8/16/23 + if (numberFormat == null || numberFormat.getMaximumFractionDigits() == 0) { + doFloor = true; + } + } + if (!doFloor) { return number / divisor; } else { return Math.floor(number / divisor); @@ -977,7 +993,7 @@ class ModulusSubstitution extends NFSubstitution { */ @Override public double transformNumber(double number) { - return Math.floor(number % divisor); + return number % divisor; } //----------------------------------------------------------------------- diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/RuleBasedNumberFormat.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/RuleBasedNumberFormat.java index 292bd918711..a57925abc63 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/text/RuleBasedNumberFormat.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/RuleBasedNumberFormat.java @@ -560,8 +560,9 @@ public class RuleBasedNumberFormat extends NumberFormat { /** * Selector code that tells the constructor to create a duration formatter - * @stable ICU 2.0 + * @deprecated ICU 74 Use MeasureFormat instead. */ + @Deprecated public static final int DURATION = 3; /**