From 697f7c0e0073f0424f83259bae184e5b56b2a792 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 5 Feb 2025 13:25:40 +0000 Subject: [PATCH] ICU-22781 Fix &Improve MeasureUnit identifier generation for constant denominators (C++) See #3364 --- icu4c/source/i18n/measunit_extra.cpp | 55 ++++++++++++++++++++--- icu4c/source/test/intltest/units_test.cpp | 37 +++++++++++++++ 2 files changed, 87 insertions(+), 5 deletions(-) diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index 18bd82c4f35..dcba02129a9 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -31,7 +31,7 @@ #include "unicode/ustringtrie.h" #include "uresimp.h" #include "util.h" -#include +#include #include U_NAMESPACE_BEGIN @@ -1269,6 +1269,51 @@ MeasureUnitImpl::extractIndividualUnitsWithIndices(UErrorCode &status) const { return result; } +int32_t countCharacter(const CharString &str, char c) { + int32_t count = 0; + for (int32_t i = 0, n = str.length(); i < n; i++) { + if (str[i] == c) { + count++; + } + } + return count; +} + +/** + * Internal function that returns a string of the constants in the correct + * format. + * + * Example: + * 1000 --> "-per-1000" + * 1000000 --> "-per-1e6" + * + * NOTE: this function is only used when the constant denominator is greater + * than 0. + */ +CharString getConstantsString(uint64_t constantDenominator, UErrorCode &status) { + U_ASSERT(constantDenominator > 0 && constantDenominator <= LLONG_MAX); + + CharString result; + result.appendNumber(constantDenominator, status); + if (U_FAILURE(status)) { + return result; + } + + if (constantDenominator <= 1000) { + return result; + } + + // Check if the constant is a power of 10. + int32_t zeros = countCharacter(result, '0'); + if (zeros == result.length() - 1 && result[0] == '1') { + result.clear(); + result.append(StringPiece("1e"), status); + result.appendNumber(zeros, status); + } + + return result; +} + /** * Normalize a MeasureUnitImpl and generate the identifier string in place. */ @@ -1319,8 +1364,8 @@ void MeasureUnitImpl::serialize(UErrorCode &status) { result.append(StringPiece("-per-"), status); } - if (this->constantDenominator != 0) { - result.appendNumber(this->constantDenominator, status); + if (this->constantDenominator > 0) { + result.append(getConstantsString(this->constantDenominator, status), status); result.append(StringPiece("-"), status); constantDenominatorAppended = true; } @@ -1333,9 +1378,9 @@ void MeasureUnitImpl::serialize(UErrorCode &status) { this->singleUnits[i]->appendNeutralIdentifier(result, status); } - if (!constantDenominatorAppended && this->constantDenominator != 0) { + if (!constantDenominatorAppended && this->constantDenominator > 0) { result.append(StringPiece("-per-"), status); - result.appendNumber(this->constantDenominator, status); + result.append(getConstantsString(this->constantDenominator, status), status); } if (U_FAILURE(status)) { diff --git a/icu4c/source/test/intltest/units_test.cpp b/icu4c/source/test/intltest/units_test.cpp index 0c342ea43c5..7790149f966 100644 --- a/icu4c/source/test/intltest/units_test.cpp +++ b/icu4c/source/test/intltest/units_test.cpp @@ -53,6 +53,7 @@ class UnitsTest : public IntlTest { void testUnitPreferencesWithCLDRTests(); void testUnitsConstantsDenomenator(); void testMeasureUnit_withConstantDenominator(); + void testUnitsConstantsDenomenator_getIdentifier(); void testConverter(); }; @@ -72,6 +73,7 @@ void UnitsTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha TESTCASE_AUTO(testUnitPreferencesWithCLDRTests); TESTCASE_AUTO(testUnitsConstantsDenomenator); TESTCASE_AUTO(testMeasureUnit_withConstantDenominator); + TESTCASE_AUTO(testUnitsConstantsDenomenator_getIdentifier); TESTCASE_AUTO(testConverter); TESTCASE_AUTO_END; } @@ -1342,4 +1344,39 @@ void UnitsTest::testMeasureUnit_withConstantDenominator() { status.reset(); } +void UnitsTest::testUnitsConstantsDenomenator_getIdentifier() { + IcuTestErrorCode status(*this, "UnitTests::testUnitsConstantsDenomenator_getIdentifier"); + + // Test Cases + struct TestCase { + const char *source; + const char *expectedIdentifier; + } testCases[]{ + {"meter-per-1000", "meter-per-1000"}, + {"meter-per-1000-kilometer", "meter-per-1000-kilometer"}, + {"meter-per-1000000", "meter-per-1e6"}, + {"meter-per-1000000-kilometer", "meter-per-1e6-kilometer"}, + {"meter-per-1000000000", "meter-per-1e9"}, + {"meter-per-1000000000-kilometer", "meter-per-1e9-kilometer"}, + {"meter-per-1000000000000", "meter-per-1e12"}, + {"meter-per-1000000000000-kilometer", "meter-per-1e12-kilometer"}, + {"meter-per-1000000000000000", "meter-per-1e15"}, + {"meter-per-1e15-kilometer", "meter-per-1e15-kilometer"}, + {"meter-per-1000000000000000000", "meter-per-1e18"}, + {"meter-per-1e18-kilometer", "meter-per-1e18-kilometer"}, + {"meter-per-1000000000000001", "meter-per-1000000000000001"}, + {"meter-per-1000000000000001-kilometer", "meter-per-1000000000000001-kilometer"}, + }; + + for (const auto &testCase : testCases) { + MeasureUnit unit = MeasureUnit::forIdentifier(testCase.source, status); + if (status.errIfFailureAndReset("forIdentifier(\"%s\")", testCase.source)) { + continue; + } + + auto actualIdentifier = unit.getIdentifier(); + assertEquals(" getIdentifier(\"%s\")", testCase.expectedIdentifier, actualIdentifier); + } +} + #endif /* #if !UCONFIG_NO_FORMATTING */