From 0895a223a35d491f997a8819005151afcdebafa6 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Fri, 11 Dec 2020 13:56:22 +0000 Subject: [PATCH] ICU-21349 Adjust C++ `MeasureUnitImpl::serialize` to be as same as the Java one See #1496 --- icu4c/source/common/charstr.cpp | 32 +++ icu4c/source/common/charstr.h | 3 + icu4c/source/common/util.h | 6 +- icu4c/source/i18n/measunit_extra.cpp | 201 +++++++++--------- icu4c/source/i18n/measunit_impl.h | 15 +- icu4c/source/i18n/unicode/measunit.h | 4 +- icu4c/source/test/intltest/strtest.cpp | 31 +++ icu4c/source/test/intltest/strtest.h | 1 + .../ibm/icu/impl/units/MeasureUnitImpl.java | 5 +- .../ibm/icu/impl/units/SingleUnitImpl.java | 17 +- 10 files changed, 196 insertions(+), 119 deletions(-) diff --git a/icu4c/source/common/charstr.cpp b/icu4c/source/common/charstr.cpp index 318a185b3f1..c4710d6ed56 100644 --- a/icu4c/source/common/charstr.cpp +++ b/icu4c/source/common/charstr.cpp @@ -141,6 +141,38 @@ CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &error return *this; } +CharString &CharString::appendNumber(int32_t number, UErrorCode &status) { + if (number < 0) { + this->append('-', status); + if (U_FAILURE(status)) { + return *this; + } + } + + if (number == 0) { + this->append('0', status); + return *this; + } + + int32_t numLen = 0; + while (number != 0) { + int32_t residue = number % 10; + number /= 10; + this->append(std::abs(residue) + '0', status); + numLen++; + if (U_FAILURE(status)) { + return *this; + } + } + + int32_t start = this->length() - numLen, end = this->length() - 1; + while(start < end) { + std::swap(this->data()[start++], this->data()[end--]); + } + + return *this; +} + char *CharString::getAppendBuffer(int32_t minCapacity, int32_t desiredCapacityHint, int32_t &resultCapacity, diff --git a/icu4c/source/common/charstr.h b/icu4c/source/common/charstr.h index 6619faac618..175acd1c0a2 100644 --- a/icu4c/source/common/charstr.h +++ b/icu4c/source/common/charstr.h @@ -127,6 +127,9 @@ public: return append(s.data(), s.length(), errorCode); } CharString &append(const char *s, int32_t sLength, UErrorCode &status); + + CharString &appendNumber(int32_t number, UErrorCode &status); + /** * Returns a writable buffer for appending and writes the buffer's capacity to * resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS(). diff --git a/icu4c/source/common/util.h b/icu4c/source/common/util.h index 9c3b76d9ed5..b5fac383a2f 100644 --- a/icu4c/source/common/util.h +++ b/icu4c/source/common/util.h @@ -13,10 +13,10 @@ #ifndef ICU_UTIL_H #define ICU_UTIL_H -#include "unicode/utypes.h" -#include "unicode/uobject.h" +#include "charstr.h" #include "unicode/unistr.h" - +#include "unicode/uobject.h" +#include "unicode/utypes.h" //-------------------------------------------------------------------- // class ICU_Utility // i18n utility functions, scoped into the class ICU_Utility. diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index 1edc77212fa..03aefcded61 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -30,6 +30,7 @@ #include "unicode/ures.h" #include "unicode/ustringtrie.h" #include "uresimp.h" +#include "util.h" #include U_NAMESPACE_BEGIN @@ -628,107 +629,6 @@ compareSingleUnits(const void* /*context*/, const void* left, const void* right) return (*realLeft)->compareTo(**realRight); } -/** - * Generate the identifier string for a single unit in place. - * - * Does not support the dimensionless SingleUnitImpl: calling serializeSingle - * with the dimensionless unit results in an U_INTERNAL_PROGRAM_ERROR. - * - * @param first If singleUnit is part of a compound unit, and not its first - * single unit, set this to false. Otherwise: set to true. - */ -void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& output, UErrorCode& status) { - if (first && singleUnit.dimensionality < 0) { - // Essentially the "unary per". For compound units with a numerator, the - // caller takes care of the "binary per". - output.append("per-", status); - } - - if (singleUnit.isDimensionless()) { - status = U_INTERNAL_PROGRAM_ERROR; - return; - } - int8_t posPower = std::abs(singleUnit.dimensionality); - if (posPower == 0) { - status = U_INTERNAL_PROGRAM_ERROR; - } else if (posPower == 1) { - // no-op - } else if (posPower == 2) { - output.append("square-", status); - } else if (posPower == 3) { - output.append("cubic-", status); - } else if (posPower < 10) { - output.append("pow", status); - output.append(posPower + '0', status); - output.append('-', status); - } else if (posPower <= 15) { - output.append("pow1", status); - output.append('0' + (posPower % 10), status); - output.append('-', status); - } else { - status = kUnitIdentifierSyntaxError; - } - if (U_FAILURE(status)) { - return; - } - - if (singleUnit.siPrefix != UMEASURE_SI_PREFIX_ONE) { - for (const auto& siPrefixInfo : gSIPrefixStrings) { - if (siPrefixInfo.value == singleUnit.siPrefix) { - output.append(siPrefixInfo.string, status); - break; - } - } - } - if (U_FAILURE(status)) { - return; - } - - output.append(singleUnit.getSimpleUnitID(), status); -} - -/** - * Normalize a MeasureUnitImpl and generate the identifier string in place. - */ -void serialize(MeasureUnitImpl& impl, UErrorCode& status) { - if (U_FAILURE(status)) { - return; - } - U_ASSERT(impl.identifier.isEmpty()); - if (impl.singleUnits.length() == 0) { - // Dimensionless, constructed by the default constructor: no appending - // to impl.identifier, we wish it to contain the zero-length string. - return; - } - if (impl.complexity == UMEASURE_UNIT_COMPOUND) { - // Note: don't sort a MIXED unit - uprv_sortArray(impl.singleUnits.getAlias(), impl.singleUnits.length(), - sizeof(impl.singleUnits[0]), compareSingleUnits, nullptr, false, &status); - if (U_FAILURE(status)) { - return; - } - } - serializeSingle(*impl.singleUnits[0], true, impl.identifier, status); - if (impl.singleUnits.length() == 1) { - return; - } - for (int32_t i = 1; i < impl.singleUnits.length(); i++) { - const SingleUnitImpl &prev = *impl.singleUnits[i - 1]; - const SingleUnitImpl &curr = *impl.singleUnits[i]; - if (impl.complexity == UMEASURE_UNIT_MIXED) { - impl.identifier.append("-and-", status); - serializeSingle(curr, true, impl.identifier, status); - } else { - if (prev.dimensionality > 0 && curr.dimensionality < 0) { - impl.identifier.append("-per-", status); - } else { - impl.identifier.append('-', status); - } - serializeSingle(curr, false, impl.identifier, status); - } - } -} - } // namespace @@ -758,6 +658,42 @@ const char *SingleUnitImpl::getSimpleUnitID() const { return gSimpleUnits[index]; } +void SingleUnitImpl::appendNeutralIdentifier(CharString &result, UErrorCode &status) const { + int32_t absPower = std::abs(this->dimensionality); + + U_ASSERT(absPower > 0); // "this function does not support the dimensionless single units"; + + if (absPower == 1) { + // no-op + } else if (absPower == 2) { + result.append(StringPiece("square-"), status); + } else if (absPower == 3) { + result.append(StringPiece("cubic-"), status); + } else if (absPower <= 15) { + result.append(StringPiece("pow"), status); + result.appendNumber(absPower, status); + result.append(StringPiece("-"), status); + } else { + status = U_ILLEGAL_ARGUMENT_ERROR; // Unit Identifier Syntax Error + return; + } + + if (U_FAILURE(status)) { + return; + } + + if (this->siPrefix != UMEASURE_SI_PREFIX_ONE) { + for (const auto &siPrefixInfo : gSIPrefixStrings) { + if (siPrefixInfo.value == this->siPrefix) { + result.append(siPrefixInfo.string, status); + break; + } + } + } + + result.append(StringPiece(this->getSimpleUnitID()), status); +} + MeasureUnitImpl::MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status) { *this = other.copy(status); } @@ -853,8 +789,69 @@ MaybeStackVector MeasureUnitImpl::extractIndividualUnits(UError return result; } +/** + * Normalize a MeasureUnitImpl and generate the identifier string in place. + */ +void MeasureUnitImpl::serialize(UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + + if (this->singleUnits.length() == 0) { + // Dimensionless, constructed by the default constructor. + return; + } + + if (this->complexity == UMEASURE_UNIT_COMPOUND) { + // Note: don't sort a MIXED unit + uprv_sortArray(this->singleUnits.getAlias(), this->singleUnits.length(), + sizeof(this->singleUnits[0]), compareSingleUnits, nullptr, false, &status); + if (U_FAILURE(status)) { + return; + } + } + + CharString result; + bool beforePer = true; + bool firstTimeNegativeDimension = false; + for (int32_t i = 0; i < this->singleUnits.length(); i++) { + if (beforePer && (*this->singleUnits[i]).dimensionality < 0) { + beforePer = false; + firstTimeNegativeDimension = true; + } else if ((*this->singleUnits[i]).dimensionality < 0) { + firstTimeNegativeDimension = false; + } + + if (U_FAILURE(status)) { + return; + } + + if (this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) { + if (result.length() != 0) { + result.append(StringPiece("-and-"), status); + } + } else { + if (firstTimeNegativeDimension) { + if (result.length() == 0) { + result.append(StringPiece("per-"), status); + } else { + result.append(StringPiece("-per-"), status); + } + } else { + if (result.length() != 0) { + result.append(StringPiece("-"), status); + } + } + } + + this->singleUnits[i]->appendNeutralIdentifier(result, status); + } + + this->identifier = CharString(result, status); +} + MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && { - serialize(*this, status); + this->serialize(status); return MeasureUnit(std::move(*this)); } diff --git a/icu4c/source/i18n/measunit_impl.h b/icu4c/source/i18n/measunit_impl.h index a4f97cffe73..ef027ec6a2c 100644 --- a/icu4c/source/i18n/measunit_impl.h +++ b/icu4c/source/i18n/measunit_impl.h @@ -42,6 +42,12 @@ struct U_I18N_API SingleUnitImpl : public UMemory { */ const char *getSimpleUnitID() const; + /** + * Generates and append a neutral identifier string for a single unit which means we do not include + * the dimension signal. + */ + void appendNeutralIdentifier(CharString &result, UErrorCode &status) const; + /** * Compare this SingleUnitImpl to another SingleUnitImpl for the sake of * sorting and coalescing. @@ -133,7 +139,8 @@ template class U_I18N_API MaybeStackVector; * Internal representation of measurement units. Capable of representing all complexities of units, * including mixed and compound units. */ -struct U_I18N_API MeasureUnitImpl : public UMemory { +class U_I18N_API MeasureUnitImpl : public UMemory { + public: MeasureUnitImpl() = default; MeasureUnitImpl(MeasureUnitImpl &&other) = default; MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status); @@ -233,6 +240,12 @@ struct U_I18N_API MeasureUnitImpl : public UMemory { * The full unit identifier. Owned by the MeasureUnitImpl. Empty if not computed. */ CharString identifier; + + private: + /** + * Normalizes a MeasureUnitImpl and generate the identifier string in place. + */ + void serialize(UErrorCode &status); }; U_NAMESPACE_END diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index 8db6527e0cc..6761f847fc9 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -30,7 +30,7 @@ U_NAMESPACE_BEGIN class StringEnumeration; -struct MeasureUnitImpl; +class MeasureUnitImpl; #ifndef U_HIDE_DRAFT_API /** @@ -3568,7 +3568,7 @@ private: /** Internal version of public API */ LocalArray splitToSingleUnitsImpl(int32_t& outCount, UErrorCode& status) const; - friend struct MeasureUnitImpl; + friend class MeasureUnitImpl; }; #ifndef U_HIDE_DRAFT_API // @draft ICU 68 diff --git a/icu4c/source/test/intltest/strtest.cpp b/icu4c/source/test/intltest/strtest.cpp index 043355ff767..93c7faf1d6e 100644 --- a/icu4c/source/test/intltest/strtest.cpp +++ b/icu4c/source/test/intltest/strtest.cpp @@ -255,6 +255,7 @@ void StringTest::runIndexedTest(int32_t index, UBool exec, const char *&name, ch TESTCASE_AUTO(TestStringByteSinkAppendU8); TESTCASE_AUTO(TestCharString); TESTCASE_AUTO(TestCStr); + TESTCASE_AUTO(TestCharStrAppendNumber); TESTCASE_AUTO(Testctou); TESTCASE_AUTO_END; } @@ -842,6 +843,36 @@ StringTest::TestCStr() { } } +void StringTest::TestCharStrAppendNumber() { + IcuTestErrorCode errorCode(*this, "TestCharStrAppendNumber()"); + + CharString testString; + testString.appendNumber(1, errorCode); + assertEquals("TestAppendNumber 1", "1", testString.data()); + + testString.clear(); + testString.appendNumber(-1, errorCode); + assertEquals("TestAppendNumber -1", "-1", testString.data()); + + testString.clear(); + testString.appendNumber(12345, errorCode); + assertEquals("TestAppendNumber 12345", "12345", testString.data()); + testString.appendNumber(123, errorCode); + assertEquals("TestAppendNumber 12345 and then 123", "12345123", testString.data()); + + testString.clear(); + testString.appendNumber(2147483647, errorCode); + assertEquals("TestAppendNumber when appending the biggest int32", "2147483647", testString.data()); + + testString.clear(); + testString.appendNumber(-2147483648, errorCode); + assertEquals("TestAppendNumber when appending the smallest int32", "-2147483648", testString.data()); + + testString.clear(); + testString.appendNumber(0, errorCode); + assertEquals("TestAppendNumber when appending zero", "0", testString.data()); +} + void StringTest::Testctou() { const char *cs = "Fa\\u0127mu"; diff --git a/icu4c/source/test/intltest/strtest.h b/icu4c/source/test/intltest/strtest.h index 2a1b98804f3..8890c0b99d6 100644 --- a/icu4c/source/test/intltest/strtest.h +++ b/icu4c/source/test/intltest/strtest.h @@ -57,6 +57,7 @@ private: void TestSTLCompatibility(); void TestCharString(); void TestCStr(); + void TestCharStrAppendNumber(); void Testctou(); }; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/units/MeasureUnitImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/units/MeasureUnitImpl.java index 0821aa24588..e0aa3c512db 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/units/MeasureUnitImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/units/MeasureUnitImpl.java @@ -223,6 +223,8 @@ public class MeasureUnitImpl { // to this.result, we wish it to contain the zero-length string. return; } + + if (this.complexity == MeasureUnit.Complexity.COMPOUND) { // Note: don't sort a MIXED unit Collections.sort(this.getSingleUnits(), new SingleUnitComparator()); @@ -240,7 +242,6 @@ public class MeasureUnitImpl { firstTimeNegativeDimension = false; } - String singleUnitIdentifier = singleUnit.getNeutralIdentifier(); if (this.getComplexity() == MeasureUnit.Complexity.MIXED) { if (result.length() != 0) { result.append("-and-"); @@ -259,7 +260,7 @@ public class MeasureUnitImpl { } } - result.append(singleUnitIdentifier); + result.append(singleUnit.getNeutralIdentifier()); } this.identifier = result.toString(); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/units/SingleUnitImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/units/SingleUnitImpl.java index 0ca51c93021..6afb71b87f3 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/units/SingleUnitImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/units/SingleUnitImpl.java @@ -49,26 +49,25 @@ public class SingleUnitImpl { } /** - * Generates an neutral identifier string for a single unit which means we do not include the dimension signal. + * Generates a neutral identifier string for a single unit which means we do not include the dimension signal. */ public String getNeutralIdentifier() { StringBuilder result = new StringBuilder(); - int posPower = Math.abs(this.getDimensionality()); + int absPower = Math.abs(this.getDimensionality()); - assert posPower > 0 : "getIdentifier does not support the dimensionless"; + assert absPower > 0 : "this function does not support the dimensionless single units"; - if (posPower == 1) { + if (absPower == 1) { // no-op - } else if (posPower == 2) { + } else if (absPower == 2) { result.append("square-"); - } else if (posPower == 3) { + } else if (absPower == 3) { result.append("cubic-"); - } else if (posPower <= 15) { + } else if (absPower <= 15) { result.append("pow"); - result.append(posPower); + result.append(absPower); result.append('-'); } else { - // TODO: IllegalArgumentException might not be appropriate here throw new IllegalArgumentException("Unit Identifier Syntax Error"); }