From 4cd5a81ad22b7ac180b1b81ca0c33c8b2f0dba27 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Mon, 13 Jan 2020 21:52:52 +0100 Subject: [PATCH 01/13] Intermediate work on new unit identifiers --- icu4c/source/i18n/measunit.cpp | 56 +++- icu4c/source/i18n/unicode/measunit.h | 359 ++++++++++++++++++++- icu4c/source/test/intltest/measfmttest.cpp | 81 +++++ 3 files changed, 478 insertions(+), 18 deletions(-) diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp index 66ddd0a1d54..0886b23caf7 100644 --- a/icu4c/source/i18n/measunit.cpp +++ b/icu4c/source/i18n/measunit.cpp @@ -2006,21 +2006,54 @@ static int32_t binarySearch( return -1; } -MeasureUnit::MeasureUnit() { - fCurrency[0] = 0; - fTypeId = kBaseTypeIdx; - fSubTypeId = kBaseSubTypeIdx; +MeasureUnit::MeasureUnit() : MeasureUnit(kBaseTypeIdx, kBaseSubTypeIdx) { } -MeasureUnit::MeasureUnit(const MeasureUnit &other) - : fTypeId(other.fTypeId), fSubTypeId(other.fSubTypeId) { +MeasureUnit::MeasureUnit(int32_t typeId, int32_t subTypeId) + : fId(nullptr), fSubTypeId(subTypeId), fTypeId(typeId) { + fCurrency[0] = 0; +} + +MeasureUnit::MeasureUnit(const MeasureUnit &other) { + *this = other; +} + +MeasureUnit::MeasureUnit(MeasureUnit &&other) noexcept + : fId(other.fId), + fSubTypeId(other.fSubTypeId), + fTypeId(other.fTypeId) { uprv_strcpy(fCurrency, other.fCurrency); + other.fId = nullptr; } MeasureUnit &MeasureUnit::operator=(const MeasureUnit &other) { if (this == &other) { return *this; } + if (other.fId) { + auto* id = static_cast(uprv_malloc(uprv_strlen(other.fId) + 1)); + if (!id) { + // Unrecoverable allocation error; set to the default unit + *this = MeasureUnit(); + return *this; + } + uprv_strcpy(id, other.fId); + fId = id; + } else { + fId = nullptr; + } + fTypeId = other.fTypeId; + fSubTypeId = other.fSubTypeId; + uprv_strcpy(fCurrency, other.fCurrency); + return *this; +} + +MeasureUnit &MeasureUnit::operator=(MeasureUnit &&other) noexcept { + if (this == &other) { + return *this; + } + fId = other.fId; + other.fId = nullptr; fTypeId = other.fTypeId; fSubTypeId = other.fSubTypeId; uprv_strcpy(fCurrency, other.fCurrency); @@ -2032,6 +2065,8 @@ MeasureUnit *MeasureUnit::clone() const { } MeasureUnit::~MeasureUnit() { + uprv_free(const_cast(fId)); + fId = nullptr; } const char *MeasureUnit::getType() const { @@ -2042,6 +2077,10 @@ const char *MeasureUnit::getSubtype() const { return fCurrency[0] == 0 ? gSubTypes[getOffset()] : fCurrency; } +const char *MeasureUnit::toString() const { + return fId ? fId : getSubtype(); +} + UBool MeasureUnit::operator==(const UObject& other) const { if (this == &other) { // Same object, equal return TRUE; @@ -2050,10 +2089,7 @@ UBool MeasureUnit::operator==(const UObject& other) const { return FALSE; } const MeasureUnit &rhs = static_cast(other); - return ( - fTypeId == rhs.fTypeId - && fSubTypeId == rhs.fSubTypeId - && uprv_strcmp(fCurrency, rhs.fCurrency) == 0); + return uprv_strcmp(toString(), rhs.toString()) == 0; } int32_t MeasureUnit::getIndex() const { diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index ebcf6914fdb..cca944cc8ff 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -29,6 +29,162 @@ U_NAMESPACE_BEGIN class StringEnumeration; +class MeasureUnitFields; + +/** + * Enumeration for SI prefixes, such as "kilo". + * + * @draft ICU 67 + */ +typedef enum UMeasureSIPrefix { + + /** + * SI prefix: yotta, 10^24. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_YOTTA = 24, + + /** + * SI prefix: zetta, 10^21. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_ZETTA = 21, + + /** + * SI prefix: exa, 10^18. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_EXA = 18, + + /** + * SI prefix: peta, 10^15. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_PETA = 15, + + /** + * SI prefix: tera, 10^12. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_TERA = 12, + + /** + * SI prefix: giga, 10^9. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_GIGA = 9, + + /** + * SI prefix: mega, 10^6. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_MEGA = 6, + + /** + * SI prefix: kilo, 10^3. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_KILO = 3, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_HECTO = 2, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_DEKA = 1, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_ONE = 0, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_DECI = -1, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_CENTI = -2, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_MILLI = -3, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_MICRO = -6, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_NANO = -9, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_PICO = -12, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_FEMTO = -15, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_ATTO = -18, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_ZEPTO = -21, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_YOCTO = -24 +} UMeasureSIPrefix; /** * A unit such as length, mass, volume, currency, etc. A unit is @@ -52,13 +208,39 @@ class U_I18N_API MeasureUnit: public UObject { * @stable ICU 3.0 */ MeasureUnit(const MeasureUnit &other); - + /** - * Assignment operator. + * Move constructor. + * @stable ICU 3.0 + */ + MeasureUnit(MeasureUnit &&other) noexcept; + + /** + * Construct a MeasureUnit from a CLDR Sequence Unit Identifier, defined in UTS 35. + * Validates and canonicalizes the identifier. + * + *
+     * MeasureUnit example = MeasureUnit::forIdentifier("furlong-per-nanosecond")
+     * 
+ * + * @param id The CLDR Sequence Unit Identifier + * @param status Set if the identifier is invalid. + * @draft ICU 67 + */ + static MeasureUnit forIdentifier(const char* identifier, UErrorCode& status); + + /** + * Copy assignment operator. * @stable ICU 3.0 */ MeasureUnit &operator=(const MeasureUnit &other); + /** + * Move assignment operator. + * @stable ICU 3.0 + */ + MeasureUnit &operator=(MeasureUnit &&other) noexcept; + /** * Returns a polymorphic clone of this object. The result will * have the same class as returned by getDynamicClassID(). @@ -90,16 +272,177 @@ class U_I18N_API MeasureUnit: public UObject { /** * Get the type. + * + * If the unit does not have a type, the empty string is returned. + * * @stable ICU 53 */ const char *getType() const; /** * Get the sub type. + * + * If the unit does not have a subtype, the empty string is returned. + * * @stable ICU 53 */ const char *getSubtype() const; + /** + * Get the CLDR Sequence Unit Identifier for this MeasureUnit, as defined in UTS 35. + * + * @return The string form of this unit, owned by this MeasureUnit. + * @draft ICU 67 + */ + const char* toString() const; + + /** + * Creates a MeasureUnit which is this MeasureUnit augmented with the specified SI prefix. + * For example, UMEASURE_SI_PREFIX_KILO for "kilo". + * + * There is sufficient locale data to format all standard SI prefixes. + * + * If the MeasureUnit is composed of multiple simple units, only the first simple unit is + * modified. For example, starting at "meter-kilogram-per-second", if you set the SI prefix + * to "centi", then you get "centimeter-kilogram-per-second". + * + * @param prefix The SI prefix, from UMeasureSIPrefix. + * @return A new MeasureUnit. + */ + MeasureUnit withSIPrefix(UMeasureSIPrefix prefix) const; + + /** + * Gets the current SI prefix of this MeasureUnit. For example, if the unit has the SI prefix + * "kilo", then UMEASURE_SI_PREFIX_KILO is returned. + * + * If the MeasureUnit is composed of multiple simple units, the SI prefix of the first simple + * unit is returned. For example, from "centimeter-kilogram-per-second", the SI prefix + * UMEASURE_SI_PREFIX_CENTI will be returned. + * + * @return The SI prefix of the first simple unit, from UMeasureSIPrefix. + */ + UMeasureSIPrefix getSIPrefix() const; + + /** + * Creates a MeasureUnit which is this MeasureUnit augmented with the specified power. For + * example, if power is 2, the unit will be squared. + * + * If the MeasureUnit is composed of multiple simple units, only the first simple unit is + * modified. For example, starting at "meter-kilogram-per-second", if you set the power to 2, + * then you get "square-meter-kilogram-per-second". + * + * @param power The power. + * @return A new MeasureUnit. + */ + MeasureUnit withPower(int8_t power) const; + + /** + * Gets the power of this MeasureUnit. For example, if the unit is square, then 2 is returned. + * + * If the MeasureUnit is composed of multiple simple units, the power of the first simple unit + * is returned. For example, from "cubic-meter-per-square-second", 3 is returned. + * + * @return The power of the first simple unit. + */ + int8_t getPower() const; + + /** + * Gets the reciprocal of the unit, with the numerator and denominator flipped. + * + * For example, if the receiver is "meter-per-second", the unit "second-per-meter" is returned. + * + * @return The reciprocal of the target unit. + */ + MeasureUnit reciprocal() const; + + /** + * Gets the product of this unit with another unit. This is a way to build units from + * constituent parts. + * + * The numerator and denominator are preserved through this operation. + * + * For example, if the receiver is "kilowatt" and the argument is "hour-per-day", then the + * unit "kilowatt-hour-per-day" is returned. + * + * @return The product of the target unit with the provided unit. + */ + MeasureUnit product(const MeasureUnit& other) const; + + /** + * Gets the number of constituent simple units. + * + * For example, if the receiver is "meter-per-square-second", then 2 is returned, since there + * are two simple units: "meter" and "second". + * + * @return The number of constituent units. + */ + size_t getSimpleUnitCount() const; + + /** + * Gets the constituent unit at the given index. + * + * For example, to loop over all simple units: + * + *
+     * MeasureUnit unit(u"meter-per-square-second");
+     * for (size_t i = 0; i < unit.getSimpleUnitCount(); i++) {
+     *     std::cout << unit.simpleUnitAt(i).toString() << std::endl;
+     * }
+     * 
+ * + * Expected output: meter, one-per-square-second + * + * @param index Zero-based index. If out of range, the dimensionless unit is returned. + * @return The constituent simple unit at the specified position. + */ + MeasureUnit simpleUnitAt(size_t index) const; + + /** + * Composes this unit with a super unit. + * + * A super unit, used for formatting only, should be a larger unit sharing the same dimension. + * For example, if the current unit is "inch", a super unit could be "foot", in order to + * render 71 inches as "5 feet, 11 inches". If the super unit is invalid, an error will occur + * during formatting. + * + * A unit can have multiple super units; for example, "second" could have both "minute" and + * "hour" as super units. + * + * Super units are ignored and left untouched in most other methods, such as withSIPrefix, + * withPower, and reciprocal. + * + * @param other The super unit to compose with the target unit. + * @return The composition of the given super unit with this unit. + */ + MeasureUnit withSuperUnit(const MeasureUnit& other) const; + + /** + * Gets the number of super units in the receiver. + * + * For example, "foot+inch" has one super unit. + * + * @return The number of super units. + */ + size_t getSuperUnitCount() const; + + /** + * Gets the super unit at the given index. + * + * For example, to loop over all super units: + * + *
+     * MeasureUnit unit(u"hour+minute+second");
+     * for (size_t i = 0; i < unit.getSuperUnitCount(); i++) {
+     *     std::cout << unit.superUnitAt(i).toCoreUnitIdentifier() << std::endl;
+     * }
+     * 
+ * + * Expected output: hour, minute + * + * @return The super unit at the specified position. + */ + MeasureUnit superUnitAt(size_t index) const; + /** * getAvailable gets all of the available units. * If there are too many units to fit into destCapacity then the @@ -3353,13 +3696,13 @@ class U_I18N_API MeasureUnit: public UObject { #endif /* U_HIDE_INTERNAL_API */ private: - int32_t fTypeId; - int32_t fSubTypeId; - char fCurrency[4]; - MeasureUnit(int32_t typeId, int32_t subTypeId) : fTypeId(typeId), fSubTypeId(subTypeId) { - fCurrency[0] = 0; - } + const char* fId; + char fCurrency[4]; + int16_t fSubTypeId; + int8_t fTypeId; + + MeasureUnit(int32_t typeId, int32_t subTypeId); void setTo(int32_t typeId, int32_t subTypeId); int32_t getOffset() const; static MeasureUnit *create(int typeId, int subTypeId, UErrorCode &status); diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 2b53f8bc024..94a499e73a4 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -79,6 +79,7 @@ private: void Test20332_PersonUnits(); void TestNumericTime(); void TestNumericTimeSomeSpecialFormats(); + void TestCompoundUnitOperations(); void verifyFormat( const char *description, const MeasureFormat &fmt, @@ -138,6 +139,11 @@ private: NumberFormat::EAlignmentFields field, int32_t start, int32_t end); + void verifyUnitParts( + const MeasureUnit& unit, + UMeasureSIPrefix siPrefix, + int8_t power, + const char* identifier); }; void MeasureFormatTest::runIndexedTest( @@ -3215,6 +3221,69 @@ void MeasureFormatTest::TestNumericTimeSomeSpecialFormats() { verifyFormat("Danish fhoursFminutes", fmtDa, fhoursFminutes, 2, "2.03,877"); } +void MeasureFormatTest::TestCompoundUnitOperations() { + IcuTestErrorCode status(*this, "TestCompoundUnitOperations"); + + MeasureUnit kilometer = MeasureUnit::getKilometer(); + MeasureUnit meter = kilometer.withSIPrefix(UMEASURE_SI_PREFIX_ONE); + MeasureUnit centimeter1 = kilometer.withSIPrefix(UMEASURE_SI_PREFIX_CENTI); + MeasureUnit centimeter2 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI); + + verifyUnitParts(kilometer, UMEASURE_SI_PREFIX_KILO, 0, "kilometer"); + verifyUnitParts(meter, UMEASURE_SI_PREFIX_ONE, 0, "meter"); + verifyUnitParts(centimeter1, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter"); + verifyUnitParts(centimeter2, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter"); + + assertTrue("centimeter equality", centimeter1 == centimeter2); + assertTrue("kilometer inequality", centimeter1 != kilometer); + + MeasureUnit squareMeter = meter.withPower(2); + MeasureUnit overCubicCentimeter = centimeter1.withPower(-3); + MeasureUnit quarticKilometer = kilometer.withPower(4); + MeasureUnit overQuarticKilometer1 = kilometer.withPower(-4); + + verifyUnitParts(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter"); + verifyUnitParts(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, 2, "one-per-cubic-centimeter"); + verifyUnitParts(quarticKilometer, UMEASURE_SI_PREFIX_ONE, 2, "p4-kilometer"); + verifyUnitParts(overQuarticKilometer1, UMEASURE_SI_PREFIX_ONE, 2, "one-per-p4-kilometer"); + + assertTrue("power inequality", quarticKilometer != overQuarticKilometer1); + + MeasureUnit overQuarticKilometer2 = overQuarticKilometer1.reciprocal(); + MeasureUnit overQuarticKilometer3 = kilometer.product(kilometer).product(kilometer) + .product(kilometer).reciprocal(); + + verifyUnitParts(overQuarticKilometer2, UMEASURE_SI_PREFIX_ONE, 2, "one-per-p4-kilometer"); + verifyUnitParts(overQuarticKilometer3, UMEASURE_SI_PREFIX_ONE, 2, "one-per-p4-kilometer"); + + assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); + assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); + + MeasureUnit kiloSquareSecond = MeasureUnit::getSecond() + .withPower(2).withSIPrefix(UMEASURE_SI_PREFIX_KILO); + MeasureUnit meterSecond = meter.product(kiloSquareSecond); + MeasureUnit cubicMeterSecond1 = meter.withPower(3).product(kiloSquareSecond); + MeasureUnit cubicMeterSecond2 = meterSecond.withPower(3); + MeasureUnit centimeterSecond1 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI).product(kiloSquareSecond); + MeasureUnit centimeterSecond2 = meterSecond.withSIPrefix(UMEASURE_SI_PREFIX_CENTI); + MeasureUnit secondCubicMeter = kiloSquareSecond.product(meter.withPower(3)); + MeasureUnit secondCentimeter = kiloSquareSecond.product(meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI)); + + verifyUnitParts(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); + verifyUnitParts(meterSecond, UMEASURE_SI_PREFIX_ONE, 0, "meter-square-kilosecond"); + verifyUnitParts(cubicMeterSecond1, UMEASURE_SI_PREFIX_ONE, 2, "cubic-meter-square-kilosecond"); + verifyUnitParts(cubicMeterSecond2, UMEASURE_SI_PREFIX_ONE, 2, "cubic-meter-square-kilosecond"); + verifyUnitParts(centimeterSecond1, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter-square-kilosecond"); + verifyUnitParts(centimeterSecond2, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter-square-kilosecond"); + verifyUnitParts(secondCubicMeter, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond-cubic-meter"); + verifyUnitParts(secondCentimeter, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond-centimeter"); + + assertTrue("multipart power equality", cubicMeterSecond1 == cubicMeterSecond2); + assertTrue("multipart SI prefix equality", centimeterSecond1 == centimeterSecond2); + assertTrue("order matters inequality", cubicMeterSecond1 != secondCubicMeter); + assertTrue("additional simple units inequality", secondCubicMeter != secondCentimeter); +} + void MeasureFormatTest::verifyFieldPosition( const char *description, @@ -3286,6 +3355,18 @@ void MeasureFormatTest::verifyFormat( } } +void MeasureFormatTest::verifyUnitParts( + const MeasureUnit& unit, + UMeasureSIPrefix siPrefix, + int8_t power, + const char* identifier) { + IcuTestErrorCode status(*this, "verifyUnitParts"); + assertEquals(UnicodeString(identifier) + ": SI prefix", siPrefix, unit.getSIPrefix()); + assertEquals(UnicodeString(identifier) + ": Power", power, unit.getPower()); + assertEquals(UnicodeString(identifier) + ": Identifier", identifier, unit.toString()); + assertTrue(UnicodeString(identifier) + ": Constructor", unit == MeasureUnit::forIdentifier(identifier, status)); +} + extern IntlTest *createMeasureFormatTest() { return new MeasureFormatTest(); } From c787452cfecd35e304c1a826e863505f0b2dddf3 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Tue, 14 Jan 2020 15:12:27 +0100 Subject: [PATCH 02/13] Removing fCurrency to always use fId --- icu4c/source/i18n/measunit.cpp | 55 +++++++++++++++++-------- icu4c/source/i18n/unicode/measunit.h | 6 ++- icu4c/source/test/intltest/numfmtst.cpp | 15 ++++++- 3 files changed, 55 insertions(+), 21 deletions(-) diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp index 0886b23caf7..43811a5331d 100644 --- a/icu4c/source/i18n/measunit.cpp +++ b/icu4c/source/i18n/measunit.cpp @@ -2011,10 +2011,10 @@ MeasureUnit::MeasureUnit() : MeasureUnit(kBaseTypeIdx, kBaseSubTypeIdx) { MeasureUnit::MeasureUnit(int32_t typeId, int32_t subTypeId) : fId(nullptr), fSubTypeId(subTypeId), fTypeId(typeId) { - fCurrency[0] = 0; } -MeasureUnit::MeasureUnit(const MeasureUnit &other) { +MeasureUnit::MeasureUnit(const MeasureUnit &other) + : fId(nullptr) { *this = other; } @@ -2022,7 +2022,6 @@ MeasureUnit::MeasureUnit(MeasureUnit &&other) noexcept : fId(other.fId), fSubTypeId(other.fSubTypeId), fTypeId(other.fTypeId) { - uprv_strcpy(fCurrency, other.fCurrency); other.fId = nullptr; } @@ -2030,21 +2029,19 @@ MeasureUnit &MeasureUnit::operator=(const MeasureUnit &other) { if (this == &other) { return *this; } + uprv_free(fId); if (other.fId) { - auto* id = static_cast(uprv_malloc(uprv_strlen(other.fId) + 1)); - if (!id) { + fId = uprv_strdup(other.fId); + if (!fId) { // Unrecoverable allocation error; set to the default unit *this = MeasureUnit(); return *this; } - uprv_strcpy(id, other.fId); - fId = id; } else { fId = nullptr; } fTypeId = other.fTypeId; fSubTypeId = other.fSubTypeId; - uprv_strcpy(fCurrency, other.fCurrency); return *this; } @@ -2052,11 +2049,11 @@ MeasureUnit &MeasureUnit::operator=(MeasureUnit &&other) noexcept { if (this == &other) { return *this; } + uprv_free(fId); fId = other.fId; other.fId = nullptr; fTypeId = other.fTypeId; fSubTypeId = other.fSubTypeId; - uprv_strcpy(fCurrency, other.fCurrency); return *this; } @@ -2065,20 +2062,28 @@ MeasureUnit *MeasureUnit::clone() const { } MeasureUnit::~MeasureUnit() { - uprv_free(const_cast(fId)); + uprv_free(fId); fId = nullptr; } const char *MeasureUnit::getType() const { + // We have a type & subtype only if fTypeId is present. + if (fTypeId == -1) { + return ""; + } return gTypes[fTypeId]; } const char *MeasureUnit::getSubtype() const { - return fCurrency[0] == 0 ? gSubTypes[getOffset()] : fCurrency; + // We have a type & subtype only if fTypeId is present. + if (fTypeId == -1) { + return ""; + } + return toString(); } const char *MeasureUnit::toString() const { - return fId ? fId : getSubtype(); + return fId ? fId : gSubTypes[getOffset()]; } UBool MeasureUnit::operator==(const UObject& other) const { @@ -2225,6 +2230,10 @@ MeasureUnit MeasureUnit::resolveUnitPerUnit( const MeasureUnit &unit, const MeasureUnit &perUnit, bool* isResolved) { int32_t unitOffset = unit.getOffset(); int32_t perUnitOffset = perUnit.getOffset(); + if (unitOffset == -1 || perUnitOffset == -1) { + *isResolved = false; + return MeasureUnit(); + } // binary search for (unitOffset, perUnitOffset) int32_t start = 0; @@ -2278,12 +2287,18 @@ void MeasureUnit::initCurrency(const char *isoCurrency) { fTypeId = result; result = binarySearch( gSubTypes, gOffsets[fTypeId], gOffsets[fTypeId + 1], isoCurrency); - if (result != -1) { - fSubTypeId = result - gOffsets[fTypeId]; - } else { - uprv_strncpy(fCurrency, isoCurrency, UPRV_LENGTHOF(fCurrency)); - fCurrency[3] = 0; + if (result == -1) { + fId = uprv_strdup(isoCurrency); + if (fId) { + fSubTypeId = -1; + return; + } + // malloc error: fall back to the undefined currency + result = binarySearch( + gSubTypes, gOffsets[fTypeId], gOffsets[fTypeId + 1], "XXX"); + U_ASSERT(result != -1); } + fSubTypeId = result - gOffsets[fTypeId]; } void MeasureUnit::initNoUnit(const char *subtype) { @@ -2298,10 +2313,14 @@ void MeasureUnit::initNoUnit(const char *subtype) { void MeasureUnit::setTo(int32_t typeId, int32_t subTypeId) { fTypeId = typeId; fSubTypeId = subTypeId; - fCurrency[0] = 0; + uprv_free(fId); + fId = nullptr; } int32_t MeasureUnit::getOffset() const { + if (fTypeId < 0 || fSubTypeId < 0) { + return -1; + } return gOffsets[fTypeId] + fSubTypeId; } diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index cca944cc8ff..16790f8a3d9 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -3697,8 +3697,10 @@ class U_I18N_API MeasureUnit: public UObject { private: - const char* fId; - char fCurrency[4]; + // If non-null, fId is owned by the MeasureUnit. + char* fId; + + // These two ints are indices into static string lists in measunit.cpp int16_t fSubTypeId; int8_t fTypeId; diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp index 8b0f7e0a77b..b1b032a05a3 100644 --- a/icu4c/source/test/intltest/numfmtst.cpp +++ b/icu4c/source/test/intltest/numfmtst.cpp @@ -2164,6 +2164,8 @@ void NumberFormatTest::TestCurrencyUnit(void){ static const UChar BAD2[] = u"??A"; static const UChar XXX[] = u"XXX"; static const char XXX8[] = "XXX"; + static const UChar XYZ[] = u"XYZ"; + static const char XYZ8[] = "XYZ"; static const UChar INV[] = u"{$%"; static const char INV8[] = "{$%"; static const UChar ZZZ[] = u"zz"; @@ -2180,10 +2182,16 @@ void NumberFormatTest::TestCurrencyUnit(void){ CurrencyUnit cu(USD, ec); assertSuccess("CurrencyUnit", ec); - assertEquals("getISOCurrency()", USD, cu.getISOCurrency()); assertEquals("getSubtype()", USD8, cu.getSubtype()); + // Test XYZ, a valid but non-standard currency. + // Note: Country code XY is private-use, so XYZ should remain unallocated. + CurrencyUnit extended(XYZ, ec); + assertSuccess("non-standard", ec); + assertEquals("non-standard", XYZ, extended.getISOCurrency()); + assertEquals("non-standard", XYZ8, extended.getSubtype()); + CurrencyUnit inv(INV, ec); assertEquals("non-invariant", U_INVARIANT_CONVERSION_ERROR, ec); assertEquals("non-invariant", XXX, inv.getISOCurrency()); @@ -2257,15 +2265,20 @@ void NumberFormatTest::TestCurrencyUnit(void){ // Test slicing MeasureUnit sliced1 = cu; MeasureUnit sliced2 = cu; + MeasureUnit sliced3 = extended; assertEquals("Subtype after slicing 1", USD8, sliced1.getSubtype()); assertEquals("Subtype after slicing 2", USD8, sliced2.getSubtype()); + assertEquals("Subtype after slicing 3", XYZ8, sliced3.getSubtype()); CurrencyUnit restored1(sliced1, ec); CurrencyUnit restored2(sliced2, ec); + CurrencyUnit restored3(sliced3, ec); assertSuccess("Restoring from MeasureUnit", ec); assertEquals("Subtype after restoring 1", USD8, restored1.getSubtype()); assertEquals("Subtype after restoring 2", USD8, restored2.getSubtype()); + assertEquals("Subtype after restoring 3", XYZ8, restored3.getSubtype()); assertEquals("ISO Code after restoring 1", USD, restored1.getISOCurrency()); assertEquals("ISO Code after restoring 2", USD, restored2.getISOCurrency()); + assertEquals("ISO Code after restoring 3", XYZ, restored3.getISOCurrency()); // Test copy constructor failure LocalPointer meter(MeasureUnit::createMeter(ec)); From 9a6caa01afeb79180c8ce3bf1ad9f3dcbd20cd4d Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Tue, 14 Jan 2020 16:47:18 +0100 Subject: [PATCH 03/13] Adding measunit_extra.cpp with basic trie --- icu4c/source/i18n/Makefile.in | 3 +- icu4c/source/i18n/i18n.vcxproj | 1 + icu4c/source/i18n/i18n.vcxproj.filters | 3 + icu4c/source/i18n/i18n_uwp.vcxproj | 1 + icu4c/source/i18n/measunit_extra.cpp | 231 ++++++++++++++++++++ icu4c/source/i18n/ucln_in.h | 1 + icu4c/source/i18n/unicode/measunit.h | 1 - icu4c/source/test/depstest/dependencies.txt | 6 + 8 files changed, 245 insertions(+), 2 deletions(-) create mode 100644 icu4c/source/i18n/measunit_extra.cpp diff --git a/icu4c/source/i18n/Makefile.in b/icu4c/source/i18n/Makefile.in index f22c20eabca..f6f9860fb3a 100644 --- a/icu4c/source/i18n/Makefile.in +++ b/icu4c/source/i18n/Makefile.in @@ -114,7 +114,8 @@ numparse_symbols.o numparse_decimal.o numparse_scientific.o numparse_currency.o numparse_affixes.o numparse_compositions.o numparse_validators.o \ numrange_fluent.o numrange_impl.o \ erarules.o \ -formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o formatted_string_builder.o +formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o formatted_string_builder.o \ +measunit_extra.o ## Header files to install HEADERS = $(srcdir)/unicode/*.h diff --git a/icu4c/source/i18n/i18n.vcxproj b/icu4c/source/i18n/i18n.vcxproj index 5c6760d220e..f22fd2ff097 100644 --- a/icu4c/source/i18n/i18n.vcxproj +++ b/icu4c/source/i18n/i18n.vcxproj @@ -187,6 +187,7 @@ + diff --git a/icu4c/source/i18n/i18n.vcxproj.filters b/icu4c/source/i18n/i18n.vcxproj.filters index a1813fc0696..eade0d07354 100644 --- a/icu4c/source/i18n/i18n.vcxproj.filters +++ b/icu4c/source/i18n/i18n.vcxproj.filters @@ -204,6 +204,9 @@ formatting + + formatting + formatting diff --git a/icu4c/source/i18n/i18n_uwp.vcxproj b/icu4c/source/i18n/i18n_uwp.vcxproj index 989cef88fe5..2466ad95864 100644 --- a/icu4c/source/i18n/i18n_uwp.vcxproj +++ b/icu4c/source/i18n/i18n_uwp.vcxproj @@ -408,6 +408,7 @@ + diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp new file mode 100644 index 00000000000..d84e50af350 --- /dev/null +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -0,0 +1,231 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// Extra functions for MeasureUnit not needed for all clients. +// Separate .o file so that it can be removed for modularity. + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +// Allow implicit conversion from char16_t* to UnicodeString for this file: +// Helpful in toString methods and elsewhere. +#define UNISTR_FROM_STRING_EXPLICIT + +#include "cstring.h" +#include "ucln_in.h" +#include "umutex.h" +#include "unicode/errorcode.h" +#include "unicode/measunit.h" +#include "unicode/ucharstrie.h" +#include "unicode/ucharstriebuilder.h" + +U_NAMESPACE_BEGIN + + +namespace { + +// This is to ensure we only insert positive integers into the trie +constexpr int32_t kSIPrefixOffset = 64; + +constexpr int32_t kSyntaxPartOffset = 256; + +enum SyntaxPart { + SYNTAX_PART_PER = kSyntaxPartOffset, + SYNTAX_PART_SQUARE, + SYNTAX_PART_CUBIC, + SYNTAX_PART_P1, + SYNTAX_PART_P2, + SYNTAX_PART_P3, + SYNTAX_PART_P4, + SYNTAX_PART_P5, + SYNTAX_PART_P6, + SYNTAX_PART_P7, + SYNTAX_PART_P8, + SYNTAX_PART_P9, +}; + +constexpr int32_t kSimpleUnitOffset = 512; + +// FIXME: Get this list from data +const char16_t* gSimpleUnits[] = { + u"100kilometer", + u"acre", + u"ampere", + u"arc-minute", + u"arc-second", + u"astronomical-unit", + u"atmosphere", + u"bar", + u"barrel", + u"bit", + u"british-thermal-unit", + u"bushel", + u"byte", + u"calorie", + u"carat", + u"celsius", + u"century", + u"cup", + u"cup-metric", + u"dalton", + u"day", + u"day-person", + u"decade", + u"degree", + u"dot", // (as in "dot-per-inch") + u"dunam", + u"earth-mass", + u"electronvolt", + u"em", + u"fahrenheit", + u"fathom", + u"fluid-ounce", + u"fluid-ounce-imperial", + u"foodcalorie", + u"foot", + u"furlong", + u"g-force", + u"gallon", + u"gallon-imperial", + u"generic", // (i.e., "temperature-generic") + u"gram", + u"hectare", // (note: other "are" derivatives are uncommon) + u"hertz", + u"horsepower", + u"hour", + u"inch", + u"inch-hg", + u"joule", + u"karat", + u"kelvin", + u"knot", + u"light-year", + u"liter", + u"lux", + u"meter", + u"meter-of-mercury", // (not "millimeter-of-mercury") + u"metric-ton", + u"mile", + u"mile-scandinavian", + u"minute", + u"mole", + u"month", + u"month-person", + u"nautical-mile", + u"newton", + u"ohm", + u"ounce", + u"ounce-troy", + u"parsec", + u"pascal", + u"percent", + u"permille", + u"permillion", + u"permyriad", + u"pint", + u"pint-metric", + u"pixel", + u"point", + u"pound", + u"pound-force", + u"quart", + u"radian", + u"revolution", + u"second", + u"solar-luminosity", + u"solar-mass", + u"solar-radius", + u"stone", + u"tablespoon", + u"teaspoon", + u"therm-us", + u"ton", + u"volt", + u"watt", + u"week", + u"week-person", + u"yard", + u"year", + u"year-person", +}; + +icu::UInitOnce gUnitExtrasInitOnce = U_INITONCE_INITIALIZER; + +char16_t* kSerializedUnitExtrasStemTrie = nullptr; + +UBool U_CALLCONV cleanupUnitExtras() { + uprv_free(kSerializedUnitExtrasStemTrie); + kSerializedUnitExtrasStemTrie = nullptr; + gUnitExtrasInitOnce.reset(); + return TRUE; +} + +void U_CALLCONV initUnitExtras(UErrorCode& status) { + ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras); + + UCharsTrieBuilder b(status); + if (U_FAILURE(status)) { return; } + + // Add SI prefixes + b.add(u"yotta", kSIPrefixOffset + UMEASURE_SI_PREFIX_YOTTA, status); + b.add(u"zetta", kSIPrefixOffset + UMEASURE_SI_PREFIX_ZETTA, status); + b.add(u"exa", kSIPrefixOffset + UMEASURE_SI_PREFIX_EXA, status); + b.add(u"peta", kSIPrefixOffset + UMEASURE_SI_PREFIX_PETA, status); + b.add(u"tera", kSIPrefixOffset + UMEASURE_SI_PREFIX_TERA, status); + b.add(u"giga", kSIPrefixOffset + UMEASURE_SI_PREFIX_GIGA, status); + b.add(u"mega", kSIPrefixOffset + UMEASURE_SI_PREFIX_MEGA, status); + b.add(u"kilo", kSIPrefixOffset + UMEASURE_SI_PREFIX_KILO, status); + b.add(u"hecto", kSIPrefixOffset + UMEASURE_SI_PREFIX_HECTO, status); + b.add(u"deka", kSIPrefixOffset + UMEASURE_SI_PREFIX_DEKA, status); + b.add(u"deci", kSIPrefixOffset + UMEASURE_SI_PREFIX_DECI, status); + b.add(u"centi", kSIPrefixOffset + UMEASURE_SI_PREFIX_CENTI, status); + b.add(u"milli", kSIPrefixOffset + UMEASURE_SI_PREFIX_MILLI, status); + b.add(u"micro", kSIPrefixOffset + UMEASURE_SI_PREFIX_MICRO, status); + b.add(u"nano", kSIPrefixOffset + UMEASURE_SI_PREFIX_NANO, status); + b.add(u"pico", kSIPrefixOffset + UMEASURE_SI_PREFIX_PICO, status); + b.add(u"femto", kSIPrefixOffset + UMEASURE_SI_PREFIX_FEMTO, status); + b.add(u"atto", kSIPrefixOffset + UMEASURE_SI_PREFIX_ATTO, status); + b.add(u"zepto", kSIPrefixOffset + UMEASURE_SI_PREFIX_ZEPTO, status); + b.add(u"yocto", kSIPrefixOffset + UMEASURE_SI_PREFIX_YOCTO, status); + if (U_FAILURE(status)) { return; } + + // Add syntax parts (per, power prefixes) + b.add(u"-per-", SYNTAX_PART_PER, status); + b.add(u"square-", SYNTAX_PART_SQUARE, status); + b.add(u"cubic-", SYNTAX_PART_CUBIC, status); + b.add(u"p1", SYNTAX_PART_P1, status); + b.add(u"p2", SYNTAX_PART_P2, status); + b.add(u"p3", SYNTAX_PART_P3, status); + b.add(u"p4", SYNTAX_PART_P4, status); + b.add(u"p5", SYNTAX_PART_P5, status); + b.add(u"p6", SYNTAX_PART_P6, status); + b.add(u"p7", SYNTAX_PART_P7, status); + b.add(u"p8", SYNTAX_PART_P8, status); + b.add(u"p9", SYNTAX_PART_P9, status); + if (U_FAILURE(status)) { return; } + + // Add sanctioned simple units by offset + int32_t simpleUnitOffset = kSimpleUnitOffset; + for (auto simpleUnit : gSimpleUnits) { + b.add(simpleUnit, simpleUnitOffset++, status); + } + + // Build the CharsTrie + // TODO: Use SLOW or FAST here? + UnicodeString result; + b.buildUnicodeString(USTRINGTRIE_BUILD_FAST, result, status); + if (U_FAILURE(status)) { return; } + + // Copy the result into the global constant pointer + size_t numBytes = result.length() * sizeof(char16_t); + kSerializedUnitExtrasStemTrie = static_cast(uprv_malloc(numBytes)); + uprv_memcpy(kSerializedUnitExtrasStemTrie, result.getBuffer(), numBytes); +} + +} // namespace + + +U_NAMESPACE_END + +#endif /* !UNCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/ucln_in.h b/icu4c/source/i18n/ucln_in.h index 2f70a8500e1..765cdd559fb 100644 --- a/icu4c/source/i18n/ucln_in.h +++ b/icu4c/source/i18n/ucln_in.h @@ -26,6 +26,7 @@ as the functions are suppose to be called. It's usually best to have child dependencies called first. */ typedef enum ECleanupI18NType { UCLN_I18N_START = -1, + UCLN_I18N_UNIT_EXTRAS, UCLN_I18N_NUMBER_SKELETONS, UCLN_I18N_CURRENCY_SPACING, UCLN_I18N_SPOOF, diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index 16790f8a3d9..1c535540467 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -29,7 +29,6 @@ U_NAMESPACE_BEGIN class StringEnumeration; -class MeasureUnitFields; /** * Enumeration for SI prefixes, such as "kilo". diff --git a/icu4c/source/test/depstest/dependencies.txt b/icu4c/source/test/depstest/dependencies.txt index 1d726b6ea32..080beeff637 100644 --- a/icu4c/source/test/depstest/dependencies.txt +++ b/icu4c/source/test/depstest/dependencies.txt @@ -870,6 +870,7 @@ library: i18n listformatter formatting formattable_cnv regex regex_cnv translit double_conversion number_representation number_output numberformatter numberparser + units_extra universal_time_scale uclean_i18n @@ -1053,6 +1054,11 @@ group: sharedbreakiterator deps breakiterator +group: units_extra + measunit_extra.o + deps + units + group: units measunit.o currunit.o nounit.o deps From fb1129a188bb663737a747360c24a332a175f206 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Tue, 14 Jan 2020 23:25:36 +0100 Subject: [PATCH 04/13] Additional WIP --- icu4c/source/i18n/measunit.cpp | 4 + icu4c/source/i18n/measunit_extra.cpp | 374 +++++++++++++++++++++++---- icu4c/source/i18n/unicode/measunit.h | 1 + 3 files changed, 331 insertions(+), 48 deletions(-) diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp index 43811a5331d..22f877a95b8 100644 --- a/icu4c/source/i18n/measunit.cpp +++ b/icu4c/source/i18n/measunit.cpp @@ -2025,6 +2025,10 @@ MeasureUnit::MeasureUnit(MeasureUnit &&other) noexcept other.fId = nullptr; } +MeasureUnit::MeasureUnit(char* idToAdopt) + : fId(idToAdopt), fSubTypeId(-1), fTypeId(-1) { +} + MeasureUnit &MeasureUnit::operator=(const MeasureUnit &other) { if (this == &other) { return *this; diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index d84e50af350..379587a8778 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -13,6 +13,7 @@ #define UNISTR_FROM_STRING_EXPLICIT #include "cstring.h" +#include "uassert.h" #include "ucln_in.h" #include "umutex.h" #include "unicode/errorcode.h" @@ -20,6 +21,8 @@ #include "unicode/ucharstrie.h" #include "unicode/ucharstriebuilder.h" +#include "cstr.h" + U_NAMESPACE_BEGIN @@ -28,27 +31,64 @@ namespace { // This is to ensure we only insert positive integers into the trie constexpr int32_t kSIPrefixOffset = 64; -constexpr int32_t kSyntaxPartOffset = 256; +constexpr int32_t kCompoundPartOffset = 128; -enum SyntaxPart { - SYNTAX_PART_PER = kSyntaxPartOffset, - SYNTAX_PART_SQUARE, - SYNTAX_PART_CUBIC, - SYNTAX_PART_P1, - SYNTAX_PART_P2, - SYNTAX_PART_P3, - SYNTAX_PART_P4, - SYNTAX_PART_P5, - SYNTAX_PART_P6, - SYNTAX_PART_P7, - SYNTAX_PART_P8, - SYNTAX_PART_P9, +enum CompoundPart { + COMPOUND_PART_PER = kCompoundPartOffset, + COMPOUND_PART_TIMES, + COMPOUND_PART_ONE_PER, + COMPOUND_PART_PLUS, +}; + +constexpr int32_t kPowerPartOffset = 256; + +enum PowerPart { + POWER_PART_P2 = kPowerPartOffset + 2, + POWER_PART_P3, + POWER_PART_P4, + POWER_PART_P5, + POWER_PART_P6, + POWER_PART_P7, + POWER_PART_P8, + POWER_PART_P9, + POWER_PART_P10, + POWER_PART_P11, + POWER_PART_P12, + POWER_PART_P13, + POWER_PART_P14, + POWER_PART_P15, }; constexpr int32_t kSimpleUnitOffset = 512; +const struct SIPrefixStrings { + const char* const string; + UMeasureSIPrefix value; +} gSIPrefixStrings[] = { + { "yotta", UMEASURE_SI_PREFIX_YOTTA }, + { "zetta", UMEASURE_SI_PREFIX_ZETTA }, + { "exa", UMEASURE_SI_PREFIX_EXA }, + { "peta", UMEASURE_SI_PREFIX_PETA }, + { "tera", UMEASURE_SI_PREFIX_TERA }, + { "giga", UMEASURE_SI_PREFIX_GIGA }, + { "mega", UMEASURE_SI_PREFIX_MEGA }, + { "kilo", UMEASURE_SI_PREFIX_KILO }, + { "hecto", UMEASURE_SI_PREFIX_HECTO }, + { "deka", UMEASURE_SI_PREFIX_DEKA }, + { "deci", UMEASURE_SI_PREFIX_DECI }, + { "centi", UMEASURE_SI_PREFIX_CENTI }, + { "milli", UMEASURE_SI_PREFIX_MILLI }, + { "micro", UMEASURE_SI_PREFIX_MICRO }, + { "nano", UMEASURE_SI_PREFIX_NANO }, + { "pico", UMEASURE_SI_PREFIX_PICO }, + { "femto", UMEASURE_SI_PREFIX_FEMTO }, + { "atto", UMEASURE_SI_PREFIX_ATTO }, + { "zepto", UMEASURE_SI_PREFIX_ZEPTO }, + { "yocto", UMEASURE_SI_PREFIX_YOCTO }, +}; + // FIXME: Get this list from data -const char16_t* gSimpleUnits[] = { +const char16_t* const gSimpleUnits[] = { u"100kilometer", u"acre", u"ampere", @@ -168,41 +208,33 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) { if (U_FAILURE(status)) { return; } // Add SI prefixes - b.add(u"yotta", kSIPrefixOffset + UMEASURE_SI_PREFIX_YOTTA, status); - b.add(u"zetta", kSIPrefixOffset + UMEASURE_SI_PREFIX_ZETTA, status); - b.add(u"exa", kSIPrefixOffset + UMEASURE_SI_PREFIX_EXA, status); - b.add(u"peta", kSIPrefixOffset + UMEASURE_SI_PREFIX_PETA, status); - b.add(u"tera", kSIPrefixOffset + UMEASURE_SI_PREFIX_TERA, status); - b.add(u"giga", kSIPrefixOffset + UMEASURE_SI_PREFIX_GIGA, status); - b.add(u"mega", kSIPrefixOffset + UMEASURE_SI_PREFIX_MEGA, status); - b.add(u"kilo", kSIPrefixOffset + UMEASURE_SI_PREFIX_KILO, status); - b.add(u"hecto", kSIPrefixOffset + UMEASURE_SI_PREFIX_HECTO, status); - b.add(u"deka", kSIPrefixOffset + UMEASURE_SI_PREFIX_DEKA, status); - b.add(u"deci", kSIPrefixOffset + UMEASURE_SI_PREFIX_DECI, status); - b.add(u"centi", kSIPrefixOffset + UMEASURE_SI_PREFIX_CENTI, status); - b.add(u"milli", kSIPrefixOffset + UMEASURE_SI_PREFIX_MILLI, status); - b.add(u"micro", kSIPrefixOffset + UMEASURE_SI_PREFIX_MICRO, status); - b.add(u"nano", kSIPrefixOffset + UMEASURE_SI_PREFIX_NANO, status); - b.add(u"pico", kSIPrefixOffset + UMEASURE_SI_PREFIX_PICO, status); - b.add(u"femto", kSIPrefixOffset + UMEASURE_SI_PREFIX_FEMTO, status); - b.add(u"atto", kSIPrefixOffset + UMEASURE_SI_PREFIX_ATTO, status); - b.add(u"zepto", kSIPrefixOffset + UMEASURE_SI_PREFIX_ZEPTO, status); - b.add(u"yocto", kSIPrefixOffset + UMEASURE_SI_PREFIX_YOCTO, status); + for (const auto& siPrefixInfo : gSIPrefixStrings) { + UnicodeString uSIPrefix(siPrefixInfo.string, -1, US_INV); + b.add(uSIPrefix, siPrefixInfo.value + kSIPrefixOffset, status); + } if (U_FAILURE(status)) { return; } - // Add syntax parts (per, power prefixes) - b.add(u"-per-", SYNTAX_PART_PER, status); - b.add(u"square-", SYNTAX_PART_SQUARE, status); - b.add(u"cubic-", SYNTAX_PART_CUBIC, status); - b.add(u"p1", SYNTAX_PART_P1, status); - b.add(u"p2", SYNTAX_PART_P2, status); - b.add(u"p3", SYNTAX_PART_P3, status); - b.add(u"p4", SYNTAX_PART_P4, status); - b.add(u"p5", SYNTAX_PART_P5, status); - b.add(u"p6", SYNTAX_PART_P6, status); - b.add(u"p7", SYNTAX_PART_P7, status); - b.add(u"p8", SYNTAX_PART_P8, status); - b.add(u"p9", SYNTAX_PART_P9, status); + // Add syntax parts (compound, power prefixes) + b.add(u"-per-", COMPOUND_PART_PER, status); + b.add(u"-", COMPOUND_PART_TIMES, status); + b.add(u"one-per-", COMPOUND_PART_ONE_PER, status); + b.add(u"+", COMPOUND_PART_PLUS, status); + b.add(u"square-", POWER_PART_P2, status); + b.add(u"cubic-", POWER_PART_P3, status); + b.add(u"p2-", POWER_PART_P2, status); + b.add(u"p3-", POWER_PART_P3, status); + b.add(u"p4-", POWER_PART_P4, status); + b.add(u"p5-", POWER_PART_P5, status); + b.add(u"p6-", POWER_PART_P6, status); + b.add(u"p7-", POWER_PART_P7, status); + b.add(u"p8-", POWER_PART_P8, status); + b.add(u"p9-", POWER_PART_P9, status); + b.add(u"p10-", POWER_PART_P10, status); + b.add(u"p11-", POWER_PART_P11, status); + b.add(u"p12-", POWER_PART_P12, status); + b.add(u"p13-", POWER_PART_P13, status); + b.add(u"p14-", POWER_PART_P14, status); + b.add(u"p15-", POWER_PART_P15, status); if (U_FAILURE(status)) { return; } // Add sanctioned simple units by offset @@ -223,9 +255,255 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) { uprv_memcpy(kSerializedUnitExtrasStemTrie, result.getBuffer(), numBytes); } +class UnitIdentifierParser { +public: + static UnitIdentifierParser from(StringPiece source, UErrorCode& status) { + umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status); + if (U_FAILURE(status)) { + return UnitIdentifierParser(); + } + return UnitIdentifierParser(source); + } + + int32_t nextToken(UErrorCode& status) { + fTrie.reset(); + int32_t match = -1; + int32_t previ = -1; + do { + fTrie.next(fSource.data()[fIndex++]); + if (fTrie.current() == USTRINGTRIE_NO_MATCH) { + break; + } else if (fTrie.current() == USTRINGTRIE_NO_VALUE) { + continue; + } else if (fTrie.current() == USTRINGTRIE_FINAL_VALUE) { + match = fTrie.getValue(); + previ = fIndex; + break; + } else if (fTrie.current() == USTRINGTRIE_INTERMEDIATE_VALUE) { + match = fTrie.getValue(); + previ = fIndex; + continue; + } else { + UPRV_UNREACHABLE; + } + } while (fIndex < fSource.length()); + + if (match < 0) { + // TODO: Make a new status code? + status = U_ILLEGAL_ARGUMENT_ERROR; + } else { + fIndex = previ; + } + return match; + } + + bool hasNext() const { + return fIndex < fSource.length(); + } + + int32_t currentIndex() const { + return fIndex; + } + +private: + int32_t fIndex = 0; + StringPiece fSource; + UCharsTrie fTrie; + + UnitIdentifierParser() : fSource(""), fTrie(u"") {} + + UnitIdentifierParser(StringPiece source) + : fSource(source), fTrie(kSerializedUnitExtrasStemTrie) {} +}; + } // namespace +MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& status) { + UnitIdentifierParser parser = UnitIdentifierParser::from(identifier, status); + if (U_FAILURE(status)) { + // Unrecoverable error + return MeasureUnit(); + } + + while (parser.hasNext()) { + parser.nextToken(status); + if (U_FAILURE(status)) { + // Invalid syntax + return MeasureUnit(); + } + + // if (match < kCompoundPartOffset) { + // // SI Prefix + // auto prefix = static_cast(match - kSIPrefixOffset); + // } else if (match < kPowerPartOffset) { + // // Compound part + // const char* operation = (match == COMPOUND_PART_PER) ? "per" : "times/plus"; + // } else if (match < kSimpleUnitOffset) { + // // Power part + // int32_t power = match - kPowerPartOffset; + // } else { + // // Simple unit + // const char16_t* simpleUnit = gSimpleUnits[match - kSimpleUnitOffset]; + // } + } + + // Success + return MeasureUnit(uprv_strdup(identifier)); +} + +UMeasureSIPrefix MeasureUnit::getSIPrefix() const { + ErrorCode status; + const char* id = toString(); + UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); + if (status.isFailure()) { + // Unrecoverable error + return UMEASURE_SI_PREFIX_ONE; + } + + int32_t match = parser.nextToken(status); + if (status.isFailure()) { + // Invalid syntax + return UMEASURE_SI_PREFIX_ONE; + } + + if (match >= kPowerPartOffset && match < kSimpleUnitOffset) { + // Skip the power part + match = parser.nextToken(status); + if (status.isFailure()) { + // Invalid syntax + return UMEASURE_SI_PREFIX_ONE; + } + } + + if (match >= kCompoundPartOffset) { + // No SI prefix + return UMEASURE_SI_PREFIX_ONE; + } + + return static_cast(match - kSIPrefixOffset); +} + +MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix) const { + ErrorCode status; + const char* id = toString(); + UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); + if (status.isFailure()) { + // Unrecoverable error + return *this; + } + + int32_t match = parser.nextToken(status); + if (status.isFailure()) { + // Invalid syntax + return *this; + } + + CharString builder; + int32_t unitStart = 0; + if (match >= kPowerPartOffset && match < kSimpleUnitOffset) { + // Skip the power part + unitStart = parser.currentIndex(); + builder.append(id, unitStart, status); + match = parser.nextToken(status); + } + + // Append the new SI prefix + for (const auto& siPrefixInfo : gSIPrefixStrings) { + if (siPrefixInfo.value == prefix) { + builder.append(siPrefixInfo.string, status); + break; + } + } + + if (match < kCompoundPartOffset) { + // Remove the old SI prefix + unitStart = parser.currentIndex(); + } + builder.append(id + unitStart, status); + if (status.isFailure()) { + // Unrecoverable error + return *this; + } + + return MeasureUnit(builder.cloneData(status)); +} + +int8_t MeasureUnit::getPower() const { + ErrorCode status; + const char* id = toString(); + UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); + if (status.isFailure()) { + // Unrecoverable error + return 0; + } + + int32_t match = parser.nextToken(status); + if (status.isFailure()) { + // Invalid syntax + return 0; + } + + if (match < kPowerPartOffset || match >= kSimpleUnitOffset) { + // No power part + return 0; + } + + return static_cast(match - kPowerPartOffset); +} + +MeasureUnit MeasureUnit::withPower(int8_t power) const { + if (power < 0) { + // Don't know how to handle this yet + U_ASSERT(FALSE); + } + + ErrorCode status; + const char* id = toString(); + UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); + if (status.isFailure()) { + // Unrecoverable error + return *this; + } + + int32_t match = parser.nextToken(status); + if (status.isFailure()) { + // Invalid syntax + return *this; + } + + // Append the new power + CharString builder; + if (power == 2) { + builder.append("square-", status); + } else if (power == 3) { + builder.append("cubic-", status); + } else if (power < 10) { + builder.append('p', status); + builder.append(power + '0', status); + builder.append('-', status); + } else { + builder.append("p1", status); + builder.append('0' + (power % 10), status); + builder.append('-', status); + } + + if (match < kCompoundPartOffset) { + // Remove the old power + builder.append(id + parser.currentIndex(), status); + } else { + // Append the whole identifier + builder.append(id, status); + } + if (status.isFailure()) { + // Unrecoverable error + return *this; + } + + return MeasureUnit(builder.cloneData(status)); +} + + U_NAMESPACE_END #endif /* !UNCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index 1c535540467..466460b02a0 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -3704,6 +3704,7 @@ private: int8_t fTypeId; MeasureUnit(int32_t typeId, int32_t subTypeId); + MeasureUnit(char* idToAdopt); void setTo(int32_t typeId, int32_t subTypeId); int32_t getOffset() const; static MeasureUnit *create(int typeId, int subTypeId, UErrorCode &status); From 6161b508a872031d29c4613e747c6fd65c846cbe Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Wed, 15 Jan 2020 15:17:12 +0100 Subject: [PATCH 05/13] Next checkpoint --- icu4c/source/i18n/measunit_extra.cpp | 323 ++++++++++++++++----------- 1 file changed, 193 insertions(+), 130 deletions(-) diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index 379587a8778..86453302b1e 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -36,7 +36,6 @@ constexpr int32_t kCompoundPartOffset = 128; enum CompoundPart { COMPOUND_PART_PER = kCompoundPartOffset, COMPOUND_PART_TIMES, - COMPOUND_PART_ONE_PER, COMPOUND_PART_PLUS, }; @@ -217,7 +216,6 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) { // Add syntax parts (compound, power prefixes) b.add(u"-per-", COMPOUND_PART_PER, status); b.add(u"-", COMPOUND_PART_TIMES, status); - b.add(u"one-per-", COMPOUND_PART_ONE_PER, status); b.add(u"+", COMPOUND_PART_PLUS, status); b.add(u"square-", POWER_PART_P2, status); b.add(u"cubic-", POWER_PART_P3, status); @@ -255,6 +253,98 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) { uprv_memcpy(kSerializedUnitExtrasStemTrie, result.getBuffer(), numBytes); } +class Token { +public: + Token(int32_t match) : fMatch(match) {} + + enum Type { + TYPE_UNDEFINED, + TYPE_SI_PREFIX, + TYPE_COMPOUND_PART, + TYPE_POWER_PART, + TYPE_SIMPLE_UNIT, + }; + + Type getType() const { + if (fMatch <= 0) { + UPRV_UNREACHABLE; + } else if (fMatch < kCompoundPartOffset) { + return TYPE_SI_PREFIX; + } else if (fMatch < kPowerPartOffset) { + return TYPE_COMPOUND_PART; + } else if (fMatch < kSimpleUnitOffset) { + return TYPE_POWER_PART; + } else { + return TYPE_SIMPLE_UNIT; + } + } + + UMeasureSIPrefix getSIPrefix() const { + U_ASSERT(getType() == TYPE_SI_PREFIX); + return static_cast(fMatch - kSIPrefixOffset); + } + + int32_t getMatch() const { + return fMatch; + } + + int8_t getPower() const { + return static_cast(fMatch - kPowerPartOffset); + } + + int32_t getSimpleUnitOffset() const { + return fMatch - kSimpleUnitOffset; + } + +private: + int32_t fMatch; +}; + +struct PowerUnit { + int8_t power = 0; + bool plus = false; + UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; + StringPiece id; + + void toString(CharString& builder, UErrorCode& status) { + if (power == 0) { + // no-op + } else if (power == 1) { + UPRV_UNREACHABLE; + } else if (power == 2) { + builder.append("square-", status); + } else if (power == 3) { + builder.append("cubic-", status); + } else if (power < 10) { + builder.append('p', status); + builder.append(power + '0', status); + builder.append('-', status); + } else { + U_ASSERT(power <= 15); + builder.append("p1", status); + builder.append('0' + (power % 10), status); + builder.append('-', status); + } + if (U_FAILURE(status)) { + return; + } + + if (siPrefix != UMEASURE_SI_PREFIX_ONE) { + for (const auto& siPrefixInfo : gSIPrefixStrings) { + if (siPrefixInfo.value == siPrefix) { + builder.append(siPrefixInfo.string, status); + break; + } + } + } + if (U_FAILURE(status)) { + return; + } + + builder.append(id, status); + } +}; + class UnitIdentifierParser { public: static UnitIdentifierParser from(StringPiece source, UErrorCode& status) { @@ -265,7 +355,7 @@ public: return UnitIdentifierParser(source); } - int32_t nextToken(UErrorCode& status) { + Token nextToken(UErrorCode& status) { fTrie.reset(); int32_t match = -1; int32_t previ = -1; @@ -294,13 +384,101 @@ public: } else { fIndex = previ; } - return match; + return Token(match); } bool hasNext() const { return fIndex < fSource.length(); } + PowerUnit nextUnit(UErrorCode& status) { + PowerUnit retval; + if (U_FAILURE(status)) { + return retval; + } + + // state: + // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit) + // 1 = power token seen (will not accept another power token) + // 2 = SI prefix token seen (will not accept a power or SI prefix token) + int32_t state = 0; + int32_t previ = fIndex; + + // Maybe read a compound part + if (!fExpectingUnit) { + Token token = nextToken(status); + if (U_FAILURE(status)) { + return retval; + } + if (token.getType() != Token::TYPE_COMPOUND_PART) { + goto fail; + } + switch (token.getMatch()) { + case COMPOUND_PART_PER: + if (fAfterPer) { + goto fail; + } + fAfterPer = true; + break; + + case COMPOUND_PART_TIMES: + break; + + case COMPOUND_PART_PLUS: + retval.plus = true; + fAfterPer = false; + break; + } + previ = fIndex; + } + + // Read a unit + while (hasNext()) { + Token token = nextToken(status); + if (U_FAILURE(status)) { + return retval; + } + + switch (token.getType()) { + case Token::TYPE_POWER_PART: + if (state > 0) { + goto fail; + } + retval.power = token.getPower(); + if (fAfterPer) { + retval.power *= -1; + } + previ = fIndex; + state = 1; + break; + + case Token::TYPE_SI_PREFIX: + if (state > 1) { + goto fail; + } + retval.siPrefix = token.getSIPrefix(); + previ = fIndex; + state = 2; + break; + + case Token::TYPE_SIMPLE_UNIT: + if (state > 2) { + goto fail; + } + retval.id = fSource.substr(previ, fIndex - previ); + return retval; + + default: + goto fail; + } + } + + fail: + // TODO: Make a new status code? + status = U_ILLEGAL_ARGUMENT_ERROR; + return retval; + } + int32_t currentIndex() const { return fIndex; } @@ -310,6 +488,9 @@ private: StringPiece fSource; UCharsTrie fTrie; + bool fAfterPer = false; + bool fExpectingUnit = true; + UnitIdentifierParser() : fSource(""), fTrie(u"") {} UnitIdentifierParser(StringPiece source) @@ -332,20 +513,6 @@ MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& statu // Invalid syntax return MeasureUnit(); } - - // if (match < kCompoundPartOffset) { - // // SI Prefix - // auto prefix = static_cast(match - kSIPrefixOffset); - // } else if (match < kPowerPartOffset) { - // // Compound part - // const char* operation = (match == COMPOUND_PART_PER) ? "per" : "times/plus"; - // } else if (match < kSimpleUnitOffset) { - // // Power part - // int32_t power = match - kPowerPartOffset; - // } else { - // // Simple unit - // const char16_t* simpleUnit = gSimpleUnits[match - kSimpleUnitOffset]; - // } } // Success @@ -355,101 +522,28 @@ MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& statu UMeasureSIPrefix MeasureUnit::getSIPrefix() const { ErrorCode status; const char* id = toString(); - UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); - if (status.isFailure()) { - // Unrecoverable error - return UMEASURE_SI_PREFIX_ONE; - } - - int32_t match = parser.nextToken(status); - if (status.isFailure()) { - // Invalid syntax - return UMEASURE_SI_PREFIX_ONE; - } - - if (match >= kPowerPartOffset && match < kSimpleUnitOffset) { - // Skip the power part - match = parser.nextToken(status); - if (status.isFailure()) { - // Invalid syntax - return UMEASURE_SI_PREFIX_ONE; - } - } - - if (match >= kCompoundPartOffset) { - // No SI prefix - return UMEASURE_SI_PREFIX_ONE; - } - - return static_cast(match - kSIPrefixOffset); + return UnitIdentifierParser::from(id, status).nextUnit(status).siPrefix; } MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix) const { ErrorCode status; const char* id = toString(); UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); + PowerUnit powerUnit = parser.nextUnit(status); if (status.isFailure()) { - // Unrecoverable error - return *this; - } - - int32_t match = parser.nextToken(status); - if (status.isFailure()) { - // Invalid syntax return *this; } + powerUnit.siPrefix = prefix; CharString builder; - int32_t unitStart = 0; - if (match >= kPowerPartOffset && match < kSimpleUnitOffset) { - // Skip the power part - unitStart = parser.currentIndex(); - builder.append(id, unitStart, status); - match = parser.nextToken(status); - } - - // Append the new SI prefix - for (const auto& siPrefixInfo : gSIPrefixStrings) { - if (siPrefixInfo.value == prefix) { - builder.append(siPrefixInfo.string, status); - break; - } - } - - if (match < kCompoundPartOffset) { - // Remove the old SI prefix - unitStart = parser.currentIndex(); - } - builder.append(id + unitStart, status); - if (status.isFailure()) { - // Unrecoverable error - return *this; - } - + powerUnit.toString(builder, status); return MeasureUnit(builder.cloneData(status)); } int8_t MeasureUnit::getPower() const { ErrorCode status; const char* id = toString(); - UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); - if (status.isFailure()) { - // Unrecoverable error - return 0; - } - - int32_t match = parser.nextToken(status); - if (status.isFailure()) { - // Invalid syntax - return 0; - } - - if (match < kPowerPartOffset || match >= kSimpleUnitOffset) { - // No power part - return 0; - } - - return static_cast(match - kPowerPartOffset); + return UnitIdentifierParser::from(id, status).nextUnit(status).power; } MeasureUnit MeasureUnit::withPower(int8_t power) const { @@ -461,45 +555,14 @@ MeasureUnit MeasureUnit::withPower(int8_t power) const { ErrorCode status; const char* id = toString(); UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); + PowerUnit powerUnit = parser.nextUnit(status); if (status.isFailure()) { - // Unrecoverable error return *this; } - int32_t match = parser.nextToken(status); - if (status.isFailure()) { - // Invalid syntax - return *this; - } - - // Append the new power + powerUnit.power = power; CharString builder; - if (power == 2) { - builder.append("square-", status); - } else if (power == 3) { - builder.append("cubic-", status); - } else if (power < 10) { - builder.append('p', status); - builder.append(power + '0', status); - builder.append('-', status); - } else { - builder.append("p1", status); - builder.append('0' + (power % 10), status); - builder.append('-', status); - } - - if (match < kCompoundPartOffset) { - // Remove the old power - builder.append(id + parser.currentIndex(), status); - } else { - // Append the whole identifier - builder.append(id, status); - } - if (status.isFailure()) { - // Unrecoverable error - return *this; - } - + powerUnit.toString(builder, status); return MeasureUnit(builder.cloneData(status)); } From 127ea0a0255841da4900779dbf17212aedb049f0 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Wed, 15 Jan 2020 16:08:53 +0100 Subject: [PATCH 06/13] Checkpoint --- icu4c/source/i18n/measunit.cpp | 6 +- icu4c/source/i18n/measunit_extra.cpp | 183 ++++++++++++--------- icu4c/source/i18n/unicode/measunit.h | 36 ++-- icu4c/source/test/intltest/measfmttest.cpp | 99 ++++++----- 4 files changed, 183 insertions(+), 141 deletions(-) diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp index 22f877a95b8..c575c53745b 100644 --- a/icu4c/source/i18n/measunit.cpp +++ b/icu4c/source/i18n/measunit.cpp @@ -2083,10 +2083,10 @@ const char *MeasureUnit::getSubtype() const { if (fTypeId == -1) { return ""; } - return toString(); + return getIdentifier(); } -const char *MeasureUnit::toString() const { +const char *MeasureUnit::getIdentifier() const { return fId ? fId : gSubTypes[getOffset()]; } @@ -2098,7 +2098,7 @@ UBool MeasureUnit::operator==(const UObject& other) const { return FALSE; } const MeasureUnit &rhs = static_cast(other); - return uprv_strcmp(toString(), rhs.toString()) == 0; + return uprv_strcmp(getIdentifier(), rhs.getIdentifier()) == 0; } int32_t MeasureUnit::getIndex() const { diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index 86453302b1e..83257fbcf56 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -88,6 +88,7 @@ const struct SIPrefixStrings { // FIXME: Get this list from data const char16_t* const gSimpleUnits[] = { + u"one", // note: expected to be index 0 u"100kilometer", u"acre", u"ampere", @@ -262,6 +263,7 @@ public: TYPE_SI_PREFIX, TYPE_COMPOUND_PART, TYPE_POWER_PART, + TYPE_ONE, TYPE_SIMPLE_UNIT, }; @@ -274,6 +276,8 @@ public: return TYPE_COMPOUND_PART; } else if (fMatch < kSimpleUnitOffset) { return TYPE_POWER_PART; + } else if (fMatch == kSimpleUnitOffset) { + return TYPE_ONE; } else { return TYPE_SIMPLE_UNIT; } @@ -285,14 +289,17 @@ public: } int32_t getMatch() const { + U_ASSERT(getType() == TYPE_COMPOUND_PART); return fMatch; } int8_t getPower() const { + U_ASSERT(getType() == TYPE_POWER_PART); return static_cast(fMatch - kPowerPartOffset); } int32_t getSimpleUnitOffset() const { + U_ASSERT(getType() == TYPE_SIMPLE_UNIT); return fMatch - kSimpleUnitOffset; } @@ -302,11 +309,10 @@ private: struct PowerUnit { int8_t power = 0; - bool plus = false; UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; StringPiece id; - void toString(CharString& builder, UErrorCode& status) { + void appendTo(CharString& builder, UErrorCode& status) { if (power == 0) { // no-op } else if (power == 1) { @@ -348,50 +354,22 @@ struct PowerUnit { class UnitIdentifierParser { public: static UnitIdentifierParser from(StringPiece source, UErrorCode& status) { + if (U_FAILURE(status)) { + return UnitIdentifierParser(); + } umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status); if (U_FAILURE(status)) { return UnitIdentifierParser(); } return UnitIdentifierParser(source); } - - Token nextToken(UErrorCode& status) { - fTrie.reset(); - int32_t match = -1; - int32_t previ = -1; - do { - fTrie.next(fSource.data()[fIndex++]); - if (fTrie.current() == USTRINGTRIE_NO_MATCH) { - break; - } else if (fTrie.current() == USTRINGTRIE_NO_VALUE) { - continue; - } else if (fTrie.current() == USTRINGTRIE_FINAL_VALUE) { - match = fTrie.getValue(); - previ = fIndex; - break; - } else if (fTrie.current() == USTRINGTRIE_INTERMEDIATE_VALUE) { - match = fTrie.getValue(); - previ = fIndex; - continue; - } else { - UPRV_UNREACHABLE; - } - } while (fIndex < fSource.length()); - - if (match < 0) { - // TODO: Make a new status code? - status = U_ILLEGAL_ARGUMENT_ERROR; - } else { - fIndex = previ; - } - return Token(match); - } bool hasNext() const { return fIndex < fSource.length(); } - PowerUnit nextUnit(UErrorCode& status) { + PowerUnit nextUnit(bool& sawPlus, UErrorCode& status) { + sawPlus = false; PowerUnit retval; if (U_FAILURE(status)) { return retval; @@ -405,7 +383,7 @@ public: int32_t previ = fIndex; // Maybe read a compound part - if (!fExpectingUnit) { + if (fIndex != 0) { Token token = nextToken(status); if (U_FAILURE(status)) { return retval; @@ -425,7 +403,7 @@ public: break; case COMPOUND_PART_PLUS: - retval.plus = true; + sawPlus = true; fAfterPer = false; break; } @@ -461,10 +439,11 @@ public: state = 2; break; + case Token::TYPE_ONE: + // Skip "one" and go to the next unit + return nextUnit(sawPlus, status); + case Token::TYPE_SIMPLE_UNIT: - if (state > 2) { - goto fail; - } retval.id = fSource.substr(previ, fIndex - previ); return retval; @@ -473,14 +452,24 @@ public: } } - fail: - // TODO: Make a new status code? - status = U_ILLEGAL_ARGUMENT_ERROR; - return retval; + fail: + // TODO: Make a new status code? + status = U_ILLEGAL_ARGUMENT_ERROR; + return retval; } - int32_t currentIndex() const { - return fIndex; + PowerUnit getOnlyUnit(UErrorCode& status) { + bool sawPlus; + PowerUnit retval = nextUnit(sawPlus, status); + if (U_FAILURE(status)) { + return retval; + } + if (sawPlus || hasNext()) { + // Expected to find only one unit in the string + status = U_ILLEGAL_ARGUMENT_ERROR; + return retval; + } + return retval; } private: @@ -489,12 +478,43 @@ private: UCharsTrie fTrie; bool fAfterPer = false; - bool fExpectingUnit = true; UnitIdentifierParser() : fSource(""), fTrie(u"") {} UnitIdentifierParser(StringPiece source) : fSource(source), fTrie(kSerializedUnitExtrasStemTrie) {} + + Token nextToken(UErrorCode& status) { + fTrie.reset(); + int32_t match = -1; + int32_t previ = -1; + do { + fTrie.next(fSource.data()[fIndex++]); + if (fTrie.current() == USTRINGTRIE_NO_MATCH) { + break; + } else if (fTrie.current() == USTRINGTRIE_NO_VALUE) { + continue; + } else if (fTrie.current() == USTRINGTRIE_FINAL_VALUE) { + match = fTrie.getValue(); + previ = fIndex; + break; + } else if (fTrie.current() == USTRINGTRIE_INTERMEDIATE_VALUE) { + match = fTrie.getValue(); + previ = fIndex; + continue; + } else { + UPRV_UNREACHABLE; + } + } while (fIndex < fSource.length()); + + if (match < 0) { + // TODO: Make a new status code? + status = U_ILLEGAL_ARGUMENT_ERROR; + } else { + fIndex = previ; + } + return Token(match); + } }; } // namespace @@ -507,62 +527,71 @@ MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& statu return MeasureUnit(); } + CharString builder; + bool afterPer = false; while (parser.hasNext()) { - parser.nextToken(status); + bool sawPlus; + PowerUnit powerUnit = parser.nextUnit(sawPlus, status); if (U_FAILURE(status)) { // Invalid syntax return MeasureUnit(); } + + if (sawPlus) { + builder.append('+', status); + afterPer = false; + } + if (powerUnit.power < 0 && !afterPer) { + if (builder.length() == 0 || sawPlus) { + builder.append("one", status); + } + builder.append("-per-", status); + powerUnit.power *= -1; + } + powerUnit.appendTo(builder, status); } // Success - return MeasureUnit(uprv_strdup(identifier)); + return MeasureUnit(builder.cloneData(status)); } -UMeasureSIPrefix MeasureUnit::getSIPrefix() const { - ErrorCode status; - const char* id = toString(); - return UnitIdentifierParser::from(id, status).nextUnit(status).siPrefix; +UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const { + const char* id = getIdentifier(); + return UnitIdentifierParser::from(id, status).getOnlyUnit(status).siPrefix; } -MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix) const { - ErrorCode status; - const char* id = toString(); - UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); - PowerUnit powerUnit = parser.nextUnit(status); - if (status.isFailure()) { +MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const { + const char* id = getIdentifier(); + PowerUnit powerUnit = UnitIdentifierParser::from(id, status).getOnlyUnit(status); + if (U_FAILURE(status)) { return *this; } powerUnit.siPrefix = prefix; CharString builder; - powerUnit.toString(builder, status); + powerUnit.appendTo(builder, status); return MeasureUnit(builder.cloneData(status)); } -int8_t MeasureUnit::getPower() const { - ErrorCode status; - const char* id = toString(); - return UnitIdentifierParser::from(id, status).nextUnit(status).power; +int8_t MeasureUnit::getPower(UErrorCode& status) const { + const char* id = getIdentifier(); + return UnitIdentifierParser::from(id, status).getOnlyUnit(status).power; } -MeasureUnit MeasureUnit::withPower(int8_t power) const { - if (power < 0) { - // Don't know how to handle this yet - U_ASSERT(FALSE); - } - - ErrorCode status; - const char* id = toString(); - UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); - PowerUnit powerUnit = parser.nextUnit(status); - if (status.isFailure()) { +MeasureUnit MeasureUnit::withPower(int8_t power, UErrorCode& status) const { + const char* id = getIdentifier(); + PowerUnit powerUnit = UnitIdentifierParser::from(id, status).getOnlyUnit(status); + if (U_FAILURE(status)) { return *this; } - powerUnit.power = power; CharString builder; - powerUnit.toString(builder, status); + if (power < 0) { + builder.append("one-per-", status); + power *= -1; + } + powerUnit.power = power; + powerUnit.appendTo(builder, status); return MeasureUnit(builder.cloneData(status)); } diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index 466460b02a0..8ed893190ab 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -293,7 +293,7 @@ class U_I18N_API MeasureUnit: public UObject { * @return The string form of this unit, owned by this MeasureUnit. * @draft ICU 67 */ - const char* toString() const; + const char* getIdentifier() const; /** * Creates a MeasureUnit which is this MeasureUnit augmented with the specified SI prefix. @@ -301,14 +301,14 @@ class U_I18N_API MeasureUnit: public UObject { * * There is sufficient locale data to format all standard SI prefixes. * - * If the MeasureUnit is composed of multiple simple units, only the first simple unit is - * modified. For example, starting at "meter-kilogram-per-second", if you set the SI prefix - * to "centi", then you get "centimeter-kilogram-per-second". + * This method only works if the MeasureUnit is composed of only one simple unit. An error + * will be set if called on a MeasureUnit containing multiple units. * * @param prefix The SI prefix, from UMeasureSIPrefix. + * @param status ICU error code * @return A new MeasureUnit. */ - MeasureUnit withSIPrefix(UMeasureSIPrefix prefix) const; + MeasureUnit withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const; /** * Gets the current SI prefix of this MeasureUnit. For example, if the unit has the SI prefix @@ -320,20 +320,20 @@ class U_I18N_API MeasureUnit: public UObject { * * @return The SI prefix of the first simple unit, from UMeasureSIPrefix. */ - UMeasureSIPrefix getSIPrefix() const; + UMeasureSIPrefix getSIPrefix(UErrorCode& status) const; /** * Creates a MeasureUnit which is this MeasureUnit augmented with the specified power. For * example, if power is 2, the unit will be squared. * - * If the MeasureUnit is composed of multiple simple units, only the first simple unit is - * modified. For example, starting at "meter-kilogram-per-second", if you set the power to 2, - * then you get "square-meter-kilogram-per-second". + * This method only works if the MeasureUnit is composed of only one simple unit. An error + * will be set if called on a MeasureUnit containing multiple units. * * @param power The power. + * @param status ICU error code * @return A new MeasureUnit. */ - MeasureUnit withPower(int8_t power) const; + MeasureUnit withPower(int8_t power, UErrorCode& status) const; /** * Gets the power of this MeasureUnit. For example, if the unit is square, then 2 is returned. @@ -343,7 +343,7 @@ class U_I18N_API MeasureUnit: public UObject { * * @return The power of the first simple unit. */ - int8_t getPower() const; + int8_t getPower(UErrorCode& status) const; /** * Gets the reciprocal of the unit, with the numerator and denominator flipped. @@ -352,7 +352,7 @@ class U_I18N_API MeasureUnit: public UObject { * * @return The reciprocal of the target unit. */ - MeasureUnit reciprocal() const; + MeasureUnit reciprocal(UErrorCode& status) const; /** * Gets the product of this unit with another unit. This is a way to build units from @@ -365,7 +365,7 @@ class U_I18N_API MeasureUnit: public UObject { * * @return The product of the target unit with the provided unit. */ - MeasureUnit product(const MeasureUnit& other) const; + MeasureUnit product(const MeasureUnit& other, UErrorCode& status) const; /** * Gets the number of constituent simple units. @@ -375,7 +375,7 @@ class U_I18N_API MeasureUnit: public UObject { * * @return The number of constituent units. */ - size_t getSimpleUnitCount() const; + size_t getSimpleUnitCount(UErrorCode& status) const; /** * Gets the constituent unit at the given index. @@ -394,7 +394,7 @@ class U_I18N_API MeasureUnit: public UObject { * @param index Zero-based index. If out of range, the dimensionless unit is returned. * @return The constituent simple unit at the specified position. */ - MeasureUnit simpleUnitAt(size_t index) const; + MeasureUnit simpleUnitAt(size_t index, UErrorCode& status) const; /** * Composes this unit with a super unit. @@ -413,7 +413,7 @@ class U_I18N_API MeasureUnit: public UObject { * @param other The super unit to compose with the target unit. * @return The composition of the given super unit with this unit. */ - MeasureUnit withSuperUnit(const MeasureUnit& other) const; + MeasureUnit withSuperUnit(const MeasureUnit& other, UErrorCode& status) const; /** * Gets the number of super units in the receiver. @@ -422,7 +422,7 @@ class U_I18N_API MeasureUnit: public UObject { * * @return The number of super units. */ - size_t getSuperUnitCount() const; + size_t getSuperUnitCount(UErrorCode& status) const; /** * Gets the super unit at the given index. @@ -440,7 +440,7 @@ class U_I18N_API MeasureUnit: public UObject { * * @return The super unit at the specified position. */ - MeasureUnit superUnitAt(size_t index) const; + MeasureUnit superUnitAt(size_t index, UErrorCode& status) const; /** * getAvailable gets all of the available units. diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 94a499e73a4..27ed41b9590 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -188,6 +188,7 @@ void MeasureFormatTest::runIndexedTest( TESTCASE_AUTO(Test20332_PersonUnits); TESTCASE_AUTO(TestNumericTime); TESTCASE_AUTO(TestNumericTimeSomeSpecialFormats); + TESTCASE_AUTO(TestCompoundUnitOperations); TESTCASE_AUTO_END; } @@ -3224,64 +3225,69 @@ void MeasureFormatTest::TestNumericTimeSomeSpecialFormats() { void MeasureFormatTest::TestCompoundUnitOperations() { IcuTestErrorCode status(*this, "TestCompoundUnitOperations"); + MeasureUnit::forIdentifier("kilometer-per-second-joule", status); + MeasureUnit kilometer = MeasureUnit::getKilometer(); - MeasureUnit meter = kilometer.withSIPrefix(UMEASURE_SI_PREFIX_ONE); - MeasureUnit centimeter1 = kilometer.withSIPrefix(UMEASURE_SI_PREFIX_CENTI); - MeasureUnit centimeter2 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI); + MeasureUnit cubicMeter = MeasureUnit::getCubicMeter(); + MeasureUnit meter = kilometer.withSIPrefix(UMEASURE_SI_PREFIX_ONE, status); + MeasureUnit centimeter1 = kilometer.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); + MeasureUnit centimeter2 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); + MeasureUnit cubicDecimeter = cubicMeter.withSIPrefix(UMEASURE_SI_PREFIX_DECI, status); verifyUnitParts(kilometer, UMEASURE_SI_PREFIX_KILO, 0, "kilometer"); verifyUnitParts(meter, UMEASURE_SI_PREFIX_ONE, 0, "meter"); verifyUnitParts(centimeter1, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter"); verifyUnitParts(centimeter2, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter"); + verifyUnitParts(cubicDecimeter, UMEASURE_SI_PREFIX_DECI, 3, "cubic-decimeter"); assertTrue("centimeter equality", centimeter1 == centimeter2); assertTrue("kilometer inequality", centimeter1 != kilometer); - MeasureUnit squareMeter = meter.withPower(2); - MeasureUnit overCubicCentimeter = centimeter1.withPower(-3); - MeasureUnit quarticKilometer = kilometer.withPower(4); - MeasureUnit overQuarticKilometer1 = kilometer.withPower(-4); + MeasureUnit squareMeter = meter.withPower(2, status); + MeasureUnit overCubicCentimeter = centimeter1.withPower(-3, status); + MeasureUnit quarticKilometer = kilometer.withPower(4, status); + MeasureUnit overQuarticKilometer1 = kilometer.withPower(-4, status); verifyUnitParts(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter"); - verifyUnitParts(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, 2, "one-per-cubic-centimeter"); - verifyUnitParts(quarticKilometer, UMEASURE_SI_PREFIX_ONE, 2, "p4-kilometer"); - verifyUnitParts(overQuarticKilometer1, UMEASURE_SI_PREFIX_ONE, 2, "one-per-p4-kilometer"); + verifyUnitParts(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "one-per-cubic-centimeter"); + verifyUnitParts(quarticKilometer, UMEASURE_SI_PREFIX_KILO, 4, "p4-kilometer"); + verifyUnitParts(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); - assertTrue("power inequality", quarticKilometer != overQuarticKilometer1); + // assertTrue("power inequality", quarticKilometer != overQuarticKilometer1); - MeasureUnit overQuarticKilometer2 = overQuarticKilometer1.reciprocal(); - MeasureUnit overQuarticKilometer3 = kilometer.product(kilometer).product(kilometer) - .product(kilometer).reciprocal(); + // MeasureUnit overQuarticKilometer2 = overQuarticKilometer1.reciprocal(); + // MeasureUnit overQuarticKilometer3 = kilometer.product(kilometer).product(kilometer) + // .product(kilometer).reciprocal(); - verifyUnitParts(overQuarticKilometer2, UMEASURE_SI_PREFIX_ONE, 2, "one-per-p4-kilometer"); - verifyUnitParts(overQuarticKilometer3, UMEASURE_SI_PREFIX_ONE, 2, "one-per-p4-kilometer"); + // verifyUnitParts(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, 2, "one-per-p4-kilometer"); + // verifyUnitParts(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, 2, "one-per-p4-kilometer"); - assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); - assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); + // assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); + // assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); - MeasureUnit kiloSquareSecond = MeasureUnit::getSecond() - .withPower(2).withSIPrefix(UMEASURE_SI_PREFIX_KILO); - MeasureUnit meterSecond = meter.product(kiloSquareSecond); - MeasureUnit cubicMeterSecond1 = meter.withPower(3).product(kiloSquareSecond); - MeasureUnit cubicMeterSecond2 = meterSecond.withPower(3); - MeasureUnit centimeterSecond1 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI).product(kiloSquareSecond); - MeasureUnit centimeterSecond2 = meterSecond.withSIPrefix(UMEASURE_SI_PREFIX_CENTI); - MeasureUnit secondCubicMeter = kiloSquareSecond.product(meter.withPower(3)); - MeasureUnit secondCentimeter = kiloSquareSecond.product(meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI)); + // MeasureUnit kiloSquareSecond = MeasureUnit::getSecond() + // .withPower(2).withSIPrefix(UMEASURE_SI_PREFIX_KILO); + // MeasureUnit meterSecond = meter.product(kiloSquareSecond); + // MeasureUnit cubicMeterSecond1 = meter.withPower(3).product(kiloSquareSecond); + // MeasureUnit cubicMeterSecond2 = meterSecond.withPower(3); + // MeasureUnit centimeterSecond1 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI).product(kiloSquareSecond); + // MeasureUnit centimeterSecond2 = meterSecond.withSIPrefix(UMEASURE_SI_PREFIX_CENTI); + // MeasureUnit secondCubicMeter = kiloSquareSecond.product(meter.withPower(3)); + // MeasureUnit secondCentimeter = kiloSquareSecond.product(meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI)); - verifyUnitParts(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); - verifyUnitParts(meterSecond, UMEASURE_SI_PREFIX_ONE, 0, "meter-square-kilosecond"); - verifyUnitParts(cubicMeterSecond1, UMEASURE_SI_PREFIX_ONE, 2, "cubic-meter-square-kilosecond"); - verifyUnitParts(cubicMeterSecond2, UMEASURE_SI_PREFIX_ONE, 2, "cubic-meter-square-kilosecond"); - verifyUnitParts(centimeterSecond1, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter-square-kilosecond"); - verifyUnitParts(centimeterSecond2, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter-square-kilosecond"); - verifyUnitParts(secondCubicMeter, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond-cubic-meter"); - verifyUnitParts(secondCentimeter, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond-centimeter"); + // verifyUnitParts(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); + // verifyUnitParts(meterSecond, UMEASURE_SI_PREFIX_ONE, 0, "meter-square-kilosecond"); + // verifyUnitParts(cubicMeterSecond1, UMEASURE_SI_PREFIX_ONE, 2, "cubic-meter-square-kilosecond"); + // verifyUnitParts(cubicMeterSecond2, UMEASURE_SI_PREFIX_ONE, 2, "cubic-meter-square-kilosecond"); + // verifyUnitParts(centimeterSecond1, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter-square-kilosecond"); + // verifyUnitParts(centimeterSecond2, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter-square-kilosecond"); + // verifyUnitParts(secondCubicMeter, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond-cubic-meter"); + // verifyUnitParts(secondCentimeter, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond-centimeter"); - assertTrue("multipart power equality", cubicMeterSecond1 == cubicMeterSecond2); - assertTrue("multipart SI prefix equality", centimeterSecond1 == centimeterSecond2); - assertTrue("order matters inequality", cubicMeterSecond1 != secondCubicMeter); - assertTrue("additional simple units inequality", secondCubicMeter != secondCentimeter); + // assertTrue("multipart power equality", cubicMeterSecond1 == cubicMeterSecond2); + // assertTrue("multipart SI prefix equality", centimeterSecond1 == centimeterSecond2); + // assertTrue("order matters inequality", cubicMeterSecond1 != secondCubicMeter); + // assertTrue("additional simple units inequality", secondCubicMeter != secondCentimeter); } @@ -3361,10 +3367,17 @@ void MeasureFormatTest::verifyUnitParts( int8_t power, const char* identifier) { IcuTestErrorCode status(*this, "verifyUnitParts"); - assertEquals(UnicodeString(identifier) + ": SI prefix", siPrefix, unit.getSIPrefix()); - assertEquals(UnicodeString(identifier) + ": Power", power, unit.getPower()); - assertEquals(UnicodeString(identifier) + ": Identifier", identifier, unit.toString()); - assertTrue(UnicodeString(identifier) + ": Constructor", unit == MeasureUnit::forIdentifier(identifier, status)); + assertEquals(UnicodeString(identifier) + ": SI prefix", + siPrefix, + unit.getSIPrefix(status)); + assertEquals(UnicodeString(identifier) + ": Power", + static_cast(power), + static_cast(unit.getPower(status))); + assertEquals(UnicodeString(identifier) + ": Identifier", + identifier, + unit.getIdentifier()); + assertTrue(UnicodeString(identifier) + ": Constructor", + unit == MeasureUnit::forIdentifier(identifier, status)); } extern IntlTest *createMeasureFormatTest() { From eb5b4dff8386b93f542eb2bcfa1badd04e6613da Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Wed, 15 Jan 2020 18:15:40 +0100 Subject: [PATCH 07/13] Checkpoint --- icu4c/source/common/cmemory.h | 17 + icu4c/source/i18n/measunit.cpp | 4 + icu4c/source/i18n/measunit_extra.cpp | 378 ++++++++++++++------- icu4c/source/test/intltest/measfmttest.cpp | 26 +- 4 files changed, 287 insertions(+), 138 deletions(-) diff --git a/icu4c/source/common/cmemory.h b/icu4c/source/common/cmemory.h index 7f7fd8d0864..8f9be605396 100644 --- a/icu4c/source/common/cmemory.h +++ b/icu4c/source/common/cmemory.h @@ -725,6 +725,23 @@ public: return pool[count++] = new T(std::forward(args)...); } + /** + * @return Number of elements that have been allocated. + */ + int32_t size() const { + return count; + } + + /** + * Array item access (writable). + * No index bounds check. + * @param i array index + * @return reference to the array item + */ + T *operator[](ptrdiff_t i) const { + return pool[i]; + } + private: int32_t count; MaybeStackArray pool; diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp index c575c53745b..307156961b0 100644 --- a/icu4c/source/i18n/measunit.cpp +++ b/icu4c/source/i18n/measunit.cpp @@ -2027,6 +2027,10 @@ MeasureUnit::MeasureUnit(MeasureUnit &&other) noexcept MeasureUnit::MeasureUnit(char* idToAdopt) : fId(idToAdopt), fSubTypeId(-1), fTypeId(-1) { + if (fId == nullptr) { + // Invalid; reset to the base dimensionless unit + setTo(kBaseTypeIdx, kBaseSubTypeIdx); + } } MeasureUnit &MeasureUnit::operator=(const MeasureUnit &other) { diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index 83257fbcf56..eec204ed0e7 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -298,7 +298,7 @@ public: return static_cast(fMatch - kPowerPartOffset); } - int32_t getSimpleUnitOffset() const { + int32_t getSimpleUnitIndex() const { U_ASSERT(getType() == TYPE_SIMPLE_UNIT); return fMatch - kSimpleUnitOffset; } @@ -308,28 +308,31 @@ private: }; struct PowerUnit { - int8_t power = 0; + int8_t power = 1; UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; + int32_t simpleUnitIndex = 0; StringPiece id; - void appendTo(CharString& builder, UErrorCode& status) { - if (power == 0) { + void appendTo(CharString& builder, UErrorCode& status) const { + int8_t posPower = power < 0 ? -power : power; + if (posPower == 0) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } else if (posPower == 1) { // no-op - } else if (power == 1) { - UPRV_UNREACHABLE; - } else if (power == 2) { + } else if (posPower == 2) { builder.append("square-", status); - } else if (power == 3) { + } else if (posPower == 3) { builder.append("cubic-", status); - } else if (power < 10) { + } else if (posPower < 10) { builder.append('p', status); - builder.append(power + '0', status); + builder.append(posPower + '0', status); + builder.append('-', status); + } else if (posPower <= 15) { + builder.append("p1", status); + builder.append('0' + (posPower % 10), status); builder.append('-', status); } else { - U_ASSERT(power <= 15); - builder.append("p1", status); - builder.append('0' + (power % 10), status); - builder.append('-', status); + status = U_ILLEGAL_ARGUMENT_ERROR; } if (U_FAILURE(status)) { return; @@ -351,6 +354,71 @@ struct PowerUnit { } }; +class CompoundUnit { +public: + void append(PowerUnit&& powerUnit, UErrorCode& status) { + if (powerUnit.power >= 0) { + appendImpl(numerator, std::move(powerUnit), status); + } else { + appendImpl(denominator, std::move(powerUnit), status); + } + } + + void reciprocal() { + auto temp = std::move(numerator); + numerator = std::move(denominator); + denominator = std::move(temp); + } + + void appendTo(CharString& builder, UErrorCode& status) { + if (numerator.size() == 0) { + builder.append("one", status); + } else { + appendToImpl(numerator, numerator.size(), builder, status); + } + if (denominator.size() > 0) { + builder.append("-per-", status); + appendToImpl(denominator, denominator.size(), builder, status); + } + } + +private: + typedef MemoryPool PowerUnitList; + PowerUnitList numerator; + PowerUnitList denominator; + + void appendToImpl(const PowerUnitList& unitList, int32_t len, CharString& builder, UErrorCode& status) { + bool first = true; + for (int32_t i = 0; i < len; i++) { + if (first) { + first = false; + } else { + builder.append('-', status); + } + unitList[i]->appendTo(builder, status); + } + } + + void appendImpl(PowerUnitList& unitList, PowerUnit&& powerUnit, UErrorCode& status) { + // Check that the same simple unit doesn't already exist + for (int32_t i = 0; i < unitList.size(); i++) { + PowerUnit* candidate = unitList[i]; + if (candidate->simpleUnitIndex == powerUnit.simpleUnitIndex + && candidate->siPrefix == powerUnit.siPrefix) { + candidate->power += powerUnit.power; + return; + } + } + // Add a new unit + PowerUnit* destination = unitList.create(); + if (!destination) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + *destination = std::move(powerUnit); + } +}; + class UnitIdentifierParser { public: static UnitIdentifierParser from(StringPiece source, UErrorCode& status) { @@ -368,99 +436,10 @@ public: return fIndex < fSource.length(); } - PowerUnit nextUnit(bool& sawPlus, UErrorCode& status) { - sawPlus = false; - PowerUnit retval; - if (U_FAILURE(status)) { - return retval; - } - - // state: - // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit) - // 1 = power token seen (will not accept another power token) - // 2 = SI prefix token seen (will not accept a power or SI prefix token) - int32_t state = 0; - int32_t previ = fIndex; - - // Maybe read a compound part - if (fIndex != 0) { - Token token = nextToken(status); - if (U_FAILURE(status)) { - return retval; - } - if (token.getType() != Token::TYPE_COMPOUND_PART) { - goto fail; - } - switch (token.getMatch()) { - case COMPOUND_PART_PER: - if (fAfterPer) { - goto fail; - } - fAfterPer = true; - break; - - case COMPOUND_PART_TIMES: - break; - - case COMPOUND_PART_PLUS: - sawPlus = true; - fAfterPer = false; - break; - } - previ = fIndex; - } - - // Read a unit - while (hasNext()) { - Token token = nextToken(status); - if (U_FAILURE(status)) { - return retval; - } - - switch (token.getType()) { - case Token::TYPE_POWER_PART: - if (state > 0) { - goto fail; - } - retval.power = token.getPower(); - if (fAfterPer) { - retval.power *= -1; - } - previ = fIndex; - state = 1; - break; - - case Token::TYPE_SI_PREFIX: - if (state > 1) { - goto fail; - } - retval.siPrefix = token.getSIPrefix(); - previ = fIndex; - state = 2; - break; - - case Token::TYPE_ONE: - // Skip "one" and go to the next unit - return nextUnit(sawPlus, status); - - case Token::TYPE_SIMPLE_UNIT: - retval.id = fSource.substr(previ, fIndex - previ); - return retval; - - default: - goto fail; - } - } - - fail: - // TODO: Make a new status code? - status = U_ILLEGAL_ARGUMENT_ERROR; - return retval; - } - - PowerUnit getOnlyUnit(UErrorCode& status) { + PowerUnit getOnlyPowerUnit(UErrorCode& status) { bool sawPlus; - PowerUnit retval = nextUnit(sawPlus, status); + PowerUnit retval; + nextPowerUnit(retval, sawPlus, status); if (U_FAILURE(status)) { return retval; } @@ -472,6 +451,38 @@ public: return retval; } + void nextCompoundUnit(CompoundUnit& result, UErrorCode& status) { + bool sawPlus; + if (U_FAILURE(status)) { + return; + } + while (hasNext()) { + int32_t previ = fIndex; + PowerUnit powerUnit; + nextPowerUnit(powerUnit, sawPlus, status); + if (sawPlus) { + fIndex = previ; + break; + } + result.append(std::move(powerUnit), status); + } + return; + } + + CompoundUnit getOnlyCompoundUnit(UErrorCode& status) { + CompoundUnit retval; + nextCompoundUnit(retval, status); + if (U_FAILURE(status)) { + return retval; + } + if (hasNext()) { + // Expected to find only one unit in the string + status = U_ILLEGAL_ARGUMENT_ERROR; + return retval; + } + return retval; + } + private: int32_t fIndex = 0; StringPiece fSource; @@ -515,6 +526,96 @@ private: } return Token(match); } + + void nextPowerUnit(PowerUnit& result, bool& sawPlus, UErrorCode& status) { + sawPlus = false; + if (U_FAILURE(status)) { + return; + } + + // state: + // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit) + // 1 = power token seen (will not accept another power token) + // 2 = SI prefix token seen (will not accept a power or SI prefix token) + int32_t state = 0; + int32_t previ = fIndex; + + // Maybe read a compound part + if (fIndex != 0) { + Token token = nextToken(status); + if (U_FAILURE(status)) { + return; + } + if (token.getType() != Token::TYPE_COMPOUND_PART) { + goto fail; + } + switch (token.getMatch()) { + case COMPOUND_PART_PER: + if (fAfterPer) { + goto fail; + } + fAfterPer = true; + break; + + case COMPOUND_PART_TIMES: + break; + + case COMPOUND_PART_PLUS: + sawPlus = true; + fAfterPer = false; + break; + } + previ = fIndex; + } + + // Read a unit + while (hasNext()) { + Token token = nextToken(status); + if (U_FAILURE(status)) { + return; + } + + switch (token.getType()) { + case Token::TYPE_POWER_PART: + if (state > 0) { + goto fail; + } + result.power = token.getPower(); + if (fAfterPer) { + result.power *= -1; + } + previ = fIndex; + state = 1; + break; + + case Token::TYPE_SI_PREFIX: + if (state > 1) { + goto fail; + } + result.siPrefix = token.getSIPrefix(); + previ = fIndex; + state = 2; + break; + + case Token::TYPE_ONE: + // Skip "one" and go to the next unit + return nextPowerUnit(result, sawPlus, status); + + case Token::TYPE_SIMPLE_UNIT: + result.simpleUnitIndex = token.getSimpleUnitIndex(); + result.id = fSource.substr(previ, fIndex - previ); + return; + + default: + goto fail; + } + } + + fail: + // TODO: Make a new status code? + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } }; } // namespace @@ -528,27 +629,17 @@ MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& statu } CharString builder; - bool afterPer = false; while (parser.hasNext()) { - bool sawPlus; - PowerUnit powerUnit = parser.nextUnit(sawPlus, status); + CompoundUnit compoundUnit; + parser.nextCompoundUnit(compoundUnit, status); if (U_FAILURE(status)) { // Invalid syntax return MeasureUnit(); } - - if (sawPlus) { + if (builder.length() > 0) { builder.append('+', status); - afterPer = false; } - if (powerUnit.power < 0 && !afterPer) { - if (builder.length() == 0 || sawPlus) { - builder.append("one", status); - } - builder.append("-per-", status); - powerUnit.power *= -1; - } - powerUnit.appendTo(builder, status); + compoundUnit.appendTo(builder, status); } // Success @@ -557,12 +648,12 @@ MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& statu UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const { const char* id = getIdentifier(); - return UnitIdentifierParser::from(id, status).getOnlyUnit(status).siPrefix; + return UnitIdentifierParser::from(id, status).getOnlyPowerUnit(status).siPrefix; } MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const { const char* id = getIdentifier(); - PowerUnit powerUnit = UnitIdentifierParser::from(id, status).getOnlyUnit(status); + PowerUnit powerUnit = UnitIdentifierParser::from(id, status).getOnlyPowerUnit(status); if (U_FAILURE(status)) { return *this; } @@ -575,26 +666,61 @@ MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& statu int8_t MeasureUnit::getPower(UErrorCode& status) const { const char* id = getIdentifier(); - return UnitIdentifierParser::from(id, status).getOnlyUnit(status).power; + return UnitIdentifierParser::from(id, status).getOnlyPowerUnit(status).power; } MeasureUnit MeasureUnit::withPower(int8_t power, UErrorCode& status) const { const char* id = getIdentifier(); - PowerUnit powerUnit = UnitIdentifierParser::from(id, status).getOnlyUnit(status); + PowerUnit powerUnit = UnitIdentifierParser::from(id, status).getOnlyPowerUnit(status); if (U_FAILURE(status)) { return *this; } CharString builder; + powerUnit.power = power; if (power < 0) { builder.append("one-per-", status); - power *= -1; } - powerUnit.power = power; powerUnit.appendTo(builder, status); return MeasureUnit(builder.cloneData(status)); } +MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const { + const char* id = getIdentifier(); + CompoundUnit compoundUnit = UnitIdentifierParser::from(id, status).getOnlyCompoundUnit(status); + if (U_FAILURE(status)) { + return *this; + } + + compoundUnit.reciprocal(); + CharString builder; + compoundUnit.appendTo(builder, status); + return MeasureUnit(builder.cloneData(status)); +} + +MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const { + const char* id = getIdentifier(); + CompoundUnit compoundUnit = UnitIdentifierParser::from(id, status).getOnlyCompoundUnit(status); + if (U_FAILURE(status)) { + return *this; + } + + // Append other's first CompoundUnit to compoundUnit, then assert other has only one + UnitIdentifierParser otherParser = UnitIdentifierParser::from(other.getIdentifier(), status); + otherParser.nextCompoundUnit(compoundUnit, status); + if (U_FAILURE(status)) { + return *this; + } + if (otherParser.hasNext()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return *this; + } + + CharString builder; + compoundUnit.appendTo(builder, status); + return MeasureUnit(builder.cloneData(status)); +} + U_NAMESPACE_END diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 27ed41b9590..86814a7a8df 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -3234,10 +3234,10 @@ void MeasureFormatTest::TestCompoundUnitOperations() { MeasureUnit centimeter2 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); MeasureUnit cubicDecimeter = cubicMeter.withSIPrefix(UMEASURE_SI_PREFIX_DECI, status); - verifyUnitParts(kilometer, UMEASURE_SI_PREFIX_KILO, 0, "kilometer"); - verifyUnitParts(meter, UMEASURE_SI_PREFIX_ONE, 0, "meter"); - verifyUnitParts(centimeter1, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter"); - verifyUnitParts(centimeter2, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter"); + verifyUnitParts(kilometer, UMEASURE_SI_PREFIX_KILO, 1, "kilometer"); + verifyUnitParts(meter, UMEASURE_SI_PREFIX_ONE, 1, "meter"); + verifyUnitParts(centimeter1, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); + verifyUnitParts(centimeter2, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); verifyUnitParts(cubicDecimeter, UMEASURE_SI_PREFIX_DECI, 3, "cubic-decimeter"); assertTrue("centimeter equality", centimeter1 == centimeter2); @@ -3253,17 +3253,19 @@ void MeasureFormatTest::TestCompoundUnitOperations() { verifyUnitParts(quarticKilometer, UMEASURE_SI_PREFIX_KILO, 4, "p4-kilometer"); verifyUnitParts(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); - // assertTrue("power inequality", quarticKilometer != overQuarticKilometer1); + assertTrue("power inequality", quarticKilometer != overQuarticKilometer1); - // MeasureUnit overQuarticKilometer2 = overQuarticKilometer1.reciprocal(); - // MeasureUnit overQuarticKilometer3 = kilometer.product(kilometer).product(kilometer) - // .product(kilometer).reciprocal(); + MeasureUnit overQuarticKilometer2 = quarticKilometer.reciprocal(status); + MeasureUnit overQuarticKilometer3 = kilometer.product(kilometer, status) + .product(kilometer, status) + .product(kilometer, status) + .reciprocal(status); - // verifyUnitParts(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, 2, "one-per-p4-kilometer"); - // verifyUnitParts(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, 2, "one-per-p4-kilometer"); + verifyUnitParts(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifyUnitParts(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); - // assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); - // assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); + assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); + assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); // MeasureUnit kiloSquareSecond = MeasureUnit::getSecond() // .withPower(2).withSIPrefix(UMEASURE_SI_PREFIX_KILO); From f70ac326ff291133ed878e48100375f0afe2cf7c Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Wed, 15 Jan 2020 18:31:09 +0100 Subject: [PATCH 08/13] Checkpoint --- icu4c/source/test/intltest/measfmttest.cpp | 77 +++++++++++++++------- 1 file changed, 52 insertions(+), 25 deletions(-) diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 86814a7a8df..01f118f4d36 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -144,6 +144,9 @@ private: UMeasureSIPrefix siPrefix, int8_t power, const char* identifier); + void verifyUnitIdentifierOnly( + const MeasureUnit& unit, + const char* identifier); }; void MeasureFormatTest::runIndexedTest( @@ -3267,29 +3270,34 @@ void MeasureFormatTest::TestCompoundUnitOperations() { assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); - // MeasureUnit kiloSquareSecond = MeasureUnit::getSecond() - // .withPower(2).withSIPrefix(UMEASURE_SI_PREFIX_KILO); - // MeasureUnit meterSecond = meter.product(kiloSquareSecond); - // MeasureUnit cubicMeterSecond1 = meter.withPower(3).product(kiloSquareSecond); - // MeasureUnit cubicMeterSecond2 = meterSecond.withPower(3); - // MeasureUnit centimeterSecond1 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI).product(kiloSquareSecond); - // MeasureUnit centimeterSecond2 = meterSecond.withSIPrefix(UMEASURE_SI_PREFIX_CENTI); - // MeasureUnit secondCubicMeter = kiloSquareSecond.product(meter.withPower(3)); - // MeasureUnit secondCentimeter = kiloSquareSecond.product(meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI)); + MeasureUnit kiloSquareSecond = MeasureUnit::getSecond() + .withPower(2, status).withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); + MeasureUnit meterSecond = meter.product(kiloSquareSecond, status); + MeasureUnit cubicMeterSecond1 = meter.withPower(3, status).product(kiloSquareSecond, status); + MeasureUnit centimeterSecond1 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status).product(kiloSquareSecond, status); + MeasureUnit secondCubicMeter = kiloSquareSecond.product(meter.withPower(3, status), status); + MeasureUnit secondCentimeter = kiloSquareSecond.product(meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status), status); - // verifyUnitParts(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); - // verifyUnitParts(meterSecond, UMEASURE_SI_PREFIX_ONE, 0, "meter-square-kilosecond"); - // verifyUnitParts(cubicMeterSecond1, UMEASURE_SI_PREFIX_ONE, 2, "cubic-meter-square-kilosecond"); - // verifyUnitParts(cubicMeterSecond2, UMEASURE_SI_PREFIX_ONE, 2, "cubic-meter-square-kilosecond"); - // verifyUnitParts(centimeterSecond1, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter-square-kilosecond"); - // verifyUnitParts(centimeterSecond2, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter-square-kilosecond"); - // verifyUnitParts(secondCubicMeter, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond-cubic-meter"); - // verifyUnitParts(secondCentimeter, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond-centimeter"); + verifyUnitParts(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); + verifyUnitIdentifierOnly(meterSecond, "meter-square-kilosecond"); + verifyUnitIdentifierOnly(cubicMeterSecond1, "cubic-meter-square-kilosecond"); + verifyUnitIdentifierOnly(centimeterSecond1, "centimeter-square-kilosecond"); + verifyUnitIdentifierOnly(secondCubicMeter, "square-kilosecond-cubic-meter"); + verifyUnitIdentifierOnly(secondCentimeter, "square-kilosecond-centimeter"); - // assertTrue("multipart power equality", cubicMeterSecond1 == cubicMeterSecond2); - // assertTrue("multipart SI prefix equality", centimeterSecond1 == centimeterSecond2); - // assertTrue("order matters inequality", cubicMeterSecond1 != secondCubicMeter); - // assertTrue("additional simple units inequality", secondCubicMeter != secondCentimeter); + // Don't allow get/set power or SI prefix on compound units + status.errIfFailureAndReset(); + meterSecond.getPower(status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + meterSecond.withPower(3, status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + meterSecond.getSIPrefix(status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + meterSecond.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + + assertTrue("order matters inequality", cubicMeterSecond1 != secondCubicMeter); + assertTrue("additional simple units inequality", secondCubicMeter != secondCentimeter); } @@ -3369,17 +3377,36 @@ void MeasureFormatTest::verifyUnitParts( int8_t power, const char* identifier) { IcuTestErrorCode status(*this, "verifyUnitParts"); - assertEquals(UnicodeString(identifier) + ": SI prefix", + UnicodeString uid(identifier, -1, US_INV); + assertEquals(uid + ": SI prefix", siPrefix, unit.getSIPrefix(status)); - assertEquals(UnicodeString(identifier) + ": Power", + status.errIfFailureAndReset("%s: SI prefix", identifier); + assertEquals(uid + ": Power", static_cast(power), static_cast(unit.getPower(status))); - assertEquals(UnicodeString(identifier) + ": Identifier", + status.errIfFailureAndReset("%s: Power", identifier); + assertEquals(uid + ": Identifier", identifier, unit.getIdentifier()); - assertTrue(UnicodeString(identifier) + ": Constructor", + status.errIfFailureAndReset("%s: Identifier", identifier); + assertTrue(uid + ": Constructor", unit == MeasureUnit::forIdentifier(identifier, status)); + status.errIfFailureAndReset("%s: Constructor", identifier); +} + +void MeasureFormatTest::verifyUnitIdentifierOnly( + const MeasureUnit& unit, + const char* identifier) { + IcuTestErrorCode status(*this, "verifyUnitIdentifierOnly"); + UnicodeString uid(identifier, -1, US_INV); + assertEquals(uid + ": Identifier", + identifier, + unit.getIdentifier()); + status.errIfFailureAndReset("%s: Identifier", identifier); + assertTrue(uid + ": Constructor", + unit == MeasureUnit::forIdentifier(identifier, status)); + status.errIfFailureAndReset("%s: Constructor", identifier); } extern IntlTest *createMeasureFormatTest() { From 8fa27836959bbdaa06a5cf69a2153aa81fbcaede Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Wed, 15 Jan 2020 21:05:27 +0100 Subject: [PATCH 09/13] Checkpoint --- icu4c/source/common/cmemory.h | 77 ++++++++++++++------ icu4c/source/common/unicode/localpointer.h | 13 ++++ icu4c/source/i18n/measunit_extra.cpp | 57 ++++++++++++--- icu4c/source/i18n/unicode/measunit.h | 5 ++ icu4c/source/test/intltest/measfmttest.cpp | 83 ++++++++++++++-------- 5 files changed, 175 insertions(+), 60 deletions(-) diff --git a/icu4c/source/common/cmemory.h b/icu4c/source/common/cmemory.h index 8f9be605396..da663918cfd 100644 --- a/icu4c/source/common/cmemory.h +++ b/icu4c/source/common/cmemory.h @@ -684,26 +684,26 @@ inline H *MaybeStackHeaderAndArray::orphanOrClone(int32_t l template class MemoryPool : public UMemory { public: - MemoryPool() : count(0), pool() {} + MemoryPool() : fCount(0), fPool() {} ~MemoryPool() { - for (int32_t i = 0; i < count; ++i) { - delete pool[i]; + for (int32_t i = 0; i < fCount; ++i) { + delete fPool[i]; } } MemoryPool(const MemoryPool&) = delete; MemoryPool& operator=(const MemoryPool&) = delete; - MemoryPool(MemoryPool&& other) U_NOEXCEPT : count(other.count), - pool(std::move(other.pool)) { - other.count = 0; + MemoryPool(MemoryPool&& other) U_NOEXCEPT : fCount(other.fCount), + fPool(std::move(other.fPool)) { + other.fCount = 0; } MemoryPool& operator=(MemoryPool&& other) U_NOEXCEPT { - count = other.count; - pool = std::move(other.pool); - other.count = 0; + fCount = other.fCount; + fPool = std::move(other.fPool); + other.fCount = 0; return *this; } @@ -716,20 +716,58 @@ public: */ template T* create(Args&&... args) { - int32_t capacity = pool.getCapacity(); - if (count == capacity && - pool.resize(capacity == stackCapacity ? 4 * capacity : 2 * capacity, - capacity) == nullptr) { + int32_t capacity = fPool.getCapacity(); + if (fCount == capacity && + fPool.resize(capacity == stackCapacity ? 4 * capacity : 2 * capacity, + capacity) == nullptr) { return nullptr; } - return pool[count++] = new T(std::forward(args)...); + return fPool[fCount++] = new T(std::forward(args)...); } /** * @return Number of elements that have been allocated. */ - int32_t size() const { - return count; + int32_t count() const { + return fCount; + } + +protected: + int32_t fCount; + MaybeStackArray fPool; +}; + +/** + * An internal Vector-like implementation based on MemoryPool. + * + * To append an item to the vector, use emplaceBack. + * + * MaybeStackVector vector; + * MyType* element = vector.emplaceBack(); + * if (!element) { + * status = U_MEMORY_ALLOCATION_ERROR; + * } + * // do stuff with element + * + * To loop over the vector, use a for loop with indices: + * + * for (int32_t i = 0; i < vector.length(); i++) { + * MyType* element = vector[i]; + * } + */ +template +class MaybeStackVector : protected MemoryPool { +public: + using MemoryPool::MemoryPool; + using MemoryPool::operator=; + + template + T* emplaceBack(Args&&... args) { + return this->create(args...); + } + + int32_t length() const { + return this->count(); } /** @@ -739,14 +777,11 @@ public: * @return reference to the array item */ T *operator[](ptrdiff_t i) const { - return pool[i]; + return this->fPool[i]; } - -private: - int32_t count; - MaybeStackArray pool; }; + U_NAMESPACE_END #endif /* __cplusplus */ diff --git a/icu4c/source/common/unicode/localpointer.h b/icu4c/source/common/unicode/localpointer.h index e011688b1a5..01f4205f99b 100644 --- a/icu4c/source/common/unicode/localpointer.h +++ b/icu4c/source/common/unicode/localpointer.h @@ -406,6 +406,10 @@ public: src.ptr=NULL; } + static LocalArray withLength(T *p, int32_t length) { + return LocalArray(p, length); + } + #ifndef U_HIDE_DRAFT_API /** * Constructs a LocalArray from a C++11 std::unique_ptr of an array type. @@ -537,6 +541,15 @@ public: return std::unique_ptr(LocalPointerBase::orphan()); } #endif /* U_HIDE_DRAFT_API */ + + int32_t length() const { return fLength; } + +private: + int32_t fLength = -1; + + LocalArray(T *p, int32_t length) : LocalArray(p) { + fLength = length; + } }; /** diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index eec204ed0e7..090cb803ac0 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -17,6 +17,7 @@ #include "ucln_in.h" #include "umutex.h" #include "unicode/errorcode.h" +#include "unicode/localpointer.h" #include "unicode/measunit.h" #include "unicode/ucharstrie.h" #include "unicode/ucharstriebuilder.h" @@ -356,6 +357,8 @@ struct PowerUnit { class CompoundUnit { public: + typedef MaybeStackVector PowerUnitList; + void append(PowerUnit&& powerUnit, UErrorCode& status) { if (powerUnit.power >= 0) { appendImpl(numerator, std::move(powerUnit), status); @@ -371,19 +374,26 @@ public: } void appendTo(CharString& builder, UErrorCode& status) { - if (numerator.size() == 0) { + if (numerator.length() == 0) { builder.append("one", status); } else { - appendToImpl(numerator, numerator.size(), builder, status); + appendToImpl(numerator, numerator.length(), builder, status); } - if (denominator.size() > 0) { + if (denominator.length() > 0) { builder.append("-per-", status); - appendToImpl(denominator, denominator.size(), builder, status); + appendToImpl(denominator, denominator.length(), builder, status); } } + const PowerUnitList& getNumeratorUnits() { + return numerator; + } + + const PowerUnitList& getDenominatorUnits() { + return denominator; + } + private: - typedef MemoryPool PowerUnitList; PowerUnitList numerator; PowerUnitList denominator; @@ -401,7 +411,7 @@ private: void appendImpl(PowerUnitList& unitList, PowerUnit&& powerUnit, UErrorCode& status) { // Check that the same simple unit doesn't already exist - for (int32_t i = 0; i < unitList.size(); i++) { + for (int32_t i = 0; i < unitList.length(); i++) { PowerUnit* candidate = unitList[i]; if (candidate->simpleUnitIndex == powerUnit.simpleUnitIndex && candidate->siPrefix == powerUnit.siPrefix) { @@ -410,7 +420,7 @@ private: } } // Add a new unit - PowerUnit* destination = unitList.create(); + PowerUnit* destination = unitList.emplaceBack(); if (!destination) { status = U_MEMORY_ALLOCATION_ERROR; return; @@ -555,6 +565,7 @@ private: goto fail; } fAfterPer = true; + result.power = -1; break; case COMPOUND_PART_TIMES: @@ -580,10 +591,7 @@ private: if (state > 0) { goto fail; } - result.power = token.getPower(); - if (fAfterPer) { - result.power *= -1; - } + result.power *= token.getPower(); previ = fIndex; state = 1; break; @@ -721,6 +729,33 @@ MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) c return MeasureUnit(builder.cloneData(status)); } +LocalArray MeasureUnit::getSimpleUnits(UErrorCode& status) const { + const char* id = getIdentifier(); + CompoundUnit compoundUnit = UnitIdentifierParser::from(id, status).getOnlyCompoundUnit(status); + if (U_FAILURE(status)) { + return LocalArray::withLength(nullptr, 0); + } + + const CompoundUnit::PowerUnitList& numerator = compoundUnit.getNumeratorUnits(); + const CompoundUnit::PowerUnitList& denominator = compoundUnit.getDenominatorUnits(); + int32_t count = numerator.length() + denominator.length(); + MeasureUnit* arr = new MeasureUnit[count]; + + CharString builder; + int32_t i = 0; + for (int32_t j = 0; j < numerator.length(); j++) { + numerator[j]->appendTo(builder.clear(), status); + arr[i++] = MeasureUnit(builder.cloneData(status)); + } + for (int32_t j = 0; j < denominator.length(); j++) { + builder.clear().append("one-per-", status); + denominator[j]->appendTo(builder, status); + arr[i++] = MeasureUnit(builder.cloneData(status)); + } + + return LocalArray::withLength(arr, count); +} + U_NAMESPACE_END diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index 8ed893190ab..c9947d3edff 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -20,6 +20,7 @@ #if !UCONFIG_NO_FORMATTING #include "unicode/unistr.h" +#include "unicode/localpointer.h" /** * \file @@ -377,6 +378,10 @@ class U_I18N_API MeasureUnit: public UObject { */ size_t getSimpleUnitCount(UErrorCode& status) const; + LocalArray getSimpleUnits(UErrorCode& status) const; + + LocalArray getSequenceUnits(UErrorCode& status) const; + /** * Gets the constituent unit at the given index. * diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 01f118f4d36..5af03b77e64 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -139,14 +139,16 @@ private: NumberFormat::EAlignmentFields field, int32_t start, int32_t end); - void verifyUnitParts( + void verifyPowerUnit( const MeasureUnit& unit, UMeasureSIPrefix siPrefix, int8_t power, const char* identifier); - void verifyUnitIdentifierOnly( + void verifyCompoundUnit( const MeasureUnit& unit, - const char* identifier); + const char* identifier, + const char** subIdentifiers, + int32_t subIdentifierCount); }; void MeasureFormatTest::runIndexedTest( @@ -3237,11 +3239,11 @@ void MeasureFormatTest::TestCompoundUnitOperations() { MeasureUnit centimeter2 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); MeasureUnit cubicDecimeter = cubicMeter.withSIPrefix(UMEASURE_SI_PREFIX_DECI, status); - verifyUnitParts(kilometer, UMEASURE_SI_PREFIX_KILO, 1, "kilometer"); - verifyUnitParts(meter, UMEASURE_SI_PREFIX_ONE, 1, "meter"); - verifyUnitParts(centimeter1, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); - verifyUnitParts(centimeter2, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); - verifyUnitParts(cubicDecimeter, UMEASURE_SI_PREFIX_DECI, 3, "cubic-decimeter"); + verifyPowerUnit(kilometer, UMEASURE_SI_PREFIX_KILO, 1, "kilometer"); + verifyPowerUnit(meter, UMEASURE_SI_PREFIX_ONE, 1, "meter"); + verifyPowerUnit(centimeter1, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); + verifyPowerUnit(centimeter2, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); + verifyPowerUnit(cubicDecimeter, UMEASURE_SI_PREFIX_DECI, 3, "cubic-decimeter"); assertTrue("centimeter equality", centimeter1 == centimeter2); assertTrue("kilometer inequality", centimeter1 != kilometer); @@ -3251,10 +3253,10 @@ void MeasureFormatTest::TestCompoundUnitOperations() { MeasureUnit quarticKilometer = kilometer.withPower(4, status); MeasureUnit overQuarticKilometer1 = kilometer.withPower(-4, status); - verifyUnitParts(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter"); - verifyUnitParts(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "one-per-cubic-centimeter"); - verifyUnitParts(quarticKilometer, UMEASURE_SI_PREFIX_KILO, 4, "p4-kilometer"); - verifyUnitParts(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifyPowerUnit(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter"); + verifyPowerUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "one-per-cubic-centimeter"); + verifyPowerUnit(quarticKilometer, UMEASURE_SI_PREFIX_KILO, 4, "p4-kilometer"); + verifyPowerUnit(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); assertTrue("power inequality", quarticKilometer != overQuarticKilometer1); @@ -3264,8 +3266,8 @@ void MeasureFormatTest::TestCompoundUnitOperations() { .product(kilometer, status) .reciprocal(status); - verifyUnitParts(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); - verifyUnitParts(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifyPowerUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifyPowerUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); @@ -3277,13 +3279,30 @@ void MeasureFormatTest::TestCompoundUnitOperations() { MeasureUnit centimeterSecond1 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status).product(kiloSquareSecond, status); MeasureUnit secondCubicMeter = kiloSquareSecond.product(meter.withPower(3, status), status); MeasureUnit secondCentimeter = kiloSquareSecond.product(meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status), status); + MeasureUnit secondCentimeterPerKilometer = secondCentimeter.product(kilometer.reciprocal(status), status); - verifyUnitParts(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); - verifyUnitIdentifierOnly(meterSecond, "meter-square-kilosecond"); - verifyUnitIdentifierOnly(cubicMeterSecond1, "cubic-meter-square-kilosecond"); - verifyUnitIdentifierOnly(centimeterSecond1, "centimeter-square-kilosecond"); - verifyUnitIdentifierOnly(secondCubicMeter, "square-kilosecond-cubic-meter"); - verifyUnitIdentifierOnly(secondCentimeter, "square-kilosecond-centimeter"); + verifyPowerUnit(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); + const char* meterSecondSub[] = {"meter", "square-kilosecond"}; + verifyCompoundUnit(meterSecond, "meter-square-kilosecond", + meterSecondSub, UPRV_LENGTHOF(meterSecondSub)); + const char* cubicMeterSecond1Sub[] = {"cubic-meter", "square-kilosecond"}; + verifyCompoundUnit(cubicMeterSecond1, "cubic-meter-square-kilosecond", + cubicMeterSecond1Sub, UPRV_LENGTHOF(cubicMeterSecond1Sub)); + const char* centimeterSecond1Sub[] = {"centimeter", "square-kilosecond"}; + verifyCompoundUnit(centimeterSecond1, "centimeter-square-kilosecond", + centimeterSecond1Sub, UPRV_LENGTHOF(centimeterSecond1Sub)); + const char* secondCubicMeterSub[] = {"square-kilosecond", "cubic-meter"}; + verifyCompoundUnit(secondCubicMeter, "square-kilosecond-cubic-meter", + secondCubicMeterSub, UPRV_LENGTHOF(secondCubicMeterSub)); + const char* secondCentimeterSub[] = {"square-kilosecond", "centimeter"}; + verifyCompoundUnit(secondCentimeter, "square-kilosecond-centimeter", + secondCentimeterSub, UPRV_LENGTHOF(secondCentimeterSub)); + const char* secondCentimeterPerKilometerSub[] = {"square-kilosecond", "centimeter", "one-per-kilometer"}; + verifyCompoundUnit(secondCentimeterPerKilometer, "square-kilosecond-centimeter-per-kilometer", + secondCentimeterPerKilometerSub, UPRV_LENGTHOF(secondCentimeterPerKilometerSub)); + + assertTrue("order matters inequality", cubicMeterSecond1 != secondCubicMeter); + assertTrue("additional simple units inequality", secondCubicMeter != secondCentimeter); // Don't allow get/set power or SI prefix on compound units status.errIfFailureAndReset(); @@ -3295,9 +3314,6 @@ void MeasureFormatTest::TestCompoundUnitOperations() { status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); meterSecond.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); - - assertTrue("order matters inequality", cubicMeterSecond1 != secondCubicMeter); - assertTrue("additional simple units inequality", secondCubicMeter != secondCentimeter); } @@ -3371,12 +3387,12 @@ void MeasureFormatTest::verifyFormat( } } -void MeasureFormatTest::verifyUnitParts( +void MeasureFormatTest::verifyPowerUnit( const MeasureUnit& unit, UMeasureSIPrefix siPrefix, int8_t power, const char* identifier) { - IcuTestErrorCode status(*this, "verifyUnitParts"); + IcuTestErrorCode status(*this, "verifyPowerUnit"); UnicodeString uid(identifier, -1, US_INV); assertEquals(uid + ": SI prefix", siPrefix, @@ -3395,10 +3411,12 @@ void MeasureFormatTest::verifyUnitParts( status.errIfFailureAndReset("%s: Constructor", identifier); } -void MeasureFormatTest::verifyUnitIdentifierOnly( +void MeasureFormatTest::verifyCompoundUnit( const MeasureUnit& unit, - const char* identifier) { - IcuTestErrorCode status(*this, "verifyUnitIdentifierOnly"); + const char* identifier, + const char** subIdentifiers, + int32_t subIdentifierCount) { + IcuTestErrorCode status(*this, "verifyCompoundUnit"); UnicodeString uid(identifier, -1, US_INV); assertEquals(uid + ": Identifier", identifier, @@ -3407,6 +3425,15 @@ void MeasureFormatTest::verifyUnitIdentifierOnly( assertTrue(uid + ": Constructor", unit == MeasureUnit::forIdentifier(identifier, status)); status.errIfFailureAndReset("%s: Constructor", identifier); + + LocalArray subUnits = unit.getSimpleUnits(status); + assertEquals(uid + ": Length", subIdentifierCount, subUnits.length()); + for (int32_t i = 0;; i++) { + if (i >= subIdentifierCount || i >= subUnits.length()) break; + assertEquals(uid + ": Sub-unit #" + Int64ToUnicodeString(i), + subIdentifiers[i], + subUnits[i].getIdentifier()); + } } extern IntlTest *createMeasureFormatTest() { From 05ec2aea59a9b6a86425b2e4379b26485f8d495a Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Thu, 16 Jan 2020 22:17:46 +0100 Subject: [PATCH 10/13] Renaming and refactoring --- icu4c/source/i18n/measunit_extra.cpp | 103 +++++++----- icu4c/source/i18n/unicode/measunit.h | 186 +++++++++++---------- icu4c/source/test/intltest/measfmttest.cpp | 43 +++-- 3 files changed, 186 insertions(+), 146 deletions(-) diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index 090cb803ac0..e39d6dab697 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -87,7 +87,7 @@ const struct SIPrefixStrings { { "yocto", UMEASURE_SI_PREFIX_YOCTO }, }; -// FIXME: Get this list from data +// TODO(ICU-20920): Get this list from data const char16_t* const gSimpleUnits[] = { u"one", // note: expected to be index 0 u"100kilometer", @@ -308,7 +308,7 @@ private: int32_t fMatch; }; -struct PowerUnit { +struct SingleUnit { int8_t power = 1; UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; int32_t simpleUnitIndex = 0; @@ -357,13 +357,13 @@ struct PowerUnit { class CompoundUnit { public: - typedef MaybeStackVector PowerUnitList; + typedef MaybeStackVector SingleUnitList; - void append(PowerUnit&& powerUnit, UErrorCode& status) { - if (powerUnit.power >= 0) { - appendImpl(numerator, std::move(powerUnit), status); + void append(SingleUnit&& singleUnit, UErrorCode& status) { + if (singleUnit.power >= 0) { + appendImpl(numerator, std::move(singleUnit), status); } else { - appendImpl(denominator, std::move(powerUnit), status); + appendImpl(denominator, std::move(singleUnit), status); } } @@ -373,7 +373,7 @@ public: denominator = std::move(temp); } - void appendTo(CharString& builder, UErrorCode& status) { + void appendTo(CharString& builder, UErrorCode& status) const { if (numerator.length() == 0) { builder.append("one", status); } else { @@ -385,19 +385,23 @@ public: } } - const PowerUnitList& getNumeratorUnits() { + const SingleUnitList& getNumeratorUnits() const { return numerator; } - const PowerUnitList& getDenominatorUnits() { + const SingleUnitList& getDenominatorUnits() const { return denominator; } -private: - PowerUnitList numerator; - PowerUnitList denominator; + bool isSingle() const { + return numerator.length() + denominator.length() == 1; + } - void appendToImpl(const PowerUnitList& unitList, int32_t len, CharString& builder, UErrorCode& status) { +private: + SingleUnitList numerator; + SingleUnitList denominator; + + void appendToImpl(const SingleUnitList& unitList, int32_t len, CharString& builder, UErrorCode& status) const { bool first = true; for (int32_t i = 0; i < len; i++) { if (first) { @@ -409,23 +413,23 @@ private: } } - void appendImpl(PowerUnitList& unitList, PowerUnit&& powerUnit, UErrorCode& status) { + void appendImpl(SingleUnitList& unitList, SingleUnit&& singleUnit, UErrorCode& status) { // Check that the same simple unit doesn't already exist for (int32_t i = 0; i < unitList.length(); i++) { - PowerUnit* candidate = unitList[i]; - if (candidate->simpleUnitIndex == powerUnit.simpleUnitIndex - && candidate->siPrefix == powerUnit.siPrefix) { - candidate->power += powerUnit.power; + SingleUnit* candidate = unitList[i]; + if (candidate->simpleUnitIndex == singleUnit.simpleUnitIndex + && candidate->siPrefix == singleUnit.siPrefix) { + candidate->power += singleUnit.power; return; } } // Add a new unit - PowerUnit* destination = unitList.emplaceBack(); + SingleUnit* destination = unitList.emplaceBack(); if (!destination) { status = U_MEMORY_ALLOCATION_ERROR; return; } - *destination = std::move(powerUnit); + *destination = std::move(singleUnit); } }; @@ -446,10 +450,10 @@ public: return fIndex < fSource.length(); } - PowerUnit getOnlyPowerUnit(UErrorCode& status) { + SingleUnit getOnlySingleUnit(UErrorCode& status) { bool sawPlus; - PowerUnit retval; - nextPowerUnit(retval, sawPlus, status); + SingleUnit retval; + nextSingleUnit(retval, sawPlus, status); if (U_FAILURE(status)) { return retval; } @@ -468,13 +472,13 @@ public: } while (hasNext()) { int32_t previ = fIndex; - PowerUnit powerUnit; - nextPowerUnit(powerUnit, sawPlus, status); + SingleUnit singleUnit; + nextSingleUnit(singleUnit, sawPlus, status); if (sawPlus) { fIndex = previ; break; } - result.append(std::move(powerUnit), status); + result.append(std::move(singleUnit), status); } return; } @@ -537,7 +541,7 @@ private: return Token(match); } - void nextPowerUnit(PowerUnit& result, bool& sawPlus, UErrorCode& status) { + void nextSingleUnit(SingleUnit& result, bool& sawPlus, UErrorCode& status) { sawPlus = false; if (U_FAILURE(status)) { return; @@ -607,7 +611,7 @@ private: case Token::TYPE_ONE: // Skip "one" and go to the next unit - return nextPowerUnit(result, sawPlus, status); + return nextSingleUnit(result, sawPlus, status); case Token::TYPE_SIMPLE_UNIT: result.simpleUnitIndex = token.getSimpleUnitIndex(); @@ -654,42 +658,61 @@ MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& statu return MeasureUnit(builder.cloneData(status)); } +UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const { + const char* id = getIdentifier(); + UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); + if (U_FAILURE(status)) { + // Unrecoverable error + return UMEASURE_UNIT_SINGLE; + } + + CompoundUnit compoundUnit; + parser.nextCompoundUnit(compoundUnit, status); + if (compoundUnit.isSingle()) { + return UMEASURE_UNIT_SINGLE; + } else if (parser.hasNext()) { + return UMEASURE_UNIT_SEQUENCE; + } else { + return UMEASURE_UNIT_COMPOUND; + } +} + UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const { const char* id = getIdentifier(); - return UnitIdentifierParser::from(id, status).getOnlyPowerUnit(status).siPrefix; + return UnitIdentifierParser::from(id, status).getOnlySingleUnit(status).siPrefix; } MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const { const char* id = getIdentifier(); - PowerUnit powerUnit = UnitIdentifierParser::from(id, status).getOnlyPowerUnit(status); + SingleUnit singleUnit = UnitIdentifierParser::from(id, status).getOnlySingleUnit(status); if (U_FAILURE(status)) { return *this; } - powerUnit.siPrefix = prefix; + singleUnit.siPrefix = prefix; CharString builder; - powerUnit.appendTo(builder, status); + singleUnit.appendTo(builder, status); return MeasureUnit(builder.cloneData(status)); } int8_t MeasureUnit::getPower(UErrorCode& status) const { const char* id = getIdentifier(); - return UnitIdentifierParser::from(id, status).getOnlyPowerUnit(status).power; + return UnitIdentifierParser::from(id, status).getOnlySingleUnit(status).power; } MeasureUnit MeasureUnit::withPower(int8_t power, UErrorCode& status) const { const char* id = getIdentifier(); - PowerUnit powerUnit = UnitIdentifierParser::from(id, status).getOnlyPowerUnit(status); + SingleUnit singleUnit = UnitIdentifierParser::from(id, status).getOnlySingleUnit(status); if (U_FAILURE(status)) { return *this; } CharString builder; - powerUnit.power = power; + singleUnit.power = power; if (power < 0) { builder.append("one-per-", status); } - powerUnit.appendTo(builder, status); + singleUnit.appendTo(builder, status); return MeasureUnit(builder.cloneData(status)); } @@ -729,15 +752,15 @@ MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) c return MeasureUnit(builder.cloneData(status)); } -LocalArray MeasureUnit::getSimpleUnits(UErrorCode& status) const { +LocalArray MeasureUnit::getSingleUnits(UErrorCode& status) const { const char* id = getIdentifier(); CompoundUnit compoundUnit = UnitIdentifierParser::from(id, status).getOnlyCompoundUnit(status); if (U_FAILURE(status)) { return LocalArray::withLength(nullptr, 0); } - const CompoundUnit::PowerUnitList& numerator = compoundUnit.getNumeratorUnits(); - const CompoundUnit::PowerUnitList& denominator = compoundUnit.getDenominatorUnits(); + const CompoundUnit::SingleUnitList& numerator = compoundUnit.getNumeratorUnits(); + const CompoundUnit::SingleUnitList& denominator = compoundUnit.getDenominatorUnits(); int32_t count = numerator.length() + denominator.length(); MeasureUnit* arr = new MeasureUnit[count]; diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index c9947d3edff..cdf760fdbdb 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -31,6 +31,44 @@ U_NAMESPACE_BEGIN class StringEnumeration; +/** + * Enumeration for unit complexity. There are three levels: + * + * - SINGLE: A single unit, optionally with a power and/or SI prefix. Examples: hectare, + * square-kilometer, kilojoule, one-per-second. + * - COMPOUND: A unit composed of the product of multiple single units. Examples: + * meter-per-second, kilowatt-hour, kilogram-meter-per-square-second. + * - SEQUENCE: A unit composed of the sum of multiple compound units. Examples: foot+inch, + * hour+minute+second, hectare+square-meter. + * + * The complexity determines which operations are available. For example, you cannot set the power + * or SI prefix of a compound unit. + * + * @draft ICU 67 + */ +enum UMeasureUnitComplexity { + /** + * A single unit, like kilojoule. + * + * @draft ICU 67 + */ + UMEASURE_UNIT_SINGLE, + + /** + * A compound unit, like meter-per-second. + * + * @draft ICU 67 + */ + UMEASURE_UNIT_COMPOUND, + + /** + * A sequence unit, like hour+minute. + * + * @draft ICU 67 + */ + UMEASURE_UNIT_SEQUENCE +}; + /** * Enumeration for SI prefixes, such as "kilo". * @@ -297,60 +335,74 @@ class U_I18N_API MeasureUnit: public UObject { const char* getIdentifier() const; /** - * Creates a MeasureUnit which is this MeasureUnit augmented with the specified SI prefix. + * Compute the complexity of the unit. See UMeasureUnitComplexity for more information. + * + * @param status Set if an error occurs. + * @return The unit complexity. + * @draft ICU 67 + */ + UMeasureUnitComplexity getComplexity(UErrorCode& status) const; + + /** + * Creates a MeasureUnit which is this SINGLE unit augmented with the specified SI prefix. * For example, UMEASURE_SI_PREFIX_KILO for "kilo". * * There is sufficient locale data to format all standard SI prefixes. * - * This method only works if the MeasureUnit is composed of only one simple unit. An error - * will be set if called on a MeasureUnit containing multiple units. + * NOTE: Only works on SINGLE units. If this is a COMPOUND or SEQUENCE unit, an error will + * occur. For more information, see UMeasureUnitComplexity. * * @param prefix The SI prefix, from UMeasureSIPrefix. - * @param status ICU error code - * @return A new MeasureUnit. + * @param status Set if this is not a SINGLE unit or if another error occurs. + * @return A new SINGLE unit. */ MeasureUnit withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const; /** - * Gets the current SI prefix of this MeasureUnit. For example, if the unit has the SI prefix + * Gets the current SI prefix of this SINGLE unit. For example, if the unit has the SI prefix * "kilo", then UMEASURE_SI_PREFIX_KILO is returned. * - * If the MeasureUnit is composed of multiple simple units, the SI prefix of the first simple - * unit is returned. For example, from "centimeter-kilogram-per-second", the SI prefix - * UMEASURE_SI_PREFIX_CENTI will be returned. + * NOTE: Only works on SINGLE units. If this is a COMPOUND or SEQUENCE unit, an error will + * occur. For more information, see UMeasureUnitComplexity. * - * @return The SI prefix of the first simple unit, from UMeasureSIPrefix. + * @param status Set if this is not a SINGLE unit or if another error occurs. + * @return The SI prefix of this SINGLE unit, from UMeasureSIPrefix. */ UMeasureSIPrefix getSIPrefix(UErrorCode& status) const; /** - * Creates a MeasureUnit which is this MeasureUnit augmented with the specified power. For + * Creates a MeasureUnit which is this SINGLE unit augmented with the specified power. For * example, if power is 2, the unit will be squared. * - * This method only works if the MeasureUnit is composed of only one simple unit. An error - * will be set if called on a MeasureUnit containing multiple units. + * NOTE: Only works on SINGLE units. If this is a COMPOUND or SEQUENCE unit, an error will + * occur. For more information, see UMeasureUnitComplexity. * * @param power The power. - * @param status ICU error code - * @return A new MeasureUnit. + * @param status Set if this is not a SINGLE unit or if another error occurs. + * @return A new SINGLE unit. */ MeasureUnit withPower(int8_t power, UErrorCode& status) const; /** * Gets the power of this MeasureUnit. For example, if the unit is square, then 2 is returned. * - * If the MeasureUnit is composed of multiple simple units, the power of the first simple unit - * is returned. For example, from "cubic-meter-per-square-second", 3 is returned. + * NOTE: Only works on SINGLE units. If this is a COMPOUND or SEQUENCE unit, an error will + * occur. For more information, see UMeasureUnitComplexity. * - * @return The power of the first simple unit. + * @param status Set if this is not a SINGLE unit or if another error occurs. + * @return The power of this simple unit. */ int8_t getPower(UErrorCode& status) const; /** - * Gets the reciprocal of the unit, with the numerator and denominator flipped. + * Gets the reciprocal of this MeasureUnit, with the numerator and denominator flipped. * * For example, if the receiver is "meter-per-second", the unit "second-per-meter" is returned. * + * NOTE: Only works on SINGLE and COMPOUND units. If this is a SEQUENCE unit, an error will + * occur. For more information, see UMeasureUnitComplexity. + * + * @param status Set if this is a SEQUENCE unit or if another error occurs. * @return The reciprocal of the target unit. */ MeasureUnit reciprocal(UErrorCode& status) const; @@ -364,88 +416,42 @@ class U_I18N_API MeasureUnit: public UObject { * For example, if the receiver is "kilowatt" and the argument is "hour-per-day", then the * unit "kilowatt-hour-per-day" is returned. * + * NOTE: Only works on SINGLE and COMPOUND units. If either unit (receivee and argument) is a + * SEQUENCE unit, an error will occur. For more information, see UMeasureUnitComplexity. + * + * @param status Set if this or other is a SEQUENCE unit or if another error occurs. * @return The product of the target unit with the provided unit. */ MeasureUnit product(const MeasureUnit& other, UErrorCode& status) const; /** - * Gets the number of constituent simple units. + * Gets the list of single units contained within a compound unit. * - * For example, if the receiver is "meter-per-square-second", then 2 is returned, since there - * are two simple units: "meter" and "second". + * For example, given "meter-kilogram-per-second", three units will be returned: "meter", + * "kilogram", and "one-per-second". * - * @return The number of constituent units. + * If this is a SINGLE unit, an array of length 1 will be returned. + * + * NOTE: Only works on SINGLE and COMPOUND units. If this is a SEQUENCE unit, an error will + * occur. For more information, see UMeasureUnitComplexity. + * + * @param status Set if this is a SEQUENCE unit or if another error occurs. + * @return An array of single units, owned by the caller. */ - size_t getSimpleUnitCount(UErrorCode& status) const; - - LocalArray getSimpleUnits(UErrorCode& status) const; - - LocalArray getSequenceUnits(UErrorCode& status) const; + LocalArray getSingleUnits(UErrorCode& status) const; /** - * Gets the constituent unit at the given index. - * - * For example, to loop over all simple units: - * - *
-     * MeasureUnit unit(u"meter-per-square-second");
-     * for (size_t i = 0; i < unit.getSimpleUnitCount(); i++) {
-     *     std::cout << unit.simpleUnitAt(i).toString() << std::endl;
-     * }
-     * 
- * - * Expected output: meter, one-per-square-second - * - * @param index Zero-based index. If out of range, the dimensionless unit is returned. - * @return The constituent simple unit at the specified position. + * Gets the list of compound units contained within a sequence unit. + * + * For example, given "hour+minute+second", three units will be returned: "hour", "minute", + * and "second". + * + * If this is a SINGLE or COMPOUND unit, an array of length 1 will be returned. + * + * @param status Set of an error occurs. + * @return An array of compound units, owned by the caller. */ - MeasureUnit simpleUnitAt(size_t index, UErrorCode& status) const; - - /** - * Composes this unit with a super unit. - * - * A super unit, used for formatting only, should be a larger unit sharing the same dimension. - * For example, if the current unit is "inch", a super unit could be "foot", in order to - * render 71 inches as "5 feet, 11 inches". If the super unit is invalid, an error will occur - * during formatting. - * - * A unit can have multiple super units; for example, "second" could have both "minute" and - * "hour" as super units. - * - * Super units are ignored and left untouched in most other methods, such as withSIPrefix, - * withPower, and reciprocal. - * - * @param other The super unit to compose with the target unit. - * @return The composition of the given super unit with this unit. - */ - MeasureUnit withSuperUnit(const MeasureUnit& other, UErrorCode& status) const; - - /** - * Gets the number of super units in the receiver. - * - * For example, "foot+inch" has one super unit. - * - * @return The number of super units. - */ - size_t getSuperUnitCount(UErrorCode& status) const; - - /** - * Gets the super unit at the given index. - * - * For example, to loop over all super units: - * - *
-     * MeasureUnit unit(u"hour+minute+second");
-     * for (size_t i = 0; i < unit.getSuperUnitCount(); i++) {
-     *     std::cout << unit.superUnitAt(i).toCoreUnitIdentifier() << std::endl;
-     * }
-     * 
- * - * Expected output: hour, minute - * - * @return The super unit at the specified position. - */ - MeasureUnit superUnitAt(size_t index, UErrorCode& status) const; + LocalArray getCompoundUnits(UErrorCode& status) const; /** * getAvailable gets all of the available units. diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 5af03b77e64..b15ca5b0128 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -139,7 +139,7 @@ private: NumberFormat::EAlignmentFields field, int32_t start, int32_t end); - void verifyPowerUnit( + void verifySingleUnit( const MeasureUnit& unit, UMeasureSIPrefix siPrefix, int8_t power, @@ -3239,11 +3239,11 @@ void MeasureFormatTest::TestCompoundUnitOperations() { MeasureUnit centimeter2 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); MeasureUnit cubicDecimeter = cubicMeter.withSIPrefix(UMEASURE_SI_PREFIX_DECI, status); - verifyPowerUnit(kilometer, UMEASURE_SI_PREFIX_KILO, 1, "kilometer"); - verifyPowerUnit(meter, UMEASURE_SI_PREFIX_ONE, 1, "meter"); - verifyPowerUnit(centimeter1, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); - verifyPowerUnit(centimeter2, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); - verifyPowerUnit(cubicDecimeter, UMEASURE_SI_PREFIX_DECI, 3, "cubic-decimeter"); + verifySingleUnit(kilometer, UMEASURE_SI_PREFIX_KILO, 1, "kilometer"); + verifySingleUnit(meter, UMEASURE_SI_PREFIX_ONE, 1, "meter"); + verifySingleUnit(centimeter1, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); + verifySingleUnit(centimeter2, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); + verifySingleUnit(cubicDecimeter, UMEASURE_SI_PREFIX_DECI, 3, "cubic-decimeter"); assertTrue("centimeter equality", centimeter1 == centimeter2); assertTrue("kilometer inequality", centimeter1 != kilometer); @@ -3253,10 +3253,10 @@ void MeasureFormatTest::TestCompoundUnitOperations() { MeasureUnit quarticKilometer = kilometer.withPower(4, status); MeasureUnit overQuarticKilometer1 = kilometer.withPower(-4, status); - verifyPowerUnit(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter"); - verifyPowerUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "one-per-cubic-centimeter"); - verifyPowerUnit(quarticKilometer, UMEASURE_SI_PREFIX_KILO, 4, "p4-kilometer"); - verifyPowerUnit(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifySingleUnit(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter"); + verifySingleUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "one-per-cubic-centimeter"); + verifySingleUnit(quarticKilometer, UMEASURE_SI_PREFIX_KILO, 4, "p4-kilometer"); + verifySingleUnit(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); assertTrue("power inequality", quarticKilometer != overQuarticKilometer1); @@ -3266,8 +3266,8 @@ void MeasureFormatTest::TestCompoundUnitOperations() { .product(kilometer, status) .reciprocal(status); - verifyPowerUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); - verifyPowerUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifySingleUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifySingleUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); @@ -3281,7 +3281,7 @@ void MeasureFormatTest::TestCompoundUnitOperations() { MeasureUnit secondCentimeter = kiloSquareSecond.product(meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status), status); MeasureUnit secondCentimeterPerKilometer = secondCentimeter.product(kilometer.reciprocal(status), status); - verifyPowerUnit(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); + verifySingleUnit(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); const char* meterSecondSub[] = {"meter", "square-kilosecond"}; verifyCompoundUnit(meterSecond, "meter-square-kilosecond", meterSecondSub, UPRV_LENGTHOF(meterSecondSub)); @@ -3387,12 +3387,12 @@ void MeasureFormatTest::verifyFormat( } } -void MeasureFormatTest::verifyPowerUnit( +void MeasureFormatTest::verifySingleUnit( const MeasureUnit& unit, UMeasureSIPrefix siPrefix, int8_t power, const char* identifier) { - IcuTestErrorCode status(*this, "verifyPowerUnit"); + IcuTestErrorCode status(*this, "verifySingleUnit"); UnicodeString uid(identifier, -1, US_INV); assertEquals(uid + ": SI prefix", siPrefix, @@ -3409,6 +3409,10 @@ void MeasureFormatTest::verifyPowerUnit( assertTrue(uid + ": Constructor", unit == MeasureUnit::forIdentifier(identifier, status)); status.errIfFailureAndReset("%s: Constructor", identifier); + assertEquals(uid + ": Complexity", + UMEASURE_UNIT_SINGLE, + unit.getComplexity(status)); + status.errIfFailureAndReset("%s: Complexity", identifier); } void MeasureFormatTest::verifyCompoundUnit( @@ -3425,14 +3429,21 @@ void MeasureFormatTest::verifyCompoundUnit( assertTrue(uid + ": Constructor", unit == MeasureUnit::forIdentifier(identifier, status)); status.errIfFailureAndReset("%s: Constructor", identifier); + assertEquals(uid + ": Complexity", + UMEASURE_UNIT_COMPOUND, + unit.getComplexity(status)); + status.errIfFailureAndReset("%s: Complexity", identifier); - LocalArray subUnits = unit.getSimpleUnits(status); + LocalArray subUnits = unit.getSingleUnits(status); assertEquals(uid + ": Length", subIdentifierCount, subUnits.length()); for (int32_t i = 0;; i++) { if (i >= subIdentifierCount || i >= subUnits.length()) break; assertEquals(uid + ": Sub-unit #" + Int64ToUnicodeString(i), subIdentifiers[i], subUnits[i].getIdentifier()); + assertEquals(uid + ": Sub-unit Complexity", + UMEASURE_UNIT_SINGLE, + subUnits[i].getComplexity(status)); } } From 6369aba35a853390e6434111d81016cd8b2ed5cd Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Fri, 17 Jan 2020 00:24:56 +0100 Subject: [PATCH 11/13] Implementing sequence units and fixing bugs --- icu4c/source/common/unicode/localpointer.h | 25 ++- icu4c/source/i18n/measunit_extra.cpp | 233 +++++++++++++-------- icu4c/source/test/intltest/measfmttest.cpp | 106 ++++++++++ 3 files changed, 272 insertions(+), 92 deletions(-) diff --git a/icu4c/source/common/unicode/localpointer.h b/icu4c/source/common/unicode/localpointer.h index 01f4205f99b..b6f8b15c7fb 100644 --- a/icu4c/source/common/unicode/localpointer.h +++ b/icu4c/source/common/unicode/localpointer.h @@ -378,9 +378,9 @@ public: * @param p simple pointer to an array of T objects that is adopted * @stable ICU 4.4 */ - explicit LocalArray(T *p=NULL) : LocalPointerBase(p) {} + explicit LocalArray(T *p=nullptr) : LocalPointerBase(p), fLength(p==nullptr?0:-1) {} /** - * Constructor takes ownership and reports an error if NULL. + * Constructor takes ownership and reports an error if nullptr. * * This constructor is intended to be used with other-class constructors * that may report a failure UErrorCode, @@ -389,11 +389,11 @@ public: * * @param p simple pointer to an array of T objects that is adopted * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR - * if p==NULL and no other failure code had been set + * if p==nullptr and no other failure code had been set * @stable ICU 56 */ - LocalArray(T *p, UErrorCode &errorCode) : LocalPointerBase(p) { - if(p==NULL && U_SUCCESS(errorCode)) { + LocalArray(T *p, UErrorCode &errorCode) : LocalArray(p) { + if(p==nullptr && U_SUCCESS(errorCode)) { errorCode=U_MEMORY_ALLOCATION_ERROR; } } @@ -402,8 +402,8 @@ public: * @param src source smart pointer * @stable ICU 56 */ - LocalArray(LocalArray &&src) U_NOEXCEPT : LocalPointerBase(src.ptr) { - src.ptr=NULL; + LocalArray(LocalArray &&src) U_NOEXCEPT : LocalArray(src.ptr) { + src.ptr=nullptr; } static LocalArray withLength(T *p, int32_t length) { @@ -422,7 +422,7 @@ public: * @draft ICU 64 */ explicit LocalArray(std::unique_ptr &&p) - : LocalPointerBase(p.release()) {} + : LocalArray(p.release()) {} #endif /* U_HIDE_DRAFT_API */ /** @@ -442,7 +442,8 @@ public: LocalArray &operator=(LocalArray &&src) U_NOEXCEPT { delete[] LocalPointerBase::ptr; LocalPointerBase::ptr=src.ptr; - src.ptr=NULL; + src.ptr=nullptr; + fLength=src.fLength; return *this; } @@ -457,6 +458,7 @@ public: */ LocalArray &operator=(std::unique_ptr &&p) U_NOEXCEPT { adoptInstead(p.release()); + fLength=-1; return *this; } #endif /* U_HIDE_DRAFT_API */ @@ -470,6 +472,9 @@ public: T *temp=LocalPointerBase::ptr; LocalPointerBase::ptr=other.ptr; other.ptr=temp; + int32_t tempLength=fLength; + fLength=other.fLength; + other.fLength=tempLength; } /** * Non-member LocalArray swap function. @@ -489,6 +494,7 @@ public: void adoptInstead(T *p) { delete[] LocalPointerBase::ptr; LocalPointerBase::ptr=p; + fLength=-1; } /** * Deletes the array it owns, @@ -515,6 +521,7 @@ public: } else { delete[] p; } + fLength=-1; } /** * Array item access (writable). diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index e39d6dab697..0329c68f13c 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -312,9 +312,14 @@ struct SingleUnit { int8_t power = 1; UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; int32_t simpleUnitIndex = 0; - StringPiece id; + StringPiece id = "one"; void appendTo(CharString& builder, UErrorCode& status) const { + if (simpleUnitIndex == 0) { + // Don't propagate SI prefixes and powers on one + builder.append("one", status); + return; + } int8_t posPower = power < 0 ? -power : power; if (posPower == 0) { status = U_ILLEGAL_ARGUMENT_ERROR; @@ -353,6 +358,18 @@ struct SingleUnit { builder.append(id, status); } + + char* build(UErrorCode& status) { + if (U_FAILURE(status)) { + return nullptr; + } + CharString builder; + if (power < 0) { + builder.append("one-per-", status); + } + appendTo(builder, status); + return builder.cloneData(status); + } }; class CompoundUnit { @@ -367,7 +384,7 @@ public: } } - void reciprocal() { + void takeReciprocal() { auto temp = std::move(numerator); numerator = std::move(denominator); denominator = std::move(temp); @@ -377,14 +394,23 @@ public: if (numerator.length() == 0) { builder.append("one", status); } else { - appendToImpl(numerator, numerator.length(), builder, status); + appendToImpl(numerator, builder, status); } if (denominator.length() > 0) { builder.append("-per-", status); - appendToImpl(denominator, denominator.length(), builder, status); + appendToImpl(denominator, builder, status); } } + char* build(UErrorCode& status) { + if (U_FAILURE(status)) { + return nullptr; + } + CharString builder; + appendTo(builder, status); + return builder.cloneData(status); + } + const SingleUnitList& getNumeratorUnits() const { return numerator; } @@ -397,12 +423,17 @@ public: return numerator.length() + denominator.length() == 1; } + bool isEmpty() const { + return numerator.length() + denominator.length() == 0; + } + private: SingleUnitList numerator; SingleUnitList denominator; - void appendToImpl(const SingleUnitList& unitList, int32_t len, CharString& builder, UErrorCode& status) const { + void appendToImpl(const SingleUnitList& unitList, CharString& builder, UErrorCode& status) const { bool first = true; + int32_t len = unitList.length(); for (int32_t i = 0; i < len; i++) { if (first) { first = false; @@ -433,17 +464,63 @@ private: } }; -class UnitIdentifierParser { +class SequenceUnit { public: - static UnitIdentifierParser from(StringPiece source, UErrorCode& status) { + typedef MaybeStackVector CompoundUnitList; + + void append(CompoundUnit&& compoundUnit, UErrorCode& status) { + CompoundUnit* destination = units.emplaceBack(); + if (!destination) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + *destination = std::move(compoundUnit); + } + + void appendTo(CharString& builder, UErrorCode& status) const { + if (units.length() == 0) { + builder.append("one", status); + return; + } + bool isFirst = true; + for (int32_t i = 0; i < units.length(); i++) { + if (isFirst) { + isFirst = false; + } else { + builder.append('+', status); + } + units[i]->appendTo(builder, status); + } + } + + char* build(UErrorCode& status) { if (U_FAILURE(status)) { - return UnitIdentifierParser(); + return nullptr; + } + CharString builder; + appendTo(builder, status); + return builder.cloneData(status); + } + + const CompoundUnitList& getUnits() const { + return units; + } + +private: + CompoundUnitList units; +}; + +class Parser { +public: + static Parser from(StringPiece source, UErrorCode& status) { + if (U_FAILURE(status)) { + return Parser(); } umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status); if (U_FAILURE(status)) { - return UnitIdentifierParser(); + return Parser(); } - return UnitIdentifierParser(source); + return Parser(source); } bool hasNext() const { @@ -474,7 +551,10 @@ public: int32_t previ = fIndex; SingleUnit singleUnit; nextSingleUnit(singleUnit, sawPlus, status); - if (sawPlus) { + if (U_FAILURE(status)) { + return; + } + if (sawPlus && !result.isEmpty()) { fIndex = previ; break; } @@ -497,6 +577,19 @@ public: return retval; } + SequenceUnit getOnlySequenceUnit(UErrorCode& status) { + SequenceUnit retval; + while (hasNext()) { + CompoundUnit compoundUnit; + nextCompoundUnit(compoundUnit, status); + if (U_FAILURE(status)) { + return retval; + } + retval.append(std::move(compoundUnit), status); + } + return retval; + } + private: int32_t fIndex = 0; StringPiece fSource; @@ -504,9 +597,9 @@ private: bool fAfterPer = false; - UnitIdentifierParser() : fSource(""), fTrie(u"") {} + Parser() : fSource(""), fTrie(u"") {} - UnitIdentifierParser(StringPiece source) + Parser(StringPiece source) : fSource(source), fTrie(kSerializedUnitExtrasStemTrie) {} Token nextToken(UErrorCode& status) { @@ -547,6 +640,11 @@ private: return; } + if (!hasNext()) { + // probably "one" + return; + } + // state: // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit) // 1 = power token seen (will not accept another power token) @@ -634,44 +732,22 @@ private: MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& status) { - UnitIdentifierParser parser = UnitIdentifierParser::from(identifier, status); - if (U_FAILURE(status)) { - // Unrecoverable error - return MeasureUnit(); - } - - CharString builder; - while (parser.hasNext()) { - CompoundUnit compoundUnit; - parser.nextCompoundUnit(compoundUnit, status); - if (U_FAILURE(status)) { - // Invalid syntax - return MeasureUnit(); - } - if (builder.length() > 0) { - builder.append('+', status); - } - compoundUnit.appendTo(builder, status); - } - - // Success - return MeasureUnit(builder.cloneData(status)); + return Parser::from(identifier, status).getOnlySequenceUnit(status).build(status); } UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const { const char* id = getIdentifier(); - UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); + Parser parser = Parser::from(id, status); if (U_FAILURE(status)) { - // Unrecoverable error return UMEASURE_UNIT_SINGLE; } CompoundUnit compoundUnit; parser.nextCompoundUnit(compoundUnit, status); - if (compoundUnit.isSingle()) { - return UMEASURE_UNIT_SINGLE; - } else if (parser.hasNext()) { + if (parser.hasNext()) { return UMEASURE_UNIT_SEQUENCE; + } else if (compoundUnit.isSingle()) { + return UMEASURE_UNIT_SINGLE; } else { return UMEASURE_UNIT_COMPOUND; } @@ -679,65 +755,44 @@ UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const { UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const { const char* id = getIdentifier(); - return UnitIdentifierParser::from(id, status).getOnlySingleUnit(status).siPrefix; + return Parser::from(id, status).getOnlySingleUnit(status).siPrefix; } MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const { const char* id = getIdentifier(); - SingleUnit singleUnit = UnitIdentifierParser::from(id, status).getOnlySingleUnit(status); - if (U_FAILURE(status)) { - return *this; - } - + SingleUnit singleUnit = Parser::from(id, status).getOnlySingleUnit(status); singleUnit.siPrefix = prefix; - CharString builder; - singleUnit.appendTo(builder, status); - return MeasureUnit(builder.cloneData(status)); + return singleUnit.build(status); } int8_t MeasureUnit::getPower(UErrorCode& status) const { const char* id = getIdentifier(); - return UnitIdentifierParser::from(id, status).getOnlySingleUnit(status).power; + return Parser::from(id, status).getOnlySingleUnit(status).power; } MeasureUnit MeasureUnit::withPower(int8_t power, UErrorCode& status) const { const char* id = getIdentifier(); - SingleUnit singleUnit = UnitIdentifierParser::from(id, status).getOnlySingleUnit(status); - if (U_FAILURE(status)) { - return *this; - } - - CharString builder; + SingleUnit singleUnit = Parser::from(id, status).getOnlySingleUnit(status); singleUnit.power = power; - if (power < 0) { - builder.append("one-per-", status); - } - singleUnit.appendTo(builder, status); - return MeasureUnit(builder.cloneData(status)); + return singleUnit.build(status); } MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const { const char* id = getIdentifier(); - CompoundUnit compoundUnit = UnitIdentifierParser::from(id, status).getOnlyCompoundUnit(status); - if (U_FAILURE(status)) { - return *this; - } - - compoundUnit.reciprocal(); - CharString builder; - compoundUnit.appendTo(builder, status); - return MeasureUnit(builder.cloneData(status)); + CompoundUnit compoundUnit = Parser::from(id, status).getOnlyCompoundUnit(status); + compoundUnit.takeReciprocal(); + return compoundUnit.build(status); } MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const { const char* id = getIdentifier(); - CompoundUnit compoundUnit = UnitIdentifierParser::from(id, status).getOnlyCompoundUnit(status); + CompoundUnit compoundUnit = Parser::from(id, status).getOnlyCompoundUnit(status); if (U_FAILURE(status)) { return *this; } // Append other's first CompoundUnit to compoundUnit, then assert other has only one - UnitIdentifierParser otherParser = UnitIdentifierParser::from(other.getIdentifier(), status); + Parser otherParser = Parser::from(other.getIdentifier(), status); otherParser.nextCompoundUnit(compoundUnit, status); if (U_FAILURE(status)) { return *this; @@ -747,16 +802,14 @@ MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) c return *this; } - CharString builder; - compoundUnit.appendTo(builder, status); - return MeasureUnit(builder.cloneData(status)); + return compoundUnit.build(status); } LocalArray MeasureUnit::getSingleUnits(UErrorCode& status) const { const char* id = getIdentifier(); - CompoundUnit compoundUnit = UnitIdentifierParser::from(id, status).getOnlyCompoundUnit(status); + CompoundUnit compoundUnit = Parser::from(id, status).getOnlyCompoundUnit(status); if (U_FAILURE(status)) { - return LocalArray::withLength(nullptr, 0); + return LocalArray(); } const CompoundUnit::SingleUnitList& numerator = compoundUnit.getNumeratorUnits(); @@ -764,16 +817,30 @@ LocalArray MeasureUnit::getSingleUnits(UErrorCode& status) const { int32_t count = numerator.length() + denominator.length(); MeasureUnit* arr = new MeasureUnit[count]; - CharString builder; int32_t i = 0; for (int32_t j = 0; j < numerator.length(); j++) { - numerator[j]->appendTo(builder.clear(), status); - arr[i++] = MeasureUnit(builder.cloneData(status)); + arr[i++] = numerator[j]->build(status); } for (int32_t j = 0; j < denominator.length(); j++) { - builder.clear().append("one-per-", status); - denominator[j]->appendTo(builder, status); - arr[i++] = MeasureUnit(builder.cloneData(status)); + arr[i++] = denominator[j]->build(status); + } + + return LocalArray::withLength(arr, count); +} + +LocalArray MeasureUnit::getCompoundUnits(UErrorCode& status) const { + const char* id = getIdentifier(); + SequenceUnit sequenceUnit = Parser::from(id, status).getOnlySequenceUnit(status); + if (U_FAILURE(status)) { + return LocalArray(); + } + + const SequenceUnit::CompoundUnitList& unitVector = sequenceUnit.getUnits(); + int32_t count = unitVector.length(); + MeasureUnit* arr = new MeasureUnit[count]; + + for (int32_t i = 0; i < count; i++) { + arr[i] = unitVector[i]->build(status); } return LocalArray::withLength(arr, count); diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index b15ca5b0128..06d65c1adf4 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -79,6 +79,7 @@ private: void Test20332_PersonUnits(); void TestNumericTime(); void TestNumericTimeSomeSpecialFormats(); + void TestInvalidIdentifiers(); void TestCompoundUnitOperations(); void verifyFormat( const char *description, @@ -149,6 +150,11 @@ private: const char* identifier, const char** subIdentifiers, int32_t subIdentifierCount); + void verifySequenceUnit( + const MeasureUnit& unit, + const char* identifier, + const char** subIdentifiers, + int32_t subIdentifierCount); }; void MeasureFormatTest::runIndexedTest( @@ -193,6 +199,7 @@ void MeasureFormatTest::runIndexedTest( TESTCASE_AUTO(Test20332_PersonUnits); TESTCASE_AUTO(TestNumericTime); TESTCASE_AUTO(TestNumericTimeSomeSpecialFormats); + TESTCASE_AUTO(TestInvalidIdentifiers); TESTCASE_AUTO(TestCompoundUnitOperations); TESTCASE_AUTO_END; } @@ -3227,6 +3234,34 @@ void MeasureFormatTest::TestNumericTimeSomeSpecialFormats() { verifyFormat("Danish fhoursFminutes", fmtDa, fhoursFminutes, 2, "2.03,877"); } +void MeasureFormatTest::TestInvalidIdentifiers() { + IcuTestErrorCode status(*this, "TestInvalidIdentifiers"); + + const char* const inputs[] = { + "kilo", + "kilokilo", + "onekilo", + "meterkilo", + "meter-kilo", + "k", + "meter-", + "meter+", + "-meter", + "+meter", + "-kilometer", + "+kilometer", + "-p2-meter", + "+p2-meter", + "+", + "-" + }; + + for (const auto& input : inputs) { + MeasureUnit::forIdentifier(input, status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + } +} + void MeasureFormatTest::TestCompoundUnitOperations() { IcuTestErrorCode status(*this, "TestCompoundUnitOperations"); @@ -3265,12 +3300,17 @@ void MeasureFormatTest::TestCompoundUnitOperations() { .product(kilometer, status) .product(kilometer, status) .reciprocal(status); + MeasureUnit overQuarticKilometer4 = meter.withPower(4, status) + .reciprocal(status) + .withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); verifySingleUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); verifySingleUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifySingleUnit(overQuarticKilometer4, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); + assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer4); MeasureUnit kiloSquareSecond = MeasureUnit::getSecond() .withPower(2, status).withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); @@ -3314,6 +3354,43 @@ void MeasureFormatTest::TestCompoundUnitOperations() { status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); meterSecond.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + + MeasureUnit footInch = MeasureUnit::forIdentifier("foot+inch", status); + MeasureUnit inchFoot = MeasureUnit::forIdentifier("inch+foot", status); + + const char* footInchSub[] = {"foot", "inch"}; + verifySequenceUnit(footInch, "foot+inch", + footInchSub, UPRV_LENGTHOF(footInchSub)); + const char* inchFootSub[] = {"inch", "foot"}; + verifySequenceUnit(inchFoot, "inch+foot", + inchFootSub, UPRV_LENGTHOF(inchFootSub)); + + assertTrue("order matters inequality", footInch != inchFoot); + + // TODO(ICU-20920): Enable the one1 tests when the dimensionless base unit ID is updated + // MeasureUnit one1; + MeasureUnit one2 = MeasureUnit::forIdentifier("one", status); + MeasureUnit one3 = MeasureUnit::forIdentifier("", status); + MeasureUnit squareOne = one2.withPower(2, status); + MeasureUnit onePerOne = one2.reciprocal(status); + MeasureUnit squareKiloOne = squareOne.withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); + MeasureUnit onePerSquareKiloOne = squareKiloOne.reciprocal(status); + MeasureUnit oneOne = MeasureUnit::forIdentifier("one-one", status); + MeasureUnit onePlusOne = MeasureUnit::forIdentifier("one+one", status); + + // verifySingleUnit(one1, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(one2, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(one3, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(squareOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(onePerOne, UMEASURE_SI_PREFIX_ONE, -1, "one-per-one"); + verifySingleUnit(squareKiloOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(onePerSquareKiloOne, UMEASURE_SI_PREFIX_ONE, -1, "one-per-one"); + verifySingleUnit(oneOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(onePlusOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); + + // assertTrue("one equality", one1 == one2); + assertTrue("one equality", one2 == one3); + assertTrue("one-per-one equality", onePerOne == onePerSquareKiloOne); } @@ -3447,6 +3524,35 @@ void MeasureFormatTest::verifyCompoundUnit( } } +void MeasureFormatTest::verifySequenceUnit( + const MeasureUnit& unit, + const char* identifier, + const char** subIdentifiers, + int32_t subIdentifierCount) { + IcuTestErrorCode status(*this, "verifySequenceUnit"); + UnicodeString uid(identifier, -1, US_INV); + assertEquals(uid + ": Identifier", + identifier, + unit.getIdentifier()); + status.errIfFailureAndReset("%s: Identifier", identifier); + assertTrue(uid + ": Constructor", + unit == MeasureUnit::forIdentifier(identifier, status)); + status.errIfFailureAndReset("%s: Constructor", identifier); + assertEquals(uid + ": Complexity", + UMEASURE_UNIT_SEQUENCE, + unit.getComplexity(status)); + status.errIfFailureAndReset("%s: Complexity", identifier); + + LocalArray subUnits = unit.getCompoundUnits(status); + assertEquals(uid + ": Length", subIdentifierCount, subUnits.length()); + for (int32_t i = 0;; i++) { + if (i >= subIdentifierCount || i >= subUnits.length()) break; + assertEquals(uid + ": Sub-unit #" + Int64ToUnicodeString(i), + subIdentifiers[i], + subUnits[i].getIdentifier()); + } +} + extern IntlTest *createMeasureFormatTest() { return new MeasureFormatTest(); } From 2ea62fea164a23833b0a0d37d90464c0be67a445 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Fri, 17 Jan 2020 17:13:16 +0100 Subject: [PATCH 12/13] Change public method to use StringPiece --- icu4c/source/i18n/measunit_extra.cpp | 2 +- icu4c/source/i18n/unicode/measunit.h | 2 +- icu4c/source/test/intltest/measfmttest.cpp | 5 +++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index 0329c68f13c..a7f24f1e348 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -731,7 +731,7 @@ private: } // namespace -MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& status) { +MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) { return Parser::from(identifier, status).getOnlySequenceUnit(status).build(status); } diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index cdf760fdbdb..4ec0f62684f 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -265,7 +265,7 @@ class U_I18N_API MeasureUnit: public UObject { * @param status Set if the identifier is invalid. * @draft ICU 67 */ - static MeasureUnit forIdentifier(const char* identifier, UErrorCode& status); + static MeasureUnit forIdentifier(StringPiece identifier, UErrorCode& status); /** * Copy assignment operator. diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 06d65c1adf4..4f65bc619fd 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -3355,6 +3355,11 @@ void MeasureFormatTest::TestCompoundUnitOperations() { meterSecond.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + // Test that StringPiece does not overflow + MeasureUnit kiloSquareSecond2 = MeasureUnit::forIdentifier({secondCentimeter.getIdentifier(), 17}, status); + verifySingleUnit(kiloSquareSecond2, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); + assertTrue("string piece equality", kiloSquareSecond == kiloSquareSecond2); + MeasureUnit footInch = MeasureUnit::forIdentifier("foot+inch", status); MeasureUnit inchFoot = MeasureUnit::forIdentifier("inch+foot", status); From 778565e94ec01e3e49459dedf273bca600420774 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Fri, 17 Jan 2020 17:32:52 +0100 Subject: [PATCH 13/13] Move measunit_extra.o to avoid merge conflict --- icu4c/source/i18n/Makefile.in | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/icu4c/source/i18n/Makefile.in b/icu4c/source/i18n/Makefile.in index f6f9860fb3a..8f197d89b1a 100644 --- a/icu4c/source/i18n/Makefile.in +++ b/icu4c/source/i18n/Makefile.in @@ -97,7 +97,7 @@ uspoof.o uspoof_impl.o uspoof_build.o uspoof_conf.o smpdtfst.o \ ztrans.o zrule.o vzone.o fphdlimp.o fpositer.o ufieldpositer.o \ decNumber.o decContext.o alphaindex.o tznames.o tznames_impl.o tzgnames.o \ tzfmt.o compactdecimalformat.o gender.o region.o scriptset.o \ -uregion.o reldatefmt.o quantityformatter.o measunit.o \ +uregion.o reldatefmt.o quantityformatter.o measunit.o measunit_extra.o \ sharedbreakiterator.o scientificnumberformatter.o dayperiodrules.o nounit.o \ number_affixutils.o number_compact.o number_decimalquantity.o \ number_decimfmtprops.o number_fluent.o number_formatimpl.o number_grouping.o \ @@ -114,8 +114,7 @@ numparse_symbols.o numparse_decimal.o numparse_scientific.o numparse_currency.o numparse_affixes.o numparse_compositions.o numparse_validators.o \ numrange_fluent.o numrange_impl.o \ erarules.o \ -formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o formatted_string_builder.o \ -measunit_extra.o +formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o formatted_string_builder.o ## Header files to install HEADERS = $(srcdir)/unicode/*.h