From e03fa7054113dc13d7a5bd1f70b6cc3e9bf33a64 Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe <17109322+hugovdm@users.noreply.github.com> Date: Sat, 18 Apr 2020 01:24:20 +0200 Subject: [PATCH] ICU-21060 Fix behaviour of -per-, -and-, and dimensionless units. --- icu4c/source/i18n/measunit.cpp | 6 +- icu4c/source/i18n/measunit_extra.cpp | 235 +++++++++++++++------ icu4c/source/i18n/measunit_impl.h | 65 +++++- icu4c/source/i18n/nounit.cpp | 2 +- icu4c/source/i18n/unicode/measunit.h | 22 +- icu4c/source/test/intltest/measfmttest.cpp | 196 +++++++++++------ 6 files changed, 375 insertions(+), 151 deletions(-) diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp index 344ba45fb50..4edf130b7e9 100644 --- a/icu4c/source/i18n/measunit.cpp +++ b/icu4c/source/i18n/measunit.cpp @@ -537,9 +537,9 @@ static const char * const gSubTypes[] = { "solar-mass", "stone", "ton", - "one", - "percent", - "permille", + "", // TODO(ICU-21076): manual edit of what should have been generated by Java. + "percent", // TODO(ICU-21076): regenerate, deal with duplication. + "permille", // TODO(ICU-21076): regenerate, deal with duplication. "gigawatt", "horsepower", "kilowatt", diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index e43ee6597d8..ebc4ac33322 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -34,17 +34,32 @@ namespace { // TODO: Propose a new error code for this? constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR; -// This is to ensure we only insert positive integers into the trie +// Trie value offset for SI Prefixes. This is big enough to ensure we only +// insert positive integers into the trie. constexpr int32_t kSIPrefixOffset = 64; +// Trie value offset for compound parts, e.g. "-per-", "-", "-and-". constexpr int32_t kCompoundPartOffset = 128; enum CompoundPart { + // Represents "-per-" COMPOUND_PART_PER = kCompoundPartOffset, + // Represents "-" COMPOUND_PART_TIMES, + // Represents "-and-" COMPOUND_PART_AND, }; +// Trie value offset for "per-". +constexpr int32_t kInitialCompoundPartOffset = 192; + +enum InitialCompoundPart { + // Represents "per-", the only compound part that can appear at the start of + // an identifier. + INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset, +}; + +// Trie value offset for powers like "square-", "cubic-", "p2-" etc. constexpr int32_t kPowerPartOffset = 256; enum PowerPart { @@ -64,6 +79,8 @@ enum PowerPart { POWER_PART_P15, }; +// Trie value offset for simple units, e.g. "gram", "nautical-mile", +// "fluid-ounce-imperial". constexpr int32_t kSimpleUnitOffset = 512; const struct SIPrefixStrings { @@ -94,7 +111,6 @@ const struct SIPrefixStrings { // TODO(ICU-21059): Get this list from data const char16_t* const gSimpleUnits[] = { - u"one", // note: expected to be index 0 u"candela", u"carat", u"gram", @@ -227,6 +243,7 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) { b.add(u"-per-", COMPOUND_PART_PER, status); b.add(u"-", COMPOUND_PART_TIMES, status); b.add(u"-and-", COMPOUND_PART_AND, status); + b.add(u"per-", INITIAL_COMPOUND_PART_PER, status); b.add(u"square-", POWER_PART_P2, status); b.add(u"cubic-", POWER_PART_P3, status); b.add(u"p2-", POWER_PART_P2, status); @@ -270,28 +287,30 @@ public: enum Type { TYPE_UNDEFINED, TYPE_SI_PREFIX, + // Token type for "-per-", "-", and "-and-". TYPE_COMPOUND_PART, + // Token type for "per-". + TYPE_INITIAL_COMPOUND_PART, TYPE_POWER_PART, - TYPE_ONE, TYPE_SIMPLE_UNIT, }; + // Calling getType() is invalid, resulting in an assertion failure, if Token + // value isn't positive. Type getType() const { - if (fMatch <= 0) { - UPRV_UNREACHABLE; - } + U_ASSERT(fMatch > 0); if (fMatch < kCompoundPartOffset) { return TYPE_SI_PREFIX; } - if (fMatch < kPowerPartOffset) { + if (fMatch < kInitialCompoundPartOffset) { return TYPE_COMPOUND_PART; } + if (fMatch < kPowerPartOffset) { + return TYPE_INITIAL_COMPOUND_PART; + } if (fMatch < kSimpleUnitOffset) { return TYPE_POWER_PART; } - if (fMatch == kSimpleUnitOffset) { - return TYPE_ONE; - } return TYPE_SIMPLE_UNIT; } @@ -300,11 +319,22 @@ public: return static_cast(fMatch - kSIPrefixOffset); } + // Valid only for tokens with type TYPE_COMPOUND_PART. int32_t getMatch() const { U_ASSERT(getType() == TYPE_COMPOUND_PART); return fMatch; } + int32_t getInitialCompoundPart() const { + // Even if there is only one InitialCompoundPart value, we have this + // function for the simplicity of code consistency. + U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART); + // Defensive: if this assert fails, code using this function also needs + // to change. + U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER); + return fMatch; + } + int8_t getPower() const { U_ASSERT(getType() == TYPE_POWER_PART); return static_cast(fMatch - kPowerPartOffset); @@ -347,6 +377,7 @@ public: } private: + // Tracks parser progress: the offset into fSource. int32_t fIndex = 0; // Since we're not owning this memory, whatever is passed to the constructor @@ -355,6 +386,9 @@ private: StringPiece fSource; UCharsTrie fTrie; + // Set to true when we've seen a "-per-" or a "per-", after which all units + // are in the denominator. Until we find an "-and-", at which point the + // identifier is invalid pending TODO(CLDR-13700). bool fAfterPer = false; Parser() : fSource(""), fTrie(u"") {} @@ -366,11 +400,17 @@ private: return fIndex < fSource.length(); } + // Returns the next Token parsed from fSource, advancing fIndex to the end + // of that token in fSource. In case of U_FAILURE(status), the token + // returned will cause an abort if getType() is called on it. Token nextToken(UErrorCode& status) { fTrie.reset(); int32_t match = -1; + // Saves the position in the fSource string for the end of the most + // recent matching token. int32_t previ = -1; - do { + // Find the longest token that matches a value in the trie: + while (fIndex < fSource.length()) { auto result = fTrie.next(fSource.data()[fIndex++]); if (result == USTRINGTRIE_NO_MATCH) { break; @@ -385,7 +425,7 @@ private: } U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE); // continue; - } while (fIndex < fSource.length()); + } if (match < 0) { status = kUnitIdentifierSyntaxError; @@ -395,63 +435,88 @@ private: return Token(match); } + /** + * Returns the next "single unit" via result. + * + * If a "-per-" was parsed, the result will have appropriate negative + * dimensionality. + * + * Returns an error if we parse both compound units and "-and-", since mixed + * compound units are not yet supported - TODO(CLDR-13700). + * + * @param result Will be overwritten by the result, if status shows success. + * @param sawAnd If an "-and-" was parsed prior to finding the "single + * unit", sawAnd is set to true. If not, it is left as is. + * @param status ICU error code. + */ void nextSingleUnit(SingleUnitImpl& result, bool& sawAnd, UErrorCode& status) { - sawAnd = false; if (U_FAILURE(status)) { return; } - if (!hasNext()) { - // probably "one" - return; - } - // state: // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit) // 1 = power token seen (will not accept another power token) // 2 = SI prefix token seen (will not accept a power or SI prefix token) int32_t state = 0; - // Maybe read a compound part - if (fIndex != 0) { - Token token = nextToken(status); - if (U_FAILURE(status)) { - return; + bool atStart = fIndex == 0; + Token token = nextToken(status); + if (U_FAILURE(status)) { return; } + + if (atStart) { + // Identifiers optionally start with "per-". + if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) { + U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER); + fAfterPer = true; + result.dimensionality = -1; + + token = nextToken(status); + if (U_FAILURE(status)) { return; } } + } else { + // All other SingleUnit's are separated from previous SingleUnit's + // via a compound part: if (token.getType() != Token::TYPE_COMPOUND_PART) { status = kUnitIdentifierSyntaxError; return; } + switch (token.getMatch()) { - case COMPOUND_PART_PER: - if (fAfterPer) { - status = kUnitIdentifierSyntaxError; - return; - } - fAfterPer = true; + case COMPOUND_PART_PER: + if (sawAnd) { + // Mixed compound units not yet supported, + // TODO(CLDR-13700). + status = kUnitIdentifierSyntaxError; + return; + } + fAfterPer = true; + result.dimensionality = -1; + break; + + case COMPOUND_PART_TIMES: + if (fAfterPer) { result.dimensionality = -1; - break; + } + break; - case COMPOUND_PART_TIMES: - if (fAfterPer) { - result.dimensionality = -1; - } - break; - - case COMPOUND_PART_AND: - sawAnd = true; - fAfterPer = false; - break; + case COMPOUND_PART_AND: + if (fAfterPer) { + // Can't start with "-and-", and mixed compound units + // not yet supported, TODO(CLDR-13700). + status = kUnitIdentifierSyntaxError; + return; + } + sawAnd = true; + break; } + + token = nextToken(status); + if (U_FAILURE(status)) { return; } } - // Read a unit - while (hasNext()) { - Token token = nextToken(status); - if (U_FAILURE(status)) { - return; - } - + // Read tokens until we have a complete SingleUnit or we reach the end. + while (true) { switch (token.getType()) { case Token::TYPE_POWER_PART: if (state > 0) { @@ -471,10 +536,6 @@ private: state = 2; break; - case Token::TYPE_ONE: - // Skip "one" and go to the next unit - return nextSingleUnit(result, sawAnd, status); - case Token::TYPE_SIMPLE_UNIT: result.index = token.getSimpleUnitIndex(); return; @@ -483,27 +544,38 @@ private: status = kUnitIdentifierSyntaxError; return; } - } - // We ran out of tokens before finding a complete single unit. - status = kUnitIdentifierSyntaxError; + if (!hasNext()) { + // We ran out of tokens before finding a complete single unit. + status = kUnitIdentifierSyntaxError; + return; + } + token = nextToken(status); + if (U_FAILURE(status)) { + return; + } + } } + /// @param result is modified, not overridden. Caller must pass in a + /// default-constructed (empty) MeasureUnitImpl instance. void parseImpl(MeasureUnitImpl& result, UErrorCode& status) { if (U_FAILURE(status)) { return; } + if (fSource.empty()) { + // The dimenionless unit: nothing to parse. leave result as is. + return; + } int32_t unitNum = 0; while (hasNext()) { - bool sawAnd; + bool sawAnd = false; SingleUnitImpl singleUnit; nextSingleUnit(singleUnit, sawAnd, status); if (U_FAILURE(status)) { return; } - if (singleUnit.index == 0) { - continue; - } + U_ASSERT(!singleUnit.isDimensionless()); bool added = result.append(singleUnit, status); if (sawAnd && !added) { // Two similar units are not allowed in a mixed unit @@ -511,9 +583,12 @@ private: return; } if ((++unitNum) >= 2) { - UMeasureUnitComplexity complexity = sawAnd - ? UMEASURE_UNIT_MIXED - : UMEASURE_UNIT_COMPOUND; + // nextSingleUnit fails appropriately for "per" and "and" in the + // same identifier. It doesn't fail for other compound units + // (COMPOUND_PART_TIMES). Consequently we take care of that + // here. + UMeasureUnitComplexity complexity = + sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND; if (unitNum == 2) { U_ASSERT(result.complexity == UMEASURE_UNIT_SINGLE); result.complexity = complexity; @@ -536,15 +611,22 @@ compareSingleUnits(const void* /*context*/, const void* left, const void* right) /** * Generate the identifier string for a single unit in place. + * + * Does not support the dimensionless SingleUnitImpl: calling serializeSingle + * with the dimensionless unit results in an U_INTERNAL_PROGRAM_ERROR. + * + * @param first If singleUnit is part of a compound unit, and not its first + * single unit, set this to false. Otherwise: set to true. */ void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& output, UErrorCode& status) { if (first && singleUnit.dimensionality < 0) { - output.append("one-per-", status); + // Essentially the "unary per". For compound units with a numerator, the + // caller takes care of the "binary per". + output.append("per-", status); } - if (singleUnit.index == 0) { - // Don't propagate SI prefixes and powers on one - output.append("one", status); + if (singleUnit.isDimensionless()) { + status = U_INTERNAL_PROGRAM_ERROR; return; } int8_t posPower = std::abs(singleUnit.dimensionality); @@ -595,7 +677,8 @@ void serialize(MeasureUnitImpl& impl, UErrorCode& status) { } U_ASSERT(impl.identifier.isEmpty()); if (impl.units.length() == 0) { - impl.identifier.append("one", status); + // Dimensionless, constructed by the default constructor: no appending + // to impl.identifier, we wish it to contain the zero-length string. return; } if (impl.complexity == UMEASURE_UNIT_COMPOUND) { @@ -634,8 +717,17 @@ void serialize(MeasureUnitImpl& impl, UErrorCode& status) { } -/** @return true if a new item was added */ +/** + * Appends a SingleUnitImpl to a MeasureUnitImpl. + * + * @return true if a new item was added. If unit is the dimensionless unit, it + * is never added: the return value will always be false. + */ bool appendImpl(MeasureUnitImpl& impl, const SingleUnitImpl& unit, UErrorCode& status) { + if (unit.isDimensionless()) { + // We don't append dimensionless units. + return false; + } // Find a similar unit that already exists, to attempt to coalesce SingleUnitImpl* oldUnit = nullptr; for (int32_t i = 0; i < impl.units.length(); i++) { @@ -645,6 +737,8 @@ bool appendImpl(MeasureUnitImpl& impl, const SingleUnitImpl& unit, UErrorCode& s } } if (oldUnit) { + // Both dimensionalities will be positive, or both will be negative, by + // virtue of isCompatibleWith(). oldUnit->dimensionality += unit.dimensionality; } else { SingleUnitImpl* destination = impl.units.emplaceBack(); @@ -744,7 +838,12 @@ MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& statu } int32_t MeasureUnit::getDimensionality(UErrorCode& status) const { - return SingleUnitImpl::forMeasureUnit(*this, status).dimensionality; + SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status); + if (U_FAILURE(status)) { return 0; } + if (singleUnit.isDimensionless()) { + return 0; + } + return singleUnit.dimensionality; } MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const { diff --git a/icu4c/source/i18n/measunit_impl.h b/icu4c/source/i18n/measunit_impl.h index 9657ff2c1d9..c69d243b3b8 100644 --- a/icu4c/source/i18n/measunit_impl.h +++ b/icu4c/source/i18n/measunit_impl.h @@ -25,14 +25,25 @@ static const char kDefaultCurrency8[] = "XXX"; struct SingleUnitImpl : public UMemory { /** * Gets a single unit from the MeasureUnit. If there are multiple single units, sets an error - * code and return the base dimensionless unit. Parses if necessary. + * code and returns the base dimensionless unit. Parses if necessary. */ static SingleUnitImpl forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status); /** Transform this SingleUnitImpl into a MeasureUnit, simplifying if possible. */ MeasureUnit build(UErrorCode& status) const; - /** Compare this SingleUnitImpl to another SingleUnitImpl. */ + /** + * Compare this SingleUnitImpl to another SingleUnitImpl for the sake of + * sorting and coalescing. + * + * Takes the sign of dimensionality into account, but not the absolute + * value: per-meter is not considered the same as meter, but meter is + * considered the same as square-meter. + * + * The dimensionless unit generally does not get compared, but if it did, it + * would sort before other units by virtue of index being < 0 and + * dimensionality not being negative. + */ int32_t compareTo(const SingleUnitImpl& other) const { if (dimensionality < 0 && other.dimensionality > 0) { // Positive dimensions first @@ -66,13 +77,36 @@ struct SingleUnitImpl : public UMemory { return (compareTo(other) == 0); } - /** Simple unit index, unique for every simple unit. */ - int32_t index = 0; + /** + * Returns true if this unit is the "dimensionless base unit", as produced + * by the MeasureUnit() default constructor. (This does not include the + * likes of concentrations or angles.) + */ + bool isDimensionless() const { + return index == -1; + } - /** SI prefix. **/ + /** + * Simple unit index, unique for every simple unit, -1 for the dimensionless + * unit. This is an index into a string list in measunit_extra.cpp. + * + * The default value is -1, meaning the dimensionless unit: + * isDimensionless() will return true, until index is changed. + */ + int32_t index = -1; + + /** + * SI prefix. + * + * This is ignored for the dimensionless unit. + */ UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; - - /** Dimensionality. **/ + + /** + * Dimensionality. + * + * This is meaningless for the dimensionless unit. + */ int32_t dimensionality = 1; }; @@ -92,7 +126,8 @@ struct MeasureUnitImpl : public UMemory { * * @param identifier The unit identifier string. * @param status Set if the identifier string is not valid. - * @return A newly parsed value object. + * @return A newly parsed value object. Behaviour of this unit is + * unspecified if an error is returned via status. */ static MeasureUnitImpl forIdentifier(StringPiece identifier, UErrorCode& status); @@ -145,15 +180,23 @@ struct MeasureUnitImpl : public UMemory { /** Mutates this MeasureUnitImpl to take the reciprocal. */ void takeReciprocal(UErrorCode& status); - /** Mutates this MeasureUnitImpl to append a single unit. */ + /** + * Mutates this MeasureUnitImpl to append a single unit. + * + * @return true if a new item was added. If unit is the dimensionless unit, + * it is never added: the return value will always be false. + */ bool append(const SingleUnitImpl& singleUnit, UErrorCode& status); /** The complexity, either SINGLE, COMPOUND, or MIXED. */ UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE; /** - * The list of simple units. These may be summed or multiplied, based on the value of the - * complexity field. + * The list of simple units. These may be summed or multiplied, based on the + * value of the complexity field. + * + * The "dimensionless" unit (SingleUnitImpl default constructor) must not be + * added to this list. */ MaybeStackVector units; diff --git a/icu4c/source/i18n/nounit.cpp b/icu4c/source/i18n/nounit.cpp index b993cb56adb..1d4aa05506e 100644 --- a/icu4c/source/i18n/nounit.cpp +++ b/icu4c/source/i18n/nounit.cpp @@ -11,7 +11,7 @@ U_NAMESPACE_BEGIN UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NoUnit) NoUnit U_EXPORT2 NoUnit::base() { - return NoUnit("one"); + return NoUnit(""); } NoUnit U_EXPORT2 NoUnit::percent() { diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index d221fd88390..e240092e305 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -37,7 +37,7 @@ struct MeasureUnitImpl; * Enumeration for unit complexity. There are three levels: * * - SINGLE: A single unit, optionally with a power and/or SI prefix. Examples: hectare, - * square-kilometer, kilojoule, one-per-second. + * square-kilometer, kilojoule, per-second. * - COMPOUND: A unit composed of the product of multiple single units. Examples: * meter-per-second, kilowatt-hour, kilogram-meter-per-square-second. * - MIXED: A unit composed of the sum of multiple single units. Examples: foot+inch, @@ -387,6 +387,8 @@ class U_I18N_API MeasureUnit: public UObject { * NOTE: Only works on SINGLE units. If this is a COMPOUND or MIXED unit, an error will * occur. For more information, see UMeasureUnitComplexity. * + * For the base dimensionless unit, withDimensionality does nothing. + * * @param dimensionality The dimensionality (power). * @param status Set if this is not a SINGLE unit or if another error occurs. * @return A new SINGLE unit. @@ -401,6 +403,8 @@ class U_I18N_API MeasureUnit: public UObject { * NOTE: Only works on SINGLE units. If this is a COMPOUND or MIXED unit, an error will * occur. For more information, see UMeasureUnitComplexity. * + * For the base dimensionless unit, getDimensionality returns 0. + * * @param status Set if this is not a SINGLE unit or if another error occurs. * @return The dimensionality (power) of this simple unit. * @draft ICU 67 @@ -447,7 +451,7 @@ class U_I18N_API MeasureUnit: public UObject { * * Examples: * - Given "meter-kilogram-per-second", three units will be returned: "meter", - * "kilogram", and "one-per-second". + * "kilogram", and "per-second". * - Given "hour+minute+second", three units will be returned: "hour", "minute", * and "second". * @@ -3375,11 +3379,15 @@ class U_I18N_API MeasureUnit: public UObject { private: - // If non-null, fImpl is owned by the MeasureUnit. + // Used by new draft APIs in ICU 67. If non-null, fImpl is owned by the + // MeasureUnit. MeasureUnitImpl* fImpl; - // These two ints are indices into static string lists in measunit.cpp + // An index into a static string list in measunit.cpp. If set to -1, fImpl + // is in use instead of fTypeId and fSubTypeId. int16_t fSubTypeId; + // An index into a static string list in measunit.cpp. If set to -1, fImpl + // is in use instead of fTypeId and fSubTypeId. int8_t fTypeId; MeasureUnit(int32_t typeId, int32_t subTypeId); @@ -3389,7 +3397,11 @@ private: static MeasureUnit *create(int typeId, int subTypeId, UErrorCode &status); /** - * @return Whether subType is known to ICU. + * Sets output's typeId and subTypeId according to subType, if subType is a + * valid/known identifier. + * + * @return Whether subType is known to ICU. If false, output was not + * modified. */ static bool findBySubType(StringPiece subType, MeasureUnit* output); diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 79eb9b461a0..51a85fef85f 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -79,9 +79,10 @@ private: void Test20332_PersonUnits(); void TestNumericTime(); void TestNumericTimeSomeSpecialFormats(); + void TestIdentifiers(); void TestInvalidIdentifiers(); void TestCompoundUnitOperations(); - void TestIdentifiers(); + void TestDimensionlessBehaviour(); void Test21060_AddressSanitizerProblem(); void verifyFormat( @@ -202,9 +203,10 @@ void MeasureFormatTest::runIndexedTest( TESTCASE_AUTO(Test20332_PersonUnits); TESTCASE_AUTO(TestNumericTime); TESTCASE_AUTO(TestNumericTimeSomeSpecialFormats); + TESTCASE_AUTO(TestIdentifiers); TESTCASE_AUTO(TestInvalidIdentifiers); TESTCASE_AUTO(TestCompoundUnitOperations); - TESTCASE_AUTO(TestIdentifiers); + TESTCASE_AUTO(TestDimensionlessBehaviour); TESTCASE_AUTO(Test21060_AddressSanitizerProblem); TESTCASE_AUTO_END; } @@ -3239,10 +3241,43 @@ void MeasureFormatTest::TestNumericTimeSomeSpecialFormats() { verifyFormat("Danish fhoursFminutes", fmtDa, fhoursFminutes, 2, "2.03,877"); } +void MeasureFormatTest::TestIdentifiers() { + IcuTestErrorCode status(*this, "TestIdentifiers"); + struct TestCase { + const char* id; + const char* normalized; + } cases[] = { + // Correctly normalized identifiers should not change + {"", ""}, + {"square-meter-per-square-meter", "square-meter-per-square-meter"}, + {"kilogram-meter-per-square-meter-square-second", + "kilogram-meter-per-square-meter-square-second"}, + {"square-mile-and-square-foot", "square-mile-and-square-foot"}, + {"square-foot-and-square-mile", "square-foot-and-square-mile"}, + {"per-cubic-centimeter", "per-cubic-centimeter"}, + {"per-kilometer", "per-kilometer"}, + + // Normalization of power and per + {"p2-foot-and-p2-mile", "square-foot-and-square-mile"}, + {"gram-square-gram-per-dekagram", "cubic-gram-per-dekagram"}, + {"kilogram-per-meter-per-second", "kilogram-per-meter-second"}, + + // TODO(ICU-20920): Add more test cases once the proper ranking is available. + }; + for (const auto &cas : cases) { + status.setScope(cas.id); + MeasureUnit unit = MeasureUnit::forIdentifier(cas.id, status); + status.errIfFailureAndReset(); + const char* actual = unit.getIdentifier(); + assertEquals(cas.id, cas.normalized, actual); + status.errIfFailureAndReset(); + } +} + void MeasureFormatTest::TestInvalidIdentifiers() { IcuTestErrorCode status(*this, "TestInvalidIdentifiers"); - const char* const inputs[] = { + const char *const inputs[] = { "kilo", "kilokilo", "onekilo", @@ -3258,7 +3293,23 @@ void MeasureFormatTest::TestInvalidIdentifiers() { "-p2-meter", "+p2-meter", "+", - "-" + "-", + "-mile", + "-and-mile", + "-per-mile", + "one", + "one-one", + "one-per-mile", + "one-per-cubic-centimeter", + "square--per-meter", + "metersecond", // Must have compound part in between single units + + // Negative powers not supported in mixed units yet. TODO(CLDR-13701). + "per-hour-and-hertz", + "hertz-and-per-hour", + + // Compound units not supported in mixed units yet. TODO(CLDR-13700). + "kilonewton-meter-and-newton-meter", }; for (const auto& input : inputs) { @@ -3295,9 +3346,9 @@ void MeasureFormatTest::TestCompoundUnitOperations() { MeasureUnit overQuarticKilometer1 = kilometer.withDimensionality(-4, status); verifySingleUnit(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter"); - verifySingleUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "one-per-cubic-centimeter"); + verifySingleUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "per-cubic-centimeter"); verifySingleUnit(quarticKilometer, UMEASURE_SI_PREFIX_KILO, 4, "p4-kilometer"); - verifySingleUnit(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifySingleUnit(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer"); assertTrue("power inequality", quarticKilometer != overQuarticKilometer1); @@ -3310,9 +3361,9 @@ void MeasureFormatTest::TestCompoundUnitOperations() { .reciprocal(status) .withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); - verifySingleUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); - verifySingleUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); - verifySingleUnit(overQuarticKilometer4, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifySingleUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer"); + verifySingleUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer"); + verifySingleUnit(overQuarticKilometer4, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer"); assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); @@ -3343,7 +3394,7 @@ void MeasureFormatTest::TestCompoundUnitOperations() { const char* secondCentimeterSub[] = {"centimeter", "square-kilosecond"}; verifyCompoundUnit(secondCentimeter, "centimeter-square-kilosecond", secondCentimeterSub, UPRV_LENGTHOF(secondCentimeterSub)); - const char* secondCentimeterPerKilometerSub[] = {"centimeter", "square-kilosecond", "one-per-kilometer"}; + const char* secondCentimeterPerKilometerSub[] = {"centimeter", "square-kilosecond", "per-kilometer"}; verifyCompoundUnit(secondCentimeterPerKilometer, "centimeter-square-kilosecond-per-kilometer", secondCentimeterPerKilometerSub, UPRV_LENGTHOF(secondCentimeterPerKilometerSub)); @@ -3378,31 +3429,16 @@ void MeasureFormatTest::TestCompoundUnitOperations() { assertTrue("order matters inequality", footInch != inchFoot); - MeasureUnit one1; - MeasureUnit one2 = MeasureUnit::forIdentifier("one", status); - MeasureUnit one3 = MeasureUnit::forIdentifier("", status); - MeasureUnit squareOne = one2.withDimensionality(2, status); - MeasureUnit onePerOne = one2.reciprocal(status); - MeasureUnit squareKiloOne = squareOne.withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); - MeasureUnit onePerSquareKiloOne = squareKiloOne.reciprocal(status); - MeasureUnit oneOne = MeasureUnit::forIdentifier("one-one", status); - MeasureUnit onePlusOne = MeasureUnit::forIdentifier("one-and-one", status); - MeasureUnit kilometer2 = one2.product(kilometer, status); + MeasureUnit dimensionless; + MeasureUnit dimensionless2 = MeasureUnit::forIdentifier("", status); + status.errIfFailureAndReset("Dimensionless MeasureUnit."); + assertTrue("dimensionless equality", dimensionless == dimensionless2); - verifySingleUnit(one1, UMEASURE_SI_PREFIX_ONE, 1, "one"); - verifySingleUnit(one2, UMEASURE_SI_PREFIX_ONE, 1, "one"); - verifySingleUnit(one3, UMEASURE_SI_PREFIX_ONE, 1, "one"); - verifySingleUnit(squareOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); - verifySingleUnit(onePerOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); - verifySingleUnit(squareKiloOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); - verifySingleUnit(onePerSquareKiloOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); - verifySingleUnit(oneOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); - verifySingleUnit(onePlusOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); + // We support starting from an "identity" MeasureUnit and then combining it + // with others via product: + MeasureUnit kilometer2 = dimensionless.product(kilometer, status); + status.errIfFailureAndReset("dimensionless.product(kilometer, status)"); verifySingleUnit(kilometer2, UMEASURE_SI_PREFIX_KILO, 1, "kilometer"); - - assertTrue("one equality", one1 == one2); - assertTrue("one equality", one2 == one3); - assertTrue("one-per-one equality", onePerOne == onePerSquareKiloOne); assertTrue("kilometer equality", kilometer == kilometer2); // Test out-of-range powers @@ -3413,49 +3449,81 @@ void MeasureFormatTest::TestCompoundUnitOperations() { status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); MeasureUnit power16b = power15.product(kilometer, status); status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); - MeasureUnit powerN15 = MeasureUnit::forIdentifier("one-per-p15-kilometer", status); - verifySingleUnit(powerN15, UMEASURE_SI_PREFIX_KILO, -15, "one-per-p15-kilometer"); + MeasureUnit powerN15 = MeasureUnit::forIdentifier("per-p15-kilometer", status); + verifySingleUnit(powerN15, UMEASURE_SI_PREFIX_KILO, -15, "per-p15-kilometer"); status.errIfFailureAndReset(); - MeasureUnit powerN16a = MeasureUnit::forIdentifier("one-per-p16-kilometer", status); + MeasureUnit powerN16a = MeasureUnit::forIdentifier("per-p16-kilometer", status); status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); MeasureUnit powerN16b = powerN15.product(overQuarticKilometer1, status); status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); } -void MeasureFormatTest::TestIdentifiers() { - IcuTestErrorCode status(*this, "TestIdentifiers"); - struct TestCase { - bool valid; - const char* id; - const char* normalized; - } cases[] = { - {true, "square-meter-per-square-meter", "square-meter-per-square-meter"}, - {true, "kilogram-meter-per-square-meter-square-second", - "kilogram-meter-per-square-meter-square-second"}, - // TODO(ICU-20920): Add more test cases once the proper ranking is available. - }; - for (const auto& cas : cases) { - status.setScope(cas.id); - MeasureUnit unit = MeasureUnit::forIdentifier(cas.id, status); - if (!cas.valid) { - status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); - continue; - } - const char* actual = unit.getIdentifier(); - assertEquals(cas.id, cas.normalized, actual); - status.errIfFailureAndReset(); - } +void MeasureFormatTest::TestDimensionlessBehaviour() { + IcuTestErrorCode status(*this, "TestDimensionlessBehaviour"); + MeasureUnit dimensionless; + MeasureUnit modified; + + // At the time of writing, each of the seven groups below caused + // Parser::from("") to be called: + + // splitToSingleUnits + int32_t count; + LocalArray singles = dimensionless.splitToSingleUnits(count, status); + status.errIfFailureAndReset("dimensionless.splitToSingleUnits(...)"); + assertEquals("no singles in dimensionless", 0, count); + + // product(dimensionless) + MeasureUnit mile = MeasureUnit::getMile(); + mile = mile.product(dimensionless, status); + status.errIfFailureAndReset("mile.product(dimensionless, ...)"); + verifySingleUnit(mile, UMEASURE_SI_PREFIX_ONE, 1, "mile"); + + // dimensionless.getSIPrefix() + UMeasureSIPrefix siPrefix = dimensionless.getSIPrefix(status); + status.errIfFailureAndReset("dimensionless.getSIPrefix(...)"); + assertEquals("dimensionless SIPrefix", UMEASURE_SI_PREFIX_ONE, siPrefix); + + // dimensionless.withSIPrefix() + modified = dimensionless.withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); + status.errIfFailureAndReset("dimensionless.withSIPrefix(...)"); + singles = modified.splitToSingleUnits(count, status); + assertEquals("no singles in modified", 0, count); + siPrefix = modified.getSIPrefix(status); + status.errIfFailureAndReset("modified.getSIPrefix(...)"); + assertEquals("modified SIPrefix", UMEASURE_SI_PREFIX_ONE, siPrefix); + + // dimensionless.getComplexity() + UMeasureUnitComplexity complexity = dimensionless.getComplexity(status); + status.errIfFailureAndReset("dimensionless.getComplexity(...)"); + assertEquals("dimensionless complexity", UMEASURE_UNIT_SINGLE, complexity); + + // Dimensionality is mostly meaningless for dimensionless units, but it's + // still considered a SINGLE unit, so this code doesn't throw errors: + + // dimensionless.getDimensionality() + int32_t dimensionality = dimensionless.getDimensionality(status); + status.errIfFailureAndReset("dimensionless.getDimensionality(...)"); + assertEquals("dimensionless dimensionality", 0, dimensionality); + + // dimensionless.withDimensionality() + dimensionless.withDimensionality(-1, status); + status.errIfFailureAndReset("dimensionless.withDimensionality(...)"); + dimensionality = dimensionless.getDimensionality(status); + status.errIfFailureAndReset("dimensionless.getDimensionality(...)"); + assertEquals("dimensionless dimensionality", 0, dimensionality); } // ICU-21060 void MeasureFormatTest::Test21060_AddressSanitizerProblem() { - UErrorCode status = U_ZERO_ERROR; - MeasureUnit first = MeasureUnit::forIdentifier("one", status); + IcuTestErrorCode status(*this, "Test21060_AddressSanitizerProblem"); + + MeasureUnit first = MeasureUnit::forIdentifier("", status); + status.errIfFailureAndReset(); // Experimentally, a compound unit like "kilogram-meter" failed. A single // unit like "kilogram" or "meter" did not fail, did not trigger the // problem. - MeasureUnit crux = MeasureUnit::forIdentifier("one-per-meter", status); + MeasureUnit crux = MeasureUnit::forIdentifier("per-meter", status); // Heap allocation of a new CharString for first.identifier happens here: first = first.product(crux, status); @@ -3469,12 +3537,14 @@ void MeasureFormatTest::Test21060_AddressSanitizerProblem() { first = first.product(crux, status); // Proving we've had no failure yet: - if (U_FAILURE(status)) return; + status.errIfFailureAndReset(); // heap-use-after-free failure happened here, since a SingleUnitImpl had // held onto a StringPiece pointing at a substring of an identifier that was // freed above: second = second.product(crux, status); + + status.errIfFailureAndReset(); }