diff --git a/icu4c/source/common/cmemory.h b/icu4c/source/common/cmemory.h index da663918cfd..20e394a6154 100644 --- a/icu4c/source/common/cmemory.h +++ b/icu4c/source/common/cmemory.h @@ -767,7 +767,11 @@ public: } int32_t length() const { - return this->count(); + return this->fCount; + } + + T** getAlias() { + return this->fPool.getAlias(); } /** @@ -776,7 +780,7 @@ public: * @param i array index * @return reference to the array item */ - T *operator[](ptrdiff_t i) const { + T* operator[](ptrdiff_t i) const { return this->fPool[i]; } }; diff --git a/icu4c/source/i18n/i18n.vcxproj b/icu4c/source/i18n/i18n.vcxproj index f22fd2ff097..531a9a0d829 100644 --- a/icu4c/source/i18n/i18n.vcxproj +++ b/icu4c/source/i18n/i18n.vcxproj @@ -393,6 +393,7 @@ + diff --git a/icu4c/source/i18n/i18n.vcxproj.filters b/icu4c/source/i18n/i18n.vcxproj.filters index eade0d07354..50a06ee014e 100644 --- a/icu4c/source/i18n/i18n.vcxproj.filters +++ b/icu4c/source/i18n/i18n.vcxproj.filters @@ -833,6 +833,9 @@ formatting + + formatting + formatting diff --git a/icu4c/source/i18n/i18n_uwp.vcxproj b/icu4c/source/i18n/i18n_uwp.vcxproj index 2466ad95864..0268132e2a8 100644 --- a/icu4c/source/i18n/i18n_uwp.vcxproj +++ b/icu4c/source/i18n/i18n_uwp.vcxproj @@ -612,6 +612,7 @@ + diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp index cd2f7da3cba..2e4c46bd173 100644 --- a/icu4c/source/i18n/measunit.cpp +++ b/icu4c/source/i18n/measunit.cpp @@ -17,9 +17,11 @@ #if !UCONFIG_NO_FORMATTING #include "unicode/uenum.h" +#include "unicode/errorcode.h" #include "ustrenum.h" #include "cstring.h" #include "uassert.h" +#include "measunit_impl.h" U_NAMESPACE_BEGIN @@ -2010,28 +2012,25 @@ MeasureUnit::MeasureUnit() : MeasureUnit(kBaseTypeIdx, kBaseSubTypeIdx) { } MeasureUnit::MeasureUnit(int32_t typeId, int32_t subTypeId) - : fId(nullptr), fSubTypeId(subTypeId), fTypeId(typeId) { + : fImpl(nullptr), fSubTypeId(subTypeId), fTypeId(typeId) { } MeasureUnit::MeasureUnit(const MeasureUnit 
&other) - : fId(nullptr) { + : fImpl(nullptr) { *this = other; } MeasureUnit::MeasureUnit(MeasureUnit &&other) noexcept - : fId(other.fId), + : fImpl(other.fImpl), fSubTypeId(other.fSubTypeId), fTypeId(other.fTypeId) { - other.fId = nullptr; + other.fImpl = nullptr; } -MeasureUnit::MeasureUnit(char* idToAdopt) - : fId(idToAdopt), fSubTypeId(-1), fTypeId(-1) { - if (fId == nullptr) { - // Invalid; reset to the base dimensionless unit - setTo(kBaseTypeIdx, kBaseSubTypeIdx); - } else if (findBySubType(idToAdopt, this)) { - // findBySubType frees fId +MeasureUnit::MeasureUnit(MeasureUnitImpl&& impl) + : fImpl(nullptr), fSubTypeId(-1), fTypeId(-1) { + if (!findBySubType(impl.identifier.toStringPiece(), this)) { + fImpl = new MeasureUnitImpl(std::move(impl)); } } @@ -2039,16 +2038,17 @@ MeasureUnit &MeasureUnit::operator=(const MeasureUnit &other) { if (this == &other) { return *this; } - uprv_free(fId); - if (other.fId) { - fId = uprv_strdup(other.fId); - if (!fId) { + delete fImpl; /* fImpl is created with new, so its destructor must run */ + if (other.fImpl) { + ErrorCode localStatus; + fImpl = new MeasureUnitImpl(MeasureUnitImpl::forMeasureUnitMaybeCopy(other, localStatus)); /* copy from other; this->fImpl was just released */ + if (!fImpl || localStatus.isFailure()) { // Unrecoverable allocation error; set to the default unit *this = MeasureUnit(); return *this; } } else { - fId = nullptr; + fImpl = nullptr; } fTypeId = other.fTypeId; fSubTypeId = other.fSubTypeId; @@ -2059,9 +2059,9 @@ MeasureUnit &MeasureUnit::operator=(MeasureUnit &&other) noexcept { if (this == &other) { return *this; } - uprv_free(fId); - fId = other.fId; - other.fId = nullptr; + delete fImpl; + fImpl = other.fImpl; + other.fImpl = nullptr; fTypeId = other.fTypeId; fSubTypeId = other.fSubTypeId; return *this; @@ -2072,8 +2072,8 @@ MeasureUnit *MeasureUnit::clone() const { } MeasureUnit::~MeasureUnit() { - uprv_free(fId); - fId = nullptr; + delete fImpl; + fImpl = nullptr; } const char *MeasureUnit::getType() const { @@ -2093,7 +2093,7 @@ const char *MeasureUnit::getSubtype() const { } const 
char *MeasureUnit::getIdentifier() const { - return fId ? fId : gSubTypes[getOffset()]; + return fImpl ? fImpl->identifier.data() : gSubTypes[getOffset()]; } UBool MeasureUnit::operator==(const UObject& other) const { @@ -2259,15 +2259,15 @@ void MeasureUnit::initTime(const char *timeId) { fSubTypeId = result - gOffsets[fTypeId]; } -void MeasureUnit::initCurrency(const char *isoCurrency) { +void MeasureUnit::initCurrency(StringPiece isoCurrency) { int32_t result = binarySearch(gTypes, 0, UPRV_LENGTHOF(gTypes), "currency"); U_ASSERT(result != -1); fTypeId = result; result = binarySearch( gSubTypes, gOffsets[fTypeId], gOffsets[fTypeId + 1], isoCurrency); if (result == -1) { - fId = uprv_strdup(isoCurrency); - if (fId) { + fImpl = new MeasureUnitImpl(MeasureUnitImpl::forCurrencyCode(isoCurrency)); + if (fImpl) { fSubTypeId = -1; return; } @@ -2291,8 +2291,8 @@ void MeasureUnit::initNoUnit(const char *subtype) { void MeasureUnit::setTo(int32_t typeId, int32_t subTypeId) { fTypeId = typeId; fSubTypeId = subTypeId; - uprv_free(fId); - fId = nullptr; + delete fImpl; /* fImpl is created with new, so its destructor must run */ + fImpl = nullptr; } int32_t MeasureUnit::getOffset() const { diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index b652a681a67..956d00db28c 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -13,6 +13,8 @@ #define UNISTR_FROM_STRING_EXPLICIT #include "cstring.h" +#include "measunit_impl.h" +#include "uarrsort.h" #include "uassert.h" #include "ucln_in.h" #include "umutex.h" @@ -308,211 +310,6 @@ private: int32_t fMatch; }; -struct SingleUnit : public UMemory { - int8_t power = 1; - UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; - int32_t simpleUnitIndex = 0; - StringPiece id = "one"; - - void appendTo(CharString& builder, UErrorCode& status) const { - if (simpleUnitIndex == 0) { - // Don't propagate SI prefixes and powers on one - builder.append("one", status); - return; - } - int8_t posPower = power < 0 ? 
-power : power; - if (posPower == 0) { - status = U_ILLEGAL_ARGUMENT_ERROR; - } else if (posPower == 1) { - // no-op - } else if (posPower == 2) { - builder.append("square-", status); - } else if (posPower == 3) { - builder.append("cubic-", status); - } else if (posPower < 10) { - builder.append('p', status); - builder.append(posPower + '0', status); - builder.append('-', status); - } else if (posPower <= 15) { - builder.append("p1", status); - builder.append('0' + (posPower % 10), status); - builder.append('-', status); - } else { - status = U_ILLEGAL_ARGUMENT_ERROR; - } - if (U_FAILURE(status)) { - return; - } - - if (siPrefix != UMEASURE_SI_PREFIX_ONE) { - for (const auto& siPrefixInfo : gSIPrefixStrings) { - if (siPrefixInfo.value == siPrefix) { - builder.append(siPrefixInfo.string, status); - break; - } - } - } - if (U_FAILURE(status)) { - return; - } - - builder.append(id, status); - } - - char* build(UErrorCode& status) { - if (U_FAILURE(status)) { - return nullptr; - } - CharString builder; - if (power < 0) { - builder.append("one-per-", status); - } - appendTo(builder, status); - return builder.cloneData(status); - } -}; - -class CompoundUnit : public UMemory { -public: - typedef MaybeStackVector SingleUnitList; - - void append(SingleUnit&& singleUnit, UErrorCode& status) { - if (singleUnit.simpleUnitIndex == 0) { - return; - } - if (singleUnit.power >= 0) { - appendImpl(numerator, std::move(singleUnit), status); - } else { - appendImpl(denominator, std::move(singleUnit), status); - } - } - - void takeReciprocal() { - auto temp = std::move(numerator); - numerator = std::move(denominator); - denominator = std::move(temp); - } - - void appendTo(CharString& builder, UErrorCode& status) const { - if (numerator.length() == 0) { - builder.append("one", status); - } else { - appendToImpl(numerator, builder, status); - } - if (denominator.length() > 0) { - builder.append("-per-", status); - appendToImpl(denominator, builder, status); - } - } - - char* 
build(UErrorCode& status) { - if (U_FAILURE(status)) { - return nullptr; - } - CharString builder; - appendTo(builder, status); - return builder.cloneData(status); - } - - const SingleUnitList& getNumeratorUnits() const { - return numerator; - } - - const SingleUnitList& getDenominatorUnits() const { - return denominator; - } - - bool isSingle() const { - return numerator.length() + denominator.length() <= 1; - } - - bool isEmpty() const { - return numerator.length() + denominator.length() == 0; - } - -private: - SingleUnitList numerator; - SingleUnitList denominator; - - void appendToImpl(const SingleUnitList& unitList, CharString& builder, UErrorCode& status) const { - bool first = true; - int32_t len = unitList.length(); - for (int32_t i = 0; i < len; i++) { - if (first) { - first = false; - } else { - builder.append('-', status); - } - unitList[i]->appendTo(builder, status); - } - } - - void appendImpl(SingleUnitList& unitList, SingleUnit&& singleUnit, UErrorCode& status) { - // Check that the same simple unit doesn't already exist - for (int32_t i = 0; i < unitList.length(); i++) { - SingleUnit* candidate = unitList[i]; - if (candidate->simpleUnitIndex == singleUnit.simpleUnitIndex - && candidate->siPrefix == singleUnit.siPrefix) { - candidate->power += singleUnit.power; - return; - } - } - // Add a new unit - SingleUnit* destination = unitList.emplaceBack(); - if (!destination) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - *destination = std::move(singleUnit); - } -}; - -class SequenceUnit : public UMemory { -public: - typedef MaybeStackVector CompoundUnitList; - - void append(CompoundUnit&& compoundUnit, UErrorCode& status) { - CompoundUnit* destination = units.emplaceBack(); - if (!destination) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - *destination = std::move(compoundUnit); - } - - void appendTo(CharString& builder, UErrorCode& status) const { - if (units.length() == 0) { - builder.append("one", status); - return; - } - bool isFirst 
= true; - for (int32_t i = 0; i < units.length(); i++) { - if (isFirst) { - isFirst = false; - } else { - builder.append('+', status); - } - units[i]->appendTo(builder, status); - } - } - - char* build(UErrorCode& status) { - if (U_FAILURE(status)) { - return nullptr; - } - CharString builder; - appendTo(builder, status); - return builder.cloneData(status); - } - - const CompoundUnitList& getUnits() const { - return units; - } - -private: - CompoundUnitList units; -}; - class Parser { public: static Parser from(StringPiece source, UErrorCode& status) { @@ -526,71 +323,10 @@ public: return Parser(source); } - bool hasNext() const { - return fIndex < fSource.length(); - } - - SingleUnit getOnlySingleUnit(UErrorCode& status) { - bool sawPlus; - SingleUnit retval; - nextSingleUnit(retval, sawPlus, status); - if (U_FAILURE(status)) { - return retval; - } - if (sawPlus || hasNext()) { - // Expected to find only one unit in the string - status = U_ILLEGAL_ARGUMENT_ERROR; - return retval; - } - return retval; - } - - void nextCompoundUnit(CompoundUnit& result, UErrorCode& status) { - bool sawPlus; - if (U_FAILURE(status)) { - return; - } - while (hasNext()) { - int32_t previ = fIndex; - SingleUnit singleUnit; - nextSingleUnit(singleUnit, sawPlus, status); - if (U_FAILURE(status)) { - return; - } - if (sawPlus && !result.isEmpty()) { - fIndex = previ; - break; - } - result.append(std::move(singleUnit), status); - } - return; - } - - CompoundUnit getOnlyCompoundUnit(UErrorCode& status) { - CompoundUnit retval; - nextCompoundUnit(retval, status); - if (U_FAILURE(status)) { - return retval; - } - if (hasNext()) { - // Expected to find only one unit in the string - status = U_ILLEGAL_ARGUMENT_ERROR; - return retval; - } - return retval; - } - - SequenceUnit getOnlySequenceUnit(UErrorCode& status) { - SequenceUnit retval; - while (hasNext()) { - CompoundUnit compoundUnit; - nextCompoundUnit(compoundUnit, status); - if (U_FAILURE(status)) { - return retval; - } - 
retval.append(std::move(compoundUnit), status); - } - return retval; + MeasureUnitImpl parse(UErrorCode& status) { + MeasureUnitImpl result; + parseImpl(result, status); + return result; } private: @@ -605,6 +341,10 @@ private: Parser(StringPiece source) : fSource(source), fTrie(kSerializedUnitExtrasStemTrie) {} + inline bool hasNext() const { + return fIndex < fSource.length(); + } + Token nextToken(UErrorCode& status) { fTrie.reset(); int32_t match = -1; @@ -637,7 +377,7 @@ private: return Token(match); } - void nextSingleUnit(SingleUnit& result, bool& sawPlus, UErrorCode& status) { + void nextSingleUnit(TempSingleUnit& result, bool& sawPlus, UErrorCode& status) { sawPlus = false; if (U_FAILURE(status)) { return; @@ -670,7 +410,7 @@ private: goto fail; } fAfterPer = true; - result.power = -1; + result.dimensionality = -1; break; case COMPOUND_PART_TIMES: @@ -696,7 +436,7 @@ private: if (state > 0) { goto fail; } - result.power *= token.getPower(); + result.dimensionality *= token.getPower(); previ = fIndex; state = 1; break; @@ -715,8 +455,8 @@ private: return nextSingleUnit(result, sawPlus, status); case Token::TYPE_SIMPLE_UNIT: - result.simpleUnitIndex = token.getSimpleUnitIndex(); - result.id = fSource.substr(previ, fIndex - previ); + result.index = token.getSimpleUnitIndex(); + result.identifier = fSource.substr(previ, fIndex - previ); return; default: @@ -729,124 +469,306 @@ private: status = U_ILLEGAL_ARGUMENT_ERROR; return; } + + void parseImpl(MeasureUnitImpl& result, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + int32_t unitNum = 0; + while (hasNext()) { + bool sawPlus; + TempSingleUnit singleUnit; + nextSingleUnit(singleUnit, sawPlus, status); + if (U_FAILURE(status)) { + return; + } + if (singleUnit.index == 0) { + continue; + } + bool added = result.append(singleUnit, status); + if (sawPlus && !added) { + // Two similar units are not allowed in a sequence unit + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if ((++unitNum) >= 
2) { + UMeasureUnitComplexity complexity = sawPlus + ? UMEASURE_UNIT_SEQUENCE + : UMEASURE_UNIT_COMPOUND; + if (unitNum == 2) { + U_ASSERT(result.complexity == UMEASURE_UNIT_SINGLE); + result.complexity = complexity; + } else if (result.complexity != complexity) { + // Mixed sequence and compound units + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + } + } + } }; +int32_t U_CALLCONV +compareSingleUnits(const void* /*context*/, const void* left, const void* right) { + auto realLeft = static_cast(left); + auto realRight = static_cast(right); + return (*realLeft)->compareTo(**realRight); +} + +/** + * Generate the identifier string for a single unit in place. + */ +void serializeSingle(const TempSingleUnit& singleUnit, bool first, CharString& output, UErrorCode& status) { + if (first && singleUnit.dimensionality < 0) { + output.append("one-per-", status); + } + + if (singleUnit.index == 0) { + // Don't propagate SI prefixes and powers on one + output.append("one", status); + return; + } + int8_t posPower = std::abs(singleUnit.dimensionality); + if (posPower == 0) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } else if (posPower == 1) { + // no-op + } else if (posPower == 2) { + output.append("square-", status); + } else if (posPower == 3) { + output.append("cubic-", status); + } else if (posPower < 10) { + output.append('p', status); + output.append(posPower + '0', status); + output.append('-', status); + } else if (posPower <= 15) { + output.append("p1", status); + output.append('0' + (posPower % 10), status); + output.append('-', status); + } else { + status = U_ILLEGAL_ARGUMENT_ERROR; + } + if (U_FAILURE(status)) { + return; + } + + if (singleUnit.siPrefix != UMEASURE_SI_PREFIX_ONE) { + for (const auto& siPrefixInfo : gSIPrefixStrings) { + if (siPrefixInfo.value == singleUnit.siPrefix) { + output.append(siPrefixInfo.string, status); + break; + } + } + } + if (U_FAILURE(status)) { + return; + } + + output.append(singleUnit.identifier, status); +} + +/** + * Normalize 
a MeasureUnitImpl and generate the identifier string in place. + */ +void serialize(MeasureUnitImpl& impl, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + U_ASSERT(impl.identifier.isEmpty()); + if (impl.units.length() == 0) { + impl.identifier.append("one", status); + return; + } + if (impl.complexity == UMEASURE_UNIT_COMPOUND) { + // Note: don't sort a SEQUENCE unit + uprv_sortArray( + impl.units.getAlias(), + impl.units.length(), + sizeof(impl.units[0]), + compareSingleUnits, + nullptr, + false, + &status); + if (U_FAILURE(status)) { + return; + } + } + serializeSingle(*impl.units[0], true, impl.identifier, status); + if (impl.units.length() == 1) { + return; + } + for (int32_t i = 1; i < impl.units.length(); i++) { + const TempSingleUnit& prev = *impl.units[i-1]; + const TempSingleUnit& curr = *impl.units[i]; + if (impl.complexity == UMEASURE_UNIT_SEQUENCE) { + impl.identifier.append('+', status); + serializeSingle(curr, true, impl.identifier, status); + } else { + if (prev.dimensionality > 0 && curr.dimensionality < 0) { + impl.identifier.append("-per-", status); + } else { + impl.identifier.append('-', status); + } + serializeSingle(curr, false, impl.identifier, status); + } + } + +} + +/** @return true if a new item was added */ +bool appendImpl(MeasureUnitImpl& impl, const TempSingleUnit& unit, UErrorCode& status) { + // Find a similar unit that already exists, to attempt to coalesce + TempSingleUnit* oldUnit = nullptr; + for (int32_t i = 0; i < impl.units.length(); i++) { + auto* candidate = impl.units[i]; + if (candidate->index == unit.index && candidate->siPrefix == unit.siPrefix) { + oldUnit = candidate; + } + } + if (oldUnit) { + oldUnit->dimensionality += unit.dimensionality; + } else { + TempSingleUnit* destination = impl.units.emplaceBack(); + if (!destination) { + status = U_MEMORY_ALLOCATION_ERROR; + return false; + } + *destination = unit; + } + return (oldUnit == nullptr); +} + } // namespace +TempSingleUnit 
TempSingleUnit::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) { + MeasureUnitImpl temp; + const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status); + if (U_FAILURE(status)) { + return {}; + } + if (impl.units.length() == 0) { + return {}; + } else if (impl.units.length() == 1) { + return *impl.units[0]; + } else { + status = U_ILLEGAL_ARGUMENT_ERROR; + return {}; + } +} + +MeasureUnit TempSingleUnit::build(UErrorCode& status) { + MeasureUnitImpl temp; + temp.append(*this, status); + return std::move(temp).build(status); +} + + +MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) { + return Parser::from(identifier, status).parse(status); +} + +const MeasureUnitImpl& MeasureUnitImpl::forMeasureUnit( + const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status) { + if (measureUnit.fImpl) { + return *measureUnit.fImpl; + } else { + memory = Parser::from(measureUnit.getIdentifier(), status).parse(status); + return memory; + } +} + +MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy( + const MeasureUnit& measureUnit, UErrorCode& status) { + // TODO: Improve this algorithm to not round-trip through the identifier string? 
+ return Parser::from(measureUnit.getIdentifier(), status).parse(status); +} + +MeasureUnitImpl MeasureUnitImpl::forCurrencyCode(StringPiece currencyCode) { + MeasureUnitImpl result; + ErrorCode localStatus; + result.identifier.append(currencyCode, localStatus); + // localStatus is not expected to fail since currencyCode should be 3 chars long + return result; +} + +void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) { + for (int32_t i = 0; i < units.length(); i++) { + units[i]->dimensionality *= -1; + } +} + +bool MeasureUnitImpl::append(const TempSingleUnit& singleUnit, UErrorCode& status) { + return appendImpl(*this, singleUnit, status); +} + +MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && { + serialize(*this, status); + return MeasureUnit(std::move(*this)); +} + + MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) { - return Parser::from(identifier, status).getOnlySequenceUnit(status).build(status); + return Parser::from(identifier, status).parse(status).build(status); } UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const { - const char* id = getIdentifier(); - Parser parser = Parser::from(id, status); - if (U_FAILURE(status)) { - return UMEASURE_UNIT_SINGLE; - } - - CompoundUnit compoundUnit; - parser.nextCompoundUnit(compoundUnit, status); - if (parser.hasNext()) { - return UMEASURE_UNIT_SEQUENCE; - } else if (compoundUnit.isSingle()) { - return UMEASURE_UNIT_SINGLE; - } else { - return UMEASURE_UNIT_COMPOUND; - } + MeasureUnitImpl temp; + return MeasureUnitImpl::forMeasureUnit(*this, temp, status).complexity; } UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const { - const char* id = getIdentifier(); - return Parser::from(id, status).getOnlySingleUnit(status).siPrefix; + return TempSingleUnit::forMeasureUnit(*this, status).siPrefix; } MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const { - const char* id = getIdentifier(); - 
SingleUnit singleUnit = Parser::from(id, status).getOnlySingleUnit(status); + TempSingleUnit singleUnit = TempSingleUnit::forMeasureUnit(*this, status); singleUnit.siPrefix = prefix; return singleUnit.build(status); } -int8_t MeasureUnit::getPower(UErrorCode& status) const { - const char* id = getIdentifier(); - return Parser::from(id, status).getOnlySingleUnit(status).power; +int32_t MeasureUnit::getDimensionality(UErrorCode& status) const { + return TempSingleUnit::forMeasureUnit(*this, status).dimensionality; } -MeasureUnit MeasureUnit::withPower(int8_t power, UErrorCode& status) const { - const char* id = getIdentifier(); - SingleUnit singleUnit = Parser::from(id, status).getOnlySingleUnit(status); - singleUnit.power = power; +MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const { + TempSingleUnit singleUnit = TempSingleUnit::forMeasureUnit(*this, status); + singleUnit.dimensionality = dimensionality; return singleUnit.build(status); } MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const { - const char* id = getIdentifier(); - CompoundUnit compoundUnit = Parser::from(id, status).getOnlyCompoundUnit(status); - compoundUnit.takeReciprocal(); - return compoundUnit.build(status); + MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status); + impl.takeReciprocal(status); + return std::move(impl).build(status); } MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const { - const char* id = getIdentifier(); - CompoundUnit compoundUnit = Parser::from(id, status).getOnlyCompoundUnit(status); - if (U_FAILURE(status)) { - return *this; - } - - // Append other's first CompoundUnit to compoundUnit, then assert other has only one - Parser otherParser = Parser::from(other.getIdentifier(), status); - otherParser.nextCompoundUnit(compoundUnit, status); - if (U_FAILURE(status)) { - return *this; - } - if (otherParser.hasNext()) { + MeasureUnitImpl impl = 
MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status); + MeasureUnitImpl temp; + const MeasureUnitImpl& otherImpl = MeasureUnitImpl::forMeasureUnit(other, temp, status); + if (impl.complexity == UMEASURE_UNIT_SEQUENCE || otherImpl.complexity == UMEASURE_UNIT_SEQUENCE) { status = U_ILLEGAL_ARGUMENT_ERROR; - return *this; + return {}; } - - return compoundUnit.build(status); + for (int32_t i = 0; i < otherImpl.units.length(); i++) { + impl.append(*otherImpl.units[i], status); + } + if (impl.units.length() > 1) { + impl.complexity = UMEASURE_UNIT_COMPOUND; + } + return std::move(impl).build(status); } -LocalArray MeasureUnit::getSingleUnits(UErrorCode& status) const { - const char* id = getIdentifier(); - CompoundUnit compoundUnit = Parser::from(id, status).getOnlyCompoundUnit(status); - if (U_FAILURE(status)) { - return LocalArray(); +LocalArray MeasureUnit::splitToSingleUnits(UErrorCode& status) const { + MeasureUnitImpl temp; + const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, temp, status); + const int32_t length = impl.units.length(); + MeasureUnit* arr = new MeasureUnit[length]; + for (int32_t i = 0; i < length; i++) { + arr[i] = impl.units[i]->build(status); } - - const CompoundUnit::SingleUnitList& numerator = compoundUnit.getNumeratorUnits(); - const CompoundUnit::SingleUnitList& denominator = compoundUnit.getDenominatorUnits(); - int32_t count = numerator.length() + denominator.length(); - MeasureUnit* arr = new MeasureUnit[count]; - - int32_t i = 0; - for (int32_t j = 0; j < numerator.length(); j++) { - arr[i++] = numerator[j]->build(status); - } - for (int32_t j = 0; j < denominator.length(); j++) { - arr[i++] = denominator[j]->build(status); - } - - return LocalArray::withLength(arr, count); -} - -LocalArray MeasureUnit::getCompoundUnits(UErrorCode& status) const { - const char* id = getIdentifier(); - SequenceUnit sequenceUnit = Parser::from(id, status).getOnlySequenceUnit(status); - if (U_FAILURE(status)) { - return LocalArray(); - 
} - - const SequenceUnit::CompoundUnitList& unitVector = sequenceUnit.getUnits(); - int32_t count = unitVector.length(); - MeasureUnit* arr = new MeasureUnit[count]; - - for (int32_t i = 0; i < count; i++) { - arr[i] = unitVector[i]->build(status); - } - - return LocalArray::withLength(arr, count); + return LocalArray::withLength(arr, length); } diff --git a/icu4c/source/i18n/measunit_impl.h b/icu4c/source/i18n/measunit_impl.h new file mode 100644 index 00000000000..5f196969eab --- /dev/null +++ b/icu4c/source/i18n/measunit_impl.h @@ -0,0 +1,135 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef __MEASUNIT_IMPL_H__ +#define __MEASUNIT_IMPL_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/measunit.h" +#include "cmemory.h" +#include "charstr.h" + +U_NAMESPACE_BEGIN + + +struct TempSingleUnit : public UMemory { + /** + * Gets a single unit from the MeasureUnit. If there are multiple single units, sets an error + * code and returns the base dimensionless unit. Parses if necessary. + */ + static TempSingleUnit forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status); + + /** Transform this TempSingleUnit into a MeasureUnit, simplifying if possible. */ + MeasureUnit build(UErrorCode& status); + + /** Compare this TempSingleUnit to another TempSingleUnit. */ + int32_t compareTo(const TempSingleUnit& other) const { + if (dimensionality < 0 && other.dimensionality > 0) { + // Positive dimensions first + return 1; + } else if (dimensionality > 0 && other.dimensionality < 0) { + return -1; + } else if (index < other.index) { + return -1; + } else if (index > other.index) { + return 1; + } else if (siPrefix < other.siPrefix) { + return -1; + } else if (siPrefix > other.siPrefix) { + return 1; + } else { + return 0; + } + } + + /** Simple unit index, unique for every simple unit. 
*/ + int32_t index = 0; + + /** Simple unit identifier; memory not owned by the TempSingleUnit. */ + StringPiece identifier; + + /** SI prefix. **/ + UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; + + /** Dimensionality. **/ + int32_t dimensionality = 1; +}; + + +/** + * Internal representation of measurement units. Capable of representing all complexities of units, + * including sequence and compound units. + */ +struct MeasureUnitImpl : public UMemory { + /** Extract the MeasureUnitImpl from a MeasureUnit. */ + static inline const MeasureUnitImpl* get(const MeasureUnit& measureUnit) { + return measureUnit.fImpl; + } + + /** + * Parse a unit identifier into a MeasureUnitImpl. + * + * @param identifier The unit identifier string. + * @param status Set if the identifier string is not valid. + * @return A newly parsed value object. + */ + static MeasureUnitImpl forIdentifier(StringPiece identifier, UErrorCode& status); + + /** + * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present. + * + * @param measureUnit The source MeasureUnit. + * @param memory A place to write the new MeasureUnitImpl if parsing is required. + * @param status Set if an error occurs. + * @return A reference to either measureUnit.fImpl or memory. + */ + static const MeasureUnitImpl& forMeasureUnit( + const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status); + + /** + * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present. + * + * @param measureUnit The source MeasureUnit. + * @param status Set if an error occurs. + * @return A value object, either newly parsed or copied from measureUnit. + */ + static MeasureUnitImpl forMeasureUnitMaybeCopy( + const MeasureUnit& measureUnit, UErrorCode& status); + + /** + * Used for currency units. + */ + static MeasureUnitImpl forCurrencyCode(StringPiece currencyCode); + + /** Transform this MeasureUnitImpl into a MeasureUnit, simplifying if possible. 
*/ + MeasureUnit build(UErrorCode& status) &&; + + /** Mutates this MeasureUnitImpl to take the reciprocal. */ + void takeReciprocal(UErrorCode& status); + + /** Mutates this MeasureUnitImpl to append a single unit. */ + bool append(const TempSingleUnit& singleUnit, UErrorCode& status); + + /** The complexity, either SINGLE, COMPOUND, or SEQUENCE. */ + UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE; + + /** + * The list of simple units. These may be summed or multiplied, based on the value of the + * complexity field. + */ + MaybeStackVector units; + + /** + * The full unit identifier. Owned by the MeasureUnitImpl. Empty if not computed. + */ + CharString identifier; +}; + + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ +#endif //__MEASUNIT_IMPL_H__ diff --git a/icu4c/source/i18n/number_skeletons.cpp b/icu4c/source/i18n/number_skeletons.cpp index 71c68d3e066..3c03dca5dac 100644 --- a/icu4c/source/i18n/number_skeletons.cpp +++ b/icu4c/source/i18n/number_skeletons.cpp @@ -23,6 +23,7 @@ #include "string_segment.h" #include "unicode/errorcode.h" #include "util.h" +#include "measunit_impl.h" using namespace icu; using namespace icu::number; @@ -1041,20 +1042,21 @@ void blueprint_helpers::parseIdentifierUnitOption(const StringSegment& segment, SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status); ErrorCode internalStatus; - MeasureUnit fullUnit = MeasureUnit::forIdentifier(buffer.toStringPiece(), internalStatus); - auto subUnits = fullUnit.getSingleUnits(internalStatus); + auto fullUnit = MeasureUnitImpl::forIdentifier(buffer.toStringPiece(), internalStatus); if (internalStatus.isFailure()) { // throw new SkeletonSyntaxException("Invalid core unit identifier", segment, e); status = U_NUMBER_SKELETON_SYNTAX_ERROR; return; } - for (int32_t i = 0; i < subUnits.length(); i++) { - const MeasureUnit& subUnit = subUnits[i]; - if (subUnit.getPower(status) > 0) { - macros.unit = macros.unit.product(subUnit, status); + 
// TODO(ICU-20941): Clean this up. + for (int32_t i = 0; i < fullUnit.units.length(); i++) { + TempSingleUnit* subUnit = fullUnit.units[i]; + if (subUnit->dimensionality > 0) { + macros.unit = macros.unit.product(subUnit->build(status), status); } else { - macros.perUnit = macros.perUnit.product(subUnit.reciprocal(status), status); + subUnit->dimensionality *= -1; + macros.perUnit = macros.perUnit.product(subUnit->build(status), status); } } } diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index badc553e0bc..67d74d39428 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -30,6 +30,7 @@ U_NAMESPACE_BEGIN class StringEnumeration; +struct MeasureUnitImpl; #ifndef U_HIDE_DRAFT_API /** @@ -376,28 +377,29 @@ class U_I18N_API MeasureUnit: public UObject { UMeasureSIPrefix getSIPrefix(UErrorCode& status) const; /** - * Creates a MeasureUnit which is this SINGLE unit augmented with the specified power. For - * example, if power is 2, the unit will be squared. + * Creates a MeasureUnit which is this SINGLE unit augmented with the specified dimensionality + * (power). For example, if dimensionality is 2, the unit will be squared. * * NOTE: Only works on SINGLE units. If this is a COMPOUND or SEQUENCE unit, an error will * occur. For more information, see UMeasureUnitComplexity. * - * @param power The power. + * @param dimensionality The dimensionality (power). * @param status Set if this is not a SINGLE unit or if another error occurs. * @return A new SINGLE unit. */ - MeasureUnit withPower(int8_t power, UErrorCode& status) const; + MeasureUnit withDimensionality(int32_t dimensionality, UErrorCode& status) const; /** - * Gets the power of this MeasureUnit. For example, if the unit is square, then 2 is returned. + * Gets the dimensionality (power) of this MeasureUnit. For example, if the unit is square, + * then 2 is returned. * * NOTE: Only works on SINGLE units.
If this is a COMPOUND or SEQUENCE unit, an error will * occur. For more information, see UMeasureUnitComplexity. * * @param status Set if this is not a SINGLE unit or if another error occurs. - * @return The power of this simple unit. + * @return The dimensionality (power) of this simple unit. */ - int8_t getPower(UErrorCode& status) const; + int32_t getDimensionality(UErrorCode& status) const; /** * Gets the reciprocal of this MeasureUnit, with the numerator and denominator flipped. @@ -431,33 +433,20 @@ class U_I18N_API MeasureUnit: public UObject { MeasureUnit product(const MeasureUnit& other, UErrorCode& status) const; /** - * Gets the list of single units contained within a compound unit. + * Gets the list of SINGLE units contained within a SEQUENCE or COMPOUND unit. * - * For example, given "meter-kilogram-per-second", three units will be returned: "meter", - * "kilogram", and "one-per-second". + * Examples: + * - Given "meter-kilogram-per-second", three units will be returned: "meter", + * "kilogram", and "one-per-second". + * - Given "hour+minute+second", three units will be returned: "hour", "minute", + * and "second". * * If this is a SINGLE unit, an array of length 1 will be returned. * - * NOTE: Only works on SINGLE and COMPOUND units. If this is a SEQUENCE unit, an error will - * occur. For more information, see UMeasureUnitComplexity. - * - * @param status Set if this is a SEQUENCE unit or if another error occurs. + * @param status Set if an error occurs. * @return An array of single units, owned by the caller. */ - LocalArray<MeasureUnit> getSingleUnits(UErrorCode& status) const; - - /** - * Gets the list of compound units contained within a sequence unit. - * - * For example, given "hour+minute+second", three units will be returned: "hour", "minute", - * and "second". - * - * If this is a SINGLE or COMPOUND unit, an array of length 1 will be returned. - * - * @param status Set of an error occurs. - * @return An array of compound units, owned by the caller.
- */ - LocalArray<MeasureUnit> getCompoundUnits(UErrorCode& status) const; + LocalArray<MeasureUnit> splitToSingleUnits(UErrorCode& status) const; #endif // U_HIDE_DRAFT_API /** @@ -3682,7 +3671,7 @@ class U_I18N_API MeasureUnit: public UObject { * For ICU use only. * @internal */ - void initCurrency(const char *isoCurrency); + void initCurrency(StringPiece isoCurrency); /** * For ICU use only. @@ -3694,15 +3683,15 @@ class U_I18N_API MeasureUnit: public UObject { private: - // If non-null, fId is owned by the MeasureUnit. - char* fId; + // If non-null, fImpl is owned by the MeasureUnit. + MeasureUnitImpl* fImpl; // These two ints are indices into static string lists in measunit.cpp int16_t fSubTypeId; int8_t fTypeId; MeasureUnit(int32_t typeId, int32_t subTypeId); - MeasureUnit(char* idToAdopt); + MeasureUnit(MeasureUnitImpl&& impl); void setTo(int32_t typeId, int32_t subTypeId); int32_t getOffset() const; static MeasureUnit *create(int typeId, int subTypeId, UErrorCode &status); @@ -3711,6 +3700,8 @@ private: * @return Whether subType is known to ICU.
*/ static bool findBySubType(StringPiece subType, MeasureUnit* output); + + friend struct MeasureUnitImpl; }; U_NAMESPACE_END diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 98f4ca6b440..240f2ae4c99 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -3283,10 +3283,10 @@ void MeasureFormatTest::TestCompoundUnitOperations() { assertTrue("centimeter equality", centimeter1 == centimeter2); assertTrue("kilometer inequality", centimeter1 != kilometer); - MeasureUnit squareMeter = meter.withPower(2, status); - MeasureUnit overCubicCentimeter = centimeter1.withPower(-3, status); - MeasureUnit quarticKilometer = kilometer.withPower(4, status); - MeasureUnit overQuarticKilometer1 = kilometer.withPower(-4, status); + MeasureUnit squareMeter = meter.withDimensionality(2, status); + MeasureUnit overCubicCentimeter = centimeter1.withDimensionality(-3, status); + MeasureUnit quarticKilometer = kilometer.withDimensionality(4, status); + MeasureUnit overQuarticKilometer1 = kilometer.withDimensionality(-4, status); verifySingleUnit(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter"); verifySingleUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "one-per-cubic-centimeter"); @@ -3300,7 +3300,7 @@ void MeasureFormatTest::TestCompoundUnitOperations() { .product(kilometer, status) .product(kilometer, status) .reciprocal(status); - MeasureUnit overQuarticKilometer4 = meter.withPower(4, status) + MeasureUnit overQuarticKilometer4 = meter.withDimensionality(4, status) .reciprocal(status) .withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); @@ -3313,11 +3313,11 @@ void MeasureFormatTest::TestCompoundUnitOperations() { assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer4); MeasureUnit kiloSquareSecond = MeasureUnit::getSecond() - .withPower(2, status).withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); + .withDimensionality(2, 
status).withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); MeasureUnit meterSecond = meter.product(kiloSquareSecond, status); - MeasureUnit cubicMeterSecond1 = meter.withPower(3, status).product(kiloSquareSecond, status); + MeasureUnit cubicMeterSecond1 = meter.withDimensionality(3, status).product(kiloSquareSecond, status); MeasureUnit centimeterSecond1 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status).product(kiloSquareSecond, status); - MeasureUnit secondCubicMeter = kiloSquareSecond.product(meter.withPower(3, status), status); + MeasureUnit secondCubicMeter = kiloSquareSecond.product(meter.withDimensionality(3, status), status); MeasureUnit secondCentimeter = kiloSquareSecond.product(meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status), status); MeasureUnit secondCentimeterPerKilometer = secondCentimeter.product(kilometer.reciprocal(status), status); @@ -3331,24 +3331,24 @@ void MeasureFormatTest::TestCompoundUnitOperations() { const char* centimeterSecond1Sub[] = {"centimeter", "square-kilosecond"}; verifyCompoundUnit(centimeterSecond1, "centimeter-square-kilosecond", centimeterSecond1Sub, UPRV_LENGTHOF(centimeterSecond1Sub)); - const char* secondCubicMeterSub[] = {"square-kilosecond", "cubic-meter"}; - verifyCompoundUnit(secondCubicMeter, "square-kilosecond-cubic-meter", + const char* secondCubicMeterSub[] = {"cubic-meter", "square-kilosecond"}; + verifyCompoundUnit(secondCubicMeter, "cubic-meter-square-kilosecond", secondCubicMeterSub, UPRV_LENGTHOF(secondCubicMeterSub)); - const char* secondCentimeterSub[] = {"square-kilosecond", "centimeter"}; - verifyCompoundUnit(secondCentimeter, "square-kilosecond-centimeter", + const char* secondCentimeterSub[] = {"centimeter", "square-kilosecond"}; + verifyCompoundUnit(secondCentimeter, "centimeter-square-kilosecond", secondCentimeterSub, UPRV_LENGTHOF(secondCentimeterSub)); - const char* secondCentimeterPerKilometerSub[] = {"square-kilosecond", "centimeter", "one-per-kilometer"}; - 
verifyCompoundUnit(secondCentimeterPerKilometer, "square-kilosecond-centimeter-per-kilometer", + const char* secondCentimeterPerKilometerSub[] = {"centimeter", "square-kilosecond", "one-per-kilometer"}; + verifyCompoundUnit(secondCentimeterPerKilometer, "centimeter-square-kilosecond-per-kilometer", secondCentimeterPerKilometerSub, UPRV_LENGTHOF(secondCentimeterPerKilometerSub)); - assertTrue("order matters inequality", cubicMeterSecond1 != secondCubicMeter); + assertTrue("reordering equality", cubicMeterSecond1 == secondCubicMeter); assertTrue("additional simple units inequality", secondCubicMeter != secondCentimeter); // Don't allow get/set power or SI prefix on compound units status.errIfFailureAndReset(); - meterSecond.getPower(status); + meterSecond.getDimensionality(status); status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); - meterSecond.withPower(3, status); + meterSecond.withDimensionality(3, status); status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); meterSecond.getSIPrefix(status); status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); @@ -3356,9 +3356,9 @@ void MeasureFormatTest::TestCompoundUnitOperations() { status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); // Test that StringPiece does not overflow - MeasureUnit kiloSquareSecond2 = MeasureUnit::forIdentifier({secondCentimeter.getIdentifier(), 17}, status); - verifySingleUnit(kiloSquareSecond2, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); - assertTrue("string piece equality", kiloSquareSecond == kiloSquareSecond2); + MeasureUnit centimeter3 = MeasureUnit::forIdentifier({secondCentimeter.getIdentifier(), 10}, status); + verifySingleUnit(centimeter3, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); + assertTrue("string piece equality", centimeter1 == centimeter3); MeasureUnit footInch = MeasureUnit::forIdentifier("foot+inch", status); MeasureUnit inchFoot = MeasureUnit::forIdentifier("inch+foot", status); @@ -3375,7 +3375,7 @@ void MeasureFormatTest::TestCompoundUnitOperations() { MeasureUnit 
one1; MeasureUnit one2 = MeasureUnit::forIdentifier("one", status); MeasureUnit one3 = MeasureUnit::forIdentifier("", status); - MeasureUnit squareOne = one2.withPower(2, status); + MeasureUnit squareOne = one2.withDimensionality(2, status); MeasureUnit onePerOne = one2.reciprocal(status); MeasureUnit squareKiloOne = squareOne.withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); MeasureUnit onePerSquareKiloOne = squareKiloOne.reciprocal(status); @@ -3484,7 +3484,7 @@ void MeasureFormatTest::verifySingleUnit( status.errIfFailureAndReset("%s: SI prefix", identifier); assertEquals(uid + ": Power", static_cast(power), - static_cast(unit.getPower(status))); + static_cast(unit.getDimensionality(status))); status.errIfFailureAndReset("%s: Power", identifier); assertEquals(uid + ": Identifier", identifier, @@ -3518,7 +3518,7 @@ void MeasureFormatTest::verifyCompoundUnit( unit.getComplexity(status)); status.errIfFailureAndReset("%s: Complexity", identifier); - LocalArray subUnits = unit.getSingleUnits(status); + LocalArray subUnits = unit.splitToSingleUnits(status); assertEquals(uid + ": Length", subIdentifierCount, subUnits.length()); for (int32_t i = 0;; i++) { if (i >= subIdentifierCount || i >= subUnits.length()) break; @@ -3550,7 +3550,7 @@ void MeasureFormatTest::verifySequenceUnit( unit.getComplexity(status)); status.errIfFailureAndReset("%s: Complexity", identifier); - LocalArray subUnits = unit.getCompoundUnits(status); + LocalArray subUnits = unit.splitToSingleUnits(status); assertEquals(uid + ": Length", subIdentifierCount, subUnits.length()); for (int32_t i = 0;; i++) { if (i >= subIdentifierCount || i >= subUnits.length()) break;