diff --git a/icu4c/source/common/cmemory.h b/icu4c/source/common/cmemory.h index da663918cfd..f3ba54d2ed6 100644 --- a/icu4c/source/common/cmemory.h +++ b/icu4c/source/common/cmemory.h @@ -767,7 +767,11 @@ public: } int32_t length() const { - return this->count(); + return this->fCount; + } + + T** getAlias() { + return this->fPool.getAlias(); } /** @@ -776,9 +780,22 @@ public: * @param i array index * @return reference to the array item */ - T *operator[](ptrdiff_t i) const { + T* operator[](ptrdiff_t i) const { return this->fPool[i]; } + + /** + * Append all the items from another MaybeStackVector to this one. + */ + void appendAll(const MaybeStackVector& other, UErrorCode& status) { + for (int32_t i = 0; i < other.fCount; i++) { + T* item = emplaceBack(*other[i]); + if (!item) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + } + } }; diff --git a/icu4c/source/common/unicode/localpointer.h b/icu4c/source/common/unicode/localpointer.h index b6f8b15c7fb..cf6782a0078 100644 --- a/icu4c/source/common/unicode/localpointer.h +++ b/icu4c/source/common/unicode/localpointer.h @@ -406,9 +406,19 @@ public: src.ptr=nullptr; } +#ifndef U_HIDE_DRAFT_API + /** + * Construct a LocalArray with a specified length. + * + * @param p Pointer to the array to adopt. + * @param length The length of the array. + * @return A LocalArray with a length field. + * @draft ICU 67 + */ static LocalArray withLength(T *p, int32_t length) { return LocalArray(p, length); } +#endif // U_HIDE_DRAFT_API #ifndef U_HIDE_DRAFT_API /** @@ -549,7 +559,16 @@ public: } #endif /* U_HIDE_DRAFT_API */ +#ifndef U_HIDE_DRAFT_API + /** + * The length of the array contained in the LocalArray. The size must be + * provided when the LocalArray is constructed. + * + * @return The length of the array, or -1 if unknown. 
+ * @draft ICU 67 + */ int32_t length() const { return fLength; } +#endif // U_HIDE_DRAFT_API private: int32_t fLength = -1; diff --git a/icu4c/source/i18n/i18n.vcxproj b/icu4c/source/i18n/i18n.vcxproj index 3905050287a..f5cad11f522 100644 --- a/icu4c/source/i18n/i18n.vcxproj +++ b/icu4c/source/i18n/i18n.vcxproj @@ -394,6 +394,7 @@ + diff --git a/icu4c/source/i18n/i18n.vcxproj.filters b/icu4c/source/i18n/i18n.vcxproj.filters index 31afeff4d6c..5feb9f252fb 100644 --- a/icu4c/source/i18n/i18n.vcxproj.filters +++ b/icu4c/source/i18n/i18n.vcxproj.filters @@ -836,6 +836,9 @@ formatting + + formatting + formatting diff --git a/icu4c/source/i18n/i18n_uwp.vcxproj b/icu4c/source/i18n/i18n_uwp.vcxproj index 5d6416d26f7..39b80d454d4 100644 --- a/icu4c/source/i18n/i18n_uwp.vcxproj +++ b/icu4c/source/i18n/i18n_uwp.vcxproj @@ -613,6 +613,7 @@ + diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp index 307156961b0..798f8e549d1 100644 --- a/icu4c/source/i18n/measunit.cpp +++ b/icu4c/source/i18n/measunit.cpp @@ -17,9 +17,11 @@ #if !UCONFIG_NO_FORMATTING #include "unicode/uenum.h" +#include "unicode/errorcode.h" #include "ustrenum.h" #include "cstring.h" #include "uassert.h" +#include "measunit_impl.h" U_NAMESPACE_BEGIN @@ -535,7 +537,7 @@ static const char * const gSubTypes[] = { "solar-mass", "stone", "ton", - "base", + "one", "percent", "permille", "gigawatt", @@ -2010,26 +2012,25 @@ MeasureUnit::MeasureUnit() : MeasureUnit(kBaseTypeIdx, kBaseSubTypeIdx) { } MeasureUnit::MeasureUnit(int32_t typeId, int32_t subTypeId) - : fId(nullptr), fSubTypeId(subTypeId), fTypeId(typeId) { + : fImpl(nullptr), fSubTypeId(subTypeId), fTypeId(typeId) { } MeasureUnit::MeasureUnit(const MeasureUnit &other) - : fId(nullptr) { + : fImpl(nullptr) { *this = other; } MeasureUnit::MeasureUnit(MeasureUnit &&other) noexcept - : fId(other.fId), + : fImpl(other.fImpl), fSubTypeId(other.fSubTypeId), fTypeId(other.fTypeId) { - other.fId = nullptr; + other.fImpl = nullptr; } 
-MeasureUnit::MeasureUnit(char* idToAdopt) - : fId(idToAdopt), fSubTypeId(-1), fTypeId(-1) { - if (fId == nullptr) { - // Invalid; reset to the base dimensionless unit - setTo(kBaseTypeIdx, kBaseSubTypeIdx); +MeasureUnit::MeasureUnit(MeasureUnitImpl&& impl) + : fImpl(nullptr), fSubTypeId(-1), fTypeId(-1) { + if (!findBySubType(impl.identifier.toStringPiece(), this)) { + fImpl = new MeasureUnitImpl(std::move(impl)); } } @@ -2037,16 +2038,17 @@ MeasureUnit &MeasureUnit::operator=(const MeasureUnit &other) { if (this == &other) { return *this; } - uprv_free(fId); - if (other.fId) { - fId = uprv_strdup(other.fId); - if (!fId) { + uprv_free(fImpl); + if (other.fImpl) { + ErrorCode localStatus; + fImpl = new MeasureUnitImpl(other.fImpl->copy(localStatus)); + if (!fImpl || localStatus.isFailure()) { // Unrecoverable allocation error; set to the default unit *this = MeasureUnit(); return *this; } } else { - fId = nullptr; + fImpl = nullptr; } fTypeId = other.fTypeId; fSubTypeId = other.fSubTypeId; @@ -2057,9 +2059,9 @@ MeasureUnit &MeasureUnit::operator=(MeasureUnit &&other) noexcept { if (this == &other) { return *this; } - uprv_free(fId); - fId = other.fId; - other.fId = nullptr; + uprv_free(fImpl); + fImpl = other.fImpl; + other.fImpl = nullptr; fTypeId = other.fTypeId; fSubTypeId = other.fSubTypeId; return *this; @@ -2070,8 +2072,8 @@ MeasureUnit *MeasureUnit::clone() const { } MeasureUnit::~MeasureUnit() { - uprv_free(fId); - fId = nullptr; + delete fImpl; + fImpl = nullptr; } const char *MeasureUnit::getType() const { @@ -2091,7 +2093,7 @@ const char *MeasureUnit::getSubtype() const { } const char *MeasureUnit::getIdentifier() const { - return fId ? fId : gSubTypes[getOffset()]; + return fImpl ? 
fImpl->identifier.data() : gSubTypes[getOffset()]; } UBool MeasureUnit::operator==(const UObject& other) const { @@ -2202,38 +2204,6 @@ bool MeasureUnit::findBySubType(StringPiece subType, MeasureUnit* output) { return false; } -bool MeasureUnit::parseCoreUnitIdentifier( - StringPiece coreUnitIdentifier, - MeasureUnit* numerator, - MeasureUnit* denominator, - UErrorCode& status) { - if (U_FAILURE(status)) { - return false; - } - - // First search for the whole code unit identifier as a subType - if (findBySubType(coreUnitIdentifier, numerator)) { - return false; // found a numerator but not denominator - } - - // If not found, try breaking apart numerator and denominator - int32_t perIdx = coreUnitIdentifier.find("-per-", 0); - if (perIdx == -1) { - // String does not contain "-per-" - status = U_ILLEGAL_ARGUMENT_ERROR; - return false; - } - StringPiece numeratorStr(coreUnitIdentifier, 0, perIdx); - StringPiece denominatorStr(coreUnitIdentifier, perIdx + 5); - if (findBySubType(numeratorStr, numerator) && findBySubType(denominatorStr, denominator)) { - return true; // found both a numerator and denominator - } - - // The numerator or denominator were invalid - status = U_ILLEGAL_ARGUMENT_ERROR; - return false; -} - MeasureUnit MeasureUnit::resolveUnitPerUnit( const MeasureUnit &unit, const MeasureUnit &perUnit, bool* isResolved) { int32_t unitOffset = unit.getOffset(); @@ -2289,15 +2259,15 @@ void MeasureUnit::initTime(const char *timeId) { fSubTypeId = result - gOffsets[fTypeId]; } -void MeasureUnit::initCurrency(const char *isoCurrency) { +void MeasureUnit::initCurrency(StringPiece isoCurrency) { int32_t result = binarySearch(gTypes, 0, UPRV_LENGTHOF(gTypes), "currency"); U_ASSERT(result != -1); fTypeId = result; result = binarySearch( gSubTypes, gOffsets[fTypeId], gOffsets[fTypeId + 1], isoCurrency); if (result == -1) { - fId = uprv_strdup(isoCurrency); - if (fId) { + fImpl = new MeasureUnitImpl(MeasureUnitImpl::forCurrencyCode(isoCurrency)); + if (fImpl) { 
fSubTypeId = -1; return; } @@ -2321,8 +2291,8 @@ void MeasureUnit::initNoUnit(const char *subtype) { void MeasureUnit::setTo(int32_t typeId, int32_t subTypeId) { fTypeId = typeId; fSubTypeId = subTypeId; - uprv_free(fId); - fId = nullptr; + uprv_free(fImpl); + fImpl = nullptr; } int32_t MeasureUnit::getOffset() const { diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index a7f24f1e348..d89f0f18638 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -13,6 +13,8 @@ #define UNISTR_FROM_STRING_EXPLICIT #include "cstring.h" +#include "measunit_impl.h" +#include "uarrsort.h" #include "uassert.h" #include "ucln_in.h" #include "umutex.h" @@ -308,208 +310,6 @@ private: int32_t fMatch; }; -struct SingleUnit { - int8_t power = 1; - UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; - int32_t simpleUnitIndex = 0; - StringPiece id = "one"; - - void appendTo(CharString& builder, UErrorCode& status) const { - if (simpleUnitIndex == 0) { - // Don't propagate SI prefixes and powers on one - builder.append("one", status); - return; - } - int8_t posPower = power < 0 ? 
-power : power; - if (posPower == 0) { - status = U_ILLEGAL_ARGUMENT_ERROR; - } else if (posPower == 1) { - // no-op - } else if (posPower == 2) { - builder.append("square-", status); - } else if (posPower == 3) { - builder.append("cubic-", status); - } else if (posPower < 10) { - builder.append('p', status); - builder.append(posPower + '0', status); - builder.append('-', status); - } else if (posPower <= 15) { - builder.append("p1", status); - builder.append('0' + (posPower % 10), status); - builder.append('-', status); - } else { - status = U_ILLEGAL_ARGUMENT_ERROR; - } - if (U_FAILURE(status)) { - return; - } - - if (siPrefix != UMEASURE_SI_PREFIX_ONE) { - for (const auto& siPrefixInfo : gSIPrefixStrings) { - if (siPrefixInfo.value == siPrefix) { - builder.append(siPrefixInfo.string, status); - break; - } - } - } - if (U_FAILURE(status)) { - return; - } - - builder.append(id, status); - } - - char* build(UErrorCode& status) { - if (U_FAILURE(status)) { - return nullptr; - } - CharString builder; - if (power < 0) { - builder.append("one-per-", status); - } - appendTo(builder, status); - return builder.cloneData(status); - } -}; - -class CompoundUnit { -public: - typedef MaybeStackVector SingleUnitList; - - void append(SingleUnit&& singleUnit, UErrorCode& status) { - if (singleUnit.power >= 0) { - appendImpl(numerator, std::move(singleUnit), status); - } else { - appendImpl(denominator, std::move(singleUnit), status); - } - } - - void takeReciprocal() { - auto temp = std::move(numerator); - numerator = std::move(denominator); - denominator = std::move(temp); - } - - void appendTo(CharString& builder, UErrorCode& status) const { - if (numerator.length() == 0) { - builder.append("one", status); - } else { - appendToImpl(numerator, builder, status); - } - if (denominator.length() > 0) { - builder.append("-per-", status); - appendToImpl(denominator, builder, status); - } - } - - char* build(UErrorCode& status) { - if (U_FAILURE(status)) { - return nullptr; - } - 
CharString builder; - appendTo(builder, status); - return builder.cloneData(status); - } - - const SingleUnitList& getNumeratorUnits() const { - return numerator; - } - - const SingleUnitList& getDenominatorUnits() const { - return denominator; - } - - bool isSingle() const { - return numerator.length() + denominator.length() == 1; - } - - bool isEmpty() const { - return numerator.length() + denominator.length() == 0; - } - -private: - SingleUnitList numerator; - SingleUnitList denominator; - - void appendToImpl(const SingleUnitList& unitList, CharString& builder, UErrorCode& status) const { - bool first = true; - int32_t len = unitList.length(); - for (int32_t i = 0; i < len; i++) { - if (first) { - first = false; - } else { - builder.append('-', status); - } - unitList[i]->appendTo(builder, status); - } - } - - void appendImpl(SingleUnitList& unitList, SingleUnit&& singleUnit, UErrorCode& status) { - // Check that the same simple unit doesn't already exist - for (int32_t i = 0; i < unitList.length(); i++) { - SingleUnit* candidate = unitList[i]; - if (candidate->simpleUnitIndex == singleUnit.simpleUnitIndex - && candidate->siPrefix == singleUnit.siPrefix) { - candidate->power += singleUnit.power; - return; - } - } - // Add a new unit - SingleUnit* destination = unitList.emplaceBack(); - if (!destination) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - *destination = std::move(singleUnit); - } -}; - -class SequenceUnit { -public: - typedef MaybeStackVector CompoundUnitList; - - void append(CompoundUnit&& compoundUnit, UErrorCode& status) { - CompoundUnit* destination = units.emplaceBack(); - if (!destination) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - *destination = std::move(compoundUnit); - } - - void appendTo(CharString& builder, UErrorCode& status) const { - if (units.length() == 0) { - builder.append("one", status); - return; - } - bool isFirst = true; - for (int32_t i = 0; i < units.length(); i++) { - if (isFirst) { - isFirst = false; - 
} else { - builder.append('+', status); - } - units[i]->appendTo(builder, status); - } - } - - char* build(UErrorCode& status) { - if (U_FAILURE(status)) { - return nullptr; - } - CharString builder; - appendTo(builder, status); - return builder.cloneData(status); - } - - const CompoundUnitList& getUnits() const { - return units; - } - -private: - CompoundUnitList units; -}; - class Parser { public: static Parser from(StringPiece source, UErrorCode& status) { @@ -523,71 +323,10 @@ public: return Parser(source); } - bool hasNext() const { - return fIndex < fSource.length(); - } - - SingleUnit getOnlySingleUnit(UErrorCode& status) { - bool sawPlus; - SingleUnit retval; - nextSingleUnit(retval, sawPlus, status); - if (U_FAILURE(status)) { - return retval; - } - if (sawPlus || hasNext()) { - // Expected to find only one unit in the string - status = U_ILLEGAL_ARGUMENT_ERROR; - return retval; - } - return retval; - } - - void nextCompoundUnit(CompoundUnit& result, UErrorCode& status) { - bool sawPlus; - if (U_FAILURE(status)) { - return; - } - while (hasNext()) { - int32_t previ = fIndex; - SingleUnit singleUnit; - nextSingleUnit(singleUnit, sawPlus, status); - if (U_FAILURE(status)) { - return; - } - if (sawPlus && !result.isEmpty()) { - fIndex = previ; - break; - } - result.append(std::move(singleUnit), status); - } - return; - } - - CompoundUnit getOnlyCompoundUnit(UErrorCode& status) { - CompoundUnit retval; - nextCompoundUnit(retval, status); - if (U_FAILURE(status)) { - return retval; - } - if (hasNext()) { - // Expected to find only one unit in the string - status = U_ILLEGAL_ARGUMENT_ERROR; - return retval; - } - return retval; - } - - SequenceUnit getOnlySequenceUnit(UErrorCode& status) { - SequenceUnit retval; - while (hasNext()) { - CompoundUnit compoundUnit; - nextCompoundUnit(compoundUnit, status); - if (U_FAILURE(status)) { - return retval; - } - retval.append(std::move(compoundUnit), status); - } - return retval; + MeasureUnitImpl parse(UErrorCode& 
status) { + MeasureUnitImpl result; + parseImpl(result, status); + return result; } private: @@ -602,6 +341,10 @@ private: Parser(StringPiece source) : fSource(source), fTrie(kSerializedUnitExtrasStemTrie) {} + inline bool hasNext() const { + return fIndex < fSource.length(); + } + Token nextToken(UErrorCode& status) { fTrie.reset(); int32_t match = -1; @@ -634,7 +377,7 @@ private: return Token(match); } - void nextSingleUnit(SingleUnit& result, bool& sawPlus, UErrorCode& status) { + void nextSingleUnit(TempSingleUnit& result, bool& sawPlus, UErrorCode& status) { sawPlus = false; if (U_FAILURE(status)) { return; @@ -667,7 +410,7 @@ private: goto fail; } fAfterPer = true; - result.power = -1; + result.dimensionality = -1; break; case COMPOUND_PART_TIMES: @@ -693,7 +436,7 @@ private: if (state > 0) { goto fail; } - result.power *= token.getPower(); + result.dimensionality *= token.getPower(); previ = fIndex; state = 1; break; @@ -712,8 +455,8 @@ private: return nextSingleUnit(result, sawPlus, status); case Token::TYPE_SIMPLE_UNIT: - result.simpleUnitIndex = token.getSimpleUnitIndex(); - result.id = fSource.substr(previ, fIndex - previ); + result.index = token.getSimpleUnitIndex(); + result.identifier = fSource.substr(previ, fIndex - previ); return; default: @@ -726,124 +469,303 @@ private: status = U_ILLEGAL_ARGUMENT_ERROR; return; } + + void parseImpl(MeasureUnitImpl& result, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + int32_t unitNum = 0; + while (hasNext()) { + bool sawPlus; + TempSingleUnit singleUnit; + nextSingleUnit(singleUnit, sawPlus, status); + if (U_FAILURE(status)) { + return; + } + if (singleUnit.index == 0) { + continue; + } + bool added = result.append(singleUnit, status); + if (sawPlus && !added) { + // Two similar units are not allowed in a sequence unit + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if ((++unitNum) >= 2) { + UMeasureUnitComplexity complexity = sawPlus + ? 
UMEASURE_UNIT_SEQUENCE + : UMEASURE_UNIT_COMPOUND; + if (unitNum == 2) { + U_ASSERT(result.complexity == UMEASURE_UNIT_SINGLE); + result.complexity = complexity; + } else if (result.complexity != complexity) { + // Mixed sequence and compound units + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + } + } + } }; +int32_t U_CALLCONV +compareSingleUnits(const void* /*context*/, const void* left, const void* right) { + auto realLeft = static_cast(left); + auto realRight = static_cast(right); + return (*realLeft)->compareTo(**realRight); +} + +/** + * Generate the identifier string for a single unit in place. + */ +void serializeSingle(const TempSingleUnit& singleUnit, bool first, CharString& output, UErrorCode& status) { + if (first && singleUnit.dimensionality < 0) { + output.append("one-per-", status); + } + + if (singleUnit.index == 0) { + // Don't propagate SI prefixes and powers on one + output.append("one", status); + return; + } + int8_t posPower = std::abs(singleUnit.dimensionality); + if (posPower == 0) { + status = U_INTERNAL_PROGRAM_ERROR; + } else if (posPower == 1) { + // no-op + } else if (posPower == 2) { + output.append("square-", status); + } else if (posPower == 3) { + output.append("cubic-", status); + } else if (posPower < 10) { + output.append('p', status); + output.append(posPower + '0', status); + output.append('-', status); + } else if (posPower <= 15) { + output.append("p1", status); + output.append('0' + (posPower % 10), status); + output.append('-', status); + } else { + status = U_ILLEGAL_ARGUMENT_ERROR; + } + if (U_FAILURE(status)) { + return; + } + + if (singleUnit.siPrefix != UMEASURE_SI_PREFIX_ONE) { + for (const auto& siPrefixInfo : gSIPrefixStrings) { + if (siPrefixInfo.value == singleUnit.siPrefix) { + output.append(siPrefixInfo.string, status); + break; + } + } + } + if (U_FAILURE(status)) { + return; + } + + output.append(singleUnit.identifier, status); +} + +/** + * Normalize a MeasureUnitImpl and generate the identifier string 
in place. + */ +void serialize(MeasureUnitImpl& impl, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + U_ASSERT(impl.identifier.isEmpty()); + if (impl.units.length() == 0) { + impl.identifier.append("one", status); + return; + } + if (impl.complexity == UMEASURE_UNIT_COMPOUND) { + // Note: don't sort a SEQUENCE unit + uprv_sortArray( + impl.units.getAlias(), + impl.units.length(), + sizeof(impl.units[0]), + compareSingleUnits, + nullptr, + false, + &status); + if (U_FAILURE(status)) { + return; + } + } + serializeSingle(*impl.units[0], true, impl.identifier, status); + if (impl.units.length() == 1) { + return; + } + for (int32_t i = 1; i < impl.units.length(); i++) { + const TempSingleUnit& prev = *impl.units[i-1]; + const TempSingleUnit& curr = *impl.units[i]; + if (impl.complexity == UMEASURE_UNIT_SEQUENCE) { + impl.identifier.append('+', status); + serializeSingle(curr, true, impl.identifier, status); + } else { + if (prev.dimensionality > 0 && curr.dimensionality < 0) { + impl.identifier.append("-per-", status); + } else { + impl.identifier.append('-', status); + } + serializeSingle(curr, false, impl.identifier, status); + } + } + +} + +/** @return true if a new item was added */ +bool appendImpl(MeasureUnitImpl& impl, const TempSingleUnit& unit, UErrorCode& status) { + // Find a similar unit that already exists, to attempt to coalesce + TempSingleUnit* oldUnit = nullptr; + for (int32_t i = 0; i < impl.units.length(); i++) { + auto* candidate = impl.units[i]; + if (candidate->isCompatibleWith(unit)) { + oldUnit = candidate; + } + } + if (oldUnit) { + oldUnit->dimensionality += unit.dimensionality; + } else { + TempSingleUnit* destination = impl.units.emplaceBack(); + if (!destination) { + status = U_MEMORY_ALLOCATION_ERROR; + return false; + } + *destination = unit; + } + return (oldUnit == nullptr); +} + } // namespace +TempSingleUnit TempSingleUnit::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) { + MeasureUnitImpl temp; + 
const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status); + if (U_FAILURE(status)) { + return {}; + } + if (impl.units.length() == 0) { + return {}; + } else if (impl.units.length() == 1) { + return *impl.units[0]; + } else { + status = U_ILLEGAL_ARGUMENT_ERROR; + return {}; + } +} + +MeasureUnit TempSingleUnit::build(UErrorCode& status) { + MeasureUnitImpl temp; + temp.append(*this, status); + return std::move(temp).build(status); +} + + +MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) { + return Parser::from(identifier, status).parse(status); +} + +const MeasureUnitImpl& MeasureUnitImpl::forMeasureUnit( + const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status) { + if (measureUnit.fImpl) { + return *measureUnit.fImpl; + } else { + memory = Parser::from(measureUnit.getIdentifier(), status).parse(status); + return memory; + } +} + +MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy( + const MeasureUnit& measureUnit, UErrorCode& status) { + if (measureUnit.fImpl) { + return measureUnit.fImpl->copy(status); + } else { + return Parser::from(measureUnit.getIdentifier(), status).parse(status); + } +} + +void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) { + identifier.clear(); + for (int32_t i = 0; i < units.length(); i++) { + units[i]->dimensionality *= -1; + } +} + +bool MeasureUnitImpl::append(const TempSingleUnit& singleUnit, UErrorCode& status) { + identifier.clear(); + return appendImpl(*this, singleUnit, status); +} + +MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && { + serialize(*this, status); + return MeasureUnit(std::move(*this)); +} + + MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) { - return Parser::from(identifier, status).getOnlySequenceUnit(status).build(status); + return Parser::from(identifier, status).parse(status).build(status); } UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& 
status) const { - const char* id = getIdentifier(); - Parser parser = Parser::from(id, status); - if (U_FAILURE(status)) { - return UMEASURE_UNIT_SINGLE; - } - - CompoundUnit compoundUnit; - parser.nextCompoundUnit(compoundUnit, status); - if (parser.hasNext()) { - return UMEASURE_UNIT_SEQUENCE; - } else if (compoundUnit.isSingle()) { - return UMEASURE_UNIT_SINGLE; - } else { - return UMEASURE_UNIT_COMPOUND; - } + MeasureUnitImpl temp; + return MeasureUnitImpl::forMeasureUnit(*this, temp, status).complexity; } UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const { - const char* id = getIdentifier(); - return Parser::from(id, status).getOnlySingleUnit(status).siPrefix; + return TempSingleUnit::forMeasureUnit(*this, status).siPrefix; } MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const { - const char* id = getIdentifier(); - SingleUnit singleUnit = Parser::from(id, status).getOnlySingleUnit(status); + TempSingleUnit singleUnit = TempSingleUnit::forMeasureUnit(*this, status); singleUnit.siPrefix = prefix; return singleUnit.build(status); } -int8_t MeasureUnit::getPower(UErrorCode& status) const { - const char* id = getIdentifier(); - return Parser::from(id, status).getOnlySingleUnit(status).power; +int32_t MeasureUnit::getDimensionality(UErrorCode& status) const { + return TempSingleUnit::forMeasureUnit(*this, status).dimensionality; } -MeasureUnit MeasureUnit::withPower(int8_t power, UErrorCode& status) const { - const char* id = getIdentifier(); - SingleUnit singleUnit = Parser::from(id, status).getOnlySingleUnit(status); - singleUnit.power = power; +MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const { + TempSingleUnit singleUnit = TempSingleUnit::forMeasureUnit(*this, status); + singleUnit.dimensionality = dimensionality; return singleUnit.build(status); } MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const { - const char* id = getIdentifier(); - CompoundUnit 
compoundUnit = Parser::from(id, status).getOnlyCompoundUnit(status); - compoundUnit.takeReciprocal(); - return compoundUnit.build(status); + MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status); + impl.takeReciprocal(status); + return std::move(impl).build(status); } MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const { - const char* id = getIdentifier(); - CompoundUnit compoundUnit = Parser::from(id, status).getOnlyCompoundUnit(status); - if (U_FAILURE(status)) { - return *this; - } - - // Append other's first CompoundUnit to compoundUnit, then assert other has only one - Parser otherParser = Parser::from(other.getIdentifier(), status); - otherParser.nextCompoundUnit(compoundUnit, status); - if (U_FAILURE(status)) { - return *this; - } - if (otherParser.hasNext()) { + MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status); + MeasureUnitImpl temp; + const MeasureUnitImpl& otherImpl = MeasureUnitImpl::forMeasureUnit(other, temp, status); + if (impl.complexity == UMEASURE_UNIT_SEQUENCE || otherImpl.complexity == UMEASURE_UNIT_SEQUENCE) { status = U_ILLEGAL_ARGUMENT_ERROR; - return *this; + return {}; } - - return compoundUnit.build(status); + for (int32_t i = 0; i < otherImpl.units.length(); i++) { + impl.append(*otherImpl.units[i], status); + } + if (impl.units.length() > 1) { + impl.complexity = UMEASURE_UNIT_COMPOUND; + } + return std::move(impl).build(status); } -LocalArray MeasureUnit::getSingleUnits(UErrorCode& status) const { - const char* id = getIdentifier(); - CompoundUnit compoundUnit = Parser::from(id, status).getOnlyCompoundUnit(status); - if (U_FAILURE(status)) { - return LocalArray(); +LocalArray MeasureUnit::splitToSingleUnits(UErrorCode& status) const { + MeasureUnitImpl temp; + const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, temp, status); + const int32_t length = impl.units.length(); + MeasureUnit* arr = new MeasureUnit[length]; + for (int32_t i 
= 0; i < length; i++) { + arr[i] = impl.units[i]->build(status); } - - const CompoundUnit::SingleUnitList& numerator = compoundUnit.getNumeratorUnits(); - const CompoundUnit::SingleUnitList& denominator = compoundUnit.getDenominatorUnits(); - int32_t count = numerator.length() + denominator.length(); - MeasureUnit* arr = new MeasureUnit[count]; - - int32_t i = 0; - for (int32_t j = 0; j < numerator.length(); j++) { - arr[i++] = numerator[j]->build(status); - } - for (int32_t j = 0; j < denominator.length(); j++) { - arr[i++] = denominator[j]->build(status); - } - - return LocalArray::withLength(arr, count); -} - -LocalArray MeasureUnit::getCompoundUnits(UErrorCode& status) const { - const char* id = getIdentifier(); - SequenceUnit sequenceUnit = Parser::from(id, status).getOnlySequenceUnit(status); - if (U_FAILURE(status)) { - return LocalArray(); - } - - const SequenceUnit::CompoundUnitList& unitVector = sequenceUnit.getUnits(); - int32_t count = unitVector.length(); - MeasureUnit* arr = new MeasureUnit[count]; - - for (int32_t i = 0; i < count; i++) { - arr[i] = unitVector[i]->build(status); - } - - return LocalArray::withLength(arr, count); + return LocalArray::withLength(arr, length); } diff --git a/icu4c/source/i18n/measunit_impl.h b/icu4c/source/i18n/measunit_impl.h new file mode 100644 index 00000000000..631a47fa979 --- /dev/null +++ b/icu4c/source/i18n/measunit_impl.h @@ -0,0 +1,162 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef __MEASUNIT_IMPL_H__ +#define __MEASUNIT_IMPL_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/measunit.h" +#include "cmemory.h" +#include "charstr.h" + +U_NAMESPACE_BEGIN + + +struct TempSingleUnit : public UMemory { + /** + * Gets a single unit from the MeasureUnit. If there are multiple single units, sets an error + * code and return the base dimensionless unit. Parses if necessary. 
+ */ + static TempSingleUnit forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status); + + /** Transform this TemplSingleUnit into a MeasureUnit, simplifying if possible. */ + MeasureUnit build(UErrorCode& status); + + /** Compare this TempSingleUnit to another TempSingleUnit. */ + int32_t compareTo(const TempSingleUnit& other) const { + if (dimensionality < 0 && other.dimensionality > 0) { + // Positive dimensions first + return 1; + } else if (dimensionality > 0 && other.dimensionality < 0) { + return -1; + } else if (index < other.index) { + return -1; + } else if (index > other.index) { + return 1; + } else if (siPrefix < other.siPrefix) { + return -1; + } else if (siPrefix > other.siPrefix) { + return 1; + } else { + return 0; + } + } + + /** + * Return whether this TempSingleUnit is compatible with another for the purpose of coalescing. + * + * Units with the same base unit and SI prefix should match, except that they must also have + * the same dimensionality sign, such that we don't merge numerator and denominator. + */ + bool isCompatibleWith(const TempSingleUnit& other) const { + return (compareTo(other) == 0); + } + + /** Simple unit index, unique for every simple unit. */ + int32_t index = 0; + + /** Simple unit identifier; memory not owned by the SimpleUnit. */ + StringPiece identifier; + + /** SI prefix. **/ + UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; + + /** Dimentionality. **/ + int32_t dimensionality = 1; +}; + + +/** + * Internal representation of measurement units. Capable of representing all complexities of units, + * including sequence and compound units. + */ +struct MeasureUnitImpl : public UMemory { + /** Extract the MeasureUnitImpl from a MeasureUnit. */ + static inline const MeasureUnitImpl* get(const MeasureUnit& measureUnit) { + return measureUnit.fImpl; + } + + /** + * Parse a unit identifier into a MeasureUnitImpl. + * + * @param identifier The unit identifier string. 
+ * @param status Set if the identifier string is not valid. + * @return A newly parsed value object. + */ + static MeasureUnitImpl forIdentifier(StringPiece identifier, UErrorCode& status); + + /** + * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present. + * + * @param measureUnit The source MeasureUnit. + * @param memory A place to write the new MeasureUnitImpl if parsing is required. + * @param status Set if an error occurs. + * @return A reference to either measureUnit.fImpl or memory. + */ + static const MeasureUnitImpl& forMeasureUnit( + const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status); + + /** + * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present. + * + * @param measureUnit The source MeasureUnit. + * @param status Set if an error occurs. + * @return A value object, either newly parsed or copied from measureUnit. + */ + static MeasureUnitImpl forMeasureUnitMaybeCopy( + const MeasureUnit& measureUnit, UErrorCode& status); + + /** + * Used for currency units. + */ + static inline MeasureUnitImpl forCurrencyCode(StringPiece currencyCode) { + MeasureUnitImpl result; + UErrorCode localStatus = U_ZERO_ERROR; + result.identifier.append(currencyCode, localStatus); + // localStatus is not expected to fail since currencyCode should be 3 chars long + return result; + } + + /** Transform this MeasureUnitImpl into a MeasureUnit, simplifying if possible. */ + MeasureUnit build(UErrorCode& status) &&; + + /** + * Create a copy of this MeasureUnitImpl. Don't use copy constructor to make this explicit. + */ + inline MeasureUnitImpl copy(UErrorCode& status) const { + MeasureUnitImpl result; + result.complexity = complexity; + result.units.appendAll(units, status); + result.identifier.append(identifier, status); + return result; + } + + /** Mutates this MeasureUnitImpl to take the reciprocal. 
*/ + void takeReciprocal(UErrorCode& status); + + /** Mutates this MeasureUnitImpl to append a single unit. */ + bool append(const TempSingleUnit& singleUnit, UErrorCode& status); + + /** The complexity, either SINGLE, COMPOUND, or SEQUENCE. */ + UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE; + + /** + * The list of simple units. These may be summed or multiplied, based on the value of the + * complexity field. + */ + MaybeStackVector units; + + /** + * The full unit identifier. Owned by the MeasureUnitImpl. Empty if not computed. + */ + CharString identifier; +}; + + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ +#endif //__MEASUNIT_IMPL_H__ diff --git a/icu4c/source/i18n/nounit.cpp b/icu4c/source/i18n/nounit.cpp index 076f76f199c..b993cb56adb 100644 --- a/icu4c/source/i18n/nounit.cpp +++ b/icu4c/source/i18n/nounit.cpp @@ -11,7 +11,7 @@ U_NAMESPACE_BEGIN UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NoUnit) NoUnit U_EXPORT2 NoUnit::base() { - return NoUnit("base"); + return NoUnit("one"); } NoUnit U_EXPORT2 NoUnit::percent() { diff --git a/icu4c/source/i18n/number_asformat.cpp b/icu4c/source/i18n/number_asformat.cpp index e876174fdce..9d10d1f5580 100644 --- a/icu4c/source/i18n/number_asformat.cpp +++ b/icu4c/source/i18n/number_asformat.cpp @@ -102,4 +102,16 @@ const LocalizedNumberFormatter& LocalizedNumberFormatterAsFormat::getNumberForma return fFormatter; } + +// Definitions of public API methods (put here for dependency disentanglement) + +Format* LocalizedNumberFormatter::toFormat(UErrorCode& status) const { + if (U_FAILURE(status)) { + return nullptr; + } + LocalPointer retval( + new LocalizedNumberFormatterAsFormat(*this, fMacros.locale), status); + return retval.orphan(); +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/number_fluent.cpp b/icu4c/source/i18n/number_fluent.cpp index 2dbd2fa6cd5..9cdb8b7156e 100644 --- a/icu4c/source/i18n/number_fluent.cpp +++ b/icu4c/source/i18n/number_fluent.cpp @@ -11,7 +11,6 @@ 
#include "number_formatimpl.h" #include "umutex.h" #include "number_asformat.h" -#include "number_skeletons.h" #include "number_utils.h" #include "number_utypes.h" #include "util.h" @@ -21,6 +20,16 @@ using namespace icu; using namespace icu::number; using namespace icu::number::impl; +#if (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(_MSC_VER) +// Ignore MSVC warning 4661. This is generated for NumberFormatterSettings<>::toSkeleton() as this method +// is defined elsewhere (in number_skeletons.cpp). The compiler is warning that the explicit template instantiation +// inside this single translation unit (CPP file) is incomplete, and thus it isn't sure if the template class is +// fully defined. However, since each translation unit explicitly instantiates all the necessary template classes, +// they will all be passed to the linker, and the linker will still find and export all the class members. +#pragma warning(push) +#pragma warning(disable: 4661) +#endif + template Derived NumberFormatterSettings::notation(const Notation& notation) const& { Derived copy(*this); @@ -320,16 +329,7 @@ Derived NumberFormatterSettings::macros(impl::MacroProps&& macros)&& { return move; } -template -UnicodeString NumberFormatterSettings::toSkeleton(UErrorCode& status) const { - if (U_FAILURE(status)) { - return ICU_Utility::makeBogusString(); - } - if (fMacros.copyErrorTo(status)) { - return ICU_Utility::makeBogusString(); - } - return skeleton::generate(fMacros, status); -} +// Note: toSkeleton defined in number_skeletons.cpp template LocalPointer NumberFormatterSettings::clone() const & { @@ -358,15 +358,7 @@ LocalizedNumberFormatter NumberFormatter::withLocale(const Locale& locale) { return with().locale(locale); } -UnlocalizedNumberFormatter -NumberFormatter::forSkeleton(const UnicodeString& skeleton, UErrorCode& status) { - return skeleton::create(skeleton, nullptr, status); -} - -UnlocalizedNumberFormatter -NumberFormatter::forSkeleton(const UnicodeString& 
skeleton, UParseError& perror, UErrorCode& status) { - return skeleton::create(skeleton, &perror, status); -} +// Note: forSkeleton defined in number_skeletons.cpp template using NFS = NumberFormatterSettings; @@ -766,14 +758,11 @@ int32_t LocalizedNumberFormatter::getCallCount() const { return umtx_loadAcquire(*callCount); } -Format* LocalizedNumberFormatter::toFormat(UErrorCode& status) const { - if (U_FAILURE(status)) { - return nullptr; - } - LocalPointer retval( - new LocalizedNumberFormatterAsFormat(*this, fMacros.locale), status); - return retval.orphan(); -} +// Note: toFormat defined in number_asformat.cpp +#if (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(_MSC_VER) +// Warning 4661. +#pragma warning(pop) +#endif #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/number_longnames.cpp b/icu4c/source/i18n/number_longnames.cpp index 5378eda8b24..74ee0ef3fd3 100644 --- a/icu4c/source/i18n/number_longnames.cpp +++ b/icu4c/source/i18n/number_longnames.cpp @@ -188,6 +188,12 @@ LongNameHandler* LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unitRef, const MeasureUnit &perUnit, const UNumberUnitWidth &width, const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status) { + if (uprv_strlen(unitRef.getType()) == 0 || uprv_strlen(perUnit.getType()) == 0) { + // TODO(ICU-20941): Unsanctioned unit. Not yet fully supported. Set an error code. + status = U_UNSUPPORTED_ERROR; + return nullptr; + } + MeasureUnit unit = unitRef; if (uprv_strcmp(perUnit.getType(), "none") != 0) { // Compound unit: first try to simplify (e.g., meters per second is its own unit). 
diff --git a/icu4c/source/i18n/number_skeletons.cpp b/icu4c/source/i18n/number_skeletons.cpp index fe96279467f..99e5241e70e 100644 --- a/icu4c/source/i18n/number_skeletons.cpp +++ b/icu4c/source/i18n/number_skeletons.cpp @@ -22,12 +22,24 @@ #include "charstr.h" #include "string_segment.h" #include "unicode/errorcode.h" +#include "util.h" +#include "measunit_impl.h" using namespace icu; using namespace icu::number; using namespace icu::number::impl; using namespace icu::number::impl::skeleton; +#if (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(_MSC_VER) +// Ignore MSVC warning 4661. This is generated for NumberFormatterSettings<>::toSkeleton() as this method +// is defined elsewhere (in number_skeletons.cpp). The compiler is warning that the explicit template instantiation +// inside this single translation unit (CPP file) is incomplete, and thus it isn't sure if the template class is +// fully defined. However, since each translation unit explicitly instantiates all the necessary template classes, +// they will all be passed to the linker, and the linker will still find and export all the class members. +#pragma warning(push) +#pragma warning(disable: 4661) +#endif + namespace { icu::UInitOnce gNumberSkeletonsInitOnce = U_INITONCE_INITIALIZER; @@ -1040,12 +1052,23 @@ void blueprint_helpers::parseIdentifierUnitOption(const StringSegment& segment, SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status); ErrorCode internalStatus; - MeasureUnit::parseCoreUnitIdentifier(buffer.toStringPiece(), ¯os.unit, ¯os.perUnit, internalStatus); + auto fullUnit = MeasureUnitImpl::forIdentifier(buffer.toStringPiece(), internalStatus); if (internalStatus.isFailure()) { // throw new SkeletonSyntaxException("Invalid core unit identifier", segment, e); status = U_NUMBER_SKELETON_SYNTAX_ERROR; return; } + + // TODO(ICU-20941): Clean this up. 
+ for (int32_t i = 0; i < fullUnit.units.length(); i++) { + TempSingleUnit* subUnit = fullUnit.units[i]; + if (subUnit->dimensionality > 0) { + macros.unit = macros.unit.product(subUnit->build(status), status); + } else { + subUnit->dimensionality *= -1; + macros.perUnit = macros.perUnit.product(subUnit->build(status), status); + } + } } void blueprint_helpers::parseFractionStem(const StringSegment& segment, MacroProps& macros, @@ -1674,4 +1697,39 @@ bool GeneratorHelpers::scale(const MacroProps& macros, UnicodeString& sb, UError } +// Definitions of public API methods (put here for dependency disentanglement) + +template +UnicodeString NumberFormatterSettings::toSkeleton(UErrorCode& status) const { + if (U_FAILURE(status)) { + return ICU_Utility::makeBogusString(); + } + if (fMacros.copyErrorTo(status)) { + return ICU_Utility::makeBogusString(); + } + return skeleton::generate(fMacros, status); +} + +// Declare all classes that implement NumberFormatterSettings +// See https://stackoverflow.com/a/495056/1407170 +template +class icu::number::NumberFormatterSettings; +template +class icu::number::NumberFormatterSettings; + +UnlocalizedNumberFormatter +NumberFormatter::forSkeleton(const UnicodeString& skeleton, UErrorCode& status) { + return skeleton::create(skeleton, nullptr, status); +} + +UnlocalizedNumberFormatter +NumberFormatter::forSkeleton(const UnicodeString& skeleton, UParseError& perror, UErrorCode& status) { + return skeleton::create(skeleton, &perror, status); +} + +#if (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(_MSC_VER) +// Warning 4661. 
+#pragma warning(pop) +#endif + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index 4ec0f62684f..8f9996d8591 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -30,7 +30,9 @@ U_NAMESPACE_BEGIN class StringEnumeration; +struct MeasureUnitImpl; +#ifndef U_HIDE_DRAFT_API /** * Enumeration for unit complexity. There are three levels: * @@ -223,6 +225,7 @@ typedef enum UMeasureSIPrefix { */ UMEASURE_SI_PREFIX_YOCTO = -24 } UMeasureSIPrefix; +#endif // U_HIDE_DRAFT_API /** * A unit such as length, mass, volume, currency, etc. A unit is @@ -253,6 +256,7 @@ class U_I18N_API MeasureUnit: public UObject { */ MeasureUnit(MeasureUnit &&other) noexcept; +#ifndef U_HIDE_DRAFT_API /** * Construct a MeasureUnit from a CLDR Sequence Unit Identifier, defined in UTS 35. * Validates and canonicalizes the identifier. @@ -261,11 +265,12 @@ class U_I18N_API MeasureUnit: public UObject { * MeasureUnit example = MeasureUnit::forIdentifier("furlong-per-nanosecond") * * - * @param id The CLDR Sequence Unit Identifier + * @param identifier The CLDR Sequence Unit Identifier * @param status Set if the identifier is invalid. * @draft ICU 67 */ static MeasureUnit forIdentifier(StringPiece identifier, UErrorCode& status); +#endif // U_HIDE_DRAFT_API /** * Copy assignment operator. @@ -326,6 +331,7 @@ class U_I18N_API MeasureUnit: public UObject { */ const char *getSubtype() const; +#ifndef U_HIDE_DRAFT_API /** * Get the CLDR Sequence Unit Identifier for this MeasureUnit, as defined in UTS 35. * @@ -371,28 +377,29 @@ class U_I18N_API MeasureUnit: public UObject { UMeasureSIPrefix getSIPrefix(UErrorCode& status) const; /** - * Creates a MeasureUnit which is this SINGLE unit augmented with the specified power. For - * example, if power is 2, the unit will be squared. + * Creates a MeasureUnit which is this SINGLE unit augmented with the specified dimensionality + * (power). 
For example, if dimensionality is 2, the unit will be squared. * * NOTE: Only works on SINGLE units. If this is a COMPOUND or SEQUENCE unit, an error will * occur. For more information, see UMeasureUnitComplexity. * - * @param power The power. + * @param dimensionality The dimensionality (power). * @param status Set if this is not a SINGLE unit or if another error occurs. * @return A new SINGLE unit. */ - MeasureUnit withPower(int8_t power, UErrorCode& status) const; + MeasureUnit withDimensionality(int32_t dimensionality, UErrorCode& status) const; /** - * Gets the power of this MeasureUnit. For example, if the unit is square, then 2 is returned. + * Gets the dimensionality (power) of this MeasureUnit. For example, if the unit is square, + * then 2 is returned. * * NOTE: Only works on SINGLE units. If this is a COMPOUND or SEQUENCE unit, an error will * occur. For more information, see UMeasureUnitComplexity. * * @param status Set if this is not a SINGLE unit or if another error occurs. - * @return The power of this simple unit. + * @return The dimensionality (power) of this simple unit. */ - int8_t getPower(UErrorCode& status) const; + int32_t getDimensionality(UErrorCode& status) const; /** * Gets the reciprocal of this MeasureUnit, with the numerator and denominator flipped. @@ -419,39 +426,28 @@ class U_I18N_API MeasureUnit: public UObject { * NOTE: Only works on SINGLE and COMPOUND units. If either unit (receivee and argument) is a * SEQUENCE unit, an error will occur. For more information, see UMeasureUnitComplexity. * + * @param other The MeasureUnit to multiply with the target. * @param status Set if this or other is a SEQUENCE unit or if another error occurs. * @return The product of the target unit with the provided unit. */ MeasureUnit product(const MeasureUnit& other, UErrorCode& status) const; /** - * Gets the list of single units contained within a compound unit. + * Gets the list of SINGLE units contained within a SEQUENCE or COMPOUND unit.
* - * For example, given "meter-kilogram-per-second", three units will be returned: "meter", - * "kilogram", and "one-per-second". + * Examples: + * - Given "meter-kilogram-per-second", three units will be returned: "meter", + * "kilogram", and "one-per-second". + * - Given "hour+minute+second", three units will be returned: "hour", "minute", + * and "second". * * If this is a SINGLE unit, an array of length 1 will be returned. * - * NOTE: Only works on SINGLE and COMPOUND units. If this is a SEQUENCE unit, an error will - * occur. For more information, see UMeasureUnitComplexity. - * - * @param status Set if this is a SEQUENCE unit or if another error occurs. + * @param status Set if an error occurs. * @return An array of single units, owned by the caller. */ - LocalArray getSingleUnits(UErrorCode& status) const; - - /** - * Gets the list of compound units contained within a sequence unit. - * - * For example, given "hour+minute+second", three units will be returned: "hour", "minute", - * and "second". - * - * If this is a SINGLE or COMPOUND unit, an array of length 1 will be returned. - * - * @param status Set of an error occurs. - * @return An array of compound units, owned by the caller. - */ - LocalArray getCompoundUnits(UErrorCode& status) const; + LocalArray splitToSingleUnits(UErrorCode& status) const; +#endif // U_HIDE_DRAFT_API /** * getAvailable gets all of the available units. @@ -547,26 +543,6 @@ class U_I18N_API MeasureUnit: public UObject { */ static int32_t internalGetIndexForTypeAndSubtype(const char *type, const char *subtype); - /** - * ICU use only. - * @return Whether subType is known to ICU. - * @internal - */ - static bool findBySubType(StringPiece subType, MeasureUnit* output); - - /** - * ICU use only. - * Parse a core unit identifier into a numerator and denominator unit. - * @param coreUnitIdentifier The string to parse. - * @param numerator Output: set to the numerator unit. 
- * @param denominator Output: set to the denominator unit, if present. - * @param status Set to U_ILLEGAL_ARGUMENT_ERROR if the core unit identifier is not known. - * @return Whether both a numerator and denominator are returned. - * @internal - */ - static bool parseCoreUnitIdentifier( - StringPiece coreUnitIdentifier, MeasureUnit* numerator, MeasureUnit* denominator, UErrorCode& status); - /** * ICU use only. * @internal @@ -3695,7 +3671,7 @@ class U_I18N_API MeasureUnit: public UObject { * For ICU use only. * @internal */ - void initCurrency(const char *isoCurrency); + void initCurrency(StringPiece isoCurrency); /** * For ICU use only. @@ -3707,18 +3683,25 @@ class U_I18N_API MeasureUnit: public UObject { private: - // If non-null, fId is owned by the MeasureUnit. - char* fId; + // If non-null, fImpl is owned by the MeasureUnit. + MeasureUnitImpl* fImpl; // These two ints are indices into static string lists in measunit.cpp int16_t fSubTypeId; int8_t fTypeId; MeasureUnit(int32_t typeId, int32_t subTypeId); - MeasureUnit(char* idToAdopt); + MeasureUnit(MeasureUnitImpl&& impl); void setTo(int32_t typeId, int32_t subTypeId); int32_t getOffset() const; static MeasureUnit *create(int typeId, int subTypeId, UErrorCode &status); + + /** + * @return Whether subType is known to ICU. + */ + static bool findBySubType(StringPiece subType, MeasureUnit* output); + + friend struct MeasureUnitImpl; }; U_NAMESPACE_END diff --git a/icu4c/source/i18n/unicode/numberformatter.h b/icu4c/source/i18n/unicode/numberformatter.h index 58b13c36314..88ff2d7c6a4 100644 --- a/icu4c/source/i18n/unicode/numberformatter.h +++ b/icu4c/source/i18n/unicode/numberformatter.h @@ -1439,6 +1439,16 @@ struct U_I18N_API MacroProps : public UMemory { } // namespace impl +#if (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(_MSC_VER) +// Ignore MSVC warning 4661. 
This is generated for NumberFormatterSettings<>::toSkeleton() as this method +// is defined elsewhere (in number_skeletons.cpp). The compiler is warning that the explicit template instantiation +// inside this single translation unit (CPP file) is incomplete, and thus it isn't sure if the template class is +// fully defined. However, since each translation unit explicitly instantiates all the necessary template classes, +// they will all be passed to the linker, and the linker will still find and export all the class members. +#pragma warning(push) +#pragma warning(disable: 4661) +#endif + /** * An abstract base class for specifying settings related to number formatting. This class is implemented by * {@link UnlocalizedNumberFormatter} and {@link LocalizedNumberFormatter}. This class is not intended for @@ -2405,6 +2415,11 @@ class U_I18N_API LocalizedNumberFormatter friend class UnlocalizedNumberFormatter; }; +#if (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(_MSC_VER) +// Warning 4661. +#pragma warning(pop) +#endif + /** * The result of a number formatting operation. This class allows the result to be exported in several data types, * including a UnicodeString and a FieldPositionIterator. 
diff --git a/icu4c/source/test/depstest/dependencies.txt b/icu4c/source/test/depstest/dependencies.txt index 080beeff637..98da9a19152 100644 --- a/icu4c/source/test/depstest/dependencies.txt +++ b/icu4c/source/test/depstest/dependencies.txt @@ -869,7 +869,7 @@ library: i18n dayperiodrules listformatter formatting formattable_cnv regex regex_cnv translit - double_conversion number_representation number_output numberformatter numberparser + double_conversion number_representation number_output numberformatter number_skeletons numberparser units_extra universal_time_scale uclean_i18n @@ -982,15 +982,15 @@ group: number_output group: numberformatter # ICU 60+ NumberFormatter API - number_affixutils.o number_asformat.o - number_capi.o number_compact.o number_currencysymbols.o + number_affixutils.o + number_compact.o number_currencysymbols.o number_decimfmtprops.o number_fluent.o number_formatimpl.o number_grouping.o number_integerwidth.o number_longnames.o number_mapper.o number_modifiers.o number_multiplier.o number_notation.o number_padding.o number_patternmodifier.o number_patternstring.o number_rounding.o - number_scientific.o number_skeletons.o + number_scientific.o currpinf.o dcfmtsym.o numsys.o numrange_fluent.o numrange_impl.o deps @@ -998,6 +998,13 @@ group: numberformatter number_representation number_output uclean_i18n common +group: number_skeletons + # Number skeleton support; separated from numberformatter + number_skeletons.o number_capi.o number_asformat.o + deps + numberformatter + units_extra + group: numberparser numparse_affixes.o numparse_compositions.o numparse_currency.o numparse_decimal.o numparse_impl.o numparse_parsednumber.o @@ -1038,7 +1045,8 @@ group: formatting # messageformat choicfmt.o msgfmt.o plurfmt.o selfmt.o umsg.o deps - decnumber formattable format units numberformatter numberparser formatted_value_sbimpl + decnumber formattable format units numberformatter number_skeletons numberparser + formatted_value_sbimpl listformatter 
dayperiodrules collation collation_builder # for rbnf @@ -1057,12 +1065,12 @@ group: sharedbreakiterator group: units_extra measunit_extra.o deps - units + units ucharstriebuilder ucharstrie uclean_i18n group: units measunit.o currunit.o nounit.o deps - stringenumeration + stringenumeration errorcode group: decnumber decContext.o decNumber.o diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 4f65bc619fd..d784abe0d03 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -81,6 +81,8 @@ private: void TestNumericTimeSomeSpecialFormats(); void TestInvalidIdentifiers(); void TestCompoundUnitOperations(); + void TestIdentifiers(); + void verifyFormat( const char *description, const MeasureFormat &fmt, @@ -201,6 +203,7 @@ void MeasureFormatTest::runIndexedTest( TESTCASE_AUTO(TestNumericTimeSomeSpecialFormats); TESTCASE_AUTO(TestInvalidIdentifiers); TESTCASE_AUTO(TestCompoundUnitOperations); + TESTCASE_AUTO(TestIdentifiers); TESTCASE_AUTO_END; } @@ -3283,10 +3286,10 @@ void MeasureFormatTest::TestCompoundUnitOperations() { assertTrue("centimeter equality", centimeter1 == centimeter2); assertTrue("kilometer inequality", centimeter1 != kilometer); - MeasureUnit squareMeter = meter.withPower(2, status); - MeasureUnit overCubicCentimeter = centimeter1.withPower(-3, status); - MeasureUnit quarticKilometer = kilometer.withPower(4, status); - MeasureUnit overQuarticKilometer1 = kilometer.withPower(-4, status); + MeasureUnit squareMeter = meter.withDimensionality(2, status); + MeasureUnit overCubicCentimeter = centimeter1.withDimensionality(-3, status); + MeasureUnit quarticKilometer = kilometer.withDimensionality(4, status); + MeasureUnit overQuarticKilometer1 = kilometer.withDimensionality(-4, status); verifySingleUnit(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter"); verifySingleUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "one-per-cubic-centimeter"); 
@@ -3300,7 +3303,7 @@ void MeasureFormatTest::TestCompoundUnitOperations() { .product(kilometer, status) .product(kilometer, status) .reciprocal(status); - MeasureUnit overQuarticKilometer4 = meter.withPower(4, status) + MeasureUnit overQuarticKilometer4 = meter.withDimensionality(4, status) .reciprocal(status) .withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); @@ -3313,11 +3316,11 @@ void MeasureFormatTest::TestCompoundUnitOperations() { assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer4); MeasureUnit kiloSquareSecond = MeasureUnit::getSecond() - .withPower(2, status).withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); + .withDimensionality(2, status).withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); MeasureUnit meterSecond = meter.product(kiloSquareSecond, status); - MeasureUnit cubicMeterSecond1 = meter.withPower(3, status).product(kiloSquareSecond, status); + MeasureUnit cubicMeterSecond1 = meter.withDimensionality(3, status).product(kiloSquareSecond, status); MeasureUnit centimeterSecond1 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status).product(kiloSquareSecond, status); - MeasureUnit secondCubicMeter = kiloSquareSecond.product(meter.withPower(3, status), status); + MeasureUnit secondCubicMeter = kiloSquareSecond.product(meter.withDimensionality(3, status), status); MeasureUnit secondCentimeter = kiloSquareSecond.product(meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status), status); MeasureUnit secondCentimeterPerKilometer = secondCentimeter.product(kilometer.reciprocal(status), status); @@ -3331,24 +3334,24 @@ void MeasureFormatTest::TestCompoundUnitOperations() { const char* centimeterSecond1Sub[] = {"centimeter", "square-kilosecond"}; verifyCompoundUnit(centimeterSecond1, "centimeter-square-kilosecond", centimeterSecond1Sub, UPRV_LENGTHOF(centimeterSecond1Sub)); - const char* secondCubicMeterSub[] = {"square-kilosecond", "cubic-meter"}; - verifyCompoundUnit(secondCubicMeter, "square-kilosecond-cubic-meter", + const char* 
secondCubicMeterSub[] = {"cubic-meter", "square-kilosecond"}; + verifyCompoundUnit(secondCubicMeter, "cubic-meter-square-kilosecond", secondCubicMeterSub, UPRV_LENGTHOF(secondCubicMeterSub)); - const char* secondCentimeterSub[] = {"square-kilosecond", "centimeter"}; - verifyCompoundUnit(secondCentimeter, "square-kilosecond-centimeter", + const char* secondCentimeterSub[] = {"centimeter", "square-kilosecond"}; + verifyCompoundUnit(secondCentimeter, "centimeter-square-kilosecond", secondCentimeterSub, UPRV_LENGTHOF(secondCentimeterSub)); - const char* secondCentimeterPerKilometerSub[] = {"square-kilosecond", "centimeter", "one-per-kilometer"}; - verifyCompoundUnit(secondCentimeterPerKilometer, "square-kilosecond-centimeter-per-kilometer", + const char* secondCentimeterPerKilometerSub[] = {"centimeter", "square-kilosecond", "one-per-kilometer"}; + verifyCompoundUnit(secondCentimeterPerKilometer, "centimeter-square-kilosecond-per-kilometer", secondCentimeterPerKilometerSub, UPRV_LENGTHOF(secondCentimeterPerKilometerSub)); - assertTrue("order matters inequality", cubicMeterSecond1 != secondCubicMeter); + assertTrue("reordering equality", cubicMeterSecond1 == secondCubicMeter); assertTrue("additional simple units inequality", secondCubicMeter != secondCentimeter); // Don't allow get/set power or SI prefix on compound units status.errIfFailureAndReset(); - meterSecond.getPower(status); + meterSecond.getDimensionality(status); status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); - meterSecond.withPower(3, status); + meterSecond.withDimensionality(3, status); status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); meterSecond.getSIPrefix(status); status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); @@ -3356,9 +3359,9 @@ void MeasureFormatTest::TestCompoundUnitOperations() { status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); // Test that StringPiece does not overflow - MeasureUnit kiloSquareSecond2 = MeasureUnit::forIdentifier({secondCentimeter.getIdentifier(), 17}, 
status); - verifySingleUnit(kiloSquareSecond2, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); - assertTrue("string piece equality", kiloSquareSecond == kiloSquareSecond2); + MeasureUnit centimeter3 = MeasureUnit::forIdentifier({secondCentimeter.getIdentifier(), 10}, status); + verifySingleUnit(centimeter3, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); + assertTrue("string piece equality", centimeter1 == centimeter3); MeasureUnit footInch = MeasureUnit::forIdentifier("foot+inch", status); MeasureUnit inchFoot = MeasureUnit::forIdentifier("inch+foot", status); @@ -3372,30 +3375,55 @@ void MeasureFormatTest::TestCompoundUnitOperations() { assertTrue("order matters inequality", footInch != inchFoot); - // TODO(ICU-20920): Enable the one1 tests when the dimensionless base unit ID is updated - // MeasureUnit one1; + MeasureUnit one1; MeasureUnit one2 = MeasureUnit::forIdentifier("one", status); MeasureUnit one3 = MeasureUnit::forIdentifier("", status); - MeasureUnit squareOne = one2.withPower(2, status); + MeasureUnit squareOne = one2.withDimensionality(2, status); MeasureUnit onePerOne = one2.reciprocal(status); MeasureUnit squareKiloOne = squareOne.withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); MeasureUnit onePerSquareKiloOne = squareKiloOne.reciprocal(status); MeasureUnit oneOne = MeasureUnit::forIdentifier("one-one", status); MeasureUnit onePlusOne = MeasureUnit::forIdentifier("one+one", status); + MeasureUnit kilometer2 = one2.product(kilometer, status); - // verifySingleUnit(one1, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(one1, UMEASURE_SI_PREFIX_ONE, 1, "one"); verifySingleUnit(one2, UMEASURE_SI_PREFIX_ONE, 1, "one"); verifySingleUnit(one3, UMEASURE_SI_PREFIX_ONE, 1, "one"); verifySingleUnit(squareOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); - verifySingleUnit(onePerOne, UMEASURE_SI_PREFIX_ONE, -1, "one-per-one"); + verifySingleUnit(onePerOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); verifySingleUnit(squareKiloOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); - 
verifySingleUnit(onePerSquareKiloOne, UMEASURE_SI_PREFIX_ONE, -1, "one-per-one"); + verifySingleUnit(onePerSquareKiloOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); verifySingleUnit(oneOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); verifySingleUnit(onePlusOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(kilometer2, UMEASURE_SI_PREFIX_KILO, 1, "kilometer"); - // assertTrue("one equality", one1 == one2); + assertTrue("one equality", one1 == one2); assertTrue("one equality", one2 == one3); assertTrue("one-per-one equality", onePerOne == onePerSquareKiloOne); + assertTrue("kilometer equality", kilometer == kilometer2); +} + +void MeasureFormatTest::TestIdentifiers() { + IcuTestErrorCode status(*this, "TestIdentifiers"); + struct TestCase { + bool valid; + const char* id; + const char* normalized; + } cases[] = { + { true, "square-meter-per-square-meter", "square-meter-per-square-meter" }, + // TODO(ICU-20920): Add more test cases once the proper ranking is available. + }; + for (const auto& cas : cases) { + status.setScope(cas.id); + MeasureUnit unit = MeasureUnit::forIdentifier(cas.id, status); + if (!cas.valid) { + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + continue; + } + const char* actual = unit.getIdentifier(); + assertEquals(cas.id, cas.normalized, actual); + status.errIfFailureAndReset(); + } } @@ -3482,7 +3510,7 @@ void MeasureFormatTest::verifySingleUnit( status.errIfFailureAndReset("%s: SI prefix", identifier); assertEquals(uid + ": Power", static_cast(power), - static_cast(unit.getPower(status))); + static_cast(unit.getDimensionality(status))); status.errIfFailureAndReset("%s: Power", identifier); assertEquals(uid + ": Identifier", identifier, @@ -3516,7 +3544,7 @@ void MeasureFormatTest::verifyCompoundUnit( unit.getComplexity(status)); status.errIfFailureAndReset("%s: Complexity", identifier); - LocalArray subUnits = unit.getSingleUnits(status); + LocalArray subUnits = unit.splitToSingleUnits(status); assertEquals(uid + ": Length", 
subIdentifierCount, subUnits.length()); for (int32_t i = 0;; i++) { if (i >= subIdentifierCount || i >= subUnits.length()) break; @@ -3548,7 +3576,7 @@ void MeasureFormatTest::verifySequenceUnit( unit.getComplexity(status)); status.errIfFailureAndReset("%s: Complexity", identifier); - LocalArray subUnits = unit.getCompoundUnits(status); + LocalArray subUnits = unit.splitToSingleUnits(status); assertEquals(uid + ": Length", subIdentifierCount, subUnits.length()); for (int32_t i = 0;; i++) { if (i >= subIdentifierCount || i >= subUnits.length()) break; diff --git a/icu4c/source/test/intltest/numbertest_api.cpp b/icu4c/source/test/intltest/numbertest_api.cpp index ba3b927ad9e..c379609cc23 100644 --- a/icu4c/source/test/intltest/numbertest_api.cpp +++ b/icu4c/source/test/intltest/numbertest_api.cpp @@ -674,7 +674,7 @@ void NumberFormatterApiTest::unitCompoundMeasure() { assertFormatDescending( u"Meters Per Second Short (unit that simplifies) and perUnit method", u"measure-unit/length-meter per-measure-unit/duration-second", - u"~unit/meter-per-second", // does not round-trip to the full skeleton above + u"unit/meter-per-second", NumberFormatter::with().unit(METER).perUnit(SECOND), Locale::getEnglish(), u"87,650 m/s", @@ -718,6 +718,23 @@ void NumberFormatterApiTest::unitCompoundMeasure() { u"0.08765 J/fur", u"0.008765 J/fur", u"0 J/fur"); + + // TODO(ICU-20941): Support constructions such as this one. 
+ // assertFormatDescending( + // u"Joules Per Furlong Short with unit identifier via API", + // u"measure-unit/energy-joule per-measure-unit/length-furlong", + // u"unit/joule-per-furlong", + // NumberFormatter::with().unit(MeasureUnit::forIdentifier("joule-per-furlong", status)), + // Locale::getEnglish(), + // u"87,650 J/fur", + // u"8,765 J/fur", + // u"876.5 J/fur", + // u"87.65 J/fur", + // u"8.765 J/fur", + // u"0.8765 J/fur", + // u"0.08765 J/fur", + // u"0.008765 J/fur", + // u"0 J/fur"); } void NumberFormatterApiTest::unitCurrency() { @@ -2777,7 +2794,7 @@ void NumberFormatterApiTest::fieldPositionCoverage() { FormattedNumber result = assertFormatSingle( message, u"measure-unit/length-meter per-measure-unit/duration-second unit-width-full-name", - u"~unit/meter-per-second unit-width-full-name", // does not round-trip to the full skeleton above + u"unit/meter-per-second unit-width-full-name", NumberFormatter::with().unit(METER).perUnit(SECOND).unitWidth(UNUM_UNIT_WIDTH_FULL_NAME), "ky", // locale with the interesting data 68,