diff --git a/icu4c/source/common/cmemory.h b/icu4c/source/common/cmemory.h index 7f7fd8d0864..8d604420215 100644 --- a/icu4c/source/common/cmemory.h +++ b/icu4c/source/common/cmemory.h @@ -274,7 +274,10 @@ inline T *LocalMemory::allocateInsteadAndCopy(int32_t newCapacity, int32_t le * * WARNING: MaybeStackArray only works with primitive (plain-old data) types. * It does NOT know how to call a destructor! If you work with classes with - * destructors, consider LocalArray in localpointer.h or MemoryPool. + * destructors, consider: + * + * - LocalArray in localpointer.h if you know the length ahead of time + * - MaybeStackVector if you know the length at runtime */ template class MaybeStackArray { @@ -684,26 +687,26 @@ inline H *MaybeStackHeaderAndArray::orphanOrClone(int32_t l template class MemoryPool : public UMemory { public: - MemoryPool() : count(0), pool() {} + MemoryPool() : fCount(0), fPool() {} ~MemoryPool() { - for (int32_t i = 0; i < count; ++i) { - delete pool[i]; + for (int32_t i = 0; i < fCount; ++i) { + delete fPool[i]; } } MemoryPool(const MemoryPool&) = delete; MemoryPool& operator=(const MemoryPool&) = delete; - MemoryPool(MemoryPool&& other) U_NOEXCEPT : count(other.count), - pool(std::move(other.pool)) { - other.count = 0; + MemoryPool(MemoryPool&& other) U_NOEXCEPT : fCount(other.fCount), + fPool(std::move(other.fPool)) { + other.fCount = 0; } MemoryPool& operator=(MemoryPool&& other) U_NOEXCEPT { - count = other.count; - pool = std::move(other.pool); - other.count = 0; + fCount = other.fCount; + fPool = std::move(other.fPool); + other.fCount = 0; return *this; } @@ -716,20 +719,101 @@ public: */ template T* create(Args&&... args) { - int32_t capacity = pool.getCapacity(); - if (count == capacity && - pool.resize(capacity == stackCapacity ? 4 * capacity : 2 * capacity, - capacity) == nullptr) { + int32_t capacity = fPool.getCapacity(); + if (fCount == capacity && + fPool.resize(capacity == stackCapacity ? 
4 * capacity : 2 * capacity, + capacity) == nullptr) { return nullptr; } - return pool[count++] = new T(std::forward(args)...); + return fPool[fCount++] = new T(std::forward(args)...); } -private: - int32_t count; - MaybeStackArray pool; + /** + * @return Number of elements that have been allocated. + */ + int32_t count() const { + return fCount; + } + +protected: + int32_t fCount; + MaybeStackArray fPool; }; +/** + * An internal Vector-like implementation based on MemoryPool. + * + * Heap-allocates each element and stores pointers. + * + * To append an item to the vector, use emplaceBack. + * + * MaybeStackVector vector; + * MyType* element = vector.emplaceBack(); + * if (!element) { + * status = U_MEMORY_ALLOCATION_ERROR; + * } + * // do stuff with element + * + * To loop over the vector, use a for loop with indices: + * + * for (int32_t i = 0; i < vector.length(); i++) { + * MyType* element = vector[i]; + * } + */ +template +class MaybeStackVector : protected MemoryPool { +public: + using MemoryPool::MemoryPool; + using MemoryPool::operator=; + + template + T* emplaceBack(Args&&... args) { + return this->create(args...); + } + + int32_t length() const { + return this->fCount; + } + + T** getAlias() { + return this->fPool.getAlias(); + } + + /** + * Array item access (read-only). + * No index bounds check. + * @param i array index + * @return reference to the array item + */ + const T* operator[](ptrdiff_t i) const { + return this->fPool[i]; + } + + /** + * Array item access (writable). + * No index bounds check. + * @param i array index + * @return reference to the array item + */ + T* operator[](ptrdiff_t i) { + return this->fPool[i]; + } + + /** + * Append all the items from another MaybeStackVector to this one. 
+ */ + void appendAll(const MaybeStackVector& other, UErrorCode& status) { + for (int32_t i = 0; i < other.fCount; i++) { + T* item = emplaceBack(*other[i]); + if (!item) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + } + } +}; + + U_NAMESPACE_END #endif /* __cplusplus */ diff --git a/icu4c/source/i18n/Makefile.in b/icu4c/source/i18n/Makefile.in index b72f79b6713..ae2c71a24ff 100644 --- a/icu4c/source/i18n/Makefile.in +++ b/icu4c/source/i18n/Makefile.in @@ -97,7 +97,7 @@ uspoof.o uspoof_impl.o uspoof_build.o uspoof_conf.o smpdtfst.o \ ztrans.o zrule.o vzone.o fphdlimp.o fpositer.o ufieldpositer.o \ decNumber.o decContext.o alphaindex.o tznames.o tznames_impl.o tzgnames.o \ tzfmt.o compactdecimalformat.o gender.o region.o scriptset.o \ -uregion.o reldatefmt.o quantityformatter.o measunit.o \ +uregion.o reldatefmt.o quantityformatter.o measunit.o measunit_extra.o \ sharedbreakiterator.o scientificnumberformatter.o dayperiodrules.o nounit.o \ number_affixutils.o number_compact.o number_decimalquantity.o \ number_decimfmtprops.o number_fluent.o number_formatimpl.o number_grouping.o \ diff --git a/icu4c/source/i18n/currunit.cpp b/icu4c/source/i18n/currunit.cpp index dab6dda5368..280bd563e5b 100644 --- a/icu4c/source/i18n/currunit.cpp +++ b/icu4c/source/i18n/currunit.cpp @@ -19,9 +19,7 @@ #include "cstring.h" #include "uinvchar.h" #include "charstr.h" - -static constexpr char16_t kDefaultCurrency[] = u"XXX"; -static constexpr char kDefaultCurrency8[] = "XXX"; +#include "measunit_impl.h" U_NAMESPACE_BEGIN diff --git a/icu4c/source/i18n/i18n.vcxproj b/icu4c/source/i18n/i18n.vcxproj index 5c6760d220e..531a9a0d829 100644 --- a/icu4c/source/i18n/i18n.vcxproj +++ b/icu4c/source/i18n/i18n.vcxproj @@ -187,6 +187,7 @@ + @@ -392,6 +393,7 @@ + diff --git a/icu4c/source/i18n/i18n.vcxproj.filters b/icu4c/source/i18n/i18n.vcxproj.filters index a1813fc0696..50a06ee014e 100644 --- a/icu4c/source/i18n/i18n.vcxproj.filters +++ b/icu4c/source/i18n/i18n.vcxproj.filters @@ -204,6 
+204,9 @@ formatting + + formatting + formatting @@ -830,6 +833,9 @@ formatting + + formatting + formatting diff --git a/icu4c/source/i18n/i18n_uwp.vcxproj b/icu4c/source/i18n/i18n_uwp.vcxproj index 989cef88fe5..0268132e2a8 100644 --- a/icu4c/source/i18n/i18n_uwp.vcxproj +++ b/icu4c/source/i18n/i18n_uwp.vcxproj @@ -408,6 +408,7 @@ + @@ -611,6 +612,7 @@ + diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp index 5ce9f5423c5..344ba45fb50 100644 --- a/icu4c/source/i18n/measunit.cpp +++ b/icu4c/source/i18n/measunit.cpp @@ -17,9 +17,11 @@ #if !UCONFIG_NO_FORMATTING #include "unicode/uenum.h" +#include "unicode/errorcode.h" #include "ustrenum.h" #include "cstring.h" #include "uassert.h" +#include "measunit_impl.h" U_NAMESPACE_BEGIN @@ -535,7 +537,7 @@ static const char * const gSubTypes[] = { "solar-mass", "stone", "ton", - "base", + "one", "percent", "permille", "gigawatt", @@ -2006,24 +2008,62 @@ static int32_t binarySearch( return -1; } -MeasureUnit::MeasureUnit() { - fCurrency[0] = 0; - fTypeId = kBaseTypeIdx; - fSubTypeId = kBaseSubTypeIdx; +MeasureUnit::MeasureUnit() : MeasureUnit(kBaseTypeIdx, kBaseSubTypeIdx) { +} + +MeasureUnit::MeasureUnit(int32_t typeId, int32_t subTypeId) + : fImpl(nullptr), fSubTypeId(subTypeId), fTypeId(typeId) { } MeasureUnit::MeasureUnit(const MeasureUnit &other) - : fTypeId(other.fTypeId), fSubTypeId(other.fSubTypeId) { - uprv_strcpy(fCurrency, other.fCurrency); + : fImpl(nullptr) { + *this = other; +} + +MeasureUnit::MeasureUnit(MeasureUnit &&other) noexcept + : fImpl(other.fImpl), + fSubTypeId(other.fSubTypeId), + fTypeId(other.fTypeId) { + other.fImpl = nullptr; +} + +MeasureUnit::MeasureUnit(MeasureUnitImpl&& impl) + : fImpl(nullptr), fSubTypeId(-1), fTypeId(-1) { + if (!findBySubType(impl.identifier.toStringPiece(), this)) { + fImpl = new MeasureUnitImpl(std::move(impl)); + } } MeasureUnit &MeasureUnit::operator=(const MeasureUnit &other) { if (this == &other) { return *this; } + delete fImpl; + if 
(other.fImpl) { + ErrorCode localStatus; + fImpl = new MeasureUnitImpl(other.fImpl->copy(localStatus)); + if (!fImpl || localStatus.isFailure()) { + // Unrecoverable allocation error; set to the default unit + *this = MeasureUnit(); + return *this; + } + } else { + fImpl = nullptr; + } + fTypeId = other.fTypeId; + fSubTypeId = other.fSubTypeId; + return *this; +} + +MeasureUnit &MeasureUnit::operator=(MeasureUnit &&other) noexcept { + if (this == &other) { + return *this; + } + delete fImpl; + fImpl = other.fImpl; + other.fImpl = nullptr; fTypeId = other.fTypeId; fSubTypeId = other.fSubTypeId; - uprv_strcpy(fCurrency, other.fCurrency); return *this; } @@ -2032,14 +2072,28 @@ MeasureUnit *MeasureUnit::clone() const { } MeasureUnit::~MeasureUnit() { + delete fImpl; + fImpl = nullptr; } const char *MeasureUnit::getType() const { + // We have a type & subtype only if fTypeId is present. + if (fTypeId == -1) { + return ""; + } return gTypes[fTypeId]; } const char *MeasureUnit::getSubtype() const { - return fCurrency[0] == 0 ? gSubTypes[getOffset()] : fCurrency; + // We have a type & subtype only if fTypeId is present. + if (fTypeId == -1) { + return ""; + } + return getIdentifier(); +} + +const char *MeasureUnit::getIdentifier() const { + return fImpl ? 
fImpl->identifier.data() : gSubTypes[getOffset()]; } UBool MeasureUnit::operator==(const UObject& other) const { @@ -2050,10 +2104,7 @@ UBool MeasureUnit::operator==(const UObject& other) const { return FALSE; } const MeasureUnit &rhs = static_cast(other); - return ( - fTypeId == rhs.fTypeId - && fSubTypeId == rhs.fSubTypeId - && uprv_strcmp(fCurrency, rhs.fCurrency) == 0); + return uprv_strcmp(getIdentifier(), rhs.getIdentifier()) == 0; } int32_t MeasureUnit::getIndex() const { @@ -2153,42 +2204,14 @@ bool MeasureUnit::findBySubType(StringPiece subType, MeasureUnit* output) { return false; } -bool MeasureUnit::parseCoreUnitIdentifier( - StringPiece coreUnitIdentifier, - MeasureUnit* numerator, - MeasureUnit* denominator, - UErrorCode& status) { - if (U_FAILURE(status)) { - return false; - } - - // First search for the whole code unit identifier as a subType - if (findBySubType(coreUnitIdentifier, numerator)) { - return false; // found a numerator but not denominator - } - - // If not found, try breaking apart numerator and denominator - int32_t perIdx = coreUnitIdentifier.find("-per-", 0); - if (perIdx == -1) { - // String does not contain "-per-" - status = U_ILLEGAL_ARGUMENT_ERROR; - return false; - } - StringPiece numeratorStr(coreUnitIdentifier, 0, perIdx); - StringPiece denominatorStr(coreUnitIdentifier, perIdx + 5); - if (findBySubType(numeratorStr, numerator) && findBySubType(denominatorStr, denominator)) { - return true; // found both a numerator and denominator - } - - // The numerator or denominator were invalid - status = U_ILLEGAL_ARGUMENT_ERROR; - return false; -} - MeasureUnit MeasureUnit::resolveUnitPerUnit( const MeasureUnit &unit, const MeasureUnit &perUnit, bool* isResolved) { int32_t unitOffset = unit.getOffset(); int32_t perUnitOffset = perUnit.getOffset(); + if (unitOffset == -1 || perUnitOffset == -1) { + *isResolved = false; + return MeasureUnit(); + } // binary search for (unitOffset, perUnitOffset) int32_t start = 0; @@ -2236,18 +2259,24 
@@ void MeasureUnit::initTime(const char *timeId) { fSubTypeId = result - gOffsets[fTypeId]; } -void MeasureUnit::initCurrency(const char *isoCurrency) { +void MeasureUnit::initCurrency(StringPiece isoCurrency) { int32_t result = binarySearch(gTypes, 0, UPRV_LENGTHOF(gTypes), "currency"); U_ASSERT(result != -1); fTypeId = result; result = binarySearch( gSubTypes, gOffsets[fTypeId], gOffsets[fTypeId + 1], isoCurrency); - if (result != -1) { - fSubTypeId = result - gOffsets[fTypeId]; - } else { - uprv_strncpy(fCurrency, isoCurrency, UPRV_LENGTHOF(fCurrency)); - fCurrency[3] = 0; + if (result == -1) { + fImpl = new MeasureUnitImpl(MeasureUnitImpl::forCurrencyCode(isoCurrency)); + if (fImpl) { + fSubTypeId = -1; + return; + } + // malloc error: fall back to the undefined currency + result = binarySearch( + gSubTypes, gOffsets[fTypeId], gOffsets[fTypeId + 1], kDefaultCurrency8); + U_ASSERT(result != -1); } + fSubTypeId = result - gOffsets[fTypeId]; } void MeasureUnit::initNoUnit(const char *subtype) { @@ -2262,10 +2291,14 @@ void MeasureUnit::initNoUnit(const char *subtype) { void MeasureUnit::setTo(int32_t typeId, int32_t subTypeId) { fTypeId = typeId; fSubTypeId = subTypeId; - fCurrency[0] = 0; + delete fImpl; + fImpl = nullptr; } int32_t MeasureUnit::getOffset() const { + if (fTypeId < 0 || fSubTypeId < 0) { + return -1; + } return gOffsets[fTypeId] + fSubTypeId; } diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp new file mode 100644 index 00000000000..d848965ff43 --- /dev/null +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -0,0 +1,783 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// Extra functions for MeasureUnit not needed for all clients. +// Separate .o file so that it can be removed for modularity. 
+ +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +// Allow implicit conversion from char16_t* to UnicodeString for this file: +// Helpful in toString methods and elsewhere. +#define UNISTR_FROM_STRING_EXPLICIT + +#include "cstring.h" +#include "measunit_impl.h" +#include "uarrsort.h" +#include "uassert.h" +#include "ucln_in.h" +#include "umutex.h" +#include "unicode/errorcode.h" +#include "unicode/localpointer.h" +#include "unicode/measunit.h" +#include "unicode/ucharstrie.h" +#include "unicode/ucharstriebuilder.h" + +#include "cstr.h" + +U_NAMESPACE_BEGIN + + +namespace { + +// TODO: Propose a new error code for this? +constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR; + +// This is to ensure we only insert positive integers into the trie +constexpr int32_t kSIPrefixOffset = 64; + +constexpr int32_t kCompoundPartOffset = 128; + +enum CompoundPart { + COMPOUND_PART_PER = kCompoundPartOffset, + COMPOUND_PART_TIMES, + COMPOUND_PART_PLUS, +}; + +constexpr int32_t kPowerPartOffset = 256; + +enum PowerPart { + POWER_PART_P2 = kPowerPartOffset + 2, + POWER_PART_P3, + POWER_PART_P4, + POWER_PART_P5, + POWER_PART_P6, + POWER_PART_P7, + POWER_PART_P8, + POWER_PART_P9, + POWER_PART_P10, + POWER_PART_P11, + POWER_PART_P12, + POWER_PART_P13, + POWER_PART_P14, + POWER_PART_P15, +}; + +constexpr int32_t kSimpleUnitOffset = 512; + +const struct SIPrefixStrings { + const char* const string; + UMeasureSIPrefix value; +} gSIPrefixStrings[] = { + { "yotta", UMEASURE_SI_PREFIX_YOTTA }, + { "zetta", UMEASURE_SI_PREFIX_ZETTA }, + { "exa", UMEASURE_SI_PREFIX_EXA }, + { "peta", UMEASURE_SI_PREFIX_PETA }, + { "tera", UMEASURE_SI_PREFIX_TERA }, + { "giga", UMEASURE_SI_PREFIX_GIGA }, + { "mega", UMEASURE_SI_PREFIX_MEGA }, + { "kilo", UMEASURE_SI_PREFIX_KILO }, + { "hecto", UMEASURE_SI_PREFIX_HECTO }, + { "deka", UMEASURE_SI_PREFIX_DEKA }, + { "deci", UMEASURE_SI_PREFIX_DECI }, + { "centi", UMEASURE_SI_PREFIX_CENTI }, + { "milli", 
UMEASURE_SI_PREFIX_MILLI }, + { "micro", UMEASURE_SI_PREFIX_MICRO }, + { "nano", UMEASURE_SI_PREFIX_NANO }, + { "pico", UMEASURE_SI_PREFIX_PICO }, + { "femto", UMEASURE_SI_PREFIX_FEMTO }, + { "atto", UMEASURE_SI_PREFIX_ATTO }, + { "zepto", UMEASURE_SI_PREFIX_ZEPTO }, + { "yocto", UMEASURE_SI_PREFIX_YOCTO }, +}; + +// TODO(ICU-20920): Get this list from data +const char16_t* const gSimpleUnits[] = { + u"one", // note: expected to be index 0 + u"candela", + u"carat", + u"gram", + u"ounce", + u"ounce-troy", + u"pound", + u"kilogram", + u"stone", + u"ton", + u"metric-ton", + u"earth-mass", + u"solar-mass", + u"point", + u"inch", + u"foot", + u"yard", + u"meter", + u"fathom", + u"furlong", + u"mile", + u"nautical-mile", + u"mile-scandinavian", + u"100-kilometer", + u"earth-radius", + u"solar-radius", + u"astronomical-unit", + u"light-year", + u"parsec", + u"second", + u"minute", + u"hour", + u"day", + u"day-person", + u"week", + u"week-person", + u"month", + u"month-person", + u"year", + u"year-person", + u"decade", + u"century", + u"ampere", + u"fahrenheit", + u"kelvin", + u"celsius", + u"arc-second", + u"arc-minute", + u"degree", + u"radian", + u"revolution", + u"item", + u"mole", + u"permillion", + u"permyriad", + u"permille", + u"percent", + u"karat", + u"portion", + u"bit", + u"byte", + u"dot", + u"pixel", + u"em", + u"hertz", + u"newton", + u"pound-force", + u"pascal", + u"bar", + u"atmosphere", + u"ofhg", + u"electronvolt", + u"dalton", + u"joule", + u"calorie", + u"british-thermal-unit", + u"foodcalorie", + u"therm-us", + u"watt", + u"horsepower", + u"solar-luminosity", + u"volt", + u"ohm", + u"dunam", + u"acre", + u"hectare", + u"teaspoon", + u"tablespoon", + u"fluid-ounce-imperial", + u"fluid-ounce", + u"cup", + u"cup-metric", + u"pint", + u"pint-metric", + u"quart", + u"liter", + u"gallon", + u"gallon-imperial", + u"bushel", + u"barrel", + u"knot", + u"g-force", + u"lux", +}; + +icu::UInitOnce gUnitExtrasInitOnce = U_INITONCE_INITIALIZER; + +char16_t* 
kSerializedUnitExtrasStemTrie = nullptr; + +UBool U_CALLCONV cleanupUnitExtras() { + uprv_free(kSerializedUnitExtrasStemTrie); + kSerializedUnitExtrasStemTrie = nullptr; + gUnitExtrasInitOnce.reset(); + return TRUE; +} + +void U_CALLCONV initUnitExtras(UErrorCode& status) { + ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras); + + UCharsTrieBuilder b(status); + if (U_FAILURE(status)) { return; } + + // Add SI prefixes + for (const auto& siPrefixInfo : gSIPrefixStrings) { + UnicodeString uSIPrefix(siPrefixInfo.string, -1, US_INV); + b.add(uSIPrefix, siPrefixInfo.value + kSIPrefixOffset, status); + } + if (U_FAILURE(status)) { return; } + + // Add syntax parts (compound, power prefixes) + b.add(u"-per-", COMPOUND_PART_PER, status); + b.add(u"-", COMPOUND_PART_TIMES, status); + b.add(u"-and-", COMPOUND_PART_PLUS, status); + b.add(u"square-", POWER_PART_P2, status); + b.add(u"cubic-", POWER_PART_P3, status); + b.add(u"p2-", POWER_PART_P2, status); + b.add(u"p3-", POWER_PART_P3, status); + b.add(u"p4-", POWER_PART_P4, status); + b.add(u"p5-", POWER_PART_P5, status); + b.add(u"p6-", POWER_PART_P6, status); + b.add(u"p7-", POWER_PART_P7, status); + b.add(u"p8-", POWER_PART_P8, status); + b.add(u"p9-", POWER_PART_P9, status); + b.add(u"p10-", POWER_PART_P10, status); + b.add(u"p11-", POWER_PART_P11, status); + b.add(u"p12-", POWER_PART_P12, status); + b.add(u"p13-", POWER_PART_P13, status); + b.add(u"p14-", POWER_PART_P14, status); + b.add(u"p15-", POWER_PART_P15, status); + if (U_FAILURE(status)) { return; } + + // Add sanctioned simple units by offset + int32_t simpleUnitOffset = kSimpleUnitOffset; + for (auto simpleUnit : gSimpleUnits) { + b.add(simpleUnit, simpleUnitOffset++, status); + } + + // Build the CharsTrie + // TODO: Use SLOW or FAST here? 
+ UnicodeString result; + b.buildUnicodeString(USTRINGTRIE_BUILD_FAST, result, status); + if (U_FAILURE(status)) { return; } + + // Copy the result into the global constant pointer + size_t numBytes = result.length() * sizeof(char16_t); + kSerializedUnitExtrasStemTrie = static_cast(uprv_malloc(numBytes)); + uprv_memcpy(kSerializedUnitExtrasStemTrie, result.getBuffer(), numBytes); +} + +class Token { +public: + Token(int32_t match) : fMatch(match) {} + + enum Type { + TYPE_UNDEFINED, + TYPE_SI_PREFIX, + TYPE_COMPOUND_PART, + TYPE_POWER_PART, + TYPE_ONE, + TYPE_SIMPLE_UNIT, + }; + + Type getType() const { + if (fMatch <= 0) { + UPRV_UNREACHABLE; + } + if (fMatch < kCompoundPartOffset) { + return TYPE_SI_PREFIX; + } + if (fMatch < kPowerPartOffset) { + return TYPE_COMPOUND_PART; + } + if (fMatch < kSimpleUnitOffset) { + return TYPE_POWER_PART; + } + if (fMatch == kSimpleUnitOffset) { + return TYPE_ONE; + } + return TYPE_SIMPLE_UNIT; + } + + UMeasureSIPrefix getSIPrefix() const { + U_ASSERT(getType() == TYPE_SI_PREFIX); + return static_cast(fMatch - kSIPrefixOffset); + } + + int32_t getMatch() const { + U_ASSERT(getType() == TYPE_COMPOUND_PART); + return fMatch; + } + + int8_t getPower() const { + U_ASSERT(getType() == TYPE_POWER_PART); + return static_cast(fMatch - kPowerPartOffset); + } + + int32_t getSimpleUnitIndex() const { + U_ASSERT(getType() == TYPE_SIMPLE_UNIT); + return fMatch - kSimpleUnitOffset; + } + +private: + int32_t fMatch; +}; + +class Parser { +public: + static Parser from(StringPiece source, UErrorCode& status) { + if (U_FAILURE(status)) { + return Parser(); + } + umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status); + if (U_FAILURE(status)) { + return Parser(); + } + return Parser(source); + } + + MeasureUnitImpl parse(UErrorCode& status) { + MeasureUnitImpl result; + parseImpl(result, status); + return result; + } + +private: + int32_t fIndex = 0; + StringPiece fSource; + UCharsTrie fTrie; + + bool fAfterPer = false; + + Parser() : 
fSource(""), fTrie(u"") {} + + Parser(StringPiece source) + : fSource(source), fTrie(kSerializedUnitExtrasStemTrie) {} + + inline bool hasNext() const { + return fIndex < fSource.length(); + } + + Token nextToken(UErrorCode& status) { + fTrie.reset(); + int32_t match = -1; + int32_t previ = -1; + do { + auto result = fTrie.next(fSource.data()[fIndex++]); + if (result == USTRINGTRIE_NO_MATCH) { + break; + } else if (result == USTRINGTRIE_NO_VALUE) { + continue; + } + U_ASSERT(USTRINGTRIE_HAS_VALUE(result)); + match = fTrie.getValue(); + previ = fIndex; + if (result == USTRINGTRIE_FINAL_VALUE) { + break; + } + U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE); + // continue; + } while (fIndex < fSource.length()); + + if (match < 0) { + status = kUnitIdentifierSyntaxError; + } else { + fIndex = previ; + } + return Token(match); + } + + void nextSingleUnit(SingleUnitImpl& result, bool& sawPlus, UErrorCode& status) { + sawPlus = false; + if (U_FAILURE(status)) { + return; + } + + if (!hasNext()) { + // probably "one" + return; + } + + // state: + // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit) + // 1 = power token seen (will not accept another power token) + // 2 = SI prefix token seen (will not accept a power or SI prefix token) + int32_t state = 0; + int32_t previ = fIndex; + + // Maybe read a compound part + if (fIndex != 0) { + Token token = nextToken(status); + if (U_FAILURE(status)) { + return; + } + if (token.getType() != Token::TYPE_COMPOUND_PART) { + status = kUnitIdentifierSyntaxError; + return; + } + switch (token.getMatch()) { + case COMPOUND_PART_PER: + if (fAfterPer) { + status = kUnitIdentifierSyntaxError; + return; + } + fAfterPer = true; + result.dimensionality = -1; + break; + + case COMPOUND_PART_TIMES: + break; + + case COMPOUND_PART_PLUS: + sawPlus = true; + fAfterPer = false; + break; + } + previ = fIndex; + } + + // Read a unit + while (hasNext()) { + Token token = nextToken(status); + if (U_FAILURE(status)) { + return; + 
} + + switch (token.getType()) { + case Token::TYPE_POWER_PART: + if (state > 0) { + status = kUnitIdentifierSyntaxError; + return; + } + result.dimensionality *= token.getPower(); + previ = fIndex; + state = 1; + break; + + case Token::TYPE_SI_PREFIX: + if (state > 1) { + status = kUnitIdentifierSyntaxError; + return; + } + result.siPrefix = token.getSIPrefix(); + previ = fIndex; + state = 2; + break; + + case Token::TYPE_ONE: + // Skip "one" and go to the next unit + return nextSingleUnit(result, sawPlus, status); + + case Token::TYPE_SIMPLE_UNIT: + result.index = token.getSimpleUnitIndex(); + result.identifier = fSource.substr(previ, fIndex - previ); + return; + + default: + status = kUnitIdentifierSyntaxError; + return; + } + } + + // We ran out of tokens before finding a complete single unit. + status = kUnitIdentifierSyntaxError; + } + + void parseImpl(MeasureUnitImpl& result, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + int32_t unitNum = 0; + while (hasNext()) { + bool sawPlus; + SingleUnitImpl singleUnit; + nextSingleUnit(singleUnit, sawPlus, status); + if (U_FAILURE(status)) { + return; + } + if (singleUnit.index == 0) { + continue; + } + bool added = result.append(singleUnit, status); + if (sawPlus && !added) { + // Two similar units are not allowed in a sequence unit + status = kUnitIdentifierSyntaxError; + return; + } + if ((++unitNum) >= 2) { + UMeasureUnitComplexity complexity = sawPlus + ? 
UMEASURE_UNIT_SEQUENCE + : UMEASURE_UNIT_COMPOUND; + if (unitNum == 2) { + U_ASSERT(result.complexity == UMEASURE_UNIT_SINGLE); + result.complexity = complexity; + } else if (result.complexity != complexity) { + // Mixed sequence and compound units + status = kUnitIdentifierSyntaxError; + return; + } + } + } + } +}; + +int32_t U_CALLCONV +compareSingleUnits(const void* /*context*/, const void* left, const void* right) { + auto realLeft = static_cast(left); + auto realRight = static_cast(right); + return (*realLeft)->compareTo(**realRight); +} + +/** + * Generate the identifier string for a single unit in place. + */ +void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& output, UErrorCode& status) { + if (first && singleUnit.dimensionality < 0) { + output.append("one-per-", status); + } + + if (singleUnit.index == 0) { + // Don't propagate SI prefixes and powers on one + output.append("one", status); + return; + } + int8_t posPower = std::abs(singleUnit.dimensionality); + if (posPower == 0) { + status = U_INTERNAL_PROGRAM_ERROR; + } else if (posPower == 1) { + // no-op + } else if (posPower == 2) { + output.append("square-", status); + } else if (posPower == 3) { + output.append("cubic-", status); + } else if (posPower < 10) { + output.append('p', status); + output.append(posPower + '0', status); + output.append('-', status); + } else if (posPower <= 15) { + output.append("p1", status); + output.append('0' + (posPower % 10), status); + output.append('-', status); + } else { + status = kUnitIdentifierSyntaxError; + } + if (U_FAILURE(status)) { + return; + } + + if (singleUnit.siPrefix != UMEASURE_SI_PREFIX_ONE) { + for (const auto& siPrefixInfo : gSIPrefixStrings) { + if (siPrefixInfo.value == singleUnit.siPrefix) { + output.append(siPrefixInfo.string, status); + break; + } + } + } + if (U_FAILURE(status)) { + return; + } + + output.append(singleUnit.identifier, status); +} + +/** + * Normalize a MeasureUnitImpl and generate the identifier 
string in place. + */ +void serialize(MeasureUnitImpl& impl, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + U_ASSERT(impl.identifier.isEmpty()); + if (impl.units.length() == 0) { + impl.identifier.append("one", status); + return; + } + if (impl.complexity == UMEASURE_UNIT_COMPOUND) { + // Note: don't sort a SEQUENCE unit + uprv_sortArray( + impl.units.getAlias(), + impl.units.length(), + sizeof(impl.units[0]), + compareSingleUnits, + nullptr, + false, + &status); + if (U_FAILURE(status)) { + return; + } + } + serializeSingle(*impl.units[0], true, impl.identifier, status); + if (impl.units.length() == 1) { + return; + } + for (int32_t i = 1; i < impl.units.length(); i++) { + const SingleUnitImpl& prev = *impl.units[i-1]; + const SingleUnitImpl& curr = *impl.units[i]; + if (impl.complexity == UMEASURE_UNIT_SEQUENCE) { + impl.identifier.append("-and-", status); + serializeSingle(curr, true, impl.identifier, status); + } else { + if (prev.dimensionality > 0 && curr.dimensionality < 0) { + impl.identifier.append("-per-", status); + } else { + impl.identifier.append('-', status); + } + serializeSingle(curr, false, impl.identifier, status); + } + } + +} + +/** @return true if a new item was added */ +bool appendImpl(MeasureUnitImpl& impl, const SingleUnitImpl& unit, UErrorCode& status) { + // Find a similar unit that already exists, to attempt to coalesce + SingleUnitImpl* oldUnit = nullptr; + for (int32_t i = 0; i < impl.units.length(); i++) { + auto* candidate = impl.units[i]; + if (candidate->isCompatibleWith(unit)) { + oldUnit = candidate; + } + } + if (oldUnit) { + oldUnit->dimensionality += unit.dimensionality; + } else { + SingleUnitImpl* destination = impl.units.emplaceBack(); + if (!destination) { + status = U_MEMORY_ALLOCATION_ERROR; + return false; + } + *destination = unit; + } + return (oldUnit == nullptr); +} + +} // namespace + + +SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) { + 
MeasureUnitImpl temp; + const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status); + if (U_FAILURE(status)) { + return {}; + } + if (impl.units.length() == 0) { + return {}; + } + if (impl.units.length() == 1) { + return *impl.units[0]; + } + status = U_ILLEGAL_ARGUMENT_ERROR; + return {}; +} + +MeasureUnit SingleUnitImpl::build(UErrorCode& status) const { + MeasureUnitImpl temp; + temp.append(*this, status); + return std::move(temp).build(status); +} + + +MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) { + return Parser::from(identifier, status).parse(status); +} + +const MeasureUnitImpl& MeasureUnitImpl::forMeasureUnit( + const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status) { + if (measureUnit.fImpl) { + return *measureUnit.fImpl; + } else { + memory = Parser::from(measureUnit.getIdentifier(), status).parse(status); + return memory; + } +} + +MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy( + const MeasureUnit& measureUnit, UErrorCode& status) { + if (measureUnit.fImpl) { + return measureUnit.fImpl->copy(status); + } else { + return Parser::from(measureUnit.getIdentifier(), status).parse(status); + } +} + +void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) { + identifier.clear(); + for (int32_t i = 0; i < units.length(); i++) { + units[i]->dimensionality *= -1; + } +} + +bool MeasureUnitImpl::append(const SingleUnitImpl& singleUnit, UErrorCode& status) { + identifier.clear(); + return appendImpl(*this, singleUnit, status); +} + +MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && { + serialize(*this, status); + return MeasureUnit(std::move(*this)); +} + + +MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) { + return Parser::from(identifier, status).parse(status).build(status); +} + +UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const { + MeasureUnitImpl temp; + return 
MeasureUnitImpl::forMeasureUnit(*this, temp, status).complexity; +} + +UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const { + return SingleUnitImpl::forMeasureUnit(*this, status).siPrefix; +} + +MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const { + SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status); + singleUnit.siPrefix = prefix; + return singleUnit.build(status); +} + +int32_t MeasureUnit::getDimensionality(UErrorCode& status) const { + return SingleUnitImpl::forMeasureUnit(*this, status).dimensionality; +} + +MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const { + SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status); + singleUnit.dimensionality = dimensionality; + return singleUnit.build(status); +} + +MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const { + MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status); + impl.takeReciprocal(status); + return std::move(impl).build(status); +} + +MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const { + MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status); + MeasureUnitImpl temp; + const MeasureUnitImpl& otherImpl = MeasureUnitImpl::forMeasureUnit(other, temp, status); + if (impl.complexity == UMEASURE_UNIT_SEQUENCE || otherImpl.complexity == UMEASURE_UNIT_SEQUENCE) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return {}; + } + for (int32_t i = 0; i < otherImpl.units.length(); i++) { + impl.append(*otherImpl.units[i], status); + } + if (impl.units.length() > 1) { + impl.complexity = UMEASURE_UNIT_COMPOUND; + } + return std::move(impl).build(status); +} + +LocalArray MeasureUnit::splitToSingleUnits(int32_t& outCount, UErrorCode& status) const { + MeasureUnitImpl temp; + const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, temp, status); + outCount = impl.units.length(); + 
MeasureUnit* arr = new MeasureUnit[outCount]; + for (int32_t i = 0; i < outCount; i++) { + arr[i] = impl.units[i]->build(status); + } + return LocalArray(arr, status); +} + + +U_NAMESPACE_END + +#endif /* !UNCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/measunit_impl.h b/icu4c/source/i18n/measunit_impl.h new file mode 100644 index 00000000000..5d2e6b1fe57 --- /dev/null +++ b/icu4c/source/i18n/measunit_impl.h @@ -0,0 +1,173 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef __MEASUNIT_IMPL_H__ +#define __MEASUNIT_IMPL_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/measunit.h" +#include "cmemory.h" +#include "charstr.h" + +U_NAMESPACE_BEGIN + + +static const char16_t kDefaultCurrency[] = u"XXX"; +static const char kDefaultCurrency8[] = "XXX"; + + +/** + * A struct representing a single unit (optional SI prefix and dimensionality). + */ +struct SingleUnitImpl : public UMemory { + /** + * Gets a single unit from the MeasureUnit. If there are multiple single units, sets an error + * code and return the base dimensionless unit. Parses if necessary. + */ + static SingleUnitImpl forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status); + + /** Transform this SingleUnitImpl into a MeasureUnit, simplifying if possible. */ + MeasureUnit build(UErrorCode& status) const; + + /** Compare this SingleUnitImpl to another SingleUnitImpl. 
*/ + int32_t compareTo(const SingleUnitImpl& other) const { + if (dimensionality < 0 && other.dimensionality > 0) { + // Positive dimensions first + return 1; + } + if (dimensionality > 0 && other.dimensionality < 0) { + return -1; + } + if (index < other.index) { + return -1; + } + if (index > other.index) { + return 1; + } + if (siPrefix < other.siPrefix) { + return -1; + } + if (siPrefix > other.siPrefix) { + return 1; + } + return 0; + } + + /** + * Return whether this SingleUnitImpl is compatible with another for the purpose of coalescing. + * + * Units with the same base unit and SI prefix should match, except that they must also have + * the same dimensionality sign, such that we don't merge numerator and denominator. + */ + bool isCompatibleWith(const SingleUnitImpl& other) const { + return (compareTo(other) == 0); + } + + /** Simple unit index, unique for every simple unit. */ + int32_t index = 0; + + /** Simple unit identifier; memory not owned by the SimpleUnit. */ + StringPiece identifier; + + /** SI prefix. **/ + UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; + + /** Dimensionality. **/ + int32_t dimensionality = 1; +}; + + +/** + * Internal representation of measurement units. Capable of representing all complexities of units, + * including sequence and compound units. + */ +struct MeasureUnitImpl : public UMemory { + /** Extract the MeasureUnitImpl from a MeasureUnit. */ + static inline const MeasureUnitImpl* get(const MeasureUnit& measureUnit) { + return measureUnit.fImpl; + } + + /** + * Parse a unit identifier into a MeasureUnitImpl. + * + * @param identifier The unit identifier string. + * @param status Set if the identifier string is not valid. + * @return A newly parsed value object. + */ + static MeasureUnitImpl forIdentifier(StringPiece identifier, UErrorCode& status); + + /** + * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present. + * + * @param measureUnit The source MeasureUnit. 
+ * @param memory A place to write the new MeasureUnitImpl if parsing is required. + * @param status Set if an error occurs. + * @return A reference to either measureUnit.fImpl or memory. + */ + static const MeasureUnitImpl& forMeasureUnit( + const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status); + + /** + * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present. + * + * @param measureUnit The source MeasureUnit. + * @param status Set if an error occurs. + * @return A value object, either newly parsed or copied from measureUnit. + */ + static MeasureUnitImpl forMeasureUnitMaybeCopy( + const MeasureUnit& measureUnit, UErrorCode& status); + + /** + * Used for currency units. + */ + static inline MeasureUnitImpl forCurrencyCode(StringPiece currencyCode) { + MeasureUnitImpl result; + UErrorCode localStatus = U_ZERO_ERROR; + result.identifier.append(currencyCode, localStatus); + // localStatus is not expected to fail since currencyCode should be 3 chars long + return result; + } + + /** Transform this MeasureUnitImpl into a MeasureUnit, simplifying if possible. */ + MeasureUnit build(UErrorCode& status) &&; + + /** + * Create a copy of this MeasureUnitImpl. Don't use copy constructor to make this explicit. + */ + inline MeasureUnitImpl copy(UErrorCode& status) const { + MeasureUnitImpl result; + result.complexity = complexity; + result.units.appendAll(units, status); + result.identifier.append(identifier, status); + return result; + } + + /** Mutates this MeasureUnitImpl to take the reciprocal. */ + void takeReciprocal(UErrorCode& status); + + /** Mutates this MeasureUnitImpl to append a single unit. */ + bool append(const SingleUnitImpl& singleUnit, UErrorCode& status); + + /** The complexity, either SINGLE, COMPOUND, or SEQUENCE. */ + UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE; + + /** + * The list of simple units. These may be summed or multiplied, based on the value of the + * complexity field. 
+ */ + MaybeStackVector units; + + /** + * The full unit identifier. Owned by the MeasureUnitImpl. Empty if not computed. + */ + CharString identifier; +}; + + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ +#endif //__MEASUNIT_IMPL_H__ diff --git a/icu4c/source/i18n/nounit.cpp b/icu4c/source/i18n/nounit.cpp index 076f76f199c..b993cb56adb 100644 --- a/icu4c/source/i18n/nounit.cpp +++ b/icu4c/source/i18n/nounit.cpp @@ -11,7 +11,7 @@ U_NAMESPACE_BEGIN UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NoUnit) NoUnit U_EXPORT2 NoUnit::base() { - return NoUnit("base"); + return NoUnit("one"); } NoUnit U_EXPORT2 NoUnit::percent() { diff --git a/icu4c/source/i18n/number_asformat.cpp b/icu4c/source/i18n/number_asformat.cpp index e876174fdce..9d10d1f5580 100644 --- a/icu4c/source/i18n/number_asformat.cpp +++ b/icu4c/source/i18n/number_asformat.cpp @@ -102,4 +102,16 @@ const LocalizedNumberFormatter& LocalizedNumberFormatterAsFormat::getNumberForma return fFormatter; } + +// Definitions of public API methods (put here for dependency disentanglement) + +Format* LocalizedNumberFormatter::toFormat(UErrorCode& status) const { + if (U_FAILURE(status)) { + return nullptr; + } + LocalPointer retval( + new LocalizedNumberFormatterAsFormat(*this, fMacros.locale), status); + return retval.orphan(); +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/number_fluent.cpp b/icu4c/source/i18n/number_fluent.cpp index 2dbd2fa6cd5..9cdb8b7156e 100644 --- a/icu4c/source/i18n/number_fluent.cpp +++ b/icu4c/source/i18n/number_fluent.cpp @@ -11,7 +11,6 @@ #include "number_formatimpl.h" #include "umutex.h" #include "number_asformat.h" -#include "number_skeletons.h" #include "number_utils.h" #include "number_utypes.h" #include "util.h" @@ -21,6 +20,16 @@ using namespace icu; using namespace icu::number; using namespace icu::number::impl; +#if (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(_MSC_VER) +// Ignore MSVC warning 4661. 
This is generated for NumberFormatterSettings<>::toSkeleton() as this method +// is defined elsewhere (in number_skeletons.cpp). The compiler is warning that the explicit template instantiation +// inside this single translation unit (CPP file) is incomplete, and thus it isn't sure if the template class is +// fully defined. However, since each translation unit explicitly instantiates all the necessary template classes, +// they will all be passed to the linker, and the linker will still find and export all the class members. +#pragma warning(push) +#pragma warning(disable: 4661) +#endif + template Derived NumberFormatterSettings::notation(const Notation& notation) const& { Derived copy(*this); @@ -320,16 +329,7 @@ Derived NumberFormatterSettings::macros(impl::MacroProps&& macros)&& { return move; } -template -UnicodeString NumberFormatterSettings::toSkeleton(UErrorCode& status) const { - if (U_FAILURE(status)) { - return ICU_Utility::makeBogusString(); - } - if (fMacros.copyErrorTo(status)) { - return ICU_Utility::makeBogusString(); - } - return skeleton::generate(fMacros, status); -} +// Note: toSkeleton defined in number_skeletons.cpp template LocalPointer NumberFormatterSettings::clone() const & { @@ -358,15 +358,7 @@ LocalizedNumberFormatter NumberFormatter::withLocale(const Locale& locale) { return with().locale(locale); } -UnlocalizedNumberFormatter -NumberFormatter::forSkeleton(const UnicodeString& skeleton, UErrorCode& status) { - return skeleton::create(skeleton, nullptr, status); -} - -UnlocalizedNumberFormatter -NumberFormatter::forSkeleton(const UnicodeString& skeleton, UParseError& perror, UErrorCode& status) { - return skeleton::create(skeleton, &perror, status); -} +// Note: forSkeleton defined in number_skeletons.cpp template using NFS = NumberFormatterSettings; @@ -766,14 +758,11 @@ int32_t LocalizedNumberFormatter::getCallCount() const { return umtx_loadAcquire(*callCount); } -Format* LocalizedNumberFormatter::toFormat(UErrorCode& status) const { 
- if (U_FAILURE(status)) { - return nullptr; - } - LocalPointer retval( - new LocalizedNumberFormatterAsFormat(*this, fMacros.locale), status); - return retval.orphan(); -} +// Note: toFormat defined in number_asformat.cpp +#if (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(_MSC_VER) +// Warning 4661. +#pragma warning(pop) +#endif #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/number_longnames.cpp b/icu4c/source/i18n/number_longnames.cpp index 5378eda8b24..74ee0ef3fd3 100644 --- a/icu4c/source/i18n/number_longnames.cpp +++ b/icu4c/source/i18n/number_longnames.cpp @@ -188,6 +188,12 @@ LongNameHandler* LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unitRef, const MeasureUnit &perUnit, const UNumberUnitWidth &width, const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status) { + if (uprv_strlen(unitRef.getType()) == 0 || uprv_strlen(perUnit.getType()) == 0) { + // TODO(ICU-20941): Unsanctioned unit. Not yet fully supported. Set an error code. + status = U_UNSUPPORTED_ERROR; + return nullptr; + } + MeasureUnit unit = unitRef; if (uprv_strcmp(perUnit.getType(), "none") != 0) { // Compound unit: first try to simplify (e.g., meters per second is its own unit). 
diff --git a/icu4c/source/i18n/number_skeletons.cpp b/icu4c/source/i18n/number_skeletons.cpp index e186a56e698..5b0fe2b1b57 100644 --- a/icu4c/source/i18n/number_skeletons.cpp +++ b/icu4c/source/i18n/number_skeletons.cpp @@ -22,6 +22,8 @@ #include "charstr.h" #include "string_segment.h" #include "unicode/errorcode.h" +#include "util.h" +#include "measunit_impl.h" using namespace icu; using namespace icu::number; @@ -1036,12 +1038,23 @@ void blueprint_helpers::parseIdentifierUnitOption(const StringSegment& segment, SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status); ErrorCode internalStatus; - MeasureUnit::parseCoreUnitIdentifier(buffer.toStringPiece(), ¯os.unit, ¯os.perUnit, internalStatus); + auto fullUnit = MeasureUnitImpl::forIdentifier(buffer.toStringPiece(), internalStatus); if (internalStatus.isFailure()) { // throw new SkeletonSyntaxException("Invalid core unit identifier", segment, e); status = U_NUMBER_SKELETON_SYNTAX_ERROR; return; } + + // TODO(ICU-20941): Clean this up. + for (int32_t i = 0; i < fullUnit.units.length(); i++) { + SingleUnitImpl* subUnit = fullUnit.units[i]; + if (subUnit->dimensionality > 0) { + macros.unit = macros.unit.product(subUnit->build(status), status); + } else { + subUnit->dimensionality *= -1; + macros.perUnit = macros.perUnit.product(subUnit->build(status), status); + } + } } void blueprint_helpers::parseFractionStem(const StringSegment& segment, MacroProps& macros, @@ -1670,4 +1683,49 @@ bool GeneratorHelpers::scale(const MacroProps& macros, UnicodeString& sb, UError } +// Definitions of public API methods (put here for dependency disentanglement) + +#if (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(_MSC_VER) +// Ignore MSVC warning 4661. This is generated for NumberFormatterSettings<>::toSkeleton() as this method +// is defined elsewhere (in number_skeletons.cpp). 
The compiler is warning that the explicit template instantiation +// inside this single translation unit (CPP file) is incomplete, and thus it isn't sure if the template class is +// fully defined. However, since each translation unit explicitly instantiates all the necessary template classes, +// they will all be passed to the linker, and the linker will still find and export all the class members. +#pragma warning(push) +#pragma warning(disable: 4661) +#endif + +template +UnicodeString NumberFormatterSettings::toSkeleton(UErrorCode& status) const { + if (U_FAILURE(status)) { + return ICU_Utility::makeBogusString(); + } + if (fMacros.copyErrorTo(status)) { + return ICU_Utility::makeBogusString(); + } + return skeleton::generate(fMacros, status); +} + +// Declare all classes that implement NumberFormatterSettings +// See https://stackoverflow.com/a/495056/1407170 +template +class icu::number::NumberFormatterSettings; +template +class icu::number::NumberFormatterSettings; + +UnlocalizedNumberFormatter +NumberFormatter::forSkeleton(const UnicodeString& skeleton, UErrorCode& status) { + return skeleton::create(skeleton, nullptr, status); +} + +UnlocalizedNumberFormatter +NumberFormatter::forSkeleton(const UnicodeString& skeleton, UParseError& perror, UErrorCode& status) { + return skeleton::create(skeleton, &perror, status); +} + +#if (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(_MSC_VER) +// Warning 4661. +#pragma warning(pop) +#endif + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/ucln_in.h b/icu4c/source/i18n/ucln_in.h index 2f70a8500e1..765cdd559fb 100644 --- a/icu4c/source/i18n/ucln_in.h +++ b/icu4c/source/i18n/ucln_in.h @@ -26,6 +26,7 @@ as the functions are suppose to be called. It's usually best to have child dependencies called first. 
*/ typedef enum ECleanupI18NType { UCLN_I18N_START = -1, + UCLN_I18N_UNIT_EXTRAS, UCLN_I18N_NUMBER_SKELETONS, UCLN_I18N_CURRENCY_SPACING, UCLN_I18N_SPOOF, diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index 31c93c0f73d..9353159c92a 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -20,6 +20,7 @@ #if !UCONFIG_NO_FORMATTING #include "unicode/unistr.h" +#include "unicode/localpointer.h" /** * \file @@ -29,6 +30,202 @@ U_NAMESPACE_BEGIN class StringEnumeration; +struct MeasureUnitImpl; + +#ifndef U_HIDE_DRAFT_API +/** + * Enumeration for unit complexity. There are three levels: + * + * - SINGLE: A single unit, optionally with a power and/or SI prefix. Examples: hectare, + * square-kilometer, kilojoule, one-per-second. + * - COMPOUND: A unit composed of the product of multiple single units. Examples: + * meter-per-second, kilowatt-hour, kilogram-meter-per-square-second. + * - SEQUENCE: A unit composed of the sum of multiple single units. Examples: foot+inch, + * hour+minute+second, degree+arcminute+arcsecond. + * + * The complexity determines which operations are available. For example, you cannot set the power + * or SI prefix of a compound unit. + * + * @draft ICU 67 + */ +enum UMeasureUnitComplexity { + /** + * A single unit, like kilojoule. + * + * @draft ICU 67 + */ + UMEASURE_UNIT_SINGLE, + + /** + * A compound unit, like meter-per-second. + * + * @draft ICU 67 + */ + UMEASURE_UNIT_COMPOUND, + + /** + * A sequence unit, like hour+minute. + * + * @draft ICU 67 + */ + UMEASURE_UNIT_SEQUENCE +}; + +/** + * Enumeration for SI prefixes, such as "kilo". + * + * @draft ICU 67 + */ +typedef enum UMeasureSIPrefix { + + /** + * SI prefix: yotta, 10^24. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_YOTTA = 24, + + /** + * SI prefix: zetta, 10^21. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_ZETTA = 21, + + /** + * SI prefix: exa, 10^18. 
+ * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_EXA = 18, + + /** + * SI prefix: peta, 10^15. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_PETA = 15, + + /** + * SI prefix: tera, 10^12. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_TERA = 12, + + /** + * SI prefix: giga, 10^9. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_GIGA = 9, + + /** + * SI prefix: mega, 10^6. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_MEGA = 6, + + /** + * SI prefix: kilo, 10^3. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_KILO = 3, + + /** + * SI prefix: hecto, 10^2. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_HECTO = 2, + + /** + * SI prefix: deka, 10^1. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_DEKA = 1, + + /** + * The absence of an SI prefix. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_ONE = 0, + + /** + * SI prefix: deci, 10^-1. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_DECI = -1, + + /** + * SI prefix: centi, 10^-2. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_CENTI = -2, + + /** + * SI prefix: milli, 10^-3. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_MILLI = -3, + + /** + * SI prefix: micro, 10^-6. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_MICRO = -6, + + /** + * SI prefix: nano, 10^-9. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_NANO = -9, + + /** + * SI prefix: pico, 10^-12. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_PICO = -12, + + /** + * SI prefix: femto, 10^-15. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_FEMTO = -15, + + /** + * SI prefix: atto, 10^-18. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_ATTO = -18, + + /** + * SI prefix: zepto, 10^-21. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_ZEPTO = -21, + + /** + * SI prefix: yocto, 10^-24. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_YOCTO = -24 +} UMeasureSIPrefix; +#endif // U_HIDE_DRAFT_API /** * A unit such as length, mass, volume, currency, etc. 
A unit is @@ -52,13 +249,43 @@ class U_I18N_API MeasureUnit: public UObject { * @stable ICU 3.0 */ MeasureUnit(const MeasureUnit &other); - + +#ifndef U_HIDE_DRAFT_API /** - * Assignment operator. + * Move constructor. + * @draft ICU 67 + */ + MeasureUnit(MeasureUnit &&other) noexcept; + + /** + * Construct a MeasureUnit from a CLDR Sequence Unit Identifier, defined in UTS 35. + * Validates and canonicalizes the identifier. + * + *
+     * MeasureUnit example = MeasureUnit::forIdentifier("furlong-per-nanosecond", status);
+     * 
+ * + * @param identifier The CLDR Sequence Unit Identifier + * @param status Set if the identifier is invalid. + * @draft ICU 67 + */ + static MeasureUnit forIdentifier(StringPiece identifier, UErrorCode& status); +#endif // U_HIDE_DRAFT_API + + /** + * Copy assignment operator. * @stable ICU 3.0 */ MeasureUnit &operator=(const MeasureUnit &other); +#ifndef U_HIDE_DRAFT_API + /** + * Move assignment operator. + * @draft ICU 67 + */ + MeasureUnit &operator=(MeasureUnit &&other) noexcept; +#endif // U_HIDE_DRAFT_API + /** * Returns a polymorphic clone of this object. The result will * have the same class as returned by getDynamicClassID(). @@ -90,16 +317,152 @@ class U_I18N_API MeasureUnit: public UObject { /** * Get the type. + * + * If the unit does not have a type, the empty string is returned. + * * @stable ICU 53 */ const char *getType() const; /** * Get the sub type. + * + * If the unit does not have a subtype, the empty string is returned. + * * @stable ICU 53 */ const char *getSubtype() const; +#ifndef U_HIDE_DRAFT_API + /** + * Get the CLDR Sequence Unit Identifier for this MeasureUnit, as defined in UTS 35. + * + * @return The string form of this unit, owned by this MeasureUnit. + * @draft ICU 67 + */ + const char* getIdentifier() const; + + /** + * Compute the complexity of the unit. See UMeasureUnitComplexity for more information. + * + * @param status Set if an error occurs. + * @return The unit complexity. + * @draft ICU 67 + */ + UMeasureUnitComplexity getComplexity(UErrorCode& status) const; + + /** + * Creates a MeasureUnit which is this SINGLE unit augmented with the specified SI prefix. + * For example, UMEASURE_SI_PREFIX_KILO for "kilo". + * + * There is sufficient locale data to format all standard SI prefixes. + * + * NOTE: Only works on SINGLE units. If this is a COMPOUND or SEQUENCE unit, an error will + * occur. For more information, see UMeasureUnitComplexity. + * + * @param prefix The SI prefix, from UMeasureSIPrefix. 
+ * @param status Set if this is not a SINGLE unit or if another error occurs. + * @return A new SINGLE unit. + * @draft ICU 67 + */ + MeasureUnit withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const; + + /** + * Gets the current SI prefix of this SINGLE unit. For example, if the unit has the SI prefix + * "kilo", then UMEASURE_SI_PREFIX_KILO is returned. + * + * NOTE: Only works on SINGLE units. If this is a COMPOUND or SEQUENCE unit, an error will + * occur. For more information, see UMeasureUnitComplexity. + * + * @param status Set if this is not a SINGLE unit or if another error occurs. + * @return The SI prefix of this SINGLE unit, from UMeasureSIPrefix. + * @draft ICU 67 + */ + UMeasureSIPrefix getSIPrefix(UErrorCode& status) const; + + /** + * Creates a MeasureUnit which is this SINGLE unit augmented with the specified dimensionality + * (power). For example, if dimensionality is 2, the unit will be squared. + * + * NOTE: Only works on SINGLE units. If this is a COMPOUND or SEQUENCE unit, an error will + * occur. For more information, see UMeasureUnitComplexity. + * + * @param dimensionality The dimensionality (power). + * @param status Set if this is not a SINGLE unit or if another error occurs. + * @return A new SINGLE unit. + * @draft ICU 67 + */ + MeasureUnit withDimensionality(int32_t dimensionality, UErrorCode& status) const; + + /** + * Gets the dimensionality (power) of this MeasureUnit. For example, if the unit is square, + * then 2 is returned. + * + * NOTE: Only works on SINGLE units. If this is a COMPOUND or SEQUENCE unit, an error will + * occur. For more information, see UMeasureUnitComplexity. + * + * @param status Set if this is not a SINGLE unit or if another error occurs. + * @return The dimensionality (power) of this simple unit. + * @draft ICU 67 + */ + int32_t getDimensionality(UErrorCode& status) const; + + /** + * Gets the reciprocal of this MeasureUnit, with the numerator and denominator flipped. 
+ * + * For example, if the receiver is "meter-per-second", the unit "second-per-meter" is returned. + * + * NOTE: Only works on SINGLE and COMPOUND units. If this is a SEQUENCE unit, an error will + * occur. For more information, see UMeasureUnitComplexity. + * + * @param status Set if this is a SEQUENCE unit or if another error occurs. + * @return The reciprocal of the target unit. + * @draft ICU 67 + */ + MeasureUnit reciprocal(UErrorCode& status) const; + + /** + * Gets the product of this unit with another unit. This is a way to build units from + * constituent parts. + * + * The numerator and denominator are preserved through this operation. + * + * For example, if the receiver is "kilowatt" and the argument is "hour-per-day", then the + * unit "kilowatt-hour-per-day" is returned. + * + * NOTE: Only works on SINGLE and COMPOUND units. If either unit (receiver and argument) is a + * SEQUENCE unit, an error will occur. For more information, see UMeasureUnitComplexity. + * + * @param other The MeasureUnit to multiply with the target. + * @param status Set if this or other is a SEQUENCE unit or if another error occurs. + * @return The product of the target unit with the provided unit. + * @draft ICU 67 + */ + MeasureUnit product(const MeasureUnit& other, UErrorCode& status) const; +#endif // U_HIDE_DRAFT_API + +#ifndef U_HIDE_INTERNAL_API + /** + * Gets the list of SINGLE units contained within a SEQUENCE or COMPOUND unit. + * + * Examples: + * - Given "meter-kilogram-per-second", three units will be returned: "meter", + * "kilogram", and "one-per-second". + * - Given "hour+minute+second", three units will be returned: "hour", "minute", + * and "second". + * + * If this is a SINGLE unit, an array of length 1 will be returned. + * + * TODO(ICU-21021): Finalize this API and propose it as draft. + * + * @param outCount The number of elements in the return array. + * @param status Set if an error occurs. + * @return An array of single units, owned by the caller. 
+ * @internal ICU 67 Technical Preview + */ + LocalArray splitToSingleUnits(int32_t& outCount, UErrorCode& status) const; +#endif // U_HIDE_INTERNAL_API + /** * getAvailable gets all of the available units. * If there are too many units to fit into destCapacity then the @@ -194,26 +557,6 @@ class U_I18N_API MeasureUnit: public UObject { */ static int32_t internalGetIndexForTypeAndSubtype(const char *type, const char *subtype); - /** - * ICU use only. - * @return Whether subType is known to ICU. - * @internal - */ - static bool findBySubType(StringPiece subType, MeasureUnit* output); - - /** - * ICU use only. - * Parse a core unit identifier into a numerator and denominator unit. - * @param coreUnitIdentifier The string to parse. - * @param numerator Output: set to the numerator unit. - * @param denominator Output: set to the denominator unit, if present. - * @param status Set to U_ILLEGAL_ARGUMENT_ERROR if the core unit identifier is not known. - * @return Whether both a numerator and denominator are returned. - * @internal - */ - static bool parseCoreUnitIdentifier( - StringPiece coreUnitIdentifier, MeasureUnit* numerator, MeasureUnit* denominator, UErrorCode& status); - /** * ICU use only. * @internal @@ -3342,7 +3685,7 @@ class U_I18N_API MeasureUnit: public UObject { * For ICU use only. * @internal */ - void initCurrency(const char *isoCurrency); + void initCurrency(StringPiece isoCurrency); /** * For ICU use only. @@ -3353,16 +3696,26 @@ class U_I18N_API MeasureUnit: public UObject { #endif /* U_HIDE_INTERNAL_API */ private: - int32_t fTypeId; - int32_t fSubTypeId; - char fCurrency[4]; - MeasureUnit(int32_t typeId, int32_t subTypeId) : fTypeId(typeId), fSubTypeId(subTypeId) { - fCurrency[0] = 0; - } + // If non-null, fImpl is owned by the MeasureUnit. 
+ MeasureUnitImpl* fImpl; + + // These two ints are indices into static string lists in measunit.cpp + int16_t fSubTypeId; + int8_t fTypeId; + + MeasureUnit(int32_t typeId, int32_t subTypeId); + MeasureUnit(MeasureUnitImpl&& impl); void setTo(int32_t typeId, int32_t subTypeId); int32_t getOffset() const; static MeasureUnit *create(int typeId, int subTypeId, UErrorCode &status); + + /** + * @return Whether subType is known to ICU. + */ + static bool findBySubType(StringPiece subType, MeasureUnit* output); + + friend struct MeasureUnitImpl; }; U_NAMESPACE_END diff --git a/icu4c/source/i18n/unicode/numberformatter.h b/icu4c/source/i18n/unicode/numberformatter.h index 112a285b766..985c02d4b02 100644 --- a/icu4c/source/i18n/unicode/numberformatter.h +++ b/icu4c/source/i18n/unicode/numberformatter.h @@ -1436,6 +1436,16 @@ struct U_I18N_API MacroProps : public UMemory { } // namespace impl +#if (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(_MSC_VER) +// Ignore MSVC warning 4661. This is generated for NumberFormatterSettings<>::toSkeleton() as this method +// is defined elsewhere (in number_skeletons.cpp). The compiler is warning that the explicit template instantiation +// inside this single translation unit (CPP file) is incomplete, and thus it isn't sure if the template class is +// fully defined. However, since each translation unit explicitly instantiates all the necessary template classes, +// they will all be passed to the linker, and the linker will still find and export all the class members. +#pragma warning(push) +#pragma warning(disable: 4661) +#endif + /** * An abstract base class for specifying settings related to number formatting. This class is implemented by * {@link UnlocalizedNumberFormatter} and {@link LocalizedNumberFormatter}. 
This class is not intended for @@ -2402,6 +2412,11 @@ class U_I18N_API LocalizedNumberFormatter friend class UnlocalizedNumberFormatter; }; +#if (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN) && defined(_MSC_VER) +// Warning 4661. +#pragma warning(pop) +#endif + /** * The result of a number formatting operation. This class allows the result to be exported in several data types, * including a UnicodeString and a FieldPositionIterator. diff --git a/icu4c/source/test/depstest/dependencies.txt b/icu4c/source/test/depstest/dependencies.txt index 8437b4e3f64..e096586b3f3 100644 --- a/icu4c/source/test/depstest/dependencies.txt +++ b/icu4c/source/test/depstest/dependencies.txt @@ -869,7 +869,8 @@ library: i18n dayperiodrules listformatter formatting formattable_cnv regex regex_cnv translit - double_conversion number_representation number_output numberformatter numberparser + double_conversion number_representation number_output numberformatter number_skeletons numberparser + units_extra universal_time_scale uclean_i18n @@ -981,15 +982,15 @@ group: number_output group: numberformatter # ICU 60+ NumberFormatter API - number_affixutils.o number_asformat.o - number_capi.o number_compact.o number_currencysymbols.o + number_affixutils.o + number_compact.o number_currencysymbols.o number_decimfmtprops.o number_fluent.o number_formatimpl.o number_grouping.o number_integerwidth.o number_longnames.o number_mapper.o number_modifiers.o number_multiplier.o number_notation.o number_padding.o number_patternmodifier.o number_patternstring.o number_rounding.o - number_scientific.o number_skeletons.o + number_scientific.o currpinf.o dcfmtsym.o numsys.o numrange_fluent.o numrange_impl.o deps @@ -997,6 +998,13 @@ group: numberformatter number_representation number_output uclean_i18n common +group: number_skeletons + # Number skeleton support; separated from numberformatter + number_skeletons.o number_capi.o number_asformat.o + deps + numberformatter + units_extra + group: 
numberparser numparse_affixes.o numparse_compositions.o numparse_currency.o numparse_decimal.o numparse_impl.o numparse_parsednumber.o @@ -1037,7 +1045,8 @@ group: formatting # messageformat choicfmt.o msgfmt.o plurfmt.o selfmt.o umsg.o deps - decnumber formattable format units numberformatter numberparser formatted_value_sbimpl + decnumber formattable format units numberformatter number_skeletons numberparser + formatted_value_sbimpl listformatter dayperiodrules collation collation_builder # for rbnf @@ -1053,10 +1062,15 @@ group: sharedbreakiterator deps breakiterator +group: units_extra + measunit_extra.o + deps + units ucharstriebuilder ucharstrie uclean_i18n + group: units measunit.o currunit.o nounit.o deps - stringenumeration + stringenumeration errorcode group: decnumber decContext.o decNumber.o diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 2b53f8bc024..4846540dbdb 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -79,6 +79,10 @@ private: void Test20332_PersonUnits(); void TestNumericTime(); void TestNumericTimeSomeSpecialFormats(); + void TestInvalidIdentifiers(); + void TestCompoundUnitOperations(); + void TestIdentifiers(); + void verifyFormat( const char *description, const MeasureFormat &fmt, @@ -138,6 +142,21 @@ private: NumberFormat::EAlignmentFields field, int32_t start, int32_t end); + void verifySingleUnit( + const MeasureUnit& unit, + UMeasureSIPrefix siPrefix, + int8_t power, + const char* identifier); + void verifyCompoundUnit( + const MeasureUnit& unit, + const char* identifier, + const char** subIdentifiers, + int32_t subIdentifierCount); + void verifySequenceUnit( + const MeasureUnit& unit, + const char* identifier, + const char** subIdentifiers, + int32_t subIdentifierCount); }; void MeasureFormatTest::runIndexedTest( @@ -182,6 +201,9 @@ void MeasureFormatTest::runIndexedTest( TESTCASE_AUTO(Test20332_PersonUnits); 
TESTCASE_AUTO(TestNumericTime); TESTCASE_AUTO(TestNumericTimeSomeSpecialFormats); + TESTCASE_AUTO(TestInvalidIdentifiers); + TESTCASE_AUTO(TestCompoundUnitOperations); + TESTCASE_AUTO(TestIdentifiers); TESTCASE_AUTO_END; } @@ -3215,6 +3237,212 @@ void MeasureFormatTest::TestNumericTimeSomeSpecialFormats() { verifyFormat("Danish fhoursFminutes", fmtDa, fhoursFminutes, 2, "2.03,877"); } +void MeasureFormatTest::TestInvalidIdentifiers() { + IcuTestErrorCode status(*this, "TestInvalidIdentifiers"); + + const char* const inputs[] = { + "kilo", + "kilokilo", + "onekilo", + "meterkilo", + "meter-kilo", + "k", + "meter-", + "meter+", + "-meter", + "+meter", + "-kilometer", + "+kilometer", + "-p2-meter", + "+p2-meter", + "+", + "-" + }; + + for (const auto& input : inputs) { + status.setScope(input); + MeasureUnit::forIdentifier(input, status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + } +} + +void MeasureFormatTest::TestCompoundUnitOperations() { + IcuTestErrorCode status(*this, "TestCompoundUnitOperations"); + + MeasureUnit::forIdentifier("kilometer-per-second-joule", status); + + MeasureUnit kilometer = MeasureUnit::getKilometer(); + MeasureUnit cubicMeter = MeasureUnit::getCubicMeter(); + MeasureUnit meter = kilometer.withSIPrefix(UMEASURE_SI_PREFIX_ONE, status); + MeasureUnit centimeter1 = kilometer.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); + MeasureUnit centimeter2 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); + MeasureUnit cubicDecimeter = cubicMeter.withSIPrefix(UMEASURE_SI_PREFIX_DECI, status); + + verifySingleUnit(kilometer, UMEASURE_SI_PREFIX_KILO, 1, "kilometer"); + verifySingleUnit(meter, UMEASURE_SI_PREFIX_ONE, 1, "meter"); + verifySingleUnit(centimeter1, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); + verifySingleUnit(centimeter2, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); + verifySingleUnit(cubicDecimeter, UMEASURE_SI_PREFIX_DECI, 3, "cubic-decimeter"); + + assertTrue("centimeter equality", centimeter1 == centimeter2); + 
assertTrue("kilometer inequality", centimeter1 != kilometer); + + MeasureUnit squareMeter = meter.withDimensionality(2, status); + MeasureUnit overCubicCentimeter = centimeter1.withDimensionality(-3, status); + MeasureUnit quarticKilometer = kilometer.withDimensionality(4, status); + MeasureUnit overQuarticKilometer1 = kilometer.withDimensionality(-4, status); + + verifySingleUnit(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter"); + verifySingleUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "one-per-cubic-centimeter"); + verifySingleUnit(quarticKilometer, UMEASURE_SI_PREFIX_KILO, 4, "p4-kilometer"); + verifySingleUnit(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + + assertTrue("power inequality", quarticKilometer != overQuarticKilometer1); + + MeasureUnit overQuarticKilometer2 = quarticKilometer.reciprocal(status); + MeasureUnit overQuarticKilometer3 = kilometer.product(kilometer, status) + .product(kilometer, status) + .product(kilometer, status) + .reciprocal(status); + MeasureUnit overQuarticKilometer4 = meter.withDimensionality(4, status) + .reciprocal(status) + .withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); + + verifySingleUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifySingleUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifySingleUnit(overQuarticKilometer4, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + + assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); + assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); + assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer4); + + MeasureUnit kiloSquareSecond = MeasureUnit::getSecond() + .withDimensionality(2, status).withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); + MeasureUnit meterSecond = meter.product(kiloSquareSecond, status); + MeasureUnit cubicMeterSecond1 = meter.withDimensionality(3, 
status).product(kiloSquareSecond, status); + MeasureUnit centimeterSecond1 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status).product(kiloSquareSecond, status); + MeasureUnit secondCubicMeter = kiloSquareSecond.product(meter.withDimensionality(3, status), status); + MeasureUnit secondCentimeter = kiloSquareSecond.product(meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status), status); + MeasureUnit secondCentimeterPerKilometer = secondCentimeter.product(kilometer.reciprocal(status), status); + + verifySingleUnit(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); + const char* meterSecondSub[] = {"meter", "square-kilosecond"}; + verifyCompoundUnit(meterSecond, "meter-square-kilosecond", + meterSecondSub, UPRV_LENGTHOF(meterSecondSub)); + const char* cubicMeterSecond1Sub[] = {"cubic-meter", "square-kilosecond"}; + verifyCompoundUnit(cubicMeterSecond1, "cubic-meter-square-kilosecond", + cubicMeterSecond1Sub, UPRV_LENGTHOF(cubicMeterSecond1Sub)); + const char* centimeterSecond1Sub[] = {"centimeter", "square-kilosecond"}; + verifyCompoundUnit(centimeterSecond1, "centimeter-square-kilosecond", + centimeterSecond1Sub, UPRV_LENGTHOF(centimeterSecond1Sub)); + const char* secondCubicMeterSub[] = {"cubic-meter", "square-kilosecond"}; + verifyCompoundUnit(secondCubicMeter, "cubic-meter-square-kilosecond", + secondCubicMeterSub, UPRV_LENGTHOF(secondCubicMeterSub)); + const char* secondCentimeterSub[] = {"centimeter", "square-kilosecond"}; + verifyCompoundUnit(secondCentimeter, "centimeter-square-kilosecond", + secondCentimeterSub, UPRV_LENGTHOF(secondCentimeterSub)); + const char* secondCentimeterPerKilometerSub[] = {"centimeter", "square-kilosecond", "one-per-kilometer"}; + verifyCompoundUnit(secondCentimeterPerKilometer, "centimeter-square-kilosecond-per-kilometer", + secondCentimeterPerKilometerSub, UPRV_LENGTHOF(secondCentimeterPerKilometerSub)); + + assertTrue("reordering equality", cubicMeterSecond1 == secondCubicMeter); + assertTrue("additional simple 
units inequality", secondCubicMeter != secondCentimeter); + + // Don't allow get/set power or SI prefix on compound units + status.errIfFailureAndReset(); + meterSecond.getDimensionality(status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + meterSecond.withDimensionality(3, status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + meterSecond.getSIPrefix(status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + meterSecond.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + + // Test that StringPiece does not overflow + MeasureUnit centimeter3 = MeasureUnit::forIdentifier({secondCentimeter.getIdentifier(), 10}, status); + verifySingleUnit(centimeter3, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); + assertTrue("string piece equality", centimeter1 == centimeter3); + + MeasureUnit footInch = MeasureUnit::forIdentifier("foot-and-inch", status); + MeasureUnit inchFoot = MeasureUnit::forIdentifier("inch-and-foot", status); + + const char* footInchSub[] = {"foot", "inch"}; + verifySequenceUnit(footInch, "foot-and-inch", + footInchSub, UPRV_LENGTHOF(footInchSub)); + const char* inchFootSub[] = {"inch", "foot"}; + verifySequenceUnit(inchFoot, "inch-and-foot", + inchFootSub, UPRV_LENGTHOF(inchFootSub)); + + assertTrue("order matters inequality", footInch != inchFoot); + + MeasureUnit one1; + MeasureUnit one2 = MeasureUnit::forIdentifier("one", status); + MeasureUnit one3 = MeasureUnit::forIdentifier("", status); + MeasureUnit squareOne = one2.withDimensionality(2, status); + MeasureUnit onePerOne = one2.reciprocal(status); + MeasureUnit squareKiloOne = squareOne.withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); + MeasureUnit onePerSquareKiloOne = squareKiloOne.reciprocal(status); + MeasureUnit oneOne = MeasureUnit::forIdentifier("one-one", status); + MeasureUnit onePlusOne = MeasureUnit::forIdentifier("one-and-one", status); + MeasureUnit kilometer2 = one2.product(kilometer, status); + + 
verifySingleUnit(one1, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(one2, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(one3, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(squareOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(onePerOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(squareKiloOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(onePerSquareKiloOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(oneOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(onePlusOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(kilometer2, UMEASURE_SI_PREFIX_KILO, 1, "kilometer"); + + assertTrue("one equality", one1 == one2); + assertTrue("one equality", one2 == one3); + assertTrue("one-per-one equality", onePerOne == onePerSquareKiloOne); + assertTrue("kilometer equality", kilometer == kilometer2); + + // Test out-of-range powers + MeasureUnit power15 = MeasureUnit::forIdentifier("p15-kilometer", status); + verifySingleUnit(power15, UMEASURE_SI_PREFIX_KILO, 15, "p15-kilometer"); + status.errIfFailureAndReset(); + MeasureUnit power16a = MeasureUnit::forIdentifier("p16-kilometer", status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + MeasureUnit power16b = power15.product(kilometer, status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + MeasureUnit powerN15 = MeasureUnit::forIdentifier("one-per-p15-kilometer", status); + verifySingleUnit(powerN15, UMEASURE_SI_PREFIX_KILO, -15, "one-per-p15-kilometer"); + status.errIfFailureAndReset(); + MeasureUnit powerN16a = MeasureUnit::forIdentifier("one-per-p16-kilometer", status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + MeasureUnit powerN16b = powerN15.product(overQuarticKilometer1, status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); +} + +void MeasureFormatTest::TestIdentifiers() { + IcuTestErrorCode status(*this, "TestIdentifiers"); + struct TestCase { + bool valid; + const char* id; + const char* 
normalized; + } cases[] = { + { true, "square-meter-per-square-meter", "square-meter-per-square-meter" }, + // TODO(ICU-20920): Add more test cases once the proper ranking is available. + }; + for (const auto& cas : cases) { + status.setScope(cas.id); + MeasureUnit unit = MeasureUnit::forIdentifier(cas.id, status); + if (!cas.valid) { + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + continue; + } + const char* actual = unit.getIdentifier(); + assertEquals(cas.id, cas.normalized, actual); + status.errIfFailureAndReset(); + } +} + void MeasureFormatTest::verifyFieldPosition( const char *description, @@ -3286,6 +3514,97 @@ void MeasureFormatTest::verifyFormat( } } +void MeasureFormatTest::verifySingleUnit( + const MeasureUnit& unit, + UMeasureSIPrefix siPrefix, + int8_t power, + const char* identifier) { + IcuTestErrorCode status(*this, "verifySingleUnit"); + UnicodeString uid(identifier, -1, US_INV); + assertEquals(uid + ": SI prefix", + siPrefix, + unit.getSIPrefix(status)); + status.errIfFailureAndReset("%s: SI prefix", identifier); + assertEquals(uid + ": Power", + static_cast(power), + static_cast(unit.getDimensionality(status))); + status.errIfFailureAndReset("%s: Power", identifier); + assertEquals(uid + ": Identifier", + identifier, + unit.getIdentifier()); + status.errIfFailureAndReset("%s: Identifier", identifier); + assertTrue(uid + ": Constructor", + unit == MeasureUnit::forIdentifier(identifier, status)); + status.errIfFailureAndReset("%s: Constructor", identifier); + assertEquals(uid + ": Complexity", + UMEASURE_UNIT_SINGLE, + unit.getComplexity(status)); + status.errIfFailureAndReset("%s: Complexity", identifier); +} + +void MeasureFormatTest::verifyCompoundUnit( + const MeasureUnit& unit, + const char* identifier, + const char** subIdentifiers, + int32_t subIdentifierCount) { + IcuTestErrorCode status(*this, "verifyCompoundUnit"); + UnicodeString uid(identifier, -1, US_INV); + assertEquals(uid + ": Identifier", + identifier, + 
unit.getIdentifier()); + status.errIfFailureAndReset("%s: Identifier", identifier); + assertTrue(uid + ": Constructor", + unit == MeasureUnit::forIdentifier(identifier, status)); + status.errIfFailureAndReset("%s: Constructor", identifier); + assertEquals(uid + ": Complexity", + UMEASURE_UNIT_COMPOUND, + unit.getComplexity(status)); + status.errIfFailureAndReset("%s: Complexity", identifier); + + int32_t length; + LocalArray subUnits = unit.splitToSingleUnits(length, status); + assertEquals(uid + ": Length", subIdentifierCount, length); + for (int32_t i = 0;; i++) { + if (i >= subIdentifierCount || i >= length) break; + assertEquals(uid + ": Sub-unit #" + Int64ToUnicodeString(i), + subIdentifiers[i], + subUnits[i].getIdentifier()); + assertEquals(uid + ": Sub-unit Complexity", + UMEASURE_UNIT_SINGLE, + subUnits[i].getComplexity(status)); + } +} + +void MeasureFormatTest::verifySequenceUnit( + const MeasureUnit& unit, + const char* identifier, + const char** subIdentifiers, + int32_t subIdentifierCount) { + IcuTestErrorCode status(*this, "verifySequenceUnit"); + UnicodeString uid(identifier, -1, US_INV); + assertEquals(uid + ": Identifier", + identifier, + unit.getIdentifier()); + status.errIfFailureAndReset("%s: Identifier", identifier); + assertTrue(uid + ": Constructor", + unit == MeasureUnit::forIdentifier(identifier, status)); + status.errIfFailureAndReset("%s: Constructor", identifier); + assertEquals(uid + ": Complexity", + UMEASURE_UNIT_SEQUENCE, + unit.getComplexity(status)); + status.errIfFailureAndReset("%s: Complexity", identifier); + + int32_t length; + LocalArray subUnits = unit.splitToSingleUnits(length, status); + assertEquals(uid + ": Length", subIdentifierCount, length); + for (int32_t i = 0;; i++) { + if (i >= subIdentifierCount || i >= length) break; + assertEquals(uid + ": Sub-unit #" + Int64ToUnicodeString(i), + subIdentifiers[i], + subUnits[i].getIdentifier()); + } +} + extern IntlTest *createMeasureFormatTest() { return new 
MeasureFormatTest(); } diff --git a/icu4c/source/test/intltest/numbertest_api.cpp b/icu4c/source/test/intltest/numbertest_api.cpp index 01494ff8ab9..60f4d44db3b 100644 --- a/icu4c/source/test/intltest/numbertest_api.cpp +++ b/icu4c/source/test/intltest/numbertest_api.cpp @@ -675,7 +675,7 @@ void NumberFormatterApiTest::unitCompoundMeasure() { assertFormatDescending( u"Meters Per Second Short (unit that simplifies) and perUnit method", u"measure-unit/length-meter per-measure-unit/duration-second", - u"~unit/meter-per-second", // does not round-trip to the full skeleton above + u"unit/meter-per-second", NumberFormatter::with().unit(METER).perUnit(SECOND), Locale::getEnglish(), u"87,650 m/s", @@ -719,6 +719,23 @@ void NumberFormatterApiTest::unitCompoundMeasure() { u"0.08765 J/fur", u"0.008765 J/fur", u"0 J/fur"); + + // TODO(ICU-20941): Support constructions such as this one. + // assertFormatDescending( + // u"Joules Per Furlong Short with unit identifier via API", + // u"measure-unit/energy-joule per-measure-unit/length-furlong", + // u"unit/joule-per-furlong", + // NumberFormatter::with().unit(MeasureUnit::forIdentifier("joule-per-furlong", status)), + // Locale::getEnglish(), + // u"87,650 J/fur", + // u"8,765 J/fur", + // u"876.5 J/fur", + // u"87.65 J/fur", + // u"8.765 J/fur", + // u"0.8765 J/fur", + // u"0.08765 J/fur", + // u"0.008765 J/fur", + // u"0 J/fur"); } void NumberFormatterApiTest::unitCurrency() { @@ -2811,7 +2828,7 @@ void NumberFormatterApiTest::fieldPositionCoverage() { FormattedNumber result = assertFormatSingle( message, u"measure-unit/length-meter per-measure-unit/duration-second unit-width-full-name", - u"~unit/meter-per-second unit-width-full-name", // does not round-trip to the full skeleton above + u"unit/meter-per-second unit-width-full-name", NumberFormatter::with().unit(METER).perUnit(SECOND).unitWidth(UNUM_UNIT_WIDTH_FULL_NAME), "ky", // locale with the interesting data 68, diff --git a/icu4c/source/test/intltest/numfmtst.cpp 
b/icu4c/source/test/intltest/numfmtst.cpp index 9ebd26e5462..316a74396eb 100644 --- a/icu4c/source/test/intltest/numfmtst.cpp +++ b/icu4c/source/test/intltest/numfmtst.cpp @@ -2166,6 +2166,8 @@ void NumberFormatTest::TestCurrencyUnit(void){ static const UChar BAD2[] = u"??A"; static const UChar XXX[] = u"XXX"; static const char XXX8[] = "XXX"; + static const UChar XYZ[] = u"XYZ"; + static const char XYZ8[] = "XYZ"; static const UChar INV[] = u"{$%"; static const char INV8[] = "{$%"; static const UChar ZZZ[] = u"zz"; @@ -2182,10 +2184,16 @@ void NumberFormatTest::TestCurrencyUnit(void){ CurrencyUnit cu(USD, ec); assertSuccess("CurrencyUnit", ec); - assertEquals("getISOCurrency()", USD, cu.getISOCurrency()); assertEquals("getSubtype()", USD8, cu.getSubtype()); + // Test XYZ, a valid but non-standard currency. + // Note: Country code XY is private-use, so XYZ should remain unallocated. + CurrencyUnit extended(XYZ, ec); + assertSuccess("non-standard", ec); + assertEquals("non-standard", XYZ, extended.getISOCurrency()); + assertEquals("non-standard", XYZ8, extended.getSubtype()); + CurrencyUnit inv(INV, ec); assertEquals("non-invariant", U_INVARIANT_CONVERSION_ERROR, ec); assertEquals("non-invariant", XXX, inv.getISOCurrency()); @@ -2259,15 +2267,20 @@ void NumberFormatTest::TestCurrencyUnit(void){ // Test slicing MeasureUnit sliced1 = cu; MeasureUnit sliced2 = cu; + MeasureUnit sliced3 = extended; assertEquals("Subtype after slicing 1", USD8, sliced1.getSubtype()); assertEquals("Subtype after slicing 2", USD8, sliced2.getSubtype()); + assertEquals("Subtype after slicing 3", XYZ8, sliced3.getSubtype()); CurrencyUnit restored1(sliced1, ec); CurrencyUnit restored2(sliced2, ec); + CurrencyUnit restored3(sliced3, ec); assertSuccess("Restoring from MeasureUnit", ec); assertEquals("Subtype after restoring 1", USD8, restored1.getSubtype()); assertEquals("Subtype after restoring 2", USD8, restored2.getSubtype()); + assertEquals("Subtype after restoring 3", XYZ8, 
restored3.getSubtype()); assertEquals("ISO Code after restoring 1", USD, restored1.getISOCurrency()); assertEquals("ISO Code after restoring 2", USD, restored2.getISOCurrency()); + assertEquals("ISO Code after restoring 3", XYZ, restored3.getISOCurrency()); // Test copy constructor failure LocalPointer meter(MeasureUnit::createMeter(ec));