From 21df05234d6005164dfc73516aaccc8aa945d15c Mon Sep 17 00:00:00 2001 From: Frank Yung-Fong Tang <41213225+FrankYFTang@users.noreply.github.com> Date: Fri, 17 Jan 2020 01:57:49 +0000 Subject: [PATCH 01/15] ICU-20673 Allow built-in translit ID w/o data. See #958 --- .ci-builds/.azure-pipelines.yml | 6 +- .ci-builds/data-filter.json | 10 ++-- icu4c/source/i18n/translit.cpp | 31 ++++++---- icu4c/source/test/intltest/transtst.cpp | 76 +++++++++++++++++++++++++ icu4c/source/test/intltest/transtst.h | 1 + 5 files changed, 107 insertions(+), 17 deletions(-) diff --git a/.ci-builds/.azure-pipelines.yml b/.ci-builds/.azure-pipelines.yml index ed13185c09e..37a4e9c7887 100644 --- a/.ci-builds/.azure-pipelines.yml +++ b/.ci-builds/.azure-pipelines.yml @@ -57,7 +57,11 @@ jobs: vmImage: 'Ubuntu 16.04' steps: - script: | - cd icu4c/source && ICU_DATA_FILTER_FILE=../../.ci-builds/data-filter.json ./runConfigureICU Linux && make -j2 + cd icu4c/source && \ + ICU_DATA_FILTER_FILE=../../.ci-builds/data-filter.json ./runConfigureICU Linux && \ + make -j2 tests && \ + \[ ! -d data/out/build/icudt66l/translit \] && \ + (cd test/intltest && LD_LIBRARY_PATH=../../lib:../../tools/ctestfw ./intltest translit/TransliteratorTest/TestBasicTransliteratorEvenWithoutData) displayName: 'Build with Data Filter' env: CC: clang diff --git a/.ci-builds/data-filter.json b/.ci-builds/data-filter.json index 177478acff2..d2dd74d4bb1 100644 --- a/.ci-builds/data-filter.json +++ b/.ci-builds/data-filter.json @@ -8,16 +8,18 @@ ] }, // Test mixed feature filter and resource filter +// Exlude translit data so we can run test for ICU-20673 "featureFilters": { "misc": { "whitelist": ["supplementalData"] - } + }, + "translit": "exclude" }, "resourceFilters": [ { "categories": ["misc"], "files": { - "whitelist": ["supplementalData"] + "whitelist": ["supplementalData"] }, "rules": ["+/*"] } @@ -27,8 +29,8 @@ "directory": "$SRC", "replacements": [ { - "src": "translit/Zawgyi_my.txt", - "dest": "translit/Zawgyi_my.txt" + "src": "brkitr/rules/line.txt", + "dest": "brkitr/rules/line_normal.txt" }, "misc/dayPeriods.txt" ] diff --git a/icu4c/source/i18n/translit.cpp b/icu4c/source/i18n/translit.cpp index dae87d06d79..ef44f42aa66 100644 --- a/icu4c/source/i18n/translit.cpp +++ b/icu4c/source/i18n/translit.cpp @@ -1508,28 +1508,35 @@ UBool Transliterator::initializeRegistry(UErrorCode &status) { */ //static const char translit_index[] = "translit_index"; + UErrorCode lstatus = U_ZERO_ERROR; UResourceBundle *bundle, *transIDs, *colBund; - bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open default locale*/, &status); - transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status); + bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open default locale*/, &lstatus); + transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &lstatus); const UnicodeString T_PART = UNICODE_STRING_SIMPLE("-t-"); int32_t row, maxRows; - if (U_SUCCESS(status)) { + if (lstatus == U_MEMORY_ALLOCATION_ERROR) { + delete registry; + registry = nullptr; + status = U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + if (U_SUCCESS(lstatus)) { maxRows = ures_getSize(transIDs); for (row = 0; row < maxRows; row++) { - colBund = ures_getByIndex(transIDs, row, 0, &status); - if (U_SUCCESS(status)) { + colBund = ures_getByIndex(transIDs, row, 0, &lstatus); + if (U_SUCCESS(lstatus)) { UnicodeString id(ures_getKey(colBund), -1, US_INV); if(id.indexOf(T_PART) != -1) { ures_close(colBund); continue; } - UResourceBundle* res = ures_getNextResource(colBund, NULL, &status); + UResourceBundle* res = ures_getNextResource(colBund, NULL, &lstatus); const char* typeStr = ures_getKey(res); UChar type; u_charsToUChars(typeStr, &type, 1); - if (U_SUCCESS(status)) { + if (U_SUCCESS(lstatus)) { int32_t len = 0; const UChar *resString; switch (type) { @@ -1539,19 +1546,19 @@ UBool Transliterator::initializeRegistry(UErrorCode &status) { // row[2]=resource, row[3]=direction { - resString = ures_getStringByKey(res, "resource", &len, &status); + resString = ures_getStringByKey(res, "resource", &len, &lstatus); UBool visible = (type == 0x0066 /*f*/); UTransDirection dir = - (ures_getUnicodeStringByKey(res, "direction", &status).charAt(0) == + (ures_getUnicodeStringByKey(res, "direction", &lstatus).charAt(0) == 0x0046 /*F*/) ? UTRANS_FORWARD : UTRANS_REVERSE; - registry->put(id, UnicodeString(TRUE, resString, len), dir, TRUE, visible, status); + registry->put(id, UnicodeString(TRUE, resString, len), dir, TRUE, visible, lstatus); } break; case 0x61: // 'a' // 'alias'; row[2]=createInstance argument - resString = ures_getString(res, &len, &status); - registry->put(id, UnicodeString(TRUE, resString, len), TRUE, TRUE, status); + resString = ures_getString(res, &len, &lstatus); + registry->put(id, UnicodeString(TRUE, resString, len), TRUE, TRUE, lstatus); break; } } diff --git a/icu4c/source/test/intltest/transtst.cpp b/icu4c/source/test/intltest/transtst.cpp index 965fca88ebb..fd7f733a913 100644 --- a/icu4c/source/test/intltest/transtst.cpp +++ b/icu4c/source/test/intltest/transtst.cpp @@ -196,6 +196,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec, TESTCASE(82,TestHalfwidthFullwidth); TESTCASE(83,TestThai); TESTCASE(84,TestAny); + TESTCASE(85,TestBasicTransliteratorEvenWithoutData); default: name = ""; break; } } @@ -1508,6 +1509,81 @@ void TransliteratorTest::TestNormalizationTransliterator() { delete t; } +/** + * Test we can create basic transliterator even without data. + */ +void TransliteratorTest::TestBasicTransliteratorEvenWithoutData() { + const char16_t* TEST_DATA = u"\u0124e\u0301 \uFB01nd x"; + const char16_t* EXPECTED_RESULTS[] = { + u"H\u0302e\u0301 \uFB01nd x", // NFD + u"\u0124\u00E9 \uFB01nd x", // NFC + u"H\u0302e\u0301 find x", // NFKD + u"\u0124\u00E9 find x", // NFKC + u"\u0124e\u0301 \uFB01nd x", // Hex-Any + u"\u0125e\u0301 \uFB01nd x", // Lower + u"\u0124e\uFB01ndx", // [:^L:]Remove + u"H\u0302e\u0301 \uFB01nd ", // NFD; [x]Remove + u"h\u0302e\u0301 find x", // Lower; NFKD; + u"hefindx", // Lower; NFKD; [:^L:]Remove; NFC; + u"\u0124e \uFB01nd x", // [:Nonspacing Mark:] Remove; + u"He \uFB01nd x", // NFD; [:Nonspacing Mark:] Remove; NFC; + // end + 0 + }; + + const char* BASIC_TRANSLITERATOR_ID[] = { + "NFD", + "NFC", + "NFKD", + "NFKC", + "Hex-Any", + "Lower", + "[:^L:]Remove", + "NFD; [x]Remove", + "Lower; NFKD;", + "Lower; NFKD; [:^L:]Remove; NFC;", + "[:Nonspacing Mark:] Remove;", + "NFD; [:Nonspacing Mark:] Remove; NFC;", + // end + 0 + }; + const char* BASIC_TRANSLITERATOR_RULES[] = { + "::Lower; ::NFKD;", + "::Lower; ::NFKD; ::[:^L:]Remove; ::NFC;", + "::[:Nonspacing Mark:] Remove;", + "::NFD; ::[:Nonspacing Mark:] Remove; ::NFC;", + // end + 0 + }; + for (int32_t i=0; BASIC_TRANSLITERATOR_ID[i]; i++) { + UErrorCode status = U_ZERO_ERROR; + UParseError parseError; + std::unique_ptr translit(Transliterator::createInstance( + BASIC_TRANSLITERATOR_ID[i], UTRANS_FORWARD, parseError, status)); + if (translit.get() == nullptr || !U_SUCCESS(status)) { + dataerrln("FAIL: createInstance %s failed", BASIC_TRANSLITERATOR_ID[i]); + } + UnicodeString data(TEST_DATA); + UnicodeString expected(EXPECTED_RESULTS[i]); + translit->transliterate(data); + if (data != expected) { + dataerrln(UnicodeString("FAIL: expected translit(") + + BASIC_TRANSLITERATOR_ID[i] + ") = '" + + EXPECTED_RESULTS[i] + "' but got '" + data); + } + } + for (int32_t i=0; BASIC_TRANSLITERATOR_RULES[i]; i++) { + UErrorCode status = U_ZERO_ERROR; + UParseError parseError; + std::unique_ptr translit(Transliterator::createFromRules( + "Test", + BASIC_TRANSLITERATOR_RULES[i], UTRANS_FORWARD, parseError, status)); + if (translit.get() == nullptr || !U_SUCCESS(status)) { + dataerrln("FAIL: createFromRules %s failed", BASIC_TRANSLITERATOR_RULES[i]); + } + } +} + /** * Test compound RBT rules. */ diff --git a/icu4c/source/test/intltest/transtst.h b/icu4c/source/test/intltest/transtst.h index 8a2bcc68f69..64246d4add7 100644 --- a/icu4c/source/test/intltest/transtst.h +++ b/icu4c/source/test/intltest/transtst.h @@ -369,6 +369,7 @@ private: */ void TestRegisterAlias(void); + void TestBasicTransliteratorEvenWithoutData(void); //====================================================================== // Support methods //====================================================================== From 8c717b514eb3bbfd624d6f408d3989df3dc9a446 Mon Sep 17 00:00:00 2001 From: Shane Carr Date: Wed, 20 Nov 2019 02:22:20 +0000 Subject: [PATCH 02/15] ICU-20665 Removing number-dependence from ICU4C FormattedStringBuilder fields. See #727 --- icu4c/source/common/uassert.h | 2 + .../source/i18n/formatted_string_builder.cpp | 13 ++- icu4c/source/i18n/formatted_string_builder.h | 92 +++++++++++-------- icu4c/source/i18n/formattedval_impl.h | 1 - icu4c/source/i18n/formattedval_sbimpl.cpp | 61 ++++++------ icu4c/source/i18n/measfmt.cpp | 13 +-- icu4c/source/i18n/number_affixutils.cpp | 28 +++--- icu4c/source/i18n/number_compact.cpp | 6 +- icu4c/source/i18n/number_formatimpl.cpp | 32 +++++-- icu4c/source/i18n/number_longnames.cpp | 6 +- icu4c/source/i18n/number_modifiers.cpp | 30 +++--- icu4c/source/i18n/number_modifiers.h | 8 +- icu4c/source/i18n/number_padding.cpp | 2 +- icu4c/source/i18n/number_patternmodifier.cpp | 4 +- icu4c/source/i18n/number_patternmodifier.h | 2 +- icu4c/source/i18n/number_scientific.cpp | 10 +- icu4c/source/i18n/number_scientific.h | 2 +- icu4c/source/i18n/number_types.h | 2 +- icu4c/source/i18n/number_utypes.h | 2 +- icu4c/source/i18n/numrange_impl.cpp | 11 ++- icu4c/source/i18n/numrange_impl.h | 2 +- icu4c/source/i18n/quantityformatter.cpp | 3 +- icu4c/source/i18n/reldatefmt.cpp | 8 +- .../formatted_string_builder_test.cpp | 73 ++++++++------- .../test/intltest/numbertest_affixutils.cpp | 8 +- .../test/intltest/numbertest_modifiers.cpp | 22 ++--- .../intltest/numbertest_patternmodifier.cpp | 12 +-- 27 files changed, 251 insertions(+), 204 deletions(-) diff --git a/icu4c/source/common/uassert.h b/icu4c/source/common/uassert.h index f0f7a92574b..15cd55c8734 100644 --- a/icu4c/source/common/uassert.h +++ b/icu4c/source/common/uassert.h @@ -31,6 +31,8 @@ #if U_DEBUG # include # define U_ASSERT(exp) assert(exp) +#elif U_CPLUSPLUS_VERSION +# define U_ASSERT(exp) void() #else # define U_ASSERT(exp) #endif diff --git a/icu4c/source/i18n/formatted_string_builder.cpp b/icu4c/source/i18n/formatted_string_builder.cpp index 3024bff6add..5aabc31cc43 100644 --- a/icu4c/source/i18n/formatted_string_builder.cpp +++ b/icu4c/source/i18n/formatted_string_builder.cpp @@ -8,6 +8,7 @@ #include "formatted_string_builder.h" #include "unicode/ustring.h" #include "unicode/utf16.h" +#include "unicode/unum.h" // for UNumberFormatFields literals namespace { @@ -246,7 +247,7 @@ void FormattedStringBuilder::writeTerminator(UErrorCode& status) { return; } getCharPtr()[position] = 0; - getFieldPtr()[position] = UNUM_FIELD_COUNT; + getFieldPtr()[position] = kUndefinedField; fLength--; } @@ -360,11 +361,11 @@ UnicodeString FormattedStringBuilder::toDebugString() const { sb.append(toUnicodeString()); sb.append(u"] [", -1); for (int i = 0; i < fLength; i++) { - if (fieldAt(i) == UNUM_FIELD_COUNT) { + if (fieldAt(i) == kUndefinedField) { sb.append(u'n'); - } else { + } else if (fieldAt(i).getCategory() == UFIELD_CATEGORY_NUMBER) { char16_t c; - switch (fieldAt(i)) { + switch (fieldAt(i).getField()) { case UNUM_SIGN_FIELD: c = u'-'; break; @@ -399,10 +400,12 @@ UnicodeString FormattedStringBuilder::toDebugString() const { c = u'$'; break; default: - c = u'?'; + c = u'0' + fieldAt(i).getField(); break; } sb.append(c); + } else { + sb.append(u'0' + fieldAt(i).getCategory()); } } sb.append(u"]>", -1); diff --git a/icu4c/source/i18n/formatted_string_builder.h b/icu4c/source/i18n/formatted_string_builder.h index 2949ae73e0f..4567dc1d66b 100644 --- a/icu4c/source/i18n/formatted_string_builder.h +++ b/icu4c/source/i18n/formatted_string_builder.h @@ -9,7 +9,8 @@ #include -#include "unicode/unum.h" // for UNUM_FIELD_COUNT +#include + #include "cstring.h" #include "uassert.h" #include "fphdlimp.h" @@ -55,7 +56,20 @@ class U_I18N_API FormattedStringBuilder : public UMemory { // Field category 0 implies the number category so that the number field // literals can be directly passed as a Field type. // See the helper functions in "StringBuilderFieldUtils" below. - typedef uint8_t Field; + // Exported as U_I18N_API so it can be used by other exports on Windows. + struct U_I18N_API Field { + uint8_t bits; + + Field() = default; + constexpr Field(uint8_t category, uint8_t field); + + inline UFieldCategory getCategory() const; + inline int32_t getField() const; + inline bool isNumeric() const; + inline bool isUndefined() const; + inline bool operator==(const Field& other) const; + inline bool operator!=(const Field& other) const; + }; FormattedStringBuilder &operator=(const FormattedStringBuilder &other); @@ -204,46 +218,50 @@ class U_I18N_API FormattedStringBuilder : public UMemory { friend class FormattedValueStringBuilderImpl; }; +static_assert( + std::is_pod::value, + "Field should be a POD type for efficient initialization"); + +constexpr FormattedStringBuilder::Field::Field(uint8_t category, uint8_t field) + : bits(( + U_ASSERT(category <= 0xf), + U_ASSERT(field <= 0xf), + static_cast((category << 4) | field) + )) {} + /** - * Helper functions for dealing with the Field typedef, which stores fields - * in a compressed format. + * Internal constant for the undefined field for use in FormattedStringBuilder. */ -class StringBuilderFieldUtils { -public: - struct CategoryFieldPair { - int32_t category; - int32_t field; - }; +constexpr FormattedStringBuilder::Field kUndefinedField = {UFIELD_CATEGORY_UNDEFINED, 0}; - /** Compile-time function to construct a Field from a category and a field */ - template - static constexpr FormattedStringBuilder::Field compress() { - static_assert(category != 0, "cannot use Undefined category in FieldUtils"); - static_assert(category <= 0xf, "only 4 bits for category"); - static_assert(field <= 0xf, "only 4 bits for field"); - return static_cast((category << 4) | field); - } +/** + * Internal field to signal "numeric" when fields are not supported in NumberFormat. + */ +constexpr FormattedStringBuilder::Field kGeneralNumericField = {UFIELD_CATEGORY_UNDEFINED, 1}; - /** Runtime inline function to unpack the category and field from the Field */ - static inline CategoryFieldPair expand(FormattedStringBuilder::Field field) { - if (field == UNUM_FIELD_COUNT) { - return {UFIELD_CATEGORY_UNDEFINED, 0}; - } - CategoryFieldPair ret = { - (field >> 4), - (field & 0xf) - }; - if (ret.category == 0) { - ret.category = UFIELD_CATEGORY_NUMBER; - } - return ret; - } +inline UFieldCategory FormattedStringBuilder::Field::getCategory() const { + return static_cast(bits >> 4); +} - static inline bool isNumericField(FormattedStringBuilder::Field field) { - int8_t category = field >> 4; - return category == 0 || category == UFIELD_CATEGORY_NUMBER; - } -}; +inline int32_t FormattedStringBuilder::Field::getField() const { + return bits & 0xf; +} + +inline bool FormattedStringBuilder::Field::isNumeric() const { + return getCategory() == UFIELD_CATEGORY_NUMBER || *this == kGeneralNumericField; +} + +inline bool FormattedStringBuilder::Field::isUndefined() const { + return getCategory() == UFIELD_CATEGORY_UNDEFINED; +} + +inline bool FormattedStringBuilder::Field::operator==(const Field& other) const { + return bits == other.bits; +} + +inline bool FormattedStringBuilder::Field::operator!=(const Field& other) const { + return bits != other.bits; +} U_NAMESPACE_END diff --git a/icu4c/source/i18n/formattedval_impl.h b/icu4c/source/i18n/formattedval_impl.h index 9aab36a52fe..7bee3742869 100644 --- a/icu4c/source/i18n/formattedval_impl.h +++ b/icu4c/source/i18n/formattedval_impl.h @@ -153,7 +153,6 @@ private: bool nextPositionImpl(ConstrainedFieldPosition& cfpos, FormattedStringBuilder::Field numericField, UErrorCode& status) const; static bool isIntOrGroup(FormattedStringBuilder::Field field); - static bool isNumericField(FormattedStringBuilder::Field field); int32_t trimBack(int32_t limit) const; int32_t trimFront(int32_t start) const; }; diff --git a/icu4c/source/i18n/formattedval_sbimpl.cpp b/icu4c/source/i18n/formattedval_sbimpl.cpp index ca28f222813..dfe3af6686d 100644 --- a/icu4c/source/i18n/formattedval_sbimpl.cpp +++ b/icu4c/source/i18n/formattedval_sbimpl.cpp @@ -63,7 +63,7 @@ UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErr ConstrainedFieldPosition cfpos; cfpos.constrainField(UFIELD_CATEGORY_NUMBER, rawField); cfpos.setState(UFIELD_CATEGORY_NUMBER, rawField, fp.getBeginIndex(), fp.getEndIndex()); - if (nextPositionImpl(cfpos, 0, status)) { + if (nextPositionImpl(cfpos, kUndefinedField, status)) { fp.setBeginIndex(cfpos.getStart()); fp.setEndIndex(cfpos.getLimit()); return TRUE; @@ -74,7 +74,7 @@ UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErr bool inside = false; int32_t i = fString.fZero; for (; i < fString.fZero + fString.fLength; i++) { - if (isIntOrGroup(fString.getFieldPtr()[i]) || fString.getFieldPtr()[i] == UNUM_DECIMAL_SEPARATOR_FIELD) { + if (isIntOrGroup(fString.getFieldPtr()[i]) || fString.getFieldPtr()[i] == Field(UFIELD_CATEGORY_NUMBER, UNUM_DECIMAL_SEPARATOR_FIELD)) { inside = true; } else if (inside) { break; @@ -90,42 +90,40 @@ UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErr void FormattedValueStringBuilderImpl::getAllFieldPositions(FieldPositionIteratorHandler& fpih, UErrorCode& status) const { ConstrainedFieldPosition cfpos; - while (nextPositionImpl(cfpos, 0, status)) { + while (nextPositionImpl(cfpos, kUndefinedField, status)) { fpih.addAttribute(cfpos.getField(), cfpos.getStart(), cfpos.getLimit()); } } // Signal the end of the string using a field that doesn't exist and that is -// different from UNUM_FIELD_COUNT, which is used for "null number field". -static constexpr Field kEndField = 0xff; +// different from kUndefinedField, which is used for "null field". +static constexpr Field kEndField = Field(0xf, 0xf); bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& cfpos, Field numericField, UErrorCode& /*status*/) const { - auto numericCAF = StringBuilderFieldUtils::expand(numericField); int32_t fieldStart = -1; - Field currField = UNUM_FIELD_COUNT; + Field currField = kUndefinedField; for (int32_t i = fString.fZero + cfpos.getLimit(); i <= fString.fZero + fString.fLength; i++) { Field _field = (i < fString.fZero + fString.fLength) ? fString.getFieldPtr()[i] : kEndField; // Case 1: currently scanning a field. - if (currField != UNUM_FIELD_COUNT) { + if (currField != kUndefinedField) { if (currField != _field) { int32_t end = i - fString.fZero; // Grouping separators can be whitespace; don't throw them out! - if (currField != UNUM_GROUPING_SEPARATOR_FIELD) { + if (currField != Field(UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD)) { end = trimBack(i - fString.fZero); } if (end <= fieldStart) { // Entire field position is ignorable; skip. fieldStart = -1; - currField = UNUM_FIELD_COUNT; + currField = kUndefinedField; i--; // look at this index again continue; } int32_t start = fieldStart; - if (currField != UNUM_GROUPING_SEPARATOR_FIELD) { + if (currField != Field(UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD)) { start = trimFront(start); } - auto caf = StringBuilderFieldUtils::expand(currField); - cfpos.setState(caf.category, caf.field, start, end); + cfpos.setState(currField.getCategory(), currField.getField(), start, end); return true; } continue; @@ -147,51 +145,46 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& return true; } // Special case: coalesce NUMERIC if we are pointing at the end of the NUMERIC. - if (numericField != 0 - && cfpos.matchesField(numericCAF.category, numericCAF.field) + if (numericField != kUndefinedField + && cfpos.matchesField(numericField.getCategory(), numericField.getField()) && i > fString.fZero // don't return the same field twice in a row: && (i - fString.fZero > cfpos.getLimit() - || cfpos.getCategory() != numericCAF.category - || cfpos.getField() != numericCAF.field) - && isNumericField(fString.getFieldPtr()[i - 1]) - && !isNumericField(_field)) { + || cfpos.getCategory() != numericField.getCategory() + || cfpos.getField() != numericField.getField()) + && fString.getFieldPtr()[i - 1].isNumeric() + && !_field.isNumeric()) { int j = i - 1; - for (; j >= fString.fZero && isNumericField(fString.getFieldPtr()[j]); j--) {} + for (; j >= fString.fZero && fString.getFieldPtr()[j].isNumeric(); j--) {} cfpos.setState( - numericCAF.category, - numericCAF.field, + numericField.getCategory(), + numericField.getField(), j - fString.fZero + 1, i - fString.fZero); return true; } // Special case: skip over INTEGER; will be coalesced later. - if (_field == UNUM_INTEGER_FIELD) { - _field = UNUM_FIELD_COUNT; + if (_field == Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)) { + _field = kUndefinedField; } // Case 2: no field starting at this position. - if (_field == UNUM_FIELD_COUNT || _field == kEndField) { + if (_field.isUndefined() || _field == kEndField) { continue; } // Case 3: check for field starting at this position - auto caf = StringBuilderFieldUtils::expand(_field); - if (cfpos.matchesField(caf.category, caf.field)) { + if (cfpos.matchesField(_field.getCategory(), _field.getField())) { fieldStart = i - fString.fZero; currField = _field; } } - U_ASSERT(currField == UNUM_FIELD_COUNT); + U_ASSERT(currField == kUndefinedField); return false; } bool FormattedValueStringBuilderImpl::isIntOrGroup(Field field) { - return field == UNUM_INTEGER_FIELD - || field == UNUM_GROUPING_SEPARATOR_FIELD; -} - -bool FormattedValueStringBuilderImpl::isNumericField(Field field) { - return StringBuilderFieldUtils::isNumericField(field); + return field == Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD) + || field == Field(UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD); } int32_t FormattedValueStringBuilderImpl::trimBack(int32_t limit) const { diff --git a/icu4c/source/i18n/measfmt.cpp b/icu4c/source/i18n/measfmt.cpp index 1949f17b137..f3ba6198cef 100644 --- a/icu4c/source/i18n/measfmt.cpp +++ b/icu4c/source/i18n/measfmt.cpp @@ -774,11 +774,6 @@ UnicodeString &MeasureFormat::formatNumeric( case u's': value = seconds; break; } - // For undefined field we use UNUM_FIELD_COUNT, for historical reasons. - // See cleanup bug: https://unicode-org.atlassian.net/browse/ICU-20665 - // But we give it a clear name, to keep "the ugly part" in one place. - constexpr UNumberFormatFields undefinedField = UNUM_FIELD_COUNT; - // There is not enough info to add Field(s) for the unit because all we have are plain // text patterns. For example in "21:51" there is no text for something like "hour", // while in something like "21h51" there is ("h"). But we can't really tell... @@ -787,7 +782,7 @@ UnicodeString &MeasureFormat::formatNumeric( case u'm': case u's': if (protect) { - fsb.appendChar16(c, undefinedField, status); + fsb.appendChar16(c, kUndefinedField, status); } else { UnicodeString tmp; if ((i + 1 < patternLength) && pattern[i + 1] == c) { // doubled @@ -797,20 +792,20 @@ UnicodeString &MeasureFormat::formatNumeric( numberFormatter->format(value, tmp, status); } // TODO: Use proper Field - fsb.append(tmp, undefinedField, status); + fsb.append(tmp, kUndefinedField, status); } break; case u'\'': // '' is escaped apostrophe if ((i + 1 < patternLength) && pattern[i + 1] == c) { - fsb.appendChar16(c, undefinedField, status); + fsb.appendChar16(c, kUndefinedField, status); i++; } else { protect = !protect; } break; default: - fsb.appendChar16(c, undefinedField, status); + fsb.appendChar16(c, kUndefinedField, status); } } diff --git a/icu4c/source/i18n/number_affixutils.cpp b/icu4c/source/i18n/number_affixutils.cpp index 1039a84c656..a74ec2d6347 100644 --- a/icu4c/source/i18n/number_affixutils.cpp +++ b/icu4c/source/i18n/number_affixutils.cpp @@ -131,25 +131,25 @@ UnicodeString AffixUtils::escape(const UnicodeString &input) { Field AffixUtils::getFieldForType(AffixPatternType type) { switch (type) { case TYPE_MINUS_SIGN: - return UNUM_SIGN_FIELD; + return {UFIELD_CATEGORY_NUMBER, UNUM_SIGN_FIELD}; case TYPE_PLUS_SIGN: - return UNUM_SIGN_FIELD; + return {UFIELD_CATEGORY_NUMBER, UNUM_SIGN_FIELD}; case TYPE_PERCENT: - return UNUM_PERCENT_FIELD; + return {UFIELD_CATEGORY_NUMBER, UNUM_PERCENT_FIELD}; case TYPE_PERMILLE: - return UNUM_PERMILL_FIELD; + return {UFIELD_CATEGORY_NUMBER, UNUM_PERMILL_FIELD}; case TYPE_CURRENCY_SINGLE: - return UNUM_CURRENCY_FIELD; + return {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}; case TYPE_CURRENCY_DOUBLE: - return UNUM_CURRENCY_FIELD; + return {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}; case TYPE_CURRENCY_TRIPLE: - return UNUM_CURRENCY_FIELD; + return {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}; case TYPE_CURRENCY_QUAD: - return UNUM_CURRENCY_FIELD; + return {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}; case TYPE_CURRENCY_QUINT: - return UNUM_CURRENCY_FIELD; + return {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}; case TYPE_CURRENCY_OVERFLOW: - return UNUM_CURRENCY_FIELD; + return {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}; default: UPRV_UNREACHABLE; } @@ -165,7 +165,11 @@ AffixUtils::unescape(const UnicodeString &affixPattern, FormattedStringBuilder & if (U_FAILURE(status)) { return length; } if (tag.type == TYPE_CURRENCY_OVERFLOW) { // Don't go to the provider for this special case - length += output.insertCodePoint(position + length, 0xFFFD, UNUM_CURRENCY_FIELD, status); + length += output.insertCodePoint( + position + length, + 0xFFFD, + {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, + status); } else if (tag.type < 0) { length += output.insert( position + length, provider.getSymbol(tag.type), getFieldForType(tag.type), status); @@ -218,7 +222,7 @@ bool AffixUtils::hasCurrencySymbols(const UnicodeString &affixPattern, UErrorCod while (hasNext(tag, affixPattern)) { tag = nextToken(tag, affixPattern, status); if (U_FAILURE(status)) { return false; } - if (tag.type < 0 && getFieldForType(tag.type) == UNUM_CURRENCY_FIELD) { + if (tag.type < 0 && getFieldForType(tag.type) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) { return true; } } diff --git a/icu4c/source/i18n/number_compact.cpp b/icu4c/source/i18n/number_compact.cpp index e3c6bbc0f9e..9d9e6c9ab14 100644 --- a/icu4c/source/i18n/number_compact.cpp +++ b/icu4c/source/i18n/number_compact.cpp @@ -266,7 +266,7 @@ void CompactHandler::precomputeAllModifiers(MutablePatternModifier &buildReferen ParsedPatternInfo patternInfo; PatternParser::parseToPatternInfo(UnicodeString(patternString), patternInfo, status); if (U_FAILURE(status)) { return; } - buildReference.setPatternInfo(&patternInfo, UNUM_COMPACT_FIELD); + buildReference.setPatternInfo(&patternInfo, {UFIELD_CATEGORY_NUMBER, UNUM_COMPACT_FIELD}); info.mod = buildReference.createImmutable(status); if (U_FAILURE(status)) { return; } info.patternString = patternString; @@ -315,7 +315,9 @@ void CompactHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micr // C++ Note: Use unsafePatternInfo for proper lifecycle. ParsedPatternInfo &patternInfo = const_cast(this)->unsafePatternInfo; PatternParser::parseToPatternInfo(UnicodeString(patternString), patternInfo, status); - unsafePatternModifier->setPatternInfo(&unsafePatternInfo, UNUM_COMPACT_FIELD); + unsafePatternModifier->setPatternInfo( + &unsafePatternInfo, + {UFIELD_CATEGORY_NUMBER, UNUM_COMPACT_FIELD}); unsafePatternModifier->setNumberProperties(quantity.signum(), StandardPlural::Form::COUNT); micros.modMiddle = unsafePatternModifier; } diff --git a/icu4c/source/i18n/number_formatimpl.cpp b/icu4c/source/i18n/number_formatimpl.cpp index 708aaa54c20..83ee2a8bfe9 100644 --- a/icu4c/source/i18n/number_formatimpl.cpp +++ b/icu4c/source/i18n/number_formatimpl.cpp @@ -370,7 +370,7 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, patternModifier->setPatternInfo( macros.affixProvider != nullptr ? macros.affixProvider : static_cast(fPatternInfo.getAlias()), - UNUM_FIELD_COUNT); + kUndefinedField); patternModifier->setPatternAttributes(fMicros.sign, isPermille); if (patternModifier->needsPlurals()) { patternModifier->setSymbols( @@ -480,14 +480,14 @@ int32_t NumberFormatterImpl::writeNumber(const MicroProps& micros, DecimalQuanti length += string.insert( length + index, micros.symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kInfinitySymbol), - UNUM_INTEGER_FIELD, + {UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD}, status); } else if (quantity.isNaN()) { length += string.insert( length + index, micros.symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kNaNSymbol), - UNUM_INTEGER_FIELD, + {UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD}, status); } else { @@ -503,7 +503,7 @@ int32_t NumberFormatterImpl::writeNumber(const MicroProps& micros, DecimalQuanti .symbols ->getSymbol( DecimalFormatSymbols::ENumberFormatSymbol::kDecimalSeparatorSymbol), - UNUM_DECIMAL_SEPARATOR_FIELD, + {UFIELD_CATEGORY_NUMBER, UNUM_DECIMAL_SEPARATOR_FIELD}, status); } @@ -513,7 +513,12 @@ int32_t NumberFormatterImpl::writeNumber(const MicroProps& micros, DecimalQuanti if (length == 0) { // Force output of the digit for value 0 length += utils::insertDigitFromSymbols( - string, index, 0, *micros.symbols, UNUM_INTEGER_FIELD, status); + string, + index, + 0, + *micros.symbols, + {UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD}, + status); } } @@ -534,14 +539,20 @@ int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps& micros, Decima DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol) : micros.symbols->getSymbol( DecimalFormatSymbols::ENumberFormatSymbol::kGroupingSeparatorSymbol), - UNUM_GROUPING_SEPARATOR_FIELD, + {UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD}, status); } // Get and append the next digit value int8_t nextDigit = quantity.getDigit(i); length += utils::insertDigitFromSymbols( - string, index, nextDigit, *micros.symbols, UNUM_INTEGER_FIELD, status); + string, + index, + nextDigit, + *micros.symbols, + {UFIELD_CATEGORY_NUMBER, + UNUM_INTEGER_FIELD}, + status); } return length; } @@ -555,7 +566,12 @@ int32_t NumberFormatterImpl::writeFractionDigits(const MicroProps& micros, Decim // Get and append the next digit value int8_t nextDigit = quantity.getDigit(-i - 1); length += utils::insertDigitFromSymbols( - string, length + index, nextDigit, *micros.symbols, UNUM_FRACTION_FIELD, status); + string, + length + index, + nextDigit, + *micros.symbols, + {UFIELD_CATEGORY_NUMBER, UNUM_FRACTION_FIELD}, + status); } return length; } diff --git a/icu4c/source/i18n/number_longnames.cpp b/icu4c/source/i18n/number_longnames.cpp index 5519398a059..5378eda8b24 100644 --- a/icu4c/source/i18n/number_longnames.cpp +++ b/icu4c/source/i18n/number_longnames.cpp @@ -209,7 +209,7 @@ LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unitRef, c UnicodeString simpleFormats[ARRAY_LENGTH]; getMeasureData(loc, unit, width, simpleFormats, status); if (U_FAILURE(status)) { return result; } - result->simpleFormatsToModifiers(simpleFormats, UNUM_MEASURE_UNIT_FIELD, status); + result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); return result; } @@ -247,7 +247,7 @@ LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit, con compiled.format(UnicodeString(u"{0}"), secondaryString, perUnitFormat, status); if (U_FAILURE(status)) { return result; } } - result->multiSimpleFormatsToModifiers(primaryData, perUnitFormat, UNUM_MEASURE_UNIT_FIELD, status); + result->multiSimpleFormatsToModifiers(primaryData, perUnitFormat, {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status); return result; } @@ -296,7 +296,7 @@ LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const UnicodeString simpleFormats[ARRAY_LENGTH]; getCurrencyLongNameData(loc, currency, simpleFormats, status); if (U_FAILURE(status)) { return nullptr; } - result->simpleFormatsToModifiers(simpleFormats, UNUM_CURRENCY_FIELD, status); + result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status); return result; } diff --git a/icu4c/source/i18n/number_modifiers.cpp b/icu4c/source/i18n/number_modifiers.cpp index 3a44f8f6f15..1267f6aeb55 100644 --- a/icu4c/source/i18n/number_modifiers.cpp +++ b/icu4c/source/i18n/number_modifiers.cpp @@ -89,7 +89,7 @@ bool ConstantAffixModifier::isStrong() const { return fStrong; } -bool ConstantAffixModifier::containsField(UNumberFormatFields field) const { +bool ConstantAffixModifier::containsField(Field field) const { (void)field; // This method is not currently used. UPRV_UNREACHABLE; @@ -151,7 +151,7 @@ SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field fie } SimpleModifier::SimpleModifier() - : fField(UNUM_FIELD_COUNT), fStrong(false), fPrefixLength(0), fSuffixLength(0) { + : fField(kUndefinedField), fStrong(false), fPrefixLength(0), fSuffixLength(0) { } int32_t SimpleModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex, @@ -178,7 +178,7 @@ bool SimpleModifier::isStrong() const { return fStrong; } -bool SimpleModifier::containsField(UNumberFormatFields field) const { +bool SimpleModifier::containsField(Field field) const { (void)field; // This method is not currently used. UPRV_UNREACHABLE; @@ -292,7 +292,7 @@ int32_t ConstantMultiFieldModifier::apply(FormattedStringBuilder &output, int le leftIndex + length, rightIndex + length, UnicodeString(), 0, 0, - UNUM_FIELD_COUNT, status); + kUndefinedField, status); } length += output.insert(rightIndex + length, fSuffix, status); return length; @@ -310,7 +310,7 @@ bool ConstantMultiFieldModifier::isStrong() const { return fStrong; } -bool ConstantMultiFieldModifier::containsField(UNumberFormatFields field) const { +bool ConstantMultiFieldModifier::containsField(Field field) const { return fPrefix.containsField(field) || fSuffix.containsField(field); } @@ -342,7 +342,7 @@ CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const FormattedSt : ConstantMultiFieldModifier(prefix, suffix, overwrite, strong) { // Check for currency spacing. Do not build the UnicodeSets unless there is // a currency code point at a boundary. - if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == UNUM_CURRENCY_FIELD) { + if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) { int prefixCp = prefix.getLastCodePoint(); UnicodeSet prefixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, PREFIX, status); if (prefixUnicodeSet.contains(prefixCp)) { @@ -357,7 +357,7 @@ CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const FormattedSt fAfterPrefixUnicodeSet.setToBogus(); fAfterPrefixInsert.setToBogus(); } - if (suffix.length() > 0 && suffix.fieldAt(0) == UNUM_CURRENCY_FIELD) { + if (suffix.length() > 0 && suffix.fieldAt(0) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) { int suffixCp = suffix.getLastCodePoint(); UnicodeSet suffixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, SUFFIX, status); if (suffixUnicodeSet.contains(suffixCp)) { @@ -381,12 +381,20 @@ int32_t CurrencySpacingEnabledModifier::apply(FormattedStringBuilder &output, in if (rightIndex - leftIndex > 0 && !fAfterPrefixUnicodeSet.isBogus() && fAfterPrefixUnicodeSet.contains(output.codePointAt(leftIndex))) { // TODO: Should we use the CURRENCY field here? - length += output.insert(leftIndex, fAfterPrefixInsert, UNUM_FIELD_COUNT, status); + length += output.insert( + leftIndex, + fAfterPrefixInsert, + kUndefinedField, + status); } if (rightIndex - leftIndex > 0 && !fBeforeSuffixUnicodeSet.isBogus() && fBeforeSuffixUnicodeSet.contains(output.codePointBefore(rightIndex))) { // TODO: Should we use the CURRENCY field here? - length += output.insert(rightIndex + length, fBeforeSuffixInsert, UNUM_FIELD_COUNT, status); + length += output.insert( + rightIndex + length, + fBeforeSuffixInsert, + kUndefinedField, + status); } // Call super for the remaining logic @@ -422,7 +430,7 @@ CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(FormattedStringBuilder // This works even if the last code point in the prefix is 2 code units because the // field value gets populated to both indices in the field array. Field affixField = (affix == PREFIX) ? output.fieldAt(index - 1) : output.fieldAt(index); - if (affixField != UNUM_CURRENCY_FIELD) { + if (affixField != Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) { return 0; } int affixCp = (affix == PREFIX) ? output.codePointBefore(index) : output.codePointAt(index); @@ -443,7 +451,7 @@ CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(FormattedStringBuilder // However, the build code path is more efficient, and this is the most natural // place to put currency spacing in the non-build code path. // TODO: Should we use the CURRENCY field here? - return output.insert(index, spacingString, UNUM_FIELD_COUNT, status); + return output.insert(index, spacingString, kUndefinedField, status); } UnicodeSet diff --git a/icu4c/source/i18n/number_modifiers.h b/icu4c/source/i18n/number_modifiers.h index e3820b6b384..375254310ca 100644 --- a/icu4c/source/i18n/number_modifiers.h +++ b/icu4c/source/i18n/number_modifiers.h @@ -37,7 +37,7 @@ class U_I18N_API ConstantAffixModifier : public Modifier, public UObject { bool isStrong() const U_OVERRIDE; - bool containsField(UNumberFormatFields field) const U_OVERRIDE; + bool containsField(Field field) const U_OVERRIDE; void getParameters(Parameters& output) const U_OVERRIDE; @@ -73,7 +73,7 @@ class U_I18N_API SimpleModifier : public Modifier, public UMemory { bool isStrong() const U_OVERRIDE; - bool containsField(UNumberFormatFields field) const U_OVERRIDE; + bool containsField(Field field) const U_OVERRIDE; void getParameters(Parameters& output) const U_OVERRIDE; @@ -166,7 +166,7 @@ class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory { bool isStrong() const U_OVERRIDE; - bool containsField(UNumberFormatFields field) const U_OVERRIDE; + bool containsField(Field field) const U_OVERRIDE; void getParameters(Parameters& output) const U_OVERRIDE; @@ -255,7 +255,7 @@ class U_I18N_API EmptyModifier : public Modifier, public UMemory { return fStrong; } - bool containsField(UNumberFormatFields field) const U_OVERRIDE { + bool containsField(Field field) const U_OVERRIDE { (void)field; return false; } diff --git a/icu4c/source/i18n/number_padding.cpp b/icu4c/source/i18n/number_padding.cpp index c68a9875b20..c320c3ffb6f 100644 --- a/icu4c/source/i18n/number_padding.cpp +++ b/icu4c/source/i18n/number_padding.cpp @@ -21,7 +21,7 @@ addPaddingHelper(UChar32 paddingCp, int32_t requiredPadding, FormattedStringBuil UErrorCode &status) { for (int32_t i = 0; i < requiredPadding; i++) { // TODO: If appending to the end, this will cause actual insertion operations. Improve. - string.insertCodePoint(index, paddingCp, UNUM_FIELD_COUNT, status); + string.insertCodePoint(index, paddingCp, kUndefinedField, status); } return U16_LENGTH(paddingCp) * requiredPadding; } diff --git a/icu4c/source/i18n/number_patternmodifier.cpp b/icu4c/source/i18n/number_patternmodifier.cpp index 892ac372f51..e37dde8c175 100644 --- a/icu4c/source/i18n/number_patternmodifier.cpp +++ b/icu4c/source/i18n/number_patternmodifier.cpp @@ -195,7 +195,7 @@ int32_t MutablePatternModifier::apply(FormattedStringBuilder& output, int32_t le UnicodeString(), 0, 0, - UNUM_FIELD_COUNT, + kUndefinedField, status); } CurrencySpacingEnabledModifier::applyCurrencySpacing( @@ -239,7 +239,7 @@ bool MutablePatternModifier::isStrong() const { return fStrong; } -bool MutablePatternModifier::containsField(UNumberFormatFields field) const { +bool MutablePatternModifier::containsField(Field field) const { (void)field; // This method is not currently used. UPRV_UNREACHABLE; diff --git a/icu4c/source/i18n/number_patternmodifier.h b/icu4c/source/i18n/number_patternmodifier.h index 4034c9a47ee..2597afefd53 100644 --- a/icu4c/source/i18n/number_patternmodifier.h +++ b/icu4c/source/i18n/number_patternmodifier.h @@ -180,7 +180,7 @@ class U_I18N_API MutablePatternModifier bool isStrong() const U_OVERRIDE; - bool containsField(UNumberFormatFields field) const U_OVERRIDE; + bool containsField(Field field) const U_OVERRIDE; void getParameters(Parameters& output) const U_OVERRIDE; diff --git a/icu4c/source/i18n/number_scientific.cpp b/icu4c/source/i18n/number_scientific.cpp index f3de7414125..edafa80cb9c 100644 --- a/icu4c/source/i18n/number_scientific.cpp +++ b/icu4c/source/i18n/number_scientific.cpp @@ -44,21 +44,21 @@ int32_t ScientificModifier::apply(FormattedStringBuilder &output, int32_t /*left i += output.insert( i, fHandler->fSymbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kExponentialSymbol), - UNUM_EXPONENT_SYMBOL_FIELD, + {UFIELD_CATEGORY_NUMBER, UNUM_EXPONENT_SYMBOL_FIELD}, status); if (fExponent < 0 && fHandler->fSettings.fExponentSignDisplay != UNUM_SIGN_NEVER) { i += output.insert( i, fHandler->fSymbols ->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kMinusSignSymbol), - UNUM_EXPONENT_SIGN_FIELD, + {UFIELD_CATEGORY_NUMBER, UNUM_EXPONENT_SIGN_FIELD}, status); } else if (fExponent >= 0 && fHandler->fSettings.fExponentSignDisplay == UNUM_SIGN_ALWAYS) { i += output.insert( i, fHandler->fSymbols ->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPlusSignSymbol), - UNUM_EXPONENT_SIGN_FIELD, + {UFIELD_CATEGORY_NUMBER, UNUM_EXPONENT_SIGN_FIELD}, status); } // Append the exponent digits (using a simple inline algorithm) @@ -70,7 +70,7 @@ int32_t ScientificModifier::apply(FormattedStringBuilder &output, int32_t /*left i - j, d, *fHandler->fSymbols, - UNUM_EXPONENT_FIELD, + {UFIELD_CATEGORY_NUMBER, UNUM_EXPONENT_FIELD}, status); } return i - rightIndex; @@ -93,7 +93,7 @@ bool ScientificModifier::isStrong() const { return true; } -bool ScientificModifier::containsField(UNumberFormatFields field) const { +bool ScientificModifier::containsField(Field field) const { (void)field; // This method is not used for inner modifiers. UPRV_UNREACHABLE; diff --git a/icu4c/source/i18n/number_scientific.h b/icu4c/source/i18n/number_scientific.h index 1c9ce1efa80..a55d5ed1d41 100644 --- a/icu4c/source/i18n/number_scientific.h +++ b/icu4c/source/i18n/number_scientific.h @@ -30,7 +30,7 @@ class U_I18N_API ScientificModifier : public UMemory, public Modifier { bool isStrong() const U_OVERRIDE; - bool containsField(UNumberFormatFields field) const U_OVERRIDE; + bool containsField(Field field) const U_OVERRIDE; void getParameters(Parameters& output) const U_OVERRIDE; diff --git a/icu4c/source/i18n/number_types.h b/icu4c/source/i18n/number_types.h index 641e082d361..5c2b8cf8b5d 100644 --- a/icu4c/source/i18n/number_types.h +++ b/icu4c/source/i18n/number_types.h @@ -194,7 +194,7 @@ class U_I18N_API Modifier { /** * Whether the modifier contains at least one occurrence of the given field. */ - virtual bool containsField(UNumberFormatFields field) const = 0; + virtual bool containsField(Field field) const = 0; /** * A fill-in for getParameters(). obj will always be set; if non-null, the other diff --git a/icu4c/source/i18n/number_utypes.h b/icu4c/source/i18n/number_utypes.h index 6dbe5bee68f..af25aa33988 100644 --- a/icu4c/source/i18n/number_utypes.h +++ b/icu4c/source/i18n/number_utypes.h @@ -33,7 +33,7 @@ const DecimalQuantity* validateUFormattedNumberToDecimalQuantity( */ class UFormattedNumberData : public FormattedValueStringBuilderImpl { public: - UFormattedNumberData() : FormattedValueStringBuilderImpl(0) {} + UFormattedNumberData() : FormattedValueStringBuilderImpl(kUndefinedField) {} virtual ~UFormattedNumberData(); DecimalQuantity quantity; diff --git a/icu4c/source/i18n/numrange_impl.cpp b/icu4c/source/i18n/numrange_impl.cpp index 7d732b31ec1..9fb3dee861f 100644 --- a/icu4c/source/i18n/numrange_impl.cpp +++ b/icu4c/source/i18n/numrange_impl.cpp @@ -210,7 +210,7 @@ NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros getNumberRangeData(macros.locale.getName(), nsName, data, status); if (U_FAILURE(status)) { return; } fRangeFormatter = data.rangePattern; - fApproximatelyModifier = {data.approximatelyPattern, UNUM_FIELD_COUNT, false}; + fApproximatelyModifier = {data.approximatelyPattern, kUndefinedField, false}; // TODO: Get locale from PluralRules instead? fPluralRanges.initialize(macros.locale, status); @@ -368,7 +368,8 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data, // Only collapse if the modifier is a unit. // TODO: Make a better way to check for a unit? // TODO: Handle case where the modifier has both notation and unit (compact currency)? - if (!mm->containsField(UNUM_CURRENCY_FIELD) && !mm->containsField(UNUM_PERCENT_FIELD)) { + if (!mm->containsField({UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}) + && !mm->containsField({UFIELD_CATEGORY_NUMBER, UNUM_PERCENT_FIELD})) { collapseMiddle = false; } } else if (fCollapse == UNUM_RANGE_COLLAPSE_AUTO) { @@ -416,7 +417,7 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data, 0, &lengthPrefix, &lengthSuffix, - UNUM_FIELD_COUNT, + kUndefinedField, status); if (U_FAILURE(status)) { return; } lengthInfix = lengthRange - lengthPrefix - lengthSuffix; @@ -434,10 +435,10 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data, if (repeatInner || repeatMiddle || repeatOuter) { // Add spacing if there is not already spacing if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_1))) { - lengthInfix += string.insertCodePoint(UPRV_INDEX_1, u'\u0020', UNUM_FIELD_COUNT, status); + lengthInfix += string.insertCodePoint(UPRV_INDEX_1, u'\u0020', kUndefinedField, status); } if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_2 - 1))) { - lengthInfix += string.insertCodePoint(UPRV_INDEX_2, u'\u0020', UNUM_FIELD_COUNT, status); + lengthInfix += string.insertCodePoint(UPRV_INDEX_2, u'\u0020', kUndefinedField, status); } } } diff --git a/icu4c/source/i18n/numrange_impl.h b/icu4c/source/i18n/numrange_impl.h index f88e3009136..8f4c8a40ba2 100644 --- a/icu4c/source/i18n/numrange_impl.h +++ b/icu4c/source/i18n/numrange_impl.h @@ -31,7 +31,7 @@ namespace impl { */ class UFormattedNumberRangeData : public FormattedValueStringBuilderImpl { public: - UFormattedNumberRangeData() : FormattedValueStringBuilderImpl(0) {} + UFormattedNumberRangeData() : FormattedValueStringBuilderImpl(kUndefinedField) {} virtual ~UFormattedNumberRangeData(); DecimalQuantity quantity1; diff --git a/icu4c/source/i18n/quantityformatter.cpp b/icu4c/source/i18n/quantityformatter.cpp index e88b70fbd71..9c9aa99b670 100644 --- a/icu4c/source/i18n/quantityformatter.cpp +++ b/icu4c/source/i18n/quantityformatter.cpp @@ -204,7 +204,8 @@ void QuantityFormatter::formatAndSelect( if (U_FAILURE(status)) { return; } - output.append(result, UNUM_FIELD_COUNT, status); + // This code path is probably RBNF. Use the generic numeric field. + output.append(result, kGeneralNumericField, status); if (U_FAILURE(status)) { return; } diff --git a/icu4c/source/i18n/reldatefmt.cpp b/icu4c/source/i18n/reldatefmt.cpp index f66092ba948..8c6688c5b9a 100644 --- a/icu4c/source/i18n/reldatefmt.cpp +++ b/icu4c/source/i18n/reldatefmt.cpp @@ -728,11 +728,11 @@ const RelativeDateTimeCacheData *LocaleCacheKey::crea -static constexpr number::impl::Field kRDTNumericField - = StringBuilderFieldUtils::compress(); +static constexpr FormattedStringBuilder::Field kRDTNumericField + = {UFIELD_CATEGORY_RELATIVE_DATETIME, UDAT_REL_NUMERIC_FIELD}; -static constexpr number::impl::Field kRDTLiteralField - = StringBuilderFieldUtils::compress(); +static constexpr FormattedStringBuilder::Field kRDTLiteralField + = {UFIELD_CATEGORY_RELATIVE_DATETIME, UDAT_REL_LITERAL_FIELD}; class FormattedRelativeDateTimeData : public FormattedValueStringBuilderImpl { public: diff --git a/icu4c/source/test/intltest/formatted_string_builder_test.cpp b/icu4c/source/test/intltest/formatted_string_builder_test.cpp index 4ce63daec56..18f56d392dc 100644 --- a/icu4c/source/test/intltest/formatted_string_builder_test.cpp +++ b/icu4c/source/test/intltest/formatted_string_builder_test.cpp @@ -10,6 +10,7 @@ #include "intltest.h" #include "formatted_string_builder.h" #include "formattedval_impl.h" +#include "unicode/unum.h" class FormattedStringBuilderTest : public IntlTest { @@ -61,10 +62,9 @@ void FormattedStringBuilderTest::testInsertAppendUnicodeString() { FormattedStringBuilder sb3; sb1.append(str); - // Note: UNUM_FIELD_COUNT is like passing null in Java - sb2.append(str, UNUM_FIELD_COUNT, status); + sb2.append(str, kUndefinedField, status); assertSuccess("Appending to sb2", status); - sb3.append(str, UNUM_FIELD_COUNT, status); + sb3.append(str, kUndefinedField, status); assertSuccess("Appending to sb3", status); assertEqualsImpl(sb1, sb2); assertEqualsImpl(str, sb3); @@ -74,16 +74,16 @@ void FormattedStringBuilderTest::testInsertAppendUnicodeString() { sb4.append(u"😇"); sb4.append(str); sb4.append(u"xx"); - sb5.append(u"😇xx", UNUM_FIELD_COUNT, status); + sb5.append(u"😇xx", kUndefinedField, status); assertSuccess("Appending to sb5", status); - sb5.insert(2, str, UNUM_FIELD_COUNT, status); + sb5.insert(2, str, kUndefinedField, status); assertSuccess("Inserting into sb5", status); assertEqualsImpl(sb4, sb5); int start = uprv_min(1, str.length()); int end = uprv_min(10, str.length()); sb4.insert(3, str, start, end - start); // UnicodeString uses length instead of end index - sb5.insert(3, str, start, end, UNUM_FIELD_COUNT, status); + sb5.insert(3, str, start, end, kUndefinedField, status); assertSuccess("Inserting into sb5 again", status); assertEqualsImpl(sb4, sb5); @@ -124,8 +124,8 @@ void FormattedStringBuilderTest::testSplice() { sb1.append(cas.input); sb1.replace(cas.startThis, cas.endThis - cas.startThis, replacement); sb2.clear(); - sb2.append(cas.input, UNUM_FIELD_COUNT, status); - sb2.splice(cas.startThis, cas.endThis, replacement, 0, replacement.length(), UNUM_FIELD_COUNT, status); + sb2.append(cas.input, kUndefinedField, status); + sb2.splice(cas.startThis, cas.endThis, replacement, 0, replacement.length(), kUndefinedField, status); assertSuccess("Splicing into sb2 first time", status); assertEqualsImpl(sb1, sb2); @@ -137,8 +137,8 @@ void FormattedStringBuilderTest::testSplice() { sb1.append(cas.input); sb1.replace(cas.startThis, cas.endThis - cas.startThis, UnicodeString(replacement, 1, 2)); sb2.clear(); - sb2.append(cas.input, UNUM_FIELD_COUNT, status); - sb2.splice(cas.startThis, cas.endThis, replacement, 1, 3, UNUM_FIELD_COUNT, status); + sb2.append(cas.input, kUndefinedField, status); + sb2.splice(cas.startThis, cas.endThis, replacement, 1, 3, kUndefinedField, status); assertSuccess("Splicing into sb2 second time", status); assertEqualsImpl(sb1, sb2); } @@ -154,9 +154,9 @@ void FormattedStringBuilderTest::testInsertAppendCodePoint() { for (UChar32 cas : cases) { FormattedStringBuilder sb3; sb1.append(cas); - sb2.appendCodePoint(cas, UNUM_FIELD_COUNT, status); + sb2.appendCodePoint(cas, kUndefinedField, status); assertSuccess("Appending to sb2", status); - sb3.appendCodePoint(cas, UNUM_FIELD_COUNT, status); + sb3.appendCodePoint(cas, kUndefinedField, status); assertSuccess("Appending to sb3", status); assertEqualsImpl(sb1, sb2); assertEquals("Length of sb3", U16_LENGTH(cas), sb3.length()); @@ -170,9 +170,9 @@ void FormattedStringBuilderTest::testInsertAppendCodePoint() { FormattedStringBuilder sb5; sb4.append(u"😇xx"); sb4.insert(2, cas); - sb5.append(u"😇xx", UNUM_FIELD_COUNT, status); + sb5.append(u"😇xx", kUndefinedField, status); assertSuccess("Appending to sb5", status); - sb5.insertCodePoint(2, cas, UNUM_FIELD_COUNT, status); + sb5.insertCodePoint(2, cas, kUndefinedField, status); assertSuccess("Inserting into sb5", status); assertEqualsImpl(sb4, sb5); @@ -180,10 +180,10 @@ void FormattedStringBuilderTest::testInsertAppendCodePoint() { FormattedStringBuilder sb7; sb6.append(cas); if (U_IS_SUPPLEMENTARY(cas)) { - sb7.appendChar16(U16_TRAIL(cas), UNUM_FIELD_COUNT, status); - sb7.insertChar16(0, U16_LEAD(cas), UNUM_FIELD_COUNT, status); + sb7.appendChar16(U16_TRAIL(cas), kUndefinedField, status); + sb7.insertChar16(0, U16_LEAD(cas), kUndefinedField, status); } else { - sb7.insertChar16(0, cas, UNUM_FIELD_COUNT, status); + sb7.insertChar16(0, cas, kUndefinedField, status); } assertSuccess("Insert/append into sb7", status); assertEqualsImpl(sb6, sb7); @@ -194,33 +194,35 @@ void FormattedStringBuilderTest::testCopy() { UErrorCode status = U_ZERO_ERROR; for (UnicodeString str : EXAMPLE_STRINGS) { FormattedStringBuilder sb1; - sb1.append(str, UNUM_FIELD_COUNT, status); + sb1.append(str, kUndefinedField, status); assertSuccess("Appending to sb1 first time", status); FormattedStringBuilder sb2(sb1); assertTrue("Content should equal itself", sb1.contentEquals(sb2)); - sb1.append("12345", UNUM_FIELD_COUNT, status); + sb1.append("12345", kUndefinedField, status); assertSuccess("Appending to sb1 second time", status); assertFalse("Content should no longer equal itself", sb1.contentEquals(sb2)); } } void FormattedStringBuilderTest::testFields() { + typedef FormattedStringBuilder::Field Field; UErrorCode status = U_ZERO_ERROR; // Note: This is a C++11 for loop that calls the UnicodeString constructor on each iteration. for (UnicodeString str : EXAMPLE_STRINGS) { - FormattedValueStringBuilderImpl sbi(0); + FormattedValueStringBuilderImpl sbi(kUndefinedField); FormattedStringBuilder& sb = sbi.getStringRef(); - sb.append(str, UNUM_FIELD_COUNT, status); + sb.append(str, kUndefinedField, status); assertSuccess("Appending to sb", status); - sb.append(str, UNUM_CURRENCY_FIELD, status); + sb.append(str, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status); assertSuccess("Appending to sb", status); assertEquals("Reference string copied twice", str.length() * 2, sb.length()); for (int32_t i = 0; i < str.length(); i++) { assertEquals("Null field first", - (FormattedStringBuilder::Field) UNUM_FIELD_COUNT, sb.fieldAt(i)); + kUndefinedField.bits, sb.fieldAt(i).bits); assertEquals("Currency field second", - (FormattedStringBuilder::Field) UNUM_CURRENCY_FIELD, sb.fieldAt(i + str.length())); + Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD).bits, + sb.fieldAt(i + str.length()).bits); } // Very basic FieldPosition test. More robust tests happen in NumberFormatTest. @@ -232,10 +234,12 @@ void FormattedStringBuilderTest::testFields() { assertEquals("Currency end position", str.length() * 2, fp.getEndIndex()); if (str.length() > 0) { - sb.insertCodePoint(2, 100, UNUM_INTEGER_FIELD, status); + sb.insertCodePoint(2, 100, {UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD}, status); assertSuccess("Inserting code point into sb", status); assertEquals("New length", str.length() * 2 + 1, sb.length()); - assertEquals("Integer field", (FormattedStringBuilder::Field) UNUM_INTEGER_FIELD, sb.fieldAt(2)); + assertEquals("Integer field", + Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD).bits, + sb.fieldAt(2).bits); } FormattedStringBuilder old(sb); @@ -245,13 +249,14 @@ void FormattedStringBuilderTest::testFields() { int32_t numCurr = 0; int32_t numInt = 0; for (int32_t i = 0; i < sb.length(); i++) { - FormattedStringBuilder::Field field = sb.fieldAt(i); - assertEquals("Field should equal location in old", old.fieldAt(i % old.length()), field); - if (field == UNUM_FIELD_COUNT) { + auto field = sb.fieldAt(i); + assertEquals("Field should equal location in old", + old.fieldAt(i % old.length()).bits, field.bits); + if (field == kUndefinedField) { numNull++; - } else if (field == UNUM_CURRENCY_FIELD) { + } else if (field == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) { numCurr++; - } else if (field == UNUM_INTEGER_FIELD) { + } else if (field == Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)) { numInt++; } else { errln("Encountered unknown field"); @@ -271,7 +276,7 @@ void FormattedStringBuilderTest::testUnlimitedCapacity() { UnicodeString message("Iteration #"); message += Int64ToUnicodeString(i); assertEquals(message, builder.length(), i); - builder.appendCodePoint(u'x', UNUM_FIELD_COUNT, status); + builder.appendCodePoint(u'x', kUndefinedField, status); assertSuccess(message, status); assertEquals(message, builder.length(), i + 1); } @@ -284,7 +289,7 @@ void FormattedStringBuilderTest::testCodePoints() { assertEquals("Last is -1 on empty string", -1, nsb.getLastCodePoint()); assertEquals("Length is 0 on empty string", 0, nsb.codePointCount()); - nsb.append(u"q", UNUM_FIELD_COUNT, status); + nsb.append(u"q", kUndefinedField, status); assertSuccess("Spot 1", status); assertEquals("First is q", u'q', nsb.getFirstCodePoint()); assertEquals("Last is q", u'q', nsb.getLastCodePoint()); @@ -293,7 +298,7 @@ void FormattedStringBuilderTest::testCodePoints() { assertEquals("Code point count is 1", 1, nsb.codePointCount()); // 🚀 is two char16s - nsb.append(u"🚀", UNUM_FIELD_COUNT, status); + nsb.append(u"🚀", kUndefinedField, status); assertSuccess("Spot 2" ,status); assertEquals("First is still q", u'q', nsb.getFirstCodePoint()); assertEquals("Last is space ship", 128640, nsb.getLastCodePoint()); diff --git a/icu4c/source/test/intltest/numbertest_affixutils.cpp b/icu4c/source/test/intltest/numbertest_affixutils.cpp index 7f29ad19903..499b5d0e090 100644 --- a/icu4c/source/test/intltest/numbertest_affixutils.cpp +++ b/icu4c/source/test/intltest/numbertest_affixutils.cpp @@ -222,7 +222,7 @@ void AffixUtilsTest::testUnescapeWithSymbolProvider() { UnicodeString input(cas[0]); UnicodeString expected(cas[1]); sb.clear(); - AffixUtils::unescape(input, sb, 0, provider, UNUM_FIELD_COUNT, status); + AffixUtils::unescape(input, sb, 0, provider, kUndefinedField, status); assertSuccess("Spot 1", status); assertEquals(input, expected, sb.toUnicodeString()); assertEquals(input, expected, sb.toTempUnicodeString()); @@ -230,9 +230,9 @@ void AffixUtilsTest::testUnescapeWithSymbolProvider() { // Test insertion position sb.clear(); - sb.append(u"abcdefg", UNUM_FIELD_COUNT, status); + sb.append(u"abcdefg", kUndefinedField, status); assertSuccess("Spot 2", status); - AffixUtils::unescape(u"-+%", sb, 4, provider, UNUM_FIELD_COUNT, status); + AffixUtils::unescape(u"-+%", sb, 4, provider, kUndefinedField, status); assertSuccess("Spot 3", status); assertEquals(u"Symbol provider into middle", u"abcd123efg", sb.toUnicodeString()); } @@ -240,7 +240,7 @@ void AffixUtilsTest::testUnescapeWithSymbolProvider() { UnicodeString AffixUtilsTest::unescapeWithDefaults(const SymbolProvider &defaultProvider, UnicodeString input, UErrorCode &status) { FormattedStringBuilder nsb; - int32_t length = AffixUtils::unescape(input, nsb, 0, defaultProvider, UNUM_FIELD_COUNT, status); + int32_t length = AffixUtils::unescape(input, nsb, 0, defaultProvider, kUndefinedField, status); assertEquals("Return value of unescape", nsb.length(), length); return nsb.toUnicodeString(); } diff --git a/icu4c/source/test/intltest/numbertest_modifiers.cpp b/icu4c/source/test/intltest/numbertest_modifiers.cpp index 90144ad1c64..5ffca12b70e 100644 --- a/icu4c/source/test/intltest/numbertest_modifiers.cpp +++ b/icu4c/source/test/intltest/numbertest_modifiers.cpp @@ -25,11 +25,11 @@ void ModifiersTest::runIndexedTest(int32_t index, UBool exec, const char *&name, void ModifiersTest::testConstantAffixModifier() { UErrorCode status = U_ZERO_ERROR; - ConstantAffixModifier mod0(u"", u"", UNUM_PERCENT_FIELD, true); + ConstantAffixModifier mod0(u"", u"", {UFIELD_CATEGORY_NUMBER, UNUM_PERCENT_FIELD}, true); assertModifierEquals(mod0, 0, true, u"|", u"n", status); assertSuccess("Spot 1", status); - ConstantAffixModifier mod1(u"a📻", u"b", UNUM_PERCENT_FIELD, true); + ConstantAffixModifier mod1(u"a📻", u"b", {UFIELD_CATEGORY_NUMBER, UNUM_PERCENT_FIELD}, true); assertModifierEquals(mod1, 3, true, u"a📻|b", u"%%%n%", status); assertSuccess("Spot 2", status); } @@ -42,8 +42,8 @@ void ModifiersTest::testConstantMultiFieldModifier() { assertModifierEquals(mod1, 0, true, u"|", u"n", status); assertSuccess("Spot 1", status); - prefix.append(u"a📻", UNUM_PERCENT_FIELD, status); - suffix.append(u"b", UNUM_CURRENCY_FIELD, status); + prefix.append(u"a📻", {UFIELD_CATEGORY_NUMBER, UNUM_PERCENT_FIELD}, status); + suffix.append(u"b", {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status); ConstantMultiFieldModifier mod2(prefix, suffix, false, true); assertModifierEquals(mod2, 3, true, u"a📻|b", u"%%%n$", status); assertSuccess("Spot 2", status); @@ -80,7 +80,7 @@ void ModifiersTest::testSimpleModifier() { const UnicodeString pattern(patterns[i]); SimpleFormatter compiledFormatter(pattern, 1, 1, status); assertSuccess("Spot 1", status); - SimpleModifier mod(compiledFormatter, UNUM_PERCENT_FIELD, false); + SimpleModifier mod(compiledFormatter, {UFIELD_CATEGORY_NUMBER, UNUM_PERCENT_FIELD}, false); assertModifierEquals( mod, prefixLens[i], false, expectedCharFields[i][0], expectedCharFields[i][1], status); assertSuccess("Spot 2", status); @@ -88,7 +88,7 @@ void ModifiersTest::testSimpleModifier() { // Test strange insertion positions for (int32_t j = 0; j < NUM_OUTPUTS; j++) { FormattedStringBuilder output; - output.append(outputs[j].baseString, UNUM_FIELD_COUNT, status); + output.append(outputs[j].baseString, kUndefinedField, status); mod.apply(output, outputs[j].leftIndex, outputs[j].rightIndex, status); UnicodeString expected = expecteds[j][i]; UnicodeString actual = output.toUnicodeString(); @@ -112,7 +112,7 @@ void ModifiersTest::testCurrencySpacingEnabledModifier() { assertModifierEquals(mod1, 0, true, u"|", u"n", status); assertSuccess("Spot 3", status); - prefix.append(u"USD", UNUM_CURRENCY_FIELD, status); + prefix.append(u"USD", {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status); assertSuccess("Spot 4", status); CurrencySpacingEnabledModifier mod2(prefix, suffix, false, true, symbols, status); assertSuccess("Spot 5", status); @@ -121,7 +121,7 @@ void ModifiersTest::testCurrencySpacingEnabledModifier() { // Test the default currency spacing rules FormattedStringBuilder sb; - sb.append("123", UNUM_INTEGER_FIELD, status); + sb.append("123", {UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD}, status); assertSuccess("Spot 7", status); FormattedStringBuilder sb1(sb); assertModifierEquals(mod2, sb1, 3, true, u"USD\u00A0123", u"$$$niii", status); @@ -129,7 +129,7 @@ void ModifiersTest::testCurrencySpacingEnabledModifier() { // Compare with the unsafe code path FormattedStringBuilder sb2(sb); - sb2.insert(0, "USD", UNUM_CURRENCY_FIELD, status); + sb2.insert(0, "USD", {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status); assertSuccess("Spot 9", status); CurrencySpacingEnabledModifier::applyCurrencySpacing(sb2, 0, 3, 6, 0, symbols, status); assertSuccess("Spot 10", status); @@ -138,7 +138,7 @@ void ModifiersTest::testCurrencySpacingEnabledModifier() { // Test custom patterns // The following line means that the last char of the number should be a | (rather than a digit) symbols.setPatternForCurrencySpacing(UNUM_CURRENCY_SURROUNDING_MATCH, true, u"[|]"); - suffix.append("XYZ", UNUM_CURRENCY_FIELD, status); + suffix.append("XYZ", {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status); assertSuccess("Spot 11", status); CurrencySpacingEnabledModifier mod3(prefix, suffix, false, true, symbols, status); assertSuccess("Spot 12", status); @@ -150,7 +150,7 @@ void ModifiersTest::assertModifierEquals(const Modifier &mod, int32_t expectedPr bool expectedStrong, UnicodeString expectedChars, UnicodeString expectedFields, UErrorCode &status) { FormattedStringBuilder sb; - sb.appendCodePoint('|', UNUM_FIELD_COUNT, status); + sb.appendCodePoint('|', kUndefinedField, status); assertModifierEquals( mod, sb, expectedPrefixLength, expectedStrong, expectedChars, expectedFields, status); diff --git a/icu4c/source/test/intltest/numbertest_patternmodifier.cpp b/icu4c/source/test/intltest/numbertest_patternmodifier.cpp index 1d1e634cd33..42d3a6f7cf4 100644 --- a/icu4c/source/test/intltest/numbertest_patternmodifier.cpp +++ b/icu4c/source/test/intltest/numbertest_patternmodifier.cpp @@ -26,7 +26,7 @@ void PatternModifierTest::testBasic() { ParsedPatternInfo patternInfo; PatternParser::parseToPatternInfo(u"a0b", patternInfo, status); assertSuccess("Spot 1", status); - mod.setPatternInfo(&patternInfo, UNUM_FIELD_COUNT); + mod.setPatternInfo(&patternInfo, kUndefinedField); mod.setPatternAttributes(UNUM_SIGN_AUTO, false); DecimalFormatSymbols symbols(Locale::getEnglish(), status); CurrencySymbols currencySymbols({u"USD", status}, "en", status); @@ -61,7 +61,7 @@ void PatternModifierTest::testBasic() { ParsedPatternInfo patternInfo2; PatternParser::parseToPatternInfo(u"a0b;c-0d", patternInfo2, status); assertSuccess("Spot 4", status); - mod.setPatternInfo(&patternInfo2, UNUM_FIELD_COUNT); + mod.setPatternInfo(&patternInfo2, kUndefinedField); mod.setPatternAttributes(UNUM_SIGN_AUTO, false); mod.setNumberProperties(SIGNUM_POS, StandardPlural::Form::COUNT); assertEquals("Pattern a0b;c-0d", u"a", getPrefix(mod, status)); @@ -93,7 +93,7 @@ void PatternModifierTest::testPatternWithNoPlaceholder() { ParsedPatternInfo patternInfo; PatternParser::parseToPatternInfo(u"abc", patternInfo, status); assertSuccess("Spot 1", status); - mod.setPatternInfo(&patternInfo, UNUM_FIELD_COUNT); + mod.setPatternInfo(&patternInfo, kUndefinedField); mod.setPatternAttributes(UNUM_SIGN_AUTO, false); DecimalFormatSymbols symbols(Locale::getEnglish(), status); CurrencySymbols currencySymbols({u"USD", status}, "en", status); @@ -105,7 +105,7 @@ void PatternModifierTest::testPatternWithNoPlaceholder() { // Unsafe Code Path FormattedStringBuilder nsb; - nsb.append(u"x123y", UNUM_FIELD_COUNT, status); + nsb.append(u"x123y", kUndefinedField, status); assertSuccess("Spot 3", status); mod.apply(nsb, 1, 4, status); assertSuccess("Spot 4", status); @@ -113,7 +113,7 @@ void PatternModifierTest::testPatternWithNoPlaceholder() { // Safe Code Path nsb.clear(); - nsb.append(u"x123y", UNUM_FIELD_COUNT, status); + nsb.append(u"x123y", kUndefinedField, status); assertSuccess("Spot 5", status); MicroProps micros; LocalPointer imod(mod.createImmutable(status), status); @@ -136,7 +136,7 @@ void PatternModifierTest::testMutableEqualsImmutable() { ParsedPatternInfo patternInfo; PatternParser::parseToPatternInfo("a0b;c-0d", patternInfo, status); assertSuccess("Spot 1", status); - mod.setPatternInfo(&patternInfo, UNUM_FIELD_COUNT); + mod.setPatternInfo(&patternInfo, kUndefinedField); mod.setPatternAttributes(UNUM_SIGN_AUTO, false); DecimalFormatSymbols symbols(Locale::getEnglish(), status); CurrencySymbols currencySymbols({u"USD", status}, "en", status); From 4cd5a81ad22b7ac180b1b81ca0c33c8b2f0dba27 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Mon, 13 Jan 2020 21:52:52 +0100 Subject: [PATCH 03/15] Intermediate work on new unit identifiers --- icu4c/source/i18n/measunit.cpp | 56 +++- icu4c/source/i18n/unicode/measunit.h | 359 ++++++++++++++++++++- icu4c/source/test/intltest/measfmttest.cpp | 81 +++++ 3 files changed, 478 insertions(+), 18 deletions(-) diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp index 66ddd0a1d54..0886b23caf7 100644 --- a/icu4c/source/i18n/measunit.cpp +++ b/icu4c/source/i18n/measunit.cpp @@ -2006,21 +2006,54 @@ static int32_t binarySearch( return -1; } -MeasureUnit::MeasureUnit() { - fCurrency[0] = 0; - fTypeId = kBaseTypeIdx; - fSubTypeId = kBaseSubTypeIdx; +MeasureUnit::MeasureUnit() : MeasureUnit(kBaseTypeIdx, kBaseSubTypeIdx) { } -MeasureUnit::MeasureUnit(const MeasureUnit &other) - : fTypeId(other.fTypeId), fSubTypeId(other.fSubTypeId) { +MeasureUnit::MeasureUnit(int32_t typeId, int32_t subTypeId) + : fId(nullptr), fSubTypeId(subTypeId), fTypeId(typeId) { + fCurrency[0] = 0; +} + +MeasureUnit::MeasureUnit(const MeasureUnit &other) { + *this = other; +} + +MeasureUnit::MeasureUnit(MeasureUnit &&other) noexcept + : fId(other.fId), + fSubTypeId(other.fSubTypeId), + fTypeId(other.fTypeId) { uprv_strcpy(fCurrency, other.fCurrency); + other.fId = nullptr; } MeasureUnit &MeasureUnit::operator=(const MeasureUnit &other) { if (this == &other) { return *this; } + if (other.fId) { + auto* id = static_cast(uprv_malloc(uprv_strlen(other.fId) + 1)); + if (!id) { + // Unrecoverable allocation error; set to the default unit + *this = MeasureUnit(); + return *this; + } + uprv_strcpy(id, other.fId); + fId = id; + } else { + fId = nullptr; + } + fTypeId = other.fTypeId; + fSubTypeId = other.fSubTypeId; + uprv_strcpy(fCurrency, other.fCurrency); + return *this; +} + +MeasureUnit &MeasureUnit::operator=(MeasureUnit &&other) noexcept { + if (this == &other) { + return *this; + } + fId = other.fId; + other.fId = nullptr; fTypeId = other.fTypeId; fSubTypeId = other.fSubTypeId; uprv_strcpy(fCurrency, other.fCurrency); @@ -2032,6 +2065,8 @@ MeasureUnit *MeasureUnit::clone() const { } MeasureUnit::~MeasureUnit() { + uprv_free(const_cast(fId)); + fId = nullptr; } const char *MeasureUnit::getType() const { @@ -2042,6 +2077,10 @@ const char *MeasureUnit::getSubtype() const { return fCurrency[0] == 0 ? gSubTypes[getOffset()] : fCurrency; } +const char *MeasureUnit::toString() const { + return fId ? fId : getSubtype(); +} + UBool MeasureUnit::operator==(const UObject& other) const { if (this == &other) { // Same object, equal return TRUE; @@ -2050,10 +2089,7 @@ UBool MeasureUnit::operator==(const UObject& other) const { return FALSE; } const MeasureUnit &rhs = static_cast(other); - return ( - fTypeId == rhs.fTypeId - && fSubTypeId == rhs.fSubTypeId - && uprv_strcmp(fCurrency, rhs.fCurrency) == 0); + return uprv_strcmp(toString(), rhs.toString()) == 0; } int32_t MeasureUnit::getIndex() const { diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index ebcf6914fdb..cca944cc8ff 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -29,6 +29,162 @@ U_NAMESPACE_BEGIN class StringEnumeration; +class MeasureUnitFields; + +/** + * Enumeration for SI prefixes, such as "kilo". + * + * @draft ICU 67 + */ +typedef enum UMeasureSIPrefix { + + /** + * SI prefix: yotta, 10^24. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_YOTTA = 24, + + /** + * SI prefix: zetta, 10^21. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_ZETTA = 21, + + /** + * SI prefix: exa, 10^18. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_EXA = 18, + + /** + * SI prefix: peta, 10^15. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_PETA = 15, + + /** + * SI prefix: tera, 10^12. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_TERA = 12, + + /** + * SI prefix: giga, 10^9. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_GIGA = 9, + + /** + * SI prefix: mega, 10^6. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_MEGA = 6, + + /** + * SI prefix: kilo, 10^3. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_KILO = 3, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_HECTO = 2, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_DEKA = 1, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_ONE = 0, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_DECI = -1, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_CENTI = -2, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_MILLI = -3, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_MICRO = -6, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_NANO = -9, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_PICO = -12, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_FEMTO = -15, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_ATTO = -18, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_ZEPTO = -21, + + /** + * SI prefix: FIXME, 10^FIXME. + * + * @draft ICU 67 + */ + UMEASURE_SI_PREFIX_YOCTO = -24 +} UMeasureSIPrefix; /** * A unit such as length, mass, volume, currency, etc. A unit is @@ -52,13 +208,39 @@ class U_I18N_API MeasureUnit: public UObject { * @stable ICU 3.0 */ MeasureUnit(const MeasureUnit &other); - + /** - * Assignment operator. + * Move constructor. + * @stable ICU 3.0 + */ + MeasureUnit(MeasureUnit &&other) noexcept; + + /** + * Construct a MeasureUnit from a CLDR Sequence Unit Identifier, defined in UTS 35. + * Validates and canonicalizes the identifier. + * + *
+     * MeasureUnit example = MeasureUnit::forIdentifier("furlong-per-nanosecond")
+     * 
+ * + * @param id The CLDR Sequence Unit Identifier + * @param status Set if the identifier is invalid. + * @draft ICU 67 + */ + static MeasureUnit forIdentifier(const char* identifier, UErrorCode& status); + + /** + * Copy assignment operator. * @stable ICU 3.0 */ MeasureUnit &operator=(const MeasureUnit &other); + /** + * Move assignment operator. + * @stable ICU 3.0 + */ + MeasureUnit &operator=(MeasureUnit &&other) noexcept; + /** * Returns a polymorphic clone of this object. The result will * have the same class as returned by getDynamicClassID(). @@ -90,16 +272,177 @@ class U_I18N_API MeasureUnit: public UObject { /** * Get the type. + * + * If the unit does not have a type, the empty string is returned. + * * @stable ICU 53 */ const char *getType() const; /** * Get the sub type. + * + * If the unit does not have a subtype, the empty string is returned. + * * @stable ICU 53 */ const char *getSubtype() const; + /** + * Get the CLDR Sequence Unit Identifier for this MeasureUnit, as defined in UTS 35. + * + * @return The string form of this unit, owned by this MeasureUnit. + * @draft ICU 67 + */ + const char* toString() const; + + /** + * Creates a MeasureUnit which is this MeasureUnit augmented with the specified SI prefix. + * For example, UMEASURE_SI_PREFIX_KILO for "kilo". + * + * There is sufficient locale data to format all standard SI prefixes. + * + * If the MeasureUnit is composed of multiple simple units, only the first simple unit is + * modified. For example, starting at "meter-kilogram-per-second", if you set the SI prefix + * to "centi", then you get "centimeter-kilogram-per-second". + * + * @param prefix The SI prefix, from UMeasureSIPrefix. + * @return A new MeasureUnit. + */ + MeasureUnit withSIPrefix(UMeasureSIPrefix prefix) const; + + /** + * Gets the current SI prefix of this MeasureUnit. For example, if the unit has the SI prefix + * "kilo", then UMEASURE_SI_PREFIX_KILO is returned. + * + * If the MeasureUnit is composed of multiple simple units, the SI prefix of the first simple + * unit is returned. For example, from "centimeter-kilogram-per-second", the SI prefix + * UMEASURE_SI_PREFIX_CENTI will be returned. + * + * @return The SI prefix of the first simple unit, from UMeasureSIPrefix. + */ + UMeasureSIPrefix getSIPrefix() const; + + /** + * Creates a MeasureUnit which is this MeasureUnit augmented with the specified power. For + * example, if power is 2, the unit will be squared. + * + * If the MeasureUnit is composed of multiple simple units, only the first simple unit is + * modified. For example, starting at "meter-kilogram-per-second", if you set the power to 2, + * then you get "square-meter-kilogram-per-second". + * + * @param power The power. + * @return A new MeasureUnit. + */ + MeasureUnit withPower(int8_t power) const; + + /** + * Gets the power of this MeasureUnit. For example, if the unit is square, then 2 is returned. + * + * If the MeasureUnit is composed of multiple simple units, the power of the first simple unit + * is returned. For example, from "cubic-meter-per-square-second", 3 is returned. + * + * @return The power of the first simple unit. + */ + int8_t getPower() const; + + /** + * Gets the reciprocal of the unit, with the numerator and denominator flipped. + * + * For example, if the receiver is "meter-per-second", the unit "second-per-meter" is returned. + * + * @return The reciprocal of the target unit. + */ + MeasureUnit reciprocal() const; + + /** + * Gets the product of this unit with another unit. This is a way to build units from + * constituent parts. + * + * The numerator and denominator are preserved through this operation. + * + * For example, if the receiver is "kilowatt" and the argument is "hour-per-day", then the + * unit "kilowatt-hour-per-day" is returned. + * + * @return The product of the target unit with the provided unit. + */ + MeasureUnit product(const MeasureUnit& other) const; + + /** + * Gets the number of constituent simple units. + * + * For example, if the receiver is "meter-per-square-second", then 2 is returned, since there + * are two simple units: "meter" and "second". + * + * @return The number of constituent units. + */ + size_t getSimpleUnitCount() const; + + /** + * Gets the constituent unit at the given index. + * + * For example, to loop over all simple units: + * + *
+     * MeasureUnit unit(u"meter-per-square-second");
+     * for (size_t i = 0; i < unit.getSimpleUnitCount(); i++) {
+     *     std::cout << unit.simpleUnitAt(i).toString() << std::endl;
+     * }
+     * 
+ * + * Expected output: meter, one-per-square-second + * + * @param index Zero-based index. If out of range, the dimensionless unit is returned. + * @return The constituent simple unit at the specified position. + */ + MeasureUnit simpleUnitAt(size_t index) const; + + /** + * Composes this unit with a super unit. + * + * A super unit, used for formatting only, should be a larger unit sharing the same dimension. + * For example, if the current unit is "inch", a super unit could be "foot", in order to + * render 71 inches as "5 feet, 11 inches". If the super unit is invalid, an error will occur + * during formatting. + * + * A unit can have multiple super units; for example, "second" could have both "minute" and + * "hour" as super units. + * + * Super units are ignored and left untouched in most other methods, such as withSIPrefix, + * withPower, and reciprocal. + * + * @param other The super unit to compose with the target unit. + * @return The composition of the given super unit with this unit. + */ + MeasureUnit withSuperUnit(const MeasureUnit& other) const; + + /** + * Gets the number of super units in the receiver. + * + * For example, "foot+inch" has one super unit. + * + * @return The number of super units. + */ + size_t getSuperUnitCount() const; + + /** + * Gets the super unit at the given index. + * + * For example, to loop over all super units: + * + *
+     * MeasureUnit unit(u"hour+minute+second");
+     * for (size_t i = 0; i < unit.getSuperUnitCount(); i++) {
+     *     std::cout << unit.superUnitAt(i).toCoreUnitIdentifier() << std::endl;
+     * }
+     * 
+ * + * Expected output: hour, minute + * + * @return The super unit at the specified position. + */ + MeasureUnit superUnitAt(size_t index) const; + /** * getAvailable gets all of the available units. * If there are too many units to fit into destCapacity then the @@ -3353,13 +3696,13 @@ class U_I18N_API MeasureUnit: public UObject { #endif /* U_HIDE_INTERNAL_API */ private: - int32_t fTypeId; - int32_t fSubTypeId; - char fCurrency[4]; - MeasureUnit(int32_t typeId, int32_t subTypeId) : fTypeId(typeId), fSubTypeId(subTypeId) { - fCurrency[0] = 0; - } + const char* fId; + char fCurrency[4]; + int16_t fSubTypeId; + int8_t fTypeId; + + MeasureUnit(int32_t typeId, int32_t subTypeId); void setTo(int32_t typeId, int32_t subTypeId); int32_t getOffset() const; static MeasureUnit *create(int typeId, int subTypeId, UErrorCode &status); diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 2b53f8bc024..94a499e73a4 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -79,6 +79,7 @@ private: void Test20332_PersonUnits(); void TestNumericTime(); void TestNumericTimeSomeSpecialFormats(); + void TestCompoundUnitOperations(); void verifyFormat( const char *description, const MeasureFormat &fmt, @@ -138,6 +139,11 @@ private: NumberFormat::EAlignmentFields field, int32_t start, int32_t end); + void verifyUnitParts( + const MeasureUnit& unit, + UMeasureSIPrefix siPrefix, + int8_t power, + const char* identifier); }; void MeasureFormatTest::runIndexedTest( @@ -3215,6 +3221,69 @@ void MeasureFormatTest::TestNumericTimeSomeSpecialFormats() { verifyFormat("Danish fhoursFminutes", fmtDa, fhoursFminutes, 2, "2.03,877"); } +void MeasureFormatTest::TestCompoundUnitOperations() { + IcuTestErrorCode status(*this, "TestCompoundUnitOperations"); + + MeasureUnit kilometer = MeasureUnit::getKilometer(); + MeasureUnit meter = kilometer.withSIPrefix(UMEASURE_SI_PREFIX_ONE); + MeasureUnit centimeter1 = kilometer.withSIPrefix(UMEASURE_SI_PREFIX_CENTI); + MeasureUnit centimeter2 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI); + + verifyUnitParts(kilometer, UMEASURE_SI_PREFIX_KILO, 0, "kilometer"); + verifyUnitParts(meter, UMEASURE_SI_PREFIX_ONE, 0, "meter"); + verifyUnitParts(centimeter1, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter"); + verifyUnitParts(centimeter2, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter"); + + assertTrue("centimeter equality", centimeter1 == centimeter2); + assertTrue("kilometer inequality", centimeter1 != kilometer); + + MeasureUnit squareMeter = meter.withPower(2); + MeasureUnit overCubicCentimeter = centimeter1.withPower(-3); + MeasureUnit quarticKilometer = kilometer.withPower(4); + MeasureUnit overQuarticKilometer1 = kilometer.withPower(-4); + + verifyUnitParts(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter"); + verifyUnitParts(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, 2, "one-per-cubic-centimeter"); + verifyUnitParts(quarticKilometer, UMEASURE_SI_PREFIX_ONE, 2, "p4-kilometer"); + verifyUnitParts(overQuarticKilometer1, UMEASURE_SI_PREFIX_ONE, 2, "one-per-p4-kilometer"); + + assertTrue("power inequality", quarticKilometer != overQuarticKilometer1); + + MeasureUnit overQuarticKilometer2 = overQuarticKilometer1.reciprocal(); + MeasureUnit overQuarticKilometer3 = kilometer.product(kilometer).product(kilometer) + .product(kilometer).reciprocal(); + + verifyUnitParts(overQuarticKilometer2, UMEASURE_SI_PREFIX_ONE, 2, "one-per-p4-kilometer"); + verifyUnitParts(overQuarticKilometer3, UMEASURE_SI_PREFIX_ONE, 2, "one-per-p4-kilometer"); + + assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); + assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); + + MeasureUnit kiloSquareSecond = MeasureUnit::getSecond() + .withPower(2).withSIPrefix(UMEASURE_SI_PREFIX_KILO); + MeasureUnit meterSecond = meter.product(kiloSquareSecond); + MeasureUnit cubicMeterSecond1 = meter.withPower(3).product(kiloSquareSecond); + MeasureUnit cubicMeterSecond2 = meterSecond.withPower(3); + MeasureUnit centimeterSecond1 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI).product(kiloSquareSecond); + MeasureUnit centimeterSecond2 = meterSecond.withSIPrefix(UMEASURE_SI_PREFIX_CENTI); + MeasureUnit secondCubicMeter = kiloSquareSecond.product(meter.withPower(3)); + MeasureUnit secondCentimeter = kiloSquareSecond.product(meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI)); + + verifyUnitParts(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); + verifyUnitParts(meterSecond, UMEASURE_SI_PREFIX_ONE, 0, "meter-square-kilosecond"); + verifyUnitParts(cubicMeterSecond1, UMEASURE_SI_PREFIX_ONE, 2, "cubic-meter-square-kilosecond"); + verifyUnitParts(cubicMeterSecond2, UMEASURE_SI_PREFIX_ONE, 2, "cubic-meter-square-kilosecond"); + verifyUnitParts(centimeterSecond1, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter-square-kilosecond"); + verifyUnitParts(centimeterSecond2, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter-square-kilosecond"); + verifyUnitParts(secondCubicMeter, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond-cubic-meter"); + verifyUnitParts(secondCentimeter, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond-centimeter"); + + assertTrue("multipart power equality", cubicMeterSecond1 == cubicMeterSecond2); + assertTrue("multipart SI prefix equality", centimeterSecond1 == centimeterSecond2); + assertTrue("order matters inequality", cubicMeterSecond1 != secondCubicMeter); + assertTrue("additional simple units inequality", secondCubicMeter != secondCentimeter); +} + void MeasureFormatTest::verifyFieldPosition( const char *description, @@ -3286,6 +3355,18 @@ void MeasureFormatTest::verifyFormat( } } +void MeasureFormatTest::verifyUnitParts( + const MeasureUnit& unit, + UMeasureSIPrefix siPrefix, + int8_t power, + const char* identifier) { + IcuTestErrorCode status(*this, "verifyUnitParts"); + assertEquals(UnicodeString(identifier) + ": SI prefix", siPrefix, unit.getSIPrefix()); + assertEquals(UnicodeString(identifier) + ": Power", power, unit.getPower()); + assertEquals(UnicodeString(identifier) + ": Identifier", identifier, unit.toString()); + assertTrue(UnicodeString(identifier) + ": Constructor", unit == MeasureUnit::forIdentifier(identifier, status)); +} + extern IntlTest *createMeasureFormatTest() { return new MeasureFormatTest(); } From c787452cfecd35e304c1a826e863505f0b2dddf3 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Tue, 14 Jan 2020 15:12:27 +0100 Subject: [PATCH 04/15] Removing fCurrency to always use fId --- icu4c/source/i18n/measunit.cpp | 55 +++++++++++++++++-------- icu4c/source/i18n/unicode/measunit.h | 6 ++- icu4c/source/test/intltest/numfmtst.cpp | 15 ++++++- 3 files changed, 55 insertions(+), 21 deletions(-) diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp index 0886b23caf7..43811a5331d 100644 --- a/icu4c/source/i18n/measunit.cpp +++ b/icu4c/source/i18n/measunit.cpp @@ -2011,10 +2011,10 @@ MeasureUnit::MeasureUnit() : MeasureUnit(kBaseTypeIdx, kBaseSubTypeIdx) { MeasureUnit::MeasureUnit(int32_t typeId, int32_t subTypeId) : fId(nullptr), fSubTypeId(subTypeId), fTypeId(typeId) { - fCurrency[0] = 0; } -MeasureUnit::MeasureUnit(const MeasureUnit &other) { +MeasureUnit::MeasureUnit(const MeasureUnit &other) + : fId(nullptr) { *this = other; } @@ -2022,7 +2022,6 @@ MeasureUnit::MeasureUnit(MeasureUnit &&other) noexcept : fId(other.fId), fSubTypeId(other.fSubTypeId), fTypeId(other.fTypeId) { - uprv_strcpy(fCurrency, other.fCurrency); other.fId = nullptr; } @@ -2030,21 +2029,19 @@ MeasureUnit &MeasureUnit::operator=(const MeasureUnit &other) { if (this == &other) { return *this; } + uprv_free(fId); if (other.fId) { - auto* id = static_cast(uprv_malloc(uprv_strlen(other.fId) + 1)); - if (!id) { + fId = uprv_strdup(other.fId); + if (!fId) { // Unrecoverable allocation error; set to the default unit *this = MeasureUnit(); return *this; } - uprv_strcpy(id, other.fId); - fId = id; } else { fId = nullptr; } fTypeId = other.fTypeId; fSubTypeId = other.fSubTypeId; - uprv_strcpy(fCurrency, other.fCurrency); return *this; } @@ -2052,11 +2049,11 @@ MeasureUnit &MeasureUnit::operator=(MeasureUnit &&other) noexcept { if (this == &other) { return *this; } + uprv_free(fId); fId = other.fId; other.fId = nullptr; fTypeId = other.fTypeId; fSubTypeId = other.fSubTypeId; - uprv_strcpy(fCurrency, other.fCurrency); return *this; } @@ -2065,20 +2062,28 @@ MeasureUnit *MeasureUnit::clone() const { } MeasureUnit::~MeasureUnit() { - uprv_free(const_cast(fId)); + uprv_free(fId); fId = nullptr; } const char *MeasureUnit::getType() const { + // We have a type & subtype only if fTypeId is present. + if (fTypeId == -1) { + return ""; + } return gTypes[fTypeId]; } const char *MeasureUnit::getSubtype() const { - return fCurrency[0] == 0 ? gSubTypes[getOffset()] : fCurrency; + // We have a type & subtype only if fTypeId is present. + if (fTypeId == -1) { + return ""; + } + return toString(); } const char *MeasureUnit::toString() const { - return fId ? fId : getSubtype(); + return fId ? fId : gSubTypes[getOffset()]; } UBool MeasureUnit::operator==(const UObject& other) const { @@ -2225,6 +2230,10 @@ MeasureUnit MeasureUnit::resolveUnitPerUnit( const MeasureUnit &unit, const MeasureUnit &perUnit, bool* isResolved) { int32_t unitOffset = unit.getOffset(); int32_t perUnitOffset = perUnit.getOffset(); + if (unitOffset == -1 || perUnitOffset == -1) { + *isResolved = false; + return MeasureUnit(); + } // binary search for (unitOffset, perUnitOffset) int32_t start = 0; @@ -2278,12 +2287,18 @@ void MeasureUnit::initCurrency(const char *isoCurrency) { fTypeId = result; result = binarySearch( gSubTypes, gOffsets[fTypeId], gOffsets[fTypeId + 1], isoCurrency); - if (result != -1) { - fSubTypeId = result - gOffsets[fTypeId]; - } else { - uprv_strncpy(fCurrency, isoCurrency, UPRV_LENGTHOF(fCurrency)); - fCurrency[3] = 0; + if (result == -1) { + fId = uprv_strdup(isoCurrency); + if (fId) { + fSubTypeId = -1; + return; + } + // malloc error: fall back to the undefined currency + result = binarySearch( + gSubTypes, gOffsets[fTypeId], gOffsets[fTypeId + 1], "XXX"); + U_ASSERT(result != -1); } + fSubTypeId = result - gOffsets[fTypeId]; } void MeasureUnit::initNoUnit(const char *subtype) { @@ -2298,10 +2313,14 @@ void MeasureUnit::initNoUnit(const char *subtype) { void MeasureUnit::setTo(int32_t typeId, int32_t subTypeId) { fTypeId = typeId; fSubTypeId = subTypeId; - fCurrency[0] = 0; + uprv_free(fId); + fId = nullptr; } int32_t MeasureUnit::getOffset() const { + if (fTypeId < 0 || fSubTypeId < 0) { + return -1; + } return gOffsets[fTypeId] + fSubTypeId; } diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index cca944cc8ff..16790f8a3d9 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -3697,8 +3697,10 @@ class U_I18N_API MeasureUnit: public UObject { private: - const char* fId; - char fCurrency[4]; + // If non-null, fId is owned by the MeasureUnit. + char* fId; + + // These two ints are indices into static string lists in measunit.cpp int16_t fSubTypeId; int8_t fTypeId; diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp index 8b0f7e0a77b..b1b032a05a3 100644 --- a/icu4c/source/test/intltest/numfmtst.cpp +++ b/icu4c/source/test/intltest/numfmtst.cpp @@ -2164,6 +2164,8 @@ void NumberFormatTest::TestCurrencyUnit(void){ static const UChar BAD2[] = u"??A"; static const UChar XXX[] = u"XXX"; static const char XXX8[] = "XXX"; + static const UChar XYZ[] = u"XYZ"; + static const char XYZ8[] = "XYZ"; static const UChar INV[] = u"{$%"; static const char INV8[] = "{$%"; static const UChar ZZZ[] = u"zz"; @@ -2180,10 +2182,16 @@ void NumberFormatTest::TestCurrencyUnit(void){ CurrencyUnit cu(USD, ec); assertSuccess("CurrencyUnit", ec); - assertEquals("getISOCurrency()", USD, cu.getISOCurrency()); assertEquals("getSubtype()", USD8, cu.getSubtype()); + // Test XYZ, a valid but non-standard currency. + // Note: Country code XY is private-use, so XYZ should remain unallocated. + CurrencyUnit extended(XYZ, ec); + assertSuccess("non-standard", ec); + assertEquals("non-standard", XYZ, extended.getISOCurrency()); + assertEquals("non-standard", XYZ8, extended.getSubtype()); + CurrencyUnit inv(INV, ec); assertEquals("non-invariant", U_INVARIANT_CONVERSION_ERROR, ec); assertEquals("non-invariant", XXX, inv.getISOCurrency()); @@ -2257,15 +2265,20 @@ void NumberFormatTest::TestCurrencyUnit(void){ // Test slicing MeasureUnit sliced1 = cu; MeasureUnit sliced2 = cu; + MeasureUnit sliced3 = extended; assertEquals("Subtype after slicing 1", USD8, sliced1.getSubtype()); assertEquals("Subtype after slicing 2", USD8, sliced2.getSubtype()); + assertEquals("Subtype after slicing 3", XYZ8, sliced3.getSubtype()); CurrencyUnit restored1(sliced1, ec); CurrencyUnit restored2(sliced2, ec); + CurrencyUnit restored3(sliced3, ec); assertSuccess("Restoring from MeasureUnit", ec); assertEquals("Subtype after restoring 1", USD8, restored1.getSubtype()); assertEquals("Subtype after restoring 2", USD8, restored2.getSubtype()); + assertEquals("Subtype after restoring 3", XYZ8, restored3.getSubtype()); assertEquals("ISO Code after restoring 1", USD, restored1.getISOCurrency()); assertEquals("ISO Code after restoring 2", USD, restored2.getISOCurrency()); + assertEquals("ISO Code after restoring 3", XYZ, restored3.getISOCurrency()); // Test copy constructor failure LocalPointer meter(MeasureUnit::createMeter(ec)); From 9a6caa01afeb79180c8ce3bf1ad9f3dcbd20cd4d Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Tue, 14 Jan 2020 16:47:18 +0100 Subject: [PATCH 05/15] Adding measunit_extra.cpp with basic trie --- icu4c/source/i18n/Makefile.in | 3 +- icu4c/source/i18n/i18n.vcxproj | 1 + icu4c/source/i18n/i18n.vcxproj.filters | 3 + icu4c/source/i18n/i18n_uwp.vcxproj | 1 + icu4c/source/i18n/measunit_extra.cpp | 231 ++++++++++++++++++++ icu4c/source/i18n/ucln_in.h | 1 + icu4c/source/i18n/unicode/measunit.h | 1 - icu4c/source/test/depstest/dependencies.txt | 6 + 8 files changed, 245 insertions(+), 2 deletions(-) create mode 100644 icu4c/source/i18n/measunit_extra.cpp diff --git a/icu4c/source/i18n/Makefile.in b/icu4c/source/i18n/Makefile.in index f22c20eabca..f6f9860fb3a 100644 --- a/icu4c/source/i18n/Makefile.in +++ b/icu4c/source/i18n/Makefile.in @@ -114,7 +114,8 @@ numparse_symbols.o numparse_decimal.o numparse_scientific.o numparse_currency.o numparse_affixes.o numparse_compositions.o numparse_validators.o \ numrange_fluent.o numrange_impl.o \ erarules.o \ -formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o formatted_string_builder.o +formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o formatted_string_builder.o \ +measunit_extra.o ## Header files to install HEADERS = $(srcdir)/unicode/*.h diff --git a/icu4c/source/i18n/i18n.vcxproj b/icu4c/source/i18n/i18n.vcxproj index 5c6760d220e..f22fd2ff097 100644 --- a/icu4c/source/i18n/i18n.vcxproj +++ b/icu4c/source/i18n/i18n.vcxproj @@ -187,6 +187,7 @@ + diff --git a/icu4c/source/i18n/i18n.vcxproj.filters b/icu4c/source/i18n/i18n.vcxproj.filters index a1813fc0696..eade0d07354 100644 --- a/icu4c/source/i18n/i18n.vcxproj.filters +++ b/icu4c/source/i18n/i18n.vcxproj.filters @@ -204,6 +204,9 @@ formatting + + formatting + formatting diff --git a/icu4c/source/i18n/i18n_uwp.vcxproj b/icu4c/source/i18n/i18n_uwp.vcxproj index 989cef88fe5..2466ad95864 100644 --- a/icu4c/source/i18n/i18n_uwp.vcxproj +++ b/icu4c/source/i18n/i18n_uwp.vcxproj @@ -408,6 +408,7 @@ + diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp new file mode 100644 index 00000000000..d84e50af350 --- /dev/null +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -0,0 +1,231 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// Extra functions for MeasureUnit not needed for all clients. +// Separate .o file so that it can be removed for modularity. + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +// Allow implicit conversion from char16_t* to UnicodeString for this file: +// Helpful in toString methods and elsewhere. +#define UNISTR_FROM_STRING_EXPLICIT + +#include "cstring.h" +#include "ucln_in.h" +#include "umutex.h" +#include "unicode/errorcode.h" +#include "unicode/measunit.h" +#include "unicode/ucharstrie.h" +#include "unicode/ucharstriebuilder.h" + +U_NAMESPACE_BEGIN + + +namespace { + +// This is to ensure we only insert positive integers into the trie +constexpr int32_t kSIPrefixOffset = 64; + +constexpr int32_t kSyntaxPartOffset = 256; + +enum SyntaxPart { + SYNTAX_PART_PER = kSyntaxPartOffset, + SYNTAX_PART_SQUARE, + SYNTAX_PART_CUBIC, + SYNTAX_PART_P1, + SYNTAX_PART_P2, + SYNTAX_PART_P3, + SYNTAX_PART_P4, + SYNTAX_PART_P5, + SYNTAX_PART_P6, + SYNTAX_PART_P7, + SYNTAX_PART_P8, + SYNTAX_PART_P9, +}; + +constexpr int32_t kSimpleUnitOffset = 512; + +// FIXME: Get this list from data +const char16_t* gSimpleUnits[] = { + u"100kilometer", + u"acre", + u"ampere", + u"arc-minute", + u"arc-second", + u"astronomical-unit", + u"atmosphere", + u"bar", + u"barrel", + u"bit", + u"british-thermal-unit", + u"bushel", + u"byte", + u"calorie", + u"carat", + u"celsius", + u"century", + u"cup", + u"cup-metric", + u"dalton", + u"day", + u"day-person", + u"decade", + u"degree", + u"dot", // (as in "dot-per-inch") + u"dunam", + u"earth-mass", + u"electronvolt", + u"em", + u"fahrenheit", + u"fathom", + u"fluid-ounce", + u"fluid-ounce-imperial", + u"foodcalorie", + u"foot", + u"furlong", + u"g-force", + u"gallon", + u"gallon-imperial", + u"generic", // (i.e., "temperature-generic") + u"gram", + u"hectare", // (note: other "are" derivatives are uncommon) + u"hertz", + u"horsepower", + u"hour", + u"inch", + u"inch-hg", + u"joule", + u"karat", + u"kelvin", + u"knot", + u"light-year", + u"liter", + u"lux", + u"meter", + u"meter-of-mercury", // (not "millimeter-of-mercury") + u"metric-ton", + u"mile", + u"mile-scandinavian", + u"minute", + u"mole", + u"month", + u"month-person", + u"nautical-mile", + u"newton", + u"ohm", + u"ounce", + u"ounce-troy", + u"parsec", + u"pascal", + u"percent", + u"permille", + u"permillion", + u"permyriad", + u"pint", + u"pint-metric", + u"pixel", + u"point", + u"pound", + u"pound-force", + u"quart", + u"radian", + u"revolution", + u"second", + u"solar-luminosity", + u"solar-mass", + u"solar-radius", + u"stone", + u"tablespoon", + u"teaspoon", + u"therm-us", + u"ton", + u"volt", + u"watt", + u"week", + u"week-person", + u"yard", + u"year", + u"year-person", +}; + +icu::UInitOnce gUnitExtrasInitOnce = U_INITONCE_INITIALIZER; + +char16_t* kSerializedUnitExtrasStemTrie = nullptr; + +UBool U_CALLCONV cleanupUnitExtras() { + uprv_free(kSerializedUnitExtrasStemTrie); + kSerializedUnitExtrasStemTrie = nullptr; + gUnitExtrasInitOnce.reset(); + return TRUE; +} + +void U_CALLCONV initUnitExtras(UErrorCode& status) { + ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras); + + UCharsTrieBuilder b(status); + if (U_FAILURE(status)) { return; } + + // Add SI prefixes + b.add(u"yotta", kSIPrefixOffset + UMEASURE_SI_PREFIX_YOTTA, status); + b.add(u"zetta", kSIPrefixOffset + UMEASURE_SI_PREFIX_ZETTA, status); + b.add(u"exa", kSIPrefixOffset + UMEASURE_SI_PREFIX_EXA, status); + b.add(u"peta", kSIPrefixOffset + UMEASURE_SI_PREFIX_PETA, status); + b.add(u"tera", kSIPrefixOffset + UMEASURE_SI_PREFIX_TERA, status); + b.add(u"giga", kSIPrefixOffset + UMEASURE_SI_PREFIX_GIGA, status); + b.add(u"mega", kSIPrefixOffset + UMEASURE_SI_PREFIX_MEGA, status); + b.add(u"kilo", kSIPrefixOffset + UMEASURE_SI_PREFIX_KILO, status); + b.add(u"hecto", kSIPrefixOffset + UMEASURE_SI_PREFIX_HECTO, status); + b.add(u"deka", kSIPrefixOffset + UMEASURE_SI_PREFIX_DEKA, status); + b.add(u"deci", kSIPrefixOffset + UMEASURE_SI_PREFIX_DECI, status); + b.add(u"centi", kSIPrefixOffset + UMEASURE_SI_PREFIX_CENTI, status); + b.add(u"milli", kSIPrefixOffset + UMEASURE_SI_PREFIX_MILLI, status); + b.add(u"micro", kSIPrefixOffset + UMEASURE_SI_PREFIX_MICRO, status); + b.add(u"nano", kSIPrefixOffset + UMEASURE_SI_PREFIX_NANO, status); + b.add(u"pico", kSIPrefixOffset + UMEASURE_SI_PREFIX_PICO, status); + b.add(u"femto", kSIPrefixOffset + UMEASURE_SI_PREFIX_FEMTO, status); + b.add(u"atto", kSIPrefixOffset + UMEASURE_SI_PREFIX_ATTO, status); + b.add(u"zepto", kSIPrefixOffset + UMEASURE_SI_PREFIX_ZEPTO, status); + b.add(u"yocto", kSIPrefixOffset + UMEASURE_SI_PREFIX_YOCTO, status); + if (U_FAILURE(status)) { return; } + + // Add syntax parts (per, power prefixes) + b.add(u"-per-", SYNTAX_PART_PER, status); + b.add(u"square-", SYNTAX_PART_SQUARE, status); + b.add(u"cubic-", SYNTAX_PART_CUBIC, status); + b.add(u"p1", SYNTAX_PART_P1, status); + b.add(u"p2", SYNTAX_PART_P2, status); + b.add(u"p3", SYNTAX_PART_P3, status); + b.add(u"p4", SYNTAX_PART_P4, status); + b.add(u"p5", SYNTAX_PART_P5, status); + b.add(u"p6", SYNTAX_PART_P6, status); + b.add(u"p7", SYNTAX_PART_P7, status); + b.add(u"p8", SYNTAX_PART_P8, status); + b.add(u"p9", SYNTAX_PART_P9, status); + if (U_FAILURE(status)) { return; } + + // Add sanctioned simple units by offset + int32_t simpleUnitOffset = kSimpleUnitOffset; + for (auto simpleUnit : gSimpleUnits) { + b.add(simpleUnit, simpleUnitOffset++, status); + } + + // Build the CharsTrie + // TODO: Use SLOW or FAST here? + UnicodeString result; + b.buildUnicodeString(USTRINGTRIE_BUILD_FAST, result, status); + if (U_FAILURE(status)) { return; } + + // Copy the result into the global constant pointer + size_t numBytes = result.length() * sizeof(char16_t); + kSerializedUnitExtrasStemTrie = static_cast(uprv_malloc(numBytes)); + uprv_memcpy(kSerializedUnitExtrasStemTrie, result.getBuffer(), numBytes); +} + +} // namespace + + +U_NAMESPACE_END + +#endif /* !UNCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/ucln_in.h b/icu4c/source/i18n/ucln_in.h index 2f70a8500e1..765cdd559fb 100644 --- a/icu4c/source/i18n/ucln_in.h +++ b/icu4c/source/i18n/ucln_in.h @@ -26,6 +26,7 @@ as the functions are suppose to be called. It's usually best to have child dependencies called first. */ typedef enum ECleanupI18NType { UCLN_I18N_START = -1, + UCLN_I18N_UNIT_EXTRAS, UCLN_I18N_NUMBER_SKELETONS, UCLN_I18N_CURRENCY_SPACING, UCLN_I18N_SPOOF, diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index 16790f8a3d9..1c535540467 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -29,7 +29,6 @@ U_NAMESPACE_BEGIN class StringEnumeration; -class MeasureUnitFields; /** * Enumeration for SI prefixes, such as "kilo". diff --git a/icu4c/source/test/depstest/dependencies.txt b/icu4c/source/test/depstest/dependencies.txt index 1d726b6ea32..080beeff637 100644 --- a/icu4c/source/test/depstest/dependencies.txt +++ b/icu4c/source/test/depstest/dependencies.txt @@ -870,6 +870,7 @@ library: i18n listformatter formatting formattable_cnv regex regex_cnv translit double_conversion number_representation number_output numberformatter numberparser + units_extra universal_time_scale uclean_i18n @@ -1053,6 +1054,11 @@ group: sharedbreakiterator deps breakiterator +group: units_extra + measunit_extra.o + deps + units + group: units measunit.o currunit.o nounit.o deps From fb1129a188bb663737a747360c24a332a175f206 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Tue, 14 Jan 2020 23:25:36 +0100 Subject: [PATCH 06/15] Additional WIP --- icu4c/source/i18n/measunit.cpp | 4 + icu4c/source/i18n/measunit_extra.cpp | 374 +++++++++++++++++++++++---- icu4c/source/i18n/unicode/measunit.h | 1 + 3 files changed, 331 insertions(+), 48 deletions(-) diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp index 43811a5331d..22f877a95b8 100644 --- a/icu4c/source/i18n/measunit.cpp +++ b/icu4c/source/i18n/measunit.cpp @@ -2025,6 +2025,10 @@ MeasureUnit::MeasureUnit(MeasureUnit &&other) noexcept other.fId = nullptr; } +MeasureUnit::MeasureUnit(char* idToAdopt) + : fId(idToAdopt), fSubTypeId(-1), fTypeId(-1) { +} + MeasureUnit &MeasureUnit::operator=(const MeasureUnit &other) { if (this == &other) { return *this; diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index d84e50af350..379587a8778 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -13,6 +13,7 @@ #define UNISTR_FROM_STRING_EXPLICIT #include "cstring.h" +#include "uassert.h" #include "ucln_in.h" #include "umutex.h" #include "unicode/errorcode.h" @@ -20,6 +21,8 @@ #include "unicode/ucharstrie.h" #include "unicode/ucharstriebuilder.h" +#include "cstr.h" + U_NAMESPACE_BEGIN @@ -28,27 +31,64 @@ namespace { // This is to ensure we only insert positive integers into the trie constexpr int32_t kSIPrefixOffset = 64; -constexpr int32_t kSyntaxPartOffset = 256; +constexpr int32_t kCompoundPartOffset = 128; -enum SyntaxPart { - SYNTAX_PART_PER = kSyntaxPartOffset, - SYNTAX_PART_SQUARE, - SYNTAX_PART_CUBIC, - SYNTAX_PART_P1, - SYNTAX_PART_P2, - SYNTAX_PART_P3, - SYNTAX_PART_P4, - SYNTAX_PART_P5, - SYNTAX_PART_P6, - SYNTAX_PART_P7, - SYNTAX_PART_P8, - SYNTAX_PART_P9, +enum CompoundPart { + COMPOUND_PART_PER = kCompoundPartOffset, + COMPOUND_PART_TIMES, + COMPOUND_PART_ONE_PER, + COMPOUND_PART_PLUS, +}; + +constexpr int32_t kPowerPartOffset = 256; + +enum PowerPart { + POWER_PART_P2 = kPowerPartOffset + 2, + POWER_PART_P3, + POWER_PART_P4, + POWER_PART_P5, + POWER_PART_P6, + POWER_PART_P7, + POWER_PART_P8, + POWER_PART_P9, + POWER_PART_P10, + POWER_PART_P11, + POWER_PART_P12, + POWER_PART_P13, + POWER_PART_P14, + POWER_PART_P15, }; constexpr int32_t kSimpleUnitOffset = 512; +const struct SIPrefixStrings { + const char* const string; + UMeasureSIPrefix value; +} gSIPrefixStrings[] = { + { "yotta", UMEASURE_SI_PREFIX_YOTTA }, + { "zetta", UMEASURE_SI_PREFIX_ZETTA }, + { "exa", UMEASURE_SI_PREFIX_EXA }, + { "peta", UMEASURE_SI_PREFIX_PETA }, + { "tera", UMEASURE_SI_PREFIX_TERA }, + { "giga", UMEASURE_SI_PREFIX_GIGA }, + { "mega", UMEASURE_SI_PREFIX_MEGA }, + { "kilo", UMEASURE_SI_PREFIX_KILO }, + { "hecto", UMEASURE_SI_PREFIX_HECTO }, + { "deka", UMEASURE_SI_PREFIX_DEKA }, + { "deci", UMEASURE_SI_PREFIX_DECI }, + { "centi", UMEASURE_SI_PREFIX_CENTI }, + { "milli", UMEASURE_SI_PREFIX_MILLI }, + { "micro", UMEASURE_SI_PREFIX_MICRO }, + { "nano", UMEASURE_SI_PREFIX_NANO }, + { "pico", UMEASURE_SI_PREFIX_PICO }, + { "femto", UMEASURE_SI_PREFIX_FEMTO }, + { "atto", UMEASURE_SI_PREFIX_ATTO }, + { "zepto", UMEASURE_SI_PREFIX_ZEPTO }, + { "yocto", UMEASURE_SI_PREFIX_YOCTO }, +}; + // FIXME: Get this list from data -const char16_t* gSimpleUnits[] = { +const char16_t* const gSimpleUnits[] = { u"100kilometer", u"acre", u"ampere", @@ -168,41 +208,33 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) { if (U_FAILURE(status)) { return; } // Add SI prefixes - b.add(u"yotta", kSIPrefixOffset + UMEASURE_SI_PREFIX_YOTTA, status); - b.add(u"zetta", kSIPrefixOffset + UMEASURE_SI_PREFIX_ZETTA, status); - b.add(u"exa", kSIPrefixOffset + UMEASURE_SI_PREFIX_EXA, status); - b.add(u"peta", kSIPrefixOffset + UMEASURE_SI_PREFIX_PETA, status); - b.add(u"tera", kSIPrefixOffset + UMEASURE_SI_PREFIX_TERA, status); - b.add(u"giga", kSIPrefixOffset + UMEASURE_SI_PREFIX_GIGA, status); - b.add(u"mega", kSIPrefixOffset + UMEASURE_SI_PREFIX_MEGA, status); - b.add(u"kilo", kSIPrefixOffset + UMEASURE_SI_PREFIX_KILO, status); - b.add(u"hecto", kSIPrefixOffset + UMEASURE_SI_PREFIX_HECTO, status); - b.add(u"deka", kSIPrefixOffset + UMEASURE_SI_PREFIX_DEKA, status); - b.add(u"deci", kSIPrefixOffset + UMEASURE_SI_PREFIX_DECI, status); - b.add(u"centi", kSIPrefixOffset + UMEASURE_SI_PREFIX_CENTI, status); - b.add(u"milli", kSIPrefixOffset + UMEASURE_SI_PREFIX_MILLI, status); - b.add(u"micro", kSIPrefixOffset + UMEASURE_SI_PREFIX_MICRO, status); - b.add(u"nano", kSIPrefixOffset + UMEASURE_SI_PREFIX_NANO, status); - b.add(u"pico", kSIPrefixOffset + UMEASURE_SI_PREFIX_PICO, status); - b.add(u"femto", kSIPrefixOffset + UMEASURE_SI_PREFIX_FEMTO, status); - b.add(u"atto", kSIPrefixOffset + UMEASURE_SI_PREFIX_ATTO, status); - b.add(u"zepto", kSIPrefixOffset + UMEASURE_SI_PREFIX_ZEPTO, status); - b.add(u"yocto", kSIPrefixOffset + UMEASURE_SI_PREFIX_YOCTO, status); + for (const auto& siPrefixInfo : gSIPrefixStrings) { + UnicodeString uSIPrefix(siPrefixInfo.string, -1, US_INV); + b.add(uSIPrefix, siPrefixInfo.value + kSIPrefixOffset, status); + } if (U_FAILURE(status)) { return; } - // Add syntax parts (per, power prefixes) - b.add(u"-per-", SYNTAX_PART_PER, status); - b.add(u"square-", SYNTAX_PART_SQUARE, status); - b.add(u"cubic-", SYNTAX_PART_CUBIC, status); - b.add(u"p1", SYNTAX_PART_P1, status); - b.add(u"p2", SYNTAX_PART_P2, status); - b.add(u"p3", SYNTAX_PART_P3, status); - b.add(u"p4", SYNTAX_PART_P4, status); - b.add(u"p5", SYNTAX_PART_P5, status); - b.add(u"p6", SYNTAX_PART_P6, status); - b.add(u"p7", SYNTAX_PART_P7, status); - b.add(u"p8", SYNTAX_PART_P8, status); - b.add(u"p9", SYNTAX_PART_P9, status); + // Add syntax parts (compound, power prefixes) + b.add(u"-per-", COMPOUND_PART_PER, status); + b.add(u"-", COMPOUND_PART_TIMES, status); + b.add(u"one-per-", COMPOUND_PART_ONE_PER, status); + b.add(u"+", COMPOUND_PART_PLUS, status); + b.add(u"square-", POWER_PART_P2, status); + b.add(u"cubic-", POWER_PART_P3, status); + b.add(u"p2-", POWER_PART_P2, status); + b.add(u"p3-", POWER_PART_P3, status); + b.add(u"p4-", POWER_PART_P4, status); + b.add(u"p5-", POWER_PART_P5, status); + b.add(u"p6-", POWER_PART_P6, status); + b.add(u"p7-", POWER_PART_P7, status); + b.add(u"p8-", POWER_PART_P8, status); + b.add(u"p9-", POWER_PART_P9, status); + b.add(u"p10-", POWER_PART_P10, status); + b.add(u"p11-", POWER_PART_P11, status); + b.add(u"p12-", POWER_PART_P12, status); + b.add(u"p13-", POWER_PART_P13, status); + b.add(u"p14-", POWER_PART_P14, status); + b.add(u"p15-", POWER_PART_P15, status); if (U_FAILURE(status)) { return; } // Add sanctioned simple units by offset @@ -223,9 +255,255 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) { uprv_memcpy(kSerializedUnitExtrasStemTrie, result.getBuffer(), numBytes); } +class UnitIdentifierParser { +public: + static UnitIdentifierParser from(StringPiece source, UErrorCode& status) { + umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status); + if (U_FAILURE(status)) { + return UnitIdentifierParser(); + } + return UnitIdentifierParser(source); + } + + int32_t nextToken(UErrorCode& status) { + fTrie.reset(); + int32_t match = -1; + int32_t previ = -1; + do { + fTrie.next(fSource.data()[fIndex++]); + if (fTrie.current() == USTRINGTRIE_NO_MATCH) { + break; + } else if (fTrie.current() == USTRINGTRIE_NO_VALUE) { + continue; + } else if (fTrie.current() == USTRINGTRIE_FINAL_VALUE) { + match = fTrie.getValue(); + previ = fIndex; + break; + } else if (fTrie.current() == USTRINGTRIE_INTERMEDIATE_VALUE) { + match = fTrie.getValue(); + previ = fIndex; + continue; + } else { + UPRV_UNREACHABLE; + } + } while (fIndex < fSource.length()); + + if (match < 0) { + // TODO: Make a new status code? + status = U_ILLEGAL_ARGUMENT_ERROR; + } else { + fIndex = previ; + } + return match; + } + + bool hasNext() const { + return fIndex < fSource.length(); + } + + int32_t currentIndex() const { + return fIndex; + } + +private: + int32_t fIndex = 0; + StringPiece fSource; + UCharsTrie fTrie; + + UnitIdentifierParser() : fSource(""), fTrie(u"") {} + + UnitIdentifierParser(StringPiece source) + : fSource(source), fTrie(kSerializedUnitExtrasStemTrie) {} +}; + } // namespace +MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& status) { + UnitIdentifierParser parser = UnitIdentifierParser::from(identifier, status); + if (U_FAILURE(status)) { + // Unrecoverable error + return MeasureUnit(); + } + + while (parser.hasNext()) { + parser.nextToken(status); + if (U_FAILURE(status)) { + // Invalid syntax + return MeasureUnit(); + } + + // if (match < kCompoundPartOffset) { + // // SI Prefix + // auto prefix = static_cast(match - kSIPrefixOffset); + // } else if (match < kPowerPartOffset) { + // // Compound part + // const char* operation = (match == COMPOUND_PART_PER) ? "per" : "times/plus"; + // } else if (match < kSimpleUnitOffset) { + // // Power part + // int32_t power = match - kPowerPartOffset; + // } else { + // // Simple unit + // const char16_t* simpleUnit = gSimpleUnits[match - kSimpleUnitOffset]; + // } + } + + // Success + return MeasureUnit(uprv_strdup(identifier)); +} + +UMeasureSIPrefix MeasureUnit::getSIPrefix() const { + ErrorCode status; + const char* id = toString(); + UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); + if (status.isFailure()) { + // Unrecoverable error + return UMEASURE_SI_PREFIX_ONE; + } + + int32_t match = parser.nextToken(status); + if (status.isFailure()) { + // Invalid syntax + return UMEASURE_SI_PREFIX_ONE; + } + + if (match >= kPowerPartOffset && match < kSimpleUnitOffset) { + // Skip the power part + match = parser.nextToken(status); + if (status.isFailure()) { + // Invalid syntax + return UMEASURE_SI_PREFIX_ONE; + } + } + + if (match >= kCompoundPartOffset) { + // No SI prefix + return UMEASURE_SI_PREFIX_ONE; + } + + return static_cast(match - kSIPrefixOffset); +} + +MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix) const { + ErrorCode status; + const char* id = toString(); + UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); + if (status.isFailure()) { + // Unrecoverable error + return *this; + } + + int32_t match = parser.nextToken(status); + if (status.isFailure()) { + // Invalid syntax + return *this; + } + + CharString builder; + int32_t unitStart = 0; + if (match >= kPowerPartOffset && match < kSimpleUnitOffset) { + // Skip the power part + unitStart = parser.currentIndex(); + builder.append(id, unitStart, status); + match = parser.nextToken(status); + } + + // Append the new SI prefix + for (const auto& siPrefixInfo : gSIPrefixStrings) { + if (siPrefixInfo.value == prefix) { + builder.append(siPrefixInfo.string, status); + break; + } + } + + if (match < kCompoundPartOffset) { + // Remove the old SI prefix + unitStart = parser.currentIndex(); + } + builder.append(id + unitStart, status); + if (status.isFailure()) { + // Unrecoverable error + return *this; + } + + return MeasureUnit(builder.cloneData(status)); +} + +int8_t MeasureUnit::getPower() const { + ErrorCode status; + const char* id = toString(); + UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); + if (status.isFailure()) { + // Unrecoverable error + return 0; + } + + int32_t match = parser.nextToken(status); + if (status.isFailure()) { + // Invalid syntax + return 0; + } + + if (match < kPowerPartOffset || match >= kSimpleUnitOffset) { + // No power part + return 0; + } + + return static_cast(match - kPowerPartOffset); +} + +MeasureUnit MeasureUnit::withPower(int8_t power) const { + if (power < 0) { + // Don't know how to handle this yet + U_ASSERT(FALSE); + } + + ErrorCode status; + const char* id = toString(); + UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); + if (status.isFailure()) { + // Unrecoverable error + return *this; + } + + int32_t match = parser.nextToken(status); + if (status.isFailure()) { + // Invalid syntax + return *this; + } + + // Append the new power + CharString builder; + if (power == 2) { + builder.append("square-", status); + } else if (power == 3) { + builder.append("cubic-", status); + } else if (power < 10) { + builder.append('p', status); + builder.append(power + '0', status); + builder.append('-', status); + } else { + builder.append("p1", status); + builder.append('0' + (power % 10), status); + builder.append('-', status); + } + + if (match < kCompoundPartOffset) { + // Remove the old power + builder.append(id + parser.currentIndex(), status); + } else { + // Append the whole identifier + builder.append(id, status); + } + if (status.isFailure()) { + // Unrecoverable error + return *this; + } + + return MeasureUnit(builder.cloneData(status)); +} + + U_NAMESPACE_END #endif /* !UNCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index 1c535540467..466460b02a0 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -3704,6 +3704,7 @@ private: int8_t fTypeId; MeasureUnit(int32_t typeId, int32_t subTypeId); + MeasureUnit(char* idToAdopt); void setTo(int32_t typeId, int32_t subTypeId); int32_t getOffset() const; static MeasureUnit *create(int typeId, int subTypeId, UErrorCode &status); From 6161b508a872031d29c4613e747c6fd65c846cbe Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Wed, 15 Jan 2020 15:17:12 +0100 Subject: [PATCH 07/15] Next checkpoint --- icu4c/source/i18n/measunit_extra.cpp | 323 ++++++++++++++++----------- 1 file changed, 193 insertions(+), 130 deletions(-) diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index 379587a8778..86453302b1e 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -36,7 +36,6 @@ constexpr int32_t kCompoundPartOffset = 128; enum CompoundPart { COMPOUND_PART_PER = kCompoundPartOffset, COMPOUND_PART_TIMES, - COMPOUND_PART_ONE_PER, COMPOUND_PART_PLUS, }; @@ -217,7 +216,6 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) { // Add syntax parts (compound, power prefixes) b.add(u"-per-", COMPOUND_PART_PER, status); b.add(u"-", COMPOUND_PART_TIMES, status); - b.add(u"one-per-", COMPOUND_PART_ONE_PER, status); b.add(u"+", COMPOUND_PART_PLUS, status); b.add(u"square-", POWER_PART_P2, status); b.add(u"cubic-", POWER_PART_P3, status); @@ -255,6 +253,98 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) { uprv_memcpy(kSerializedUnitExtrasStemTrie, result.getBuffer(), numBytes); } +class Token { +public: + Token(int32_t match) : fMatch(match) {} + + enum Type { + TYPE_UNDEFINED, + TYPE_SI_PREFIX, + TYPE_COMPOUND_PART, + TYPE_POWER_PART, + TYPE_SIMPLE_UNIT, + }; + + Type getType() const { + if (fMatch <= 0) { + UPRV_UNREACHABLE; + } else if (fMatch < kCompoundPartOffset) { + return TYPE_SI_PREFIX; + } else if (fMatch < kPowerPartOffset) { + return TYPE_COMPOUND_PART; + } else if (fMatch < kSimpleUnitOffset) { + return TYPE_POWER_PART; + } else { + return TYPE_SIMPLE_UNIT; + } + } + + UMeasureSIPrefix getSIPrefix() const { + U_ASSERT(getType() == TYPE_SI_PREFIX); + return static_cast(fMatch - kSIPrefixOffset); + } + + int32_t getMatch() const { + return fMatch; + } + + int8_t getPower() const { + return static_cast(fMatch - kPowerPartOffset); + } + + int32_t getSimpleUnitOffset() const { + return fMatch - kSimpleUnitOffset; + } + +private: + int32_t fMatch; +}; + +struct PowerUnit { + int8_t power = 0; + bool plus = false; + UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; + StringPiece id; + + void toString(CharString& builder, UErrorCode& status) { + if (power == 0) { + // no-op + } else if (power == 1) { + UPRV_UNREACHABLE; + } else if (power == 2) { + builder.append("square-", status); + } else if (power == 3) { + builder.append("cubic-", status); + } else if (power < 10) { + builder.append('p', status); + builder.append(power + '0', status); + builder.append('-', status); + } else { + U_ASSERT(power <= 15); + builder.append("p1", status); + builder.append('0' + (power % 10), status); + builder.append('-', status); + } + if (U_FAILURE(status)) { + return; + } + + if (siPrefix != UMEASURE_SI_PREFIX_ONE) { + for (const auto& siPrefixInfo : gSIPrefixStrings) { + if (siPrefixInfo.value == siPrefix) { + builder.append(siPrefixInfo.string, status); + break; + } + } + } + if (U_FAILURE(status)) { + return; + } + + builder.append(id, status); + } +}; + class UnitIdentifierParser { public: static UnitIdentifierParser from(StringPiece source, UErrorCode& status) { @@ -265,7 +355,7 @@ public: return UnitIdentifierParser(source); } - int32_t nextToken(UErrorCode& status) { + Token nextToken(UErrorCode& status) { fTrie.reset(); int32_t match = -1; int32_t previ = -1; @@ -294,13 +384,101 @@ public: } else { fIndex = previ; } - return match; + return Token(match); } bool hasNext() const { return fIndex < fSource.length(); } + PowerUnit nextUnit(UErrorCode& status) { + PowerUnit retval; + if (U_FAILURE(status)) { + return retval; + } + + // state: + // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit) + // 1 = power token seen (will not accept another power token) + // 2 = SI prefix token seen (will not accept a power or SI prefix token) + int32_t state = 0; + int32_t previ = fIndex; + + // Maybe read a compound part + if (!fExpectingUnit) { + Token token = nextToken(status); + if (U_FAILURE(status)) { + return retval; + } + if (token.getType() != Token::TYPE_COMPOUND_PART) { + goto fail; + } + switch (token.getMatch()) { + case COMPOUND_PART_PER: + if (fAfterPer) { + goto fail; + } + fAfterPer = true; + break; + + case COMPOUND_PART_TIMES: + break; + + case COMPOUND_PART_PLUS: + retval.plus = true; + fAfterPer = false; + break; + } + previ = fIndex; + } + + // Read a unit + while (hasNext()) { + Token token = nextToken(status); + if (U_FAILURE(status)) { + return retval; + } + + switch (token.getType()) { + case Token::TYPE_POWER_PART: + if (state > 0) { + goto fail; + } + retval.power = token.getPower(); + if (fAfterPer) { + retval.power *= -1; + } + previ = fIndex; + state = 1; + break; + + case Token::TYPE_SI_PREFIX: + if (state > 1) { + goto fail; + } + retval.siPrefix = token.getSIPrefix(); + previ = fIndex; + state = 2; + break; + + case Token::TYPE_SIMPLE_UNIT: + if (state > 2) { + goto fail; + } + retval.id = fSource.substr(previ, fIndex - previ); + return retval; + + default: + goto fail; + } + } + + fail: + // TODO: Make a new status code? + status = U_ILLEGAL_ARGUMENT_ERROR; + return retval; + } + int32_t currentIndex() const { return fIndex; } @@ -310,6 +488,9 @@ private: StringPiece fSource; UCharsTrie fTrie; + bool fAfterPer = false; + bool fExpectingUnit = true; + UnitIdentifierParser() : fSource(""), fTrie(u"") {} UnitIdentifierParser(StringPiece source) @@ -332,20 +513,6 @@ MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& statu // Invalid syntax return MeasureUnit(); } - - // if (match < kCompoundPartOffset) { - // // SI Prefix - // auto prefix = static_cast(match - kSIPrefixOffset); - // } else if (match < kPowerPartOffset) { - // // Compound part - // const char* operation = (match == COMPOUND_PART_PER) ? "per" : "times/plus"; - // } else if (match < kSimpleUnitOffset) { - // // Power part - // int32_t power = match - kPowerPartOffset; - // } else { - // // Simple unit - // const char16_t* simpleUnit = gSimpleUnits[match - kSimpleUnitOffset]; - // } } // Success @@ -355,101 +522,28 @@ MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& statu UMeasureSIPrefix MeasureUnit::getSIPrefix() const { ErrorCode status; const char* id = toString(); - UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); - if (status.isFailure()) { - // Unrecoverable error - return UMEASURE_SI_PREFIX_ONE; - } - - int32_t match = parser.nextToken(status); - if (status.isFailure()) { - // Invalid syntax - return UMEASURE_SI_PREFIX_ONE; - } - - if (match >= kPowerPartOffset && match < kSimpleUnitOffset) { - // Skip the power part - match = parser.nextToken(status); - if (status.isFailure()) { - // Invalid syntax - return UMEASURE_SI_PREFIX_ONE; - } - } - - if (match >= kCompoundPartOffset) { - // No SI prefix - return UMEASURE_SI_PREFIX_ONE; - } - - return static_cast(match - kSIPrefixOffset); + return UnitIdentifierParser::from(id, status).nextUnit(status).siPrefix; } MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix) const { ErrorCode status; const char* id = toString(); UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); + PowerUnit powerUnit = parser.nextUnit(status); if (status.isFailure()) { - // Unrecoverable error - return *this; - } - - int32_t match = parser.nextToken(status); - if (status.isFailure()) { - // Invalid syntax return *this; } + powerUnit.siPrefix = prefix; CharString builder; - int32_t unitStart = 0; - if (match >= kPowerPartOffset && match < kSimpleUnitOffset) { - // Skip the power part - unitStart = parser.currentIndex(); - builder.append(id, unitStart, status); - match = parser.nextToken(status); - } - - // Append the new SI prefix - for (const auto& siPrefixInfo : gSIPrefixStrings) { - if (siPrefixInfo.value == prefix) { - builder.append(siPrefixInfo.string, status); - break; - } - } - - if (match < kCompoundPartOffset) { - // Remove the old SI prefix - unitStart = parser.currentIndex(); - } - builder.append(id + unitStart, status); - if (status.isFailure()) { - // Unrecoverable error - return *this; - } - + powerUnit.toString(builder, status); return MeasureUnit(builder.cloneData(status)); } int8_t MeasureUnit::getPower() const { ErrorCode status; const char* id = toString(); - UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); - if (status.isFailure()) { - // Unrecoverable error - return 0; - } - - int32_t match = parser.nextToken(status); - if (status.isFailure()) { - // Invalid syntax - return 0; - } - - if (match < kPowerPartOffset || match >= kSimpleUnitOffset) { - // No power part - return 0; - } - - return static_cast(match - kPowerPartOffset); + return UnitIdentifierParser::from(id, status).nextUnit(status).power; } MeasureUnit MeasureUnit::withPower(int8_t power) const { @@ -461,45 +555,14 @@ MeasureUnit MeasureUnit::withPower(int8_t power) const { ErrorCode status; const char* id = toString(); UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); + PowerUnit powerUnit = parser.nextUnit(status); if (status.isFailure()) { - // Unrecoverable error return *this; } - int32_t match = parser.nextToken(status); - if (status.isFailure()) { - // Invalid syntax - return *this; - } - - // Append the new power + powerUnit.power = power; CharString builder; - if (power == 2) { - builder.append("square-", status); - } else if (power == 3) { - builder.append("cubic-", status); - } else if (power < 10) { - builder.append('p', status); - builder.append(power + '0', status); - builder.append('-', status); - } else { - builder.append("p1", status); - builder.append('0' + (power % 10), status); - builder.append('-', status); - } - - if (match < kCompoundPartOffset) { - // Remove the old power - builder.append(id + parser.currentIndex(), status); - } else { - // Append the whole identifier - builder.append(id, status); - } - if (status.isFailure()) { - // Unrecoverable error - return *this; - } - + powerUnit.toString(builder, status); return MeasureUnit(builder.cloneData(status)); } From 127ea0a0255841da4900779dbf17212aedb049f0 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Wed, 15 Jan 2020 16:08:53 +0100 Subject: [PATCH 08/15] Checkpoint --- icu4c/source/i18n/measunit.cpp | 6 +- icu4c/source/i18n/measunit_extra.cpp | 183 ++++++++++++--------- icu4c/source/i18n/unicode/measunit.h | 36 ++-- icu4c/source/test/intltest/measfmttest.cpp | 99 ++++++----- 4 files changed, 183 insertions(+), 141 deletions(-) diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp index 22f877a95b8..c575c53745b 100644 --- a/icu4c/source/i18n/measunit.cpp +++ b/icu4c/source/i18n/measunit.cpp @@ -2083,10 +2083,10 @@ const char *MeasureUnit::getSubtype() const { if (fTypeId == -1) { return ""; } - return toString(); + return getIdentifier(); } -const char *MeasureUnit::toString() const { +const char *MeasureUnit::getIdentifier() const { return fId ? fId : gSubTypes[getOffset()]; } @@ -2098,7 +2098,7 @@ UBool MeasureUnit::operator==(const UObject& other) const { return FALSE; } const MeasureUnit &rhs = static_cast(other); - return uprv_strcmp(toString(), rhs.toString()) == 0; + return uprv_strcmp(getIdentifier(), rhs.getIdentifier()) == 0; } int32_t MeasureUnit::getIndex() const { diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index 86453302b1e..83257fbcf56 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -88,6 +88,7 @@ const struct SIPrefixStrings { // FIXME: Get this list from data const char16_t* const gSimpleUnits[] = { + u"one", // note: expected to be index 0 u"100kilometer", u"acre", u"ampere", @@ -262,6 +263,7 @@ public: TYPE_SI_PREFIX, TYPE_COMPOUND_PART, TYPE_POWER_PART, + TYPE_ONE, TYPE_SIMPLE_UNIT, }; @@ -274,6 +276,8 @@ public: return TYPE_COMPOUND_PART; } else if (fMatch < kSimpleUnitOffset) { return TYPE_POWER_PART; + } else if (fMatch == kSimpleUnitOffset) { + return TYPE_ONE; } else { return TYPE_SIMPLE_UNIT; } @@ -285,14 +289,17 @@ public: } int32_t getMatch() const { + U_ASSERT(getType() == TYPE_COMPOUND_PART); return fMatch; } int8_t getPower() const { + U_ASSERT(getType() == TYPE_POWER_PART); return static_cast(fMatch - kPowerPartOffset); } int32_t getSimpleUnitOffset() const { + U_ASSERT(getType() == TYPE_SIMPLE_UNIT); return fMatch - kSimpleUnitOffset; } @@ -302,11 +309,10 @@ private: struct PowerUnit { int8_t power = 0; - bool plus = false; UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; StringPiece id; - void toString(CharString& builder, UErrorCode& status) { + void appendTo(CharString& builder, UErrorCode& status) { if (power == 0) { // no-op } else if (power == 1) { @@ -348,50 +354,22 @@ struct PowerUnit { class UnitIdentifierParser { public: static UnitIdentifierParser from(StringPiece source, UErrorCode& status) { + if (U_FAILURE(status)) { + return UnitIdentifierParser(); + } umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status); if (U_FAILURE(status)) { return UnitIdentifierParser(); } return UnitIdentifierParser(source); } - - Token nextToken(UErrorCode& status) { - fTrie.reset(); - int32_t match = -1; - int32_t previ = -1; - do { - fTrie.next(fSource.data()[fIndex++]); - if (fTrie.current() == USTRINGTRIE_NO_MATCH) { - break; - } else if (fTrie.current() == USTRINGTRIE_NO_VALUE) { - continue; - } else if (fTrie.current() == USTRINGTRIE_FINAL_VALUE) { - match = fTrie.getValue(); - previ = fIndex; - break; - } else if (fTrie.current() == USTRINGTRIE_INTERMEDIATE_VALUE) { - match = fTrie.getValue(); - previ = fIndex; - continue; - } else { - UPRV_UNREACHABLE; - } - } while (fIndex < fSource.length()); - - if (match < 0) { - // TODO: Make a new status code? - status = U_ILLEGAL_ARGUMENT_ERROR; - } else { - fIndex = previ; - } - return Token(match); - } bool hasNext() const { return fIndex < fSource.length(); } - PowerUnit nextUnit(UErrorCode& status) { + PowerUnit nextUnit(bool& sawPlus, UErrorCode& status) { + sawPlus = false; PowerUnit retval; if (U_FAILURE(status)) { return retval; @@ -405,7 +383,7 @@ public: int32_t previ = fIndex; // Maybe read a compound part - if (!fExpectingUnit) { + if (fIndex != 0) { Token token = nextToken(status); if (U_FAILURE(status)) { return retval; @@ -425,7 +403,7 @@ public: break; case COMPOUND_PART_PLUS: - retval.plus = true; + sawPlus = true; fAfterPer = false; break; } @@ -461,10 +439,11 @@ public: state = 2; break; + case Token::TYPE_ONE: + // Skip "one" and go to the next unit + return nextUnit(sawPlus, status); + case Token::TYPE_SIMPLE_UNIT: - if (state > 2) { - goto fail; - } retval.id = fSource.substr(previ, fIndex - previ); return retval; @@ -473,14 +452,24 @@ public: } } - fail: - // TODO: Make a new status code? - status = U_ILLEGAL_ARGUMENT_ERROR; - return retval; + fail: + // TODO: Make a new status code? + status = U_ILLEGAL_ARGUMENT_ERROR; + return retval; } - int32_t currentIndex() const { - return fIndex; + PowerUnit getOnlyUnit(UErrorCode& status) { + bool sawPlus; + PowerUnit retval = nextUnit(sawPlus, status); + if (U_FAILURE(status)) { + return retval; + } + if (sawPlus || hasNext()) { + // Expected to find only one unit in the string + status = U_ILLEGAL_ARGUMENT_ERROR; + return retval; + } + return retval; } private: @@ -489,12 +478,43 @@ private: UCharsTrie fTrie; bool fAfterPer = false; - bool fExpectingUnit = true; UnitIdentifierParser() : fSource(""), fTrie(u"") {} UnitIdentifierParser(StringPiece source) : fSource(source), fTrie(kSerializedUnitExtrasStemTrie) {} + + Token nextToken(UErrorCode& status) { + fTrie.reset(); + int32_t match = -1; + int32_t previ = -1; + do { + fTrie.next(fSource.data()[fIndex++]); + if (fTrie.current() == USTRINGTRIE_NO_MATCH) { + break; + } else if (fTrie.current() == USTRINGTRIE_NO_VALUE) { + continue; + } else if (fTrie.current() == USTRINGTRIE_FINAL_VALUE) { + match = fTrie.getValue(); + previ = fIndex; + break; + } else if (fTrie.current() == USTRINGTRIE_INTERMEDIATE_VALUE) { + match = fTrie.getValue(); + previ = fIndex; + continue; + } else { + UPRV_UNREACHABLE; + } + } while (fIndex < fSource.length()); + + if (match < 0) { + // TODO: Make a new status code? + status = U_ILLEGAL_ARGUMENT_ERROR; + } else { + fIndex = previ; + } + return Token(match); + } }; } // namespace @@ -507,62 +527,71 @@ MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& statu return MeasureUnit(); } + CharString builder; + bool afterPer = false; while (parser.hasNext()) { - parser.nextToken(status); + bool sawPlus; + PowerUnit powerUnit = parser.nextUnit(sawPlus, status); if (U_FAILURE(status)) { // Invalid syntax return MeasureUnit(); } + + if (sawPlus) { + builder.append('+', status); + afterPer = false; + } + if (powerUnit.power < 0 && !afterPer) { + if (builder.length() == 0 || sawPlus) { + builder.append("one", status); + } + builder.append("-per-", status); + powerUnit.power *= -1; + } + powerUnit.appendTo(builder, status); } // Success - return MeasureUnit(uprv_strdup(identifier)); + return MeasureUnit(builder.cloneData(status)); } -UMeasureSIPrefix MeasureUnit::getSIPrefix() const { - ErrorCode status; - const char* id = toString(); - return UnitIdentifierParser::from(id, status).nextUnit(status).siPrefix; +UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const { + const char* id = getIdentifier(); + return UnitIdentifierParser::from(id, status).getOnlyUnit(status).siPrefix; } -MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix) const { - ErrorCode status; - const char* id = toString(); - UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); - PowerUnit powerUnit = parser.nextUnit(status); - if (status.isFailure()) { +MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const { + const char* id = getIdentifier(); + PowerUnit powerUnit = UnitIdentifierParser::from(id, status).getOnlyUnit(status); + if (U_FAILURE(status)) { return *this; } powerUnit.siPrefix = prefix; CharString builder; - powerUnit.toString(builder, status); + powerUnit.appendTo(builder, status); return MeasureUnit(builder.cloneData(status)); } -int8_t MeasureUnit::getPower() const { - ErrorCode status; - const char* id = toString(); - return UnitIdentifierParser::from(id, status).nextUnit(status).power; +int8_t MeasureUnit::getPower(UErrorCode& status) const { + const char* id = getIdentifier(); + return UnitIdentifierParser::from(id, status).getOnlyUnit(status).power; } -MeasureUnit MeasureUnit::withPower(int8_t power) const { - if (power < 0) { - // Don't know how to handle this yet - U_ASSERT(FALSE); - } - - ErrorCode status; - const char* id = toString(); - UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); - PowerUnit powerUnit = parser.nextUnit(status); - if (status.isFailure()) { +MeasureUnit MeasureUnit::withPower(int8_t power, UErrorCode& status) const { + const char* id = getIdentifier(); + PowerUnit powerUnit = UnitIdentifierParser::from(id, status).getOnlyUnit(status); + if (U_FAILURE(status)) { return *this; } - powerUnit.power = power; CharString builder; - powerUnit.toString(builder, status); + if (power < 0) { + builder.append("one-per-", status); + power *= -1; + } + powerUnit.power = power; + powerUnit.appendTo(builder, status); return MeasureUnit(builder.cloneData(status)); } diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index 466460b02a0..8ed893190ab 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -293,7 +293,7 @@ class U_I18N_API MeasureUnit: public UObject { * @return The string form of this unit, owned by this MeasureUnit. * @draft ICU 67 */ - const char* toString() const; + const char* getIdentifier() const; /** * Creates a MeasureUnit which is this MeasureUnit augmented with the specified SI prefix. @@ -301,14 +301,14 @@ class U_I18N_API MeasureUnit: public UObject { * * There is sufficient locale data to format all standard SI prefixes. * - * If the MeasureUnit is composed of multiple simple units, only the first simple unit is - * modified. For example, starting at "meter-kilogram-per-second", if you set the SI prefix - * to "centi", then you get "centimeter-kilogram-per-second". + * This method only works if the MeasureUnit is composed of only one simple unit. An error + * will be set if called on a MeasureUnit containing multiple units. * * @param prefix The SI prefix, from UMeasureSIPrefix. + * @param status ICU error code * @return A new MeasureUnit. */ - MeasureUnit withSIPrefix(UMeasureSIPrefix prefix) const; + MeasureUnit withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const; /** * Gets the current SI prefix of this MeasureUnit. For example, if the unit has the SI prefix @@ -320,20 +320,20 @@ class U_I18N_API MeasureUnit: public UObject { * * @return The SI prefix of the first simple unit, from UMeasureSIPrefix. */ - UMeasureSIPrefix getSIPrefix() const; + UMeasureSIPrefix getSIPrefix(UErrorCode& status) const; /** * Creates a MeasureUnit which is this MeasureUnit augmented with the specified power. For * example, if power is 2, the unit will be squared. * - * If the MeasureUnit is composed of multiple simple units, only the first simple unit is - * modified. For example, starting at "meter-kilogram-per-second", if you set the power to 2, - * then you get "square-meter-kilogram-per-second". + * This method only works if the MeasureUnit is composed of only one simple unit. An error + * will be set if called on a MeasureUnit containing multiple units. * * @param power The power. + * @param status ICU error code * @return A new MeasureUnit. */ - MeasureUnit withPower(int8_t power) const; + MeasureUnit withPower(int8_t power, UErrorCode& status) const; /** * Gets the power of this MeasureUnit. For example, if the unit is square, then 2 is returned. @@ -343,7 +343,7 @@ class U_I18N_API MeasureUnit: public UObject { * * @return The power of the first simple unit. */ - int8_t getPower() const; + int8_t getPower(UErrorCode& status) const; /** * Gets the reciprocal of the unit, with the numerator and denominator flipped. @@ -352,7 +352,7 @@ class U_I18N_API MeasureUnit: public UObject { * * @return The reciprocal of the target unit. */ - MeasureUnit reciprocal() const; + MeasureUnit reciprocal(UErrorCode& status) const; /** * Gets the product of this unit with another unit. This is a way to build units from @@ -365,7 +365,7 @@ class U_I18N_API MeasureUnit: public UObject { * * @return The product of the target unit with the provided unit. */ - MeasureUnit product(const MeasureUnit& other) const; + MeasureUnit product(const MeasureUnit& other, UErrorCode& status) const; /** * Gets the number of constituent simple units. @@ -375,7 +375,7 @@ class U_I18N_API MeasureUnit: public UObject { * * @return The number of constituent units. */ - size_t getSimpleUnitCount() const; + size_t getSimpleUnitCount(UErrorCode& status) const; /** * Gets the constituent unit at the given index. @@ -394,7 +394,7 @@ class U_I18N_API MeasureUnit: public UObject { * @param index Zero-based index. If out of range, the dimensionless unit is returned. * @return The constituent simple unit at the specified position. */ - MeasureUnit simpleUnitAt(size_t index) const; + MeasureUnit simpleUnitAt(size_t index, UErrorCode& status) const; /** * Composes this unit with a super unit. @@ -413,7 +413,7 @@ class U_I18N_API MeasureUnit: public UObject { * @param other The super unit to compose with the target unit. * @return The composition of the given super unit with this unit. */ - MeasureUnit withSuperUnit(const MeasureUnit& other) const; + MeasureUnit withSuperUnit(const MeasureUnit& other, UErrorCode& status) const; /** * Gets the number of super units in the receiver. @@ -422,7 +422,7 @@ class U_I18N_API MeasureUnit: public UObject { * * @return The number of super units. */ - size_t getSuperUnitCount() const; + size_t getSuperUnitCount(UErrorCode& status) const; /** * Gets the super unit at the given index. @@ -440,7 +440,7 @@ class U_I18N_API MeasureUnit: public UObject { * * @return The super unit at the specified position. */ - MeasureUnit superUnitAt(size_t index) const; + MeasureUnit superUnitAt(size_t index, UErrorCode& status) const; /** * getAvailable gets all of the available units. diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 94a499e73a4..27ed41b9590 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -188,6 +188,7 @@ void MeasureFormatTest::runIndexedTest( TESTCASE_AUTO(Test20332_PersonUnits); TESTCASE_AUTO(TestNumericTime); TESTCASE_AUTO(TestNumericTimeSomeSpecialFormats); + TESTCASE_AUTO(TestCompoundUnitOperations); TESTCASE_AUTO_END; } @@ -3224,64 +3225,69 @@ void MeasureFormatTest::TestNumericTimeSomeSpecialFormats() { void MeasureFormatTest::TestCompoundUnitOperations() { IcuTestErrorCode status(*this, "TestCompoundUnitOperations"); + MeasureUnit::forIdentifier("kilometer-per-second-joule", status); + MeasureUnit kilometer = MeasureUnit::getKilometer(); - MeasureUnit meter = kilometer.withSIPrefix(UMEASURE_SI_PREFIX_ONE); - MeasureUnit centimeter1 = kilometer.withSIPrefix(UMEASURE_SI_PREFIX_CENTI); - MeasureUnit centimeter2 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI); + MeasureUnit cubicMeter = MeasureUnit::getCubicMeter(); + MeasureUnit meter = kilometer.withSIPrefix(UMEASURE_SI_PREFIX_ONE, status); + MeasureUnit centimeter1 = kilometer.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); + MeasureUnit centimeter2 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); + MeasureUnit cubicDecimeter = cubicMeter.withSIPrefix(UMEASURE_SI_PREFIX_DECI, status); verifyUnitParts(kilometer, UMEASURE_SI_PREFIX_KILO, 0, "kilometer"); verifyUnitParts(meter, UMEASURE_SI_PREFIX_ONE, 0, "meter"); verifyUnitParts(centimeter1, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter"); verifyUnitParts(centimeter2, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter"); + verifyUnitParts(cubicDecimeter, UMEASURE_SI_PREFIX_DECI, 3, "cubic-decimeter"); assertTrue("centimeter equality", centimeter1 == centimeter2); assertTrue("kilometer inequality", centimeter1 != kilometer); - MeasureUnit squareMeter = meter.withPower(2); - MeasureUnit overCubicCentimeter = centimeter1.withPower(-3); - MeasureUnit quarticKilometer = kilometer.withPower(4); - MeasureUnit overQuarticKilometer1 = kilometer.withPower(-4); + MeasureUnit squareMeter = meter.withPower(2, status); + MeasureUnit overCubicCentimeter = centimeter1.withPower(-3, status); + MeasureUnit quarticKilometer = kilometer.withPower(4, status); + MeasureUnit overQuarticKilometer1 = kilometer.withPower(-4, status); verifyUnitParts(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter"); - verifyUnitParts(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, 2, "one-per-cubic-centimeter"); - verifyUnitParts(quarticKilometer, UMEASURE_SI_PREFIX_ONE, 2, "p4-kilometer"); - verifyUnitParts(overQuarticKilometer1, UMEASURE_SI_PREFIX_ONE, 2, "one-per-p4-kilometer"); + verifyUnitParts(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "one-per-cubic-centimeter"); + verifyUnitParts(quarticKilometer, UMEASURE_SI_PREFIX_KILO, 4, "p4-kilometer"); + verifyUnitParts(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); - assertTrue("power inequality", quarticKilometer != overQuarticKilometer1); + // assertTrue("power inequality", quarticKilometer != overQuarticKilometer1); - MeasureUnit overQuarticKilometer2 = overQuarticKilometer1.reciprocal(); - MeasureUnit overQuarticKilometer3 = kilometer.product(kilometer).product(kilometer) - .product(kilometer).reciprocal(); + // MeasureUnit overQuarticKilometer2 = overQuarticKilometer1.reciprocal(); + // MeasureUnit overQuarticKilometer3 = kilometer.product(kilometer).product(kilometer) + // .product(kilometer).reciprocal(); - verifyUnitParts(overQuarticKilometer2, UMEASURE_SI_PREFIX_ONE, 2, "one-per-p4-kilometer"); - verifyUnitParts(overQuarticKilometer3, UMEASURE_SI_PREFIX_ONE, 2, "one-per-p4-kilometer"); + // verifyUnitParts(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, 2, "one-per-p4-kilometer"); + // verifyUnitParts(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, 2, "one-per-p4-kilometer"); - assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); - assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); + // assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); + // assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); - MeasureUnit kiloSquareSecond = MeasureUnit::getSecond() - .withPower(2).withSIPrefix(UMEASURE_SI_PREFIX_KILO); - MeasureUnit meterSecond = meter.product(kiloSquareSecond); - MeasureUnit cubicMeterSecond1 = meter.withPower(3).product(kiloSquareSecond); - MeasureUnit cubicMeterSecond2 = meterSecond.withPower(3); - MeasureUnit centimeterSecond1 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI).product(kiloSquareSecond); - MeasureUnit centimeterSecond2 = meterSecond.withSIPrefix(UMEASURE_SI_PREFIX_CENTI); - MeasureUnit secondCubicMeter = kiloSquareSecond.product(meter.withPower(3)); - MeasureUnit secondCentimeter = kiloSquareSecond.product(meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI)); + // MeasureUnit kiloSquareSecond = MeasureUnit::getSecond() + // .withPower(2).withSIPrefix(UMEASURE_SI_PREFIX_KILO); + // MeasureUnit meterSecond = meter.product(kiloSquareSecond); + // MeasureUnit cubicMeterSecond1 = meter.withPower(3).product(kiloSquareSecond); + // MeasureUnit cubicMeterSecond2 = meterSecond.withPower(3); + // MeasureUnit centimeterSecond1 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI).product(kiloSquareSecond); + // MeasureUnit centimeterSecond2 = meterSecond.withSIPrefix(UMEASURE_SI_PREFIX_CENTI); + // MeasureUnit secondCubicMeter = kiloSquareSecond.product(meter.withPower(3)); + // MeasureUnit secondCentimeter = kiloSquareSecond.product(meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI)); - verifyUnitParts(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); - verifyUnitParts(meterSecond, UMEASURE_SI_PREFIX_ONE, 0, "meter-square-kilosecond"); - verifyUnitParts(cubicMeterSecond1, UMEASURE_SI_PREFIX_ONE, 2, "cubic-meter-square-kilosecond"); - verifyUnitParts(cubicMeterSecond2, UMEASURE_SI_PREFIX_ONE, 2, "cubic-meter-square-kilosecond"); - verifyUnitParts(centimeterSecond1, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter-square-kilosecond"); - verifyUnitParts(centimeterSecond2, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter-square-kilosecond"); - verifyUnitParts(secondCubicMeter, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond-cubic-meter"); - verifyUnitParts(secondCentimeter, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond-centimeter"); + // verifyUnitParts(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); + // verifyUnitParts(meterSecond, UMEASURE_SI_PREFIX_ONE, 0, "meter-square-kilosecond"); + // verifyUnitParts(cubicMeterSecond1, UMEASURE_SI_PREFIX_ONE, 2, "cubic-meter-square-kilosecond"); + // verifyUnitParts(cubicMeterSecond2, UMEASURE_SI_PREFIX_ONE, 2, "cubic-meter-square-kilosecond"); + // verifyUnitParts(centimeterSecond1, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter-square-kilosecond"); + // verifyUnitParts(centimeterSecond2, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter-square-kilosecond"); + // verifyUnitParts(secondCubicMeter, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond-cubic-meter"); + // verifyUnitParts(secondCentimeter, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond-centimeter"); - assertTrue("multipart power equality", cubicMeterSecond1 == cubicMeterSecond2); - assertTrue("multipart SI prefix equality", centimeterSecond1 == centimeterSecond2); - assertTrue("order matters inequality", cubicMeterSecond1 != secondCubicMeter); - assertTrue("additional simple units inequality", secondCubicMeter != secondCentimeter); + // assertTrue("multipart power equality", cubicMeterSecond1 == cubicMeterSecond2); + // assertTrue("multipart SI prefix equality", centimeterSecond1 == centimeterSecond2); + // assertTrue("order matters inequality", cubicMeterSecond1 != secondCubicMeter); + // assertTrue("additional simple units inequality", secondCubicMeter != secondCentimeter); } @@ -3361,10 +3367,17 @@ void MeasureFormatTest::verifyUnitParts( int8_t power, const char* identifier) { IcuTestErrorCode status(*this, "verifyUnitParts"); - assertEquals(UnicodeString(identifier) + ": SI prefix", siPrefix, unit.getSIPrefix()); - assertEquals(UnicodeString(identifier) + ": Power", power, unit.getPower()); - assertEquals(UnicodeString(identifier) + ": Identifier", identifier, unit.toString()); - assertTrue(UnicodeString(identifier) + ": Constructor", unit == MeasureUnit::forIdentifier(identifier, status)); + assertEquals(UnicodeString(identifier) + ": SI prefix", + siPrefix, + unit.getSIPrefix(status)); + assertEquals(UnicodeString(identifier) + ": Power", + static_cast(power), + static_cast(unit.getPower(status))); + assertEquals(UnicodeString(identifier) + ": Identifier", + identifier, + unit.getIdentifier()); + assertTrue(UnicodeString(identifier) + ": Constructor", + unit == MeasureUnit::forIdentifier(identifier, status)); } extern IntlTest *createMeasureFormatTest() { From eb5b4dff8386b93f542eb2bcfa1badd04e6613da Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Wed, 15 Jan 2020 18:15:40 +0100 Subject: [PATCH 09/15] Checkpoint --- icu4c/source/common/cmemory.h | 17 + icu4c/source/i18n/measunit.cpp | 4 + icu4c/source/i18n/measunit_extra.cpp | 378 ++++++++++++++------- icu4c/source/test/intltest/measfmttest.cpp | 26 +- 4 files changed, 287 insertions(+), 138 deletions(-) diff --git a/icu4c/source/common/cmemory.h b/icu4c/source/common/cmemory.h index 7f7fd8d0864..8f9be605396 100644 --- a/icu4c/source/common/cmemory.h +++ b/icu4c/source/common/cmemory.h @@ -725,6 +725,23 @@ public: return pool[count++] = new T(std::forward(args)...); } + /** + * @return Number of elements that have been allocated. + */ + int32_t size() const { + return count; + } + + /** + * Array item access (writable). + * No index bounds check. + * @param i array index + * @return reference to the array item + */ + T *operator[](ptrdiff_t i) const { + return pool[i]; + } + private: int32_t count; MaybeStackArray pool; diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp index c575c53745b..307156961b0 100644 --- a/icu4c/source/i18n/measunit.cpp +++ b/icu4c/source/i18n/measunit.cpp @@ -2027,6 +2027,10 @@ MeasureUnit::MeasureUnit(MeasureUnit &&other) noexcept MeasureUnit::MeasureUnit(char* idToAdopt) : fId(idToAdopt), fSubTypeId(-1), fTypeId(-1) { + if (fId == nullptr) { + // Invalid; reset to the base dimensionless unit + setTo(kBaseTypeIdx, kBaseSubTypeIdx); + } } MeasureUnit &MeasureUnit::operator=(const MeasureUnit &other) { diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index 83257fbcf56..eec204ed0e7 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -298,7 +298,7 @@ public: return static_cast(fMatch - kPowerPartOffset); } - int32_t getSimpleUnitOffset() const { + int32_t getSimpleUnitIndex() const { U_ASSERT(getType() == TYPE_SIMPLE_UNIT); return fMatch - kSimpleUnitOffset; } @@ -308,28 +308,31 @@ private: }; struct PowerUnit { - int8_t power = 0; + int8_t power = 1; UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; + int32_t simpleUnitIndex = 0; StringPiece id; - void appendTo(CharString& builder, UErrorCode& status) { - if (power == 0) { + void appendTo(CharString& builder, UErrorCode& status) const { + int8_t posPower = power < 0 ? -power : power; + if (posPower == 0) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } else if (posPower == 1) { // no-op - } else if (power == 1) { - UPRV_UNREACHABLE; - } else if (power == 2) { + } else if (posPower == 2) { builder.append("square-", status); - } else if (power == 3) { + } else if (posPower == 3) { builder.append("cubic-", status); - } else if (power < 10) { + } else if (posPower < 10) { builder.append('p', status); - builder.append(power + '0', status); + builder.append(posPower + '0', status); + builder.append('-', status); + } else if (posPower <= 15) { + builder.append("p1", status); + builder.append('0' + (posPower % 10), status); builder.append('-', status); } else { - U_ASSERT(power <= 15); - builder.append("p1", status); - builder.append('0' + (power % 10), status); - builder.append('-', status); + status = U_ILLEGAL_ARGUMENT_ERROR; } if (U_FAILURE(status)) { return; @@ -351,6 +354,71 @@ struct PowerUnit { } }; +class CompoundUnit { +public: + void append(PowerUnit&& powerUnit, UErrorCode& status) { + if (powerUnit.power >= 0) { + appendImpl(numerator, std::move(powerUnit), status); + } else { + appendImpl(denominator, std::move(powerUnit), status); + } + } + + void reciprocal() { + auto temp = std::move(numerator); + numerator = std::move(denominator); + denominator = std::move(temp); + } + + void appendTo(CharString& builder, UErrorCode& status) { + if (numerator.size() == 0) { + builder.append("one", status); + } else { + appendToImpl(numerator, numerator.size(), builder, status); + } + if (denominator.size() > 0) { + builder.append("-per-", status); + appendToImpl(denominator, denominator.size(), builder, status); + } + } + +private: + typedef MemoryPool PowerUnitList; + PowerUnitList numerator; + PowerUnitList denominator; + + void appendToImpl(const PowerUnitList& unitList, int32_t len, CharString& builder, UErrorCode& status) { + bool first = true; + for (int32_t i = 0; i < len; i++) { + if (first) { + first = false; + } else { + builder.append('-', status); + } + unitList[i]->appendTo(builder, status); + } + } + + void appendImpl(PowerUnitList& unitList, PowerUnit&& powerUnit, UErrorCode& status) { + // Check that the same simple unit doesn't already exist + for (int32_t i = 0; i < unitList.size(); i++) { + PowerUnit* candidate = unitList[i]; + if (candidate->simpleUnitIndex == powerUnit.simpleUnitIndex + && candidate->siPrefix == powerUnit.siPrefix) { + candidate->power += powerUnit.power; + return; + } + } + // Add a new unit + PowerUnit* destination = unitList.create(); + if (!destination) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + *destination = std::move(powerUnit); + } +}; + class UnitIdentifierParser { public: static UnitIdentifierParser from(StringPiece source, UErrorCode& status) { @@ -368,99 +436,10 @@ public: return fIndex < fSource.length(); } - PowerUnit nextUnit(bool& sawPlus, UErrorCode& status) { - sawPlus = false; - PowerUnit retval; - if (U_FAILURE(status)) { - return retval; - } - - // state: - // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit) - // 1 = power token seen (will not accept another power token) - // 2 = SI prefix token seen (will not accept a power or SI prefix token) - int32_t state = 0; - int32_t previ = fIndex; - - // Maybe read a compound part - if (fIndex != 0) { - Token token = nextToken(status); - if (U_FAILURE(status)) { - return retval; - } - if (token.getType() != Token::TYPE_COMPOUND_PART) { - goto fail; - } - switch (token.getMatch()) { - case COMPOUND_PART_PER: - if (fAfterPer) { - goto fail; - } - fAfterPer = true; - break; - - case COMPOUND_PART_TIMES: - break; - - case COMPOUND_PART_PLUS: - sawPlus = true; - fAfterPer = false; - break; - } - previ = fIndex; - } - - // Read a unit - while (hasNext()) { - Token token = nextToken(status); - if (U_FAILURE(status)) { - return retval; - } - - switch (token.getType()) { - case Token::TYPE_POWER_PART: - if (state > 0) { - goto fail; - } - retval.power = token.getPower(); - if (fAfterPer) { - retval.power *= -1; - } - previ = fIndex; - state = 1; - break; - - case Token::TYPE_SI_PREFIX: - if (state > 1) { - goto fail; - } - retval.siPrefix = token.getSIPrefix(); - previ = fIndex; - state = 2; - break; - - case Token::TYPE_ONE: - // Skip "one" and go to the next unit - return nextUnit(sawPlus, status); - - case Token::TYPE_SIMPLE_UNIT: - retval.id = fSource.substr(previ, fIndex - previ); - return retval; - - default: - goto fail; - } - } - - fail: - // TODO: Make a new status code? - status = U_ILLEGAL_ARGUMENT_ERROR; - return retval; - } - - PowerUnit getOnlyUnit(UErrorCode& status) { + PowerUnit getOnlyPowerUnit(UErrorCode& status) { bool sawPlus; - PowerUnit retval = nextUnit(sawPlus, status); + PowerUnit retval; + nextPowerUnit(retval, sawPlus, status); if (U_FAILURE(status)) { return retval; } @@ -472,6 +451,38 @@ public: return retval; } + void nextCompoundUnit(CompoundUnit& result, UErrorCode& status) { + bool sawPlus; + if (U_FAILURE(status)) { + return; + } + while (hasNext()) { + int32_t previ = fIndex; + PowerUnit powerUnit; + nextPowerUnit(powerUnit, sawPlus, status); + if (sawPlus) { + fIndex = previ; + break; + } + result.append(std::move(powerUnit), status); + } + return; + } + + CompoundUnit getOnlyCompoundUnit(UErrorCode& status) { + CompoundUnit retval; + nextCompoundUnit(retval, status); + if (U_FAILURE(status)) { + return retval; + } + if (hasNext()) { + // Expected to find only one unit in the string + status = U_ILLEGAL_ARGUMENT_ERROR; + return retval; + } + return retval; + } + private: int32_t fIndex = 0; StringPiece fSource; @@ -515,6 +526,96 @@ private: } return Token(match); } + + void nextPowerUnit(PowerUnit& result, bool& sawPlus, UErrorCode& status) { + sawPlus = false; + if (U_FAILURE(status)) { + return; + } + + // state: + // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit) + // 1 = power token seen (will not accept another power token) + // 2 = SI prefix token seen (will not accept a power or SI prefix token) + int32_t state = 0; + int32_t previ = fIndex; + + // Maybe read a compound part + if (fIndex != 0) { + Token token = nextToken(status); + if (U_FAILURE(status)) { + return; + } + if (token.getType() != Token::TYPE_COMPOUND_PART) { + goto fail; + } + switch (token.getMatch()) { + case COMPOUND_PART_PER: + if (fAfterPer) { + goto fail; + } + fAfterPer = true; + break; + + case COMPOUND_PART_TIMES: + break; + + case COMPOUND_PART_PLUS: + sawPlus = true; + fAfterPer = false; + break; + } + previ = fIndex; + } + + // Read a unit + while (hasNext()) { + Token token = nextToken(status); + if (U_FAILURE(status)) { + return; + } + + switch (token.getType()) { + case Token::TYPE_POWER_PART: + if (state > 0) { + goto fail; + } + result.power = token.getPower(); + if (fAfterPer) { + result.power *= -1; + } + previ = fIndex; + state = 1; + break; + + case Token::TYPE_SI_PREFIX: + if (state > 1) { + goto fail; + } + result.siPrefix = token.getSIPrefix(); + previ = fIndex; + state = 2; + break; + + case Token::TYPE_ONE: + // Skip "one" and go to the next unit + return nextPowerUnit(result, sawPlus, status); + + case Token::TYPE_SIMPLE_UNIT: + result.simpleUnitIndex = token.getSimpleUnitIndex(); + result.id = fSource.substr(previ, fIndex - previ); + return; + + default: + goto fail; + } + } + + fail: + // TODO: Make a new status code? + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } }; } // namespace @@ -528,27 +629,17 @@ MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& statu } CharString builder; - bool afterPer = false; while (parser.hasNext()) { - bool sawPlus; - PowerUnit powerUnit = parser.nextUnit(sawPlus, status); + CompoundUnit compoundUnit; + parser.nextCompoundUnit(compoundUnit, status); if (U_FAILURE(status)) { // Invalid syntax return MeasureUnit(); } - - if (sawPlus) { + if (builder.length() > 0) { builder.append('+', status); - afterPer = false; } - if (powerUnit.power < 0 && !afterPer) { - if (builder.length() == 0 || sawPlus) { - builder.append("one", status); - } - builder.append("-per-", status); - powerUnit.power *= -1; - } - powerUnit.appendTo(builder, status); + compoundUnit.appendTo(builder, status); } // Success @@ -557,12 +648,12 @@ MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& statu UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const { const char* id = getIdentifier(); - return UnitIdentifierParser::from(id, status).getOnlyUnit(status).siPrefix; + return UnitIdentifierParser::from(id, status).getOnlyPowerUnit(status).siPrefix; } MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const { const char* id = getIdentifier(); - PowerUnit powerUnit = UnitIdentifierParser::from(id, status).getOnlyUnit(status); + PowerUnit powerUnit = UnitIdentifierParser::from(id, status).getOnlyPowerUnit(status); if (U_FAILURE(status)) { return *this; } @@ -575,26 +666,61 @@ MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& statu int8_t MeasureUnit::getPower(UErrorCode& status) const { const char* id = getIdentifier(); - return UnitIdentifierParser::from(id, status).getOnlyUnit(status).power; + return UnitIdentifierParser::from(id, status).getOnlyPowerUnit(status).power; } MeasureUnit MeasureUnit::withPower(int8_t power, UErrorCode& status) const { const char* id = getIdentifier(); - PowerUnit powerUnit = UnitIdentifierParser::from(id, status).getOnlyUnit(status); + PowerUnit powerUnit = UnitIdentifierParser::from(id, status).getOnlyPowerUnit(status); if (U_FAILURE(status)) { return *this; } CharString builder; + powerUnit.power = power; if (power < 0) { builder.append("one-per-", status); - power *= -1; } - powerUnit.power = power; powerUnit.appendTo(builder, status); return MeasureUnit(builder.cloneData(status)); } +MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const { + const char* id = getIdentifier(); + CompoundUnit compoundUnit = UnitIdentifierParser::from(id, status).getOnlyCompoundUnit(status); + if (U_FAILURE(status)) { + return *this; + } + + compoundUnit.reciprocal(); + CharString builder; + compoundUnit.appendTo(builder, status); + return MeasureUnit(builder.cloneData(status)); +} + +MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const { + const char* id = getIdentifier(); + CompoundUnit compoundUnit = UnitIdentifierParser::from(id, status).getOnlyCompoundUnit(status); + if (U_FAILURE(status)) { + return *this; + } + + // Append other's first CompoundUnit to compoundUnit, then assert other has only one + UnitIdentifierParser otherParser = UnitIdentifierParser::from(other.getIdentifier(), status); + otherParser.nextCompoundUnit(compoundUnit, status); + if (U_FAILURE(status)) { + return *this; + } + if (otherParser.hasNext()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return *this; + } + + CharString builder; + compoundUnit.appendTo(builder, status); + return MeasureUnit(builder.cloneData(status)); +} + U_NAMESPACE_END diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 27ed41b9590..86814a7a8df 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -3234,10 +3234,10 @@ void MeasureFormatTest::TestCompoundUnitOperations() { MeasureUnit centimeter2 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); MeasureUnit cubicDecimeter = cubicMeter.withSIPrefix(UMEASURE_SI_PREFIX_DECI, status); - verifyUnitParts(kilometer, UMEASURE_SI_PREFIX_KILO, 0, "kilometer"); - verifyUnitParts(meter, UMEASURE_SI_PREFIX_ONE, 0, "meter"); - verifyUnitParts(centimeter1, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter"); - verifyUnitParts(centimeter2, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter"); + verifyUnitParts(kilometer, UMEASURE_SI_PREFIX_KILO, 1, "kilometer"); + verifyUnitParts(meter, UMEASURE_SI_PREFIX_ONE, 1, "meter"); + verifyUnitParts(centimeter1, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); + verifyUnitParts(centimeter2, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); verifyUnitParts(cubicDecimeter, UMEASURE_SI_PREFIX_DECI, 3, "cubic-decimeter"); assertTrue("centimeter equality", centimeter1 == centimeter2); @@ -3253,17 +3253,19 @@ void MeasureFormatTest::TestCompoundUnitOperations() { verifyUnitParts(quarticKilometer, UMEASURE_SI_PREFIX_KILO, 4, "p4-kilometer"); verifyUnitParts(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); - // assertTrue("power inequality", quarticKilometer != overQuarticKilometer1); + assertTrue("power inequality", quarticKilometer != overQuarticKilometer1); - // MeasureUnit overQuarticKilometer2 = overQuarticKilometer1.reciprocal(); - // MeasureUnit overQuarticKilometer3 = kilometer.product(kilometer).product(kilometer) - // .product(kilometer).reciprocal(); + MeasureUnit overQuarticKilometer2 = quarticKilometer.reciprocal(status); + MeasureUnit overQuarticKilometer3 = kilometer.product(kilometer, status) + .product(kilometer, status) + .product(kilometer, status) + .reciprocal(status); - // verifyUnitParts(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, 2, "one-per-p4-kilometer"); - // verifyUnitParts(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, 2, "one-per-p4-kilometer"); + verifyUnitParts(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifyUnitParts(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); - // assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); - // assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); + assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); + assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); // MeasureUnit kiloSquareSecond = MeasureUnit::getSecond() // .withPower(2).withSIPrefix(UMEASURE_SI_PREFIX_KILO); From f70ac326ff291133ed878e48100375f0afe2cf7c Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Wed, 15 Jan 2020 18:31:09 +0100 Subject: [PATCH 10/15] Checkpoint --- icu4c/source/test/intltest/measfmttest.cpp | 77 +++++++++++++++------- 1 file changed, 52 insertions(+), 25 deletions(-) diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 86814a7a8df..01f118f4d36 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -144,6 +144,9 @@ private: UMeasureSIPrefix siPrefix, int8_t power, const char* identifier); + void verifyUnitIdentifierOnly( + const MeasureUnit& unit, + const char* identifier); }; void MeasureFormatTest::runIndexedTest( @@ -3267,29 +3270,34 @@ void MeasureFormatTest::TestCompoundUnitOperations() { assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); - // MeasureUnit kiloSquareSecond = MeasureUnit::getSecond() - // .withPower(2).withSIPrefix(UMEASURE_SI_PREFIX_KILO); - // MeasureUnit meterSecond = meter.product(kiloSquareSecond); - // MeasureUnit cubicMeterSecond1 = meter.withPower(3).product(kiloSquareSecond); - // MeasureUnit cubicMeterSecond2 = meterSecond.withPower(3); - // MeasureUnit centimeterSecond1 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI).product(kiloSquareSecond); - // MeasureUnit centimeterSecond2 = meterSecond.withSIPrefix(UMEASURE_SI_PREFIX_CENTI); - // MeasureUnit secondCubicMeter = kiloSquareSecond.product(meter.withPower(3)); - // MeasureUnit secondCentimeter = kiloSquareSecond.product(meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI)); + MeasureUnit kiloSquareSecond = MeasureUnit::getSecond() + .withPower(2, status).withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); + MeasureUnit meterSecond = meter.product(kiloSquareSecond, status); + MeasureUnit cubicMeterSecond1 = meter.withPower(3, status).product(kiloSquareSecond, status); + MeasureUnit centimeterSecond1 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status).product(kiloSquareSecond, status); + MeasureUnit secondCubicMeter = kiloSquareSecond.product(meter.withPower(3, status), status); + MeasureUnit secondCentimeter = kiloSquareSecond.product(meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status), status); - // verifyUnitParts(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); - // verifyUnitParts(meterSecond, UMEASURE_SI_PREFIX_ONE, 0, "meter-square-kilosecond"); - // verifyUnitParts(cubicMeterSecond1, UMEASURE_SI_PREFIX_ONE, 2, "cubic-meter-square-kilosecond"); - // verifyUnitParts(cubicMeterSecond2, UMEASURE_SI_PREFIX_ONE, 2, "cubic-meter-square-kilosecond"); - // verifyUnitParts(centimeterSecond1, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter-square-kilosecond"); - // verifyUnitParts(centimeterSecond2, UMEASURE_SI_PREFIX_CENTI, 0, "centimeter-square-kilosecond"); - // verifyUnitParts(secondCubicMeter, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond-cubic-meter"); - // verifyUnitParts(secondCentimeter, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond-centimeter"); + verifyUnitParts(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); + verifyUnitIdentifierOnly(meterSecond, "meter-square-kilosecond"); + verifyUnitIdentifierOnly(cubicMeterSecond1, "cubic-meter-square-kilosecond"); + verifyUnitIdentifierOnly(centimeterSecond1, "centimeter-square-kilosecond"); + verifyUnitIdentifierOnly(secondCubicMeter, "square-kilosecond-cubic-meter"); + verifyUnitIdentifierOnly(secondCentimeter, "square-kilosecond-centimeter"); - // assertTrue("multipart power equality", cubicMeterSecond1 == cubicMeterSecond2); - // assertTrue("multipart SI prefix equality", centimeterSecond1 == centimeterSecond2); - // assertTrue("order matters inequality", cubicMeterSecond1 != secondCubicMeter); - // assertTrue("additional simple units inequality", secondCubicMeter != secondCentimeter); + // Don't allow get/set power or SI prefix on compound units + status.errIfFailureAndReset(); + meterSecond.getPower(status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + meterSecond.withPower(3, status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + meterSecond.getSIPrefix(status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + meterSecond.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + + assertTrue("order matters inequality", cubicMeterSecond1 != secondCubicMeter); + assertTrue("additional simple units inequality", secondCubicMeter != secondCentimeter); } @@ -3369,17 +3377,36 @@ void MeasureFormatTest::verifyUnitParts( int8_t power, const char* identifier) { IcuTestErrorCode status(*this, "verifyUnitParts"); - assertEquals(UnicodeString(identifier) + ": SI prefix", + UnicodeString uid(identifier, -1, US_INV); + assertEquals(uid + ": SI prefix", siPrefix, unit.getSIPrefix(status)); - assertEquals(UnicodeString(identifier) + ": Power", + status.errIfFailureAndReset("%s: SI prefix", identifier); + assertEquals(uid + ": Power", static_cast(power), static_cast(unit.getPower(status))); - assertEquals(UnicodeString(identifier) + ": Identifier", + status.errIfFailureAndReset("%s: Power", identifier); + assertEquals(uid + ": Identifier", identifier, unit.getIdentifier()); - assertTrue(UnicodeString(identifier) + ": Constructor", + status.errIfFailureAndReset("%s: Identifier", identifier); + assertTrue(uid + ": Constructor", unit == MeasureUnit::forIdentifier(identifier, status)); + status.errIfFailureAndReset("%s: Constructor", identifier); +} + +void MeasureFormatTest::verifyUnitIdentifierOnly( + const MeasureUnit& unit, + const char* identifier) { + IcuTestErrorCode status(*this, "verifyUnitIdentifierOnly"); + UnicodeString uid(identifier, -1, US_INV); + assertEquals(uid + ": Identifier", + identifier, + unit.getIdentifier()); + status.errIfFailureAndReset("%s: Identifier", identifier); + assertTrue(uid + ": Constructor", + unit == MeasureUnit::forIdentifier(identifier, status)); + status.errIfFailureAndReset("%s: Constructor", identifier); } extern IntlTest *createMeasureFormatTest() { From 8fa27836959bbdaa06a5cf69a2153aa81fbcaede Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Wed, 15 Jan 2020 21:05:27 +0100 Subject: [PATCH 11/15] Checkpoint --- icu4c/source/common/cmemory.h | 77 ++++++++++++++------ icu4c/source/common/unicode/localpointer.h | 13 ++++ icu4c/source/i18n/measunit_extra.cpp | 57 ++++++++++++--- icu4c/source/i18n/unicode/measunit.h | 5 ++ icu4c/source/test/intltest/measfmttest.cpp | 83 ++++++++++++++-------- 5 files changed, 175 insertions(+), 60 deletions(-) diff --git a/icu4c/source/common/cmemory.h b/icu4c/source/common/cmemory.h index 8f9be605396..da663918cfd 100644 --- a/icu4c/source/common/cmemory.h +++ b/icu4c/source/common/cmemory.h @@ -684,26 +684,26 @@ inline H *MaybeStackHeaderAndArray::orphanOrClone(int32_t l template class MemoryPool : public UMemory { public: - MemoryPool() : count(0), pool() {} + MemoryPool() : fCount(0), fPool() {} ~MemoryPool() { - for (int32_t i = 0; i < count; ++i) { - delete pool[i]; + for (int32_t i = 0; i < fCount; ++i) { + delete fPool[i]; } } MemoryPool(const MemoryPool&) = delete; MemoryPool& operator=(const MemoryPool&) = delete; - MemoryPool(MemoryPool&& other) U_NOEXCEPT : count(other.count), - pool(std::move(other.pool)) { - other.count = 0; + MemoryPool(MemoryPool&& other) U_NOEXCEPT : fCount(other.fCount), + fPool(std::move(other.fPool)) { + other.fCount = 0; } MemoryPool& operator=(MemoryPool&& other) U_NOEXCEPT { - count = other.count; - pool = std::move(other.pool); - other.count = 0; + fCount = other.fCount; + fPool = std::move(other.fPool); + other.fCount = 0; return *this; } @@ -716,20 +716,58 @@ public: */ template T* create(Args&&... args) { - int32_t capacity = pool.getCapacity(); - if (count == capacity && - pool.resize(capacity == stackCapacity ? 4 * capacity : 2 * capacity, - capacity) == nullptr) { + int32_t capacity = fPool.getCapacity(); + if (fCount == capacity && + fPool.resize(capacity == stackCapacity ? 4 * capacity : 2 * capacity, + capacity) == nullptr) { return nullptr; } - return pool[count++] = new T(std::forward(args)...); + return fPool[fCount++] = new T(std::forward(args)...); } /** * @return Number of elements that have been allocated. */ - int32_t size() const { - return count; + int32_t count() const { + return fCount; + } + +protected: + int32_t fCount; + MaybeStackArray fPool; +}; + +/** + * An internal Vector-like implementation based on MemoryPool. + * + * To append an item to the vector, use emplaceBack. + * + * MaybeStackVector vector; + * MyType* element = vector.emplaceBack(); + * if (!element) { + * status = U_MEMORY_ALLOCATION_ERROR; + * } + * // do stuff with element + * + * To loop over the vector, use a for loop with indices: + * + * for (int32_t i = 0; i < vector.length(); i++) { + * MyType* element = vector[i]; + * } + */ +template +class MaybeStackVector : protected MemoryPool { +public: + using MemoryPool::MemoryPool; + using MemoryPool::operator=; + + template + T* emplaceBack(Args&&... args) { + return this->create(args...); + } + + int32_t length() const { + return this->count(); } /** @@ -739,14 +777,11 @@ public: * @return reference to the array item */ T *operator[](ptrdiff_t i) const { - return pool[i]; + return this->fPool[i]; } - -private: - int32_t count; - MaybeStackArray pool; }; + U_NAMESPACE_END #endif /* __cplusplus */ diff --git a/icu4c/source/common/unicode/localpointer.h b/icu4c/source/common/unicode/localpointer.h index e011688b1a5..01f4205f99b 100644 --- a/icu4c/source/common/unicode/localpointer.h +++ b/icu4c/source/common/unicode/localpointer.h @@ -406,6 +406,10 @@ public: src.ptr=NULL; } + static LocalArray withLength(T *p, int32_t length) { + return LocalArray(p, length); + } + #ifndef U_HIDE_DRAFT_API /** * Constructs a LocalArray from a C++11 std::unique_ptr of an array type. @@ -537,6 +541,15 @@ public: return std::unique_ptr(LocalPointerBase::orphan()); } #endif /* U_HIDE_DRAFT_API */ + + int32_t length() const { return fLength; } + +private: + int32_t fLength = -1; + + LocalArray(T *p, int32_t length) : LocalArray(p) { + fLength = length; + } }; /** diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index eec204ed0e7..090cb803ac0 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -17,6 +17,7 @@ #include "ucln_in.h" #include "umutex.h" #include "unicode/errorcode.h" +#include "unicode/localpointer.h" #include "unicode/measunit.h" #include "unicode/ucharstrie.h" #include "unicode/ucharstriebuilder.h" @@ -356,6 +357,8 @@ struct PowerUnit { class CompoundUnit { public: + typedef MaybeStackVector PowerUnitList; + void append(PowerUnit&& powerUnit, UErrorCode& status) { if (powerUnit.power >= 0) { appendImpl(numerator, std::move(powerUnit), status); @@ -371,19 +374,26 @@ public: } void appendTo(CharString& builder, UErrorCode& status) { - if (numerator.size() == 0) { + if (numerator.length() == 0) { builder.append("one", status); } else { - appendToImpl(numerator, numerator.size(), builder, status); + appendToImpl(numerator, numerator.length(), builder, status); } - if (denominator.size() > 0) { + if (denominator.length() > 0) { builder.append("-per-", status); - appendToImpl(denominator, denominator.size(), builder, status); + appendToImpl(denominator, denominator.length(), builder, status); } } + const PowerUnitList& getNumeratorUnits() { + return numerator; + } + + const PowerUnitList& getDenominatorUnits() { + return denominator; + } + private: - typedef MemoryPool PowerUnitList; PowerUnitList numerator; PowerUnitList denominator; @@ -401,7 +411,7 @@ private: void appendImpl(PowerUnitList& unitList, PowerUnit&& powerUnit, UErrorCode& status) { // Check that the same simple unit doesn't already exist - for (int32_t i = 0; i < unitList.size(); i++) { + for (int32_t i = 0; i < unitList.length(); i++) { PowerUnit* candidate = unitList[i]; if (candidate->simpleUnitIndex == powerUnit.simpleUnitIndex && candidate->siPrefix == powerUnit.siPrefix) { @@ -410,7 +420,7 @@ private: } } // Add a new unit - PowerUnit* destination = unitList.create(); + PowerUnit* destination = unitList.emplaceBack(); if (!destination) { status = U_MEMORY_ALLOCATION_ERROR; return; @@ -555,6 +565,7 @@ private: goto fail; } fAfterPer = true; + result.power = -1; break; case COMPOUND_PART_TIMES: @@ -580,10 +591,7 @@ private: if (state > 0) { goto fail; } - result.power = token.getPower(); - if (fAfterPer) { - result.power *= -1; - } + result.power *= token.getPower(); previ = fIndex; state = 1; break; @@ -721,6 +729,33 @@ MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) c return MeasureUnit(builder.cloneData(status)); } +LocalArray MeasureUnit::getSimpleUnits(UErrorCode& status) const { + const char* id = getIdentifier(); + CompoundUnit compoundUnit = UnitIdentifierParser::from(id, status).getOnlyCompoundUnit(status); + if (U_FAILURE(status)) { + return LocalArray::withLength(nullptr, 0); + } + + const CompoundUnit::PowerUnitList& numerator = compoundUnit.getNumeratorUnits(); + const CompoundUnit::PowerUnitList& denominator = compoundUnit.getDenominatorUnits(); + int32_t count = numerator.length() + denominator.length(); + MeasureUnit* arr = new MeasureUnit[count]; + + CharString builder; + int32_t i = 0; + for (int32_t j = 0; j < numerator.length(); j++) { + numerator[j]->appendTo(builder.clear(), status); + arr[i++] = MeasureUnit(builder.cloneData(status)); + } + for (int32_t j = 0; j < denominator.length(); j++) { + builder.clear().append("one-per-", status); + denominator[j]->appendTo(builder, status); + arr[i++] = MeasureUnit(builder.cloneData(status)); + } + + return LocalArray::withLength(arr, count); +} + U_NAMESPACE_END diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index 8ed893190ab..c9947d3edff 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -20,6 +20,7 @@ #if !UCONFIG_NO_FORMATTING #include "unicode/unistr.h" +#include "unicode/localpointer.h" /** * \file @@ -377,6 +378,10 @@ class U_I18N_API MeasureUnit: public UObject { */ size_t getSimpleUnitCount(UErrorCode& status) const; + LocalArray getSimpleUnits(UErrorCode& status) const; + + LocalArray getSequenceUnits(UErrorCode& status) const; + /** * Gets the constituent unit at the given index. * diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 01f118f4d36..5af03b77e64 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -139,14 +139,16 @@ private: NumberFormat::EAlignmentFields field, int32_t start, int32_t end); - void verifyUnitParts( + void verifyPowerUnit( const MeasureUnit& unit, UMeasureSIPrefix siPrefix, int8_t power, const char* identifier); - void verifyUnitIdentifierOnly( + void verifyCompoundUnit( const MeasureUnit& unit, - const char* identifier); + const char* identifier, + const char** subIdentifiers, + int32_t subIdentifierCount); }; void MeasureFormatTest::runIndexedTest( @@ -3237,11 +3239,11 @@ void MeasureFormatTest::TestCompoundUnitOperations() { MeasureUnit centimeter2 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); MeasureUnit cubicDecimeter = cubicMeter.withSIPrefix(UMEASURE_SI_PREFIX_DECI, status); - verifyUnitParts(kilometer, UMEASURE_SI_PREFIX_KILO, 1, "kilometer"); - verifyUnitParts(meter, UMEASURE_SI_PREFIX_ONE, 1, "meter"); - verifyUnitParts(centimeter1, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); - verifyUnitParts(centimeter2, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); - verifyUnitParts(cubicDecimeter, UMEASURE_SI_PREFIX_DECI, 3, "cubic-decimeter"); + verifyPowerUnit(kilometer, UMEASURE_SI_PREFIX_KILO, 1, "kilometer"); + verifyPowerUnit(meter, UMEASURE_SI_PREFIX_ONE, 1, "meter"); + verifyPowerUnit(centimeter1, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); + verifyPowerUnit(centimeter2, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); + verifyPowerUnit(cubicDecimeter, UMEASURE_SI_PREFIX_DECI, 3, "cubic-decimeter"); assertTrue("centimeter equality", centimeter1 == centimeter2); assertTrue("kilometer inequality", centimeter1 != kilometer); @@ -3251,10 +3253,10 @@ void MeasureFormatTest::TestCompoundUnitOperations() { MeasureUnit quarticKilometer = kilometer.withPower(4, status); MeasureUnit overQuarticKilometer1 = kilometer.withPower(-4, status); - verifyUnitParts(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter"); - verifyUnitParts(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "one-per-cubic-centimeter"); - verifyUnitParts(quarticKilometer, UMEASURE_SI_PREFIX_KILO, 4, "p4-kilometer"); - verifyUnitParts(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifyPowerUnit(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter"); + verifyPowerUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "one-per-cubic-centimeter"); + verifyPowerUnit(quarticKilometer, UMEASURE_SI_PREFIX_KILO, 4, "p4-kilometer"); + verifyPowerUnit(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); assertTrue("power inequality", quarticKilometer != overQuarticKilometer1); @@ -3264,8 +3266,8 @@ void MeasureFormatTest::TestCompoundUnitOperations() { .product(kilometer, status) .reciprocal(status); - verifyUnitParts(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); - verifyUnitParts(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifyPowerUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifyPowerUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); @@ -3277,13 +3279,30 @@ void MeasureFormatTest::TestCompoundUnitOperations() { MeasureUnit centimeterSecond1 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status).product(kiloSquareSecond, status); MeasureUnit secondCubicMeter = kiloSquareSecond.product(meter.withPower(3, status), status); MeasureUnit secondCentimeter = kiloSquareSecond.product(meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status), status); + MeasureUnit secondCentimeterPerKilometer = secondCentimeter.product(kilometer.reciprocal(status), status); - verifyUnitParts(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); - verifyUnitIdentifierOnly(meterSecond, "meter-square-kilosecond"); - verifyUnitIdentifierOnly(cubicMeterSecond1, "cubic-meter-square-kilosecond"); - verifyUnitIdentifierOnly(centimeterSecond1, "centimeter-square-kilosecond"); - verifyUnitIdentifierOnly(secondCubicMeter, "square-kilosecond-cubic-meter"); - verifyUnitIdentifierOnly(secondCentimeter, "square-kilosecond-centimeter"); + verifyPowerUnit(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); + const char* meterSecondSub[] = {"meter", "square-kilosecond"}; + verifyCompoundUnit(meterSecond, "meter-square-kilosecond", + meterSecondSub, UPRV_LENGTHOF(meterSecondSub)); + const char* cubicMeterSecond1Sub[] = {"cubic-meter", "square-kilosecond"}; + verifyCompoundUnit(cubicMeterSecond1, "cubic-meter-square-kilosecond", + cubicMeterSecond1Sub, UPRV_LENGTHOF(cubicMeterSecond1Sub)); + const char* centimeterSecond1Sub[] = {"centimeter", "square-kilosecond"}; + verifyCompoundUnit(centimeterSecond1, "centimeter-square-kilosecond", + centimeterSecond1Sub, UPRV_LENGTHOF(centimeterSecond1Sub)); + const char* secondCubicMeterSub[] = {"square-kilosecond", "cubic-meter"}; + verifyCompoundUnit(secondCubicMeter, "square-kilosecond-cubic-meter", + secondCubicMeterSub, UPRV_LENGTHOF(secondCubicMeterSub)); + const char* secondCentimeterSub[] = {"square-kilosecond", "centimeter"}; + verifyCompoundUnit(secondCentimeter, "square-kilosecond-centimeter", + secondCentimeterSub, UPRV_LENGTHOF(secondCentimeterSub)); + const char* secondCentimeterPerKilometerSub[] = {"square-kilosecond", "centimeter", "one-per-kilometer"}; + verifyCompoundUnit(secondCentimeterPerKilometer, "square-kilosecond-centimeter-per-kilometer", + secondCentimeterPerKilometerSub, UPRV_LENGTHOF(secondCentimeterPerKilometerSub)); + + assertTrue("order matters inequality", cubicMeterSecond1 != secondCubicMeter); + assertTrue("additional simple units inequality", secondCubicMeter != secondCentimeter); // Don't allow get/set power or SI prefix on compound units status.errIfFailureAndReset(); @@ -3295,9 +3314,6 @@ void MeasureFormatTest::TestCompoundUnitOperations() { status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); meterSecond.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); - - assertTrue("order matters inequality", cubicMeterSecond1 != secondCubicMeter); - assertTrue("additional simple units inequality", secondCubicMeter != secondCentimeter); } @@ -3371,12 +3387,12 @@ void MeasureFormatTest::verifyFormat( } } -void MeasureFormatTest::verifyUnitParts( +void MeasureFormatTest::verifyPowerUnit( const MeasureUnit& unit, UMeasureSIPrefix siPrefix, int8_t power, const char* identifier) { - IcuTestErrorCode status(*this, "verifyUnitParts"); + IcuTestErrorCode status(*this, "verifyPowerUnit"); UnicodeString uid(identifier, -1, US_INV); assertEquals(uid + ": SI prefix", siPrefix, @@ -3395,10 +3411,12 @@ void MeasureFormatTest::verifyUnitParts( status.errIfFailureAndReset("%s: Constructor", identifier); } -void MeasureFormatTest::verifyUnitIdentifierOnly( +void MeasureFormatTest::verifyCompoundUnit( const MeasureUnit& unit, - const char* identifier) { - IcuTestErrorCode status(*this, "verifyUnitIdentifierOnly"); + const char* identifier, + const char** subIdentifiers, + int32_t subIdentifierCount) { + IcuTestErrorCode status(*this, "verifyCompoundUnit"); UnicodeString uid(identifier, -1, US_INV); assertEquals(uid + ": Identifier", identifier, @@ -3407,6 +3425,15 @@ void MeasureFormatTest::verifyUnitIdentifierOnly( assertTrue(uid + ": Constructor", unit == MeasureUnit::forIdentifier(identifier, status)); status.errIfFailureAndReset("%s: Constructor", identifier); + + LocalArray subUnits = unit.getSimpleUnits(status); + assertEquals(uid + ": Length", subIdentifierCount, subUnits.length()); + for (int32_t i = 0;; i++) { + if (i >= subIdentifierCount || i >= subUnits.length()) break; + assertEquals(uid + ": Sub-unit #" + Int64ToUnicodeString(i), + subIdentifiers[i], + subUnits[i].getIdentifier()); + } } extern IntlTest *createMeasureFormatTest() { From 05ec2aea59a9b6a86425b2e4379b26485f8d495a Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Thu, 16 Jan 2020 22:17:46 +0100 Subject: [PATCH 12/15] Renaming and refactoring --- icu4c/source/i18n/measunit_extra.cpp | 103 +++++++----- icu4c/source/i18n/unicode/measunit.h | 186 +++++++++++---------- icu4c/source/test/intltest/measfmttest.cpp | 43 +++-- 3 files changed, 186 insertions(+), 146 deletions(-) diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index 090cb803ac0..e39d6dab697 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -87,7 +87,7 @@ const struct SIPrefixStrings { { "yocto", UMEASURE_SI_PREFIX_YOCTO }, }; -// FIXME: Get this list from data +// TODO(ICU-20920): Get this list from data const char16_t* const gSimpleUnits[] = { u"one", // note: expected to be index 0 u"100kilometer", @@ -308,7 +308,7 @@ private: int32_t fMatch; }; -struct PowerUnit { +struct SingleUnit { int8_t power = 1; UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; int32_t simpleUnitIndex = 0; @@ -357,13 +357,13 @@ struct PowerUnit { class CompoundUnit { public: - typedef MaybeStackVector PowerUnitList; + typedef MaybeStackVector SingleUnitList; - void append(PowerUnit&& powerUnit, UErrorCode& status) { - if (powerUnit.power >= 0) { - appendImpl(numerator, std::move(powerUnit), status); + void append(SingleUnit&& singleUnit, UErrorCode& status) { + if (singleUnit.power >= 0) { + appendImpl(numerator, std::move(singleUnit), status); } else { - appendImpl(denominator, std::move(powerUnit), status); + appendImpl(denominator, std::move(singleUnit), status); } } @@ -373,7 +373,7 @@ public: denominator = std::move(temp); } - void appendTo(CharString& builder, UErrorCode& status) { + void appendTo(CharString& builder, UErrorCode& status) const { if (numerator.length() == 0) { builder.append("one", status); } else { @@ -385,19 +385,23 @@ public: } } - const PowerUnitList& getNumeratorUnits() { + const SingleUnitList& getNumeratorUnits() const { return numerator; } - const PowerUnitList& getDenominatorUnits() { + const SingleUnitList& getDenominatorUnits() const { return denominator; } -private: - PowerUnitList numerator; - PowerUnitList denominator; + bool isSingle() const { + return numerator.length() + denominator.length() == 1; + } - void appendToImpl(const PowerUnitList& unitList, int32_t len, CharString& builder, UErrorCode& status) { +private: + SingleUnitList numerator; + SingleUnitList denominator; + + void appendToImpl(const SingleUnitList& unitList, int32_t len, CharString& builder, UErrorCode& status) const { bool first = true; for (int32_t i = 0; i < len; i++) { if (first) { @@ -409,23 +413,23 @@ private: } } - void appendImpl(PowerUnitList& unitList, PowerUnit&& powerUnit, UErrorCode& status) { + void appendImpl(SingleUnitList& unitList, SingleUnit&& singleUnit, UErrorCode& status) { // Check that the same simple unit doesn't already exist for (int32_t i = 0; i < unitList.length(); i++) { - PowerUnit* candidate = unitList[i]; - if (candidate->simpleUnitIndex == powerUnit.simpleUnitIndex - && candidate->siPrefix == powerUnit.siPrefix) { - candidate->power += powerUnit.power; + SingleUnit* candidate = unitList[i]; + if (candidate->simpleUnitIndex == singleUnit.simpleUnitIndex + && candidate->siPrefix == singleUnit.siPrefix) { + candidate->power += singleUnit.power; return; } } // Add a new unit - PowerUnit* destination = unitList.emplaceBack(); + SingleUnit* destination = unitList.emplaceBack(); if (!destination) { status = U_MEMORY_ALLOCATION_ERROR; return; } - *destination = std::move(powerUnit); + *destination = std::move(singleUnit); } }; @@ -446,10 +450,10 @@ public: return fIndex < fSource.length(); } - PowerUnit getOnlyPowerUnit(UErrorCode& status) { + SingleUnit getOnlySingleUnit(UErrorCode& status) { bool sawPlus; - PowerUnit retval; - nextPowerUnit(retval, sawPlus, status); + SingleUnit retval; + nextSingleUnit(retval, sawPlus, status); if (U_FAILURE(status)) { return retval; } @@ -468,13 +472,13 @@ public: } while (hasNext()) { int32_t previ = fIndex; - PowerUnit powerUnit; - nextPowerUnit(powerUnit, sawPlus, status); + SingleUnit singleUnit; + nextSingleUnit(singleUnit, sawPlus, status); if (sawPlus) { fIndex = previ; break; } - result.append(std::move(powerUnit), status); + result.append(std::move(singleUnit), status); } return; } @@ -537,7 +541,7 @@ private: return Token(match); } - void nextPowerUnit(PowerUnit& result, bool& sawPlus, UErrorCode& status) { + void nextSingleUnit(SingleUnit& result, bool& sawPlus, UErrorCode& status) { sawPlus = false; if (U_FAILURE(status)) { return; @@ -607,7 +611,7 @@ private: case Token::TYPE_ONE: // Skip "one" and go to the next unit - return nextPowerUnit(result, sawPlus, status); + return nextSingleUnit(result, sawPlus, status); case Token::TYPE_SIMPLE_UNIT: result.simpleUnitIndex = token.getSimpleUnitIndex(); @@ -654,42 +658,61 @@ MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& statu return MeasureUnit(builder.cloneData(status)); } +UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const { + const char* id = getIdentifier(); + UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); + if (U_FAILURE(status)) { + // Unrecoverable error + return UMEASURE_UNIT_SINGLE; + } + + CompoundUnit compoundUnit; + parser.nextCompoundUnit(compoundUnit, status); + if (compoundUnit.isSingle()) { + return UMEASURE_UNIT_SINGLE; + } else if (parser.hasNext()) { + return UMEASURE_UNIT_SEQUENCE; + } else { + return UMEASURE_UNIT_COMPOUND; + } +} + UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const { const char* id = getIdentifier(); - return UnitIdentifierParser::from(id, status).getOnlyPowerUnit(status).siPrefix; + return UnitIdentifierParser::from(id, status).getOnlySingleUnit(status).siPrefix; } MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const { const char* id = getIdentifier(); - PowerUnit powerUnit = UnitIdentifierParser::from(id, status).getOnlyPowerUnit(status); + SingleUnit singleUnit = UnitIdentifierParser::from(id, status).getOnlySingleUnit(status); if (U_FAILURE(status)) { return *this; } - powerUnit.siPrefix = prefix; + singleUnit.siPrefix = prefix; CharString builder; - powerUnit.appendTo(builder, status); + singleUnit.appendTo(builder, status); return MeasureUnit(builder.cloneData(status)); } int8_t MeasureUnit::getPower(UErrorCode& status) const { const char* id = getIdentifier(); - return UnitIdentifierParser::from(id, status).getOnlyPowerUnit(status).power; + return UnitIdentifierParser::from(id, status).getOnlySingleUnit(status).power; } MeasureUnit MeasureUnit::withPower(int8_t power, UErrorCode& status) const { const char* id = getIdentifier(); - PowerUnit powerUnit = UnitIdentifierParser::from(id, status).getOnlyPowerUnit(status); + SingleUnit singleUnit = UnitIdentifierParser::from(id, status).getOnlySingleUnit(status); if (U_FAILURE(status)) { return *this; } CharString builder; - powerUnit.power = power; + singleUnit.power = power; if (power < 0) { builder.append("one-per-", status); } - powerUnit.appendTo(builder, status); + singleUnit.appendTo(builder, status); return MeasureUnit(builder.cloneData(status)); } @@ -729,15 +752,15 @@ MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) c return MeasureUnit(builder.cloneData(status)); } -LocalArray MeasureUnit::getSimpleUnits(UErrorCode& status) const { +LocalArray MeasureUnit::getSingleUnits(UErrorCode& status) const { const char* id = getIdentifier(); CompoundUnit compoundUnit = UnitIdentifierParser::from(id, status).getOnlyCompoundUnit(status); if (U_FAILURE(status)) { return LocalArray::withLength(nullptr, 0); } - const CompoundUnit::PowerUnitList& numerator = compoundUnit.getNumeratorUnits(); - const CompoundUnit::PowerUnitList& denominator = compoundUnit.getDenominatorUnits(); + const CompoundUnit::SingleUnitList& numerator = compoundUnit.getNumeratorUnits(); + const CompoundUnit::SingleUnitList& denominator = compoundUnit.getDenominatorUnits(); int32_t count = numerator.length() + denominator.length(); MeasureUnit* arr = new MeasureUnit[count]; diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index c9947d3edff..cdf760fdbdb 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -31,6 +31,44 @@ U_NAMESPACE_BEGIN class StringEnumeration; +/** + * Enumeration for unit complexity. There are three levels: + * + * - SINGLE: A single unit, optionally with a power and/or SI prefix. Examples: hectare, + * square-kilometer, kilojoule, one-per-second. + * - COMPOUND: A unit composed of the product of multiple single units. Examples: + * meter-per-second, kilowatt-hour, kilogram-meter-per-square-second. + * - SEQUENCE: A unit composed of the sum of multiple compound units. Examples: foot+inch, + * hour+minute+second, hectare+square-meter. + * + * The complexity determines which operations are available. For example, you cannot set the power + * or SI prefix of a compound unit. + * + * @draft ICU 67 + */ +enum UMeasureUnitComplexity { + /** + * A single unit, like kilojoule. + * + * @draft ICU 67 + */ + UMEASURE_UNIT_SINGLE, + + /** + * A compound unit, like meter-per-second. + * + * @draft ICU 67 + */ + UMEASURE_UNIT_COMPOUND, + + /** + * A sequence unit, like hour+minute. + * + * @draft ICU 67 + */ + UMEASURE_UNIT_SEQUENCE +}; + /** * Enumeration for SI prefixes, such as "kilo". * @@ -297,60 +335,74 @@ class U_I18N_API MeasureUnit: public UObject { const char* getIdentifier() const; /** - * Creates a MeasureUnit which is this MeasureUnit augmented with the specified SI prefix. + * Compute the complexity of the unit. See UMeasureUnitComplexity for more information. + * + * @param status Set if an error occurs. + * @return The unit complexity. + * @draft ICU 67 + */ + UMeasureUnitComplexity getComplexity(UErrorCode& status) const; + + /** + * Creates a MeasureUnit which is this SINGLE unit augmented with the specified SI prefix. * For example, UMEASURE_SI_PREFIX_KILO for "kilo". * * There is sufficient locale data to format all standard SI prefixes. * - * This method only works if the MeasureUnit is composed of only one simple unit. An error - * will be set if called on a MeasureUnit containing multiple units. + * NOTE: Only works on SINGLE units. If this is a COMPOUND or SEQUENCE unit, an error will + * occur. For more information, see UMeasureUnitComplexity. * * @param prefix The SI prefix, from UMeasureSIPrefix. - * @param status ICU error code - * @return A new MeasureUnit. + * @param status Set if this is not a SINGLE unit or if another error occurs. + * @return A new SINGLE unit. */ MeasureUnit withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const; /** - * Gets the current SI prefix of this MeasureUnit. For example, if the unit has the SI prefix + * Gets the current SI prefix of this SINGLE unit. For example, if the unit has the SI prefix * "kilo", then UMEASURE_SI_PREFIX_KILO is returned. * - * If the MeasureUnit is composed of multiple simple units, the SI prefix of the first simple - * unit is returned. For example, from "centimeter-kilogram-per-second", the SI prefix - * UMEASURE_SI_PREFIX_CENTI will be returned. + * NOTE: Only works on SINGLE units. If this is a COMPOUND or SEQUENCE unit, an error will + * occur. For more information, see UMeasureUnitComplexity. * - * @return The SI prefix of the first simple unit, from UMeasureSIPrefix. + * @param status Set if this is not a SINGLE unit or if another error occurs. + * @return The SI prefix of this SINGLE unit, from UMeasureSIPrefix. */ UMeasureSIPrefix getSIPrefix(UErrorCode& status) const; /** - * Creates a MeasureUnit which is this MeasureUnit augmented with the specified power. For + * Creates a MeasureUnit which is this SINGLE unit augmented with the specified power. For * example, if power is 2, the unit will be squared. * - * This method only works if the MeasureUnit is composed of only one simple unit. An error - * will be set if called on a MeasureUnit containing multiple units. + * NOTE: Only works on SINGLE units. If this is a COMPOUND or SEQUENCE unit, an error will + * occur. For more information, see UMeasureUnitComplexity. * * @param power The power. - * @param status ICU error code - * @return A new MeasureUnit. + * @param status Set if this is not a SINGLE unit or if another error occurs. + * @return A new SINGLE unit. */ MeasureUnit withPower(int8_t power, UErrorCode& status) const; /** * Gets the power of this MeasureUnit. For example, if the unit is square, then 2 is returned. * - * If the MeasureUnit is composed of multiple simple units, the power of the first simple unit - * is returned. For example, from "cubic-meter-per-square-second", 3 is returned. + * NOTE: Only works on SINGLE units. If this is a COMPOUND or SEQUENCE unit, an error will + * occur. For more information, see UMeasureUnitComplexity. * - * @return The power of the first simple unit. + * @param status Set if this is not a SINGLE unit or if another error occurs. + * @return The power of this simple unit. */ int8_t getPower(UErrorCode& status) const; /** - * Gets the reciprocal of the unit, with the numerator and denominator flipped. + * Gets the reciprocal of this MeasureUnit, with the numerator and denominator flipped. * * For example, if the receiver is "meter-per-second", the unit "second-per-meter" is returned. * + * NOTE: Only works on SINGLE and COMPOUND units. If this is a SEQUENCE unit, an error will + * occur. For more information, see UMeasureUnitComplexity. + * + * @param status Set if this is a SEQUENCE unit or if another error occurs. * @return The reciprocal of the target unit. */ MeasureUnit reciprocal(UErrorCode& status) const; @@ -364,88 +416,42 @@ class U_I18N_API MeasureUnit: public UObject { * For example, if the receiver is "kilowatt" and the argument is "hour-per-day", then the * unit "kilowatt-hour-per-day" is returned. * + * NOTE: Only works on SINGLE and COMPOUND units. If either unit (receivee and argument) is a + * SEQUENCE unit, an error will occur. For more information, see UMeasureUnitComplexity. + * + * @param status Set if this or other is a SEQUENCE unit or if another error occurs. * @return The product of the target unit with the provided unit. */ MeasureUnit product(const MeasureUnit& other, UErrorCode& status) const; /** - * Gets the number of constituent simple units. + * Gets the list of single units contained within a compound unit. * - * For example, if the receiver is "meter-per-square-second", then 2 is returned, since there - * are two simple units: "meter" and "second". + * For example, given "meter-kilogram-per-second", three units will be returned: "meter", + * "kilogram", and "one-per-second". * - * @return The number of constituent units. + * If this is a SINGLE unit, an array of length 1 will be returned. + * + * NOTE: Only works on SINGLE and COMPOUND units. If this is a SEQUENCE unit, an error will + * occur. For more information, see UMeasureUnitComplexity. + * + * @param status Set if this is a SEQUENCE unit or if another error occurs. + * @return An array of single units, owned by the caller. */ - size_t getSimpleUnitCount(UErrorCode& status) const; - - LocalArray getSimpleUnits(UErrorCode& status) const; - - LocalArray getSequenceUnits(UErrorCode& status) const; + LocalArray getSingleUnits(UErrorCode& status) const; /** - * Gets the constituent unit at the given index. - * - * For example, to loop over all simple units: - * - *
-     * MeasureUnit unit(u"meter-per-square-second");
-     * for (size_t i = 0; i < unit.getSimpleUnitCount(); i++) {
-     *     std::cout << unit.simpleUnitAt(i).toString() << std::endl;
-     * }
-     * 
- * - * Expected output: meter, one-per-square-second - * - * @param index Zero-based index. If out of range, the dimensionless unit is returned. - * @return The constituent simple unit at the specified position. + * Gets the list of compound units contained within a sequence unit. + * + * For example, given "hour+minute+second", three units will be returned: "hour", "minute", + * and "second". + * + * If this is a SINGLE or COMPOUND unit, an array of length 1 will be returned. + * + * @param status Set of an error occurs. + * @return An array of compound units, owned by the caller. */ - MeasureUnit simpleUnitAt(size_t index, UErrorCode& status) const; - - /** - * Composes this unit with a super unit. - * - * A super unit, used for formatting only, should be a larger unit sharing the same dimension. - * For example, if the current unit is "inch", a super unit could be "foot", in order to - * render 71 inches as "5 feet, 11 inches". If the super unit is invalid, an error will occur - * during formatting. - * - * A unit can have multiple super units; for example, "second" could have both "minute" and - * "hour" as super units. - * - * Super units are ignored and left untouched in most other methods, such as withSIPrefix, - * withPower, and reciprocal. - * - * @param other The super unit to compose with the target unit. - * @return The composition of the given super unit with this unit. - */ - MeasureUnit withSuperUnit(const MeasureUnit& other, UErrorCode& status) const; - - /** - * Gets the number of super units in the receiver. - * - * For example, "foot+inch" has one super unit. - * - * @return The number of super units. - */ - size_t getSuperUnitCount(UErrorCode& status) const; - - /** - * Gets the super unit at the given index. - * - * For example, to loop over all super units: - * - *
-     * MeasureUnit unit(u"hour+minute+second");
-     * for (size_t i = 0; i < unit.getSuperUnitCount(); i++) {
-     *     std::cout << unit.superUnitAt(i).toCoreUnitIdentifier() << std::endl;
-     * }
-     * 
- * - * Expected output: hour, minute - * - * @return The super unit at the specified position. - */ - MeasureUnit superUnitAt(size_t index, UErrorCode& status) const; + LocalArray getCompoundUnits(UErrorCode& status) const; /** * getAvailable gets all of the available units. diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 5af03b77e64..b15ca5b0128 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -139,7 +139,7 @@ private: NumberFormat::EAlignmentFields field, int32_t start, int32_t end); - void verifyPowerUnit( + void verifySingleUnit( const MeasureUnit& unit, UMeasureSIPrefix siPrefix, int8_t power, @@ -3239,11 +3239,11 @@ void MeasureFormatTest::TestCompoundUnitOperations() { MeasureUnit centimeter2 = meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); MeasureUnit cubicDecimeter = cubicMeter.withSIPrefix(UMEASURE_SI_PREFIX_DECI, status); - verifyPowerUnit(kilometer, UMEASURE_SI_PREFIX_KILO, 1, "kilometer"); - verifyPowerUnit(meter, UMEASURE_SI_PREFIX_ONE, 1, "meter"); - verifyPowerUnit(centimeter1, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); - verifyPowerUnit(centimeter2, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); - verifyPowerUnit(cubicDecimeter, UMEASURE_SI_PREFIX_DECI, 3, "cubic-decimeter"); + verifySingleUnit(kilometer, UMEASURE_SI_PREFIX_KILO, 1, "kilometer"); + verifySingleUnit(meter, UMEASURE_SI_PREFIX_ONE, 1, "meter"); + verifySingleUnit(centimeter1, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); + verifySingleUnit(centimeter2, UMEASURE_SI_PREFIX_CENTI, 1, "centimeter"); + verifySingleUnit(cubicDecimeter, UMEASURE_SI_PREFIX_DECI, 3, "cubic-decimeter"); assertTrue("centimeter equality", centimeter1 == centimeter2); assertTrue("kilometer inequality", centimeter1 != kilometer); @@ -3253,10 +3253,10 @@ void MeasureFormatTest::TestCompoundUnitOperations() { MeasureUnit quarticKilometer = kilometer.withPower(4, status); MeasureUnit overQuarticKilometer1 = kilometer.withPower(-4, status); - verifyPowerUnit(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter"); - verifyPowerUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "one-per-cubic-centimeter"); - verifyPowerUnit(quarticKilometer, UMEASURE_SI_PREFIX_KILO, 4, "p4-kilometer"); - verifyPowerUnit(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifySingleUnit(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter"); + verifySingleUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "one-per-cubic-centimeter"); + verifySingleUnit(quarticKilometer, UMEASURE_SI_PREFIX_KILO, 4, "p4-kilometer"); + verifySingleUnit(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); assertTrue("power inequality", quarticKilometer != overQuarticKilometer1); @@ -3266,8 +3266,8 @@ void MeasureFormatTest::TestCompoundUnitOperations() { .product(kilometer, status) .reciprocal(status); - verifyPowerUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); - verifyPowerUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifySingleUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifySingleUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); @@ -3281,7 +3281,7 @@ void MeasureFormatTest::TestCompoundUnitOperations() { MeasureUnit secondCentimeter = kiloSquareSecond.product(meter.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status), status); MeasureUnit secondCentimeterPerKilometer = secondCentimeter.product(kilometer.reciprocal(status), status); - verifyPowerUnit(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); + verifySingleUnit(kiloSquareSecond, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); const char* meterSecondSub[] = {"meter", "square-kilosecond"}; verifyCompoundUnit(meterSecond, "meter-square-kilosecond", meterSecondSub, UPRV_LENGTHOF(meterSecondSub)); @@ -3387,12 +3387,12 @@ void MeasureFormatTest::verifyFormat( } } -void MeasureFormatTest::verifyPowerUnit( +void MeasureFormatTest::verifySingleUnit( const MeasureUnit& unit, UMeasureSIPrefix siPrefix, int8_t power, const char* identifier) { - IcuTestErrorCode status(*this, "verifyPowerUnit"); + IcuTestErrorCode status(*this, "verifySingleUnit"); UnicodeString uid(identifier, -1, US_INV); assertEquals(uid + ": SI prefix", siPrefix, @@ -3409,6 +3409,10 @@ void MeasureFormatTest::verifyPowerUnit( assertTrue(uid + ": Constructor", unit == MeasureUnit::forIdentifier(identifier, status)); status.errIfFailureAndReset("%s: Constructor", identifier); + assertEquals(uid + ": Complexity", + UMEASURE_UNIT_SINGLE, + unit.getComplexity(status)); + status.errIfFailureAndReset("%s: Complexity", identifier); } void MeasureFormatTest::verifyCompoundUnit( @@ -3425,14 +3429,21 @@ void MeasureFormatTest::verifyCompoundUnit( assertTrue(uid + ": Constructor", unit == MeasureUnit::forIdentifier(identifier, status)); status.errIfFailureAndReset("%s: Constructor", identifier); + assertEquals(uid + ": Complexity", + UMEASURE_UNIT_COMPOUND, + unit.getComplexity(status)); + status.errIfFailureAndReset("%s: Complexity", identifier); - LocalArray subUnits = unit.getSimpleUnits(status); + LocalArray subUnits = unit.getSingleUnits(status); assertEquals(uid + ": Length", subIdentifierCount, subUnits.length()); for (int32_t i = 0;; i++) { if (i >= subIdentifierCount || i >= subUnits.length()) break; assertEquals(uid + ": Sub-unit #" + Int64ToUnicodeString(i), subIdentifiers[i], subUnits[i].getIdentifier()); + assertEquals(uid + ": Sub-unit Complexity", + UMEASURE_UNIT_SINGLE, + subUnits[i].getComplexity(status)); } } From 6369aba35a853390e6434111d81016cd8b2ed5cd Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Fri, 17 Jan 2020 00:24:56 +0100 Subject: [PATCH 13/15] Implementing sequence units and fixing bugs --- icu4c/source/common/unicode/localpointer.h | 25 ++- icu4c/source/i18n/measunit_extra.cpp | 233 +++++++++++++-------- icu4c/source/test/intltest/measfmttest.cpp | 106 ++++++++++ 3 files changed, 272 insertions(+), 92 deletions(-) diff --git a/icu4c/source/common/unicode/localpointer.h b/icu4c/source/common/unicode/localpointer.h index 01f4205f99b..b6f8b15c7fb 100644 --- a/icu4c/source/common/unicode/localpointer.h +++ b/icu4c/source/common/unicode/localpointer.h @@ -378,9 +378,9 @@ public: * @param p simple pointer to an array of T objects that is adopted * @stable ICU 4.4 */ - explicit LocalArray(T *p=NULL) : LocalPointerBase(p) {} + explicit LocalArray(T *p=nullptr) : LocalPointerBase(p), fLength(p==nullptr?0:-1) {} /** - * Constructor takes ownership and reports an error if NULL. + * Constructor takes ownership and reports an error if nullptr. * * This constructor is intended to be used with other-class constructors * that may report a failure UErrorCode, @@ -389,11 +389,11 @@ public: * * @param p simple pointer to an array of T objects that is adopted * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR - * if p==NULL and no other failure code had been set + * if p==nullptr and no other failure code had been set * @stable ICU 56 */ - LocalArray(T *p, UErrorCode &errorCode) : LocalPointerBase(p) { - if(p==NULL && U_SUCCESS(errorCode)) { + LocalArray(T *p, UErrorCode &errorCode) : LocalArray(p) { + if(p==nullptr && U_SUCCESS(errorCode)) { errorCode=U_MEMORY_ALLOCATION_ERROR; } } @@ -402,8 +402,8 @@ public: * @param src source smart pointer * @stable ICU 56 */ - LocalArray(LocalArray &&src) U_NOEXCEPT : LocalPointerBase(src.ptr) { - src.ptr=NULL; + LocalArray(LocalArray &&src) U_NOEXCEPT : LocalArray(src.ptr) { + src.ptr=nullptr; } static LocalArray withLength(T *p, int32_t length) { @@ -422,7 +422,7 @@ public: * @draft ICU 64 */ explicit LocalArray(std::unique_ptr &&p) - : LocalPointerBase(p.release()) {} + : LocalArray(p.release()) {} #endif /* U_HIDE_DRAFT_API */ /** @@ -442,7 +442,8 @@ public: LocalArray &operator=(LocalArray &&src) U_NOEXCEPT { delete[] LocalPointerBase::ptr; LocalPointerBase::ptr=src.ptr; - src.ptr=NULL; + src.ptr=nullptr; + fLength=src.fLength; return *this; } @@ -457,6 +458,7 @@ public: */ LocalArray &operator=(std::unique_ptr &&p) U_NOEXCEPT { adoptInstead(p.release()); + fLength=-1; return *this; } #endif /* U_HIDE_DRAFT_API */ @@ -470,6 +472,9 @@ public: T *temp=LocalPointerBase::ptr; LocalPointerBase::ptr=other.ptr; other.ptr=temp; + int32_t tempLength=fLength; + fLength=other.fLength; + other.fLength=tempLength; } /** * Non-member LocalArray swap function. @@ -489,6 +494,7 @@ public: void adoptInstead(T *p) { delete[] LocalPointerBase::ptr; LocalPointerBase::ptr=p; + fLength=-1; } /** * Deletes the array it owns, @@ -515,6 +521,7 @@ public: } else { delete[] p; } + fLength=-1; } /** * Array item access (writable). diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index e39d6dab697..0329c68f13c 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -312,9 +312,14 @@ struct SingleUnit { int8_t power = 1; UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; int32_t simpleUnitIndex = 0; - StringPiece id; + StringPiece id = "one"; void appendTo(CharString& builder, UErrorCode& status) const { + if (simpleUnitIndex == 0) { + // Don't propagate SI prefixes and powers on one + builder.append("one", status); + return; + } int8_t posPower = power < 0 ? -power : power; if (posPower == 0) { status = U_ILLEGAL_ARGUMENT_ERROR; @@ -353,6 +358,18 @@ struct SingleUnit { builder.append(id, status); } + + char* build(UErrorCode& status) { + if (U_FAILURE(status)) { + return nullptr; + } + CharString builder; + if (power < 0) { + builder.append("one-per-", status); + } + appendTo(builder, status); + return builder.cloneData(status); + } }; class CompoundUnit { @@ -367,7 +384,7 @@ public: } } - void reciprocal() { + void takeReciprocal() { auto temp = std::move(numerator); numerator = std::move(denominator); denominator = std::move(temp); @@ -377,14 +394,23 @@ public: if (numerator.length() == 0) { builder.append("one", status); } else { - appendToImpl(numerator, numerator.length(), builder, status); + appendToImpl(numerator, builder, status); } if (denominator.length() > 0) { builder.append("-per-", status); - appendToImpl(denominator, denominator.length(), builder, status); + appendToImpl(denominator, builder, status); } } + char* build(UErrorCode& status) { + if (U_FAILURE(status)) { + return nullptr; + } + CharString builder; + appendTo(builder, status); + return builder.cloneData(status); + } + const SingleUnitList& getNumeratorUnits() const { return numerator; } @@ -397,12 +423,17 @@ public: return numerator.length() + denominator.length() == 1; } + bool isEmpty() const { + return numerator.length() + denominator.length() == 0; + } + private: SingleUnitList numerator; SingleUnitList denominator; - void appendToImpl(const SingleUnitList& unitList, int32_t len, CharString& builder, UErrorCode& status) const { + void appendToImpl(const SingleUnitList& unitList, CharString& builder, UErrorCode& status) const { bool first = true; + int32_t len = unitList.length(); for (int32_t i = 0; i < len; i++) { if (first) { first = false; @@ -433,17 +464,63 @@ private: } }; -class UnitIdentifierParser { +class SequenceUnit { public: - static UnitIdentifierParser from(StringPiece source, UErrorCode& status) { + typedef MaybeStackVector CompoundUnitList; + + void append(CompoundUnit&& compoundUnit, UErrorCode& status) { + CompoundUnit* destination = units.emplaceBack(); + if (!destination) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + *destination = std::move(compoundUnit); + } + + void appendTo(CharString& builder, UErrorCode& status) const { + if (units.length() == 0) { + builder.append("one", status); + return; + } + bool isFirst = true; + for (int32_t i = 0; i < units.length(); i++) { + if (isFirst) { + isFirst = false; + } else { + builder.append('+', status); + } + units[i]->appendTo(builder, status); + } + } + + char* build(UErrorCode& status) { if (U_FAILURE(status)) { - return UnitIdentifierParser(); + return nullptr; + } + CharString builder; + appendTo(builder, status); + return builder.cloneData(status); + } + + const CompoundUnitList& getUnits() const { + return units; + } + +private: + CompoundUnitList units; +}; + +class Parser { +public: + static Parser from(StringPiece source, UErrorCode& status) { + if (U_FAILURE(status)) { + return Parser(); } umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status); if (U_FAILURE(status)) { - return UnitIdentifierParser(); + return Parser(); } - return UnitIdentifierParser(source); + return Parser(source); } bool hasNext() const { @@ -474,7 +551,10 @@ public: int32_t previ = fIndex; SingleUnit singleUnit; nextSingleUnit(singleUnit, sawPlus, status); - if (sawPlus) { + if (U_FAILURE(status)) { + return; + } + if (sawPlus && !result.isEmpty()) { fIndex = previ; break; } @@ -497,6 +577,19 @@ public: return retval; } + SequenceUnit getOnlySequenceUnit(UErrorCode& status) { + SequenceUnit retval; + while (hasNext()) { + CompoundUnit compoundUnit; + nextCompoundUnit(compoundUnit, status); + if (U_FAILURE(status)) { + return retval; + } + retval.append(std::move(compoundUnit), status); + } + return retval; + } + private: int32_t fIndex = 0; StringPiece fSource; @@ -504,9 +597,9 @@ private: bool fAfterPer = false; - UnitIdentifierParser() : fSource(""), fTrie(u"") {} + Parser() : fSource(""), fTrie(u"") {} - UnitIdentifierParser(StringPiece source) + Parser(StringPiece source) : fSource(source), fTrie(kSerializedUnitExtrasStemTrie) {} Token nextToken(UErrorCode& status) { @@ -547,6 +640,11 @@ private: return; } + if (!hasNext()) { + // probably "one" + return; + } + // state: // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit) // 1 = power token seen (will not accept another power token) @@ -634,44 +732,22 @@ private: MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& status) { - UnitIdentifierParser parser = UnitIdentifierParser::from(identifier, status); - if (U_FAILURE(status)) { - // Unrecoverable error - return MeasureUnit(); - } - - CharString builder; - while (parser.hasNext()) { - CompoundUnit compoundUnit; - parser.nextCompoundUnit(compoundUnit, status); - if (U_FAILURE(status)) { - // Invalid syntax - return MeasureUnit(); - } - if (builder.length() > 0) { - builder.append('+', status); - } - compoundUnit.appendTo(builder, status); - } - - // Success - return MeasureUnit(builder.cloneData(status)); + return Parser::from(identifier, status).getOnlySequenceUnit(status).build(status); } UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const { const char* id = getIdentifier(); - UnitIdentifierParser parser = UnitIdentifierParser::from(id, status); + Parser parser = Parser::from(id, status); if (U_FAILURE(status)) { - // Unrecoverable error return UMEASURE_UNIT_SINGLE; } CompoundUnit compoundUnit; parser.nextCompoundUnit(compoundUnit, status); - if (compoundUnit.isSingle()) { - return UMEASURE_UNIT_SINGLE; - } else if (parser.hasNext()) { + if (parser.hasNext()) { return UMEASURE_UNIT_SEQUENCE; + } else if (compoundUnit.isSingle()) { + return UMEASURE_UNIT_SINGLE; } else { return UMEASURE_UNIT_COMPOUND; } @@ -679,65 +755,44 @@ UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const { UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const { const char* id = getIdentifier(); - return UnitIdentifierParser::from(id, status).getOnlySingleUnit(status).siPrefix; + return Parser::from(id, status).getOnlySingleUnit(status).siPrefix; } MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const { const char* id = getIdentifier(); - SingleUnit singleUnit = UnitIdentifierParser::from(id, status).getOnlySingleUnit(status); - if (U_FAILURE(status)) { - return *this; - } - + SingleUnit singleUnit = Parser::from(id, status).getOnlySingleUnit(status); singleUnit.siPrefix = prefix; - CharString builder; - singleUnit.appendTo(builder, status); - return MeasureUnit(builder.cloneData(status)); + return singleUnit.build(status); } int8_t MeasureUnit::getPower(UErrorCode& status) const { const char* id = getIdentifier(); - return UnitIdentifierParser::from(id, status).getOnlySingleUnit(status).power; + return Parser::from(id, status).getOnlySingleUnit(status).power; } MeasureUnit MeasureUnit::withPower(int8_t power, UErrorCode& status) const { const char* id = getIdentifier(); - SingleUnit singleUnit = UnitIdentifierParser::from(id, status).getOnlySingleUnit(status); - if (U_FAILURE(status)) { - return *this; - } - - CharString builder; + SingleUnit singleUnit = Parser::from(id, status).getOnlySingleUnit(status); singleUnit.power = power; - if (power < 0) { - builder.append("one-per-", status); - } - singleUnit.appendTo(builder, status); - return MeasureUnit(builder.cloneData(status)); + return singleUnit.build(status); } MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const { const char* id = getIdentifier(); - CompoundUnit compoundUnit = UnitIdentifierParser::from(id, status).getOnlyCompoundUnit(status); - if (U_FAILURE(status)) { - return *this; - } - - compoundUnit.reciprocal(); - CharString builder; - compoundUnit.appendTo(builder, status); - return MeasureUnit(builder.cloneData(status)); + CompoundUnit compoundUnit = Parser::from(id, status).getOnlyCompoundUnit(status); + compoundUnit.takeReciprocal(); + return compoundUnit.build(status); } MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const { const char* id = getIdentifier(); - CompoundUnit compoundUnit = UnitIdentifierParser::from(id, status).getOnlyCompoundUnit(status); + CompoundUnit compoundUnit = Parser::from(id, status).getOnlyCompoundUnit(status); if (U_FAILURE(status)) { return *this; } // Append other's first CompoundUnit to compoundUnit, then assert other has only one - UnitIdentifierParser otherParser = UnitIdentifierParser::from(other.getIdentifier(), status); + Parser otherParser = Parser::from(other.getIdentifier(), status); otherParser.nextCompoundUnit(compoundUnit, status); if (U_FAILURE(status)) { return *this; @@ -747,16 +802,14 @@ MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) c return *this; } - CharString builder; - compoundUnit.appendTo(builder, status); - return MeasureUnit(builder.cloneData(status)); + return compoundUnit.build(status); } LocalArray MeasureUnit::getSingleUnits(UErrorCode& status) const { const char* id = getIdentifier(); - CompoundUnit compoundUnit = UnitIdentifierParser::from(id, status).getOnlyCompoundUnit(status); + CompoundUnit compoundUnit = Parser::from(id, status).getOnlyCompoundUnit(status); if (U_FAILURE(status)) { - return LocalArray::withLength(nullptr, 0); + return LocalArray(); } const CompoundUnit::SingleUnitList& numerator = compoundUnit.getNumeratorUnits(); @@ -764,16 +817,30 @@ LocalArray MeasureUnit::getSingleUnits(UErrorCode& status) const { int32_t count = numerator.length() + denominator.length(); MeasureUnit* arr = new MeasureUnit[count]; - CharString builder; int32_t i = 0; for (int32_t j = 0; j < numerator.length(); j++) { - numerator[j]->appendTo(builder.clear(), status); - arr[i++] = MeasureUnit(builder.cloneData(status)); + arr[i++] = numerator[j]->build(status); } for (int32_t j = 0; j < denominator.length(); j++) { - builder.clear().append("one-per-", status); - denominator[j]->appendTo(builder, status); - arr[i++] = MeasureUnit(builder.cloneData(status)); + arr[i++] = denominator[j]->build(status); + } + + return LocalArray::withLength(arr, count); +} + +LocalArray MeasureUnit::getCompoundUnits(UErrorCode& status) const { + const char* id = getIdentifier(); + SequenceUnit sequenceUnit = Parser::from(id, status).getOnlySequenceUnit(status); + if (U_FAILURE(status)) { + return LocalArray(); + } + + const SequenceUnit::CompoundUnitList& unitVector = sequenceUnit.getUnits(); + int32_t count = unitVector.length(); + MeasureUnit* arr = new MeasureUnit[count]; + + for (int32_t i = 0; i < count; i++) { + arr[i] = unitVector[i]->build(status); } return LocalArray::withLength(arr, count); diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index b15ca5b0128..06d65c1adf4 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -79,6 +79,7 @@ private: void Test20332_PersonUnits(); void TestNumericTime(); void TestNumericTimeSomeSpecialFormats(); + void TestInvalidIdentifiers(); void TestCompoundUnitOperations(); void verifyFormat( const char *description, @@ -149,6 +150,11 @@ private: const char* identifier, const char** subIdentifiers, int32_t subIdentifierCount); + void verifySequenceUnit( + const MeasureUnit& unit, + const char* identifier, + const char** subIdentifiers, + int32_t subIdentifierCount); }; void MeasureFormatTest::runIndexedTest( @@ -193,6 +199,7 @@ void MeasureFormatTest::runIndexedTest( TESTCASE_AUTO(Test20332_PersonUnits); TESTCASE_AUTO(TestNumericTime); TESTCASE_AUTO(TestNumericTimeSomeSpecialFormats); + TESTCASE_AUTO(TestInvalidIdentifiers); TESTCASE_AUTO(TestCompoundUnitOperations); TESTCASE_AUTO_END; } @@ -3227,6 +3234,34 @@ void MeasureFormatTest::TestNumericTimeSomeSpecialFormats() { verifyFormat("Danish fhoursFminutes", fmtDa, fhoursFminutes, 2, "2.03,877"); } +void MeasureFormatTest::TestInvalidIdentifiers() { + IcuTestErrorCode status(*this, "TestInvalidIdentifiers"); + + const char* const inputs[] = { + "kilo", + "kilokilo", + "onekilo", + "meterkilo", + "meter-kilo", + "k", + "meter-", + "meter+", + "-meter", + "+meter", + "-kilometer", + "+kilometer", + "-p2-meter", + "+p2-meter", + "+", + "-" + }; + + for (const auto& input : inputs) { + MeasureUnit::forIdentifier(input, status); + status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + } +} + void MeasureFormatTest::TestCompoundUnitOperations() { IcuTestErrorCode status(*this, "TestCompoundUnitOperations"); @@ -3265,12 +3300,17 @@ void MeasureFormatTest::TestCompoundUnitOperations() { .product(kilometer, status) .product(kilometer, status) .reciprocal(status); + MeasureUnit overQuarticKilometer4 = meter.withPower(4, status) + .reciprocal(status) + .withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); verifySingleUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); verifySingleUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifySingleUnit(overQuarticKilometer4, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); + assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer4); MeasureUnit kiloSquareSecond = MeasureUnit::getSecond() .withPower(2, status).withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); @@ -3314,6 +3354,43 @@ void MeasureFormatTest::TestCompoundUnitOperations() { status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); meterSecond.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + + MeasureUnit footInch = MeasureUnit::forIdentifier("foot+inch", status); + MeasureUnit inchFoot = MeasureUnit::forIdentifier("inch+foot", status); + + const char* footInchSub[] = {"foot", "inch"}; + verifySequenceUnit(footInch, "foot+inch", + footInchSub, UPRV_LENGTHOF(footInchSub)); + const char* inchFootSub[] = {"inch", "foot"}; + verifySequenceUnit(inchFoot, "inch+foot", + inchFootSub, UPRV_LENGTHOF(inchFootSub)); + + assertTrue("order matters inequality", footInch != inchFoot); + + // TODO(ICU-20920): Enable the one1 tests when the dimensionless base unit ID is updated + // MeasureUnit one1; + MeasureUnit one2 = MeasureUnit::forIdentifier("one", status); + MeasureUnit one3 = MeasureUnit::forIdentifier("", status); + MeasureUnit squareOne = one2.withPower(2, status); + MeasureUnit onePerOne = one2.reciprocal(status); + MeasureUnit squareKiloOne = squareOne.withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); + MeasureUnit onePerSquareKiloOne = squareKiloOne.reciprocal(status); + MeasureUnit oneOne = MeasureUnit::forIdentifier("one-one", status); + MeasureUnit onePlusOne = MeasureUnit::forIdentifier("one+one", status); + + // verifySingleUnit(one1, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(one2, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(one3, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(squareOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(onePerOne, UMEASURE_SI_PREFIX_ONE, -1, "one-per-one"); + verifySingleUnit(squareKiloOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(onePerSquareKiloOne, UMEASURE_SI_PREFIX_ONE, -1, "one-per-one"); + verifySingleUnit(oneOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); + verifySingleUnit(onePlusOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); + + // assertTrue("one equality", one1 == one2); + assertTrue("one equality", one2 == one3); + assertTrue("one-per-one equality", onePerOne == onePerSquareKiloOne); } @@ -3447,6 +3524,35 @@ void MeasureFormatTest::verifyCompoundUnit( } } +void MeasureFormatTest::verifySequenceUnit( + const MeasureUnit& unit, + const char* identifier, + const char** subIdentifiers, + int32_t subIdentifierCount) { + IcuTestErrorCode status(*this, "verifySequenceUnit"); + UnicodeString uid(identifier, -1, US_INV); + assertEquals(uid + ": Identifier", + identifier, + unit.getIdentifier()); + status.errIfFailureAndReset("%s: Identifier", identifier); + assertTrue(uid + ": Constructor", + unit == MeasureUnit::forIdentifier(identifier, status)); + status.errIfFailureAndReset("%s: Constructor", identifier); + assertEquals(uid + ": Complexity", + UMEASURE_UNIT_SEQUENCE, + unit.getComplexity(status)); + status.errIfFailureAndReset("%s: Complexity", identifier); + + LocalArray subUnits = unit.getCompoundUnits(status); + assertEquals(uid + ": Length", subIdentifierCount, subUnits.length()); + for (int32_t i = 0;; i++) { + if (i >= subIdentifierCount || i >= subUnits.length()) break; + assertEquals(uid + ": Sub-unit #" + Int64ToUnicodeString(i), + subIdentifiers[i], + subUnits[i].getIdentifier()); + } +} + extern IntlTest *createMeasureFormatTest() { return new MeasureFormatTest(); } From 2ea62fea164a23833b0a0d37d90464c0be67a445 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Fri, 17 Jan 2020 17:13:16 +0100 Subject: [PATCH 14/15] Change public method to use StringPiece --- icu4c/source/i18n/measunit_extra.cpp | 2 +- icu4c/source/i18n/unicode/measunit.h | 2 +- icu4c/source/test/intltest/measfmttest.cpp | 5 +++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index 0329c68f13c..a7f24f1e348 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -731,7 +731,7 @@ private: } // namespace -MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& status) { +MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) { return Parser::from(identifier, status).getOnlySequenceUnit(status).build(status); } diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index cdf760fdbdb..4ec0f62684f 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -265,7 +265,7 @@ class U_I18N_API MeasureUnit: public UObject { * @param status Set if the identifier is invalid. * @draft ICU 67 */ - static MeasureUnit forIdentifier(const char* identifier, UErrorCode& status); + static MeasureUnit forIdentifier(StringPiece identifier, UErrorCode& status); /** * Copy assignment operator. diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 06d65c1adf4..4f65bc619fd 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -3355,6 +3355,11 @@ void MeasureFormatTest::TestCompoundUnitOperations() { meterSecond.withSIPrefix(UMEASURE_SI_PREFIX_CENTI, status); status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); + // Test that StringPiece does not overflow + MeasureUnit kiloSquareSecond2 = MeasureUnit::forIdentifier({secondCentimeter.getIdentifier(), 17}, status); + verifySingleUnit(kiloSquareSecond2, UMEASURE_SI_PREFIX_KILO, 2, "square-kilosecond"); + assertTrue("string piece equality", kiloSquareSecond == kiloSquareSecond2); + MeasureUnit footInch = MeasureUnit::forIdentifier("foot+inch", status); MeasureUnit inchFoot = MeasureUnit::forIdentifier("inch+foot", status); From 778565e94ec01e3e49459dedf273bca600420774 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Fri, 17 Jan 2020 17:32:52 +0100 Subject: [PATCH 15/15] Move measunit_extra.o to avoid merge conflict --- icu4c/source/i18n/Makefile.in | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/icu4c/source/i18n/Makefile.in b/icu4c/source/i18n/Makefile.in index f6f9860fb3a..8f197d89b1a 100644 --- a/icu4c/source/i18n/Makefile.in +++ b/icu4c/source/i18n/Makefile.in @@ -97,7 +97,7 @@ uspoof.o uspoof_impl.o uspoof_build.o uspoof_conf.o smpdtfst.o \ ztrans.o zrule.o vzone.o fphdlimp.o fpositer.o ufieldpositer.o \ decNumber.o decContext.o alphaindex.o tznames.o tznames_impl.o tzgnames.o \ tzfmt.o compactdecimalformat.o gender.o region.o scriptset.o \ -uregion.o reldatefmt.o quantityformatter.o measunit.o \ +uregion.o reldatefmt.o quantityformatter.o measunit.o measunit_extra.o \ sharedbreakiterator.o scientificnumberformatter.o dayperiodrules.o nounit.o \ number_affixutils.o number_compact.o number_decimalquantity.o \ number_decimfmtprops.o number_fluent.o number_formatimpl.o number_grouping.o \ @@ -114,8 +114,7 @@ numparse_symbols.o numparse_decimal.o numparse_scientific.o numparse_currency.o numparse_affixes.o numparse_compositions.o numparse_validators.o \ numrange_fluent.o numrange_impl.o \ erarules.o \ -formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o formatted_string_builder.o \ -measunit_extra.o +formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o formatted_string_builder.o ## Header files to install HEADERS = $(srcdir)/unicode/*.h