From 99f9802fec08d1479dc4bfcc0776a8c2a7285c68 Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe <17109322+hugovdm@users.noreply.github.com> Date: Fri, 3 Apr 2020 04:35:29 +0200 Subject: [PATCH 01/14] ICU-21060 Fix the foo-per-a-b -> foo-b-per-a bug. --- icu4c/source/i18n/measunit_extra.cpp | 25 ++++++++++++---------- icu4c/source/test/intltest/measfmttest.cpp | 4 +++- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index 8af9e4141f3..b997167b311 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -42,7 +42,7 @@ constexpr int32_t kCompoundPartOffset = 128; enum CompoundPart { COMPOUND_PART_PER = kCompoundPartOffset, COMPOUND_PART_TIMES, - COMPOUND_PART_PLUS, + COMPOUND_PART_AND, }; constexpr int32_t kPowerPartOffset = 256; @@ -226,7 +226,7 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) { // Add syntax parts (compound, power prefixes) b.add(u"-per-", COMPOUND_PART_PER, status); b.add(u"-", COMPOUND_PART_TIMES, status); - b.add(u"-and-", COMPOUND_PART_PLUS, status); + b.add(u"-and-", COMPOUND_PART_AND, status); b.add(u"square-", POWER_PART_P2, status); b.add(u"cubic-", POWER_PART_P3, status); b.add(u"p2-", POWER_PART_P2, status); @@ -383,8 +383,8 @@ private: return Token(match); } - void nextSingleUnit(SingleUnitImpl& result, bool& sawPlus, UErrorCode& status) { - sawPlus = false; + void nextSingleUnit(SingleUnitImpl& result, bool& sawAnd, UErrorCode& status) { + sawAnd = false; if (U_FAILURE(status)) { return; } @@ -422,10 +422,13 @@ private: break; case COMPOUND_PART_TIMES: + if (fAfterPer) { + result.dimensionality = -1; + } break; - case COMPOUND_PART_PLUS: - sawPlus = true; + case COMPOUND_PART_AND: + sawAnd = true; fAfterPer = false; break; } @@ -462,7 +465,7 @@ private: case Token::TYPE_ONE: // Skip "one" and go to the next unit - return nextSingleUnit(result, sawPlus, status); + return nextSingleUnit(result, sawAnd, 
status); case Token::TYPE_SIMPLE_UNIT: result.index = token.getSimpleUnitIndex(); @@ -485,9 +488,9 @@ private: } int32_t unitNum = 0; while (hasNext()) { - bool sawPlus; + bool sawAnd; SingleUnitImpl singleUnit; - nextSingleUnit(singleUnit, sawPlus, status); + nextSingleUnit(singleUnit, sawAnd, status); if (U_FAILURE(status)) { return; } @@ -495,13 +498,13 @@ private: continue; } bool added = result.append(singleUnit, status); - if (sawPlus && !added) { + if (sawAnd && !added) { // Two similar units are not allowed in a mixed unit status = kUnitIdentifierSyntaxError; return; } if ((++unitNum) >= 2) { - UMeasureUnitComplexity complexity = sawPlus + UMeasureUnitComplexity complexity = sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND; if (unitNum == 2) { diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 6f6c14cc8df..9306a5660bc 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -3427,7 +3427,9 @@ void MeasureFormatTest::TestIdentifiers() { const char* id; const char* normalized; } cases[] = { - { true, "square-meter-per-square-meter", "square-meter-per-square-meter" }, + {true, "square-meter-per-square-meter", "square-meter-per-square-meter"}, + {true, "kilogram-meter-per-square-meter-square-second", + "kilogram-meter-per-square-meter-square-second"}, // TODO(ICU-20920): Add more test cases once the proper ranking is available. }; for (const auto& cas : cases) { From cb544f47e075f1b5b71714d7ffdee0ad12cfe77c Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe <17109322+hugovdm@users.noreply.github.com> Date: Tue, 31 Mar 2020 13:58:16 +0200 Subject: [PATCH 02/14] ICU-21060 Fix heap-use-after-free bug. 
--- icu4c/source/i18n/measunit_extra.cpp | 19 +++++++++---- icu4c/source/i18n/measunit_impl.h | 3 -- icu4c/source/test/intltest/measfmttest.cpp | 32 ++++++++++++++++++++++ 3 files changed, 45 insertions(+), 9 deletions(-) diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index b997167b311..e43ee6597d8 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -321,6 +321,14 @@ private: class Parser { public: + /** + * Factory function for parsing the given identifier. + * + * @param source The identifier to parse. This function does not make a copy + * of source: the underlying string that source points at, must outlive the + * parser. + * @param status ICU error code. + */ static Parser from(StringPiece source, UErrorCode& status) { if (U_FAILURE(status)) { return Parser(); @@ -340,6 +348,10 @@ public: private: int32_t fIndex = 0; + + // Since we're not owning this memory, whatever is passed to the constructor + // should live longer than this Parser - and the parser shouldn't return any + // references to that string. 
StringPiece fSource; UCharsTrie fTrie; @@ -399,7 +411,6 @@ private: // 1 = power token seen (will not accept another power token) // 2 = SI prefix token seen (will not accept a power or SI prefix token) int32_t state = 0; - int32_t previ = fIndex; // Maybe read a compound part if (fIndex != 0) { @@ -432,7 +443,6 @@ private: fAfterPer = false; break; } - previ = fIndex; } // Read a unit @@ -449,7 +459,6 @@ private: return; } result.dimensionality *= token.getPower(); - previ = fIndex; state = 1; break; @@ -459,7 +468,6 @@ private: return; } result.siPrefix = token.getSIPrefix(); - previ = fIndex; state = 2; break; @@ -469,7 +477,6 @@ private: case Token::TYPE_SIMPLE_UNIT: result.index = token.getSimpleUnitIndex(); - result.identifier = fSource.substr(previ, fIndex - previ); return; default: @@ -576,7 +583,7 @@ void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& o return; } - output.append(singleUnit.identifier, status); + output.appendInvariantChars(gSimpleUnits[singleUnit.index], status); } /** diff --git a/icu4c/source/i18n/measunit_impl.h b/icu4c/source/i18n/measunit_impl.h index cf0ea63d2af..9657ff2c1d9 100644 --- a/icu4c/source/i18n/measunit_impl.h +++ b/icu4c/source/i18n/measunit_impl.h @@ -69,9 +69,6 @@ struct SingleUnitImpl : public UMemory { /** Simple unit index, unique for every simple unit. */ int32_t index = 0; - /** Simple unit identifier; memory not owned by the SimpleUnit. */ - StringPiece identifier; - /** SI prefix. 
**/ UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 9306a5660bc..79eb9b461a0 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -82,6 +82,7 @@ private: void TestInvalidIdentifiers(); void TestCompoundUnitOperations(); void TestIdentifiers(); + void Test21060_AddressSanitizerProblem(); void verifyFormat( const char *description, @@ -204,6 +205,7 @@ void MeasureFormatTest::runIndexedTest( TESTCASE_AUTO(TestInvalidIdentifiers); TESTCASE_AUTO(TestCompoundUnitOperations); TESTCASE_AUTO(TestIdentifiers); + TESTCASE_AUTO(Test21060_AddressSanitizerProblem); TESTCASE_AUTO_END; } @@ -3445,6 +3447,36 @@ void MeasureFormatTest::TestIdentifiers() { } } +// ICU-21060 +void MeasureFormatTest::Test21060_AddressSanitizerProblem() { + UErrorCode status = U_ZERO_ERROR; + MeasureUnit first = MeasureUnit::forIdentifier("one", status); + + // Experimentally, a compound unit like "kilogram-meter" failed. A single + // unit like "kilogram" or "meter" did not fail, did not trigger the + // problem. 
+ MeasureUnit crux = MeasureUnit::forIdentifier("one-per-meter", status); + + // Heap allocation of a new CharString for first.identifier happens here: + first = first.product(crux, status); + + // Constructing second from first's identifier resulted in a failure later, + // as second held a reference to a substring of first's identifier: + MeasureUnit second = MeasureUnit::forIdentifier(first.getIdentifier(), status); + + // Heap is freed here, as an old first.identifier CharString is deallocated + // and a new CharString is allocated: + first = first.product(crux, status); + + // Proving we've had no failure yet: + if (U_FAILURE(status)) return; + + // heap-use-after-free failure happened here, since a SingleUnitImpl had + // held onto a StringPiece pointing at a substring of an identifier that was + // freed above: + second = second.product(crux, status); +} + void MeasureFormatTest::verifyFieldPosition( const char *description, From 79248911dc04c38b2bd77b15678115c17eb8fa43 Mon Sep 17 00:00:00 2001 From: Frank Tang Date: Mon, 6 Apr 2020 14:13:02 -0700 Subject: [PATCH 03/14] ICU-21065 Move U_DEFINE_LOCAL_OPEN_POINTER Move IU_DEFINE_LOCAL_OPEN_POINTER outside U_CDECL_BEGIN / _END to fix conflicting declaration of C function --- icu4c/source/common/unicode/umutablecptrie.h | 38 ++++++++++---------- icu4c/source/common/unicode/utext.h | 38 ++++++++++---------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/icu4c/source/common/unicode/umutablecptrie.h b/icu4c/source/common/unicode/umutablecptrie.h index 13e71ef25e8..f2af36477d5 100644 --- a/icu4c/source/common/unicode/umutablecptrie.h +++ b/icu4c/source/common/unicode/umutablecptrie.h @@ -83,25 +83,6 @@ umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode); U_CAPI void U_EXPORT2 umutablecptrie_close(UMutableCPTrie *trie); -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUMutableCPTriePointer - * "Smart pointer" class, closes a UMutableCPTrie via 
umutablecptrie_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 63 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUMutableCPTriePointer, UMutableCPTrie, umutablecptrie_close); - -U_NAMESPACE_END - -#endif - /** * Creates a mutable trie with the same contents as the UCPMap. * You must umutablecptrie_close() the mutable trie once you are done using it. @@ -235,4 +216,23 @@ umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieVal U_CDECL_END +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUMutableCPTriePointer + * "Smart pointer" class, closes a UMutableCPTrie via umutablecptrie_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 63 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUMutableCPTriePointer, UMutableCPTrie, umutablecptrie_close); + +U_NAMESPACE_END + +#endif + #endif diff --git a/icu4c/source/common/unicode/utext.h b/icu4c/source/common/unicode/utext.h index 196056bfb89..37d71a31728 100644 --- a/icu4c/source/common/unicode/utext.h +++ b/icu4c/source/common/unicode/utext.h @@ -183,25 +183,6 @@ typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */ U_STABLE UText * U_EXPORT2 utext_close(UText *ut); -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUTextPointer - * "Smart pointer" class, closes a UText via utext_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close); - -U_NAMESPACE_END - -#endif - /** * Open a read-only UText implementation for UTF-8 strings. * @@ -1599,5 +1580,24 @@ enum { U_CDECL_END +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUTextPointer + * "Smart pointer" class, closes a UText via utext_close(). 
+ * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close); + +U_NAMESPACE_END + +#endif + #endif From 7a434a413f78fac3b0318d004fd6703fd4ffc390 Mon Sep 17 00:00:00 2001 From: Daniel Ju Date: Wed, 8 Apr 2020 11:32:10 -0700 Subject: [PATCH 04/14] ICU-21061 Update version numbers for 67GA --- icu4j/build.properties | 6 +++--- icu4j/main/shared/data/icudata.jar | 4 ++-- icu4j/main/shared/data/icutzdata.jar | 2 +- icu4j/main/shared/data/testdata.jar | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/icu4j/build.properties b/icu4j/build.properties index 61b6d0bb594..10f87c968fb 100644 --- a/icu4j/build.properties +++ b/icu4j/build.properties @@ -6,7 +6,7 @@ #******************************************************************************* api.report.version = 67 api.report.prev.version = 66 -release.file.ver = 67rc -api.doc.version = 67 Release Candidate -maven.pom.ver = 67.1-SNAPSHOT +release.file.ver = 67_1 +api.doc.version = 67.1 +maven.pom.ver = 67.1 diff --git a/icu4j/main/shared/data/icudata.jar b/icu4j/main/shared/data/icudata.jar index a8a137ab585..b2b2b6f0052 100644 --- a/icu4j/main/shared/data/icudata.jar +++ b/icu4j/main/shared/data/icudata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:71879fb1441641469ad42809317e9326cda811b2c48d04493b62aefe0ce26b0b -size 13149611 +oid sha256:5b89416e9ba1ddffa9e0e2a2d0023e40b7ce9ddde3aef80610c0d317187dce9a +size 13149615 diff --git a/icu4j/main/shared/data/icutzdata.jar b/icu4j/main/shared/data/icutzdata.jar index f455c5df955..0fad4155b90 100644 --- a/icu4j/main/shared/data/icutzdata.jar +++ b/icu4j/main/shared/data/icutzdata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4360c9bc505145e73669436e8c188d0dc9f3a831333228df9e291ebf6060908c +oid sha256:ac3d2dd0ae390a10159fee6fa71b375729fa2b9a461c393605a7e547ef98d7da 
size 94304 diff --git a/icu4j/main/shared/data/testdata.jar b/icu4j/main/shared/data/testdata.jar index 126e12712c0..c0e2d9e3963 100644 --- a/icu4j/main/shared/data/testdata.jar +++ b/icu4j/main/shared/data/testdata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:94af01f2a6e9f05c76a50247ffc64f8fd732c111cef83fd928920335eceb0dbc +oid sha256:12001a00c73f1436871c9b237bed66228595b73383ebf8908b51d76b35b03862 size 726452 From 3fb3929f80370f974f26c40015fed91c626594d5 Mon Sep 17 00:00:00 2001 From: Elango Cheran Date: Fri, 10 Apr 2020 11:22:56 -0700 Subject: [PATCH 05/14] ICU-21040 Fix segfaults in no data tests --- icu4c/source/i18n/number_formatimpl.cpp | 18 ++++++++++++++++++ icu4c/source/test/intltest/dtifmtts.cpp | 3 +++ icu4c/source/test/intltest/numfmtst.cpp | 6 ++++++ icu4c/source/test/intltest/transtst.cpp | 3 +++ 4 files changed, 30 insertions(+) diff --git a/icu4c/source/i18n/number_formatimpl.cpp b/icu4c/source/i18n/number_formatimpl.cpp index 8042979bdc3..5bba09cfb52 100644 --- a/icu4c/source/i18n/number_formatimpl.cpp +++ b/icu4c/source/i18n/number_formatimpl.cpp @@ -203,6 +203,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, patternStyle = CLDR_PATTERN_STYLE_CURRENCY; } pattern = utils::getPatternForStyle(macros.locale, nsName, patternStyle, status); + if (U_FAILURE(status)) { + return nullptr; + } } auto patternInfo = new ParsedPatternInfo(); if (patternInfo == nullptr) { @@ -211,6 +214,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, } fPatternInfo.adoptInstead(patternInfo); PatternParser::parseToPatternInfo(UnicodeString(pattern), *patternInfo, status); + if (U_FAILURE(status)) { + return nullptr; + } ///////////////////////////////////////////////////////////////////////////////////// /// START POPULATING THE DEFAULT MICROPROPS AND BUILDING THE MICROPROPS GENERATOR /// @@ -241,6 +247,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& 
macros, bool safe, roundingMode = precision.fRoundingMode; } fMicros.rounder = {precision, roundingMode, currency, status}; + if (U_FAILURE(status)) { + return nullptr; + } // Grouping strategy if (!macros.grouper.isBogus()) { @@ -323,6 +332,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, if (safe) { fImmutablePatternModifier.adoptInstead(patternModifier->createImmutable(status)); } + if (U_FAILURE(status)) { + return nullptr; + } // Outer modifier (CLDR units and currency long names) if (isCldrUnit) { @@ -349,6 +361,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, // No outer modifier required fMicros.modOuter = &fMicros.helpers.emptyWeakModifier; } + if (U_FAILURE(status)) { + return nullptr; + } // Compact notation if (macros.notation.fType == Notation::NTN_COMPACT) { @@ -371,6 +386,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, fCompactHandler.adoptInstead(newCompactHandler); chain = fCompactHandler.getAlias(); } + if (U_FAILURE(status)) { + return nullptr; + } // Always add the pattern modifier as the last element of the chain. 
if (safe) { diff --git a/icu4c/source/test/intltest/dtifmtts.cpp b/icu4c/source/test/intltest/dtifmtts.cpp index 2f59c1905cb..9713dee8998 100644 --- a/icu4c/source/test/intltest/dtifmtts.cpp +++ b/icu4c/source/test/intltest/dtifmtts.cpp @@ -1943,6 +1943,9 @@ void DateIntervalFormatTest::testTicket20707() { int32_t j = 0; for (const UnicodeString skeleton : {u"hh", u"HH", u"kk", u"KK", u"jj", u"JJs", u"CC"}) { LocalPointer dtifmt(DateIntervalFormat::createInstance(skeleton, locale, status)); + if (status.errDataIfFailureAndReset()) { + continue; + } FieldPosition fposition; UnicodeString result; LocalPointer calendar(Calendar::createInstance(TimeZone::createTimeZone(timeZone), status)); diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp index 62e161f5703..5a26a7a5607 100644 --- a/icu4c/source/test/intltest/numfmtst.cpp +++ b/icu4c/source/test/intltest/numfmtst.cpp @@ -9679,6 +9679,9 @@ void NumberFormatTest::Test20956_MonetarySymbolGetters() { IcuTestErrorCode status(*this, "Test20956_MonetarySymbolGetters"); LocalPointer decimalFormat(static_cast( NumberFormat::createCurrencyInstance("et", status))); + if (status.errDataIfFailureAndReset()) { + return; + } decimalFormat->setCurrency(u"EEK"); @@ -9823,6 +9826,9 @@ void NumberFormatTest::Test20961_CurrencyPluralPattern() { { LocalPointer decimalFormat(static_cast( NumberFormat::createInstance("en-US", UNUM_CURRENCY_PLURAL, status))); + if (status.errDataIfFailureAndReset()) { + return; + } UnicodeString result; decimalFormat->toPattern(result); assertEquals("Currency pattern", u"#,##0.00 ¤¤¤", result); diff --git a/icu4c/source/test/intltest/transtst.cpp b/icu4c/source/test/intltest/transtst.cpp index fd7f733a913..8e7bcb09174 100644 --- a/icu4c/source/test/intltest/transtst.cpp +++ b/icu4c/source/test/intltest/transtst.cpp @@ -1562,6 +1562,7 @@ void TransliteratorTest::TestBasicTransliteratorEvenWithoutData() { BASIC_TRANSLITERATOR_ID[i], UTRANS_FORWARD, parseError, 
status)); if (translit.get() == nullptr || !U_SUCCESS(status)) { dataerrln("FAIL: createInstance %s failed", BASIC_TRANSLITERATOR_ID[i]); + continue; } UnicodeString data(TEST_DATA); UnicodeString expected(EXPECTED_RESULTS[i]); @@ -1570,6 +1571,7 @@ void TransliteratorTest::TestBasicTransliteratorEvenWithoutData() { dataerrln(UnicodeString("FAIL: expected translit(") + BASIC_TRANSLITERATOR_ID[i] + ") = '" + EXPECTED_RESULTS[i] + "' but got '" + data); + continue; } } for (int32_t i=0; BASIC_TRANSLITERATOR_RULES[i]; i++) { @@ -1580,6 +1582,7 @@ void TransliteratorTest::TestBasicTransliteratorEvenWithoutData() { BASIC_TRANSLITERATOR_RULES[i], UTRANS_FORWARD, parseError, status)); if (translit.get() == nullptr || !U_SUCCESS(status)) { dataerrln("FAIL: createFromRules %s failed", BASIC_TRANSLITERATOR_RULES[i]); + continue; } } } From 9b2092fa8921a765e27ee110f15fcc753f0c8e56 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Mon, 13 Apr 2020 19:45:27 -0500 Subject: [PATCH 06/14] ICU-21073 Fix ICU Data Build Tool in "locale" mode. --- .ci-builds/data-filter.json | 2 +- docs/userguide/icu_data/buildtool.md | 2 +- icu4c/source/data/BUILDRULES.py | 27 ++++++++++++++++++- .../python/icutools/databuilder/filtration.py | 2 +- 4 files changed, 29 insertions(+), 4 deletions(-) diff --git a/.ci-builds/data-filter.json b/.ci-builds/data-filter.json index d2dd74d4bb1..ffde99567d0 100644 --- a/.ci-builds/data-filter.json +++ b/.ci-builds/data-filter.json @@ -1,6 +1,6 @@ { "localeFilter": { - "filterType": "language", + "filterType": "locale", "whitelist": [ "en", "de", diff --git a/docs/userguide/icu_data/buildtool.md b/docs/userguide/icu_data/buildtool.md index aa34b8ed8e5..7c3eadcc262 100644 --- a/docs/userguide/icu_data/buildtool.md +++ b/docs/userguide/icu_data/buildtool.md @@ -202,7 +202,7 @@ summarizes the ICU data files and their corresponding features and categories: | Region Display
Names | `"region_tree"` | region/\*.txt | **1.1 MiB** | | Rule-Based
Number Formatting
(Spellout, Ordinals) | `"rbnf_tree"` | rbnf/\*.txt | 538 KiB | | StringPrep | `"stringprep"` | sprep/\*.txt | 193 KiB | -| Time Zones | `"misc"`
`"zone_tree"` | misc/metaZones.txt
misc/timezoneTypes.txt
misc/windowsZones.txt
misc/zoneinfo64.txt
zone/\*.txt | 41 KiB
20 KiB
22 KiB
151 KiB
**2.7 MiB** | +| Time Zones | `"misc"`
`"zone_tree"`
`"zone_supplemental"` | misc/metaZones.txt
misc/timezoneTypes.txt
misc/windowsZones.txt
misc/zoneinfo64.txt
zone/\*.txt
zone/tzdbNames.txt | 41 KiB
20 KiB
22 KiB
151 KiB
**2.7 MiB**
4.8 KiB | | Transliteration | `"translit"` | translit/\*.txt | 685 KiB | | Unicode Character
Names | `"unames"` | in/unames.icu | 269 KiB | | Unicode Text Layout | `"ulayout"` | in/ulayout.icu | 14 KiB | diff --git a/icu4c/source/data/BUILDRULES.py b/icu4c/source/data/BUILDRULES.py index 2338afd1f71..e6ddea95c47 100644 --- a/icu4c/source/data/BUILDRULES.py +++ b/icu4c/source/data/BUILDRULES.py @@ -33,6 +33,7 @@ def generate(config, io, common_vars): requests += generate_unames(config, io, common_vars) requests += generate_misc(config, io, common_vars) requests += generate_curr_supplemental(config, io, common_vars) + requests += generate_zone_supplemental(config, io, common_vars) requests += generate_translit(config, io, common_vars) # Res Tree Files @@ -399,6 +400,29 @@ def generate_curr_supplemental(config, io, common_vars): ] +def generate_zone_supplemental(config, io, common_vars): + # tzdbNames Res File + input_file = InFile("zone/tzdbNames.txt") + input_basename = "tzdbNames.txt" + output_file = OutFile("zone/tzdbNames.res") + return [ + SingleExecutionRequest( + name = "zone_supplemental_res", + category = "zone_supplemental", + dep_targets = [], + input_files = [input_file], + output_files = [output_file], + tool = IcuTool("genrb"), + args = "-s {IN_DIR}/zone -d {OUT_DIR}/zone -i {OUT_DIR} " + "-k " + "{INPUT_BASENAME}", + format_with = { + "INPUT_BASENAME": input_basename + } + ) + ] + + def generate_translit(config, io, common_vars): input_files = [ InFile("translit/root.txt"), @@ -444,10 +468,11 @@ def generate_tree( requests = [] category = "%s_tree" % sub_dir out_prefix = "%s/" % out_sub_dir if out_sub_dir else "" - # TODO: Clean this up for curr input_files = [InFile(filename) for filename in io.glob("%s/*.txt" % sub_dir)] if sub_dir == "curr": input_files.remove(InFile("curr/supplementalData.txt")) + if sub_dir == "zone": + input_files.remove(InFile("zone/tzdbNames.txt")) input_basenames = [v.filename[len(sub_dir)+1:] for v in input_files] output_files = [ OutFile("%s%s.res" % (out_prefix, v[:-4])) diff --git 
a/icu4c/source/python/icutools/databuilder/filtration.py b/icu4c/source/python/icutools/databuilder/filtration.py index acdba0ee5b2..554013ac98d 100644 --- a/icu4c/source/python/icutools/databuilder/filtration.py +++ b/icu4c/source/python/icutools/databuilder/filtration.py @@ -217,7 +217,7 @@ class LocaleFilter(Filter): return "root" i = locale.rfind("_") if i < 0: - assert locale == "root" + assert locale == "root", "Invalid locale: %s/%s" % (tree, locale) return None return locale[:i] From b9d1ba87f545744f2b81f1f413484f55a0da3d2f Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Tue, 14 Apr 2020 13:25:41 -0700 Subject: [PATCH 07/14] ICU-20936 copy the new direction field --- icu4c/source/common/localematcher.cpp | 2 ++ icu4c/source/test/intltest/localematchertest.cpp | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/icu4c/source/common/localematcher.cpp b/icu4c/source/common/localematcher.cpp index 7f0dceb66ae..85db8c8bf32 100644 --- a/icu4c/source/common/localematcher.cpp +++ b/icu4c/source/common/localematcher.cpp @@ -466,6 +466,7 @@ LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT : thresholdDistance(src.thresholdDistance), demotionPerDesiredLocale(src.demotionPerDesiredLocale), favorSubtag(src.favorSubtag), + direction(src.direction), supportedLocales(src.supportedLocales), lsrs(src.lsrs), supportedLocalesLength(src.supportedLocalesLength), supportedLsrToIndex(src.supportedLsrToIndex), @@ -502,6 +503,7 @@ LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) U_NOEXCEPT { thresholdDistance = src.thresholdDistance; demotionPerDesiredLocale = src.demotionPerDesiredLocale; favorSubtag = src.favorSubtag; + direction = src.direction; supportedLocales = src.supportedLocales; lsrs = src.lsrs; supportedLocalesLength = src.supportedLocalesLength; diff --git a/icu4c/source/test/intltest/localematchertest.cpp b/icu4c/source/test/intltest/localematchertest.cpp index 6d7f48da606..683466b3c56 100644 --- 
a/icu4c/source/test/intltest/localematchertest.cpp +++ b/icu4c/source/test/intltest/localematchertest.cpp @@ -6,6 +6,7 @@ #include #include +#include #include "unicode/utypes.h" #include "unicode/localematcher.h" @@ -333,7 +334,9 @@ void LocaleMatcherTest::testDirection() { { // arz is a close one-way match to ar, and the region matches. // (Egyptian Arabic vs. Arabic) - LocaleMatcher withOneWay = builder.build(errorCode); + // Also explicitly exercise the move copy constructor. + LocaleMatcher built = builder.build(errorCode); + LocaleMatcher withOneWay(std::move(built)); Locale::RangeIterator desiredIter(ARRAY_RANGE(desired)); assertEquals("with one-way", "ar", locString(withOneWay.getBestMatch(desiredIter, errorCode))); @@ -341,8 +344,11 @@ void LocaleMatcherTest::testDirection() { { // nb is a less close two-way match to nn, and the regions differ. // (Norwegian Bokmal vs. Nynorsk) - LocaleMatcher onlyTwoWay = + // Also explicitly exercise the move assignment operator. + LocaleMatcher onlyTwoWay = builder.build(errorCode); + LocaleMatcher built = builder.setDirection(ULOCMATCH_DIRECTION_ONLY_TWO_WAY).build(errorCode); + onlyTwoWay = std::move(built); Locale::RangeIterator desiredIter(ARRAY_RANGE(desired)); assertEquals("only two-way", "nn", locString(onlyTwoWay.getBestMatch(desiredIter, errorCode))); From e03fa7054113dc13d7a5bd1f70b6cc3e9bf33a64 Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe <17109322+hugovdm@users.noreply.github.com> Date: Sat, 18 Apr 2020 01:24:20 +0200 Subject: [PATCH 08/14] ICU-21060 Fix behaviour of -per-, -and-, and dimensionless units. 
--- icu4c/source/i18n/measunit.cpp | 6 +- icu4c/source/i18n/measunit_extra.cpp | 235 +++++++++++++++------ icu4c/source/i18n/measunit_impl.h | 65 +++++- icu4c/source/i18n/nounit.cpp | 2 +- icu4c/source/i18n/unicode/measunit.h | 22 +- icu4c/source/test/intltest/measfmttest.cpp | 196 +++++++++++------ 6 files changed, 375 insertions(+), 151 deletions(-) diff --git a/icu4c/source/i18n/measunit.cpp b/icu4c/source/i18n/measunit.cpp index 344ba45fb50..4edf130b7e9 100644 --- a/icu4c/source/i18n/measunit.cpp +++ b/icu4c/source/i18n/measunit.cpp @@ -537,9 +537,9 @@ static const char * const gSubTypes[] = { "solar-mass", "stone", "ton", - "one", - "percent", - "permille", + "", // TODO(ICU-21076): manual edit of what should have been generated by Java. + "percent", // TODO(ICU-21076): regenerate, deal with duplication. + "permille", // TODO(ICU-21076): regenerate, deal with duplication. "gigawatt", "horsepower", "kilowatt", diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index e43ee6597d8..ebc4ac33322 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -34,17 +34,32 @@ namespace { // TODO: Propose a new error code for this? constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR; -// This is to ensure we only insert positive integers into the trie +// Trie value offset for SI Prefixes. This is big enough to ensure we only +// insert positive integers into the trie. constexpr int32_t kSIPrefixOffset = 64; +// Trie value offset for compound parts, e.g. "-per-", "-", "-and-". constexpr int32_t kCompoundPartOffset = 128; enum CompoundPart { + // Represents "-per-" COMPOUND_PART_PER = kCompoundPartOffset, + // Represents "-" COMPOUND_PART_TIMES, + // Represents "-and-" COMPOUND_PART_AND, }; +// Trie value offset for "per-". 
+constexpr int32_t kInitialCompoundPartOffset = 192; + +enum InitialCompoundPart { + // Represents "per-", the only compound part that can appear at the start of + // an identifier. + INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset, +}; + +// Trie value offset for powers like "square-", "cubic-", "p2-" etc. constexpr int32_t kPowerPartOffset = 256; enum PowerPart { @@ -64,6 +79,8 @@ enum PowerPart { POWER_PART_P15, }; +// Trie value offset for simple units, e.g. "gram", "nautical-mile", +// "fluid-ounce-imperial". constexpr int32_t kSimpleUnitOffset = 512; const struct SIPrefixStrings { @@ -94,7 +111,6 @@ const struct SIPrefixStrings { // TODO(ICU-21059): Get this list from data const char16_t* const gSimpleUnits[] = { - u"one", // note: expected to be index 0 u"candela", u"carat", u"gram", @@ -227,6 +243,7 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) { b.add(u"-per-", COMPOUND_PART_PER, status); b.add(u"-", COMPOUND_PART_TIMES, status); b.add(u"-and-", COMPOUND_PART_AND, status); + b.add(u"per-", INITIAL_COMPOUND_PART_PER, status); b.add(u"square-", POWER_PART_P2, status); b.add(u"cubic-", POWER_PART_P3, status); b.add(u"p2-", POWER_PART_P2, status); @@ -270,28 +287,30 @@ public: enum Type { TYPE_UNDEFINED, TYPE_SI_PREFIX, + // Token type for "-per-", "-", and "-and-". TYPE_COMPOUND_PART, + // Token type for "per-". + TYPE_INITIAL_COMPOUND_PART, TYPE_POWER_PART, - TYPE_ONE, TYPE_SIMPLE_UNIT, }; + // Calling getType() is invalid, resulting in an assertion failure, if Token + // value isn't positive. 
Type getType() const { - if (fMatch <= 0) { - UPRV_UNREACHABLE; - } + U_ASSERT(fMatch > 0); if (fMatch < kCompoundPartOffset) { return TYPE_SI_PREFIX; } - if (fMatch < kPowerPartOffset) { + if (fMatch < kInitialCompoundPartOffset) { return TYPE_COMPOUND_PART; } + if (fMatch < kPowerPartOffset) { + return TYPE_INITIAL_COMPOUND_PART; + } if (fMatch < kSimpleUnitOffset) { return TYPE_POWER_PART; } - if (fMatch == kSimpleUnitOffset) { - return TYPE_ONE; - } return TYPE_SIMPLE_UNIT; } @@ -300,11 +319,22 @@ public: return static_cast(fMatch - kSIPrefixOffset); } + // Valid only for tokens with type TYPE_COMPOUND_PART. int32_t getMatch() const { U_ASSERT(getType() == TYPE_COMPOUND_PART); return fMatch; } + int32_t getInitialCompoundPart() const { + // Even if there is only one InitialCompoundPart value, we have this + // function for the simplicity of code consistency. + U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART); + // Defensive: if this assert fails, code using this function also needs + // to change. + U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER); + return fMatch; + } + int8_t getPower() const { U_ASSERT(getType() == TYPE_POWER_PART); return static_cast(fMatch - kPowerPartOffset); @@ -347,6 +377,7 @@ public: } private: + // Tracks parser progress: the offset into fSource. int32_t fIndex = 0; // Since we're not owning this memory, whatever is passed to the constructor @@ -355,6 +386,9 @@ private: StringPiece fSource; UCharsTrie fTrie; + // Set to true when we've seen a "-per-" or a "per-", after which all units + // are in the denominator. Until we find an "-and-", at which point the + // identifier is invalid pending TODO(CLDR-13700). bool fAfterPer = false; Parser() : fSource(""), fTrie(u"") {} @@ -366,11 +400,17 @@ private: return fIndex < fSource.length(); } + // Returns the next Token parsed from fSource, advancing fIndex to the end + // of that token in fSource. 
In case of U_FAILURE(status), the token + // returned will cause an abort if getType() is called on it. Token nextToken(UErrorCode& status) { fTrie.reset(); int32_t match = -1; + // Saves the position in the fSource string for the end of the most + // recent matching token. int32_t previ = -1; - do { + // Find the longest token that matches a value in the trie: + while (fIndex < fSource.length()) { auto result = fTrie.next(fSource.data()[fIndex++]); if (result == USTRINGTRIE_NO_MATCH) { break; @@ -385,7 +425,7 @@ private: } U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE); // continue; - } while (fIndex < fSource.length()); + } if (match < 0) { status = kUnitIdentifierSyntaxError; @@ -395,63 +435,88 @@ private: return Token(match); } + /** + * Returns the next "single unit" via result. + * + * If a "-per-" was parsed, the result will have appropriate negative + * dimensionality. + * + * Returns an error if we parse both compound units and "-and-", since mixed + * compound units are not yet supported - TODO(CLDR-13700). + * + * @param result Will be overwritten by the result, if status shows success. + * @param sawAnd If an "-and-" was parsed prior to finding the "single + * unit", sawAnd is set to true. If not, it is left as is. + * @param status ICU error code. 
+ */ void nextSingleUnit(SingleUnitImpl& result, bool& sawAnd, UErrorCode& status) { - sawAnd = false; if (U_FAILURE(status)) { return; } - if (!hasNext()) { - // probably "one" - return; - } - // state: // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit) // 1 = power token seen (will not accept another power token) // 2 = SI prefix token seen (will not accept a power or SI prefix token) int32_t state = 0; - // Maybe read a compound part - if (fIndex != 0) { - Token token = nextToken(status); - if (U_FAILURE(status)) { - return; + bool atStart = fIndex == 0; + Token token = nextToken(status); + if (U_FAILURE(status)) { return; } + + if (atStart) { + // Identifiers optionally start with "per-". + if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) { + U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER); + fAfterPer = true; + result.dimensionality = -1; + + token = nextToken(status); + if (U_FAILURE(status)) { return; } } + } else { + // All other SingleUnit's are separated from previous SingleUnit's + // via a compound part: if (token.getType() != Token::TYPE_COMPOUND_PART) { status = kUnitIdentifierSyntaxError; return; } + switch (token.getMatch()) { - case COMPOUND_PART_PER: - if (fAfterPer) { - status = kUnitIdentifierSyntaxError; - return; - } - fAfterPer = true; + case COMPOUND_PART_PER: + if (sawAnd) { + // Mixed compound units not yet supported, + // TODO(CLDR-13700). + status = kUnitIdentifierSyntaxError; + return; + } + fAfterPer = true; + result.dimensionality = -1; + break; + + case COMPOUND_PART_TIMES: + if (fAfterPer) { result.dimensionality = -1; - break; + } + break; - case COMPOUND_PART_TIMES: - if (fAfterPer) { - result.dimensionality = -1; - } - break; - - case COMPOUND_PART_AND: - sawAnd = true; - fAfterPer = false; - break; + case COMPOUND_PART_AND: + if (fAfterPer) { + // Can't start with "-and-", and mixed compound units + // not yet supported, TODO(CLDR-13700). 
+ status = kUnitIdentifierSyntaxError; + return; + } + sawAnd = true; + break; } + + token = nextToken(status); + if (U_FAILURE(status)) { return; } } - // Read a unit - while (hasNext()) { - Token token = nextToken(status); - if (U_FAILURE(status)) { - return; - } - + // Read tokens until we have a complete SingleUnit or we reach the end. + while (true) { switch (token.getType()) { case Token::TYPE_POWER_PART: if (state > 0) { @@ -471,10 +536,6 @@ private: state = 2; break; - case Token::TYPE_ONE: - // Skip "one" and go to the next unit - return nextSingleUnit(result, sawAnd, status); - case Token::TYPE_SIMPLE_UNIT: result.index = token.getSimpleUnitIndex(); return; @@ -483,27 +544,38 @@ private: status = kUnitIdentifierSyntaxError; return; } - } - // We ran out of tokens before finding a complete single unit. - status = kUnitIdentifierSyntaxError; + if (!hasNext()) { + // We ran out of tokens before finding a complete single unit. + status = kUnitIdentifierSyntaxError; + return; + } + token = nextToken(status); + if (U_FAILURE(status)) { + return; + } + } } + /// @param result is modified, not overridden. Caller must pass in a + /// default-constructed (empty) MeasureUnitImpl instance. void parseImpl(MeasureUnitImpl& result, UErrorCode& status) { if (U_FAILURE(status)) { return; } + if (fSource.empty()) { + // The dimensionless unit: nothing to parse. Leave result as is. + return; + } int32_t unitNum = 0; while (hasNext()) { - bool sawAnd; + bool sawAnd = false; SingleUnitImpl singleUnit; nextSingleUnit(singleUnit, sawAnd, status); if (U_FAILURE(status)) { return; } - if (singleUnit.index == 0) { - continue; - } + U_ASSERT(!singleUnit.isDimensionless()); bool added = result.append(singleUnit, status); if (sawAnd && !added) { // Two similar units are not allowed in a mixed unit @@ -511,9 +583,12 @@ private: return; } if ((++unitNum) >= 2) { - UMeasureUnitComplexity complexity = sawAnd - ?
UMEASURE_UNIT_MIXED - : UMEASURE_UNIT_COMPOUND; + // nextSingleUnit fails appropriately for "per" and "and" in the + // same identifier. It doesn't fail for other compound units + // (COMPOUND_PART_TIMES). Consequently we take care of that + // here. + UMeasureUnitComplexity complexity = + sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND; if (unitNum == 2) { U_ASSERT(result.complexity == UMEASURE_UNIT_SINGLE); result.complexity = complexity; @@ -536,15 +611,22 @@ compareSingleUnits(const void* /*context*/, const void* left, const void* right) /** * Generate the identifier string for a single unit in place. + * + * Does not support the dimensionless SingleUnitImpl: calling serializeSingle + * with the dimensionless unit results in an U_INTERNAL_PROGRAM_ERROR. + * + * @param first If singleUnit is part of a compound unit, and not its first + * single unit, set this to false. Otherwise: set to true. */ void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& output, UErrorCode& status) { if (first && singleUnit.dimensionality < 0) { - output.append("one-per-", status); + // Essentially the "unary per". For compound units with a numerator, the + // caller takes care of the "binary per". + output.append("per-", status); } - if (singleUnit.index == 0) { - // Don't propagate SI prefixes and powers on one - output.append("one", status); + if (singleUnit.isDimensionless()) { + status = U_INTERNAL_PROGRAM_ERROR; return; } int8_t posPower = std::abs(singleUnit.dimensionality); @@ -595,7 +677,8 @@ void serialize(MeasureUnitImpl& impl, UErrorCode& status) { } U_ASSERT(impl.identifier.isEmpty()); if (impl.units.length() == 0) { - impl.identifier.append("one", status); + // Dimensionless, constructed by the default constructor: no appending + // to impl.identifier, we wish it to contain the zero-length string. 
return; } if (impl.complexity == UMEASURE_UNIT_COMPOUND) { @@ -634,8 +717,17 @@ void serialize(MeasureUnitImpl& impl, UErrorCode& status) { } -/** @return true if a new item was added */ +/** + * Appends a SingleUnitImpl to a MeasureUnitImpl. + * + * @return true if a new item was added. If unit is the dimensionless unit, it + * is never added: the return value will always be false. + */ bool appendImpl(MeasureUnitImpl& impl, const SingleUnitImpl& unit, UErrorCode& status) { + if (unit.isDimensionless()) { + // We don't append dimensionless units. + return false; + } // Find a similar unit that already exists, to attempt to coalesce SingleUnitImpl* oldUnit = nullptr; for (int32_t i = 0; i < impl.units.length(); i++) { @@ -645,6 +737,8 @@ bool appendImpl(MeasureUnitImpl& impl, const SingleUnitImpl& unit, UErrorCode& s } } if (oldUnit) { + // Both dimensionalities will be positive, or both will be negative, by + // virtue of isCompatibleWith(). oldUnit->dimensionality += unit.dimensionality; } else { SingleUnitImpl* destination = impl.units.emplaceBack(); @@ -744,7 +838,12 @@ MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& statu } int32_t MeasureUnit::getDimensionality(UErrorCode& status) const { - return SingleUnitImpl::forMeasureUnit(*this, status).dimensionality; + SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status); + if (U_FAILURE(status)) { return 0; } + if (singleUnit.isDimensionless()) { + return 0; + } + return singleUnit.dimensionality; } MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const { diff --git a/icu4c/source/i18n/measunit_impl.h b/icu4c/source/i18n/measunit_impl.h index 9657ff2c1d9..c69d243b3b8 100644 --- a/icu4c/source/i18n/measunit_impl.h +++ b/icu4c/source/i18n/measunit_impl.h @@ -25,14 +25,25 @@ static const char kDefaultCurrency8[] = "XXX"; struct SingleUnitImpl : public UMemory { /** * Gets a single unit from the MeasureUnit. 
If there are multiple single units, sets an error - * code and return the base dimensionless unit. Parses if necessary. + * code and returns the base dimensionless unit. Parses if necessary. */ static SingleUnitImpl forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status); /** Transform this SingleUnitImpl into a MeasureUnit, simplifying if possible. */ MeasureUnit build(UErrorCode& status) const; - /** Compare this SingleUnitImpl to another SingleUnitImpl. */ + /** + * Compare this SingleUnitImpl to another SingleUnitImpl for the sake of + * sorting and coalescing. + * + * Takes the sign of dimensionality into account, but not the absolute + * value: per-meter is not considered the same as meter, but meter is + * considered the same as square-meter. + * + * The dimensionless unit generally does not get compared, but if it did, it + * would sort before other units by virtue of index being < 0 and + * dimensionality not being negative. + */ int32_t compareTo(const SingleUnitImpl& other) const { if (dimensionality < 0 && other.dimensionality > 0) { // Positive dimensions first @@ -66,13 +77,36 @@ struct SingleUnitImpl : public UMemory { return (compareTo(other) == 0); } - /** Simple unit index, unique for every simple unit. */ - int32_t index = 0; + /** + * Returns true if this unit is the "dimensionless base unit", as produced + * by the MeasureUnit() default constructor. (This does not include the + * likes of concentrations or angles.) + */ + bool isDimensionless() const { + return index == -1; + } - /** SI prefix. **/ + /** + * Simple unit index, unique for every simple unit, -1 for the dimensionless + * unit. This is an index into a string list in measunit_extra.cpp. + * + * The default value is -1, meaning the dimensionless unit: + * isDimensionless() will return true, until index is changed. + */ + int32_t index = -1; + + /** + * SI prefix. + * + * This is ignored for the dimensionless unit. 
+ */ UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; - - /** Dimensionality. **/ + + /** + * Dimensionality. + * + * This is meaningless for the dimensionless unit. + */ int32_t dimensionality = 1; }; @@ -92,7 +126,8 @@ struct MeasureUnitImpl : public UMemory { * * @param identifier The unit identifier string. * @param status Set if the identifier string is not valid. - * @return A newly parsed value object. + * @return A newly parsed value object. Behaviour of this unit is + * unspecified if an error is returned via status. */ static MeasureUnitImpl forIdentifier(StringPiece identifier, UErrorCode& status); @@ -145,15 +180,23 @@ struct MeasureUnitImpl : public UMemory { /** Mutates this MeasureUnitImpl to take the reciprocal. */ void takeReciprocal(UErrorCode& status); - /** Mutates this MeasureUnitImpl to append a single unit. */ + /** + * Mutates this MeasureUnitImpl to append a single unit. + * + * @return true if a new item was added. If unit is the dimensionless unit, + * it is never added: the return value will always be false. + */ bool append(const SingleUnitImpl& singleUnit, UErrorCode& status); /** The complexity, either SINGLE, COMPOUND, or MIXED. */ UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE; /** - * The list of simple units. These may be summed or multiplied, based on the value of the - * complexity field. + * The list of simple units. These may be summed or multiplied, based on the + * value of the complexity field. + * + * The "dimensionless" unit (SingleUnitImpl default constructor) must not be + * added to this list. 
*/ MaybeStackVector<SingleUnitImpl> units; diff --git a/icu4c/source/i18n/nounit.cpp b/icu4c/source/i18n/nounit.cpp index b993cb56adb..1d4aa05506e 100644 --- a/icu4c/source/i18n/nounit.cpp +++ b/icu4c/source/i18n/nounit.cpp @@ -11,7 +11,7 @@ U_NAMESPACE_BEGIN UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NoUnit) NoUnit U_EXPORT2 NoUnit::base() { - return NoUnit("one"); + return NoUnit(""); } NoUnit U_EXPORT2 NoUnit::percent() { diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index d221fd88390..e240092e305 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -37,7 +37,7 @@ struct MeasureUnitImpl; * Enumeration for unit complexity. There are three levels: * * - SINGLE: A single unit, optionally with a power and/or SI prefix. Examples: hectare, - * square-kilometer, kilojoule, one-per-second. + * square-kilometer, kilojoule, per-second. * - COMPOUND: A unit composed of the product of multiple single units. Examples: * meter-per-second, kilowatt-hour, kilogram-meter-per-square-second. * - MIXED: A unit composed of the sum of multiple single units. Examples: foot+inch, @@ -387,6 +387,8 @@ class U_I18N_API MeasureUnit: public UObject { * NOTE: Only works on SINGLE units. If this is a COMPOUND or MIXED unit, an error will * occur. For more information, see UMeasureUnitComplexity. * + * For the base dimensionless unit, withDimensionality does nothing. + * * @param dimensionality The dimensionality (power). * @param status Set if this is not a SINGLE unit or if another error occurs. * @return A new SINGLE unit. @@ -401,6 +403,8 @@ class U_I18N_API MeasureUnit: public UObject { * NOTE: Only works on SINGLE units. If this is a COMPOUND or MIXED unit, an error will * occur. For more information, see UMeasureUnitComplexity. * + * For the base dimensionless unit, getDimensionality returns 0. + * * @param status Set if this is not a SINGLE unit or if another error occurs.
* @return The dimensionality (power) of this simple unit. * @draft ICU 67 @@ -447,7 +451,7 @@ class U_I18N_API MeasureUnit: public UObject { * * Examples: * - Given "meter-kilogram-per-second", three units will be returned: "meter", - * "kilogram", and "one-per-second". + * "kilogram", and "per-second". * - Given "hour+minute+second", three units will be returned: "hour", "minute", * and "second". * @@ -3375,11 +3379,15 @@ class U_I18N_API MeasureUnit: public UObject { private: - // If non-null, fImpl is owned by the MeasureUnit. + // Used by new draft APIs in ICU 67. If non-null, fImpl is owned by the + // MeasureUnit. MeasureUnitImpl* fImpl; - // These two ints are indices into static string lists in measunit.cpp + // An index into a static string list in measunit.cpp. If set to -1, fImpl + // is in use instead of fTypeId and fSubTypeId. int16_t fSubTypeId; + // An index into a static string list in measunit.cpp. If set to -1, fImpl + // is in use instead of fTypeId and fSubTypeId. int8_t fTypeId; MeasureUnit(int32_t typeId, int32_t subTypeId); @@ -3389,7 +3397,11 @@ private: static MeasureUnit *create(int typeId, int subTypeId, UErrorCode &status); /** - * @return Whether subType is known to ICU. + * Sets output's typeId and subTypeId according to subType, if subType is a + * valid/known identifier. + * + * @return Whether subType is known to ICU. If false, output was not + * modified. 
*/ static bool findBySubType(StringPiece subType, MeasureUnit* output); diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 79eb9b461a0..51a85fef85f 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -79,9 +79,10 @@ private: void Test20332_PersonUnits(); void TestNumericTime(); void TestNumericTimeSomeSpecialFormats(); + void TestIdentifiers(); void TestInvalidIdentifiers(); void TestCompoundUnitOperations(); - void TestIdentifiers(); + void TestDimensionlessBehaviour(); void Test21060_AddressSanitizerProblem(); void verifyFormat( @@ -202,9 +203,10 @@ void MeasureFormatTest::runIndexedTest( TESTCASE_AUTO(Test20332_PersonUnits); TESTCASE_AUTO(TestNumericTime); TESTCASE_AUTO(TestNumericTimeSomeSpecialFormats); + TESTCASE_AUTO(TestIdentifiers); TESTCASE_AUTO(TestInvalidIdentifiers); TESTCASE_AUTO(TestCompoundUnitOperations); - TESTCASE_AUTO(TestIdentifiers); + TESTCASE_AUTO(TestDimensionlessBehaviour); TESTCASE_AUTO(Test21060_AddressSanitizerProblem); TESTCASE_AUTO_END; } @@ -3239,10 +3241,43 @@ void MeasureFormatTest::TestNumericTimeSomeSpecialFormats() { verifyFormat("Danish fhoursFminutes", fmtDa, fhoursFminutes, 2, "2.03,877"); } +void MeasureFormatTest::TestIdentifiers() { + IcuTestErrorCode status(*this, "TestIdentifiers"); + struct TestCase { + const char* id; + const char* normalized; + } cases[] = { + // Correctly normalized identifiers should not change + {"", ""}, + {"square-meter-per-square-meter", "square-meter-per-square-meter"}, + {"kilogram-meter-per-square-meter-square-second", + "kilogram-meter-per-square-meter-square-second"}, + {"square-mile-and-square-foot", "square-mile-and-square-foot"}, + {"square-foot-and-square-mile", "square-foot-and-square-mile"}, + {"per-cubic-centimeter", "per-cubic-centimeter"}, + {"per-kilometer", "per-kilometer"}, + + // Normalization of power and per + {"p2-foot-and-p2-mile", "square-foot-and-square-mile"}, 
+ {"gram-square-gram-per-dekagram", "cubic-gram-per-dekagram"}, + {"kilogram-per-meter-per-second", "kilogram-per-meter-second"}, + + // TODO(ICU-20920): Add more test cases once the proper ranking is available. + }; + for (const auto &cas : cases) { + status.setScope(cas.id); + MeasureUnit unit = MeasureUnit::forIdentifier(cas.id, status); + status.errIfFailureAndReset(); + const char* actual = unit.getIdentifier(); + assertEquals(cas.id, cas.normalized, actual); + status.errIfFailureAndReset(); + } +} + void MeasureFormatTest::TestInvalidIdentifiers() { IcuTestErrorCode status(*this, "TestInvalidIdentifiers"); - const char* const inputs[] = { + const char *const inputs[] = { "kilo", "kilokilo", "onekilo", @@ -3258,7 +3293,23 @@ void MeasureFormatTest::TestInvalidIdentifiers() { "-p2-meter", "+p2-meter", "+", - "-" + "-", + "-mile", + "-and-mile", + "-per-mile", + "one", + "one-one", + "one-per-mile", + "one-per-cubic-centimeter", + "square--per-meter", + "metersecond", // Must have compound part in between single units + + // Negative powers not supported in mixed units yet. TODO(CLDR-13701). + "per-hour-and-hertz", + "hertz-and-per-hour", + + // Compound units not supported in mixed units yet. TODO(CLDR-13700). 
+ "kilonewton-meter-and-newton-meter", }; for (const auto& input : inputs) { @@ -3295,9 +3346,9 @@ void MeasureFormatTest::TestCompoundUnitOperations() { MeasureUnit overQuarticKilometer1 = kilometer.withDimensionality(-4, status); verifySingleUnit(squareMeter, UMEASURE_SI_PREFIX_ONE, 2, "square-meter"); - verifySingleUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "one-per-cubic-centimeter"); + verifySingleUnit(overCubicCentimeter, UMEASURE_SI_PREFIX_CENTI, -3, "per-cubic-centimeter"); verifySingleUnit(quarticKilometer, UMEASURE_SI_PREFIX_KILO, 4, "p4-kilometer"); - verifySingleUnit(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifySingleUnit(overQuarticKilometer1, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer"); assertTrue("power inequality", quarticKilometer != overQuarticKilometer1); @@ -3310,9 +3361,9 @@ void MeasureFormatTest::TestCompoundUnitOperations() { .reciprocal(status) .withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); - verifySingleUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); - verifySingleUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); - verifySingleUnit(overQuarticKilometer4, UMEASURE_SI_PREFIX_KILO, -4, "one-per-p4-kilometer"); + verifySingleUnit(overQuarticKilometer2, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer"); + verifySingleUnit(overQuarticKilometer3, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer"); + verifySingleUnit(overQuarticKilometer4, UMEASURE_SI_PREFIX_KILO, -4, "per-p4-kilometer"); assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer2); assertTrue("reciprocal equality", overQuarticKilometer1 == overQuarticKilometer3); @@ -3343,7 +3394,7 @@ void MeasureFormatTest::TestCompoundUnitOperations() { const char* secondCentimeterSub[] = {"centimeter", "square-kilosecond"}; verifyCompoundUnit(secondCentimeter, "centimeter-square-kilosecond", secondCentimeterSub, UPRV_LENGTHOF(secondCentimeterSub)); - 
const char* secondCentimeterPerKilometerSub[] = {"centimeter", "square-kilosecond", "one-per-kilometer"}; + const char* secondCentimeterPerKilometerSub[] = {"centimeter", "square-kilosecond", "per-kilometer"}; verifyCompoundUnit(secondCentimeterPerKilometer, "centimeter-square-kilosecond-per-kilometer", secondCentimeterPerKilometerSub, UPRV_LENGTHOF(secondCentimeterPerKilometerSub)); @@ -3378,31 +3429,16 @@ void MeasureFormatTest::TestCompoundUnitOperations() { assertTrue("order matters inequality", footInch != inchFoot); - MeasureUnit one1; - MeasureUnit one2 = MeasureUnit::forIdentifier("one", status); - MeasureUnit one3 = MeasureUnit::forIdentifier("", status); - MeasureUnit squareOne = one2.withDimensionality(2, status); - MeasureUnit onePerOne = one2.reciprocal(status); - MeasureUnit squareKiloOne = squareOne.withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); - MeasureUnit onePerSquareKiloOne = squareKiloOne.reciprocal(status); - MeasureUnit oneOne = MeasureUnit::forIdentifier("one-one", status); - MeasureUnit onePlusOne = MeasureUnit::forIdentifier("one-and-one", status); - MeasureUnit kilometer2 = one2.product(kilometer, status); + MeasureUnit dimensionless; + MeasureUnit dimensionless2 = MeasureUnit::forIdentifier("", status); + status.errIfFailureAndReset("Dimensionless MeasureUnit."); + assertTrue("dimensionless equality", dimensionless == dimensionless2); - verifySingleUnit(one1, UMEASURE_SI_PREFIX_ONE, 1, "one"); - verifySingleUnit(one2, UMEASURE_SI_PREFIX_ONE, 1, "one"); - verifySingleUnit(one3, UMEASURE_SI_PREFIX_ONE, 1, "one"); - verifySingleUnit(squareOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); - verifySingleUnit(onePerOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); - verifySingleUnit(squareKiloOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); - verifySingleUnit(onePerSquareKiloOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); - verifySingleUnit(oneOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); - verifySingleUnit(onePlusOne, UMEASURE_SI_PREFIX_ONE, 1, "one"); + // We support starting from an 
"identity" MeasureUnit and then combining it + // with others via product: + MeasureUnit kilometer2 = dimensionless.product(kilometer, status); + status.errIfFailureAndReset("dimensionless.product(kilometer, status)"); verifySingleUnit(kilometer2, UMEASURE_SI_PREFIX_KILO, 1, "kilometer"); - - assertTrue("one equality", one1 == one2); - assertTrue("one equality", one2 == one3); - assertTrue("one-per-one equality", onePerOne == onePerSquareKiloOne); assertTrue("kilometer equality", kilometer == kilometer2); // Test out-of-range powers @@ -3413,49 +3449,81 @@ void MeasureFormatTest::TestCompoundUnitOperations() { status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); MeasureUnit power16b = power15.product(kilometer, status); status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); - MeasureUnit powerN15 = MeasureUnit::forIdentifier("one-per-p15-kilometer", status); - verifySingleUnit(powerN15, UMEASURE_SI_PREFIX_KILO, -15, "one-per-p15-kilometer"); + MeasureUnit powerN15 = MeasureUnit::forIdentifier("per-p15-kilometer", status); + verifySingleUnit(powerN15, UMEASURE_SI_PREFIX_KILO, -15, "per-p15-kilometer"); status.errIfFailureAndReset(); - MeasureUnit powerN16a = MeasureUnit::forIdentifier("one-per-p16-kilometer", status); + MeasureUnit powerN16a = MeasureUnit::forIdentifier("per-p16-kilometer", status); status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); MeasureUnit powerN16b = powerN15.product(overQuarticKilometer1, status); status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); } -void MeasureFormatTest::TestIdentifiers() { - IcuTestErrorCode status(*this, "TestIdentifiers"); - struct TestCase { - bool valid; - const char* id; - const char* normalized; - } cases[] = { - {true, "square-meter-per-square-meter", "square-meter-per-square-meter"}, - {true, "kilogram-meter-per-square-meter-square-second", - "kilogram-meter-per-square-meter-square-second"}, - // TODO(ICU-20920): Add more test cases once the proper ranking is available. 
- }; - for (const auto& cas : cases) { - status.setScope(cas.id); - MeasureUnit unit = MeasureUnit::forIdentifier(cas.id, status); - if (!cas.valid) { - status.expectErrorAndReset(U_ILLEGAL_ARGUMENT_ERROR); - continue; - } - const char* actual = unit.getIdentifier(); - assertEquals(cas.id, cas.normalized, actual); - status.errIfFailureAndReset(); - } +void MeasureFormatTest::TestDimensionlessBehaviour() { + IcuTestErrorCode status(*this, "TestDimensionlessBehaviour"); + MeasureUnit dimensionless; + MeasureUnit modified; + + // At the time of writing, each of the seven groups below caused + // Parser::from("") to be called: + + // splitToSingleUnits + int32_t count; + LocalArray<MeasureUnit> singles = dimensionless.splitToSingleUnits(count, status); + status.errIfFailureAndReset("dimensionless.splitToSingleUnits(...)"); + assertEquals("no singles in dimensionless", 0, count); + + // product(dimensionless) + MeasureUnit mile = MeasureUnit::getMile(); + mile = mile.product(dimensionless, status); + status.errIfFailureAndReset("mile.product(dimensionless, ...)"); + verifySingleUnit(mile, UMEASURE_SI_PREFIX_ONE, 1, "mile"); + + // dimensionless.getSIPrefix() + UMeasureSIPrefix siPrefix = dimensionless.getSIPrefix(status); + status.errIfFailureAndReset("dimensionless.getSIPrefix(...)"); + assertEquals("dimensionless SIPrefix", UMEASURE_SI_PREFIX_ONE, siPrefix); + + // dimensionless.withSIPrefix() + modified = dimensionless.withSIPrefix(UMEASURE_SI_PREFIX_KILO, status); + status.errIfFailureAndReset("dimensionless.withSIPrefix(...)"); + singles = modified.splitToSingleUnits(count, status); + assertEquals("no singles in modified", 0, count); + siPrefix = modified.getSIPrefix(status); + status.errIfFailureAndReset("modified.getSIPrefix(...)"); + assertEquals("modified SIPrefix", UMEASURE_SI_PREFIX_ONE, siPrefix); + + // dimensionless.getComplexity() + UMeasureUnitComplexity complexity = dimensionless.getComplexity(status); +
status.errIfFailureAndReset("dimensionless.getComplexity(...)"); + assertEquals("dimensionless complexity", UMEASURE_UNIT_SINGLE, complexity); + + // Dimensionality is mostly meaningless for dimensionless units, but it's + // still considered a SINGLE unit, so this code doesn't throw errors: + + // dimensionless.getDimensionality() + int32_t dimensionality = dimensionless.getDimensionality(status); + status.errIfFailureAndReset("dimensionless.getDimensionality(...)"); + assertEquals("dimensionless dimensionality", 0, dimensionality); + + // dimensionless.withDimensionality() + dimensionless.withDimensionality(-1, status); + status.errIfFailureAndReset("dimensionless.withDimensionality(...)"); + dimensionality = dimensionless.getDimensionality(status); + status.errIfFailureAndReset("dimensionless.getDimensionality(...)"); + assertEquals("dimensionless dimensionality", 0, dimensionality); } // ICU-21060 void MeasureFormatTest::Test21060_AddressSanitizerProblem() { - UErrorCode status = U_ZERO_ERROR; - MeasureUnit first = MeasureUnit::forIdentifier("one", status); + IcuTestErrorCode status(*this, "Test21060_AddressSanitizerProblem"); + + MeasureUnit first = MeasureUnit::forIdentifier("", status); + status.errIfFailureAndReset(); // Experimentally, a compound unit like "kilogram-meter" failed. A single // unit like "kilogram" or "meter" did not fail, did not trigger the // problem. 
- MeasureUnit crux = MeasureUnit::forIdentifier("one-per-meter", status); + MeasureUnit crux = MeasureUnit::forIdentifier("per-meter", status); // Heap allocation of a new CharString for first.identifier happens here: first = first.product(crux, status); @@ -3469,12 +3537,14 @@ void MeasureFormatTest::Test21060_AddressSanitizerProblem() { first = first.product(crux, status); // Proving we've had no failure yet: - if (U_FAILURE(status)) return; + status.errIfFailureAndReset(); // heap-use-after-free failure happened here, since a SingleUnitImpl had // held onto a StringPiece pointing at a substring of an identifier that was // freed above: second = second.product(crux, status); + + status.errIfFailureAndReset(); } From 3dce0ab5c6a9cbd3fa54e7df8f2e9fbf0b9ab632 Mon Sep 17 00:00:00 2001 From: Frank Tang Date: Sat, 18 Apr 2020 00:00:38 -0700 Subject: [PATCH 09/14] ICU-21082 Fix error: no member named 'abs' in"std" --- icu4c/source/i18n/measunit_extra.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index ebc4ac33322..aeb60017a18 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -12,6 +12,7 @@ // Helpful in toString methods and elsewhere. 
#define UNISTR_FROM_STRING_EXPLICIT +#include #include "cstring.h" #include "measunit_impl.h" #include "uarrsort.h" From 2f4e2e43ce37d2308cf1ef5cec1a601994abf1a5 Mon Sep 17 00:00:00 2001 From: Craig Cornelius Date: Mon, 20 Apr 2020 16:26:58 +0000 Subject: [PATCH 10/14] ICU-21053 Fix compile problems with UCONFIG_NO_FORMATTING = 1 See #1115 --- icu4c/source/i18n/listformatter.cpp | 8 ++++++++ icu4c/source/i18n/unicode/dtptngen.h | 4 ++++ icu4c/source/i18n/unicode/listformatter.h | 4 +++- icu4c/source/i18n/unicode/udatpg.h | 4 ++++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/icu4c/source/i18n/listformatter.cpp b/icu4c/source/i18n/listformatter.cpp index b9065e8796d..da99c9291d1 100644 --- a/icu4c/source/i18n/listformatter.cpp +++ b/icu4c/source/i18n/listformatter.cpp @@ -348,6 +348,7 @@ const ListFormatInternal* ListFormatter::getListFormatInternal( return result; } +#if !UCONFIG_NO_FORMATTING static const char* typeWidthToStyleString(UListFormatterType type, UListFormatterWidth width) { switch (type) { case ULISTFMT_TYPE_AND: @@ -391,6 +392,7 @@ static const char* typeWidthToStyleString(UListFormatterType type, UListFormatte return nullptr; } +#endif static const UChar solidus = 0x2F; static const UChar aliasPrefix[] = { 0x6C,0x69,0x73,0x74,0x50,0x61,0x74,0x74,0x65,0x72,0x6E,0x2F }; // "listPattern/" @@ -511,9 +513,14 @@ ListFormatter* ListFormatter::createInstance(UErrorCode& errorCode) { } ListFormatter* ListFormatter::createInstance(const Locale& locale, UErrorCode& errorCode) { +#if !UCONFIG_NO_FORMATTING return createInstance(locale, ULISTFMT_TYPE_AND, ULISTFMT_WIDTH_WIDE, errorCode); +#else + return createInstance(locale, "standard", errorCode); +#endif } +#if !UCONFIG_NO_FORMATTING ListFormatter* ListFormatter::createInstance( const Locale& locale, UListFormatterType type, UListFormatterWidth width, UErrorCode& errorCode) { const char* style = typeWidthToStyleString(type, width); @@ -523,6 +530,7 @@ ListFormatter* 
ListFormatter::createInstance( } return createInstance(locale, style, errorCode); } +#endif ListFormatter* ListFormatter::createInstance(const Locale& locale, const char *style, UErrorCode& errorCode) { const ListFormatInternal* listFormatInternal = getListFormatInternal(locale, style, errorCode); diff --git a/icu4c/source/i18n/unicode/dtptngen.h b/icu4c/source/i18n/unicode/dtptngen.h index 35736a0ea3f..dd99d58e65e 100644 --- a/icu4c/source/i18n/unicode/dtptngen.h +++ b/icu4c/source/i18n/unicode/dtptngen.h @@ -483,6 +483,8 @@ public: */ const UnicodeString& getDecimal() const; +#if !UCONFIG_NO_FORMATTING + #ifndef U_HIDE_DRAFT_API /** * Get the default hour cycle for a locale. Uses the locale that the @@ -499,6 +501,8 @@ public: UDateFormatHourCycle getDefaultHourCycle(UErrorCode& status) const; #endif /* U_HIDE_DRAFT_API */ +#endif /* #if !UCONFIG_NO_FORMATTING */ + /** * ICU "poor man's RTTI", returns a UClassID for the actual class. * diff --git a/icu4c/source/i18n/unicode/listformatter.h b/icu4c/source/i18n/unicode/listformatter.h index 26b42c28bbc..211055d9d1c 100644 --- a/icu4c/source/i18n/unicode/listformatter.h +++ b/icu4c/source/i18n/unicode/listformatter.h @@ -186,6 +186,7 @@ class U_I18N_API ListFormatter : public UObject{ static ListFormatter* createInstance(const Locale& locale, UErrorCode& errorCode); #ifndef U_HIDE_DRAFT_API +#if !UCONFIG_NO_FORMATTING /** * Creates a ListFormatter for the given locale, list type, and style. * @@ -198,8 +199,9 @@ class U_I18N_API ListFormatter : public UObject{ */ static ListFormatter* createInstance( const Locale& locale, UListFormatterType type, UListFormatterWidth width, UErrorCode& errorCode); +#endif /* !UCONFIG_NO_FORMATTING */ #endif /* U_HIDE_DRAFT_API */ - + #ifndef U_HIDE_INTERNAL_API /** * Creates a ListFormatter appropriate for a locale and style. 
diff --git a/icu4c/source/i18n/unicode/udatpg.h b/icu4c/source/i18n/unicode/udatpg.h index 74c812ff85b..5abe1472a65 100644 --- a/icu4c/source/i18n/unicode/udatpg.h +++ b/icu4c/source/i18n/unicode/udatpg.h @@ -652,6 +652,8 @@ udatpg_getPatternForSkeleton(const UDateTimePatternGenerator *dtpg, const UChar *skeleton, int32_t skeletonLength, int32_t *pLength); +#if !UCONFIG_NO_FORMATTING + #ifndef U_HIDE_DRAFT_API /** * Return the default hour cycle for a locale. Uses the locale that the @@ -670,4 +672,6 @@ U_DRAFT UDateFormatHourCycle U_EXPORT2 udatpg_getDefaultHourCycle(const UDateTimePatternGenerator *dtpg, UErrorCode* pErrorCode); #endif /* U_HIDE_DRAFT_API */ +#endif /* #if !UCONFIG_NO_FORMATTING */ + #endif From 925376a90416e7027ce3b7352a2ecc7d7c3429ab Mon Sep 17 00:00:00 2001 From: Elango Cheran Date: Mon, 20 Apr 2020 14:57:16 -0700 Subject: [PATCH 11/14] ICU-21055 Remove test inputs causing noknownissues test run to hang --- icu4c/source/test/intltest/restsnew.cpp | 12 ++++++++++++ icu4c/source/test/intltest/restsnew.h | 1 + 2 files changed, 13 insertions(+) diff --git a/icu4c/source/test/intltest/restsnew.cpp b/icu4c/source/test/intltest/restsnew.cpp index b2d72d98a82..482e241caf5 100644 --- a/icu4c/source/test/intltest/restsnew.cpp +++ b/icu4c/source/test/intltest/restsnew.cpp @@ -1395,6 +1395,14 @@ void NewResourceBundleTest::TestFilter() { } } +/* + * The following test for ICU-20706 has infinite loops on certain inputs for + * locales and calendars. In order to unblock the build (ICU-21055), those + * specific values are temporarily removed. + * The issue of the infinite loops and its blocking dependencies were captured + * in ICU-21080. 
+ */ + void NewResourceBundleTest::TestIntervalAliasFallbacks() { const char* locales[] = { // Thee will not cause infinity loop @@ -1402,6 +1410,7 @@ void NewResourceBundleTest::TestIntervalAliasFallbacks() { "ja", // These will cause infinity loop +#if 0 "fr_CA", "en_150", "es_419", @@ -1413,6 +1422,7 @@ void NewResourceBundleTest::TestIntervalAliasFallbacks() { "zh_Hant", "zh_Hant_TW", "zh_TW", +#endif }; const char* calendars[] = { // These won't cause infinity loop @@ -1420,6 +1430,7 @@ void NewResourceBundleTest::TestIntervalAliasFallbacks() { "chinese", // These will cause infinity loop +#if 0 "islamic", "islamic-civil", "islamic-tbla", @@ -1428,6 +1439,7 @@ void NewResourceBundleTest::TestIntervalAliasFallbacks() { "islamic-rgsa", "japanese", "roc", +#endif }; for (int lidx = 0; lidx < UPRV_LENGTHOF(locales); lidx++) { diff --git a/icu4c/source/test/intltest/restsnew.h b/icu4c/source/test/intltest/restsnew.h index d3b2d9c38a1..45cc9309365 100644 --- a/icu4c/source/test/intltest/restsnew.h +++ b/icu4c/source/test/intltest/restsnew.h @@ -39,6 +39,7 @@ public: void TestGetByFallback(void); void TestFilter(void); + void TestIntervalAliasFallbacks(void); #if U_ENABLE_TRACING From b5973eee648e2d99273a085836e66a9deda57fa1 Mon Sep 17 00:00:00 2001 From: "Shane F. 
Carr" Date: Tue, 21 Apr 2020 18:21:55 -0500 Subject: [PATCH 12/14] ICU-21061 ICU-21085 Merge latest changes from double-conversion --- icu4c/source/i18n/double-conversion-utils.h | 12 +++++++++-- .../upstream/double-conversion/utils.h | 12 +++++++++-- .../upstream/msvc/double-conversion.vcxproj | 9 ++++---- .../msvc/double-conversion.vcxproj.filters | 21 +++++++++++-------- .../upstream/msvc/run_tests/run_tests.vcxproj | 1 + 5 files changed, 38 insertions(+), 17 deletions(-) diff --git a/icu4c/source/i18n/double-conversion-utils.h b/icu4c/source/i18n/double-conversion-utils.h index 10d8fdd1163..8c6a0e16e00 100644 --- a/icu4c/source/i18n/double-conversion-utils.h +++ b/icu4c/source/i18n/double-conversion-utils.h @@ -66,15 +66,23 @@ inline void abort_noreturn() { abort(); } #endif #endif +// Not all compilers support __has_attribute and combining a check for both +// ifdef and __has_attribute on the same preprocessor line isn't portable. +#ifdef __has_attribute +# define DOUBLE_CONVERSION_HAS_ATTRIBUTE(x) __has_attribute(x) +#else +# define DOUBLE_CONVERSION_HAS_ATTRIBUTE(x) 0 +#endif + #ifndef DOUBLE_CONVERSION_UNUSED -#ifdef __GNUC__ +#if DOUBLE_CONVERSION_HAS_ATTRIBUTE(unused) #define DOUBLE_CONVERSION_UNUSED __attribute__((unused)) #else #define DOUBLE_CONVERSION_UNUSED #endif #endif -#if defined(__clang__) && __has_attribute(uninitialized) +#if DOUBLE_CONVERSION_HAS_ATTRIBUTE(uninitialized) #define DOUBLE_CONVERSION_STACK_UNINITIALIZED __attribute__((uninitialized)) #else #define DOUBLE_CONVERSION_STACK_UNINITIALIZED diff --git a/vendor/double-conversion/upstream/double-conversion/utils.h b/vendor/double-conversion/upstream/double-conversion/utils.h index 471c3da84cd..438d0551c6d 100644 --- a/vendor/double-conversion/upstream/double-conversion/utils.h +++ b/vendor/double-conversion/upstream/double-conversion/utils.h @@ -56,15 +56,23 @@ inline void abort_noreturn() { abort(); } #endif #endif +// Not all compilers support __has_attribute and combining a check for 
both +// ifdef and __has_attribute on the same preprocessor line isn't portable. +#ifdef __has_attribute +# define DOUBLE_CONVERSION_HAS_ATTRIBUTE(x) __has_attribute(x) +#else +# define DOUBLE_CONVERSION_HAS_ATTRIBUTE(x) 0 +#endif + #ifndef DOUBLE_CONVERSION_UNUSED -#ifdef __GNUC__ +#if DOUBLE_CONVERSION_HAS_ATTRIBUTE(unused) #define DOUBLE_CONVERSION_UNUSED __attribute__((unused)) #else #define DOUBLE_CONVERSION_UNUSED #endif #endif -#if defined(__clang__) && __has_attribute(uninitialized) +#if DOUBLE_CONVERSION_HAS_ATTRIBUTE(uninitialized) #define DOUBLE_CONVERSION_STACK_UNINITIALIZED __attribute__((uninitialized)) #else #define DOUBLE_CONVERSION_STACK_UNINITIALIZED diff --git a/vendor/double-conversion/upstream/msvc/double-conversion.vcxproj b/vendor/double-conversion/upstream/msvc/double-conversion.vcxproj index 5616c8ad696..e2d2ef87ead 100644 --- a/vendor/double-conversion/upstream/msvc/double-conversion.vcxproj +++ b/vendor/double-conversion/upstream/msvc/double-conversion.vcxproj @@ -147,24 +147,25 @@ - + - - + + - + + diff --git a/vendor/double-conversion/upstream/msvc/double-conversion.vcxproj.filters b/vendor/double-conversion/upstream/msvc/double-conversion.vcxproj.filters index 664a27f6d10..cebae940b10 100644 --- a/vendor/double-conversion/upstream/msvc/double-conversion.vcxproj.filters +++ b/vendor/double-conversion/upstream/msvc/double-conversion.vcxproj.filters @@ -24,12 +24,6 @@ Source Files - - Source Files - - - Source Files - Source Files @@ -39,14 +33,17 @@ Source Files + + Source Files + + + Source Files + Header Files - - Header Files - Header Files @@ -71,5 +68,11 @@ Header Files + + Header Files + + + Header Files + \ No newline at end of file diff --git a/vendor/double-conversion/upstream/msvc/run_tests/run_tests.vcxproj b/vendor/double-conversion/upstream/msvc/run_tests/run_tests.vcxproj index 05d2873f2eb..1cb0d360925 100644 --- a/vendor/double-conversion/upstream/msvc/run_tests/run_tests.vcxproj +++ 
b/vendor/double-conversion/upstream/msvc/run_tests/run_tests.vcxproj @@ -109,6 +109,7 @@ _SCL_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) false $(SolutionDir).. + /bigobj %(AdditionalOptions) Console From f0ada590421f7509d15a91921d5fa7227522515f Mon Sep 17 00:00:00 2001 From: Frank Tang Date: Tue, 21 Apr 2020 21:04:29 -0700 Subject: [PATCH 13/14] ICU-20949 Fix compound unit in "ar", "ne" locales Do not assume the "one" pattern always contains "{0}" --- icu4c/source/i18n/number_longnames.cpp | 3 +- .../test/cintltst/unumberformattertst.c | 90 ++++++++++++++++++- icu4c/source/test/intltest/numbertest.h | 1 + .../test/intltest/numbertest_skeletons.cpp | 73 +++++++++++++++ .../ibm/icu/impl/number/LongNameHandler.java | 4 +- .../dev/test/number/NumberSkeletonTest.java | 61 +++++++++++++ 6 files changed, 229 insertions(+), 3 deletions(-) diff --git a/icu4c/source/i18n/number_longnames.cpp b/icu4c/source/i18n/number_longnames.cpp index 74ee0ef3fd3..bb32d0381a5 100644 --- a/icu4c/source/i18n/number_longnames.cpp +++ b/icu4c/source/i18n/number_longnames.cpp @@ -246,7 +246,8 @@ LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit, con if (U_FAILURE(status)) { return result; } UnicodeString secondaryFormat = getWithPlural(secondaryData, StandardPlural::Form::ONE, status); if (U_FAILURE(status)) { return result; } - SimpleFormatter secondaryCompiled(secondaryFormat, 1, 1, status); + // Some "one" pattern may not contain "{0}". For example in "ar" or "ne" locale. + SimpleFormatter secondaryCompiled(secondaryFormat, 0, 1, status); if (U_FAILURE(status)) { return result; } UnicodeString secondaryString = secondaryCompiled.getTextWithNoArguments().trim(); // TODO: Why does UnicodeString need to be explicit in the following line? 
diff --git a/icu4c/source/test/cintltst/unumberformattertst.c b/icu4c/source/test/cintltst/unumberformattertst.c index 2e296f033f9..8919c786ec8 100644 --- a/icu4c/source/test/cintltst/unumberformattertst.c +++ b/icu4c/source/test/cintltst/unumberformattertst.c @@ -9,9 +9,11 @@ // Helpful in toString methods and elsewhere. #define UNISTR_FROM_STRING_EXPLICIT +#include #include "unicode/unumberformatter.h" #include "unicode/umisc.h" #include "unicode/unum.h" +#include "unicode/ustring.h" #include "cformtst.h" #include "cintltst.h" #include "cmemory.h" @@ -26,6 +28,8 @@ static void TestFormattedValue(void); static void TestSkeletonParseError(void); +static void TestPerUnitInArabic(void); + void addUNumberFormatterTest(TestNode** root); #define TESTCASE(x) addTest(root, &x, "tsformat/unumberformatter/" #x) @@ -36,6 +40,7 @@ void addUNumberFormatterTest(TestNode** root) { TESTCASE(TestExampleCode); TESTCASE(TestFormattedValue); TESTCASE(TestSkeletonParseError); + TESTCASE(TestPerUnitInArabic); } @@ -254,5 +259,88 @@ static void TestSkeletonParseError() { unumf_close(uformatter); } - +static void TestPerUnitInArabic() { + const char* simpleMeasureUnits[] = { + "area-acre", + "digital-bit", + "digital-byte", + "temperature-celsius", + "length-centimeter", + "duration-day", + "angle-degree", + "temperature-fahrenheit", + "volume-fluid-ounce", + "length-foot", + "volume-gallon", + "digital-gigabit", + "digital-gigabyte", + "mass-gram", + "area-hectare", + "duration-hour", + "length-inch", + "digital-kilobit", + "digital-kilobyte", + "mass-kilogram", + "length-kilometer", + "volume-liter", + "digital-megabit", + "digital-megabyte", + "length-meter", + "length-mile", + "length-mile-scandinavian", + "volume-milliliter", + "length-millimeter", + "duration-millisecond", + "duration-minute", + "duration-month", + "mass-ounce", + "concentr-percent", + "digital-petabyte", + "mass-pound", + "duration-second", + "mass-stone", + "digital-terabit", + "digital-terabyte", + 
"duration-week", + "length-yard", + "duration-year" + }; +#define BUFFER_LEN 256 + char buffer[BUFFER_LEN]; + UChar ubuffer[BUFFER_LEN]; + const char* locale = "ar"; + UErrorCode status = U_ZERO_ERROR; + UFormattedNumber* formatted = unumf_openResult(&status); + if (U_FAILURE(status)) { + log_err("FAIL: unumf_openResult failed"); + return; + } + for(int32_t i=0; i < UPRV_LENGTHOF(simpleMeasureUnits); ++i) { + for(int32_t j=0; j < UPRV_LENGTHOF(simpleMeasureUnits); ++j) { + status = U_ZERO_ERROR; + sprintf(buffer, "measure-unit/%s per-measure-unit/%s", + simpleMeasureUnits[i], simpleMeasureUnits[j]); + int32_t outputlen = 0; + u_strFromUTF8(ubuffer, BUFFER_LEN, &outputlen, buffer, strlen(buffer), &status); + if (U_FAILURE(status)) { + log_err("FAIL u_strFromUTF8: %s = %s ( %s )\n", locale, buffer, + u_errorName(status)); + } + UNumberFormatter* nf = unumf_openForSkeletonAndLocale( + ubuffer, outputlen, locale, &status); + if (U_FAILURE(status)) { + log_err("FAIL unumf_openForSkeletonAndLocale: %s = %s ( %s )\n", + locale, buffer, u_errorName(status)); + } else { + unumf_formatDouble(nf, 1, formatted, &status); + if (U_FAILURE(status)) { + log_err("FAIL unumf_formatDouble: %s = %s ( %s )\n", + locale, buffer, u_errorName(status)); + } + } + unumf_close(nf); + } + } + unumf_closeResult(formatted); +} #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/numbertest.h b/icu4c/source/test/intltest/numbertest.h index 2597aa80cce..cb0be280a9a 100644 --- a/icu4c/source/test/intltest/numbertest.h +++ b/icu4c/source/test/intltest/numbertest.h @@ -258,6 +258,7 @@ class NumberSkeletonTest : public IntlTest { void defaultTokens(); void flexibleSeparators(); void wildcardCharacters(); + void perUnitInArabic(); void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0); diff --git a/icu4c/source/test/intltest/numbertest_skeletons.cpp b/icu4c/source/test/intltest/numbertest_skeletons.cpp index 3aad601a457..d5e4bcfd512 100644 --- 
a/icu4c/source/test/intltest/numbertest_skeletons.cpp +++ b/icu4c/source/test/intltest/numbertest_skeletons.cpp @@ -30,6 +30,7 @@ void NumberSkeletonTest::runIndexedTest(int32_t index, UBool exec, const char*& TESTCASE_AUTO(defaultTokens); TESTCASE_AUTO(flexibleSeparators); TESTCASE_AUTO(wildcardCharacters); + TESTCASE_AUTO(perUnitInArabic); TESTCASE_AUTO_END; } @@ -362,5 +363,77 @@ void NumberSkeletonTest::expectedErrorSkeleton(const char16_t** cases, int32_t c } } +void NumberSkeletonTest::perUnitInArabic() { + IcuTestErrorCode status(*this, "perUnitInArabic"); + + struct TestCase { + const char16_t* type; + const char16_t* subtype; + } cases[] = { + {u"area", u"acre"}, + {u"digital", u"bit"}, + {u"digital", u"byte"}, + {u"temperature", u"celsius"}, + {u"length", u"centimeter"}, + {u"duration", u"day"}, + {u"angle", u"degree"}, + {u"temperature", u"fahrenheit"}, + {u"volume", u"fluid-ounce"}, + {u"length", u"foot"}, + {u"volume", u"gallon"}, + {u"digital", u"gigabit"}, + {u"digital", u"gigabyte"}, + {u"mass", u"gram"}, + {u"area", u"hectare"}, + {u"duration", u"hour"}, + {u"length", u"inch"}, + {u"digital", u"kilobit"}, + {u"digital", u"kilobyte"}, + {u"mass", u"kilogram"}, + {u"length", u"kilometer"}, + {u"volume", u"liter"}, + {u"digital", u"megabit"}, + {u"digital", u"megabyte"}, + {u"length", u"meter"}, + {u"length", u"mile"}, + {u"length", u"mile-scandinavian"}, + {u"volume", u"milliliter"}, + {u"length", u"millimeter"}, + {u"duration", u"millisecond"}, + {u"duration", u"minute"}, + {u"duration", u"month"}, + {u"mass", u"ounce"}, + {u"concentr", u"percent"}, + {u"digital", u"petabyte"}, + {u"mass", u"pound"}, + {u"duration", u"second"}, + {u"mass", u"stone"}, + {u"digital", u"terabit"}, + {u"digital", u"terabyte"}, + {u"duration", u"week"}, + {u"length", u"yard"}, + {u"duration", u"year"}, + }; + + for (const auto& cas1 : cases) { + for (const auto& cas2 : cases) { + UnicodeString skeleton(u"measure-unit/"); + skeleton += cas1.type; + skeleton += u"-"; + 
skeleton += cas1.subtype; + skeleton += u" "; + skeleton += u"per-measure-unit/"; + skeleton += cas2.type; + skeleton += u"-"; + skeleton += cas2.subtype; + + status.setScope(skeleton); + UnicodeString actual = NumberFormatter::forSkeleton(skeleton, status).locale("ar") + .formatDouble(5142.3, status) + .toString(status); + status.errIfFailureAndReset(); + } + } +} #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/LongNameHandler.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/LongNameHandler.java index 459e5140ba7..4f58c66ba1c 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/LongNameHandler.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/LongNameHandler.java @@ -241,8 +241,10 @@ public class LongNameHandler implements MicroPropsGenerator, ModifierStore { String compiled = SimpleFormatterImpl .compileToStringMinMaxArguments(rawPerUnitFormat, sb, 2, 2); String secondaryFormat = getWithPlural(secondaryData, StandardPlural.ONE); + + // Some "one" pattern may not contain "{0}". For example in "ar" or "ne" locale. 
String secondaryCompiled = SimpleFormatterImpl - .compileToStringMinMaxArguments(secondaryFormat, sb, 1, 1); + .compileToStringMinMaxArguments(secondaryFormat, sb, 0, 1); String secondaryString = SimpleFormatterImpl.getTextWithNoArguments(secondaryCompiled) .trim(); perUnitFormat = SimpleFormatterImpl.formatCompiledPattern(compiled, "{0}", secondaryString); diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberSkeletonTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberSkeletonTest.java index 03caedc29ef..db6ee5ed9d5 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberSkeletonTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberSkeletonTest.java @@ -349,4 +349,65 @@ public class NumberSkeletonTest { assertEquals(mode.toString(), modeString, skeleton.substring(14)); } } + + @Test + public void perUnitInArabic() { + String[][] cases = { + {"area", "acre"}, + {"digital", "bit"}, + {"digital", "byte"}, + {"temperature", "celsius"}, + {"length", "centimeter"}, + {"duration", "day"}, + {"angle", "degree"}, + {"temperature", "fahrenheit"}, + {"volume", "fluid-ounce"}, + {"length", "foot"}, + {"volume", "gallon"}, + {"digital", "gigabit"}, + {"digital", "gigabyte"}, + {"mass", "gram"}, + {"area", "hectare"}, + {"duration", "hour"}, + {"length", "inch"}, + {"digital", "kilobit"}, + {"digital", "kilobyte"}, + {"mass", "kilogram"}, + {"length", "kilometer"}, + {"volume", "liter"}, + {"digital", "megabit"}, + {"digital", "megabyte"}, + {"length", "meter"}, + {"length", "mile"}, + {"length", "mile-scandinavian"}, + {"volume", "milliliter"}, + {"length", "millimeter"}, + {"duration", "millisecond"}, + {"duration", "minute"}, + {"duration", "month"}, + {"mass", "ounce"}, + {"concentr", "percent"}, + {"digital", "petabyte"}, + {"mass", "pound"}, + {"duration", "second"}, + {"mass", "stone"}, + {"digital", "terabit"}, + {"digital", "terabyte"}, + {"duration", "week"}, + {"length", "yard"}, + 
{"duration", "year"}, + }; + + ULocale arabic = new ULocale("ar"); + for (String[] cas1 : cases) { + for (String[] cas2 : cases) { + String skeleton = "measure-unit/"; + skeleton += cas1[0] + "-" + cas1[1] + " per-measure-unit/" + cas2[0] + "-" + cas2[1]; + + String actual = NumberFormatter.forSkeleton(skeleton).locale(arabic).format(5142.3) + .toString(); + // Just make sure it won't throw exception + } + } + } } From 125e29d54990e74845e1546851b5afa3efab06ce Mon Sep 17 00:00:00 2001 From: Peter Edberg Date: Wed, 15 Apr 2020 23:30:01 -0700 Subject: [PATCH 14/14] ICU-21061 integrate CLDR release-37-beta3 to master (cherry-picked from 71fa037844dbee3f58b7928097437b2fc635db61) --- icu4c/source/data/misc/langInfo.txt | 248 ++++++++++---------- icu4c/source/data/misc/supplementalData.txt | 2 +- icu4j/main/shared/data/icudata.jar | 4 +- icu4j/main/shared/data/icutzdata.jar | 2 +- icu4j/main/shared/data/testdata.jar | 4 +- 5 files changed, 130 insertions(+), 130 deletions(-) diff --git a/icu4c/source/data/misc/langInfo.txt b/icu4c/source/data/misc/langInfo.txt index 76d7287dab7..068b5e709dc 100644 --- a/icu4c/source/data/misc/langInfo.txt +++ b/icu4c/source/data/misc/langInfo.txt @@ -2436,15 +2436,15 @@ f93702e439e53be83d } // lsrs } // likely match{ - trie:bin{ // BytesTrie: 2627 bytes -00196ec41e74c1b077c0ca77a28078a2 -8a79a29a7a036c346daa8ee834f51165 + trie:bin{ // BytesTrie: 2629 bytes +00196ec42074c1b077c0ca77a28078a2 +8a79a29a7a036c346daa90e834f51165 ee5d12ed6df335117ae820022a854834 4c166174ee48616ef34911616e02e99e f3a245f41248616e01f347f42007b314 -b32ab426b536b605b3c8a3b329b429b6 -2910b5292a2bb026b122b205b3c88fb3 -29b429b62905b3c885b329b429b62912 +b32ab426b536b605b3c8a5b329b429b6 +2910b5292a2bb026b122b205b3c891b3 +29b429b62905b3c887b329b429b62912 48616e01f349f4491348616ef43f0175 2aef1166f25d12f57ae835016d2ae811 65ee5d02eda241f622f7116de7350369 @@ -2453,27 +2453,27 @@ e57ae8351165ee5c012a8548166562f2 4c6174ee35744a75a2b476016b2c7212 ef65f43501eb22f4116df3350ee171eb 
0eeba2a5eea2dbefa2d8f41172f55de1 -4ee592e7aa40e91165ee5c012a854516 +4ee592e7aa42e91165ee5c012a854516 7468e94c6174ee350165427410e12001 2a854c166174ee54616dec4910ee5c01 2a855416616dec4c6174ee3501654274 10e520012a854c166174ee54656cf549 10ee5c012a855416656cf54c6174ee35 -6c1c6c366dac0575367812f96de73512 +6c1c6c366dac0775367812f96de73512 e865ee5d01eda25af9126b6cee356432 65a2dc6b12e76de73512f86de7350472 527a58e75ef2a28ffa1172f55c012a85 4c166174ee437972ec3512eb6df33512 f375fa35117ae84971c16171a2ac72a4 49730011e855f12df130f234f428f5a8 -16f71165ee5d107301e829f220022a85 +18f71165ee5d107301e829f220022a85 43344c166174ee437972ec2b167972ec 4c6174ee2be832e942ee22ef1165ee5d -0262ac2768227310f2291165ee5c012a +0262ac2968227310f2291165ee5c012a 855316696ee84c6174ee35702e705272 -58735ee1b0aae41165ee5c012a854116 +58735ee1b0ace41165ee5c012a854116 7261e24c6174ee3512f66ff23512ef73 -e33501e8aeeff91161e1296434674068 +e33501e8aef1f91161e1296434674068 486b12e76de73501e322ee1173e33513 e36b6cee3512f561f23504753a769077 a26478a26df51165f35d0cec16f30bf3 @@ -2485,123 +2485,123 @@ e80be82ae926ea22ec1171f535e126e3 22e51171f53503e12ae326e822f31171 f5350aef14f409f426f522f71171f535 ef26f022f21171f535e12ee32ae826ec -22ee1171f5351171f5350361a80aed30 +22ee1171f5351171f5350361a80ced30 eea285f71166f25d1164e5496ea26f6f -a2d370086b4d6bac03735ae168f386f4 -1170f4a28007b3c4c2b3aac6b4aac3b5 -aac0b601b129b62901e5a2d3f41170f3 +a2d570086b4d6bac05735ae168f386f4 +1170f4a28007b3c4c4b3aac8b4aac5b5 +aac2b601b129b62901e5a2d5f41170f3 351165ee5c012a8547167572f54c6174 ee351165ee5c012a8541167261e24c61 74ee356234633a65406712e161f23512 f470f33512ed65ee4912ec6df3350a79 -44ee17ee2eef38f91165ee5d106e01e2 -49ef490164786e10e2237948e256e511 -65ee5c012a8544166576e14c6174ee35 -01e4a66eee1165ee5d01642c6e01ee49 -ef2310e13161a8756834693a6c427312 -ef65ee5d12e467ee3513f16b6cee3513 -e56c75f935046b54725ce3acfbeda2dd -f21165ee5c012a854f167279e14c6174 -ee3513e96b6cee3502e3a6c6ee22f311 -6df33567c32c6ac2486aa2b56ba40d6c 
-a4b06d00107669ee35eea40af24ef374 -f4a29af91165ee5c012a854d16796df2 -4c6174ee350168a64f6d10f220012a85 -4c166174ee446576e1491169e45d7676 -e7a448e9a60aec0165426d10ec20012a -854c166174ee4d6c79ed4910ee5c012a -854d166c79ed4c6174ee3512e66dee35 -6e1f6e34713a73407512e96df33512f0 -7ae83512e76df33501e8ac66e9116df3 -356134653a66406912ee6df33512f86d -f33512ef6df33502e14ae246e51165ee -5d02612ee13af61169e44901eb22f811 -6df335116ae120032a8548384b784c16 -6174ee4a7061ee2b026136694272156b -f44a7061ee2b156ee94a7061ee2b1172 -e10148464a127061ee2b12616ee10148 -2c4a127061ee2b12726bf42b0aeb73ef -36ef2ef57cf91172f55d116bef20022a -8548464a12616def01482c4b126f72e5 -2b12616ee72b11616e01e722e9134b6f -72e52b1174f25deba897ed6aee016542 -6b10ee20012a854c166174ee4b6e64e1 -4910ee5c012a854b166e64e14c6174ee -351165ee5c012a854b16686df24c6174 -ee357236763c7848e14ee71166f25d12 -e965ee5d01e222f2116df33512e46df3 -351165ee5c012a854716656ff24c6174 -ee350e753ee221e24ee7a29dee4cef11 -65ee5c012a854c16616fef4c6174ee35 -1164e5291166f25d753277387a40e111 -69f44912e166f25d13e76c75f93512e8 -7ae8356f2a6f48724e735c7402e730ef -22f3126c75f935116cf63512fa65ee5d -01e922ed126c75f93513ed6c75f93563 -4669526b02e226ef22f3126c75f93512 -6c75f93501e522e6116df33512f76df3 -35676a68a2816905e40ae4a88ce734f3 -1165ee49642e6b36e11165ee5d13e16c -75f93512f469f5350a7531ec1eeca40d -eea40af50167286810e95d10f520012a -854c166174ee47756af249752ee122e4 -1165ee4901e922ee1167ee3561386b4c -6e546f5a7312f764e52902e1a257eea4 -6bf8116fed3513f06b70e53512f767ee -3513ed6b6feb3507e934e94ef26cf4a6 -f7f91172f55c012a854116726dee4379 -72ec351168e920012a854c166174ee44 -6576e1490162287310e82910f3296132 -6a4c7352e11165ee5d02e530eba40df7 -1165ee49116fed3512e96df33512ee7a -e83564c0dd648865a248660366327538 -ef6af9116eec4912ed66e63506e80be8 -2ae926f122f61166e635e226e522e611 -66e6351166e6351164e149027434753a -e1106e01e231ef3112f96ee53512f06d -f33506ee78ee34efa278f396f51165f3 -491165eea28007b317b362b430b55ab6 -01b229b62904b027b127b327b427b527 
-2a2bb030b12cb201b229b62904b029b1 -29b329b427b5291165f3a28007b30cb3 -42b43eb53ab601b129b6292a2bb026b1 -4eb204b029b229b329b429b52904b029 -b229b329b429b52901b129b6296e2e79 -36e51165ee5d13e26b6cee3513ef6b6c -ee3561a25662a482630a6f2c7a167a30 -efa4b5f91165ee4901e822ef117ae835 -117ae8356f3070367212f366f24912e1 -6df33512f87ae8356438653e68466a4c -6b12e261f25d12ef7ae83513e26669ec -5d12f265ee4912f97ae835001575afe2 -88f275f24af9a26cfa1172f55c012a85 -4c166174ee437972ec351161f220012a -5a4c166174ee417261e24807b327b364 -b426b522b605b436b429b529b6298407 -b313b33cb426b522b605b422b429b529 -b62910b3292a2bb026b122b205b40fb4 -29b529b62905b406b429b529b629b029 -b129b2291165f349e22ee632eb1165ee -5d1172f55d116eec4975a41d7634793a -7a4ee1127373f92912ec61f23503e82a -ec26ee22f01161f23512e261fa356635 -6c1e6c3e70447203f12af326f922fa11 -61f23512ee73f13501e322e41161f235 -663069366a12f061f23512e261f23513 -e97379f235613e624e635a64906501e2 -22e31161f23502e526ef32f41173f135 -01e822f61161f23505f709f726f822f9 -1161f235e82eed22f11161f2351165ee -5d1161f23512e661f2350c7655e83de8 -3eee42f286f30168287310e82910f229 -1168e95d0162426510ee5c012a854216 -656ee74c6174ee3510ee20012a854c16 -6174ee42656ee7491166f249762e7a3a -e51172f54901e522f5116df33512e36d -e7356a1b6a306d447412ea6df33501ee -2af1116de735116df33512ed6de73562 -3065366812f26de73512fa61f23512ed -65ee5d +46ee19ee2eef38f91165ee5d106e01e2 +49ef4901647c6e01e223ee497948e256 +e51165ee5c012a8544166576e14c6174 +ee3501e4a66eee1165ee5d01642c6e01 +ee49ef2310e13161a8756834693a6c42 +7312ef65ee5d12e467ee3513f16b6cee +3513e56c75f935046b54725ce3acfbed +a2ddf21165ee5c012a854f167279e14c +6174ee3513e96b6cee3502e3a6c6ee22 +f3116df33567c32c6ac2486aa2b56ba4 +0d6ca4b06d00107669ee35eea40af24e +f374f4a29af91165ee5c012a854d1679 +6df24c6174ee350168a64f6d10f22001 +2a854c166174ee446576e1491169e45d +7676e7a448e9a60aec0165426d10ec20 +012a854c166174ee4d6c79ed4910ee5c +012a854d166c79ed4c6174ee3512e66d +ee356e1f6e34713a73407512e96df335 +12f07ae83512e76df33501e8ac66e911 
+6df3356134653a66406912ee6df33512 +f86df33512ef6df33502e14ae246e511 +65ee5d02612ee13af61169e44901eb22 +f8116df335116ae120032a8548384b78 +4c166174ee4a7061ee2b026136694272 +156bf44a7061ee2b156ee94a7061ee2b +1172e10148464a127061ee2b12616ee1 +01482c4a127061ee2b12726bf42b0aeb +73ef36ef2ef57cf91172f55d116bef20 +022a8548464a12616def01482c4b126f +72e52b12616ee72b11616e01e722e913 +4b6f72e52b1174f25deba897ed6aee01 +65426b10ee20012a854c166174ee4b6e +64e14910ee5c012a854b166e64e14c61 +74ee351165ee5c012a854b16686df24c +6174ee357236763c7848e14ee71166f2 +5d12e965ee5d01e222f2116df33512e4 +6df3351165ee5c012a854716656ff24c +6174ee350e753ee221e24ee7a29dee4c +ef1165ee5c012a854c16616fef4c6174 +ee351164e5291166f25d753277387a40 +e11169f44912e166f25d13e76c75f935 +12e87ae8356f2a6f48724e735c7402e7 +30ef22f3126c75f935116cf63512fa65 +ee5d01e922ed126c75f93513ed6c75f9 +35634669526b02e226ef22f3126c75f9 +35126c75f93501e522e6116df33512f7 +6df335676a68a2816905e40ae4a88ce7 +34f31165ee49642e6b36e11165ee5d13 +e16c75f93512f469f5350a7531ec1eec +a40deea40af50167286810e95d10f520 +012a854c166174ee47756af249752ee1 +22e41165ee4901e922ee1167ee356138 +6b4c6e546f5a7312f764e52902e1a257 +eea46bf8116fed3513f06b70e53512f7 +67ee3513ed6b6feb3507e934e94ef26c +f4a6f7f91172f55c012a854116726dee +437972ec351168e920012a854c166174 +ee446576e1490162287310e82910f329 +61326a4c7352e11165ee5d02e530eba4 +0df71165ee49116fed3512e96df33512 +ee7ae83564c0dd648865a24866036632 +7538ef6af9116eec4912ed66e63506e8 +0be82ae926f122f61166e635e226e522 +e61166e6351166e6351164e149027434 +753ae1106e01e231ef3112f96ee53512 +f06df33506ee78ee34efa278f396f511 +65f3491165eea28007b317b362b430b5 +5ab601b229b62904b027b127b327b427 +b5272a2bb030b12cb201b229b62904b0 +29b129b329b427b5291165f3a28007b3 +0cb342b43eb53ab601b129b6292a2bb0 +26b14eb204b029b229b329b429b52904 +b029b229b329b429b52901b129b6296e +2e7936e51165ee5d13e26b6cee3513ef +6b6cee3561a25662a482630a6f2c7a16 +7a30efa4b5f91165ee4901e822ef117a +e835117ae8356f3070367212f366f249 
+12e16df33512f87ae8356438653e6846 +6a4c6b12e261f25d12ef7ae83513e266 +69ec5d12f265ee4912f97ae835001575 +afe288f275f24af9a26cfa1172f55c01 +2a854c166174ee437972ec351161f220 +012a5a4c166174ee417261e24807b327 +b364b426b522b605b436b429b529b629 +8407b313b33cb426b522b605b422b429 +b529b62910b3292a2bb026b122b205b4 +0fb429b529b62905b406b429b529b629 +b029b129b2291165f349e22ee632eb11 +65ee5d1172f55d116eec4975a41d7634 +793a7a4ee1127373f92912ec61f23503 +e82aec26ee22f01161f23512e261fa35 +66356c1e6c3e70447203f12af326f922 +fa1161f23512ee73f13501e322e41161 +f235663069366a12f061f23512e261f2 +3513e97379f235613e624e635a649065 +01e222e31161f23502e526ef32f41173 +f13501e822f61161f23505f709f726f8 +22f91161f235e82eed22f11161f23511 +65ee5d1161f23512e661f2350c7655e8 +3de83eee42f286f30168287310e82910 +f2291168e95d0162426510ee5c012a85 +4216656ee74c6174ee3510ee20012a85 +4c166174ee42656ee7491166f249762e +7a3ae51172f54901e522f5116df33512 +e36de7356a1b6a306d447412ea6df335 +01ee2af1116de735116df33512ed6de7 +35623065366812f26de73512fa61f235 +12ed65ee5d } // trie regionToPartitions:bin{ // 1677 bytes 000008090a00020000000b0009000201 diff --git a/icu4c/source/data/misc/supplementalData.txt b/icu4c/source/data/misc/supplementalData.txt index 6c0de73eded..4f341a775a7 100644 --- a/icu4c/source/data/misc/supplementalData.txt +++ b/icu4c/source/data/misc/supplementalData.txt @@ -18649,7 +18649,7 @@ supplementalData:table(nofallback){ "nn", "no", "20", - "1", + "0", } { "nso", diff --git a/icu4j/main/shared/data/icudata.jar b/icu4j/main/shared/data/icudata.jar index b2b2b6f0052..2842ae6def2 100644 --- a/icu4j/main/shared/data/icudata.jar +++ b/icu4j/main/shared/data/icudata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5b89416e9ba1ddffa9e0e2a2d0023e40b7ce9ddde3aef80610c0d317187dce9a -size 13149615 +oid sha256:10956955759da882d9dfd0f70187a2f221581778ec04650ed6cbd35dcf57271a +size 13149611 diff --git a/icu4j/main/shared/data/icutzdata.jar b/icu4j/main/shared/data/icutzdata.jar index 
0fad4155b90..3d31dfbcd2b 100644 --- a/icu4j/main/shared/data/icutzdata.jar +++ b/icu4j/main/shared/data/icutzdata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ac3d2dd0ae390a10159fee6fa71b375729fa2b9a461c393605a7e547ef98d7da +oid sha256:dd223ac934d8069e9d4669518eb7893df90ffd38e7b7ac1092d790adb6d41882 size 94304 diff --git a/icu4j/main/shared/data/testdata.jar b/icu4j/main/shared/data/testdata.jar index c0e2d9e3963..37eb65026de 100644 --- a/icu4j/main/shared/data/testdata.jar +++ b/icu4j/main/shared/data/testdata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:12001a00c73f1436871c9b237bed66228595b73383ebf8908b51d76b35b03862 -size 726452 +oid sha256:20c5df6a33d105b6e4add95a906fd104602a284f67fea0d5a86fdb96106cf682 +size 723481