diff --git a/icu4c/source/i18n/Makefile.in b/icu4c/source/i18n/Makefile.in index f22c20eabca..f6f9860fb3a 100644 --- a/icu4c/source/i18n/Makefile.in +++ b/icu4c/source/i18n/Makefile.in @@ -114,7 +114,8 @@ numparse_symbols.o numparse_decimal.o numparse_scientific.o numparse_currency.o numparse_affixes.o numparse_compositions.o numparse_validators.o \ numrange_fluent.o numrange_impl.o \ erarules.o \ -formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o formatted_string_builder.o +formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o formatted_string_builder.o \ +measunit_extra.o ## Header files to install HEADERS = $(srcdir)/unicode/*.h diff --git a/icu4c/source/i18n/i18n.vcxproj b/icu4c/source/i18n/i18n.vcxproj index 5c6760d220e..f22fd2ff097 100644 --- a/icu4c/source/i18n/i18n.vcxproj +++ b/icu4c/source/i18n/i18n.vcxproj @@ -187,6 +187,7 @@ + diff --git a/icu4c/source/i18n/i18n.vcxproj.filters b/icu4c/source/i18n/i18n.vcxproj.filters index a1813fc0696..eade0d07354 100644 --- a/icu4c/source/i18n/i18n.vcxproj.filters +++ b/icu4c/source/i18n/i18n.vcxproj.filters @@ -204,6 +204,9 @@ formatting + + formatting + formatting diff --git a/icu4c/source/i18n/i18n_uwp.vcxproj b/icu4c/source/i18n/i18n_uwp.vcxproj index 989cef88fe5..2466ad95864 100644 --- a/icu4c/source/i18n/i18n_uwp.vcxproj +++ b/icu4c/source/i18n/i18n_uwp.vcxproj @@ -408,6 +408,7 @@ + diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp new file mode 100644 index 00000000000..d84e50af350 --- /dev/null +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -0,0 +1,231 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// Extra functions for MeasureUnit not needed for all clients. +// Separate .o file so that it can be removed for modularity. + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +// Allow implicit conversion from char16_t* to UnicodeString for this file: +// Helpful in toString methods and elsewhere. +#define UNISTR_FROM_STRING_EXPLICIT + +#include "cstring.h" +#include "ucln_in.h" +#include "umutex.h" +#include "unicode/errorcode.h" +#include "unicode/measunit.h" +#include "unicode/ucharstrie.h" +#include "unicode/ucharstriebuilder.h" + +U_NAMESPACE_BEGIN + + +namespace { + +// This is to ensure we only insert positive integers into the trie +constexpr int32_t kSIPrefixOffset = 64; + +constexpr int32_t kSyntaxPartOffset = 256; + +enum SyntaxPart { + SYNTAX_PART_PER = kSyntaxPartOffset, + SYNTAX_PART_SQUARE, + SYNTAX_PART_CUBIC, + SYNTAX_PART_P1, + SYNTAX_PART_P2, + SYNTAX_PART_P3, + SYNTAX_PART_P4, + SYNTAX_PART_P5, + SYNTAX_PART_P6, + SYNTAX_PART_P7, + SYNTAX_PART_P8, + SYNTAX_PART_P9, +}; + +constexpr int32_t kSimpleUnitOffset = 512; + +// FIXME: Get this list from data +const char16_t* gSimpleUnits[] = { + u"100kilometer", + u"acre", + u"ampere", + u"arc-minute", + u"arc-second", + u"astronomical-unit", + u"atmosphere", + u"bar", + u"barrel", + u"bit", + u"british-thermal-unit", + u"bushel", + u"byte", + u"calorie", + u"carat", + u"celsius", + u"century", + u"cup", + u"cup-metric", + u"dalton", + u"day", + u"day-person", + u"decade", + u"degree", + u"dot", // (as in "dot-per-inch") + u"dunam", + u"earth-mass", + u"electronvolt", + u"em", + u"fahrenheit", + u"fathom", + u"fluid-ounce", + u"fluid-ounce-imperial", + u"foodcalorie", + u"foot", + u"furlong", + u"g-force", + u"gallon", + u"gallon-imperial", + u"generic", // (i.e., "temperature-generic") + u"gram", + u"hectare", // (note: other "are" derivatives are uncommon) + u"hertz", + u"horsepower", + u"hour", + u"inch", + u"inch-hg", + u"joule", + u"karat", + u"kelvin", + u"knot", + u"light-year", + u"liter", + u"lux", + u"meter", + u"meter-of-mercury", // (not "millimeter-of-mercury") + u"metric-ton", + u"mile", + u"mile-scandinavian", + u"minute", + u"mole", + u"month", + u"month-person", + u"nautical-mile", + u"newton", + u"ohm", + u"ounce", + u"ounce-troy", + u"parsec", + u"pascal", + u"percent", + u"permille", + u"permillion", + u"permyriad", + u"pint", + u"pint-metric", + u"pixel", + u"point", + u"pound", + u"pound-force", + u"quart", + u"radian", + u"revolution", + u"second", + u"solar-luminosity", + u"solar-mass", + u"solar-radius", + u"stone", + u"tablespoon", + u"teaspoon", + u"therm-us", + u"ton", + u"volt", + u"watt", + u"week", + u"week-person", + u"yard", + u"year", + u"year-person", +}; + +icu::UInitOnce gUnitExtrasInitOnce = U_INITONCE_INITIALIZER; + +char16_t* kSerializedUnitExtrasStemTrie = nullptr; + +UBool U_CALLCONV cleanupUnitExtras() { + uprv_free(kSerializedUnitExtrasStemTrie); + kSerializedUnitExtrasStemTrie = nullptr; + gUnitExtrasInitOnce.reset(); + return TRUE; +} + +void U_CALLCONV initUnitExtras(UErrorCode& status) { + ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras); + + UCharsTrieBuilder b(status); + if (U_FAILURE(status)) { return; } + + // Add SI prefixes + b.add(u"yotta", kSIPrefixOffset + UMEASURE_SI_PREFIX_YOTTA, status); + b.add(u"zetta", kSIPrefixOffset + UMEASURE_SI_PREFIX_ZETTA, status); + b.add(u"exa", kSIPrefixOffset + UMEASURE_SI_PREFIX_EXA, status); + b.add(u"peta", kSIPrefixOffset + UMEASURE_SI_PREFIX_PETA, status); + b.add(u"tera", kSIPrefixOffset + UMEASURE_SI_PREFIX_TERA, status); + b.add(u"giga", kSIPrefixOffset + UMEASURE_SI_PREFIX_GIGA, status); + b.add(u"mega", kSIPrefixOffset + UMEASURE_SI_PREFIX_MEGA, status); + b.add(u"kilo", kSIPrefixOffset + UMEASURE_SI_PREFIX_KILO, status); + b.add(u"hecto", kSIPrefixOffset + UMEASURE_SI_PREFIX_HECTO, status); + b.add(u"deka", kSIPrefixOffset + UMEASURE_SI_PREFIX_DEKA, status); + b.add(u"deci", kSIPrefixOffset + UMEASURE_SI_PREFIX_DECI, status); + b.add(u"centi", kSIPrefixOffset + UMEASURE_SI_PREFIX_CENTI, status); + b.add(u"milli", kSIPrefixOffset + UMEASURE_SI_PREFIX_MILLI, status); + b.add(u"micro", kSIPrefixOffset + UMEASURE_SI_PREFIX_MICRO, status); + b.add(u"nano", kSIPrefixOffset + UMEASURE_SI_PREFIX_NANO, status); + b.add(u"pico", kSIPrefixOffset + UMEASURE_SI_PREFIX_PICO, status); + b.add(u"femto", kSIPrefixOffset + UMEASURE_SI_PREFIX_FEMTO, status); + b.add(u"atto", kSIPrefixOffset + UMEASURE_SI_PREFIX_ATTO, status); + b.add(u"zepto", kSIPrefixOffset + UMEASURE_SI_PREFIX_ZEPTO, status); + b.add(u"yocto", kSIPrefixOffset + UMEASURE_SI_PREFIX_YOCTO, status); + if (U_FAILURE(status)) { return; } + + // Add syntax parts (per, power prefixes) + b.add(u"-per-", SYNTAX_PART_PER, status); + b.add(u"square-", SYNTAX_PART_SQUARE, status); + b.add(u"cubic-", SYNTAX_PART_CUBIC, status); + b.add(u"p1", SYNTAX_PART_P1, status); + b.add(u"p2", SYNTAX_PART_P2, status); + b.add(u"p3", SYNTAX_PART_P3, status); + b.add(u"p4", SYNTAX_PART_P4, status); + b.add(u"p5", SYNTAX_PART_P5, status); + b.add(u"p6", SYNTAX_PART_P6, status); + b.add(u"p7", SYNTAX_PART_P7, status); + b.add(u"p8", SYNTAX_PART_P8, status); + b.add(u"p9", SYNTAX_PART_P9, status); + if (U_FAILURE(status)) { return; } + + // Add sanctioned simple units by offset + int32_t simpleUnitOffset = kSimpleUnitOffset; + for (auto simpleUnit : gSimpleUnits) { + b.add(simpleUnit, simpleUnitOffset++, status); + } + + // Build the CharsTrie + // TODO: Use SLOW or FAST here? + UnicodeString result; + b.buildUnicodeString(USTRINGTRIE_BUILD_FAST, result, status); + if (U_FAILURE(status)) { return; } + + // Copy the result into the global constant pointer + size_t numBytes = result.length() * sizeof(char16_t); + kSerializedUnitExtrasStemTrie = static_cast(uprv_malloc(numBytes)); + uprv_memcpy(kSerializedUnitExtrasStemTrie, result.getBuffer(), numBytes); +} + +} // namespace + + +U_NAMESPACE_END + +#endif /* !UNCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/ucln_in.h b/icu4c/source/i18n/ucln_in.h index 2f70a8500e1..765cdd559fb 100644 --- a/icu4c/source/i18n/ucln_in.h +++ b/icu4c/source/i18n/ucln_in.h @@ -26,6 +26,7 @@ as the functions are suppose to be called. It's usually best to have child dependencies called first. */ typedef enum ECleanupI18NType { UCLN_I18N_START = -1, + UCLN_I18N_UNIT_EXTRAS, UCLN_I18N_NUMBER_SKELETONS, UCLN_I18N_CURRENCY_SPACING, UCLN_I18N_SPOOF, diff --git a/icu4c/source/i18n/unicode/measunit.h b/icu4c/source/i18n/unicode/measunit.h index 16790f8a3d9..1c535540467 100644 --- a/icu4c/source/i18n/unicode/measunit.h +++ b/icu4c/source/i18n/unicode/measunit.h @@ -29,7 +29,6 @@ U_NAMESPACE_BEGIN class StringEnumeration; -class MeasureUnitFields; /** * Enumeration for SI prefixes, such as "kilo". diff --git a/icu4c/source/test/depstest/dependencies.txt b/icu4c/source/test/depstest/dependencies.txt index 1d726b6ea32..080beeff637 100644 --- a/icu4c/source/test/depstest/dependencies.txt +++ b/icu4c/source/test/depstest/dependencies.txt @@ -870,6 +870,7 @@ library: i18n listformatter formatting formattable_cnv regex regex_cnv translit double_conversion number_representation number_output numberformatter numberparser + units_extra universal_time_scale uclean_i18n @@ -1053,6 +1054,11 @@ group: sharedbreakiterator deps breakiterator +group: units_extra + measunit_extra.o + deps + units + group: units measunit.o currunit.o nounit.o deps