Adding measunit_extra.cpp with basic trie

This commit is contained in:
Shane F. Carr 2020-01-14 16:47:18 +01:00
parent c787452cfe
commit 9a6caa01af
8 changed files with 245 additions and 2 deletions

View file

@ -114,7 +114,8 @@ numparse_symbols.o numparse_decimal.o numparse_scientific.o numparse_currency.o
numparse_affixes.o numparse_compositions.o numparse_validators.o \
numrange_fluent.o numrange_impl.o \
erarules.o \
formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o formatted_string_builder.o
formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o formatted_string_builder.o \
measunit_extra.o
## Header files to install
HEADERS = $(srcdir)/unicode/*.h

View file

@ -187,6 +187,7 @@
<ClCompile Include="ulistformatter.cpp" />
<ClCompile Include="measfmt.cpp" />
<ClCompile Include="measunit.cpp" />
<ClCompile Include="measunit_extra.cpp" />
<ClCompile Include="measure.cpp" />
<ClCompile Include="msgfmt.cpp" />
<ClCompile Include="nfrs.cpp" />

View file

@ -204,6 +204,9 @@
<ClCompile Include="measunit.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="measunit_extra.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="measure.cpp">
<Filter>formatting</Filter>
</ClCompile>

View file

@ -408,6 +408,7 @@
<ClCompile Include="ulistformatter.cpp" />
<ClCompile Include="measfmt.cpp" />
<ClCompile Include="measunit.cpp" />
<ClCompile Include="measunit_extra.cpp" />
<ClCompile Include="measure.cpp" />
<ClCompile Include="msgfmt.cpp" />
<ClCompile Include="nfrs.cpp" />

View file

@ -0,0 +1,231 @@
// © 2020 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// Extra functions for MeasureUnit not needed for all clients.
// Separate .o file so that it can be removed for modularity.
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
// Allow implicit conversion from char16_t* to UnicodeString for this file:
// Helpful in toString methods and elsewhere.
#define UNISTR_FROM_STRING_EXPLICIT
#include "cstring.h"
#include "ucln_in.h"
#include "umutex.h"
#include "unicode/errorcode.h"
#include "unicode/measunit.h"
#include "unicode/ucharstrie.h"
#include "unicode/ucharstriebuilder.h"
U_NAMESPACE_BEGIN
namespace {
// This is to ensure we only insert positive integers into the trie
constexpr int32_t kSIPrefixOffset = 64;
constexpr int32_t kSyntaxPartOffset = 256;
enum SyntaxPart {
SYNTAX_PART_PER = kSyntaxPartOffset,
SYNTAX_PART_SQUARE,
SYNTAX_PART_CUBIC,
SYNTAX_PART_P1,
SYNTAX_PART_P2,
SYNTAX_PART_P3,
SYNTAX_PART_P4,
SYNTAX_PART_P5,
SYNTAX_PART_P6,
SYNTAX_PART_P7,
SYNTAX_PART_P8,
SYNTAX_PART_P9,
};
constexpr int32_t kSimpleUnitOffset = 512;
// FIXME: Get this list from data
const char16_t* gSimpleUnits[] = {
u"100kilometer",
u"acre",
u"ampere",
u"arc-minute",
u"arc-second",
u"astronomical-unit",
u"atmosphere",
u"bar",
u"barrel",
u"bit",
u"british-thermal-unit",
u"bushel",
u"byte",
u"calorie",
u"carat",
u"celsius",
u"century",
u"cup",
u"cup-metric",
u"dalton",
u"day",
u"day-person",
u"decade",
u"degree",
u"dot", // (as in "dot-per-inch")
u"dunam",
u"earth-mass",
u"electronvolt",
u"em",
u"fahrenheit",
u"fathom",
u"fluid-ounce",
u"fluid-ounce-imperial",
u"foodcalorie",
u"foot",
u"furlong",
u"g-force",
u"gallon",
u"gallon-imperial",
u"generic", // (i.e., "temperature-generic")
u"gram",
u"hectare", // (note: other "are" derivatives are uncommon)
u"hertz",
u"horsepower",
u"hour",
u"inch",
u"inch-hg",
u"joule",
u"karat",
u"kelvin",
u"knot",
u"light-year",
u"liter",
u"lux",
u"meter",
u"meter-of-mercury", // (not "millimeter-of-mercury")
u"metric-ton",
u"mile",
u"mile-scandinavian",
u"minute",
u"mole",
u"month",
u"month-person",
u"nautical-mile",
u"newton",
u"ohm",
u"ounce",
u"ounce-troy",
u"parsec",
u"pascal",
u"percent",
u"permille",
u"permillion",
u"permyriad",
u"pint",
u"pint-metric",
u"pixel",
u"point",
u"pound",
u"pound-force",
u"quart",
u"radian",
u"revolution",
u"second",
u"solar-luminosity",
u"solar-mass",
u"solar-radius",
u"stone",
u"tablespoon",
u"teaspoon",
u"therm-us",
u"ton",
u"volt",
u"watt",
u"week",
u"week-person",
u"yard",
u"year",
u"year-person",
};
icu::UInitOnce gUnitExtrasInitOnce = U_INITONCE_INITIALIZER;
char16_t* kSerializedUnitExtrasStemTrie = nullptr;
UBool U_CALLCONV cleanupUnitExtras() {
uprv_free(kSerializedUnitExtrasStemTrie);
kSerializedUnitExtrasStemTrie = nullptr;
gUnitExtrasInitOnce.reset();
return TRUE;
}
void U_CALLCONV initUnitExtras(UErrorCode& status) {
ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras);
UCharsTrieBuilder b(status);
if (U_FAILURE(status)) { return; }
// Add SI prefixes
b.add(u"yotta", kSIPrefixOffset + UMEASURE_SI_PREFIX_YOTTA, status);
b.add(u"zetta", kSIPrefixOffset + UMEASURE_SI_PREFIX_ZETTA, status);
b.add(u"exa", kSIPrefixOffset + UMEASURE_SI_PREFIX_EXA, status);
b.add(u"peta", kSIPrefixOffset + UMEASURE_SI_PREFIX_PETA, status);
b.add(u"tera", kSIPrefixOffset + UMEASURE_SI_PREFIX_TERA, status);
b.add(u"giga", kSIPrefixOffset + UMEASURE_SI_PREFIX_GIGA, status);
b.add(u"mega", kSIPrefixOffset + UMEASURE_SI_PREFIX_MEGA, status);
b.add(u"kilo", kSIPrefixOffset + UMEASURE_SI_PREFIX_KILO, status);
b.add(u"hecto", kSIPrefixOffset + UMEASURE_SI_PREFIX_HECTO, status);
b.add(u"deka", kSIPrefixOffset + UMEASURE_SI_PREFIX_DEKA, status);
b.add(u"deci", kSIPrefixOffset + UMEASURE_SI_PREFIX_DECI, status);
b.add(u"centi", kSIPrefixOffset + UMEASURE_SI_PREFIX_CENTI, status);
b.add(u"milli", kSIPrefixOffset + UMEASURE_SI_PREFIX_MILLI, status);
b.add(u"micro", kSIPrefixOffset + UMEASURE_SI_PREFIX_MICRO, status);
b.add(u"nano", kSIPrefixOffset + UMEASURE_SI_PREFIX_NANO, status);
b.add(u"pico", kSIPrefixOffset + UMEASURE_SI_PREFIX_PICO, status);
b.add(u"femto", kSIPrefixOffset + UMEASURE_SI_PREFIX_FEMTO, status);
b.add(u"atto", kSIPrefixOffset + UMEASURE_SI_PREFIX_ATTO, status);
b.add(u"zepto", kSIPrefixOffset + UMEASURE_SI_PREFIX_ZEPTO, status);
b.add(u"yocto", kSIPrefixOffset + UMEASURE_SI_PREFIX_YOCTO, status);
if (U_FAILURE(status)) { return; }
// Add syntax parts (per, power prefixes)
b.add(u"-per-", SYNTAX_PART_PER, status);
b.add(u"square-", SYNTAX_PART_SQUARE, status);
b.add(u"cubic-", SYNTAX_PART_CUBIC, status);
b.add(u"p1", SYNTAX_PART_P1, status);
b.add(u"p2", SYNTAX_PART_P2, status);
b.add(u"p3", SYNTAX_PART_P3, status);
b.add(u"p4", SYNTAX_PART_P4, status);
b.add(u"p5", SYNTAX_PART_P5, status);
b.add(u"p6", SYNTAX_PART_P6, status);
b.add(u"p7", SYNTAX_PART_P7, status);
b.add(u"p8", SYNTAX_PART_P8, status);
b.add(u"p9", SYNTAX_PART_P9, status);
if (U_FAILURE(status)) { return; }
// Add sanctioned simple units by offset
int32_t simpleUnitOffset = kSimpleUnitOffset;
for (auto simpleUnit : gSimpleUnits) {
b.add(simpleUnit, simpleUnitOffset++, status);
}
// Build the CharsTrie
// TODO: Use SLOW or FAST here?
UnicodeString result;
b.buildUnicodeString(USTRINGTRIE_BUILD_FAST, result, status);
if (U_FAILURE(status)) { return; }
// Copy the result into the global constant pointer
size_t numBytes = result.length() * sizeof(char16_t);
kSerializedUnitExtrasStemTrie = static_cast<char16_t*>(uprv_malloc(numBytes));
uprv_memcpy(kSerializedUnitExtrasStemTrie, result.getBuffer(), numBytes);
}
} // namespace
U_NAMESPACE_END
#endif /* !UNCONFIG_NO_FORMATTING */

View file

@ -26,6 +26,7 @@ as the functions are suppose to be called.
It's usually best to have child dependencies called first. */
typedef enum ECleanupI18NType {
UCLN_I18N_START = -1,
UCLN_I18N_UNIT_EXTRAS,
UCLN_I18N_NUMBER_SKELETONS,
UCLN_I18N_CURRENCY_SPACING,
UCLN_I18N_SPOOF,

View file

@ -29,7 +29,6 @@
U_NAMESPACE_BEGIN
class StringEnumeration;
class MeasureUnitFields;
/**
* Enumeration for SI prefixes, such as "kilo".

View file

@ -870,6 +870,7 @@ library: i18n
listformatter
formatting formattable_cnv regex regex_cnv translit
double_conversion number_representation number_output numberformatter numberparser
units_extra
universal_time_scale
uclean_i18n
@ -1053,6 +1054,11 @@ group: sharedbreakiterator
deps
breakiterator
group: units_extra
measunit_extra.o
deps
units
group: units
measunit.o currunit.o nounit.o
deps