From c26a0e244f6218ab7eed85d771a8470b1b819c2e Mon Sep 17 00:00:00 2001 From: Hugo van der Merwe <17109322+hugovdm@users.noreply.github.com> Date: Wed, 15 Jul 2020 04:32:18 +0200 Subject: [PATCH 1/2] Add Unit Usage support to Number Skeletons. --- icu4c/source/i18n/number_skeletons.cpp | 37 ++++++- icu4c/source/i18n/number_skeletons.h | 19 +++- icu4c/source/i18n/unicode/numberformatter.h | 7 +- icu4c/source/i18n/unitsdata.cpp | 4 + icu4c/source/test/intltest/numbertest_api.cpp | 98 +++++++++---------- 5 files changed, 105 insertions(+), 60 deletions(-) diff --git a/icu4c/source/i18n/number_skeletons.cpp b/icu4c/source/i18n/number_skeletons.cpp index 4ba2647986c..ce5244050dc 100644 --- a/icu4c/source/i18n/number_skeletons.cpp +++ b/icu4c/source/i18n/number_skeletons.cpp @@ -97,6 +97,7 @@ void U_CALLCONV initNumberSkeletons(UErrorCode& status) { b.add(u"measure-unit", STEM_MEASURE_UNIT, status); b.add(u"per-measure-unit", STEM_PER_MEASURE_UNIT, status); b.add(u"unit", STEM_UNIT, status); + b.add(u"usage", STEM_UNIT_USAGE, status); b.add(u"currency", STEM_CURRENCY, status); b.add(u"integer-width", STEM_INTEGER_WIDTH, status); b.add(u"numbering-system", STEM_NUMBERING_SYSTEM, status); @@ -550,6 +551,7 @@ MacroProps skeleton::parseSkeleton( case STATE_MEASURE_UNIT: case STATE_PER_MEASURE_UNIT: case STATE_IDENTIFIER_UNIT: + case STATE_UNIT_USAGE: case STATE_CURRENCY_UNIT: case STATE_INTEGER_WIDTH: case STATE_NUMBERING_SYSTEM: @@ -705,7 +707,7 @@ skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, Se macros.decimal = stem_to_object::decimalSeparatorDisplay(stem); return STATE_NULL; - // Stems requiring an option: + // Stems requiring an option: case STEM_PRECISION_INCREMENT: CHECK_NULL(seen, precision, status); @@ -724,6 +726,10 @@ skeleton::parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, Se CHECK_NULL(seen, perUnit, status); return STATE_IDENTIFIER_UNIT; + case STEM_UNIT_USAGE: + CHECK_NULL(seen, usage, status); + return STATE_UNIT_USAGE; + case STEM_CURRENCY: CHECK_NULL(seen, unit, status); return STATE_CURRENCY_UNIT; @@ -763,6 +769,9 @@ ParseState skeleton::parseOption(ParseState stem, const StringSegment& segment, case STATE_IDENTIFIER_UNIT: blueprint_helpers::parseIdentifierUnitOption(segment, macros, status); return STATE_NULL; + case STATE_UNIT_USAGE: + blueprint_helpers::parseUnitUsageOption(segment, macros, status); + return STATE_NULL; case STATE_INCREMENT_PRECISION: blueprint_helpers::parseIncrementOption(segment, macros, status); return STATE_NULL; @@ -837,6 +846,10 @@ void GeneratorHelpers::generateSkeleton(const MacroProps& macros, UnicodeString& sb.append(u' '); } if (U_FAILURE(status)) { return; } + if (GeneratorHelpers::usage(macros, sb, status)) { + sb.append(u' '); + } + if (U_FAILURE(status)) { return; } if (GeneratorHelpers::precision(macros, sb, status)) { sb.append(u' '); } @@ -993,6 +1006,8 @@ void blueprint_helpers::parseMeasureUnitOption(const StringSegment& segment, Mac static constexpr int32_t CAPACITY = 30; MeasureUnit units[CAPACITY]; UErrorCode localStatus = U_ZERO_ERROR; + // TODO(units): revisit this: deals with hard-coded set of measure units? + // Can it handle complex units correctly? int32_t numUnits = MeasureUnit::getAvailable(type.data(), units, CAPACITY, localStatus); if (U_FAILURE(localStatus)) { // More than 30 units in this type? @@ -1057,6 +1072,17 @@ void blueprint_helpers::parseIdentifierUnitOption(const StringSegment& segment, } } +void blueprint_helpers::parseUnitUsageOption(const StringSegment &segment, MacroProps ¯os, + UErrorCode &status) { + // Need to do char <-> UChar conversion... + U_ASSERT(U_SUCCESS(status)); + CharString buffer; + SKELETON_UCHAR_TO_CHAR(buffer, segment.toTempUnicodeString(), 0, segment.length(), status); + macros.usage.set(buffer.toStringPiece()); + // We do not do any validation of the usage string: it depends on the + // unitPreferenceData in the units resources. +} + void blueprint_helpers::parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status) { U_ASSERT(segment.charAt(0) == u'.'); @@ -1545,6 +1571,15 @@ bool GeneratorHelpers::perUnit(const MacroProps& macros, UnicodeString& sb, UErr } } +bool GeneratorHelpers::usage(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { + if (macros.usage.fLength > 0) { + sb.append(u"usage/", -1); + sb.append(UnicodeString(macros.usage.fUsage)); + return true; + } + return false; +} + bool GeneratorHelpers::precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { if (macros.precision.fType == Precision::RND_NONE) { sb.append(u"precision-unlimited", -1); diff --git a/icu4c/source/i18n/number_skeletons.h b/icu4c/source/i18n/number_skeletons.h index d9b2c0ee0b1..ce9eb070396 100644 --- a/icu4c/source/i18n/number_skeletons.h +++ b/icu4c/source/i18n/number_skeletons.h @@ -22,10 +22,12 @@ struct SeenMacroProps; // namespace for enums and entrypoint functions namespace skeleton { -/////////////////////////////////////////////////////////////////////////////////////// -// NOTE: For an example of how to add a new stem to the number skeleton parser, see: // -// http://bugs.icu-project.org/trac/changeset/41193 // -/////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////// +// NOTE: For examples of how to add a new stem to the number skeleton parser, see: // +// https://github.com/unicode-org/icu/commit/a2a7982216b2348070dc71093775ac7195793d73 // +// and // +// https://github.com/unicode-org/icu/commit/6fe86f3934a8a5701034f648a8f7c5087e84aa28 // +//////////////////////////////////////////////////////////////////////////////////////// /** * While parsing a skeleton, this enum records what type of option we expect to find next. @@ -46,7 +48,8 @@ enum ParseState { STATE_INCREMENT_PRECISION, STATE_MEASURE_UNIT, STATE_PER_MEASURE_UNIT, - STATE_IDENTIFIER_UNIT, + STATE_IDENTIFIER_UNIT, // A MeasureUnit Identifier. TODO(units): Unused? + STATE_UNIT_USAGE, STATE_CURRENCY_UNIT, STATE_INTEGER_WIDTH, STATE_NUMBERING_SYSTEM, @@ -112,6 +115,7 @@ enum StemEnum { STEM_MEASURE_UNIT, STEM_PER_MEASURE_UNIT, STEM_UNIT, + STEM_UNIT_USAGE, STEM_CURRENCY, STEM_INTEGER_WIDTH, STEM_NUMBERING_SYSTEM, @@ -242,6 +246,8 @@ void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); +void parseUnitUsageOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); + void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status); @@ -304,6 +310,8 @@ class GeneratorHelpers { static bool perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); + static bool usage(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); + static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); @@ -332,6 +340,7 @@ struct SeenMacroProps { bool notation = false; bool unit = false; bool perUnit = false; + bool usage = false; bool precision = false; bool roundingMode = false; bool grouper = false; diff --git a/icu4c/source/i18n/unicode/numberformatter.h b/icu4c/source/i18n/unicode/numberformatter.h index 67b549d7720..b17e25373e6 100644 --- a/icu4c/source/i18n/unicode/numberformatter.h +++ b/icu4c/source/i18n/unicode/numberformatter.h @@ -1161,7 +1161,9 @@ class U_I18N_API Usage : public UMemory { /** @internal */ int16_t length() const { return fLength; } - /** @internal */ + /** @internal + * Makes a copy of value. + */ void set(StringPiece value); /** @internal */ @@ -1177,6 +1179,9 @@ class U_I18N_API Usage : public UMemory { // Allow NumberFormatterImpl to access fUsage. friend class impl::NumberFormatterImpl; + // Allow skeleton generation code to access private members. + friend class impl::GeneratorHelpers; + // Allow MacroProps/MicroProps to initialize empty instances. friend struct impl::MacroProps; diff --git a/icu4c/source/i18n/unitsdata.cpp b/icu4c/source/i18n/unitsdata.cpp index 67116c66a53..4571795e78e 100644 --- a/icu4c/source/i18n/unitsdata.cpp +++ b/icu4c/source/i18n/unitsdata.cpp @@ -329,6 +329,10 @@ int32_t getPreferenceMetadataIndex(const MaybeStackVector Date: Thu, 16 Jul 2020 02:23:50 +0200 Subject: [PATCH 2/2] Code review feedback incorporated. --- icu4c/source/i18n/number_skeletons.cpp | 4 +--- icu4c/source/i18n/number_skeletons.h | 2 +- icu4c/source/i18n/unitsdata.cpp | 7 +++---- icu4c/source/test/intltest/numbertest_api.cpp | 6 +++--- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/icu4c/source/i18n/number_skeletons.cpp b/icu4c/source/i18n/number_skeletons.cpp index ce5244050dc..c555265e1b5 100644 --- a/icu4c/source/i18n/number_skeletons.cpp +++ b/icu4c/source/i18n/number_skeletons.cpp @@ -1006,8 +1006,6 @@ void blueprint_helpers::parseMeasureUnitOption(const StringSegment& segment, Mac static constexpr int32_t CAPACITY = 30; MeasureUnit units[CAPACITY]; UErrorCode localStatus = U_ZERO_ERROR; - // TODO(units): revisit this: deals with hard-coded set of measure units? - // Can it handle complex units correctly? int32_t numUnits = MeasureUnit::getAvailable(type.data(), units, CAPACITY, localStatus); if (U_FAILURE(localStatus)) { // More than 30 units in this type? @@ -1574,7 +1572,7 @@ bool GeneratorHelpers::perUnit(const MacroProps& macros, UnicodeString& sb, UErr bool GeneratorHelpers::usage(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { if (macros.usage.fLength > 0) { sb.append(u"usage/", -1); - sb.append(UnicodeString(macros.usage.fUsage)); + sb.append(UnicodeString(macros.usage.fUsage, -1, US_INV)); return true; } return false; diff --git a/icu4c/source/i18n/number_skeletons.h b/icu4c/source/i18n/number_skeletons.h index ce9eb070396..841abb3d868 100644 --- a/icu4c/source/i18n/number_skeletons.h +++ b/icu4c/source/i18n/number_skeletons.h @@ -48,7 +48,7 @@ enum ParseState { STATE_INCREMENT_PRECISION, STATE_MEASURE_UNIT, STATE_PER_MEASURE_UNIT, - STATE_IDENTIFIER_UNIT, // A MeasureUnit Identifier. TODO(units): Unused? + STATE_IDENTIFIER_UNIT, STATE_UNIT_USAGE, STATE_CURRENCY_UNIT, STATE_INTEGER_WIDTH, diff --git a/icu4c/source/i18n/unitsdata.cpp b/icu4c/source/i18n/unitsdata.cpp index 4571795e78e..0cb96ea1f85 100644 --- a/icu4c/source/i18n/unitsdata.cpp +++ b/icu4c/source/i18n/unitsdata.cpp @@ -329,10 +329,9 @@ int32_t getPreferenceMetadataIndex(const MaybeStackVector