diff --git a/icu4c/source/i18n/fmtable.cpp b/icu4c/source/i18n/fmtable.cpp index 051eccf33d1..c65c5c2e095 100644 --- a/icu4c/source/i18n/fmtable.cpp +++ b/icu4c/source/i18n/fmtable.cpp @@ -795,7 +795,7 @@ Formattable::setDecimalNumber(StringPiece numberString, UErrorCode &status) { dispose(); DecimalQuantity* dq = new DecimalQuantity(); - dq->setToDecNumber(numberString); + dq->setToDecNumber(numberString, status); adoptDecimalQuantity(dq); // Note that we do not hang on to the caller's input string. diff --git a/icu4c/source/i18n/number_decimalquantity.cpp b/icu4c/source/i18n/number_decimalquantity.cpp index c00d1a126ae..cd5df22f485 100644 --- a/icu4c/source/i18n/number_decimalquantity.cpp +++ b/icu4c/source/i18n/number_decimalquantity.cpp @@ -16,6 +16,7 @@ #include "number_roundingutils.h" #include "double-conversion.h" #include "unicode/plurrule.h" +#include "charstr.h" using namespace icu; using namespace icu::number; @@ -33,19 +34,29 @@ static constexpr int32_t DEFAULT_DIGITS = 34; typedef MaybeStackHeaderAndArray DecNumberWithStorage; /** Helper function to convert a decNumber-compatible string into a decNumber. */ -void stringToDecNumber(StringPiece n, DecNumberWithStorage &dn) { +void stringToDecNumber(StringPiece n, DecNumberWithStorage &dn, UErrorCode& status) { decContext set; uprv_decContextDefault(&set, DEC_INIT_BASE); uprv_decContextSetRounding(&set, DEC_ROUND_HALF_EVEN); - set.traps = 0; // no traps, thank you + set.traps = 0; // no traps, thank you (what does this mean?) if (n.length() > DEFAULT_DIGITS) { dn.resize(n.length(), 0); set.digits = n.length(); } else { set.digits = DEFAULT_DIGITS; } - uprv_decNumberFromString(dn.getAlias(), n.data(), &set); - U_ASSERT(DECDPUN == 1); + + // Make sure that the string is NUL-terminated; CharString guarantees this, but not StringPiece. 
+ CharString cs(n, status); + if (U_FAILURE(status)) { return; } + + static_assert(DECDPUN == 1, "Assumes that DECDPUN is set to 1"); + uprv_decNumberFromString(dn.getAlias(), cs.data(), &set); + + // Check for invalid syntax and set the corresponding error code. + if ((set.status & DEC_Conversion_syntax) != 0) { + status = U_DECIMAL_NUMBER_SYNTAX_ERROR; + } } /** Helper function for safe subtraction (no overflow). */ @@ -329,7 +340,9 @@ void DecimalQuantity::_setToLong(int64_t n) { if (n == INT64_MIN) { static const char *int64minStr = "9.223372036854775808E+18"; DecNumberWithStorage dn; - stringToDecNumber(int64minStr, dn); + UErrorCode localStatus = U_ZERO_ERROR; + stringToDecNumber(int64minStr, dn, localStatus); + if (U_FAILURE(localStatus)) { return; } // unexpected readDecNumberToBcd(dn.getAlias()); } else if (n <= INT32_MAX) { readIntToBcd(static_cast(n)); @@ -429,12 +442,13 @@ void DecimalQuantity::convertToAccurateDouble() { explicitExactDouble = true; } -DecimalQuantity &DecimalQuantity::setToDecNumber(StringPiece n) { +DecimalQuantity &DecimalQuantity::setToDecNumber(StringPiece n, UErrorCode& status) { setBcdToZero(); flags = 0; DecNumberWithStorage dn; - stringToDecNumber(n, dn); + stringToDecNumber(n, dn, status); + if (U_FAILURE(status)) { return *this; } // The code path for decNumber is modeled after BigDecimal in Java. if (decNumberIsNegative(dn.getAlias())) { diff --git a/icu4c/source/i18n/number_decimalquantity.h b/icu4c/source/i18n/number_decimalquantity.h index 495ba80ec1c..10f2e669b8a 100644 --- a/icu4c/source/i18n/number_decimalquantity.h +++ b/icu4c/source/i18n/number_decimalquantity.h @@ -145,7 +145,7 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory { /** decNumber is similar to BigDecimal in Java. 
*/ - DecimalQuantity &setToDecNumber(StringPiece n); + DecimalQuantity &setToDecNumber(StringPiece n, UErrorCode& status); /** * Appends a digit, optionally with one or more leading zeros, to the end of the value represented diff --git a/icu4c/source/i18n/number_fluent.cpp b/icu4c/source/i18n/number_fluent.cpp index 372e6f18d83..e0ba258e3cf 100644 --- a/icu4c/source/i18n/number_fluent.cpp +++ b/icu4c/source/i18n/number_fluent.cpp @@ -11,6 +11,7 @@ #include "number_decimalquantity.h" #include "number_formatimpl.h" #include "umutex.h" +#include "number_skeletons.h" using namespace icu; using namespace icu::number; @@ -287,6 +288,11 @@ Derived NumberFormatterSettings::macros(impl::MacroProps&& macros) && { return move; } +template +UnicodeString NumberFormatterSettings::toSkeleton(UErrorCode& status) const { + return skeleton::generate(fMacros, status); +} + // Declare all classes that implement NumberFormatterSettings // See https://stackoverflow.com/a/495056/1407170 template @@ -304,6 +310,11 @@ LocalizedNumberFormatter NumberFormatter::withLocale(const Locale& locale) { return with().locale(locale); } +UnlocalizedNumberFormatter +NumberFormatter::fromSkeleton(const UnicodeString& skeleton, UErrorCode& status) { + return skeleton::create(skeleton, status); +} + template using NFS = NumberFormatterSettings; @@ -563,7 +574,7 @@ FormattedNumber LocalizedNumberFormatter::formatDecimal(StringPiece value, UErro status = U_MEMORY_ALLOCATION_ERROR; return FormattedNumber(status); } - results->quantity.setToDecNumber(value); + results->quantity.setToDecNumber(value, status); return formatImpl(results, status); } diff --git a/icu4c/source/i18n/number_integerwidth.cpp b/icu4c/source/i18n/number_integerwidth.cpp index 464c2230fff..87e543622cc 100644 --- a/icu4c/source/i18n/number_integerwidth.cpp +++ b/icu4c/source/i18n/number_integerwidth.cpp @@ -39,7 +39,7 @@ IntegerWidth IntegerWidth::truncateAt(int32_t maxInt) { } } -void IntegerWidth::apply(impl::DecimalQuantity &quantity, 
UErrorCode &status) const { +void IntegerWidth::apply(impl::DecimalQuantity& quantity, UErrorCode& status) const { if (fHasError) { status = U_ILLEGAL_ARGUMENT_ERROR; } else if (fUnion.minMaxInt.fMaxInt == -1) { @@ -50,12 +50,13 @@ void IntegerWidth::apply(impl::DecimalQuantity &quantity, UErrorCode &status) co } bool IntegerWidth::operator==(const IntegerWidth& other) const { - if (fHasError) { - return other.fHasError && fUnion.errorCode == other.fUnion.errorCode; - } else { - return !other.fHasError && fUnion.minMaxInt.fMinInt == other.fUnion.minMaxInt.fMinInt && - fUnion.minMaxInt.fMaxInt == other.fUnion.minMaxInt.fMaxInt; - } + // Private operator==; do error and bogus checking first! + U_ASSERT(!fHasError); + U_ASSERT(!other.fHasError); + U_ASSERT(!isBogus()); + U_ASSERT(!other.isBogus()); + return fUnion.minMaxInt.fMinInt == other.fUnion.minMaxInt.fMinInt && + fUnion.minMaxInt.fMaxInt == other.fUnion.minMaxInt.fMaxInt; } #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/number_skeletons.cpp b/icu4c/source/i18n/number_skeletons.cpp index 3efae97f20a..357c443455e 100644 --- a/icu4c/source/i18n/number_skeletons.cpp +++ b/icu4c/source/i18n/number_skeletons.cpp @@ -17,14 +17,14 @@ #include "number_utils.h" #include "number_decimalquantity.h" #include "unicode/numberformatter.h" +#include "uinvchar.h" +#include "charstr.h" using namespace icu; using namespace icu::number; using namespace icu::number::impl; using namespace icu::number::impl::skeleton; -static constexpr UErrorCode U_NUMBER_SKELETON_SYNTAX_ERROR = U_ILLEGAL_ARGUMENT_ERROR; - namespace { icu::UInitOnce gNumberSkeletonsInitOnce = U_INITONCE_INITIALIZER; @@ -107,7 +107,7 @@ inline void appendMultiple(UnicodeString& sb, UChar32 cp, int32_t count) { } -#define CHECK_NULL(seen, field, status) (void)(seen); /* for auto-format line wraping */ \ +#define CHECK_NULL(seen, field, status) (void)(seen); /* for auto-format line wrapping */ \ { \ if ((seen).field) { \ (status) = 
U_NUMBER_SKELETON_SYNTAX_ERROR; \ @@ -117,8 +117,24 @@ inline void appendMultiple(UnicodeString& sb, UChar32 cp, int32_t count) { } +#define SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status) (void)(dest); \ +{ \ + UErrorCode conversionStatus = U_ZERO_ERROR; \ + (dest).appendInvariantChars({FALSE, (src).getBuffer() + (start), (end) - (start)}, conversionStatus); \ + if (conversionStatus == U_INVARIANT_CONVERSION_ERROR) { \ + /* Don't propagate the invariant conversion error; it is a skeleton syntax error */ \ + (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \ + return; \ + } else if (U_FAILURE(conversionStatus)) { \ + (status) = conversionStatus; \ + return; \ + } \ +} + + +// NOTE: The order of these strings must be consistent with UNumberFormatRoundingMode const char16_t* const kRoundingModeStrings[] = { - u"up", u"down", u"ceiling", u"floor", u"half-up", u"half-down", u"half-even", u"unnecessary"}; + u"ceiling", u"floor", u"down", u"up", u"half-even", u"half-down", u"half-up", u"unnecessary"}; constexpr int32_t kRoundingModeCount = 8; static_assert( @@ -357,14 +373,14 @@ MacroProps skeleton::parseSkeleton(const UnicodeString& skeletonString, UErrorCo SeenMacroProps seen; MacroProps macros; - StringSegment segment(skeletonString, false); + StringSegment segment(tempSkeletonString, false); UCharsTrie stemTrie(kSerializedStemTrie); ParseState stem = STATE_NULL; - int offset = 0; + int32_t offset = 0; // Primary skeleton parse loop: while (offset < segment.length()) { - int cp = segment.codePointAt(offset); + UChar32 cp = segment.codePointAt(offset); bool isTokenSeparator = PatternProps::isWhiteSpace(cp); bool isOptionSeparator = (cp == u'/'); @@ -772,21 +788,17 @@ blueprint_helpers::parseExponentSignOption(const StringSegment& segment, MacroPr void blueprint_helpers::parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status) { - if (segment.length() != 3) { - // throw new SkeletonSyntaxException("Invalid currency", segment); - status = 
U_NUMBER_SKELETON_SYNTAX_ERROR; - return; - } const UChar* currencyCode = segment.toUnicodeString().getTerminatedBuffer(); - // Check that the currency code is valid: - int32_t numericCode = ucurr_getNumericCode(currencyCode); - if (numericCode == 0) { + UErrorCode localStatus = U_ZERO_ERROR; + CurrencyUnit currency(currencyCode, localStatus); + if (U_FAILURE(localStatus)) { + // Not 3 ascii chars // throw new SkeletonSyntaxException("Invalid currency", segment); status = U_NUMBER_SKELETON_SYNTAX_ERROR; return; } // Slicing is OK - macros.unit = CurrencyUnit(currencyCode, status); // NOLINT + macros.unit = currency; // NOLINT } void @@ -796,48 +808,40 @@ blueprint_helpers::generateCurrencyOption(const CurrencyUnit& currency, UnicodeS void blueprint_helpers::parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status) { + UnicodeString stemString = segment.toUnicodeString(); + // NOTE: The category (type) of the unit is guaranteed to be a valid subtag (alphanumeric) // http://unicode.org/reports/tr35/#Validity_Data int firstHyphen = 0; - while (firstHyphen < segment.length() && segment.charAt(firstHyphen) != '-') { + while (firstHyphen < stemString.length() && stemString.charAt(firstHyphen) != '-') { firstHyphen++; } - if (firstHyphen == segment.length()) { + if (firstHyphen == stemString.length()) { // throw new SkeletonSyntaxException("Invalid measure unit option", segment); status = U_NUMBER_SKELETON_SYNTAX_ERROR; return; } - // MeasureUnit is in char space; we need to convert. - // Note: the longest type/subtype as of this writing (March 2018) is 24 chars. - static constexpr int32_t CAPACITY = 30; - char type[CAPACITY]; - char subType[CAPACITY]; - const int32_t typeLen = firstHyphen; - const int32_t subTypeLen = segment.length() - firstHyphen - 1; - if (typeLen + 1 > CAPACITY || subTypeLen + 1 > CAPACITY) { - // Type or subtype longer than 30? - // The capacity should be increased if this is a problem with a real CLDR unit. 
- status = U_NUMBER_SKELETON_SYNTAX_ERROR; - return; - } - u_UCharsToChars(segment.toUnicodeString().getBuffer(), type, typeLen); - u_UCharsToChars(segment.toUnicodeString().getBuffer() + firstHyphen + 1, subType, subTypeLen); - type[typeLen] = 0; - subType[subTypeLen] = 0; + // Need to do char <-> UChar conversion... + if (U_FAILURE(status)) { return; } + CharString type; + SKELETON_UCHAR_TO_CHAR(type, stemString, 0, firstHyphen, status); + CharString subType; + SKELETON_UCHAR_TO_CHAR(subType, stemString, firstHyphen + 1, stemString.length(), status); // Note: the largest type as of this writing (March 2018) is "volume", which has 24 units. - MeasureUnit units[30]; + static constexpr int32_t CAPACITY = 30; + MeasureUnit units[CAPACITY]; UErrorCode localStatus = U_ZERO_ERROR; - int32_t numUnits = MeasureUnit::getAvailable(type, units, 30, localStatus); + int32_t numUnits = MeasureUnit::getAvailable(type.data(), units, CAPACITY, localStatus); if (U_FAILURE(localStatus)) { // More than 30 units in this type? - status = U_NUMBER_SKELETON_SYNTAX_ERROR; + status = U_INTERNAL_PROGRAM_ERROR; return; } for (int32_t i = 0; i < numUnits; i++) { auto& unit = units[i]; - if (uprv_strcmp(subType, unit.getSubtype()) == 0) { + if (uprv_strcmp(subType.data(), unit.getSubtype()) == 0) { macros.unit = unit; return; } @@ -848,26 +852,11 @@ void blueprint_helpers::parseMeasureUnitOption(const StringSegment& segment, Mac } void blueprint_helpers::generateMeasureUnitOption(const MeasureUnit& measureUnit, UnicodeString& sb, - UErrorCode& status) { - // We need to convert from char* to UChar*... - // See comments in the previous function about the capacity setting. 
- static constexpr int32_t CAPACITY = 30; - char16_t type16[CAPACITY]; - char16_t subType16[CAPACITY]; - const auto typeLen = static_cast(uprv_strlen(measureUnit.getType())); - const auto subTypeLen = static_cast(uprv_strlen(measureUnit.getSubtype())); - if (typeLen + 1 > CAPACITY || subTypeLen + 1 > CAPACITY) { - // Type or subtype longer than 30? - // The capacity should be increased if this is a problem with a real CLDR unit. - status = U_UNSUPPORTED_ERROR; - return; - } - u_charsToUChars(measureUnit.getType(), type16, typeLen); - u_charsToUChars(measureUnit.getSubtype(), subType16, subTypeLen); - - sb.append(type16, typeLen); + UErrorCode&) { + // Need to do char <-> UChar conversion... + sb.append(UnicodeString(measureUnit.getType(), -1, US_INV)); sb.append(u'-'); - sb.append(subType16, subTypeLen); + sb.append(UnicodeString(measureUnit.getSubtype(), -1, US_INV)); } void blueprint_helpers::parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, @@ -1052,17 +1041,19 @@ bool blueprint_helpers::parseFracSigOption(const StringSegment& segment, MacroPr void blueprint_helpers::parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status) { + // Need to do char <-> UChar conversion... + CharString buffer; + SKELETON_UCHAR_TO_CHAR(buffer, segment.toUnicodeString(), 0, segment.length(), status); + // Utilize DecimalQuantity/decNumber to parse this for us. - static constexpr int32_t CAPACITY = 30; - char buffer[CAPACITY]; - if (segment.length() > CAPACITY) { - // No support for numbers this long; they won't fit in a double anyway. 
+ DecimalQuantity dq; + UErrorCode localStatus = U_ZERO_ERROR; + dq.setToDecNumber({buffer.data(), buffer.length()}, localStatus); + if (U_FAILURE(localStatus)) { + // throw new SkeletonSyntaxException("Invalid rounding increment", segment, e); status = U_NUMBER_SKELETON_SYNTAX_ERROR; return; } - u_UCharsToChars(segment.toUnicodeString().getBuffer(), buffer, segment.length()); - DecimalQuantity dq; - dq.setToDecNumber({buffer, segment.length()}); double increment = dq.toDouble(); macros.rounder = Rounder::increment(increment); } @@ -1146,17 +1137,10 @@ void blueprint_helpers::generateIntegerWidthOption(int32_t minInt, int32_t maxIn void blueprint_helpers::parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status) { // Need to do char <-> UChar conversion... - static constexpr int32_t CAPACITY = 30; - char buffer[CAPACITY]; - if (segment.length() + 1 > CAPACITY) { - // No support for numbers this long; they won't fit in a double anyway. - status = U_NUMBER_SKELETON_SYNTAX_ERROR; - return; - } - u_UCharsToChars(segment.toUnicodeString().getBuffer(), buffer, segment.length()); - buffer[segment.length()] = 0; + CharString buffer; + SKELETON_UCHAR_TO_CHAR(buffer, segment.toUnicodeString(), 0, segment.length(), status); - NumberingSystem* ns = NumberingSystem::createInstanceByName(buffer, status); + NumberingSystem* ns = NumberingSystem::createInstanceByName(buffer.data(), status); if (ns == nullptr) { // throw new SkeletonSyntaxException("Unknown numbering system", segment); status = U_NUMBER_SKELETON_SYNTAX_ERROR; @@ -1166,18 +1150,9 @@ void blueprint_helpers::parseNumberingSystemOption(const StringSegment& segment, } void blueprint_helpers::generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, - UErrorCode& status) { + UErrorCode&) { // Need to do char <-> UChar conversion... 
- static constexpr int32_t CAPACITY = 30; - char16_t buffer16[CAPACITY]; - const auto len = static_cast(uprv_strlen(ns.getName())); - if (len > CAPACITY) { - // No support for numbers this long; they won't fit in a double anyway. - status = U_UNSUPPORTED_ERROR; - return; - } - u_charsToUChars(ns.getName(), buffer16, len); - sb.append(buffer16, len); + sb.append(UnicodeString(ns.getName(), -1, US_INV)); } @@ -1243,7 +1218,15 @@ bool GeneratorHelpers::unit(const MacroProps& macros, UnicodeString& sb, UErrorC bool GeneratorHelpers::perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { // Per-units are currently expected to be only MeasureUnits. - if (unitIsCurrency(macros.perUnit) || unitIsNoUnit(macros.perUnit)) { + if (unitIsNoUnit(macros.perUnit)) { + if (unitIsPercent(macros.perUnit) || unitIsPermille(macros.perUnit)) { + status = U_UNSUPPORTED_ERROR; + return false; + } else { + // Default value: ok to ignore + return false; + } + } else if (unitIsCurrency(macros.perUnit)) { status = U_UNSUPPORTED_ERROR; return false; } else { @@ -1298,7 +1281,9 @@ bool GeneratorHelpers::rounding(const MacroProps& macros, UnicodeString& sb, UEr } bool GeneratorHelpers::grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { - if (macros.grouper.isBogus() || macros.grouper.fStrategy == UNUM_GROUPING_COUNT) { + if (macros.grouper.isBogus()) { + return false; // No value + } else if (macros.grouper.fStrategy == UNUM_GROUPING_COUNT) { status = U_UNSUPPORTED_ERROR; return false; } else if (macros.grouper.fStrategy == UNUM_GROUPING_AUTO) { @@ -1310,7 +1295,8 @@ bool GeneratorHelpers::grouping(const MacroProps& macros, UnicodeString& sb, UEr } bool GeneratorHelpers::integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) { - if (macros.integerWidth.fHasError || macros.integerWidth == IntegerWidth::standard()) { + if (macros.integerWidth.fHasError || macros.integerWidth.isBogus() || + macros.integerWidth == 
IntegerWidth::standard()) { // Error or Default return false; } diff --git a/icu4c/source/i18n/number_skeletons.h b/icu4c/source/i18n/number_skeletons.h index e874d6acc33..6a15d9efb40 100644 --- a/icu4c/source/i18n/number_skeletons.h +++ b/icu4c/source/i18n/number_skeletons.h @@ -16,6 +16,8 @@ using icu::numparse::impl::StringSegment; U_NAMESPACE_BEGIN namespace number { namespace impl { +static constexpr UErrorCode U_NUMBER_SKELETON_SYNTAX_ERROR = U_ILLEGAL_ARGUMENT_ERROR; + // Forward-declaration struct SeenMacroProps; diff --git a/icu4c/source/i18n/numparse_stringsegment.cpp b/icu4c/source/i18n/numparse_stringsegment.cpp index aa8e62beefb..0a6e4fd1049 100644 --- a/icu4c/source/i18n/numparse_stringsegment.cpp +++ b/icu4c/source/i18n/numparse_stringsegment.cpp @@ -133,5 +133,9 @@ bool StringSegment::codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase) { return cp1 == cp2; } +bool StringSegment::operator==(const UnicodeString& other) const { + return toUnicodeString() == other; +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/plurrule.cpp b/icu4c/source/i18n/plurrule.cpp index eb77905b0c3..1b0089f9c9d 100644 --- a/icu4c/source/i18n/plurrule.cpp +++ b/icu4c/source/i18n/plurrule.cpp @@ -1458,7 +1458,7 @@ FixedDecimal::FixedDecimal(const UnicodeString &num, UErrorCode &status) { CharString cs; cs.appendInvariantChars(num, status); DecimalQuantity dl; - dl.setToDecNumber(cs.toStringPiece()); + dl.setToDecNumber(cs.toStringPiece(), status); if (U_FAILURE(status)) { init(0, 0, 0); return; diff --git a/icu4c/source/i18n/unicode/numberformatter.h b/icu4c/source/i18n/unicode/numberformatter.h index fdc6bdc285c..c0131eaa712 100644 --- a/icu4c/source/i18n/unicode/numberformatter.h +++ b/icu4c/source/i18n/unicode/numberformatter.h @@ -2179,6 +2179,25 @@ class U_I18N_API NumberFormatterSettings { #endif /* U_HIDE_INTERNAL_API */ + /** + * Creates a skeleton string representation of this number formatter. 
A skeleton string is a + * locale-agnostic serialized form of a number formatter. + *

+ * Not all options are capable of being represented in the skeleton string; for example, a + * DecimalFormatSymbols object. If any such option is encountered, a failure error code + * (e.g. U_UNSUPPORTED_ERROR) is set in status; C++ does not throw an exception. + *

+ * The returned skeleton is in normalized form, such that two number formatters with equivalent + * behavior should produce the same skeleton. + *

+ * Sets an error code if the number formatter has an option that cannot be represented in a skeleton + * string. + * + * @return A number skeleton string with behavior corresponding to this number formatter. + * @draft ICU 62 + */ + UnicodeString toSkeleton(UErrorCode& status) const; + /** * Sets the UErrorCode if an error occurred in the fluent chain. * Preserves older error codes in the outErrorCode. @@ -2192,7 +2211,7 @@ class U_I18N_API NumberFormatterSettings { } fMacros.copyErrorTo(outErrorCode); return U_FAILURE(outErrorCode); - } + }; // NOTE: Uses default copy and move constructors. @@ -2588,6 +2607,18 @@ class U_I18N_API NumberFormatter final { */ static LocalizedNumberFormatter withLocale(const Locale &locale); + /** + * Call this method at the beginning of a NumberFormatter fluent chain to create an instance based + * on a given number skeleton string. + * + * @param skeleton + * The skeleton string off of which to base this NumberFormatter. + * @return An UnlocalizedNumberFormatter, to be used for chaining. + * @param status Set to a failure code if the given string is not a valid number formatting skeleton (C++ reports this through status instead of throwing). + * @draft ICU 62 + */ + static UnlocalizedNumberFormatter fromSkeleton(const UnicodeString& skeleton, UErrorCode& status); + /** * Use factory methods instead of the constructor to create a NumberFormatter. 
* @draft ICU 60 diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in index 6699d319fef..55d1fb21421 100644 --- a/icu4c/source/test/intltest/Makefile.in +++ b/icu4c/source/test/intltest/Makefile.in @@ -65,7 +65,7 @@ numberformattesttuple.o pluralmaptest.o \ numbertest_affixutils.o numbertest_api.o numbertest_decimalquantity.o \ numbertest_modifiers.o numbertest_patternmodifier.o numbertest_patternstring.o \ numbertest_stringbuilder.o numbertest_stringsegment.o numbertest_unisets.o \ -numbertest_parse.o numbertest_doubleconversion.o +numbertest_parse.o numbertest_doubleconversion.o numbertest_skeletons.o DEPS = $(OBJECTS:.o=.d) diff --git a/icu4c/source/test/intltest/numbertest.h b/icu4c/source/test/intltest/numbertest.h index 410d9ba316b..595a954e28e 100644 --- a/icu4c/source/test/intltest/numbertest.h +++ b/icu4c/source/test/intltest/numbertest.h @@ -229,6 +229,23 @@ class NumberParserTest : public IntlTest { void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0); }; +class NumberSkeletonTest : public IntlTest { + public: + void validTokens(); + void invalidTokens(); + void unknownTokens(); + void unexpectedTokens(); + void duplicateValues(); + void stemsRequiringOption(); + void defaultTokens(); + void flexibleSeparators(); + + void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0); + + private: + void expectedErrorSkeleton(const char16_t** cases, int32_t casesLen); +}; + // NOTE: This macro is identical to the one in itformat.cpp #define TESTCLASS(id, TestClass) \ @@ -261,6 +278,7 @@ class NumberTest : public IntlTest { TESTCLASS(8, StringSegmentTest); TESTCLASS(9, UniSetsTest); TESTCLASS(10, NumberParserTest); + TESTCLASS(11, NumberSkeletonTest); default: name = ""; break; // needed to end loop } } diff --git a/icu4c/source/test/intltest/numbertest_skeletons.cpp b/icu4c/source/test/intltest/numbertest_skeletons.cpp new file mode 100644 index 00000000000..80196f9fc8f --- 
/dev/null +++ b/icu4c/source/test/intltest/numbertest_skeletons.cpp @@ -0,0 +1,245 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT + +#include "putilimp.h" +#include "unicode/dcfmtsym.h" +#include "numbertest.h" +#include "number_utils.h" +#include "number_skeletons.h" + +using namespace icu::number::impl; + + +void NumberSkeletonTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) { + if (exec) { + logln("TestSuite AffixUtilsTest: "); + } + TESTCASE_AUTO_BEGIN; + TESTCASE_AUTO(validTokens); + TESTCASE_AUTO(invalidTokens); + TESTCASE_AUTO(unknownTokens); + TESTCASE_AUTO(unexpectedTokens); + TESTCASE_AUTO(duplicateValues); + TESTCASE_AUTO(stemsRequiringOption); + TESTCASE_AUTO(defaultTokens); + TESTCASE_AUTO(flexibleSeparators); + TESTCASE_AUTO_END; +} + +void NumberSkeletonTest::validTokens() { + // This tests only if the tokens are valid, not their behavior. + // Most of these are from the design doc. 
+ static const char16_t* cases[] = { + u"round-integer", + u"round-unlimited", + u"@@@##", + u"@@+", + u".000##", + u".00+", + u".", + u".+", + u".######", + u".00/@@+", + u".00/@##", + u"round-increment/3.14", + u"round-currency-standard", + u"round-integer/half-up", + u".00#/ceiling", + u".00/@@+/floor", + u"scientific", + u"scientific/+ee", + u"scientific/sign-always", + u"scientific/+ee/sign-always", + u"scientific/sign-always/+ee", + u"scientific/sign-except-zero", + u"engineering", + u"engineering/+eee", + u"compact-short", + u"compact-long", + u"notation-simple", + u"percent", + u"permille", + u"measure-unit/length-meter", + u"measure-unit/area-square-meter", + u"measure-unit/energy-joule per-measure-unit/length-meter", + u"currency/XXX", + u"currency/ZZZ", + u"group-off", + u"group-min2", + u"group-auto", + u"group-on-aligned", + u"group-thousands", + u"integer-width/00", + u"integer-width/#0", + u"integer-width/+00", + u"sign-always", + u"sign-auto", + u"sign-never", + u"sign-accounting", + u"sign-accounting-always", + u"sign-except-zero", + u"sign-accounting-except-zero", + u"unit-width-narrow", + u"unit-width-short", + u"unit-width-iso-code", + u"unit-width-full-name", + u"unit-width-hidden", + u"decimal-auto", + u"decimal-always", + u"latin", + u"numbering-system/arab", + u"numbering-system/latn", + u"round-integer/@##", + u"round-integer/ceiling", + u"round-currency-cash/ceiling"}; + + for (auto& cas : cases) { + UnicodeString skeletonString(cas); + UErrorCode status = U_ZERO_ERROR; + NumberFormatter::fromSkeleton(skeletonString, status); + assertSuccess(skeletonString, status); + } +} + +void NumberSkeletonTest::invalidTokens() { + static const char16_t* cases[] = { + u".00x", + u".00##0", + u".##+", + u".00##+", + u".0#+", + u"@@x", + u"@@##0", + u"@#+", + u".00/@", + u".00/@@", + u".00/@@x", + u".00/@@#", + u".00/@@#+", + u".00/floor/@@+", // wrong order + u"round-currency-cash/XXX", + u"scientific/ee", + u"round-increment/xxx", + 
u"round-increment/0.1.2", + u"currency/dummy", + u"measure-unit/foo", + u"integer-width/xxx", + u"integer-width/0+", + u"integer-width/+0#", + u"scientific/foo"}; + + expectedErrorSkeleton(cases, sizeof(cases) / sizeof(*cases)); +} + +void NumberSkeletonTest::unknownTokens() { + static const char16_t* cases[] = { + u"maesure-unit", + u"measure-unit/foo-bar", + u"numbering-system/dummy", + u"français", + u"measure-unit/français-français", // non-invariant characters for C++ + u"numbering-system/français", // non-invariant characters for C++ + u"round-increment/français", // non-invariant characters for C++ + u"currency-USD"}; + + expectedErrorSkeleton(cases, sizeof(cases) / sizeof(*cases)); +} + +void NumberSkeletonTest::unexpectedTokens() { + static const char16_t* cases[] = { + u"group-thousands/foo", + u"round-integer//ceiling group-off", + u"round-integer//ceiling group-off", + u"round-integer/ group-off", + u"round-integer// group-off"}; + + expectedErrorSkeleton(cases, sizeof(cases) / sizeof(*cases)); +} + +void NumberSkeletonTest::duplicateValues() { + static const char16_t* cases[] = { + u"round-integer round-integer", + u"round-integer .00+", + u"round-integer round-unlimited", + u"round-integer @@@", + u"scientific engineering", + u"engineering compact-long", + u"sign-auto sign-always"}; + + expectedErrorSkeleton(cases, sizeof(cases) / sizeof(*cases)); +} + +void NumberSkeletonTest::stemsRequiringOption() { + static const char16_t* stems[] = {u"round-increment", u"currency", u"measure-unit", u"integer-width",}; + static const char16_t* suffixes[] = {u"", u"/ceiling", u" scientific", u"/ceiling scientific"}; + + for (auto& stem : stems) { + for (auto& suffix : suffixes) { + UnicodeString skeletonString = UnicodeString(stem) + suffix; + UErrorCode status = U_ZERO_ERROR; + NumberFormatter::fromSkeleton(skeletonString, status); + assertEquals(skeletonString, U_NUMBER_SKELETON_SYNTAX_ERROR, status); + } + } +} + +void NumberSkeletonTest::defaultTokens() { + 
IcuTestErrorCode status(*this, "defaultTokens"); + + static const char16_t* cases[] = { + u"notation-simple", + u"base-unit", + u"group-auto", + u"integer-width/+0", + u"sign-auto", + u"unit-width-short", + u"decimal-auto"}; + + for (auto& cas : cases) { + UnicodeString skeletonString(cas); + status.setScope(skeletonString); + UnicodeString normalized = NumberFormatter::fromSkeleton( + skeletonString, status).toSkeleton(status); + // Skeleton should become empty when normalized + assertEquals(skeletonString, u"", normalized); + } +} + +void NumberSkeletonTest::flexibleSeparators() { + IcuTestErrorCode status(*this, "flexibleSeparators"); + + static struct TestCase { + const char16_t* skeleton; + const char16_t* expected; + } cases[] = {{u"round-integer group-off", u"5142"}, + {u"round-integer group-off", u"5142"}, + {u"round-integer/ceiling group-off", u"5143"}, + {u"round-integer/ceiling group-off", u"5143"}}; + + for (auto& cas : cases) { + UnicodeString skeletonString(cas.skeleton); + UnicodeString expected(cas.expected); + status.setScope(skeletonString); + UnicodeString actual = NumberFormatter::fromSkeleton(skeletonString, status).locale("en") + .formatDouble(5142.3, status) + .toString(); + assertEquals(skeletonString, expected, actual); + } +} + +// In C++, there is no distinguishing between "invalid", "unknown", and "unexpected" tokens. 
+void NumberSkeletonTest::expectedErrorSkeleton(const char16_t** cases, int32_t casesLen) { + for (int32_t i = 0; i < casesLen; i++) { + UnicodeString skeletonString(cases[i]); + UErrorCode status = U_ZERO_ERROR; + NumberFormatter::fromSkeleton(skeletonString, status); + assertEquals(skeletonString, U_NUMBER_SKELETON_SYNTAX_ERROR, status); + } +} + + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/numfmtst.cpp b/icu4c/source/test/intltest/numfmtst.cpp index 35825b62cf3..3487d432f77 100644 --- a/icu4c/source/test/intltest/numfmtst.cpp +++ b/icu4c/source/test/intltest/numfmtst.cpp @@ -105,7 +105,7 @@ static DecimalQuantity &strToDigitList( } CharString formatValue; formatValue.appendInvariantChars(str, status); - digitList.setToDecNumber(StringPiece(formatValue.data())); + digitList.setToDecNumber(StringPiece(formatValue.data()), status); return digitList; } @@ -7027,7 +7027,7 @@ void NumberFormatTest::TestDecimal() { UnicodeString formattedResult; DecimalQuantity dl; StringPiece num("123.4566666666666666666666666666666666621E+40"); - dl.setToDecNumber(num); + dl.setToDecNumber(num, status); ASSERT_SUCCESS(status); fmtr->format(dl, formattedResult, NULL, status); ASSERT_SUCCESS(status); @@ -7035,7 +7035,7 @@ void NumberFormatTest::TestDecimal() { status = U_ZERO_ERROR; num.set("666.666"); - dl.setToDecNumber(num); + dl.setToDecNumber(num, status); FieldPosition pos(NumberFormat::FRACTION_FIELD); ASSERT_SUCCESS(status); formattedResult.remove(); diff --git a/icu4c/source/test/intltest/plurults.cpp b/icu4c/source/test/intltest/plurults.cpp index 57221263039..c6b1a046a0d 100644 --- a/icu4c/source/test/intltest/plurults.cpp +++ b/icu4c/source/test/intltest/plurults.cpp @@ -636,7 +636,7 @@ void PluralRulesTest::checkSelect(const LocalPointer &rules, UError // DigitList is a convenient way to parse the decimal number string and get a double. 
DecimalQuantity dl; - dl.setToDecNumber(StringPiece(num)); + dl.setToDecNumber(StringPiece(num), status); if (U_FAILURE(status)) { errln("file %s, line %d, ICU error status: %s.", __FILE__, line, u_errorName(status)); status = U_ZERO_ERROR; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/number/NumberSkeletonImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/number/NumberSkeletonImpl.java index 6b8d77c9ac8..e6138e280df 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/number/NumberSkeletonImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/number/NumberSkeletonImpl.java @@ -850,11 +850,14 @@ class NumberSkeletonImpl { private static void parseCurrencyOption(StringSegment segment, MacroProps macros) { String currencyCode = segment.subSequence(0, segment.length()).toString(); + Currency currency; try { - macros.unit = Currency.getInstance(currencyCode); + currency = Currency.getInstance(currencyCode); } catch (IllegalArgumentException e) { + // Not 3 ascii chars throw new SkeletonSyntaxException("Invalid currency", segment, e); } + macros.unit = currency; } private static void generateCurrencyOption(Currency currency, StringBuilder sb) { diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberSkeletonTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberSkeletonTest.java index 4d5509f4f1c..37815f69157 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberSkeletonTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberSkeletonTest.java @@ -21,16 +21,6 @@ import com.ibm.icu.util.ULocale; */ public class NumberSkeletonTest { - @Test - public void duplicateValues() { - try { - NumberFormatter.fromSkeleton("round-integer round-integer"); - fail(); - } catch (SkeletonSyntaxException expected) { - assertTrue(expected.getMessage(), expected.getMessage().contains("Duplicated setting")); - } - } - @Test public void validTokens() { // This tests only if the tokens are valid, not their 
behavior. @@ -69,6 +59,7 @@ public class NumberSkeletonTest { "measure-unit/area-square-meter", "measure-unit/energy-joule per-measure-unit/length-meter", "currency/XXX", + "currency/ZZZ", "group-off", "group-min2", "group-auto", @@ -138,7 +129,7 @@ public class NumberSkeletonTest { for (String cas : cases) { try { NumberFormatter.fromSkeleton(cas); - fail("Skeleton parses, but it should have failed: " + cas); + fail(cas); } catch (SkeletonSyntaxException expected) { assertTrue(expected.getMessage(), expected.getMessage().contains("Invalid")); } @@ -147,12 +138,20 @@ public class NumberSkeletonTest { @Test public void unknownTokens() { - String[] cases = { "maesure-unit", "measure-unit/foo-bar", "numbering-system/dummy" }; + String[] cases = { + "maesure-unit", + "measure-unit/foo-bar", + "numbering-system/dummy", + "français", + "measure-unit/français-français", // non-invariant characters for C++ + "numbering-system/français", // non-invariant characters for C++ + "round-increment/français", // non-invariant characters for C++ + "currency-USD" }; for (String cas : cases) { try { NumberFormatter.fromSkeleton(cas); - fail(); + fail(cas); } catch (SkeletonSyntaxException expected) { assertTrue(expected.getMessage(), expected.getMessage().contains("Unknown")); } @@ -171,13 +170,34 @@ public class NumberSkeletonTest { for (String cas : cases) { try { NumberFormatter.fromSkeleton(cas); - fail(); + fail(cas); } catch (SkeletonSyntaxException expected) { assertTrue(expected.getMessage(), expected.getMessage().contains("Unexpected")); } } } + @Test + public void duplicateValues() { + String[] cases = { + "round-integer round-integer", + "round-integer .00+", + "round-integer round-unlimited", + "round-integer @@@", + "scientific engineering", + "engineering compact-long", + "sign-auto sign-always" }; + + for (String cas : cases) { + try { + NumberFormatter.fromSkeleton(cas); + fail(cas); + } catch (SkeletonSyntaxException expected) { + assertTrue(expected.getMessage(), 
expected.getMessage().contains("Duplicated")); + } + } + } + @Test public void stemsRequiringOption() { String[] stems = { "round-increment", "currency", "measure-unit", "integer-width", };