ICU-8610 Adding tests for number skeletons in C++. Adding error code handling to the setToDecNumber setter on DecimalQuantity. Refactoring char-to-uchar conversion in skeleton implementation code.

X-SVN-Rev: 41152
2025-04-07 06:25:30 +00:00 · 2018-03-24 05:46:28 +00:00 · 2018-03-24 05:46:28 +00:00 · a8f2471248
commit a8f2471248
parent 4c07b01a46
17 changed files with 463 additions and 128 deletions
--- a/icu4c/source/i18n/fmtable.cpp
+++ b/icu4c/source/i18n/fmtable.cpp
@ -795,7 +795,7 @@ Formattable::setDecimalNumber(StringPiece numberString, UErrorCode &status) {
    dispose();

    DecimalQuantity* dq = new DecimalQuantity();
-    dq->setToDecNumber(numberString);
+    dq->setToDecNumber(numberString, status);
    adoptDecimalQuantity(dq);

    // Note that we do not hang on to the caller's input string.
--- a/icu4c/source/i18n/number_decimalquantity.cpp
+++ b/icu4c/source/i18n/number_decimalquantity.cpp
@ -16,6 +16,7 @@
 #include "number_roundingutils.h"
 #include "double-conversion.h"
 #include "unicode/plurrule.h"
+#include "charstr.h"

 using namespace icu;
 using namespace icu::number;
@ -33,19 +34,29 @@ static constexpr int32_t DEFAULT_DIGITS = 34;
 typedef MaybeStackHeaderAndArray<decNumber, char, DEFAULT_DIGITS> DecNumberWithStorage;

 /** Helper function to convert a decNumber-compatible string into a decNumber. */
-void stringToDecNumber(StringPiece n, DecNumberWithStorage &dn) {
+void stringToDecNumber(StringPiece n, DecNumberWithStorage &dn, UErrorCode& status) {
    decContext set;
    uprv_decContextDefault(&set, DEC_INIT_BASE);
    uprv_decContextSetRounding(&set, DEC_ROUND_HALF_EVEN);
-    set.traps = 0; // no traps, thank you
+    set.traps = 0; // no traps: decNumber records error conditions in set.status instead of raising a signal
    if (n.length() > DEFAULT_DIGITS) {
        dn.resize(n.length(), 0);
        set.digits = n.length();
    } else {
        set.digits = DEFAULT_DIGITS;
    }
-    uprv_decNumberFromString(dn.getAlias(), n.data(), &set);
-    U_ASSERT(DECDPUN == 1);
+
+    // Make sure that the string is NUL-terminated; CharString guarantees this, but not StringPiece.
+    CharString cs(n, status);
+    if (U_FAILURE(status)) { return; }
+
+    static_assert(DECDPUN == 1, "Assumes that DECDPUN is set to 1");
+    uprv_decNumberFromString(dn.getAlias(), cs.data(), &set);
+
+    // Check for invalid syntax and set the corresponding error code.
+    if ((set.status & DEC_Conversion_syntax) != 0) {
+        status = U_DECIMAL_NUMBER_SYNTAX_ERROR;
+    }
 }

 /** Helper function for safe subtraction (no overflow). */
@ -329,7 +340,9 @@ void DecimalQuantity::_setToLong(int64_t n) {
    if (n == INT64_MIN) {
        static const char *int64minStr = "9.223372036854775808E+18";
        DecNumberWithStorage dn;
-        stringToDecNumber(int64minStr, dn);
+        UErrorCode localStatus = U_ZERO_ERROR;
+        stringToDecNumber(int64minStr, dn, localStatus);
+        if (U_FAILURE(localStatus)) { return; } // unexpected
        readDecNumberToBcd(dn.getAlias());
    } else if (n <= INT32_MAX) {
        readIntToBcd(static_cast<int32_t>(n));
@ -429,12 +442,13 @@ void DecimalQuantity::convertToAccurateDouble() {
    explicitExactDouble = true;
 }

-DecimalQuantity &DecimalQuantity::setToDecNumber(StringPiece n) {
+DecimalQuantity &DecimalQuantity::setToDecNumber(StringPiece n, UErrorCode& status) {
    setBcdToZero();
    flags = 0;

    DecNumberWithStorage dn;
-    stringToDecNumber(n, dn);
+    stringToDecNumber(n, dn, status);
+    if (U_FAILURE(status)) { return *this; }

    // The code path for decNumber is modeled after BigDecimal in Java.
    if (decNumberIsNegative(dn.getAlias())) {
--- a/icu4c/source/i18n/number_decimalquantity.h
+++ b/icu4c/source/i18n/number_decimalquantity.h
@ -145,7 +145,7 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory {

    /** decNumber is similar to BigDecimal in Java. */

-    DecimalQuantity &setToDecNumber(StringPiece n);
+    DecimalQuantity &setToDecNumber(StringPiece n, UErrorCode& status);

    /**
     * Appends a digit, optionally with one or more leading zeros, to the end of the value represented
--- a/icu4c/source/i18n/number_fluent.cpp
+++ b/icu4c/source/i18n/number_fluent.cpp
@ -11,6 +11,7 @@
 #include "number_decimalquantity.h"
 #include "number_formatimpl.h"
 #include "umutex.h"
+#include "number_skeletons.h"

 using namespace icu;
 using namespace icu::number;
@ -287,6 +288,11 @@ Derived NumberFormatterSettings<Derived>::macros(impl::MacroProps&& macros) && {
    return move;
 }

+// Builds the skeleton string for this formatter by handing its settings (fMacros)
+// and the caller's status to skeleton::generate().
+template<typename Derived>
+UnicodeString NumberFormatterSettings<Derived>::toSkeleton(UErrorCode& status) const {
+    UnicodeString skeletonString = skeleton::generate(fMacros, status);
+    return skeletonString;
+}
+
 // Declare all classes that implement NumberFormatterSettings
 // See https://stackoverflow.com/a/495056/1407170
 template
@ -304,6 +310,11 @@ LocalizedNumberFormatter NumberFormatter::withLocale(const Locale& locale) {
    return with().locale(locale);
 }

+// Creates an UnlocalizedNumberFormatter from a skeleton string by delegating to
+// skeleton::create(). Note that the parameter is also called `skeleton`; in the
+// qualified name `skeleton::create` the part before `::` is looked up as a
+// namespace, so it does not refer to the parameter.
+UnlocalizedNumberFormatter
+NumberFormatter::fromSkeleton(const UnicodeString& skeleton, UErrorCode& status) {
+    UnlocalizedNumberFormatter formatter = skeleton::create(skeleton, status);
+    return formatter;
+}
+

 template<typename T>
 using NFS = NumberFormatterSettings<T>;
@ -563,7 +574,7 @@ FormattedNumber LocalizedNumberFormatter::formatDecimal(StringPiece value, UErro
        status = U_MEMORY_ALLOCATION_ERROR;
        return FormattedNumber(status);
    }
-    results->quantity.setToDecNumber(value);
+    results->quantity.setToDecNumber(value, status);
    return formatImpl(results, status);
 }

--- a/icu4c/source/i18n/number_integerwidth.cpp
+++ b/icu4c/source/i18n/number_integerwidth.cpp
@ -39,7 +39,7 @@ IntegerWidth IntegerWidth::truncateAt(int32_t maxInt) {
    }
 }

-void IntegerWidth::apply(impl::DecimalQuantity &quantity, UErrorCode &status) const {
+void IntegerWidth::apply(impl::DecimalQuantity& quantity, UErrorCode& status) const {
    if (fHasError) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
    } else if (fUnion.minMaxInt.fMaxInt == -1) {
@ -50,12 +50,13 @@ void IntegerWidth::apply(impl::DecimalQuantity &quantity, UErrorCode &status) co
 }

 bool IntegerWidth::operator==(const IntegerWidth& other) const {
-    if (fHasError) {
-        return other.fHasError && fUnion.errorCode == other.fUnion.errorCode;
-    } else {
-        return !other.fHasError && fUnion.minMaxInt.fMinInt == other.fUnion.minMaxInt.fMinInt &&
-               fUnion.minMaxInt.fMaxInt == other.fUnion.minMaxInt.fMaxInt;
-    }
+    // Private operator==; do error and bogus checking first!
+    U_ASSERT(!fHasError);
+    U_ASSERT(!other.fHasError);
+    U_ASSERT(!isBogus());
+    U_ASSERT(!other.isBogus());
+    return fUnion.minMaxInt.fMinInt == other.fUnion.minMaxInt.fMinInt &&
+           fUnion.minMaxInt.fMaxInt == other.fUnion.minMaxInt.fMaxInt;
 }

 #endif /* #if !UCONFIG_NO_FORMATTING */
--- a/icu4c/source/i18n/number_skeletons.cpp
+++ b/icu4c/source/i18n/number_skeletons.cpp
@ -17,14 +17,14 @@
 #include "number_utils.h"
 #include "number_decimalquantity.h"
 #include "unicode/numberformatter.h"
+#include "uinvchar.h"
+#include "charstr.h"

 using namespace icu;
 using namespace icu::number;
 using namespace icu::number::impl;
 using namespace icu::number::impl::skeleton;

-static constexpr UErrorCode U_NUMBER_SKELETON_SYNTAX_ERROR = U_ILLEGAL_ARGUMENT_ERROR;
-
 namespace {

 icu::UInitOnce gNumberSkeletonsInitOnce = U_INITONCE_INITIALIZER;
@ -107,7 +107,7 @@ inline void appendMultiple(UnicodeString& sb, UChar32 cp, int32_t count) {
 }


-#define CHECK_NULL(seen, field, status) (void)(seen); /* for auto-format line wraping */ \
+#define CHECK_NULL(seen, field, status) (void)(seen); /* for auto-format line wrapping */ \
 { \
    if ((seen).field) { \
        (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \
@ -117,8 +117,24 @@ inline void appendMultiple(UnicodeString& sb, UChar32 cp, int32_t count) {
 }


+// Appends the UTF-16 code units of `src` in the range [start, end) to `dest` by calling
+// dest.appendInvariantChars() with a local status code.
+//  - If the local status is U_INVARIANT_CONVERSION_ERROR, `status` is set to
+//    U_NUMBER_SKELETON_SYNTAX_ERROR instead of propagating the conversion error.
+//  - On any other failure, the local status is copied into `status`.
+// In both failure cases the macro executes `return;`, so it can only be used inside a
+// function returning void. `dest` and `start` are each expanded twice, so pass
+// side-effect-free expressions for them.
+#define SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status) (void)(dest); \
+{ \
+    UErrorCode conversionStatus = U_ZERO_ERROR; \
+    (dest).appendInvariantChars({FALSE, (src).getBuffer() + (start), (end) - (start)}, conversionStatus); \
+    if (conversionStatus == U_INVARIANT_CONVERSION_ERROR) { \
+        /* Don't propagate the invariant conversion error; it is a skeleton syntax error */ \
+        (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \
+        return; \
+    } else if (U_FAILURE(conversionStatus)) { \
+        (status) = conversionStatus; \
+        return; \
+    } \
+}
+
+
+// NOTE: The order of these strings must be consistent with UNumberFormatRoundingMode
 const char16_t* const kRoundingModeStrings[] = {
-        u"up", u"down", u"ceiling", u"floor", u"half-up", u"half-down", u"half-even", u"unnecessary"};
+        u"ceiling", u"floor", u"down", u"up", u"half-even", u"half-down", u"half-up", u"unnecessary"};

 constexpr int32_t kRoundingModeCount = 8;
 static_assert(
@ -357,14 +373,14 @@ MacroProps skeleton::parseSkeleton(const UnicodeString& skeletonString, UErrorCo

    SeenMacroProps seen;
    MacroProps macros;
-    StringSegment segment(skeletonString, false);
+    StringSegment segment(tempSkeletonString, false);
    UCharsTrie stemTrie(kSerializedStemTrie);
    ParseState stem = STATE_NULL;
-    int offset = 0;
+    int32_t offset = 0;

    // Primary skeleton parse loop:
    while (offset < segment.length()) {
-        int cp = segment.codePointAt(offset);
+        UChar32 cp = segment.codePointAt(offset);
        bool isTokenSeparator = PatternProps::isWhiteSpace(cp);
        bool isOptionSeparator = (cp == u'/');

@ -772,21 +788,17 @@ blueprint_helpers::parseExponentSignOption(const StringSegment& segment, MacroPr

 void blueprint_helpers::parseCurrencyOption(const StringSegment& segment, MacroProps& macros,
                                            UErrorCode& status) {
-    if (segment.length() != 3) {
-        // throw new SkeletonSyntaxException("Invalid currency", segment);
-        status = U_NUMBER_SKELETON_SYNTAX_ERROR;
-        return;
-    }
    const UChar* currencyCode = segment.toUnicodeString().getTerminatedBuffer();
-    // Check that the currency code is valid:
-    int32_t numericCode = ucurr_getNumericCode(currencyCode);
-    if (numericCode == 0) {
+    UErrorCode localStatus = U_ZERO_ERROR;
+    CurrencyUnit currency(currencyCode, localStatus);
+    if (U_FAILURE(localStatus)) {
+        // Not 3 ascii chars
        // throw new SkeletonSyntaxException("Invalid currency", segment);
        status = U_NUMBER_SKELETON_SYNTAX_ERROR;
        return;
    }
    // Slicing is OK
-    macros.unit = CurrencyUnit(currencyCode, status); // NOLINT
+    macros.unit = currency; // NOLINT
 }

 void
@ -796,48 +808,40 @@ blueprint_helpers::generateCurrencyOption(const CurrencyUnit& currency, UnicodeS

 void blueprint_helpers::parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros,
                                               UErrorCode& status) {
+    UnicodeString stemString = segment.toUnicodeString();
+
    // NOTE: The category (type) of the unit is guaranteed to be a valid subtag (alphanumeric)
    // http://unicode.org/reports/tr35/#Validity_Data
    int firstHyphen = 0;
-    while (firstHyphen < segment.length() && segment.charAt(firstHyphen) != '-') {
+    while (firstHyphen < stemString.length() && stemString.charAt(firstHyphen) != '-') {
        firstHyphen++;
    }
-    if (firstHyphen == segment.length()) {
+    if (firstHyphen == stemString.length()) {
        // throw new SkeletonSyntaxException("Invalid measure unit option", segment);
        status = U_NUMBER_SKELETON_SYNTAX_ERROR;
        return;
    }

-    // MeasureUnit is in char space; we need to convert.
-    // Note: the longest type/subtype as of this writing (March 2018) is 24 chars.
-    static constexpr int32_t CAPACITY = 30;
-    char type[CAPACITY];
-    char subType[CAPACITY];
-    const int32_t typeLen = firstHyphen;
-    const int32_t subTypeLen = segment.length() - firstHyphen - 1;
-    if (typeLen + 1 > CAPACITY || subTypeLen + 1 > CAPACITY) {
-        // Type or subtype longer than 30?
-        // The capacity should be increased if this is a problem with a real CLDR unit.
-        status = U_NUMBER_SKELETON_SYNTAX_ERROR;
-        return;
-    }
-    u_UCharsToChars(segment.toUnicodeString().getBuffer(), type, typeLen);
-    u_UCharsToChars(segment.toUnicodeString().getBuffer() + firstHyphen + 1, subType, subTypeLen);
-    type[typeLen] = 0;
-    subType[subTypeLen] = 0;
+    // Need to do char <-> UChar conversion...
+    if (U_FAILURE(status)) { return; }
+    CharString type;
+    SKELETON_UCHAR_TO_CHAR(type, stemString, 0, firstHyphen, status);
+    CharString subType;
+    SKELETON_UCHAR_TO_CHAR(subType, stemString, firstHyphen + 1, stemString.length(), status);

    // Note: the largest type as of this writing (March 2018) is "volume", which has 24 units.
-    MeasureUnit units[30];
+    static constexpr int32_t CAPACITY = 30;
+    MeasureUnit units[CAPACITY];
    UErrorCode localStatus = U_ZERO_ERROR;
-    int32_t numUnits = MeasureUnit::getAvailable(type, units, 30, localStatus);
+    int32_t numUnits = MeasureUnit::getAvailable(type.data(), units, CAPACITY, localStatus);
    if (U_FAILURE(localStatus)) {
        // More than 30 units in this type?
-        status = U_NUMBER_SKELETON_SYNTAX_ERROR;
+        status = U_INTERNAL_PROGRAM_ERROR;
        return;
    }
    for (int32_t i = 0; i < numUnits; i++) {
        auto& unit = units[i];
-        if (uprv_strcmp(subType, unit.getSubtype()) == 0) {
+        if (uprv_strcmp(subType.data(), unit.getSubtype()) == 0) {
            macros.unit = unit;
            return;
        }
@ -848,26 +852,11 @@ void blueprint_helpers::parseMeasureUnitOption(const StringSegment& segment, Mac
 }

 void blueprint_helpers::generateMeasureUnitOption(const MeasureUnit& measureUnit, UnicodeString& sb,
-                                                  UErrorCode& status) {
-    // We need to convert from char* to UChar*...
-    // See comments in the previous function about the capacity setting.
-    static constexpr int32_t CAPACITY = 30;
-    char16_t type16[CAPACITY];
-    char16_t subType16[CAPACITY];
-    const auto typeLen = static_cast<int32_t>(uprv_strlen(measureUnit.getType()));
-    const auto subTypeLen = static_cast<int32_t>(uprv_strlen(measureUnit.getSubtype()));
-    if (typeLen + 1 > CAPACITY || subTypeLen + 1 > CAPACITY) {
-        // Type or subtype longer than 30?
-        // The capacity should be increased if this is a problem with a real CLDR unit.
-        status = U_UNSUPPORTED_ERROR;
-        return;
-    }
-    u_charsToUChars(measureUnit.getType(), type16, typeLen);
-    u_charsToUChars(measureUnit.getSubtype(), subType16, subTypeLen);
-
-    sb.append(type16, typeLen);
+                                                  UErrorCode&) {
+    // Need to do char <-> UChar conversion...
+    sb.append(UnicodeString(measureUnit.getType(), -1, US_INV));
    sb.append(u'-');
-    sb.append(subType16, subTypeLen);
+    sb.append(UnicodeString(measureUnit.getSubtype(), -1, US_INV));
 }

 void blueprint_helpers::parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros,
@ -1052,17 +1041,19 @@ bool blueprint_helpers::parseFracSigOption(const StringSegment& segment, MacroPr

 void blueprint_helpers::parseIncrementOption(const StringSegment& segment, MacroProps& macros,
                                             UErrorCode& status) {
+    // Need to do char <-> UChar conversion...
+    CharString buffer;
+    SKELETON_UCHAR_TO_CHAR(buffer, segment.toUnicodeString(), 0, segment.length(), status);
+
    // Utilize DecimalQuantity/decNumber to parse this for us.
-    static constexpr int32_t CAPACITY = 30;
-    char buffer[CAPACITY];
-    if (segment.length() > CAPACITY) {
-        // No support for numbers this long; they won't fit in a double anyway.
+    DecimalQuantity dq;
+    UErrorCode localStatus = U_ZERO_ERROR;
+    dq.setToDecNumber({buffer.data(), buffer.length()}, localStatus);
+    if (U_FAILURE(localStatus)) {
+        // throw new SkeletonSyntaxException("Invalid rounding increment", segment, e);
        status = U_NUMBER_SKELETON_SYNTAX_ERROR;
        return;
    }
-    u_UCharsToChars(segment.toUnicodeString().getBuffer(), buffer, segment.length());
-    DecimalQuantity dq;
-    dq.setToDecNumber({buffer, segment.length()});
    double increment = dq.toDouble();
    macros.rounder = Rounder::increment(increment);
 }
@ -1146,17 +1137,10 @@ void blueprint_helpers::generateIntegerWidthOption(int32_t minInt, int32_t maxIn
 void blueprint_helpers::parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros,
                                                   UErrorCode& status) {
    // Need to do char <-> UChar conversion...
-    static constexpr int32_t CAPACITY = 30;
-    char buffer[CAPACITY];
-    if (segment.length() + 1 > CAPACITY) {
-        // No support for numbers this long; they won't fit in a double anyway.
-        status = U_NUMBER_SKELETON_SYNTAX_ERROR;
-        return;
-    }
-    u_UCharsToChars(segment.toUnicodeString().getBuffer(), buffer, segment.length());
-    buffer[segment.length()] = 0;
+    CharString buffer;
+    SKELETON_UCHAR_TO_CHAR(buffer, segment.toUnicodeString(), 0, segment.length(), status);

-    NumberingSystem* ns = NumberingSystem::createInstanceByName(buffer, status);
+    NumberingSystem* ns = NumberingSystem::createInstanceByName(buffer.data(), status);
    if (ns == nullptr) {
        // throw new SkeletonSyntaxException("Unknown numbering system", segment);
        status = U_NUMBER_SKELETON_SYNTAX_ERROR;
@ -1166,18 +1150,9 @@ void blueprint_helpers::parseNumberingSystemOption(const StringSegment& segment,
 }

 void blueprint_helpers::generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb,
-                                                      UErrorCode& status) {
+                                                      UErrorCode&) {
    // Need to do char <-> UChar conversion...
-    static constexpr int32_t CAPACITY = 30;
-    char16_t buffer16[CAPACITY];
-    const auto len = static_cast<int32_t>(uprv_strlen(ns.getName()));
-    if (len > CAPACITY) {
-        // No support for numbers this long; they won't fit in a double anyway.
-        status = U_UNSUPPORTED_ERROR;
-        return;
-    }
-    u_charsToUChars(ns.getName(), buffer16, len);
-    sb.append(buffer16, len);
+    sb.append(UnicodeString(ns.getName(), -1, US_INV));
 }


@ -1243,7 +1218,15 @@ bool GeneratorHelpers::unit(const MacroProps& macros, UnicodeString& sb, UErrorC

 bool GeneratorHelpers::perUnit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
    // Per-units are currently expected to be only MeasureUnits.
-    if (unitIsCurrency(macros.perUnit) || unitIsNoUnit(macros.perUnit)) {
+    if (unitIsNoUnit(macros.perUnit)) {
+        if (unitIsPercent(macros.perUnit) || unitIsPermille(macros.perUnit)) {
+            status = U_UNSUPPORTED_ERROR;
+            return false;
+        } else {
+            // Default value: ok to ignore
+            return false;
+        }
+    } else if (unitIsCurrency(macros.perUnit)) {
        status = U_UNSUPPORTED_ERROR;
        return false;
    } else {
@ -1298,7 +1281,9 @@ bool GeneratorHelpers::rounding(const MacroProps& macros, UnicodeString& sb, UEr
 }

 bool GeneratorHelpers::grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
-    if (macros.grouper.isBogus() || macros.grouper.fStrategy == UNUM_GROUPING_COUNT) {
+    if (macros.grouper.isBogus()) {
+        return false; // No value
+    } else if (macros.grouper.fStrategy == UNUM_GROUPING_COUNT) {
        status = U_UNSUPPORTED_ERROR;
        return false;
    } else if (macros.grouper.fStrategy == UNUM_GROUPING_AUTO) {
@ -1310,7 +1295,8 @@ bool GeneratorHelpers::grouping(const MacroProps& macros, UnicodeString& sb, UEr
 }

 bool GeneratorHelpers::integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status) {
-    if (macros.integerWidth.fHasError || macros.integerWidth == IntegerWidth::standard()) {
+    if (macros.integerWidth.fHasError || macros.integerWidth.isBogus() ||
+        macros.integerWidth == IntegerWidth::standard()) {
        // Error or Default
        return false;
    }
--- a/icu4c/source/i18n/number_skeletons.h
+++ b/icu4c/source/i18n/number_skeletons.h
@ -16,6 +16,8 @@ using icu::numparse::impl::StringSegment;
 U_NAMESPACE_BEGIN namespace number {
 namespace impl {

+static constexpr UErrorCode U_NUMBER_SKELETON_SYNTAX_ERROR = U_ILLEGAL_ARGUMENT_ERROR;
+
 // Forward-declaration
 struct SeenMacroProps;

--- a/icu4c/source/i18n/numparse_stringsegment.cpp
+++ b/icu4c/source/i18n/numparse_stringsegment.cpp
@ -133,5 +133,9 @@ bool StringSegment::codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase) {
    return cp1 == cp2;
 }

+// Compares the text of this segment, as produced by toUnicodeString(), with `other`.
+bool StringSegment::operator==(const UnicodeString& other) const {
+    const UnicodeString segmentText = toUnicodeString();
+    return segmentText == other;
+}
+

 #endif /* #if !UCONFIG_NO_FORMATTING */
--- a/icu4c/source/i18n/plurrule.cpp
+++ b/icu4c/source/i18n/plurrule.cpp
@ -1458,7 +1458,7 @@ FixedDecimal::FixedDecimal(const UnicodeString &num, UErrorCode &status) {
    CharString cs;
    cs.appendInvariantChars(num, status);
    DecimalQuantity dl;
-    dl.setToDecNumber(cs.toStringPiece());
+    dl.setToDecNumber(cs.toStringPiece(), status);
    if (U_FAILURE(status)) {
        init(0, 0, 0);
        return;
--- a/icu4c/source/i18n/unicode/numberformatter.h
+++ b/icu4c/source/i18n/unicode/numberformatter.h
@ -2179,6 +2179,25 @@ class U_I18N_API NumberFormatterSettings {

 #endif  /* U_HIDE_INTERNAL_API */

+    /**
+     * Creates a skeleton string representation of this number formatter. A skeleton string is a
+     * locale-agnostic serialized form of a number formatter.
+     * <p>
+     * Not all options are capable of being represented in the skeleton string; for example, a
+     * DecimalFormatSymbols object. If any such option is encountered, the error code is set to a
+     * failure value; C++ reports this through the UErrorCode argument rather than by throwing.
+     * <p>
+     * The returned skeleton is in normalized form, such that two number formatters with equivalent
+     * behavior should produce the same skeleton.
+     * <p>
+     * Sets an error code if the number formatter has an option that cannot be represented in a skeleton
+     * string.
+     *
+     * @param status Set to a failure code if an option cannot be represented in a skeleton string.
+     * @return A number skeleton string with behavior corresponding to this number formatter.
+     * @draft ICU 62
+     */
+    UnicodeString toSkeleton(UErrorCode& status) const;
+
    /**
     * Sets the UErrorCode if an error occurred in the fluent chain.
     * Preserves older error codes in the outErrorCode.
@ -2192,7 +2211,7 @@ class U_I18N_API NumberFormatterSettings {
        }
        fMacros.copyErrorTo(outErrorCode);
        return U_FAILURE(outErrorCode);
-    }
+    };

    // NOTE: Uses default copy and move constructors.

@ -2588,6 +2607,18 @@ class U_I18N_API NumberFormatter final {
     */
    static LocalizedNumberFormatter withLocale(const Locale &locale);

+    /**
+     * Call this method at the beginning of a NumberFormatter fluent chain to create an instance based
+     * on a given number skeleton string.
+     *
+     * @param skeleton
+     *            The skeleton string off of which to base this NumberFormatter.
+     * @param status
+     *            Set to a failure code if the given string is not a valid number formatting
+     *            skeleton. Syntax errors are reported as U_NUMBER_SKELETON_SYNTAX_ERROR, which is
+     *            currently an alias for U_ILLEGAL_ARGUMENT_ERROR.
+     * @return An UnlocalizedNumberFormatter, to be used for chaining.
+     * @draft ICU 62
+     */
+    static UnlocalizedNumberFormatter fromSkeleton(const UnicodeString& skeleton, UErrorCode& status);
+
    /**
     * Use factory methods instead of the constructor to create a NumberFormatter.
     * @draft ICU 60
--- a/icu4c/source/test/intltest/Makefile.in
+++ b/icu4c/source/test/intltest/Makefile.in
@ -65,7 +65,7 @@ numberformattesttuple.o pluralmaptest.o \
 numbertest_affixutils.o numbertest_api.o numbertest_decimalquantity.o \
 numbertest_modifiers.o numbertest_patternmodifier.o numbertest_patternstring.o \
 numbertest_stringbuilder.o numbertest_stringsegment.o numbertest_unisets.o \
-numbertest_parse.o numbertest_doubleconversion.o
+numbertest_parse.o numbertest_doubleconversion.o numbertest_skeletons.o

 DEPS = $(OBJECTS:.o=.d)

--- a/icu4c/source/test/intltest/numbertest.h
+++ b/icu4c/source/test/intltest/numbertest.h
@ -229,6 +229,23 @@ class NumberParserTest : public IntlTest {
    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
 };

+/**
+ * Tests for number skeleton strings, exercised through NumberFormatter::fromSkeleton()
+ * and toSkeleton().
+ */
+class NumberSkeletonTest : public IntlTest {
+  public:
+    // Skeletons that must be accepted without error.
+    void validTokens();
+    // Malformed stems/options that must be rejected with a skeleton syntax error.
+    void invalidTokens();
+    // Unrecognized stems/options (including non-invariant characters) that must be rejected.
+    void unknownTokens();
+    // Options or separators appearing where none is allowed; must be rejected.
+    void unexpectedTokens();
+    // Skeletons that set the same property twice; must be rejected.
+    void duplicateValues();
+    // Stems that need an option but are given none (or a wrong one); must be rejected.
+    void stemsRequiringOption();
+    // Skeletons equal to the default settings; expected to normalize to the empty string.
+    void defaultTokens();
+    // Skeletons with repeated whitespace between tokens; checked by formatting a number.
+    void flexibleSeparators();
+
+    void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
+
+  private:
+    // Asserts that each of the casesLen skeletons yields U_NUMBER_SKELETON_SYNTAX_ERROR.
+    void expectedErrorSkeleton(const char16_t** cases, int32_t casesLen);
+};
+

 // NOTE: This macro is identical to the one in itformat.cpp
 #define TESTCLASS(id, TestClass)          \
@ -261,6 +278,7 @@ class NumberTest : public IntlTest {
        TESTCLASS(8, StringSegmentTest);
        TESTCLASS(9, UniSetsTest);
        TESTCLASS(10, NumberParserTest);
+        TESTCLASS(11, NumberSkeletonTest);
        default: name = ""; break; // needed to end loop
        }
    }
--- a/icu4c/source/test/intltest/numbertest_skeletons.cpp
+++ b/icu4c/source/test/intltest/numbertest_skeletons.cpp
@ -0,0 +1,245 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
+
+#include "putilimp.h"
+#include "unicode/dcfmtsym.h"
+#include "numbertest.h"
+#include "number_utils.h"
+#include "number_skeletons.h"
+
+using namespace icu::number::impl;
+
+
+// Entry point used by the intltest framework to select and run the test cases of this
+// suite. The cases are registered, in order, between TESTCASE_AUTO_BEGIN and
+// TESTCASE_AUTO_END (macros provided by the test framework, not shown here).
+void NumberSkeletonTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) {
+    if (exec) {
+        // Log this suite's own name; the message previously said "AffixUtilsTest",
+        // which made the test log attribute these cases to the wrong suite.
+        logln("TestSuite NumberSkeletonTest: ");
+    }
+    TESTCASE_AUTO_BEGIN;
+        TESTCASE_AUTO(validTokens);
+        TESTCASE_AUTO(invalidTokens);
+        TESTCASE_AUTO(unknownTokens);
+        TESTCASE_AUTO(unexpectedTokens);
+        TESTCASE_AUTO(duplicateValues);
+        TESTCASE_AUTO(stemsRequiringOption);
+        TESTCASE_AUTO(defaultTokens);
+        TESTCASE_AUTO(flexibleSeparators);
+    TESTCASE_AUTO_END;
+}
+
+// Checks that every skeleton listed below is accepted by NumberFormatter::fromSkeleton().
+// Only acceptance is verified here, not the formatting behavior of the result.
+// Most of the skeletons are taken from the design doc.
+void NumberSkeletonTest::validTokens() {
+    static const char16_t* kValidSkeletons[] = {
+            u"round-integer",
+            u"round-unlimited",
+            u"@@@##",
+            u"@@+",
+            u".000##",
+            u".00+",
+            u".",
+            u".+",
+            u".######",
+            u".00/@@+",
+            u".00/@##",
+            u"round-increment/3.14",
+            u"round-currency-standard",
+            u"round-integer/half-up",
+            u".00#/ceiling",
+            u".00/@@+/floor",
+            u"scientific",
+            u"scientific/+ee",
+            u"scientific/sign-always",
+            u"scientific/+ee/sign-always",
+            u"scientific/sign-always/+ee",
+            u"scientific/sign-except-zero",
+            u"engineering",
+            u"engineering/+eee",
+            u"compact-short",
+            u"compact-long",
+            u"notation-simple",
+            u"percent",
+            u"permille",
+            u"measure-unit/length-meter",
+            u"measure-unit/area-square-meter",
+            u"measure-unit/energy-joule per-measure-unit/length-meter",
+            u"currency/XXX",
+            u"currency/ZZZ",
+            u"group-off",
+            u"group-min2",
+            u"group-auto",
+            u"group-on-aligned",
+            u"group-thousands",
+            u"integer-width/00",
+            u"integer-width/#0",
+            u"integer-width/+00",
+            u"sign-always",
+            u"sign-auto",
+            u"sign-never",
+            u"sign-accounting",
+            u"sign-accounting-always",
+            u"sign-except-zero",
+            u"sign-accounting-except-zero",
+            u"unit-width-narrow",
+            u"unit-width-short",
+            u"unit-width-iso-code",
+            u"unit-width-full-name",
+            u"unit-width-hidden",
+            u"decimal-auto",
+            u"decimal-always",
+            u"latin",
+            u"numbering-system/arab",
+            u"numbering-system/latn",
+            u"round-integer/@##",
+            u"round-integer/ceiling",
+            u"round-currency-cash/ceiling"};
+
+    const int32_t skeletonCount =
+            static_cast<int32_t>(sizeof(kValidSkeletons) / sizeof(kValidSkeletons[0]));
+    for (int32_t i = 0; i < skeletonCount; i++) {
+        // Fresh status per skeleton so that one failure does not mask the others.
+        UErrorCode status = U_ZERO_ERROR;
+        UnicodeString skeletonString(kValidSkeletons[i]);
+        NumberFormatter::fromSkeleton(skeletonString, status);
+        assertSuccess(skeletonString, status);
+    }
+}
+
+// Malformed stems and options; each one is passed to expectedErrorSkeleton(), which
+// asserts that parsing reports U_NUMBER_SKELETON_SYNTAX_ERROR.
+void NumberSkeletonTest::invalidTokens() {
+    static const char16_t* kInvalidSkeletons[] = {
+            u".00x",
+            u".00##0",
+            u".##+",
+            u".00##+",
+            u".0#+",
+            u"@@x",
+            u"@@##0",
+            u"@#+",
+            u".00/@",
+            u".00/@@",
+            u".00/@@x",
+            u".00/@@#",
+            u".00/@@#+",
+            u".00/floor/@@+", // wrong order
+            u"round-currency-cash/XXX",
+            u"scientific/ee",
+            u"round-increment/xxx",
+            u"round-increment/0.1.2",
+            u"currency/dummy",
+            u"measure-unit/foo",
+            u"integer-width/xxx",
+            u"integer-width/0+",
+            u"integer-width/+0#",
+            u"scientific/foo"};
+
+    const int32_t skeletonCount =
+            static_cast<int32_t>(sizeof(kInvalidSkeletons) / sizeof(kInvalidSkeletons[0]));
+    expectedErrorSkeleton(kInvalidSkeletons, skeletonCount);
+}
+
+// Unrecognized stems and option values, including ones with non-invariant characters;
+// each one is expected to be rejected with U_NUMBER_SKELETON_SYNTAX_ERROR.
+void NumberSkeletonTest::unknownTokens() {
+    static const char16_t* kUnknownSkeletons[] = {
+            u"maesure-unit",
+            u"measure-unit/foo-bar",
+            u"numbering-system/dummy",
+            u"français",
+            u"measure-unit/français-français", // non-invariant characters for C++
+            u"numbering-system/français", // non-invariant characters for C++
+            u"round-increment/français", // non-invariant characters for C++
+            u"currency-USD"};
+
+    const int32_t skeletonCount =
+            static_cast<int32_t>(sizeof(kUnknownSkeletons) / sizeof(kUnknownSkeletons[0]));
+    expectedErrorSkeleton(kUnknownSkeletons, skeletonCount);
+}
+
+// Options or option separators in positions where none is accepted; each skeleton is
+// expected to be rejected with U_NUMBER_SKELETON_SYNTAX_ERROR.
+void NumberSkeletonTest::unexpectedTokens() {
+    static const char16_t* kUnexpectedSkeletons[] = {
+            u"group-thousands/foo",
+            u"round-integer//ceiling group-off",
+            u"round-integer//ceiling  group-off",
+            u"round-integer/ group-off",
+            u"round-integer// group-off"};
+
+    const int32_t skeletonCount =
+            static_cast<int32_t>(sizeof(kUnexpectedSkeletons) / sizeof(kUnexpectedSkeletons[0]));
+    expectedErrorSkeleton(kUnexpectedSkeletons, skeletonCount);
+}
+
+// Skeletons in which two tokens set the same property; each one is expected to be
+// rejected with U_NUMBER_SKELETON_SYNTAX_ERROR.
+void NumberSkeletonTest::duplicateValues() {
+    static const char16_t* kDuplicateSkeletons[] = {
+            u"round-integer round-integer",
+            u"round-integer .00+",
+            u"round-integer round-unlimited",
+            u"round-integer @@@",
+            u"scientific engineering",
+            u"engineering compact-long",
+            u"sign-auto sign-always"};
+
+    const int32_t skeletonCount =
+            static_cast<int32_t>(sizeof(kDuplicateSkeletons) / sizeof(kDuplicateSkeletons[0]));
+    expectedErrorSkeleton(kDuplicateSkeletons, skeletonCount);
+}
+
+// Every stem below needs an option of its own. Combining each stem with each suffix
+// (none of which supplies such an option) must yield U_NUMBER_SKELETON_SYNTAX_ERROR.
+void NumberSkeletonTest::stemsRequiringOption() {
+    static const char16_t* kStems[] = {u"round-increment", u"currency", u"measure-unit", u"integer-width",};
+    static const char16_t* kSuffixes[] = {u"", u"/ceiling", u" scientific", u"/ceiling scientific"};
+
+    const int32_t stemCount = static_cast<int32_t>(sizeof(kStems) / sizeof(kStems[0]));
+    const int32_t suffixCount = static_cast<int32_t>(sizeof(kSuffixes) / sizeof(kSuffixes[0]));
+
+    for (int32_t stemIndex = 0; stemIndex < stemCount; stemIndex++) {
+        for (int32_t suffixIndex = 0; suffixIndex < suffixCount; suffixIndex++) {
+            UErrorCode status = U_ZERO_ERROR;
+            UnicodeString skeletonString = UnicodeString(kStems[stemIndex]) + kSuffixes[suffixIndex];
+            NumberFormatter::fromSkeleton(skeletonString, status);
+            assertEquals(skeletonString, U_NUMBER_SKELETON_SYNTAX_ERROR, status);
+        }
+    }
+}
+
+// Each skeleton below is parsed and then regenerated with toSkeleton(); the
+// regenerated (normalized) skeleton is expected to be the empty string.
+void NumberSkeletonTest::defaultTokens() {
+    IcuTestErrorCode status(*this, "defaultTokens");
+
+    static const char16_t* kDefaultSkeletons[] = {
+            u"notation-simple",
+            u"base-unit",
+            u"group-auto",
+            u"integer-width/+0",
+            u"sign-auto",
+            u"unit-width-short",
+            u"decimal-auto"};
+
+    const int32_t skeletonCount =
+            static_cast<int32_t>(sizeof(kDefaultSkeletons) / sizeof(kDefaultSkeletons[0]));
+    for (int32_t i = 0; i < skeletonCount; i++) {
+        UnicodeString skeletonString(kDefaultSkeletons[i]);
+        status.setScope(skeletonString);
+        // Round trip: parse the skeleton, then serialize the resulting formatter again.
+        UnlocalizedNumberFormatter formatter = NumberFormatter::fromSkeleton(skeletonString, status);
+        UnicodeString normalized = formatter.toSkeleton(status);
+        assertEquals(skeletonString, u"", normalized);
+    }
+}
+
+// Formats 5142.3 in locale "en" with skeletons that differ only in the number of spaces
+// between tokens, and checks the formatted output against the expected string.
+void NumberSkeletonTest::flexibleSeparators() {
+    IcuTestErrorCode status(*this, "flexibleSeparators");
+
+    static struct TestCase {
+        const char16_t* skeleton;
+        const char16_t* expected;
+    } kCases[] = {{u"round-integer group-off", u"5142"},
+                  {u"round-integer  group-off", u"5142"},
+                  {u"round-integer/ceiling group-off", u"5143"},
+                  {u"round-integer/ceiling  group-off", u"5143"}};
+
+    const int32_t caseCount = static_cast<int32_t>(sizeof(kCases) / sizeof(kCases[0]));
+    for (int32_t i = 0; i < caseCount; i++) {
+        const TestCase& testCase = kCases[i];
+        UnicodeString skeletonString(testCase.skeleton);
+        UnicodeString expected(testCase.expected);
+        status.setScope(skeletonString);
+        LocalizedNumberFormatter formatter =
+                NumberFormatter::fromSkeleton(skeletonString, status).locale("en");
+        UnicodeString actual = formatter.formatDouble(5142.3, status).toString();
+        assertEquals(skeletonString, expected, actual);
+    }
+}
+
+// Shared helper for the negative tests: parses each of the casesLen skeletons in `cases`
+// and asserts that the resulting status is U_NUMBER_SKELETON_SYNTAX_ERROR.
+// C++ does not distinguish between "invalid", "unknown", and "unexpected" tokens.
+void NumberSkeletonTest::expectedErrorSkeleton(const char16_t** cases, int32_t casesLen) {
+    int32_t caseIndex = 0;
+    while (caseIndex < casesLen) {
+        UErrorCode status = U_ZERO_ERROR;
+        UnicodeString skeletonString(cases[caseIndex]);
+        NumberFormatter::fromSkeleton(skeletonString, status);
+        assertEquals(skeletonString, U_NUMBER_SKELETON_SYNTAX_ERROR, status);
+        ++caseIndex;
+    }
+}
+
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
--- a/icu4c/source/test/intltest/numfmtst.cpp
+++ b/icu4c/source/test/intltest/numfmtst.cpp
@ -105,7 +105,7 @@ static DecimalQuantity &strToDigitList(
    }
    CharString formatValue;
    formatValue.appendInvariantChars(str, status);
-    digitList.setToDecNumber(StringPiece(formatValue.data()));
+    digitList.setToDecNumber(StringPiece(formatValue.data()), status);
    return digitList;
 }

@ -7027,7 +7027,7 @@ void NumberFormatTest::TestDecimal() {
            UnicodeString formattedResult;
            DecimalQuantity dl;
            StringPiece num("123.4566666666666666666666666666666666621E+40");
-            dl.setToDecNumber(num);
+            dl.setToDecNumber(num, status);
            ASSERT_SUCCESS(status);
            fmtr->format(dl, formattedResult, NULL, status);
            ASSERT_SUCCESS(status);
@ -7035,7 +7035,7 @@ void NumberFormatTest::TestDecimal() {

            status = U_ZERO_ERROR;
            num.set("666.666");
-            dl.setToDecNumber(num);
+            dl.setToDecNumber(num, status);
            FieldPosition pos(NumberFormat::FRACTION_FIELD);
            ASSERT_SUCCESS(status);
            formattedResult.remove();
--- a/icu4c/source/test/intltest/plurults.cpp
+++ b/icu4c/source/test/intltest/plurults.cpp
@ -636,7 +636,7 @@ void PluralRulesTest::checkSelect(const LocalPointer<PluralRules> &rules, UError

        // DigitList is a convenient way to parse the decimal number string and get a double.
        DecimalQuantity  dl;
-        dl.setToDecNumber(StringPiece(num));
+        dl.setToDecNumber(StringPiece(num), status);
        if (U_FAILURE(status)) {
            errln("file %s, line %d, ICU error status: %s.", __FILE__, line, u_errorName(status));
            status = U_ZERO_ERROR;
--- a/icu4j/main/classes/core/src/com/ibm/icu/number/NumberSkeletonImpl.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/number/NumberSkeletonImpl.java
@ -850,11 +850,14 @@ class NumberSkeletonImpl {

        private static void parseCurrencyOption(StringSegment segment, MacroProps macros) {
            String currencyCode = segment.subSequence(0, segment.length()).toString();
+            Currency currency; // assigned inside the try; stored on macros only after the lookup succeeds
            try {
-                macros.unit = Currency.getInstance(currencyCode);
+                currency = Currency.getInstance(currencyCode);
            } catch (IllegalArgumentException e) {
+                // The currency code was not 3 ASCII characters; rethrow as a skeleton syntax error
                throw new SkeletonSyntaxException("Invalid currency", segment, e);
            }
+            macros.unit = currency; // reached only when getInstance did not throw
        }

        private static void generateCurrencyOption(Currency currency, StringBuilder sb) {
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberSkeletonTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberSkeletonTest.java
@ -21,16 +21,6 @@ import com.ibm.icu.util.ULocale;
 */
 public class NumberSkeletonTest {

-    @Test
-    public void duplicateValues() {
-        try {
-            NumberFormatter.fromSkeleton("round-integer round-integer");
-            fail();
-        } catch (SkeletonSyntaxException expected) {
-            assertTrue(expected.getMessage(), expected.getMessage().contains("Duplicated setting"));
-        }
-    }
-
    @Test
    public void validTokens() {
        // This tests only if the tokens are valid, not their behavior.
@ -69,6 +59,7 @@ public class NumberSkeletonTest {
                "measure-unit/area-square-meter",
                "measure-unit/energy-joule per-measure-unit/length-meter",
                "currency/XXX",
+                "currency/ZZZ",
                "group-off",
                "group-min2",
                "group-auto",
@ -138,7 +129,7 @@ public class NumberSkeletonTest {
        for (String cas : cases) {
            try {
                NumberFormatter.fromSkeleton(cas);
-                fail("Skeleton parses, but it should have failed: " + cas);
+                fail(cas);
            } catch (SkeletonSyntaxException expected) {
                assertTrue(expected.getMessage(), expected.getMessage().contains("Invalid"));
            }
@ -147,12 +138,20 @@ public class NumberSkeletonTest {

    @Test
    public void unknownTokens() {
-        String[] cases = { "maesure-unit", "measure-unit/foo-bar", "numbering-system/dummy" };
+        String[] cases = {
+                "maesure-unit",
+                "measure-unit/foo-bar",
+                "numbering-system/dummy",
+                "français",
+                "measure-unit/français-français", // non-invariant characters for C++
+                "numbering-system/français", // non-invariant characters for C++
+                "round-increment/français", // non-invariant characters for C++
+                "currency-USD" };

        for (String cas : cases) {
            try {
                NumberFormatter.fromSkeleton(cas);
-                fail();
+                fail(cas);
            } catch (SkeletonSyntaxException expected) {
                assertTrue(expected.getMessage(), expected.getMessage().contains("Unknown"));
            }
@ -171,13 +170,34 @@ public class NumberSkeletonTest {
        for (String cas : cases) {
            try {
                NumberFormatter.fromSkeleton(cas);
-                fail();
+                fail(cas);
            } catch (SkeletonSyntaxException expected) {
                assertTrue(expected.getMessage(), expected.getMessage().contains("Unexpected"));
            }
        }
    }

+    @Test
+    public void duplicateValues() { // every case below must be rejected with a "Duplicated" SkeletonSyntaxException
+        String[] cases = {
+                "round-integer round-integer",
+                "round-integer .00+",
+                "round-integer round-unlimited",
+                "round-integer @@@",
+                "scientific engineering",
+                "engineering compact-long",
+                "sign-auto sign-always" };
+
+        for (String cas : cases) {
+            try {
+                NumberFormatter.fromSkeleton(cas);
+                fail(cas); // reaching this line means the skeleton parsed; report which one
+            } catch (SkeletonSyntaxException expected) {
+                assertTrue(expected.getMessage(), expected.getMessage().contains("Duplicated")); // message must name the duplicate
+            }
+        }
+    }
+
    @Test
    public void stemsRequiringOption() {
        String[] stems = { "round-increment", "currency", "measure-unit", "integer-width", };