diff --git a/icu4c/source/i18n/Makefile.in b/icu4c/source/i18n/Makefile.in index a5752781f2f..94dac4e235b 100644 --- a/icu4c/source/i18n/Makefile.in +++ b/icu4c/source/i18n/Makefile.in @@ -108,7 +108,8 @@ number_decimfmtprops.o number_fluent.o number_formatimpl.o number_grouping.o \ number_integerwidth.o number_longnames.o number_modifiers.o number_notation.o \ number_padding.o number_patternmodifier.o number_patternstring.o \ number_rounding.o number_scientific.o number_stringbuilder.o \ -numparse_stringsegment.o numparse_unisets.o numparse_parsednumber.o +numparse_stringsegment.o numparse_unisets.o numparse_parsednumber.o \ +numparse_impl.o numparse_symbols.o numparse_decimal.o ## Header files to install diff --git a/icu4c/source/i18n/number_decimalquantity.cpp b/icu4c/source/i18n/number_decimalquantity.cpp index 3342771b987..31150dfa12d 100644 --- a/icu4c/source/i18n/number_decimalquantity.cpp +++ b/icu4c/source/i18n/number_decimalquantity.cpp @@ -190,6 +190,7 @@ int32_t DecimalQuantity::getMagnitude() const { void DecimalQuantity::adjustMagnitude(int32_t delta) { if (precision != 0) { + // TODO: How to handle overflow cases? scale += delta; origDelta += delta; } diff --git a/icu4c/source/i18n/number_formatimpl.cpp b/icu4c/source/i18n/number_formatimpl.cpp index bc96cb15dab..795c3d13482 100644 --- a/icu4c/source/i18n/number_formatimpl.cpp +++ b/icu4c/source/i18n/number_formatimpl.cpp @@ -38,9 +38,9 @@ enum CldrPatternStyle { // TODO: Consider scientific format. 
}; -const char16_t * -doGetPattern(UResourceBundle *res, const char *nsName, const char *patternKey, UErrorCode &publicStatus, - UErrorCode &localStatus) { +const char16_t* +doGetPattern(UResourceBundle* res, const char* nsName, const char* patternKey, UErrorCode& publicStatus, + UErrorCode& localStatus) { // Construct the path into the resource bundle CharString key; key.append("NumberElements/", publicStatus); @@ -53,9 +53,9 @@ doGetPattern(UResourceBundle *res, const char *nsName, const char *patternKey, U return ures_getStringByKeyWithFallback(res, key.data(), nullptr, &localStatus); } -const char16_t *getPatternForStyle(const Locale &locale, const char *nsName, CldrPatternStyle style, - UErrorCode &status) { - const char *patternKey; +const char16_t* getPatternForStyle(const Locale& locale, const char* nsName, CldrPatternStyle style, + UErrorCode& status) { + const char* patternKey; switch (style) { case CLDR_PATTERN_STYLE_DECIMAL: patternKey = "decimalFormat"; @@ -76,7 +76,7 @@ const char16_t *getPatternForStyle(const Locale &locale, const char *nsName, Cld // Attempt to get the pattern with the native numbering system. UErrorCode localStatus = U_ZERO_ERROR; - const char16_t *pattern; + const char16_t* pattern; pattern = doGetPattern(res.getAlias(), nsName, patternKey, status, localStatus); if (U_FAILURE(status)) { return u""; } @@ -96,18 +96,21 @@ struct CurrencyFormatInfoResult { const char16_t* decimalSeparator; const char16_t* groupingSeparator; }; -CurrencyFormatInfoResult getCurrencyFormatInfo(const Locale& locale, const char* isoCode, UErrorCode& status) { + +CurrencyFormatInfoResult +getCurrencyFormatInfo(const Locale& locale, const char* isoCode, UErrorCode& status) { // TODO: Load this data in a centralized location like ICU4J? // TODO: Parts of this same data are loaded in dcfmtsym.cpp; should clean up. 
- CurrencyFormatInfoResult result = { false, nullptr, nullptr, nullptr }; - if (U_FAILURE(status)) return result; + CurrencyFormatInfoResult result = {false, nullptr, nullptr, nullptr}; + if (U_FAILURE(status)) { return result; } CharString key; key.append("Currencies/", status); key.append(isoCode, status); UErrorCode localStatus = status; LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_CURR, locale.getName(), &localStatus)); ures_getByKeyWithFallback(bundle.getAlias(), key.data(), bundle.getAlias(), &localStatus); - if (U_SUCCESS(localStatus) && ures_getSize(bundle.getAlias())>2) { // the length is 3 if more data is present + if (U_SUCCESS(localStatus) && + ures_getSize(bundle.getAlias()) > 2) { // the length is 3 if more data is present ures_getByIndex(bundle.getAlias(), 2, bundle.getAlias(), &localStatus); int32_t dummy; result.exists = true; @@ -121,30 +124,30 @@ CurrencyFormatInfoResult getCurrencyFormatInfo(const Locale& locale, const char* return result; } -inline bool unitIsCurrency(const MeasureUnit &unit) { +inline bool unitIsCurrency(const MeasureUnit& unit) { return uprv_strcmp("currency", unit.getType()) == 0; } -inline bool unitIsNoUnit(const MeasureUnit &unit) { +inline bool unitIsNoUnit(const MeasureUnit& unit) { return uprv_strcmp("none", unit.getType()) == 0; } -inline bool unitIsPercent(const MeasureUnit &unit) { +inline bool unitIsPercent(const MeasureUnit& unit) { return uprv_strcmp("percent", unit.getSubtype()) == 0; } -inline bool unitIsPermille(const MeasureUnit &unit) { +inline bool unitIsPermille(const MeasureUnit& unit) { return uprv_strcmp("permille", unit.getSubtype()) == 0; } } // namespace -NumberFormatterImpl *NumberFormatterImpl::fromMacros(const MacroProps ¯os, UErrorCode &status) { +NumberFormatterImpl* NumberFormatterImpl::fromMacros(const MacroProps& macros, UErrorCode& status) { return new NumberFormatterImpl(macros, true, status); } -void NumberFormatterImpl::applyStatic(const MacroProps ¯os, DecimalQuantity &inValue, 
- NumberStringBuilder &outString, UErrorCode &status) { +void NumberFormatterImpl::applyStatic(const MacroProps& macros, DecimalQuantity& inValue, + NumberStringBuilder& outString, UErrorCode& status) { NumberFormatterImpl impl(macros, false, status); impl.applyUnsafe(inValue, outString, status); } @@ -154,8 +157,8 @@ void NumberFormatterImpl::applyStatic(const MacroProps ¯os, DecimalQuantity // The "unsafe" method simply re-uses fMicros, eliminating the extra copy operation. // See MicroProps::processQuantity() for details. -void NumberFormatterImpl::apply(DecimalQuantity &inValue, NumberStringBuilder &outString, - UErrorCode &status) const { +void NumberFormatterImpl::apply(DecimalQuantity& inValue, NumberStringBuilder& outString, + UErrorCode& status) const { if (U_FAILURE(status)) { return; } MicroProps micros; fMicroPropsGenerator->processQuantity(inValue, micros, status); @@ -163,23 +166,23 @@ void NumberFormatterImpl::apply(DecimalQuantity &inValue, NumberStringBuilder &o microsToString(micros, inValue, outString, status); } -void NumberFormatterImpl::applyUnsafe(DecimalQuantity &inValue, NumberStringBuilder &outString, - UErrorCode &status) { +void NumberFormatterImpl::applyUnsafe(DecimalQuantity& inValue, NumberStringBuilder& outString, + UErrorCode& status) { if (U_FAILURE(status)) { return; } fMicroPropsGenerator->processQuantity(inValue, fMicros, status); if (U_FAILURE(status)) { return; } microsToString(fMicros, inValue, outString, status); } -NumberFormatterImpl::NumberFormatterImpl(const MacroProps ¯os, bool safe, UErrorCode &status) { +NumberFormatterImpl::NumberFormatterImpl(const MacroProps& macros, bool safe, UErrorCode& status) { fMicroPropsGenerator = macrosToMicroGenerator(macros, safe, status); } ////////// -const MicroPropsGenerator * -NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe, UErrorCode &status) { - const MicroPropsGenerator *chain = &fMicros; +const MicroPropsGenerator* 
+NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, UErrorCode& status) { + const MicroPropsGenerator* chain = &fMicros; // Check that macros is error-free before continuing. if (macros.copyErrorTo(status)) { @@ -194,9 +197,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe, bool isPercent = isNoUnit && unitIsPercent(macros.unit); bool isPermille = isNoUnit && unitIsPermille(macros.unit); bool isCldrUnit = !isCurrency && !isNoUnit; - bool isAccounting = macros.sign == UNUM_SIGN_ACCOUNTING - || macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS - || macros.sign == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO; + bool isAccounting = + macros.sign == UNUM_SIGN_ACCOUNTING || macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS || + macros.sign == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO; CurrencyUnit currency(kDefaultCurrency, status); if (isCurrency) { currency = CurrencyUnit(macros.unit, status); // Restore CurrencyUnit from MeasureUnit @@ -208,7 +211,7 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe, // Select the numbering system. LocalPointer nsLocal; - const NumberingSystem *ns; + const NumberingSystem* ns; if (macros.symbols.isNumberingSystem()) { ns = macros.symbols.getNumberingSystem(); } else { @@ -217,7 +220,7 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe, // Give ownership to the function scope. nsLocal.adoptInstead(ns); } - const char *nsName = U_SUCCESS(status) ? ns->getName() : "latn"; + const char* nsName = U_SUCCESS(status) ? ns->getName() : "latn"; // Resolve the symbols. Do this here because currency may need to customize them. if (macros.symbols.isDecimalFormatSymbols()) { @@ -232,7 +235,8 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe, // If we are formatting currency, check for a currency-specific pattern. 
const char16_t* pattern = nullptr; if (isCurrency) { - CurrencyFormatInfoResult info = getCurrencyFormatInfo(macros.locale, currency.getSubtype(), status); + CurrencyFormatInfoResult info = getCurrencyFormatInfo( + macros.locale, currency.getSubtype(), status); if (info.exists) { pattern = info.pattern; // It's clunky to clone an object here, but this code is not frequently executed. @@ -240,13 +244,13 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe, fMicros.symbols = symbols; fSymbols.adoptInstead(symbols); symbols->setSymbol( - DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol, - UnicodeString(info.decimalSeparator), - FALSE); + DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol, + UnicodeString(info.decimalSeparator), + FALSE); symbols->setSymbol( - DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol, - UnicodeString(info.groupingSeparator), - FALSE); + DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol, + UnicodeString(info.groupingSeparator), + FALSE); } } if (pattern == nullptr) { @@ -407,9 +411,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe, return chain; } -const PluralRules * -NumberFormatterImpl::resolvePluralRules(const PluralRules *rulesPtr, const Locale &locale, - UErrorCode &status) { +const PluralRules* +NumberFormatterImpl::resolvePluralRules(const PluralRules* rulesPtr, const Locale& locale, + UErrorCode& status) { if (rulesPtr != nullptr) { return rulesPtr; } @@ -420,8 +424,8 @@ NumberFormatterImpl::resolvePluralRules(const PluralRules *rulesPtr, const Local return fRules.getAlias(); } -int32_t NumberFormatterImpl::microsToString(const MicroProps µs, DecimalQuantity &quantity, - NumberStringBuilder &string, UErrorCode &status) { +int32_t NumberFormatterImpl::microsToString(const MicroProps& micros, DecimalQuantity& quantity, + NumberStringBuilder& string, UErrorCode& status) { 
micros.rounding.apply(quantity, status); micros.integerWidth.apply(quantity, status); int32_t length = writeNumber(micros, quantity, string, status); @@ -439,8 +443,8 @@ int32_t NumberFormatterImpl::microsToString(const MicroProps µs, DecimalQua return length; } -int32_t NumberFormatterImpl::writeNumber(const MicroProps µs, DecimalQuantity &quantity, - NumberStringBuilder &string, UErrorCode &status) { +int32_t NumberFormatterImpl::writeNumber(const MicroProps& micros, DecimalQuantity& quantity, + NumberStringBuilder& string, UErrorCode& status) { int32_t length = 0; if (quantity.isInfinite()) { length += string.insert( @@ -480,8 +484,8 @@ int32_t NumberFormatterImpl::writeNumber(const MicroProps µs, DecimalQuanti return length; } -int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps µs, DecimalQuantity &quantity, - NumberStringBuilder &string, UErrorCode &status) { +int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps& micros, DecimalQuantity& quantity, + NumberStringBuilder& string, UErrorCode& status) { int length = 0; int integerCount = quantity.getUpperDisplayMagnitude() + 1; for (int i = 0; i < integerCount; i++) { @@ -499,21 +503,21 @@ int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps µs, Decima // Get and append the next digit value int8_t nextDigit = quantity.getDigit(i); - length += string.insert( - 0, getDigitFromSymbols(nextDigit, *micros.symbols), UNUM_INTEGER_FIELD, status); + length += insertDigitFromSymbols( + string, 0, nextDigit, *micros.symbols, UNUM_INTEGER_FIELD, status); } return length; } -int32_t NumberFormatterImpl::writeFractionDigits(const MicroProps µs, DecimalQuantity &quantity, - NumberStringBuilder &string, UErrorCode &status) { +int32_t NumberFormatterImpl::writeFractionDigits(const MicroProps& micros, DecimalQuantity& quantity, + NumberStringBuilder& string, UErrorCode& status) { int length = 0; int fractionCount = -quantity.getLowerDisplayMagnitude(); for (int i = 0; i < fractionCount; i++) { 
// Get and append the next digit value int8_t nextDigit = quantity.getDigit(-i - 1); - length += string.append( - getDigitFromSymbols(nextDigit, *micros.symbols), UNUM_FRACTION_FIELD, status); + length += insertDigitFromSymbols( + string, string.length(), nextDigit, *micros.symbols, UNUM_FRACTION_FIELD, status); } return length; } diff --git a/icu4c/source/i18n/number_grouping.cpp b/icu4c/source/i18n/number_grouping.cpp index 67fd4c94317..03daffa629e 100644 --- a/icu4c/source/i18n/number_grouping.cpp +++ b/icu4c/source/i18n/number_grouping.cpp @@ -86,4 +86,12 @@ bool Grouper::groupAtPosition(int32_t position, const impl::DecimalQuantity &val && value.getUpperDisplayMagnitude() - fGrouping1 + 1 >= fMinGrouping; } +int16_t Grouper::getPrimary() const { + return fGrouping1; +} + +int16_t Grouper::getSecondary() const { + return fGrouping2; +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/number_scientific.cpp b/icu4c/source/i18n/number_scientific.cpp index 0557adb63e3..548ce625ad8 100644 --- a/icu4c/source/i18n/number_scientific.cpp +++ b/icu4c/source/i18n/number_scientific.cpp @@ -64,8 +64,7 @@ int32_t ScientificModifier::apply(NumberStringBuilder &output, int32_t /*leftInd int32_t disp = std::abs(fExponent); for (int j = 0; j < fHandler->fSettings.fMinExponentDigits || disp > 0; j++, disp /= 10) { auto d = static_cast(disp % 10); - const UnicodeString &digitString = getDigitFromSymbols(d, *fHandler->fSymbols); - i += output.insert(i - j, digitString, UNUM_EXPONENT_FIELD, status); + i += insertDigitFromSymbols(output, i - j, d, *fHandler->fSymbols, UNUM_EXPONENT_FIELD, status); } return i - rightIndex; } diff --git a/icu4c/source/i18n/number_utils.h b/icu4c/source/i18n/number_utils.h index 3a408d6007a..a889c69eb74 100644 --- a/icu4c/source/i18n/number_utils.h +++ b/icu4c/source/i18n/number_utils.h @@ -19,7 +19,7 @@ namespace impl { class UnicodeStringCharSequence : public CharSequence { public: - explicit UnicodeStringCharSequence(const 
UnicodeString &other) { + explicit UnicodeStringCharSequence(const UnicodeString& other) { fStr = other; } @@ -62,10 +62,10 @@ struct MicroProps : public MicroPropsGenerator { bool useCurrency; // Note: This struct has no direct ownership of the following pointers. - const DecimalFormatSymbols *symbols; - const Modifier *modOuter; - const Modifier *modMiddle; - const Modifier *modInner; + const DecimalFormatSymbols* symbols; + const Modifier* modOuter; + const Modifier* modMiddle; + const Modifier* modInner; // The following "helper" fields may optionally be used during the MicroPropsGenerator. // They live here to retain memory. @@ -78,12 +78,12 @@ struct MicroProps : public MicroPropsGenerator { MicroProps() = default; - MicroProps(const MicroProps &other) = default; + MicroProps(const MicroProps& other) = default; - MicroProps &operator=(const MicroProps &other) = default; + MicroProps& operator=(const MicroProps& other) = default; - void processQuantity(DecimalQuantity &, MicroProps µs, UErrorCode &status) const U_OVERRIDE { - (void)status; + void processQuantity(DecimalQuantity&, MicroProps& micros, UErrorCode& status) const U_OVERRIDE { + (void) status; if (this == µs) { // Unsafe path: no need to perform a copy. U_ASSERT(!exhausted); @@ -111,14 +111,13 @@ struct NumberFormatterResults : public UMemory { NumberStringBuilder string; }; -inline const UnicodeString getDigitFromSymbols(int8_t digit, const DecimalFormatSymbols &symbols) { - // TODO: Implement DecimalFormatSymbols.getCodePointZero()? 
- if (digit == 0) { - return symbols.getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kZeroDigitSymbol); - } else { - return symbols.getSymbol(static_cast( - DecimalFormatSymbols::ENumberFormatSymbol::kOneDigitSymbol + digit - 1)); +inline int32_t insertDigitFromSymbols(NumberStringBuilder& output, int32_t index, int8_t digit, + const DecimalFormatSymbols& symbols, Field field, + UErrorCode& status) { + if (symbols.getCodePointZero() != -1) { + return output.insertCodePoint(index, symbols.getCodePointZero() + digit, field, status); } + return output.insert(index, symbols.getConstDigitSymbol(digit), field, status); } } // namespace impl diff --git a/icu4c/source/i18n/numparse_decimal.cpp b/icu4c/source/i18n/numparse_decimal.cpp new file mode 100644 index 00000000000..bfc9c4f8a71 --- /dev/null +++ b/icu4c/source/i18n/numparse_decimal.cpp @@ -0,0 +1,313 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT + +#include "numparse_types.h" +#include "numparse_decimal.h" +#include "numparse_unisets.h" +#include "numparse_utils.h" +#include "unicode/uchar.h" + +using namespace icu; +using namespace icu::numparse; +using namespace icu::numparse::impl; + + +DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Grouper& grouper, + parse_flags_t parseFlags) { + if (0 != (parseFlags & PARSE_FLAG_MONETARY_SEPARATORS)) { + groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetaryGroupingSeparatorSymbol); + decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol); + } else { + groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol); + decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol); + } + bool strictSeparators = 0 != (parseFlags & PARSE_FLAG_STRICT_SEPARATORS); + unisets::Key 
groupingKey = strictSeparators ? unisets::STRICT_ALL_SEPARATORS + : unisets::ALL_SEPARATORS; + + // Attempt to find separators in the static cache + + groupingUniSet = unisets::get(groupingKey); + unisets::Key decimalKey = unisets::chooseFrom( + decimalSeparator, + strictSeparators ? unisets::STRICT_COMMA : unisets::COMMA, + strictSeparators ? unisets::STRICT_PERIOD : unisets::PERIOD); + if (decimalKey != unisets::COUNT) { + decimalUniSet = unisets::get(decimalKey); + } else { + auto* set = new UnicodeSet(); + set->add(decimalSeparator.char32At(0)); + set->freeze(); + decimalUniSet = set; + fLocalDecimalUniSet.adoptInstead(set); + } + + if (groupingKey != unisets::COUNT && decimalKey != unisets::COUNT) { + // Everything is available in the static cache + separatorSet = groupingUniSet; + leadSet = unisets::get( // strictness must mirror the choice made for groupingKey + strictSeparators ? unisets::DIGITS_OR_STRICT_ALL_SEPARATORS + : unisets::DIGITS_OR_ALL_SEPARATORS); + } else { + auto* set = new UnicodeSet(); + set->addAll(*groupingUniSet); + set->addAll(*decimalUniSet); + set->freeze(); + separatorSet = set; + fLocalSeparatorSet.adoptInstead(set); + leadSet = nullptr; + } + + int cpZero = symbols.getCodePointZero(); + if (cpZero == -1 || !u_isdigit(cpZero) || u_digit(cpZero, 10) != 0) { + // Uncommon case: okay to allocate. 
+ auto digitStrings = new UnicodeString[10]; + fLocalDigitStrings.adoptInstead(digitStrings); + for (int32_t i = 0; i <= 9; i++) { + digitStrings[i] = symbols.getConstDigitSymbol(i); + } + } + + requireGroupingMatch = 0 != (parseFlags & PARSE_FLAG_STRICT_GROUPING_SIZE); + groupingDisabled = 0 != (parseFlags & PARSE_FLAG_GROUPING_DISABLED); + fractionGroupingDisabled = 0 != ( + parseFlags & PARSE_FLAG_FRACTION_GROUPING_DISABLED); + integerOnly = 0 != (parseFlags & PARSE_FLAG_INTEGER_ONLY); + grouping1 = grouper.getPrimary(); + grouping2 = grouper.getSecondary(); +} + +bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const { + return match(segment, result, 0, status); +} + +bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign, + UErrorCode&) const { + if (result.seenNumber() && exponentSign == 0) { + // A number has already been consumed. + return false; + } else if (exponentSign != 0) { + // scientific notation always comes after the number + U_ASSERT(!result.quantity.bogus); + } + + ParsedNumber backupResult(result); + + // strict parsing + bool strictFail = false; // did we exit with a strict parse failure? + UnicodeString actualGroupingString = groupingSeparator; + UnicodeString actualDecimalString = decimalSeparator; + int32_t groupedDigitCount = 0; // tracking count of digits delimited by grouping separator + int32_t backupOffset = -1; // used for preserving the last confirmed position + bool afterFirstGrouping = false; + bool seenGrouping = false; + bool seenDecimal = false; + int32_t digitsAfterDecimal = 0; + int32_t initialOffset = segment.getOffset(); + int32_t exponent = 0; + bool hasPartialPrefix = false; + while (segment.length() > 0) { + hasPartialPrefix = false; + + // Attempt to match a digit. + int8_t digit = -1; + + // Try by code point digit value. 
+ int cp = segment.getCodePoint(); + if (u_isdigit(cp)) { + segment.adjustOffset(U16_LENGTH(cp)); + digit = static_cast(u_digit(cp, 10)); + } + + // Try by digit string. + if (digit == -1 && !fLocalDigitStrings.isNull()) { + for (int i = 0; i < 10; i++) { + const UnicodeString& str = fLocalDigitStrings[i]; + int overlap = segment.getCommonPrefixLength(str); + if (overlap == str.length()) { + segment.adjustOffset(overlap); + digit = static_cast(i); + break; + } else if (overlap == segment.length()) { + hasPartialPrefix = true; + } + } + } + + if (digit >= 0) { + // Digit was found. + // Check for grouping size violation + if (backupOffset != -1) { + if (requireGroupingMatch) { + // comma followed by digit, so group before comma is a secondary + // group. If there was a group separator before that, the group + // must == the secondary group length, else it can be <= the the + // secondary group length. + if ((afterFirstGrouping && groupedDigitCount != grouping2) || + (!afterFirstGrouping && groupedDigitCount > grouping2)) { + strictFail = true; + break; + } + } + afterFirstGrouping = true; + backupOffset = -1; + groupedDigitCount = 0; + } + + // Save the digit in the DecimalQuantity or scientific adjustment. 
+ if (exponentSign != 0) { + // Test the bound before multiplying: signed overflow is undefined and may wrap upward. + if (exponent > (INT32_MAX - digit) / 10) { + // Overflow + exponent = INT32_MAX; + } else { + exponent = digit + exponent * 10; + } + } else { + if (result.quantity.bogus) { + result.quantity.bogus = false; + } + result.quantity.appendDigit(digit, 0, true); + } + result.setCharsConsumed(segment); + groupedDigitCount++; + if (seenDecimal) { + digitsAfterDecimal++; + } + continue; + } + + // Attempt to match a literal grouping or decimal separator + int32_t decimalOverlap = segment.getCommonPrefixLength(actualDecimalString); + bool decimalStringMatch = decimalOverlap == actualDecimalString.length(); + int32_t groupingOverlap = segment.getCommonPrefixLength(actualGroupingString); + bool groupingStringMatch = groupingOverlap == actualGroupingString.length(); + + hasPartialPrefix = (decimalOverlap == segment.length()) || (groupingOverlap == segment.length()); + + if (!seenDecimal && !groupingStringMatch && + (decimalStringMatch || (!seenDecimal && decimalUniSet->contains(cp)))) { + // matched a decimal separator + if (requireGroupingMatch) { + if (backupOffset != -1 || (seenGrouping && groupedDigitCount != grouping1)) { + strictFail = true; + break; + } + } + + // If we're only parsing integers, then don't parse this one. 
+ if (integerOnly) { + break; + } + + seenDecimal = true; + if (!decimalStringMatch) { + actualDecimalString = UnicodeString(cp); + } + segment.adjustOffset(actualDecimalString.length()); + result.setCharsConsumed(segment); + result.flags |= FLAG_HAS_DECIMAL_SEPARATOR; + continue; + } + + if (!groupingDisabled && !decimalStringMatch && + (groupingStringMatch || (!seenGrouping && groupingUniSet->contains(cp)))) { + // matched a grouping separator + if (requireGroupingMatch) { + if (groupedDigitCount == 0) { + // leading group + strictFail = true; + break; + } else if (backupOffset != -1) { + // two group separators in a row + break; + } + } + + if (fractionGroupingDisabled && seenDecimal) { + // Stop parsing here. + break; + } + + seenGrouping = true; + if (!groupingStringMatch) { + actualGroupingString = UnicodeString(cp); + } + backupOffset = segment.getOffset(); + segment.adjustOffset(actualGroupingString.length()); + // Note: do NOT set charsConsumed + continue; + } + + // Not a digit and not a separator + break; + } + + // Check the final grouping for validity + if (requireGroupingMatch && !seenDecimal && seenGrouping && afterFirstGrouping && + groupedDigitCount != grouping1) { + strictFail = true; + } + + if (requireGroupingMatch && strictFail) { + result = backupResult; + segment.setOffset(initialOffset); + } + + if (result.quantity.bogus && segment.getOffset() != initialOffset) { + // Strings that start with a separator but have no digits. + // We don't need a backup of ParsedNumber because no changes could have been made to it. + segment.setOffset(initialOffset); + hasPartialPrefix = true; + } + + if (!result.quantity.bogus) { + // The final separator was a decimal separator. 
+ result.quantity.adjustMagnitude(-digitsAfterDecimal); + } + + if (exponentSign != 0 && segment.getOffset() != initialOffset) { + U_ASSERT(!result.quantity.bogus); + bool overflow = (exponent == INT32_MAX); + if (!overflow) { + result.quantity.adjustMagnitude(exponentSign * exponent); + } + if (overflow) { + if (exponentSign == -1) { + // Set to zero + result.quantity.clear(); + } else { + // Set to infinity + result.quantity.bogus = true; + result.flags |= FLAG_INFINITY; + } + } + } + + return segment.length() == 0 || hasPartialPrefix; +} + +const UnicodeSet* DecimalMatcher::getLeadCodePoints() const { + if (fLocalDigitStrings.isNull() && leadSet != nullptr) { + return new UnicodeSet(*leadSet); + } + + auto* leadCodePoints = new UnicodeSet(); + // Assumption: the sets are all single code points. + leadCodePoints->addAll(*unisets::get(unisets::DIGITS)); + leadCodePoints->addAll(*separatorSet); + if (!fLocalDigitStrings.isNull()) { + for (int i = 0; i < 10; i++) { + utils::putLeadCodePoint(fLocalDigitStrings[i], leadCodePoints); + } + } + leadCodePoints->freeze(); + return leadCodePoints; +} + + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/numparse_decimal.h b/icu4c/source/i18n/numparse_decimal.h new file mode 100644 index 00000000000..b7bda16f589 --- /dev/null +++ b/icu4c/source/i18n/numparse_decimal.h @@ -0,0 +1,69 @@ +// © 2018 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT +#ifndef __NUMPARSE_DECIMAL_H__ +#define __NUMPARSE_DECIMAL_H__ + +#include "unicode/uniset.h" +#include "numparse_types.h" + +U_NAMESPACE_BEGIN namespace numparse { +namespace impl { + +using ::icu::number::impl::Grouper; + +class DecimalMatcher : public NumberParseMatcher, public UMemory { + public: + DecimalMatcher(const DecimalFormatSymbols& symbols, const Grouper& grouper, + parse_flags_t parseFlags); + + bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; + + bool + match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign, UErrorCode& status) const; + + const UnicodeSet* getLeadCodePoints() const override; + + private: + /** If true, only accept strings whose grouping sizes match the locale */ + bool requireGroupingMatch; + + /** If true, do not accept grouping separators at all */ + bool groupingDisabled; + + /** If true, do not accept fraction grouping separators */ + bool fractionGroupingDisabled; + + /** If true, do not accept numbers in the fraction */ + bool integerOnly; + + int16_t grouping1; + int16_t grouping2; + + UnicodeString groupingSeparator; + UnicodeString decimalSeparator; + + // Assumption: these sets all consist of single code points. If this assumption needs to be broken, + // fix getLeadCodePoints() as well as matching logic. Be careful of the performance impact. + const UnicodeSet* groupingUniSet; + const UnicodeSet* decimalUniSet; + const UnicodeSet* separatorSet; + const UnicodeSet* leadSet; + + // Make this class the owner of a few objects that could be allocated. + // The first two LocalPointers are used for assigning ownership only. 
+ LocalPointer fLocalDecimalUniSet; + LocalPointer fLocalSeparatorSet; + LocalArray fLocalDigitStrings; +}; + + +} // namespace impl +} // namespace numparse +U_NAMESPACE_END + +#endif //__NUMPARSE_DECIMAL_H__ +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/numparse_impl.cpp b/icu4c/source/i18n/numparse_impl.cpp new file mode 100644 index 00000000000..d93c0173f40 --- /dev/null +++ b/icu4c/source/i18n/numparse_impl.cpp @@ -0,0 +1,113 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT + +#include "number_types.h" +#include "number_patternstring.h" +#include "numparse_types.h" +#include "numparse_impl.h" +#include "numparse_symbols.h" +#include "numparse_decimal.h" +#include "unicode/numberformatter.h" + +using namespace icu; +using namespace icu::number; +using namespace icu::number::impl; +using namespace icu::numparse; +using namespace icu::numparse::impl; + + +NumberParserImpl* +NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString& patternString, + parse_flags_t parseFlags, UErrorCode& status) { + + auto* parser = new NumberParserImpl(parseFlags, true); + DecimalFormatSymbols symbols(locale, status); + +// IgnorablesMatcher* ignorables = IgnorablesMatcher.getDefault(); +// +// MatcherFactory factory = new MatcherFactory(); +// factory.currency = Currency.getInstance("USD"); +// factory.symbols = symbols; +// factory.ignorables = ignorables; +// factory.locale = locale; +// factory.parseFlags = parseFlags; + + ParsedPatternInfo patternInfo; + PatternParser::parseToPatternInfo(patternString, patternInfo, status); +// AffixMatcher.createMatchers(patternInfo, parser, factory, ignorables, parseFlags); + + Grouper grouper = Grouper::forStrategy(UNUM_GROUPING_AUTO); + grouper.setLocaleData(patternInfo, locale); + +// parser.addMatcher({ignorables, false}); + 
parser->addAndAdoptMatcher(new DecimalMatcher(symbols, grouper, parseFlags)); + parser->addAndAdoptMatcher(new MinusSignMatcher(symbols, false)); +// parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags)); +// parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags)); +// parser.addMatcher(CurrencyTrieMatcher.getInstance(locale)); +// parser.addMatcher(new RequireNumberMatcher()); + + parser->freeze(); + return parser; +} + +NumberParserImpl::NumberParserImpl(parse_flags_t parseFlags, bool computeLeads) + : fParseFlags(parseFlags), fComputeLeads(computeLeads) { +} + +NumberParserImpl::~NumberParserImpl() { + for (int32_t i = 0; i < fNumMatchers; i++) { + delete (fMatchers[i]); + if (fComputeLeads) { + delete (fLeads[i]); + } + } + fNumMatchers = 0; +} + +void NumberParserImpl::addAndAdoptMatcher(const NumberParseMatcher* matcher) { + if (fNumMatchers + 1 > fMatchers.getCapacity()) { + fMatchers.resize(fNumMatchers * 2, fNumMatchers); + if (fComputeLeads) { + // The two arrays should grow in tandem: + U_ASSERT(fNumMatchers >= fLeads.getCapacity()); + fLeads.resize(fNumMatchers * 2, fNumMatchers); + } + } + + fMatchers[fNumMatchers] = matcher; + + if (fComputeLeads) { + fLeads[fNumMatchers] = matcher->getLeadCodePoints(); + } + + fNumMatchers++; +} + +void NumberParserImpl::freeze() { + fFrozen = true; +} + +//void +//NumberParserImpl::parse(const UnicodeString& input, int32_t start, bool greedy, ParsedNumber& result, +// UErrorCode& status) const { +// U_ASSERT(frozen); +// // TODO: Check start >= 0 and start < input.length() +// StringSegment segment(utils::maybeFold(input, parseFlags)); +// segment.adjustOffset(start); +// if (greedy) { +// parseGreedyRecursive(segment, result); +// } else { +// parseLongestRecursive(segment, result); +// } +// for (NumberParseMatcher matcher : matchers) { +// matcher.postProcess(result); +// } +//} + + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/numparse_impl.h 
b/icu4c/source/i18n/numparse_impl.h new file mode 100644 index 00000000000..2ded607d829 --- /dev/null +++ b/icu4c/source/i18n/numparse_impl.h @@ -0,0 +1,56 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT +#ifndef __NUMPARSE_IMPL_H__ +#define __NUMPARSE_IMPL_H__ + +#include "numparse_types.h" +#include "unicode/uniset.h" + +U_NAMESPACE_BEGIN namespace numparse { +namespace impl { + +class NumberParserImpl { + public: + static NumberParserImpl* createSimpleParser(const Locale& locale, const UnicodeString& patternString, + parse_flags_t parseFlags, UErrorCode& status); + + void addAndAdoptMatcher(const NumberParseMatcher* matcher); + + void freeze(); + + void parse(const UnicodeString& input, bool greedy, ParsedNumber& result, UErrorCode& status) const; + + void parse(const UnicodeString& input, int32_t start, bool greedy, ParsedNumber& result, + UErrorCode& status) const; + + UnicodeString toString() const; + + private: + parse_flags_t fParseFlags; + int32_t fNumMatchers = 0; + // NOTE: The stack capacity for fMatchers and fLeads should be the same + MaybeStackArray fMatchers; + MaybeStackArray fLeads; + bool fComputeLeads; + bool fFrozen = false; + + NumberParserImpl(parse_flags_t parseFlags, bool computeLeads); + + ~NumberParserImpl(); + + void parseGreedyRecursive(StringSegment& segment, ParsedNumber& result) const; + + void parseLongestRecursive(StringSegment& segment, ParsedNumber& result) const; +}; + + +} // namespace impl +} // namespace numparse +U_NAMESPACE_END + +#endif //__NUMPARSE_IMPL_H__ +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/numparse_symbols.cpp b/icu4c/source/i18n/numparse_symbols.cpp new file mode 100644 index 00000000000..8d1631256c5 --- /dev/null +++ b/icu4c/source/i18n/numparse_symbols.cpp @@ -0,0 +1,95 @@ +// © 2018 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT + +#include "numparse_types.h" +#include "numparse_symbols.h" +#include "numparse_utils.h" + +using namespace icu; +using namespace icu::numparse; +using namespace icu::numparse::impl; + + +SymbolMatcher::SymbolMatcher(const UnicodeString& symbolString, unisets::Key key) { + fUniSet = unisets::get(key); + fOwnsUniSet = false; + if (fUniSet->contains(symbolString)) { + fString.setToBogus(); + } else { + fString = symbolString; + } +} + +SymbolMatcher::~SymbolMatcher() { + if (fOwnsUniSet) { + delete fUniSet; + fUniSet = nullptr; + } +} + +const UnicodeSet* SymbolMatcher::getSet() { + return fUniSet; +} + +bool SymbolMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const { + // Smoke test first; this matcher might be disabled. + if (isDisabled(result)) { + return false; + } + + // Test the string first in order to consume trailing chars greedily. + int overlap = 0; + if (!fString.isEmpty()) { + overlap = segment.getCommonPrefixLength(fString); + if (overlap == fString.length()) { + segment.adjustOffset(fString.length()); + accept(segment, result); + return false; + } + } + + int cp = segment.getCodePoint(); + if (cp != -1 && fUniSet->contains(cp)) { + segment.adjustOffset(U16_LENGTH(cp)); + accept(segment, result); + return false; + } + + return overlap == segment.length(); +} + +const UnicodeSet* SymbolMatcher::getLeadCodePoints() const { + if (fString.isEmpty()) { + // Assumption: for sets from UnicodeSetStaticCache, uniSet == leadCodePoints. 
+ return new UnicodeSet(*fUniSet); + } + + UnicodeSet* leadCodePoints = new UnicodeSet(); + utils::putLeadCodePoints(fUniSet, leadCodePoints); + utils::putLeadCodePoint(fString, leadCodePoints); + leadCodePoints->freeze(); + return leadCodePoints; +} + + +MinusSignMatcher::MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing) : SymbolMatcher( + dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol), + unisets::MINUS_SIGN), fAllowTrailing(allowTrailing) { +} + +bool MinusSignMatcher::isDisabled(const ParsedNumber& result) const { + return 0 != (result.flags & FLAG_NEGATIVE) || + (fAllowTrailing ? false : result.seenNumber()); +} + +void MinusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) const { + result.flags |= FLAG_NEGATIVE; + result.setCharsConsumed(segment); +} + + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/numparse_symbols.h b/icu4c/source/i18n/numparse_symbols.h new file mode 100644 index 00000000000..d730ef57535 --- /dev/null +++ b/icu4c/source/i18n/numparse_symbols.h @@ -0,0 +1,60 @@ +// © 2018 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT +#ifndef __NUMPARSE_SYMBOLS_H__ +#define __NUMPARSE_SYMBOLS_H__ + +#include "numparse_types.h" +#include "unicode/uniset.h" +#include "numparse_unisets.h" + +U_NAMESPACE_BEGIN namespace numparse { +namespace impl { + + +class SymbolMatcher : public NumberParseMatcher, public UMemory { + public: + ~SymbolMatcher() override; + + const UnicodeSet* getSet(); + + bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; + + const UnicodeSet* getLeadCodePoints() const override; + + virtual bool isDisabled(const ParsedNumber& result) const = 0; + + virtual void accept(StringSegment& segment, ParsedNumber& result) const = 0; + + protected: + UnicodeString fString; + const UnicodeSet* fUniSet; + bool fOwnsUniSet; + + SymbolMatcher(const UnicodeString& symbolString, unisets::Key key); +}; + + +class MinusSignMatcher : public SymbolMatcher { + public: + MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing); + + protected: + bool isDisabled(const ParsedNumber& result) const override; + + void accept(StringSegment& segment, ParsedNumber& result) const override; + + private: + bool fAllowTrailing; +}; + + +} // namespace impl +} // namespace numparse +U_NAMESPACE_END + +#endif //__NUMPARSE_SYMBOLS_H__ +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/numparse_types.h b/icu4c/source/i18n/numparse_types.h index 92957204baa..fe8a5652476 100644 --- a/icu4c/source/i18n/numparse_types.h +++ b/icu4c/source/i18n/numparse_types.h @@ -17,6 +17,42 @@ namespace impl { class StringSegment; class ParsedNumber; +typedef int32_t result_flags_t; +typedef int32_t parse_flags_t; + +/** Flags for the type result_flags_t */ +enum ResultFlags { + FLAG_NEGATIVE = 0x0001, + FLAG_PERCENT = 0x0002, + FLAG_PERMILLE = 0x0004, + FLAG_HAS_EXPONENT = 0x0008, + 
FLAG_HAS_DEFAULT_CURRENCY = 0x0010, + FLAG_HAS_DECIMAL_SEPARATOR = 0x0020, + FLAG_NAN = 0x0040, + FLAG_INFINITY = 0x0080, + FLAG_FAIL = 0x0100, +}; + +/** Flags for the type parse_flags_t */ +enum ParseFlags { + PARSE_FLAG_IGNORE_CASE = 0x0001, + PARSE_FLAG_MONETARY_SEPARATORS = 0x0002, + PARSE_FLAG_STRICT_SEPARATORS = 0x0004, + PARSE_FLAG_STRICT_GROUPING_SIZE = 0x0008, + PARSE_FLAG_INTEGER_ONLY = 0x0010, + PARSE_FLAG_GROUPING_DISABLED = 0x0020, + PARSE_FLAG_FRACTION_GROUPING_DISABLED = 0x0040, + PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES = 0x0080, + PARSE_FLAG_USE_FULL_AFFIXES = 0x0100, + PARSE_FLAG_EXACT_AFFIX = 0x0200, + PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400, +}; + +//template +//struct MaybeNeedsAdoption { +// T* ptr; +// bool needsAdoption; +//}; /** * Struct-like class to hold the results of a parsing routine. @@ -25,17 +61,6 @@ class ParsedNumber; */ class ParsedNumber { public: - enum ParsedNumberFlags { - FLAG_NEGATIVE = 0x0001, - FLAG_PERCENT = 0x0002, - FLAG_PERMILLE = 0x0004, - FLAG_HAS_EXPONENT = 0x0008, - FLAG_HAS_DEFAULT_CURRENCY = 0x0010, - FLAG_HAS_DECIMAL_SEPARATOR = 0x0020, - FLAG_NAN = 0x0040, - FLAG_INFINITY = 0x0080, - FLAG_FAIL = 0x0100, - }; /** * The numerical value that was parsed. @@ -51,9 +76,9 @@ class ParsedNumber { int32_t charEnd; /** - * Boolean flags (see constants below). + * Boolean flags (see constants above). */ - int32_t flags; + result_flags_t flags; /** * The pattern string corresponding to the prefix that got consumed. @@ -204,15 +229,17 @@ class NumberParseMatcher { * @return Whether this matcher thinks there may be more interesting chars beyond the end of the * string segment. */ - virtual bool match(StringSegment& segment, ParsedNumber& result) const = 0; + virtual bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const = 0; /** * Should return a set representing all possible chars (UTF-16 code units) that could be the first * char that this matcher can consume. 
This method is only called during construction phase, and its * return value is used to skip this matcher unless a segment begins with a char in this set. To make * this matcher always run, return {@link UnicodeSet#ALL_CODE_POINTS}. + * + * The returned UnicodeSet needs adoption! */ - virtual UnicodeSet getLeadCodePoints() const = 0; + virtual const UnicodeSet* getLeadCodePoints() const = 0; /** * Method called at the end of a parse, after all matchers have failed to consume any more chars. @@ -222,7 +249,9 @@ class NumberParseMatcher { * @param result * The data structure to store results. */ - virtual void postProcess(ParsedNumber& result) const = 0; + virtual void postProcess(ParsedNumber&) const { + // Default implementation: no-op + }; }; diff --git a/icu4c/source/i18n/numparse_utils.h b/icu4c/source/i18n/numparse_utils.h new file mode 100644 index 00000000000..a25f9ef9df7 --- /dev/null +++ b/icu4c/source/i18n/numparse_utils.h @@ -0,0 +1,38 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT +#ifndef __NUMPARSE_UTILS_H__ +#define __NUMPARSE_UTILS_H__ + +#include "numparse_types.h" +#include "unicode/uniset.h" + +U_NAMESPACE_BEGIN namespace numparse { +namespace impl { +namespace utils { + + +inline static void putLeadCodePoints(const UnicodeSet* input, UnicodeSet* output) { + for (int32_t i = 0; i < input->getRangeCount(); i++) { + output->add(input->getRangeStart(i), input->getRangeEnd(i)); + } + // TODO: ANDY: How to iterate over the strings in ICU4C UnicodeSet? 
+} + +inline static void putLeadCodePoint(const UnicodeString& input, UnicodeSet* output) { + if (!input.isEmpty()) { + output->add(input.char32At(0)); + } +} + + +} // namespace utils +} // namespace impl +} // namespace numparse +U_NAMESPACE_END + +#endif //__NUMPARSE_UTILS_H__ +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/unicode/numberformatter.h b/icu4c/source/i18n/unicode/numberformatter.h index be4593309e7..a3d80d28706 100644 --- a/icu4c/source/i18n/unicode/numberformatter.h +++ b/icu4c/source/i18n/unicode/numberformatter.h @@ -378,7 +378,18 @@ typedef enum UNumberDecimalSeparatorDisplay { UNUM_DECIMAL_SEPARATOR_COUNT } UNumberDecimalMarkDisplay; -U_NAMESPACE_BEGIN namespace number { // icu::number +U_NAMESPACE_BEGIN + +namespace numparse { +namespace impl { + +// Forward declarations: +class NumberParserImpl; + +} +} + +namespace number { // icu::number // Forward declarations: class UnlocalizedNumberFormatter; @@ -1311,6 +1322,12 @@ class U_I18N_API Grouper : public UMemory { Grouper(int16_t grouping1, int16_t grouping2, int16_t minGrouping) : fGrouping1(grouping1), fGrouping2(grouping2), fMinGrouping(minGrouping) {} + /** @internal */ + int16_t getPrimary() const; + + /** @internal */ + int16_t getSecondary() const; + private: /** * The grouping sizes, with the following special values: @@ -1349,6 +1366,9 @@ class U_I18N_API Grouper : public UMemory { // To allow NumberFormatterImpl to access isBogus() and perform other operations: friend class NumberFormatterImpl; + + // To allow NumberParserImpl to perform setLocaleData(): + friend class ::icu::numparse::impl::NumberParserImpl; }; /** @internal */ diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in index ed1aa256b14..c67f0006cae 100644 --- a/icu4c/source/test/intltest/Makefile.in +++ b/icu4c/source/test/intltest/Makefile.in @@ -64,7 +64,8 @@ scientificnumberformattertest.o datadrivennumberformattestsuite.o \ numberformattesttuple.o 
numberformat2test.o pluralmaptest.o \ numbertest_affixutils.o numbertest_api.o numbertest_decimalquantity.o \ numbertest_modifiers.o numbertest_patternmodifier.o numbertest_patternstring.o \ -numbertest_stringbuilder.o numbertest_stringsegment.o numbertest_unisets.o +numbertest_stringbuilder.o numbertest_stringsegment.o numbertest_unisets.o \ +numbertest_parse.o DEPS = $(OBJECTS:.o=.d) diff --git a/icu4c/source/test/intltest/numbertest.h b/icu4c/source/test/intltest/numbertest.h index 272f87c8827..5da55bbe9c7 100644 --- a/icu4c/source/test/intltest/numbertest.h +++ b/icu4c/source/test/intltest/numbertest.h @@ -207,6 +207,16 @@ class UniSetsTest : public IntlTest { const UnicodeSet& set, UChar32 cp); }; +class NumberParserTest : public IntlTest { + public: + void testBasic(); + void testLocaleFi(); + void testSeriesMatcher(); + void testGroupingDisabled(); + + void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0); +}; + // NOTE: This macro is identical to the one in itformat.cpp #define TESTCLASS(id, TestClass) \ @@ -237,6 +247,7 @@ class NumberTest : public IntlTest { TESTCLASS(6, NumberStringBuilderTest); TESTCLASS(7, StringSegmentTest); TESTCLASS(8, UniSetsTest); + TESTCLASS(9, NumberParserTest); default: name = ""; break; // needed to end loop } } diff --git a/icu4c/source/test/intltest/numbertest_parse.cpp b/icu4c/source/test/intltest/numbertest_parse.cpp new file mode 100644 index 00000000000..b0d2fe8cf1d --- /dev/null +++ b/icu4c/source/test/intltest/numbertest_parse.cpp @@ -0,0 +1,144 @@ +// © 2018 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT + +#include "numbertest.h" +#include "numparse_impl.h" +#include "numparse_unisets.h" +#include "unicode/dcfmtsym.h" +#include "unicode/testlog.h" + +#include + +using icu::numparse::impl::unisets::get; + +void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) { + if (exec) { + logln("TestSuite NumberParserTest: "); + } + TESTCASE_AUTO_BEGIN; + TESTCASE_AUTO(testBasic); + TESTCASE_AUTO_END; +} + +void NumberParserTest::testBasic() { + IcuTestErrorCode status(*this, "testBasic"); + + static const struct TestCase { + int32_t flags; + const char16_t* inputString; + const char16_t* patternString; + int32_t expectedCharsConsumed; + double expectedResultDouble; + } cases[] = {{3, u"51423", u"0", 5, 51423.}, + {3, u"51423x", u"0", 5, 51423.}, + {3, u" 51423", u"0", 6, 51423.}, + {3, u"51423 ", u"0", 5, 51423.}, + {3, u"𝟱𝟭𝟰𝟮𝟯", u"0", 10, 51423.}, + {3, u"𝟱𝟭𝟰𝟮𝟯x", u"0", 10, 51423.}, + {3, u" 𝟱𝟭𝟰𝟮𝟯", u"0", 11, 51423.}, + {3, u"𝟱𝟭𝟰𝟮𝟯 ", u"0", 10, 51423.}, + {7, u"𝟱𝟭,𝟰𝟮𝟯", u"#,##,##0", 11, 51423.}, + {7, u"𝟳,𝟴𝟵,𝟱𝟭,𝟰𝟮𝟯", u"#,##,##0", 19, 78951423.}, + {7, u"𝟳𝟴,𝟵𝟱𝟭.𝟰𝟮𝟯", u"#,##,##0", 18, 78951.423}, + {7, u"𝟳𝟴,𝟬𝟬𝟬", u"#,##,##0", 11, 78000.}, + {7, u"𝟳𝟴,𝟬𝟬𝟬.𝟬𝟬𝟬", u"#,##,##0", 18, 78000.}, + {7, u"𝟳𝟴,𝟬𝟬𝟬.𝟬𝟮𝟯", u"#,##,##0", 18, 78000.023}, + {7, u"𝟳𝟴.𝟬𝟬𝟬.𝟬𝟮𝟯", u"#,##,##0", 11, 78.}, + {3, u"-𝟱𝟭𝟰𝟮𝟯", u"0", 11, -51423.}, + {3, u"-𝟱𝟭𝟰𝟮𝟯-", u"0", 11, -51423.}, + {3, u"a51423US dollars", u"a0¤¤¤", 16, 51423.}, + {3, u"a 51423 US dollars", u"a0¤¤¤", 18, 51423.}, + {3, u"514.23 USD", u"¤0", 10, 514.23}, + {3, u"514.23 GBP", u"¤0", 10, 514.23}, + {3, u"a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 14, 51423.}, + {3, u"-a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.}, + {3, u"a -𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.}, + {3, u"𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 10, 51423.}, + {3, u"[𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, 51423.}, + {3, u"𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 11, 51423.}, + 
{3, u"[𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 12, 51423.}, + {3, u"(𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, -51423.}, + {3, u"𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 11, -51423.}, + {3, u"(𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 12, -51423.}, + {3, u"𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 10, 51423.}, + {3, u"{𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 11, 51423.}, + {3, u"𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 11, 51423.}, + {3, u"{𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 12, 51423.}, + {1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number + {2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b" + {3, u"𝟱.𝟭𝟰𝟮E𝟯", u"0", 12, 5142.}, + {3, u"𝟱.𝟭𝟰𝟮E-𝟯", u"0", 13, 0.005142}, + {3, u"𝟱.𝟭𝟰𝟮e-𝟯", u"0", 13, 0.005142}, + {7, u"5,142.50 Canadian dollars", u"#,##,##0 ¤¤¤", 25, 5142.5}, + {3, u"a$ b5", u"a ¤ b0", 5, 5.0}, + {3, u"📺1.23", u"📺0;📻0", 6, 1.23}, + {3, u"📻1.23", u"📺0;📻0", 6, -1.23}, + {3, u".00", u"0", 3, 0.0}, + {3, u" 0", u"a0", 31, 0.0}, // should not hang + {3, u"NaN", u"0", 3, NAN}, + {3, u"NaN E5", u"0", 3, NAN}, + {3, u"0", u"0", 1, 0.0}}; + + parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES; + for (auto cas : cases) { + UnicodeString inputString(cas.inputString); + UnicodeString patternString(cas.patternString); + const NumberParserImpl* parser = NumberParserImpl::createSimpleParser( + Locale("en"), patternString, parseFlags, status); + UnicodeString message = + UnicodeString("Input <") + inputString + UnicodeString("> Parser ") + parser->toString(); + + if (0 != (cas.flags & 0x01)) { + // Test greedy code path + ParsedNumber resultObject; + parser->parse(inputString, true, resultObject, status); + assertTrue("Greedy Parse failed: " + message, resultObject.success()); + assertEquals( + "Greedy Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd); + assertEquals( + "Greedy Parse failed: " + message, + cas.expectedResultDouble, + resultObject.getDouble()); + } + + if (0 != (cas.flags & 0x02)) { + // Test slow code path + ParsedNumber resultObject; + parser->parse(inputString, false, resultObject, status); + 
assertTrue("Non-Greedy Parse failed: " + message, resultObject.success()); + assertEquals( + "Non-Greedy Parse failed: " + message, + cas.expectedCharsConsumed, + resultObject.charEnd); + assertEquals( + "Non-Greedy Parse failed: " + message, + cas.expectedResultDouble, + resultObject.getDouble()); + } + + if (0 != (cas.flags & 0x04)) { + // Test with strict separators + parser = NumberParserImpl::createSimpleParser( + Locale("en"), + patternString, + parseFlags | PARSE_FLAG_STRICT_GROUPING_SIZE, + status); + ParsedNumber resultObject; + parser->parse(inputString, true, resultObject, status); + assertTrue("Strict Parse failed: " + message, resultObject.success()); + assertEquals( + "Strict Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd); + assertEquals( + "Strict Parse failed: " + message, + cas.expectedResultDouble, + resultObject.getDouble()); + } + } +} + + +#endif diff --git a/icu4c/source/test/intltest/numbertest_stringbuilder.cpp b/icu4c/source/test/intltest/numbertest_stringbuilder.cpp index 76d27e1b12a..cdc38361739 100644 --- a/icu4c/source/test/intltest/numbertest_stringbuilder.cpp +++ b/icu4c/source/test/intltest/numbertest_stringbuilder.cpp @@ -77,7 +77,7 @@ void NumberStringBuilderTest::testInsertAppendUnicodeString() { } void NumberStringBuilderTest::testSplice() { - const struct TestCase { + static const struct TestCase { const char16_t* input; const int32_t startThis; const int32_t endThis; diff --git a/icu4c/source/test/intltest/numbertest_unisets.cpp b/icu4c/source/test/intltest/numbertest_unisets.cpp index a41f3f6efb4..ed7fb08d837 100644 --- a/icu4c/source/test/intltest/numbertest_unisets.cpp +++ b/icu4c/source/test/intltest/numbertest_unisets.cpp @@ -9,9 +9,6 @@ #include "numparse_unisets.h" #include "unicode/dcfmtsym.h" -#include -#include - using icu::numparse::impl::unisets::get; void UniSetsTest::runIndexedTest(int32_t index, UBool exec, const char*&name, char*) { diff --git 
a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java index b965263af1a..8ba8eb79e5d 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/DecimalMatcher.java @@ -27,9 +27,6 @@ public class DecimalMatcher implements NumberParseMatcher { /** If true, do not accept numbers in the fraction */ private final boolean integerOnly; - /** If true, save the result as an exponent instead of a quantity in the ParsedNumber */ - private final boolean isScientific; - private final int grouping1; private final int grouping2; @@ -97,20 +94,28 @@ public class DecimalMatcher implements NumberParseMatcher { fractionGroupingDisabled = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_FRACTION_GROUPING_DISABLED); integerOnly = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_INTEGER_ONLY); - isScientific = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_DECIMAL_SCIENTIFIC); grouping1 = grouper.getPrimary(); grouping2 = grouper.getSecondary(); } @Override public boolean match(StringSegment segment, ParsedNumber result) { - return match(segment, result, false); + return match(segment, result, 0); } - public boolean match(StringSegment segment, ParsedNumber result, boolean negativeExponent) { - if (result.seenNumber() && !isScientific) { + /** + * @param exponentSign + * -1 means a negative exponent; +1 means a positive exponent; 0 means NO exponent. If -1 + * or +1, the number will be saved by scaling the pre-existing DecimalQuantity in the + * ParsedNumber. If 0, a new DecimalQuantity will be created to store the number. + */ + public boolean match(StringSegment segment, ParsedNumber result, int exponentSign) { + if (result.seenNumber() && exponentSign == 0) { // A number has already been consumed. 
return false; + } else if (exponentSign != 0) { + // scientific notation always comes after the number + assert result.quantity != null; } ParsedNumber backupResult = null; @@ -181,7 +186,7 @@ public class DecimalMatcher implements NumberParseMatcher { } // Save the digit in the DecimalQuantity or scientific adjustment. - if (isScientific) { + if (exponentSign != 0) { int nextExponent = digit + exponent * 10; if (nextExponent < exponent) { // Overflow @@ -272,11 +277,6 @@ public class DecimalMatcher implements NumberParseMatcher { break; } - // if (backupOffset != -1) { - // segment.setOffset(backupOffset); - // hasPartialPrefix = true; - // } - // Check the final grouping for validity if (requireGroupingMatch && !seenDecimal @@ -303,18 +303,17 @@ public class DecimalMatcher implements NumberParseMatcher { result.quantity.adjustMagnitude(-digitsAfterDecimal); } - if (isScientific && segment.getOffset() != initialOffset) { - assert result.quantity != null; // scientific notation always comes after the number + if (exponentSign != 0 && segment.getOffset() != initialOffset) { boolean overflow = (exponent == Integer.MAX_VALUE); if (!overflow) { try { - result.quantity.adjustMagnitude(negativeExponent ? 
-exponent : exponent); + result.quantity.adjustMagnitude(exponentSign * exponent); } catch (ArithmeticException e) { overflow = true; } } if (overflow) { - if (negativeExponent) { + if (exponentSign == -1) { // Set to zero result.quantity.clear(); } else { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java index 55a046a43ad..f34effcf67d 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/NumberParserImpl.java @@ -266,28 +266,27 @@ public class NumberParserImpl { private final int parseFlags; private final List matchers; - private final List leadCodePointses; + private final List leads; private Comparator comparator; private boolean frozen; /** * Creates a new, empty parser. * - * @param ignoreCase - * If true, perform case-folding. This parameter needs to go into the constructor because - * its value is used during the construction of the matcher chain. - * @param optimize + * @param parseFlags + * Settings for constructing the parser. + * @param computeLeads * If true, compute "lead chars" UnicodeSets for the matchers. This reduces parsing * runtime but increases construction runtime. If the parser is going to be used only once * or twice, set this to false; if it is going to be used hundreds of times, set it to * true. 
*/ - public NumberParserImpl(int parseFlags, boolean optimize) { + public NumberParserImpl(int parseFlags, boolean computeLeads) { matchers = new ArrayList(); - if (optimize) { - leadCodePointses = new ArrayList(); + if (computeLeads) { + leads = new ArrayList(); } else { - leadCodePointses = null; + leads = null; } comparator = ParsedNumber.COMPARATOR; // default value this.parseFlags = parseFlags; @@ -297,21 +296,21 @@ public class NumberParserImpl { public void addMatcher(NumberParseMatcher matcher) { assert !frozen; this.matchers.add(matcher); - if (leadCodePointses != null) { + if (leads != null) { UnicodeSet leadCodePoints = matcher.getLeadCodePoints(); assert leadCodePoints.isFrozen(); - this.leadCodePointses.add(leadCodePoints); + this.leads.add(leadCodePoints); } } public void addMatchers(Collection matchers) { assert !frozen; this.matchers.addAll(matchers); - if (leadCodePointses != null) { + if (leads != null) { for (NumberParseMatcher matcher : matchers) { UnicodeSet leadCodePoints = matcher.getLeadCodePoints(); assert leadCodePoints.isFrozen(); - this.leadCodePointses.add(leadCodePoints); + this.leads.add(leadCodePoints); } } } @@ -366,7 +365,7 @@ public class NumberParserImpl { int initialOffset = segment.getOffset(); int leadCp = segment.getCodePoint(); for (int i = 0; i < matchers.size(); i++) { - if (leadCodePointses != null && !leadCodePointses.get(i).contains(leadCp)) { + if (leads != null && !leads.get(i).contains(leadCp)) { continue; } NumberParseMatcher matcher = matchers.get(i); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java index c4a0005c0e7..c4d11b8b4e7 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ParsingUtils.java @@ -17,12 +17,11 @@ public class ParsingUtils { public static final int PARSE_FLAG_STRICT_GROUPING_SIZE 
= 0x0008; public static final int PARSE_FLAG_INTEGER_ONLY = 0x0010; public static final int PARSE_FLAG_GROUPING_DISABLED = 0x0020; - public static final int PARSE_FLAG_DECIMAL_SCIENTIFIC = 0x0040; + public static final int PARSE_FLAG_FRACTION_GROUPING_DISABLED = 0x0040; public static final int PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES = 0x0080; public static final int PARSE_FLAG_USE_FULL_AFFIXES = 0x0100; public static final int PARSE_FLAG_EXACT_AFFIX = 0x0200; public static final int PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400; - public static final int PARSE_FLAG_FRACTION_GROUPING_DISABLED = 0x0800; public static void putLeadCodePoints(UnicodeSet input, UnicodeSet output) { for (EntryRange range : input.ranges()) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java index a6c053af7ea..1f51ae3e9cb 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/parse/ScientificMatcher.java @@ -27,13 +27,15 @@ public class ScientificMatcher implements NumberParseMatcher { exponentSeparatorString = ParsingUtils.maybeFold(symbols.getExponentSeparator(), parseFlags); exponentMatcher = DecimalMatcher.getInstance(symbols, grouper, - ParsingUtils.PARSE_FLAG_DECIMAL_SCIENTIFIC | ParsingUtils.PARSE_FLAG_INTEGER_ONLY); + ParsingUtils.PARSE_FLAG_INTEGER_ONLY); } @Override public boolean match(StringSegment segment, ParsedNumber result) { // Only accept scientific notation after the mantissa. - if (!result.seenNumber()) { + // Most places use result.hasNumber(), but we need a stronger condition here (i.e., exponent is + // not well-defined after NaN or infinity). + if (result.quantity == null) { return false; } @@ -54,16 +56,16 @@ public class ScientificMatcher implements NumberParseMatcher { } // Allow a sign, and then try to match digits. 
- boolean minusSign = false; + int exponentSign = 1; if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN).contains(leadCp)) { - minusSign = true; + exponentSign = -1; segment.adjustOffset(Character.charCount(leadCp)); } else if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN).contains(leadCp)) { segment.adjustOffset(Character.charCount(leadCp)); } int digitsOffset = segment.getOffset(); - boolean digitsReturnValue = exponentMatcher.match(segment, result, minusSign); + boolean digitsReturnValue = exponentMatcher.match(segment, result, exponentSign); if (segment.getOffset() != digitsOffset) { // At least one exponent digit was matched. result.flags |= ParsedNumber.FLAG_HAS_EXPONENT; diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormatSymbols.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormatSymbols.java index bed3481e953..07757d0b68d 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormatSymbols.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DecimalFormatSymbols.java @@ -1654,13 +1654,13 @@ public class DecimalFormatSymbols implements Cloneable, Serializable { * represents a sequence of ten code points in order. * *

If the value stored here is positive, it means that the code point stored in this value - * corresponds to the digitStrings array, and zeroCodePoint can be used instead of the + * corresponds to the digitStrings array, and codePointZero can be used instead of the * digitStrings array for the purposes of efficient formatting; if -1, then digitStrings does * *not* contain a sequence of code points, and it must be used directly. * - *

It is assumed that zeroCodePoint always shadows the value in digitStrings. zeroCodePoint + *

It is assumed that codePointZero always shadows the value in digitStrings. codePointZero * should never be set directly; rather, it should be updated only when digitStrings mutates. - * That is, the flow of information is digitStrings -> zeroCodePoint, not the other way. + * That is, the flow of information is digitStrings -> codePointZero, not the other way. */ private transient int codePointZero; diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDecimalFormatSymbols.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDecimalFormatSymbols.java index 724247a5854..0d6f7d0b1f5 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDecimalFormatSymbols.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/IntlTestDecimalFormatSymbols.java @@ -296,6 +296,9 @@ public class IntlTestDecimalFormatSymbols extends TestFmwk final String[] differentDigitStrings = {"0", "b", "3", "d", "5", "ff", "7", "h", "9", "j"}; DecimalFormatSymbols symbols = new DecimalFormatSymbols(Locale.ENGLISH); + if (defZero != symbols.getCodePointZero()) { + errln("ERROR: Code point zero initialize to ASCII 0"); + } symbols.setDigitStrings(osmanyaDigitStrings); if (!Arrays.equals(symbols.getDigitStrings(), osmanyaDigitStrings)) { diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java index 5bb123968b6..41cc2d79350 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberParserTest.java @@ -3,7 +3,6 @@ package com.ibm.icu.dev.test.number; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import org.junit.Test; @@ -86,30 +85,32 @@ public class NumberParserTest { { 3, "📻1.23", "📺0;📻0", 6, -1.23 }, { 3, ".00", "0", 3, 0.0 }, { 3, " 0", "a0", 
31, 0.0 }, // should not hang + { 3, "NaN", "0", 3, Double.NaN }, + { 3, "NaN E5", "0", 3, Double.NaN }, { 3, "0", "0", 1, 0.0 } }; int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE | ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES; for (Object[] cas : cases) { int flags = (Integer) cas[0]; - String input = (String) cas[1]; - String pattern = (String) cas[2]; + String inputString = (String) cas[1]; + String patternString = (String) cas[2]; int expectedCharsConsumed = (Integer) cas[3]; - double resultDouble = (Double) cas[4]; + double expectedResultDouble = (Double) cas[4]; NumberParserImpl parser = NumberParserImpl - .createSimpleParser(ULocale.ENGLISH, pattern, parseFlags); - String message = "Input <" + input + "> Parser " + parser; + .createSimpleParser(ULocale.ENGLISH, patternString, parseFlags); + String message = "Input <" + inputString + "> Parser " + parser; if (0 != (flags & 0x01)) { // Test greedy code path ParsedNumber resultObject = new ParsedNumber(); - parser.parse(input, true, resultObject); - assertNotNull("Greedy Parse failed: " + message, resultObject.quantity); + parser.parse(inputString, true, resultObject); + assertTrue("Greedy Parse failed: " + message, resultObject.success()); assertEquals("Greedy Parse failed: " + message, expectedCharsConsumed, resultObject.charEnd); assertEquals("Greedy Parse failed: " + message, - resultDouble, + expectedResultDouble, resultObject.getNumber().doubleValue(), 0.0); } @@ -117,13 +118,13 @@ public class NumberParserTest { if (0 != (flags & 0x02)) { // Test slow code path ParsedNumber resultObject = new ParsedNumber(); - parser.parse(input, false, resultObject); - assertNotNull("Non-Greedy Parse failed: " + message, resultObject.quantity); + parser.parse(inputString, false, resultObject); + assertTrue("Non-Greedy Parse failed: " + message, resultObject.success()); assertEquals("Non-Greedy Parse failed: " + message, expectedCharsConsumed, resultObject.charEnd); assertEquals("Non-Greedy Parse failed: " + 
message, - resultDouble, + expectedResultDouble, resultObject.getNumber().doubleValue(), 0.0); } @@ -131,16 +132,16 @@ public class NumberParserTest { if (0 != (flags & 0x04)) { // Test with strict separators parser = NumberParserImpl.createSimpleParser(ULocale.ENGLISH, - pattern, + patternString, parseFlags | ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE); ParsedNumber resultObject = new ParsedNumber(); - parser.parse(input, true, resultObject); - assertNotNull("Strict Parse failed: " + message, resultObject.quantity); + parser.parse(inputString, true, resultObject); + assertTrue("Strict Parse failed: " + message, resultObject.success()); assertEquals("Strict Parse failed: " + message, expectedCharsConsumed, resultObject.charEnd); assertEquals("Strict Parse failed: " + message, - resultDouble, + expectedResultDouble, resultObject.getNumber().doubleValue(), 0.0); }