ICU-13574 Checkpoint commit. Basic NumberParseMatcher implementations in DecimalMatcher and SymbolMatcher. Cleanup in ICU4J.

X-SVN-Rev: 40869
This commit is contained in:
Shane Carr 2018-02-08 08:49:50 +00:00
parent 9337205a54
commit 2ee42b9288
27 changed files with 1112 additions and 151 deletions

View file

@ -108,7 +108,8 @@ number_decimfmtprops.o number_fluent.o number_formatimpl.o number_grouping.o \
number_integerwidth.o number_longnames.o number_modifiers.o number_notation.o \
number_padding.o number_patternmodifier.o number_patternstring.o \
number_rounding.o number_scientific.o number_stringbuilder.o \
numparse_stringsegment.o numparse_unisets.o numparse_parsednumber.o
numparse_stringsegment.o numparse_unisets.o numparse_parsednumber.o \
numparse_impl.o numparse_symbols.o numparse_decimal.o
## Header files to install

View file

@ -190,6 +190,7 @@ int32_t DecimalQuantity::getMagnitude() const {
void DecimalQuantity::adjustMagnitude(int32_t delta) {
if (precision != 0) {
// TODO: How to handle overflow cases?
scale += delta;
origDelta += delta;
}

View file

@ -38,9 +38,9 @@ enum CldrPatternStyle {
// TODO: Consider scientific format.
};
const char16_t *
doGetPattern(UResourceBundle *res, const char *nsName, const char *patternKey, UErrorCode &publicStatus,
UErrorCode &localStatus) {
const char16_t*
doGetPattern(UResourceBundle* res, const char* nsName, const char* patternKey, UErrorCode& publicStatus,
UErrorCode& localStatus) {
// Construct the path into the resource bundle
CharString key;
key.append("NumberElements/", publicStatus);
@ -53,9 +53,9 @@ doGetPattern(UResourceBundle *res, const char *nsName, const char *patternKey, U
return ures_getStringByKeyWithFallback(res, key.data(), nullptr, &localStatus);
}
const char16_t *getPatternForStyle(const Locale &locale, const char *nsName, CldrPatternStyle style,
UErrorCode &status) {
const char *patternKey;
const char16_t* getPatternForStyle(const Locale& locale, const char* nsName, CldrPatternStyle style,
UErrorCode& status) {
const char* patternKey;
switch (style) {
case CLDR_PATTERN_STYLE_DECIMAL:
patternKey = "decimalFormat";
@ -76,7 +76,7 @@ const char16_t *getPatternForStyle(const Locale &locale, const char *nsName, Cld
// Attempt to get the pattern with the native numbering system.
UErrorCode localStatus = U_ZERO_ERROR;
const char16_t *pattern;
const char16_t* pattern;
pattern = doGetPattern(res.getAlias(), nsName, patternKey, status, localStatus);
if (U_FAILURE(status)) { return u""; }
@ -96,18 +96,21 @@ struct CurrencyFormatInfoResult {
const char16_t* decimalSeparator;
const char16_t* groupingSeparator;
};
CurrencyFormatInfoResult getCurrencyFormatInfo(const Locale& locale, const char* isoCode, UErrorCode& status) {
CurrencyFormatInfoResult
getCurrencyFormatInfo(const Locale& locale, const char* isoCode, UErrorCode& status) {
// TODO: Load this data in a centralized location like ICU4J?
// TODO: Parts of this same data are loaded in dcfmtsym.cpp; should clean up.
CurrencyFormatInfoResult result = { false, nullptr, nullptr, nullptr };
if (U_FAILURE(status)) return result;
CurrencyFormatInfoResult result = {false, nullptr, nullptr, nullptr};
if (U_FAILURE(status)) { return result; }
CharString key;
key.append("Currencies/", status);
key.append(isoCode, status);
UErrorCode localStatus = status;
LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_CURR, locale.getName(), &localStatus));
ures_getByKeyWithFallback(bundle.getAlias(), key.data(), bundle.getAlias(), &localStatus);
if (U_SUCCESS(localStatus) && ures_getSize(bundle.getAlias())>2) { // the length is 3 if more data is present
if (U_SUCCESS(localStatus) &&
ures_getSize(bundle.getAlias()) > 2) { // the length is 3 if more data is present
ures_getByIndex(bundle.getAlias(), 2, bundle.getAlias(), &localStatus);
int32_t dummy;
result.exists = true;
@ -121,30 +124,30 @@ CurrencyFormatInfoResult getCurrencyFormatInfo(const Locale& locale, const char*
return result;
}
inline bool unitIsCurrency(const MeasureUnit &unit) {
inline bool unitIsCurrency(const MeasureUnit& unit) {
return uprv_strcmp("currency", unit.getType()) == 0;
}
inline bool unitIsNoUnit(const MeasureUnit &unit) {
inline bool unitIsNoUnit(const MeasureUnit& unit) {
return uprv_strcmp("none", unit.getType()) == 0;
}
inline bool unitIsPercent(const MeasureUnit &unit) {
inline bool unitIsPercent(const MeasureUnit& unit) {
return uprv_strcmp("percent", unit.getSubtype()) == 0;
}
inline bool unitIsPermille(const MeasureUnit &unit) {
inline bool unitIsPermille(const MeasureUnit& unit) {
return uprv_strcmp("permille", unit.getSubtype()) == 0;
}
} // namespace
NumberFormatterImpl *NumberFormatterImpl::fromMacros(const MacroProps &macros, UErrorCode &status) {
NumberFormatterImpl* NumberFormatterImpl::fromMacros(const MacroProps& macros, UErrorCode& status) {
return new NumberFormatterImpl(macros, true, status);
}
void NumberFormatterImpl::applyStatic(const MacroProps &macros, DecimalQuantity &inValue,
NumberStringBuilder &outString, UErrorCode &status) {
void NumberFormatterImpl::applyStatic(const MacroProps& macros, DecimalQuantity& inValue,
NumberStringBuilder& outString, UErrorCode& status) {
NumberFormatterImpl impl(macros, false, status);
impl.applyUnsafe(inValue, outString, status);
}
@ -154,8 +157,8 @@ void NumberFormatterImpl::applyStatic(const MacroProps &macros, DecimalQuantity
// The "unsafe" method simply re-uses fMicros, eliminating the extra copy operation.
// See MicroProps::processQuantity() for details.
void NumberFormatterImpl::apply(DecimalQuantity &inValue, NumberStringBuilder &outString,
UErrorCode &status) const {
void NumberFormatterImpl::apply(DecimalQuantity& inValue, NumberStringBuilder& outString,
UErrorCode& status) const {
if (U_FAILURE(status)) { return; }
MicroProps micros;
fMicroPropsGenerator->processQuantity(inValue, micros, status);
@ -163,23 +166,23 @@ void NumberFormatterImpl::apply(DecimalQuantity &inValue, NumberStringBuilder &o
microsToString(micros, inValue, outString, status);
}
void NumberFormatterImpl::applyUnsafe(DecimalQuantity &inValue, NumberStringBuilder &outString,
UErrorCode &status) {
void NumberFormatterImpl::applyUnsafe(DecimalQuantity& inValue, NumberStringBuilder& outString,
UErrorCode& status) {
if (U_FAILURE(status)) { return; }
fMicroPropsGenerator->processQuantity(inValue, fMicros, status);
if (U_FAILURE(status)) { return; }
microsToString(fMicros, inValue, outString, status);
}
NumberFormatterImpl::NumberFormatterImpl(const MacroProps &macros, bool safe, UErrorCode &status) {
NumberFormatterImpl::NumberFormatterImpl(const MacroProps& macros, bool safe, UErrorCode& status) {
fMicroPropsGenerator = macrosToMicroGenerator(macros, safe, status);
}
//////////
const MicroPropsGenerator *
NumberFormatterImpl::macrosToMicroGenerator(const MacroProps &macros, bool safe, UErrorCode &status) {
const MicroPropsGenerator *chain = &fMicros;
const MicroPropsGenerator*
NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, UErrorCode& status) {
const MicroPropsGenerator* chain = &fMicros;
// Check that macros is error-free before continuing.
if (macros.copyErrorTo(status)) {
@ -194,9 +197,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps &macros, bool safe,
bool isPercent = isNoUnit && unitIsPercent(macros.unit);
bool isPermille = isNoUnit && unitIsPermille(macros.unit);
bool isCldrUnit = !isCurrency && !isNoUnit;
bool isAccounting = macros.sign == UNUM_SIGN_ACCOUNTING
|| macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS
|| macros.sign == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO;
bool isAccounting =
macros.sign == UNUM_SIGN_ACCOUNTING || macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS ||
macros.sign == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO;
CurrencyUnit currency(kDefaultCurrency, status);
if (isCurrency) {
currency = CurrencyUnit(macros.unit, status); // Restore CurrencyUnit from MeasureUnit
@ -208,7 +211,7 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps &macros, bool safe,
// Select the numbering system.
LocalPointer<const NumberingSystem> nsLocal;
const NumberingSystem *ns;
const NumberingSystem* ns;
if (macros.symbols.isNumberingSystem()) {
ns = macros.symbols.getNumberingSystem();
} else {
@ -217,7 +220,7 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps &macros, bool safe,
// Give ownership to the function scope.
nsLocal.adoptInstead(ns);
}
const char *nsName = U_SUCCESS(status) ? ns->getName() : "latn";
const char* nsName = U_SUCCESS(status) ? ns->getName() : "latn";
// Resolve the symbols. Do this here because currency may need to customize them.
if (macros.symbols.isDecimalFormatSymbols()) {
@ -232,7 +235,8 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps &macros, bool safe,
// If we are formatting currency, check for a currency-specific pattern.
const char16_t* pattern = nullptr;
if (isCurrency) {
CurrencyFormatInfoResult info = getCurrencyFormatInfo(macros.locale, currency.getSubtype(), status);
CurrencyFormatInfoResult info = getCurrencyFormatInfo(
macros.locale, currency.getSubtype(), status);
if (info.exists) {
pattern = info.pattern;
// It's clunky to clone an object here, but this code is not frequently executed.
@ -240,13 +244,13 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps &macros, bool safe,
fMicros.symbols = symbols;
fSymbols.adoptInstead(symbols);
symbols->setSymbol(
DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol,
UnicodeString(info.decimalSeparator),
FALSE);
DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol,
UnicodeString(info.decimalSeparator),
FALSE);
symbols->setSymbol(
DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol,
UnicodeString(info.groupingSeparator),
FALSE);
DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol,
UnicodeString(info.groupingSeparator),
FALSE);
}
}
if (pattern == nullptr) {
@ -407,9 +411,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps &macros, bool safe,
return chain;
}
const PluralRules *
NumberFormatterImpl::resolvePluralRules(const PluralRules *rulesPtr, const Locale &locale,
UErrorCode &status) {
const PluralRules*
NumberFormatterImpl::resolvePluralRules(const PluralRules* rulesPtr, const Locale& locale,
UErrorCode& status) {
if (rulesPtr != nullptr) {
return rulesPtr;
}
@ -420,8 +424,8 @@ NumberFormatterImpl::resolvePluralRules(const PluralRules *rulesPtr, const Local
return fRules.getAlias();
}
int32_t NumberFormatterImpl::microsToString(const MicroProps &micros, DecimalQuantity &quantity,
NumberStringBuilder &string, UErrorCode &status) {
int32_t NumberFormatterImpl::microsToString(const MicroProps& micros, DecimalQuantity& quantity,
NumberStringBuilder& string, UErrorCode& status) {
micros.rounding.apply(quantity, status);
micros.integerWidth.apply(quantity, status);
int32_t length = writeNumber(micros, quantity, string, status);
@ -439,8 +443,8 @@ int32_t NumberFormatterImpl::microsToString(const MicroProps &micros, DecimalQua
return length;
}
int32_t NumberFormatterImpl::writeNumber(const MicroProps &micros, DecimalQuantity &quantity,
NumberStringBuilder &string, UErrorCode &status) {
int32_t NumberFormatterImpl::writeNumber(const MicroProps& micros, DecimalQuantity& quantity,
NumberStringBuilder& string, UErrorCode& status) {
int32_t length = 0;
if (quantity.isInfinite()) {
length += string.insert(
@ -480,8 +484,8 @@ int32_t NumberFormatterImpl::writeNumber(const MicroProps &micros, DecimalQuanti
return length;
}
int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps &micros, DecimalQuantity &quantity,
NumberStringBuilder &string, UErrorCode &status) {
int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps& micros, DecimalQuantity& quantity,
NumberStringBuilder& string, UErrorCode& status) {
int length = 0;
int integerCount = quantity.getUpperDisplayMagnitude() + 1;
for (int i = 0; i < integerCount; i++) {
@ -499,21 +503,21 @@ int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps &micros, Decima
// Get and append the next digit value
int8_t nextDigit = quantity.getDigit(i);
length += string.insert(
0, getDigitFromSymbols(nextDigit, *micros.symbols), UNUM_INTEGER_FIELD, status);
length += insertDigitFromSymbols(
string, 0, nextDigit, *micros.symbols, UNUM_INTEGER_FIELD, status);
}
return length;
}
/**
 * Appends the fraction digits of `quantity` to the end of `string`.
 *
 * Digits are emitted from magnitude -1 downward, one per position, for
 * `-quantity.getLowerDisplayMagnitude()` positions.
 *
 * @param micros   Supplies the DecimalFormatSymbols used to render each digit.
 * @param quantity The number whose fraction digits are written.
 * @param string   The output builder; digits are inserted at its current end.
 * @param status   Passed through to the string builder.
 * @return The sum of the lengths reported by the builder for each inserted digit.
 */
int32_t NumberFormatterImpl::writeFractionDigits(const MicroProps& micros, DecimalQuantity& quantity,
                                                 NumberStringBuilder& string, UErrorCode& status) {
    int length = 0;
    int fractionCount = -quantity.getLowerDisplayMagnitude();
    for (int i = 0; i < fractionCount; i++) {
        // Get and append the next digit value
        int8_t nextDigit = quantity.getDigit(-i - 1);
        // Fraction digits must carry the fraction field (not the integer field) so that
        // field-position queries on the formatted output can tell the two parts apart.
        length += insertDigitFromSymbols(
                string, string.length(), nextDigit, *micros.symbols, UNUM_FRACTION_FIELD, status);
    }
    return length;
}

View file

@ -86,4 +86,12 @@ bool Grouper::groupAtPosition(int32_t position, const impl::DecimalQuantity &val
&& value.getUpperDisplayMagnitude() - fGrouping1 + 1 >= fMinGrouping;
}
int16_t Grouper::getPrimary() const {
return fGrouping1;
}
int16_t Grouper::getSecondary() const {
return fGrouping2;
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -64,8 +64,7 @@ int32_t ScientificModifier::apply(NumberStringBuilder &output, int32_t /*leftInd
int32_t disp = std::abs(fExponent);
for (int j = 0; j < fHandler->fSettings.fMinExponentDigits || disp > 0; j++, disp /= 10) {
auto d = static_cast<int8_t>(disp % 10);
const UnicodeString &digitString = getDigitFromSymbols(d, *fHandler->fSymbols);
i += output.insert(i - j, digitString, UNUM_EXPONENT_FIELD, status);
i += insertDigitFromSymbols(output, i - j, d, *fHandler->fSymbols, UNUM_EXPONENT_FIELD, status);
}
return i - rightIndex;
}

View file

@ -19,7 +19,7 @@ namespace impl {
class UnicodeStringCharSequence : public CharSequence {
public:
explicit UnicodeStringCharSequence(const UnicodeString &other) {
explicit UnicodeStringCharSequence(const UnicodeString& other) {
fStr = other;
}
@ -62,10 +62,10 @@ struct MicroProps : public MicroPropsGenerator {
bool useCurrency;
// Note: This struct has no direct ownership of the following pointers.
const DecimalFormatSymbols *symbols;
const Modifier *modOuter;
const Modifier *modMiddle;
const Modifier *modInner;
const DecimalFormatSymbols* symbols;
const Modifier* modOuter;
const Modifier* modMiddle;
const Modifier* modInner;
// The following "helper" fields may optionally be used during the MicroPropsGenerator.
// They live here to retain memory.
@ -78,12 +78,12 @@ struct MicroProps : public MicroPropsGenerator {
MicroProps() = default;
MicroProps(const MicroProps &other) = default;
MicroProps(const MicroProps& other) = default;
MicroProps &operator=(const MicroProps &other) = default;
MicroProps& operator=(const MicroProps& other) = default;
void processQuantity(DecimalQuantity &, MicroProps &micros, UErrorCode &status) const U_OVERRIDE {
(void)status;
void processQuantity(DecimalQuantity&, MicroProps& micros, UErrorCode& status) const U_OVERRIDE {
(void) status;
if (this == &micros) {
// Unsafe path: no need to perform a copy.
U_ASSERT(!exhausted);
@ -111,14 +111,13 @@ struct NumberFormatterResults : public UMemory {
NumberStringBuilder string;
};
inline const UnicodeString getDigitFromSymbols(int8_t digit, const DecimalFormatSymbols &symbols) {
// TODO: Implement DecimalFormatSymbols.getCodePointZero()?
if (digit == 0) {
return symbols.getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kZeroDigitSymbol);
} else {
return symbols.getSymbol(static_cast<DecimalFormatSymbols::ENumberFormatSymbol>(
DecimalFormatSymbols::ENumberFormatSymbol::kOneDigitSymbol + digit - 1));
inline int32_t insertDigitFromSymbols(NumberStringBuilder& output, int32_t index, int8_t digit,
const DecimalFormatSymbols& symbols, Field field,
UErrorCode& status) {
if (symbols.getCodePointZero() != -1) {
return output.insertCodePoint(index, symbols.getCodePointZero() + digit, field, status);
}
return output.insert(index, symbols.getConstDigitSymbol(digit), field, status);
}
} // namespace impl

View file

@ -0,0 +1,313 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#include "numparse_types.h"
#include "numparse_decimal.h"
#include "numparse_unisets.h"
#include "numparse_utils.h"
#include "unicode/uchar.h"
using namespace icu;
using namespace icu::numparse;
using namespace icu::numparse::impl;
/**
 * Builds a matcher for digits, grouping separators and decimal separators.
 *
 * @param symbols    Source of the separator strings, the zero code point and the digit strings.
 * @param grouper    Source of the primary and secondary grouping sizes.
 * @param parseFlags Bit set of PARSE_FLAG_* values selecting monetary separators, strict
 *                   separators, strict grouping sizes, disabled grouping, disabled fraction
 *                   grouping and integer-only parsing.
 */
DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Grouper& grouper,
                               parse_flags_t parseFlags) {
    if (0 != (parseFlags & PARSE_FLAG_MONETARY_SEPARATORS)) {
        groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetaryGroupingSeparatorSymbol);
        decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol);
    } else {
        groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
        decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
    }
    bool strictSeparators = 0 != (parseFlags & PARSE_FLAG_STRICT_SEPARATORS);
    unisets::Key groupingKey = strictSeparators ? unisets::STRICT_ALL_SEPARATORS
                                                : unisets::ALL_SEPARATORS;

    // Attempt to find separators in the static cache
    groupingUniSet = unisets::get(groupingKey);
    unisets::Key decimalKey = unisets::chooseFrom(
            decimalSeparator,
            strictSeparators ? unisets::STRICT_COMMA : unisets::COMMA,
            strictSeparators ? unisets::STRICT_PERIOD : unisets::PERIOD);
    if (decimalKey != unisets::COUNT) {
        decimalUniSet = unisets::get(decimalKey);
    } else {
        // No cached set matches the decimal separator: build a private single-code-point set.
        auto* set = new UnicodeSet();
        set->add(decimalSeparator.char32At(0));
        set->freeze();
        decimalUniSet = set;
        fLocalDecimalUniSet.adoptInstead(set);
    }

    if (groupingKey != unisets::COUNT && decimalKey != unisets::COUNT) {
        // Everything is available in the static cache
        separatorSet = groupingUniSet;
        // The lead set must agree with the strictness of groupingKey chosen above:
        // strict parsing pairs with the strict separator set, lenient with the full set.
        leadSet = unisets::get(
                strictSeparators ? unisets::DIGITS_OR_STRICT_ALL_SEPARATORS
                                 : unisets::DIGITS_OR_ALL_SEPARATORS);
    } else {
        auto* set = new UnicodeSet();
        set->addAll(*groupingUniSet);
        set->addAll(*decimalUniSet);
        set->freeze();
        separatorSet = set;
        fLocalSeparatorSet.adoptInstead(set);
        // No precomputed lead set; getLeadCodePoints() assembles one on demand.
        leadSet = nullptr;
    }

    int cpZero = symbols.getCodePointZero();
    if (cpZero == -1 || !u_isdigit(cpZero) || u_digit(cpZero, 10) != 0) {
        // Uncommon case: okay to allocate.
        // The locale digits cannot be recognized via u_isdigit()/u_digit(), so keep the
        // ten digit strings around for string-based matching in match().
        auto digitStrings = new UnicodeString[10];
        fLocalDigitStrings.adoptInstead(digitStrings);
        for (int32_t i = 0; i <= 9; i++) {
            digitStrings[i] = symbols.getConstDigitSymbol(i);
        }
    }

    requireGroupingMatch = 0 != (parseFlags & PARSE_FLAG_STRICT_GROUPING_SIZE);
    groupingDisabled = 0 != (parseFlags & PARSE_FLAG_GROUPING_DISABLED);
    fractionGroupingDisabled = 0 != (
            parseFlags & PARSE_FLAG_FRACTION_GROUPING_DISABLED);
    integerOnly = 0 != (parseFlags & PARSE_FLAG_INTEGER_ONLY);
    grouping1 = grouper.getPrimary();
    grouping2 = grouper.getSecondary();
}
/**
 * NumberParseMatcher entry point: parses the main number (not an exponent) by
 * delegating to the four-argument overload with an exponent sign of zero.
 */
bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
    const int8_t noExponentSign = 0;
    return match(segment, result, noExponentSign, status);
}
/**
 * Consumes digits, grouping separators and a decimal separator from the front of `segment`.
 *
 * When `exponentSign` is zero the digits are appended to `result.quantity`. When it is
 * non-zero the digits are accumulated as an exponent and, if anything was consumed, the
 * magnitude of `result.quantity` is shifted by `exponentSign * exponent`. An exponent that
 * does not fit in int32_t clears the quantity when `exponentSign` is -1 and otherwise marks
 * the result as infinity.
 *
 * @param segment      The input; its offset is advanced past what was consumed, or restored
 *                     to its starting value on a strict-grouping failure or when only
 *                     separators were seen.
 * @param result       Receives the digits, the chars-consumed mark and flags.
 * @param exponentSign 0 for the main number; non-zero when parsing exponent digits.
 * @return true if the segment was exhausted or ended in a partial prefix of a digit or
 *         separator string; false otherwise, or if a number had already been consumed.
 */
bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign,
                           UErrorCode&) const {
    if (result.seenNumber() && exponentSign == 0) {
        // A number has already been consumed.
        return false;
    } else if (exponentSign != 0) {
        // scientific notation always comes after the number
        U_ASSERT(!result.quantity.bogus);
    }

    ParsedNumber backupResult(result);

    // strict parsing
    bool strictFail = false; // did we exit with a strict parse failure?
    UnicodeString actualGroupingString = groupingSeparator;
    UnicodeString actualDecimalString = decimalSeparator;
    int32_t groupedDigitCount = 0; // tracking count of digits delimited by grouping separator
    int32_t backupOffset = -1; // used for preserving the last confirmed position
    bool afterFirstGrouping = false;
    bool seenGrouping = false;
    bool seenDecimal = false;
    int32_t digitsAfterDecimal = 0;
    int32_t initialOffset = segment.getOffset();
    int32_t exponent = 0;
    bool hasPartialPrefix = false;
    while (segment.length() > 0) {
        hasPartialPrefix = false;

        // Attempt to match a digit.
        int8_t digit = -1;

        // Try by code point digit value.
        int cp = segment.getCodePoint();
        if (u_isdigit(cp)) {
            segment.adjustOffset(U16_LENGTH(cp));
            digit = static_cast<int8_t>(u_digit(cp, 10));
        }

        // Try by digit string.
        if (digit == -1 && !fLocalDigitStrings.isNull()) {
            for (int i = 0; i < 10; i++) {
                const UnicodeString& str = fLocalDigitStrings[i];
                int overlap = segment.getCommonPrefixLength(str);
                if (overlap == str.length()) {
                    segment.adjustOffset(overlap);
                    digit = static_cast<int8_t>(i);
                    break;
                } else if (overlap == segment.length()) {
                    hasPartialPrefix = true;
                }
            }
        }

        if (digit >= 0) {
            // Digit was found.
            // Check for grouping size violation
            if (backupOffset != -1) {
                if (requireGroupingMatch) {
                    // comma followed by digit, so group before comma is a secondary
                    // group. If there was a group separator before that, the group
                    // must == the secondary group length, else it can be <= the
                    // secondary group length.
                    if ((afterFirstGrouping && groupedDigitCount != grouping2) ||
                        (!afterFirstGrouping && groupedDigitCount > grouping2)) {
                        strictFail = true;
                        break;
                    }
                }
                afterFirstGrouping = true;
                backupOffset = -1;
                groupedDigitCount = 0;
            }

            // Save the digit in the DecimalQuantity or scientific adjustment.
            if (exponentSign != 0) {
                // Test the bound BEFORE multiplying: signed overflow is undefined behaviour,
                // and a wrapped product is not guaranteed to compare below the old value.
                // INT32_MAX doubles as the "overflowed" sentinel checked after the loop and
                // is sticky, since it always exceeds the bound below.
                if (exponent > (INT32_MAX - digit) / 10) {
                    // Overflow
                    exponent = INT32_MAX;
                } else {
                    exponent = exponent * 10 + digit;
                }
            } else {
                if (result.quantity.bogus) {
                    result.quantity.bogus = false;
                }
                result.quantity.appendDigit(digit, 0, true);
            }
            result.setCharsConsumed(segment);
            groupedDigitCount++;
            if (seenDecimal) {
                digitsAfterDecimal++;
            }
            continue;
        }

        // Attempt to match a literal grouping or decimal separator
        int32_t decimalOverlap = segment.getCommonPrefixLength(actualDecimalString);
        bool decimalStringMatch = decimalOverlap == actualDecimalString.length();
        int32_t groupingOverlap = segment.getCommonPrefixLength(actualGroupingString);
        bool groupingStringMatch = groupingOverlap == actualGroupingString.length();

        hasPartialPrefix = (decimalOverlap == segment.length()) || (groupingOverlap == segment.length());

        if (!seenDecimal && !groupingStringMatch &&
            (decimalStringMatch || (!seenDecimal && decimalUniSet->contains(cp)))) {
            // matched a decimal separator
            if (requireGroupingMatch) {
                if (backupOffset != -1 || (seenGrouping && groupedDigitCount != grouping1)) {
                    strictFail = true;
                    break;
                }
            }

            // If we're only parsing integers, then don't parse this one.
            if (integerOnly) {
                break;
            }

            seenDecimal = true;
            if (!decimalStringMatch) {
                // Matched through the set rather than the locale string: remember the
                // code point actually seen so later comparisons use it.
                actualDecimalString = UnicodeString(cp);
            }
            segment.adjustOffset(actualDecimalString.length());
            result.setCharsConsumed(segment);
            result.flags |= FLAG_HAS_DECIMAL_SEPARATOR;
            continue;
        }

        if (!groupingDisabled && !decimalStringMatch &&
            (groupingStringMatch || (!seenGrouping && groupingUniSet->contains(cp)))) {
            // matched a grouping separator
            if (requireGroupingMatch) {
                if (groupedDigitCount == 0) {
                    // leading group
                    strictFail = true;
                    break;
                } else if (backupOffset != -1) {
                    // two group separators in a row
                    break;
                }
            }

            if (fractionGroupingDisabled && seenDecimal) {
                // Stop parsing here.
                break;
            }

            seenGrouping = true;
            if (!groupingStringMatch) {
                actualGroupingString = UnicodeString(cp);
            }
            backupOffset = segment.getOffset();
            segment.adjustOffset(actualGroupingString.length());
            // Note: do NOT set charsConsumed
            continue;
        }

        // Not a digit and not a separator
        break;
    }

    // Check the final grouping for validity
    if (requireGroupingMatch && !seenDecimal && seenGrouping && afterFirstGrouping &&
        groupedDigitCount != grouping1) {
        strictFail = true;
    }

    if (requireGroupingMatch && strictFail) {
        result = backupResult;
        segment.setOffset(initialOffset);
    }

    if (result.quantity.bogus && segment.getOffset() != initialOffset) {
        // Strings that start with a separator but have no digits.
        // We don't need a backup of ParsedNumber because no changes could have been made to it.
        segment.setOffset(initialOffset);
        hasPartialPrefix = true;
    }

    if (!result.quantity.bogus) {
        // The final separator was a decimal separator.
        result.quantity.adjustMagnitude(-digitsAfterDecimal);
    }

    if (exponentSign != 0 && segment.getOffset() != initialOffset) {
        U_ASSERT(!result.quantity.bogus);
        bool overflow = (exponent == INT32_MAX);
        if (!overflow) {
            result.quantity.adjustMagnitude(exponentSign * exponent);
        }
        if (overflow) {
            if (exponentSign == -1) {
                // Set to zero
                result.quantity.clear();
            } else {
                // Set to infinity
                result.quantity.bogus = true;
                result.flags |= FLAG_INFINITY;
            }
        }
    }

    return segment.length() == 0 || hasPartialPrefix;
}
/**
 * Returns a newly allocated set of the code points that can begin a match.
 *
 * With no custom digit strings and a cached lead set available, the result is a copy of
 * that cached set. Otherwise the set is assembled from the cached digits, the separator
 * set and the lead code point of each custom digit string, then frozen.
 */
const UnicodeSet* DecimalMatcher::getLeadCodePoints() const {
    const bool hasCustomDigits = !fLocalDigitStrings.isNull();
    if (!hasCustomDigits && leadSet != nullptr) {
        return new UnicodeSet(*leadSet);
    }

    // Assumption: the sets are all single code points.
    auto* leads = new UnicodeSet();
    leads->addAll(*unisets::get(unisets::DIGITS));
    leads->addAll(*separatorSet);
    if (hasCustomDigits) {
        for (int digitIndex = 0; digitIndex < 10; ++digitIndex) {
            utils::putLeadCodePoint(fLocalDigitStrings[digitIndex], leads);
        }
    }
    leads->freeze();
    return leads;
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -0,0 +1,69 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#ifndef __NUMPARSE_DECIMAL_H__
#define __NUMPARSE_DECIMAL_H__
#include "unicode/uniset.h"
#include "numparse_types.h"
U_NAMESPACE_BEGIN namespace numparse {
namespace impl {
using ::icu::number::impl::Grouper;
// Matcher that consumes digits, grouping separators and a decimal separator from a
// StringSegment and records the parsed value in a ParsedNumber.
class DecimalMatcher : public NumberParseMatcher, public UMemory {
public:
// Configures separators, digit strings, grouping sizes and strictness from the
// given symbols, grouper and PARSE_FLAG_* bits.
DecimalMatcher(const DecimalFormatSymbols& symbols, const Grouper& grouper,
parse_flags_t parseFlags);
// Parses the main number; equivalent to the overload below with exponentSign == 0.
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
// Parses digits either as the main number (exponentSign == 0) or as an exponent whose
// value, multiplied by exponentSign, adjusts the magnitude of result.quantity.
bool
match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign, UErrorCode& status) const;
// Returns a newly allocated set of code points that can start a match.
const UnicodeSet* getLeadCodePoints() const override;
private:
/** If true, only accept strings whose grouping sizes match the locale */
bool requireGroupingMatch;
/** If true, do not accept grouping separators at all */
bool groupingDisabled;
/** If true, do not accept fraction grouping separators */
bool fractionGroupingDisabled;
/** If true, do not accept numbers in the fraction */
bool integerOnly;
// Primary and secondary grouping sizes, taken from the Grouper passed to the constructor.
int16_t grouping1;
int16_t grouping2;
// Locale separator strings (monetary or regular, depending on the parse flags).
UnicodeString groupingSeparator;
UnicodeString decimalSeparator;
// Assumption: these sets all consist of single code points. If this assumption needs to be broken,
// fix getLeadCodePoints() as well as matching logic. Be careful of the performance impact.
const UnicodeSet* groupingUniSet;
const UnicodeSet* decimalUniSet;
const UnicodeSet* separatorSet;
// May be null, in which case getLeadCodePoints() builds the lead set on demand.
const UnicodeSet* leadSet;
// Make this class the owner of a few objects that could be allocated.
// The first two LocalPointers are used for assigning ownership only.
LocalPointer<const UnicodeSet> fLocalDecimalUniSet;
LocalPointer<const UnicodeSet> fLocalSeparatorSet;
// Null unless the locale digits need string-based matching; then holds ten digit strings.
LocalArray<const UnicodeString> fLocalDigitStrings;
};
} // namespace impl
} // namespace numparse
U_NAMESPACE_END
#endif //__NUMPARSE_DECIMAL_H__
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -0,0 +1,113 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#include "number_types.h"
#include "number_patternstring.h"
#include "numparse_types.h"
#include "numparse_impl.h"
#include "numparse_symbols.h"
#include "numparse_decimal.h"
#include "unicode/numberformatter.h"
using namespace icu;
using namespace icu::number;
using namespace icu::number::impl;
using namespace icu::numparse;
using namespace icu::numparse::impl;
// Builds a parser for the given locale and pattern containing a DecimalMatcher and a
// MinusSignMatcher, freezes it, and returns it as a raw heap pointer.
// The commented-out lines are matchers that are not yet wired up here.
// NOTE(review): status is not inspected between steps, so a parser is returned even if
// symbol loading or pattern parsing reported an error; callers presumably must check
// status themselves — confirm.
NumberParserImpl*
NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString& patternString,
parse_flags_t parseFlags, UErrorCode& status) {
// computeLeads == true: lead code point sets are collected as matchers are added.
auto* parser = new NumberParserImpl(parseFlags, true);
DecimalFormatSymbols symbols(locale, status);
// IgnorablesMatcher* ignorables = IgnorablesMatcher.getDefault();
//
// MatcherFactory factory = new MatcherFactory();
// factory.currency = Currency.getInstance("USD");
// factory.symbols = symbols;
// factory.ignorables = ignorables;
// factory.locale = locale;
// factory.parseFlags = parseFlags;
ParsedPatternInfo patternInfo;
PatternParser::parseToPatternInfo(patternString, patternInfo, status);
// AffixMatcher.createMatchers(patternInfo, parser, factory, ignorables, parseFlags);
// Grouping sizes come from the parsed pattern and locale data.
Grouper grouper = Grouper::forStrategy(UNUM_GROUPING_AUTO);
grouper.setLocaleData(patternInfo, locale);
// parser.addMatcher({ignorables, false});
// The parser takes ownership of each matcher passed to addAndAdoptMatcher().
parser->addAndAdoptMatcher(new DecimalMatcher(symbols, grouper, parseFlags));
parser->addAndAdoptMatcher(new MinusSignMatcher(symbols, false));
// parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
// parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
// parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
// parser.addMatcher(new RequireNumberMatcher());
parser->freeze();
return parser;
}
/**
 * Creates an empty, unfrozen parser.
 *
 * @param parseFlags   PARSE_FLAG_* bits stored for later use.
 * @param computeLeads Whether a lead code point set is recorded for each added matcher.
 */
NumberParserImpl::NumberParserImpl(parse_flags_t parseFlags, bool computeLeads)
        : fParseFlags(parseFlags),
          fComputeLeads(computeLeads) {}
/**
 * Releases every adopted matcher and, when leads were computed, the lead set
 * stored alongside it.
 */
NumberParserImpl::~NumberParserImpl() {
    int32_t slot = 0;
    while (slot < fNumMatchers) {
        delete fMatchers[slot];
        if (fComputeLeads) {
            delete fLeads[slot];
        }
        ++slot;
    }
    fNumMatchers = 0;
}
void NumberParserImpl::addAndAdoptMatcher(const NumberParseMatcher* matcher) {
if (fNumMatchers + 1 > fMatchers.getCapacity()) {
fMatchers.resize(fNumMatchers * 2, fNumMatchers);
if (fComputeLeads) {
// The two arrays should grow in tandem:
U_ASSERT(fNumMatchers >= fLeads.getCapacity());
fLeads.resize(fNumMatchers * 2, fNumMatchers);
}
}
fMatchers[fNumMatchers] = matcher;
if (fComputeLeads) {
fLeads[fNumMatchers] = matcher->getLeadCodePoints();
}
fNumMatchers++;
}
void NumberParserImpl::freeze() {
fFrozen = true;
}
//void
//NumberParserImpl::parse(const UnicodeString& input, int32_t start, bool greedy, ParsedNumber& result,
// UErrorCode& status) const {
// U_ASSERT(frozen);
// // TODO: Check start >= 0 and start < input.length()
// StringSegment segment(utils::maybeFold(input, parseFlags));
// segment.adjustOffset(start);
// if (greedy) {
// parseGreedyRecursive(segment, result);
// } else {
// parseLongestRecursive(segment, result);
// }
// for (NumberParseMatcher matcher : matchers) {
// matcher.postProcess(result);
// }
//}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -0,0 +1,56 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#ifndef __NUMPARSE_IMPL_H__
#define __NUMPARSE_IMPL_H__
#include "numparse_types.h"
#include "unicode/uniset.h"
U_NAMESPACE_BEGIN namespace numparse {
namespace impl {
/**
 * Holds an ordered list of adopted NumberParseMatcher objects and, optionally, the lead
 * code point set of each one. Instances are created through createSimpleParser().
 */
class NumberParserImpl {
  public:
    /**
     * Builds a frozen parser for the given locale and pattern.
     * The caller owns the returned object and releases it with delete.
     */
    static NumberParserImpl* createSimpleParser(const Locale& locale, const UnicodeString& patternString,
                                                parse_flags_t parseFlags, UErrorCode& status);

    // Public so that the owner of a pointer returned by createSimpleParser() can delete it.
    ~NumberParserImpl();

    /** Appends a matcher and takes ownership of it. */
    void addAndAdoptMatcher(const NumberParseMatcher* matcher);

    /** Marks the parser as frozen. */
    void freeze();

    void parse(const UnicodeString& input, bool greedy, ParsedNumber& result, UErrorCode& status) const;

    void parse(const UnicodeString& input, int32_t start, bool greedy, ParsedNumber& result,
               UErrorCode& status) const;

    UnicodeString toString() const;

  private:
    parse_flags_t fParseFlags;
    // Number of entries in use in fMatchers (and in fLeads when fComputeLeads is set).
    int32_t fNumMatchers = 0;
    // NOTE: The stack capacity for fMatchers and fLeads should be the same
    MaybeStackArray<const NumberParseMatcher*, 10> fMatchers;
    MaybeStackArray<const UnicodeSet*, 10> fLeads;
    bool fComputeLeads;
    bool fFrozen = false;

    NumberParserImpl(parse_flags_t parseFlags, bool computeLeads);

    void parseGreedyRecursive(StringSegment& segment, ParsedNumber& result) const;

    void parseLongestRecursive(StringSegment& segment, ParsedNumber& result) const;
};
} // namespace impl
} // namespace numparse
U_NAMESPACE_END
#endif //__NUMPARSE_IMPL_H__
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -0,0 +1,95 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#include "numparse_types.h"
#include "numparse_symbols.h"
#include "numparse_utils.h"
using namespace icu;
using namespace icu::numparse;
using namespace icu::numparse::impl;
/**
 * Builds a matcher for a symbol that may be written either as one of the code points in the
 * shared set identified by key, or as the given symbol string.
 *
 * The set comes from unisets::get() and is not owned by this object. When the set already
 * contains symbolString, the string is redundant and fString is left bogus so that only the
 * set is consulted while matching.
 */
SymbolMatcher::SymbolMatcher(const UnicodeString& symbolString, unisets::Key key)
        : fUniSet(unisets::get(key)), fOwnsUniSet(false) {
    if (!fUniSet->contains(symbolString)) {
        // The symbol is not covered by the set; remember it for explicit comparison.
        fString = symbolString;
        return;
    }
    fString.setToBogus();
}
/**
 * Releases the UnicodeSet only when this matcher owns it.
 */
SymbolMatcher::~SymbolMatcher() {
    if (!fOwnsUniSet) {
        // Borrowed set: nothing to free.
        return;
    }
    delete fUniSet;
    fUniSet = nullptr;
}
/**
 * @return The UnicodeSet this matcher consults; the pointer is not transferred to the caller.
 */
const UnicodeSet* SymbolMatcher::getSet() {
    return this->fUniSet;
}
/**
 * Tries to consume this matcher's symbol from the front of the segment.
 *
 * The explicit symbol string (if any) is tried before the code point set so that a
 * multi-character symbol is consumed in full. On a successful match the segment offset is
 * advanced and accept() is invoked.
 *
 * @return false when the matcher is disabled or when something was consumed; otherwise true
 *         only if the common prefix with the symbol string spans the entire segment.
 */
bool SymbolMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const {
    // Cheap early exit: the subclass may veto matching for the current parse state.
    if (isDisabled(result)) {
        return false;
    }

    // First attempt: the literal symbol string.
    int sharedPrefixLen = 0;
    if (!fString.isEmpty()) {
        sharedPrefixLen = segment.getCommonPrefixLength(fString);
        if (sharedPrefixLen == fString.length()) {
            segment.adjustOffset(fString.length());
            accept(segment, result);
            return false;
        }
    }

    // Second attempt: a single code point drawn from the set.
    const int leadCp = segment.getCodePoint();
    const bool inSet = (leadCp != -1) && fUniSet->contains(leadCp);
    if (!inSet) {
        // Nothing consumed; report whether the segment ended in the middle of the symbol string.
        return sharedPrefixLen == segment.length();
    }
    segment.adjustOffset(U16_LENGTH(leadCp));
    accept(segment, result);
    return false;
}
/**
 * Builds the set of code points with which a match of this matcher can begin.
 *
 * @return A newly allocated UnicodeSet that the caller must adopt.
 */
const UnicodeSet* SymbolMatcher::getLeadCodePoints() const {
    if (!fString.isEmpty()) {
        // Combine the leads contributed by the set with the first code point of the string.
        UnicodeSet* leads = new UnicodeSet();
        utils::putLeadCodePoints(fUniSet, leads);
        utils::putLeadCodePoint(fString, leads);
        leads->freeze();
        return leads;
    }
    // Assumption: for sets from UnicodeSetStaticCache, uniSet == leadCodePoints.
    return new UnicodeSet(*fUniSet);
}
/**
 * Creates a matcher for the minus sign: either the locale's minus sign symbol taken from dfs,
 * or any code point in the unisets::MINUS_SIGN set.
 *
 * @param allowTrailing Whether the sign may still be matched after a number has been seen.
 */
MinusSignMatcher::MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing)
        : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol), unisets::MINUS_SIGN),
          fAllowTrailing(allowTrailing) {
}
/**
 * The minus sign matcher is disabled once a negative sign has been recorded, and also after
 * a number has been seen unless trailing signs are allowed.
 */
bool MinusSignMatcher::isDisabled(const ParsedNumber& result) const {
    if ((result.flags & FLAG_NEGATIVE) != 0) {
        // A minus sign was already accepted.
        return true;
    }
    return !fAllowTrailing && result.seenNumber();
}
/**
 * Records a matched minus sign: sets FLAG_NEGATIVE on the result and updates the number of
 * consumed characters from the segment.
 */
void MinusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
    result.flags = result.flags | FLAG_NEGATIVE;
    result.setCharsConsumed(segment);
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -0,0 +1,60 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#ifndef __NUMPARSE_SYMBOLS_H__
#define __NUMPARSE_SYMBOLS_H__
#include "numparse_types.h"
#include "unicode/uniset.h"
#include "numparse_unisets.h"
U_NAMESPACE_BEGIN namespace numparse {
namespace impl {
/**
 * Abstract matcher for a single symbol, recognized either as a code point belonging to a
 * UnicodeSet or as an explicit symbol string. Subclasses decide when the matcher is disabled
 * and what to record in the ParsedNumber when the symbol is matched.
 */
class SymbolMatcher : public NumberParseMatcher, public UMemory {
public:
// Deletes fUniSet only when fOwnsUniSet is true.
~SymbolMatcher() override;
// Returns fUniSet without transferring ownership.
const UnicodeSet* getSet();
// Tries the symbol string first, then a single code point from the set; calls accept() on success.
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
// Returns a newly allocated set of possible lead code points; the caller adopts it.
const UnicodeSet* getLeadCodePoints() const override;
// Consulted at the start of match(); when it returns true, match() returns false without consuming anything.
virtual bool isDisabled(const ParsedNumber& result) const = 0;
// Invoked by match() after the segment offset has been advanced past the matched symbol.
virtual void accept(StringSegment& segment, ParsedNumber& result) const = 0;
protected:
// Explicit symbol string; left bogus by the constructor when the set already contains the symbol.
UnicodeString fString;
// Set of code points accepted as this symbol.
const UnicodeSet* fUniSet;
// Whether the destructor should delete fUniSet; the constructor sets this to false.
bool fOwnsUniSet;
SymbolMatcher(const UnicodeString& symbolString, unisets::Key key);
};
/**
 * SymbolMatcher for the minus sign. A match sets FLAG_NEGATIVE on the ParsedNumber.
 */
class MinusSignMatcher : public SymbolMatcher {
public:
// Uses the minus sign symbol from dfs together with the unisets::MINUS_SIGN set.
MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing);
protected:
// True once FLAG_NEGATIVE is set, or once a number has been seen unless fAllowTrailing is true.
bool isDisabled(const ParsedNumber& result) const override;
// Sets FLAG_NEGATIVE and records the consumed characters.
void accept(StringSegment& segment, ParsedNumber& result) const override;
private:
// When true, the sign may still be matched after a number has been seen.
bool fAllowTrailing;
};
} // namespace impl
} // namespace numparse
U_NAMESPACE_END
#endif //__NUMPARSE_SYMBOLS_H__
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -17,6 +17,42 @@ namespace impl {
class StringSegment;
class ParsedNumber;
typedef int32_t result_flags_t;
typedef int32_t parse_flags_t;
/**
 * Flags for the type result_flags_t.
 *
 * Each enumerator occupies a distinct bit, so values can be combined with bitwise OR and
 * tested with bitwise AND; ParsedNumber::flags holds such a combination.
 */
enum ResultFlags {
FLAG_NEGATIVE = 0x0001,
FLAG_PERCENT = 0x0002,
FLAG_PERMILLE = 0x0004,
FLAG_HAS_EXPONENT = 0x0008,
FLAG_HAS_DEFAULT_CURRENCY = 0x0010,
FLAG_HAS_DECIMAL_SEPARATOR = 0x0020,
FLAG_NAN = 0x0040,
FLAG_INFINITY = 0x0080,
FLAG_FAIL = 0x0100,
};
/**
 * Flags for the type parse_flags_t.
 *
 * Each enumerator occupies a distinct bit, so values can be combined with bitwise OR when a
 * parser is constructed. The values from PARSE_FLAG_STRICT_GROUPING_SIZE onward equal the
 * same-named constants visible in ICU4J's ParsingUtils.
 */
enum ParseFlags {
PARSE_FLAG_IGNORE_CASE = 0x0001,
PARSE_FLAG_MONETARY_SEPARATORS = 0x0002,
PARSE_FLAG_STRICT_SEPARATORS = 0x0004,
PARSE_FLAG_STRICT_GROUPING_SIZE = 0x0008,
PARSE_FLAG_INTEGER_ONLY = 0x0010,
PARSE_FLAG_GROUPING_DISABLED = 0x0020,
PARSE_FLAG_FRACTION_GROUPING_DISABLED = 0x0040,
PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES = 0x0080,
PARSE_FLAG_USE_FULL_AFFIXES = 0x0100,
PARSE_FLAG_EXACT_AFFIX = 0x0200,
PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400,
};
//template<typename T>
//struct MaybeNeedsAdoption {
// T* ptr;
// bool needsAdoption;
//};
/**
* Struct-like class to hold the results of a parsing routine.
@ -25,17 +61,6 @@ class ParsedNumber;
*/
class ParsedNumber {
public:
enum ParsedNumberFlags {
FLAG_NEGATIVE = 0x0001,
FLAG_PERCENT = 0x0002,
FLAG_PERMILLE = 0x0004,
FLAG_HAS_EXPONENT = 0x0008,
FLAG_HAS_DEFAULT_CURRENCY = 0x0010,
FLAG_HAS_DECIMAL_SEPARATOR = 0x0020,
FLAG_NAN = 0x0040,
FLAG_INFINITY = 0x0080,
FLAG_FAIL = 0x0100,
};
/**
* The numerical value that was parsed.
@ -51,9 +76,9 @@ class ParsedNumber {
int32_t charEnd;
/**
* Boolean flags (see constants below).
* Boolean flags (see constants above).
*/
int32_t flags;
result_flags_t flags;
/**
* The pattern string corresponding to the prefix that got consumed.
@ -204,15 +229,17 @@ class NumberParseMatcher {
* @return Whether this matcher thinks there may be more interesting chars beyond the end of the
* string segment.
*/
virtual bool match(StringSegment& segment, ParsedNumber& result) const = 0;
virtual bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const = 0;
/**
* Should return a set representing all possible chars (UTF-16 code units) that could be the first
* char that this matcher can consume. This method is only called during construction phase, and its
* return value is used to skip this matcher unless a segment begins with a char in this set. To make
* this matcher always run, return {@link UnicodeSet#ALL_CODE_POINTS}.
*
* The returned UnicodeSet needs adoption!
*/
virtual UnicodeSet getLeadCodePoints() const = 0;
virtual const UnicodeSet* getLeadCodePoints() const = 0;
/**
* Method called at the end of a parse, after all matchers have failed to consume any more chars.
@ -222,7 +249,9 @@ class NumberParseMatcher {
* @param result
* The data structure to store results.
*/
virtual void postProcess(ParsedNumber& result) const = 0;
virtual void postProcess(ParsedNumber& /* result */) const {
    // Intentionally empty: matchers that need no post-processing inherit this no-op.
}
};

View file

@ -0,0 +1,38 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#ifndef __NUMPARSE_UTILS_H__
#define __NUMPARSE_UTILS_H__
#include "numparse_types.h"
#include "unicode/uniset.h"
U_NAMESPACE_BEGIN namespace numparse {
namespace impl {
namespace utils {
inline static void putLeadCodePoints(const UnicodeSet* input, UnicodeSet* output) {
for (int32_t i = 0; i < input->getRangeCount(); i++) {
output->add(input->getRangeStart(i), input->getRangeEnd(i));
}
// TODO: ANDY: How to iterate over the strings in ICU4C UnicodeSet?
}
/**
 * Adds the first code point of input to output; an empty input leaves output untouched.
 */
inline static void putLeadCodePoint(const UnicodeString& input, UnicodeSet* output) {
    if (input.isEmpty()) {
        return;
    }
    output->add(input.char32At(0));
}
} // namespace utils
} // namespace impl
} // namespace numparse
U_NAMESPACE_END
#endif //__NUMPARSE_UTILS_H__
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -378,7 +378,18 @@ typedef enum UNumberDecimalSeparatorDisplay {
UNUM_DECIMAL_SEPARATOR_COUNT
} UNumberDecimalMarkDisplay;
U_NAMESPACE_BEGIN namespace number { // icu::number
U_NAMESPACE_BEGIN
namespace numparse {
namespace impl {
// Forward declarations:
class NumberParserImpl;
}
}
namespace number { // icu::number
// Forward declarations:
class UnlocalizedNumberFormatter;
@ -1311,6 +1322,12 @@ class U_I18N_API Grouper : public UMemory {
Grouper(int16_t grouping1, int16_t grouping2, int16_t minGrouping)
: fGrouping1(grouping1), fGrouping2(grouping2), fMinGrouping(minGrouping) {}
/** @internal */
int16_t getPrimary() const;
/** @internal */
int16_t getSecondary() const;
private:
/**
* The grouping sizes, with the following special values:
@ -1349,6 +1366,9 @@ class U_I18N_API Grouper : public UMemory {
// To allow NumberFormatterImpl to access isBogus() and perform other operations:
friend class NumberFormatterImpl;
// To allow NumberParserImpl to perform setLocaleData():
friend class ::icu::numparse::impl::NumberParserImpl;
};
/** @internal */

View file

@ -64,7 +64,8 @@ scientificnumberformattertest.o datadrivennumberformattestsuite.o \
numberformattesttuple.o numberformat2test.o pluralmaptest.o \
numbertest_affixutils.o numbertest_api.o numbertest_decimalquantity.o \
numbertest_modifiers.o numbertest_patternmodifier.o numbertest_patternstring.o \
numbertest_stringbuilder.o numbertest_stringsegment.o numbertest_unisets.o
numbertest_stringbuilder.o numbertest_stringsegment.o numbertest_unisets.o \
numbertest_parse.o
DEPS = $(OBJECTS:.o=.d)

View file

@ -207,6 +207,16 @@ class UniSetsTest : public IntlTest {
const UnicodeSet& set, UChar32 cp);
};
/**
 * Test suite for the number parser (NumberParserImpl).
 */
class NumberParserTest : public IntlTest {
public:
// Data-driven test of NumberParserImpl::createSimpleParser() and parse().
void testBasic();
void testLocaleFi();
void testSeriesMatcher();
void testGroupingDisabled();
// Test dispatcher invoked by the IntlTest framework.
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
};
// NOTE: This macro is identical to the one in itformat.cpp
#define TESTCLASS(id, TestClass) \
@ -237,6 +247,7 @@ class NumberTest : public IntlTest {
TESTCLASS(6, NumberStringBuilderTest);
TESTCLASS(7, StringSegmentTest);
TESTCLASS(8, UniSetsTest);
TESTCLASS(9, NumberParserTest);
default: name = ""; break; // needed to end loop
}
}

View file

@ -0,0 +1,144 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
#include "numbertest.h"
#include "numparse_impl.h"
#include "numparse_unisets.h"
#include "unicode/dcfmtsym.h"
#include "unicode/testlog.h"
#include <cmath>
using icu::numparse::impl::unisets::get;
/**
 * Test dispatcher for NumberParserTest. Logs the suite name when exec is set and then hands
 * control to the TESTCASE_AUTO macros, which list the test methods of this suite.
 *
 * NOTE(review): only testBasic is listed here; testLocaleFi, testSeriesMatcher and
 * testGroupingDisabled are declared on the class but are not registered.
 */
void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) {
if (exec) {
logln("TestSuite NumberParserTest: ");
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testBasic);
TESTCASE_AUTO_END;
}
void NumberParserTest::testBasic() {
IcuTestErrorCode status(*this, "testBasic");
static const struct TestCase {
int32_t flags;
const char16_t* inputString;
const char16_t* patternString;
int32_t expectedCharsConsumed;
double expectedResultDouble;
} cases[] = {{3, u"51423", u"0", 5, 51423.},
{3, u"51423x", u"0", 5, 51423.},
{3, u" 51423", u"0", 6, 51423.},
{3, u"51423 ", u"0", 5, 51423.},
{3, u"𝟱𝟭𝟰𝟮𝟯", u"0", 10, 51423.},
{3, u"𝟱𝟭𝟰𝟮𝟯x", u"0", 10, 51423.},
{3, u" 𝟱𝟭𝟰𝟮𝟯", u"0", 11, 51423.},
{3, u"𝟱𝟭𝟰𝟮𝟯 ", u"0", 10, 51423.},
{7, u"𝟱𝟭,𝟰𝟮𝟯", u"#,##,##0", 11, 51423.},
{7, u"𝟳,𝟴𝟵,𝟱𝟭,𝟰𝟮𝟯", u"#,##,##0", 19, 78951423.},
{7, u"𝟳𝟴,𝟵𝟱𝟭.𝟰𝟮𝟯", u"#,##,##0", 18, 78951.423},
{7, u"𝟳𝟴,𝟬𝟬𝟬", u"#,##,##0", 11, 78000.},
{7, u"𝟳𝟴,𝟬𝟬𝟬.𝟬𝟬𝟬", u"#,##,##0", 18, 78000.},
{7, u"𝟳𝟴,𝟬𝟬𝟬.𝟬𝟮𝟯", u"#,##,##0", 18, 78000.023},
{7, u"𝟳𝟴.𝟬𝟬𝟬.𝟬𝟮𝟯", u"#,##,##0", 11, 78.},
{3, u"-𝟱𝟭𝟰𝟮𝟯", u"0", 11, -51423.},
{3, u"-𝟱𝟭𝟰𝟮𝟯-", u"0", 11, -51423.},
{3, u"a51423US dollars", u"a0¤¤¤", 16, 51423.},
{3, u"a 51423 US dollars", u"a0¤¤¤", 18, 51423.},
{3, u"514.23 USD", u"¤0", 10, 514.23},
{3, u"514.23 GBP", u"¤0", 10, 514.23},
{3, u"a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 14, 51423.},
{3, u"-a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
{3, u"a -𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
{3, u"𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 10, 51423.},
{3, u"[𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, 51423.},
{3, u"𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 11, 51423.},
{3, u"[𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 12, 51423.},
{3, u"(𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, -51423.},
{3, u"𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 11, -51423.},
{3, u"(𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 12, -51423.},
{3, u"𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 10, 51423.},
{3, u"{𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 11, 51423.},
{3, u"𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 11, 51423.},
{3, u"{𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 12, 51423.},
{1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number
{2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b"
{3, u"𝟱.𝟭𝟰𝟮E𝟯", u"0", 12, 5142.},
{3, u"𝟱.𝟭𝟰𝟮E-𝟯", u"0", 13, 0.005142},
{3, u"𝟱.𝟭𝟰𝟮e-𝟯", u"0", 13, 0.005142},
{7, u"5,142.50 Canadian dollars", u"#,##,##0 ¤¤¤", 25, 5142.5},
{3, u"a$ b5", u"a ¤ b0", 5, 5.0},
{3, u"📺1.23", u"📺0;📻0", 6, 1.23},
{3, u"📻1.23", u"📺0;📻0", 6, -1.23},
{3, u".00", u"0", 3, 0.0},
{3, u" 0", u"a0", 31, 0.0}, // should not hang
{3, u"NaN", u"0", 3, NAN},
{3, u"NaN E5", u"0", 3, NAN},
{3, u"0", u"0", 1, 0.0}};
parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
for (auto cas : cases) {
UnicodeString inputString(cas.inputString);
UnicodeString patternString(cas.patternString);
const NumberParserImpl* parser = NumberParserImpl::createSimpleParser(
Locale("en"), patternString, parseFlags, status);
UnicodeString message =
UnicodeString("Input <") + inputString + UnicodeString("> Parser ") + parser->toString();
if (0 != (cas.flags & 0x01)) {
// Test greedy code path
ParsedNumber resultObject;
parser->parse(inputString, true, resultObject, status);
assertTrue("Greedy Parse failed: " + message, resultObject.success());
assertEquals(
"Greedy Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd);
assertEquals(
"Greedy Parse failed: " + message,
cas.expectedResultDouble,
resultObject.getDouble());
}
if (0 != (cas.flags & 0x02)) {
// Test slow code path
ParsedNumber resultObject;
parser->parse(inputString, false, resultObject, status);
assertTrue("Non-Greedy Parse failed: " + message, resultObject.success());
assertEquals(
"Non-Greedy Parse failed: " + message,
cas.expectedCharsConsumed,
resultObject.charEnd);
assertEquals(
"Non-Greedy Parse failed: " + message,
cas.expectedResultDouble,
resultObject.getDouble());
}
if (0 != (cas.flags & 0x04)) {
// Test with strict separators
parser = NumberParserImpl::createSimpleParser(
Locale("en"),
patternString,
parseFlags | PARSE_FLAG_STRICT_GROUPING_SIZE,
status);
ParsedNumber resultObject;
parser->parse(inputString, true, resultObject, status);
assertTrue("Strict Parse failed: " + message, resultObject.success());
assertEquals(
"Strict Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd);
assertEquals(
"Strict Parse failed: " + message,
cas.expectedResultDouble,
resultObject.getDouble());
}
}
}
#endif

View file

@ -77,7 +77,7 @@ void NumberStringBuilderTest::testInsertAppendUnicodeString() {
}
void NumberStringBuilderTest::testSplice() {
const struct TestCase {
static const struct TestCase {
const char16_t* input;
const int32_t startThis;
const int32_t endThis;

View file

@ -9,9 +9,6 @@
#include "numparse_unisets.h"
#include "unicode/dcfmtsym.h"
#include <iostream>
#include <cstr.h>
using icu::numparse::impl::unisets::get;
void UniSetsTest::runIndexedTest(int32_t index, UBool exec, const char*&name, char*) {

View file

@ -27,9 +27,6 @@ public class DecimalMatcher implements NumberParseMatcher {
/** If true, do not accept numbers in the fraction */
private final boolean integerOnly;
/** If true, save the result as an exponent instead of a quantity in the ParsedNumber */
private final boolean isScientific;
private final int grouping1;
private final int grouping2;
@ -97,20 +94,28 @@ public class DecimalMatcher implements NumberParseMatcher {
fractionGroupingDisabled = 0 != (parseFlags
& ParsingUtils.PARSE_FLAG_FRACTION_GROUPING_DISABLED);
integerOnly = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_INTEGER_ONLY);
isScientific = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_DECIMAL_SCIENTIFIC);
grouping1 = grouper.getPrimary();
grouping2 = grouper.getSecondary();
}
@Override
public boolean match(StringSegment segment, ParsedNumber result) {
return match(segment, result, false);
return match(segment, result, 0);
}
public boolean match(StringSegment segment, ParsedNumber result, boolean negativeExponent) {
if (result.seenNumber() && !isScientific) {
/**
* @param exponentSign
* -1 means a negative exponent; +1 means a positive exponent; 0 means NO exponent. If -1
* or +1, the number will be saved by scaling the pre-existing DecimalQuantity in the
* ParsedNumber. If 0, a new DecimalQuantity will be created to store the number.
*/
public boolean match(StringSegment segment, ParsedNumber result, int exponentSign) {
if (result.seenNumber() && exponentSign == 0) {
// A number has already been consumed.
return false;
} else if (exponentSign != 0) {
// scientific notation always comes after the number
assert result.quantity != null;
}
ParsedNumber backupResult = null;
@ -181,7 +186,7 @@ public class DecimalMatcher implements NumberParseMatcher {
}
// Save the digit in the DecimalQuantity or scientific adjustment.
if (isScientific) {
if (exponentSign != 0) {
int nextExponent = digit + exponent * 10;
if (nextExponent < exponent) {
// Overflow
@ -272,11 +277,6 @@ public class DecimalMatcher implements NumberParseMatcher {
break;
}
// if (backupOffset != -1) {
// segment.setOffset(backupOffset);
// hasPartialPrefix = true;
// }
// Check the final grouping for validity
if (requireGroupingMatch
&& !seenDecimal
@ -303,18 +303,17 @@ public class DecimalMatcher implements NumberParseMatcher {
result.quantity.adjustMagnitude(-digitsAfterDecimal);
}
if (isScientific && segment.getOffset() != initialOffset) {
assert result.quantity != null; // scientific notation always comes after the number
if (exponentSign != 0 && segment.getOffset() != initialOffset) {
boolean overflow = (exponent == Integer.MAX_VALUE);
if (!overflow) {
try {
result.quantity.adjustMagnitude(negativeExponent ? -exponent : exponent);
result.quantity.adjustMagnitude(exponentSign * exponent);
} catch (ArithmeticException e) {
overflow = true;
}
}
if (overflow) {
if (negativeExponent) {
if (exponentSign == -1) {
// Set to zero
result.quantity.clear();
} else {

View file

@ -266,28 +266,27 @@ public class NumberParserImpl {
private final int parseFlags;
private final List<NumberParseMatcher> matchers;
private final List<UnicodeSet> leadCodePointses;
private final List<UnicodeSet> leads;
private Comparator<ParsedNumber> comparator;
private boolean frozen;
/**
* Creates a new, empty parser.
*
* @param ignoreCase
* If true, perform case-folding. This parameter needs to go into the constructor because
* its value is used during the construction of the matcher chain.
* @param optimize
* @param parseFlags
* Settings for constructing the parser.
* @param computeLeads
* If true, compute "lead chars" UnicodeSets for the matchers. This reduces parsing
* runtime but increases construction runtime. If the parser is going to be used only once
* or twice, set this to false; if it is going to be used hundreds of times, set it to
* true.
*/
public NumberParserImpl(int parseFlags, boolean optimize) {
public NumberParserImpl(int parseFlags, boolean computeLeads) {
matchers = new ArrayList<NumberParseMatcher>();
if (optimize) {
leadCodePointses = new ArrayList<UnicodeSet>();
if (computeLeads) {
leads = new ArrayList<UnicodeSet>();
} else {
leadCodePointses = null;
leads = null;
}
comparator = ParsedNumber.COMPARATOR; // default value
this.parseFlags = parseFlags;
@ -297,21 +296,21 @@ public class NumberParserImpl {
public void addMatcher(NumberParseMatcher matcher) {
assert !frozen;
this.matchers.add(matcher);
if (leadCodePointses != null) {
if (leads != null) {
UnicodeSet leadCodePoints = matcher.getLeadCodePoints();
assert leadCodePoints.isFrozen();
this.leadCodePointses.add(leadCodePoints);
this.leads.add(leadCodePoints);
}
}
public void addMatchers(Collection<? extends NumberParseMatcher> matchers) {
assert !frozen;
this.matchers.addAll(matchers);
if (leadCodePointses != null) {
if (leads != null) {
for (NumberParseMatcher matcher : matchers) {
UnicodeSet leadCodePoints = matcher.getLeadCodePoints();
assert leadCodePoints.isFrozen();
this.leadCodePointses.add(leadCodePoints);
this.leads.add(leadCodePoints);
}
}
}
@ -366,7 +365,7 @@ public class NumberParserImpl {
int initialOffset = segment.getOffset();
int leadCp = segment.getCodePoint();
for (int i = 0; i < matchers.size(); i++) {
if (leadCodePointses != null && !leadCodePointses.get(i).contains(leadCp)) {
if (leads != null && !leads.get(i).contains(leadCp)) {
continue;
}
NumberParseMatcher matcher = matchers.get(i);

View file

@ -17,12 +17,11 @@ public class ParsingUtils {
public static final int PARSE_FLAG_STRICT_GROUPING_SIZE = 0x0008;
public static final int PARSE_FLAG_INTEGER_ONLY = 0x0010;
public static final int PARSE_FLAG_GROUPING_DISABLED = 0x0020;
public static final int PARSE_FLAG_DECIMAL_SCIENTIFIC = 0x0040;
public static final int PARSE_FLAG_FRACTION_GROUPING_DISABLED = 0x0040;
public static final int PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES = 0x0080;
public static final int PARSE_FLAG_USE_FULL_AFFIXES = 0x0100;
public static final int PARSE_FLAG_EXACT_AFFIX = 0x0200;
public static final int PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400;
public static final int PARSE_FLAG_FRACTION_GROUPING_DISABLED = 0x0800;
public static void putLeadCodePoints(UnicodeSet input, UnicodeSet output) {
for (EntryRange range : input.ranges()) {

View file

@ -27,13 +27,15 @@ public class ScientificMatcher implements NumberParseMatcher {
exponentSeparatorString = ParsingUtils.maybeFold(symbols.getExponentSeparator(), parseFlags);
exponentMatcher = DecimalMatcher.getInstance(symbols,
grouper,
ParsingUtils.PARSE_FLAG_DECIMAL_SCIENTIFIC | ParsingUtils.PARSE_FLAG_INTEGER_ONLY);
ParsingUtils.PARSE_FLAG_INTEGER_ONLY);
}
@Override
public boolean match(StringSegment segment, ParsedNumber result) {
// Only accept scientific notation after the mantissa.
if (!result.seenNumber()) {
// Most places use result.hasNumber(), but we need a stronger condition here (i.e., exponent is
// not well-defined after NaN or infinity).
if (result.quantity == null) {
return false;
}
@ -54,16 +56,16 @@ public class ScientificMatcher implements NumberParseMatcher {
}
// Allow a sign, and then try to match digits.
boolean minusSign = false;
int exponentSign = 1;
if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN).contains(leadCp)) {
minusSign = true;
exponentSign = -1;
segment.adjustOffset(Character.charCount(leadCp));
} else if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN).contains(leadCp)) {
segment.adjustOffset(Character.charCount(leadCp));
}
int digitsOffset = segment.getOffset();
boolean digitsReturnValue = exponentMatcher.match(segment, result, minusSign);
boolean digitsReturnValue = exponentMatcher.match(segment, result, exponentSign);
if (segment.getOffset() != digitsOffset) {
// At least one exponent digit was matched.
result.flags |= ParsedNumber.FLAG_HAS_EXPONENT;

View file

@ -1654,13 +1654,13 @@ public class DecimalFormatSymbols implements Cloneable, Serializable {
* represents a sequence of ten code points in order.
*
* <p>If the value stored here is positive, it means that the code point stored in this value
* corresponds to the digitStrings array, and zeroCodePoint can be used instead of the
* corresponds to the digitStrings array, and codePointZero can be used instead of the
* digitStrings array for the purposes of efficient formatting; if -1, then digitStrings does
* *not* contain a sequence of code points, and it must be used directly.
*
* <p>It is assumed that zeroCodePoint always shadows the value in digitStrings. zeroCodePoint
* <p>It is assumed that codePointZero always shadows the value in digitStrings. codePointZero
* should never be set directly; rather, it should be updated only when digitStrings mutates.
* That is, the flow of information is digitStrings -> zeroCodePoint, not the other way.
* That is, the flow of information is digitStrings -> codePointZero, not the other way.
*/
private transient int codePointZero;

View file

@ -296,6 +296,9 @@ public class IntlTestDecimalFormatSymbols extends TestFmwk
final String[] differentDigitStrings = {"0", "b", "3", "d", "5", "ff", "7", "h", "9", "j"};
DecimalFormatSymbols symbols = new DecimalFormatSymbols(Locale.ENGLISH);
if (defZero != symbols.getCodePointZero()) {
errln("ERROR: Code point zero initialize to ASCII 0");
}
symbols.setDigitStrings(osmanyaDigitStrings);
if (!Arrays.equals(symbols.getDigitStrings(), osmanyaDigitStrings)) {

View file

@ -3,7 +3,6 @@
package com.ibm.icu.dev.test.number;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import org.junit.Test;
@ -86,30 +85,32 @@ public class NumberParserTest {
{ 3, "📻1.23", "📺0;📻0", 6, -1.23 },
{ 3, ".00", "0", 3, 0.0 },
{ 3, " 0", "a0", 31, 0.0 }, // should not hang
{ 3, "NaN", "0", 3, Double.NaN },
{ 3, "NaN E5", "0", 3, Double.NaN },
{ 3, "0", "0", 1, 0.0 } };
int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE
| ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
for (Object[] cas : cases) {
int flags = (Integer) cas[0];
String input = (String) cas[1];
String pattern = (String) cas[2];
String inputString = (String) cas[1];
String patternString = (String) cas[2];
int expectedCharsConsumed = (Integer) cas[3];
double resultDouble = (Double) cas[4];
double expectedResultDouble = (Double) cas[4];
NumberParserImpl parser = NumberParserImpl
.createSimpleParser(ULocale.ENGLISH, pattern, parseFlags);
String message = "Input <" + input + "> Parser " + parser;
.createSimpleParser(ULocale.ENGLISH, patternString, parseFlags);
String message = "Input <" + inputString + "> Parser " + parser;
if (0 != (flags & 0x01)) {
// Test greedy code path
ParsedNumber resultObject = new ParsedNumber();
parser.parse(input, true, resultObject);
assertNotNull("Greedy Parse failed: " + message, resultObject.quantity);
parser.parse(inputString, true, resultObject);
assertTrue("Greedy Parse failed: " + message, resultObject.success());
assertEquals("Greedy Parse failed: " + message,
expectedCharsConsumed,
resultObject.charEnd);
assertEquals("Greedy Parse failed: " + message,
resultDouble,
expectedResultDouble,
resultObject.getNumber().doubleValue(),
0.0);
}
@ -117,13 +118,13 @@ public class NumberParserTest {
if (0 != (flags & 0x02)) {
// Test slow code path
ParsedNumber resultObject = new ParsedNumber();
parser.parse(input, false, resultObject);
assertNotNull("Non-Greedy Parse failed: " + message, resultObject.quantity);
parser.parse(inputString, false, resultObject);
assertTrue("Non-Greedy Parse failed: " + message, resultObject.success());
assertEquals("Non-Greedy Parse failed: " + message,
expectedCharsConsumed,
resultObject.charEnd);
assertEquals("Non-Greedy Parse failed: " + message,
resultDouble,
expectedResultDouble,
resultObject.getNumber().doubleValue(),
0.0);
}
@ -131,16 +132,16 @@ public class NumberParserTest {
if (0 != (flags & 0x04)) {
// Test with strict separators
parser = NumberParserImpl.createSimpleParser(ULocale.ENGLISH,
pattern,
patternString,
parseFlags | ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE);
ParsedNumber resultObject = new ParsedNumber();
parser.parse(input, true, resultObject);
assertNotNull("Strict Parse failed: " + message, resultObject.quantity);
parser.parse(inputString, true, resultObject);
assertTrue("Strict Parse failed: " + message, resultObject.success());
assertEquals("Strict Parse failed: " + message,
expectedCharsConsumed,
resultObject.charEnd);
assertEquals("Strict Parse failed: " + message,
resultDouble,
expectedResultDouble,
resultObject.getNumber().doubleValue(),
0.0);
}