mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-13574 Checkpoint commit. Basic NumberParseMatcher implementations in DecimalMatcher and SymbolMatcher. Cleanup in ICU4J.
X-SVN-Rev: 40869
This commit is contained in:
parent
9337205a54
commit
2ee42b9288
27 changed files with 1112 additions and 151 deletions
|
@ -108,7 +108,8 @@ number_decimfmtprops.o number_fluent.o number_formatimpl.o number_grouping.o \
|
|||
number_integerwidth.o number_longnames.o number_modifiers.o number_notation.o \
|
||||
number_padding.o number_patternmodifier.o number_patternstring.o \
|
||||
number_rounding.o number_scientific.o number_stringbuilder.o \
|
||||
numparse_stringsegment.o numparse_unisets.o numparse_parsednumber.o
|
||||
numparse_stringsegment.o numparse_unisets.o numparse_parsednumber.o \
|
||||
numparse_impl.o numparse_symbols.o numparse_decimal.o
|
||||
|
||||
|
||||
## Header files to install
|
||||
|
|
|
@ -190,6 +190,7 @@ int32_t DecimalQuantity::getMagnitude() const {
|
|||
|
||||
void DecimalQuantity::adjustMagnitude(int32_t delta) {
|
||||
if (precision != 0) {
|
||||
// TODO: How to handle overflow cases?
|
||||
scale += delta;
|
||||
origDelta += delta;
|
||||
}
|
||||
|
|
|
@ -38,9 +38,9 @@ enum CldrPatternStyle {
|
|||
// TODO: Consider scientific format.
|
||||
};
|
||||
|
||||
const char16_t *
|
||||
doGetPattern(UResourceBundle *res, const char *nsName, const char *patternKey, UErrorCode &publicStatus,
|
||||
UErrorCode &localStatus) {
|
||||
const char16_t*
|
||||
doGetPattern(UResourceBundle* res, const char* nsName, const char* patternKey, UErrorCode& publicStatus,
|
||||
UErrorCode& localStatus) {
|
||||
// Construct the path into the resource bundle
|
||||
CharString key;
|
||||
key.append("NumberElements/", publicStatus);
|
||||
|
@ -53,9 +53,9 @@ doGetPattern(UResourceBundle *res, const char *nsName, const char *patternKey, U
|
|||
return ures_getStringByKeyWithFallback(res, key.data(), nullptr, &localStatus);
|
||||
}
|
||||
|
||||
const char16_t *getPatternForStyle(const Locale &locale, const char *nsName, CldrPatternStyle style,
|
||||
UErrorCode &status) {
|
||||
const char *patternKey;
|
||||
const char16_t* getPatternForStyle(const Locale& locale, const char* nsName, CldrPatternStyle style,
|
||||
UErrorCode& status) {
|
||||
const char* patternKey;
|
||||
switch (style) {
|
||||
case CLDR_PATTERN_STYLE_DECIMAL:
|
||||
patternKey = "decimalFormat";
|
||||
|
@ -76,7 +76,7 @@ const char16_t *getPatternForStyle(const Locale &locale, const char *nsName, Cld
|
|||
|
||||
// Attempt to get the pattern with the native numbering system.
|
||||
UErrorCode localStatus = U_ZERO_ERROR;
|
||||
const char16_t *pattern;
|
||||
const char16_t* pattern;
|
||||
pattern = doGetPattern(res.getAlias(), nsName, patternKey, status, localStatus);
|
||||
if (U_FAILURE(status)) { return u""; }
|
||||
|
||||
|
@ -96,18 +96,21 @@ struct CurrencyFormatInfoResult {
|
|||
const char16_t* decimalSeparator;
|
||||
const char16_t* groupingSeparator;
|
||||
};
|
||||
CurrencyFormatInfoResult getCurrencyFormatInfo(const Locale& locale, const char* isoCode, UErrorCode& status) {
|
||||
|
||||
CurrencyFormatInfoResult
|
||||
getCurrencyFormatInfo(const Locale& locale, const char* isoCode, UErrorCode& status) {
|
||||
// TODO: Load this data in a centralized location like ICU4J?
|
||||
// TODO: Parts of this same data are loaded in dcfmtsym.cpp; should clean up.
|
||||
CurrencyFormatInfoResult result = { false, nullptr, nullptr, nullptr };
|
||||
if (U_FAILURE(status)) return result;
|
||||
CurrencyFormatInfoResult result = {false, nullptr, nullptr, nullptr};
|
||||
if (U_FAILURE(status)) { return result; }
|
||||
CharString key;
|
||||
key.append("Currencies/", status);
|
||||
key.append(isoCode, status);
|
||||
UErrorCode localStatus = status;
|
||||
LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_CURR, locale.getName(), &localStatus));
|
||||
ures_getByKeyWithFallback(bundle.getAlias(), key.data(), bundle.getAlias(), &localStatus);
|
||||
if (U_SUCCESS(localStatus) && ures_getSize(bundle.getAlias())>2) { // the length is 3 if more data is present
|
||||
if (U_SUCCESS(localStatus) &&
|
||||
ures_getSize(bundle.getAlias()) > 2) { // the length is 3 if more data is present
|
||||
ures_getByIndex(bundle.getAlias(), 2, bundle.getAlias(), &localStatus);
|
||||
int32_t dummy;
|
||||
result.exists = true;
|
||||
|
@ -121,30 +124,30 @@ CurrencyFormatInfoResult getCurrencyFormatInfo(const Locale& locale, const char*
|
|||
return result;
|
||||
}
|
||||
|
||||
inline bool unitIsCurrency(const MeasureUnit &unit) {
|
||||
inline bool unitIsCurrency(const MeasureUnit& unit) {
|
||||
return uprv_strcmp("currency", unit.getType()) == 0;
|
||||
}
|
||||
|
||||
inline bool unitIsNoUnit(const MeasureUnit &unit) {
|
||||
inline bool unitIsNoUnit(const MeasureUnit& unit) {
|
||||
return uprv_strcmp("none", unit.getType()) == 0;
|
||||
}
|
||||
|
||||
inline bool unitIsPercent(const MeasureUnit &unit) {
|
||||
inline bool unitIsPercent(const MeasureUnit& unit) {
|
||||
return uprv_strcmp("percent", unit.getSubtype()) == 0;
|
||||
}
|
||||
|
||||
inline bool unitIsPermille(const MeasureUnit &unit) {
|
||||
inline bool unitIsPermille(const MeasureUnit& unit) {
|
||||
return uprv_strcmp("permille", unit.getSubtype()) == 0;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
NumberFormatterImpl *NumberFormatterImpl::fromMacros(const MacroProps ¯os, UErrorCode &status) {
|
||||
NumberFormatterImpl* NumberFormatterImpl::fromMacros(const MacroProps& macros, UErrorCode& status) {
|
||||
return new NumberFormatterImpl(macros, true, status);
|
||||
}
|
||||
|
||||
void NumberFormatterImpl::applyStatic(const MacroProps ¯os, DecimalQuantity &inValue,
|
||||
NumberStringBuilder &outString, UErrorCode &status) {
|
||||
void NumberFormatterImpl::applyStatic(const MacroProps& macros, DecimalQuantity& inValue,
|
||||
NumberStringBuilder& outString, UErrorCode& status) {
|
||||
NumberFormatterImpl impl(macros, false, status);
|
||||
impl.applyUnsafe(inValue, outString, status);
|
||||
}
|
||||
|
@ -154,8 +157,8 @@ void NumberFormatterImpl::applyStatic(const MacroProps ¯os, DecimalQuantity
|
|||
// The "unsafe" method simply re-uses fMicros, eliminating the extra copy operation.
|
||||
// See MicroProps::processQuantity() for details.
|
||||
|
||||
void NumberFormatterImpl::apply(DecimalQuantity &inValue, NumberStringBuilder &outString,
|
||||
UErrorCode &status) const {
|
||||
void NumberFormatterImpl::apply(DecimalQuantity& inValue, NumberStringBuilder& outString,
|
||||
UErrorCode& status) const {
|
||||
if (U_FAILURE(status)) { return; }
|
||||
MicroProps micros;
|
||||
fMicroPropsGenerator->processQuantity(inValue, micros, status);
|
||||
|
@ -163,23 +166,23 @@ void NumberFormatterImpl::apply(DecimalQuantity &inValue, NumberStringBuilder &o
|
|||
microsToString(micros, inValue, outString, status);
|
||||
}
|
||||
|
||||
void NumberFormatterImpl::applyUnsafe(DecimalQuantity &inValue, NumberStringBuilder &outString,
|
||||
UErrorCode &status) {
|
||||
void NumberFormatterImpl::applyUnsafe(DecimalQuantity& inValue, NumberStringBuilder& outString,
|
||||
UErrorCode& status) {
|
||||
if (U_FAILURE(status)) { return; }
|
||||
fMicroPropsGenerator->processQuantity(inValue, fMicros, status);
|
||||
if (U_FAILURE(status)) { return; }
|
||||
microsToString(fMicros, inValue, outString, status);
|
||||
}
|
||||
|
||||
NumberFormatterImpl::NumberFormatterImpl(const MacroProps ¯os, bool safe, UErrorCode &status) {
|
||||
NumberFormatterImpl::NumberFormatterImpl(const MacroProps& macros, bool safe, UErrorCode& status) {
|
||||
fMicroPropsGenerator = macrosToMicroGenerator(macros, safe, status);
|
||||
}
|
||||
|
||||
//////////
|
||||
|
||||
const MicroPropsGenerator *
|
||||
NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe, UErrorCode &status) {
|
||||
const MicroPropsGenerator *chain = &fMicros;
|
||||
const MicroPropsGenerator*
|
||||
NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, UErrorCode& status) {
|
||||
const MicroPropsGenerator* chain = &fMicros;
|
||||
|
||||
// Check that macros is error-free before continuing.
|
||||
if (macros.copyErrorTo(status)) {
|
||||
|
@ -194,9 +197,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe,
|
|||
bool isPercent = isNoUnit && unitIsPercent(macros.unit);
|
||||
bool isPermille = isNoUnit && unitIsPermille(macros.unit);
|
||||
bool isCldrUnit = !isCurrency && !isNoUnit;
|
||||
bool isAccounting = macros.sign == UNUM_SIGN_ACCOUNTING
|
||||
|| macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS
|
||||
|| macros.sign == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO;
|
||||
bool isAccounting =
|
||||
macros.sign == UNUM_SIGN_ACCOUNTING || macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS ||
|
||||
macros.sign == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO;
|
||||
CurrencyUnit currency(kDefaultCurrency, status);
|
||||
if (isCurrency) {
|
||||
currency = CurrencyUnit(macros.unit, status); // Restore CurrencyUnit from MeasureUnit
|
||||
|
@ -208,7 +211,7 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe,
|
|||
|
||||
// Select the numbering system.
|
||||
LocalPointer<const NumberingSystem> nsLocal;
|
||||
const NumberingSystem *ns;
|
||||
const NumberingSystem* ns;
|
||||
if (macros.symbols.isNumberingSystem()) {
|
||||
ns = macros.symbols.getNumberingSystem();
|
||||
} else {
|
||||
|
@ -217,7 +220,7 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe,
|
|||
// Give ownership to the function scope.
|
||||
nsLocal.adoptInstead(ns);
|
||||
}
|
||||
const char *nsName = U_SUCCESS(status) ? ns->getName() : "latn";
|
||||
const char* nsName = U_SUCCESS(status) ? ns->getName() : "latn";
|
||||
|
||||
// Resolve the symbols. Do this here because currency may need to customize them.
|
||||
if (macros.symbols.isDecimalFormatSymbols()) {
|
||||
|
@ -232,7 +235,8 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe,
|
|||
// If we are formatting currency, check for a currency-specific pattern.
|
||||
const char16_t* pattern = nullptr;
|
||||
if (isCurrency) {
|
||||
CurrencyFormatInfoResult info = getCurrencyFormatInfo(macros.locale, currency.getSubtype(), status);
|
||||
CurrencyFormatInfoResult info = getCurrencyFormatInfo(
|
||||
macros.locale, currency.getSubtype(), status);
|
||||
if (info.exists) {
|
||||
pattern = info.pattern;
|
||||
// It's clunky to clone an object here, but this code is not frequently executed.
|
||||
|
@ -240,13 +244,13 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe,
|
|||
fMicros.symbols = symbols;
|
||||
fSymbols.adoptInstead(symbols);
|
||||
symbols->setSymbol(
|
||||
DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol,
|
||||
UnicodeString(info.decimalSeparator),
|
||||
FALSE);
|
||||
DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol,
|
||||
UnicodeString(info.decimalSeparator),
|
||||
FALSE);
|
||||
symbols->setSymbol(
|
||||
DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol,
|
||||
UnicodeString(info.groupingSeparator),
|
||||
FALSE);
|
||||
DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol,
|
||||
UnicodeString(info.groupingSeparator),
|
||||
FALSE);
|
||||
}
|
||||
}
|
||||
if (pattern == nullptr) {
|
||||
|
@ -407,9 +411,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps ¯os, bool safe,
|
|||
return chain;
|
||||
}
|
||||
|
||||
const PluralRules *
|
||||
NumberFormatterImpl::resolvePluralRules(const PluralRules *rulesPtr, const Locale &locale,
|
||||
UErrorCode &status) {
|
||||
const PluralRules*
|
||||
NumberFormatterImpl::resolvePluralRules(const PluralRules* rulesPtr, const Locale& locale,
|
||||
UErrorCode& status) {
|
||||
if (rulesPtr != nullptr) {
|
||||
return rulesPtr;
|
||||
}
|
||||
|
@ -420,8 +424,8 @@ NumberFormatterImpl::resolvePluralRules(const PluralRules *rulesPtr, const Local
|
|||
return fRules.getAlias();
|
||||
}
|
||||
|
||||
int32_t NumberFormatterImpl::microsToString(const MicroProps µs, DecimalQuantity &quantity,
|
||||
NumberStringBuilder &string, UErrorCode &status) {
|
||||
int32_t NumberFormatterImpl::microsToString(const MicroProps& micros, DecimalQuantity& quantity,
|
||||
NumberStringBuilder& string, UErrorCode& status) {
|
||||
micros.rounding.apply(quantity, status);
|
||||
micros.integerWidth.apply(quantity, status);
|
||||
int32_t length = writeNumber(micros, quantity, string, status);
|
||||
|
@ -439,8 +443,8 @@ int32_t NumberFormatterImpl::microsToString(const MicroProps µs, DecimalQua
|
|||
return length;
|
||||
}
|
||||
|
||||
int32_t NumberFormatterImpl::writeNumber(const MicroProps µs, DecimalQuantity &quantity,
|
||||
NumberStringBuilder &string, UErrorCode &status) {
|
||||
int32_t NumberFormatterImpl::writeNumber(const MicroProps& micros, DecimalQuantity& quantity,
|
||||
NumberStringBuilder& string, UErrorCode& status) {
|
||||
int32_t length = 0;
|
||||
if (quantity.isInfinite()) {
|
||||
length += string.insert(
|
||||
|
@ -480,8 +484,8 @@ int32_t NumberFormatterImpl::writeNumber(const MicroProps µs, DecimalQuanti
|
|||
return length;
|
||||
}
|
||||
|
||||
int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps µs, DecimalQuantity &quantity,
|
||||
NumberStringBuilder &string, UErrorCode &status) {
|
||||
int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps& micros, DecimalQuantity& quantity,
|
||||
NumberStringBuilder& string, UErrorCode& status) {
|
||||
int length = 0;
|
||||
int integerCount = quantity.getUpperDisplayMagnitude() + 1;
|
||||
for (int i = 0; i < integerCount; i++) {
|
||||
|
@ -499,21 +503,21 @@ int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps µs, Decima
|
|||
|
||||
// Get and append the next digit value
|
||||
int8_t nextDigit = quantity.getDigit(i);
|
||||
length += string.insert(
|
||||
0, getDigitFromSymbols(nextDigit, *micros.symbols), UNUM_INTEGER_FIELD, status);
|
||||
length += insertDigitFromSymbols(
|
||||
string, 0, nextDigit, *micros.symbols, UNUM_INTEGER_FIELD, status);
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
int32_t NumberFormatterImpl::writeFractionDigits(const MicroProps µs, DecimalQuantity &quantity,
|
||||
NumberStringBuilder &string, UErrorCode &status) {
|
||||
int32_t NumberFormatterImpl::writeFractionDigits(const MicroProps& micros, DecimalQuantity& quantity,
|
||||
NumberStringBuilder& string, UErrorCode& status) {
|
||||
int length = 0;
|
||||
int fractionCount = -quantity.getLowerDisplayMagnitude();
|
||||
for (int i = 0; i < fractionCount; i++) {
|
||||
// Get and append the next digit value
|
||||
int8_t nextDigit = quantity.getDigit(-i - 1);
|
||||
length += string.append(
|
||||
getDigitFromSymbols(nextDigit, *micros.symbols), UNUM_FRACTION_FIELD, status);
|
||||
length += insertDigitFromSymbols(
|
||||
string, string.length(), nextDigit, *micros.symbols, UNUM_INTEGER_FIELD, status);
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
|
|
@ -86,4 +86,12 @@ bool Grouper::groupAtPosition(int32_t position, const impl::DecimalQuantity &val
|
|||
&& value.getUpperDisplayMagnitude() - fGrouping1 + 1 >= fMinGrouping;
|
||||
}
|
||||
|
||||
int16_t Grouper::getPrimary() const {
|
||||
return fGrouping1;
|
||||
}
|
||||
|
||||
int16_t Grouper::getSecondary() const {
|
||||
return fGrouping2;
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
|
|
@ -64,8 +64,7 @@ int32_t ScientificModifier::apply(NumberStringBuilder &output, int32_t /*leftInd
|
|||
int32_t disp = std::abs(fExponent);
|
||||
for (int j = 0; j < fHandler->fSettings.fMinExponentDigits || disp > 0; j++, disp /= 10) {
|
||||
auto d = static_cast<int8_t>(disp % 10);
|
||||
const UnicodeString &digitString = getDigitFromSymbols(d, *fHandler->fSymbols);
|
||||
i += output.insert(i - j, digitString, UNUM_EXPONENT_FIELD, status);
|
||||
i += insertDigitFromSymbols(output, i - j, d, *fHandler->fSymbols, UNUM_EXPONENT_FIELD, status);
|
||||
}
|
||||
return i - rightIndex;
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@ namespace impl {
|
|||
|
||||
class UnicodeStringCharSequence : public CharSequence {
|
||||
public:
|
||||
explicit UnicodeStringCharSequence(const UnicodeString &other) {
|
||||
explicit UnicodeStringCharSequence(const UnicodeString& other) {
|
||||
fStr = other;
|
||||
}
|
||||
|
||||
|
@ -62,10 +62,10 @@ struct MicroProps : public MicroPropsGenerator {
|
|||
bool useCurrency;
|
||||
|
||||
// Note: This struct has no direct ownership of the following pointers.
|
||||
const DecimalFormatSymbols *symbols;
|
||||
const Modifier *modOuter;
|
||||
const Modifier *modMiddle;
|
||||
const Modifier *modInner;
|
||||
const DecimalFormatSymbols* symbols;
|
||||
const Modifier* modOuter;
|
||||
const Modifier* modMiddle;
|
||||
const Modifier* modInner;
|
||||
|
||||
// The following "helper" fields may optionally be used during the MicroPropsGenerator.
|
||||
// They live here to retain memory.
|
||||
|
@ -78,12 +78,12 @@ struct MicroProps : public MicroPropsGenerator {
|
|||
|
||||
MicroProps() = default;
|
||||
|
||||
MicroProps(const MicroProps &other) = default;
|
||||
MicroProps(const MicroProps& other) = default;
|
||||
|
||||
MicroProps &operator=(const MicroProps &other) = default;
|
||||
MicroProps& operator=(const MicroProps& other) = default;
|
||||
|
||||
void processQuantity(DecimalQuantity &, MicroProps µs, UErrorCode &status) const U_OVERRIDE {
|
||||
(void)status;
|
||||
void processQuantity(DecimalQuantity&, MicroProps& micros, UErrorCode& status) const U_OVERRIDE {
|
||||
(void) status;
|
||||
if (this == µs) {
|
||||
// Unsafe path: no need to perform a copy.
|
||||
U_ASSERT(!exhausted);
|
||||
|
@ -111,14 +111,13 @@ struct NumberFormatterResults : public UMemory {
|
|||
NumberStringBuilder string;
|
||||
};
|
||||
|
||||
inline const UnicodeString getDigitFromSymbols(int8_t digit, const DecimalFormatSymbols &symbols) {
|
||||
// TODO: Implement DecimalFormatSymbols.getCodePointZero()?
|
||||
if (digit == 0) {
|
||||
return symbols.getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kZeroDigitSymbol);
|
||||
} else {
|
||||
return symbols.getSymbol(static_cast<DecimalFormatSymbols::ENumberFormatSymbol>(
|
||||
DecimalFormatSymbols::ENumberFormatSymbol::kOneDigitSymbol + digit - 1));
|
||||
inline int32_t insertDigitFromSymbols(NumberStringBuilder& output, int32_t index, int8_t digit,
|
||||
const DecimalFormatSymbols& symbols, Field field,
|
||||
UErrorCode& status) {
|
||||
if (symbols.getCodePointZero() != -1) {
|
||||
return output.insertCodePoint(index, symbols.getCodePointZero() + digit, field, status);
|
||||
}
|
||||
return output.insert(index, symbols.getConstDigitSymbol(digit), field, status);
|
||||
}
|
||||
|
||||
} // namespace impl
|
||||
|
|
313
icu4c/source/i18n/numparse_decimal.cpp
Normal file
313
icu4c/source/i18n/numparse_decimal.cpp
Normal file
|
@ -0,0 +1,313 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "numparse_decimal.h"
|
||||
#include "numparse_unisets.h"
|
||||
#include "numparse_utils.h"
|
||||
#include "unicode/uchar.h"
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::numparse;
|
||||
using namespace icu::numparse::impl;
|
||||
|
||||
|
||||
/**
 * Builds a matcher for decimal numbers using the separators and digits from `symbols`, the
 * grouping sizes from `grouper`, and the behaviour switches in `parseFlags`.
 *
 * Separator sets are taken from the static unisets cache where possible; otherwise a
 * UnicodeSet is allocated and owned by this object through the fLocal* members.
 *
 * @param symbols    Source of the grouping/decimal separator strings and the digit symbols.
 * @param grouper    Source of the primary and secondary grouping sizes.
 * @param parseFlags Bit set of PARSE_FLAG_* values read by this constructor:
 *                   MONETARY_SEPARATORS, STRICT_SEPARATORS, STRICT_GROUPING_SIZE,
 *                   GROUPING_DISABLED, FRACTION_GROUPING_DISABLED and INTEGER_ONLY.
 */
DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Grouper& grouper,
                               parse_flags_t parseFlags) {
    if (0 != (parseFlags & PARSE_FLAG_MONETARY_SEPARATORS)) {
        groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetaryGroupingSeparatorSymbol);
        decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol);
    } else {
        groupingSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
        decimalSeparator = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
    }
    bool strictSeparators = 0 != (parseFlags & PARSE_FLAG_STRICT_SEPARATORS);
    unisets::Key groupingKey = strictSeparators ? unisets::STRICT_ALL_SEPARATORS
                                                : unisets::ALL_SEPARATORS;

    // Attempt to find separators in the static cache

    groupingUniSet = unisets::get(groupingKey);
    unisets::Key decimalKey = unisets::chooseFrom(
            decimalSeparator,
            strictSeparators ? unisets::STRICT_COMMA : unisets::COMMA,
            strictSeparators ? unisets::STRICT_PERIOD : unisets::PERIOD);
    if (decimalKey != unisets::COUNT) {
        decimalUniSet = unisets::get(decimalKey);
    } else {
        // The decimal separator is not covered by the cached sets: build a one-element set
        // from its first code point and keep ownership in fLocalDecimalUniSet.
        auto* set = new UnicodeSet();
        set->add(decimalSeparator.char32At(0));
        set->freeze();
        decimalUniSet = set;
        fLocalDecimalUniSet.adoptInstead(set);
    }

    if (groupingKey != unisets::COUNT && decimalKey != unisets::COUNT) {
        // Everything is available in the static cache
        separatorSet = groupingUniSet;
        // The lead set must follow the same strictness as the separator sets chosen above:
        // strict mode uses the strict set, lenient mode uses the full set.
        leadSet = unisets::get(
                strictSeparators ? unisets::DIGITS_OR_STRICT_ALL_SEPARATORS
                                 : unisets::DIGITS_OR_ALL_SEPARATORS);
    } else {
        auto* set = new UnicodeSet();
        set->addAll(*groupingUniSet);
        set->addAll(*decimalUniSet);
        set->freeze();
        separatorSet = set;
        fLocalSeparatorSet.adoptInstead(set);
        // No precomputed lead set applies; getLeadCodePoints() builds one on demand.
        leadSet = nullptr;
    }

    int cpZero = symbols.getCodePointZero();
    if (cpZero == -1 || !u_isdigit(cpZero) || u_digit(cpZero, 10) != 0) {
        // Uncommon case: okay to allocate.
        // The digit symbols cannot be recognized through u_isdigit/u_digit alone, so keep
        // the ten digit strings for literal matching.
        auto digitStrings = new UnicodeString[10];
        fLocalDigitStrings.adoptInstead(digitStrings);
        for (int32_t i = 0; i <= 9; i++) {
            digitStrings[i] = symbols.getConstDigitSymbol(i);
        }
    }

    requireGroupingMatch = 0 != (parseFlags & PARSE_FLAG_STRICT_GROUPING_SIZE);
    groupingDisabled = 0 != (parseFlags & PARSE_FLAG_GROUPING_DISABLED);
    fractionGroupingDisabled = 0 != (
            parseFlags & PARSE_FLAG_FRACTION_GROUPING_DISABLED);
    integerOnly = 0 != (parseFlags & PARSE_FLAG_INTEGER_ONLY);
    grouping1 = grouper.getPrimary();
    grouping2 = grouper.getSecondary();
}
|
||||
|
||||
/**
 * Matches an ordinary (non-exponent) number by delegating to the four-argument overload
 * with an exponent sign of zero.
 */
bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
    const int8_t noExponentSign = 0;
    return match(segment, result, noExponentSign, status);
}
|
||||
|
||||
/**
 * Consumes digits, grouping separators and a decimal separator from the front of `segment`
 * and records the parsed value in `result`.
 *
 * @param segment      The input; its offset is advanced past what was consumed, or reset to
 *                     its initial value on a strict-grouping failure or when only separators
 *                     (no digits) were seen.
 * @param result       Receives digits in `result.quantity` (mantissa mode) or a magnitude
 *                     adjustment (exponent mode), plus flags and the chars-consumed marker.
 * @param exponentSign 0 to parse a mantissa. Non-zero to parse the digits of an exponent;
 *                     the parsed exponent is multiplied by this value before being applied
 *                     with adjustMagnitude(). On exponent overflow, -1 clears the quantity
 *                     and any other value marks the result as infinity.
 * @return true if the segment was consumed completely or the remaining text is a partial
 *         prefix of a digit string or separator; false otherwise (including when a number
 *         was already seen and exponentSign is 0).
 */
bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign,
                           UErrorCode&) const {
    if (result.seenNumber() && exponentSign == 0) {
        // A number has already been consumed.
        return false;
    } else if (exponentSign != 0) {
        // scientific notation always comes after the number
        U_ASSERT(!result.quantity.bogus);
    }

    // Snapshot used to roll back `result` on a strict-grouping failure.
    ParsedNumber backupResult(result);

    // strict parsing
    bool strictFail = false; // did we exit with a strict parse failure?
    UnicodeString actualGroupingString = groupingSeparator;
    UnicodeString actualDecimalString = decimalSeparator;
    int32_t groupedDigitCount = 0; // tracking count of digits delimited by grouping separator
    int32_t backupOffset = -1; // used for preserving the last confirmed position
    bool afterFirstGrouping = false;
    bool seenGrouping = false;
    bool seenDecimal = false;
    int32_t digitsAfterDecimal = 0;
    int32_t initialOffset = segment.getOffset();
    int32_t exponent = 0;
    bool hasPartialPrefix = false;
    while (segment.length() > 0) {
        hasPartialPrefix = false;

        // Attempt to match a digit.
        int8_t digit = -1;

        // Try by code point digit value.
        int cp = segment.getCodePoint();
        if (u_isdigit(cp)) {
            segment.adjustOffset(U16_LENGTH(cp));
            digit = static_cast<int8_t>(u_digit(cp, 10));
        }

        // Try by digit string.
        if (digit == -1 && !fLocalDigitStrings.isNull()) {
            for (int i = 0; i < 10; i++) {
                const UnicodeString& str = fLocalDigitStrings[i];
                int overlap = segment.getCommonPrefixLength(str);
                if (overlap == str.length()) {
                    segment.adjustOffset(overlap);
                    digit = static_cast<int8_t>(i);
                    break;
                } else if (overlap == segment.length()) {
                    hasPartialPrefix = true;
                }
            }
        }

        if (digit >= 0) {
            // Digit was found.
            // Check for grouping size violation
            if (backupOffset != -1) {
                if (requireGroupingMatch) {
                    // comma followed by digit, so group before comma is a secondary
                    // group. If there was a group separator before that, the group
                    // must == the secondary group length, else it can be <= the
                    // secondary group length.
                    if ((afterFirstGrouping && groupedDigitCount != grouping2) ||
                        (!afterFirstGrouping && groupedDigitCount > grouping2)) {
                        strictFail = true;
                        break;
                    }
                }
                afterFirstGrouping = true;
                backupOffset = -1;
                groupedDigitCount = 0;
            }

            // Save the digit in the DecimalQuantity or scientific adjustment.
            if (exponentSign != 0) {
                // Detect overflow BEFORE multiplying. Signed overflow is undefined
                // behaviour, and a wrapped product is not guaranteed to compare smaller
                // than the previous value, so an after-the-fact check is unreliable.
                // INT32_MAX doubles as the "overflowed" sentinel and is sticky.
                if (exponent > (INT32_MAX - digit) / 10) {
                    // Overflow
                    exponent = INT32_MAX;
                } else {
                    exponent = exponent * 10 + digit;
                }
            } else {
                if (result.quantity.bogus) {
                    result.quantity.bogus = false;
                }
                result.quantity.appendDigit(digit, 0, true);
            }
            result.setCharsConsumed(segment);
            groupedDigitCount++;
            if (seenDecimal) {
                digitsAfterDecimal++;
            }
            continue;
        }

        // Attempt to match a literal grouping or decimal separator
        int32_t decimalOverlap = segment.getCommonPrefixLength(actualDecimalString);
        bool decimalStringMatch = decimalOverlap == actualDecimalString.length();
        int32_t groupingOverlap = segment.getCommonPrefixLength(actualGroupingString);
        bool groupingStringMatch = groupingOverlap == actualGroupingString.length();

        hasPartialPrefix = (decimalOverlap == segment.length()) || (groupingOverlap == segment.length());

        if (!seenDecimal && !groupingStringMatch &&
            (decimalStringMatch || (!seenDecimal && decimalUniSet->contains(cp)))) {
            // matched a decimal separator
            if (requireGroupingMatch) {
                if (backupOffset != -1 || (seenGrouping && groupedDigitCount != grouping1)) {
                    strictFail = true;
                    break;
                }
            }

            // If we're only parsing integers, then don't parse this one.
            if (integerOnly) {
                break;
            }

            seenDecimal = true;
            if (!decimalStringMatch) {
                // Matched through the set rather than the literal string: from now on
                // treat this code point as the decimal separator.
                actualDecimalString = UnicodeString(cp);
            }
            segment.adjustOffset(actualDecimalString.length());
            result.setCharsConsumed(segment);
            result.flags |= FLAG_HAS_DECIMAL_SEPARATOR;
            continue;
        }

        if (!groupingDisabled && !decimalStringMatch &&
            (groupingStringMatch || (!seenGrouping && groupingUniSet->contains(cp)))) {
            // matched a grouping separator
            if (requireGroupingMatch) {
                if (groupedDigitCount == 0) {
                    // leading group
                    strictFail = true;
                    break;
                } else if (backupOffset != -1) {
                    // two group separators in a row
                    break;
                }
            }

            if (fractionGroupingDisabled && seenDecimal) {
                // Stop parsing here.
                break;
            }

            seenGrouping = true;
            if (!groupingStringMatch) {
                // Matched through the set rather than the literal string: from now on
                // treat this code point as the grouping separator.
                actualGroupingString = UnicodeString(cp);
            }
            backupOffset = segment.getOffset();
            segment.adjustOffset(actualGroupingString.length());
            // Note: do NOT set charsConsumed
            continue;
        }

        // Not a digit and not a separator
        break;
    }

    // Check the final grouping for validity
    if (requireGroupingMatch && !seenDecimal && seenGrouping && afterFirstGrouping &&
        groupedDigitCount != grouping1) {
        strictFail = true;
    }

    if (requireGroupingMatch && strictFail) {
        result = backupResult;
        segment.setOffset(initialOffset);
    }

    if (result.quantity.bogus && segment.getOffset() != initialOffset) {
        // Strings that start with a separator but have no digits.
        // We don't need a backup of ParsedNumber because no changes could have been made to it.
        segment.setOffset(initialOffset);
        hasPartialPrefix = true;
    }

    if (!result.quantity.bogus) {
        // Digits read after the decimal separator were appended as integer digits;
        // shift the magnitude down by that many places.
        result.quantity.adjustMagnitude(-digitsAfterDecimal);
    }

    if (exponentSign != 0 && segment.getOffset() != initialOffset) {
        U_ASSERT(!result.quantity.bogus);
        bool overflow = (exponent == INT32_MAX);
        if (!overflow) {
            result.quantity.adjustMagnitude(exponentSign * exponent);
        }
        if (overflow) {
            if (exponentSign == -1) {
                // Set to zero
                result.quantity.clear();
            } else {
                // Set to infinity
                result.quantity.bogus = true;
                result.flags |= FLAG_INFINITY;
            }
        }
    }

    return segment.length() == 0 || hasPartialPrefix;
}
|
||||
|
||||
/**
 * Returns a newly allocated set of the code points that can begin a match.
 *
 * When no custom digit strings are in use and a cached lead set was chosen at construction,
 * the result is a copy of that set. Otherwise the set is assembled from the cached DIGITS
 * set, this matcher's separator set and the lead code point of each custom digit string,
 * and is frozen before being returned.
 */
const UnicodeSet* DecimalMatcher::getLeadCodePoints() const {
    const bool hasCustomDigits = !fLocalDigitStrings.isNull();
    if (!hasCustomDigits && leadSet != nullptr) {
        return new UnicodeSet(*leadSet);
    }

    auto* combined = new UnicodeSet();
    // Assumption: the sets are all single code points.
    combined->addAll(*unisets::get(unisets::DIGITS));
    combined->addAll(*separatorSet);
    if (hasCustomDigits) {
        for (int digitIndex = 0; digitIndex < 10; digitIndex++) {
            utils::putLeadCodePoint(fLocalDigitStrings[digitIndex], combined);
        }
    }
    combined->freeze();
    return combined;
}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
69
icu4c/source/i18n/numparse_decimal.h
Normal file
69
icu4c/source/i18n/numparse_decimal.h
Normal file
|
@ -0,0 +1,69 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
#ifndef __NUMPARSE_DECIMAL_H__
|
||||
#define __NUMPARSE_DECIMAL_H__
|
||||
|
||||
#include "unicode/uniset.h"
|
||||
#include "numparse_types.h"
|
||||
|
||||
U_NAMESPACE_BEGIN namespace numparse {
|
||||
namespace impl {
|
||||
|
||||
using ::icu::number::impl::Grouper;
|
||||
|
||||
// Matcher that recognizes the digits, grouping separators and decimal separator of a number.
// Configuration (separators, grouping sizes, strictness flags) is fixed at construction.
class DecimalMatcher : public NumberParseMatcher, public UMemory {
|
||||
public:
|
||||
// Configures the matcher from the locale symbols, the grouping sizes in `grouper`,
// and the PARSE_FLAG_* bits in `parseFlags`.
DecimalMatcher(const DecimalFormatSymbols& symbols, const Grouper& grouper,
|
||||
parse_flags_t parseFlags);
|
||||
|
||||
// NumberParseMatcher entry point; matches a number that is not an exponent.
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
|
||||
|
||||
// Overload that can also parse the digits of an exponent: a non-zero exponentSign makes the
// parsed digits adjust the magnitude of the existing quantity instead of appending to it.
bool
|
||||
match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign, UErrorCode& status) const;
|
||||
|
||||
// Returns a newly allocated set of the code points that may start a match.
const UnicodeSet* getLeadCodePoints() const override;
|
||||
|
||||
private:
|
||||
/** If true, only accept strings whose grouping sizes match the locale */
|
||||
bool requireGroupingMatch;
|
||||
|
||||
/** If true, do not accept grouping separators at all */
|
||||
bool groupingDisabled;
|
||||
|
||||
/** If true, do not accept fraction grouping separators */
|
||||
bool fractionGroupingDisabled;
|
||||
|
||||
/** If true, do not accept numbers in the fraction */
|
||||
bool integerOnly;
|
||||
|
||||
// Primary and secondary grouping sizes, as reported by the Grouper given to the constructor.
int16_t grouping1;
|
||||
int16_t grouping2;
|
||||
|
||||
// Literal separator strings taken from the DecimalFormatSymbols (monetary or regular).
UnicodeString groupingSeparator;
|
||||
UnicodeString decimalSeparator;
|
||||
|
||||
// Assumption: these sets all consist of single code points. If this assumption needs to be broken,
|
||||
// fix getLeadCodePoints() as well as matching logic. Be careful of the performance impact.
|
||||
const UnicodeSet* groupingUniSet;
|
||||
const UnicodeSet* decimalUniSet;
|
||||
const UnicodeSet* separatorSet;
|
||||
// May be nullptr, in which case getLeadCodePoints() assembles the set itself.
const UnicodeSet* leadSet;
|
||||
|
||||
// Make this class the owner of a few objects that could be allocated.
|
||||
// The first two LocalPointers are used for assigning ownership only.
|
||||
LocalPointer<const UnicodeSet> fLocalDecimalUniSet;
|
||||
LocalPointer<const UnicodeSet> fLocalSeparatorSet;
|
||||
// Null unless the digit symbols need literal string matching; otherwise holds ten strings.
LocalArray<const UnicodeString> fLocalDigitStrings;
|
||||
};
|
||||
|
||||
|
||||
} // namespace impl
|
||||
} // namespace numparse
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif //__NUMPARSE_DECIMAL_H__
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
113
icu4c/source/i18n/numparse_impl.cpp
Normal file
113
icu4c/source/i18n/numparse_impl.cpp
Normal file
|
@ -0,0 +1,113 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
#include "number_types.h"
|
||||
#include "number_patternstring.h"
|
||||
#include "numparse_types.h"
|
||||
#include "numparse_impl.h"
|
||||
#include "numparse_symbols.h"
|
||||
#include "numparse_decimal.h"
|
||||
#include "unicode/numberformatter.h"
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::number;
|
||||
using namespace icu::number::impl;
|
||||
using namespace icu::numparse;
|
||||
using namespace icu::numparse::impl;
|
||||
|
||||
|
||||
/**
 * Builds a frozen parser for the given locale and pattern string.
 *
 * At this stage of the port only a DecimalMatcher and a MinusSignMatcher are installed; the
 * commented-out ICU4J lines below mark the matchers that still need to be ported.
 *
 * @param locale        Locale used to load the DecimalFormatSymbols and grouping data.
 * @param patternString Decimal format pattern, parsed with PatternParser::parseToPatternInfo().
 * @param parseFlags    Bit set of ParseFlags forwarded to the parser and to the DecimalMatcher.
 * @param status        Standard ICU error code. If it already indicates failure on entry, or is set
 *                      to a failure while building, nullptr is returned and nothing is leaked.
 * @return A new parser owned by the caller, or nullptr on failure.
 */
NumberParserImpl*
NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString& patternString,
                                     parse_flags_t parseFlags, UErrorCode& status) {
    // ICU convention: do nothing if the caller already carries an error.
    if (U_FAILURE(status)) {
        return nullptr;
    }

    // Perform all fallible setup before allocating, so that the early exit cannot leak anything.
    DecimalFormatSymbols symbols(locale, status);

    //    IgnorablesMatcher* ignorables = IgnorablesMatcher.getDefault();
    //
    //    MatcherFactory factory = new MatcherFactory();
    //    factory.currency = Currency.getInstance("USD");
    //    factory.symbols = symbols;
    //    factory.ignorables = ignorables;
    //    factory.locale = locale;
    //    factory.parseFlags = parseFlags;

    ParsedPatternInfo patternInfo;
    PatternParser::parseToPatternInfo(patternString, patternInfo, status);
    //    AffixMatcher.createMatchers(patternInfo, parser, factory, ignorables, parseFlags);

    if (U_FAILURE(status)) {
        // Bad locale data or an unparseable pattern: do not build a parser from it.
        return nullptr;
    }

    Grouper grouper = Grouper::forStrategy(UNUM_GROUPING_AUTO);
    grouper.setLocaleData(patternInfo, locale);

    // Allocate everything up front so that a failed allocation can be unwound in one place.
    DecimalMatcher* decimalMatcher = new DecimalMatcher(symbols, grouper, parseFlags);
    MinusSignMatcher* minusSignMatcher = new MinusSignMatcher(symbols, false);
    NumberParserImpl* parser = new NumberParserImpl(parseFlags, true);
    if (decimalMatcher == nullptr || minusSignMatcher == nullptr || parser == nullptr) {
        delete decimalMatcher;
        delete minusSignMatcher;
        delete parser;
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }

    //    parser.addMatcher({ignorables, false});
    parser->addAndAdoptMatcher(decimalMatcher);
    parser->addAndAdoptMatcher(minusSignMatcher);
    //    parser.addMatcher(NanMatcher.getInstance(symbols, parseFlags));
    //    parser.addMatcher(ScientificMatcher.getInstance(symbols, grouper, parseFlags));
    //    parser.addMatcher(CurrencyTrieMatcher.getInstance(locale));
    //    parser.addMatcher(new RequireNumberMatcher());

    parser->freeze();
    return parser;
}
|
||||
|
||||
/**
 * Creates an empty, unfrozen parser.
 *
 * @param parseFlags   Bit set of ParseFlags stored for later use.
 * @param computeLeads If true, a lead-code-point set is requested from every matcher that is added.
 */
NumberParserImpl::NumberParserImpl(parse_flags_t parseFlags, bool computeLeads)
        : fParseFlags(parseFlags),
          fComputeLeads(computeLeads) {}
|
||||
|
||||
/** Releases every adopted matcher and, when leads were computed, the matching lead set. */
NumberParserImpl::~NumberParserImpl() {
    int32_t idx = 0;
    while (idx < fNumMatchers) {
        delete fMatchers[idx];
        // Lead sets only exist when they were requested at construction time.
        if (fComputeLeads) {
            delete fLeads[idx];
        }
        ++idx;
    }
    fNumMatchers = 0;
}
|
||||
|
||||
void NumberParserImpl::addAndAdoptMatcher(const NumberParseMatcher* matcher) {
|
||||
if (fNumMatchers + 1 > fMatchers.getCapacity()) {
|
||||
fMatchers.resize(fNumMatchers * 2, fNumMatchers);
|
||||
if (fComputeLeads) {
|
||||
// The two arrays should grow in tandem:
|
||||
U_ASSERT(fNumMatchers >= fLeads.getCapacity());
|
||||
fLeads.resize(fNumMatchers * 2, fNumMatchers);
|
||||
}
|
||||
}
|
||||
|
||||
fMatchers[fNumMatchers] = matcher;
|
||||
|
||||
if (fComputeLeads) {
|
||||
fLeads[fNumMatchers] = matcher->getLeadCodePoints();
|
||||
}
|
||||
|
||||
fNumMatchers++;
|
||||
}
|
||||
|
||||
/** Marks the parser as frozen; called once all matchers have been added. */
void NumberParserImpl::freeze() {
    fFrozen = true;
}
|
||||
|
||||
//void
|
||||
//NumberParserImpl::parse(const UnicodeString& input, int32_t start, bool greedy, ParsedNumber& result,
|
||||
// UErrorCode& status) const {
|
||||
// U_ASSERT(frozen);
|
||||
// // TODO: Check start >= 0 and start < input.length()
|
||||
// StringSegment segment(utils::maybeFold(input, parseFlags));
|
||||
// segment.adjustOffset(start);
|
||||
// if (greedy) {
|
||||
// parseGreedyRecursive(segment, result);
|
||||
// } else {
|
||||
// parseLongestRecursive(segment, result);
|
||||
// }
|
||||
// for (NumberParseMatcher matcher : matchers) {
|
||||
// matcher.postProcess(result);
|
||||
// }
|
||||
//}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
56
icu4c/source/i18n/numparse_impl.h
Normal file
56
icu4c/source/i18n/numparse_impl.h
Normal file
|
@ -0,0 +1,56 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
#ifndef __NUMPARSE_IMPL_H__
|
||||
#define __NUMPARSE_IMPL_H__
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
U_NAMESPACE_BEGIN namespace numparse {
|
||||
namespace impl {
|
||||
|
||||
class NumberParserImpl {
|
||||
public:
|
||||
static NumberParserImpl* createSimpleParser(const Locale& locale, const UnicodeString& patternString,
|
||||
parse_flags_t parseFlags, UErrorCode& status);
|
||||
|
||||
void addAndAdoptMatcher(const NumberParseMatcher* matcher);
|
||||
|
||||
void freeze();
|
||||
|
||||
void parse(const UnicodeString& input, bool greedy, ParsedNumber& result, UErrorCode& status) const;
|
||||
|
||||
void parse(const UnicodeString& input, int32_t start, bool greedy, ParsedNumber& result,
|
||||
UErrorCode& status) const;
|
||||
|
||||
UnicodeString toString() const;
|
||||
|
||||
private:
|
||||
parse_flags_t fParseFlags;
|
||||
int32_t fNumMatchers = 0;
|
||||
// NOTE: The stack capacity for fMatchers and fLeads should be the same
|
||||
MaybeStackArray<const NumberParseMatcher*, 10> fMatchers;
|
||||
MaybeStackArray<const UnicodeSet*, 10> fLeads;
|
||||
bool fComputeLeads;
|
||||
bool fFrozen = false;
|
||||
|
||||
NumberParserImpl(parse_flags_t parseFlags, bool computeLeads);
|
||||
|
||||
~NumberParserImpl();
|
||||
|
||||
void parseGreedyRecursive(StringSegment& segment, ParsedNumber& result) const;
|
||||
|
||||
void parseLongestRecursive(StringSegment& segment, ParsedNumber& result) const;
|
||||
};
|
||||
|
||||
|
||||
} // namespace impl
|
||||
} // namespace numparse
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif //__NUMPARSE_IMPL_H__
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
95
icu4c/source/i18n/numparse_symbols.cpp
Normal file
95
icu4c/source/i18n/numparse_symbols.cpp
Normal file
|
@ -0,0 +1,95 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "numparse_symbols.h"
|
||||
#include "numparse_utils.h"
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::numparse;
|
||||
using namespace icu::numparse::impl;
|
||||
|
||||
|
||||
/**
 * Sets up a matcher over the shared UnicodeSet identified by `key`.
 *
 * The literal symbol string is retained only when that set does not already contain it;
 * otherwise fString is made bogus so that matching relies on the set alone.
 */
SymbolMatcher::SymbolMatcher(const UnicodeString& symbolString, unisets::Key key)
        : fUniSet(unisets::get(key)),
          fOwnsUniSet(false) {
    if (!fUniSet->contains(symbolString)) {
        fString = symbolString;
    } else {
        fString.setToBogus();
    }
}
|
||||
|
||||
/** Deletes the UnicodeSet only when this matcher owns it. */
SymbolMatcher::~SymbolMatcher() {
    if (!fOwnsUniSet) {
        return;
    }
    delete fUniSet;
    fUniSet = nullptr;
}
|
||||
|
||||
/** Returns the UnicodeSet this matcher tests code points against; ownership is not transferred. */
const UnicodeSet* SymbolMatcher::getSet() {
    return fUniSet;
}
|
||||
|
||||
/**
 * Tries to consume this symbol at the start of the segment.
 *
 * The literal string (if any) is tried before the UnicodeSet so that trailing chars are consumed
 * greedily. On a successful match the segment offset is advanced and accept() is invoked.
 *
 * @return false when the matcher is disabled or a symbol was consumed; otherwise true only if the
 *         common prefix with the literal string spans the whole remaining segment.
 */
bool SymbolMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const {
    // Cheap rejection: the subclass may have turned this matcher off for the current result.
    if (isDisabled(result)) {
        return false;
    }

    // Literal string first, to consume as many chars as possible.
    int prefixLen = 0;
    if (!fString.isEmpty()) {
        prefixLen = segment.getCommonPrefixLength(fString);
        if (prefixLen == fString.length()) {
            segment.adjustOffset(fString.length());
            accept(segment, result);
            return false;
        }
    }

    // Fall back to a single code point from the set.
    int leadCp = segment.getCodePoint();
    bool inSet = (leadCp != -1) && fUniSet->contains(leadCp);
    if (!inSet) {
        return prefixLen == segment.length();
    }

    segment.adjustOffset(U16_LENGTH(leadCp));
    accept(segment, result);
    return false;
}
|
||||
|
||||
/**
 * Builds the set of code points that can start a match.
 * The returned set is newly allocated and must be adopted by the caller.
 */
const UnicodeSet* SymbolMatcher::getLeadCodePoints() const {
    if (!fString.isEmpty()) {
        // Both the set and the literal string can start a match.
        UnicodeSet* leads = new UnicodeSet();
        utils::putLeadCodePoints(fUniSet, leads);
        utils::putLeadCodePoint(fString, leads);
        leads->freeze();
        return leads;
    }

    // No literal string in play.
    // Assumption: for sets from the static unisets cache, uniSet == leadCodePoints.
    return new UnicodeSet(*fUniSet);
}
|
||||
|
||||
|
||||
/**
 * Matcher for the locale's minus sign symbol, backed by the unisets::MINUS_SIGN set.
 *
 * @param dfs           Source of the locale's minus sign symbol.
 * @param allowTrailing If false, the matcher is disabled once a number has been seen.
 */
MinusSignMatcher::MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing)
        : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol),
                        unisets::MINUS_SIGN),
          fAllowTrailing(allowTrailing) {}
|
||||
|
||||
bool MinusSignMatcher::isDisabled(const ParsedNumber& result) const {
|
||||
return 0 != (result.flags & FLAG_NEGATIVE) ||
|
||||
(fAllowTrailing ? false : result.seenNumber());
|
||||
}
|
||||
|
||||
/** Records a consumed minus sign: flags the result negative, then updates its chars consumed. */
void MinusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
    result.flags |= FLAG_NEGATIVE;
    result.setCharsConsumed(segment);
}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
60
icu4c/source/i18n/numparse_symbols.h
Normal file
60
icu4c/source/i18n/numparse_symbols.h
Normal file
|
@ -0,0 +1,60 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
#ifndef __NUMPARSE_SYMBOLS_H__
|
||||
#define __NUMPARSE_SYMBOLS_H__
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "numparse_unisets.h"
|
||||
|
||||
U_NAMESPACE_BEGIN namespace numparse {
|
||||
namespace impl {
|
||||
|
||||
|
||||
class SymbolMatcher : public NumberParseMatcher, public UMemory {
|
||||
public:
|
||||
~SymbolMatcher() override;
|
||||
|
||||
const UnicodeSet* getSet();
|
||||
|
||||
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
|
||||
|
||||
const UnicodeSet* getLeadCodePoints() const override;
|
||||
|
||||
virtual bool isDisabled(const ParsedNumber& result) const = 0;
|
||||
|
||||
virtual void accept(StringSegment& segment, ParsedNumber& result) const = 0;
|
||||
|
||||
protected:
|
||||
UnicodeString fString;
|
||||
const UnicodeSet* fUniSet;
|
||||
bool fOwnsUniSet;
|
||||
|
||||
SymbolMatcher(const UnicodeString& symbolString, unisets::Key key);
|
||||
};
|
||||
|
||||
|
||||
class MinusSignMatcher : public SymbolMatcher {
|
||||
public:
|
||||
MinusSignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing);
|
||||
|
||||
protected:
|
||||
bool isDisabled(const ParsedNumber& result) const override;
|
||||
|
||||
void accept(StringSegment& segment, ParsedNumber& result) const override;
|
||||
|
||||
private:
|
||||
bool fAllowTrailing;
|
||||
};
|
||||
|
||||
|
||||
} // namespace impl
|
||||
} // namespace numparse
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif //__NUMPARSE_SYMBOLS_H__
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
|
@ -17,6 +17,42 @@ namespace impl {
|
|||
class StringSegment;
|
||||
class ParsedNumber;
|
||||
|
||||
typedef int32_t result_flags_t;
|
||||
typedef int32_t parse_flags_t;
|
||||
|
||||
/** Flags for the type result_flags_t */
|
||||
enum ResultFlags {
|
||||
FLAG_NEGATIVE = 0x0001,
|
||||
FLAG_PERCENT = 0x0002,
|
||||
FLAG_PERMILLE = 0x0004,
|
||||
FLAG_HAS_EXPONENT = 0x0008,
|
||||
FLAG_HAS_DEFAULT_CURRENCY = 0x0010,
|
||||
FLAG_HAS_DECIMAL_SEPARATOR = 0x0020,
|
||||
FLAG_NAN = 0x0040,
|
||||
FLAG_INFINITY = 0x0080,
|
||||
FLAG_FAIL = 0x0100,
|
||||
};
|
||||
|
||||
/** Flags for the type parse_flags_t */
|
||||
enum ParseFlags {
|
||||
PARSE_FLAG_IGNORE_CASE = 0x0001,
|
||||
PARSE_FLAG_MONETARY_SEPARATORS = 0x0002,
|
||||
PARSE_FLAG_STRICT_SEPARATORS = 0x0004,
|
||||
PARSE_FLAG_STRICT_GROUPING_SIZE = 0x0008,
|
||||
PARSE_FLAG_INTEGER_ONLY = 0x0010,
|
||||
PARSE_FLAG_GROUPING_DISABLED = 0x0020,
|
||||
PARSE_FLAG_FRACTION_GROUPING_DISABLED = 0x0040,
|
||||
PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES = 0x0080,
|
||||
PARSE_FLAG_USE_FULL_AFFIXES = 0x0100,
|
||||
PARSE_FLAG_EXACT_AFFIX = 0x0200,
|
||||
PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400,
|
||||
};
|
||||
|
||||
//template<typename T>
|
||||
//struct MaybeNeedsAdoption {
|
||||
// T* ptr;
|
||||
// bool needsAdoption;
|
||||
//};
|
||||
|
||||
/**
|
||||
* Struct-like class to hold the results of a parsing routine.
|
||||
|
@ -25,17 +61,6 @@ class ParsedNumber;
|
|||
*/
|
||||
class ParsedNumber {
|
||||
public:
|
||||
enum ParsedNumberFlags {
|
||||
FLAG_NEGATIVE = 0x0001,
|
||||
FLAG_PERCENT = 0x0002,
|
||||
FLAG_PERMILLE = 0x0004,
|
||||
FLAG_HAS_EXPONENT = 0x0008,
|
||||
FLAG_HAS_DEFAULT_CURRENCY = 0x0010,
|
||||
FLAG_HAS_DECIMAL_SEPARATOR = 0x0020,
|
||||
FLAG_NAN = 0x0040,
|
||||
FLAG_INFINITY = 0x0080,
|
||||
FLAG_FAIL = 0x0100,
|
||||
};
|
||||
|
||||
/**
|
||||
* The numerical value that was parsed.
|
||||
|
@ -51,9 +76,9 @@ class ParsedNumber {
|
|||
int32_t charEnd;
|
||||
|
||||
/**
|
||||
* Boolean flags (see constants below).
|
||||
* Boolean flags (see constants above).
|
||||
*/
|
||||
int32_t flags;
|
||||
result_flags_t flags;
|
||||
|
||||
/**
|
||||
* The pattern string corresponding to the prefix that got consumed.
|
||||
|
@ -204,15 +229,17 @@ class NumberParseMatcher {
|
|||
* @return Whether this matcher thinks there may be more interesting chars beyond the end of the
|
||||
* string segment.
|
||||
*/
|
||||
virtual bool match(StringSegment& segment, ParsedNumber& result) const = 0;
|
||||
virtual bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const = 0;
|
||||
|
||||
/**
|
||||
* Should return a set representing all possible chars (UTF-16 code units) that could be the first
|
||||
* char that this matcher can consume. This method is only called during construction phase, and its
|
||||
* return value is used to skip this matcher unless a segment begins with a char in this set. To make
|
||||
* this matcher always run, return {@link UnicodeSet#ALL_CODE_POINTS}.
|
||||
*
|
||||
* The returned UnicodeSet needs adoption!
|
||||
*/
|
||||
virtual UnicodeSet getLeadCodePoints() const = 0;
|
||||
virtual const UnicodeSet* getLeadCodePoints() const = 0;
|
||||
|
||||
/**
|
||||
* Method called at the end of a parse, after all matchers have failed to consume any more chars.
|
||||
|
@ -222,7 +249,9 @@ class NumberParseMatcher {
|
|||
* @param result
|
||||
* The data structure to store results.
|
||||
*/
|
||||
virtual void postProcess(ParsedNumber& result) const = 0;
|
||||
virtual void postProcess(ParsedNumber&) const {
|
||||
// Default implementation: no-op
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
|
38
icu4c/source/i18n/numparse_utils.h
Normal file
38
icu4c/source/i18n/numparse_utils.h
Normal file
|
@ -0,0 +1,38 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
#ifndef __NUMPARSE_UTILS_H__
|
||||
#define __NUMPARSE_UTILS_H__
|
||||
|
||||
#include "numparse_types.h"
#include "unicode/uniset.h"
#include "unicode/usetiter.h"
|
||||
|
||||
U_NAMESPACE_BEGIN namespace numparse {
|
||||
namespace impl {
|
||||
namespace utils {
|
||||
|
||||
|
||||
inline static void putLeadCodePoints(const UnicodeSet* input, UnicodeSet* output) {
|
||||
for (int32_t i = 0; i < input->getRangeCount(); i++) {
|
||||
output->add(input->getRangeStart(i), input->getRangeEnd(i));
|
||||
}
|
||||
// TODO: ANDY: How to iterate over the strings in ICU4C UnicodeSet?
|
||||
}
|
||||
|
||||
inline static void putLeadCodePoint(const UnicodeString& input, UnicodeSet* output) {
|
||||
if (!input.isEmpty()) {
|
||||
output->add(input.char32At(0));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace utils
|
||||
} // namespace impl
|
||||
} // namespace numparse
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif //__NUMPARSE_UTILS_H__
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
|
@ -378,7 +378,18 @@ typedef enum UNumberDecimalSeparatorDisplay {
|
|||
UNUM_DECIMAL_SEPARATOR_COUNT
|
||||
} UNumberDecimalMarkDisplay;
|
||||
|
||||
U_NAMESPACE_BEGIN namespace number { // icu::number
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
namespace numparse {
|
||||
namespace impl {
|
||||
|
||||
// Forward declarations:
|
||||
class NumberParserImpl;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
namespace number { // icu::number
|
||||
|
||||
// Forward declarations:
|
||||
class UnlocalizedNumberFormatter;
|
||||
|
@ -1311,6 +1322,12 @@ class U_I18N_API Grouper : public UMemory {
|
|||
Grouper(int16_t grouping1, int16_t grouping2, int16_t minGrouping)
|
||||
: fGrouping1(grouping1), fGrouping2(grouping2), fMinGrouping(minGrouping) {}
|
||||
|
||||
/** @internal */
|
||||
int16_t getPrimary() const;
|
||||
|
||||
/** @internal */
|
||||
int16_t getSecondary() const;
|
||||
|
||||
private:
|
||||
/**
|
||||
* The grouping sizes, with the following special values:
|
||||
|
@ -1349,6 +1366,9 @@ class U_I18N_API Grouper : public UMemory {
|
|||
|
||||
// To allow NumberFormatterImpl to access isBogus() and perform other operations:
|
||||
friend class NumberFormatterImpl;
|
||||
|
||||
// To allow NumberParserImpl to perform setLocaleData():
|
||||
friend class ::icu::numparse::impl::NumberParserImpl;
|
||||
};
|
||||
|
||||
/** @internal */
|
||||
|
|
|
@ -64,7 +64,8 @@ scientificnumberformattertest.o datadrivennumberformattestsuite.o \
|
|||
numberformattesttuple.o numberformat2test.o pluralmaptest.o \
|
||||
numbertest_affixutils.o numbertest_api.o numbertest_decimalquantity.o \
|
||||
numbertest_modifiers.o numbertest_patternmodifier.o numbertest_patternstring.o \
|
||||
numbertest_stringbuilder.o numbertest_stringsegment.o numbertest_unisets.o
|
||||
numbertest_stringbuilder.o numbertest_stringsegment.o numbertest_unisets.o \
|
||||
numbertest_parse.o
|
||||
|
||||
DEPS = $(OBJECTS:.o=.d)
|
||||
|
||||
|
|
|
@ -207,6 +207,16 @@ class UniSetsTest : public IntlTest {
|
|||
const UnicodeSet& set, UChar32 cp);
|
||||
};
|
||||
|
||||
class NumberParserTest : public IntlTest {
|
||||
public:
|
||||
void testBasic();
|
||||
void testLocaleFi();
|
||||
void testSeriesMatcher();
|
||||
void testGroupingDisabled();
|
||||
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
|
||||
};
|
||||
|
||||
|
||||
// NOTE: This macro is identical to the one in itformat.cpp
|
||||
#define TESTCLASS(id, TestClass) \
|
||||
|
@ -237,6 +247,7 @@ class NumberTest : public IntlTest {
|
|||
TESTCLASS(6, NumberStringBuilderTest);
|
||||
TESTCLASS(7, StringSegmentTest);
|
||||
TESTCLASS(8, UniSetsTest);
|
||||
TESTCLASS(9, NumberParserTest);
|
||||
default: name = ""; break; // needed to end loop
|
||||
}
|
||||
}
|
||||
|
|
144
icu4c/source/test/intltest/numbertest_parse.cpp
Normal file
144
icu4c/source/test/intltest/numbertest_parse.cpp
Normal file
|
@ -0,0 +1,144 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT
|
||||
|
||||
#include "numbertest.h"
|
||||
#include "numparse_impl.h"
|
||||
#include "numparse_unisets.h"
|
||||
#include "unicode/dcfmtsym.h"
|
||||
#include "unicode/testlog.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
using icu::numparse::impl::unisets::get;
|
||||
|
||||
/** Test dispatcher for NumberParserTest; testBasic is the only case registered here. */
void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) {
    if (exec) {
        logln("TestSuite NumberParserTest: ");
    }
    TESTCASE_AUTO_BEGIN;
    TESTCASE_AUTO(testBasic);
    TESTCASE_AUTO_END;
}
|
||||
|
||||
void NumberParserTest::testBasic() {
|
||||
IcuTestErrorCode status(*this, "testBasic");
|
||||
|
||||
static const struct TestCase {
|
||||
int32_t flags;
|
||||
const char16_t* inputString;
|
||||
const char16_t* patternString;
|
||||
int32_t expectedCharsConsumed;
|
||||
double expectedResultDouble;
|
||||
} cases[] = {{3, u"51423", u"0", 5, 51423.},
|
||||
{3, u"51423x", u"0", 5, 51423.},
|
||||
{3, u" 51423", u"0", 6, 51423.},
|
||||
{3, u"51423 ", u"0", 5, 51423.},
|
||||
{3, u"𝟱𝟭𝟰𝟮𝟯", u"0", 10, 51423.},
|
||||
{3, u"𝟱𝟭𝟰𝟮𝟯x", u"0", 10, 51423.},
|
||||
{3, u" 𝟱𝟭𝟰𝟮𝟯", u"0", 11, 51423.},
|
||||
{3, u"𝟱𝟭𝟰𝟮𝟯 ", u"0", 10, 51423.},
|
||||
{7, u"𝟱𝟭,𝟰𝟮𝟯", u"#,##,##0", 11, 51423.},
|
||||
{7, u"𝟳,𝟴𝟵,𝟱𝟭,𝟰𝟮𝟯", u"#,##,##0", 19, 78951423.},
|
||||
{7, u"𝟳𝟴,𝟵𝟱𝟭.𝟰𝟮𝟯", u"#,##,##0", 18, 78951.423},
|
||||
{7, u"𝟳𝟴,𝟬𝟬𝟬", u"#,##,##0", 11, 78000.},
|
||||
{7, u"𝟳𝟴,𝟬𝟬𝟬.𝟬𝟬𝟬", u"#,##,##0", 18, 78000.},
|
||||
{7, u"𝟳𝟴,𝟬𝟬𝟬.𝟬𝟮𝟯", u"#,##,##0", 18, 78000.023},
|
||||
{7, u"𝟳𝟴.𝟬𝟬𝟬.𝟬𝟮𝟯", u"#,##,##0", 11, 78.},
|
||||
{3, u"-𝟱𝟭𝟰𝟮𝟯", u"0", 11, -51423.},
|
||||
{3, u"-𝟱𝟭𝟰𝟮𝟯-", u"0", 11, -51423.},
|
||||
{3, u"a51423US dollars", u"a0¤¤¤", 16, 51423.},
|
||||
{3, u"a 51423 US dollars", u"a0¤¤¤", 18, 51423.},
|
||||
{3, u"514.23 USD", u"¤0", 10, 514.23},
|
||||
{3, u"514.23 GBP", u"¤0", 10, 514.23},
|
||||
{3, u"a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 14, 51423.},
|
||||
{3, u"-a 𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
|
||||
{3, u"a -𝟱𝟭𝟰𝟮𝟯 b", u"a0b", 15, -51423.},
|
||||
{3, u"𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 10, 51423.},
|
||||
{3, u"[𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, 51423.},
|
||||
{3, u"𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 11, 51423.},
|
||||
{3, u"[𝟱𝟭𝟰𝟮𝟯]", u"[0];(0)", 12, 51423.},
|
||||
{3, u"(𝟱𝟭𝟰𝟮𝟯", u"[0];(0)", 11, -51423.},
|
||||
{3, u"𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 11, -51423.},
|
||||
{3, u"(𝟱𝟭𝟰𝟮𝟯)", u"[0];(0)", 12, -51423.},
|
||||
{3, u"𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 10, 51423.},
|
||||
{3, u"{𝟱𝟭𝟰𝟮𝟯", u"{0};{0}", 11, 51423.},
|
||||
{3, u"𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 11, 51423.},
|
||||
{3, u"{𝟱𝟭𝟰𝟮𝟯}", u"{0};{0}", 12, 51423.},
|
||||
{1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number
|
||||
{2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b"
|
||||
{3, u"𝟱.𝟭𝟰𝟮E𝟯", u"0", 12, 5142.},
|
||||
{3, u"𝟱.𝟭𝟰𝟮E-𝟯", u"0", 13, 0.005142},
|
||||
{3, u"𝟱.𝟭𝟰𝟮e-𝟯", u"0", 13, 0.005142},
|
||||
{7, u"5,142.50 Canadian dollars", u"#,##,##0 ¤¤¤", 25, 5142.5},
|
||||
{3, u"a$ b5", u"a ¤ b0", 5, 5.0},
|
||||
{3, u"📺1.23", u"📺0;📻0", 6, 1.23},
|
||||
{3, u"📻1.23", u"📺0;📻0", 6, -1.23},
|
||||
{3, u".00", u"0", 3, 0.0},
|
||||
{3, u" 0", u"a0", 31, 0.0}, // should not hang
|
||||
{3, u"NaN", u"0", 3, NAN},
|
||||
{3, u"NaN E5", u"0", 3, NAN},
|
||||
{3, u"0", u"0", 1, 0.0}};
|
||||
|
||||
parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
|
||||
for (auto cas : cases) {
|
||||
UnicodeString inputString(cas.inputString);
|
||||
UnicodeString patternString(cas.patternString);
|
||||
const NumberParserImpl* parser = NumberParserImpl::createSimpleParser(
|
||||
Locale("en"), patternString, parseFlags, status);
|
||||
UnicodeString message =
|
||||
UnicodeString("Input <") + inputString + UnicodeString("> Parser ") + parser->toString();
|
||||
|
||||
if (0 != (cas.flags & 0x01)) {
|
||||
// Test greedy code path
|
||||
ParsedNumber resultObject;
|
||||
parser->parse(inputString, true, resultObject, status);
|
||||
assertTrue("Greedy Parse failed: " + message, resultObject.success());
|
||||
assertEquals(
|
||||
"Greedy Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd);
|
||||
assertEquals(
|
||||
"Greedy Parse failed: " + message,
|
||||
cas.expectedResultDouble,
|
||||
resultObject.getDouble());
|
||||
}
|
||||
|
||||
if (0 != (cas.flags & 0x02)) {
|
||||
// Test slow code path
|
||||
ParsedNumber resultObject;
|
||||
parser->parse(inputString, false, resultObject, status);
|
||||
assertTrue("Non-Greedy Parse failed: " + message, resultObject.success());
|
||||
assertEquals(
|
||||
"Non-Greedy Parse failed: " + message,
|
||||
cas.expectedCharsConsumed,
|
||||
resultObject.charEnd);
|
||||
assertEquals(
|
||||
"Non-Greedy Parse failed: " + message,
|
||||
cas.expectedResultDouble,
|
||||
resultObject.getDouble());
|
||||
}
|
||||
|
||||
if (0 != (cas.flags & 0x04)) {
|
||||
// Test with strict grouping sizes (PARSE_FLAG_STRICT_GROUPING_SIZE)
|
||||
parser = NumberParserImpl::createSimpleParser(
|
||||
Locale("en"),
|
||||
patternString,
|
||||
parseFlags | PARSE_FLAG_STRICT_GROUPING_SIZE,
|
||||
status);
|
||||
ParsedNumber resultObject;
|
||||
parser->parse(inputString, true, resultObject, status);
|
||||
assertTrue("Strict Parse failed: " + message, resultObject.success());
|
||||
assertEquals(
|
||||
"Strict Parse failed: " + message, cas.expectedCharsConsumed, resultObject.charEnd);
|
||||
assertEquals(
|
||||
"Strict Parse failed: " + message,
|
||||
cas.expectedResultDouble,
|
||||
resultObject.getDouble());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif
|
|
@ -77,7 +77,7 @@ void NumberStringBuilderTest::testInsertAppendUnicodeString() {
|
|||
}
|
||||
|
||||
void NumberStringBuilderTest::testSplice() {
|
||||
const struct TestCase {
|
||||
static const struct TestCase {
|
||||
const char16_t* input;
|
||||
const int32_t startThis;
|
||||
const int32_t endThis;
|
||||
|
|
|
@ -9,9 +9,6 @@
|
|||
#include "numparse_unisets.h"
|
||||
#include "unicode/dcfmtsym.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <cstr.h>
|
||||
|
||||
using icu::numparse::impl::unisets::get;
|
||||
|
||||
void UniSetsTest::runIndexedTest(int32_t index, UBool exec, const char*&name, char*) {
|
||||
|
|
|
@ -27,9 +27,6 @@ public class DecimalMatcher implements NumberParseMatcher {
|
|||
/** If true, do not accept numbers in the fraction */
|
||||
private final boolean integerOnly;
|
||||
|
||||
/** If true, save the result as an exponent instead of a quantity in the ParsedNumber */
|
||||
private final boolean isScientific;
|
||||
|
||||
private final int grouping1;
|
||||
private final int grouping2;
|
||||
|
||||
|
@ -97,20 +94,28 @@ public class DecimalMatcher implements NumberParseMatcher {
|
|||
fractionGroupingDisabled = 0 != (parseFlags
|
||||
& ParsingUtils.PARSE_FLAG_FRACTION_GROUPING_DISABLED);
|
||||
integerOnly = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_INTEGER_ONLY);
|
||||
isScientific = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_DECIMAL_SCIENTIFIC);
|
||||
grouping1 = grouper.getPrimary();
|
||||
grouping2 = grouper.getSecondary();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean match(StringSegment segment, ParsedNumber result) {
|
||||
return match(segment, result, false);
|
||||
return match(segment, result, 0);
|
||||
}
|
||||
|
||||
public boolean match(StringSegment segment, ParsedNumber result, boolean negativeExponent) {
|
||||
if (result.seenNumber() && !isScientific) {
|
||||
/**
|
||||
* @param exponentSign
|
||||
* -1 means a negative exponent; +1 means a positive exponent; 0 means NO exponent. If -1
|
||||
* or +1, the number will be saved by scaling the pre-existing DecimalQuantity in the
|
||||
* ParsedNumber. If 0, a new DecimalQuantity will be created to store the number.
|
||||
*/
|
||||
public boolean match(StringSegment segment, ParsedNumber result, int exponentSign) {
|
||||
if (result.seenNumber() && exponentSign == 0) {
|
||||
// A number has already been consumed.
|
||||
return false;
|
||||
} else if (exponentSign != 0) {
|
||||
// scientific notation always comes after the number
|
||||
assert result.quantity != null;
|
||||
}
|
||||
|
||||
ParsedNumber backupResult = null;
|
||||
|
@ -181,7 +186,7 @@ public class DecimalMatcher implements NumberParseMatcher {
|
|||
}
|
||||
|
||||
// Save the digit in the DecimalQuantity or scientific adjustment.
|
||||
if (isScientific) {
|
||||
if (exponentSign != 0) {
|
||||
int nextExponent = digit + exponent * 10;
|
||||
if (nextExponent < exponent) {
|
||||
// Overflow
|
||||
|
@ -272,11 +277,6 @@ public class DecimalMatcher implements NumberParseMatcher {
|
|||
break;
|
||||
}
|
||||
|
||||
// if (backupOffset != -1) {
|
||||
// segment.setOffset(backupOffset);
|
||||
// hasPartialPrefix = true;
|
||||
// }
|
||||
|
||||
// Check the final grouping for validity
|
||||
if (requireGroupingMatch
|
||||
&& !seenDecimal
|
||||
|
@ -303,18 +303,17 @@ public class DecimalMatcher implements NumberParseMatcher {
|
|||
result.quantity.adjustMagnitude(-digitsAfterDecimal);
|
||||
}
|
||||
|
||||
if (isScientific && segment.getOffset() != initialOffset) {
|
||||
assert result.quantity != null; // scientific notation always comes after the number
|
||||
if (exponentSign != 0 && segment.getOffset() != initialOffset) {
|
||||
boolean overflow = (exponent == Integer.MAX_VALUE);
|
||||
if (!overflow) {
|
||||
try {
|
||||
result.quantity.adjustMagnitude(negativeExponent ? -exponent : exponent);
|
||||
result.quantity.adjustMagnitude(exponentSign * exponent);
|
||||
} catch (ArithmeticException e) {
|
||||
overflow = true;
|
||||
}
|
||||
}
|
||||
if (overflow) {
|
||||
if (negativeExponent) {
|
||||
if (exponentSign == -1) {
|
||||
// Set to zero
|
||||
result.quantity.clear();
|
||||
} else {
|
||||
|
|
|
@ -266,28 +266,27 @@ public class NumberParserImpl {
|
|||
|
||||
private final int parseFlags;
|
||||
private final List<NumberParseMatcher> matchers;
|
||||
private final List<UnicodeSet> leadCodePointses;
|
||||
private final List<UnicodeSet> leads;
|
||||
private Comparator<ParsedNumber> comparator;
|
||||
private boolean frozen;
|
||||
|
||||
/**
|
||||
* Creates a new, empty parser.
|
||||
*
|
||||
* @param ignoreCase
|
||||
* If true, perform case-folding. This parameter needs to go into the constructor because
|
||||
* its value is used during the construction of the matcher chain.
|
||||
* @param optimize
|
||||
* @param parseFlags
|
||||
* Settings for constructing the parser.
|
||||
* @param computeLeads
|
||||
* If true, compute "lead chars" UnicodeSets for the matchers. This reduces parsing
|
||||
* runtime but increases construction runtime. If the parser is going to be used only once
|
||||
* or twice, set this to false; if it is going to be used hundreds of times, set it to
|
||||
* true.
|
||||
*/
|
||||
public NumberParserImpl(int parseFlags, boolean optimize) {
|
||||
public NumberParserImpl(int parseFlags, boolean computeLeads) {
|
||||
matchers = new ArrayList<NumberParseMatcher>();
|
||||
if (optimize) {
|
||||
leadCodePointses = new ArrayList<UnicodeSet>();
|
||||
if (computeLeads) {
|
||||
leads = new ArrayList<UnicodeSet>();
|
||||
} else {
|
||||
leadCodePointses = null;
|
||||
leads = null;
|
||||
}
|
||||
comparator = ParsedNumber.COMPARATOR; // default value
|
||||
this.parseFlags = parseFlags;
|
||||
|
@ -297,21 +296,21 @@ public class NumberParserImpl {
|
|||
public void addMatcher(NumberParseMatcher matcher) {
|
||||
assert !frozen;
|
||||
this.matchers.add(matcher);
|
||||
if (leadCodePointses != null) {
|
||||
if (leads != null) {
|
||||
UnicodeSet leadCodePoints = matcher.getLeadCodePoints();
|
||||
assert leadCodePoints.isFrozen();
|
||||
this.leadCodePointses.add(leadCodePoints);
|
||||
this.leads.add(leadCodePoints);
|
||||
}
|
||||
}
|
||||
|
||||
public void addMatchers(Collection<? extends NumberParseMatcher> matchers) {
|
||||
assert !frozen;
|
||||
this.matchers.addAll(matchers);
|
||||
if (leadCodePointses != null) {
|
||||
if (leads != null) {
|
||||
for (NumberParseMatcher matcher : matchers) {
|
||||
UnicodeSet leadCodePoints = matcher.getLeadCodePoints();
|
||||
assert leadCodePoints.isFrozen();
|
||||
this.leadCodePointses.add(leadCodePoints);
|
||||
this.leads.add(leadCodePoints);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -366,7 +365,7 @@ public class NumberParserImpl {
|
|||
int initialOffset = segment.getOffset();
|
||||
int leadCp = segment.getCodePoint();
|
||||
for (int i = 0; i < matchers.size(); i++) {
|
||||
if (leadCodePointses != null && !leadCodePointses.get(i).contains(leadCp)) {
|
||||
if (leads != null && !leads.get(i).contains(leadCp)) {
|
||||
continue;
|
||||
}
|
||||
NumberParseMatcher matcher = matchers.get(i);
|
||||
|
|
|
@ -17,12 +17,11 @@ public class ParsingUtils {
|
|||
public static final int PARSE_FLAG_STRICT_GROUPING_SIZE = 0x0008;
|
||||
public static final int PARSE_FLAG_INTEGER_ONLY = 0x0010;
|
||||
public static final int PARSE_FLAG_GROUPING_DISABLED = 0x0020;
|
||||
public static final int PARSE_FLAG_DECIMAL_SCIENTIFIC = 0x0040;
|
||||
public static final int PARSE_FLAG_FRACTION_GROUPING_DISABLED = 0x0040;
|
||||
public static final int PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES = 0x0080;
|
||||
public static final int PARSE_FLAG_USE_FULL_AFFIXES = 0x0100;
|
||||
public static final int PARSE_FLAG_EXACT_AFFIX = 0x0200;
|
||||
public static final int PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400;
|
||||
public static final int PARSE_FLAG_FRACTION_GROUPING_DISABLED = 0x0800;
|
||||
|
||||
public static void putLeadCodePoints(UnicodeSet input, UnicodeSet output) {
|
||||
for (EntryRange range : input.ranges()) {
|
||||
|
|
|
@ -27,13 +27,15 @@ public class ScientificMatcher implements NumberParseMatcher {
|
|||
exponentSeparatorString = ParsingUtils.maybeFold(symbols.getExponentSeparator(), parseFlags);
|
||||
exponentMatcher = DecimalMatcher.getInstance(symbols,
|
||||
grouper,
|
||||
ParsingUtils.PARSE_FLAG_DECIMAL_SCIENTIFIC | ParsingUtils.PARSE_FLAG_INTEGER_ONLY);
|
||||
ParsingUtils.PARSE_FLAG_INTEGER_ONLY);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean match(StringSegment segment, ParsedNumber result) {
|
||||
// Only accept scientific notation after the mantissa.
|
||||
if (!result.seenNumber()) {
|
||||
// Most places use result.hasNumber(), but we need a stronger condition here (i.e., exponent is
|
||||
// not well-defined after NaN or infinity).
|
||||
if (result.quantity == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -54,16 +56,16 @@ public class ScientificMatcher implements NumberParseMatcher {
|
|||
}
|
||||
|
||||
// Allow a sign, and then try to match digits.
|
||||
boolean minusSign = false;
|
||||
int exponentSign = 1;
|
||||
if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.MINUS_SIGN).contains(leadCp)) {
|
||||
minusSign = true;
|
||||
exponentSign = -1;
|
||||
segment.adjustOffset(Character.charCount(leadCp));
|
||||
} else if (UnicodeSetStaticCache.get(UnicodeSetStaticCache.Key.PLUS_SIGN).contains(leadCp)) {
|
||||
segment.adjustOffset(Character.charCount(leadCp));
|
||||
}
|
||||
|
||||
int digitsOffset = segment.getOffset();
|
||||
boolean digitsReturnValue = exponentMatcher.match(segment, result, minusSign);
|
||||
boolean digitsReturnValue = exponentMatcher.match(segment, result, exponentSign);
|
||||
if (segment.getOffset() != digitsOffset) {
|
||||
// At least one exponent digit was matched.
|
||||
result.flags |= ParsedNumber.FLAG_HAS_EXPONENT;
|
||||
|
|
|
@ -1654,13 +1654,13 @@ public class DecimalFormatSymbols implements Cloneable, Serializable {
|
|||
* represents a sequence of ten code points in order.
|
||||
*
|
||||
* <p>If the value stored here is positive, it means that the code point stored in this value
|
||||
* corresponds to the digitStrings array, and zeroCodePoint can be used instead of the
|
||||
* corresponds to the digitStrings array, and codePointZero can be used instead of the
|
||||
* digitStrings array for the purposes of efficient formatting; if -1, then digitStrings does
|
||||
* *not* contain a sequence of code points, and it must be used directly.
|
||||
*
|
||||
* <p>It is assumed that zeroCodePoint always shadows the value in digitStrings. zeroCodePoint
|
||||
* <p>It is assumed that codePointZero always shadows the value in digitStrings. codePointZero
|
||||
* should never be set directly; rather, it should be updated only when digitStrings mutates.
|
||||
* That is, the flow of information is digitStrings -> zeroCodePoint, not the other way.
|
||||
* That is, the flow of information is digitStrings -> codePointZero, not the other way.
|
||||
*/
|
||||
private transient int codePointZero;
|
||||
|
||||
|
|
|
@ -296,6 +296,9 @@ public class IntlTestDecimalFormatSymbols extends TestFmwk
|
|||
final String[] differentDigitStrings = {"0", "b", "3", "d", "5", "ff", "7", "h", "9", "j"};
|
||||
|
||||
DecimalFormatSymbols symbols = new DecimalFormatSymbols(Locale.ENGLISH);
|
||||
if (defZero != symbols.getCodePointZero()) {
|
||||
errln("ERROR: Code point zero initialize to ASCII 0");
|
||||
}
|
||||
|
||||
symbols.setDigitStrings(osmanyaDigitStrings);
|
||||
if (!Arrays.equals(symbols.getDigitStrings(), osmanyaDigitStrings)) {
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
package com.ibm.icu.dev.test.number;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import org.junit.Test;
|
||||
|
@ -86,30 +85,32 @@ public class NumberParserTest {
|
|||
{ 3, "📻1.23", "📺0;📻0", 6, -1.23 },
|
||||
{ 3, ".00", "0", 3, 0.0 },
|
||||
{ 3, " 0", "a0", 31, 0.0 }, // should not hang
|
||||
{ 3, "NaN", "0", 3, Double.NaN },
|
||||
{ 3, "NaN E5", "0", 3, Double.NaN },
|
||||
{ 3, "0", "0", 1, 0.0 } };
|
||||
|
||||
int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE
|
||||
| ParsingUtils.PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
|
||||
for (Object[] cas : cases) {
|
||||
int flags = (Integer) cas[0];
|
||||
String input = (String) cas[1];
|
||||
String pattern = (String) cas[2];
|
||||
String inputString = (String) cas[1];
|
||||
String patternString = (String) cas[2];
|
||||
int expectedCharsConsumed = (Integer) cas[3];
|
||||
double resultDouble = (Double) cas[4];
|
||||
double expectedResultDouble = (Double) cas[4];
|
||||
NumberParserImpl parser = NumberParserImpl
|
||||
.createSimpleParser(ULocale.ENGLISH, pattern, parseFlags);
|
||||
String message = "Input <" + input + "> Parser " + parser;
|
||||
.createSimpleParser(ULocale.ENGLISH, patternString, parseFlags);
|
||||
String message = "Input <" + inputString + "> Parser " + parser;
|
||||
|
||||
if (0 != (flags & 0x01)) {
|
||||
// Test greedy code path
|
||||
ParsedNumber resultObject = new ParsedNumber();
|
||||
parser.parse(input, true, resultObject);
|
||||
assertNotNull("Greedy Parse failed: " + message, resultObject.quantity);
|
||||
parser.parse(inputString, true, resultObject);
|
||||
assertTrue("Greedy Parse failed: " + message, resultObject.success());
|
||||
assertEquals("Greedy Parse failed: " + message,
|
||||
expectedCharsConsumed,
|
||||
resultObject.charEnd);
|
||||
assertEquals("Greedy Parse failed: " + message,
|
||||
resultDouble,
|
||||
expectedResultDouble,
|
||||
resultObject.getNumber().doubleValue(),
|
||||
0.0);
|
||||
}
|
||||
|
@ -117,13 +118,13 @@ public class NumberParserTest {
|
|||
if (0 != (flags & 0x02)) {
|
||||
// Test slow code path
|
||||
ParsedNumber resultObject = new ParsedNumber();
|
||||
parser.parse(input, false, resultObject);
|
||||
assertNotNull("Non-Greedy Parse failed: " + message, resultObject.quantity);
|
||||
parser.parse(inputString, false, resultObject);
|
||||
assertTrue("Non-Greedy Parse failed: " + message, resultObject.success());
|
||||
assertEquals("Non-Greedy Parse failed: " + message,
|
||||
expectedCharsConsumed,
|
||||
resultObject.charEnd);
|
||||
assertEquals("Non-Greedy Parse failed: " + message,
|
||||
resultDouble,
|
||||
expectedResultDouble,
|
||||
resultObject.getNumber().doubleValue(),
|
||||
0.0);
|
||||
}
|
||||
|
@ -131,16 +132,16 @@ public class NumberParserTest {
|
|||
if (0 != (flags & 0x04)) {
|
||||
// Test with strict separators
|
||||
parser = NumberParserImpl.createSimpleParser(ULocale.ENGLISH,
|
||||
pattern,
|
||||
patternString,
|
||||
parseFlags | ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE);
|
||||
ParsedNumber resultObject = new ParsedNumber();
|
||||
parser.parse(input, true, resultObject);
|
||||
assertNotNull("Strict Parse failed: " + message, resultObject.quantity);
|
||||
parser.parse(inputString, true, resultObject);
|
||||
assertTrue("Strict Parse failed: " + message, resultObject.success());
|
||||
assertEquals("Strict Parse failed: " + message,
|
||||
expectedCharsConsumed,
|
||||
resultObject.charEnd);
|
||||
assertEquals("Strict Parse failed: " + message,
|
||||
resultDouble,
|
||||
expectedResultDouble,
|
||||
resultObject.getNumber().doubleValue(),
|
||||
0.0);
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue