mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-13783 Re-writing grouping parsing logic. Same behavior but passes more corner cases.
X-SVN-Rev: 41427
This commit is contained in:
parent
ebca759ea1
commit
33a0fa7172
13 changed files with 610 additions and 365 deletions
|
@ -228,6 +228,9 @@ bool DecimalQuantity::adjustMagnitude(int32_t delta) {
|
|||
// i.e., scale += delta; origDelta += delta
|
||||
bool overflow = uprv_add32_overflow(scale, delta, &scale);
|
||||
overflow = uprv_add32_overflow(origDelta, delta, &origDelta) || overflow;
|
||||
// Make sure that precision + scale won't overflow, either
|
||||
int32_t dummy;
|
||||
overflow = overflow || uprv_add32_overflow(scale, precision, &dummy);
|
||||
return overflow;
|
||||
}
|
||||
return false;
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "numparse_utils.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "putilimp.h"
|
||||
#include "number_decimalquantity.h"
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::numparse;
|
||||
|
@ -79,11 +80,13 @@ DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Groupe
|
|||
|
||||
requireGroupingMatch = 0 != (parseFlags & PARSE_FLAG_STRICT_GROUPING_SIZE);
|
||||
groupingDisabled = 0 != (parseFlags & PARSE_FLAG_GROUPING_DISABLED);
|
||||
fractionGroupingDisabled = 0 != (
|
||||
parseFlags & PARSE_FLAG_FRACTION_GROUPING_DISABLED);
|
||||
integerOnly = 0 != (parseFlags & PARSE_FLAG_INTEGER_ONLY);
|
||||
grouping1 = grouper.getPrimary();
|
||||
grouping2 = grouper.getSecondary();
|
||||
|
||||
// Fraction grouping parsing is disabled for now but could be enabled later.
|
||||
// See http://bugs.icu-project.org/trac/ticket/10794
|
||||
// fractionGrouping = 0 != (parseFlags & PARSE_FLAG_FRACTION_GROUPING_ENABLED);
|
||||
}
|
||||
|
||||
bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
|
||||
|
@ -100,30 +103,55 @@ bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t
|
|||
U_ASSERT(!result.quantity.bogus);
|
||||
}
|
||||
|
||||
ParsedNumber backupResult(result);
|
||||
|
||||
// strict parsing
|
||||
bool strictFail = false; // did we exit with a strict parse failure?
|
||||
UnicodeString actualGroupingString = groupingSeparator;
|
||||
UnicodeString actualDecimalString = decimalSeparator;
|
||||
int32_t groupedDigitCount = 0; // tracking count of digits delimited by grouping separator
|
||||
int32_t backupOffset = -1; // used for preserving the last confirmed position
|
||||
int32_t smallGroupBackupOffset = -1; // used to back up behind groups of size 1
|
||||
bool afterFirstGrouping = false;
|
||||
bool seenGrouping = false;
|
||||
bool seenDecimal = false;
|
||||
int32_t digitsAfterDecimal = 0;
|
||||
// Initial offset before any character consumption.
|
||||
int32_t initialOffset = segment.getOffset();
|
||||
int32_t exponent = 0;
|
||||
bool hasPartialPrefix = false;
|
||||
|
||||
// Return value: whether to ask for more characters.
|
||||
bool maybeMore = false;
|
||||
|
||||
// All digits consumed so far.
|
||||
number::impl::DecimalQuantity digitsConsumed;
|
||||
digitsConsumed.bogus = true;
|
||||
|
||||
// The total number of digits after the decimal place, used for scaling the result.
|
||||
int32_t digitsAfterDecimalPlace = 0;
|
||||
|
||||
// The actual grouping and decimal separators used in the string.
|
||||
// If non-null, we have seen that token.
|
||||
UnicodeString actualGroupingString;
|
||||
UnicodeString actualDecimalString;
|
||||
actualGroupingString.setToBogus();
|
||||
actualDecimalString.setToBogus();
|
||||
|
||||
// Information for two groups: the previous group and the current group.
|
||||
//
|
||||
// Each group has three pieces of information:
|
||||
//
|
||||
// Offset: the string position of the beginning of the group, including a leading separator
|
||||
// if there was a leading separator. This is needed in case we need to rewind the parse to
|
||||
// that position.
|
||||
//
|
||||
// Separator type:
|
||||
// 0 => beginning of string
|
||||
// 1 => lead separator is a grouping separator
|
||||
// 2 => lead separator is a decimal separator
|
||||
//
|
||||
// Count: the number of digits in the group. If -1, the group has been validated.
|
||||
int32_t currGroupOffset = 0;
|
||||
int32_t currGroupSepType = 0;
|
||||
int32_t currGroupCount = 0;
|
||||
int32_t prevGroupOffset = -1;
|
||||
int32_t prevGroupSepType = -1;
|
||||
int32_t prevGroupCount = -1;
|
||||
|
||||
while (segment.length() > 0) {
|
||||
hasPartialPrefix = false;
|
||||
maybeMore = false;
|
||||
|
||||
// Attempt to match a digit.
|
||||
int8_t digit = -1;
|
||||
|
||||
// Try by code point digit value.
|
||||
int cp = segment.getCodePoint();
|
||||
UChar32 cp = segment.getCodePoint();
|
||||
if (u_isdigit(cp)) {
|
||||
segment.adjustOffset(U16_LENGTH(cp));
|
||||
digit = static_cast<int8_t>(u_digit(cp, 10));
|
||||
|
@ -138,188 +166,207 @@ bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t
|
|||
segment.adjustOffset(overlap);
|
||||
digit = static_cast<int8_t>(i);
|
||||
break;
|
||||
} else if (overlap == segment.length()) {
|
||||
hasPartialPrefix = true;
|
||||
}
|
||||
maybeMore = maybeMore || (overlap == segment.length());
|
||||
}
|
||||
}
|
||||
|
||||
if (digit >= 0) {
|
||||
// Digit was found.
|
||||
// Check for grouping size violation
|
||||
if (backupOffset != -1) {
|
||||
smallGroupBackupOffset = backupOffset;
|
||||
backupOffset = -1;
|
||||
if (requireGroupingMatch) {
|
||||
// comma followed by digit, so group before comma is a secondary
|
||||
// group. If there was a group separator before that, the group
|
||||
// must == the secondary group length, else it can be <= the
|
||||
// secondary group length.
|
||||
if ((afterFirstGrouping && groupedDigitCount != grouping2) ||
|
||||
(!afterFirstGrouping && groupedDigitCount > grouping2)) {
|
||||
strictFail = true;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// #11230: don't accept groups after the first with only 1 digit.
|
||||
// The logic to back up and remove the lone digit is lower down.
|
||||
if (afterFirstGrouping && groupedDigitCount == 1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
afterFirstGrouping = true;
|
||||
groupedDigitCount = 0;
|
||||
if (digitsConsumed.bogus) {
|
||||
digitsConsumed.bogus = false;
|
||||
digitsConsumed.clear();
|
||||
}
|
||||
|
||||
// Save the digit in the DecimalQuantity or scientific adjustment.
|
||||
if (exponentSign != 0) {
|
||||
int32_t nextExponent;
|
||||
// i.e., nextExponent = exponent * 10 + digit
|
||||
UBool overflow = uprv_mul32_overflow(exponent, 10, &nextExponent) ||
|
||||
uprv_add32_overflow(nextExponent, digit, &nextExponent);
|
||||
if (overflow) {
|
||||
exponent = INT32_MAX;
|
||||
} else {
|
||||
exponent = nextExponent;
|
||||
}
|
||||
} else {
|
||||
if (result.quantity.bogus) {
|
||||
result.quantity.bogus = false;
|
||||
}
|
||||
result.quantity.appendDigit(digit, 0, true);
|
||||
}
|
||||
result.setCharsConsumed(segment);
|
||||
groupedDigitCount++;
|
||||
if (seenDecimal) {
|
||||
digitsAfterDecimal++;
|
||||
digitsConsumed.appendDigit(digit, 0, true);
|
||||
currGroupCount++;
|
||||
if (!actualDecimalString.isBogus()) {
|
||||
digitsAfterDecimalPlace++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Attempt to match a literal grouping or decimal separator
|
||||
int32_t decimalOverlap = segment.getCommonPrefixLength(actualDecimalString);
|
||||
bool decimalStringMatch = decimalOverlap == actualDecimalString.length();
|
||||
int32_t groupingOverlap = segment.getCommonPrefixLength(actualGroupingString);
|
||||
bool groupingStringMatch = groupingOverlap == actualGroupingString.length();
|
||||
// Attempt to match a literal grouping or decimal separator.
|
||||
bool isDecimal = false;
|
||||
bool isGrouping = false;
|
||||
|
||||
hasPartialPrefix = (decimalOverlap == segment.length()) || (groupingOverlap == segment.length());
|
||||
|
||||
if (!seenDecimal && !groupingStringMatch &&
|
||||
(decimalStringMatch || (!seenDecimal && decimalUniSet->contains(cp)))) {
|
||||
// matched a decimal separator
|
||||
if (requireGroupingMatch) {
|
||||
if (backupOffset != -1 || (seenGrouping && groupedDigitCount != grouping1)) {
|
||||
strictFail = true;
|
||||
break;
|
||||
}
|
||||
// 1) Attempt the decimal separator string literal.
|
||||
// if (we have not seen a decimal separator yet) { ... }
|
||||
if (actualDecimalString.isBogus()) {
|
||||
int overlap = segment.getCommonPrefixLength(decimalSeparator);
|
||||
maybeMore = maybeMore || (overlap == segment.length());
|
||||
if (overlap == decimalSeparator.length()) {
|
||||
isDecimal = true;
|
||||
actualDecimalString = decimalSeparator;
|
||||
}
|
||||
}
|
||||
|
||||
// If we're only parsing integers, then don't parse this one.
|
||||
if (integerOnly) {
|
||||
break;
|
||||
// 2) Attempt to match the actual grouping string literal.
|
||||
if (!actualGroupingString.isBogus()) {
|
||||
int overlap = segment.getCommonPrefixLength(actualGroupingString);
|
||||
maybeMore = maybeMore || (overlap == segment.length());
|
||||
if (overlap == actualGroupingString.length()) {
|
||||
isGrouping = true;
|
||||
}
|
||||
}
|
||||
|
||||
seenDecimal = true;
|
||||
if (!decimalStringMatch) {
|
||||
// 2.5) Attempt to match a new grouping separator string literal.
|
||||
// if (we have not seen a grouping or decimal separator yet) { ... }
|
||||
if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus()) {
|
||||
int overlap = segment.getCommonPrefixLength(groupingSeparator);
|
||||
maybeMore = maybeMore || (overlap == segment.length());
|
||||
if (overlap == groupingSeparator.length()) {
|
||||
isGrouping = true;
|
||||
actualGroupingString = groupingSeparator;
|
||||
}
|
||||
}
|
||||
|
||||
// 3) Attempt to match a decimal separator from the equivalence set.
|
||||
// if (we have not seen a decimal separator yet) { ... }
|
||||
// The !isGrouping is to confirm that we haven't yet matched the current character.
|
||||
if (!isGrouping && actualDecimalString.isBogus()) {
|
||||
if (decimalUniSet->contains(cp)) {
|
||||
isDecimal = true;
|
||||
actualDecimalString = UnicodeString(cp);
|
||||
}
|
||||
segment.adjustOffset(actualDecimalString.length());
|
||||
result.setCharsConsumed(segment);
|
||||
result.flags |= FLAG_HAS_DECIMAL_SEPARATOR;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!groupingDisabled && !decimalStringMatch &&
|
||||
(groupingStringMatch || (!seenGrouping && groupingUniSet->contains(cp)))) {
|
||||
// matched a grouping separator
|
||||
if (requireGroupingMatch) {
|
||||
if (groupedDigitCount == 0) {
|
||||
// leading group
|
||||
strictFail = true;
|
||||
break;
|
||||
} else if (backupOffset != -1) {
|
||||
// two group separators in a row
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (fractionGroupingDisabled && seenDecimal) {
|
||||
// Stop parsing here.
|
||||
break;
|
||||
}
|
||||
|
||||
seenGrouping = true;
|
||||
if (!groupingStringMatch) {
|
||||
// 4) Attempt to match a grouping separator from the equivalence set.
|
||||
// if (we have not seen a grouping or decimal separator yet) { ... }
|
||||
if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus()) {
|
||||
if (groupingUniSet->contains(cp)) {
|
||||
isGrouping = true;
|
||||
actualGroupingString = UnicodeString(cp);
|
||||
}
|
||||
backupOffset = segment.getOffset();
|
||||
segment.adjustOffset(actualGroupingString.length());
|
||||
// Note: do NOT set charsConsumed
|
||||
continue;
|
||||
}
|
||||
|
||||
// Not a digit and not a separator
|
||||
break;
|
||||
}
|
||||
|
||||
// Back up if there was a trailing grouping separator
|
||||
if (backupOffset != -1) {
|
||||
segment.setOffset(backupOffset);
|
||||
hasPartialPrefix = true; // redundant with `groupingOverlap == segment.length()`
|
||||
}
|
||||
|
||||
// Check the final grouping for validity
|
||||
if (requireGroupingMatch && !seenDecimal && seenGrouping && afterFirstGrouping &&
|
||||
groupedDigitCount != grouping1) {
|
||||
strictFail = true;
|
||||
}
|
||||
|
||||
// #11230: don't accept groups after the first with only 1 digit.
|
||||
// Behavior in this case is to back up before that 1-digit group.
|
||||
if (!seenDecimal && afterFirstGrouping && groupedDigitCount == 1) {
|
||||
if (segment.length() == 0) {
|
||||
// Strings like "9,999" where we looked at only the first 3 chars.
|
||||
// Ask for a longer segment.
|
||||
hasPartialPrefix = true;
|
||||
// Leave if we failed to match this as a separator.
|
||||
if (!isDecimal && !isGrouping) {
|
||||
break;
|
||||
}
|
||||
segment.setOffset(smallGroupBackupOffset);
|
||||
result.setCharsConsumed(segment);
|
||||
if (smallGroupBackupOffset == initialOffset) {
|
||||
// Strings like ",9"
|
||||
// Reset to no quantity seen.
|
||||
result.quantity.clear();
|
||||
result.quantity.bogus = true;
|
||||
|
||||
// Check for conditions when we don't want to accept the separator.
|
||||
if (isDecimal && integerOnly) {
|
||||
break;
|
||||
} else if (currGroupSepType == 2 && isGrouping) {
|
||||
// Fraction grouping
|
||||
break;
|
||||
}
|
||||
|
||||
// Validate intermediate grouping sizes.
|
||||
bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
|
||||
bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
|
||||
if (!prevValidSecondary || (isDecimal && !currValidPrimary)) {
|
||||
// Invalid grouping sizes.
|
||||
if (isGrouping && currGroupCount == 0) {
|
||||
// Trailing grouping separators: these are taken care of below
|
||||
U_ASSERT(currGroupSepType == 1);
|
||||
} else if (requireGroupingMatch) {
|
||||
// Strict mode: reject the parse
|
||||
digitsConsumed.clear();
|
||||
digitsConsumed.bogus = true;
|
||||
}
|
||||
break;
|
||||
} else if (requireGroupingMatch && currGroupCount == 0 && currGroupSepType == 1) {
|
||||
break;
|
||||
} else {
|
||||
// Strings like "9,9"
|
||||
// Remove the lone digit from the result quantity.
|
||||
U_ASSERT(!result.quantity.bogus);
|
||||
result.quantity.adjustMagnitude(-1);
|
||||
result.quantity.truncate();
|
||||
// Grouping sizes OK so far.
|
||||
prevGroupOffset = currGroupOffset;
|
||||
prevGroupCount = currGroupCount;
|
||||
if (isDecimal) {
|
||||
// Do not validate this group any more.
|
||||
prevGroupSepType = -1;
|
||||
} else {
|
||||
prevGroupSepType = currGroupSepType;
|
||||
}
|
||||
}
|
||||
|
||||
// OK to accept the separator.
|
||||
// Special case: don't update currGroup if it is empty; this allows two grouping
|
||||
// separators in a row in lenient mode.
|
||||
if (currGroupCount != 0) {
|
||||
currGroupOffset = segment.getOffset();
|
||||
}
|
||||
currGroupSepType = isGrouping ? 1 : 2;
|
||||
currGroupCount = 0;
|
||||
if (isGrouping) {
|
||||
segment.adjustOffset(actualGroupingString.length());
|
||||
} else {
|
||||
segment.adjustOffset(actualDecimalString.length());
|
||||
}
|
||||
}
|
||||
|
||||
if (requireGroupingMatch && strictFail) {
|
||||
result = backupResult;
|
||||
// End of main loop.
|
||||
// Back up if there was a trailing grouping separator.
|
||||
// Shift prev -> curr so we can check it as a final group.
|
||||
if (currGroupSepType != 2 && currGroupCount == 0) {
|
||||
maybeMore = true;
|
||||
segment.setOffset(currGroupOffset);
|
||||
currGroupOffset = prevGroupOffset;
|
||||
currGroupSepType = prevGroupSepType;
|
||||
currGroupCount = prevGroupCount;
|
||||
prevGroupOffset = -1;
|
||||
prevGroupSepType = 0;
|
||||
prevGroupCount = 1;
|
||||
}
|
||||
|
||||
// Validate final grouping sizes.
|
||||
bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
|
||||
bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
|
||||
if (!requireGroupingMatch) {
|
||||
// The cases we need to handle here are lone digits.
|
||||
// Examples: "1,1" "1,1," "1,1,1" "1,1,1," ",1" (all parse as 1)
|
||||
// See more examples in numberformattestspecification.txt
|
||||
int digitsToRemove = 0;
|
||||
if (!prevValidSecondary) {
|
||||
segment.setOffset(prevGroupOffset);
|
||||
digitsToRemove += prevGroupCount;
|
||||
digitsToRemove += currGroupCount;
|
||||
} else if (!currValidPrimary && (prevGroupSepType != 0 || prevGroupCount != 0)) {
|
||||
maybeMore = true;
|
||||
segment.setOffset(currGroupOffset);
|
||||
digitsToRemove += currGroupCount;
|
||||
}
|
||||
if (digitsToRemove != 0) {
|
||||
digitsConsumed.adjustMagnitude(-digitsToRemove);
|
||||
digitsConsumed.truncate();
|
||||
}
|
||||
prevValidSecondary = true;
|
||||
currValidPrimary = true;
|
||||
}
|
||||
if (currGroupSepType != 2 && (!prevValidSecondary || !currValidPrimary)) {
|
||||
// Grouping failure.
|
||||
digitsConsumed.bogus = true;
|
||||
}
|
||||
|
||||
// Strings that start with a separator but have no digits,
|
||||
// or strings that failed a grouping size check.
|
||||
if (digitsConsumed.bogus) {
|
||||
maybeMore = maybeMore || (segment.length() == 0);
|
||||
segment.setOffset(initialOffset);
|
||||
return maybeMore;
|
||||
}
|
||||
|
||||
if (result.quantity.bogus && segment.getOffset() != initialOffset) {
|
||||
// Strings that start with a separator but have no digits.
|
||||
// We don't need a backup of ParsedNumber because no changes could have been made to it.
|
||||
segment.setOffset(initialOffset);
|
||||
hasPartialPrefix = true;
|
||||
}
|
||||
// We passed all inspections. Start post-processing.
|
||||
|
||||
if (!result.quantity.bogus) {
|
||||
// The final separator was a decimal separator.
|
||||
result.quantity.adjustMagnitude(-digitsAfterDecimal);
|
||||
}
|
||||
// Adjust for fraction part.
|
||||
digitsConsumed.adjustMagnitude(-digitsAfterDecimalPlace);
|
||||
|
||||
// Set the digits, either normal or exponent.
|
||||
if (exponentSign != 0 && segment.getOffset() != initialOffset) {
|
||||
U_ASSERT(!result.quantity.bogus);
|
||||
bool overflow = (exponent == INT32_MAX);
|
||||
if (!overflow) {
|
||||
overflow = result.quantity.adjustMagnitude(exponentSign * exponent);
|
||||
bool overflow = false;
|
||||
if (digitsConsumed.fitsInLong()) {
|
||||
long exponentLong = digitsConsumed.toLong(false);
|
||||
U_ASSERT(exponentLong >= 0);
|
||||
if (exponentLong <= INT32_MAX) {
|
||||
auto exponentInt = static_cast<int32_t>(exponentLong);
|
||||
if (result.quantity.adjustMagnitude(exponentSign * exponentInt)) {
|
||||
overflow = true;
|
||||
}
|
||||
} else {
|
||||
overflow = true;
|
||||
}
|
||||
} else {
|
||||
overflow = true;
|
||||
}
|
||||
if (overflow) {
|
||||
if (exponentSign == -1) {
|
||||
|
@ -331,9 +378,51 @@ bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t
|
|||
result.flags |= FLAG_INFINITY;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
result.quantity = digitsConsumed;
|
||||
}
|
||||
|
||||
return segment.length() == 0 || hasPartialPrefix;
|
||||
// Set other information into the result and return.
|
||||
if (!actualDecimalString.isBogus()) {
|
||||
result.flags |= FLAG_HAS_DECIMAL_SEPARATOR;
|
||||
}
|
||||
result.setCharsConsumed(segment);
|
||||
return segment.length() == 0 || maybeMore;
|
||||
}
|
||||
|
||||
/**
 * Checks whether a group of digits has an acceptable size for its position in the number.
 *
 * @param sepType Kind of separator that introduced the group:
 *     -1 = no such group, 0 = group at the beginning of the string,
 *     1 = group led by a grouping separator, 2 = group led by a decimal separator.
 * @param count Number of digits in the group.
 * @param isPrimary If true, a group led by a grouping separator is compared against grouping1;
 *     if false, it is compared against grouping2.
 * @return true if the group passes validation, false otherwise.
 *
 * When requireGroupingMatch is set: a leading group (sepType 0) checked as non-primary must be
 * non-empty and no longer than grouping2, and a group led by a grouping separator must match
 * grouping1 or grouping2 exactly. When requireGroupingMatch is not set, the only rejected case
 * is a group led by a grouping separator that contains exactly one digit.
 */
bool DecimalMatcher::validateGroup(int32_t sepType, int32_t count, bool isPrimary) const {
|
||||
if (requireGroupingMatch) {
|
||||
if (sepType == -1) {
|
||||
// No such group (prevGroup before first shift).
|
||||
return true;
|
||||
} else if (sepType == 0) {
|
||||
// First group.
|
||||
if (isPrimary) {
|
||||
// No grouping separators is OK.
|
||||
return true;
|
||||
} else {
|
||||
return count != 0 && count <= grouping2;
|
||||
}
|
||||
} else if (sepType == 1) {
|
||||
// Middle group.
|
||||
if (isPrimary) {
|
||||
return count == grouping1;
|
||||
} else {
|
||||
return count == grouping2;
|
||||
}
|
||||
} else {
|
||||
U_ASSERT(sepType == 2);
|
||||
// After the decimal separator.
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
if (sepType == 1) {
|
||||
// #11230: don't accept middle groups with only 1 digit.
|
||||
return count != 1;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool DecimalMatcher::smokeTest(const StringSegment& segment) const {
|
||||
|
|
|
@ -38,8 +38,9 @@ class DecimalMatcher : public NumberParseMatcher, public UMemory {
|
|||
/** If true, do not accept grouping separators at all */
|
||||
bool groupingDisabled;
|
||||
|
||||
/** If true, do not accept fraction grouping separators */
|
||||
bool fractionGroupingDisabled;
|
||||
// Fraction grouping parsing is disabled for now but could be enabled later.
|
||||
// See http://bugs.icu-project.org/trac/ticket/10794
|
||||
// bool fractionGrouping;
|
||||
|
||||
/** If true, do not accept numbers in the fraction */
|
||||
bool integerOnly;
|
||||
|
@ -62,6 +63,8 @@ class DecimalMatcher : public NumberParseMatcher, public UMemory {
|
|||
LocalPointer<const UnicodeSet> fLocalDecimalUniSet;
|
||||
LocalPointer<const UnicodeSet> fLocalSeparatorSet;
|
||||
LocalArray<const UnicodeString> fLocalDigitStrings;
|
||||
|
||||
bool validateGroup(int32_t sepType, int32_t count, bool isPrimary) const;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -101,8 +101,6 @@ NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatPr
|
|||
Grouper grouper = Grouper::forProperties(properties);
|
||||
int parseFlags = 0;
|
||||
if (affixProvider == nullptr || U_FAILURE(status)) { return nullptr; }
|
||||
// Fraction grouping is disabled by default because it has never been supported in DecimalFormat
|
||||
parseFlags |= PARSE_FLAG_FRACTION_GROUPING_DISABLED;
|
||||
if (!properties.parseCaseSensitive) {
|
||||
parseFlags |= PARSE_FLAG_IGNORE_CASE;
|
||||
}
|
||||
|
|
|
@ -41,12 +41,13 @@ enum ParseFlags {
|
|||
PARSE_FLAG_STRICT_GROUPING_SIZE = 0x0008,
|
||||
PARSE_FLAG_INTEGER_ONLY = 0x0010,
|
||||
PARSE_FLAG_GROUPING_DISABLED = 0x0020,
|
||||
PARSE_FLAG_FRACTION_GROUPING_DISABLED = 0x0040,
|
||||
// PARSE_FLAG_FRACTION_GROUPING_ENABLED = 0x0040, // see #10794
|
||||
PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES = 0x0080,
|
||||
PARSE_FLAG_USE_FULL_AFFIXES = 0x0100,
|
||||
PARSE_FLAG_EXACT_AFFIX = 0x0200,
|
||||
PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400,
|
||||
// PARSE_FLAG_OPTIMIZE = 0x0800, // no longer used
|
||||
// PARSE_FLAG_FORCE_BIG_DECIMAL = 0x1000, // not used in ICU4C
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -48,6 +48,13 @@ void NumberParserTest::testBasic() {
|
|||
{7, u"51,423", u"#,##,##0", 6, 51423.},
|
||||
{7, u" 51,423", u"#,##,##0", 7, 51423.},
|
||||
{7, u"51,423 ", u"#,##,##0", 6, 51423.},
|
||||
{7, u"51,423,", u"#,##,##0", 6, 51423.},
|
||||
{7, u"51,423,,", u"#,##,##0", 6, 51423.},
|
||||
{7, u"51,423.5", u"#,##,##0", 8, 51423.5},
|
||||
{7, u"51,423.5,", u"#,##,##0", 8, 51423.5},
|
||||
{7, u"51,423.5,,", u"#,##,##0", 8, 51423.5},
|
||||
{7, u"51,423.5.", u"#,##,##0", 8, 51423.5},
|
||||
{7, u"51,423.5..", u"#,##,##0", 8, 51423.5},
|
||||
{7, u"𝟱𝟭,𝟰𝟮𝟯", u"#,##,##0", 11, 51423.},
|
||||
{7, u"𝟳,𝟴𝟵,𝟱𝟭,𝟰𝟮𝟯", u"#,##,##0", 19, 78951423.},
|
||||
{7, u"𝟳𝟴,𝟵𝟱𝟭.𝟰𝟮𝟯", u"#,##,##0", 18, 78951.423},
|
||||
|
@ -55,6 +62,18 @@ void NumberParserTest::testBasic() {
|
|||
{7, u"𝟳𝟴,𝟬𝟬𝟬.𝟬𝟬𝟬", u"#,##,##0", 18, 78000.},
|
||||
{7, u"𝟳𝟴,𝟬𝟬𝟬.𝟬𝟮𝟯", u"#,##,##0", 18, 78000.023},
|
||||
{7, u"𝟳𝟴.𝟬𝟬𝟬.𝟬𝟮𝟯", u"#,##,##0", 11, 78.},
|
||||
{7, u"1,", u"#,##,##0", 1, 1.},
|
||||
{7, u"1,,", u"#,##,##0", 1, 1.},
|
||||
{7, u"1.,", u"#,##,##0", 2, 1.},
|
||||
{3, u"1,.", u"#,##,##0", 3, 1.},
|
||||
{7, u"1..", u"#,##,##0", 2, 1.},
|
||||
{3, u",1", u"#,##,##0", 2, 1.},
|
||||
{3, u"1,1", u"#,##,##0", 1, 1.},
|
||||
{3, u"1,1,", u"#,##,##0", 1, 1.},
|
||||
{3, u"1,1,,", u"#,##,##0", 1, 1.},
|
||||
{3, u"1,1,1", u"#,##,##0", 1, 1.},
|
||||
{3, u"1,1,1,", u"#,##,##0", 1, 1.},
|
||||
{3, u"1,1,1,,", u"#,##,##0", 1, 1.},
|
||||
{3, u"-51423", u"0", 6, -51423.},
|
||||
{3, u"51423-", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after
|
||||
{3, u"+51423", u"0", 6, 51423.},
|
||||
|
|
|
@ -760,8 +760,10 @@ parse output breaks
|
|||
// JDK stops parsing at the spaces. JDK doesn't see space as a grouping separator
|
||||
(34 25E-1) -342.5 K
|
||||
(34,,25E-1) -342.5
|
||||
// H doesn't allow trailing separators before E but C and P do
|
||||
(34,,25,E-1) -342.5 CHJP
|
||||
// Trailing grouping separators are not OK.
|
||||
// H fails; C/J/P stop at the offending separator.
|
||||
(34,,25,E-1) fail CJKP
|
||||
(34,,25,E-1) -3425 HK
|
||||
(34 25 E-1) -342.5 HK
|
||||
(34,,25 E-1) -342.5 HK
|
||||
// Spaces are not allowed after exponent symbol
|
||||
|
@ -999,7 +1001,7 @@ parse output breaks
|
|||
१३ 13
|
||||
१३.३१ 13.31
|
||||
123'456 123456
|
||||
524'1.3 5241.3
|
||||
524'11.3 52411.3
|
||||
३'११ 311
|
||||
|
||||
test parse with European-style comma/period
|
||||
|
@ -1442,8 +1444,8 @@ NaN NaN K
|
|||
1E2147483646 1E+2147483646 HJK
|
||||
1E-2147483649 0
|
||||
1E-2147483648 0
|
||||
// H, K, C and P return zero here
|
||||
1E-2147483647 1E-2147483647 CHJKP
|
||||
// H and K return zero here
|
||||
1E-2147483647 1E-2147483647 HJK
|
||||
1E-2147483646 1E-2147483646 HJK
|
||||
|
||||
test format push limits
|
||||
|
@ -1476,26 +1478,43 @@ pattern lenient parse output breaks
|
|||
#,##0 1 9 99 999 K
|
||||
#,##0 1 9 999 9999 K
|
||||
#,##0 1 9 9 9 9 H
|
||||
#,##0 1 ,9 fail HK
|
||||
#,##0 1 ,9 9
|
||||
#,##0 1 99,.0 99
|
||||
#,##0 1 9 9. 9 H
|
||||
#,##0 1 9 99. 999 K
|
||||
0 1 9 9 9
|
||||
0 1 9 99 9
|
||||
0 1 9 999 9
|
||||
0 1 9 9 9 9
|
||||
0 1 ,9 fail
|
||||
0 1 99,.0 99
|
||||
0 1 9 9. 9
|
||||
0 1 9 99. 9
|
||||
#,##0 0 9 9 fail K
|
||||
#,##0 0 9 99 fail K
|
||||
#,##0 0 9 999 9999 K
|
||||
#,##0 0 9 9 9 fail K
|
||||
#,##0 0 ,9 fail K
|
||||
#,##0 0 99,.0 fail K
|
||||
#,##0 0 9 9. fail K
|
||||
#,##0 0 9 99. fail K
|
||||
0 0 9 9 9
|
||||
0 0 9 99 9
|
||||
0 0 9 999 9
|
||||
0 0 9 9 9 9
|
||||
0 0 ,9 fail
|
||||
0 0 99,.0 99
|
||||
0 0 9 9. 9
|
||||
0 0 9 99. 9
|
||||
|
||||
test more strict grouping parse
|
||||
set locale en
|
||||
set pattern #,##,##0
|
||||
begin
|
||||
lenient parse output breaks
|
||||
1 1,23,, 123
|
||||
0 9999, 9999
|
||||
0 1,23,, fail K
|
||||
|
||||
test parse ignorables
|
||||
set locale ar
|
||||
|
|
|
@ -23,8 +23,9 @@ public class DecimalMatcher implements NumberParseMatcher {
|
|||
/** If true, do not accept grouping separators at all */
|
||||
private final boolean groupingDisabled;
|
||||
|
||||
/** If true, do not accept fraction grouping separators */
|
||||
private final boolean fractionGroupingDisabled;
|
||||
// Fraction grouping parsing is disabled for now but could be enabled later.
|
||||
// See http://bugs.icu-project.org/trac/ticket/10794
|
||||
// private final boolean fractionGrouping;
|
||||
|
||||
/** If true, do not accept numbers in the fraction */
|
||||
private final boolean integerOnly;
|
||||
|
@ -93,11 +94,13 @@ public class DecimalMatcher implements NumberParseMatcher {
|
|||
|
||||
requireGroupingMatch = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE);
|
||||
groupingDisabled = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_GROUPING_DISABLED);
|
||||
fractionGroupingDisabled = 0 != (parseFlags
|
||||
& ParsingUtils.PARSE_FLAG_FRACTION_GROUPING_DISABLED);
|
||||
integerOnly = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_INTEGER_ONLY);
|
||||
grouping1 = grouper.getPrimary();
|
||||
grouping2 = grouper.getSecondary();
|
||||
|
||||
// Fraction grouping parsing is disabled for now but could be enabled later.
|
||||
// See http://bugs.icu-project.org/trac/ticket/10794
|
||||
// fractionGrouping = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_FRACTION_GROUPING_ENABLED);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -120,28 +123,46 @@ public class DecimalMatcher implements NumberParseMatcher {
|
|||
assert result.quantity != null;
|
||||
}
|
||||
|
||||
ParsedNumber backupResult = null;
|
||||
if (requireGroupingMatch) {
|
||||
backupResult = new ParsedNumber();
|
||||
backupResult.copyFrom(result);
|
||||
}
|
||||
|
||||
// strict parsing
|
||||
boolean strictFail = false; // did we exit with a strict parse failure?
|
||||
String actualGroupingString = groupingSeparator;
|
||||
String actualDecimalString = decimalSeparator;
|
||||
int groupedDigitCount = 0; // tracking count of digits delimited by grouping separator
|
||||
int backupOffset = -1; // used for preserving the last confirmed position
|
||||
int smallGroupBackupOffset = -1; // used to back up behind groups of size 1
|
||||
boolean afterFirstGrouping = false;
|
||||
boolean seenGrouping = false;
|
||||
boolean seenDecimal = false;
|
||||
int digitsAfterDecimal = 0;
|
||||
// Initial offset before any character consumption.
|
||||
int initialOffset = segment.getOffset();
|
||||
int exponent = 0;
|
||||
boolean hasPartialPrefix = false;
|
||||
|
||||
// Return value: whether to ask for more characters.
|
||||
boolean maybeMore = false;
|
||||
|
||||
// All digits consumed so far.
|
||||
DecimalQuantity_DualStorageBCD digitsConsumed = null;
|
||||
|
||||
// The total number of digits after the decimal place, used for scaling the result.
|
||||
int digitsAfterDecimalPlace = 0;
|
||||
|
||||
// The actual grouping and decimal separators used in the string.
|
||||
// If non-null, we have seen that token.
|
||||
String actualGroupingString = null;
|
||||
String actualDecimalString = null;
|
||||
|
||||
// Information for two groups: the previous group and the current group.
|
||||
//
|
||||
// Each group has three pieces of information:
|
||||
//
|
||||
// Offset: the string position of the beginning of the group, including a leading separator
|
||||
// if there was a leading separator. This is needed in case we need to rewind the parse to
|
||||
// that position.
|
||||
//
|
||||
// Separator type:
|
||||
// 0 => beginning of string
|
||||
// 1 => lead separator is a grouping separator
|
||||
// 2 => lead separator is a decimal separator
|
||||
//
|
||||
// Count: the number of digits in the group. If -1, the group has been validated.
|
||||
int currGroupOffset = 0;
|
||||
int currGroupSepType = 0;
|
||||
int currGroupCount = 0;
|
||||
int prevGroupOffset = -1;
|
||||
int prevGroupSepType = -1;
|
||||
int prevGroupCount = -1;
|
||||
|
||||
while (segment.length() > 0) {
|
||||
hasPartialPrefix = false;
|
||||
maybeMore = false;
|
||||
|
||||
// Attempt to match a digit.
|
||||
byte digit = -1;
|
||||
|
@ -162,194 +183,207 @@ public class DecimalMatcher implements NumberParseMatcher {
|
|||
segment.adjustOffset(overlap);
|
||||
digit = (byte) i;
|
||||
break;
|
||||
} else if (overlap == segment.length()) {
|
||||
hasPartialPrefix = true;
|
||||
}
|
||||
maybeMore = maybeMore || (overlap == segment.length());
|
||||
}
|
||||
}
|
||||
|
||||
if (digit >= 0) {
|
||||
// Digit was found.
|
||||
// Check for grouping size violation
|
||||
if (backupOffset != -1) {
|
||||
smallGroupBackupOffset = backupOffset;
|
||||
backupOffset = -1;
|
||||
if (requireGroupingMatch) {
|
||||
// comma followed by digit, so group before comma is a secondary
|
||||
// group. If there was a group separator before that, the group
|
||||
// must == the secondary group length, else it can be <= the
|
||||
// secondary group length.
|
||||
if ((afterFirstGrouping && groupedDigitCount != grouping2)
|
||||
|| (!afterFirstGrouping && groupedDigitCount > grouping2)) {
|
||||
strictFail = true;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// #11230: don't accept groups after the first with only 1 digit.
|
||||
// The logic to back up and remove the lone digit is lower down.
|
||||
if (afterFirstGrouping && groupedDigitCount == 1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
afterFirstGrouping = true;
|
||||
groupedDigitCount = 0;
|
||||
if (digitsConsumed == null) {
|
||||
digitsConsumed = new DecimalQuantity_DualStorageBCD();
|
||||
}
|
||||
|
||||
// Save the digit in the DecimalQuantity or scientific adjustment.
|
||||
if (exponentSign != 0) {
|
||||
int nextExponent = digit + exponent * 10;
|
||||
if (nextExponent < exponent) {
|
||||
// Overflow
|
||||
exponent = Integer.MAX_VALUE;
|
||||
} else {
|
||||
exponent = nextExponent;
|
||||
}
|
||||
} else {
|
||||
if (result.quantity == null) {
|
||||
result.quantity = new DecimalQuantity_DualStorageBCD();
|
||||
}
|
||||
result.quantity.appendDigit(digit, 0, true);
|
||||
}
|
||||
result.setCharsConsumed(segment);
|
||||
groupedDigitCount++;
|
||||
if (seenDecimal) {
|
||||
digitsAfterDecimal++;
|
||||
digitsConsumed.appendDigit(digit, 0, true);
|
||||
currGroupCount++;
|
||||
if (actualDecimalString != null) {
|
||||
digitsAfterDecimalPlace++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Attempt to match a literal grouping or decimal separator
|
||||
int decimalOverlap = segment.getCommonPrefixLength(actualDecimalString);
|
||||
boolean decimalStringMatch = decimalOverlap == actualDecimalString.length();
|
||||
int groupingOverlap = segment.getCommonPrefixLength(actualGroupingString);
|
||||
boolean groupingStringMatch = groupingOverlap == actualGroupingString.length();
|
||||
// Attempt to match a literal grouping or decimal separator.
|
||||
boolean isDecimal = false;
|
||||
boolean isGrouping = false;
|
||||
|
||||
hasPartialPrefix = (decimalOverlap == segment.length())
|
||||
|| (groupingOverlap == segment.length());
|
||||
|
||||
if (!seenDecimal
|
||||
&& !groupingStringMatch
|
||||
&& (decimalStringMatch || (!seenDecimal && decimalUniSet.contains(cp)))) {
|
||||
// matched a decimal separator
|
||||
if (requireGroupingMatch) {
|
||||
if (backupOffset != -1 || (seenGrouping && groupedDigitCount != grouping1)) {
|
||||
strictFail = true;
|
||||
break;
|
||||
}
|
||||
// 1) Attempt the decimal separator string literal.
|
||||
// if (we have not seen a decimal separator yet) { ... }
|
||||
if (actualDecimalString == null) {
|
||||
int overlap = segment.getCommonPrefixLength(decimalSeparator);
|
||||
maybeMore = maybeMore || (overlap == segment.length());
|
||||
if (overlap == decimalSeparator.length()) {
|
||||
isDecimal = true;
|
||||
actualDecimalString = decimalSeparator;
|
||||
}
|
||||
}
|
||||
|
||||
// If we're only parsing integers, then don't parse this one.
|
||||
if (integerOnly) {
|
||||
break;
|
||||
// 2) Attempt to match the actual grouping string literal.
|
||||
if (actualGroupingString != null) {
|
||||
int overlap = segment.getCommonPrefixLength(actualGroupingString);
|
||||
maybeMore = maybeMore || (overlap == segment.length());
|
||||
if (overlap == actualGroupingString.length()) {
|
||||
isGrouping = true;
|
||||
}
|
||||
}
|
||||
|
||||
seenDecimal = true;
|
||||
if (!decimalStringMatch) {
|
||||
// 2.5) Attempt to match a new grouping separator string literal.
|
||||
// if (we have not seen a grouping or decimal separator yet) { ... }
|
||||
if (!groupingDisabled && actualGroupingString == null && actualDecimalString == null) {
|
||||
int overlap = segment.getCommonPrefixLength(groupingSeparator);
|
||||
maybeMore = maybeMore || (overlap == segment.length());
|
||||
if (overlap == groupingSeparator.length()) {
|
||||
isGrouping = true;
|
||||
actualGroupingString = groupingSeparator;
|
||||
}
|
||||
}
|
||||
|
||||
// 3) Attempt to match a decimal separator from the equivalence set.
|
||||
// if (we have not seen a decimal separator yet) { ... }
|
||||
// The !isGrouping is to confirm that we haven't yet matched the current character.
|
||||
if (!isGrouping && actualDecimalString == null) {
|
||||
if (decimalUniSet.contains(cp)) {
|
||||
isDecimal = true;
|
||||
actualDecimalString = UCharacter.toString(cp);
|
||||
}
|
||||
segment.adjustOffset(actualDecimalString.length());
|
||||
result.setCharsConsumed(segment);
|
||||
result.flags |= ParsedNumber.FLAG_HAS_DECIMAL_SEPARATOR;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!groupingDisabled
|
||||
&& !decimalStringMatch
|
||||
&& (groupingStringMatch || (!seenGrouping && groupingUniSet.contains(cp)))) {
|
||||
// matched a grouping separator
|
||||
if (requireGroupingMatch) {
|
||||
if (groupedDigitCount == 0) {
|
||||
// leading group
|
||||
strictFail = true;
|
||||
break;
|
||||
} else if (backupOffset != -1) {
|
||||
// two group separators in a row
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (fractionGroupingDisabled && seenDecimal) {
|
||||
// Stop parsing here.
|
||||
break;
|
||||
}
|
||||
|
||||
seenGrouping = true;
|
||||
if (!groupingStringMatch) {
|
||||
// 4) Attempt to match a grouping separator from the equivalence set.
|
||||
// if (we have not seen a grouping or decimal separator yet) { ... }
|
||||
if (!groupingDisabled && actualGroupingString == null && actualDecimalString == null) {
|
||||
if (groupingUniSet.contains(cp)) {
|
||||
isGrouping = true;
|
||||
actualGroupingString = UCharacter.toString(cp);
|
||||
}
|
||||
backupOffset = segment.getOffset();
|
||||
segment.adjustOffset(actualGroupingString.length());
|
||||
// Note: do NOT set charsConsumed
|
||||
continue;
|
||||
}
|
||||
|
||||
// Not a digit and not a separator
|
||||
break;
|
||||
}
|
||||
|
||||
// Back up if there was a trailing grouping separator
|
||||
if (backupOffset != -1) {
|
||||
segment.setOffset(backupOffset);
|
||||
hasPartialPrefix = true; // redundant with `groupingOverlap == segment.length()`
|
||||
}
|
||||
|
||||
// Check the final grouping for validity
|
||||
if (requireGroupingMatch
|
||||
&& !seenDecimal
|
||||
&& seenGrouping
|
||||
&& afterFirstGrouping
|
||||
&& groupedDigitCount != grouping1) {
|
||||
strictFail = true;
|
||||
}
|
||||
|
||||
// #11230: don't accept groups after the first with only 1 digit.
|
||||
// Behavior in this case is to back up before that 1-digit group.
|
||||
if (!seenDecimal && afterFirstGrouping && groupedDigitCount == 1) {
|
||||
if (segment.length() == 0) {
|
||||
// Strings like "9,999" where we looked at only the first 3 chars.
|
||||
// Ask for a longer segment.
|
||||
hasPartialPrefix = true;
|
||||
// Leave if we failed to match this as a separator.
|
||||
if (!isDecimal && !isGrouping) {
|
||||
break;
|
||||
}
|
||||
segment.setOffset(smallGroupBackupOffset);
|
||||
result.setCharsConsumed(segment);
|
||||
if (smallGroupBackupOffset == initialOffset) {
|
||||
// Strings like ",9"
|
||||
// Reset to no quantity seen.
|
||||
result.quantity = null;
|
||||
|
||||
// Check for conditions when we don't want to accept the separator.
|
||||
if (isDecimal && integerOnly) {
|
||||
break;
|
||||
} else if (currGroupSepType == 2 && isGrouping) {
|
||||
// Fraction grouping
|
||||
break;
|
||||
}
|
||||
|
||||
// Validate intermediate grouping sizes.
|
||||
boolean prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
|
||||
boolean currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
|
||||
if (!prevValidSecondary || (isDecimal && !currValidPrimary)) {
|
||||
// Invalid grouping sizes.
|
||||
if (isGrouping && currGroupCount == 0) {
|
||||
// Trailing grouping separators: these are taken care of below
|
||||
assert currGroupSepType == 1;
|
||||
} else if (requireGroupingMatch) {
|
||||
// Strict mode: reject the parse
|
||||
digitsConsumed = null;
|
||||
}
|
||||
break;
|
||||
} else if (requireGroupingMatch && currGroupCount == 0 && currGroupSepType == 1) {
|
||||
break;
|
||||
} else {
|
||||
// Strings like "9,9"
|
||||
// Remove the lone digit from the result quantity.
|
||||
assert result.quantity != null;
|
||||
result.quantity.adjustMagnitude(-1);
|
||||
result.quantity.truncate();
|
||||
// Grouping sizes OK so far.
|
||||
prevGroupOffset = currGroupOffset;
|
||||
prevGroupCount = currGroupCount;
|
||||
if (isDecimal) {
|
||||
// Do not validate this group any more.
|
||||
prevGroupSepType = -1;
|
||||
} else {
|
||||
prevGroupSepType = currGroupSepType;
|
||||
}
|
||||
}
|
||||
|
||||
// OK to accept the separator.
|
||||
// Special case: don't update currGroup if it is empty. This is to allow
|
||||
// adjacent grouping separators in lenient mode: "1,,234"
|
||||
if (currGroupCount != 0) {
|
||||
currGroupOffset = segment.getOffset();
|
||||
}
|
||||
currGroupSepType = isGrouping ? 1 : 2;
|
||||
currGroupCount = 0;
|
||||
if (isGrouping) {
|
||||
segment.adjustOffset(actualGroupingString.length());
|
||||
} else {
|
||||
segment.adjustOffset(actualDecimalString.length());
|
||||
}
|
||||
}
|
||||
|
||||
if (requireGroupingMatch && strictFail) {
|
||||
result.copyFrom(backupResult);
|
||||
// End of main loop.
|
||||
// Back up if there was a trailing grouping separator.
|
||||
// Shift prev -> curr so we can check it as a final group.
|
||||
if (currGroupSepType != 2 && currGroupCount == 0) {
|
||||
maybeMore = true;
|
||||
segment.setOffset(currGroupOffset);
|
||||
currGroupOffset = prevGroupOffset;
|
||||
currGroupSepType = prevGroupSepType;
|
||||
currGroupCount = prevGroupCount;
|
||||
prevGroupOffset = -1;
|
||||
prevGroupSepType = 0;
|
||||
prevGroupCount = 1;
|
||||
}
|
||||
|
||||
// Validate final grouping sizes.
|
||||
boolean prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
|
||||
boolean currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
|
||||
if (!requireGroupingMatch) {
|
||||
// The cases we need to handle here are lone digits.
|
||||
// Examples: "1,1" "1,1," "1,1,1" "1,1,1," ",1" (all parse as 1)
|
||||
// See more examples in numberformattestspecification.txt
|
||||
int digitsToRemove = 0;
|
||||
if (!prevValidSecondary) {
|
||||
segment.setOffset(prevGroupOffset);
|
||||
digitsToRemove += prevGroupCount;
|
||||
digitsToRemove += currGroupCount;
|
||||
} else if (!currValidPrimary && (prevGroupSepType != 0 || prevGroupCount != 0)) {
|
||||
maybeMore = true;
|
||||
segment.setOffset(currGroupOffset);
|
||||
digitsToRemove += currGroupCount;
|
||||
}
|
||||
if (digitsToRemove != 0) {
|
||||
digitsConsumed.adjustMagnitude(-digitsToRemove);
|
||||
digitsConsumed.truncate();
|
||||
}
|
||||
prevValidSecondary = true;
|
||||
currValidPrimary = true;
|
||||
}
|
||||
if (currGroupSepType != 2 && (!prevValidSecondary || !currValidPrimary)) {
|
||||
// Grouping failure.
|
||||
digitsConsumed = null;
|
||||
}
|
||||
|
||||
// Strings that start with a separator but have no digits,
|
||||
// or strings that failed a grouping size check.
|
||||
if (digitsConsumed == null) {
|
||||
maybeMore = maybeMore || (segment.length() == 0);
|
||||
segment.setOffset(initialOffset);
|
||||
return maybeMore;
|
||||
}
|
||||
|
||||
if (result.quantity == null && segment.getOffset() != initialOffset) {
|
||||
// Strings that start with a separator but have no digits.
|
||||
// We don't need a backup of ParsedNumber because no changes could have been made to it.
|
||||
segment.setOffset(initialOffset);
|
||||
hasPartialPrefix = true;
|
||||
}
|
||||
// We passed all inspections. Start post-processing.
|
||||
|
||||
if (result.quantity != null) {
|
||||
// The final separator was a decimal separator.
|
||||
result.quantity.adjustMagnitude(-digitsAfterDecimal);
|
||||
}
|
||||
// Adjust for fraction part.
|
||||
digitsConsumed.adjustMagnitude(-digitsAfterDecimalPlace);
|
||||
|
||||
// Set the digits, either normal or exponent.
|
||||
if (exponentSign != 0 && segment.getOffset() != initialOffset) {
|
||||
boolean overflow = (exponent == Integer.MAX_VALUE);
|
||||
if (!overflow) {
|
||||
try {
|
||||
result.quantity.adjustMagnitude(exponentSign * exponent);
|
||||
} catch (ArithmeticException e) {
|
||||
boolean overflow = false;
|
||||
if (digitsConsumed.fitsInLong()) {
|
||||
long exponentLong = digitsConsumed.toLong(false);
|
||||
assert exponentLong >= 0;
|
||||
if (exponentLong <= Integer.MAX_VALUE) {
|
||||
int exponentInt = (int) exponentLong;
|
||||
try {
|
||||
result.quantity.adjustMagnitude(exponentSign * exponentInt);
|
||||
} catch (ArithmeticException e) {
|
||||
overflow = true;
|
||||
}
|
||||
} else {
|
||||
overflow = true;
|
||||
}
|
||||
} else {
|
||||
overflow = true;
|
||||
}
|
||||
if (overflow) {
|
||||
if (exponentSign == -1) {
|
||||
|
@ -361,9 +395,51 @@ public class DecimalMatcher implements NumberParseMatcher {
|
|||
result.flags |= ParsedNumber.FLAG_INFINITY;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
result.quantity = digitsConsumed;
|
||||
}
|
||||
|
||||
return segment.length() == 0 || hasPartialPrefix;
|
||||
// Set other information into the result and return.
|
||||
if (actualDecimalString != null) {
|
||||
result.flags |= ParsedNumber.FLAG_HAS_DECIMAL_SEPARATOR;
|
||||
}
|
||||
result.setCharsConsumed(segment);
|
||||
return segment.length() == 0 || maybeMore;
|
||||
}
|
||||
|
||||
/**
 * Decides whether a group of digits has an acceptable size.
 *
 * @param sepType
 *            Kind of separator that precedes the group: -1 means there is no such group,
 *            0 means the first group (no separator before it), 1 means the group follows a
 *            grouping separator, and 2 means the digits follow the decimal separator.
 * @param count
 *            Number of digits in the group.
 * @param isPrimary
 *            If true, a group following a grouping separator is compared against grouping1;
 *            if false, it is compared against grouping2.
 * @return true if the group is acceptable. When requireGroupingMatch is set the sizes are
 *         checked against grouping1/grouping2; otherwise the only rejected case is a group
 *         following a grouping separator that contains exactly one digit.
 */
private boolean validateGroup(int sepType, int count, boolean isPrimary) {
|
||||
if (requireGroupingMatch) {
|
||||
if (sepType == -1) {
|
||||
// No such group (prevGroup before first shift).
|
||||
return true;
|
||||
} else if (sepType == 0) {
|
||||
// First group.
|
||||
if (isPrimary) {
|
||||
// No grouping separators is OK.
|
||||
return true;
|
||||
} else {
|
||||
// A first group checked as a secondary group must be non-empty and no longer than grouping2.
return count != 0 && count <= grouping2;
|
||||
}
|
||||
} else if (sepType == 1) {
|
||||
// Middle group.
|
||||
if (isPrimary) {
|
||||
return count == grouping1;
|
||||
} else {
|
||||
return count == grouping2;
|
||||
}
|
||||
} else {
|
||||
assert sepType == 2;
|
||||
// After the decimal separator.
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
if (sepType == 1) {
|
||||
// #11230: don't accept middle groups with only 1 digit.
|
||||
return count != 1;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -147,8 +147,6 @@ public class NumberParserImpl {
|
|||
boolean isStrict = properties.getParseMode() == ParseMode.STRICT;
|
||||
Grouper grouper = Grouper.forProperties(properties);
|
||||
int parseFlags = 0;
|
||||
// Fraction grouping is disabled by default because it has never been supported in DecimalFormat
|
||||
parseFlags |= ParsingUtils.PARSE_FLAG_FRACTION_GROUPING_DISABLED;
|
||||
if (!properties.getParseCaseSensitive()) {
|
||||
parseFlags |= ParsingUtils.PARSE_FLAG_IGNORE_CASE;
|
||||
}
|
||||
|
|
|
@ -16,7 +16,7 @@ public class ParsingUtils {
|
|||
public static final int PARSE_FLAG_STRICT_GROUPING_SIZE = 0x0008;
|
||||
public static final int PARSE_FLAG_INTEGER_ONLY = 0x0010;
|
||||
public static final int PARSE_FLAG_GROUPING_DISABLED = 0x0020;
|
||||
public static final int PARSE_FLAG_FRACTION_GROUPING_DISABLED = 0x0040;
|
||||
// public static final int PARSE_FLAG_FRACTION_GROUPING_ENABLED = 0x0040; // see #10794
|
||||
public static final int PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES = 0x0080;
|
||||
public static final int PARSE_FLAG_USE_FULL_AFFIXES = 0x0100;
|
||||
public static final int PARSE_FLAG_EXACT_AFFIX = 0x0200;
|
||||
|
|
|
@ -760,8 +760,10 @@ parse output breaks
|
|||
// JDK stops parsing at the spaces. JDK doesn't see space as a grouping separator
|
||||
(34 25E-1) -342.5 K
|
||||
(34,,25E-1) -342.5
|
||||
// H doesn't allow trailing separators before E but C and P do
|
||||
(34,,25,E-1) -342.5 CHJP
|
||||
// Trailing grouping separators are not OK.
|
||||
// H fails; C/J/P stop at the offending separator.
|
||||
(34,,25,E-1) fail CJKP
|
||||
(34,,25,E-1) -3425 HK
|
||||
(34 25 E-1) -342.5 HK
|
||||
(34,,25 E-1) -342.5 HK
|
||||
// Spaces are not allowed after exponent symbol
|
||||
|
@ -999,7 +1001,7 @@ parse output breaks
|
|||
१३ 13
|
||||
१३.३१ 13.31
|
||||
123'456 123456
|
||||
524'1.3 5241.3
|
||||
524'11.3 52411.3
|
||||
३'११ 311
|
||||
|
||||
test parse with European-style comma/period
|
||||
|
@ -1442,8 +1444,8 @@ NaN NaN K
|
|||
1E2147483646 1E+2147483646 HJK
|
||||
1E-2147483649 0
|
||||
1E-2147483648 0
|
||||
// H, K, C and P return zero here
|
||||
1E-2147483647 1E-2147483647 CHJKP
|
||||
// H and K return zero here
|
||||
1E-2147483647 1E-2147483647 HJK
|
||||
1E-2147483646 1E-2147483646 HJK
|
||||
|
||||
test format push limits
|
||||
|
@ -1476,26 +1478,43 @@ pattern lenient parse output breaks
|
|||
#,##0 1 9 99 999 K
|
||||
#,##0 1 9 999 9999 K
|
||||
#,##0 1 9 9 9 9 H
|
||||
#,##0 1 ,9 fail HK
|
||||
#,##0 1 ,9 9
|
||||
#,##0 1 99,.0 99
|
||||
#,##0 1 9 9. 9 H
|
||||
#,##0 1 9 99. 999 K
|
||||
0 1 9 9 9
|
||||
0 1 9 99 9
|
||||
0 1 9 999 9
|
||||
0 1 9 9 9 9
|
||||
0 1 ,9 fail
|
||||
0 1 99,.0 99
|
||||
0 1 9 9. 9
|
||||
0 1 9 99. 9
|
||||
#,##0 0 9 9 fail K
|
||||
#,##0 0 9 99 fail K
|
||||
#,##0 0 9 999 9999 K
|
||||
#,##0 0 9 9 9 fail K
|
||||
#,##0 0 ,9 fail K
|
||||
#,##0 0 99,.0 fail K
|
||||
#,##0 0 9 9. fail K
|
||||
#,##0 0 9 99. fail K
|
||||
0 0 9 9 9
|
||||
0 0 9 99 9
|
||||
0 0 9 999 9
|
||||
0 0 9 9 9 9
|
||||
0 0 ,9 fail
|
||||
0 0 99,.0 99
|
||||
0 0 9 9. 9
|
||||
0 0 9 99. 9
|
||||
|
||||
test more strict grouping parse
|
||||
set locale en
|
||||
set pattern #,##,##0
|
||||
begin
|
||||
lenient parse output breaks
|
||||
1 1,23,, 123
|
||||
0 9999, 9999
|
||||
0 1,23,, fail K
|
||||
|
||||
test parse ignorables
|
||||
set locale ar
|
||||
|
|
|
@ -2875,6 +2875,8 @@ public class NumberFormatTest extends TestFmwk {
|
|||
"1,2", // wrong number of digits after group separator
|
||||
",.02", // leading group separator before decimal
|
||||
"1,.02", // group separator before decimal
|
||||
",0", // leading group separator before a single digit
|
||||
",1", // leading group separator before a single digit
|
||||
"1,45", // wrong number of digits in primary group
|
||||
"1,45 that", // wrong number of digits in primary group
|
||||
"1,45.34", // wrong number of digits in primary group
|
||||
|
@ -2884,8 +2886,6 @@ public class NumberFormatTest extends TestFmwk {
|
|||
};
|
||||
// Fail both lenient and strict:
|
||||
String[] failBoth = {
|
||||
",0", // leading group separator before a single digit
|
||||
",1", // leading group separator before a single digit
|
||||
};
|
||||
|
||||
DecimalFormat nf = (DecimalFormat) NumberFormat.getInstance(Locale.ENGLISH);
|
||||
|
|
|
@ -54,6 +54,13 @@ public class NumberParserTest {
|
|||
{ 7, "51,423", "#,##,##0", 6, 51423. },
|
||||
{ 7, " 51,423", "#,##,##0", 7, 51423. },
|
||||
{ 7, "51,423 ", "#,##,##0", 6, 51423. },
|
||||
{ 7, "51,423,", "#,##,##0", 6, 51423. },
|
||||
{ 7, "51,423,,", "#,##,##0", 6, 51423. },
|
||||
{ 7, "51,423.5", "#,##,##0", 8, 51423.5 },
|
||||
{ 7, "51,423.5,", "#,##,##0", 8, 51423.5 },
|
||||
{ 7, "51,423.5,,", "#,##,##0", 8, 51423.5 },
|
||||
{ 7, "51,423.5.", "#,##,##0", 8, 51423.5 },
|
||||
{ 7, "51,423.5..", "#,##,##0", 8, 51423.5 },
|
||||
{ 7, "𝟱𝟭,𝟰𝟮𝟯", "#,##,##0", 11, 51423. },
|
||||
{ 7, "𝟳,𝟴𝟵,𝟱𝟭,𝟰𝟮𝟯", "#,##,##0", 19, 78951423. },
|
||||
{ 7, "𝟳𝟴,𝟵𝟱𝟭.𝟰𝟮𝟯", "#,##,##0", 18, 78951.423 },
|
||||
|
@ -61,6 +68,19 @@ public class NumberParserTest {
|
|||
{ 7, "𝟳𝟴,𝟬𝟬𝟬.𝟬𝟬𝟬", "#,##,##0", 18, 78000. },
|
||||
{ 7, "𝟳𝟴,𝟬𝟬𝟬.𝟬𝟮𝟯", "#,##,##0", 18, 78000.023 },
|
||||
{ 7, "𝟳𝟴.𝟬𝟬𝟬.𝟬𝟮𝟯", "#,##,##0", 11, 78. },
|
||||
{ 7, "1,", "#,##,##0", 1, 1. },
|
||||
{ 7, "1,,", "#,##,##0", 1, 1. },
|
||||
{ 7, "1.,", "#,##,##0", 2, 1. },
|
||||
{ 3, "1,.", "#,##,##0", 3, 1. },
|
||||
{ 7, "1..", "#,##,##0", 2, 1. },
|
||||
{ 3, ",1", "#,##,##0", 2, 1. },
|
||||
{ 3, "1,1", "#,##,##0", 1, 1. },
|
||||
{ 3, "1,1,", "#,##,##0", 1, 1. },
|
||||
{ 3, "1,1,,", "#,##,##0", 1, 1. },
|
||||
{ 3, "1,1,1", "#,##,##0", 1, 1. },
|
||||
{ 3, "1,1,1,", "#,##,##0", 1, 1. },
|
||||
{ 3, "1,1,1,1", "#,##,##0", 1, 1. },
|
||||
{ 3, "1,1,1,,", "#,##,##0", 1, 1. },
|
||||
{ 3, "-51423", "0", 6, -51423. },
|
||||
{ 3, "51423-", "0", 5, 51423. }, // plus and minus sign by default do NOT match after
|
||||
{ 3, "+51423", "0", 6, 51423. },
|
||||
|
|
Loading…
Add table
Reference in a new issue