mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-13804 Making number parsing code more robust when given empty symbol strings.
X-SVN-Rev: 41497
This commit is contained in:
parent
9ecddac2b4
commit
dd7667d330
17 changed files with 223 additions and 22 deletions
|
@ -62,7 +62,7 @@ CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UEr
|
|||
// Try to match a currency spacing separator.
|
||||
int32_t initialOffset = segment.getOffset();
|
||||
bool maybeMore = false;
|
||||
if (result.seenNumber()) {
|
||||
if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) {
|
||||
int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
|
||||
if (overlap == beforeSuffixInsert.length()) {
|
||||
segment.adjustOffset(overlap);
|
||||
|
@ -79,7 +79,7 @@ CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UEr
|
|||
}
|
||||
|
||||
// Try to match a currency spacing separator.
|
||||
if (!result.seenNumber()) {
|
||||
if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) {
|
||||
int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert);
|
||||
if (overlap == afterPrefixInsert.length()) {
|
||||
segment.adjustOffset(overlap);
|
||||
|
@ -95,7 +95,12 @@ bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber
|
|||
UErrorCode& status) const {
|
||||
bool maybeMore = false;
|
||||
|
||||
int32_t overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1);
|
||||
int32_t overlap1;
|
||||
if (!fCurrency1.isEmpty()) {
|
||||
overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1);
|
||||
} else {
|
||||
overlap1 = -1;
|
||||
}
|
||||
maybeMore = maybeMore || overlap1 == segment.length();
|
||||
if (overlap1 == fCurrency1.length()) {
|
||||
utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
|
||||
|
@ -104,7 +109,12 @@ bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber
|
|||
return maybeMore;
|
||||
}
|
||||
|
||||
int32_t overlap2 = segment.getCaseSensitivePrefixLength(fCurrency2);
|
||||
int32_t overlap2;
|
||||
if (!fCurrency2.isEmpty()) {
|
||||
overlap2 = segment.getCaseSensitivePrefixLength(fCurrency2);
|
||||
} else {
|
||||
overlap2 = -1;
|
||||
}
|
||||
maybeMore = maybeMore || overlap2 == segment.length();
|
||||
if (overlap2 == fCurrency2.length()) {
|
||||
utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
|
||||
|
|
|
@ -44,12 +44,14 @@ DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Groupe
|
|||
strictSeparators ? unisets::STRICT_PERIOD : unisets::PERIOD);
|
||||
if (decimalKey >= 0) {
|
||||
decimalUniSet = unisets::get(decimalKey);
|
||||
} else {
|
||||
} else if (!decimalSeparator.isEmpty()) {
|
||||
auto* set = new UnicodeSet();
|
||||
set->add(decimalSeparator.char32At(0));
|
||||
set->freeze();
|
||||
decimalUniSet = set;
|
||||
fLocalDecimalUniSet.adoptInstead(set);
|
||||
} else {
|
||||
decimalUniSet = unisets::get(unisets::EMPTY);
|
||||
}
|
||||
|
||||
if (groupingKey >= 0 && decimalKey >= 0) {
|
||||
|
@ -161,6 +163,9 @@ bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t
|
|||
if (digit == -1 && !fLocalDigitStrings.isNull()) {
|
||||
for (int32_t i = 0; i < 10; i++) {
|
||||
const UnicodeString& str = fLocalDigitStrings[i];
|
||||
if (str.isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
int32_t overlap = segment.getCommonPrefixLength(str);
|
||||
if (overlap == str.length()) {
|
||||
segment.adjustOffset(overlap);
|
||||
|
@ -191,7 +196,7 @@ bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t
|
|||
|
||||
// 1) Attempt the decimal separator string literal.
|
||||
// if (we have not seen a decimal separator yet) { ... }
|
||||
if (actualDecimalString.isBogus()) {
|
||||
if (actualDecimalString.isBogus() && !decimalSeparator.isEmpty()) {
|
||||
int32_t overlap = segment.getCommonPrefixLength(decimalSeparator);
|
||||
maybeMore = maybeMore || (overlap == segment.length());
|
||||
if (overlap == decimalSeparator.length()) {
|
||||
|
@ -211,7 +216,8 @@ bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t
|
|||
|
||||
// 2.5) Attempt to match a new the grouping separator string literal.
|
||||
// if (we have not seen a grouping or decimal separator yet) { ... }
|
||||
if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus()) {
|
||||
if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus() &&
|
||||
!groupingSeparator.isEmpty()) {
|
||||
int32_t overlap = segment.getCommonPrefixLength(groupingSeparator);
|
||||
maybeMore = maybeMore || (overlap == segment.length());
|
||||
if (overlap == groupingSeparator.length()) {
|
||||
|
|
|
@ -57,6 +57,7 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr
|
|||
}
|
||||
|
||||
// First match the scientific separator, and then match another number after it.
|
||||
// NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
|
||||
int overlap1 = segment.getCommonPrefixLength(fExponentSeparatorString);
|
||||
if (overlap1 == fExponentSeparatorString.length()) {
|
||||
// Full exponent separator match.
|
||||
|
@ -75,6 +76,7 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr
|
|||
} else if (segment.startsWith(plusSignSet())) {
|
||||
segment.adjustOffsetByCodePoint();
|
||||
} else if (segment.startsWith(fCustomMinusSign)) {
|
||||
// Note: call site is guarded with startsWith, which returns false on empty string
|
||||
int32_t overlap2 = segment.getCommonPrefixLength(fCustomMinusSign);
|
||||
if (overlap2 != fCustomMinusSign.length()) {
|
||||
// Partial custom sign match; un-match the exponent separator.
|
||||
|
@ -84,6 +86,7 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr
|
|||
exponentSign = -1;
|
||||
segment.adjustOffset(overlap2);
|
||||
} else if (segment.startsWith(fCustomPlusSign)) {
|
||||
// Note: call site is guarded with startsWith, which returns false on empty string
|
||||
int32_t overlap2 = segment.getCommonPrefixLength(fCustomPlusSign);
|
||||
if (overlap2 != fCustomPlusSign.length()) {
|
||||
// Partial custom sign match; un-match the exponent separator.
|
||||
|
|
|
@ -112,6 +112,7 @@ int32_t StringSegment::getCaseSensitivePrefixLength(const UnicodeString& other)
|
|||
}
|
||||
|
||||
int32_t StringSegment::getPrefixLengthInternal(const UnicodeString& other, bool foldCase) {
|
||||
U_ASSERT(other.length() > 0);
|
||||
int32_t offset = 0;
|
||||
for (; offset < uprv_min(length(), other.length());) {
|
||||
// TODO: case-fold code points, not chars
|
||||
|
|
|
@ -247,7 +247,13 @@ class U_I18N_API StringSegment : public UMemory {
|
|||
* since the first 2 characters are the same.
|
||||
*
|
||||
* <p>
|
||||
* This method will perform case folding if case folding is enabled for the parser.
|
||||
* This method only returns offsets along code point boundaries.
|
||||
*
|
||||
* <p>
|
||||
* This method will perform case folding if case folding was enabled in the constructor.
|
||||
*
|
||||
* <p>
|
||||
* IMPORTANT: The given UnicodeString must not be empty! It is the caller's responsibility to check.
|
||||
*/
|
||||
int32_t getCommonPrefixLength(const UnicodeString& other);
|
||||
|
||||
|
|
|
@ -214,6 +214,7 @@ void NumberFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n
|
|||
TESTCASE_AUTO(Test11640_TripleCurrencySymbol);
|
||||
TESTCASE_AUTO(Test13763_FieldPositionIteratorOffset);
|
||||
TESTCASE_AUTO(Test13777_ParseLongNameNonCurrencyMode);
|
||||
TESTCASE_AUTO(Test13804_EmptyStringsWhenParsing);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
@ -9064,4 +9065,80 @@ void NumberFormatTest::Test13777_ParseLongNameNonCurrencyMode() {
|
|||
expect2(*df, 1.5, u"1.50 US dollars");
|
||||
}
|
||||
|
||||
void NumberFormatTest::Test13804_EmptyStringsWhenParsing() {
|
||||
IcuTestErrorCode status(*this, "Test13804_EmptyStringsWhenParsing");
|
||||
|
||||
DecimalFormatSymbols dfs("en", status);
|
||||
if (status.errIfFailureAndReset()) {
|
||||
return;
|
||||
}
|
||||
dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kZeroDigitSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kOneDigitSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kTwoDigitSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kThreeDigitSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kFourDigitSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kFiveDigitSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kSixDigitSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kSevenDigitSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kEightDigitSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kNineDigitSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kExponentMultiplicationSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kExponentialSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kInfinitySymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kMinusSignSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kMonetaryGroupingSeparatorSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kNaNSymbol, u"", FALSE);
|
||||
dfs.setPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, FALSE, u"");
|
||||
dfs.setPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, TRUE, u"");
|
||||
dfs.setSymbol(DecimalFormatSymbols::kPercentSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kPerMillSymbol, u"", FALSE);
|
||||
dfs.setSymbol(DecimalFormatSymbols::kPlusSignSymbol, u"", FALSE);
|
||||
|
||||
DecimalFormat df("0", dfs, status);
|
||||
if (status.errIfFailureAndReset()) {
|
||||
return;
|
||||
}
|
||||
df.setGroupingUsed(TRUE);
|
||||
df.setScientificNotation(TRUE);
|
||||
df.setLenient(TRUE); // enable all matchers
|
||||
{
|
||||
UnicodeString result;
|
||||
df.format(0, result); // should not crash or hit infinite loop
|
||||
}
|
||||
const char16_t* samples[] = {
|
||||
u"",
|
||||
u"123",
|
||||
u"$123",
|
||||
u"-",
|
||||
u"+",
|
||||
u"44%",
|
||||
u"1E+2.3"
|
||||
};
|
||||
for (auto& sample : samples) {
|
||||
logln(UnicodeString(u"Attempting parse on: ") + sample);
|
||||
status.setScope(sample);
|
||||
// We don't care about the results, only that we don't crash and don't loop.
|
||||
Formattable result;
|
||||
ParsePosition ppos(0);
|
||||
df.parse(sample, result, ppos);
|
||||
ppos = ParsePosition(0);
|
||||
df.parseCurrency(sample, ppos);
|
||||
status.errIfFailureAndReset();
|
||||
}
|
||||
|
||||
// Test with a nonempty exponent separator symbol to cover more code
|
||||
dfs.setSymbol(DecimalFormatSymbols::kExponentialSymbol, u"E", FALSE);
|
||||
df.setDecimalFormatSymbols(dfs);
|
||||
{
|
||||
Formattable result;
|
||||
ParsePosition ppos(0);
|
||||
df.parse(u"1E+2.3", result, ppos);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
|
|
@ -278,6 +278,7 @@ class NumberFormatTest: public CalendarTimeZoneTest {
|
|||
void Test11640_TripleCurrencySymbol();
|
||||
void Test13763_FieldPositionIteratorOffset();
|
||||
void Test13777_ParseLongNameNonCurrencyMode();
|
||||
void Test13804_EmptyStringsWhenParsing();
|
||||
|
||||
private:
|
||||
UBool testFormattableAsUFormattable(const char *file, int line, Formattable &f);
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
|
||||
|
||||
import java.util.EnumMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
@ -90,7 +92,7 @@ public class StaticUnicodeSets {
|
|||
* @return key1 if the set contains str, or COUNT if not.
|
||||
*/
|
||||
public static Key chooseFrom(String str, Key key1) {
|
||||
return get(key1).contains(str) ? key1 : null;
|
||||
return safeContains(get(key1), str) ? key1 : null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -108,7 +110,7 @@ public class StaticUnicodeSets {
|
|||
* contains str.
|
||||
*/
|
||||
public static Key chooseFrom(String str, Key key1, Key key2) {
|
||||
return get(key1).contains(str) ? key1 : chooseFrom(str, key2);
|
||||
return safeContains(get(key1), str) ? key1 : chooseFrom(str, key2);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -156,6 +156,9 @@ public class StringSegment implements CharSequence {
|
|||
*
|
||||
* <p>
|
||||
* This method will perform case folding if case folding was enabled in the constructor.
|
||||
*
|
||||
* <p>
|
||||
* IMPORTANT: The given CharSequence must not be empty! It is the caller's responsibility to check.
|
||||
*/
|
||||
public int getCommonPrefixLength(CharSequence other) {
|
||||
return getPrefixLengthInternal(other, foldCase);
|
||||
|
@ -170,8 +173,10 @@ public class StringSegment implements CharSequence {
|
|||
}
|
||||
|
||||
private int getPrefixLengthInternal(CharSequence other, boolean foldCase) {
|
||||
assert other.length() != 0;
|
||||
int offset = 0;
|
||||
for (; offset < Math.min(length(), other.length());) {
|
||||
// TODO: case-fold code points, not chars
|
||||
int cp1 = Character.codePointAt(this, offset);
|
||||
int cp2 = Character.codePointAt(other, offset);
|
||||
if (!codePointsEqual(cp1, cp2, foldCase)) {
|
||||
|
|
|
@ -97,7 +97,7 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
|
|||
// Try to match a currency spacing separator.
|
||||
int initialOffset = segment.getOffset();
|
||||
boolean maybeMore = false;
|
||||
if (result.seenNumber()) {
|
||||
if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) {
|
||||
int overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
|
||||
if (overlap == beforeSuffixInsert.length()) {
|
||||
segment.adjustOffset(overlap);
|
||||
|
@ -114,7 +114,7 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
|
|||
}
|
||||
|
||||
// Try to match a currency spacing separator.
|
||||
if (!result.seenNumber()) {
|
||||
if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) {
|
||||
int overlap = segment.getCommonPrefixLength(afterPrefixInsert);
|
||||
if (overlap == afterPrefixInsert.length()) {
|
||||
segment.adjustOffset(overlap);
|
||||
|
@ -130,7 +130,12 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
|
|||
private boolean matchCurrency(StringSegment segment, ParsedNumber result) {
|
||||
boolean maybeMore = false;
|
||||
|
||||
int overlap1 = segment.getCaseSensitivePrefixLength(currency1);
|
||||
int overlap1;
|
||||
if (!currency1.isEmpty()) {
|
||||
overlap1 = segment.getCaseSensitivePrefixLength(currency1);
|
||||
} else {
|
||||
overlap1 = -1;
|
||||
}
|
||||
maybeMore = maybeMore || overlap1 == segment.length();
|
||||
if (overlap1 == currency1.length()) {
|
||||
result.currencyCode = isoCode;
|
||||
|
@ -139,7 +144,12 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
|
|||
return maybeMore;
|
||||
}
|
||||
|
||||
int overlap2 = segment.getCaseSensitivePrefixLength(currency2);
|
||||
int overlap2;
|
||||
if (!currency2.isEmpty()) {
|
||||
overlap2 = segment.getCaseSensitivePrefixLength(currency2);
|
||||
} else {
|
||||
overlap2 = -1;
|
||||
}
|
||||
maybeMore = maybeMore || overlap2 == segment.length();
|
||||
if (overlap2 == currency2.length()) {
|
||||
result.currencyCode = isoCode;
|
||||
|
@ -169,6 +179,9 @@ public class CombinedCurrencyMatcher implements NumberParseMatcher {
|
|||
int longestFullMatch = 0;
|
||||
for (int i=0; i<StandardPlural.COUNT; i++) {
|
||||
String name = localLongNames[i];
|
||||
if (name.isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
int overlap = segment.getCommonPrefixLength(name);
|
||||
if (overlap == name.length() && name.length() > longestFullMatch) {
|
||||
longestFullMatch = name.length();
|
||||
|
|
|
@ -71,8 +71,10 @@ public class DecimalMatcher implements NumberParseMatcher {
|
|||
strictSeparators ? Key.STRICT_PERIOD : Key.PERIOD);
|
||||
if (decimalKey != null) {
|
||||
decimalUniSet = StaticUnicodeSets.get(decimalKey);
|
||||
} else {
|
||||
} else if (!decimalSeparator.isEmpty()) {
|
||||
decimalUniSet = new UnicodeSet().add(decimalSeparator.codePointAt(0)).freeze();
|
||||
} else {
|
||||
decimalUniSet = UnicodeSet.EMPTY;
|
||||
}
|
||||
|
||||
if (groupingKey != null && decimalKey != null) {
|
||||
|
@ -178,6 +180,9 @@ public class DecimalMatcher implements NumberParseMatcher {
|
|||
if (digit == -1 && digitStrings != null) {
|
||||
for (int i = 0; i < digitStrings.length; i++) {
|
||||
String str = digitStrings[i];
|
||||
if (str.isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
int overlap = segment.getCommonPrefixLength(str);
|
||||
if (overlap == str.length()) {
|
||||
segment.adjustOffset(overlap);
|
||||
|
@ -207,7 +212,7 @@ public class DecimalMatcher implements NumberParseMatcher {
|
|||
|
||||
// 1) Attempt the decimal separator string literal.
|
||||
// if (we have not seen a decimal separator yet) { ... }
|
||||
if (actualDecimalString == null) {
|
||||
if (actualDecimalString == null && !decimalSeparator.isEmpty()) {
|
||||
int overlap = segment.getCommonPrefixLength(decimalSeparator);
|
||||
maybeMore = maybeMore || (overlap == segment.length());
|
||||
if (overlap == decimalSeparator.length()) {
|
||||
|
@ -227,7 +232,10 @@ public class DecimalMatcher implements NumberParseMatcher {
|
|||
|
||||
// 2.5) Attempt to match a new the grouping separator string literal.
|
||||
// if (we have not seen a grouping or decimal separator yet) { ... }
|
||||
if (!groupingDisabled && actualGroupingString == null && actualDecimalString == null) {
|
||||
if (!groupingDisabled
|
||||
&& actualGroupingString == null
|
||||
&& actualDecimalString == null
|
||||
&& !groupingSeparator.isEmpty()) {
|
||||
int overlap = segment.getCommonPrefixLength(groupingSeparator);
|
||||
maybeMore = maybeMore || (overlap == segment.length());
|
||||
if (overlap == groupingSeparator.length()) {
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.number.parse;
|
||||
|
||||
import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
|
||||
|
||||
import com.ibm.icu.impl.StaticUnicodeSets;
|
||||
import com.ibm.icu.impl.StringSegment;
|
||||
import com.ibm.icu.text.DecimalFormatSymbols;
|
||||
|
@ -16,7 +18,7 @@ public class InfinityMatcher extends SymbolMatcher {
|
|||
|
||||
public static InfinityMatcher getInstance(DecimalFormatSymbols symbols) {
|
||||
String symbolString = symbols.getInfinity();
|
||||
if (DEFAULT.uniSet.contains(symbolString)) {
|
||||
if (safeContains(DEFAULT.uniSet, symbolString)) {
|
||||
return DEFAULT;
|
||||
} else {
|
||||
return new InfinityMatcher(symbolString);
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.number.parse;
|
||||
|
||||
import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
|
||||
|
||||
import com.ibm.icu.impl.StaticUnicodeSets;
|
||||
import com.ibm.icu.impl.StringSegment;
|
||||
import com.ibm.icu.text.DecimalFormatSymbols;
|
||||
|
@ -17,7 +19,7 @@ public class MinusSignMatcher extends SymbolMatcher {
|
|||
|
||||
public static MinusSignMatcher getInstance(DecimalFormatSymbols symbols, boolean allowTrailing) {
|
||||
String symbolString = symbols.getMinusSignString();
|
||||
if (DEFAULT.uniSet.contains(symbolString)) {
|
||||
if (safeContains(DEFAULT.uniSet, symbolString)) {
|
||||
return allowTrailing ? DEFAULT_ALLOW_TRAILING : DEFAULT;
|
||||
} else {
|
||||
return new MinusSignMatcher(symbolString, allowTrailing);
|
||||
|
|
|
@ -40,4 +40,9 @@ public class ParsingUtils {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: Remove this helper function (and update call sites) when #13805 is fixed
|
||||
public static boolean safeContains(UnicodeSet uniset, CharSequence str) {
|
||||
return str.length() != 0 && uniset.contains(str);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.number.parse;
|
||||
|
||||
import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
|
||||
|
||||
import com.ibm.icu.impl.StaticUnicodeSets;
|
||||
import com.ibm.icu.impl.StringSegment;
|
||||
import com.ibm.icu.text.DecimalFormatSymbols;
|
||||
|
@ -17,7 +19,7 @@ public class PlusSignMatcher extends SymbolMatcher {
|
|||
|
||||
public static PlusSignMatcher getInstance(DecimalFormatSymbols symbols, boolean allowTrailing) {
|
||||
String symbolString = symbols.getPlusSignString();
|
||||
if (DEFAULT.uniSet.contains(symbolString)) {
|
||||
if (safeContains(DEFAULT.uniSet, symbolString)) {
|
||||
return allowTrailing ? DEFAULT_ALLOW_TRAILING : DEFAULT;
|
||||
} else {
|
||||
return new PlusSignMatcher(symbolString, allowTrailing);
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.number.parse;
|
||||
|
||||
import static com.ibm.icu.impl.number.parse.ParsingUtils.safeContains;
|
||||
|
||||
import com.ibm.icu.impl.StaticUnicodeSets;
|
||||
import com.ibm.icu.impl.StringSegment;
|
||||
import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD;
|
||||
|
@ -32,9 +34,9 @@ public class ScientificMatcher implements NumberParseMatcher {
|
|||
ParsingUtils.PARSE_FLAG_INTEGER_ONLY | ParsingUtils.PARSE_FLAG_GROUPING_DISABLED);
|
||||
|
||||
String minusSign = symbols.getMinusSignString();
|
||||
customMinusSign = minusSignSet().contains(minusSign) ? null : minusSign;
|
||||
customMinusSign = safeContains(minusSignSet(), minusSign) ? null : minusSign;
|
||||
String plusSign = symbols.getPlusSignString();
|
||||
customPlusSign = plusSignSet().contains(plusSign) ? null : plusSign;
|
||||
customPlusSign = safeContains(plusSignSet(), plusSign) ? null : plusSign;
|
||||
}
|
||||
|
||||
private static UnicodeSet minusSignSet() {
|
||||
|
@ -53,6 +55,7 @@ public class ScientificMatcher implements NumberParseMatcher {
|
|||
}
|
||||
|
||||
// First match the scientific separator, and then match another number after it.
|
||||
// NOTE: This is guarded by the smoke test; no need to check exponentSeparatorString length again.
|
||||
int overlap1 = segment.getCommonPrefixLength(exponentSeparatorString);
|
||||
if (overlap1 == exponentSeparatorString.length()) {
|
||||
// Full exponent separator match.
|
||||
|
@ -71,6 +74,7 @@ public class ScientificMatcher implements NumberParseMatcher {
|
|||
} else if (segment.startsWith(plusSignSet())) {
|
||||
segment.adjustOffsetByCodePoint();
|
||||
} else if (segment.startsWith(customMinusSign)) {
|
||||
// Note: call site is guarded with startsWith, which returns false on empty string
|
||||
int overlap2 = segment.getCommonPrefixLength(customMinusSign);
|
||||
if (overlap2 != customMinusSign.length()) {
|
||||
// Partial custom sign match; un-match the exponent separator.
|
||||
|
@ -80,6 +84,7 @@ public class ScientificMatcher implements NumberParseMatcher {
|
|||
exponentSign = -1;
|
||||
segment.adjustOffset(overlap2);
|
||||
} else if (segment.startsWith(customPlusSign)) {
|
||||
// Note: call site is guarded with startsWith, which returns false on empty string
|
||||
int overlap2 = segment.getCommonPrefixLength(customPlusSign);
|
||||
if (overlap2 != customPlusSign.length()) {
|
||||
// Partial custom sign match; un-match the exponent separator.
|
||||
|
|
|
@ -6170,4 +6170,57 @@ public class NumberFormatTest extends TestFmwk {
|
|||
NumberFormat df = NumberFormat.getInstance(ULocale.US, NumberFormat.PLURALCURRENCYSTYLE);
|
||||
expect2(df, 1.5, "1.50 US dollars");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test13804_EmptyStringsWhenParsing() {
|
||||
DecimalFormatSymbols dfs = DecimalFormatSymbols.getInstance(ULocale.ENGLISH);
|
||||
dfs.setCurrencySymbol("");
|
||||
dfs.setDecimalSeparatorString("");
|
||||
dfs.setDigitStrings(new String[] { "", "", "", "", "", "", "", "", "", "" });
|
||||
dfs.setExponentMultiplicationSign("");
|
||||
dfs.setExponentSeparator("");
|
||||
dfs.setGroupingSeparatorString("");
|
||||
dfs.setInfinity("");
|
||||
dfs.setInternationalCurrencySymbol("");
|
||||
dfs.setMinusSignString("");
|
||||
dfs.setMonetaryDecimalSeparatorString("");
|
||||
dfs.setMonetaryGroupingSeparatorString("");
|
||||
dfs.setNaN("");
|
||||
dfs.setPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT, false, "");
|
||||
dfs.setPatternForCurrencySpacing(DecimalFormatSymbols.CURRENCY_SPC_INSERT, true, "");
|
||||
dfs.setPercentString("");
|
||||
dfs.setPerMillString("");
|
||||
dfs.setPlusSignString("");
|
||||
|
||||
DecimalFormat df = new DecimalFormat("0", dfs);
|
||||
df.setGroupingUsed(true);
|
||||
df.setScientificNotation(true);
|
||||
df.setParseStrict(false); // enable all matchers
|
||||
df.format(0); // should not throw or hit infinite loop
|
||||
String[] samples = new String[] {
|
||||
"",
|
||||
"123",
|
||||
"$123",
|
||||
"-",
|
||||
"+",
|
||||
"44%",
|
||||
"1E+2.3"
|
||||
};
|
||||
for (String sample : samples) {
|
||||
logln("Attempting parse on: " + sample);
|
||||
// We don't care about the results, only that we don't throw and don't loop.
|
||||
ParsePosition ppos = new ParsePosition(0);
|
||||
df.parse(sample, ppos);
|
||||
ppos = new ParsePosition(0);
|
||||
df.parseCurrency(sample, ppos);
|
||||
}
|
||||
|
||||
// Test with a nonempty exponent separator symbol to cover more code
|
||||
dfs.setExponentSeparator("E");
|
||||
df.setDecimalFormatSymbols(dfs);
|
||||
{
|
||||
ParsePosition ppos = new ParsePosition(0);
|
||||
df.parse("1E+2.3", ppos);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue