ICU-13309 Changing number parsing to accept only horizontal whitespace, not vertical whitespace or control characters.

X-SVN-Rev: 40646
This commit is contained in:
Shane Carr 2017-10-26 21:53:50 +00:00
parent 16598e5468
commit 7c59127769
4 changed files with 45 additions and 5 deletions

View file

@ -132,9 +132,10 @@ public class Parse {
INSIDE_AFFIX_PATTERN;
}
// TODO: Does this set make sense for the whitespace characters?
// This set was decided after discussion with icu-design@. See ticket #13309.
// Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
private static final UnicodeSet UNISET_WHITESPACE =
new UnicodeSet("[[:whitespace:][\\u2000-\\u200D]]").freeze();
new UnicodeSet("[[:Zs:][\\u0009]]").freeze();
// BiDi characters are skipped over and ignored at any point in the string, even in strict mode.
private static final UnicodeSet UNISET_BIDI =

View file

@ -202,7 +202,9 @@ import com.ibm.icu.util.ULocale.Category;
* pattern string and the input string. For example, the pattern "# %" matches "35 %" (with a single
* space), "35%" (with no space), "35 %" (with a non-breaking space), and "35  %" (with
* multiple spaces). Arbitrary ignorables are also allowed at boundaries between the parts of the
* number: prefix, number, exponent separator, and suffix.
* number: prefix, number, exponent separator, and suffix. Ignorable whitespace characters are those
* having the Unicode "blank" property for regular expressions, as defined in UTS #18 Annex C. That
* property covers "horizontal" whitespace, like spaces and tabs, but not "vertical" whitespace, like
* line breaks.
*
* <p>If {@link #parse(String, ParsePosition)} fails to parse a string, it returns <code>null</code>
* and leaves the parse position unchanged. The convenience method {@link #parse(String)} indicates

View file

@ -424,6 +424,9 @@ public abstract class NumberFormat extends UFormat {
* <p>Does not throw an exception; if no object can be parsed, index is
* unchanged!
*
* <p>For more detail on parsing, see the "Parsing" header in the class
* documentation of {@link DecimalFormat}.
*
* @see #isParseIntegerOnly
* @see DecimalFormat#setParseBigDecimal
* @see java.text.Format#parseObject(String, ParsePosition)

View file

@ -63,6 +63,7 @@ import com.ibm.icu.text.NumberFormat.SimpleNumberFormatFactory;
import com.ibm.icu.text.NumberingSystem;
import com.ibm.icu.text.PluralRules;
import com.ibm.icu.text.RuleBasedNumberFormat;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.Currency;
import com.ibm.icu.util.Currency.CurrencyUsage;
import com.ibm.icu.util.CurrencyAmount;
@ -438,8 +439,8 @@ public class NumberFormatTest extends TestFmwk {
{" $ 124 ", "6", "-1"},
{"124$", "3", "-1"},
{"124 $", "3", "-1"},
{"$124\u200D", "4", "-1"},
{"$\u200D124", "5", "-1"},
{"$124\u200A", "4", "-1"},
{"$\u200A124", "5", "-1"},
};
NumberFormat foo = NumberFormat.getCurrencyInstance();
for (int i = 0; i < DATA.length; ++i) {
@ -1712,6 +1713,29 @@ public class NumberFormatTest extends TestFmwk {
expect(fmt, "ab 1234", n);
expect(fmt, "a b1234", n);
expect(fmt, "a b1234", n);
expect(fmt, " a b 1234", n);
// Horizontal whitespace is allowed, but not vertical whitespace.
expect(fmt, "\ta\u00A0b\u20001234", n);
expect(fmt, "a \u200A b1234", n);
expectParseException(fmt, "\nab1234", n);
expectParseException(fmt, "a \n b1234", n);
expectParseException(fmt, "a \u0085 b1234", n);
expectParseException(fmt, "a \u2028 b1234", n);
// Test all characters in the UTS 18 "blank" set stated in the API docstring.
UnicodeSet blanks = new UnicodeSet("[[:Zs:][\\u0009]]").freeze();
for (String space : blanks) {
String str = "a " + space + " b1234";
expect(fmt, str, n);
}
// Test that other whitespace characters do not work
UnicodeSet otherWhitespace = new UnicodeSet("[[:whitespace:]]").removeAll(blanks).freeze();
for (String space : otherWhitespace) {
String str = "a " + space + " b1234";
expectParseException(fmt, str, n);
}
}
/**
@ -2676,6 +2700,16 @@ public class NumberFormatTest extends TestFmwk {
expect(fmt, str, new Long(n));
}
/**
 * Parse test: asserts that parsing {@code str} with {@code fmt} fails.
 *
 * <p>Calls {@code fmt.parse(str)} and reports an error through {@code errln} when the call
 * returns normally. A {@link ParseException} is the expected outcome and is consumed.
 *
 * @param fmt the formatter whose parser is exercised
 * @param str the input that is expected to be rejected
 * @param n a number that is only echoed in the failure message for context; it is not
 *     compared against anything
 */
public void expectParseException(DecimalFormat fmt, String str, Number n) {
    try {
        Number num = fmt.parse(str);

        // Callers run this over many inputs that differ only by one (often invisible)
        // character while sharing the same pattern and number, so the message must identify
        // the input. Anything outside printable ASCII is shown as a backslash-u hex escape.
        StringBuilder escaped = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c >= 0x20 && c < 0x7F) {
                escaped.append(c);
            } else {
                escaped.append(String.format("\\u%04X", (int) c));
            }
        }
        errln("Expected failure, but passed: " + n + " on " + fmt.toPattern() + " -> " + num
                + " for input \"" + escaped + "\"");
    } catch (ParseException expected) {
        // Intentionally empty: the parse is supposed to fail, so there is nothing to report.
    }
}
private void expectCurrency(NumberFormat nf, Currency curr,
double value, String string) {
DecimalFormat fmt = (DecimalFormat) nf;