mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 01:11:02 +00:00
ICU-13309 Changing number parsing to accept only horizontal whitespace, not vertical whitespace or control characters.
X-SVN-Rev: 40646
This commit is contained in:
parent
16598e5468
commit
7c59127769
4 changed files with 45 additions and 5 deletions
|
@ -132,9 +132,10 @@ public class Parse {
|
|||
INSIDE_AFFIX_PATTERN;
|
||||
}
|
||||
|
||||
// TODO: Does this set make sense for the whitespace characters?
|
||||
// This set was decided after discussion with icu-design@. See ticket #13309.
|
||||
// Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property).
|
||||
private static final UnicodeSet UNISET_WHITESPACE =
|
||||
new UnicodeSet("[[:whitespace:][\\u2000-\\u200D]]").freeze();
|
||||
new UnicodeSet("[[:Zs:][\\u0009]]").freeze();
|
||||
|
||||
// BiDi characters are skipped over and ignored at any point in the string, even in strict mode.
|
||||
private static final UnicodeSet UNISET_BIDI =
|
||||
|
|
|
@ -202,7 +202,9 @@ import com.ibm.icu.util.ULocale.Category;
|
|||
* pattern string and the input string. For example, the pattern "# %" matches "35 %" (with a single
|
||||
* space), "35%" (with no space), "35&nbsp;%" (with a non-breaking space), and "35&nbsp; %" (with
|
||||
* multiple spaces). Arbitrary ignorables are also allowed at boundaries between the parts of the
|
||||
* number: prefix, number, exponent separator, and suffix.
|
||||
* number: prefix, number, exponent separator, and suffix. Ignorable whitespace characters are those
|
||||
* having the Unicode "blank" property for regular expressions, defined in UTS #18 Annex C, which is
|
||||
* "horizontal" whitespace, like spaces and tabs, but not "vertical" whitespace, like line breaks.
|
||||
*
|
||||
* <p>If {@link #parse(String, ParsePosition)} fails to parse a string, it returns <code>null</code>
|
||||
* and leaves the parse position unchanged. The convenience method {@link #parse(String)} indicates
|
||||
|
|
|
@ -424,6 +424,9 @@ public abstract class NumberFormat extends UFormat {
|
|||
* <p>Does not throw an exception; if no object can be parsed, index is
|
||||
* unchanged!
|
||||
*
|
||||
* <p>For more detail on parsing, see the "Parsing" header in the class
|
||||
* documentation of {@link DecimalFormat}.
|
||||
*
|
||||
* @see #isParseIntegerOnly
|
||||
* @see DecimalFormat#setParseBigDecimal
|
||||
* @see java.text.Format#parseObject(String, ParsePosition)
|
||||
|
|
|
@ -63,6 +63,7 @@ import com.ibm.icu.text.NumberFormat.SimpleNumberFormatFactory;
|
|||
import com.ibm.icu.text.NumberingSystem;
|
||||
import com.ibm.icu.text.PluralRules;
|
||||
import com.ibm.icu.text.RuleBasedNumberFormat;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.util.Currency;
|
||||
import com.ibm.icu.util.Currency.CurrencyUsage;
|
||||
import com.ibm.icu.util.CurrencyAmount;
|
||||
|
@ -438,8 +439,8 @@ public class NumberFormatTest extends TestFmwk {
|
|||
{" $ 124 ", "6", "-1"},
|
||||
{"124$", "3", "-1"},
|
||||
{"124 $", "3", "-1"},
|
||||
{"$124\u200D", "4", "-1"},
|
||||
{"$\u200D124", "5", "-1"},
|
||||
{"$124\u200A", "4", "-1"},
|
||||
{"$\u200A124", "5", "-1"},
|
||||
};
|
||||
NumberFormat foo = NumberFormat.getCurrencyInstance();
|
||||
for (int i = 0; i < DATA.length; ++i) {
|
||||
|
@ -1712,6 +1713,29 @@ public class NumberFormatTest extends TestFmwk {
|
|||
expect(fmt, "ab 1234", n);
|
||||
expect(fmt, "a b1234", n);
|
||||
expect(fmt, "a b1234", n);
|
||||
expect(fmt, " a b 1234", n);
|
||||
|
||||
// Horizontal whitespace is allowed, but not vertical whitespace.
|
||||
expect(fmt, "\ta\u00A0b\u20001234", n);
|
||||
expect(fmt, "a \u200A b1234", n);
|
||||
expectParseException(fmt, "\nab1234", n);
|
||||
expectParseException(fmt, "a \n b1234", n);
|
||||
expectParseException(fmt, "a \u0085 b1234", n);
|
||||
expectParseException(fmt, "a \u2028 b1234", n);
|
||||
|
||||
// Test all characters in the UTS 18 "blank" set stated in the API docstring.
|
||||
UnicodeSet blanks = new UnicodeSet("[[:Zs:][\\u0009]]").freeze();
|
||||
for (String space : blanks) {
|
||||
String str = "a " + space + " b1234";
|
||||
expect(fmt, str, n);
|
||||
}
|
||||
|
||||
// Test that other whitespace characters do not work
|
||||
UnicodeSet otherWhitespace = new UnicodeSet("[[:whitespace:]]").removeAll(blanks).freeze();
|
||||
for (String space : otherWhitespace) {
|
||||
String str = "a " + space + " b1234";
|
||||
expectParseException(fmt, str, n);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2676,6 +2700,16 @@ public class NumberFormatTest extends TestFmwk {
|
|||
expect(fmt, str, new Long(n));
|
||||
}
|
||||
|
||||
/** Parse test */
|
||||
public void expectParseException(DecimalFormat fmt, String str, Number n) {
|
||||
Number num = null;
|
||||
try {
|
||||
num = fmt.parse(str);
|
||||
errln("Expected failure, but passed: " + n + " on " + fmt.toPattern() + " -> " + num);
|
||||
} catch (ParseException e) {
|
||||
}
|
||||
}
|
||||
|
||||
private void expectCurrency(NumberFormat nf, Currency curr,
|
||||
double value, String string) {
|
||||
DecimalFormat fmt = (DecimalFormat) nf;
|
||||
|
|
Loading…
Add table
Reference in a new issue