mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-5206 strict number parsing
X-SVN-Rev: 19645
This commit is contained in:
parent
1d32573438
commit
dcd476c75f
3 changed files with 232 additions and 4 deletions
|
@ -1499,4 +1499,107 @@ public class NumberFormatTest extends com.ibm.icu.dev.test.TestFmwk {
|
|||
errln("Did not get the expected output Expected: "+expected+" Got: "+ str);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Exercises strict versus lenient number parsing on three batches of inputs.
 * Each batch supplies strings that must parse in both modes ("pass") and
 * strings that must parse leniently but be rejected in strict mode ("fail");
 * the actual checks are done by runStrictParseBatch.
 */
public void TestStrictParse() {
|
||||
// Inputs that must parse without error in both lenient and strict mode.
String[] pass = {
|
||||
"0", // single zero before end of text is not leading
|
||||
"0 ", // single zero at end of number is not leading
|
||||
"0.", // single zero before period (or decimal, it's ambiguous) is not leading
|
||||
"0,", // single zero before comma (not group separator) is not leading
|
||||
"0.0", // single zero before decimal followed by digit is not leading
|
||||
"0. ", // same as above before period (or decimal) is not leading
|
||||
"0.100,5", // comma stops parse of decimal (no grouping)
|
||||
".00", // leading decimal is ok, even with zeros
|
||||
"1234567", // group separators are not required
|
||||
"12345, ", // comma not followed by digit is not a group separator, but end of number
|
||||
"1,234, ", // if group separator is present, group sizes must be appropriate
|
||||
"1,234,567", // ...secondary too
|
||||
"0E", // an exponent not followed by zero or digits is not an exponent
|
||||
};
|
||||
// Inputs that must parse leniently but produce a parse error in strict mode.
String[] fail = {
|
||||
"00", // leading zero before zero
|
||||
"012", // leading zero before digit
|
||||
"0,456", // leading zero before group separator
|
||||
"1,2", // wrong number of digits after group separator
|
||||
",0", // leading group separator before zero
|
||||
",1", // leading group separator before digit
|
||||
",.02", // leading group separator before decimal
|
||||
"1,.02", // group separator before decimal
|
||||
"1,,200", // multiple group separators
|
||||
"1,45", // wrong number of digits in primary group
|
||||
"1,45 that", // wrong number of digits in primary group
|
||||
"1,45.34", // wrong number of digits in primary group
|
||||
"1234,567", // wrong number of digits in secondary group
|
||||
"12,34,567", // wrong number of digits in secondary group
|
||||
"1,23,456,7890", // wrong number of digits in primary and secondary groups
|
||||
};
|
||||
|
||||
// Batch 1: default English-locale number format.
DecimalFormat nf = (DecimalFormat) NumberFormat.getInstance(Locale.ENGLISH);
|
||||
runStrictParseBatch(nf, pass, fail);
|
||||
|
||||
// Batch 2: inputs that contain an exponent marker.
String[] scientificPass = {
|
||||
"0E2", // single zero before exponent is ok
|
||||
"1234E2", // any number of digits before exponent is ok
|
||||
"1,234E", // an exponent string not followed by zero or digits is not an exponent
|
||||
};
|
||||
String[] scientificFail = {
|
||||
"00E2", // double zeros fail
|
||||
"1,234E2", // group separators with exponent fail
|
||||
};
|
||||
|
||||
// NOTE(review): this batch uses the same default English format as batch 1,
// not a format built from a scientific pattern -- confirm this is intended.
nf = (DecimalFormat) NumberFormat.getInstance(Locale.ENGLISH);
|
||||
runStrictParseBatch(nf, scientificPass, scientificFail);
|
||||
|
||||
// Batch 3: explicit pattern whose primary and secondary grouping sizes differ.
String[] mixedPass = {
|
||||
"12,34,567",
|
||||
"12,34,567,",
|
||||
"12,34,567, that",
|
||||
"12,34,567 that",
|
||||
};
|
||||
String[] mixedFail = {
|
||||
"12,34,56",
|
||||
"12,34,56,",
|
||||
"12,34,56, that ",
|
||||
"12,34,56 that",
|
||||
};
|
||||
|
||||
nf = new DecimalFormat("#,##,##0.#");
|
||||
runStrictParseBatch(nf, mixedPass, mixedFail);
|
||||
}
|
||||
|
||||
/**
 * Runs one batch of inputs through the given format twice: first with strict
 * parsing turned off, where both arrays are expected to parse without error,
 * and then with strict parsing turned on, where "pass" must still parse and
 * "fail" is expected to report a parse error.
 * Leaves strict parsing enabled on nf when it returns.
 *
 * @param nf   the format to parse with; its strict-parse flag is modified
 * @param pass strings expected to parse in both modes
 * @param fail strings expected to parse only when strict parsing is off
 */
void runStrictParseBatch(DecimalFormat nf, String[] pass, String[] fail) {
|
||||
// Lenient mode: everything is expected to parse.
nf.setParseStrict(false);
|
||||
runStrictParseTests("should pass", nf, pass, true);
|
||||
runStrictParseTests("should also pass", nf, fail, true);
|
||||
// Strict mode: only the "pass" inputs are expected to parse.
nf.setParseStrict(true);
|
||||
runStrictParseTests("should still pass", nf, pass, true);
|
||||
runStrictParseTests("should fail", nf, fail, false);
|
||||
}
|
||||
|
||||
/**
 * Parses every string in tests with nf, starting at index 0, and compares the
 * outcome with the expectation. A parse counts as successful when the
 * ParsePosition's error index is still -1 afterwards. A mismatch with the
 * expectation is reported through errln; a match is only logged.
 *
 * @param msg   label logged before the results of this run
 * @param nf    the format used for parsing and for re-formatting the result
 * @param tests the input strings
 * @param pass  true if every input is expected to parse without error,
 *              false if every input is expected to report a parse error
 */
void runStrictParseTests(String msg, DecimalFormat nf, String[] tests, boolean pass) {
|
||||
logln("");
|
||||
logln("pattern: '" + nf.toPattern() + "'");
|
||||
logln(msg);
|
||||
for (int i = 0; i < tests.length; ++i) {
|
||||
String str = tests[i];
|
||||
ParsePosition pp = new ParsePosition(0);
|
||||
Number n = nf.parse(str, pp);
|
||||
// Re-format the parsed value for the diagnostic message only.
String formatted = n != null ? nf.format(n) : "null";
|
||||
// err stays empty exactly when no error index was recorded.
String err = pp.getErrorIndex() == -1 ? "" : "(error at " + pp.getErrorIndex() + ")";
|
||||
// Outcome differs from the expectation: report a test failure.
if ((err.length() == 0) != pass) {
|
||||
errln("'" + str + "' parsed '" +
|
||||
str.substring(0, pp.getIndex()) +
|
||||
"' returned " + n + " formats to '" +
|
||||
formatted + "' " + err);
|
||||
} else {
|
||||
// Outcome matches; when an error was expected, say so in the log line.
if (err.length() > 0) {
|
||||
err = "got expected " + err;
|
||||
}
|
||||
logln("'" + str + "' parsed '" +
|
||||
str.substring(0, pp.getIndex()) +
|
||||
"' returned " + n + " formats to '" +
|
||||
formatted + "' " + err);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1620,10 +1620,18 @@ public class DecimalFormat extends NumberFormat {
|
|||
int exponent = 0; // Set to the exponent value, if any
|
||||
int digit = 0;
|
||||
|
||||
// strict parsing
|
||||
boolean strictParse = isParseStrict();
|
||||
boolean strictFail = false; // did we exit with a strict parse failure?
|
||||
boolean leadingZero = false; // did we see a leading zero?
|
||||
int lastGroup = -1; // where did we last see a grouping separator?
|
||||
int prevGroup = -1; // where did we see the grouping separator before that?
|
||||
int gs2 = groupingSize2 == 0 ? groupingSize : groupingSize2;
|
||||
|
||||
// We have to track digitCount ourselves, because digits.count will
|
||||
// pin when the maximum allowable digits is reached.
|
||||
int digitCount = 0;
|
||||
|
||||
|
||||
int backup = -1;
|
||||
for (; position < text.length(); ++position)
|
||||
{
|
||||
|
@ -1646,14 +1654,38 @@ public class DecimalFormat extends NumberFormat {
|
|||
if (digit == 0)
|
||||
{
|
||||
// Cancel out backup setting (see grouping handler below)
|
||||
if (strictParse && backup != -1) {
|
||||
// comma followed by digit, so group before comma is a
|
||||
// secondary group. If there was a group separator
|
||||
// before that, the group must == the secondary group
|
||||
// length, else it can be <= the secondary group
|
||||
// length.
|
||||
if ((lastGroup != -1 && backup - lastGroup - 1 != gs2) ||
|
||||
(lastGroup == -1 && position - oldStart - 1 > gs2)) {
|
||||
strictFail = true;
|
||||
break;
|
||||
}
|
||||
prevGroup = lastGroup;
|
||||
lastGroup = backup;
|
||||
}
|
||||
backup = -1; // Do this BEFORE continue statement below!!!
|
||||
sawDigit = true;
|
||||
|
||||
// Handle leading zeros
|
||||
if (digits.count == 0)
|
||||
{
|
||||
// Ignore leading zeros in integer part of number.
|
||||
if (!sawDecimal) continue;
|
||||
if (!sawDecimal) {
|
||||
if (strictParse && !isExponent) {
|
||||
// Allow leading zeros in exponents
|
||||
if (leadingZero) {
|
||||
strictFail = true;
|
||||
break;
|
||||
}
|
||||
leadingZero = true;
|
||||
}
|
||||
// Ignore leading zeros in integer part of number.
|
||||
continue;
|
||||
}
|
||||
|
||||
// If we have seen the decimal, but no significant digits yet,
|
||||
// then we account for leading zeros by decrementing the
|
||||
|
@ -1668,6 +1700,23 @@ public class DecimalFormat extends NumberFormat {
|
|||
}
|
||||
else if (digit > 0 && digit <= 9) // [sic] digit==0 handled above
|
||||
{
|
||||
if (strictParse) {
|
||||
if (leadingZero) {
|
||||
// a leading zero before a digit is an error with strict parsing
|
||||
strictFail = true;
|
||||
break;
|
||||
}
|
||||
if (backup != -1) {
|
||||
if ((lastGroup != -1 && backup - lastGroup - 1 != gs2) ||
|
||||
(lastGroup == -1 && position - oldStart - 1 > gs2)) {
|
||||
strictFail = true;
|
||||
break;
|
||||
}
|
||||
prevGroup = lastGroup;
|
||||
lastGroup = backup;
|
||||
}
|
||||
}
|
||||
|
||||
sawDigit = true;
|
||||
++digitCount;
|
||||
digits.append((char)(digit + '0'));
|
||||
|
@ -1677,17 +1726,32 @@ public class DecimalFormat extends NumberFormat {
|
|||
}
|
||||
else if (!isExponent && ch == decimal)
|
||||
{
|
||||
if (strictParse) {
|
||||
if (backup != -1 ||
|
||||
(lastGroup != -1 && position - lastGroup != groupingSize - 1)) {
|
||||
strictFail = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// If we're only parsing integers, or if we ALREADY saw the
|
||||
// decimal, then don't parse this one.
|
||||
if (isParseIntegerOnly() || sawDecimal) break;
|
||||
digits.decimalAt = digitCount; // Not digits.count!
|
||||
sawDecimal = true;
|
||||
leadingZero = false; // a single leading zero before a decimal is ok
|
||||
}
|
||||
else if (!isExponent && ch == grouping && isGroupingUsed())
|
||||
{
|
||||
if (sawDecimal) {
|
||||
break;
|
||||
}
|
||||
if (strictParse) {
|
||||
if ((!sawDigit || backup != -1)) {
|
||||
// leading group, or two group separators in a row
|
||||
strictFail = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Ignore grouping characters, if we are using them, but require
|
||||
// that they be followed by a digit. Otherwise we backup and
|
||||
// reprocess them.
|
||||
|
@ -1732,6 +1796,14 @@ public class DecimalFormat extends NumberFormat {
|
|||
}
|
||||
|
||||
if (exponentDigits.count > 0) {
|
||||
// defer strict parse until we know we have a bona-fide exponent
|
||||
if (strictParse) {
|
||||
if (backup != -1 || lastGroup != -1) {
|
||||
strictFail = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
exponentDigits.decimalAt = exponentDigits.count;
|
||||
exponent = (int) exponentDigits.getLong();
|
||||
if (negExp) {
|
||||
|
@ -1748,6 +1820,22 @@ public class DecimalFormat extends NumberFormat {
|
|||
|
||||
if (backup != -1) position = backup;
|
||||
|
||||
if (strictParse && !sawDecimal) {
|
||||
if (lastGroup != -1 && position - lastGroup != groupingSize + 1) {
|
||||
strictFail = true;
|
||||
}
|
||||
}
|
||||
if (strictFail) {
|
||||
// only set with strictParse, on a leading zero or grouping error
|
||||
// leading zeros are an error with strict parsing except
|
||||
// immediately before nondigit (except group separator
|
||||
// followed by digit), or end of text.
|
||||
|
||||
parsePosition.setIndex(oldStart);
|
||||
parsePosition.setErrorIndex(position);
|
||||
return false;
|
||||
}
|
||||
|
||||
// If there was no decimal point we have an integer
|
||||
if (!sawDecimal) digits.decimalAt = digitCount; // Not digits.count!
|
||||
|
||||
|
|
|
@ -434,6 +434,39 @@ public abstract class NumberFormat extends UFormat {
|
|||
parseIntegerOnly = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets whether strict parsing is in effect.  When this is true, the
|
||||
* following conditions cause a parse failure (examples use the pattern "#,##0.#"):<ul>
|
||||
* <li>Leading zeros<br>
|
||||
* '00', '0123' fail the parse, but '0' and '0.001' pass</li>
|
||||
* <li>Leading or doubled grouping separators<br>
|
||||
* ',123' and '1,,234' fail</li>
|
||||
* <li>Groups of incorrect length when grouping is used<br>
|
||||
* '1,23' and '1234,567' fail, but '1234' passes</li>
|
||||
* <li>Grouping separators used in numbers followed by exponents<br>
|
||||
* '1,234E5' fails, but '1234E5' and '1,234E' pass ('E' is not an exponent when
|
||||
* not followed by a number)</li>
|
||||
* </ul>
|
||||
* When strict parsing is off, leading zeros and all grouping separators are ignored.
|
||||
* This is the default behavior.
|
||||
* @param value True to enable strict parsing.  Default is false.
|
||||
* @see #isParseStrict
|
||||
* @draft ICU 3.6
|
||||
*/
|
||||
public void setParseStrict(boolean value) {
|
||||
parseStrict = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether strict parsing is in effect, i.e. the value last passed
* to setParseStrict.
|
||||
* @return true if strict parsing is in effect
|
||||
* @see #setParseStrict
|
||||
* @draft ICU 3.6
|
||||
*/
|
||||
public boolean isParseStrict() {
|
||||
return parseStrict;
|
||||
}
|
||||
|
||||
//============== Locale Stuff =====================
|
||||
|
||||
/**
|
||||
|
@ -919,7 +952,8 @@ public abstract class NumberFormat extends UFormat {
|
|||
&& maximumFractionDigits == other.maximumFractionDigits
|
||||
&& minimumFractionDigits == other.minimumFractionDigits
|
||||
&& groupingUsed == other.groupingUsed
|
||||
&& parseIntegerOnly == other.parseIntegerOnly;
|
||||
&& parseIntegerOnly == other.parseIntegerOnly
|
||||
&& parseStrict == other.parseStrict;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1500,4 +1534,7 @@ public abstract class NumberFormat extends UFormat {
|
|||
*/
|
||||
public NumberFormat() {
|
||||
}
|
||||
|
||||
// new in ICU4J 3.6
|
||||
private boolean parseStrict;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue