ICU-5206 strict number parsing

X-SVN-Rev: 19645
This commit is contained in:
Doug Felt 2006-05-23 22:39:52 +00:00
parent 1d32573438
commit dcd476c75f
3 changed files with 232 additions and 4 deletions

View file

@ -1499,4 +1499,107 @@ public class NumberFormatTest extends com.ibm.icu.dev.test.TestFmwk {
errln("Did not get the expected output Expected: "+expected+" Got: "+ str);
}
}
/**
 * Checks lenient versus strict parsing for three groups of inputs: plain
 * decimal numbers, numbers carrying an exponent, and numbers parsed with a
 * pattern that has distinct primary and secondary grouping sizes. Each
 * group supplies inputs expected to parse in both modes and inputs expected
 * to parse only when strict parsing is off; the actual checking is delegated
 * to runStrictParseBatch.
 */
public void TestStrictParse() {
    // Accepted whether or not strict parsing is on.
    String[] decimalAccepted = {
        "0",          // lone zero at end of text is not a leading zero
        "0 ",         // lone zero ending the number is not a leading zero
        "0.",         // lone zero before a period (or decimal, it's ambiguous) is not a leading zero
        "0,",         // lone zero before a comma (not a group separator) is not a leading zero
        "0.0",        // lone zero before a decimal followed by a digit is not a leading zero
        "0. ",        // as above: lone zero before a period (or decimal) is not a leading zero
        "0.100,5",    // the comma ends the parse of the decimal (no grouping)
        ".00",        // a leading decimal is fine, even with zeros
        "1234567",    // group separators are optional
        "12345, ",    // a comma not followed by a digit ends the number rather than grouping
        "1,234, ",    // when a group separator is present, group sizes must be appropriate
        "1,234,567",  // ...and likewise for the secondary groups
        "0E",         // an exponent not followed by zero or digits is not an exponent
    };
    // Accepted only when strict parsing is off.
    String[] decimalRejected = {
        "00",             // leading zero before a zero
        "012",            // leading zero before a digit
        "0,456",          // leading zero before a group separator
        "1,2",            // wrong number of digits after the group separator
        ",0",             // leading group separator before a zero
        ",1",             // leading group separator before a digit
        ",.02",           // leading group separator before the decimal
        "1,.02",          // group separator before the decimal
        "1,,200",         // multiple group separators
        "1,45",           // wrong number of digits in the primary group
        "1,45 that",      // wrong number of digits in the primary group
        "1,45.34",        // wrong number of digits in the primary group
        "1234,567",       // wrong number of digits in the secondary group
        "12,34,567",      // wrong number of digits in the secondary group
        "1,23,456,7890",  // wrong number of digits in the primary and secondary groups
    };
    DecimalFormat englishFormat = (DecimalFormat) NumberFormat.getInstance(Locale.ENGLISH);
    runStrictParseBatch(englishFormat, decimalAccepted, decimalRejected);

    String[] exponentAccepted = {
        "0E2",     // a lone zero before the exponent is fine
        "1234E2",  // any number of digits before the exponent is fine
        "1,234E",  // an exponent string not followed by zero or digits is not an exponent
    };
    String[] exponentRejected = {
        "00E2",     // doubled zeros fail
        "1,234E2",  // group separators combined with an exponent fail
    };
    // A fresh instance, so the strict flag left on the previous format does not carry over.
    DecimalFormat exponentFormat = (DecimalFormat) NumberFormat.getInstance(Locale.ENGLISH);
    runStrictParseBatch(exponentFormat, exponentAccepted, exponentRejected);

    // Inputs for a pattern whose primary group has 3 digits and whose secondary groups have 2.
    String[] mixedGroupingAccepted = {
        "12,34,567",
        "12,34,567,",
        "12,34,567, that",
        "12,34,567 that",
    };
    String[] mixedGroupingRejected = {
        "12,34,56",
        "12,34,56,",
        "12,34,56, that ",
        "12,34,56 that",
    };
    DecimalFormat mixedGroupingFormat = new DecimalFormat("#,##,##0.#");
    runStrictParseBatch(mixedGroupingFormat, mixedGroupingAccepted, mixedGroupingRejected);
}
/**
 * Runs both input lists through the format twice: first with strict parsing
 * off, where every input is expected to parse, and then with strict parsing
 * on, where only the {@code pass} inputs are expected to parse. The format
 * is left with strict parsing enabled.
 *
 * @param nf   the format to exercise; its strict-parse flag is modified
 * @param pass inputs expected to parse in both modes
 * @param fail inputs expected to parse only in lenient mode
 */
void runStrictParseBatch(DecimalFormat nf, String[] pass, String[] fail) {
    final boolean[] strictModes = { false, true };
    for (int m = 0; m < strictModes.length; ++m) {
        boolean strict = strictModes[m];
        nf.setParseStrict(strict);

        String passLabel = strict ? "should still pass" : "should pass";
        String failLabel = strict ? "should fail" : "should also pass";

        runStrictParseTests(passLabel, nf, pass, true);
        // The "fail" inputs are only expected to be rejected in strict mode.
        runStrictParseTests(failLabel, nf, fail, !strict);
    }
}
/**
 * Parses every input with the given format and compares the outcome with
 * the expectation. An input counts as parsed when the ParsePosition reports
 * no error index. Each input produces one report line, sent to errln when
 * the outcome differs from the expectation and to logln otherwise.
 *
 * @param msg   heading logged before the individual results
 * @param nf    the format used for parsing and for re-formatting the result
 * @param tests the strings to parse
 * @param pass  true if every input is expected to parse without error,
 *              false if every input is expected to report an error
 */
void runStrictParseTests(String msg, DecimalFormat nf, String[] tests, boolean pass) {
    logln("");
    logln("pattern: '" + nf.toPattern() + "'");
    logln(msg);

    for (int idx = 0; idx < tests.length; ++idx) {
        String input = tests[idx];
        ParsePosition pos = new ParsePosition(0);
        Number value = nf.parse(input, pos);

        String roundTrip = "null";
        if (value != null) {
            roundTrip = nf.format(value);
        }

        int errorIndex = pos.getErrorIndex();
        boolean parsedCleanly = errorIndex == -1;
        boolean asExpected = parsedCleanly == pass;

        String errorNote = parsedCleanly ? "" : "(error at " + errorIndex + ")";
        if (asExpected && !parsedCleanly) {
            // An anticipated failure: make that explicit in the log line.
            errorNote = "got expected " + errorNote;
        }

        String report = "'" + input + "' parsed '" +
            input.substring(0, pos.getIndex()) +
            "' returned " + value + " formats to '" +
            roundTrip + "' " + errorNote;

        if (asExpected) {
            logln(report);
        } else {
            errln(report);
        }
    }
}
}

View file

@ -1620,10 +1620,18 @@ public class DecimalFormat extends NumberFormat {
int exponent = 0; // Set to the exponent value, if any
int digit = 0;
// strict parsing
boolean strictParse = isParseStrict();
boolean strictFail = false; // did we exit with a strict parse failure?
boolean leadingZero = false; // did we see a leading zero?
int lastGroup = -1; // where did we last see a grouping separator?
int prevGroup = -1; // where did we see the grouping separator before that?
int gs2 = groupingSize2 == 0 ? groupingSize : groupingSize2;
// We have to track digitCount ourselves, because digits.count will
// pin when the maximum allowable digits is reached.
int digitCount = 0;
int backup = -1;
for (; position < text.length(); ++position)
{
@ -1646,14 +1654,38 @@ public class DecimalFormat extends NumberFormat {
if (digit == 0)
{
// Cancel out backup setting (see grouping handler below)
if (strictParse && backup != -1) {
// comma followed by digit, so group before comma is a
// secondary group. If there was a group separator
// before that, the group must == the secondary group
// length, else it can be <= the secondary group
// length.
if ((lastGroup != -1 && backup - lastGroup - 1 != gs2) ||
(lastGroup == -1 && position - oldStart - 1 > gs2)) {
strictFail = true;
break;
}
prevGroup = lastGroup;
lastGroup = backup;
}
backup = -1; // Do this BEFORE continue statement below!!!
sawDigit = true;
// Handle leading zeros
if (digits.count == 0)
{
// Ignore leading zeros in integer part of number.
if (!sawDecimal) continue;
if (!sawDecimal) {
if (strictParse && !isExponent) {
// Allow leading zeros in exponents
if (leadingZero) {
strictFail = true;
break;
}
leadingZero = true;
}
// Ignore leading zeros in integer part of number.
continue;
}
// If we have seen the decimal, but no significant digits yet,
// then we account for leading zeros by decrementing the
@ -1668,6 +1700,23 @@ public class DecimalFormat extends NumberFormat {
}
else if (digit > 0 && digit <= 9) // [sic] digit==0 handled above
{
if (strictParse) {
if (leadingZero) {
// a leading zero before a digit is an error with strict parsing
strictFail = true;
break;
}
if (backup != -1) {
if ((lastGroup != -1 && backup - lastGroup - 1 != gs2) ||
(lastGroup == -1 && position - oldStart - 1 > gs2)) {
strictFail = true;
break;
}
prevGroup = lastGroup;
lastGroup = backup;
}
}
sawDigit = true;
++digitCount;
digits.append((char)(digit + '0'));
@ -1677,17 +1726,32 @@ public class DecimalFormat extends NumberFormat {
}
else if (!isExponent && ch == decimal)
{
    if (strictParse) {
        // Strict parsing rejects a decimal separator when a grouping
        // separator is still pending (backup != -1, e.g. "1,.02"), or when
        // the group that precedes it has the wrong length. The digits
        // between the last grouping separator and this position number
        // (position - lastGroup - 1), so a well-formed primary group
        // satisfies position - lastGroup == groupingSize + 1. This is the
        // same test the end-of-number check applies when there is no
        // decimal separator.
        if (backup != -1 ||
            (lastGroup != -1 && position - lastGroup != groupingSize + 1)) {
            strictFail = true;
            break;
        }
    }
    // If we're only parsing integers, or if we ALREADY saw the
    // decimal, then don't parse this one.
    if (isParseIntegerOnly() || sawDecimal) break;
    digits.decimalAt = digitCount; // Not digits.count!
    sawDecimal = true;
    leadingZero = false; // a single leading zero before a decimal is ok
}
else if (!isExponent && ch == grouping && isGroupingUsed())
{
if (sawDecimal) {
break;
}
if (strictParse) {
if ((!sawDigit || backup != -1)) {
// leading group, or two group separators in a row
strictFail = true;
break;
}
}
// Ignore grouping characters, if we are using them, but require
// that they be followed by a digit. Otherwise we backup and
// reprocess them.
@ -1732,6 +1796,14 @@ public class DecimalFormat extends NumberFormat {
}
if (exponentDigits.count > 0) {
// defer strict parse until we know we have a bona-fide exponent
if (strictParse) {
if (backup != -1 || lastGroup != -1) {
strictFail = true;
break;
}
}
exponentDigits.decimalAt = exponentDigits.count;
exponent = (int) exponentDigits.getLong();
if (negExp) {
@ -1748,6 +1820,22 @@ public class DecimalFormat extends NumberFormat {
if (backup != -1) position = backup;
if (strictParse && !sawDecimal) {
if (lastGroup != -1 && position - lastGroup != groupingSize + 1) {
strictFail = true;
}
}
if (strictFail) {
// only set with strictParse, on a leading zero error or a
// grouping separator error.
// leading zeros are an error with strict parsing except
// immediately before nondigit (except group separator
// followed by digit), or end of text.
parsePosition.setIndex(oldStart);
parsePosition.setErrorIndex(position);
return false;
}
// If there was no decimal point we have an integer
if (!sawDecimal) digits.decimalAt = digitCount; // Not digits.count!

View file

@ -434,6 +434,39 @@ public abstract class NumberFormat extends UFormat {
parseIntegerOnly = value;
}
/**
 * Sets whether strict parsing is in effect. When this is true, the
 * following conditions cause a parse failure (examples use the pattern "#,##0.#"):<ul>
 * <li>Leading zeros<br>
 * '00', '0123' fail the parse, but '0' and '0.001' pass</li>
 * <li>Leading or doubled grouping separators<br>
 * ',123' and '1,,234' fail</li>
 * <li>Groups of incorrect length when grouping is used<br>
 * '1,23' and '1234,567' fail, but '1234' passes</li>
 * <li>Grouping separators used in numbers followed by exponents<br>
 * '1,234E5' fails, but '1234E5' and '1,234E' pass ('E' is not an exponent when
 * not followed by a number)</li>
 * </ul>
 * When strict parsing is off, leading zeros and all grouping separators are ignored.
 * This is the default behavior.
 * @param value True to enable strict parsing. Default is false.
 * @see #isParseStrict
 * @draft ICU 3.6
 */
public void setParseStrict(boolean value) {
parseStrict = value;
}
/**
 * Reports whether this format currently parses strictly.
 * @return true if strict parsing has been enabled, false otherwise
 * @see #setParseStrict
 * @draft ICU 3.6
 */
public boolean isParseStrict() {
    return this.parseStrict;
}
//============== Locale Stuff =====================
/**
@ -919,7 +952,8 @@ public abstract class NumberFormat extends UFormat {
&& maximumFractionDigits == other.maximumFractionDigits
&& minimumFractionDigits == other.minimumFractionDigits
&& groupingUsed == other.groupingUsed
&& parseIntegerOnly == other.parseIntegerOnly;
&& parseIntegerOnly == other.parseIntegerOnly
&& parseStrict == other.parseStrict;
}
/**
@ -1500,4 +1534,7 @@ public abstract class NumberFormat extends UFormat {
*/
public NumberFormat() {
}
// new in ICU4J 3.6
private boolean parseStrict;
}