ICU-2154 standardize whitespace handling by date/number format

X-SVN-Rev: 11456
This commit is contained in:
Alan Liu 2003-04-04 19:20:52 +00:00
parent f6815a28c0
commit 52ac97f86e
4 changed files with 234 additions and 107 deletions

View file

@ -4,8 +4,8 @@
* others. All Rights Reserved. *
*******************************************************************************
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/format/DateFormatTest.java,v $
* $Date: 2003/03/13 20:27:47 $
* $Revision: 1.13 $
* $Date: 2003/04/04 19:20:51 $
* $Revision: 1.14 $
*
*****************************************************************************************
*/
@ -876,6 +876,23 @@ public class DateFormatTest extends com.ibm.icu.dev.test.TestFmwk {
expectParse(DATA, new Locale("en"));
}
/**
 * Exercises date parsing when both the pattern and the input text
 * contain white space. The table below is handed to expectParse
 * together with the "en" locale.
 */
public void TestWhiteSpaceParsing() {
    final Locale english = new Locale("en");
    final String[] cases = {
        // Leading entry of the table.
        // NOTE(review): presumably the pattern used to render the
        // expected values -- confirm against expectParse.
        "yyyy MM dd",

        // Remaining entries come in triples:
        //   pattern, input, expected parse (or null if expect parse failure)
        // A space run in the pattern should parse a space run in the input.
        // NOTE(review): a null pattern presumably reuses the pattern of
        // the previous row -- confirm against expectParse.
        "MM d yy", " 04 01 03", "2003 04 01",
        null, " 04 01 03 ", "2003 04 01",
    };
    expectParse(cases, english);
}
public void TestCoverage() {
Date now = new Date();
Calendar cal = new GregorianCalendar();

View file

@ -4,8 +4,8 @@
* others. All Rights Reserved. *
*******************************************************************************
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/format/NumberFormatTest.java,v $
* $Date: 2003/02/25 23:39:43 $
* $Revision: 1.10 $
* $Date: 2003/04/04 19:20:52 $
* $Revision: 1.11 $
*
*****************************************************************************************
*/
@ -842,6 +842,14 @@ public class NumberFormatTest extends com.ibm.icu.dev.test.TestFmwk {
}
}
/**
 * Checks that white space in a DecimalFormat prefix/suffix is matched
 * as a run: each stretch of white space in the affix pattern should
 * accept one or more white space characters in the parsed text.
 *
 * NOTE(review): expect(fmt, String, int) is defined elsewhere in this
 * class; presumably it parses the string with fmt and compares the
 * result with the number -- confirm.
 */
public void TestWhiteSpaceParsing() {
    DecimalFormatSymbols US = new DecimalFormatSymbols(Locale.US);
    DecimalFormat fmt = new DecimalFormat("a b#0c ", US);
    int n = 1234;
    // Input whose white space is exactly what the pattern spells out.
    expect(fmt, "a b1234c ", n);
    // Input with longer runs of spaces in the prefix and the suffix.
    // Previously this line duplicated the one above, so the run-matching
    // behaviour was never actually exercised.
    expect(fmt, "a   b1234c   ", n);
}
/**
 * Convenience overload that forwards to the five-argument expectPad,
 * supplying 0 and the NUL character as the two trailing arguments.
 * @param fmt format object passed through unchanged
 * @param pat pattern string passed through unchanged
 * @param pos position value passed through unchanged
 */
public void expectPad(DecimalFormat fmt, String pat, int pos) {
    final char nul = '\u0000';
    expectPad(fmt, pat, pos, 0, nul);
}

View file

@ -5,14 +5,16 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/DecimalFormat.java,v $
* $Date: 2003/02/21 01:49:21 $
* $Revision: 1.21 $
* $Date: 2003/04/04 19:20:52 $
* $Revision: 1.22 $
*
*****************************************************************************************
*/
package com.ibm.icu.text;
import com.ibm.icu.util.Currency;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.impl.UCharacterProperty;
import java.text.ParsePosition;
import java.text.FieldPosition;
import java.math.BigInteger;
@ -1180,25 +1182,44 @@ public class DecimalFormat extends NumberFormat {
int oldStart = parsePosition.getIndex();
int backup;
// check for positivePrefix; take longest
boolean gotPositive = text.regionMatches(position,positivePrefix,0,
positivePrefix.length());
boolean gotNegative = text.regionMatches(position,negativePrefix,0,
negativePrefix.length());
if (gotPositive && gotNegative) {
if (positivePrefix.length() > negativePrefix.length())
gotNegative = false;
else if (positivePrefix.length() < negativePrefix.length())
gotPositive = false;
// Match positive and negative prefixes; prefer longest match.
int posMatch = compareAffix(positivePrefix, text, position);
int negMatch = compareAffix(negativePrefix, text, position);
if (posMatch >= 0 && negMatch >= 0) {
if (posMatch > negMatch) {
negMatch = -1;
} else if (negMatch > posMatch) {
posMatch = -1;
}
}
if (gotPositive) {
position += positivePrefix.length();
} else if (gotNegative) {
position += negativePrefix.length();
if (posMatch >= 0) {
position += posMatch;
} else if (negMatch >= 0) {
position += negMatch;
} else {
//PP:parsePosition.errorIndex = position;
return false;
}
// // check for positivePrefix; take longest
// boolean gotPositive = text.regionMatches(position,positivePrefix,0,
// positivePrefix.length());
// boolean gotNegative = text.regionMatches(position,negativePrefix,0,
// negativePrefix.length());
// if (gotPositive && gotNegative) {
// if (positivePrefix.length() > negativePrefix.length())
// gotNegative = false;
// else if (positivePrefix.length() < negativePrefix.length())
// gotPositive = false;
// }
// if (gotPositive) {
// position += positivePrefix.length();
// } else if (gotNegative) {
// position += negativePrefix.length();
// } else {
// //PP:parsePosition.errorIndex = position;
// return false;
// }
// process digits or Inf, find decimal position
status[STATUS_INFINITE] = false;
if (!isExponent && text.regionMatches(position,symbols.getInfinity(),0,
@ -1371,32 +1392,57 @@ public class DecimalFormat extends NumberFormat {
}
}
// check for positiveSuffix
if (gotPositive)
gotPositive = text.regionMatches(position,positiveSuffix,0,
positiveSuffix.length());
if (gotNegative)
gotNegative = text.regionMatches(position,negativeSuffix,0,
negativeSuffix.length());
// if both match, take longest
if (gotPositive && gotNegative) {
if (positiveSuffix.length() > negativeSuffix.length())
gotNegative = false;
else if (positiveSuffix.length() < negativeSuffix.length())
gotPositive = false;
// Match positive and negative suffixes; prefer longest match.
if (posMatch >= 0) {
posMatch = compareAffix(positiveSuffix, text, position);
}
if (negMatch >= 0) {
negMatch = compareAffix(negativeSuffix, text, position);
}
if (posMatch >= 0 && negMatch >= 0) {
if (posMatch > negMatch) {
negMatch = -1;
} else if (negMatch > posMatch) {
posMatch = -1;
}
}
// fail if neither or both
if (gotPositive == gotNegative) {
// Fail if neither or both
if ((posMatch >= 0) == (negMatch >= 0)) {
//PP:parsePosition.errorIndex = position;
return false;
}
parsePosition.setIndex(position +
(gotPositive ? positiveSuffix.length() : negativeSuffix.length())); // mark success!
parsePosition.setIndex(position + (posMatch>=0 ? posMatch : negMatch));
status[STATUS_POSITIVE] = gotPositive;
status[STATUS_POSITIVE] = (posMatch >= 0);
// // check for positiveSuffix
// if (gotPositive)
// gotPositive = text.regionMatches(position,positiveSuffix,0,
// positiveSuffix.length());
// if (gotNegative)
// gotNegative = text.regionMatches(position,negativeSuffix,0,
// negativeSuffix.length());
//
// // if both match, take longest
// if (gotPositive && gotNegative) {
// if (positiveSuffix.length() > negativeSuffix.length())
// gotNegative = false;
// else if (positiveSuffix.length() < negativeSuffix.length())
// gotPositive = false;
// }
//
// // fail if neither or both
// if (gotPositive == gotNegative) {
// //PP:parsePosition.errorIndex = position;
// return false;
// }
//
// parsePosition.setIndex(position +
// (gotPositive ? positiveSuffix.length() : negativeSuffix.length())); // mark success!
//
// status[STATUS_POSITIVE] = gotPositive;
if (parsePosition.getIndex() == oldStart) {
//PP:parsePosition.errorIndex = position;
return false;
@ -1404,6 +1450,58 @@ public class DecimalFormat extends NumberFormat {
return true;
}
/**
 * Match a literal affix against the input text, starting at the given
 * offset, and report how much input the affix consumed.
 *
 * <p>A run of white space in the affix matches a run of one or more
 * white space characters in the input. The two sides use different
 * tests: the affix is checked with
 * UCharacterProperty.isRuleWhiteSpace, the input with
 * UCharacter.isUWhiteSpace. Every other affix code point must equal
 * the input code point at the current offset.
 *
 * @param affix pattern string, taken as a literal
 * @param input input text
 * @param pos offset into input at which to begin matching
 * @return length of input that matches, or -1 if match failure
 */
private int compareAffix(String affix, String input, int pos) {
    final int affixLength = affix.length();
    final int inputLength = input.length();
    final int origin = pos;

    int a = 0;
    while (a < affixLength) {
        final int affixChar = UTF16.charAt(affix, a);
        final int affixCharLen = UTF16.getCharCount(affixChar);
        a += affixCharLen;

        if (!UCharacterProperty.isRuleWhiteSpace(affixChar)) {
            // Ordinary literal: the input must carry the same code point here.
            if (pos >= inputLength || UTF16.charAt(input, pos) != affixChar) {
                return -1;
            }
            pos += affixCharLen;
            continue;
        }

        // Swallow the remainder of the white space run in the affix.
        while (a < affixLength) {
            final int next = UTF16.charAt(affix, a);
            if (!UCharacterProperty.isRuleWhiteSpace(next)) {
                break;
            }
            a += UTF16.getCharCount(next);
        }

        // Swallow the corresponding white space run in the input.
        final int runStart = pos;
        while (pos < inputLength) {
            final int next = UTF16.charAt(input, pos);
            if (!UCharacter.isUWhiteSpace(next)) {
                break;
            }
            pos += UTF16.getCharCount(next);
        }

        // An affix white space run needs at least one input white space char.
        if (pos == runStart) {
            return -1;
        }
    }
    return pos - origin;
}
/**
* Returns the decimal format symbols, which is generally not changed
* by the programmer or user.

View file

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/SimpleDateFormat.java,v $
* $Date: 2003/03/13 20:28:29 $
* $Revision: 1.19 $
* $Date: 2003/04/04 19:20:52 $
* $Revision: 1.20 $
*
*****************************************************************************************
*/
@ -18,6 +18,7 @@ import com.ibm.icu.util.Calendar;
import com.ibm.icu.util.SimpleTimeZone;
import com.ibm.icu.util.TimeZone;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.impl.UCharacterProperty;
import java.io.IOException;
import java.io.ObjectInputStream;
@ -669,10 +670,10 @@ public class SimpleDateFormat extends DateFormat {
* @see DateFormat
* @stable ICU 2.0
*/
public void parse(String text, Calendar cal, ParsePosition pos)
public void parse(String text, Calendar cal, ParsePosition parsePos)
{
int start = pos.getIndex();
int oldStart = start;
int pos = parsePos.getIndex();
int start = pos;
boolean[] ambiguousYear = {false};
int count = 0;
@ -684,52 +685,13 @@ public class SimpleDateFormat extends DateFormat {
int abutPat = -1; // If >=0, we are in a run of abutting numeric fields
int abutStart = 0;
int abutPass = 0;
boolean inQuote = false;
for (int i=0; i<pattern.length(); ++i) {
char ch = pattern.charAt(i);
// Handle quoted strings. Two consecutive quotes is a
// quote literal, inside or outside of quotes.
if (ch == '\'') {
abutPat = -1; // End of any abutting fields
// Match a quote literal '' outside of quotes
if ((i+1)<pattern.length() && pattern.charAt(i+1)==ch) {
if (start==text.length() || text.charAt(start) != ch) {
pos.setIndex(oldStart);
pos.setErrorIndex(start);
return;
}
++start;
++i; // Skip over doubled quote
continue;
}
// Match a quoted string, including any embedded ''
// quote literals. Note that we allow an unclosed
// quote for backward compatibility.
while (++i<pattern.length()) {
ch = pattern.charAt(i);
if (ch == '\'') {
if ((i+1)<pattern.length() && pattern.charAt(i+1)==ch) {
++i;
// Fall through and match literal quote
} else {
break; // Closing quote seen
}
}
if (start==text.length() || text.charAt(start) != ch) {
pos.setIndex(oldStart);
pos.setErrorIndex(start);
return;
}
++start;
}
continue;
}
// Handle alphabetic field characters.
if (ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z') {
if (!inQuote && (ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z')) {
int fieldPat = i;
// Count the length of this field specifier
@ -766,7 +728,7 @@ public class SimpleDateFormat extends DateFormat {
// fields.
if (abutting) {
abutPat = fieldPat;
abutStart = start;
abutStart = pos;
abutPass = 0;
}
}
@ -790,20 +752,20 @@ public class SimpleDateFormat extends DateFormat {
if (fieldPat == abutPat) {
count -= abutPass++;
if (count == 0) {
pos.setIndex(oldStart);
pos.setErrorIndex(start);
parsePos.setIndex(start);
parsePos.setErrorIndex(pos);
return;
}
}
start = subParse(text, start, ch, count,
true, false, ambiguousYear, cal);
pos = subParse(text, pos, ch, count,
true, false, ambiguousYear, cal);
// If the parse fails anywhere in the run, back up to the
// start of the run and retry.
if (start < 0) {
if (pos < 0) {
i = abutPat - 1;
start = abutStart;
pos = abutStart;
continue;
}
}
@ -811,28 +773,70 @@ public class SimpleDateFormat extends DateFormat {
// Handle non-numeric fields and non-abutting numeric
// fields.
else {
int k = start;
start=subParse(text, start, ch, count,
int s = pos;
pos = subParse(text, pos, ch, count,
false, true, ambiguousYear, cal);
if (start < 0) {
pos.setErrorIndex(k);
pos.setIndex(oldStart);
if (pos < 0) {
parsePos.setErrorIndex(s);
parsePos.setIndex(start);
return;
}
}
}
// Handle unquoted non-alphabetic characters. These are
// treated as literals.
// Handle literal pattern characters. These are any
// quoted characters and non-alphabetic unquoted
// characters.
else {
abutPat = -1; // End of any abutting fields
if (start==text.length() || text.charAt(start) != ch) {
pos.setIndex(oldStart);
pos.setErrorIndex(start);
return;
// Handle quotes. Two consecutive quotes is a quote
// literal, inside or outside of quotes. Otherwise a
// quote indicates entry or exit from a quoted region.
if (ch == '\'') {
// Match a quote literal '' within OR outside of quotes
if ((i+1)<pattern.length() && pattern.charAt(i+1)==ch) {
++i; // Skip over doubled quote
// Fall through and treat quote as a literal
} else {
// Enter or exit quoted region
inQuote = !inQuote;
continue;
}
}
++start;
// A run of white space in the pattern matches a run
// of white space in the input text.
if (UCharacterProperty.isRuleWhiteSpace(ch)) {
// Advance over run in pattern
while ((i+1)<pattern.length() &&
UCharacterProperty.isRuleWhiteSpace(pattern.charAt(i+1))) {
++i;
}
// Advance over run in input text
int s = pos;
while (pos<text.length() &&
UCharacter.isUWhiteSpace(text.charAt(pos))) {
++pos;
}
// Must see at least one white space char in input
if (pos > s) {
continue;
}
} else if (pos<text.length() && text.charAt(pos)==ch) {
// Match a literal
++pos;
continue;
}
// We fall through to this point if the match fails
parsePos.setIndex(start);
parsePos.setErrorIndex(pos);
return;
}
}
@ -840,7 +844,7 @@ public class SimpleDateFormat extends DateFormat {
// will fill in default values for missing fields when the time
// is computed.
pos.setIndex(start);
parsePos.setIndex(pos);
// This part is a problem: When we call parsedDate.after, we compute the time.
// Take the date April 3 2004 at 2:30 am. When this is first set up, the year
@ -883,8 +887,8 @@ public class SimpleDateFormat extends DateFormat {
// An IllegalArgumentException will be thrown by Calendar.getTime()
// if any fields are out of range, e.g., MONTH == 17.
catch (IllegalArgumentException e) {
pos.setErrorIndex(start);
pos.setIndex(oldStart);
parsePos.setErrorIndex(pos);
parsePos.setIndex(start);
}
}