mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-20 20:19:32 +00:00
ICU-2154 standardize whitespace handling by date/number format
X-SVN-Rev: 11456
This commit is contained in:
parent
f6815a28c0
commit
52ac97f86e
4 changed files with 234 additions and 107 deletions
|
@ -4,8 +4,8 @@
|
|||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/format/DateFormatTest.java,v $
|
||||
* $Date: 2003/03/13 20:27:47 $
|
||||
* $Revision: 1.13 $
|
||||
* $Date: 2003/04/04 19:20:51 $
|
||||
* $Revision: 1.14 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -876,6 +876,23 @@ public class DateFormatTest extends com.ibm.icu.dev.test.TestFmwk {
|
|||
expectParse(DATA, new Locale("en"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test handling of white space.
|
||||
*/
|
||||
public void TestWhiteSpaceParsing() {
|
||||
String DATA[] = {
|
||||
"yyyy MM dd",
|
||||
|
||||
// pattern, input, expected parse (or null if the parse is expected to fail)
|
||||
|
||||
// Pattern space run should parse input text space run
|
||||
"MM d yy", " 04 01 03", "2003 04 01",
|
||||
null, " 04 01 03 ", "2003 04 01",
|
||||
};
|
||||
|
||||
expectParse(DATA, new Locale("en"));
|
||||
}
|
||||
|
||||
public void TestCoverage() {
|
||||
Date now = new Date();
|
||||
Calendar cal = new GregorianCalendar();
|
||||
|
|
|
@ -4,8 +4,8 @@
|
|||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/format/NumberFormatTest.java,v $
|
||||
* $Date: 2003/02/25 23:39:43 $
|
||||
* $Revision: 1.10 $
|
||||
* $Date: 2003/04/04 19:20:52 $
|
||||
* $Revision: 1.11 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -842,6 +842,14 @@ public class NumberFormatTest extends com.ibm.icu.dev.test.TestFmwk {
|
|||
}
|
||||
}
|
||||
|
||||
public void TestWhiteSpaceParsing() {
|
||||
DecimalFormatSymbols US = new DecimalFormatSymbols(Locale.US);
|
||||
DecimalFormat fmt = new DecimalFormat("a b#0c ", US);
|
||||
int n = 1234;
|
||||
expect(fmt, "a b1234c ", n);
|
||||
expect(fmt, "a b1234c ", n);
|
||||
}
|
||||
|
||||
public void expectPad(DecimalFormat fmt, String pat, int pos) {
|
||||
expectPad(fmt, pat, pos, 0, (char)0);
|
||||
}
|
||||
|
|
|
@ -5,14 +5,16 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/DecimalFormat.java,v $
|
||||
* $Date: 2003/02/21 01:49:21 $
|
||||
* $Revision: 1.21 $
|
||||
* $Date: 2003/04/04 19:20:52 $
|
||||
* $Revision: 1.22 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
package com.ibm.icu.text;
|
||||
|
||||
import com.ibm.icu.util.Currency;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.impl.UCharacterProperty;
|
||||
import java.text.ParsePosition;
|
||||
import java.text.FieldPosition;
|
||||
import java.math.BigInteger;
|
||||
|
@ -1180,25 +1182,44 @@ public class DecimalFormat extends NumberFormat {
|
|||
int oldStart = parsePosition.getIndex();
|
||||
int backup;
|
||||
|
||||
// check for positivePrefix; take longest
|
||||
boolean gotPositive = text.regionMatches(position,positivePrefix,0,
|
||||
positivePrefix.length());
|
||||
boolean gotNegative = text.regionMatches(position,negativePrefix,0,
|
||||
negativePrefix.length());
|
||||
if (gotPositive && gotNegative) {
|
||||
if (positivePrefix.length() > negativePrefix.length())
|
||||
gotNegative = false;
|
||||
else if (positivePrefix.length() < negativePrefix.length())
|
||||
gotPositive = false;
|
||||
// Match positive and negative prefixes; prefer longest match.
|
||||
int posMatch = compareAffix(positivePrefix, text, position);
|
||||
int negMatch = compareAffix(negativePrefix, text, position);
|
||||
if (posMatch >= 0 && negMatch >= 0) {
|
||||
if (posMatch > negMatch) {
|
||||
negMatch = -1;
|
||||
} else if (negMatch > posMatch) {
|
||||
posMatch = -1;
|
||||
}
|
||||
}
|
||||
if (gotPositive) {
|
||||
position += positivePrefix.length();
|
||||
} else if (gotNegative) {
|
||||
position += negativePrefix.length();
|
||||
if (posMatch >= 0) {
|
||||
position += posMatch;
|
||||
} else if (negMatch >= 0) {
|
||||
position += negMatch;
|
||||
} else {
|
||||
//PP:parsePosition.errorIndex = position;
|
||||
return false;
|
||||
}
|
||||
|
||||
// // check for positivePrefix; take longest
|
||||
// boolean gotPositive = text.regionMatches(position,positivePrefix,0,
|
||||
// positivePrefix.length());
|
||||
// boolean gotNegative = text.regionMatches(position,negativePrefix,0,
|
||||
// negativePrefix.length());
|
||||
// if (gotPositive && gotNegative) {
|
||||
// if (positivePrefix.length() > negativePrefix.length())
|
||||
// gotNegative = false;
|
||||
// else if (positivePrefix.length() < negativePrefix.length())
|
||||
// gotPositive = false;
|
||||
// }
|
||||
// if (gotPositive) {
|
||||
// position += positivePrefix.length();
|
||||
// } else if (gotNegative) {
|
||||
// position += negativePrefix.length();
|
||||
// } else {
|
||||
// //PP:parsePosition.errorIndex = position;
|
||||
// return false;
|
||||
// }
|
||||
// process digits or Inf, find decimal position
|
||||
status[STATUS_INFINITE] = false;
|
||||
if (!isExponent && text.regionMatches(position,symbols.getInfinity(),0,
|
||||
|
@ -1371,32 +1392,57 @@ public class DecimalFormat extends NumberFormat {
|
|||
}
|
||||
}
|
||||
|
||||
// check for positiveSuffix
|
||||
if (gotPositive)
|
||||
gotPositive = text.regionMatches(position,positiveSuffix,0,
|
||||
positiveSuffix.length());
|
||||
if (gotNegative)
|
||||
gotNegative = text.regionMatches(position,negativeSuffix,0,
|
||||
negativeSuffix.length());
|
||||
|
||||
// if both match, take longest
|
||||
if (gotPositive && gotNegative) {
|
||||
if (positiveSuffix.length() > negativeSuffix.length())
|
||||
gotNegative = false;
|
||||
else if (positiveSuffix.length() < negativeSuffix.length())
|
||||
gotPositive = false;
|
||||
// Match positive and negative suffixes; prefer longest match.
|
||||
if (posMatch >= 0) {
|
||||
posMatch = compareAffix(positiveSuffix, text, position);
|
||||
}
|
||||
if (negMatch >= 0) {
|
||||
negMatch = compareAffix(negativeSuffix, text, position);
|
||||
}
|
||||
if (posMatch >= 0 && negMatch >= 0) {
|
||||
if (posMatch > negMatch) {
|
||||
negMatch = -1;
|
||||
} else if (negMatch > posMatch) {
|
||||
posMatch = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// fail if neither or both
|
||||
if (gotPositive == gotNegative) {
|
||||
// Fail if neither or both
|
||||
if ((posMatch >= 0) == (negMatch >= 0)) {
|
||||
//PP:parsePosition.errorIndex = position;
|
||||
return false;
|
||||
}
|
||||
|
||||
parsePosition.setIndex(position +
|
||||
(gotPositive ? positiveSuffix.length() : negativeSuffix.length())); // mark success!
|
||||
parsePosition.setIndex(position + (posMatch>=0 ? posMatch : negMatch));
|
||||
|
||||
status[STATUS_POSITIVE] = gotPositive;
|
||||
status[STATUS_POSITIVE] = (posMatch >= 0);
|
||||
|
||||
// // check for positiveSuffix
|
||||
// if (gotPositive)
|
||||
// gotPositive = text.regionMatches(position,positiveSuffix,0,
|
||||
// positiveSuffix.length());
|
||||
// if (gotNegative)
|
||||
// gotNegative = text.regionMatches(position,negativeSuffix,0,
|
||||
// negativeSuffix.length());
|
||||
//
|
||||
// // if both match, take longest
|
||||
// if (gotPositive && gotNegative) {
|
||||
// if (positiveSuffix.length() > negativeSuffix.length())
|
||||
// gotNegative = false;
|
||||
// else if (positiveSuffix.length() < negativeSuffix.length())
|
||||
// gotPositive = false;
|
||||
// }
|
||||
//
|
||||
// // fail if neither or both
|
||||
// if (gotPositive == gotNegative) {
|
||||
// //PP:parsePosition.errorIndex = position;
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// parsePosition.setIndex(position +
|
||||
// (gotPositive ? positiveSuffix.length() : negativeSuffix.length())); // mark success!
|
||||
//
|
||||
// status[STATUS_POSITIVE] = gotPositive;
|
||||
if (parsePosition.getIndex() == oldStart) {
|
||||
//PP:parsePosition.errorIndex = position;
|
||||
return false;
|
||||
|
@ -1404,6 +1450,58 @@ public class DecimalFormat extends NumberFormat {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the length matched by the given affix, or -1 if none.
|
||||
* Runs of white space in the affix match runs of white space in
|
||||
* the input. Pattern (affix) white space is tested with
|
||||
* UCharacterProperty.isRuleWhiteSpace; input white space with UCharacter.isUWhiteSpace.
|
||||
* @param affix pattern string, taken as a literal
|
||||
* @param input input text
|
||||
* @param pos offset into input at which to begin matching
|
||||
* @return length of input that matches, or -1 if match failure
|
||||
*/
|
||||
private int compareAffix(String affix, String input, int pos) {
|
||||
int start = pos;
|
||||
for (int i=0; i<affix.length(); ) {
|
||||
int c = UTF16.charAt(affix, i);
|
||||
int len = UTF16.getCharCount(c);
|
||||
i += len;
|
||||
if (UCharacterProperty.isRuleWhiteSpace(c)) {
|
||||
// Advance over run in pattern
|
||||
while (i < affix.length()) {
|
||||
c = UTF16.charAt(affix, i);
|
||||
if (!UCharacterProperty.isRuleWhiteSpace(c)) {
|
||||
break;
|
||||
}
|
||||
i += UTF16.getCharCount(c);
|
||||
}
|
||||
|
||||
// Advance over run in input text
|
||||
int s = pos;
|
||||
while (pos < input.length()) {
|
||||
c = UTF16.charAt(input, pos);
|
||||
if (!UCharacter.isUWhiteSpace(c)) {
|
||||
break;
|
||||
}
|
||||
pos += UTF16.getCharCount(c);
|
||||
}
|
||||
|
||||
// Must see at least one white space char in input
|
||||
if (pos == s) {
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
if (pos < input.length() &&
|
||||
UTF16.charAt(input, pos) == c) {
|
||||
pos += len;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return pos - start;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the decimal format symbols, which is generally not changed
|
||||
* by the programmer or user.
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/SimpleDateFormat.java,v $
|
||||
* $Date: 2003/03/13 20:28:29 $
|
||||
* $Revision: 1.19 $
|
||||
* $Date: 2003/04/04 19:20:52 $
|
||||
* $Revision: 1.20 $
|
||||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
@ -18,6 +18,7 @@ import com.ibm.icu.util.Calendar;
|
|||
import com.ibm.icu.util.SimpleTimeZone;
|
||||
import com.ibm.icu.util.TimeZone;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.impl.UCharacterProperty;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.ObjectInputStream;
|
||||
|
@ -669,10 +670,10 @@ public class SimpleDateFormat extends DateFormat {
|
|||
* @see DateFormat
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
public void parse(String text, Calendar cal, ParsePosition pos)
|
||||
public void parse(String text, Calendar cal, ParsePosition parsePos)
|
||||
{
|
||||
int start = pos.getIndex();
|
||||
int oldStart = start;
|
||||
int pos = parsePos.getIndex();
|
||||
int start = pos;
|
||||
boolean[] ambiguousYear = {false};
|
||||
int count = 0;
|
||||
|
||||
|
@ -684,52 +685,13 @@ public class SimpleDateFormat extends DateFormat {
|
|||
int abutPat = -1; // If >=0, we are in a run of abutting numeric fields
|
||||
int abutStart = 0;
|
||||
int abutPass = 0;
|
||||
boolean inQuote = false;
|
||||
|
||||
for (int i=0; i<pattern.length(); ++i) {
|
||||
char ch = pattern.charAt(i);
|
||||
|
||||
// Handle quoted strings. Two consecutive quotes is a
|
||||
// quote literal, inside or outside of quotes.
|
||||
if (ch == '\'') {
|
||||
abutPat = -1; // End of any abutting fields
|
||||
|
||||
// Match a quote literal '' outside of quotes
|
||||
if ((i+1)<pattern.length() && pattern.charAt(i+1)==ch) {
|
||||
if (start==text.length() || text.charAt(start) != ch) {
|
||||
pos.setIndex(oldStart);
|
||||
pos.setErrorIndex(start);
|
||||
return;
|
||||
}
|
||||
++start;
|
||||
++i; // Skip over doubled quote
|
||||
continue;
|
||||
}
|
||||
|
||||
// Match a quoted string, including any embedded ''
|
||||
// quote literals. Note that we allow an unclosed
|
||||
// quote for backward compatibility.
|
||||
while (++i<pattern.length()) {
|
||||
ch = pattern.charAt(i);
|
||||
if (ch == '\'') {
|
||||
if ((i+1)<pattern.length() && pattern.charAt(i+1)==ch) {
|
||||
++i;
|
||||
// Fall through and match literal quote
|
||||
} else {
|
||||
break; // Closing quote seen
|
||||
}
|
||||
}
|
||||
if (start==text.length() || text.charAt(start) != ch) {
|
||||
pos.setIndex(oldStart);
|
||||
pos.setErrorIndex(start);
|
||||
return;
|
||||
}
|
||||
++start;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle alphabetic field characters.
|
||||
if (ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z') {
|
||||
if (!inQuote && (ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z')) {
|
||||
int fieldPat = i;
|
||||
|
||||
// Count the length of this field specifier
|
||||
|
@ -766,7 +728,7 @@ public class SimpleDateFormat extends DateFormat {
|
|||
// fields.
|
||||
if (abutting) {
|
||||
abutPat = fieldPat;
|
||||
abutStart = start;
|
||||
abutStart = pos;
|
||||
abutPass = 0;
|
||||
}
|
||||
}
|
||||
|
@ -790,20 +752,20 @@ public class SimpleDateFormat extends DateFormat {
|
|||
if (fieldPat == abutPat) {
|
||||
count -= abutPass++;
|
||||
if (count == 0) {
|
||||
pos.setIndex(oldStart);
|
||||
pos.setErrorIndex(start);
|
||||
parsePos.setIndex(start);
|
||||
parsePos.setErrorIndex(pos);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
start = subParse(text, start, ch, count,
|
||||
true, false, ambiguousYear, cal);
|
||||
pos = subParse(text, pos, ch, count,
|
||||
true, false, ambiguousYear, cal);
|
||||
|
||||
// If the parse fails anywhere in the run, back up to the
|
||||
// start of the run and retry.
|
||||
if (start < 0) {
|
||||
if (pos < 0) {
|
||||
i = abutPat - 1;
|
||||
start = abutStart;
|
||||
pos = abutStart;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
@ -811,28 +773,70 @@ public class SimpleDateFormat extends DateFormat {
|
|||
// Handle non-numeric fields and non-abutting numeric
|
||||
// fields.
|
||||
else {
|
||||
int k = start;
|
||||
start=subParse(text, start, ch, count,
|
||||
int s = pos;
|
||||
pos = subParse(text, pos, ch, count,
|
||||
false, true, ambiguousYear, cal);
|
||||
|
||||
if (start < 0) {
|
||||
pos.setErrorIndex(k);
|
||||
pos.setIndex(oldStart);
|
||||
if (pos < 0) {
|
||||
parsePos.setErrorIndex(s);
|
||||
parsePos.setIndex(start);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle unquoted non-alphabetic characters. These are
|
||||
// treated as literals.
|
||||
// Handle literal pattern characters. These are any
|
||||
// quoted characters and non-alphabetic unquoted
|
||||
// characters.
|
||||
else {
|
||||
|
||||
abutPat = -1; // End of any abutting fields
|
||||
if (start==text.length() || text.charAt(start) != ch) {
|
||||
pos.setIndex(oldStart);
|
||||
pos.setErrorIndex(start);
|
||||
return;
|
||||
|
||||
// Handle quotes. Two consecutive quotes is a quote
|
||||
// literal, inside or outside of quotes. Otherwise a
|
||||
// quote indicates entry or exit from a quoted region.
|
||||
if (ch == '\'') {
|
||||
// Match a quote literal '' within OR outside of quotes
|
||||
if ((i+1)<pattern.length() && pattern.charAt(i+1)==ch) {
|
||||
++i; // Skip over doubled quote
|
||||
// Fall through and treat quote as a literal
|
||||
} else {
|
||||
// Enter or exit quoted region
|
||||
inQuote = !inQuote;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
++start;
|
||||
|
||||
// A run of white space in the pattern matches a run
|
||||
// of white space in the input text.
|
||||
if (UCharacterProperty.isRuleWhiteSpace(ch)) {
|
||||
// Advance over run in pattern
|
||||
while ((i+1)<pattern.length() &&
|
||||
UCharacterProperty.isRuleWhiteSpace(pattern.charAt(i+1))) {
|
||||
++i;
|
||||
}
|
||||
|
||||
// Advance over run in input text
|
||||
int s = pos;
|
||||
while (pos<text.length() &&
|
||||
UCharacter.isUWhiteSpace(text.charAt(pos))) {
|
||||
++pos;
|
||||
}
|
||||
|
||||
// Must see at least one white space char in input
|
||||
if (pos > s) {
|
||||
continue;
|
||||
}
|
||||
} else if (pos<text.length() && text.charAt(pos)==ch) {
|
||||
// Match a literal
|
||||
++pos;
|
||||
continue;
|
||||
}
|
||||
|
||||
// We fall through to this point if the match fails
|
||||
parsePos.setIndex(start);
|
||||
parsePos.setErrorIndex(pos);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -840,7 +844,7 @@ public class SimpleDateFormat extends DateFormat {
|
|||
// will fill in default values for missing fields when the time
|
||||
// is computed.
|
||||
|
||||
pos.setIndex(start);
|
||||
parsePos.setIndex(pos);
|
||||
|
||||
// This part is a problem: When we call parsedDate.after, we compute the time.
|
||||
// Take the date April 3 2004 at 2:30 am. When this is first set up, the year
|
||||
|
@ -883,8 +887,8 @@ public class SimpleDateFormat extends DateFormat {
|
|||
// An IllegalArgumentException will be thrown by Calendar.getTime()
|
||||
// if any fields are out of range, e.g., MONTH == 17.
|
||||
catch (IllegalArgumentException e) {
|
||||
pos.setErrorIndex(start);
|
||||
pos.setIndex(oldStart);
|
||||
parsePos.setErrorIndex(pos);
|
||||
parsePos.setIndex(start);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue