ICU-2656 make HHmmss parsing not parse negative numbers

X-SVN-Rev: 11254
This commit is contained in:
Alan Liu 2003-03-07 01:04:43 +00:00
parent 956a7236ac
commit e7be1a69e2
4 changed files with 162 additions and 166 deletions

View file

@ -61,6 +61,11 @@ const UChar SimpleDateFormat::fgDefaultPattern[] =
0x79, 0x79, 0x79, 0x79, 0x4D, 0x4D, 0x64, 0x64, 0x20, 0x68, 0x68, 0x3A, 0x6D, 0x6D, 0x20, 0x61, 0
}; /* "yyyyMMdd hh:mm a" */
// This prefix is designed to NEVER MATCH real text, in order to
// suppress the parsing of negative numbers.  parseInt() temporarily
// installs it as the DecimalFormat negative prefix when negative
// values must not be accepted.  The array is a zero-terminated UChar
// string holding the single code unit 0xAB00.  Adjust as needed (if
// this becomes valid Unicode).
static const UChar SUPPRESS_NEGATIVE_PREFIX[] = {0xAB00, 0};
/**
* These are the tags we expect to see in normal resource bundle files associated
* with a locale.
@ -687,41 +692,6 @@ SimpleDateFormat::zeroPaddingNumber(UnicodeString &appendTo, int32_t value, int3
//----------------------------------------------------------------------
// {sfb} removed
/*
// this function will dump output to the console on a debug build when there's a parse error
#ifdef _DEBUG
void chk(ParsePosition& val, UChar ch, ParsePosition& start, int32_t count)
{
if (val.getIndex() < 0)
{
cout << "[Parse failure on '" << (char)ch << "' x " << dec << count << " @ " << start.getIndex() << ']';
}
}
#else
inline void chk(ParsePosition& val, UChar ch, ParsePosition& start, int32_t count)
{
}
#endif
inline Date
parseFailureResult(ParsePosition& pos, ParsePosition& oldStart, ParsePosition& failurePos)
{
// Note: The C++ version currently supports the notion of returning zero
// with a non-zero parse position, but only if this format is lenient.
// The returned position in this case is the first un-parseable character.
// This is useful, but is not present in the Java version, and causes a
// DateFormat test to fail.
// For now, I am removing this function. It can be restored later.
// if (!isLenient()) pos = oldStart;
// else { pos = failurePos.getIndex(); if (pos.getIndex() < 0) pos = -pos.getIndex(); };
pos = oldStart;
return 0;
}
*/
void
SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& pos) const
{
@ -733,6 +703,7 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
UChar prevCh = 0;
int32_t count = 0;
int32_t interQuoteCount = 1; // Number of chars between quotes
UBool allowNegative = TRUE;
// loop through the pattern string character by character, using it to control how
// we match characters in the input
@ -789,7 +760,8 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
if (count > 0)
{
int32_t startOffset = start;
start = subParse(text, start, prevCh, count, FALSE, ambiguousYear, cal);
start = subParse(text, start, prevCh, count, FALSE, allowNegative, ambiguousYear, cal);
allowNegative = TRUE;
if ( start < 0 ) {
pos.setErrorIndex(startOffset);
pos.setIndex(oldStart);
@ -831,7 +803,11 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
// obeyCount. That's because the next field directly
// abuts this one, so we have to use the count to know when
// to stop parsing. [LIU]
start = subParse(text, start, prevCh, count, TRUE, ambiguousYear, cal);
// Don't allow negatives in this field or in the next.
// This prevents anomalies like HHmmss matching 12-34
// as 12:-3:4, or 11:57:04.
start = subParse(text, start, prevCh, count, TRUE, FALSE, ambiguousYear, cal);
allowNegative = FALSE;
if (start < 0) {
pos.setErrorIndex(startOffset);
pos.setIndex(oldStart);
@ -854,7 +830,8 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
// handle cases like: MM-dd-yy, HH:mm:ss, or yyyy MM dd,
// where ch = '-', ':', or ' ', respectively.
int32_t startOffset = start;
start = subParse( text, start, prevCh, count, FALSE, ambiguousYear, cal);
start = subParse( text, start, prevCh, count, FALSE, allowNegative, ambiguousYear, cal);
allowNegative = TRUE;
if ( start < 0 ) {
pos.setErrorIndex(startOffset);
pos.setIndex(oldStart);
@ -897,7 +874,7 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
if (count > 0)
{
int32_t startOffset = start;
start = subParse(text, start, prevCh, count, FALSE, ambiguousYear, cal);
start = subParse(text, start, prevCh, count, FALSE, allowNegative, ambiguousYear, cal);
if ( start < 0 ) {
pos.setIndex(oldStart);
pos.setErrorIndex(startOffset);
@ -1033,31 +1010,6 @@ SimpleDateFormat::set2DigitYearStart(UDate d, UErrorCode& status)
parseAmbiguousDatesAsAfter(d, status);
}
/**
* Parse the given text, at the given position, as a numeric value, using
* this object's fNumberFormat. Return the corresponding long value in the
* fill-in parameter 'value'. If the parse fails, this method leaves pos
* unchanged and returns FALSE; otherwise it advances pos and
* returns TRUE.
*/
// {sfb} removed
/*
UBool
SimpleDateFormat::subParseLong(const UnicodeString& text, ParsePosition& pos, int32_t& value) const
{
Formattable parseResult;
ParsePosition posSave = pos;
fNumberFormat->parse(text, parseResult, pos);
if (pos != posSave && parseResult.getType() == Formattable::kLong)
{
value = parseResult.getLong();
return TRUE;
}
pos = posSave;
return FALSE;
}
*/
/**
* Private member function that converts the parsed date strings into
* timeFields. Returns -start (for ParsePosition) if failed.
@ -1069,13 +1021,14 @@ SimpleDateFormat::subParseLong(const UnicodeString& text, ParsePosition& pos, in
* indicating matching failure, otherwise.
*/
int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count,
UBool obeyCount, UBool ambiguousYear[], Calendar& cal) const
UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], Calendar& cal) const
{
Formattable number;
int32_t value = 0;
int32_t i;
ParsePosition pos(0);
int32_t patternCharIndex;
UnicodeString temp;
UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch);
if (patternCharPtr == NULL) {
@ -1113,16 +1066,17 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
int32_t parseStart = pos.getIndex(); // WORK AROUND BUG IN NUMBER FORMAT IN 1.2B3
// It would be good to unify this with the obeyCount logic below,
// but that's going to be difficult.
if (obeyCount)
{
if ((start+count) > text.length())
const UnicodeString* src;
if (obeyCount) {
if ((start+count) > text.length()) {
return -start;
UnicodeString temp;
}
text.extractBetween(0, start + count, temp);
fNumberFormat->parse(temp, number, pos);
src = &temp;
} else {
src = &text;
}
else
fNumberFormat->parse(text, number, pos);
parseInt(*src, number, pos, allowNegative);
if (pos.getIndex() == parseStart)
// WORK AROUND BUG IN NUMBER FORMAT IN 1.2B3
return -start;
@ -1389,17 +1343,17 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
// WORK AROUND BUG IN NUMBER FORMAT IN 1.2B3
int32_t parseStart = pos.getIndex();
// Handle "generic" fields
if (obeyCount)
{
if ((start+count) > text.length())
const UnicodeString* src;
if (obeyCount) {
if ((start+count) > text.length()) {
return -start;
UnicodeString s;
// {sfb} old code had extract, make sure it works
text.extractBetween(0, start + count, s);
fNumberFormat->parse(s, number, pos);
}
text.extractBetween(0, start + count, temp);
src = &temp;
} else {
src = &text;
}
else
fNumberFormat->parse(text, number, pos);
parseInt(*src, number, pos, allowNegative);
if (pos.getIndex() != parseStart) {
// WORK AROUND BUG IN NUMBER FORMAT IN 1.2B3
cal.set(field, number.getLong());
@ -1409,6 +1363,28 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
}
}
/**
* Parse an integer using fNumberFormat. This method is semantically
* const, but actually may modify fNumberFormat.
*/
void SimpleDateFormat::parseInt(const UnicodeString& text,
Formattable& number,
ParsePosition& pos,
UBool allowNegative) const {
UnicodeString oldPrefix;
DecimalFormat* df = NULL;
if (!allowNegative &&
fNumberFormat->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
df = (DecimalFormat*)fNumberFormat;
df->getNegativePrefix(oldPrefix);
df->setNegativePrefix(SUPPRESS_NEGATIVE_PREFIX);
}
fNumberFormat->parse(text, number, pos);
if (df != NULL) {
df->setNegativePrefix(oldPrefix);
}
}
//----------------------------------------------------------------------
void SimpleDateFormat::translatePattern(const UnicodeString& originalPattern,
@ -1517,28 +1493,6 @@ SimpleDateFormat::setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols
}
//----------------------------------------------------------------------
// {sfb} removed
/*int32_t
SimpleDateFormat::getZoneIndex(const UnicodeString& ID) const
{
// this function searches a time zone list for a time zone with the specified
// ID. It'll either return an appropriate row number or -1 if the ID wasn't
// found.
int32_t index, col;
for (col=0; col<=4 && col<fSymbols->fZoneStringsColCount; col+=2)
{
for (index = 0; index < fSymbols->fZoneStringsRowCount; index++)
{
if (fSymbols->fZoneStrings[index][col] == ID) return index;
}
}
return - 1;
}*/
//----------------------------------------------------------------------
UDate

View file

@ -569,20 +569,6 @@ private:
friend class DateFormat;
/**
* Gets the index for the given time zone ID to obtain the timezone strings
* for formatting. The time zone ID is just for programmatic lookup. NOT
* LOCALIZED!!!
*
* @param DateFormatSymbols a DateFormatSymbols object containing the time zone names
* @param ID the given time zone ID.
* @return the index of the given time zone ID. Returns -1 if
* the given time zone ID can't be located in the
* DateFormatSymbols object.
* @see SimpleTimeZone
*/
//int32_t getZoneIndex(const DateFormatSymbols&, const UnicodeString& ID) const;
void initializeDefaultCentury(void);
/**
@ -694,16 +680,12 @@ private:
* indicating matching failure, otherwise.
*/
int32_t subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count,
UBool obeyCount, UBool ambiguousYear[], Calendar& cal) const;
UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], Calendar& cal) const;
/**
* Parse the given text, at the given position, as a numeric value, using
* this object's NumberFormat. Return the corresponding long value in the
* fill-in parameter 'value'. If the parse fails, this method leaves pos
* unchanged and returns FALSE; otherwise it advances pos and
* returns TRUE.
*/
//UBool subParseLong(const UnicodeString& text, ParsePosition& pos, int32_t& value) const;
/**
 * Parse an integer from text using this object's NumberFormat
 * (fNumberFormat), placing the result in number.  This method is
 * semantically const, but actually may modify fNumberFormat.
 *
 * @param text           the text to parse
 * @param number         receives the parse result
 * @param pos            parse position passed through to the NumberFormat
 * @param allowNegative  if FALSE and the NumberFormat is a DecimalFormat,
 *                       its negative prefix is temporarily replaced with
 *                       one that is not expected to match, so that
 *                       negative numbers are not recognized
 */
void parseInt(const UnicodeString& text,
Formattable& number,
ParsePosition& pos,
UBool allowNegative) const;
/**
* Translate a pattern, mapping each character in the from string to the
@ -722,11 +704,6 @@ private:
const UnicodeString& from,
const UnicodeString& to,
UErrorCode& status);
/**
* Given a zone ID, try to locate it in our time zone array. Return the
* index (row index) of the found time zone, or -1 if we can't find it.
*/
//int32_t getZoneIndex(const UnicodeString& ID) const;
/**
* Sets the starting date of the 100-year window that dates with 2-digit years

View file

@ -44,6 +44,7 @@ void DateFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &nam
TESTCASE(16,TestWallyWedel);
TESTCASE(17,TestDateFormatCalendar);
TESTCASE(18,TestSpaceParsing);
TESTCASE(19,TestExactCountFormat);
default: name = ""; break;
}
}
@ -1012,56 +1013,114 @@ void DateFormatTest::TestDateFormatCalendar() {
* Test DateFormat's parsing of space characters. See jitterbug 1916.
*/
void DateFormatTest::TestSpaceParsing() {
const char* PARSE_FAILURE = "parse failure";
const char* DATA[] = {
// pattern, input, expected output (in quotes)
"MMMM d yy", " 04 05 06", PARSE_FAILURE, // MMMM wants Apr/April
"MMMM d yy", "04 05 06", PARSE_FAILURE,
"MM d yy", " 04 05 06", "\"2006 04 05\"",
"MM d yy", "04 05 06", "\"2006 04 05\"",
"MMMM d yy", " Apr 05 06", "\"2006 04 05\"",
"MMMM d yy", "Apr 05 06", "\"2006 04 05\"",
"yyyy MM dd HH:mm:ss",
// pattern, input, expected parse or NULL if expect parse failure
"MMMM d yy", " 04 05 06", NULL, // MMMM wants Apr/April
NULL, "04 05 06", NULL,
"MM d yy", " 04 05 06", "2006 04 05 00:00:00",
NULL, "04 05 06", "2006 04 05 00:00:00",
"MMMM d yy", " Apr 05 06", "2006 04 05 00:00:00",
NULL, "Apr 05 06", "2006 04 05 00:00:00",
};
const int32_t DATA_len = sizeof(DATA)/sizeof(DATA[0]);
expectParse(DATA, DATA_len, Locale("en"));
}
/**
* Test handling of "HHmmss" pattern.
*/
void DateFormatTest::TestExactCountFormat() {
const char* DATA[] = {
"yyyy MM dd HH:mm:ss",
// pattern, input, expected parse or NULL if expect parse failure
"HHmmss", "123456", "1970 01 01 12:34:56",
NULL, "12345", "1970 01 01 12:34:05",
NULL, "1234", NULL,
NULL, "00-05", NULL,
NULL, "12-34", NULL,
NULL, "00+05", NULL,
};
const int32_t DATA_len = sizeof(DATA)/sizeof(DATA[0]);
expectParse(DATA, DATA_len, Locale("en"));
}
/**
* Test parsing. Input is an array that starts with the following
* header:
*
* [0] = pattern string to parse [i+2] with
*
* followed by test cases, each of which is 3 array elements:
*
* [i] = pattern, or NULL to reuse prior pattern
* [i+1] = input string
* [i+2] = expected parse result (parsed with pattern [0])
*
* If expect parse failure, then [i+2] should be NULL.
*/
void DateFormatTest::expectParse(const char** data, int32_t data_length,
const Locale& loc) {
const UDate FAIL = (UDate) -1;
const UnicodeString FAIL_STR("parse failure");
int32_t i = 0;
UErrorCode ec = U_ZERO_ERROR;
Locale en("en");
SimpleDateFormat sdfObj("", en, ec);
SimpleDateFormat fmt("", loc, ec);
SimpleDateFormat ref(data[i++], loc, ec);
SimpleDateFormat gotfmt("G yyyy MM dd HH:mm:ss z", loc, ec);
if (U_FAILURE(ec)) {
errln("FAIL: SimpleDateFormat constructor");
return;
}
int32_t i;
for (i=0; i<DATA_len; i+=3) {
sdfObj.applyPattern(DATA[i]);
ParsePosition pp(0);
UDate udDate = sdfObj.parse(DATA[i+1], pp);
UnicodeString output;
if (pp.getErrorIndex() == -1) {
ec = U_ZERO_ERROR;
SimpleDateFormat formatter("yyyy MM dd", en, ec);
if (U_FAILURE(ec)) {
errln("FAIL: SimpleDateFormat constructor");
const char* currentPat = NULL;
while (i<data_length) {
const char* pattern = data[i++];
const char* input = data[i++];
const char* expected = data[i++];
ec = U_ZERO_ERROR;
if (pattern != NULL) {
fmt.applyPattern(pattern);
currentPat = pattern;
}
UDate got = fmt.parse(input, ec);
UnicodeString gotstr(FAIL_STR);
if (U_FAILURE(ec)) {
got = FAIL;
} else {
gotstr.remove();
gotfmt.format(got, gotstr);
}
UErrorCode ec2 = U_ZERO_ERROR;
UDate exp = FAIL;
UnicodeString expstr(FAIL_STR);
if (expected != NULL) {
expstr = expected;
exp = ref.parse(expstr, ec2);
if (U_FAILURE(ec2)) {
// This only happens if expected is in wrong format --
// should never happen once test is debugged.
errln("FAIL: Internal test error");
return;
}
FieldPosition fp(0);
formatter.format(udDate, output, fp);
output.insert(0, (UChar)34);
output.append((UChar)34);
} else {
output = UnicodeString(PARSE_FAILURE, "");
}
UnicodeString exp(DATA[i+2], "");
if (output == exp) {
logln((UnicodeString)"Ok: Parse of \"" + DATA[i+1] + "\" with \"" +
DATA[i] + "\" => " + output);
if (got == exp) {
logln((UnicodeString)"Ok: " + input + " x " +
currentPat + " => " + gotstr);
} else {
errln((UnicodeString)"FAIL: Parse of \"" + DATA[i+1] + "\" with \"" +
DATA[i] + "\" => " +
output + ", expected " + exp);
errln((UnicodeString)"FAIL: " + input + " x " +
currentPat + " => " + gotstr + ", expected " +
expstr);
}
}
}
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -149,6 +149,12 @@ public: // package
virtual void TestDateFormatCalendar(void);
virtual void TestSpaceParsing(void);
/**
 * Test handling of "HHmmss" pattern.
 */
void TestExactCountFormat(void);
private:
/**
 * Shared driver for table-based parse tests.  data[0] is the pattern
 * used to read expected results; it is followed by triples of
 * (pattern or NULL to reuse the prior pattern, input string,
 * expected parse result or NULL if a parse failure is expected).
 *
 * @param data         the test table described above
 * @param data_length  number of elements in data
 * @param locale       locale used to construct the formatters
 */
void expectParse(const char** data, int32_t data_length,
const Locale& locale);
};
#endif /* #if !UCONFIG_NO_FORMATTING */