From e7be1a69e23dcd5de41d00fe1b37ac31012f4eec Mon Sep 17 00:00:00 2001 From: Alan Liu Date: Fri, 7 Mar 2003 01:04:43 +0000 Subject: [PATCH] ICU-2656 make HHmmss parsing not parse negative numbers X-SVN-Rev: 11254 --- icu4c/source/i18n/smpdtfmt.cpp | 160 +++++++++--------------- icu4c/source/i18n/unicode/smpdtfmt.h | 33 +---- icu4c/source/test/intltest/dtfmttst.cpp | 129 +++++++++++++------ icu4c/source/test/intltest/dtfmttst.h | 6 + 4 files changed, 162 insertions(+), 166 deletions(-) diff --git a/icu4c/source/i18n/smpdtfmt.cpp b/icu4c/source/i18n/smpdtfmt.cpp index e23b58a8859..9c045fdc046 100644 --- a/icu4c/source/i18n/smpdtfmt.cpp +++ b/icu4c/source/i18n/smpdtfmt.cpp @@ -61,6 +61,11 @@ const UChar SimpleDateFormat::fgDefaultPattern[] = 0x79, 0x79, 0x79, 0x79, 0x4D, 0x4D, 0x64, 0x64, 0x20, 0x68, 0x68, 0x3A, 0x6D, 0x6D, 0x20, 0x61, 0 }; /* "yyyyMMdd hh:mm a" */ +// This prefix is designed to NEVER MATCH real text, in order to +// suppress the parsing of negative numbers. Adjust as needed (if +// this becomes valid Unicode). +static const UChar SUPPRESS_NEGATIVE_PREFIX[] = {0xAB00, 0}; + /** * These are the tags we expect to see in normal resource bundle files associated * with a locale. 
@@ -687,41 +692,6 @@ SimpleDateFormat::zeroPaddingNumber(UnicodeString &appendTo, int32_t value, int3 //---------------------------------------------------------------------- -// {sfb} removed -/* -// this function will dump output to the console on a debug build when there's a parse error -#ifdef _DEBUG -void chk(ParsePosition& val, UChar ch, ParsePosition& start, int32_t count) -{ - if (val.getIndex() < 0) - { - cout << "[Parse failure on '" << (char)ch << "' x " << dec << count << " @ " << start.getIndex() << ']'; - } -} -#else -inline void chk(ParsePosition& val, UChar ch, ParsePosition& start, int32_t count) -{ -} -#endif - -inline Date -parseFailureResult(ParsePosition& pos, ParsePosition& oldStart, ParsePosition& failurePos) -{ - // Note: The C++ version currently supports the notion of returning zero - // with a non-zero parse position, but only if this format is lenient. - // The returned position in this case is the first un-parseable character. - // This is useful, but is not present in the Java version, and causes a - // DateFormat test to fail. - - // For now, I am removing this function. It can be restored later. 
- - // if (!isLenient()) pos = oldStart; - // else { pos = failurePos.getIndex(); if (pos.getIndex() < 0) pos = -pos.getIndex(); }; - pos = oldStart; - return 0; -} -*/ - void SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& pos) const { @@ -733,6 +703,7 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& UChar prevCh = 0; int32_t count = 0; int32_t interQuoteCount = 1; // Number of chars between quotes + UBool allowNegative = TRUE; // loop through the pattern string character by character, using it to control how // we match characters in the input @@ -789,7 +760,8 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& if (count > 0) { int32_t startOffset = start; - start = subParse(text, start, prevCh, count, FALSE, ambiguousYear, cal); + start = subParse(text, start, prevCh, count, FALSE, allowNegative, ambiguousYear, cal); + allowNegative = TRUE; if ( start < 0 ) { pos.setErrorIndex(startOffset); pos.setIndex(oldStart); @@ -831,7 +803,11 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& // obeyCount. That's because the next field directly // abuts this one, so we have to use the count to know when // to stop parsing. [LIU] - start = subParse(text, start, prevCh, count, TRUE, ambiguousYear, cal); + // Don't allow negatives in this field or in the next. + // This prevents anomalies like HHmmss matching 12-34 + // as 12:-3:4, or 11:57:04. + start = subParse(text, start, prevCh, count, TRUE, FALSE, ambiguousYear, cal); + allowNegative = FALSE; if (start < 0) { pos.setErrorIndex(startOffset); pos.setIndex(oldStart); @@ -854,7 +830,8 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& // handle cases like: MM-dd-yy, HH:mm:ss, or yyyy MM dd, // where ch = '-', ':', or ' ', repectively. 
int32_t startOffset = start; - start = subParse( text, start, prevCh, count, FALSE, ambiguousYear, cal); + start = subParse( text, start, prevCh, count, FALSE, allowNegative, ambiguousYear, cal); + allowNegative = TRUE; if ( start < 0 ) { pos.setErrorIndex(startOffset); pos.setIndex(oldStart); @@ -897,7 +874,7 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& if (count > 0) { int32_t startOffset = start; - start = subParse(text, start, prevCh, count, FALSE, ambiguousYear, cal); + start = subParse(text, start, prevCh, count, FALSE, allowNegative, ambiguousYear, cal); if ( start < 0 ) { pos.setIndex(oldStart); pos.setErrorIndex(startOffset); @@ -1033,31 +1010,6 @@ SimpleDateFormat::set2DigitYearStart(UDate d, UErrorCode& status) parseAmbiguousDatesAsAfter(d, status); } -/** - * Parse the given text, at the given position, as a numeric value, using - * this objects fNumberFormat. Return the corresponding long value in the - * fill-in parameter 'value'. If the parse fails, this method leaves pos - * unchanged and returns FALSE; otherwise it advances pos and - * returns TRUE. - */ -// {sfb} removed -/* -UBool -SimpleDateFormat::subParseLong(const UnicodeString& text, ParsePosition& pos, int32_t& value) const -{ - Formattable parseResult; - ParsePosition posSave = pos; - fNumberFormat->parse(text, parseResult, pos); - if (pos != posSave && parseResult.getType() == Formattable::kLong) - { - value = parseResult.getLong(); - return TRUE; - } - pos = posSave; - return FALSE; -} -*/ - /** * Private member function that converts the parsed date strings into * timeFields. Returns -start (for ParsePosition) if failed. @@ -1069,13 +1021,14 @@ SimpleDateFormat::subParseLong(const UnicodeString& text, ParsePosition& pos, in * indicating matching failure, otherwise. 
*/ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count, - UBool obeyCount, UBool ambiguousYear[], Calendar& cal) const + UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], Calendar& cal) const { Formattable number; int32_t value = 0; int32_t i; ParsePosition pos(0); int32_t patternCharIndex; + UnicodeString temp; UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch); if (patternCharPtr == NULL) { @@ -1113,16 +1066,17 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC int32_t parseStart = pos.getIndex(); // WORK AROUND BUG IN NUMBER FORMAT IN 1.2B3 // It would be good to unify this with the obeyCount logic below, // but that's going to be difficult. - if (obeyCount) - { - if ((start+count) > text.length()) + const UnicodeString* src; + if (obeyCount) { + if ((start+count) > text.length()) { return -start; - UnicodeString temp; + } text.extractBetween(0, start + count, temp); - fNumberFormat->parse(temp, number, pos); + src = &temp; + } else { + src = &text; } - else - fNumberFormat->parse(text, number, pos); + parseInt(*src, number, pos, allowNegative); if (pos.getIndex() == parseStart) // WORK AROUND BUG IN NUMBER FORMAT IN 1.2B3 return -start; @@ -1389,17 +1343,17 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC // WORK AROUND BUG IN NUMBER FORMAT IN 1.2B3 int32_t parseStart = pos.getIndex(); // Handle "generic" fields - if (obeyCount) - { - if ((start+count) > text.length()) + const UnicodeString* src; + if (obeyCount) { + if ((start+count) > text.length()) { return -start; - UnicodeString s; - // {sfb} old code had extract, make sure it works - text.extractBetween(0, start + count, s); - fNumberFormat->parse(s, number, pos); + } + text.extractBetween(0, start + count, temp); + src = &temp; + } else { + src = &text; } - else - fNumberFormat->parse(text, number, pos); + parseInt(*src, number, pos, allowNegative); if 
(pos.getIndex() != parseStart) { // WORK AROUND BUG IN NUMBER FORMAT IN 1.2B3 cal.set(field, number.getLong()); @@ -1409,6 +1363,28 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC } } +/** + * Parse an integer using fNumberFormat. This method is semantically + * const, but actually may modify fNumberFormat. + */ +void SimpleDateFormat::parseInt(const UnicodeString& text, + Formattable& number, + ParsePosition& pos, + UBool allowNegative) const { + UnicodeString oldPrefix; + DecimalFormat* df = NULL; + if (!allowNegative && + fNumberFormat->getDynamicClassID() == DecimalFormat::getStaticClassID()) { + df = (DecimalFormat*)fNumberFormat; + df->getNegativePrefix(oldPrefix); + df->setNegativePrefix(SUPPRESS_NEGATIVE_PREFIX); + } + fNumberFormat->parse(text, number, pos); + if (df != NULL) { + df->setNegativePrefix(oldPrefix); + } +} + //---------------------------------------------------------------------- void SimpleDateFormat::translatePattern(const UnicodeString& originalPattern, @@ -1517,28 +1493,6 @@ SimpleDateFormat::setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols } -//---------------------------------------------------------------------- - -// {sfb} removed -/*int32_t -SimpleDateFormat::getZoneIndex(const UnicodeString& ID) const -{ - // this function searches a time zone list for a time zone with the specified - // ID. It'll either return an apprpriate row number or -1 if the ID wasn't - // found. 
- int32_t index, col; - - for (col=0; col<=4 && col<fSymbols->fZoneStringsColCount; col+=2) - { - for (index = 0; index < fSymbols->fZoneStringsRowCount; index++) - { - if (fSymbols->fZoneStrings[index][col] == ID) return index; - } - } - - return - 1; -}*/ - //---------------------------------------------------------------------- UDate diff --git a/icu4c/source/i18n/unicode/smpdtfmt.h b/icu4c/source/i18n/unicode/smpdtfmt.h index 581be1a888e..91631668c30 100644 --- a/icu4c/source/i18n/unicode/smpdtfmt.h +++ b/icu4c/source/i18n/unicode/smpdtfmt.h @@ -569,20 +569,6 @@ private: friend class DateFormat; - /** - * Gets the index for the given time zone ID to obtain the timezone strings - * for formatting. The time zone ID is just for programmatic lookup. NOT - * LOCALIZED!!! - * - * @param DateFormatSymbols a DateFormatSymbols object contianing the time zone names - * @param ID the given time zone ID. - * @return the index of the given time zone ID. Returns -1 if - * the given time zone ID can't be located in the - * DateFormatSymbols object. - * @see SimpleTimeZone - */ - //int32_t getZoneIndex(const DateFormatSymbols&, const UnicodeString& ID) const; - void initializeDefaultCentury(void); /** @@ -694,16 +680,12 @@ private: * indicating matching failure, otherwise. */ int32_t subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count, - UBool obeyCount, UBool ambiguousYear[], Calendar& cal) const; + UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], Calendar& cal) const; - /** - * Parse the given text, at the given position, as a numeric value, using - * this object's NumberFormat. Return the corresponding long value in the - * fill-in parameter 'value'. If the parse fails, this method leaves pos - * unchanged and returns FALSE; otherwise it advances pos and - * returns TRUE. 
- */ - //UBool subParseLong(const UnicodeString& text, ParsePosition& pos, int32_t& value) const; + void parseInt(const UnicodeString& text, + Formattable& number, + ParsePosition& pos, + UBool allowNegative) const; /** * Translate a pattern, mapping each character in the from string to the @@ -722,11 +704,6 @@ private: const UnicodeString& from, const UnicodeString& to, UErrorCode& status); - /** - * Given a zone ID, try to locate it in our time zone array. Return the - * index (row index) of the found time zone, or -1 if we can't find it. - */ - //int32_t getZoneIndex(const UnicodeString& ID) const; /** * Sets the starting date of the 100-year window that dates with 2-digit years diff --git a/icu4c/source/test/intltest/dtfmttst.cpp b/icu4c/source/test/intltest/dtfmttst.cpp index 282debb2a00..9385cb0ea98 100644 --- a/icu4c/source/test/intltest/dtfmttst.cpp +++ b/icu4c/source/test/intltest/dtfmttst.cpp @@ -44,6 +44,7 @@ void DateFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &nam TESTCASE(16,TestWallyWedel); TESTCASE(17,TestDateFormatCalendar); TESTCASE(18,TestSpaceParsing); + TESTCASE(19,TestExactCountFormat); default: name = ""; break; } } @@ -1012,56 +1013,114 @@ void DateFormatTest::TestDateFormatCalendar() { * Test DateFormat's parsing of space characters. See jitterbug 1916. 
*/ void DateFormatTest::TestSpaceParsing() { - const char* PARSE_FAILURE = "parse failure"; const char* DATA[] = { - // pattern, input, expexted output (in quotes) - "MMMM d yy", " 04 05 06", PARSE_FAILURE, // MMMM wants Apr/April - "MMMM d yy", "04 05 06", PARSE_FAILURE, - "MM d yy", " 04 05 06", "\"2006 04 05\"", - "MM d yy", "04 05 06", "\"2006 04 05\"", - "MMMM d yy", " Apr 05 06", "\"2006 04 05\"", - "MMMM d yy", "Apr 05 06", "\"2006 04 05\"", + "yyyy MM dd HH:mm:ss", + + // pattern, input, expected parse or NULL if expect parse failure + "MMMM d yy", " 04 05 06", NULL, // MMMM wants Apr/April + NULL, "04 05 06", NULL, + "MM d yy", " 04 05 06", "2006 04 05 00:00:00", + NULL, "04 05 06", "2006 04 05 00:00:00", + "MMMM d yy", " Apr 05 06", "2006 04 05 00:00:00", + NULL, "Apr 05 06", "2006 04 05 00:00:00", }; const int32_t DATA_len = sizeof(DATA)/sizeof(DATA[0]); + expectParse(DATA, DATA_len, Locale("en")); +} + +/** + * Test handling of "HHmmss" pattern. + */ +void DateFormatTest::TestExactCountFormat() { + const char* DATA[] = { + "yyyy MM dd HH:mm:ss", + + // pattern, input, expected parse or NULL if expect parse failure + "HHmmss", "123456", "1970 01 01 12:34:56", + NULL, "12345", "1970 01 01 12:34:05", + NULL, "1234", NULL, + NULL, "00-05", NULL, + NULL, "12-34", NULL, + NULL, "00+05", NULL, + }; + const int32_t DATA_len = sizeof(DATA)/sizeof(DATA[0]); + + expectParse(DATA, DATA_len, Locale("en")); +} + +/** + * Test parsing. Input is an array that starts with the following + * header: + * + * [0] = pattern string to parse [i+2] with + * + * followed by test cases, each of which is 3 array elements: + * + * [i] = pattern, or NULL to reuse prior pattern + * [i+1] = input string + * [i+2] = expected parse result (parsed with pattern [0]) + * + * If expect parse failure, then [i+2] should be NULL. 
+ */ +void DateFormatTest::expectParse(const char** data, int32_t data_length, + const Locale& loc) { + const UDate FAIL = (UDate) -1; + const UnicodeString FAIL_STR("parse failure"); + int32_t i = 0; + UErrorCode ec = U_ZERO_ERROR; - Locale en("en"); - SimpleDateFormat sdfObj("", en, ec); + SimpleDateFormat fmt("", loc, ec); + SimpleDateFormat ref(data[i++], loc, ec); + SimpleDateFormat gotfmt("G yyyy MM dd HH:mm:ss z", loc, ec); if (U_FAILURE(ec)) { errln("FAIL: SimpleDateFormat constructor"); return; } - int32_t i; - for (i=0; i " + output); + + if (got == exp) { + logln((UnicodeString)"Ok: " + input + " x " + + currentPat + " => " + gotstr); } else { - errln((UnicodeString)"FAIL: Parse of \"" + DATA[i+1] + "\" with \"" + - DATA[i] + "\" => " + - output + ", expected " + exp); + errln((UnicodeString)"FAIL: " + input + " x " + + currentPat + " => " + gotstr + ", expected " + + expstr); } - } + } } #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/dtfmttst.h b/icu4c/source/test/intltest/dtfmttst.h index b8518cd4f06..cf0f7d89ef7 100644 --- a/icu4c/source/test/intltest/dtfmttst.h +++ b/icu4c/source/test/intltest/dtfmttst.h @@ -149,6 +149,12 @@ public: // package virtual void TestDateFormatCalendar(void); virtual void TestSpaceParsing(void); + + void TestExactCountFormat(void); + + private: + void expectParse(const char** data, int32_t data_length, + const Locale& locale); }; #endif /* #if !UCONFIG_NO_FORMATTING */