From 41d2876025368eca417b479814c8a83d426293f6 Mon Sep 17 00:00:00 2001 From: Scott Russell Date: Mon, 16 Sep 2013 12:45:41 +0000 Subject: [PATCH] ICU-10261 give DateFormat more granular leniency control X-SVN-Rev: 34327 --- icu4c/source/i18n/datefmt.cpp | 26 +++++++++++++++- icu4c/source/i18n/smpdtfmt.cpp | 35 +++++++++++++-------- icu4c/source/i18n/unicode/datefmt.h | 47 ++++++++++++++++++++++++++++- icu4c/source/i18n/unicode/udat.h | 44 +++++++++++++++++++++++++++ 4 files changed, 137 insertions(+), 15 deletions(-) diff --git a/icu4c/source/i18n/datefmt.cpp b/icu4c/source/i18n/datefmt.cpp index fe7b39ef167..bc34dae813d 100644 --- a/icu4c/source/i18n/datefmt.cpp +++ b/icu4c/source/i18n/datefmt.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* - * Copyright (C) 1997-2011, International Business Machines Corporation and * + * Copyright (C) 1997-2013, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* * @@ -74,6 +74,7 @@ DateFormat& DateFormat::operator=(const DateFormat& other) } else { fNumberFormat = NULL; } + fBoolFlags = other.fBoolFlags; } return *this; } @@ -511,6 +512,29 @@ DateFormat::isLenient() const return FALSE; } +//---------------------------------------------------------------------- + +DateFormat& +DateFormat::setBooleanAttribute(UDateFormatBooleanAttribute attr, + UBool newValue, + UErrorCode &status) { + if(!fBoolFlags.isValidValue(newValue)) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } else { + fBoolFlags.set(attr, newValue); + } + + return *this; +} + +//---------------------------------------------------------------------- + +UBool +DateFormat::getBooleanAttribute(UDateFormatBooleanAttribute attr, UErrorCode &status) const { + + return fBoolFlags.get(attr); +} + U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/smpdtfmt.cpp 
b/icu4c/source/i18n/smpdtfmt.cpp index 6b6a880bbfc..28fd78f982d 100644 --- a/icu4c/source/i18n/smpdtfmt.cpp +++ b/icu4c/source/i18n/smpdtfmt.cpp @@ -248,6 +248,7 @@ SimpleDateFormat::SimpleDateFormat(UErrorCode& status) fOverrideList(NULL), fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE) { + setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status); construct(kShort, (EStyle) (kShort + kDateOffset), fLocale, status); initializeDefaultCentury(); } @@ -266,6 +267,7 @@ SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, { fDateOverride.setToBogus(); fTimeOverride.setToBogus(); + setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status); initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status); initialize(fLocale, status); initializeDefaultCentury(); @@ -286,6 +288,7 @@ SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, { fDateOverride.setTo(override); fTimeOverride.setToBogus(); + setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status); initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status); initialize(fLocale, status); initializeDefaultCentury(); @@ -309,6 +312,7 @@ SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, fDateOverride.setToBogus(); fTimeOverride.setToBogus(); + setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status); initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status); initialize(fLocale, status); @@ -331,6 +335,7 @@ SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, fDateOverride.setTo(override); fTimeOverride.setToBogus(); + setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status); initializeSymbols(fLocale, 
initializeCalendar(NULL,fLocale,status), status); initialize(fLocale, status); @@ -356,6 +361,7 @@ SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, fDateOverride.setToBogus(); fTimeOverride.setToBogus(); + setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status); initializeCalendar(NULL,fLocale,status); initialize(fLocale, status); @@ -378,6 +384,7 @@ SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, fDateOverride.setToBogus(); fTimeOverride.setToBogus(); + setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status); initializeCalendar(NULL, fLocale, status); initialize(fLocale, status); @@ -398,6 +405,7 @@ SimpleDateFormat::SimpleDateFormat(EStyle timeStyle, fOverrideList(NULL), fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE) { + setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status); construct(timeStyle, dateStyle, fLocale, status); if(U_SUCCESS(status)) { initializeDefaultCentury(); @@ -438,6 +446,7 @@ SimpleDateFormat::SimpleDateFormat(const Locale& locale, fDateOverride.setToBogus(); fTimeOverride.setToBogus(); + setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status); initialize(fLocale, status); if(U_SUCCESS(status)) { @@ -456,6 +465,8 @@ SimpleDateFormat::SimpleDateFormat(const SimpleDateFormat& other) fOverrideList(NULL), fCapitalizationContext(UDISPCTX_CAPITALIZATION_NONE) { + UErrorCode status = U_ZERO_ERROR; + setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status).setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status); *this = other; } @@ -1689,8 +1700,6 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& int32_t saveHebrewMonth = -1; int32_t count = 0; - UBool lenient = isLenient(); - // hack, reset tztype, cast 
away const ((SimpleDateFormat*)this)->tztype = UTZFMT_TIME_TYPE_UNKNOWN; @@ -1838,7 +1847,7 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& abutPat = -1; // End of any abutting fields - if (! matchLiterals(fPattern, i, text, pos, lenient)) { + if (! matchLiterals(fPattern, i, text, pos, getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status))) { status = U_PARSE_ERROR; goto ExitParse; } @@ -1846,7 +1855,7 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& } // Special hack for trailing "." after non-numeric field. - if (text.charAt(pos) == 0x2e && lenient) { + if (text.charAt(pos) == 0x2e && getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status)) { // only do if the last field is not numeric if (isAfterNonNumericField(fPattern, fPattern.length())) { pos++; // skip the extra "." @@ -2407,11 +2416,11 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC int32_t value = 0; int32_t i; int32_t ps = 0; + UErrorCode status = U_ZERO_ERROR; ParsePosition pos(0); UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(ch); NumberFormat *currentNumberFormat; UnicodeString temp; - UBool lenient = isLenient(); UBool gotNumber = FALSE; #if defined (U_DEBUG_CAL) @@ -2517,7 +2526,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, FALSE); } - if (!lenient) { + if (!getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status)) { // Check the range of the value int32_t bias = gFieldRangeBias[patternCharIndex]; if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) { @@ -2653,7 +2662,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC return newStart; } } - if (gotNumber && (lenient || value > fSymbols->fShortYearNamesCount)) { + if (gotNumber && (getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC,status) || value > 
fSymbols->fShortYearNamesCount)) { cal.set(UCAL_YEAR, value); return pos.getIndex(); } @@ -2714,7 +2723,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC } newStart = matchString(text, start, UCAL_MONTH, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, shortMonthPat, cal); // try LLL } - if (newStart > 0 || !lenient) // currently we do not try to parse MMMMM/LLLLL: #8860 + if (newStart > 0 || !getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) // currently we do not try to parse MMMMM/LLLLL: #8860 return newStart; // else we allowing parsing as number, below } @@ -2779,7 +2788,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, NULL, cal)) > 0) return newStart; - else if (!lenient || patternCharIndex == UDAT_DAY_OF_WEEK_FIELD) + else if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status) || patternCharIndex == UDAT_DAY_OF_WEEK_FIELD) return newStart; // else we allowing parsing as number, below } @@ -2805,7 +2814,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, fSymbols->fStandaloneShorterWeekdays, fSymbols->fStandaloneShorterWeekdaysCount, NULL, cal)) > 0) return newStart; - else if (!lenient) + else if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) return newStart; // else we allowing parsing as number, below } @@ -2845,7 +2854,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC else if ((newStart = matchQuarterString(text, start, UCAL_MONTH, fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal)) > 0) return newStart; - else if (!lenient) + else if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) return newStart; // else we allowing parsing as number, below } @@ -2871,7 +2880,7 @@ 
int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC else if ((newStart = matchQuarterString(text, start, UCAL_MONTH, fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal)) > 0) return newStart; - else if (!lenient) + else if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) return newStart; // else we allowing parsing as number, below } @@ -3038,7 +3047,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC // Don't need suffix processing here (as in number processing at the beginning of the function); // the new fields being handled as numeric values (month, weekdays, quarters) should not have suffixes. - if (!lenient) { + if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) { // Check the range of the value int32_t bias = gFieldRangeBias[patternCharIndex]; if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) { diff --git a/icu4c/source/i18n/unicode/datefmt.h b/icu4c/source/i18n/unicode/datefmt.h index 1741c64541f..4d06c567ca3 100644 --- a/icu4c/source/i18n/unicode/datefmt.h +++ b/icu4c/source/i18n/unicode/datefmt.h @@ -28,6 +28,7 @@ #include "unicode/numfmt.h" #include "unicode/format.h" #include "unicode/locid.h" +#include "unicode/enumset.h" /** * \file @@ -39,6 +40,13 @@ U_NAMESPACE_BEGIN class TimeZone; class DateTimePatternGenerator; +// explicit template instantiation. see digitlst.h +#if defined (_MSC_VER) +template class U_I18N_API EnumSet; +#endif + /** * DateFormat is an abstract class for a family of classes that convert dates and * times from their internal representations to textual form and back again in a @@ -572,7 +580,7 @@ public: static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); /** - * Returns true if the formatter is set for lenient parsing. + * Returns true if the encapsulated Calendar object is set for lenient parsing. 
* @stable ICU 2.0 */ virtual UBool isLenient(void) const; @@ -583,6 +591,9 @@ public: * precisely match this object's format. With strict parsing, inputs must * match this object's format. * + * Note: This method is specific to the encapsulated Calendar object. DateFormat + * leniency aspects are controlled by setBooleanAttribute. + * * @param lenient True specifies date/time interpretation to be lenient. * @see Calendar::setLenient * @stable ICU 2.0 @@ -662,6 +673,32 @@ public: */ virtual void setTimeZone(const TimeZone& zone); + /** + * Set a boolean attribute on this DateFormat. + * May return U_UNSUPPORTED_ERROR if this instance does not support + * the specified attribute. + * @param attr the attribute to set + * @param newvalue new value + * @param status the error type + * @return *this - for chaining (example: format.setBooleanAttribute(...).setBooleanAttribute(...) ) + * @internal ICU 5.2 technology preview + */ + + virtual DateFormat& U_EXPORT2 setBooleanAttribute(UDateFormatBooleanAttribute attr, + UBool newvalue, + UErrorCode &status); + + /** + * Get a boolean attribute from this DateFormat. + * May return U_UNSUPPORTED_ERROR if this instance does not support + * the specified attribute. + * @param attr the attribute to get + * @param status the error type + * @return the attribute value. Undefined if there is an error. + * @internal ICU 5.2 technology preview + */ + virtual UBool U_EXPORT2 getBooleanAttribute(UDateFormatBooleanAttribute attr, UErrorCode &status) const; + protected: /** * Default constructor.
Creates a DateFormat with no Calendar or NumberFormat @@ -699,6 +736,7 @@ protected: */ NumberFormat* fNumberFormat; + private: /** * Gets the date/time formatter with the given formatting styles for the @@ -710,6 +748,13 @@ private: */ static DateFormat* U_EXPORT2 create(EStyle timeStyle, EStyle dateStyle, const Locale& inLocale); + + /** + * enum set of active boolean attributes for this instance + */ + EnumSet fBoolFlags; + + public: #ifndef U_HIDE_OBSOLETE_API /** diff --git a/icu4c/source/i18n/unicode/udat.h b/icu4c/source/i18n/unicode/udat.h index 19b7a17c341..67b057776f7 100644 --- a/icu4c/source/i18n/unicode/udat.h +++ b/icu4c/source/i18n/unicode/udat.h @@ -826,6 +826,50 @@ udat_open(UDateFormatStyle timeStyle, U_STABLE void U_EXPORT2 udat_close(UDateFormat* format); + +/** + * DateFormat boolean attributes + * @internal ICU 5.2 technology preview + */ +typedef enum UDateFormatBooleanAttribute { + /** indicates whether whitespace is allowed. Includes trailing dot tolerance. */ + UDAT_PARSE_ALLOW_WHITESPACE, + /** indicates tolerance of numeric data when String data may be assumed. eg: UDAT_YEAR_NAME_FIELD, + * UDAT_STANDALONE_MONTH_FIELD, UDAT_DAY_OF_WEEK_FIELD */ + UDAT_PARSE_ALLOW_NUMERIC, + /** count boolean date format constants */ + UDAT_BOOLEAN_ATTRIBUTE_COUNT +} UDateFormatBooleanAttribute; + + +/** + * Get a boolean attribute associated with a UDateFormat. + * An example would be a true value for a key of UDAT_PARSE_ALLOW_WHITESPACE indicating allowing whitespace leniency. + * If the formatter does not understand the attribute, -1 is returned. + * @param fmt The formatter to query. + * @param attr The attribute to query; e.g. UDAT_PARSE_ALLOW_WHITESPACE. + * @param status A pointer to an UErrorCode to receive any errors + * @return The value of attr. 
+ * @internal ICU 5.2 technology preview + */ +U_INTERNAL UBool U_EXPORT2 +udat_getBooleanAttribute(const UDateFormat* fmt, UDateFormatBooleanAttribute attr, UErrorCode* status); + +/** + * Set a boolean attribute associated with a UDateFormat. + * An example of a boolean attribute is parse leniency control. If the formatter does not understand + * the attribute, the call is ignored. + * @param fmt The formatter to set. + * @param attr The attribute to set; one of UDAT_PARSE_ALLOW_WHITESPACE or UDAT_PARSE_ALLOW_NUMERIC + * @param newValue The new value of attr. + * @param status A pointer to an UErrorCode to receive any errors + * @internal ICU 5.2 technology preview + */ +U_INTERNAL void U_EXPORT2 +udat_setBooleanAttribute(UDateFormat *fmt, UDateFormatBooleanAttribute attr, UBool, UErrorCode* status); + + + #if U_SHOW_CPLUSPLUS_API U_NAMESPACE_BEGIN