ICU-6109 & #4942,#3579: Merge changes for lenient number & date parsing from branches/eric/lenient_number_parsing (manually!)

X-SVN-Rev: 30018
This commit is contained in:
Peter Edberg 2011-05-04 12:23:42 +00:00
parent 56b28bd292
commit 332037ef5b
25 changed files with 1667 additions and 301 deletions

4
.gitattributes vendored
View file

@ -73,8 +73,12 @@ icu4c/source/data/region/pool.res -text
icu4c/source/data/zone/pool.res -text
icu4c/source/extra/uconv/uconv.vcxproj -text
icu4c/source/extra/uconv/uconv.vcxproj.filters -text
icu4c/source/i18n/decfmtst.cpp -text
icu4c/source/i18n/decfmtst.h -text
icu4c/source/i18n/i18n.vcxproj -text
icu4c/source/i18n/i18n.vcxproj.filters -text
icu4c/source/i18n/smpdtfst.cpp -text
icu4c/source/i18n/smpdtfst.h -text
icu4c/source/i18n/udateintervalformat.cpp -text
icu4c/source/i18n/unicode/udateintervalformat.h -text
icu4c/source/i18n/unicode/upluralrules.h -text

View file

@ -82,7 +82,7 @@ csdetect.o csmatch.o csr2022.o csrecog.o csrmbcs.o csrsbcs.o csrucode.o csrutf8.
wintzimpl.o windtfmt.o winnmfmt.o basictz.o dtrule.o rbtz.o tzrule.o tztrans.o vtzone.o zonemeta.o \
upluralrules.o plurrule.o plurfmt.o selfmt.o dtitvfmt.o dtitvinf.o udateintervalformat.o \
tmunit.o tmutamt.o tmutfmt.o colldata.o bmsearch.o bms.o currpinf.o \
uspoof.o uspoof_impl.o uspoof_build.o uspoof_conf.o uspoof_wsconf.o \
uspoof.o uspoof_impl.o uspoof_build.o uspoof_conf.o uspoof_wsconf.o decfmtst.o smpdtfst.o \
ztrans.o zrule.o vzone.o fphdlimp.o fpositer.o locdspnm.o \
decNumber.o decContext.o alphaindex.o tznames.o tznames_impl.o tzgnames.o \
tzfmt.o

View file

@ -0,0 +1,239 @@
/*
*******************************************************************************
* Copyright (C) 2009-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* This file contains the class DecimalFormatStaticSets
*
* DecimalFormatStaticSets holds the UnicodeSets that are needed for lenient
* parsing of decimal and group separators.
********************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/unistr.h"
#include "unicode/uniset.h"
#include "unicode/uchar.h"
#include "cmemory.h"
#include "cstring.h"
#include "uassert.h"
#include "ucln_in.h"
#include "umutex.h"
#include "decfmtst.h"
U_NAMESPACE_BEGIN
//------------------------------------------------------------------------------
//
// Unicode Set pattern strings for all of the required constant sets.
// Initialized with hex values for portability to EBCDIC based machines.
// Really ugly, but there's no good way to avoid it.
//
//------------------------------------------------------------------------------
// Each array below is a NUL-terminated UnicodeSet pattern: 0x005B is '[', 0x005D is ']',
// and the code units in between are the set members. The constructor of
// DecimalFormatStaticSets turns each pattern into a UnicodeSet.

// Lenient set of characters treated as equivalent to '.' (U+002E).
static const UChar gDotEquivalentsPattern[] = {
// [ . \u2024 \u3002 \uFE12 \uFE52 \uFF0E \uFF61 ]
0x005B, 0x002E, 0x2024, 0x3002, 0xFE12, 0xFE52, 0xFF0E, 0xFF61, 0x005D, 0x0000};
// Lenient set of characters treated as equivalent to ',' (U+002C).
static const UChar gCommaEquivalentsPattern[] = {
// [ , \u060C \u066B \u3001 \uFE10 \uFE11 \uFE50 \uFE51 \uFF0C \uFF64 ]
0x005B, 0x002C, 0x060C, 0x066B, 0x3001, 0xFE10, 0xFE11, 0xFE50, 0xFE51, 0xFF0C, 0xFF64, 0x005D, 0x0000};
// Lenient set of additional grouping separators (spaces, apostrophes and similar).
// 0x005C 0x0020 is a backslash-escaped SPACE; 0x2000 0x002D 0x200A is the range \u2000-\u200A.
static const UChar gOtherGroupingSeparatorsPattern[] = {
// [ \ SPACE ' NBSP \u066C \u2000 - \u200A \u2018 \u2019 \u202F \u205F \u3000 \uFF07 ]
0x005B, 0x005C, 0x0020, 0x0027, 0x00A0, 0x066C, 0x2000, 0x002D, 0x200A, 0x2018, 0x2019, 0x202F, 0x205F, 0x3000, 0xFF07, 0x005D, 0x0000};
// Lenient set of characters treated as equivalent to '-' (U+002D).
// 0x005C 0x002D is a backslash-escaped hyphen-minus, so it is a member rather than a range operator.
static const UChar gDashEquivalentsPattern[] = {
// [ \ - HYPHEN F_DASH N_DASH MINUS ]
0x005B, 0x005C, 0x002D, 0x2010, 0x2012, 0x2013, 0x2212, 0x005D, 0x0000};
// Strict counterpart of gDotEquivalentsPattern: same list without \u3002 and \uFE12.
static const UChar gStrictDotEquivalentsPattern[] = {
// [ . \u2024 \uFE52 \uFF0E \uFF61 ]
0x005B, 0x002E, 0x2024, 0xFE52, 0xFF0E, 0xFF61, 0x005D, 0x0000};
// Strict counterpart of gCommaEquivalentsPattern: same list without \u060C, \u3001, \uFE11, \uFE51 and \uFF64.
static const UChar gStrictCommaEquivalentsPattern[] = {
// [ , \u066B \uFE10 \uFE50 \uFF0C ]
0x005B, 0x002C, 0x066B, 0xFE10, 0xFE50, 0xFF0C, 0x005D, 0x0000};
// Strict counterpart of gOtherGroupingSeparatorsPattern. The contents are currently
// identical to the lenient pattern.
static const UChar gStrictOtherGroupingSeparatorsPattern[] = {
// [ \ SPACE ' NBSP \u066C \u2000 - \u200A \u2018 \u2019 \u202F \u205F \u3000 \uFF07 ]
0x005B, 0x005C, 0x0020, 0x0027, 0x00A0, 0x066C, 0x2000, 0x002D, 0x200A, 0x2018, 0x2019, 0x202F, 0x205F, 0x3000, 0xFF07, 0x005D, 0x0000};
// Strict counterpart of gDashEquivalentsPattern: only hyphen-minus and \u2212.
static const UChar gStrictDashEquivalentsPattern[] = {
// [ \ - MINUS ]
0x005B, 0x005C, 0x002D, 0x2212, 0x005D, 0x0000};
// Shared instance pointer. It stays NULL until initSets() stores an instance in it,
// and cleanup() deletes that instance and resets the pointer to NULL.
DecimalFormatStaticSets *DecimalFormatStaticSets::gStaticSets = NULL;
/**
 * Builds all UnicodeSets used for matching decimal, grouping and dash characters.
 *
 * The eight base sets are parsed from the pattern arrays defined above. The two
 * "default grouping separator" sets are then built as the union of the dot, comma
 * and other-grouping sets (lenient and strict variants respectively). All ten sets
 * are frozen before the constructor returns successfully.
 *
 * @param status  In/out error code. Pattern-parsing errors are reported through it by
 *                the UnicodeSet constructors. If any allocation fails, every set is
 *                deleted, all members are reset to NULL and *status is set to
 *                U_MEMORY_ALLOCATION_ERROR.
 */
DecimalFormatStaticSets::DecimalFormatStaticSets(UErrorCode *status)
: fDotEquivalents(NULL),
  fCommaEquivalents(NULL),
  fOtherGroupingSeparators(NULL),
  fDashEquivalents(NULL),
  fStrictDotEquivalents(NULL),
  fStrictCommaEquivalents(NULL),
  fStrictOtherGroupingSeparators(NULL),
  fStrictDashEquivalents(NULL),
  fDefaultGroupingSeparators(NULL),
  fStrictDefaultGroupingSeparators(NULL)
{
    fDotEquivalents                = new UnicodeSet(UnicodeString(TRUE, gDotEquivalentsPattern, -1), *status);
    fCommaEquivalents              = new UnicodeSet(UnicodeString(TRUE, gCommaEquivalentsPattern, -1), *status);
    fOtherGroupingSeparators       = new UnicodeSet(UnicodeString(TRUE, gOtherGroupingSeparatorsPattern, -1), *status);
    fDashEquivalents               = new UnicodeSet(UnicodeString(TRUE, gDashEquivalentsPattern, -1), *status);

    fStrictDotEquivalents          = new UnicodeSet(UnicodeString(TRUE, gStrictDotEquivalentsPattern, -1), *status);
    fStrictCommaEquivalents        = new UnicodeSet(UnicodeString(TRUE, gStrictCommaEquivalentsPattern, -1), *status);
    fStrictOtherGroupingSeparators = new UnicodeSet(UnicodeString(TRUE, gStrictOtherGroupingSeparatorsPattern, -1), *status);
    fStrictDashEquivalents         = new UnicodeSet(UnicodeString(TRUE, gStrictDashEquivalentsPattern, -1), *status);

    // The union sets below are copy-constructed from the base sets, so every base
    // allocation must be verified before any of them is dereferenced.
    if (fDotEquivalents == NULL || fCommaEquivalents == NULL ||
        fOtherGroupingSeparators == NULL || fDashEquivalents == NULL ||
        fStrictDotEquivalents == NULL || fStrictCommaEquivalents == NULL ||
        fStrictOtherGroupingSeparators == NULL || fStrictDashEquivalents == NULL) {
        goto ExitConstrDeleteAll;
    }

    fDefaultGroupingSeparators       = new UnicodeSet(*fDotEquivalents);
    fStrictDefaultGroupingSeparators = new UnicodeSet(*fStrictDotEquivalents);
    if (fDefaultGroupingSeparators == NULL || fStrictDefaultGroupingSeparators == NULL) {
        goto ExitConstrDeleteAll;
    }

    fDefaultGroupingSeparators->addAll(*fCommaEquivalents);
    fDefaultGroupingSeparators->addAll(*fOtherGroupingSeparators);

    fStrictDefaultGroupingSeparators->addAll(*fStrictCommaEquivalents);
    fStrictDefaultGroupingSeparators->addAll(*fStrictOtherGroupingSeparators);

    // Freeze all the sets
    fDotEquivalents->freeze();
    fCommaEquivalents->freeze();
    fOtherGroupingSeparators->freeze();
    fDashEquivalents->freeze();
    fStrictDotEquivalents->freeze();
    fStrictCommaEquivalents->freeze();
    fStrictOtherGroupingSeparators->freeze();
    fStrictDashEquivalents->freeze();
    fDefaultGroupingSeparators->freeze();
    fStrictDefaultGroupingSeparators->freeze();

    return; // If we reached this point, everything is fine so just exit

ExitConstrDeleteAll: // Delete every set that was allocated and report the allocation failure
    delete fDotEquivalents;                  fDotEquivalents = NULL;
    delete fCommaEquivalents;                fCommaEquivalents = NULL;
    delete fOtherGroupingSeparators;         fOtherGroupingSeparators = NULL;
    delete fDashEquivalents;                 fDashEquivalents = NULL;
    delete fStrictDotEquivalents;            fStrictDotEquivalents = NULL;
    delete fStrictCommaEquivalents;          fStrictCommaEquivalents = NULL;
    delete fStrictOtherGroupingSeparators;   fStrictOtherGroupingSeparators = NULL;
    delete fStrictDashEquivalents;           fStrictDashEquivalents = NULL;
    delete fDefaultGroupingSeparators;       fDefaultGroupingSeparators = NULL;
    delete fStrictDefaultGroupingSeparators; fStrictDefaultGroupingSeparators = NULL;
    *status = U_MEMORY_ALLOCATION_ERROR;
}
/**
 * Releases every UnicodeSet owned by this object and resets the members to NULL.
 * Deleting a NULL member is harmless, so this is safe on a partially built object.
 */
DecimalFormatStaticSets::~DecimalFormatStaticSets() {
    delete fDotEquivalents;                  fDotEquivalents = NULL;
    delete fCommaEquivalents;                fCommaEquivalents = NULL;
    delete fOtherGroupingSeparators;         fOtherGroupingSeparators = NULL;
    delete fDashEquivalents;                 fDashEquivalents = NULL;
    delete fStrictDotEquivalents;            fStrictDotEquivalents = NULL;
    delete fStrictCommaEquivalents;          fStrictCommaEquivalents = NULL;
    delete fStrictOtherGroupingSeparators;   fStrictOtherGroupingSeparators = NULL;
    delete fStrictDashEquivalents;           fStrictDashEquivalents = NULL;
    delete fDefaultGroupingSeparators;       fDefaultGroupingSeparators = NULL;
    // This member was previously never deleted (fStrictOtherGroupingSeparators was
    // deleted twice instead), which leaked the strict union set.
    delete fStrictDefaultGroupingSeparators; fStrictDefaultGroupingSeparators = NULL;
}
//------------------------------------------------------------------------------
//
// cleanup / decimfmt_cleanup   Memory cleanup functions, free/delete all
// cached memory. Called by ICU's u_cleanup() function.
//
//------------------------------------------------------------------------------
// Destroys the shared DecimalFormatStaticSets instance (if any) and clears the
// global pointer so that a later initSets() call builds a fresh one.
// Always returns TRUE.
UBool
DecimalFormatStaticSets::cleanup(void)
{
    DecimalFormatStaticSets *doomed = DecimalFormatStaticSets::gStaticSets;
    DecimalFormatStaticSets::gStaticSets = NULL;
    delete doomed;
    return TRUE;
}
U_CDECL_BEGIN
// C-linkage trampoline handed to ucln_i18n_registerCleanup(); it simply
// forwards to DecimalFormatStaticSets::cleanup() and passes its result back.
static UBool U_CALLCONV
decimfmt_cleanup(void)
{
    const UBool released = DecimalFormatStaticSets::cleanup();
    return released;
}
U_CDECL_END
// Lazily creates the shared DecimalFormatStaticSets instance.
//
// If gStaticSets is already set, this returns immediately. Otherwise a candidate
// instance is built outside the lock and stored in gStaticSets under the global
// mutex only if the pointer is still NULL; a candidate that was not stored is
// deleted. decimfmt_cleanup is registered with the i18n cleanup machinery after
// a candidate was built successfully.
//
// status: set to U_MEMORY_ALLOCATION_ERROR if the instance cannot be allocated;
//         any failure reported by the constructor is left in place and the
//         candidate is discarded.
void DecimalFormatStaticSets::initSets(UErrorCode *status)
{
    DecimalFormatStaticSets *existing;
    UMTX_CHECK(NULL, gStaticSets, existing);
    if (existing != NULL) {
        return;     // already initialized
    }

    DecimalFormatStaticSets *candidate = new DecimalFormatStaticSets(status);
    if (candidate == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    if (U_FAILURE(*status)) {
        delete candidate;
        return;
    }

    umtx_lock(NULL);
    if (gStaticSets == NULL) {
        // Hand ownership to the global; nothing left for us to delete.
        gStaticSets = candidate;
        candidate = NULL;
    }
    umtx_unlock(NULL);

    // Non-NULL only when gStaticSets was already set at the time of the locked check.
    delete candidate;

    ucln_i18n_registerCleanup(UCLN_I18N_DECFMT, decimfmt_cleanup);
}
// Returns the set of characters to accept in place of the given decimal character.
//
// decimal:     the decimal separator code point to look up.
// strictParse: selects the strict variant of the matching set when TRUE.
// fallback:    caller-owned set; it is overwritten with just {decimal} and returned
//              when the shared sets cannot be initialized or when decimal is in
//              neither the (lenient) dot set nor the (lenient) comma set.
//
// Membership is always tested against the lenient dot/comma sets; strictParse only
// chooses which set is handed back. A shared set returned from here is owned by
// gStaticSets, not by the caller.
UnicodeSet *DecimalFormatStaticSets::getSimilarDecimals(UChar32 decimal, UBool strictParse, UnicodeSet *fallback)
{
    UErrorCode ec = U_ZERO_ERROR;
    initSets(&ec);

    if (U_SUCCESS(ec)) {
        if (gStaticSets->fDotEquivalents->contains(decimal)) {
            if (strictParse) {
                return gStaticSets->fStrictDotEquivalents;
            }
            return gStaticSets->fDotEquivalents;
        }

        if (gStaticSets->fCommaEquivalents->contains(decimal)) {
            if (strictParse) {
                return gStaticSets->fStrictCommaEquivalents;
            }
            return gStaticSets->fCommaEquivalents;
        }
    }

    // Initialization failed or there is no match: return the character itself
    fallback->set(decimal, decimal);
    return fallback;
}
U_NAMESPACE_END
#endif // !UCONFIG_NO_FORMATTING

View file

@ -0,0 +1,59 @@
/*
*******************************************************************************
* Copyright (C) 2009-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* This file contains declarations for the class DecimalFormatStaticSets
*
* DecimalFormatStaticSets holds the UnicodeSets that are needed for lenient
* parsing of decimal and group separators.
********************************************************************************
*/
#ifndef DECFMTST_H
#define DECFMTST_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
U_NAMESPACE_BEGIN
class UnicodeSet;
// Holder for the UnicodeSets used when matching decimal separators, grouping
// separators and dash characters during number parsing. One shared instance is
// created on demand by initSets() and reachable through gStaticSets.
class DecimalFormatStaticSets : public UMemory
{
public:
static DecimalFormatStaticSets *gStaticSets; // Ptr to all lazily initialized constant
// shared sets.
// Builds all sets; reports failures through *status.
DecimalFormatStaticSets(UErrorCode *status);
// Deletes the sets owned by this object.
~DecimalFormatStaticSets();
// Creates the shared instance and stores it in gStaticSets if that has not happened yet.
static void initSets(UErrorCode *status);
// Deletes the shared instance and resets gStaticSets to NULL.
static UBool cleanup();
// Returns the set of characters to accept for the given decimal character;
// falls back to a set containing only that character (stored in *fallback).
static UnicodeSet *getSimilarDecimals(UChar32 decimal, UBool strictParse, UnicodeSet *fallback);
// Lenient sets.
UnicodeSet *fDotEquivalents;
UnicodeSet *fCommaEquivalents;
UnicodeSet *fOtherGroupingSeparators;
UnicodeSet *fDashEquivalents;
// Strict counterparts of the four sets above.
UnicodeSet *fStrictDotEquivalents;
UnicodeSet *fStrictCommaEquivalents;
UnicodeSet *fStrictOtherGroupingSeparators;
UnicodeSet *fStrictDashEquivalents;
// Union of the dot, comma and other-grouping sets (lenient, then strict).
UnicodeSet *fDefaultGroupingSeparators;
UnicodeSet *fStrictDefaultGroupingSeparators;
};
U_NAMESPACE_END
#endif // !UCONFIG_NO_FORMATTING
#endif // DECFMTST_H

View file

@ -49,6 +49,7 @@
#include "unicode/dcfmtsym.h"
#include "unicode/ures.h"
#include "unicode/uchar.h"
#include "unicode/uniset.h"
#include "unicode/curramt.h"
#include "unicode/currpinf.h"
#include "unicode/plurrule.h"
@ -64,6 +65,7 @@
#include "putilimp.h"
#include <math.h>
#include "hash.h"
#include "decfmtst.h"
U_NAMESPACE_BEGIN
@ -1602,6 +1604,12 @@ void DecimalFormat::parse(const UnicodeString& text,
fPadPosition == kPadAfterPrefix)) {
i = skipPadding(text, i);
}
if (isLenient()) {
// skip any leading whitespace
i = backup = skipUWhiteSpace(text, i);
}
// If the text is composed of the representation of NaN, returns NaN.length
const UnicodeString *nan = &getConstSymbol(DecimalFormatSymbols::kNaNSymbol);
int32_t nanLen = (text.compare(i, nan->length(), *nan)
@ -1619,6 +1627,7 @@ void DecimalFormat::parse(const UnicodeString& text,
// NaN parse failed; start over
i = backup;
parsePosition.setIndex(i);
// status is used to record whether a number is infinite.
UBool status[fgStatusLength];
@ -1838,6 +1847,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
int32_t position = parsePosition.getIndex();
int32_t oldStart = position;
UBool strictParse = !isLenient();
// Match padding before prefix
if (fFormatWidth > 0 && fPadPosition == kPadBeforePrefix) {
@ -1846,7 +1856,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
// Match positive and negative prefixes; prefer longest match.
int32_t posMatch = compareAffix(text, position, FALSE, TRUE, posPrefix, currencyParsing, type, currency);
int32_t negMatch = compareAffix(text, position, TRUE, TRUE, negPrefix,currencyParsing, type, currency);
int32_t negMatch = compareAffix(text, position, TRUE, TRUE, negPrefix, currencyParsing, type, currency);
if (posMatch >= 0 && negMatch >= 0) {
if (posMatch > negMatch) {
negMatch = -1;
@ -1860,7 +1870,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
} else if (negMatch >= 0) {
position += negMatch;
parsedNum.append('-', err);
} else {
} else if (strictParse){
parsePosition.setErrorIndex(position);
return FALSE;
}
@ -1870,13 +1880,18 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
position = skipPadding(text, position);
}
if (! strictParse) {
position = skipUWhiteSpace(text, position);
}
// process digits or Inf, find decimal position
const UnicodeString *inf = &getConstSymbol(DecimalFormatSymbols::kInfinitySymbol);
int32_t infLen = (text.compare(position, inf->length(), *inf)
? 0 : inf->length());
position += infLen; // infLen is non-zero when it does equal to infinity
status[fgStatusInfinite] = (UBool)infLen;
if (infLen) {
status[fgStatusInfinite] = infLen != 0;
if (infLen != 0) {
parsedNum.append("Infinity", err);
} else {
// We now have a string of digits, possibly with grouping symbols,
@ -1888,24 +1903,64 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
UChar32 zero = getConstSymbol(DecimalFormatSymbols::kZeroDigitSymbol).char32At(0);
const UnicodeString *decimal;
if(fCurrencySignCount > fgCurrencySignCountZero) {
decimal = &getConstSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol);
UBool strictFail = FALSE; // did we exit with a strict parse failure?
int32_t lastGroup = -1; // where did we last see a grouping separator?
int32_t digitStart = position;
int32_t gs2 = fGroupingSize2 == 0 ? fGroupingSize : fGroupingSize2;
const UnicodeString *decimalString;
if (fCurrencySignCount > fgCurrencySignCountZero) {
decimalString = &getConstSymbol(DecimalFormatSymbols::kMonetarySeparatorSymbol);
} else {
decimal = &getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
decimalString = &getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol);
}
const UnicodeString *grouping = &getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
UChar32 decimalChar = decimalString->char32At(0);
const UnicodeString *groupingString = &getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol);
UChar32 groupingChar = groupingString->char32At(0);
UBool sawDecimal = FALSE;
UBool sawDigit = FALSE;
int32_t backup = -1;
int32_t digit;
int32_t textLength = text.length(); // One less pointer to follow
int32_t groupingLen = grouping->length();
int32_t decimalLen = decimal->length();
int32_t decimalStringLength = decimalString->length();
int32_t decimalCharLength = U16_LENGTH(decimalChar);
int32_t groupingStringLength = groupingString->length();
int32_t groupingCharLength = U16_LENGTH(groupingChar);
// equivalent grouping and decimal support
// TODO markdavis Cache these if it makes a difference in performance.
UnicodeSet decimalFallback;
UnicodeSet *decimalSet = NULL;
UnicodeSet *groupingSet = NULL;
if (decimalCharLength == decimalStringLength) {
decimalSet = (UnicodeSet *) DecimalFormatStaticSets::getSimilarDecimals(decimalChar, strictParse, &decimalFallback)->cloneAsThawed();
}
if (groupingCharLength == groupingStringLength) {
if (strictParse) {
groupingSet = (UnicodeSet *) DecimalFormatStaticSets::gStaticSets->fStrictDefaultGroupingSeparators->cloneAsThawed();
} else {
groupingSet = (UnicodeSet *) DecimalFormatStaticSets::gStaticSets->fDefaultGroupingSeparators->cloneAsThawed();
}
groupingSet->add(groupingChar);
if (decimalSet != NULL) {
groupingSet->removeAll(*decimalSet);
}
}
// we are guaranteed that
// decimalSet contains the decimal, and
// groupingSet contains the groupingSeparator
// (unless decimal and grouping are the same, which should never happen. But in that case, groupingSet will just be empty.)
// We have to track digitCount ourselves, because digits.fCount will
// pin when the maximum allowable digits is reached.
int32_t digitCount = 0;
int32_t integerDigitCount = 0;
for (; position < textLength; )
{
@ -1944,31 +1999,89 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
if (digit >= 0 && digit <= 9)
{
if (strictParse && backup != -1) {
// comma followed by digit, so group before comma is a
// secondary group. If there was a group separator
// before that, the group must == the secondary group
// length, else it can be <= the secondary group
// length.
if ((lastGroup != -1 && backup - lastGroup - 1 != gs2) ||
(lastGroup == -1 && position - digitStart - 1 > gs2)) {
strictFail = TRUE;
break;
}
lastGroup = backup;
}
// Cancel out backup setting (see grouping handler below)
backup = -1;
sawDigit = TRUE;
// output a regular non-zero digit.
++digitCount;
// Note: this will append leading zeros
parsedNum.append((char)(digit + '0'), err);
// count any digit that's not a leading zero
if (digit > 0 || digitCount > 0 || sawDecimal) {
digitCount += 1;
// count any integer digit that's not a leading zero
if (! sawDecimal) {
integerDigitCount += 1;
}
}
position += U16_LENGTH(ch);
}
else if (groupingLen > 0 && !text.compare(position, groupingLen, *grouping) && isGroupingUsed())
else if (groupingStringLength > 0 && matchSymbol(text, position, groupingStringLength, *groupingString, groupingSet, ch) && isGroupingUsed())
{
if (sawDecimal) {
break;
}
if (strictParse) {
if ((!sawDigit || backup != -1)) {
// leading group, or two group separators in a row
strictFail = TRUE;
break;
}
}
// Ignore grouping characters, if we are using them, but require
// that they be followed by a digit. Otherwise we backup and
// reprocess them.
backup = position;
position += groupingLen;
position += groupingStringLength;
if (groupingSet != NULL) {
// Once we see a grouping character, we only accept that grouping character from then on.
groupingSet->set(ch, ch);
}
}
else if (!text.compare(position, decimalLen, *decimal) && !isParseIntegerOnly() && !sawDecimal)
else if (matchSymbol(text, position, decimalStringLength, *decimalString, decimalSet, ch))
{
if (strictParse) {
if (backup != -1 ||
(lastGroup != -1 && position - lastGroup != fGroupingSize + 1)) {
strictFail = TRUE;
break;
}
}
// If we're only parsing integers, or if we ALREADY saw the
// decimal, then don't parse this one.
if (isParseIntegerOnly() || sawDecimal) {
break;
}
parsedNum.append('.', err);
position += decimalStringLength;
sawDecimal = TRUE;
position += decimalLen;
if (decimalSet != NULL) {
// Once we see a decimal character, we only accept that decimal character from then on.
decimalSet->set(ch, ch);
}
}
else {
const UnicodeString *tmp;
@ -2029,11 +2142,28 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
}
}
delete groupingSet;
delete decimalSet;
if (backup != -1)
{
position = backup;
}
if (strictParse && !sawDecimal) {
if (lastGroup != -1 && position - lastGroup != fGroupingSize + 1) {
strictFail = TRUE;
}
}
if (strictFail) {
// only set with strictParse and a grouping separator error
parsePosition.setIndex(oldStart);
parsePosition.setErrorIndex(position);
return FALSE;
}
// If there was no decimal point we have an integer
// If none of the text string was recognized. For example, parse
@ -2052,28 +2182,30 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
position = skipPadding(text, position);
}
int32_t posSuffixMatch = -1, negSuffixMatch = -1;
// Match positive and negative suffixes; prefer longest match.
if (posMatch >= 0) {
posMatch = compareAffix(text, position, FALSE, FALSE, posSuffix, currencyParsing, type, currency);
if (posMatch >= 0 || (!strictParse && negMatch < 0)) {
posSuffixMatch = compareAffix(text, position, FALSE, FALSE, posSuffix, currencyParsing, type, currency);
}
if (negMatch >= 0) {
negMatch = compareAffix(text, position, TRUE, FALSE, negSuffix, currencyParsing, type, currency);
negSuffixMatch = compareAffix(text, position, TRUE, FALSE, negSuffix, currencyParsing, type, currency);
}
if (posMatch >= 0 && negMatch >= 0) {
if (posMatch > negMatch) {
negMatch = -1;
} else if (negMatch > posMatch) {
posMatch = -1;
if (posSuffixMatch >= 0 && negSuffixMatch >= 0) {
if (posSuffixMatch > negSuffixMatch) {
negSuffixMatch = -1;
} else if (negSuffixMatch > posSuffixMatch) {
posSuffixMatch = -1;
}
}
// Fail if neither or both
if ((posMatch >= 0) == (negMatch >= 0)) {
if (strictParse && ((posSuffixMatch >= 0) == (negSuffixMatch >= 0))) {
parsePosition.setErrorIndex(position);
return FALSE;
}
position += (posMatch>=0 ? posMatch : negMatch);
position += (posSuffixMatch >= 0 ? posSuffixMatch : (negSuffixMatch >= 0 ? negSuffixMatch : 0));
// Match padding before suffix
if (fFormatWidth > 0 && fPadPosition == kPadAfterSuffix) {
@ -2082,7 +2214,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
parsePosition.setIndex(position);
parsedNum.data()[0] = (posMatch >= 0) ? '+' : '-';
parsedNum.data()[0] = (posSuffixMatch >= 0 || (!strictParse && negMatch < 0 && negSuffixMatch < 0)) ? '+' : '-';
if(parsePosition.getIndex() == oldStart)
{
@ -2164,7 +2296,7 @@ int32_t DecimalFormat::compareAffix(const UnicodeString& text,
patternToCompare = &fPositiveSuffix;
}
}
return compareSimpleAffix(*patternToCompare, text, pos);
return compareSimpleAffix(*patternToCompare, text, pos, isLenient());
}
/**
@ -2179,58 +2311,119 @@ int32_t DecimalFormat::compareAffix(const UnicodeString& text,
*/
int32_t DecimalFormat::compareSimpleAffix(const UnicodeString& affix,
const UnicodeString& input,
int32_t pos) {
int32_t pos,
UBool lenient) {
UErrorCode status = U_ZERO_ERROR;
int32_t start = pos;
for (int32_t i=0; i<affix.length(); ) {
UChar32 c = affix.char32At(i);
int32_t len = U16_LENGTH(c);
if (PatternProps::isWhiteSpace(c)) {
// We may have a pattern like: \u200F \u0020
// and input text like: \u200F \u0020
// Note that U+200F and U+0020 are Pattern_White_Space but only
// U+0020 is UWhiteSpace. So we have to first do a direct
// match of the run of Pattern_White_Space in the pattern,
// then match any extra characters.
UBool literalMatch = FALSE;
while (pos < input.length() &&
input.char32At(pos) == c) {
literalMatch = TRUE;
i += len;
pos += len;
if (i == affix.length()) {
break;
}
c = affix.char32At(i);
len = U16_LENGTH(c);
if (!PatternProps::isWhiteSpace(c)) {
break;
}
UChar32 affixChar = affix.char32At(0);
int32_t affixLength = affix.length();
int32_t inputLength = input.length();
int32_t affixCharLength = U16_LENGTH(affixChar);
UnicodeSet *affixSet;
DecimalFormatStaticSets::initSets(&status);
if (!lenient) {
affixSet = DecimalFormatStaticSets::gStaticSets->fStrictDashEquivalents;
// If the affix is exactly one character long and that character
// is in the dash set and the very next input character is also
// in the dash set, return a match.
if (affixCharLength == affixLength && affixSet->contains(affixChar)) {
if (affixSet->contains(input.char32At(pos))) {
return 1;
}
}
// Advance over run in pattern
i = skipPatternWhiteSpace(affix, i);
for (int32_t i = 0; i < affixLength; ) {
UChar32 c = affix.char32At(i);
int32_t len = U16_LENGTH(c);
if (PatternProps::isWhiteSpace(c)) {
// We may have a pattern like: \u200F \u0020
// and input text like: \u200F \u0020
// Note that U+200F and U+0020 are Pattern_White_Space but only
// U+0020 is UWhiteSpace. So we have to first do a direct
// match of the run of Pattern_White_Space in the pattern,
// then match any extra characters.
UBool literalMatch = FALSE;
while (pos < inputLength &&
input.char32At(pos) == c) {
literalMatch = TRUE;
i += len;
pos += len;
if (i == affixLength) {
break;
}
c = affix.char32At(i);
len = U16_LENGTH(c);
if (!PatternProps::isWhiteSpace(c)) {
break;
}
}
// Advance over run in input text
// Must see at least one white space char in input,
// unless we've already matched some characters literally.
int32_t s = pos;
pos = skipUWhiteSpace(input, pos);
if (pos == s && !literalMatch) {
return -1;
}
// Advance over run in pattern
i = skipPatternWhiteSpace(affix, i);
// If we skip UWhiteSpace in the input text, we need to skip it in the pattern.
// Otherwise, the previous lines may have skipped over text (such as U+00A0) that
// is also in the affix.
i = skipUWhiteSpace(affix, i);
} else {
if (pos < input.length() &&
input.char32At(pos) == c) {
i += len;
pos += len;
// Advance over run in input text
// Must see at least one white space char in input,
// unless we've already matched some characters literally.
int32_t s = pos;
pos = skipUWhiteSpace(input, pos);
if (pos == s && !literalMatch) {
return -1;
}
// If we skip UWhiteSpace in the input text, we need to skip it in the pattern.
// Otherwise, the previous lines may have skipped over text (such as U+00A0) that
// is also in the affix.
i = skipUWhiteSpace(affix, i);
} else {
if (pos < inputLength &&
input.char32At(pos) == c) {
i += len;
pos += len;
} else {
return -1;
}
}
}
} else {
UBool match = FALSE;
affixSet = DecimalFormatStaticSets::gStaticSets->fDashEquivalents;
if (affixCharLength == affixLength && affixSet->contains(affixChar)) {
pos = skipUWhiteSpace(input, pos);
if (affixSet->contains(input.char32At(pos))) {
return pos - start + 1;
}
}
for (int32_t i = 0; i < affixLength; )
{
//i = skipRuleWhiteSpace(affix, i);
i = skipUWhiteSpace(affix, i);
pos = skipUWhiteSpace(input, pos);
if (i >= affixLength || pos >= inputLength) {
break;
}
UChar32 c = affix.char32At(i);
int32_t len = U16_LENGTH(c);
if (input.char32At(pos) != c) {
return -1;
}
match = TRUE;
i += len;
pos += len;
}
if (affixLength > 0 && ! match) {
return -1;
}
}
return pos - start;
@ -2330,7 +2523,7 @@ int32_t DecimalFormat::compareComplexAffix(const UnicodeString& affixPat,
u_strcpy(currency, curr);
}
pos = ppos.getIndex();
} else {
} else if (!isLenient()){
pos = -1;
}
continue;
@ -2403,6 +2596,17 @@ int32_t DecimalFormat::match(const UnicodeString& text, int32_t pos, const Unico
return pos;
}
// Tests whether a separator symbol matches at the current parse position.
//
// When a set is supplied (sset != NULL) the match is decided solely by whether
// schar is a member of that set; text, position, length and symbol are not
// consulted. Without a set, the `length` code units of `text` starting at
// `position` must compare equal to `symbol`.
UBool DecimalFormat::matchSymbol(const UnicodeString &text, int32_t position, int32_t length, const UnicodeString &symbol,
                                 UnicodeSet *sset, UChar32 schar)
{
    if (sset == NULL) {
        return text.compare(position, length, symbol) == 0;
    }

    return sset->contains(schar);
}
//------------------------------------------------------------------------------
// Gets the pointer to the localized decimal format symbols

View file

@ -284,6 +284,7 @@
<ClCompile Include="datefmt.cpp" />
<ClCompile Include="dcfmtsym.cpp" />
<ClCompile Include="decContext.c" />
<ClCompile Include="decfmtst.cpp" />
<ClCompile Include="decimfmt.cpp" />
<ClCompile Include="decNumber.c" />
<ClCompile Include="digitlst.cpp" />
@ -323,6 +324,7 @@
<ClCompile Include="selfmt.cpp" />
<ClCompile Include="simpletz.cpp" />
<ClCompile Include="smpdtfmt.cpp" />
<ClCompile Include="smpdtfst.cpp" />
<ClCompile Include="taiwncal.cpp" />
<ClCompile Include="timezone.cpp" />
<ClCompile Include="tmunit.cpp" />
@ -733,6 +735,7 @@
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="decContext.h" />
<ClInclude Include="decfmtst.h" />
<CustomBuild Include="unicode\decimfmt.h">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command>
@ -1104,6 +1107,7 @@
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<ClInclude Include="smpdtfst.h" />
<ClInclude Include="taiwncal.h" />
<CustomBuild Include="unicode\timezone.h">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode

View file

@ -129,6 +129,9 @@
<ClCompile Include="decContext.c">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="decfmtst.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="decimfmt.cpp">
<Filter>formatting</Filter>
</ClCompile>
@ -246,6 +249,9 @@
<ClCompile Include="smpdtfmt.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="smpdtfst.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="taiwncal.cpp">
<Filter>formatting</Filter>
</ClCompile>
@ -533,6 +539,9 @@
<ClInclude Include="decContext.h">
<Filter>formatting</Filter>
</ClInclude>
<ClInclude Include="decfmtst.h">
<Filter>formatting</Filter>
</ClInclude>
<ClInclude Include="decNumber.h">
<Filter>formatting</Filter>
</ClInclude>
@ -596,6 +605,9 @@
<ClInclude Include="reldtfmt.h">
<Filter>formatting</Filter>
</ClInclude>
<ClInclude Include="smpdtfst.h">
<Filter>formatting</Filter>
</ClInclude>
<ClInclude Include="taiwncal.h">
<Filter>formatting</Filter>
</ClInclude>

View file

@ -1,6 +1,6 @@
/*
******************************************************************************
* Copyright (C) 2003-2010, International Business Machines Corporation
* Copyright (C) 2003-2011, International Business Machines Corporation
* and others. All Rights Reserved.
******************************************************************************
*
@ -141,6 +141,10 @@ UBool IslamicCalendar::isCivil() {
// Note: Current IslamicCalendar implementation does not work
// well with negative years.
// TODO: In some cases the current ICU Islamic calendar implementation shows
// a month as having 31 days. Since date parsing now uses range checks based
// on the table below, we need to change the range for last day of month to
// include 31 as a workaround until the implementation is fixed.
static const int32_t LIMITS[UCAL_FIELD_COUNT][4] = {
// Minimum Greatest Least Maximum
// Minimum Maximum
@ -149,7 +153,7 @@ static const int32_t LIMITS[UCAL_FIELD_COUNT][4] = {
{ 0, 0, 11, 11}, // MONTH
{ 1, 1, 50, 51}, // WEEK_OF_YEAR
{/*N/A*/-1,/*N/A*/-1,/*N/A*/-1,/*N/A*/-1}, // WEEK_OF_MONTH
{ 1, 1, 29, 30}, // DAY_OF_MONTH
{ 1, 1, 29, 31}, // DAY_OF_MONTH - 31 to workaround for cal implementation bug, should be 30
{ 1, 1, 354, 355}, // DAY_OF_YEAR
{/*N/A*/-1,/*N/A*/-1,/*N/A*/-1,/*N/A*/-1}, // DAY_OF_WEEK
{ -1, -1, 5, 5}, // DAY_OF_WEEK_IN_MONTH

View file

@ -214,7 +214,8 @@ NumberFormat::NumberFormat()
fMinIntegerDigits(1),
fMaxFractionDigits(3), // invariant, >= minFractionDigits
fMinFractionDigits(0),
fParseIntegerOnly(FALSE)
fParseIntegerOnly(FALSE),
fLenient(FALSE)
{
fCurrency[0] = 0;
}
@ -648,6 +649,15 @@ NumberFormat::setParseIntegerOnly(UBool value)
fParseIntegerOnly = value;
}
// -------------------------------------
// Turns lenient parsing on or off by storing the flag in fLenient.
void
NumberFormat::setLenient(UBool enable)
{
    this->fLenient = enable;
}
// -------------------------------------
// Create a number style NumberFormat instance with the default locale.

View file

@ -40,6 +40,7 @@
#include "unicode/decimfmt.h"
#include "unicode/dcfmtsym.h"
#include "unicode/uchar.h"
#include "unicode/uniset.h"
#include "unicode/ustring.h"
#include "unicode/basictz.h"
#include "unicode/simpletz.h"
@ -56,6 +57,7 @@
#include "umutex.h"
#include "tzfmt.h"
#include <float.h>
#include "smpdtfst.h"
#if defined( U_DEBUG_CALSVC ) || defined (U_DEBUG_CAL)
#include <stdio.h>
@ -156,6 +158,48 @@ static const char gDateTimePatternsTag[]="DateTimePatterns";
static const UChar gEtcUTC[] = {0x45, 0x74, 0x63, 0x2F, 0x55, 0x54, 0x43, 0x00}; // "Etc/UTC"
static const UChar QUOTE = 0x27; // Single quote
/*
* The field range check bias for each UDateFormatField.
* The bias is added to the minimum and maximum values
* before they are compared to the parsed number.
* For example, the calendar stores zero-based month numbers
* but the parsed month numbers start at 1, so the bias is 1.
*
* A value of -1 means that the value is not checked.
*
* The table is indexed directly by the UDateFormatField value of the
* pattern character (gFieldRangeBias[patternCharIndex]), so the entries
* must be kept in the same order as that enum.
*/
static const int32_t gFieldRangeBias[] = {
-1, // 'G' - UDAT_ERA_FIELD
-1, // 'y' - UDAT_YEAR_FIELD
1, // 'M' - UDAT_MONTH_FIELD
0, // 'd' - UDAT_DATE_FIELD
-1, // 'k' - UDAT_HOUR_OF_DAY1_FIELD
-1, // 'H' - UDAT_HOUR_OF_DAY0_FIELD
0, // 'm' - UDAT_MINUTE_FIELD
0, // 's' - UDAT_SECOND_FIELD
-1, // 'S' - UDAT_FRACTIONAL_SECOND_FIELD (0-999?)
-1, // 'E' - UDAT_DAY_OF_WEEK_FIELD (1-7?)
-1, // 'D' - UDAT_DAY_OF_YEAR_FIELD (1 - 366?)
-1, // 'F' - UDAT_DAY_OF_WEEK_IN_MONTH_FIELD (1-5?)
-1, // 'w' - UDAT_WEEK_OF_YEAR_FIELD (1-52?)
-1, // 'W' - UDAT_WEEK_OF_MONTH_FIELD (1-5?)
-1, // 'a' - UDAT_AM_PM_FIELD
-1, // 'h' - UDAT_HOUR1_FIELD
-1, // 'K' - UDAT_HOUR0_FIELD
-1, // 'z' - UDAT_TIMEZONE_FIELD
-1, // 'Y' - UDAT_YEAR_WOY_FIELD
-1, // 'e' - UDAT_DOW_LOCAL_FIELD
-1, // 'u' - UDAT_EXTENDED_YEAR_FIELD
-1, // 'g' - UDAT_JULIAN_DAY_FIELD
-1, // 'A' - UDAT_MILLISECONDS_IN_DAY_FIELD
-1, // 'Z' - UDAT_TIMEZONE_RFC_FIELD
-1, // 'v' - UDAT_TIMEZONE_GENERIC_FIELD
0, // 'c' - UDAT_STANDALONE_DAY_FIELD
1, // 'L' - UDAT_STANDALONE_MONTH_FIELD
-1, // 'Q' - UDAT_QUARTER_FIELD (1-4?)
-1, // 'q' - UDAT_STANDALONE_QUARTER_FIELD
-1 // 'V' - UDAT_TIMEZONE_SPECIAL_FIELD
};
static UMTX LOCK;
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat)
@ -430,7 +474,7 @@ SimpleDateFormat& SimpleDateFormat::operator=(const SimpleDateFormat& other)
fPattern = other.fPattern;
// TimeZoneFormat in ICU4C only deneds on a locale for now
// TimeZoneFormat in ICU4C only depends on a locale for now
if (fLocale != other.fLocale) {
delete fTimeZoneFormat;
}
@ -681,8 +725,8 @@ SimpleDateFormat::initializeSymbols(const Locale& locale, Calendar* calendar, UE
fSymbols = new DateFormatSymbols(locale, calendar?calendar->getType() :NULL , status);
// Null pointer check
if (fSymbols == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
}
}
@ -710,6 +754,8 @@ SimpleDateFormat::initialize(const Locale& locale,
fNumberFormat->setParseIntegerOnly(TRUE);
fNumberFormat->setMinimumFractionDigits(0); // To prevent "Jan 1.00, 1997.00"
fNumberFormat->setLenient(TRUE); // Java uses a custom DateNumberFormat to format/parse
initNumberFormatters(locale,status);
}
@ -875,13 +921,13 @@ SimpleDateFormat::fgCalendarFieldToLevel[] =
const int32_t
SimpleDateFormat::fgPatternCharToLevel[] = {
// A B C D E F G H I J K L M N O
-1, 40, -1, -1, 20, 30, 30, 0, 50, -1, -1, 50, 20, 20, -1, -1,
-1, 40, -1, -1, 20, 30, 30, 0, 50, -1, -1, 50, 20, 20, -1, -1,
// P Q R S T U V W X Y Z
-1, 20, -1, 80, -1, -1, 0, 30, -1, 10, 0, -1, -1, -1, -1, -1,
-1, 20, -1, 80, -1, -1, 0, 30, -1, 10, 0, -1, -1, -1, -1, -1,
// a b c d e f g h i j k l m n o
-1, 40, -1, 30, 30, 30, -1, 0, 50, -1, -1, 50, -1, 60, -1, -1,
-1, 40, -1, 30, 30, 30, -1, 0, 50, -1, -1, 50, -1, 60, -1, -1,
// p q r s t u v w x y z
-1, 20, -1, 70, -1, 10, 0, 20, -1, 10, 0, -1, -1, -1, -1, -1
-1, 20, -1, 70, -1, 10, 0, 20, -1, 10, 0, -1, -1, -1, -1, -1
};
@ -1277,6 +1323,11 @@ SimpleDateFormat::initGMTFormatters(UErrorCode &status) {
SimpleDateFormat *sdf = (SimpleDateFormat*)this->clone();
sdf->adoptCalendar(gcal);
sdf->applyPattern(*hourPattern);
// This prevents an hours format pattern like "-HH:mm:ss" from matching
// in a string like "GMT-07:00 10:08:11 PM"
sdf->setLenient(FALSE);
fGMTFormatters[i]->adoptFormat(0, sdf);
// For parsing, we only allow Hms patterns to be equal or longer
@ -1508,12 +1559,12 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo,
// OLD: for "yyyy", write out the whole year; for "yy", write out the last 2 digits
// NEW: UTS#35:
//Year y yy yyy yyyy yyyyy
//AD 1 1 01 001 0001 00001
//AD 12 12 12 012 0012 00012
//AD 123 123 23 123 0123 00123
//AD 1234 1234 34 1234 1234 01234
//AD 12345 12345 45 12345 12345 12345
//Year y yy yyy yyyy yyyyy
//AD 1 1 01 001 0001 00001
//AD 12 12 12 012 0012 00012
//AD 123 123 23 123 0123 00123
//AD 1234 1234 34 1234 1234 01234
//AD 12345 12345 45 12345 12345 12345
case UDAT_YEAR_FIELD:
case UDAT_YEAR_WOY_FIELD:
if(count == 2)
@ -1806,6 +1857,8 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
int32_t saveHebrewMonth = -1;
int32_t count = 0;
UBool lenient = isLenient();
// hack, reset tztype, cast away const
((SimpleDateFormat*)this)->tztype = TZTYPE_UNK;
@ -1943,7 +1996,7 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
}
}
}
else if (s < 0) {
else if (s <= 0) {
status = U_PARSE_ERROR;
goto ExitParse;
}
@ -1957,53 +2010,11 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
else {
abutPat = -1; // End of any abutting fields
// Handle quotes. Two consecutive quotes is a quote
// literal, inside or outside of quotes. Otherwise a
// quote indicates entry or exit from a quoted region.
if (ch == QUOTE) {
// Match a quote literal '' within OR outside of quotes
if ((i+1)<fPattern.length() && fPattern.charAt(i+1)==ch) {
++i; // Skip over doubled quote
// Fall through and treat quote as a literal
} else {
// Enter or exit quoted region
inQuote = !inQuote;
continue;
}
if (! matchLiterals(fPattern, i, text, pos, lenient)) {
status = U_PARSE_ERROR;
goto ExitParse;
}
// A run of white space in the pattern matches a run
// of white space in the input text.
if (PatternProps::isWhiteSpace(ch)) {
// Advance over run in pattern
while ((i+1)<fPattern.length() &&
PatternProps::isWhiteSpace(fPattern.charAt(i+1))) {
++i;
}
// Advance over run in input text
int32_t s = pos;
while (pos<text.length() &&
( u_isUWhiteSpace(text.charAt(pos)) || PatternProps::isWhiteSpace(text.charAt(pos)))) {
++pos;
}
// Must see at least one white space char in input
if (pos > s) {
continue;
}
} else if (pos<text.length() && text.charAt(pos)==ch) {
// Match a literal
++pos;
continue;
}
// We fall through to this point if the match fails
status = U_PARSE_ERROR;
goto ExitParse;
}
}
@ -2046,8 +2057,8 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
copy = cal.clone();
// Check for failed cloning.
if (copy == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
goto ExitParse;
status = U_MEMORY_ALLOCATION_ERROR;
goto ExitParse;
}
UDate parsedDate = copy->getTime(status);
// {sfb} check internalGetDefaultCenturyStart
@ -2062,8 +2073,8 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition&
copy = cal.clone();
// Check for failed cloning.
if (copy == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
goto ExitParse;
status = U_MEMORY_ALLOCATION_ERROR;
goto ExitParse;
}
const TimeZone & tz = cal.getTimeZone();
BasicTimeZone *btz = NULL;
@ -2282,6 +2293,133 @@ int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text,
return -start;
}
//----------------------------------------------------------------------
/**
 * Matches a run of literal (non-field) pattern text against the input text.
 *
 * Starting at patternOffset, collects every pattern character up to (but not
 * including) the next unquoted ASCII letter, resolving quotes ('' is a literal
 * quote, a single ' toggles quoting), and compares the collected literal with
 * the text starting at textOffset.
 *
 * Strict mode: the whole literal must be present in the text; a run of pattern
 * white space must match at least one white space character in the text.
 * Lenient mode: leading/trailing white space of the literal and leading white
 * space of the text are ignored, a partial match is accepted, and if nothing
 * matched at all a run of field-specific "ignorable" characters is skipped.
 *
 * @param pattern       the full pattern string
 * @param patternOffset in: index of the first literal character;
 *                      out (on success): index of the last literal character
 *                      consumed (i.e. one before the next field character)
 * @param text          the text being parsed
 * @param textOffset    in: index at which to start matching;
 *                      out (on success): index just past the matched text
 * @param lenient       TRUE for lenient matching, FALSE for strict matching
 * @return TRUE if the literal matched under the rules above; on FALSE the
 *         two offsets are left unchanged
 */
UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern,
                                      int32_t &patternOffset,
                                      const UnicodeString &text,
                                      int32_t &textOffset,
                                      UBool lenient)
{
    UBool inQuote = FALSE;
    UnicodeString literal;
    int32_t i = patternOffset;

    // scan pattern looking for contiguous literal characters
    for ( ; i < pattern.length(); i += 1) {
        UChar ch = pattern.charAt(i);

        if (!inQuote && ((ch >= 0x41 && ch <= 0x5A) || (ch >= 0x61 && ch <= 0x7A))) { // unquoted [A-Za-z]
            break;
        }

        if (ch == QUOTE) {
            // Match a quote literal ('') inside OR outside of quotes
            if ((i + 1) < pattern.length() && pattern.charAt(i + 1) == QUOTE) {
                i += 1;
            } else {
                inQuote = !inQuote;
                continue;
            }
        }

        literal += ch;
    }

    // at this point, literal contains the literal text
    // and i is the index of the next non-literal pattern character.
    int32_t p;
    int32_t t = textOffset;

    if (lenient) {
        // trim leading, trailing whitespace from
        // the literal text
        literal.trim();

        // ignore any leading whitespace in the text
        while (t < text.length() && u_isWhitespace(text.charAt(t))) {
            t += 1;
        }
    }

    for (p = 0; p < literal.length() && t < text.length(); p += 1, t += 1) {
        UBool needWhitespace = FALSE;

        while (p < literal.length() && PatternProps::isWhiteSpace(literal.charAt(p))) {
            needWhitespace = TRUE;
            p += 1;
        }

        if (needWhitespace) {
            int32_t tStart = t;

            while (t < text.length()) {
                UChar tch = text.charAt(t);

                if (!u_isUWhiteSpace(tch) && !PatternProps::isWhiteSpace(tch)) {
                    break;
                }

                t += 1;
            }

            // TODO: should we require internal spaces
            // in lenient mode? (There won't be any
            // leading or trailing spaces)
            if (!lenient && t == tStart) {
                // didn't find matching whitespace:
                // an error in strict mode
                return FALSE;
            }

            // In strict mode, this run of whitespace
            // may have been at the end.
            if (p >= literal.length()) {
                break;
            }
        }

        if (t >= text.length() || literal.charAt(p) != text.charAt(t)) {
            // Ran out of text, or found a non-matching character:
            // OK in lenient mode, an error in strict mode.
            if (lenient) {
                break;
            }

            return FALSE;
        }
    }

    // The loop above also terminates when the text is exhausted while
    // literal characters remain (t reached text.length() on the loop test).
    // That is only a partial match, which strict mode must reject; without
    // this check a truncated input would be reported as matching.
    if (!lenient && p < literal.length()) {
        return FALSE;
    }

    // At this point if we're in strict mode we have a complete match.
    // If we're in lenient mode we may have a partial match, or no
    // match at all.
    if (p <= 0) {
        // no match. Pretend it matched a run of whitespace
        // and ignorables in the text.
        const UnicodeSet *ignorables = NULL;
        // charAt(i) is evaluated even when i == pattern.length(); the lookup
        // then simply fails to find a pattern character and ignorables stays NULL.
        UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), pattern.charAt(i));

        if (patternCharPtr != NULL) {
            UDateFormatField patternCharIndex = (UDateFormatField) (patternCharPtr - DateFormatSymbols::getPatternUChars());

            ignorables = SimpleDateFormatStaticSets::getIgnorables(patternCharIndex);
        }

        for (t = textOffset; t < text.length(); t += 1) {
            UChar ch = text.charAt(t);

            if (ignorables == NULL || !ignorables->contains(ch)) {
                break;
            }
        }
    }

    // if we get here, we've got a complete match.
    // patternOffset is left on the last literal character because the
    // caller's pattern loop advances past it itself.
    patternOffset = i - 1;
    textOffset = t;

    return TRUE;
}
//----------------------------------------------------------------------
int32_t SimpleDateFormat::matchString(const UnicodeString& text,
@ -2399,6 +2537,8 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
NumberFormat *currentNumberFormat;
UnicodeString temp;
UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch);
UBool lenient = isLenient();
UBool gotNumber = FALSE;
#if defined (U_DEBUG_CAL)
//fprintf(stderr, "%s:%d - [%c] st=%d \n", __FILE__, __LINE__, (char) ch, start);
@ -2419,7 +2559,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
return -start;
}
UChar32 c = text.char32At(start);
if (!u_isUWhiteSpace(c) || !PatternProps::isWhiteSpace(c)) {
if (!u_isUWhiteSpace(c) /*||*/ && !PatternProps::isWhiteSpace(c)) {
break;
}
start += UTF_CHAR_LENGTH(c);
@ -2431,13 +2571,15 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
// to handle some of them here because some fields require extra processing on
// the parsed value.
if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD ||
patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD ||
patternCharIndex == UDAT_HOUR1_FIELD ||
(patternCharIndex == UDAT_DOW_LOCAL_FIELD && count <= 2) ||
(patternCharIndex == UDAT_STANDALONE_DAY_FIELD && count <= 2) ||
(patternCharIndex == UDAT_MONTH_FIELD && count <= 2) ||
(patternCharIndex == UDAT_STANDALONE_MONTH_FIELD && count <= 2) ||
(patternCharIndex == UDAT_QUARTER_FIELD && count <= 2) ||
(patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD && count <= 2) ||
patternCharIndex == UDAT_HOUR0_FIELD ||
patternCharIndex == UDAT_DOW_LOCAL_FIELD ||
patternCharIndex == UDAT_STANDALONE_DAY_FIELD ||
patternCharIndex == UDAT_MONTH_FIELD ||
patternCharIndex == UDAT_STANDALONE_MONTH_FIELD ||
patternCharIndex == UDAT_QUARTER_FIELD ||
patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD ||
patternCharIndex == UDAT_YEAR_FIELD ||
patternCharIndex == UDAT_YEAR_WOY_FIELD ||
patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD)
@ -2460,23 +2602,82 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
parseInt(*src, number, pos, allowNegative,currentNumberFormat);
if (pos.getIndex() == parseStart)
return -start;
value = number.getLong();
// suffix processing
int32_t txtLoc = pos.getIndex();
if (value <0 ) {
txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, TRUE);
if (txtLoc != pos.getIndex()) {
value *= -1;
}
}
else {
txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, FALSE);
}
pos.setIndex(txtLoc);
if (txtLoc > parseStart) {
value = number.getLong();
gotNumber = TRUE;
// suffix processing
if (value < 0 ) {
txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, TRUE);
if (txtLoc != pos.getIndex()) {
value *= -1;
}
}
else {
txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, FALSE);
}
// Check the range of the value
int32_t bias = gFieldRangeBias[patternCharIndex];
if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) {
return -start;
}
pos.setIndex(txtLoc);
}
}
// Make sure that we got a number if
// we want one, and didn't get one
// if we don't want one.
switch (patternCharIndex) {
case UDAT_HOUR_OF_DAY1_FIELD:
case UDAT_HOUR_OF_DAY0_FIELD:
case UDAT_HOUR1_FIELD:
case UDAT_HOUR0_FIELD:
// special range check for hours:
if (value < 0 || value > 24) {
return -start;
}
// fall through to gotNumber check
case UDAT_YEAR_FIELD:
case UDAT_YEAR_WOY_FIELD:
case UDAT_FRACTIONAL_SECOND_FIELD:
// these must be a number
if (! gotNumber) {
return -start;
}
break;
case UDAT_DOW_LOCAL_FIELD:
case UDAT_STANDALONE_DAY_FIELD:
case UDAT_MONTH_FIELD:
case UDAT_STANDALONE_MONTH_FIELD:
case UDAT_QUARTER_FIELD:
case UDAT_STANDALONE_QUARTER_FIELD:
// in strict mode, these can only
// be a number if count <= 2
if (!lenient && gotNumber && count > 2) {
// We have a string pattern in strict mode
// but the input parsed as a number. Ignore
// the fact that the input parsed as a number
// and try to match it as a string. (Some
// locales have numbers for the month names.)
gotNumber = FALSE;
pos.setIndex(start);
}
break;
default:
// we check the rest of the fields below.
break;
}
switch (patternCharIndex) {
@ -2504,7 +2705,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
// we made adjustments to place the 2-digit year in the proper
// century, for parsed strings from "00" to "99". Any other string
// is treated literally: "2250", "-1", "1", "002".
if (count <= 2 && (pos.getIndex() - start) == 2
if ((pos.getIndex() - start) == 2
&& u_isdigit(text.charAt(start))
&& u_isdigit(text.charAt(start+1)))
{
@ -2539,7 +2740,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
case UDAT_YEAR_WOY_FIELD:
// Comment is the same as for UDAT_Year_FIELDs - look above
if (count <= 2 && (pos.getIndex() - start) == 2
if ((pos.getIndex() - start) == 2
&& u_isdigit(text.charAt(start))
&& u_isdigit(text.charAt(start+1))
&& fHaveDefaultCentury )
@ -2553,7 +2754,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
return pos.getIndex();
case UDAT_MONTH_FIELD:
if (count <= 2) // i.e., M or MM.
if (gotNumber) // i.e., M or MM.
{
// When parsing month numbers from the Hebrew Calendar, we might need to adjust the month depending on whether
// or not it was a leap year. We may or may not yet know what year it is, so might have to delay checking until
@ -2592,7 +2793,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
}
case UDAT_STANDALONE_MONTH_FIELD:
if (count <= 2) // i.e., L or LL.
if (gotNumber) // i.e., L or LL.
{
// Don't want to parse the month if it is a string
// while pattern uses numeric style: M or MM.
@ -2617,6 +2818,10 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
// [We computed 'value' above.]
if (value == cal.getMaximum(UCAL_HOUR_OF_DAY) + 1)
value = 0;
// fall through to set field
case UDAT_HOUR_OF_DAY0_FIELD:
cal.set(UCAL_HOUR_OF_DAY, value);
return pos.getIndex();
@ -2640,7 +2845,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
return pos.getIndex();
case UDAT_DOW_LOCAL_FIELD:
if (count <= 2) // i.e., e or ee
if (gotNumber) // i.e., e or ee
{
// [We computed 'value' above.]
cal.set(UCAL_DOW_LOCAL, value);
@ -2668,7 +2873,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
case UDAT_STANDALONE_DAY_FIELD:
{
if (count <= 2) // c or cc
if (gotNumber) // c or cc
{
// [We computed 'value' above.]
cal.set(UCAL_DOW_LOCAL, value);
@ -2692,11 +2897,15 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
// [We computed 'value' above.]
if (value == cal.getLeastMaximum(UCAL_HOUR)+1)
value = 0;
// fall through to set field
case UDAT_HOUR0_FIELD:
cal.set(UCAL_HOUR, value);
return pos.getIndex();
case UDAT_QUARTER_FIELD:
if (count <= 2) // i.e., Q or QQ.
if (gotNumber) // i.e., Q or QQ.
{
// Don't want to parse the quarter if it is a string
// while pattern uses numeric style: Q or QQ.
@ -2718,7 +2927,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
}
case UDAT_STANDALONE_QUARTER_FIELD:
if (count <= 2) // i.e., q or qq.
if (gotNumber) // i.e., q or qq.
{
// Don't want to parse the quarter if it is a string
// while pattern uses numeric style: q or qq.
@ -2928,8 +3137,15 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC
}
parseInt(*src, number, pos, allowNegative,currentNumberFormat);
if (pos.getIndex() != parseStart) {
cal.set(field, number.getLong());
return pos.getIndex();
int32_t value = number.getLong();
// Check the range of the value
int32_t bias = gFieldRangeBias[patternCharIndex];
if (bias < 0 || (value >= cal.getMinimum(field) + bias && value <= cal.getMaximum(field) + bias)) {
cal.set(field, value);
return pos.getIndex();
}
}
return -start;
}

View file

@ -0,0 +1,158 @@
/*
*******************************************************************************
* Copyright (C) 2009-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* This file contains the class SimpleDateFormatStaticSets
*
* SimpleDateFormatStaticSets holds the UnicodeSets that are needed for lenient
* parsing of literal characters in date/time strings.
********************************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/uniset.h"
#include "unicode/udat.h"
#include "cmemory.h"
#include "ucln_in.h"
#include "umutex.h"
#include "smpdtfst.h"
U_NAMESPACE_BEGIN
SimpleDateFormatStaticSets *SimpleDateFormatStaticSets::gStaticSets = NULL;
/**
 * Builds and freezes the three ignorable-character sets used when literal
 * pattern text is matched leniently.
 *
 * @param status in/out error code. Receives any error reported by the
 *               UnicodeSet pattern constructors; set to
 *               U_MEMORY_ALLOCATION_ERROR (and all three sets released)
 *               if any of the sets could not be allocated.
 */
SimpleDateFormatStaticSets::SimpleDateFormatStaticSets(UErrorCode *status)
: fDateIgnorables(NULL),
  fTimeIgnorables(NULL),
  fOtherIgnorables(NULL)
{
    // The patterns consist of invariant characters only, so build the
    // UnicodeStrings with UNICODE_STRING_SIMPLE instead of relying on the
    // implicit UnicodeString(const char *) conversion.
    fDateIgnorables  = new UnicodeSet(UNICODE_STRING_SIMPLE("[-,./[:whitespace:]]"), *status);
    fTimeIgnorables  = new UnicodeSet(UNICODE_STRING_SIMPLE("[-.:[:whitespace:]]"),  *status);
    fOtherIgnorables = new UnicodeSet(UNICODE_STRING_SIMPLE("[:whitespace:]"),       *status);

    // Check for null pointers: on any allocation failure release everything
    // so the object never holds a partial collection of sets.
    if (fDateIgnorables == NULL || fTimeIgnorables == NULL || fOtherIgnorables == NULL) {
        delete fDateIgnorables;  fDateIgnorables  = NULL;
        delete fTimeIgnorables;  fTimeIgnorables  = NULL;
        delete fOtherIgnorables; fOtherIgnorables = NULL;

        *status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // Freeze all the sets
    fDateIgnorables->freeze();
    fTimeIgnorables->freeze();
    fOtherIgnorables->freeze();
}
// Releases the three owned sets and clears the pointers.
SimpleDateFormatStaticSets::~SimpleDateFormatStaticSets()
{
    delete fDateIgnorables;
    fDateIgnorables = NULL;

    delete fTimeIgnorables;
    fTimeIgnorables = NULL;

    delete fOtherIgnorables;
    fOtherIgnorables = NULL;
}
//------------------------------------------------------------------------------
//
//   SimpleDateFormatStaticSets::cleanup
//                      Deletes the shared gStaticSets instance and resets
//                      the pointer to NULL. Always returns TRUE.
//                      Reached through the smpdtfmt_cleanup() callback.
//
//------------------------------------------------------------------------------
UBool
SimpleDateFormatStaticSets::cleanup(void)
{
    delete gStaticSets;
    gStaticSets = NULL;

    return TRUE;
}
U_CDECL_BEGIN
// Cleanup callback handed to ucln_i18n_registerCleanup() by initSets();
// it simply forwards to SimpleDateFormatStaticSets::cleanup().
static UBool U_CALLCONV
smpdtfmt_cleanup(void)
{
return SimpleDateFormatStaticSets::cleanup();
}
U_CDECL_END
/**
 * Lazily creates the shared gStaticSets instance.
 *
 * A candidate instance is built outside the global mutex and installed under
 * the mutex only if no instance has been installed in the meantime; a
 * candidate that loses is deleted. The cleanup callback is registered after
 * a candidate was successfully built.
 *
 * @param status in/out error code; set to U_MEMORY_ALLOCATION_ERROR if the
 *               instance cannot be allocated, or to whatever error the
 *               constructor reports (gStaticSets is then left untouched).
 */
void SimpleDateFormatStaticSets::initSets(UErrorCode *status)
{
    SimpleDateFormatStaticSets *current;

    UMTX_CHECK(NULL, gStaticSets, current);
    if (current != NULL) {
        // Already initialized, nothing to do.
        return;
    }

    SimpleDateFormatStaticSets *candidate = new SimpleDateFormatStaticSets(status);
    if (candidate == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    if (U_FAILURE(*status)) {
        delete candidate;
        return;
    }

    umtx_lock(NULL);
    if (gStaticSets == NULL) {
        gStaticSets = candidate;
        candidate = NULL;       // ownership moved to gStaticSets
    }
    umtx_unlock(NULL);

    // Non-NULL only when an instance was installed before this one could be.
    delete candidate;

    ucln_i18n_registerCleanup(UCLN_I18N_SMPDTFMT, smpdtfmt_cleanup);
}
/**
 * Returns the shared set of characters that may be skipped in front of the
 * given field: the date set for year/month/day fields, the time set for
 * hour/minute/second fields, and the white-space-only set for everything else.
 *
 * @param fieldIndex the field the caller is about to process
 * @return the matching set, or NULL if the shared sets could not be initialized
 */
UnicodeSet *SimpleDateFormatStaticSets::getIgnorables(UDateFormatField fieldIndex)
{
    UErrorCode initStatus = U_ZERO_ERROR;

    initSets(&initStatus);
    if (U_FAILURE(initStatus)) {
        return NULL;
    }

    // Start from the fallback and override it for date and time fields.
    UnicodeSet *chosen = gStaticSets->fOtherIgnorables;

    switch (fieldIndex) {
        case UDAT_YEAR_FIELD:
        case UDAT_MONTH_FIELD:
        case UDAT_DATE_FIELD:
        case UDAT_STANDALONE_DAY_FIELD:
        case UDAT_STANDALONE_MONTH_FIELD:
            chosen = gStaticSets->fDateIgnorables;
            break;

        case UDAT_HOUR_OF_DAY1_FIELD:
        case UDAT_HOUR_OF_DAY0_FIELD:
        case UDAT_MINUTE_FIELD:
        case UDAT_SECOND_FIELD:
        case UDAT_HOUR1_FIELD:
        case UDAT_HOUR0_FIELD:
            chosen = gStaticSets->fTimeIgnorables;
            break;

        default:
            break;
    }

    return chosen;
}
U_NAMESPACE_END
#endif // #if !UCONFIG_NO_FORMATTING

View file

@ -0,0 +1,52 @@
/*
*******************************************************************************
* Copyright (C) 2009-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* This file contains declarations for the class SimpleDateFormatStaticSets
*
* SimpleDateFormatStaticSets holds the UnicodeSets that are needed for lenient
* parsing of literal characters in date/time strings.
********************************************************************************
*/
#ifndef SMPDTFST_H
#define SMPDTFST_H
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/udat.h"
U_NAMESPACE_BEGIN
class UnicodeSet;
// Holder for the UnicodeSets of characters that may be skipped ("ignorables")
// when literal pattern text is matched during lenient date/time parsing.
// One shared instance is created on demand and reached through gStaticSets.
class SimpleDateFormatStaticSets : public UMemory
{
public:
static SimpleDateFormatStaticSets *gStaticSets; // Ptr to all lazily initialized constant
// shared sets.
// Builds and freezes the three sets; reports failure through *status.
SimpleDateFormatStaticSets(UErrorCode *status);
~SimpleDateFormatStaticSets();
// Creates gStaticSets if it does not exist yet.
static void initSets(UErrorCode *status);
// Deletes gStaticSets and resets it to NULL; returns TRUE.
static UBool cleanup();
// Returns the set to use for the given field, or NULL if initialization failed.
static UnicodeSet *getIgnorables(UDateFormatField fieldIndex);
private:
UnicodeSet *fDateIgnorables;   // returned for year, month, date and stand-alone day/month fields
UnicodeSet *fTimeIgnorables;   // returned for hour, minute and second fields
UnicodeSet *fOtherIgnorables;  // returned for every other field
};
U_NAMESPACE_END
#endif // #if !UCONFIG_NO_FORMATTING
#endif // SMPDTFST_H

View file

@ -176,16 +176,16 @@ static int32_t findInStringArray(UResourceBundle* array, const UnicodeString& id
UnicodeString copy;
const UChar *u;
int32_t len;
int32_t start = 0;
int32_t limit = ures_getSize(array);
int32_t mid;
int32_t lastMid = INT32_MAX;
if(U_FAILURE(status) || (limit < 1)) {
if(U_FAILURE(status) || (limit < 1)) {
return -1;
}
U_DEBUG_TZ_MSG(("fisa: Looking for %s, between %d and %d\n", U_DEBUG_TZ_STR(UnicodeString(id).getTerminatedBuffer()), start, limit));
for (;;) {
mid = (int32_t)((start + limit) / 2);
if (lastMid == mid) { /* Have we moved? */
@ -213,7 +213,7 @@ static int32_t findInStringArray(UResourceBundle* array, const UnicodeString& id
}
/**
* Fetch a specific zone by name. Replaces the getByKey call.
* Fetch a specific zone by name. Replaces the getByKey call.
* @param top Top timezone resource
* @param id Time zone ID
* @param oldbundle Bundle for reuse (or NULL). see 'ures_open()'
@ -222,12 +222,12 @@ static int32_t findInStringArray(UResourceBundle* array, const UnicodeString& id
static UResourceBundle* getZoneByName(const UResourceBundle* top, const UnicodeString& id, UResourceBundle *oldbundle, UErrorCode& status) {
// load the Rules object
UResourceBundle *tmp = ures_getByKey(top, kNAMES, NULL, &status);
// search for the string
int32_t idx = findInStringArray(tmp, id, status);
if((idx == -1) && U_SUCCESS(status)) {
// not found
// not found
status = U_MISSING_RESOURCE_ERROR;
//ures_close(oldbundle);
//oldbundle = NULL;
@ -239,7 +239,7 @@ static UResourceBundle* getZoneByName(const UResourceBundle* top, const UnicodeS
U_DEBUG_TZ_MSG(("gzbn: loaded z#%d, size %d, type %d, path %s, %s\n", idx, ures_getSize(oldbundle), ures_getType(oldbundle), ures_getPath(oldbundle), u_errorName(status)));
}
ures_close(tmp);
if(U_FAILURE(status)) {
if(U_FAILURE(status)) {
//ures_close(oldbundle);
return NULL;
} else {
@ -444,7 +444,7 @@ TimeZone::createSystemTimeZone(const UnicodeString& id, UErrorCode& ec) {
*/
void
TimeZone::initDefault()
{
{
// We access system timezone data through TPlatformUtilities,
// including tzset(), timezone, and tzname[].
int32_t rawOffset = 0;
@ -467,13 +467,13 @@ TimeZone::initDefault()
ucln_i18n_registerCleanup(UCLN_I18N_TIMEZONE, timeZone_cleanup);
uprv_tzset(); // Initialize tz... system data
// Get the timezone ID from the host. This function should do
// any required host-specific remapping; e.g., on Windows this
// function maps the Date and Time control panel setting to an
// ICU timezone ID.
hostID = uprv_tzname(0);
// Invert sign because UNIX semantics are backwards
rawOffset = uprv_timezone() * -U_MILLIS_PER_SECOND;
}
@ -532,7 +532,7 @@ TimeZone::initDefault()
ucln_i18n_registerCleanup(UCLN_I18N_TIMEZONE, timeZone_cleanup);
}
umtx_unlock(&LOCK);
delete default_zone;
}
@ -1198,7 +1198,7 @@ TimeZone::getDisplayName(UBool daylight, EDisplayType style, UnicodeString& resu
return getDisplayName(daylight,style, Locale::getDefault(), result);
}
//--------------------------------------
int32_t
int32_t
TimeZone::getDSTSavings()const {
if (useDaylightTime()) {
return 3600000;
@ -1266,7 +1266,7 @@ TimeZone::getDisplayName(UBool daylight, EDisplayType style, const Locale& local
if (U_FAILURE(status)) {
return result.remove();
}
if ((daylight && dstOffset != 0) ||
(!daylight && dstOffset == 0) ||
(style == SHORT_GENERIC) ||
@ -1276,7 +1276,7 @@ TimeZone::getDisplayName(UBool daylight, EDisplayType style, const Locale& local
format.setTimeZone(*this);
return format.format(d, result);
}
// Create a new SimpleTimeZone as a stand-in for this zone; the
// stand-in will have no DST, or DST during July, but the same ID and offset,
// and hence the same display name.
@ -1293,10 +1293,10 @@ TimeZone::getDisplayName(UBool daylight, EDisplayType style, const Locale& local
}
cal.set(UCAL_MONTH, UCAL_JULY);
cal.set(UCAL_DATE, 1);
// Get July 1 date
d = cal.getTime(status);
// Check if it is in DST
if (cal.get(UCAL_DST_OFFSET, status) == 0) {
// We need to create a fake time zone
@ -1396,6 +1396,7 @@ TimeZone::parseCustomID(const UnicodeString& id, int32_t& sign,
return FALSE;
}
numberFormat->setParseIntegerOnly(TRUE);
//numberFormat->setLenient(TRUE); // TODO: May need to set this, depends on latest timezone parsing
// Look for either hh:mm, hhmm, or hh
int32_t start = pos.getIndex();
@ -1526,10 +1527,10 @@ TimeZone::formatCustomID(int32_t hour, int32_t min, int32_t sec,
}
UBool
UBool
TimeZone::hasSameRules(const TimeZone& other) const
{
return (getRawOffset() == other.getRawOffset() &&
return (getRawOffset() == other.getRawOffset() &&
useDaylightTime() == other.useDaylightTime());
}

View file

@ -39,7 +39,9 @@ typedef enum ECleanupI18NType {
UCLN_I18N_TIMEZONE,
UCLN_I18N_PLURAL_RULE,
UCLN_I18N_CURRENCY,
UCLN_I18N_DECFMT,
UCLN_I18N_NUMFMT,
UCLN_I18N_SMPDTFMT,
UCLN_I18N_USEARCH,
UCLN_I18N_COLLATOR,
UCLN_I18N_UCOL,

View file

@ -46,6 +46,7 @@ class DigitList;
class ChoiceFormat;
class CurrencyPluralInfo;
class Hashtable;
class UnicodeSet;
class FieldPositionHandler;
/**
@ -350,7 +351,8 @@ class FieldPositionHandler;
* DecimalFormatSymbols object. During formatting, the
* DecimalFormatSymbols-based digits are output.
*
* <p>During parsing, grouping separators are ignored.
* <p>During parsing, grouping separators are ignored if in lenient mode;
* otherwise, if present, they must be in appropriate positions.
*
* <p>For currency parsing, the formatter is able to parse every currency
* style formats no matter which style the formatter is constructed with.
@ -1982,7 +1984,8 @@ private:
static int32_t compareSimpleAffix(const UnicodeString& affix,
const UnicodeString& input,
int32_t pos);
int32_t pos,
UBool lenient);
static int32_t skipPatternWhiteSpace(const UnicodeString& text, int32_t pos);
@ -1998,6 +2001,9 @@ private:
static int32_t match(const UnicodeString& text, int32_t pos, const UnicodeString& str);
static UBool matchSymbol(const UnicodeString &text, int32_t position, int32_t length, const UnicodeString &symbol,
UnicodeSet *sset, UChar32 schar);
/**
* Get a decimal format symbol.
* Returns a const reference to the symbol string.

View file

@ -599,6 +599,25 @@ public:
*/
virtual void setParseIntegerOnly(UBool value);
/**
* Sets whether lenient parsing should be enabled (it is off by default).
*
* @param enable <code>TRUE</code> if lenient parsing should be used,
* <code>FALSE</code> otherwise.
* @draft ICU 4.8
*/
virtual void setLenient(UBool enable);
/**
* Returns whether lenient parsing is enabled (it is off by default).
*
* @return <code>TRUE</code> if lenient parsing is enabled,
* <code>FALSE</code> otherwise.
* @see #setLenient
* @draft ICU 4.8
*/
virtual UBool isLenient(void) const;
/**
* Returns the default number format for the current default
* locale. The default format is one of the styles provided by
@ -920,6 +939,7 @@ private:
int32_t fMaxFractionDigits;
int32_t fMinFractionDigits;
UBool fParseIntegerOnly;
UBool fLenient; // TRUE => lenient parse is enabled
// ISO currency code
UChar fCurrency[4];
@ -1021,6 +1041,12 @@ NumberFormat::isParseIntegerOnly() const
return fParseIntegerOnly;
}
// Returns the lenient-parse flag held in fLenient.
inline UBool
NumberFormat::isLenient() const
{
return fLenient;
}
inline UnicodeString&
NumberFormat::format(const Formattable& obj,
UnicodeString& appendTo,

View file

@ -910,7 +910,23 @@ private:
*/
int32_t matchQuarterString(const UnicodeString& text, int32_t start, UCalendarDateFields field,
const UnicodeString* stringArray, int32_t stringArrayCount, Calendar& cal) const;
/**
* Private function used by parse to match literal pattern text.
*
* @param pattern the pattern string
* @param patternOffset the starting offset into the pattern text. On
* output will be set to the offset of the last literal character consumed,
* i.e. one before the first non-literal character in the pattern
* @param text the text being parsed
* @param textOffset the starting offset into the text. On output
* will be set to the offset of the character after the match
* @param lenient <code>TRUE</code> if the parse is lenient, <code>FALSE</code> otherwise.
*
* @return <code>TRUE</code> if the literal text could be matched, <code>FALSE</code> otherwise.
*/
static UBool matchLiterals(const UnicodeString &pattern, int32_t &patternOffset,
const UnicodeString &text, int32_t &textOffset, UBool lenient);
/**
* Private member function that converts the parsed date strings into
* timeFields. Returns -start (for ParsePosition) if failed.

View file

@ -450,6 +450,11 @@ unum_getAttribute(const UNumberFormat* fmt,
UNumberFormatAttribute attr)
{
const NumberFormat* nf = reinterpret_cast<const NumberFormat*>(fmt);
if ( attr == UNUM_LENIENT_PARSE ) {
// Supported for all subclasses
return nf->isLenient();
}
// The remaining attributes are only supported for DecimalFormat
const DecimalFormat* df = dynamic_cast<const DecimalFormat*>(nf);
if (df != NULL) {
switch(attr) {
@ -508,19 +513,11 @@ unum_getAttribute(const UNumberFormat* fmt,
case UNUM_SECONDARY_GROUPING_SIZE:
return df->getSecondaryGroupingSize();
default:
/* enums out of sync? unsupported enum? */
break;
}
} else {
const RuleBasedNumberFormat* rbnf = dynamic_cast<const RuleBasedNumberFormat*>(nf);
U_ASSERT(rbnf != NULL);
if (attr == UNUM_LENIENT_PARSE) {
#if !UCONFIG_NO_COLLATION
return rbnf->isLenient();
#endif
}
}
return -1;
@ -532,6 +529,11 @@ unum_setAttribute( UNumberFormat* fmt,
int32_t newValue)
{
NumberFormat* nf = reinterpret_cast<NumberFormat*>(fmt);
if ( attr == UNUM_LENIENT_PARSE ) {
// Supported for all subclasses
return nf->setLenient(newValue != 0);
}
// The remaining attributes are only supported for DecimalFormat
DecimalFormat* df = dynamic_cast<DecimalFormat*>(nf);
if (df != NULL) {
switch(attr) {
@ -609,19 +611,11 @@ unum_setAttribute( UNumberFormat* fmt,
case UNUM_SECONDARY_GROUPING_SIZE:
df->setSecondaryGroupingSize(newValue);
break;
default:
/* Shouldn't get here anyway */
break;
}
} else {
RuleBasedNumberFormat* rbnf = dynamic_cast<RuleBasedNumberFormat*>(nf);
U_ASSERT(rbnf != NULL);
if (attr == UNUM_LENIENT_PARSE) {
#if !UCONFIG_NO_COLLATION
rbnf->setLenient((UBool)newValue);
#endif
}
}
}

View file

@ -56,6 +56,7 @@ void addNumForTest(TestNode** root)
TESTCASE(TestRBNFFormat);
TESTCASE(TestNBSPInPattern);
TESTCASE(TestInt64Parse);
TESTCASE(TestParseZero);
}
/** copy src to dst with unicode-escapes for values < 0x20 and > 0x7e, null terminate if possible */
@ -99,12 +100,12 @@ static void TestInt64Parse()
UErrorCode st = U_ZERO_ERROR;
UErrorCode* status = &st;
const char* st1 = "009223372036854775808";
const int size = 21;
UChar text[21];
UNumberFormat* nf;
int64_t a;
@ -121,7 +122,7 @@ static void TestInt64Parse()
log_verbose("About to test unum_parseInt64() with out of range number\n");
a = unum_parseInt64(nf, text, size, 0, status);
if(!U_FAILURE(*status))
{
@ -198,7 +199,7 @@ static void TestNumberFormat()
log_verbose("\nTesting unum_open(currency, frenchlocale, status)\n");
cur_fr=unum_open(style,NULL,0, "fr_FR", NULL, &status);
if(U_FAILURE(status))
log_err("Error: could not create NumberFormat using unum_open(currency, french, &status): %s\n",
log_err("Error: could not create NumberFormat using unum_open(currency, french, &status): %s\n",
myErrorName(status));
log_verbose("\nTesting unum_open(percent, NULL, status)\n");
@ -408,7 +409,7 @@ free(result);
}
/*
* Note: "for strict standard conformance all operations and constants are now supposed to be
* Note: "for strict standard conformance all operations and constants are now supposed to be
evaluated in precision of long double". So, we assign a1 before comparing to a double. Bug #7932.
*/
a1 = 462.12345;
@ -587,7 +588,7 @@ free(result);
{
log_err("Error in formatting using unum_format(.....): %s\n", myErrorName(status));
}
/* TODO:
/* TODO:
* This test fails because we have not called unum_applyPattern().
* Currently, such an applyPattern() does not exist on the C API, and
* we have jitterbug 411 for it.
@ -676,7 +677,7 @@ free(result);
log_err("error in getting the text attributes : %s\n", myErrorName(status));
}
if(u_strcmp(prefix, temp)!=0)
if(u_strcmp(prefix, temp)!=0)
log_err("ERROR: get and setTextAttributes with positive prefix failed\n");
else
log_verbose("Pass: get and setTextAttributes with positive prefix works fine\n");
@ -692,7 +693,7 @@ free(result);
{
log_err("error in getting the text attributes : %s\n", myErrorName(status));
}
if(u_strcmp(prefix, temp)!=0)
if(u_strcmp(prefix, temp)!=0)
log_err("ERROR: get and setTextAttributes with negative prefix failed\n");
else
log_verbose("Pass: get and setTextAttributes with negative prefix works fine\n");
@ -709,7 +710,7 @@ free(result);
{
log_err("error in getting the text attributes : %s\n", myErrorName(status));
}
if(u_strcmp(suffix, temp)!=0)
if(u_strcmp(suffix, temp)!=0)
log_err("ERROR: get and setTextAttributes with negative suffix failed\n");
else
log_verbose("Pass: get and settextAttributes with negative suffix works fine\n");
@ -726,7 +727,7 @@ free(result);
{
log_err("error in getting the text attributes : %s\n", myErrorName(status));
}
if(u_strcmp(suffix, temp)!=0)
if(u_strcmp(suffix, temp)!=0)
log_err("ERROR: get and setTextAttributes with negative suffix failed\n");
else
log_verbose("Pass: get and settextAttributes with negative suffix works fine\n");
@ -810,7 +811,7 @@ free(result);
UChar groupingSep[] = { 0 };
UChar numPercent[] = { 0x0031, 0x0032, 0x0025, 0 }; /* "12%" */
double parseResult = 0.0;
status=U_ZERO_ERROR;
dec_en = unum_open(UNUM_DECIMAL, NULL, 0, "en_US", NULL, &status);
unum_setAttribute(dec_en, UNUM_LENIENT_PARSE, 0);
@ -825,12 +826,12 @@ free(result);
}
unum_close(dec_en);
}
{ /* Test parse & format of big decimals. Use a number with too many digits to fit in a double,
to verify that it is taking the pure decimal path. */
UNumberFormat *fmt;
const char *bdpattern = "#,##0.#########";
const char *numInitial = "12345678900987654321.1234567896";
const char *bdpattern = "#,##0.#########";
const char *numInitial = "12345678900987654321.1234567896";
const char *numFormatted = "12,345,678,900,987,654,321.12345679";
const char *parseExpected = "12345678900987654321.12345679";
int32_t resultSize = 0;
@ -847,7 +848,7 @@ free(result);
fmt = unum_open(UNUM_PATTERN_DECIMAL, dest, -1, "en", NULL /*parseError*/, &status);
if (U_FAILURE(status)) log_err("File %s, Line %d, status = %s\n", __FILE__, __LINE__, u_errorName(status));
resultSize = unum_formatDecimal(fmt, numInitial, -1, dest, DESTCAPACITY, NULL, &status);
resultSize = unum_formatDecimal(fmt, numInitial, -1, dest, DESTCAPACITY, NULL, &status);
if (U_FAILURE(status)) {
log_err("File %s, Line %d, status = %s\n", __FILE__, __LINE__, u_errorName(status));
}
@ -857,7 +858,7 @@ free(result);
__FILE__, __LINE__, numFormatted, desta);
}
if (strlen(numFormatted) != resultSize) {
log_err("File %s, Line %d, (expected, actual) = (%d, %d)\n",
log_err("File %s, Line %d, (expected, actual) = (%d, %d)\n",
__FILE__, __LINE__, strlen(numFormatted), resultSize);
}
@ -865,7 +866,7 @@ free(result);
fieldPos.field = 2; /* Ticket 8034 - need enum constants for the field values. */
/* 2 = kDecimalSeparatorField */
resultSize = unum_formatDecimal(fmt, numInitial, -1, dest, DESTCAPACITY, &fieldPos, &status);
resultSize = unum_formatDecimal(fmt, numInitial, -1, dest, DESTCAPACITY, &fieldPos, &status);
if (U_FAILURE(status)) {
log_err("File %s, Line %d, status = %s\n", __FILE__, __LINE__, u_errorName(status));
}
@ -882,7 +883,7 @@ free(result);
log_err("File %s, Line %d, (expected, acutal) = (%d, %d)\n",
__FILE__, __LINE__, 0, fieldPos.endIndex);
}
/* Parse */
status = U_ZERO_ERROR;
@ -901,7 +902,7 @@ free(result);
}
/* Parse with a parsePos parameter */
status = U_ZERO_ERROR;
u_uastrcpy(dest, numFormatted); /* Parse the expected output of the formatting test */
parsePos = 3; /* 12,345,678,900,987,654,321.12345679 */
@ -967,6 +968,27 @@ free(result);
}
/**
 * Checks that the text "0" can be parsed by a number format opened from the
 * pattern "#;#". A parse failure is reported as a test error; on success the
 * parsed value is only logged verbosely.
 */
static void TestParseZero(void)
{
    UErrorCode errorCode = U_ZERO_ERROR;
    /* Spelled as code points rather than char literals so that the values are
     * correct Unicode even when the compiler's execution charset is not ASCII. */
    UChar input[] = {0x0030, 0};                 /* Input text is decimal '0' */
    UChar pat[] = {0x0023, 0x003B, 0x0023, 0};   /* "#;#" */
    double dbl;
#if 0
    UNumberFormat* unum = unum_open( UNUM_DECIMAL /*or UNUM_DEFAULT*/, NULL, -1, NULL, NULL, &errorCode);
#else
    UNumberFormat* unum = unum_open( UNUM_PATTERN_DECIMAL /*needs pattern*/, pat, -1, NULL, NULL, &errorCode);
#endif

    /* Without a formatter there is nothing to parse with; report and stop. */
    if (U_FAILURE(errorCode)) {
        log_data_err("unum_open failed: %s (Are you missing data?)\n", u_errorName(errorCode));
        return;
    }

    dbl = unum_parseDouble( unum, input, -1 /*u_strlen(input)*/, 0 /* 0 = start */, &errorCode );
    if (U_FAILURE(errorCode)) {
        log_err("Result: %s\n", u_errorName(errorCode));
    } else {
        log_verbose("Double: %f\n", dbl);
    }

    /* Release the formatter; the original version leaked it. */
    unum_close(unum);
}
typedef struct {
const char * testname;
const char * locale;
@ -1101,7 +1123,7 @@ static void TestSigDigRounding()
u_uastrcpy(expected, "140");
if(u_strcmp(result, expected)!=0)
log_err("FAIL: Error in unum_formatDouble result %s instead of %s\n", u_austrcpy(temp1, result), u_austrcpy(temp2, expected) );
unum_close(fmt);
}
@ -1218,7 +1240,7 @@ free(result);
/*
 * Relative-tolerance comparison: the result is true when |a - b| is strictly
 * less than |a * err|. The tolerance is scaled by a only, so the check is not
 * symmetric in its first two arguments, and it is false when a is 0.
 */
static UBool
withinErr(double a, double b, double err) {
    return uprv_fabs(a - b) < uprv_fabs(a * err);
}
static void TestInt64Format() {
@ -1228,7 +1250,7 @@ static void TestInt64Format() {
UErrorCode status = U_ZERO_ERROR;
const double doubleInt64Max = (double)U_INT64_MAX;
const double doubleInt64Min = (double)U_INT64_MIN;
const double doubleBig = 10.0 * (double)U_INT64_MAX;
const double doubleBig = 10.0 * (double)U_INT64_MAX;
int32_t val32;
int64_t val64;
double valDouble;
@ -1385,13 +1407,13 @@ static void test_fmt(UNumberFormat* fmt, UBool isDecimal) {
{
int isLenient = unum_getAttribute(fmt, UNUM_LENIENT_PARSE);
log_verbose("lenient: 0x%x\n", isLenient);
if (isDecimal ? (isLenient != -1) : (isLenient == TRUE)) {
if (isLenient != FALSE) {
log_err("didn't expect lenient value: %d\n", isLenient);
}
unum_setAttribute(fmt, UNUM_LENIENT_PARSE, TRUE);
isLenient = unum_getAttribute(fmt, UNUM_LENIENT_PARSE);
if (isDecimal ? (isLenient != -1) : (isLenient == FALSE)) {
if (isLenient != TRUE) {
log_err("didn't expect lenient value after set: %d\n", isLenient);
}
}
@ -1619,7 +1641,7 @@ static void TestRBNFFormat() {
}
static void TestCurrencyRegression(void) {
/*
/*
I've found a case where unum_parseDoubleCurrency is not doing what I
expect. The value I pass in is $1234567890q123460000.00 and this
returns with a status of zero error & a parse pos of 22 (I would
@ -1647,15 +1669,15 @@ their data!
currency[0]=0;
u_uastrcpy(buf, "$1234567890q643210000.00");
cur = unum_open(UNUM_CURRENCY, NULL,0,"en_US", NULL, &status);
if(U_FAILURE(status)) {
log_data_err("unum_open failed: %s (Are you missing data?)\n", u_errorName(status));
return;
}
status = U_ZERO_ERROR; /* so we can test it later. */
pos = 0;
d = unum_parseDoubleCurrency(cur,
buf,
-1,
@ -1671,7 +1693,7 @@ their data!
} else {
log_verbose("unum_parseDoubleCurrency failed, value %.9f err %s, pos %d, currency [%s]\n", d, u_errorName(status), pos, acurrency);
}
unum_close(cur);
}
@ -1679,9 +1701,9 @@ static void TestTextAttributeCrash(void) {
UChar ubuffer[64] = {0x0049,0x004E,0x0052,0};
static const UChar expectedNeg[] = {0x0049,0x004E,0x0052,0x0031,0x0032,0x0033,0x0034,0x002E,0x0035,0};
static const UChar expectedPos[] = {0x0031,0x0032,0x0033,0x0034,0x002E,0x0035,0};
int32_t used;
int32_t used;
UErrorCode status = U_ZERO_ERROR;
UNumberFormat *nf = unum_open(UNUM_CURRENCY, NULL, 0, "en_US", NULL, &status);
UNumberFormat *nf = unum_open(UNUM_CURRENCY, NULL, 0, "en_US", NULL, &status);
if (U_FAILURE(status)) {
log_data_err("FAILED 1 -> %s (Are you missing data?)\n", u_errorName(status));
return;
@ -1697,7 +1719,7 @@ static void TestTextAttributeCrash(void) {
log_err("FAILED 2\n"); exit(1);
}
log_verbose("attempting to format...\n");
used = unum_formatDouble(nf, -1234.5, ubuffer, 64, NULL, &status);
used = unum_formatDouble(nf, -1234.5, ubuffer, 64, NULL, &status);
if (U_FAILURE(status) || 64 < used) {
log_err("Failed formatting %s\n", u_errorName(status));
return;
@ -1705,7 +1727,7 @@ static void TestTextAttributeCrash(void) {
if (u_strcmp(expectedNeg, ubuffer) == 0) {
log_err("Didn't get expected negative result\n");
}
used = unum_formatDouble(nf, 1234.5, ubuffer, 64, NULL, &status);
used = unum_formatDouble(nf, 1234.5, ubuffer, 64, NULL, &status);
if (U_FAILURE(status) || 64 < used) {
log_err("Failed formatting %s\n", u_errorName(status));
return;
@ -1722,7 +1744,7 @@ static void TestNBSPPatternRtNum(const char *testcase, UNumberFormat *nf, double
char tmpbuf[200];
double aNumber = -1.0;
unum_formatDouble(nf, myNumber, myString, 20, NULL, &status);
log_verbose("%s: formatted %.2f into %s\n", testcase, myNumber, u_austrcpy(tmpbuf, myString));
log_verbose("%s: formatted %.2f into %s\n", testcase, myNumber, u_austrcpy(tmpbuf, myString));
if(U_FAILURE(status)) {
log_err("%s: failed format of %.2g with %s\n", testcase, myNumber, u_errorName(status));
return;
@ -1748,8 +1770,8 @@ static void TestNBSPInPattern(void) {
UErrorCode status = U_ZERO_ERROR;
UNumberFormat* nf = NULL;
const char *testcase;
testcase="ar_AE UNUM_CURRENCY";
nf = unum_open(UNUM_CURRENCY, NULL, -1, "ar_AE", NULL, &status);
if(U_FAILURE(status) || nf == NULL) {
@ -1757,7 +1779,7 @@ static void TestNBSPInPattern(void) {
return;
}
TestNBSPPatternRT(testcase, nf);
/* if we don't have CLDR 1.6 data, bring out the problem anyways */
{
#define SPECIAL_PATTERN "\\u00A4\\u00A4'\\u062f.\\u0625.\\u200f\\u00a0'###0.00"
@ -1765,7 +1787,7 @@ static void TestNBSPInPattern(void) {
testcase = "ar_AE special pattern: " SPECIAL_PATTERN;
u_unescape(SPECIAL_PATTERN, pat, sizeof(pat)/sizeof(pat[0]));
unum_applyPattern(nf, FALSE, pat, -1, NULL, &status);
if(U_FAILURE(status)) {
if(U_FAILURE(status)) {
log_err("%s: unum_applyPattern failed with %s\n", testcase, u_errorName(status));
} else {
TestNBSPPatternRT(testcase, nf);
@ -1773,7 +1795,7 @@ static void TestNBSPInPattern(void) {
#undef SPECIAL_PATTERN
}
unum_close(nf); status = U_ZERO_ERROR;
testcase="ar_AE UNUM_DECIMAL";
nf = unum_open(UNUM_DECIMAL, NULL, -1, "ar_AE", NULL, &status);
if(U_FAILURE(status)) {
@ -1781,17 +1803,17 @@ static void TestNBSPInPattern(void) {
}
TestNBSPPatternRT(testcase, nf);
unum_close(nf); status = U_ZERO_ERROR;
testcase="ar_AE UNUM_PERCENT";
nf = unum_open(UNUM_PERCENT, NULL, -1, "ar_AE", NULL, &status);
if(U_FAILURE(status)) {
log_err("%s: unum_open failed with %s\n", testcase, u_errorName(status));
}
TestNBSPPatternRT(testcase, nf);
}
TestNBSPPatternRT(testcase, nf);
unum_close(nf); status = U_ZERO_ERROR;
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2010, International Business Machines Corporation and
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/********************************************************************************
@ -65,6 +65,10 @@ static void TestRBNFFormat(void);
**/
static void TestCurrencyRegression(void);
/**
* Test strict parsing of "0"
**/
static void TestParseZero(void);
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2010, International Business Machines Corporation and
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -362,6 +362,9 @@ void DateFormatRegressionTest::Test4060212(void)
errln((UnicodeString) "Fail: Got " + cal->get(UCAL_DAY_OF_YEAR, status) +
" Want 40");
#if 0
// this is an odd usage of "ddd" and it doesn't
// work now that date values are range checked per #3579.
logln("Using yyyy-ddd.hh:mm:ss");
delete formatter;
formatter = NULL;
@ -376,6 +379,7 @@ void DateFormatRegressionTest::Test4060212(void)
if ((cal->get(UCAL_DAY_OF_YEAR, status) != 40) || failure(status, "cal->get"))
errln((UnicodeString) "Fail: Got " + cal->get(UCAL_DAY_OF_YEAR, status) +
" Want 40");
#endif
delete formatter;
delete fmt;
@ -397,8 +401,8 @@ void DateFormatRegressionTest::Test4061287(void)
}
failure(status, "new SimpleDateFormat");
//try {
logln(UnicodeString("") + df->parse("35/01/1971", status));
failure(status, "df->parse");
logln(UnicodeString("") + df->parse("30/02/1971", status));
failure(status, "df->parse(\"30/02/1971\")");
//logln(df.parse("35/01/1971").toString());
//}
/*catch (ParseException e) {
@ -408,7 +412,7 @@ void DateFormatRegressionTest::Test4061287(void)
df->setLenient(FALSE);
UBool ok = FALSE;
//try {
logln(UnicodeString("") + df->parse("35/01/1971", status));
logln(UnicodeString("") + df->parse("30/02/1971", status));
if(U_FAILURE(status))
ok = TRUE;
//logln(df.parse("35/01/1971").toString());
@ -554,7 +558,7 @@ void DateFormatRegressionTest::Test4071441(void)
US locale a string formatted according to mm/dd/yy and parses it
correctly.
When given a string mm/dd/yyyy it only parses up to the first
When given a string mm/dd/yyyy [sic] it only parses up to the first
two y's, typically resulting in a date in the year 1919.
Please extend the parsing method(s) to handle strings with
@ -567,7 +571,7 @@ void DateFormatRegressionTest::Test4073003(void)
{
//try {
UErrorCode ec = U_ZERO_ERROR;
SimpleDateFormat fmt("dd/MM/yy", Locale::getUK(), ec);
SimpleDateFormat fmt("MM/dd/yy", Locale::getUK(), ec);
if (U_FAILURE(ec)) {
dataerrln("FAIL: SimpleDateFormat constructor - %s", u_errorName(ec));
return;

View file

@ -81,9 +81,10 @@ void DateFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &nam
TESTCASE(41,Test6880);
TESTCASE(42,TestISOEra);
TESTCASE(43,TestFormalChineseDate);
TESTCASE(44,TestNumberAsStringParsing);
/*
TESTCASE(43,TestRelativeError);
TESTCASE(44,TestRelativeOther);
TESTCASE(45,TestRelativeError);
TESTCASE(46,TestRelativeOther);
*/
default: name = ""; break;
}
@ -681,8 +682,9 @@ DateFormatTest::TestLetterDPattern212()
{
UErrorCode status = U_ZERO_ERROR;
UnicodeString dateString("1995-040.05:01:29");
UnicodeString ddateString("1995-02-09.05:01:29");
UnicodeString bigD("yyyy-DDD.hh:mm:ss");
UnicodeString littleD("yyyy-ddd.hh:mm:ss");
UnicodeString littleD("yyyy-MM-dd.hh:mm:ss");
UDate expLittleD = date(95, 0, 1, 5, 1, 29);
UDate expBigD = expLittleD + 39 * 24 * 3600000.0;
expLittleD = expBigD; // Expect the same, with default lenient parsing
@ -696,14 +698,14 @@ DateFormatTest::TestLetterDPattern212()
ParsePosition pos(0);
UDate myDate = formatter->parse(dateString, pos);
logln((UnicodeString)"Using " + bigD + " -> " + myDate);
if (myDate != expBigD) errln((UnicodeString)"FAIL: Expected " + dateToString(expBigD));
if (myDate != expBigD) errln((UnicodeString)"FAIL: bigD - Expected " + dateToString(expBigD));
delete formatter;
formatter = new SimpleDateFormat(littleD, status);
ASSERT_OK(status);
pos = ParsePosition(0);
myDate = formatter->parse(dateString, pos);
myDate = formatter->parse(ddateString, pos);
logln((UnicodeString)"Using " + littleD + " -> " + dateToString(myDate));
if (myDate != expLittleD) errln((UnicodeString)"FAIL: Expected " + dateToString(expLittleD));
if (myDate != expLittleD) errln((UnicodeString)"FAIL: littleD - Expected " + dateToString(expLittleD));
delete formatter;
if (U_FAILURE(status)) errln((UnicodeString)"FAIL: UErrorCode received during test: " + (int32_t)status);
}
@ -859,6 +861,8 @@ static const char* const parseFormats[] = {
"h:mm a MMMM d, yyyy"
};
#if 0
// strict inputStrings
static const char* const inputStrings[] = {
"bogus string", 0, 0, 0, 0, 0, 0, 0, 0, 0,
"April 1, 1997", "April 1, 1997", 0, 0, 0, 0, 0, "April 1", 0, 0,
@ -872,7 +876,23 @@ static const char* const inputStrings[] = {
"1", 0, 0, 0, 0, 0, 0, 0, "0001", 0,
"3:00 pm Jan 1, 1997", 0, 0, 0, 0, 0, 0, 0, "0003", "3:00 PM January 1, 1997",
};
#else
// lenient inputStrings
static const char* const inputStrings[] = {
"bogus string", 0, 0, 0, 0, 0, 0, 0, 0, 0,
"April 1, 1997", "April 1, 1997", "April 1 1997", "4/1/97", 0, 0, 0, "April 1", 0, 0,
"Jan 1, 1970", "January 1, 1970", "January 1 1970", "1/1/70", 0, 0, 0, "January 1", 0, 0,
"Jan 1 2037", "January 1, 2037", "January 1 2037", "1/1/37", 0, 0, 0, "January 1", 0, 0,
"1/1/70", "January 1, 1970", "January 1 1970", "1/1/70", "1 January, 1970", "1 January 1970", "1 January", "January 1", "0001", 0,
"5 May 1997", 0, 0, 0, "5 May, 1997", "5 May 1997", "5 May", 0, "0005", 0,
"16 May", 0, 0, 0, 0, 0, "16 May", 0, "2016", 0,
"April 30", 0, 0, 0, 0, 0, 0, "April 30", 0, 0,
"1998", 0, 0, 0, 0, 0, 0, 0, "1998", 0,
"1", 0, 0, 0, 0, 0, 0, 0, "0001", 0,
"3:00 pm Jan 1, 1997", 0, 0, 0, 0, 0, 0, 0, "0003", "3:00 PM January 1, 1997",
};
#endif
// -------------------------------------
/**
@ -935,9 +955,9 @@ DateFormatTest::TestBadInput135a()
((DateFormat*)dateParse)->format(date, result);
logln((UnicodeString)"Parsed \"" + s + "\" using \"" + dateParse->toPattern(thePat) + "\" to: " + result);
if (expected == 0)
errln((UnicodeString)"FAIL: Expected parse failure");
errln((UnicodeString)"FAIL: Expected parse failure, got " + result);
else if (!(result == expected))
errln(UnicodeString("FAIL: Expected ") + expected);
errln(UnicodeString("FAIL: Expected ") + expected + UnicodeString(", got ") + result);
}
}
else if (expected != 0) {
@ -1233,12 +1253,38 @@ void DateFormatTest::TestSpaceParsing() {
"yyyy MM dd HH:mm:ss",
// pattern, input, expected parse or NULL if expect parse failure
"MMMM d yy", " 04 05 06", NULL, // MMMM wants Apr/April
NULL, "04 05 06", NULL,
"MM d yy", " 04 05 06", "2006 04 05 00:00:00",
"MMMM d yy", " 04 05 06", "2006 04 05 00:00:00",
NULL, "04 05 06", "2006 04 05 00:00:00",
"MM d yy", " 04 05 06", "2006 04 05 00:00:00",
NULL, "04 05 06", "2006 04 05 00:00:00",
NULL, "04/05/06", "2006 04 05 00:00:00",
NULL, "04-05-06", "2006 04 05 00:00:00",
NULL, "04.05.06", "2006 04 05 00:00:00",
NULL, "04 / 05 / 06", "2006 04 05 00:00:00",
NULL, "Apr / 05/ 06", "2006 04 05 00:00:00",
NULL, "Apr-05-06", "2006 04 05 00:00:00",
NULL, "Apr 05, 2006", "2006 04 05 00:00:00",
"MMMM d yy", " Apr 05 06", "2006 04 05 00:00:00",
NULL, "Apr 05 06", "2006 04 05 00:00:00",
NULL, "Apr05 06", "2006 04 05 00:00:00",
"hh:mm:ss a", "12:34:56 PM", "1970 01 01 12:34:56",
NULL, "12:34:56PM", "1970 01 01 12:34:56",
NULL, "12.34.56PM", "1970 01 01 12:34:56",
NULL, "12-34-56 PM", "1970 01 01 12:34:56",
NULL, "12 : 34 : 56 PM", "1970 01 01 12:34:56",
"MM d yy 'at' hh:mm:ss a", "04/05/06 12:34:56 PM", "2006 04 05 12:34:56",
"MMMM dd yyyy hh:mm a", "September 27, 1964 21:56 PM", "1964 09 28 09:56:00",
NULL, "November 4, 2008 0:13 AM", "2008 11 04 00:13:00",
"HH'h'mm'min'ss's'", "12h34min56s", "1970 01 01 12:34:56",
NULL, "12h34mi56s", "1970 01 01 12:34:56",
NULL, "12h34m56s", "1970 01 01 12:34:56",
NULL, "12:34:56", "1970 01 01 12:34:56"
};
const int32_t DATA_len = sizeof(DATA)/sizeof(DATA[0]);
@ -2032,8 +2078,8 @@ void DateFormatTest::TestZTimeZoneParsing(void) {
pp.setIndex(0);
UDate d = univ.parse(tests[i].input, pp);
if(pp.getIndex() != tests[i].input.length()){
errln("setZoneString() did not succeed. Consumed: %i instead of %i",
pp.getIndex(), tests[i].input.length());
errln("Test %i: setZoneString() did not succeed. Consumed: %i instead of %i",
i, pp.getIndex(), tests[i].input.length());
return;
}
result.remove();
@ -3411,6 +3457,32 @@ void DateFormatTest::Test6880() {
delete fmt;
}
void DateFormatTest::TestNumberAsStringParsing()
{
UErrorCode status = U_ZERO_ERROR;
UnicodeString dateString("2009 7 2 08:14:16");
UnicodeString datePattern("y MMMM d HH:mm:ss");
SimpleDateFormat *formatter = new SimpleDateFormat(datePattern, Locale(""), status);
UDate date1 = 0;
formatter->setLenient(FALSE);
date1 = formatter->parse(dateString, status);
if (U_FAILURE(status)) {
errln("FAIL: Could not parse \"2009 7 2 08:14:16\" with pattern \"y MMMM d HH:mm:ss\"");
} else {
UnicodeString formatted;
formatter->format(date1, formatted);
if (formatted != dateString) {
errln("FAIL: parsed string did not match input.");
}
}
delete formatter;
}
void DateFormatTest::TestISOEra() {
const char* data[] = {

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2010, International Business Machines Corporation and
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -210,6 +210,12 @@ public:
void TestRelativeOther(void);
*/
public:
/**
* Test parsing a number as a string
*/
void TestNumberAsStringParsing(void);
private:
void TestRelative(int daysdelta,
const Locale& loc,

View file

@ -114,6 +114,7 @@ void NumberFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n
CASE(48,TestCurrencyFractionDigits);
CASE(49,TestExponentParse);
CASE(50,TestExplicitParents);
CASE(51,TestLenientParse);
default: name = ""; break;
}
}
@ -849,6 +850,234 @@ NumberFormatTest::TestParse(void)
// -------------------------------------
// Inputs for TestLenientParse: each is parsed by a lenient DecimalFormat with
// pattern "(#,##0)" and is expected to yield the long value 1. The cases vary
// the whitespace between the parentheses and the digit.
static const char *lenientAffixTestCases[] = {
"(1)",
"( 1)",
"(1 )",
"( 1 )"
};
// Inputs for TestLenientParse: each is parsed by lenient sv_SE and en_US
// decimal formats and is expected to yield the long value -5. The cases use
// different minus-like characters (ASCII '-', U+2212, U+2010).
// NOTE(review): the \\uXXXX sequences are presumably expanded by ctou() before
// parsing - confirm against its definition.
static const char *lenientMinusTestCases[] = {
"-5",
"\\u22125",
"\\u20105"
};
// Inputs for TestLenientParse: each is parsed by a lenient en_US currency
// format and is expected to yield the long value 1000. The cases vary spacing
// after the currency sign, the grouping separator, and whether the currency
// sign or fraction digits are present at all.
static const char *lenientCurrencyTestCases[] = {
"$1,000",
"$ 1,000",
"$1000",
"$ 1000",
"$1 000.00",
"$ 1 000.00",
"$ 1\\u00A0000.00",
"1000.00"
};
// Inputs for TestLenientParse: parenthesized (negative) variants of the
// currency cases. Each is parsed by a lenient en_US currency format and is
// expected to yield the long value -1000.
static const char *lenientNegativeCurrencyTestCases[] = {
"($1,000)",
"($ 1,000)",
"($1000)",
"($ 1000)",
"($1 000.00)",
"($ 1 000.00)",
"( $ 1,000.00 )",
"($ 1\\u00A0000.00)",
"(1000.00)"
};
// Inputs for TestLenientParse: each is parsed by a lenient en_US percent
// format and is expected to yield the double value 0.25. The cases vary
// whitespace around the number and include one with no percent sign.
static const char *lenientPercentTestCases[] = {
"25%",
" 25%",
" 25 %",
"25 %",
"25\\u00A0%",
"25"
};
// Inputs for TestLenientParse: negative variants of the percent cases. Each is
// parsed by a lenient en_US percent format and is expected to yield the double
// value -0.25.
static const char *lenientNegativePercentTestCases[] = {
"-25%",
" -25%",
" - 25%",
"- 25 %",
" - 25 %",
"-25 %",
"-25\\u00A0%",
"-25",
"- 25"
};
// Inputs for TestLenientParse that must be rejected by an en_US NumberFormat
// before setLenient(TRUE) is called, and must parse to the long value 1000
// once lenient parsing is switched on.
static const char *strictFailureTestCases[] = {
" 1000",
"10,00",
"1,000,.0"
};
// Number of elements in a true array, as an int32_t. Do not pass a pointer:
// sizeof would then measure the pointer, not the array.
#define ARRAY_SIZE(array) ((int32_t) (sizeof (array) / sizeof(array[0])))
/**
* Test lenient parsing.
*/
void
NumberFormatTest::TestLenientParse(void)
{
UErrorCode status = U_ZERO_ERROR;
DecimalFormat *format = new DecimalFormat("(#,##0)", status);
Formattable n;
format->setLenient(TRUE);
for (int32_t t = 0; t < ARRAY_SIZE (lenientAffixTestCases); t += 1) {
UnicodeString testCase = ctou(lenientAffixTestCases[t]);
format->parse(testCase, n, status);
logln((UnicodeString)"parse(" + testCase + ") = " + n.getLong());
if (U_FAILURE(status) || n.getType() != Formattable::kLong ||
n.getLong() != 1) {
errln((UnicodeString)"Lenient parse failed for \"" + (UnicodeString) lenientAffixTestCases[t] + (UnicodeString) "\"");
status = U_ZERO_ERROR;
}
}
delete format;
Locale en_US("en_US");
Locale sv_SE("sv_SE");
NumberFormat *mFormat = NumberFormat::createInstance(sv_SE, UNUM_DECIMAL, status);
mFormat->setLenient(TRUE);
for (int32_t t = 0; t < ARRAY_SIZE(lenientMinusTestCases); t += 1) {
UnicodeString testCase = ctou(lenientMinusTestCases[t]);
mFormat->parse(testCase, n, status);
logln((UnicodeString)"parse(" + testCase + ") = " + n.getLong());
if (U_FAILURE(status) || n.getType() != Formattable::kLong || n.getLong() != -5) {
errln((UnicodeString)"Lenient parse failed for \"" + (UnicodeString) lenientMinusTestCases[t] + (UnicodeString) "\"");
status = U_ZERO_ERROR;
}
}
delete mFormat;
mFormat = NumberFormat::createInstance(en_US, UNUM_DECIMAL, status);
mFormat->setLenient(TRUE);
for (int32_t t = 0; t < ARRAY_SIZE(lenientMinusTestCases); t += 1) {
UnicodeString testCase = ctou(lenientMinusTestCases[t]);
mFormat->parse(testCase, n, status);
logln((UnicodeString)"parse(" + testCase + ") = " + n.getLong());
if (U_FAILURE(status) || n.getType() != Formattable::kLong || n.getLong() != -5) {
errln((UnicodeString)"Lenient parse failed for \"" + (UnicodeString) lenientMinusTestCases[t] + (UnicodeString) "\"");
status = U_ZERO_ERROR;
}
}
delete mFormat;
NumberFormat *cFormat = NumberFormat::createInstance(en_US, UNUM_CURRENCY, status);
cFormat->setLenient(TRUE);
for (int32_t t = 0; t < ARRAY_SIZE (lenientCurrencyTestCases); t += 1) {
UnicodeString testCase = ctou(lenientCurrencyTestCases[t]);
cFormat->parse(testCase, n, status);
logln((UnicodeString)"parse(" + testCase + ") = " + n.getLong());
if (U_FAILURE(status) ||n.getType() != Formattable::kLong ||
n.getLong() != 1000) {
errln((UnicodeString)"Lenient parse failed for \"" + (UnicodeString) lenientCurrencyTestCases[t] + (UnicodeString) "\"");
status = U_ZERO_ERROR;
}
}
for (int32_t t = 0; t < ARRAY_SIZE (lenientNegativeCurrencyTestCases); t += 1) {
UnicodeString testCase = ctou(lenientNegativeCurrencyTestCases[t]);
cFormat->parse(testCase, n, status);
logln((UnicodeString)"parse(" + testCase + ") = " + n.getLong());
if (U_FAILURE(status) ||n.getType() != Formattable::kLong ||
n.getLong() != -1000) {
errln((UnicodeString)"Lenient parse failed for \"" + (UnicodeString) lenientNegativeCurrencyTestCases[t] + (UnicodeString) "\"");
status = U_ZERO_ERROR;
}
}
delete cFormat;
NumberFormat *pFormat = NumberFormat::createPercentInstance(en_US, status);
pFormat->setLenient(TRUE);
for (int32_t t = 0; t < ARRAY_SIZE (lenientPercentTestCases); t += 1) {
UnicodeString testCase = ctou(lenientPercentTestCases[t]);
pFormat->parse(testCase, n, status);
logln((UnicodeString)"parse(" + testCase + ") = " + n.getDouble());
if (U_FAILURE(status) ||n.getType() != Formattable::kDouble ||
n.getDouble() != 0.25) {
errln((UnicodeString)"Lenient parse failed for \"" + (UnicodeString) lenientPercentTestCases[t] + (UnicodeString) "\"");
status = U_ZERO_ERROR;
}
}
for (int32_t t = 0; t < ARRAY_SIZE (lenientNegativePercentTestCases); t += 1) {
UnicodeString testCase = ctou(lenientNegativePercentTestCases[t]);
pFormat->parse(testCase, n, status);
logln((UnicodeString)"parse(" + testCase + ") = " + n.getDouble());
if (U_FAILURE(status) ||n.getType() != Formattable::kDouble ||
n.getDouble() != -0.25) {
errln((UnicodeString)"Lenient parse failed for \"" + (UnicodeString) lenientNegativePercentTestCases[t] + (UnicodeString) "\"");
status = U_ZERO_ERROR;
}
}
delete pFormat;
// Test cases that should fail with a strict parse and pass with a
// lenient parse.
NumberFormat *nFormat = NumberFormat::createInstance(en_US, status);
// first, make sure that they fail with a strict parse
for (int32_t t = 0; t < ARRAY_SIZE(strictFailureTestCases); t += 1) {
UnicodeString testCase = ctou(strictFailureTestCases[t]);
nFormat->parse(testCase, n, status);
logln((UnicodeString)"parse(" + testCase + ") = " + n.getLong());
if (! U_FAILURE(status)) {
errln((UnicodeString)"Strict Parse succeeded for \"" + (UnicodeString) strictFailureTestCases[t] + (UnicodeString) "\"");
}
status = U_ZERO_ERROR;
}
// then, make sure that they pass with a lenient parse
nFormat->setLenient(TRUE);
for (int32_t t = 0; t < ARRAY_SIZE(strictFailureTestCases); t += 1) {
UnicodeString testCase = ctou(strictFailureTestCases[t]);
nFormat->parse(testCase, n, status);
logln((UnicodeString)"parse(" + testCase + ") = " + n.getLong());
if (U_FAILURE(status) ||n.getType() != Formattable::kLong ||
n.getLong() != 1000) {
errln((UnicodeString)"Lenient parse failed for \"" + (UnicodeString) strictFailureTestCases[t] + (UnicodeString) "\"");
status = U_ZERO_ERROR;
}
}
delete nFormat;
}
// -------------------------------------
/**
* Test proper rounding by the format method.
*/
@ -2630,40 +2859,60 @@ void NumberFormatTest::TestNonpositiveMultiplier() {
//expect2(df, java.math.BigDecimal.valueOf(Long.MIN_VALUE), java.math.BigDecimal.valueOf(Long.MIN_VALUE).negate().toString());
}
// One table row for NumberFormatTest::TestSpaceParsing.
typedef struct {
// Text to parse; it is passed through ctou() before being handed to parse().
const char * stringToParse;
// Expected ParsePosition::getIndex() after the parse.
int parsedPos;
// Expected ParsePosition::getErrorIndex() after the parse.
int errorIndex;
// Value passed to setLenient() on the formatter before parsing this row.
UBool lenient;
} TestSpaceParsingItem;
void
NumberFormatTest::TestSpaceParsing() {
// the data are:
// the string to be parsed, parsed position, parsed error index
const char* DATA[][3] = {
{"$124", "4", "-1"},
{"$124 $124", "4", "-1"},
{"$124 ", "4", "-1"},
//{"$ 124 ", "5", "-1"}, // TODO: need to handle space correctly
//{"$\\u00A0124 ", "5", "-1"}, // TODO: need to handle space correctly
{"$ 124 ", "0", "0"},
{"$\\u00A0124 ", "0", "0"},
{" $ 124 ", "0", "0"}, // TODO: need to handle space correctly
{"124$", "0", "3"}, // TODO: need to handle space correctly
// {"124 $", "5", "-1"}, TODO: OK or not, need currency spacing rule
{"124 $", "0", "3"},
const TestSpaceParsingItem DATA[] = {
// TODO: Update the following TODOs, some may be handled now
{"$124", 4, -1, FALSE},
{"$124 $124", 4, -1, FALSE},
{"$124 ", 4, -1, FALSE},
//{"$ 124 ", 5, -1, FALSE}, // TODO: need to handle space correctly
//{"$\\u00A0124 ", 5, -1, FALSE}, // TODO: need to handle space correctly
{"$ 124 ", 0, 1, FALSE}, // errorIndex used to be 0, now 1 (better)
{"$\\u00A0124 ", 0, 1, FALSE}, // errorIndex used to be 0, now 1 (better)
{" $ 124 ", 0, 0, FALSE}, // TODO: need to handle space correctly
{"124$", 0, 3, FALSE}, // TODO: need to handle space correctly
// {"124 $", 5, -1, FALSE}, // TODO: OK or not, need currency spacing rule
{"124 $", 0, 3, FALSE},
{"$124", 4, -1, TRUE},
{"$124 $124", 4, -1, TRUE},
{"$124 ", 4, -1, TRUE},
{"$ 124 ", 5, -1, TRUE},
{"$\\u00A0124 ", 5, -1, TRUE},
{" $ 124 ", 6, -1, TRUE},
//{"124$", 4, -1, TRUE}, // TODO: need to handle trailing currency correctly
{"124$", 3, -1, TRUE},
//{"124 $", 5, -1, TRUE}, // TODO: OK or not, need currency spacing rule
{"124 $", 4, -1, TRUE},
};
UErrorCode status = U_ZERO_ERROR;
NumberFormat* foo = NumberFormat::createCurrencyInstance(status);
Locale locale("en_US");
NumberFormat* foo = NumberFormat::createCurrencyInstance(locale, status);
if (U_FAILURE(status)) {
delete foo;
return;
}
for (uint32_t i = 0; i < sizeof(DATA)/sizeof(DATA[0]); ++i) {
ParsePosition parsePosition(0);
UnicodeString stringToBeParsed = ctou(DATA[i][0]);
int parsedPosition = atoi(DATA[i][1]);
int errorIndex = atoi(DATA[i][2]);
UnicodeString stringToBeParsed = ctou(DATA[i].stringToParse);
int parsedPosition = DATA[i].parsedPos;
int errorIndex = DATA[i].errorIndex;
foo->setLenient(DATA[i].lenient);
Formattable result;
foo->parse(stringToBeParsed, result, parsePosition);
if (parsePosition.getIndex() != parsedPosition ||
parsePosition.getErrorIndex() != errorIndex) {
errln("FAILED parse " + stringToBeParsed + "; wrong position, expected: (" + parsedPosition + ", " + errorIndex + "); got (" + parsePosition.getIndex() + ", " + parsePosition.getErrorIndex() + ")");
errln("FAILED parse " + stringToBeParsed + "; lenient: " + DATA[i].lenient + "; wrong position, expected: (" + parsedPosition + ", " + errorIndex + "); got (" + parsePosition.getIndex() + ", " + parsePosition.getErrorIndex() + ")");
}
if (parsePosition.getErrorIndex() == -1 &&
result.getType() == Formattable::kLong &&
@ -6198,7 +6447,7 @@ void NumberFormatTest::TestExponentParse() {
// create format instance
status = U_ZERO_ERROR;
DecimalFormat fmt("#####", symbols, status);
if(U_FAILURE(status)) {
if(U_FAILURE(status)) {
errln((UnicodeString)"ERROR: Could not create DecimalFormat (pattern, symbols*)");
}

View file

@ -145,6 +145,8 @@ class NumberFormatTest: public CalendarTimeZoneTest {
void TestFormatAttributes();
void TestFieldPositionIterator();
void TestLenientParse();
void TestDecimal();
void TestCurrencyFractionDigits();