mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-18 19:25:49 +00:00
ICU-2119 rewrite parsing of padding to work property; redo and fix parsing of surrogates and variable-length elements
X-SVN-Rev: 11609
This commit is contained in:
parent
b8bac331e6
commit
4a553638ef
2 changed files with 67 additions and 29 deletions
|
@ -1056,29 +1056,33 @@ DecimalFormat::parse(const UnicodeString& text,
|
|||
Formattable& result,
|
||||
ParsePosition& parsePosition) const
|
||||
{
|
||||
int32_t backup = parsePosition.getIndex();
|
||||
int32_t i;
|
||||
int32_t padLen = fPad.length();
|
||||
int32_t backup;
|
||||
int32_t i = backup = parsePosition.getIndex();
|
||||
|
||||
// Skip padding characters, if any
|
||||
if (fFormatWidth > 0) {
|
||||
i = parsePosition.getIndex();
|
||||
while (i < text.length() && !text.compare(i, padLen, fPad, 0, padLen)) {
|
||||
i += padLen;
|
||||
}
|
||||
parsePosition.setIndex(i);
|
||||
// Handle NaN as a special case:
|
||||
|
||||
// Skip padding characters, if around prefix
|
||||
if (fFormatWidth > 0 && (fPadPosition == kPadBeforePrefix ||
|
||||
fPadPosition == kPadAfterPrefix)) {
|
||||
i = skipPadding(text, i);
|
||||
}
|
||||
|
||||
// special case NaN
|
||||
// If the text is composed of the representation of NaN, returns NaN.length
|
||||
const UnicodeString *nan = &getConstSymbol(DecimalFormatSymbols::kNaNSymbol);
|
||||
int32_t nanLen = (text.compare(parsePosition.getIndex(), nan->length(), *nan)
|
||||
? 0 : nan->length());
|
||||
int32_t nanLen = (text.compare(i, nan->length(), *nan)
|
||||
? 0 : nan->length());
|
||||
if (nanLen) {
|
||||
parsePosition.setIndex(parsePosition.getIndex() + nanLen);
|
||||
i += nanLen;
|
||||
if (fFormatWidth > 0 && (fPadPosition == kPadBeforeSuffix ||
|
||||
fPadPosition == kPadAfterSuffix)) {
|
||||
i = skipPadding(text, i);
|
||||
}
|
||||
parsePosition.setIndex(i);
|
||||
result.setDouble(uprv_getNaN());
|
||||
return;
|
||||
}
|
||||
|
||||
// NaN parse failed; start over
|
||||
i = backup;
|
||||
|
||||
// status is used to record whether a number is infinite.
|
||||
UBool status[fgStatusLength];
|
||||
|
@ -1088,13 +1092,6 @@ DecimalFormat::parse(const UnicodeString& text,
|
|||
parsePosition.setIndex(backup);
|
||||
return;
|
||||
}
|
||||
if (fFormatWidth < 0) {
|
||||
i = parsePosition.getIndex();
|
||||
while (i < text.length() && !text.compare(i, padLen, fPad, 0, padLen)) {
|
||||
i += padLen;
|
||||
}
|
||||
parsePosition.setIndex(i);
|
||||
}
|
||||
|
||||
// Handle infinity
|
||||
if (status[fgStatusInfinite]) {
|
||||
|
@ -1161,6 +1158,11 @@ UBool DecimalFormat::subparse(const UnicodeString& text, ParsePosition& parsePos
|
|||
int32_t position = parsePosition.getIndex();
|
||||
int32_t oldStart = position;
|
||||
|
||||
// Match padding before prefix
|
||||
if (fFormatWidth > 0 && fPadPosition == kPadBeforePrefix) {
|
||||
position = skipPadding(text, position);
|
||||
}
|
||||
|
||||
// Match positive and negative prefixes; prefer longest match.
|
||||
int32_t posMatch = compareAffix(fPositivePrefix, text, position);
|
||||
int32_t negMatch = compareAffix(fNegativePrefix, text, position);
|
||||
|
@ -1180,6 +1182,11 @@ UBool DecimalFormat::subparse(const UnicodeString& text, ParsePosition& parsePos
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
// Match padding before prefix
|
||||
if (fFormatWidth > 0 && fPadPosition == kPadAfterPrefix) {
|
||||
position = skipPadding(text, position);
|
||||
}
|
||||
|
||||
// process digits or Inf, find decimal position
|
||||
const UnicodeString *inf = &getConstSymbol(DecimalFormatSymbols::kInfinitySymbol);
|
||||
int32_t infLen = (text.compare(position, inf->length(), *inf)
|
||||
|
@ -1209,7 +1216,6 @@ UBool DecimalFormat::subparse(const UnicodeString& text, ParsePosition& parsePos
|
|||
UBool sawDecimal = FALSE;
|
||||
UBool sawDigit = FALSE;
|
||||
int32_t backup = -1;
|
||||
UChar32 ch;
|
||||
int32_t digit;
|
||||
int32_t textLength = text.length(); // One less pointer to follow
|
||||
int32_t groupingLen = grouping->length();
|
||||
|
@ -1219,9 +1225,9 @@ UBool DecimalFormat::subparse(const UnicodeString& text, ParsePosition& parsePos
|
|||
// pin when the maximum allowable digits is reached.
|
||||
int32_t digitCount = 0;
|
||||
|
||||
for (; position < textLength; position += 1 + UTF_NEED_MULTIPLE_UCHAR(ch))
|
||||
for (; position < textLength; )
|
||||
{
|
||||
ch = text.char32At(position);
|
||||
UChar32 ch = text.char32At(position);
|
||||
|
||||
/* We recognize all digit ranges, not only the Latin digit range
|
||||
* '0'..'9'. We do so by using the Character.digit() method,
|
||||
|
@ -1249,6 +1255,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text, ParsePosition& parsePos
|
|||
// output a regular non-zero digit.
|
||||
++digitCount;
|
||||
digits.append((char)(digit + '0'));
|
||||
position += U16_LENGTH(ch);
|
||||
}
|
||||
else if (digit == 0)
|
||||
{
|
||||
|
@ -1271,6 +1278,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text, ParsePosition& parsePos
|
|||
--digits.fDecimalAt;
|
||||
}
|
||||
// else ignore leading zeros in integer part of number.
|
||||
position += U16_LENGTH(ch);
|
||||
}
|
||||
else if (!text.compare(position, groupingLen, *grouping) && isGroupingUsed())
|
||||
{
|
||||
|
@ -1278,6 +1286,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text, ParsePosition& parsePos
|
|||
// that they be followed by a digit. Otherwise we backup and
|
||||
// reprocess them.
|
||||
backup = position;
|
||||
position += groupingLen;
|
||||
}
|
||||
else if (!text.compare(position, decimalLen, *decimal) && !isParseIntegerOnly() && !sawDecimal)
|
||||
{
|
||||
|
@ -1286,6 +1295,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text, ParsePosition& parsePos
|
|||
|
||||
digits.fDecimalAt = digitCount; // Not digits.fCount!
|
||||
sawDecimal = TRUE;
|
||||
position += decimalLen;
|
||||
}
|
||||
else {
|
||||
const UnicodeString *tmp;
|
||||
|
@ -1293,7 +1303,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text, ParsePosition& parsePos
|
|||
if (!text.caseCompare(position, tmp->length(), *tmp, U_FOLD_CASE_DEFAULT)) // error code is set below if !sawDigit
|
||||
{
|
||||
// Parse sign, if present
|
||||
int32_t pos = position + 1; // position + exponentSep.length();
|
||||
int32_t pos = position + tmp->length();
|
||||
DigitList exponentDigits;
|
||||
|
||||
if (pos < textLength)
|
||||
|
@ -1301,13 +1311,13 @@ UBool DecimalFormat::subparse(const UnicodeString& text, ParsePosition& parsePos
|
|||
tmp = &getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
|
||||
if (!text.compare(pos, tmp->length(), *tmp))
|
||||
{
|
||||
++pos;
|
||||
pos += tmp->length();
|
||||
}
|
||||
else {
|
||||
tmp = &getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
|
||||
if (!text.compare(pos, tmp->length(), *tmp))
|
||||
{
|
||||
++pos;
|
||||
pos += tmp->length();
|
||||
exponentDigits.fIsPositive = FALSE;
|
||||
}
|
||||
}
|
||||
|
@ -1364,6 +1374,11 @@ UBool DecimalFormat::subparse(const UnicodeString& text, ParsePosition& parsePos
|
|||
}
|
||||
}
|
||||
|
||||
// Match padding before suffix
|
||||
if (fFormatWidth > 0 && fPadPosition == kPadBeforeSuffix) {
|
||||
position = skipPadding(text, position);
|
||||
}
|
||||
|
||||
// Match positive and negative suffixes; prefer longest match.
|
||||
if (posMatch >= 0) {
|
||||
posMatch = compareAffix(fPositiveSuffix, text, position);
|
||||
|
@ -1385,7 +1400,14 @@ UBool DecimalFormat::subparse(const UnicodeString& text, ParsePosition& parsePos
|
|||
return FALSE;
|
||||
}
|
||||
|
||||
parsePosition.setIndex(position + (posMatch>=0 ? posMatch : negMatch));
|
||||
position += (posMatch>=0 ? posMatch : negMatch);
|
||||
|
||||
// Match padding before suffix
|
||||
if (fFormatWidth > 0 && fPadPosition == kPadAfterSuffix) {
|
||||
position = skipPadding(text, position);
|
||||
}
|
||||
|
||||
parsePosition.setIndex(position);
|
||||
|
||||
digits.fIsPositive = (posMatch >= 0);
|
||||
|
||||
|
@ -1397,6 +1419,20 @@ UBool DecimalFormat::subparse(const UnicodeString& text, ParsePosition& parsePos
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Starting at position, advance past a run of pad characters, if any.
|
||||
* Return the index of the first character after position that is not a pad
|
||||
* character. Result is >= position.
|
||||
*/
|
||||
int32_t DecimalFormat::skipPadding(const UnicodeString& text, int32_t position) const {
|
||||
int32_t padLen = fPad.length();
|
||||
while (position < text.length() &&
|
||||
text.compare(position, padLen, fPad) == 0) {
|
||||
position += padLen;
|
||||
}
|
||||
return position;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the length matched by the given affix, or -1 if none.
|
||||
* Runs of white space in the affix, match runs of white space in
|
||||
|
|
|
@ -1187,6 +1187,8 @@ private:
|
|||
UBool subparse(const UnicodeString& text, ParsePosition& parsePosition,
|
||||
DigitList& digits, UBool* status) const;
|
||||
|
||||
int32_t skipPadding(const UnicodeString& text, int32_t position) const;
|
||||
|
||||
static int32_t compareAffix(const UnicodeString& affix,
|
||||
const UnicodeString& input,
|
||||
int32_t pos);
|
||||
|
|
Loading…
Add table
Reference in a new issue