ICU-13391 Change ICU4C parsing to count digits instead of UTF-16 code units for grouping sizes.

X-SVN-Rev: 40573
This commit is contained in:
Shane Carr 2017-10-05 21:41:46 +00:00
parent 0b5b91a70b
commit e6987fbfd8
8 changed files with 64 additions and 17 deletions

View file

@ -1423,8 +1423,8 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
UBool strictFail = FALSE; // did we exit with a strict parse failure?
int32_t lastGroup = -1; // where did we last see a grouping separator?
int32_t digitStart = position;
int32_t lastGroup = -1; // after which digit index did we last see a grouping separator?
int32_t currGroup = -1; // for temporary storage the digit index of the current grouping separator
int32_t gs2 = fImpl->fEffGrouping.fGrouping2 == 0 ? fImpl->fEffGrouping.fGrouping : fImpl->fEffGrouping.fGrouping2;
const UnicodeString *decimalString;
@ -1513,16 +1513,17 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
// before that, the group must == the secondary group
// length, else it can be <= the the secondary group
// length.
if ((lastGroup != -1 && backup - lastGroup - 1 != gs2) ||
(lastGroup == -1 && position - digitStart - 1 > gs2)) {
if ((lastGroup != -1 && currGroup - lastGroup != gs2) ||
(lastGroup == -1 && digitCount - 1 > gs2)) {
strictFail = TRUE;
break;
}
lastGroup = backup;
lastGroup = currGroup;
}
// Cancel out backup setting (see grouping handler below)
currGroup = -1;
backup = -1;
sawDigit = TRUE;
@ -1561,6 +1562,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
// Ignore grouping characters, if we are using them, but require
// that they be followed by a digit. Otherwise we backup and
// reprocess them.
currGroup = digitCount;
backup = position;
position += groupingStringLength;
sawGrouping=TRUE;
@ -1571,7 +1573,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
{
if (strictParse) {
if (backup != -1 ||
(lastGroup != -1 && position - lastGroup != fImpl->fEffGrouping.fGrouping + 1)) {
(lastGroup != -1 && digitCount - lastGroup != fImpl->fEffGrouping.fGrouping)) {
strictFail = TRUE;
break;
}
@ -1622,7 +1624,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
UBool sawExponentDigit = FALSE;
while (pos < textLength) {
ch = text[(int32_t)pos];
ch = text.char32At(pos);
digit = ch - zero;
if (digit < 0 || digit > 9) {
@ -1634,7 +1636,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
parsedNum.append(exponentSign, err);
sawExponentDigit = TRUE;
}
++pos;
pos += U16_LENGTH(ch);
parsedNum.append((char)(digit + '0'), err);
} else {
break;
@ -1673,7 +1675,7 @@ UBool DecimalFormat::subparse(const UnicodeString& text,
}
if (strictParse && !sawDecimal) {
if (lastGroup != -1 && position - lastGroup != fImpl->fEffGrouping.fGrouping + 1) {
if (lastGroup != -1 && digitCount - lastGroup != fImpl->fEffGrouping.fGrouping) {
strictFail = TRUE;
}
}

View file

@ -123,9 +123,6 @@ NumberFormatRoundTripTest::start()
logln("Quick mode: only testing first 5 Locales");
}
for(int i = 0; i < locCount; ++i) {
if (uprv_strcmp(loc[i].getLanguage(),"ccp")==0 && logKnownIssue("13391", "Skip handling ccp until NumberFormat parsing is fixed")) {
continue;
}
UnicodeString name;
logln(loc[i].getDisplayName(name));

View file

@ -622,6 +622,7 @@ void NumberFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n
TESTCASE_AUTO(Test11640_getAffixes);
TESTCASE_AUTO(Test11649_toPatternWithMultiCurrency);
TESTCASE_AUTO(Test13327_numberingSystemBufferOverflow);
TESTCASE_AUTO(Test13391_chakmaParsing);
TESTCASE_AUTO_END;
}
@ -8807,6 +8808,36 @@ void NumberFormatTest::Test13327_numberingSystemBufferOverflow() {
}
}
void NumberFormatTest::Test13391_chakmaParsing() {
UErrorCode status = U_ZERO_ERROR;
LocalPointer<DecimalFormat> df(static_cast<DecimalFormat*>(
NumberFormat::createInstance(Locale("ccp"), status)));
const UChar* expected = u"\U00011137\U00011138,\U00011139\U0001113A\U0001113B";
UnicodeString actual;
df->format(12345, actual, status);
assertSuccess("Should not fail when formatting in ccp", status);
assertEquals("Should produce expected output in ccp", expected, actual);
Formattable result;
df->parse(expected, result, status);
assertSuccess("Should not fail when parsing in ccp", status);
assertEquals("Should parse to 12345 in ccp", 12345, result);
const UChar* expectedScientific = u"\U00011137.\U00011139E\U00011138";
UnicodeString actualScientific;
df.adoptInstead(static_cast<DecimalFormat*>(
NumberFormat::createScientificInstance(Locale("ccp"), status)));
df->format(130, actualScientific, status);
assertSuccess("Should not fail when formatting scientific in ccp", status);
assertEquals("Should produce expected scientific output in ccp",
expectedScientific, actualScientific);
Formattable resultScientific;
df->parse(expectedScientific, resultScientific, status);
assertSuccess("Should not fail when parsing scientific in ccp", status);
assertEquals("Should parse scientific to 130 in ccp", 130, resultScientific);
}
void NumberFormatTest::verifyFieldPositionIterator(
NumberFormatTest_Attributes *expected, FieldPositionIterator &iter) {

View file

@ -216,6 +216,7 @@ class NumberFormatTest: public CalendarTimeZoneTest {
void Test11640_getAffixes();
void Test11649_toPatternWithMultiCurrency();
void Test13327_numberingSystemBufferOverflow();
void Test13391_chakmaParsing();
void checkExceptionIssue11735();

View file

@ -442,9 +442,6 @@ void IntlTestNumberFormat::monsterTest(/* char* par */)
}
for (int32_t i=0; i<count; ++i)
{
if (uprv_strcmp(locales[i].getLanguage(),"ccp")==0 && logKnownIssue("13391", "Skip handling ccp until NumberFormat parsing is fixed")) {
continue;
}
UnicodeString name(locales[i].getName(), "");
logln(SEP);
testLocale(/* par, */locales[i], name);

View file

@ -839,7 +839,7 @@ parse output breaks
(63,425) -63425
// JDK and S allow separators in sci notation and parses as -342.5
// C passes
(63,425E-1) fail KS
(63,425E-1) fail CKS
// Both prefix and suffix needed for strict.
// JDK accepts this and parses as -342.5
(3425E-1 fail K

View file

@ -839,7 +839,7 @@ parse output breaks
(63,425) -63425
// JDK and S allow separators in sci notation and parses as -342.5
// C passes
(63,425E-1) fail KS
(63,425E-1) fail CKS
// Both prefix and suffix needed for strict.
// JDK accepts this and parses as -342.5
(3425E-1 fail K

View file

@ -5266,6 +5266,25 @@ public class NumberFormatTest extends TestFmwk {
new DecimalFormat("000000000.0#E0").format(10000000.76d));
}
@Test
public void Test13391() throws ParseException {
DecimalFormat df = (DecimalFormat) NumberFormat.getInstance(new ULocale("ccp"));
df.setParseStrict(true);
String expected = "\uD804\uDD37\uD804\uDD38,\uD804\uDD39\uD804\uDD3A\uD804\uDD3B";
assertEquals("Should produce expected output in ccp", expected, df.format(12345));
Number result = df.parse(expected);
assertEquals("Should parse to 12345 in ccp", 12345, result.longValue());
df = (DecimalFormat) NumberFormat.getScientificInstance(new ULocale("ccp"));
df.setParseStrict(true);
String expectedScientific = "\uD804\uDD37.\uD804\uDD39E\uD804\uDD38";
assertEquals("Should produce expected scientific output in ccp",
expectedScientific, df.format(130));
Number resultScientific = df.parse(expectedScientific);
assertEquals("Should parse scientific to 130 in ccp",
130, resultScientific.longValue());
}
@Test
public void testPercentZero() {
DecimalFormat df = (DecimalFormat) NumberFormat.getPercentInstance();