ICU-21340 Don't coalesce adjacent list formatter fields in ICU4C

See #1425
This commit is contained in:
Shane F. Carr 2020-10-22 07:02:22 +00:00 committed by Shane F. Carr
parent 87643423be
commit 86f00ad7fb
5 changed files with 127 additions and 47 deletions

View file

@ -117,6 +117,12 @@ private:
};
// Internal struct that must be exported for MSVC
struct U_I18N_API SpanInfo {
int32_t spanValue;
int32_t length;
};
// Export an explicit template instantiation of the MaybeStackArray that
// is used as a data member of CEBuffer.
//
@ -126,7 +132,7 @@ private:
// See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.
//
#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
template class U_I18N_API MaybeStackArray<int32_t, 8>;
template class U_I18N_API MaybeStackArray<SpanInfo, 8>;
#endif
/**
@ -162,13 +168,19 @@ public:
return fString;
}
void appendSpanIndex(int32_t index);
void prependSpanIndex(int32_t index);
/**
* Adds additional metadata used for span fields.
*
* spanValue: the index of the list item, for example.
* length: the length of the span, used to split adjacent fields.
*/
void appendSpanInfo(int32_t spanValue, int32_t length);
void prependSpanInfo(int32_t spanValue, int32_t length);
private:
FormattedStringBuilder fString;
FormattedStringBuilder::Field fNumericField;
MaybeStackArray<int32_t, 8> spanIndices;
MaybeStackArray<SpanInfo, 8> spanIndices;
bool nextPositionImpl(ConstrainedFieldPosition& cfpos, FormattedStringBuilder::Field numericField, UErrorCode& status) const;
static bool isIntOrGroup(FormattedStringBuilder::Field field);

View file

@ -46,19 +46,19 @@ Appendable& FormattedValueStringBuilderImpl::appendTo(Appendable& appendable, UE
UBool FormattedValueStringBuilderImpl::nextPosition(ConstrainedFieldPosition& cfpos, UErrorCode& status) const {
// NOTE: MSVC sometimes complains when implicitly converting between bool and UBool
return nextPositionImpl(cfpos, fNumericField, status) ? TRUE : FALSE;
return nextPositionImpl(cfpos, fNumericField, status) ? true : false;
}
UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErrorCode& status) const {
int32_t rawField = fp.getField();
if (rawField == FieldPosition::DONT_CARE) {
return FALSE;
return false;
}
if (rawField < 0 || rawField >= UNUM_FIELD_COUNT) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return FALSE;
return false;
}
ConstrainedFieldPosition cfpos;
@ -67,7 +67,7 @@ UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErr
if (nextPositionImpl(cfpos, kUndefinedField, status)) {
fp.setBeginIndex(cfpos.getStart());
fp.setEndIndex(cfpos.getLimit());
return TRUE;
return true;
}
// Special case: fraction should start after integer if fraction is not present
@ -85,7 +85,7 @@ UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErr
fp.setEndIndex(i - fString.fZero);
}
return FALSE;
return false;
}
void FormattedValueStringBuilderImpl::getAllFieldPositions(FieldPositionIteratorHandler& fpih,
@ -103,23 +103,12 @@ static constexpr Field kEndField = Field(0xf, 0xf);
bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& cfpos, Field numericField, UErrorCode& /*status*/) const {
int32_t fieldStart = -1;
Field currField = kUndefinedField;
UFieldCategory spanCategory = UFIELD_CATEGORY_UNDEFINED;
int32_t spanValue;
for (int32_t i = fString.fZero + cfpos.getLimit(); i <= fString.fZero + fString.fLength; i++) {
Field _field = (i < fString.fZero + fString.fLength) ? fString.getFieldPtr()[i] : kEndField;
// Case 1: currently scanning a field.
if (currField != kUndefinedField) {
if (currField != _field) {
int32_t end = i - fString.fZero;
// Handle span fields; don't trim them
if (spanCategory != UFIELD_CATEGORY_UNDEFINED) {
cfpos.setState(
spanCategory,
spanValue,
fieldStart,
end);
return true;
}
// Grouping separators can be whitespace; don't throw them out!
if (isTrimmable(currField)) {
end = trimBack(i - fString.fZero);
@ -182,13 +171,11 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition&
if (elementField == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)
&& cfpos.matchesField(elementField.getCategory(), elementField.getField())
&& (cfpos.getLimit() < i - fString.fZero || cfpos.getCategory() != elementField.getCategory())) {
// Re-wind to the beginning of the field and then emit it
int32_t j = i - 1;
for (; j >= fString.fZero && fString.getFieldPtr()[j] == fString.getFieldPtr()[i-1]; j--) {}
int64_t si = cfpos.getInt64IterationContext() - 1;
cfpos.setState(
elementField.getCategory(),
elementField.getField(),
j - fString.fZero + 1,
i - fString.fZero - spanIndices[si].length,
i - fString.fZero);
return true;
}
@ -203,22 +190,28 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition&
}
// Case 3: check for field starting at this position
// Case 3a: Need to add a SpanField
if (_field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)
// don't return the same field twice in a row:
&& (i == fString.fZero
|| fString.getFieldPtr()[i-1].getCategory() != UFIELD_CATEGORY_LIST
|| fString.getFieldPtr()[i-1].getField() != ULISTFMT_ELEMENT_FIELD)) {
if (_field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) {
int64_t si = cfpos.getInt64IterationContext();
spanValue = spanIndices[si];
int32_t spanValue = spanIndices[si].spanValue;
int32_t length = spanIndices[si].length;
cfpos.setInt64IterationContext(si + 1);
if (cfpos.matchesField(UFIELD_CATEGORY_LIST_SPAN, spanValue)) {
spanCategory = UFIELD_CATEGORY_LIST_SPAN;
UFieldCategory spanCategory = UFIELD_CATEGORY_LIST_SPAN;
fieldStart = i - fString.fZero;
currField = _field;
int32_t end = fieldStart + length;
cfpos.setState(
spanCategory,
spanValue,
fieldStart,
end);
return true;
} else {
// Failed to match; jump ahead
i += length - 1;
continue;
}
}
// Case 3b: No SpanField or SpanField did not match
// Case 3b: No SpanField
if (cfpos.matchesField(_field.getCategory(), _field.getField())) {
fieldStart = i - fString.fZero;
currField = _field;
@ -229,21 +222,21 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition&
return false;
}
void FormattedValueStringBuilderImpl::appendSpanIndex(int32_t position) {
if (spanIndices.getCapacity() <= position) {
spanIndices.resize(position * 2);
void FormattedValueStringBuilderImpl::appendSpanInfo(int32_t spanValue, int32_t length) {
if (spanIndices.getCapacity() <= spanValue) {
spanIndices.resize(spanValue * 2);
}
spanIndices[position] = position;
spanIndices[spanValue] = {spanValue, length};
}
void FormattedValueStringBuilderImpl::prependSpanIndex(int32_t position) {
if (spanIndices.getCapacity() <= position) {
spanIndices.resize(position * 2);
void FormattedValueStringBuilderImpl::prependSpanInfo(int32_t spanValue, int32_t length) {
if (spanIndices.getCapacity() <= spanValue) {
spanIndices.resize(spanValue * 2);
}
for (int32_t i = 0; i < position; i++) {
for (int32_t i = spanValue - 1; i >= 0; i--) {
spanIndices[i+1] = spanIndices[i];
}
spanIndices[0] = position;
spanIndices[0] = {spanValue, length};
}
bool FormattedValueStringBuilderImpl::isIntOrGroup(Field field) {

View file

@ -567,7 +567,7 @@ public:
start,
{UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD},
status);
data->appendSpanIndex(0);
data->appendSpanInfo(0, start.length());
}
}
@ -603,7 +603,7 @@ public:
next,
{UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD},
status);
data->appendSpanIndex(position);
data->appendSpanInfo(position, next.length());
data->getStringRef().append(
temp.tempSubString(offsets[1]),
{UFIELD_CATEGORY_LIST, ULISTFMT_LITERAL_FIELD},
@ -622,7 +622,7 @@ public:
next,
{UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD},
status);
data->prependSpanIndex(position);
data->prependSpanInfo(position, next.length());
data->getStringRef().insert(
0,
temp.tempSubStringBetween(0, offsets[1]),

View file

@ -48,6 +48,7 @@ void ListFormatterTest::runIndexedTest(int32_t index, UBool exec,
TESTCASE_AUTO(TestBadStylesFail);
TESTCASE_AUTO(TestCreateStyled);
TESTCASE_AUTO(TestContextual);
TESTCASE_AUTO(TestNextPosition);
TESTCASE_AUTO_END;
}
@ -494,10 +495,10 @@ void ListFormatterTest::TestOutOfOrderPatterns() {
void ListFormatterTest::TestFormattedValue() {
IcuTestErrorCode status(*this, "TestFormattedValue");
LocalPointer<ListFormatter> fmt(ListFormatter::createInstance("en", status));
if (status.errIfFailureAndReset()) { return; }
{
LocalPointer<ListFormatter> fmt(ListFormatter::createInstance("en", status));
if (status.errIfFailureAndReset()) { return; }
const char16_t* message = u"Field position test 1";
const char16_t* expectedString = u"hello, wonderful, and world";
const UnicodeString inputs[] = {
@ -523,6 +524,33 @@ void ListFormatterTest::TestFormattedValue() {
expectedFieldPositions,
UPRV_LENGTHOF(expectedFieldPositions));
}
{
LocalPointer<ListFormatter> fmt(ListFormatter::createInstance("zh", ULISTFMT_TYPE_UNITS, ULISTFMT_WIDTH_SHORT, status));
if (status.errIfFailureAndReset()) { return; }
const char16_t* message = u"Field position test 2 (ICU-21340)";
const char16_t* expectedString = u"aabbbbbbbccc";
const UnicodeString inputs[] = {
u"aa",
u"bbbbbbb",
u"ccc"
};
FormattedList result = fmt->formatStringsToValue(inputs, UPRV_LENGTHOF(inputs), status);
static const UFieldPositionWithCategory expectedFieldPositions[] = {
// field, begin index, end index
{UFIELD_CATEGORY_LIST_SPAN, 0, 0, 2},
{UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD, 0, 2},
{UFIELD_CATEGORY_LIST_SPAN, 1, 2, 9},
{UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD, 2, 9},
{UFIELD_CATEGORY_LIST_SPAN, 2, 9, 12},
{UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD, 9, 12}};
checkMixedFormattedValue(
message,
result,
expectedString,
expectedFieldPositions,
UPRV_LENGTHOF(expectedFieldPositions));
}
}
void ListFormatterTest::DoTheRealListStyleTesting(Locale locale,
@ -700,4 +728,50 @@ void ListFormatterTest::TestContextual() {
}
}
void ListFormatterTest::TestNextPosition() {
IcuTestErrorCode status(*this, "TestNextPosition");
std::vector<std::string> locales = { "en", "es", "zh", "ja" };
UListFormatterWidth widths [] = {
ULISTFMT_WIDTH_WIDE, ULISTFMT_WIDTH_SHORT, ULISTFMT_WIDTH_NARROW
};
const char* widthStr [] = {"wide", "short", "narrow"};
UListFormatterType types [] = {
ULISTFMT_TYPE_AND, ULISTFMT_TYPE_OR, ULISTFMT_TYPE_UNITS
};
const char* typeStr [] = {"and", "or", "units"};
const UnicodeString inputs[] = { u"A1", u"B2", u"C3", u"D4" };
for (auto width : widths) {
for (auto type : types) {
for (auto locale : locales) {
LocalPointer<ListFormatter> fmt(
ListFormatter::createInstance(locale.c_str(), type, width, status),
status);
if (status.errIfFailureAndReset()) {
continue;
}
for (int32_t n = 1; n <= UPRV_LENGTHOF(inputs); n++) {
FormattedList result = fmt->formatStringsToValue(
inputs, n, status);
int32_t elements = 0;
icu::ConstrainedFieldPosition cfpos;
cfpos.constrainCategory(UFIELD_CATEGORY_LIST);
while (result.nextPosition(cfpos, status) && U_SUCCESS(status)) {
if (cfpos.getField() == ULISTFMT_ELEMENT_FIELD) {
elements++;
}
}
std::string msg = locale;
// Test that if there are n elements (n=1..4) in the input, then the
// nextPosition() should iterate through exactly n times
// with field == ULISTFMT_ELEMENT_FIELD.
assertEquals((msg
.append(" w=").append(widthStr[width])
.append(" t=").append(typeStr[type])).c_str(),
n, elements);
}
}
}
}
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -55,6 +55,7 @@ class ListFormatterTest : public IntlTestWithFieldPosition {
void TestBadStylesFail();
void TestCreateStyled();
void TestContextual();
void TestNextPosition();
private:
void CheckFormatting(