mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-21340 Don't coalesce adjacent list formatter fields in ICU4C
See #1425
This commit is contained in:
parent
87643423be
commit
86f00ad7fb
5 changed files with 127 additions and 47 deletions
|
@ -117,6 +117,12 @@ private:
|
|||
};
|
||||
|
||||
|
||||
// Internal struct that must be exported for MSVC
|
||||
struct U_I18N_API SpanInfo {
|
||||
int32_t spanValue;
|
||||
int32_t length;
|
||||
};
|
||||
|
||||
// Export an explicit template instantiation of the MaybeStackArray that
|
||||
// is used as a data member of CEBuffer.
|
||||
//
|
||||
|
@ -126,7 +132,7 @@ private:
|
|||
// See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.
|
||||
//
|
||||
#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
|
||||
template class U_I18N_API MaybeStackArray<int32_t, 8>;
|
||||
template class U_I18N_API MaybeStackArray<SpanInfo, 8>;
|
||||
#endif
|
||||
|
||||
/**
|
||||
|
@ -162,13 +168,19 @@ public:
|
|||
return fString;
|
||||
}
|
||||
|
||||
void appendSpanIndex(int32_t index);
|
||||
void prependSpanIndex(int32_t index);
|
||||
/**
|
||||
* Adds additional metadata used for span fields.
|
||||
*
|
||||
* spanValue: the index of the list item, for example.
|
||||
* length: the length of the span, used to split adjacent fields.
|
||||
*/
|
||||
void appendSpanInfo(int32_t spanValue, int32_t length);
|
||||
void prependSpanInfo(int32_t spanValue, int32_t length);
|
||||
|
||||
private:
|
||||
FormattedStringBuilder fString;
|
||||
FormattedStringBuilder::Field fNumericField;
|
||||
MaybeStackArray<int32_t, 8> spanIndices;
|
||||
MaybeStackArray<SpanInfo, 8> spanIndices;
|
||||
|
||||
bool nextPositionImpl(ConstrainedFieldPosition& cfpos, FormattedStringBuilder::Field numericField, UErrorCode& status) const;
|
||||
static bool isIntOrGroup(FormattedStringBuilder::Field field);
|
||||
|
|
|
@ -46,19 +46,19 @@ Appendable& FormattedValueStringBuilderImpl::appendTo(Appendable& appendable, UE
|
|||
|
||||
UBool FormattedValueStringBuilderImpl::nextPosition(ConstrainedFieldPosition& cfpos, UErrorCode& status) const {
|
||||
// NOTE: MSVC sometimes complains when implicitly converting between bool and UBool
|
||||
return nextPositionImpl(cfpos, fNumericField, status) ? TRUE : FALSE;
|
||||
return nextPositionImpl(cfpos, fNumericField, status) ? true : false;
|
||||
}
|
||||
|
||||
UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErrorCode& status) const {
|
||||
int32_t rawField = fp.getField();
|
||||
|
||||
if (rawField == FieldPosition::DONT_CARE) {
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (rawField < 0 || rawField >= UNUM_FIELD_COUNT) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
|
||||
ConstrainedFieldPosition cfpos;
|
||||
|
@ -67,7 +67,7 @@ UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErr
|
|||
if (nextPositionImpl(cfpos, kUndefinedField, status)) {
|
||||
fp.setBeginIndex(cfpos.getStart());
|
||||
fp.setEndIndex(cfpos.getLimit());
|
||||
return TRUE;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Special case: fraction should start after integer if fraction is not present
|
||||
|
@ -85,7 +85,7 @@ UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErr
|
|||
fp.setEndIndex(i - fString.fZero);
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
return false;
|
||||
}
|
||||
|
||||
void FormattedValueStringBuilderImpl::getAllFieldPositions(FieldPositionIteratorHandler& fpih,
|
||||
|
@ -103,23 +103,12 @@ static constexpr Field kEndField = Field(0xf, 0xf);
|
|||
bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& cfpos, Field numericField, UErrorCode& /*status*/) const {
|
||||
int32_t fieldStart = -1;
|
||||
Field currField = kUndefinedField;
|
||||
UFieldCategory spanCategory = UFIELD_CATEGORY_UNDEFINED;
|
||||
int32_t spanValue;
|
||||
for (int32_t i = fString.fZero + cfpos.getLimit(); i <= fString.fZero + fString.fLength; i++) {
|
||||
Field _field = (i < fString.fZero + fString.fLength) ? fString.getFieldPtr()[i] : kEndField;
|
||||
// Case 1: currently scanning a field.
|
||||
if (currField != kUndefinedField) {
|
||||
if (currField != _field) {
|
||||
int32_t end = i - fString.fZero;
|
||||
// Handle span fields; don't trim them
|
||||
if (spanCategory != UFIELD_CATEGORY_UNDEFINED) {
|
||||
cfpos.setState(
|
||||
spanCategory,
|
||||
spanValue,
|
||||
fieldStart,
|
||||
end);
|
||||
return true;
|
||||
}
|
||||
// Grouping separators can be whitespace; don't throw them out!
|
||||
if (isTrimmable(currField)) {
|
||||
end = trimBack(i - fString.fZero);
|
||||
|
@ -182,13 +171,11 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition&
|
|||
if (elementField == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)
|
||||
&& cfpos.matchesField(elementField.getCategory(), elementField.getField())
|
||||
&& (cfpos.getLimit() < i - fString.fZero || cfpos.getCategory() != elementField.getCategory())) {
|
||||
// Re-wind to the beginning of the field and then emit it
|
||||
int32_t j = i - 1;
|
||||
for (; j >= fString.fZero && fString.getFieldPtr()[j] == fString.getFieldPtr()[i-1]; j--) {}
|
||||
int64_t si = cfpos.getInt64IterationContext() - 1;
|
||||
cfpos.setState(
|
||||
elementField.getCategory(),
|
||||
elementField.getField(),
|
||||
j - fString.fZero + 1,
|
||||
i - fString.fZero - spanIndices[si].length,
|
||||
i - fString.fZero);
|
||||
return true;
|
||||
}
|
||||
|
@ -203,22 +190,28 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition&
|
|||
}
|
||||
// Case 3: check for field starting at this position
|
||||
// Case 3a: Need to add a SpanField
|
||||
if (_field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)
|
||||
// don't return the same field twice in a row:
|
||||
&& (i == fString.fZero
|
||||
|| fString.getFieldPtr()[i-1].getCategory() != UFIELD_CATEGORY_LIST
|
||||
|| fString.getFieldPtr()[i-1].getField() != ULISTFMT_ELEMENT_FIELD)) {
|
||||
if (_field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) {
|
||||
int64_t si = cfpos.getInt64IterationContext();
|
||||
spanValue = spanIndices[si];
|
||||
int32_t spanValue = spanIndices[si].spanValue;
|
||||
int32_t length = spanIndices[si].length;
|
||||
cfpos.setInt64IterationContext(si + 1);
|
||||
if (cfpos.matchesField(UFIELD_CATEGORY_LIST_SPAN, spanValue)) {
|
||||
spanCategory = UFIELD_CATEGORY_LIST_SPAN;
|
||||
UFieldCategory spanCategory = UFIELD_CATEGORY_LIST_SPAN;
|
||||
fieldStart = i - fString.fZero;
|
||||
currField = _field;
|
||||
int32_t end = fieldStart + length;
|
||||
cfpos.setState(
|
||||
spanCategory,
|
||||
spanValue,
|
||||
fieldStart,
|
||||
end);
|
||||
return true;
|
||||
} else {
|
||||
// Failed to match; jump ahead
|
||||
i += length - 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// Case 3b: No SpanField or SpanField did not match
|
||||
// Case 3b: No SpanField
|
||||
if (cfpos.matchesField(_field.getCategory(), _field.getField())) {
|
||||
fieldStart = i - fString.fZero;
|
||||
currField = _field;
|
||||
|
@ -229,21 +222,21 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition&
|
|||
return false;
|
||||
}
|
||||
|
||||
void FormattedValueStringBuilderImpl::appendSpanIndex(int32_t position) {
|
||||
if (spanIndices.getCapacity() <= position) {
|
||||
spanIndices.resize(position * 2);
|
||||
void FormattedValueStringBuilderImpl::appendSpanInfo(int32_t spanValue, int32_t length) {
|
||||
if (spanIndices.getCapacity() <= spanValue) {
|
||||
spanIndices.resize(spanValue * 2);
|
||||
}
|
||||
spanIndices[position] = position;
|
||||
spanIndices[spanValue] = {spanValue, length};
|
||||
}
|
||||
|
||||
void FormattedValueStringBuilderImpl::prependSpanIndex(int32_t position) {
|
||||
if (spanIndices.getCapacity() <= position) {
|
||||
spanIndices.resize(position * 2);
|
||||
void FormattedValueStringBuilderImpl::prependSpanInfo(int32_t spanValue, int32_t length) {
|
||||
if (spanIndices.getCapacity() <= spanValue) {
|
||||
spanIndices.resize(spanValue * 2);
|
||||
}
|
||||
for (int32_t i = 0; i < position; i++) {
|
||||
for (int32_t i = spanValue - 1; i >= 0; i--) {
|
||||
spanIndices[i+1] = spanIndices[i];
|
||||
}
|
||||
spanIndices[0] = position;
|
||||
spanIndices[0] = {spanValue, length};
|
||||
}
|
||||
|
||||
bool FormattedValueStringBuilderImpl::isIntOrGroup(Field field) {
|
||||
|
|
|
@ -567,7 +567,7 @@ public:
|
|||
start,
|
||||
{UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD},
|
||||
status);
|
||||
data->appendSpanIndex(0);
|
||||
data->appendSpanInfo(0, start.length());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -603,7 +603,7 @@ public:
|
|||
next,
|
||||
{UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD},
|
||||
status);
|
||||
data->appendSpanIndex(position);
|
||||
data->appendSpanInfo(position, next.length());
|
||||
data->getStringRef().append(
|
||||
temp.tempSubString(offsets[1]),
|
||||
{UFIELD_CATEGORY_LIST, ULISTFMT_LITERAL_FIELD},
|
||||
|
@ -622,7 +622,7 @@ public:
|
|||
next,
|
||||
{UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD},
|
||||
status);
|
||||
data->prependSpanIndex(position);
|
||||
data->prependSpanInfo(position, next.length());
|
||||
data->getStringRef().insert(
|
||||
0,
|
||||
temp.tempSubStringBetween(0, offsets[1]),
|
||||
|
|
|
@ -48,6 +48,7 @@ void ListFormatterTest::runIndexedTest(int32_t index, UBool exec,
|
|||
TESTCASE_AUTO(TestBadStylesFail);
|
||||
TESTCASE_AUTO(TestCreateStyled);
|
||||
TESTCASE_AUTO(TestContextual);
|
||||
TESTCASE_AUTO(TestNextPosition);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
|
@ -494,10 +495,10 @@ void ListFormatterTest::TestOutOfOrderPatterns() {
|
|||
|
||||
void ListFormatterTest::TestFormattedValue() {
|
||||
IcuTestErrorCode status(*this, "TestFormattedValue");
|
||||
LocalPointer<ListFormatter> fmt(ListFormatter::createInstance("en", status));
|
||||
if (status.errIfFailureAndReset()) { return; }
|
||||
|
||||
{
|
||||
LocalPointer<ListFormatter> fmt(ListFormatter::createInstance("en", status));
|
||||
if (status.errIfFailureAndReset()) { return; }
|
||||
const char16_t* message = u"Field position test 1";
|
||||
const char16_t* expectedString = u"hello, wonderful, and world";
|
||||
const UnicodeString inputs[] = {
|
||||
|
@ -523,6 +524,33 @@ void ListFormatterTest::TestFormattedValue() {
|
|||
expectedFieldPositions,
|
||||
UPRV_LENGTHOF(expectedFieldPositions));
|
||||
}
|
||||
|
||||
{
|
||||
LocalPointer<ListFormatter> fmt(ListFormatter::createInstance("zh", ULISTFMT_TYPE_UNITS, ULISTFMT_WIDTH_SHORT, status));
|
||||
if (status.errIfFailureAndReset()) { return; }
|
||||
const char16_t* message = u"Field position test 2 (ICU-21340)";
|
||||
const char16_t* expectedString = u"aabbbbbbbccc";
|
||||
const UnicodeString inputs[] = {
|
||||
u"aa",
|
||||
u"bbbbbbb",
|
||||
u"ccc"
|
||||
};
|
||||
FormattedList result = fmt->formatStringsToValue(inputs, UPRV_LENGTHOF(inputs), status);
|
||||
static const UFieldPositionWithCategory expectedFieldPositions[] = {
|
||||
// field, begin index, end index
|
||||
{UFIELD_CATEGORY_LIST_SPAN, 0, 0, 2},
|
||||
{UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD, 0, 2},
|
||||
{UFIELD_CATEGORY_LIST_SPAN, 1, 2, 9},
|
||||
{UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD, 2, 9},
|
||||
{UFIELD_CATEGORY_LIST_SPAN, 2, 9, 12},
|
||||
{UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD, 9, 12}};
|
||||
checkMixedFormattedValue(
|
||||
message,
|
||||
result,
|
||||
expectedString,
|
||||
expectedFieldPositions,
|
||||
UPRV_LENGTHOF(expectedFieldPositions));
|
||||
}
|
||||
}
|
||||
|
||||
void ListFormatterTest::DoTheRealListStyleTesting(Locale locale,
|
||||
|
@ -700,4 +728,50 @@ void ListFormatterTest::TestContextual() {
|
|||
}
|
||||
}
|
||||
|
||||
void ListFormatterTest::TestNextPosition() {
|
||||
IcuTestErrorCode status(*this, "TestNextPosition");
|
||||
std::vector<std::string> locales = { "en", "es", "zh", "ja" };
|
||||
UListFormatterWidth widths [] = {
|
||||
ULISTFMT_WIDTH_WIDE, ULISTFMT_WIDTH_SHORT, ULISTFMT_WIDTH_NARROW
|
||||
};
|
||||
const char* widthStr [] = {"wide", "short", "narrow"};
|
||||
UListFormatterType types [] = {
|
||||
ULISTFMT_TYPE_AND, ULISTFMT_TYPE_OR, ULISTFMT_TYPE_UNITS
|
||||
};
|
||||
const char* typeStr [] = {"and", "or", "units"};
|
||||
const UnicodeString inputs[] = { u"A1", u"B2", u"C3", u"D4" };
|
||||
for (auto width : widths) {
|
||||
for (auto type : types) {
|
||||
for (auto locale : locales) {
|
||||
LocalPointer<ListFormatter> fmt(
|
||||
ListFormatter::createInstance(locale.c_str(), type, width, status),
|
||||
status);
|
||||
if (status.errIfFailureAndReset()) {
|
||||
continue;
|
||||
}
|
||||
for (int32_t n = 1; n <= UPRV_LENGTHOF(inputs); n++) {
|
||||
FormattedList result = fmt->formatStringsToValue(
|
||||
inputs, n, status);
|
||||
int32_t elements = 0;
|
||||
icu::ConstrainedFieldPosition cfpos;
|
||||
cfpos.constrainCategory(UFIELD_CATEGORY_LIST);
|
||||
while (result.nextPosition(cfpos, status) && U_SUCCESS(status)) {
|
||||
if (cfpos.getField() == ULISTFMT_ELEMENT_FIELD) {
|
||||
elements++;
|
||||
}
|
||||
}
|
||||
std::string msg = locale;
|
||||
// Test that if there are n elements (n=1..4) in the input, then the
|
||||
// nextPosition() should iterate through exactly n times
|
||||
// with field == ULISTFMT_ELEMENT_FIELD.
|
||||
assertEquals((msg
|
||||
.append(" w=").append(widthStr[width])
|
||||
.append(" t=").append(typeStr[type])).c_str(),
|
||||
n, elements);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
|
|
@ -55,6 +55,7 @@ class ListFormatterTest : public IntlTestWithFieldPosition {
|
|||
void TestBadStylesFail();
|
||||
void TestCreateStyled();
|
||||
void TestContextual();
|
||||
void TestNextPosition();
|
||||
|
||||
private:
|
||||
void CheckFormatting(
|
||||
|
|
Loading…
Add table
Reference in a new issue