From b6687af59c295664086f64477fca6fe76d055e77 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Fri, 12 Feb 2021 01:27:39 +0000 Subject: [PATCH] ICU-20421 Add span fields to NumberRangeFormatter See #1572 --- icu4c/source/i18n/formattedval_impl.h | 13 +- icu4c/source/i18n/formattedval_sbimpl.cpp | 142 ++++++++++++------ icu4c/source/i18n/listformatter.cpp | 6 +- icu4c/source/i18n/numrange_impl.cpp | 26 ++-- icu4c/source/i18n/unicode/uformattedvalue.h | 9 ++ .../test/cintltst/unumberrangeformattertst.c | 17 ++- .../source/test/intltest/numbertest_range.cpp | 40 ++--- .../ibm/icu/impl/FormattedStringBuilder.java | 27 +++- .../impl/FormattedValueStringBuilderImpl.java | 127 ++++++++++++---- .../range/PrefixInfixSuffixLengthHelper.java | 4 + .../ibm/icu/number/FormattedNumberRange.java | 3 +- .../ibm/icu/number/NumberRangeFormatter.java | 42 ++++++ .../icu/number/NumberRangeFormatterImpl.java | 36 +++-- .../src/com/ibm/icu/text/ListFormatter.java | 1 + .../test/number/NumberRangeFormatterTest.java | 4 + .../dev/test/serializable/FormatHandler.java | 16 ++ .../serializable/SerializableTestUtility.java | 1 + 17 files changed, 378 insertions(+), 136 deletions(-) diff --git a/icu4c/source/i18n/formattedval_impl.h b/icu4c/source/i18n/formattedval_impl.h index c0dec83ba1e..4ff25dcfb87 100644 --- a/icu4c/source/i18n/formattedval_impl.h +++ b/icu4c/source/i18n/formattedval_impl.h @@ -119,7 +119,9 @@ private: // Internal struct that must be exported for MSVC struct U_I18N_API SpanInfo { + UFieldCategory category; int32_t spanValue; + int32_t start; int32_t length; }; @@ -170,17 +172,20 @@ public: /** * Adds additional metadata used for span fields. - * - * spanValue: the index of the list item, for example. + * + * category: the category to use for the span field. + * spanValue: the value of the span field: index of the list item, for example. + * start: the start position within the string of the span. -1 if unknown. 
* length: the length of the span, used to split adjacent fields. */ - void appendSpanInfo(int32_t spanValue, int32_t length, UErrorCode& status); - void prependSpanInfo(int32_t spanValue, int32_t length, UErrorCode& status); + void appendSpanInfo(UFieldCategory category, int32_t spanValue, int32_t start, int32_t length, UErrorCode& status); + void prependSpanInfo(UFieldCategory category, int32_t spanValue, int32_t start, int32_t length, UErrorCode& status); private: FormattedStringBuilder fString; FormattedStringBuilder::Field fNumericField; MaybeStackArray spanIndices; + int32_t spanIndicesCount = 0; bool nextPositionImpl(ConstrainedFieldPosition& cfpos, FormattedStringBuilder::Field numericField, UErrorCode& status) const; static bool isIntOrGroup(FormattedStringBuilder::Field field); diff --git a/icu4c/source/i18n/formattedval_sbimpl.cpp b/icu4c/source/i18n/formattedval_sbimpl.cpp index 64601f436ba..f3911c02c73 100644 --- a/icu4c/source/i18n/formattedval_sbimpl.cpp +++ b/icu4c/source/i18n/formattedval_sbimpl.cpp @@ -103,6 +103,27 @@ static constexpr Field kEndField = Field(0xf, 0xf); bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& cfpos, Field numericField, UErrorCode& /*status*/) const { int32_t fieldStart = -1; Field currField = kUndefinedField; + bool prevIsSpan = false; + int32_t nextSpanStart = -1; + if (spanIndicesCount > 0) { + int64_t si = cfpos.getInt64IterationContext(); + U_ASSERT(si <= spanIndicesCount); + if (si < spanIndicesCount) { + nextSpanStart = spanIndices[si].start; + } + if (si > 0) { + prevIsSpan = cfpos.getCategory() == spanIndices[si-1].category + && cfpos.getField() == spanIndices[si-1].spanValue; + } + } + bool prevIsNumeric = false; + if (numericField != kUndefinedField) { + prevIsNumeric = cfpos.getCategory() == numericField.getCategory() + && cfpos.getField() == numericField.getField(); + } + bool prevIsInteger = cfpos.getCategory() == UFIELD_CATEGORY_NUMBER + && cfpos.getField() == 
UNUM_INTEGER_FIELD; + for (int32_t i = fString.fZero + cfpos.getLimit(); i <= fString.fZero + fString.fLength; i++) { Field _field = (i < fString.fZero + fString.fLength) ? fString.getFieldPtr()[i] : kEndField; // Case 1: currently scanning a field. @@ -129,11 +150,34 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& } continue; } + // Special case: emit normalField if we are pointing at the end of spanField. + if (i > fString.fZero && prevIsSpan) { + int64_t si = cfpos.getInt64IterationContext() - 1; + U_ASSERT(si >= 0); + if (_field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) { + // Special handling for ULISTFMT_ELEMENT_FIELD + if (cfpos.matchesField(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) { + fieldStart = i - fString.fZero - spanIndices[si].length; + int32_t end = fieldStart + spanIndices[si].length; + cfpos.setState( + UFIELD_CATEGORY_LIST, + ULISTFMT_ELEMENT_FIELD, + fieldStart, + end); + return true; + } + } else { + // Re-wind, since there may be multiple fields in the span. + i -= spanIndices[si].length; + U_ASSERT(i >= fString.fZero); + _field = fString.getFieldPtr()[i]; + } + } // Special case: coalesce the INTEGER if we are pointing at the end of the INTEGER. 
if (cfpos.matchesField(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD) && i > fString.fZero - // don't return the same field twice in a row: - && i - fString.fZero > cfpos.getLimit() + && !prevIsInteger + && !prevIsNumeric && isIntOrGroup(fString.getFieldPtr()[i - 1]) && !isIntOrGroup(_field)) { int j = i - 1; @@ -149,10 +193,7 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& if (numericField != kUndefinedField && cfpos.matchesField(numericField.getCategory(), numericField.getField()) && i > fString.fZero - // don't return the same field twice in a row: - && (i - fString.fZero > cfpos.getLimit() - || cfpos.getCategory() != numericField.getCategory() - || cfpos.getField() != numericField.getField()) + && !prevIsNumeric && fString.getFieldPtr()[i - 1].isNumeric() && !_field.isNumeric()) { // Re-wind to the beginning of the field and then emit it @@ -165,41 +206,22 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& i - fString.fZero); return true; } - // Special case: emit normalField if we are pointing at the end of spanField. - if (i > fString.fZero) { - auto elementField = fString.getFieldPtr()[i-1]; - if (elementField == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD) - && cfpos.matchesField(elementField.getCategory(), elementField.getField()) - && ( - cfpos.getLimit() < i - fString.fZero - || cfpos.getCategory() != elementField.getCategory() - || cfpos.getField() != elementField.getField())) { - int64_t si = cfpos.getInt64IterationContext() - 1; - cfpos.setState( - elementField.getCategory(), - elementField.getField(), - i - fString.fZero - spanIndices[si].length, - i - fString.fZero); - return true; - } - } - // Special case: skip over INTEGER; will be coalesced later. - if (_field == Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)) { - _field = kUndefinedField; - } - // Case 2: no field starting at this position. 
- if (_field.isUndefined() || _field == kEndField) { - continue; - } - // Case 3: check for field starting at this position - // Case 3a: Need to add a SpanField - if (_field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) { + // Check for span field + if (!prevIsSpan && ( + _field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD) || + i - fString.fZero == nextSpanStart)) { int64_t si = cfpos.getInt64IterationContext(); + if (si >= spanIndicesCount) { + break; + } + UFieldCategory spanCategory = spanIndices[si].category; int32_t spanValue = spanIndices[si].spanValue; int32_t length = spanIndices[si].length; cfpos.setInt64IterationContext(si + 1); - if (cfpos.matchesField(UFIELD_CATEGORY_LIST_SPAN, spanValue)) { - UFieldCategory spanCategory = UFIELD_CATEGORY_LIST_SPAN; + if (si + 1 < spanIndicesCount) { + nextSpanStart = spanIndices[si + 1].start; + } + if (cfpos.matchesField(spanCategory, spanValue)) { fieldStart = i - fString.fZero; int32_t end = fieldStart + length; cfpos.setState( @@ -208,17 +230,41 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& fieldStart, end); return true; - } else { - // Failed to match; jump ahead - i += length - 1; - continue; + } else if (_field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) { + // Special handling for ULISTFMT_ELEMENT_FIELD + if (cfpos.matchesField(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) { + fieldStart = i - fString.fZero; + int32_t end = fieldStart + length; + cfpos.setState( + UFIELD_CATEGORY_LIST, + ULISTFMT_ELEMENT_FIELD, + fieldStart, + end); + return true; + } else { + // Failed to match; jump ahead + i += length - 1; + // goto loopend + } } } - // Case 3b: No SpanField - if (cfpos.matchesField(_field.getCategory(), _field.getField())) { + // Special case: skip over INTEGER; will be coalesced later. + else if (_field == Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)) { + _field = kUndefinedField; + } + // No field starting at this position. 
+ else if (_field.isUndefined() || _field == kEndField) { + // goto loopend + } + // No SpanField + else if (cfpos.matchesField(_field.getCategory(), _field.getField())) { fieldStart = i - fString.fZero; currField = _field; } + // loopend: + prevIsSpan = false; + prevIsNumeric = false; + prevIsInteger = false; } U_ASSERT(currField == kUndefinedField); @@ -231,7 +277,7 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& return false; } -void FormattedValueStringBuilderImpl::appendSpanInfo(int32_t spanValue, int32_t length, UErrorCode& status) { +void FormattedValueStringBuilderImpl::appendSpanInfo(UFieldCategory category, int32_t spanValue, int32_t start, int32_t length, UErrorCode& status) { if (U_FAILURE(status)) { return; } U_ASSERT(spanIndices.getCapacity() >= spanValue); if (spanIndices.getCapacity() == spanValue) { @@ -240,10 +286,11 @@ void FormattedValueStringBuilderImpl::appendSpanInfo(int32_t spanValue, int32_t return; } } - spanIndices[spanValue] = {spanValue, length}; + spanIndices[spanValue] = {category, spanValue, start, length}; + spanIndicesCount++; } -void FormattedValueStringBuilderImpl::prependSpanInfo(int32_t spanValue, int32_t length, UErrorCode& status) { +void FormattedValueStringBuilderImpl::prependSpanInfo(UFieldCategory category, int32_t spanValue, int32_t start, int32_t length, UErrorCode& status) { if (U_FAILURE(status)) { return; } U_ASSERT(spanIndices.getCapacity() >= spanValue); if (spanIndices.getCapacity() == spanValue) { @@ -255,7 +302,8 @@ void FormattedValueStringBuilderImpl::prependSpanInfo(int32_t spanValue, int32_t for (int32_t i = spanValue - 1; i >= 0; i--) { spanIndices[i+1] = spanIndices[i]; } - spanIndices[0] = {spanValue, length}; + spanIndices[0] = {category, spanValue, start, length}; + spanIndicesCount++; } bool FormattedValueStringBuilderImpl::isIntOrGroup(Field field) { diff --git a/icu4c/source/i18n/listformatter.cpp b/icu4c/source/i18n/listformatter.cpp index be0d16bc7f5..e5c01c0ab32 
100644 --- a/icu4c/source/i18n/listformatter.cpp +++ b/icu4c/source/i18n/listformatter.cpp @@ -567,7 +567,7 @@ public: start, {UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD}, status); - data->appendSpanInfo(0, start.length(), status); + data->appendSpanInfo(UFIELD_CATEGORY_LIST_SPAN, 0, -1, start.length(), status); } } @@ -603,7 +603,7 @@ public: next, {UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD}, status); - data->appendSpanInfo(position, next.length(), status); + data->appendSpanInfo(UFIELD_CATEGORY_LIST_SPAN, position, -1, next.length(), status); data->getStringRef().append( temp.tempSubString(offsets[1]), {UFIELD_CATEGORY_LIST, ULISTFMT_LITERAL_FIELD}, @@ -622,7 +622,7 @@ public: next, {UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD}, status); - data->prependSpanInfo(position, next.length(), status); + data->prependSpanInfo(UFIELD_CATEGORY_LIST_SPAN, position, -1, next.length(), status); data->getStringRef().insert( 0, temp.tempSubStringBetween(0, offsets[1]), diff --git a/icu4c/source/i18n/numrange_impl.cpp b/icu4c/source/i18n/numrange_impl.cpp index c6e8fce128a..aa713f1398b 100644 --- a/icu4c/source/i18n/numrange_impl.cpp +++ b/icu4c/source/i18n/numrange_impl.cpp @@ -328,6 +328,7 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data, #define UPRV_INDEX_1 (lengthPrefix + length1) #define UPRV_INDEX_2 (lengthPrefix + length1 + lengthInfix) #define UPRV_INDEX_3 (lengthPrefix + length1 + lengthInfix + length2) + #define UPRV_INDEX_4 (lengthPrefix + length1 + lengthInfix + length2 + lengthSuffix) int32_t lengthRange = SimpleModifier::formatTwoArgPattern( fRangeFormatter, @@ -367,31 +368,38 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data, // TODO: Support padding? 
if (collapseInner) { - // Note: this is actually a mix of prefix and suffix, but adding to infix length works const Modifier& mod = resolveModifierPlurals(*micros1.modInner, *micros2.modInner); - lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status); + lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status); + lengthPrefix += mod.getPrefixLength(); + lengthSuffix -= mod.getPrefixLength(); } else { length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); - length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status); + length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status); } if (collapseMiddle) { - // Note: this is actually a mix of prefix and suffix, but adding to infix length works const Modifier& mod = resolveModifierPlurals(*micros1.modMiddle, *micros2.modMiddle); - lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status); + lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status); + lengthPrefix += mod.getPrefixLength(); + lengthSuffix -= mod.getPrefixLength(); } else { length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); - length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status); + length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status); } if (collapseOuter) { - // Note: this is actually a mix of prefix and suffix, but adding to infix length works const Modifier& mod = resolveModifierPlurals(*micros1.modOuter, *micros2.modOuter); - lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status); + lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status); + lengthPrefix += mod.getPrefixLength(); + lengthSuffix -= mod.getPrefixLength(); } else { length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); - length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status); + length2 += micros2.modOuter->apply(string, 
UPRV_INDEX_2, UPRV_INDEX_4, status); } + + // Now that all pieces are added, save the span info. + data.appendSpanInfo(UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 0, UPRV_INDEX_0, length1, status); + data.appendSpanInfo(UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 1, UPRV_INDEX_2, length2, status); } diff --git a/icu4c/source/i18n/unicode/uformattedvalue.h b/icu4c/source/i18n/unicode/uformattedvalue.h index c358629c051..1a550e87589 100644 --- a/icu4c/source/i18n/unicode/uformattedvalue.h +++ b/icu4c/source/i18n/unicode/uformattedvalue.h @@ -93,6 +93,15 @@ typedef enum UFieldCategory { */ UFIELD_CATEGORY_DATE_INTERVAL_SPAN = 0x1000 + UFIELD_CATEGORY_DATE_INTERVAL, +#ifndef U_HIDE_DRAFT_API + /** + * Category for spans in a number range. + * + * @draft ICU 69 + */ + UFIELD_CATEGORY_NUMBER_RANGE_SPAN = 0x1000 + UFIELD_CATEGORY_NUMBER, +#endif // U_HIDE_DRAFT_API + } UFieldCategory; diff --git a/icu4c/source/test/cintltst/unumberrangeformattertst.c b/icu4c/source/test/cintltst/unumberrangeformattertst.c index 35c21a2a510..4dc7a6b2409 100644 --- a/icu4c/source/test/cintltst/unumberrangeformattertst.c +++ b/icu4c/source/test/cintltst/unumberrangeformattertst.c @@ -96,17 +96,18 @@ static void TestFormattedValue() { if (assertSuccessCheck("Should format without error", &ec, TRUE)) { const UFormattedValue* fv = unumrf_resultAsValue(uresult, &ec); assertSuccess("Should convert without error", &ec); - static const UFieldPosition expectedFieldPositions[] = { - // field, begin index, end index - {UNUM_INTEGER_FIELD, 0, 2}, - {UNUM_COMPACT_FIELD, 2, 3}, - {UNUM_INTEGER_FIELD, 6, 9}, - {UNUM_COMPACT_FIELD, 9, 10}}; - checkFormattedValue( + static const UFieldPositionWithCategory expectedFieldPositions[] = { + // category, field, begin index, end index + {UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 0, 0, 3}, + {UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD, 0, 2}, + {UFIELD_CATEGORY_NUMBER, UNUM_COMPACT_FIELD, 2, 3}, + {UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 1, 6, 10}, + {UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD, 
6, 9}, + {UFIELD_CATEGORY_NUMBER, UNUM_COMPACT_FIELD, 9, 10}}; + checkMixedFormattedValue( "FormattedNumber as FormattedValue", fv, u"55K – 150K", - UFIELD_CATEGORY_NUMBER, expectedFieldPositions, UPRV_LENGTHOF(expectedFieldPositions)); } diff --git a/icu4c/source/test/intltest/numbertest_range.cpp b/icu4c/source/test/intltest/numbertest_range.cpp index 2cd31cae1b8..911361695b0 100644 --- a/icu4c/source/test/intltest/numbertest_range.cpp +++ b/icu4c/source/test/intltest/numbertest_range.cpp @@ -740,18 +740,19 @@ void NumberRangeFormatterTest::testFieldPositions() { 3000, 5000, expectedString); - static const UFieldPosition expectedFieldPositions[] = { - // field, begin index, end index - {UNUM_INTEGER_FIELD, 0, 1}, - {UNUM_COMPACT_FIELD, 1, 2}, - {UNUM_INTEGER_FIELD, 5, 6}, - {UNUM_COMPACT_FIELD, 6, 7}, - {UNUM_MEASURE_UNIT_FIELD, 8, 9}}; - checkFormattedValue( + static const UFieldPositionWithCategory expectedFieldPositions[] = { + // category, field, begin index, end index + {UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 0, 0, 2}, + {UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD, 0, 1}, + {UFIELD_CATEGORY_NUMBER, UNUM_COMPACT_FIELD, 1, 2}, + {UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 1, 5, 7}, + {UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD, 5, 6}, + {UFIELD_CATEGORY_NUMBER, UNUM_COMPACT_FIELD, 6, 7}, + {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD, 8, 9}}; + checkMixedFormattedValue( message, result, expectedString, - UFIELD_CATEGORY_NUMBER, expectedFieldPositions, UPRV_LENGTHOF(expectedFieldPositions)); } @@ -765,19 +766,20 @@ void NumberRangeFormatterTest::testFieldPositions() { 87654321, 98765432, expectedString); - static const UFieldPosition expectedFieldPositions[] = { - // field, begin index, end index - {UNUM_GROUPING_SEPARATOR_FIELD, 2, 3}, - {UNUM_GROUPING_SEPARATOR_FIELD, 6, 7}, - {UNUM_INTEGER_FIELD, 0, 10}, - {UNUM_GROUPING_SEPARATOR_FIELD, 13, 14}, - {UNUM_GROUPING_SEPARATOR_FIELD, 17, 18}, - {UNUM_INTEGER_FIELD, 11, 21}}; - checkFormattedValue( + static const 
UFieldPositionWithCategory expectedFieldPositions[] = { + // category, field, begin index, end index + {UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 0, 0, 10}, + {UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD, 2, 3}, + {UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD, 6, 7}, + {UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD, 0, 10}, + {UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 1, 11, 21}, + {UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD, 13, 14}, + {UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD, 17, 18}, + {UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD, 11, 21}}; + checkMixedFormattedValue( message, result, expectedString, - UFIELD_CATEGORY_NUMBER, expectedFieldPositions, UPRV_LENGTHOF(expectedFieldPositions)); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/FormattedStringBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/FormattedStringBuilder.java index 48f048c57bf..4a799dac8dd 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/FormattedStringBuilder.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/FormattedStringBuilder.java @@ -25,6 +25,22 @@ import com.ibm.icu.text.NumberFormat; */ public class FormattedStringBuilder implements CharSequence, Appendable { + public static interface FieldWrapper { + java.text.Format.Field unwrap(); + } + + public static java.text.Format.Field unwrapField(Object field) { + if (field == null) { + return null; + } else if (field instanceof FieldWrapper) { + return ((FieldWrapper) field).unwrap(); + } else if (field instanceof java.text.Format.Field) { + return (java.text.Format.Field) field; + } else { + throw new AssertionError("Not a field: " + field); + } + } + /** A constant, empty FormattedStringBuilder. Do NOT call mutative operations on this. 
*/ public static final FormattedStringBuilder EMPTY = new FormattedStringBuilder(); @@ -534,10 +550,12 @@ public class FormattedStringBuilder implements CharSequence, Appendable { if (fields.length != length) return false; for (int i = 0; i < length; i++) { - if (this.chars[zero + i] != chars[i]) + if (this.chars[zero + i] != chars[i]) { return false; - if (this.fields[zero + i] != fields[i]) + } + if (unwrapField(this.fields[zero + i]) != unwrapField(fields[i])) { return false; + } } return true; } @@ -551,7 +569,10 @@ public class FormattedStringBuilder implements CharSequence, Appendable { if (length != other.length) return false; for (int i = 0; i < length; i++) { - if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) { + if (charAt(i) != other.charAt(i)) { + return false; + } + if (unwrapField(fieldAt(i)) != unwrapField(other.fieldAt(i))) { return false; } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/FormattedValueStringBuilderImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/FormattedValueStringBuilderImpl.java index 938eb13e2da..d989635de1e 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/FormattedValueStringBuilderImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/FormattedValueStringBuilderImpl.java @@ -30,11 +30,16 @@ public class FormattedValueStringBuilderImpl { * Placeholder field used for calculating spans. * Does not currently support nested fields beyond one level. */ - public static class SpanFieldPlaceholder { + public static class SpanFieldPlaceholder implements FormattedStringBuilder.FieldWrapper { public UFormat.SpanField spanField; public Field normalField; public Object value; + public int start; public int length; + + public Field unwrap() { + return normalField; + } } /** @@ -55,6 +60,29 @@ public class FormattedValueStringBuilderImpl { return -1; } + /** + * Upgrade a range of a string to a span field. + * + * Similar to appendSpanInfo in ICU4C. 
+ */ + public static void applySpanRange( + FormattedStringBuilder self, + UFormat.SpanField spanField, + Object value, + int start, + int end) { + for (int i = start + self.zero; i < end + self.zero; i++) { + Object oldField = self.fields[i]; + SpanFieldPlaceholder newField = new SpanFieldPlaceholder(); + newField.spanField = spanField; + newField.normalField = (java.text.Format.Field) oldField; + newField.value = value; + newField.start = start; + newField.length = end - start; + self.fields[i] = newField; + } + } + public static boolean nextFieldPosition(FormattedStringBuilder self, FieldPosition fp) { java.text.Format.Field rawField = fp.getFieldAttribute(); @@ -137,6 +165,17 @@ public class FormattedValueStringBuilderImpl { public static boolean nextPosition(FormattedStringBuilder self, ConstrainedFieldPosition cfpos, Field numericField) { int fieldStart = -1; Object currField = null; + boolean prevIsSpan = false; + if (cfpos.getLimit() > 0) { + prevIsSpan = cfpos.getField() instanceof UFormat.SpanField + && cfpos.getStart() < cfpos.getLimit(); + } + boolean prevIsNumeric = false; + if (numericField != null) { + prevIsNumeric = cfpos.getField() == numericField; + } + boolean prevIsInteger = cfpos.getField() == NumberFormat.Field.INTEGER; + for (int i = self.zero + cfpos.getLimit(); i <= self.zero + self.length; i++) { Object _field = (i < self.zero + self.length) ? self.fields[i] : NullField.END; // Case 1: currently scanning a field. @@ -163,11 +202,30 @@ public class FormattedValueStringBuilderImpl { } continue; } + // Special case: emit normalField if we are pointing at the end of spanField. 
+ if (i > self.zero && prevIsSpan) { + assert self.fields[i-1] instanceof SpanFieldPlaceholder; + SpanFieldPlaceholder ph = (SpanFieldPlaceholder) self.fields[i-1]; + if (ph.normalField == ListFormatter.Field.ELEMENT) { + // Special handling for ULISTFMT_ELEMENT_FIELD + if (cfpos.matchesField(ListFormatter.Field.ELEMENT, null)) { + fieldStart = i - self.zero - ph.length; + int end = fieldStart + ph.length; + cfpos.setState(ListFormatter.Field.ELEMENT, null, fieldStart, end); + return true; + } + } else { + // Re-wind, since there may be multiple fields in the span. + i -= ph.length; + assert i >= self.zero; + _field = ((SpanFieldPlaceholder) self.fields[i]).normalField; + } + } // Special case: coalesce the INTEGER if we are pointing at the end of the INTEGER. if (cfpos.matchesField(NumberFormat.Field.INTEGER, null) && i > self.zero - // don't return the same field twice in a row: - && i - self.zero > cfpos.getLimit() + && !prevIsInteger + && !prevIsNumeric && isIntOrGroup(self.fields[i - 1]) && !isIntOrGroup(_field)) { int j = i - 1; @@ -179,8 +237,7 @@ public class FormattedValueStringBuilderImpl { if (numericField != null && cfpos.matchesField(numericField, null) && i > self.zero - // don't return the same field twice in a row: - && (i - self.zero > cfpos.getLimit() || cfpos.getField() != numericField) + && !prevIsNumeric && isNumericField(self.fields[i - 1]) && !isNumericField(_field)) { // Re-wind to the beginning of the field and then emit it @@ -189,45 +246,49 @@ public class FormattedValueStringBuilderImpl { cfpos.setState(numericField, null, j - self.zero + 1, i - self.zero); return true; } - // Special case: emit normalField if we are pointing at the end of spanField. 
- if (i > self.zero - && self.fields[i-1] instanceof SpanFieldPlaceholder) { - SpanFieldPlaceholder ph = (SpanFieldPlaceholder) self.fields[i-1]; - if (cfpos.matchesField(ph.normalField, null) - && (cfpos.getLimit() < i - self.zero - || cfpos.getField() != ph.normalField)) { - cfpos.setState(ph.normalField, null, i - self.zero - ph.length, i - self.zero); - return true; - } - } - // Special case: skip over INTEGER; will be coalesced later. - if (_field == NumberFormat.Field.INTEGER) { - _field = null; - } - // Case 2: no field starting at this position. - if (_field == null || _field == NullField.END) { - continue; - } - // Case 3: check for field starting at this position - // Case 3a: SpanField placeholder + // Check for span field + SpanFieldPlaceholder ph = null; if (_field instanceof SpanFieldPlaceholder) { - SpanFieldPlaceholder ph = (SpanFieldPlaceholder) _field; + ph = (SpanFieldPlaceholder) _field; + _field = ph.normalField; + } + if (ph != null && (ph.start == -1 || ph.start == i - self.zero)) { if (cfpos.matchesField(ph.spanField, ph.value)) { fieldStart = i - self.zero; int end = fieldStart + ph.length; cfpos.setState(ph.spanField, ph.value, fieldStart, end); return true; - } else { - // Failed to match; jump ahead - i += ph.length - 1; - continue; + } else if (ph.normalField == ListFormatter.Field.ELEMENT) { + // Special handling for ListFormatter.Field.ELEMENT + if (cfpos.matchesField(ListFormatter.Field.ELEMENT, null)) { + fieldStart = i - self.zero; + int end = fieldStart + ph.length; + cfpos.setState(ListFormatter.Field.ELEMENT, null, fieldStart, end); + return true; + } else { + // Failed to match; jump ahead + i += ph.length - 1; + // goto loopend + } } } - // Case 3b: No SpanField + // Special case: skip over INTEGER; will be coalesced later. + else if (_field == NumberFormat.Field.INTEGER) { + _field = null; + } + // No field starting at this position. 
+ else if (_field == null || _field == NullField.END) { + // goto loopend + } + // No SpanField else if (cfpos.matchesField((Field) _field, null)) { fieldStart = i - self.zero; currField = _field; } + // loopend: + prevIsSpan = false; + prevIsNumeric = false; + prevIsInteger = false; } assert currField == null; @@ -241,10 +302,12 @@ public class FormattedValueStringBuilderImpl { } private static boolean isIntOrGroup(Object field) { + field = FormattedStringBuilder.unwrapField(field); return field == NumberFormat.Field.INTEGER || field == NumberFormat.Field.GROUPING_SEPARATOR; } private static boolean isNumericField(Object field) { + field = FormattedStringBuilder.unwrapField(field); return field == null || NumberFormat.Field.class.isAssignableFrom(field.getClass()); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/range/PrefixInfixSuffixLengthHelper.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/range/PrefixInfixSuffixLengthHelper.java index c88cb2a9232..31ec48f34f6 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/number/range/PrefixInfixSuffixLengthHelper.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/number/range/PrefixInfixSuffixLengthHelper.java @@ -27,4 +27,8 @@ public class PrefixInfixSuffixLengthHelper { public int index3() { return lengthPrefix + length1 + lengthInfix + length2; } + + public int index4() { + return lengthPrefix + length1 + lengthInfix + length2 + lengthSuffix; + } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/number/FormattedNumberRange.java b/icu4j/main/classes/core/src/com/ibm/icu/number/FormattedNumberRange.java index 90d23c3a91a..49b875028ed 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/number/FormattedNumberRange.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/number/FormattedNumberRange.java @@ -185,8 +185,7 @@ public class FormattedNumberRange implements FormattedValue { // FormattedStringBuilder and BigDecimal are mutable, so we can't call // #equals() or #hashCode() 
on them directly. FormattedNumberRange _other = (FormattedNumberRange) other; - return Arrays.equals(string.toCharArray(), _other.string.toCharArray()) - && Arrays.equals(string.toFieldArray(), _other.string.toFieldArray()) + return string.contentEquals(_other.string) && quantity1.toBigDecimal().equals(_other.quantity1.toBigDecimal()) && quantity2.toBigDecimal().equals(_other.quantity2.toBigDecimal()); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/number/NumberRangeFormatter.java b/icu4j/main/classes/core/src/com/ibm/icu/number/NumberRangeFormatter.java index 93d37bfd6cf..559d87ff824 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/number/NumberRangeFormatter.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/number/NumberRangeFormatter.java @@ -2,9 +2,11 @@ // License & terms of use: http://www.unicode.org/copyright.html package com.ibm.icu.number; +import java.io.InvalidObjectException; import java.util.Locale; import com.ibm.icu.util.ULocale; +import com.ibm.icu.text.UFormat; /** * The main entrypoint to the formatting of ranges of numbers, including currencies and other units of measurement. @@ -153,6 +155,46 @@ public abstract class NumberRangeFormatter { NOT_EQUAL } + /** + * Class for span fields in FormattedNumberRange. + * + * @draft ICU 69 + * @provisional This API might change or be removed in a future release. + */ + public static final class SpanField extends UFormat.SpanField { + private static final long serialVersionUID = 8750397196515368729L; + + /** + * The concrete field used for spans in FormattedNumberRange. + * + * Instances of NUMBER_RANGE_SPAN should have an associated value: the index of the number + * within the range (0 for the first number, 1 for the second) that is represented by the span. + * + * @draft ICU 69 + * @provisional This API might change or be removed in a future release. 
+ */ + public static final SpanField NUMBER_RANGE_SPAN = new SpanField("number-range-span"); + + private SpanField(String name) { + super(name); + } + + /** + * Serialization method that resolves instances to the constant + * NumberRangeFormatter.SpanField values. + * @internal + * @deprecated This API is ICU internal only. + */ + @Deprecated + @Override + protected Object readResolve() throws InvalidObjectException { + if (this.getName().equals(NUMBER_RANGE_SPAN.getName())) + return NUMBER_RANGE_SPAN; + + throw new InvalidObjectException("An invalid object."); + } + } + private static final UnlocalizedNumberRangeFormatter BASE = new UnlocalizedNumberRangeFormatter(); /** diff --git a/icu4j/main/classes/core/src/com/ibm/icu/number/NumberRangeFormatterImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/number/NumberRangeFormatterImpl.java index 6a8bad4c0aa..795af726d8c 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/number/NumberRangeFormatterImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/number/NumberRangeFormatterImpl.java @@ -5,6 +5,7 @@ package com.ibm.icu.number; import java.util.MissingResourceException; import com.ibm.icu.impl.FormattedStringBuilder; +import com.ibm.icu.impl.FormattedValueStringBuilderImpl; import com.ibm.icu.impl.ICUData; import com.ibm.icu.impl.ICUResourceBundle; import com.ibm.icu.impl.PatternProps; @@ -341,31 +342,48 @@ class NumberRangeFormatterImpl { // TODO: Support padding? 
if (collapseInner) { - // Note: this is actually a mix of prefix and suffix, but adding to infix length works Modifier mod = resolveModifierPlurals(micros1.modInner, micros2.modInner); - h.lengthInfix += mod.apply(string, h.index0(), h.index3()); + h.lengthSuffix += mod.apply(string, h.index0(), h.index4()); + h.lengthPrefix += mod.getPrefixLength(); + h.lengthSuffix -= mod.getPrefixLength(); } else { h.length1 += micros1.modInner.apply(string, h.index0(), h.index1()); - h.length2 += micros2.modInner.apply(string, h.index2(), h.index3()); + h.length2 += micros2.modInner.apply(string, h.index2(), h.index4()); } if (collapseMiddle) { - // Note: this is actually a mix of prefix and suffix, but adding to infix length works Modifier mod = resolveModifierPlurals(micros1.modMiddle, micros2.modMiddle); - h.lengthInfix += mod.apply(string, h.index0(), h.index3()); + h.lengthSuffix += mod.apply(string, h.index0(), h.index4()); + h.lengthPrefix += mod.getPrefixLength(); + h.lengthSuffix -= mod.getPrefixLength(); } else { h.length1 += micros1.modMiddle.apply(string, h.index0(), h.index1()); - h.length2 += micros2.modMiddle.apply(string, h.index2(), h.index3()); + h.length2 += micros2.modMiddle.apply(string, h.index2(), h.index4()); } if (collapseOuter) { - // Note: this is actually a mix of prefix and suffix, but adding to infix length works Modifier mod = resolveModifierPlurals(micros1.modOuter, micros2.modOuter); - h.lengthInfix += mod.apply(string, h.index0(), h.index3()); + h.lengthSuffix += mod.apply(string, h.index0(), h.index4()); + h.lengthPrefix += mod.getPrefixLength(); + h.lengthSuffix -= mod.getPrefixLength(); } else { h.length1 += micros1.modOuter.apply(string, h.index0(), h.index1()); - h.length2 += micros2.modOuter.apply(string, h.index2(), h.index3()); + h.length2 += micros2.modOuter.apply(string, h.index2(), h.index4()); } + + // Now that all pieces are added, save the span info. 
+ FormattedValueStringBuilderImpl.applySpanRange( + string, + NumberRangeFormatter.SpanField.NUMBER_RANGE_SPAN, + 0, + h.index0(), + h.index1()); + FormattedValueStringBuilderImpl.applySpanRange( + string, + NumberRangeFormatter.SpanField.NUMBER_RANGE_SPAN, + 1, + h.index2(), + h.index3()); } Modifier resolveModifierPlurals(Modifier first, Modifier second) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/ListFormatter.java b/icu4j/main/classes/core/src/com/ibm/icu/text/ListFormatter.java index ebde0457a3e..db4ca91deff 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/ListFormatter.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/ListFormatter.java @@ -656,6 +656,7 @@ final public class ListFormatter { field.spanField = SpanField.LIST_SPAN; field.normalField = Field.ELEMENT; field.value = position; + field.start = -1; field.length = elementString.length(); string.append(elementString, field); } else { diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberRangeFormatterTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberRangeFormatterTest.java index 1fcdc0e4da6..0788513337a 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberRangeFormatterTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberRangeFormatterTest.java @@ -736,8 +736,10 @@ public class NumberRangeFormatterTest extends TestFmwk { 5000, expectedString); Object[][] expectedFieldPositions = new Object[][]{ + {NumberRangeFormatter.SpanField.NUMBER_RANGE_SPAN, 0, 2, 0}, {NumberFormat.Field.INTEGER, 0, 1}, {NumberFormat.Field.COMPACT, 1, 2}, + {NumberRangeFormatter.SpanField.NUMBER_RANGE_SPAN, 5, 7, 1}, {NumberFormat.Field.INTEGER, 5, 6}, {NumberFormat.Field.COMPACT, 6, 7}, {NumberFormat.Field.MEASURE_UNIT, 8, 9}}; @@ -754,9 +756,11 @@ public class NumberRangeFormatterTest extends TestFmwk { 98765432, expectedString); Object[][] expectedFieldPositions = new Object[][]{ + 
{NumberRangeFormatter.SpanField.NUMBER_RANGE_SPAN, 0, 10, 0}, {NumberFormat.Field.GROUPING_SEPARATOR, 2, 3}, {NumberFormat.Field.GROUPING_SEPARATOR, 6, 7}, {NumberFormat.Field.INTEGER, 0, 10}, + {NumberRangeFormatter.SpanField.NUMBER_RANGE_SPAN, 11, 21, 1}, {NumberFormat.Field.GROUPING_SEPARATOR, 13, 14}, {NumberFormat.Field.GROUPING_SEPARATOR, 17, 18}, {NumberFormat.Field.INTEGER, 11, 21}}; diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/FormatHandler.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/FormatHandler.java index ca51c1b87dd..1ff44bd213c 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/FormatHandler.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/FormatHandler.java @@ -19,6 +19,7 @@ import com.ibm.icu.impl.DateNumberFormat; import com.ibm.icu.impl.TZDBTimeZoneNames; import com.ibm.icu.impl.TimeZoneGenericNames; import com.ibm.icu.impl.TimeZoneGenericNames.GenericNameType; +import com.ibm.icu.number.NumberRangeFormatter; import com.ibm.icu.text.ChineseDateFormat; import com.ibm.icu.text.ChineseDateFormatSymbols; import com.ibm.icu.text.CompactDecimalFormat; @@ -1862,6 +1863,21 @@ public class FormatHandler } } + public static class NumberRangeFormatterSpanFieldHandler implements SerializableTestUtility.Handler + { + @Override + public Object[] getTestObjects() + { + return new Object[] {NumberRangeFormatter.SpanField.NUMBER_RANGE_SPAN}; + } + + @Override + public boolean hasSameBehavior(Object a, Object b) + { + return (a == b); + } + } + public static class DateFormatHandler implements SerializableTestUtility.Handler { static HashMap cannedPatterns = new HashMap(); diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/SerializableTestUtility.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/SerializableTestUtility.java index f4f27e1d073..263228b5353 100644 --- 
a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/SerializableTestUtility.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/SerializableTestUtility.java @@ -835,6 +835,7 @@ public class SerializableTestUtility { map.put("com.ibm.icu.text.DateIntervalFormat$SpanField", new FormatHandler.DateIntervalSpanFieldHandler()); map.put("com.ibm.icu.text.ListFormatter$Field", new FormatHandler.ListFormatterFieldHandler()); map.put("com.ibm.icu.text.ListFormatter$SpanField", new FormatHandler.ListFormatterSpanFieldHandler()); + map.put("com.ibm.icu.number.NumberRangeFormatter$SpanField", new FormatHandler.NumberRangeFormatterSpanFieldHandler()); map.put("com.ibm.icu.impl.duration.BasicDurationFormat", new FormatHandler.BasicDurationFormatHandler()); map.put("com.ibm.icu.impl.RelativeDateFormat", new FormatHandler.RelativeDateFormatHandler());