ICU-13267 Implementing MEASURE_UNIT field.

Adds plumbing to trim whitespace near the FormattedNumber API boundary.
That plumbing requires a change to dependencies.txt.
This commit is contained in:
Shane Carr 2018-11-13 21:42:27 -08:00 committed by Shane F. Carr
parent 46a888be87
commit 74bbf9e381
13 changed files with 358 additions and 17 deletions

View file

@ -181,8 +181,7 @@ LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unitRef, c
UnicodeString simpleFormats[ARRAY_LENGTH];
getMeasureData(loc, unit, width, simpleFormats, status);
if (U_FAILURE(status)) { return result; }
// TODO: What field to use for units?
result->simpleFormatsToModifiers(simpleFormats, UNUM_FIELD_COUNT, status);
result->simpleFormatsToModifiers(simpleFormats, UNUM_MEASURE_UNIT_FIELD, status);
return result;
}
@ -220,8 +219,7 @@ LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit, con
compiled.format(UnicodeString(u"{0}"), secondaryString, perUnitFormat, status);
if (U_FAILURE(status)) { return result; }
}
// TODO: What field to use for units?
result->multiSimpleFormatsToModifiers(primaryData, perUnitFormat, UNUM_FIELD_COUNT, status);
result->multiSimpleFormatsToModifiers(primaryData, perUnitFormat, UNUM_MEASURE_UNIT_FIELD, status);
return result;
}

View file

@ -6,6 +6,7 @@
#if !UCONFIG_NO_FORMATTING
#include "number_stringbuilder.h"
#include "static_unicode_sets.h"
#include "unicode/utf16.h"
using namespace icu;
@ -432,7 +433,7 @@ bool NumberStringBuilder::nextFieldPosition(FieldPosition& fp, UErrorCode& statu
bool seenStart = false;
int32_t fractionStart = -1;
int32_t startIndex = fp.getEndIndex();
for (int i = fZero + startIndex; i <= fZero + fLength; i++) {
for (int32_t i = fZero + startIndex; i <= fZero + fLength; i++) {
Field _field = UNUM_FIELD_COUNT;
if (i < fZero + fLength) {
_field = getFieldPtr()[i];
@ -443,7 +444,16 @@ bool NumberStringBuilder::nextFieldPosition(FieldPosition& fp, UErrorCode& statu
continue;
}
fp.setEndIndex(i - fZero);
break;
// Trim ignorables (whitespace, etc.) from the edge of the field.
UFieldPosition ufp = {0, fp.getBeginIndex(), fp.getEndIndex()};
if (trimFieldPosition(ufp)) {
fp.setBeginIndex(ufp.beginIndex);
fp.setEndIndex(ufp.endIndex);
break;
}
// This position was all ignorables; continue to the next position.
fp.setEndIndex(fp.getBeginIndex());
seenStart = false;
} else if (!seenStart && field == _field) {
fp.setBeginIndex(i - fZero);
seenStart = true;
@ -471,10 +481,14 @@ void NumberStringBuilder::getAllFieldPositions(FieldPositionIteratorHandler& fpi
Field field = fieldAt(i);
if (current == UNUM_INTEGER_FIELD && field == UNUM_GROUPING_SEPARATOR_FIELD) {
// Special case: GROUPING_SEPARATOR counts as an INTEGER.
// TODO(ICU-13064): Grouping separator can be more than 1 code unit.
fpih.addAttribute(UNUM_GROUPING_SEPARATOR_FIELD, i, i + 1);
} else if (current != field) {
if (current != UNUM_FIELD_COUNT) {
fpih.addAttribute(current, currentStart, i);
UFieldPosition fp = {0, currentStart, i};
if (trimFieldPosition(fp)) {
fpih.addAttribute(current, fp.beginIndex, fp.endIndex);
}
}
current = field;
currentStart = i;
@ -484,7 +498,10 @@ void NumberStringBuilder::getAllFieldPositions(FieldPositionIteratorHandler& fpi
}
}
if (current != UNUM_FIELD_COUNT) {
fpih.addAttribute(current, currentStart, fLength);
UFieldPosition fp = {0, currentStart, fLength};
if (trimFieldPosition(fp)) {
fpih.addAttribute(current, fp.beginIndex, fp.endIndex);
}
}
}
@ -497,4 +514,26 @@ bool NumberStringBuilder::containsField(Field field) const {
return false;
}
// Shrinks the range [fp.beginIndex, fp.endIndex) in place so that it neither starts nor
// ends with characters from the DEFAULT_IGNORABLES set. Indices in `fp` are relative to
// the start of the string content (fZero is added internally).
//
// Returns true if at least one non-ignorable character remains, in which case `fp` holds
// the trimmed range. Returns false, leaving `fp` untouched, if the range is empty,
// inverted, or consists solely of ignorables.
bool NumberStringBuilder::trimFieldPosition(UFieldPosition& fp) const {
    const int32_t segmentLength = fp.endIndex - fp.beginIndex;
    // Reject empty and inverted ranges up front. UnicodeSet::span()/spanBack() document a
    // length of -1 as "NUL-terminated", so a negative length must never reach them: the
    // builder's buffer is not NUL-terminated at the end of a field.
    if (segmentLength <= 0) {
        return false;
    }

    const auto* ignorables = unisets::get(unisets::DEFAULT_IGNORABLES);
    const char16_t* segment = getCharPtr() + fZero + fp.beginIndex;

    // Trim ignorables from the back: spanBack() yields the segment-relative offset at
    // which the trailing run of ignorables begins.
    const int32_t trimmedLength = ignorables->spanBack(segment, segmentLength, USET_SPAN_CONTAINED);
    if (trimmedLength == 0) {
        // The entire segment is ignorables.
        return false;
    }

    // Trim ignorables from the front, looking only at what survived the back trim.
    const int32_t leadingIgnorables = ignorables->span(segment, trimmedLength, USET_SPAN_CONTAINED);

    // Compute the new end before moving the beginning, since both are relative to the
    // original beginIndex.
    fp.endIndex = fp.beginIndex + trimmedLength;
    fp.beginIndex = fp.beginIndex + leadingIgnorables;
    return true;
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -140,6 +140,8 @@ class U_I18N_API NumberStringBuilder : public UMemory {
int32_t prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status);
int32_t remove(int32_t index, int32_t count);
bool trimFieldPosition(UFieldPosition& fpos) const;
};
} // namespace impl

View file

@ -237,6 +237,8 @@ public:
kPermillField = UNUM_PERMILL_FIELD,
/** @stable ICU 2.0 */
kSignField = UNUM_SIGN_FIELD,
/** @draft ICU 64 */
kMeasureUnitField = UNUM_MEASURE_UNIT_FIELD,
/**
* These constants are provided for backwards compatibility only.

View file

@ -375,6 +375,8 @@ typedef enum UNumberFormatFields {
UNUM_PERMILL_FIELD,
/** @stable ICU 49 */
UNUM_SIGN_FIELD,
/** @draft ICU 64 */
UNUM_MEASURE_UNIT_FIELD,
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UNumberFormatFields value.

View file

@ -105,7 +105,8 @@ static void TestSkeletonFormatToFields() {
{UNUM_GROUPING_SEPARATOR_FIELD, 10, 11},
{UNUM_INTEGER_FIELD, 1, 14},
{UNUM_DECIMAL_SEPARATOR_FIELD, 14, 15},
{UNUM_FRACTION_FIELD, 15, 17}
{UNUM_FRACTION_FIELD, 15, 17},
{UNUM_MEASURE_UNIT_FIELD, 18, 19}
};
UFieldPosition actual;
for (int32_t i = 0; i < sizeof(expectedFields) / sizeof(*expectedFields); i++) {

View file

@ -918,6 +918,8 @@ group: number_representation
number_decimalquantity.o number_stringbuilder.o numparse_stringsegment.o number_utils.o
deps
decnumber double_conversion
# for trimming whitespace around fields
static_unicode_sets
# for data loading; that could be split off
resourcebundle
int_functions

View file

@ -71,6 +71,7 @@ class NumberFormatterApiTest : public IntlTest {
void locale();
void formatTypes();
void fieldPositionLogic();
void fieldPositionCoverage();
void toFormat();
void errors();
void validRanges();

View file

@ -85,6 +85,7 @@ void NumberFormatterApiTest::runIndexedTest(int32_t index, UBool exec, const cha
TESTCASE_AUTO(locale);
TESTCASE_AUTO(formatTypes);
TESTCASE_AUTO(fieldPositionLogic);
TESTCASE_AUTO(fieldPositionCoverage);
TESTCASE_AUTO(toFormat);
TESTCASE_AUTO(errors);
TESTCASE_AUTO(validRanges);
@ -2219,6 +2220,137 @@ void NumberFormatterApiTest::fieldPositionLogic() {
assertFalse(u"No fraction part in an integer", fmtd.nextFieldPosition(actual, status));
}
// Checks the field positions reported for formatted numbers that carry a measure unit.
// Each scoped block below formats the value 68 with a particular unit / locale
// combination and then compares the reported positions against a table of
// {field, begin index, end index} entries.
//
// NOTE(review): assertFormatSingle() and assertFieldPositions() are defined outside this
// view. Judging from the call sites, assertFormatSingle() takes (message, skeleton string,
// formatter, locale, input, expected output) and assertFieldPositions() takes (message,
// formatted result, expected table, table length) -- confirm against their definitions.
void NumberFormatterApiTest::fieldPositionCoverage() {
IcuTestErrorCode status(*this, "fieldPositionCoverage");
{
// Simplest case: the unit symbol directly follows the integer with no separator.
const char16_t* message = u"Measure unit field position basic";
FormattedNumber result = assertFormatSingle(
message,
u"measure-unit/temperature-fahrenheit",
NumberFormatter::with().unit(FAHRENHEIT),
Locale::getEnglish(),
68,
u"68°F");
static const UFieldPosition expectedFieldPositions[] = {
// field, begin index, end index
{UNUM_INTEGER_FIELD, 0, 2},
{UNUM_MEASURE_UNIT_FIELD, 2, 4}};
assertFieldPositions(
message,
result,
expectedFieldPositions,
sizeof(expectedFieldPositions)/sizeof(*expectedFieldPositions));
}
{
// Compound unit (unit plus per-unit): the expected table has a single measure-unit
// entry spanning "°F/d".
const char16_t* message = u"Measure unit field position with compound unit";
FormattedNumber result = assertFormatSingle(
message,
u"measure-unit/temperature-fahrenheit per-measure-unit/duration-day",
NumberFormatter::with().unit(FAHRENHEIT).perUnit(DAY),
Locale::getEnglish(),
68,
u"68°F/d");
static const UFieldPosition expectedFieldPositions[] = {
// field, begin index, end index
{UNUM_INTEGER_FIELD, 0, 2},
// coverage for old enum (the DecimalFormat constant is used in place of
// UNUM_MEASURE_UNIT_FIELD here):
{DecimalFormat::kMeasureUnitField, 2, 6}};
assertFieldPositions(
message,
result,
expectedFieldPositions,
sizeof(expectedFieldPositions)/sizeof(*expectedFieldPositions));
}
{
// Full-name width puts a space between number and unit; the space at index 2 is
// expected to belong to neither field.
const char16_t* message = u"Measure unit field position with spaces";
FormattedNumber result = assertFormatSingle(
message,
u"measure-unit/length-meter unit-width-full-name",
NumberFormatter::with().unit(METER).unitWidth(UNUM_UNIT_WIDTH_FULL_NAME),
Locale::getEnglish(),
68,
u"68 meters");
static const UFieldPosition expectedFieldPositions[] = {
// field, begin index, end index
{UNUM_INTEGER_FIELD, 0, 2},
// note: field starts after the space
{UNUM_MEASURE_UNIT_FIELD, 3, 9}};
assertFieldPositions(
message,
result,
expectedFieldPositions,
sizeof(expectedFieldPositions)/sizeof(*expectedFieldPositions));
}
{
// Unit text on both sides of the number: two separate measure-unit entries are
// expected, one before and one after the integer.
const char16_t* message = u"Measure unit field position with prefix and suffix";
FormattedNumber result = assertFormatSingle(
message,
u"measure-unit/length-meter per-measure-unit/duration-second unit-width-full-name",
NumberFormatter::with().unit(METER).perUnit(SECOND).unitWidth(UNUM_UNIT_WIDTH_FULL_NAME),
"ky", // locale with the interesting data
68,
u"секундасына 68 метр");
static const UFieldPosition expectedFieldPositions[] = {
// field, begin index, end index
{UNUM_MEASURE_UNIT_FIELD, 0, 11},
{UNUM_INTEGER_FIELD, 12, 14},
{UNUM_MEASURE_UNIT_FIELD, 15, 19}};
assertFieldPositions(
message,
result,
expectedFieldPositions,
sizeof(expectedFieldPositions)/sizeof(*expectedFieldPositions));
}
{
// The unit name itself contains a space ("độ F"); only the space separating it from
// the number is expected to be excluded.
const char16_t* message = u"Measure unit field position with inner spaces";
FormattedNumber result = assertFormatSingle(
message,
u"measure-unit/temperature-fahrenheit unit-width-full-name",
NumberFormatter::with().unit(FAHRENHEIT).unitWidth(UNUM_UNIT_WIDTH_FULL_NAME),
"vi", // locale with the interesting data
68,
u"68 độ F");
static const UFieldPosition expectedFieldPositions[] = {
// field, begin index, end index
{UNUM_INTEGER_FIELD, 0, 2},
// Should trim leading/trailing spaces, but not inner spaces:
{UNUM_MEASURE_UNIT_FIELD, 3, 7}};
assertFieldPositions(
message,
result,
expectedFieldPositions,
sizeof(expectedFieldPositions)/sizeof(*expectedFieldPositions));
}
{
// Data: the "other" pattern is "\u200E{0} K", i.e. the prefix before the number is a
// single U+200E LEFT-TO-RIGHT MARK (invisible in most editors).
// If that data changes, try to find another example of a non-empty unit prefix/suffix
// that is also all ignorables (whitespace and bidi control marks).
// The expected table has no entry for index 0: the all-ignorable prefix is expected
// to produce no measure-unit position at all.
const char16_t* message = u"Measure unit field position with fully ignorable prefix";
FormattedNumber result = assertFormatSingle(
message,
u"measure-unit/temperature-kelvin",
NumberFormatter::with().unit(KELVIN),
"fa", // locale with the interesting data
68,
u"‎۶۸ K");
static const UFieldPosition expectedFieldPositions[] = {
// field, begin index, end index
{UNUM_INTEGER_FIELD, 1, 3},
{UNUM_MEASURE_UNIT_FIELD, 4, 5}};
assertFieldPositions(
message,
result,
expectedFieldPositions,
sizeof(expectedFieldPositions)/sizeof(*expectedFieldPositions));
}
}
void NumberFormatterApiTest::toFormat() {
IcuTestErrorCode status(*this, "icuFormat");
LocalizedNumberFormatter lnf = NumberFormatter::withLocale("fr")

View file

@ -202,12 +202,11 @@ public class LongNameHandler implements MicroPropsGenerator, ModifierStore {
String[] simpleFormats = new String[ARRAY_LENGTH];
getMeasureData(locale, unit, width, simpleFormats);
// TODO: What field to use for units?
// TODO(ICU4J): Reduce the number of object creations here?
Map<StandardPlural, SimpleModifier> modifiers = new EnumMap<>(
StandardPlural.class);
LongNameHandler result = new LongNameHandler(modifiers, rules, parent);
result.simpleFormatsToModifiers(simpleFormats, null);
result.simpleFormatsToModifiers(simpleFormats, NumberFormat.Field.MEASURE_UNIT);
return result;
}
@ -239,11 +238,10 @@ public class LongNameHandler implements MicroPropsGenerator, ModifierStore {
.trim();
perUnitFormat = SimpleFormatterImpl.formatCompiledPattern(compiled, "{0}", secondaryString);
}
// TODO: What field to use for units?
Map<StandardPlural, SimpleModifier> modifiers = new EnumMap<>(
StandardPlural.class);
LongNameHandler result = new LongNameHandler(modifiers, rules, parent);
result.multiSimpleFormatsToModifiers(primaryData, perUnitFormat, null);
result.multiSimpleFormatsToModifiers(primaryData, perUnitFormat, NumberFormat.Field.MEASURE_UNIT);
return result;
}

View file

@ -9,8 +9,10 @@ import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.NumberFormat.Field;
import com.ibm.icu.text.UnicodeSet;
/**
* A StringBuilder optimized for number formatting. It implements the following key features beyond a
@ -384,7 +386,7 @@ public class NumberStringBuilder implements CharSequence {
return new String(chars, zero, length);
}
private static final Map<Field, Character> fieldToDebugChar = new HashMap<Field, Character>();
private static final Map<Field, Character> fieldToDebugChar = new HashMap<>();
static {
fieldToDebugChar.put(NumberFormat.Field.SIGN, '-');
@ -523,7 +525,12 @@ public class NumberStringBuilder implements CharSequence {
continue;
}
fp.setEndIndex(i - zero);
break;
// Trim ignorables (whitespace, etc.) from the edge of the field.
if (trimFieldPosition(fp)) {
break;
}
// This position was all ignorables; continue to the next position.
seenStart = false;
} else if (!seenStart && field == _field) {
fp.setBeginIndex(i - zero);
seenStart = true;
@ -552,22 +559,51 @@ public class NumberStringBuilder implements CharSequence {
if (current == NumberFormat.Field.INTEGER
&& field == NumberFormat.Field.GROUPING_SEPARATOR) {
// Special case: GROUPING_SEPARATOR counts as an INTEGER.
// TODO(ICU-13064): Grouping separator can be more than 1 code unit.
as.addAttribute(NumberFormat.Field.GROUPING_SEPARATOR,
NumberFormat.Field.GROUPING_SEPARATOR,
i,
i + 1);
} else if (current != field) {
if (current != null) {
as.addAttribute(current, current, currentStart, i);
FieldPosition fp = new FieldPosition(null);
fp.setBeginIndex(currentStart);
fp.setEndIndex(i);
if (trimFieldPosition(fp)) {
as.addAttribute(current, current, fp.getBeginIndex(), fp.getEndIndex());
}
}
current = field;
currentStart = i;
}
}
if (current != null) {
as.addAttribute(current, current, currentStart, length);
FieldPosition fp = new FieldPosition(null);
fp.setBeginIndex(currentStart);
fp.setEndIndex(length);
if (trimFieldPosition(fp)) {
as.addAttribute(current, current, fp.getBeginIndex(), fp.getEndIndex());
}
}
return as.getIterator();
}
/**
 * Narrows the given field position in place so that it neither begins nor ends with
 * characters from the DEFAULT_IGNORABLES set.
 *
 * @param fp the position to trim; its begin and end indices are updated only when this
 *     method returns true
 * @return true if a non-ignorable character remains inside the range; false if scanning
 *     backwards from the end index reaches (or passes) the begin index, in which case
 *     {@code fp} is left unchanged
 */
private boolean trimFieldPosition(FieldPosition fp) {
    final UnicodeSet ignorables = StaticUnicodeSets.get(StaticUnicodeSets.Key.DEFAULT_IGNORABLES);

    // Walk backwards from the end over any trailing ignorables.
    final int trimmedEnd = ignorables.spanBack(this, fp.getEndIndex(), UnicodeSet.SpanCondition.CONTAINED);
    final int originalBegin = fp.getBeginIndex();
    if (trimmedEnd <= originalBegin) {
        // Nothing but ignorables between begin and end.
        return false;
    }
    fp.setEndIndex(trimmedEnd);

    // Walk forwards from the beginning over any leading ignorables.
    final int trimmedBegin = ignorables.span(this, originalBegin, UnicodeSet.SpanCondition.CONTAINED);
    fp.setBeginIndex(trimmedBegin);
    return true;
}
}

View file

@ -1961,6 +1961,11 @@ public abstract class NumberFormat extends UFormat {
*/
public static final Field CURRENCY = new Field("currency");
/**
* @draft ICU 64
*/
public static final Field MEASURE_UNIT = new Field("measure unit");
/**
* Constructs a new instance of NumberFormat.Field with the given field
* name.

View file

@ -2196,6 +2196,129 @@ public class NumberFormatterApiTest {
assertFalse("No fraction part in an integer", fmtd.nextFieldPosition(actual));
}
/**
 * Checks the field positions reported for formatted numbers that carry a measure unit.
 * Each scoped block formats the value 68 with a particular unit / locale combination and
 * compares the reported positions against a table of {field, begin index, end index} rows.
 *
 * NOTE(review): assertFormatSingle() and assertFieldPositions() are defined outside this
 * view. Judging from the call sites, assertFormatSingle() takes (message, skeleton string,
 * formatter, locale, input, expected output) -- confirm against their definitions.
 */
@Test
public void fieldPositionCoverage() {
{
// Simplest case: the unit symbol directly follows the integer with no separator.
String message = "Measure unit field position basic";
FormattedNumber result = assertFormatSingle(
message,
"measure-unit/temperature-fahrenheit",
NumberFormatter.with().unit(MeasureUnit.FAHRENHEIT),
ULocale.ENGLISH,
68,
"68°F");
Object[][] expectedFieldPositions = new Object[][] {
// field, begin index, end index
{NumberFormat.Field.INTEGER, 0, 2},
{NumberFormat.Field.MEASURE_UNIT, 2, 4}};
assertFieldPositions(
message,
result,
expectedFieldPositions);
}
{
// Compound unit (unit plus per-unit): the expected table has a single measure-unit
// row spanning "°F/d".
String message = "Measure unit field position with compound unit";
FormattedNumber result = assertFormatSingle(
message,
"measure-unit/temperature-fahrenheit per-measure-unit/duration-day",
NumberFormatter.with().unit(MeasureUnit.FAHRENHEIT).perUnit(MeasureUnit.DAY),
ULocale.ENGLISH,
68,
"68°F/d");
Object[][] expectedFieldPositions = new Object[][] {
// field, begin index, end index
{NumberFormat.Field.INTEGER, 0, 2},
{NumberFormat.Field.MEASURE_UNIT, 2, 6}};
assertFieldPositions(
message,
result,
expectedFieldPositions);
}
{
// Full-name width puts a space between number and unit; the space at index 2 is
// expected to belong to neither field.
String message = "Measure unit field position with spaces";
FormattedNumber result = assertFormatSingle(
message,
"measure-unit/length-meter unit-width-full-name",
NumberFormatter.with().unit(MeasureUnit.METER).unitWidth(UnitWidth.FULL_NAME),
ULocale.ENGLISH,
68,
"68 meters");
Object[][] expectedFieldPositions = new Object[][] {
// field, begin index, end index
{NumberFormat.Field.INTEGER, 0, 2},
// note: field starts after the space
{NumberFormat.Field.MEASURE_UNIT, 3, 9}};
assertFieldPositions(
message,
result,
expectedFieldPositions);
}
{
// Unit text on both sides of the number: two separate measure-unit rows are
// expected, one before and one after the integer.
String message = "Measure unit field position with prefix and suffix";
FormattedNumber result = assertFormatSingle(
message,
"measure-unit/length-meter per-measure-unit/duration-second unit-width-full-name",
NumberFormatter.with().unit(MeasureUnit.METER).perUnit(MeasureUnit.SECOND).unitWidth(UnitWidth.FULL_NAME),
new ULocale("ky"), // locale with the interesting data
68,
"секундасына 68 метр");
Object[][] expectedFieldPositions = new Object[][] {
// field, begin index, end index
{NumberFormat.Field.MEASURE_UNIT, 0, 11},
{NumberFormat.Field.INTEGER, 12, 14},
{NumberFormat.Field.MEASURE_UNIT, 15, 19}};
assertFieldPositions(
message,
result,
expectedFieldPositions);
}
{
// The unit name itself contains a space ("độ F"); only the space separating it from
// the number is expected to be excluded.
String message = "Measure unit field position with inner spaces";
FormattedNumber result = assertFormatSingle(
message,
"measure-unit/temperature-fahrenheit unit-width-full-name",
NumberFormatter.with().unit(MeasureUnit.FAHRENHEIT).unitWidth(UnitWidth.FULL_NAME),
new ULocale("vi"), // locale with the interesting data
68,
"68 độ F");
Object[][] expectedFieldPositions = new Object[][] {
// field, begin index, end index
{NumberFormat.Field.INTEGER, 0, 2},
// Should trim leading/trailing spaces, but not inner spaces:
{NumberFormat.Field.MEASURE_UNIT, 3, 7}};
assertFieldPositions(
message,
result,
expectedFieldPositions);
}
{
// Data: the "other" pattern is "\u200E{0} K", i.e. the prefix before the number is a
// single U+200E LEFT-TO-RIGHT MARK (invisible in most editors).
// If that data changes, try to find another example of a non-empty unit prefix/suffix
// that is also all ignorables (whitespace and bidi control marks).
// The expected table has no row for index 0: the all-ignorable prefix is expected
// to produce no measure-unit position at all.
String message = "Measure unit field position with fully ignorable prefix";
FormattedNumber result = assertFormatSingle(
message,
"measure-unit/temperature-kelvin",
NumberFormatter.with().unit(MeasureUnit.KELVIN),
new ULocale("fa"), // locale with the interesting data
68,
"‎۶۸ K");
Object[][] expectedFieldPositions = new Object[][] {
// field, begin index, end index
{NumberFormat.Field.INTEGER, 1, 3},
{NumberFormat.Field.MEASURE_UNIT, 4, 5}};
assertFieldPositions(
message,
result,
expectedFieldPositions);
}
}
/** Handler for serialization compatibility test suite. */
public static class FormatHandler implements SerializableTestUtility.Handler {
@Override