ICU-21123 Support unit inflections in ICU4C

See #1574
This commit is contained in:
Hugo van der Merwe 2021-02-17 15:58:16 +00:00
parent 66460b9fad
commit 1dbe70ac18
15 changed files with 670 additions and 98 deletions

View file

@ -288,6 +288,20 @@ Derived NumberFormatterSettings<Derived>::usage(const StringPiece usage)&& {
return move;
}
// Copying setter (lvalue overload): duplicates these settings, stores the
// requested grammatical case in the duplicate's macros, and returns the
// duplicate. The object this is called on is not modified.
template<typename Derived>
Derived NumberFormatterSettings<Derived>::unitDisplayCase(const StringPiece unitDisplayCase) const& {
    Derived result(*this);
    result.fMacros.unitDisplayCase.set(unitDisplayCase);
    return result;
}
// Moving setter (rvalue overload): move-constructs a new settings object from
// *this, stores the requested grammatical case in its macros, and returns it.
template<typename Derived>
Derived NumberFormatterSettings<Derived>::unitDisplayCase(const StringPiece unitDisplayCase)&& {
    Derived result(std::move(*this));
    result.fMacros.unitDisplayCase.set(unitDisplayCase);
    return result;
}
template<typename Derived>
Derived NumberFormatterSettings<Derived>::padding(const Padder& padder) const& {
Derived copy(*this);

View file

@ -39,6 +39,7 @@ int32_t NumberFormatterImpl::formatStatic(const MacroProps &macros, UFormattedNu
int32_t length = writeNumber(micros, inValue, outString, 0, status);
length += writeAffixes(micros, outString, 0, length, status);
results->outputUnit = std::move(micros.outputUnit);
results->gender = micros.gender;
return length;
}
@ -63,6 +64,7 @@ int32_t NumberFormatterImpl::format(UFormattedNumberData *results, UErrorCode &s
int32_t length = writeNumber(micros, inValue, outString, 0, status);
length += writeAffixes(micros, outString, 0, length, status);
results->outputUnit = std::move(micros.outputUnit);
results->gender = micros.gender;
return length;
}
@ -177,6 +179,9 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe,
uprv_strncpy(fMicros.nsName, nsName, 8);
fMicros.nsName[8] = 0; // guarantee NUL-terminated
// Default gender: none.
fMicros.gender = "";
// Resolve the symbols. Do this here because currency may need to customize them.
if (macros.symbols.isDecimalFormatSymbols()) {
fMicros.symbols = macros.symbols.getDecimalFormatSymbols();
@ -246,7 +251,7 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe,
return nullptr;
}
auto usagePrefsHandler =
new UsagePrefsHandler(macros.locale, macros.unit, macros.usage.fUsage, chain, status);
new UsagePrefsHandler(macros.locale, macros.unit, macros.usage.fValue, chain, status);
fUsagePrefsHandler.adoptInsteadAndCheckErrorCode(usagePrefsHandler, status);
chain = fUsagePrefsHandler.getAlias();
} else if (isMixedUnit) {
@ -370,10 +375,14 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe,
// Outer modifier (CLDR units and currency long names)
if (isCldrUnit) {
StringPiece unitDisplayCase("");
if (macros.unitDisplayCase.isSet()) {
unitDisplayCase = macros.unitDisplayCase.fValue;
}
if (macros.usage.isSet()) {
fLongNameMultiplexer.adoptInsteadAndCheckErrorCode(
LongNameMultiplexer::forMeasureUnits(
macros.locale, *fUsagePrefsHandler->getOutputUnits(), unitWidth,
macros.locale, *fUsagePrefsHandler->getOutputUnits(), unitWidth, unitDisplayCase,
resolvePluralRules(macros.rules, macros.locale, status), chain, status),
status);
chain = fLongNameMultiplexer.getAlias();
@ -381,7 +390,7 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe,
fMixedUnitLongNameHandler.adoptInsteadAndCheckErrorCode(new MixedUnitLongNameHandler(),
status);
MixedUnitLongNameHandler::forMeasureUnit(
macros.locale, macros.unit, unitWidth,
macros.locale, macros.unit, unitWidth, unitDisplayCase,
resolvePluralRules(macros.rules, macros.locale, status), chain,
fMixedUnitLongNameHandler.getAlias(), status);
chain = fMixedUnitLongNameHandler.getAlias();
@ -391,7 +400,7 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe,
unit = unit.product(macros.perUnit.reciprocal(status), status);
}
fLongNameHandler.adoptInsteadAndCheckErrorCode(new LongNameHandler(), status);
LongNameHandler::forMeasureUnit(macros.locale, unit, unitWidth,
LongNameHandler::forMeasureUnit(macros.locale, unit, unitWidth, unitDisplayCase,
resolvePluralRules(macros.rules, macros.locale, status),
chain, fLongNameHandler.getAlias(), status);
chain = fLongNameHandler.getAlias();

View file

@ -46,6 +46,34 @@ constexpr int32_t GENDER_INDEX = StandardPlural::Form::COUNT + 2;
// Number of keys in the array populated by PluralTableSink.
constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 3;
// TODO(inflections): load this list from resources, after creating a "&set"
// function for use in ldml2icu rules.
//
// Grammatical gender identifiers known to getGenderString(). The entries MUST
// remain sorted in strcmp order, because getGenderString() binary-searches
// this table. Both the pointers and the strings are immutable, so pointers
// into this table can be handed out as never-deallocated strings.
constexpr int32_t GENDER_COUNT = 7;
const char *const gGenders[GENDER_COUNT] = {"animate",   "common", "feminine", "inanimate",
                                            "masculine", "neuter", "personal"};
const char *getGenderString(UnicodeString uGender, UErrorCode status) {
CharString gender;
gender.appendInvariantChars(uGender, status);
if (U_FAILURE(status)) {
return "";
}
int32_t first = 0;
int32_t last = GENDER_COUNT;
while (first < last) {
int32_t mid = (first + last) / 2;
int32_t cmp = uprv_strcmp(gender.data(), gGenders[mid]);
if (cmp == 0) {
return gGenders[mid];
} else if (cmp > 0) {
first = mid + 1;
} else if (cmp < 0) {
last = mid;
}
}
return "";
}
static int32_t getIndex(const char* pluralKeyword, UErrorCode& status) {
// pluralKeyword can also be "dnam", "per", or "gender"
switch (*pluralKeyword) {
@ -109,7 +137,6 @@ class PluralTableSink : public ResourceSink {
ResourceTable pluralsTable = value.getTable(status);
if (U_FAILURE(status)) { return; }
for (int32_t i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) {
// TODO(ICU-21123): Load the correct inflected form, possibly from the "case" structure.
if (uprv_strcmp(key, "case") == 0) {
continue;
}
@ -137,12 +164,19 @@ class PluralTableSink : public ResourceSink {
* UNUM_UNIT_WIDTH_NARROW), or just *unitsShort* (for width
* UNUM_UNIT_WIDTH_SHORT). For other widths, it reads just "units".
*
* @param unit must have a type and subtype (i.e. it must be a unit listed in
* gTypes and gSubTypes in measunit.cpp).
* @param unit must be a built-in unit, i.e. must have a type and subtype,
* listed in gTypes and gSubTypes in measunit.cpp.
* @param unitDisplayCase the empty string and "nominative" are treated the
* same. For other cases, strings for the requested case are used if found.
* (For any missing case-specific data, we fall back to nominative.)
* @param outArray must be of fixed length ARRAY_LENGTH.
*/
void getMeasureData(const Locale &locale, const MeasureUnit &unit, const UNumberUnitWidth &width,
UnicodeString *outArray, UErrorCode &status) {
void getMeasureData(const Locale &locale,
const MeasureUnit &unit,
const UNumberUnitWidth &width,
StringPiece unitDisplayCase,
UnicodeString *outArray,
UErrorCode &status) {
PluralTableSink sink(outArray);
LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
if (U_FAILURE(status)) { return; }
@ -159,6 +193,7 @@ void getMeasureData(const Locale &locale, const MeasureUnit &unit, const UNumber
CharString key;
key.append("units", status);
// TODO(icu-units#140): support gender for other unit widths.
if (width == UNUM_UNIT_WIDTH_NARROW) {
key.append("Narrow", status);
} else if (width == UNUM_UNIT_WIDTH_SHORT) {
@ -169,6 +204,23 @@ void getMeasureData(const Locale &locale, const MeasureUnit &unit, const UNumber
key.append("/", status);
key.append(subtypeForResource, status);
// Grab desired case first, if available. Then grab no-case data to fill in
// the gaps.
if (width == UNUM_UNIT_WIDTH_FULL_NAME && !unitDisplayCase.empty()) {
CharString caseKey;
caseKey.append(key, status);
caseKey.append("/case/", status);
caseKey.append(unitDisplayCase, status);
UErrorCode localStatus = U_ZERO_ERROR;
ures_getAllItemsWithFallback(unitsBundle.getAlias(), caseKey.data(), sink, localStatus);
// TODO(icu-units#138): our fallback logic is not spec-compliant: we
// check the given case, then go straight to the no-case data. The spec
// states we should first look for case="nominative". As part of #138,
// either get the spec changed, or add unit tests that warn us if
// case="nominative" data differs from no-case data?
}
UErrorCode localStatus = U_ZERO_ERROR;
ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus);
if (width == UNUM_UNIT_WIDTH_SHORT) {
@ -232,15 +284,156 @@ UnicodeString getPerUnitFormat(const Locale& locale, const UNumberUnitWidth &wid
return UnicodeString(ptr, len);
}
/**
* Loads and applies deriveComponent rules from CLDR's grammaticalFeatures.xml.
*
* Consider a deriveComponent rule that looks like this:
*
* <deriveComponent feature="case" structure="per" value0="compound" value1="nominative"/>
*
* Instantiating an instance as follows:
*
* DerivedComponents d(loc, "case", "per", "foo");
*
* Applying the rule in the XML element above, `d.value0()` will be "foo", and
* `d.value1()` will be "nominative".
*
* In case of any kind of failure, value0() and value1() will simply return "".
*/
class DerivedComponents {
public:
/**
* Constructor.
*
* The feature and structure parameters must be null-terminated. The string
* referenced by compoundValue must exist for longer than the
* DerivedComponents instance.
*/
DerivedComponents(const Locale &locale,
const char *feature,
const char *structure,
const StringPiece compoundValue) {
StackUResourceBundle derivationsBundle, stackBundle;
ures_openDirectFillIn(derivationsBundle.getAlias(), NULL, "grammaticalFeatures", &status);
ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
&status);
ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(),
&status);
if (U_FAILURE(status)) {
return;
}
UErrorCode localStatus = U_ZERO_ERROR;
// TODO: use standard normal locale resolution algorithms rather than just grabbing language:
ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(),
&localStatus);
// TODO:
// - code currently assumes if the locale exists, the rules are there -
// instead of falling back to root when the requested rule is missing.
// - investigate ures.h functions, see if one that uses res_findResource()
// might be better (or use res_findResource directly), or maybe help
// improve ures documentation to guide function selection?
if (localStatus == U_MISSING_RESOURCE_ERROR) {
ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
} else {
status = localStatus;
}
ures_getByKey(stackBundle.getAlias(), "component", stackBundle.getAlias(), &status);
ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
ures_getByKey(stackBundle.getAlias(), structure, stackBundle.getAlias(), &status);
UnicodeString val0 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 0, &status);
UnicodeString val1 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 1, &status);
if (U_SUCCESS(status)) {
if (val0.compare(UnicodeString(u"compound")) == 0) {
sp0 = compoundValue;
} else {
memory0.appendInvariantChars(val0, status);
sp0 = memory0.toStringPiece();
}
if (val1.compare(UnicodeString(u"compound")) == 0) {
sp1 = compoundValue;
} else {
memory1.appendInvariantChars(val1, status);
sp1 = memory1.toStringPiece();
}
}
}
// The returned StringPiece is only valid as long as both the instance
// exists, and the compoundValue passed to the constructor is valid.
StringPiece value0() const {
return sp0;
}
// The returned StringPiece is only valid as long as both the instance
// exists, and the compoundValue passed to the constructor is valid.
StringPiece value1() const {
return sp1;
}
private:
UErrorCode status = U_ZERO_ERROR;
// Holds strings referred to by value0 and value1;
CharString memory0, memory1;
StringPiece sp0, sp1;
};
/**
 * Loads a deriveCompound rule value from the grammaticalFeatures resource
 * bundle: derivations/<language or root>/compound/<feature>/<structure>.
 *
 * @param locale Only its language is used to select the rule set. If that
 *     language has no entry, the "root" rules are used instead.
 * @param feature Null-terminated feature key, e.g. "gender".
 * @param structure Null-terminated structure key, e.g. "per".
 * @param status Set to a failure code if the rule could not be loaded. A
 *     failure that is already set on entry is preserved, never cleared.
 * @return The rule value, or a bogus string on failure.
 */
UnicodeString
getDeriveCompoundRule(const Locale &locale, const char *feature, const char *structure, UErrorCode &status) {
    StackUResourceBundle derivationsBundle, stackBundle;
    ures_openDirectFillIn(derivationsBundle.getAlias(), NULL, "grammaticalFeatures", &status);
    ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
                  &status);
    ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), &status);
    if (U_FAILURE(status)) {
        // The derivations table itself is unavailable (or status was already
        // failing): report that as-is rather than treating it as a missing
        // language and falling back to root.
        UnicodeString bogus;
        bogus.setToBogus();
        return bogus;
    }
    // Use a separate error code for the language lookup, so that only a
    // missing language (and nothing earlier) triggers the fallback to root.
    UErrorCode localStatus = U_ZERO_ERROR;
    // TODO: use standard normal locale resolution algorithms rather than just grabbing language:
    ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(),
                  &localStatus);
    // TODO:
    // - code currently assumes if the locale exists, the rules are there -
    //   instead of falling back to root when the requested rule is missing.
    // - investigate ures.h functions, see if one that uses res_findResource()
    //   might be better (or use res_findResource directly), or maybe help
    //   improve ures documentation to guide function selection?
    if (localStatus == U_MISSING_RESOURCE_ERROR) {
        ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
    } else {
        status = localStatus;
    }
    ures_getByKey(stackBundle.getAlias(), "compound", stackBundle.getAlias(), &status);
    ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
    return ures_getUnicodeStringByKey(stackBundle.getAlias(), structure, &status);
}
////////////////////////
/// END DATA LOADING ///
////////////////////////
// TODO: promote this somewhere? It's based on patternprops.cpp's trimWhiteSpace.
//
// Skips leading and trailing characters for which u_isJavaSpaceChar() is true.
// Returns a pointer into `s` at the first character that is kept, and updates
// `length` to the number of characters that remain. No characters are copied.
const UChar *trimSpaceChars(const UChar *s, int32_t &length) {
    // Nothing to trim: empty input, or neither end is a space character.
    if (length <= 0) {
        return s;
    }
    if (!u_isJavaSpaceChar(s[0]) && !u_isJavaSpaceChar(s[length - 1])) {
        return s;
    }
    const UChar *begin = s;
    const UChar *end = s + length;
    while (begin < end && u_isJavaSpaceChar(*begin)) {
        ++begin;
    }
    if (begin < end) {
        // *begin is not a space character, so the backward scan is guaranteed
        // to stop before crossing it; no bounds test is needed in the loop.
        while (u_isJavaSpaceChar(end[-1])) {
            --end;
        }
    }
    length = static_cast<int32_t>(end - begin);
    return begin;
}
} // namespace
void LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unitRef,
const UNumberUnitWidth &width, const PluralRules *rules,
const MicroPropsGenerator *parent, LongNameHandler *fillIn,
void LongNameHandler::forMeasureUnit(const Locale &loc,
const MeasureUnit &unitRef,
const UNumberUnitWidth &width,
StringPiece unitDisplayCase,
const PluralRules *rules,
const MicroPropsGenerator *parent,
LongNameHandler *fillIn,
UErrorCode &status) {
// Not valid for mixed units that aren't built-in units, and there should
// not be any built-in mixed units!
@ -253,6 +446,9 @@ void LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unitR
// "builtin-per-builtin".
// TODO(ICU-20941): support more generic case than builtin-per-builtin.
MeasureUnitImpl fullUnit = MeasureUnitImpl::forMeasureUnitMaybeCopy(unitRef, status);
if (U_FAILURE(status)) {
return;
}
MeasureUnitImpl unit;
MeasureUnitImpl perUnit;
for (int32_t i = 0; i < fullUnit.singleUnits.length(); i++) {
@ -265,12 +461,12 @@ void LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unitR
}
}
forCompoundUnit(loc, std::move(unit).build(status), std::move(perUnit).build(status), width,
rules, parent, fillIn, status);
unitDisplayCase, rules, parent, fillIn, status);
return;
}
UnicodeString simpleFormats[ARRAY_LENGTH];
getMeasureData(loc, unitRef, width, simpleFormats, status);
getMeasureData(loc, unitRef, width, unitDisplayCase, simpleFormats, status);
if (U_FAILURE(status)) {
return;
}
@ -278,12 +474,23 @@ void LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unitR
fillIn->parent = parent;
fillIn->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD},
status);
if (!simpleFormats[GENDER_INDEX].isBogus()) {
fillIn->gender = getGenderString(simpleFormats[GENDER_INDEX], status);
}
}
void LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit,
const MeasureUnit &perUnit, const UNumberUnitWidth &width,
const PluralRules *rules, const MicroPropsGenerator *parent,
LongNameHandler *fillIn, UErrorCode &status) {
void LongNameHandler::forCompoundUnit(const Locale &loc,
const MeasureUnit &unit,
const MeasureUnit &perUnit,
const UNumberUnitWidth &width,
StringPiece unitDisplayCase,
const PluralRules *rules,
const MicroPropsGenerator *parent,
LongNameHandler *fillIn,
UErrorCode &status) {
if (U_FAILURE(status)) {
return;
}
if (uprv_strcmp(unit.getType(), "") == 0 || uprv_strcmp(perUnit.getType(), "") == 0) {
// TODO(ICU-20941): Unsanctioned unit. Not yet fully supported. Set an
// error code. Once we support not-built-in units here, unitRef may be
@ -295,17 +502,24 @@ void LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit
status = U_INTERNAL_PROGRAM_ERROR;
return;
}
DerivedComponents derivedPerCases(loc, "case", "per", unitDisplayCase);
UnicodeString primaryData[ARRAY_LENGTH];
getMeasureData(loc, unit, width, primaryData, status);
getMeasureData(loc, unit, width, derivedPerCases.value0(), primaryData, status);
if (U_FAILURE(status)) {
return;
}
UnicodeString secondaryData[ARRAY_LENGTH];
getMeasureData(loc, perUnit, width, secondaryData, status);
getMeasureData(loc, perUnit, width, derivedPerCases.value1(), secondaryData, status);
if (U_FAILURE(status)) {
return;
}
// TODO(icu-units#139): implement these rules:
// <deriveComponent feature="plural" structure="per" ...>
// This has impact on multiSimpleFormatsToModifiers(...) below too.
// These rules are currently (ICU 69) all the same and hard-coded below.
UnicodeString perUnitFormat;
if (!secondaryData[PER_INDEX].isBogus()) {
perUnitFormat = secondaryData[PER_INDEX];
@ -314,7 +528,7 @@ void LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit
if (U_FAILURE(status)) {
return;
}
// rawPerUnitFormat is something like "{0}/{1}"; we need to substitute in the secondary unit.
// rawPerUnitFormat is something like "{0} per {1}"; we need to substitute in the secondary unit.
SimpleFormatter compiled(rawPerUnitFormat, 2, 2, status);
if (U_FAILURE(status)) {
return;
@ -328,7 +542,11 @@ void LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit
if (U_FAILURE(status)) {
return;
}
UnicodeString secondaryString = secondaryCompiled.getTextWithNoArguments().trim();
UnicodeString secondaryFormatString = secondaryCompiled.getTextWithNoArguments();
int32_t trimmedSecondaryLen = secondaryFormatString.length();
const UChar *trimmedSecondaryString =
trimSpaceChars(secondaryFormatString.getBuffer(), trimmedSecondaryLen);
UnicodeString secondaryString(false, trimmedSecondaryString, trimmedSecondaryLen);
// TODO: Why does UnicodeString need to be explicit in the following line?
compiled.format(UnicodeString(u"{0}"), secondaryString, perUnitFormat, status);
if (U_FAILURE(status)) {
@ -339,6 +557,24 @@ void LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit
fillIn->parent = parent;
fillIn->multiSimpleFormatsToModifiers(primaryData, perUnitFormat,
{UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
// Gender
UnicodeString uVal = getDeriveCompoundRule(loc, "gender", "per", status);
if (U_FAILURE(status)) {
return;
}
U_ASSERT(!uVal.isBogus() && uVal.length() == 1);
switch (uVal[0]) {
case u'0':
fillIn->gender = getGenderString(primaryData[GENDER_INDEX], status);
break;
case u'1':
fillIn->gender = getGenderString(secondaryData[GENDER_INDEX], status);
break;
default:
// Data error. Assert-fail in debug mode, else return no gender.
U_ASSERT(false);
}
}
UnicodeString LongNameHandler::getUnitDisplayName(
@ -350,7 +586,7 @@ UnicodeString LongNameHandler::getUnitDisplayName(
return ICU_Utility::makeBogusString();
}
UnicodeString simpleFormats[ARRAY_LENGTH];
getMeasureData(loc, unit, width, simpleFormats, status);
getMeasureData(loc, unit, width, "", simpleFormats, status);
return simpleFormats[DNAM_INDEX];
}
@ -364,7 +600,7 @@ UnicodeString LongNameHandler::getUnitPattern(
return ICU_Utility::makeBogusString();
}
UnicodeString simpleFormats[ARRAY_LENGTH];
getMeasureData(loc, unit, width, simpleFormats, status);
getMeasureData(loc, unit, width, "", simpleFormats, status);
// The above already handles fallback from other widths to short
if (U_FAILURE(status)) {
return ICU_Utility::makeBogusString();
@ -387,6 +623,7 @@ LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const
getCurrencyLongNameData(loc, currency, simpleFormats, status);
if (U_FAILURE(status)) { return nullptr; }
result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status);
// TODO(inflections): currency gender?
return result;
}
@ -426,29 +663,41 @@ void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &mic
}
StandardPlural::Form pluralForm = utils::getPluralSafe(micros.rounder, rules, quantity, status);
micros.modOuter = &fModifiers[pluralForm];
micros.gender = gender;
}
const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const {
return &fModifiers[plural];
}
void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &mixedUnit,
const UNumberUnitWidth &width, const PluralRules *rules,
void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc,
const MeasureUnit &mixedUnit,
const UNumberUnitWidth &width,
StringPiece unitDisplayCase,
const PluralRules *rules,
const MicroPropsGenerator *parent,
MixedUnitLongNameHandler *fillIn, UErrorCode &status) {
MixedUnitLongNameHandler *fillIn,
UErrorCode &status) {
U_ASSERT(mixedUnit.getComplexity(status) == UMEASURE_UNIT_MIXED);
U_ASSERT(fillIn != nullptr);
MeasureUnitImpl temp;
const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status);
const MeasureUnitImpl &impl = MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status);
fillIn->fMixedUnitCount = impl.singleUnits.length();
fillIn->fMixedUnitData.adoptInstead(new UnicodeString[fillIn->fMixedUnitCount * ARRAY_LENGTH]);
for (int32_t i = 0; i < fillIn->fMixedUnitCount; i++) {
// Grab data for each of the components.
UnicodeString *unitData = &fillIn->fMixedUnitData[i * ARRAY_LENGTH];
getMeasureData(loc, impl.singleUnits[i]->build(status), width, unitData, status);
// TODO(CLDR-14502): check from the CLDR-14502 ticket whether this
// propagation of unitDisplayCase is correct:
getMeasureData(loc, impl.singleUnits[i]->build(status), width, unitDisplayCase, unitData,
status);
}
// TODO(icu-units#120): Make sure ICU doesn't output zero-valued
// high-magnitude fields
// * for mixed units count N, produce N listFormatters, one for each subset
// that might be formatted.
UListFormatterWidth listWidth = ULISTFMT_WIDTH_SHORT;
if (width == UNUM_UNIT_WIDTH_NARROW) {
listWidth = ULISTFMT_WIDTH_NARROW;
@ -458,6 +707,8 @@ void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUn
}
fillIn->fListFormatter.adoptInsteadAndCheckErrorCode(
ListFormatter::createInstance(loc, ULISTFMT_TYPE_UNITS, listWidth, status), status);
// TODO(ICU-21494): grab gender of each unit, calculate the gender
// associated with this list formatter, save it for later.
fillIn->rules = rules;
fillIn->parent = parent;
@ -541,7 +792,11 @@ const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity &
}
}
// TODO(ICU-21494): implement gender for lists of mixed units. Presumably we
// can set micros.gender to the gender associated with the list formatter in
// use below (once we have correct support for that). And then document this
// appropriately? "getMixedUnitModifier" doesn't sound like it would do
// something like this.
// Combine list into a "premixed" pattern
UnicodeString premixedFormatPattern;
@ -560,16 +815,19 @@ const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity &
const Modifier *MixedUnitLongNameHandler::getModifier(Signum /*signum*/,
StandardPlural::Form /*plural*/) const {
// TODO(units): investigate this method when investigating where
// LongNameHandler::getModifier() gets used. To be sure it remains
// ModifierStore::getModifier() gets used. To be sure it remains
// unreachable:
UPRV_UNREACHABLE;
return nullptr;
}
LongNameMultiplexer *
LongNameMultiplexer::forMeasureUnits(const Locale &loc, const MaybeStackVector<MeasureUnit> &units,
const UNumberUnitWidth &width, const PluralRules *rules,
const MicroPropsGenerator *parent, UErrorCode &status) {
LongNameMultiplexer *LongNameMultiplexer::forMeasureUnits(const Locale &loc,
const MaybeStackVector<MeasureUnit> &units,
const UNumberUnitWidth &width,
StringPiece unitDisplayCase,
const PluralRules *rules,
const MicroPropsGenerator *parent,
UErrorCode &status) {
LocalPointer<LongNameMultiplexer> result(new LongNameMultiplexer(parent), status);
if (U_FAILURE(status)) {
return nullptr;
@ -581,15 +839,16 @@ LongNameMultiplexer::forMeasureUnits(const Locale &loc, const MaybeStackVector<M
}
result->fMeasureUnits.adoptInstead(new MeasureUnit[units.length()]);
for (int32_t i = 0, length = units.length(); i < length; i++) {
const MeasureUnit& unit = *units[i];
const MeasureUnit &unit = *units[i];
result->fMeasureUnits[i] = unit;
if (unit.getComplexity(status) == UMEASURE_UNIT_MIXED) {
MixedUnitLongNameHandler *mlnh = result->fMixedUnitHandlers.createAndCheckErrorCode(status);
MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, rules, NULL, mlnh, status);
MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL,
mlnh, status);
result->fHandlers[i] = mlnh;
} else {
LongNameHandler *lnh = result->fLongNameHandlers.createAndCheckErrorCode(status);
LongNameHandler::forMeasureUnit(loc, unit, width, rules, NULL, lnh, status);
LongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, NULL, lnh, status);
result->fHandlers[i] = lnh;
}
if (U_FAILURE(status)) {

View file

@ -16,6 +16,8 @@
U_NAMESPACE_BEGIN namespace number {
namespace impl {
// LongNameHandler takes care of formatting currency and measurement unit names,
// as well as populating the gender of measure units.
class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public UMemory {
public:
static UnicodeString getUnitDisplayName(
@ -24,6 +26,8 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public
UNumberUnitWidth width,
UErrorCode& status);
// This function does not support inflections or other newer NumberFormatter
// features: it exists to support the older not-recommended MeasureFormat.
static UnicodeString getUnitPattern(
const Locale& loc,
const MeasureUnit& unit,
@ -47,13 +51,21 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public
* @param loc The desired locale.
* @param unitRef The measure unit to construct a LongNameHandler for.
* @param width Specifies the desired unit rendering.
* @param unitDisplayCase Specifies the desired grammatical case. The empty
* string and "nominative" are treated the same. For other cases,
* strings for the requested case are used if found. (For any missing
* case-specific data, we fall back to nominative.)
* @param rules Does not take ownership.
* @param parent Does not take ownership.
* @param fillIn Required.
*/
static void forMeasureUnit(const Locale &loc, const MeasureUnit &unitRef,
const UNumberUnitWidth &width, const PluralRules *rules,
const MicroPropsGenerator *parent, LongNameHandler *fillIn,
static void forMeasureUnit(const Locale &loc,
const MeasureUnit &unitRef,
const UNumberUnitWidth &width,
StringPiece unitDisplayCase,
const PluralRules *rules,
const MicroPropsGenerator *parent,
LongNameHandler *fillIn,
UErrorCode &status);
/**
@ -63,10 +75,6 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public
void
processQuantity(DecimalQuantity &quantity, MicroProps &micros, UErrorCode &status) const U_OVERRIDE;
// TODO(units): investigate whether we might run into Mixed Unit trouble
// with this. This override for ModifierStore::getModifier does not support
// mixed units: investigate under which circumstances it gets called (check
// both ImmutablePatternModifier and in NumberRangeFormatterImpl).
const Modifier* getModifier(Signum signum, StandardPlural::Form plural) const U_OVERRIDE;
private:
@ -76,6 +84,9 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public
const PluralRules *rules;
// Not owned
const MicroPropsGenerator *parent;
// Grammatical gender of the formatted result. Not owned: must point at
// static or global strings.
const char *gender = "";
LongNameHandler(const PluralRules *rules, const MicroPropsGenerator *parent)
: rules(rules), parent(parent) {
@ -94,9 +105,14 @@ class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public
// Fills in LongNameHandler fields for formatting compound units identified
// via `unit` and `perUnit`. Both `unit` and `perUnit` need to be built-in
// units (for which data exists).
static void forCompoundUnit(const Locale &loc, const MeasureUnit &unit, const MeasureUnit &perUnit,
const UNumberUnitWidth &width, const PluralRules *rules,
const MicroPropsGenerator *parent, LongNameHandler *fillIn,
static void forCompoundUnit(const Locale &loc,
const MeasureUnit &unit,
const MeasureUnit &perUnit,
const UNumberUnitWidth &width,
StringPiece unitDisplayCase,
const PluralRules *rules,
const MicroPropsGenerator *parent,
LongNameHandler *fillIn,
UErrorCode &status);
// Sets fModifiers to use the patterns from `simpleFormats`.
@ -126,13 +142,21 @@ class MixedUnitLongNameHandler : public MicroPropsGenerator, public ModifierStor
* @param mixedUnit The mixed measure unit to construct a
* MixedUnitLongNameHandler for.
* @param width Specifies the desired unit rendering.
* @param unitDisplayCase Specifies the desired grammatical case. The empty
* string and "nominative" are treated the same. For other cases,
* strings for the requested case are used if found. (For any missing
* case-specific data, we fall back to nominative.)
* @param rules Does not take ownership.
* @param parent Does not take ownership.
* @param fillIn Required.
*/
static void forMeasureUnit(const Locale &loc, const MeasureUnit &mixedUnit,
const UNumberUnitWidth &width, const PluralRules *rules,
const MicroPropsGenerator *parent, MixedUnitLongNameHandler *fillIn,
static void forMeasureUnit(const Locale &loc,
const MeasureUnit &mixedUnit,
const UNumberUnitWidth &width,
StringPiece unitDisplayCase,
const PluralRules *rules,
const MicroPropsGenerator *parent,
MixedUnitLongNameHandler *fillIn,
UErrorCode &status);
/**
@ -205,8 +229,11 @@ class LongNameMultiplexer : public MicroPropsGenerator, public UMemory {
// `units`. An individual unit might be a mixed unit.
static LongNameMultiplexer *forMeasureUnits(const Locale &loc,
const MaybeStackVector<MeasureUnit> &units,
const UNumberUnitWidth &width, const PluralRules *rules,
const MicroPropsGenerator *parent, UErrorCode &status);
const UNumberUnitWidth &width,
StringPiece unitDisplayCase,
const PluralRules *rules,
const MicroPropsGenerator *parent,
UErrorCode &status);
// The output unit must be provided via `micros.outputUnit`, it must match
// one of the units provided to the factory function.

View file

@ -83,6 +83,11 @@ struct MicroProps : public MicroPropsGenerator {
bool useCurrency;
char nsName[9];
// No ownership: must point at a string which will outlive MicroProps
// instances, e.g. a string with static storage duration, or just a string
// that will never be deallocated or modified.
const char *gender;
// Note: This struct has no direct ownership of the following pointers.
const DecimalFormatSymbols* symbols;

View file

@ -39,6 +39,11 @@ MeasureUnit FormattedNumber::getOutputUnit(UErrorCode& status) const {
return fData->outputUnit;
}
// Returns the gender string recorded in this result's data (fData->gender).
// NOTE(review): UPRV_FORMATTED_VALUE_METHOD_GUARD("") presumably returns ""
// early when `status` is already a failure or fData is unavailable — confirm
// against the macro's definition, which is not visible here.
const char *FormattedNumber::getGender(UErrorCode &status) const {
UPRV_FORMATTED_VALUE_METHOD_GUARD("")
return fData->gender;
}
void FormattedNumber::getDecimalQuantity(impl::DecimalQuantity& output, UErrorCode& status) const {
UPRV_FORMATTED_VALUE_METHOD_GUARD(UPRV_NOARG)
output = fData->quantity;

View file

@ -204,7 +204,7 @@ class RoundingImpl {
* - see blueprint_helpers::parseIncrementOption().
*
* Referencing MacroProps means needing to pull in the .o files that have the
* destructors for the SymbolsWrapper, Usage, and Scale classes.
* destructors for the SymbolsWrapper, StringProp, and Scale classes.
*/
void parseIncrementOption(const StringSegment &segment, Precision &outPrecision, UErrorCode &status);

View file

@ -890,6 +890,10 @@ void GeneratorHelpers::generateSkeleton(const MacroProps& macros, UnicodeString&
status = U_UNSUPPORTED_ERROR;
return;
}
if (macros.unitDisplayCase.isSet()) {
status = U_UNSUPPORTED_ERROR;
return;
}
if (macros.affixProvider != nullptr) {
status = U_UNSUPPORTED_ERROR;
return;
@ -1512,7 +1516,7 @@ bool GeneratorHelpers::unit(const MacroProps& macros, UnicodeString& sb, UErrorC
// Emits the "usage/<value>" skeleton token for `macros.usage`.
//
// When a usage string is set, appends "usage/" followed by the stored string
// to `sb` and returns true; otherwise leaves `sb` untouched and returns false.
// The status parameter is unused.
bool GeneratorHelpers::usage(const MacroProps& macros, UnicodeString& sb, UErrorCode& /* status */) {
if (macros.usage.isSet()) {
sb.append(u"usage/", -1);
sb.append(UnicodeString(macros.usage.fUsage, -1, US_INV));
sb.append(UnicodeString(macros.usage.fValue, -1, US_INV));
return true;
}
return false;
}

View file

@ -28,79 +28,81 @@ using icu::StringSegment;
using icu::units::ConversionRates;
// Copy constructor
// Starts from the empty state set up by the default constructor, then reuses
// the copy assignment operator to duplicate `other`'s contents.
Usage::Usage(const Usage &other) : Usage() {
StringProp::StringProp(const StringProp &other) : StringProp() {
this->operator=(other);
}
// Copy assignment operator
Usage &Usage::operator=(const Usage &other) {
StringProp &StringProp::operator=(const StringProp &other) {
fLength = 0;
fError = other.fError;
if (fUsage != nullptr) {
uprv_free(fUsage);
fUsage = nullptr;
if (fValue != nullptr) {
uprv_free(fValue);
fValue = nullptr;
}
if (other.fUsage == nullptr) {
if (other.fValue == nullptr) {
return *this;
}
if (U_FAILURE(other.fError)) {
// We don't bother trying to allocating memory if we're in any case busy
// copying an errored Usage.
// copying an errored StringProp.
return *this;
}
fUsage = (char *)uprv_malloc(other.fLength + 1);
if (fUsage == nullptr) {
fValue = (char *)uprv_malloc(other.fLength + 1);
if (fValue == nullptr) {
fError = U_MEMORY_ALLOCATION_ERROR;
return *this;
}
fLength = other.fLength;
uprv_strncpy(fUsage, other.fUsage, fLength + 1);
uprv_strncpy(fValue, other.fValue, fLength + 1);
return *this;
}
// Move constructor
// Takes over src's buffer pointer, length and error code. src's pointer is
// nulled afterwards so that src's destructor does not free the buffer that
// now belongs to this object.
// NOTE(review): src.fLength is left unchanged, so a moved-from object can
// still report isSet() even though it no longer holds a buffer.
Usage::Usage(Usage &&src) U_NOEXCEPT : fUsage(src.fUsage), fLength(src.fLength), fError(src.fError) {
StringProp::StringProp(StringProp &&src) U_NOEXCEPT : fValue(src.fValue),
fLength(src.fLength),
fError(src.fError) {
// Take ownership away from src if necessary
src.fUsage = nullptr;
src.fValue = nullptr;
}
// Move assignment operator
// Self-move is a no-op. Otherwise the currently owned buffer (if any) is
// freed, and src's buffer pointer, length and error code are taken over.
Usage &Usage::operator=(Usage &&src) U_NOEXCEPT {
StringProp &StringProp::operator=(StringProp &&src) U_NOEXCEPT {
if (this == &src) {
return *this;
}
// Release our own buffer before adopting src's.
if (fUsage != nullptr) {
uprv_free(fUsage);
if (fValue != nullptr) {
uprv_free(fValue);
}
fUsage = src.fUsage;
fValue = src.fValue;
fLength = src.fLength;
fError = src.fError;
// Take ownership away from src if necessary
src.fUsage = nullptr;
src.fValue = nullptr;
return *this;
}
// Destructor: frees the owned string buffer, if there is one, and nulls the
// pointer afterwards.
Usage::~Usage() {
if (fUsage != nullptr) {
uprv_free(fUsage);
fUsage = nullptr;
StringProp::~StringProp() {
if (fValue != nullptr) {
uprv_free(fValue);
fValue = nullptr;
}
}
void Usage::set(StringPiece value) {
if (fUsage != nullptr) {
uprv_free(fUsage);
fUsage = nullptr;
void StringProp::set(StringPiece value) {
if (fValue != nullptr) {
uprv_free(fValue);
fValue = nullptr;
}
fLength = value.length();
fUsage = (char *)uprv_malloc(fLength + 1);
if (fUsage == nullptr) {
fValue = (char *)uprv_malloc(fLength + 1);
if (fValue == nullptr) {
fLength = 0;
fError = U_MEMORY_ALLOCATION_ERROR;
return;
}
uprv_strncpy(fUsage, value.data(), fLength);
fUsage[fLength] = 0;
uprv_strncpy(fValue, value.data(), fLength);
fValue[fLength] = 0;
}
// Populates micros.mixedMeasures and modifies quantity, based on the values in

View file

@ -42,6 +42,9 @@ public:
// TODO(units,hugovdm): populate this correctly for the general case - it's
// currently only implemented for the .usage() use case.
MeasureUnit outputUnit;
// The gender of the formatted output.
const char *gender = "";
};

View file

@ -1131,33 +1131,35 @@ class U_I18N_API Scale : public UMemory {
namespace impl {
// Do not enclose entire Usage with #ifndef U_HIDE_INTERNAL_API, needed for a protected field
// Do not enclose entire StringProp with #ifndef U_HIDE_INTERNAL_API, needed for a protected field
/**
* Manages the heap-allocated char* values behind NumberFormatterSettings string properties, such as usage() and unitDisplayCase().
* @internal
*/
class U_I18N_API Usage : public UMemory {
class U_I18N_API StringProp : public UMemory {
#ifndef U_HIDE_INTERNAL_API
public:
/** @internal */
Usage(const Usage& other);
StringProp(const StringProp &other);
/** @internal */
Usage& operator=(const Usage& other);
StringProp &operator=(const StringProp &other);
/** @internal */
Usage(Usage &&src) U_NOEXCEPT;
StringProp(StringProp &&src) U_NOEXCEPT;
/** @internal */
Usage& operator=(Usage&& src) U_NOEXCEPT;
StringProp &operator=(StringProp &&src) U_NOEXCEPT;
/** @internal */
~Usage();
~StringProp();
/** @internal */
int16_t length() const { return fLength; }
int16_t length() const {
return fLength;
}
/** @internal
* Makes a copy of value. Set to "" to unset.
@ -1165,16 +1167,19 @@ class U_I18N_API Usage : public UMemory {
void set(StringPiece value);
/** @internal */
bool isSet() const { return fLength > 0; }
bool isSet() const {
return fLength > 0;
}
#endif // U_HIDE_INTERNAL_API
private:
char *fUsage;
char *fValue;
int16_t fLength;
UErrorCode fError;
Usage() : fUsage(nullptr), fLength(0), fError(U_ZERO_ERROR) {}
StringProp() : fValue(nullptr), fLength(0), fError(U_ZERO_ERROR) {
}
/** @internal */
UBool copyErrorTo(UErrorCode &status) const {
@ -1185,7 +1190,7 @@ class U_I18N_API Usage : public UMemory {
return false;
}
// Allow NumberFormatterImpl to access fUsage.
// Allow NumberFormatterImpl to access fValue.
friend class impl::NumberFormatterImpl;
// Allow skeleton generation code to access private members.
@ -1480,7 +1485,10 @@ struct U_I18N_API MacroProps : public UMemory {
Scale scale; // = Scale(); (benign value)
/** @internal */
Usage usage; // = Usage(); (no usage)
StringProp usage; // = StringProp(); (no usage)
/** @internal */
StringProp unitDisplayCase; // = StringProp(); (nominative)
/** @internal */
const AffixPatternProvider* affixProvider = nullptr; // no ownership
@ -1503,7 +1511,8 @@ struct U_I18N_API MacroProps : public UMemory {
bool copyErrorTo(UErrorCode &status) const {
return notation.copyErrorTo(status) || precision.copyErrorTo(status) ||
padder.copyErrorTo(status) || integerWidth.copyErrorTo(status) ||
symbols.copyErrorTo(status) || scale.copyErrorTo(status) || usage.copyErrorTo(status);
symbols.copyErrorTo(status) || scale.copyErrorTo(status) || usage.copyErrorTo(status) ||
unitDisplayCase.copyErrorTo(status);
}
};
@ -2169,6 +2178,21 @@ class U_I18N_API NumberFormatterSettings {
* @draft ICU 68
*/
Derived usage(StringPiece usage) &&;
/**
* Specifies the desired case for a unit formatter's output (e.g.
* accusative, dative, genitive).
*
* @internal ICU 69 technology preview
*/
Derived unitDisplayCase(StringPiece unitDisplayCase) const &;
/**
* Overload of unitDisplayCase() for use on an rvalue reference.
*
* @internal ICU 69 technology preview
*/
Derived unitDisplayCase(StringPiece unitDisplayCase) &&;
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_INTERNAL_API
@ -2658,6 +2682,14 @@ class U_I18N_API FormattedNumber : public UMemory, public FormattedValue {
* @draft ICU 68
*/
MeasureUnit getOutputUnit(UErrorCode& status) const;
/**
* Gets the gender of the formatted output. Returns "" when the gender is
* unknown, or for ungendered languages.
*
* @internal ICU 69 technology preview.
*/
const char *getGender(UErrorCode& status) const;
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_INTERNAL_API

View file

@ -1974,6 +1974,8 @@ UBool IntlTest::assertEquals(const char* message,
UBool IntlTest::assertEquals(const char* message,
const char* expected,
const char* actual) {
U_ASSERT(expected != nullptr);
U_ASSERT(actual != nullptr);
if (uprv_strcmp(expected, actual) != 0) {
errln((UnicodeString)"FAIL: " + message + "; got \"" +
actual +

View file

@ -64,6 +64,8 @@ class NumberFormatterApiTest : public IntlTestWithFieldPosition {
void unitUsageErrorCodes();
void unitUsageSkeletons();
void unitCurrency();
void unitInflections();
void unitGender();
void unitPercent();
void percentParity();
void roundingFraction();
@ -170,6 +172,19 @@ class NumberFormatterApiTest : public IntlTestWithFieldPosition {
const FormattedNumber& formattedNumber,
const UFieldPosition* expectedFieldPositions,
int32_t length);
// One row of a data-driven unit inflection test, consumed by
// runUnitInflectionsTestCases(). Instances are brace-initialized, so the
// field order matters.
struct UnitInflectionTestCase {
// Locale identifier used to build the Locale to format for, e.g. "de".
const char *locale;
// Grammatical case to request via unitDisplayCase(), e.g. "genitive".
// nullptr or "" means no case is requested.
const char *unitDisplayCase;
// The number to format.
double value;
// The expected formatted output.
const UChar *expected;
};
// Runs each of the `numCases` entries in `cases` through assertFormatSingle()
// using `unf` configured with the entry's unitDisplayCase. `skeleton` and
// `conciseSkeleton` are only passed on for entries without a unitDisplayCase;
// entries with one pass nullptr for both.
void runUnitInflectionsTestCases(UnlocalizedNumberFormatter unf,
const UChar *skeleton,
const UChar *conciseSkeleton,
const UnitInflectionTestCase *cases,
int32_t numCases);
};
class DecimalQuantityTest : public IntlTest {

View file

@ -85,6 +85,8 @@ void NumberFormatterApiTest::runIndexedTest(int32_t index, UBool exec, const cha
TESTCASE_AUTO(unitUsageErrorCodes);
TESTCASE_AUTO(unitUsageSkeletons);
TESTCASE_AUTO(unitCurrency);
TESTCASE_AUTO(unitInflections);
TESTCASE_AUTO(unitGender);
TESTCASE_AUTO(unitPercent);
if (!quick) {
// Slow test: run in exhaustive mode only
@ -1926,6 +1928,197 @@ void NumberFormatterApiTest::unitCurrency() {
u"123,12 CN¥");
}
// Data-driven helper for unit inflection tests.
//
// For every entry in `cases`, applies the entry's unitDisplayCase to `unf`
// (an absent or empty case is applied as "") and checks the formatted value
// via assertFormatSingle(). The skeleton strings are handed to
// assertFormatSingle() only for entries without a display case; entries with
// one pass nullptr for both skeletons.
void NumberFormatterApiTest::runUnitInflectionsTestCases(UnlocalizedNumberFormatter unf,
                                                         const UChar *skeleton,
                                                         const UChar *conciseSkeleton,
                                                         const UnitInflectionTestCase *cases,
                                                         int32_t numCases) {
    for (int32_t idx = 0; idx < numCases; ++idx) {
        const UnitInflectionTestCase &tc = cases[idx];
        const bool hasCase = tc.unitDisplayCase != nullptr && tc.unitDisplayCase[0] != 0;
        const char *displayCase = hasCase ? tc.unitDisplayCase : "";

        unf = unf.unitDisplayCase(displayCase);
        const UChar *skel = hasCase ? nullptr : skeleton;
        const UChar *cSkel = hasCase ? nullptr : conciseSkeleton;

        UnicodeString description = UnicodeString("\"") + skeleton + u"\", locale=\"" + tc.locale +
                                    u"\", case=\"" + displayCase + u"\", value=" + tc.value;
        assertFormatSingle(description.getTerminatedBuffer(), skel, cSkel, unf, Locale(tc.locale),
                           tc.value, tc.expected);
    }
}
void NumberFormatterApiTest::unitInflections() {
IcuTestErrorCode status(*this, "unitInflections");
UnlocalizedNumberFormatter unf;
const UChar *skeleton;
const UChar *conciseSkeleton;
{
// Simple inflected form test - test case based on the example in CLDR's
// grammaticalFeatures.xml
unf = NumberFormatter::with().unit(NoUnit::percent()).unitWidth(UNUM_UNIT_WIDTH_FULL_NAME);
skeleton = u"percent unit-width-full-name";
conciseSkeleton = u"% unit-width-full-name";
const UnitInflectionTestCase percentCases[] = {
{"ru", nullptr, 10, u"10 процентов"}, // many
{"ru", "genitive", 10, u"10 процентов"}, // many
{"ru", nullptr, 33, u"33 процента"}, // few
{"ru", "genitive", 33, u"33 процентов"}, // few
{"ru", nullptr, 1, u"1 процент"}, // one
{"ru", "genitive", 1, u"1 процента"}, // one
};
runUnitInflectionsTestCases(unf, skeleton, conciseSkeleton, percentCases,
UPRV_LENGTHOF(percentCases));
}
{
// Testing "de" rules:
// <deriveComponent feature="case" structure="per" value0="compound" value1="accusative"/>
// <deriveComponent feature="plural" structure="per" value0="compound" value1="one"/>
//
// per-patterns use accusative, but happen to match nominative, so we're
// not testing value1 in the first rule above.
unf = NumberFormatter::with().unit(MeasureUnit::getMeter()).unitWidth(UNUM_UNIT_WIDTH_FULL_NAME);
skeleton = u"unit/meter unit-width-full-name";
conciseSkeleton = u"unit/meter unit-width-full-name";
const UnitInflectionTestCase meterCases[] = {
{"de", nullptr, 1, u"1 Meter"},
{"de", "genitive", 1, u"1 Meters"},
{"de", nullptr, 2, u"2 Meter"},
{"de", "dative", 2, u"2 Metern"},
};
runUnitInflectionsTestCases(unf, skeleton, conciseSkeleton, meterCases,
UPRV_LENGTHOF(meterCases));
unf = NumberFormatter::with().unit(MeasureUnit::getDay()).unitWidth(UNUM_UNIT_WIDTH_FULL_NAME);
skeleton = u"unit/day unit-width-full-name";
conciseSkeleton = u"unit/day unit-width-full-name";
const UnitInflectionTestCase dayCases[] = {
{"de", nullptr, 1, u"1 Tag"},
{"de", "genitive", 1, u"1 Tages"},
{"de", nullptr, 2, u"2 Tage"},
{"de", "dative", 2, u"2 Tagen"},
};
runUnitInflectionsTestCases(unf, skeleton, conciseSkeleton, dayCases, UPRV_LENGTHOF(dayCases));
// Day has a perUnitPattern
unf = NumberFormatter::with()
.unit(MeasureUnit::forIdentifier("meter-per-day", status))
.unitWidth(UNUM_UNIT_WIDTH_FULL_NAME);
skeleton = u"unit/meter-per-day unit-width-full-name";
conciseSkeleton = u"unit/meter-per-day unit-width-full-name";
const UnitInflectionTestCase meterPerDayCases[] = {
{"de", nullptr, 1, u"1 Meter pro Tag"},
{"de", "genitive", 1, u"1 Meters pro Tag"},
{"de", nullptr, 2, u"2 Meter pro Tag"},
{"de", "dative", 2, u"2 Metern pro Tag"},
// testing code path that falls back to "root" but does not inflect:
{"af", nullptr, 1, u"1 meter per dag"},
{"af", "dative", 1, u"1 meter per dag"},
};
runUnitInflectionsTestCases(unf, skeleton, conciseSkeleton, meterPerDayCases,
UPRV_LENGTHOF(meterPerDayCases));
// Decade does not have a perUnitPattern at this time (CLDR 39 / ICU
// 69), so we can test for the correct form of the per part:
unf = NumberFormatter::with()
.unit(MeasureUnit::forIdentifier("parsec-per-decade", status))
.unitWidth(UNUM_UNIT_WIDTH_FULL_NAME);
skeleton = u"unit/parsec-per-decade unit-width-full-name";
conciseSkeleton = u"unit/parsec-per-decade unit-width-full-name";
// Fragile test cases: these cases will break when whitespace is more
// consistently applied.
const UnitInflectionTestCase parsecPerDecadeCases[] = {
{"de", nullptr, 1, u"1\u00A0Parsec pro Jahrzehnt"},
{"de", "genitive", 1, u"1 Parsec pro Jahrzehnt"},
{"de", nullptr, 2, u"2\u00A0Parsec pro Jahrzehnt"},
{"de", "dative", 2, u"2 Parsec pro Jahrzehnt"},
};
runUnitInflectionsTestCases(unf, skeleton, conciseSkeleton, parsecPerDecadeCases,
UPRV_LENGTHOF(parsecPerDecadeCases));
}
{
// Testing inflection of mixed units:
unf = NumberFormatter::with()
.unit(MeasureUnit::forIdentifier("meter-and-centimeter", status))
.unitWidth(UNUM_UNIT_WIDTH_FULL_NAME);
skeleton = u"unit/meter-and-centimeter unit-width-full-name";
conciseSkeleton = u"unit/meter-and-centimeter unit-width-full-name";
const UnitInflectionTestCase meterPerDayCases[] = {
// TODO(CLDR-14502): check that these inflections are correct, and
// whether CLDR needs any rules for them (presumably CLDR spec
// should mention it, if it's a consistent rule):
{"de", nullptr, 1.01, u"1 Meter, 1 Zentimeter"},
{"de", "genitive", 1.01, u"1 Meters, 1 Zentimeters"},
{"de", "genitive", 1.1, u"1 Meters, 10 Zentimeter"},
{"de", "dative", 1.1, u"1 Meter, 10 Zentimetern"},
{"de", "dative", 2.1, u"2 Metern, 10 Zentimetern"},
};
runUnitInflectionsTestCases(unf, skeleton, conciseSkeleton, meterPerDayCases,
UPRV_LENGTHOF(meterPerDayCases));
}
// TODO: add a usage case that selects between preferences with different
// genders (e.g. year, month, day, hour).
// TODO: look at "↑↑↑" cases: check that inheritance is done right.
}
void NumberFormatterApiTest::unitGender() {
IcuTestErrorCode status(*this, "unitGender");
const struct TestCase {
const char *locale;
const char *unitIdentifier;
const char *expectedGender;
} cases[] = {
{"de", "meter", "masculine"},
{"de", "minute", "feminine"},
{"de", "hour", "feminine"},
{"de", "day", "masculine"},
{"de", "year", "neuter"},
{"fr", "minute", "feminine"},
{"fr", "hour", "feminine"},
{"fr", "day", "masculine"},
// grammaticalFeatures deriveCompound "per" rule:
{"de", "meter-per-hour", "masculine"},
{"af", "meter-per-hour", ""},
// TODO(ICU-21494): determine whether list genders behave as follows,
// and implement proper getListGender support (covering more than just
// two genders):
// // gender rule for lists of people: de "neutral", fr "maleTaints"
// {"de", "day-and-hour-and-minute", "neuter"},
// {"de", "hour-and-minute", "feminine"},
// {"fr", "day-and-hour-and-minute", "masculine"},
// {"fr", "hour-and-minute", "feminine"},
};
LocalizedNumberFormatter formatter;
FormattedNumber fn;
for (const TestCase &t : cases) {
// TODO(icu-units#140): make this work for more than just UNUM_UNIT_WIDTH_FULL_NAME
formatter = NumberFormatter::with()
.unit(MeasureUnit::forIdentifier(t.unitIdentifier, status))
.unitWidth(UNUM_UNIT_WIDTH_FULL_NAME)
.locale(Locale(t.locale));
fn = formatter.formatDouble(1.1, status);
assertEquals(UnicodeString("Testing gender, unit: ") + t.unitIdentifier +
", locale: " + t.locale,
t.expectedGender, fn.getGender(status));
status.assertSuccess();
}
// Make sure getGender does not return garbage for genderless languages
formatter = NumberFormatter::with().locale(Locale::getEnglish());
fn = formatter.formatDouble(1.1, status);
status.assertSuccess();
assertEquals("getGender for a genderless language", "", fn.getGender(status));
}
void NumberFormatterApiTest::unitPercent() {
assertFormatDescending(
u"Percent",

View file

@ -320,6 +320,7 @@
//ldml/units/unitLength[@type="short"]/unit[@type="(\w++)-(%A)"]/displayName ; /unitsShort/$1/$2/dnam
//ldml/units/unitLength[@type="long"]/unit[@type="(\w++)-(%A)"]/displayName ; /units/$1/$2/dnam
# TODO(icu-units#138): homogenize with compoundUnitPattern1 rules below by using "_" as case when case is absent in XML.
//ldml/units/unitLength[@type="narrow"]/unit[@type="(\w++)-(%A)"]/unitPattern[@count="(%A)"][@case="(%A)"] ; /unitsNarrow/$1/$2/case/$4/$3
//ldml/units/unitLength[@type="short"]/unit[@type="(\w++)-(%A)"]/unitPattern[@count="(%A)"][@case="(%A)"] ; /unitsShort/$1/$2/case/$4/$3
//ldml/units/unitLength[@type="long"]/unit[@type="(\w++)-(%A)"]/unitPattern[@count="(%A)"][@case="(%A)"] ; /units/$1/$2/case/$4/$3
@ -338,6 +339,7 @@
//ldml/units/unitLength[@type="short"]/compoundUnit[@type="(%A)"]/compoundUnitPattern ; /unitsShort/compound/$1
//ldml/units/unitLength[@type="long"]/compoundUnit[@type="(%A)"]/compoundUnitPattern ; /units/compound/$1
# TODO(icu-units#138): the style of output paths used in these rules is the proposed format for all count/gender/case lateral inheritance rules.
//ldml/units/unitLength[@type="narrow"]/compoundUnit[@type="(%A)"]/compoundUnitPattern1[@count="(%A)"][@gender="(%A)"][@case="(%A)"] ; /unitsNarrow/compound/$1/$2/$3/$4
//ldml/units/unitLength[@type="short"]/compoundUnit[@type="(%A)"]/compoundUnitPattern1[@count="(%A)"][@gender="(%A)"][@case="(%A)"] ; /unitsShort/compound/$1/$2/$3/$4
//ldml/units/unitLength[@type="long"]/compoundUnit[@type="(%A)"]/compoundUnitPattern1[@count="(%A)"][@gender="(%A)"][@case="(%A)"] ; /units/compound/$1/$2/$3/$4