From c86a2bd7d79c9a221013d1d9b174898ef77e1f01 Mon Sep 17 00:00:00 2001 From: younies Date: Wed, 2 Mar 2022 10:44:43 +0000 Subject: [PATCH] ICU-21379 Add getNounClass and enum NounClass See #2002 --- icu4c/source/i18n/number_output.cpp | 43 +++ icu4c/source/i18n/unicode/numberformatter.h | 29 +- icu4c/source/i18n/unicode/unounclass.h | 43 +++ icu4c/source/test/intltest/numbertest.h | 1 + icu4c/source/test/intltest/numbertest_api.cpp | 302 +++++++++++++++++ .../com/ibm/icu/number/FormattedNumber.java | 50 ++- .../core/src/com/ibm/icu/util/NounClass.java | 20 ++ .../test/number/NumberFormatterApiTest.java | 305 ++++++++++++++++++ 8 files changed, 787 insertions(+), 6 deletions(-) create mode 100644 icu4c/source/i18n/unicode/unounclass.h create mode 100644 icu4j/main/classes/core/src/com/ibm/icu/util/NounClass.java diff --git a/icu4c/source/i18n/number_output.cpp b/icu4c/source/i18n/number_output.cpp index 2c2c25eaedb..78006da8c42 100644 --- a/icu4c/source/i18n/number_output.cpp +++ b/icu4c/source/i18n/number_output.cpp @@ -39,6 +39,49 @@ MeasureUnit FormattedNumber::getOutputUnit(UErrorCode& status) const { return fData->outputUnit; } +NounClass FormattedNumber::getNounClass(UErrorCode &status) const { + UPRV_FORMATTED_VALUE_METHOD_GUARD(NounClass::OTHER); + const char *nounClass = fData->gender; + + // if it is not exist, return `OTHER` + if (uprv_strcmp(nounClass, "") == 0) { + return NounClass::OTHER; + } + + if (uprv_strcmp(nounClass, "neuter") == 0) { + return NounClass::NEUTER; + } + + if (uprv_strcmp(nounClass, "feminine") == 0) { + return NounClass::FEMININE; + } + + if (uprv_strcmp(nounClass, "masculine") == 0) { + return NounClass::MASCULINE; + } + + if (uprv_strcmp(nounClass, "animate") == 0) { + return NounClass::ANIMATE; + } + + if (uprv_strcmp(nounClass, "inanimate") == 0) { + return NounClass::INANIMATE; + } + + if (uprv_strcmp(nounClass, "personal") == 0) { + return NounClass::PERSONAL; + } + + if (uprv_strcmp(nounClass, "common") == 0) { + return 
NounClass::COMMON; + } + + // In case there is no matching, this means there are noun classes + // that are not supported yet. + status = U_INTERNAL_PROGRAM_ERROR; + return NounClass::OTHER; +} + const char *FormattedNumber::getGender(UErrorCode &status) const { UPRV_FORMATTED_VALUE_METHOD_GUARD("") return fData->gender; diff --git a/icu4c/source/i18n/unicode/numberformatter.h b/icu4c/source/i18n/unicode/numberformatter.h index abe1579cb55..a345a00d4ee 100644 --- a/icu4c/source/i18n/unicode/numberformatter.h +++ b/icu4c/source/i18n/unicode/numberformatter.h @@ -22,6 +22,7 @@ #include "unicode/parseerr.h" #include "unicode/plurrule.h" #include "unicode/ucurr.h" +#include "unicode/unounclass.h" #include "unicode/unum.h" #include "unicode/unumberformatter.h" #include "unicode/uobject.h" @@ -2766,14 +2767,20 @@ class U_I18N_API FormattedNumber : public UMemory, public FormattedValue { */ MeasureUnit getOutputUnit(UErrorCode& status) const; -#ifndef U_HIDE_INTERNAL_API +#ifndef U_HIDE_DRAFT_API + /** - * Gets the gender of the formatted output. Returns "" when the gender is - * unknown, or for ungendered languages. + * Gets the noun class of the formatted output. Returns `OTHER` when the noun class + * is not supported yet. * - * @internal ICU 69 technology preview. + * @return `NounClass` + * @draft ICU 71. */ - const char *getGender(UErrorCode& status) const; + NounClass getNounClass(UErrorCode &status) const; + +#endif // U_HIDE_DRAFT_API + +#ifndef U_HIDE_INTERNAL_API /** * Gets the raw DecimalQuantity for plural rule selection. @@ -2789,6 +2796,18 @@ class U_I18N_API FormattedNumber : public UMemory, public FormattedValue { #endif /* U_HIDE_INTERNAL_API */ +#ifndef U_HIDE_DEPRECATED_API + + /** + * Gets the gender of the formatted output. Returns "" when the gender is + * unknown, or for ungendered languages. + * + * @deprecated This API is for ICU internal use only. 
+ */ + const char *getGender(UErrorCode &status) const; + +#endif /* U_HIDE_DEPRECATED_API */ + private: // Can't use LocalPointer because UFormattedNumberData is forward-declared const impl::UFormattedNumberData *fData; diff --git a/icu4c/source/i18n/unicode/unounclass.h b/icu4c/source/i18n/unicode/unounclass.h new file mode 100644 index 00000000000..1721dbd584f --- /dev/null +++ b/icu4c/source/i18n/unicode/unounclass.h @@ -0,0 +1,43 @@ +// © 2022 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef __UNOUNCLASS_H__ +#define __UNOUNCLASS_H__ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/uversion.h" + +U_NAMESPACE_BEGIN + +#ifndef U_HIDE_DRAFT_API + +/** + * Represents all the grammatical noun classes that are supported by CLDR. + * + * @draft ICU 71. + */ +enum NounClass { + OTHER = 0, + NEUTER = 1, + FEMININE = 2, + MASCULINE = 3, + ANIMATE = 4, + INANIMATE = 5, + PERSONAL = 6, + COMMON = 7, +}; + +#endif // U_HIDE_DRAFT_API + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __UNOUNCLASS_H__ diff --git a/icu4c/source/test/intltest/numbertest.h b/icu4c/source/test/intltest/numbertest.h index 2e134d5a374..a39f177299a 100644 --- a/icu4c/source/test/intltest/numbertest.h +++ b/icu4c/source/test/intltest/numbertest.h @@ -66,6 +66,7 @@ class NumberFormatterApiTest : public IntlTestWithFieldPosition { void unitUsageSkeletons(); void unitCurrency(); void unitInflections(); + void unitNounClass(); void unitGender(); void unitNotConvertible(); void unitPercent(); diff --git a/icu4c/source/test/intltest/numbertest_api.cpp b/icu4c/source/test/intltest/numbertest_api.cpp index f981d31c5a8..a8f7326792e 100644 --- a/icu4c/source/test/intltest/numbertest_api.cpp +++ b/icu4c/source/test/intltest/numbertest_api.cpp @@ -87,6 +87,7 @@ void NumberFormatterApiTest::runIndexedTest(int32_t index, UBool 
exec, const cha TESTCASE_AUTO(unitUsageSkeletons); TESTCASE_AUTO(unitCurrency); TESTCASE_AUTO(unitInflections); + TESTCASE_AUTO(unitNounClass); TESTCASE_AUTO(unitGender); TESTCASE_AUTO(unitNotConvertible); TESTCASE_AUTO(unitPercent); @@ -2438,6 +2439,307 @@ void NumberFormatterApiTest::unitInflections() { // TODO: look at "↑↑↑" cases: check that inheritance is done right. } +using icu::NounClass; +void NumberFormatterApiTest::unitNounClass() { + IcuTestErrorCode status(*this, "unitNounClass"); + const struct TestCase { + const char *locale; + const char *unitIdentifier; + const NounClass expectedNounClass; + } cases[] = { + {"de", "inch", NounClass::MASCULINE}, + {"de", "yard", NounClass::NEUTER}, + {"de", "meter", NounClass::MASCULINE}, + {"de", "liter", NounClass::MASCULINE}, + {"de", "second", NounClass::FEMININE}, + {"de", "minute", NounClass::FEMININE}, + {"de", "hour", NounClass::FEMININE}, + {"de", "day", NounClass::MASCULINE}, + {"de", "year", NounClass::NEUTER}, + {"de", "gram", NounClass::NEUTER}, + {"de", "watt", NounClass::NEUTER}, + {"de", "bit", NounClass::NEUTER}, + {"de", "byte", NounClass::NEUTER}, + + {"fr", "inch", NounClass::MASCULINE}, + {"fr", "yard", NounClass::MASCULINE}, + {"fr", "meter", NounClass::MASCULINE}, + {"fr", "liter", NounClass::MASCULINE}, + {"fr", "second", NounClass::FEMININE}, + {"fr", "minute", NounClass::FEMININE}, + {"fr", "hour", NounClass::FEMININE}, + {"fr", "day", NounClass::MASCULINE}, + {"fr", "year", NounClass::MASCULINE}, + {"fr", "gram", NounClass::MASCULINE}, + + // grammaticalFeatures deriveCompound "per" rule takes the gender of the + // numerator unit: + {"de", "meter-per-hour", NounClass::MASCULINE}, + {"fr", "meter-per-hour", NounClass::MASCULINE}, + {"af", "meter-per-hour", NounClass::OTHER}, // ungendered language + + // French "times" takes gender from first value, German takes the + // second. 
Prefix and power does not have impact on gender for these + // languages: + {"de", "square-decimeter-square-second", NounClass::FEMININE}, + {"fr", "square-decimeter-square-second", NounClass::MASCULINE}, + + // TODO(icu-units#149): percent and permille bypasses LongNameHandler + // when unitWidth is not FULL_NAME: + // // Gender of per-second might be that of percent? TODO(icu-units#28) + // {"de", "percent", NounClass::NEUTER}, + // {"fr", "percent", NounClass::MASCULINE}, + + // Built-in units whose simple units lack gender in the CLDR data file + {"de", "kilopascal", NounClass::NEUTER}, + {"fr", "kilopascal", NounClass::MASCULINE}, + // {"de", "pascal", NounClass::OTHER}, + // {"fr", "pascal", NounClass::OTHER}, + + // Built-in units that lack gender in the CLDR data file + // {"de", "revolution", NounClass::OTHER}, + // {"de", "radian", NounClass::OTHER}, + // {"de", "arc-minute", NounClass::OTHER}, + // {"de", "arc-second", NounClass::OTHER}, + {"de", "square-yard", NounClass::NEUTER}, // POWER + {"de", "square-inch", NounClass::MASCULINE}, // POWER + // {"de", "dunam", NounClass::OTHER}, + // {"de", "karat", NounClass::OTHER}, + // {"de", "milligram-ofglucose-per-deciliter", NounClass::OTHER}, // COMPOUND, ofglucose + // {"de", "millimole-per-liter", NounClass::OTHER}, // COMPOUND, mole + // {"de", "permillion", NounClass::OTHER}, + // {"de", "permille", NounClass::OTHER}, + // {"de", "permyriad", NounClass::OTHER}, + // {"de", "mole", NounClass::OTHER}, + {"de", "liter-per-kilometer", NounClass::MASCULINE}, // COMPOUND + {"de", "petabyte", NounClass::NEUTER}, // PREFIX + {"de", "terabit", NounClass::NEUTER}, // PREFIX + // {"de", "century", NounClass::OTHER}, + // {"de", "decade", NounClass::OTHER}, + {"de", "millisecond", NounClass::FEMININE}, // PREFIX + {"de", "microsecond", NounClass::FEMININE}, // PREFIX + {"de", "nanosecond", NounClass::FEMININE}, // PREFIX + // {"de", "ampere", NounClass::OTHER}, + // {"de", "milliampere", NounClass::OTHER}, // 
PREFIX, ampere + // {"de", "ohm", NounClass::OTHER}, + // {"de", "calorie", NounClass::OTHER}, + // {"de", "kilojoule", NounClass::OTHER}, // PREFIX, joule + // {"de", "joule", NounClass::OTHER}, + {"de", "kilowatt-hour", NounClass::FEMININE}, // COMPOUND + // {"de", "electronvolt", NounClass::OTHER}, + // {"de", "british-thermal-unit", NounClass::OTHER}, + // {"de", "therm-us", NounClass::OTHER}, + // {"de", "pound-force", NounClass::OTHER}, + // {"de", "newton", NounClass::OTHER}, + // {"de", "gigahertz", NounClass::OTHER}, // PREFIX, hertz + // {"de", "megahertz", NounClass::OTHER}, // PREFIX, hertz + // {"de", "kilohertz", NounClass::OTHER}, // PREFIX, hertz + // {"de", "hertz", NounClass::OTHER}, + // {"de", "em", NounClass::OTHER}, + // {"de", "pixel", NounClass::OTHER}, + // {"de", "megapixel", NounClass::OTHER}, + // {"de", "pixel-per-centimeter", NounClass::OTHER}, // COMPOUND, pixel + // {"de", "pixel-per-inch", NounClass::OTHER}, // COMPOUND, pixel + // {"de", "dot-per-centimeter", NounClass::OTHER}, // COMPOUND, dot + // {"de", "dot-per-inch", NounClass::OTHER}, // COMPOUND, dot + // {"de", "dot", NounClass::OTHER}, + // {"de", "earth-radius", NounClass::OTHER}, + {"de", "decimeter", NounClass::MASCULINE}, // PREFIX + {"de", "micrometer", NounClass::MASCULINE}, // PREFIX + {"de", "nanometer", NounClass::MASCULINE}, // PREFIX + // {"de", "light-year", NounClass::OTHER}, + // {"de", "astronomical-unit", NounClass::OTHER}, + // {"de", "furlong", NounClass::OTHER}, + // {"de", "fathom", NounClass::OTHER}, + // {"de", "nautical-mile", NounClass::OTHER}, + // {"de", "mile-scandinavian", NounClass::OTHER}, + // {"de", "point", NounClass::OTHER}, + // {"de", "lux", NounClass::OTHER}, + // {"de", "candela", NounClass::OTHER}, + // {"de", "lumen", NounClass::OTHER}, + // {"de", "metric-ton", NounClass::OTHER}, + // {"de", "microgram", NounClass::NEUTER}, // PREFIX + // {"de", "ton", NounClass::OTHER}, + // {"de", "stone", NounClass::OTHER}, + // {"de", 
"ounce-troy", NounClass::OTHER}, + // {"de", "carat", NounClass::OTHER}, + {"de", "gigawatt", NounClass::NEUTER}, // PREFIX + {"de", "milliwatt", NounClass::NEUTER}, // PREFIX + // {"de", "horsepower", NounClass::OTHER}, + // {"de", "millimeter-ofhg", NounClass::OTHER}, + // {"de", "pound-force-per-square-inch", NounClass::OTHER}, // COMPOUND, pound-force + // {"de", "inch-ofhg", NounClass::OTHER}, + // {"de", "bar", NounClass::OTHER}, + // {"de", "millibar", NounClass::OTHER}, // PREFIX, bar + // {"de", "atmosphere", NounClass::OTHER}, + // {"de", "pascal", NounClass::OTHER}, // PREFIX, kilopascal? neuter? + // {"de", "hectopascal", NounClass::OTHER}, // PREFIX, pascal, neuter? + // {"de", "megapascal", NounClass::OTHER}, // PREFIX, pascal, neuter? + // {"de", "knot", NounClass::OTHER}, + {"de", "pound-force-foot", NounClass::MASCULINE}, // COMPOUND + {"de", "newton-meter", NounClass::MASCULINE}, // COMPOUND + {"de", "cubic-kilometer", NounClass::MASCULINE}, // POWER + {"de", "cubic-yard", NounClass::NEUTER}, // POWER + {"de", "cubic-inch", NounClass::MASCULINE}, // POWER + {"de", "megaliter", NounClass::MASCULINE}, // PREFIX + {"de", "hectoliter", NounClass::MASCULINE}, // PREFIX + // {"de", "pint-metric", NounClass::OTHER}, + // {"de", "cup-metric", NounClass::OTHER}, + {"de", "acre-foot", NounClass::MASCULINE}, // COMPOUND + // {"de", "bushel", NounClass::OTHER}, + // {"de", "barrel", NounClass::OTHER}, + // Units missing gender in German also misses gender in French: + // {"fr", "revolution", NounClass::OTHER}, + // {"fr", "radian", NounClass::OTHER}, + // {"fr", "arc-minute", NounClass::OTHER}, + // {"fr", "arc-second", NounClass::OTHER}, + {"fr", "square-yard", NounClass::MASCULINE}, // POWER + {"fr", "square-inch", NounClass::MASCULINE}, // POWER + // {"fr", "dunam", NounClass::OTHER}, + // {"fr", "karat", NounClass::OTHER}, + {"fr", "milligram-ofglucose-per-deciliter", NounClass::MASCULINE}, // COMPOUND + // {"fr", "millimole-per-liter", NounClass::OTHER}, 
// COMPOUND, mole + // {"fr", "permillion", NounClass::OTHER}, + // {"fr", "permille", NounClass::OTHER}, + // {"fr", "permyriad", NounClass::OTHER}, + // {"fr", "mole", NounClass::OTHER}, + {"fr", "liter-per-kilometer", NounClass::MASCULINE}, // COMPOUND + // {"fr", "petabyte", NounClass::OTHER}, // PREFIX + // {"fr", "terabit", NounClass::OTHER}, // PREFIX + // {"fr", "century", NounClass::OTHER}, + // {"fr", "decade", NounClass::OTHER}, + {"fr", "millisecond", NounClass::FEMININE}, // PREFIX + {"fr", "microsecond", NounClass::FEMININE}, // PREFIX + {"fr", "nanosecond", NounClass::FEMININE}, // PREFIX + // {"fr", "ampere", NounClass::OTHER}, + // {"fr", "milliampere", NounClass::OTHER}, // PREFIX, ampere + // {"fr", "ohm", NounClass::OTHER}, + // {"fr", "calorie", NounClass::OTHER}, + // {"fr", "kilojoule", NounClass::OTHER}, // PREFIX, joule + // {"fr", "joule", NounClass::OTHER}, + // {"fr", "kilowatt-hour", NounClass::OTHER}, // COMPOUND + // {"fr", "electronvolt", NounClass::OTHER}, + // {"fr", "british-thermal-unit", NounClass::OTHER}, + // {"fr", "therm-us", NounClass::OTHER}, + // {"fr", "pound-force", NounClass::OTHER}, + // {"fr", "newton", NounClass::OTHER}, + // {"fr", "gigahertz", NounClass::OTHER}, // PREFIX, hertz + // {"fr", "megahertz", NounClass::OTHER}, // PREFIX, hertz + // {"fr", "kilohertz", NounClass::OTHER}, // PREFIX, hertz + // {"fr", "hertz", NounClass::OTHER}, + // {"fr", "em", NounClass::OTHER}, + // {"fr", "pixel", NounClass::OTHER}, + // {"fr", "megapixel", NounClass::OTHER}, + // {"fr", "pixel-per-centimeter", NounClass::OTHER}, // COMPOUND, pixel + // {"fr", "pixel-per-inch", NounClass::OTHER}, // COMPOUND, pixel + // {"fr", "dot-per-centimeter", NounClass::OTHER}, // COMPOUND, dot + // {"fr", "dot-per-inch", NounClass::OTHER}, // COMPOUND, dot + // {"fr", "dot", NounClass::OTHER}, + // {"fr", "earth-radius", NounClass::OTHER}, + {"fr", "decimeter", NounClass::MASCULINE}, // PREFIX + {"fr", "micrometer", NounClass::MASCULINE}, // 
PREFIX + {"fr", "nanometer", NounClass::MASCULINE}, // PREFIX + // {"fr", "light-year", NounClass::OTHER}, + // {"fr", "astronomical-unit", NounClass::OTHER}, + // {"fr", "furlong", NounClass::OTHER}, + // {"fr", "fathom", NounClass::OTHER}, + // {"fr", "nautical-mile", NounClass::OTHER}, + // {"fr", "mile-scandinavian", NounClass::OTHER}, + // {"fr", "point", NounClass::OTHER}, + // {"fr", "lux", NounClass::OTHER}, + // {"fr", "candela", NounClass::OTHER}, + // {"fr", "lumen", NounClass::OTHER}, + // {"fr", "metric-ton", NounClass::OTHER}, + // {"fr", "microgram", NounClass::MASCULINE}, // PREFIX + // {"fr", "ton", NounClass::OTHER}, + // {"fr", "stone", NounClass::OTHER}, + // {"fr", "ounce-troy", NounClass::OTHER}, + // {"fr", "carat", NounClass::OTHER}, + // {"fr", "gigawatt", NounClass::OTHER}, // PREFIX + // {"fr", "milliwatt", NounClass::OTHER}, + // {"fr", "horsepower", NounClass::OTHER}, + {"fr", "millimeter-ofhg", NounClass::MASCULINE}, + // {"fr", "pound-force-per-square-inch", NounClass::OTHER}, // COMPOUND, pound-force + {"fr", "inch-ofhg", NounClass::MASCULINE}, + // {"fr", "bar", NounClass::OTHER}, + // {"fr", "millibar", NounClass::OTHER}, // PREFIX, bar + // {"fr", "atmosphere", NounClass::OTHER}, + // {"fr", "pascal", NounClass::OTHER}, // PREFIX, kilopascal? 
+ // {"fr", "hectopascal", NounClass::OTHER}, // PREFIX, pascal + // {"fr", "megapascal", NounClass::OTHER}, // PREFIX, pascal + // {"fr", "knot", NounClass::OTHER}, + // {"fr", "pound-force-foot", NounClass::OTHER}, + // {"fr", "newton-meter", NounClass::OTHER}, + {"fr", "cubic-kilometer", NounClass::MASCULINE}, // POWER + {"fr", "cubic-yard", NounClass::MASCULINE}, // POWER + {"fr", "cubic-inch", NounClass::MASCULINE}, // POWER + {"fr", "megaliter", NounClass::MASCULINE}, // PREFIX + {"fr", "hectoliter", NounClass::MASCULINE}, // PREFIX + // {"fr", "pint-metric", NounClass::OTHER}, + // {"fr", "cup-metric", NounClass::OTHER}, + {"fr", "acre-foot", NounClass::FEMININE}, // COMPOUND + // {"fr", "bushel", NounClass::OTHER}, + // {"fr", "barrel", NounClass::OTHER}, + // Some more French units missing gender: + // {"fr", "degree", NounClass::OTHER}, + {"fr", "square-meter", NounClass::MASCULINE}, // POWER + // {"fr", "terabyte", NounClass::OTHER}, // PREFIX, byte + // {"fr", "gigabyte", NounClass::OTHER}, // PREFIX, byte + // {"fr", "gigabit", NounClass::OTHER}, // PREFIX, bit + // {"fr", "megabyte", NounClass::OTHER}, // PREFIX, byte + // {"fr", "megabit", NounClass::OTHER}, // PREFIX, bit + // {"fr", "kilobyte", NounClass::OTHER}, // PREFIX, byte + // {"fr", "kilobit", NounClass::OTHER}, // PREFIX, bit + // {"fr", "byte", NounClass::OTHER}, + // {"fr", "bit", NounClass::OTHER}, + // {"fr", "volt", NounClass::OTHER}, + // {"fr", "watt", NounClass::OTHER}, + {"fr", "cubic-meter", NounClass::MASCULINE}, // POWER + + // gender-lacking builtins within compound units + {"de", "newton-meter-per-second", NounClass::MASCULINE}, + + // TODO(ICU-21494): determine whether list genders behave as follows, + // and implement proper getListGender support (covering more than just + // two genders): + // // gender rule for lists of people: de "neutral", fr "maleTaints" + // {"de", "day-and-hour-and-minute", NounClass::NEUTER}, + // {"de", "hour-and-minute", NounClass::FEMININE}, + // 
{"fr", "day-and-hour-and-minute", NounClass::MASCULINE}, + // {"fr", "hour-and-minute", NounClass::FEMININE}, + }; + + LocalizedNumberFormatter formatter; + FormattedNumber fn; + for (const TestCase &t : cases) { + formatter = NumberFormatter::with() + .unit(MeasureUnit::forIdentifier(t.unitIdentifier, status)) + .locale(Locale(t.locale)); + fn = formatter.formatDouble(1.1, status); + assertEquals(UnicodeString("Testing NounClass with default width, unit: ") + t.unitIdentifier + + ", locale: " + t.locale, + t.expectedNounClass, fn.getNounClass(status)); + status.assertSuccess(); + + formatter = NumberFormatter::with() + .unit(MeasureUnit::forIdentifier(t.unitIdentifier, status)) + .unitWidth(UNUM_UNIT_WIDTH_FULL_NAME) + .locale(Locale(t.locale)); + fn = formatter.formatDouble(1.1, status); + assertEquals(UnicodeString("Testing NounClass with UNUM_UNIT_WIDTH_FULL_NAME, unit: ") + + t.unitIdentifier + ", locale: " + t.locale, + t.expectedNounClass, fn.getNounClass(status)); + status.assertSuccess(); + } + + // Make sure getNounClass does not return garbage for languages without noun classes. 
+ formatter = NumberFormatter::with().locale(Locale::getEnglish()); + fn = formatter.formatDouble(1.1, status); + status.assertSuccess(); + assertEquals("getNounClasses for a not supported language", NounClass::OTHER, + fn.getNounClass(status)); +} + void NumberFormatterApiTest::unitGender() { IcuTestErrorCode status(*this, "unitGender"); diff --git a/icu4j/main/classes/core/src/com/ibm/icu/number/FormattedNumber.java b/icu4j/main/classes/core/src/com/ibm/icu/number/FormattedNumber.java index 258788f8294..3782b9af5b5 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/number/FormattedNumber.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/number/FormattedNumber.java @@ -13,6 +13,8 @@ import com.ibm.icu.text.ConstrainedFieldPosition; import com.ibm.icu.text.FormattedValue; import com.ibm.icu.text.PluralRules.IFixedDecimal; import com.ibm.icu.util.MeasureUnit; +import com.ibm.icu.util.NounClass; +import com.ibm.icu.util.UResourceTypeMismatchException; /** * The result of a number formatting operation. This class allows the result to be exported in several @@ -136,11 +138,57 @@ public class FormattedNumber implements FormattedValue { return this.outputUnit; } + /** + * Gets the noun class of the formatted output. Returns `OTHER` when the noun class + * is not supported yet. + * + * @throws UResourceTypeMismatchException + * @return `NounClass` + * @draft ICU 71. 
+ */ + public NounClass getNounClass() { + // if it is not exist, return `OTHER` + if (this.gender == null || this.gender.isEmpty()) { + return NounClass.OTHER; + } + + if (this.gender.equals("neuter")) { + return NounClass.NEUTER; + } + + if (this.gender.equals("feminine")) { + return NounClass.FEMININE; + } + + if (this.gender.equals("masculine")) { + return NounClass.MASCULINE; + } + + if (this.gender.equals("animate")) { + return NounClass.ANIMATE; + } + + if (this.gender.equals("inanimate")) { + return NounClass.INANIMATE; + } + + if (this.gender.equals("personal")) { + return NounClass.PERSONAL; + } + + if (this.gender.equals("common")) { + return NounClass.COMMON; + } + + // In case there is no matching. + throw new UResourceTypeMismatchException("there are noun classes that are not supported yet"); + } + /** * The gender of the formatted output. * * @internal ICU 69 technology preview - * @deprecated This API is for technology preview only. + * @deprecated This API is for ICU internal use only. */ @Deprecated public String getGender() { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/NounClass.java b/icu4j/main/classes/core/src/com/ibm/icu/util/NounClass.java new file mode 100644 index 00000000000..c5c643225db --- /dev/null +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/NounClass.java @@ -0,0 +1,20 @@ +// © 2022 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +package com.ibm.icu.util; + +/** + * Represents all the grammatical noun classes that are supported by CLDR. + * + * @draft ICU 71. 
+ */ +public enum NounClass { + OTHER, + NEUTER, + FEMININE, + MASCULINE, + ANIMATE, + INANIMATE, + PERSONAL, + COMMON, +} diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java index 0f1fff855a0..25cbe3480de 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java @@ -55,6 +55,7 @@ import com.ibm.icu.util.Currency.CurrencyUsage; import com.ibm.icu.util.CurrencyAmount; import com.ibm.icu.util.Measure; import com.ibm.icu.util.MeasureUnit; +import com.ibm.icu.util.NounClass; import com.ibm.icu.util.NoUnit; import com.ibm.icu.util.ULocale; @@ -2428,6 +2429,310 @@ public class NumberFormatterApiTest extends TestFmwk { // TODO: look at "↑↑↑" cases: check that inheritance is done right. } + @Test + public void unitNounClass() { + class TestCase { + public String locale; + public String unitIdentifier; + public NounClass expectedNounClass; + + public TestCase(String locale, String unitIdentifier, NounClass expectedNounClass) { + this.locale = locale; + this.unitIdentifier = unitIdentifier; + this.expectedNounClass = expectedNounClass; + } + } + + TestCase cases[] = { + new TestCase("de", "inch", NounClass.MASCULINE), // + new TestCase("de", "yard", NounClass.NEUTER), // + new TestCase("de", "meter", NounClass.MASCULINE), // + new TestCase("de", "liter", NounClass.MASCULINE), // + new TestCase("de", "second", NounClass.FEMININE), // + new TestCase("de", "minute", NounClass.FEMININE), // + new TestCase("de", "hour", NounClass.FEMININE), // + new TestCase("de", "day", NounClass.MASCULINE), // + new TestCase("de", "year", NounClass.NEUTER), // + new TestCase("de", "gram", NounClass.NEUTER), // + new TestCase("de", "watt", NounClass.NEUTER), // + new TestCase("de", "bit", NounClass.NEUTER), // + new TestCase("de", "byte", 
NounClass.NEUTER), // + + new TestCase("fr", "inch", NounClass.MASCULINE), // + new TestCase("fr", "yard", NounClass.MASCULINE), // + new TestCase("fr", "meter", NounClass.MASCULINE), // + new TestCase("fr", "liter", NounClass.MASCULINE), // + new TestCase("fr", "second", NounClass.FEMININE), // + new TestCase("fr", "minute", NounClass.FEMININE), // + new TestCase("fr", "hour", NounClass.FEMININE), // + new TestCase("fr", "day", NounClass.MASCULINE), // + new TestCase("fr", "year", NounClass.MASCULINE), // + new TestCase("fr", "gram", NounClass.MASCULINE), // + + // grammaticalFeatures deriveCompound "per" rule takes the gender of the + // numerator unit: + new TestCase("de", "meter-per-hour", NounClass.MASCULINE), + new TestCase("fr", "meter-per-hour", NounClass.MASCULINE), + new TestCase("af", "meter-per-hour", NounClass.OTHER), // ungendered language + + // French "times" takes gender from first value, German takes the + // second. Prefix and power does not have impact on gender for these + // languages: + new TestCase("de", "square-decimeter-square-second", NounClass.FEMININE), + new TestCase("fr", "square-decimeter-square-second", NounClass.MASCULINE), + + // TODO(icu-units#149): percent and permille bypasses + // LongNameHandler when unitWidth is not FULL_NAME: + // // Gender of per-second might be that of percent? 
TODO(icu-units#28) + // new TestCase("de", "percent", NounClass.NEUTER), // + // new TestCase("fr", "percent", NounClass.MASCULINE), // + + // Built-in units whose simple units lack gender in the CLDR data file + new TestCase("de", "kilopascal", NounClass.NEUTER), // + new TestCase("fr", "kilopascal", NounClass.MASCULINE), // + // new TestCase("de", "pascal", NounClass.OTHER), // + // new TestCase("fr", "pascal", NounClass.OTHER), // + + // Built-in units that lack gender in the CLDR data file + // new TestCase("de", "revolution", NounClass.OTHER), // + // new TestCase("de", "radian", NounClass.OTHER), // + // new TestCase("de", "arc-minute", NounClass.OTHER), // + // new TestCase("de", "arc-second", NounClass.OTHER), // + new TestCase("de", "square-yard", NounClass.NEUTER), // COMPOUND + new TestCase("de", "square-inch", NounClass.MASCULINE), // COMPOUND + // new TestCase("de", "dunam", NounClass.OTHER), // + // new TestCase("de", "karat", NounClass.OTHER), // + // new TestCase("de", "milligram-ofglucose-per-deciliter", NounClass.OTHER), // COMPOUND, ofglucose + // new TestCase("de", "millimole-per-liter", NounClass.OTHER), // COMPOUND, mole + // new TestCase("de", "permillion", NounClass.OTHER), // + // new TestCase("de", "permille", NounClass.OTHER), // + // new TestCase("de", "permyriad", NounClass.OTHER), // + // new TestCase("de", "mole", NounClass.OTHER), // + new TestCase("de", "liter-per-kilometer", NounClass.MASCULINE), // COMPOUND + new TestCase("de", "petabyte", NounClass.NEUTER), // PREFIX + new TestCase("de", "terabit", NounClass.NEUTER), // PREFIX + // new TestCase("de", "century", NounClass.OTHER), // + // new TestCase("de", "decade", NounClass.OTHER), // + new TestCase("de", "millisecond", NounClass.FEMININE), // PREFIX + new TestCase("de", "microsecond", NounClass.FEMININE), // PREFIX + new TestCase("de", "nanosecond", NounClass.FEMININE), // PREFIX + // new TestCase("de", "ampere", NounClass.OTHER), // + // new TestCase("de", "milliampere", 
NounClass.OTHER), // PREFIX, ampere + // new TestCase("de", "ohm", NounClass.OTHER), // + // new TestCase("de", "calorie", NounClass.OTHER), // + // new TestCase("de", "kilojoule", NounClass.OTHER), // PREFIX, joule + // new TestCase("de", "joule", NounClass.OTHER), // + new TestCase("de", "kilowatt-hour", NounClass.FEMININE), // COMPOUND + // new TestCase("de", "electronvolt", NounClass.OTHER), // + // new TestCase("de", "british-thermal-unit", NounClass.OTHER), // + // new TestCase("de", "therm-us", NounClass.OTHER), // + // new TestCase("de", "pound-force", NounClass.OTHER), // + // new TestCase("de", "newton", NounClass.OTHER), // + // new TestCase("de", "gigahertz", NounClass.OTHER), // PREFIX, hertz + // new TestCase("de", "megahertz", NounClass.OTHER), // PREFIX, hertz + // new TestCase("de", "kilohertz", NounClass.OTHER), // PREFIX, hertz + // new TestCase("de", "hertz", NounClass.OTHER), // PREFIX, hertz + // new TestCase("de", "em", NounClass.OTHER), // + // new TestCase("de", "pixel", NounClass.OTHER), // + // new TestCase("de", "megapixel", NounClass.OTHER), // + // new TestCase("de", "pixel-per-centimeter", NounClass.OTHER), // COMPOUND, pixel + // new TestCase("de", "pixel-per-inch", NounClass.OTHER), // COMPOUND, pixel + // new TestCase("de", "dot-per-centimeter", NounClass.OTHER), // COMPOUND, dot + // new TestCase("de", "dot-per-inch", NounClass.OTHER), // COMPOUND, dot + // new TestCase("de", "dot", NounClass.OTHER), // + // new TestCase("de", "earth-radius", NounClass.OTHER), // + new TestCase("de", "decimeter", NounClass.MASCULINE), // PREFIX + new TestCase("de", "micrometer", NounClass.MASCULINE), // PREFIX + new TestCase("de", "nanometer", NounClass.MASCULINE), // PREFIX + // new TestCase("de", "light-year", NounClass.OTHER), // + // new TestCase("de", "astronomical-unit", NounClass.OTHER), // + // new TestCase("de", "furlong", NounClass.OTHER), // + // new TestCase("de", "fathom", NounClass.OTHER), // + // new TestCase("de", "nautical-mile", 
NounClass.OTHER), // + // new TestCase("de", "mile-scandinavian", NounClass.OTHER), // + // new TestCase("de", "point", NounClass.OTHER), // + // new TestCase("de", "lux", NounClass.OTHER), // + // new TestCase("de", "candela", NounClass.OTHER), // + // new TestCase("de", "lumen", NounClass.OTHER), // + // new TestCase("de", "metric-ton", NounClass.OTHER), // + new TestCase("de", "microgram", NounClass.NEUTER), // PREFIX + // new TestCase("de", "ton", NounClass.OTHER), // + // new TestCase("de", "stone", NounClass.OTHER), // + // new TestCase("de", "ounce-troy", NounClass.OTHER), // + // new TestCase("de", "carat", NounClass.OTHER), // + new TestCase("de", "gigawatt", NounClass.NEUTER), // PREFIX + new TestCase("de", "milliwatt", NounClass.NEUTER), // PREFIX + // new TestCase("de", "horsepower", NounClass.OTHER), // + // new TestCase("de", "millimeter-ofhg", NounClass.OTHER), // + // new TestCase("de", "pound-force-per-square-inch", NounClass.OTHER), // COMPOUND, pound-force + // new TestCase("de", "inch-ofhg", NounClass.OTHER), // + // new TestCase("de", "bar", NounClass.OTHER), // + // new TestCase("de", "millibar", NounClass.OTHER), // PREFIX, bar + // new TestCase("de", "atmosphere", NounClass.OTHER), // + // new TestCase("de", "pascal", NounClass.OTHER), // PREFIX, kilopascal? neuter? + // new TestCase("de", "hectopascal", NounClass.OTHER), // PREFIX, pascal, neuter? + // new TestCase("de", "megapascal", NounClass.OTHER), // PREFIX, pascal, neuter? 
+ // new TestCase("de", "knot", NounClass.OTHER), // + new TestCase("de", "pound-force-foot", NounClass.MASCULINE), // COMPOUND + new TestCase("de", "newton-meter", NounClass.MASCULINE), // COMPOUND + new TestCase("de", "cubic-kilometer", NounClass.MASCULINE), // POWER + new TestCase("de", "cubic-yard", NounClass.NEUTER), // POWER + new TestCase("de", "cubic-inch", NounClass.MASCULINE), // POWER + new TestCase("de", "megaliter", NounClass.MASCULINE), // PREFIX + new TestCase("de", "hectoliter", NounClass.MASCULINE), // PREFIX + // new TestCase("de", "pint-metric", NounClass.OTHER), // + // new TestCase("de", "cup-metric", NounClass.OTHER), // + new TestCase("de", "acre-foot", NounClass.MASCULINE), // COMPOUND + // new TestCase("de", "bushel", NounClass.OTHER), // + // new TestCase("de", "barrel", NounClass.OTHER), // + // Units missing gender in German also misses gender in French: + // new TestCase("fr", "revolution", NounClass.OTHER), // + // new TestCase("fr", "radian", NounClass.OTHER), // + // new TestCase("fr", "arc-minute", NounClass.OTHER), // + // new TestCase("fr", "arc-second", NounClass.OTHER), // + new TestCase("fr", "square-yard", NounClass.MASCULINE), // COMPOUND + new TestCase("fr", "square-inch", NounClass.MASCULINE), // COMPOUND + // new TestCase("fr", "dunam", NounClass.OTHER), // + // new TestCase("fr", "karat", NounClass.OTHER), // + new TestCase("fr", "milligram-ofglucose-per-deciliter", NounClass.MASCULINE), // COMPOUND + // new TestCase("fr", "millimole-per-liter", NounClass.OTHER), // COMPOUND, mole + // new TestCase("fr", "permillion", NounClass.OTHER), // + // new TestCase("fr", "permille", NounClass.OTHER), // + // new TestCase("fr", "permyriad", NounClass.OTHER), // + // new TestCase("fr", "mole", NounClass.OTHER), // + new TestCase("fr", "liter-per-kilometer", NounClass.MASCULINE), // COMPOUND + // new TestCase("fr", "petabyte", NounClass.OTHER), // PREFIX + // new TestCase("fr", "terabit", NounClass.OTHER), // PREFIX + // new 
TestCase("fr", "century", NounClass.OTHER), // + // new TestCase("fr", "decade", NounClass.OTHER), // + new TestCase("fr", "millisecond", NounClass.FEMININE), // PREFIX + new TestCase("fr", "microsecond", NounClass.FEMININE), // PREFIX + new TestCase("fr", "nanosecond", NounClass.FEMININE), // PREFIX + // new TestCase("fr", "ampere", NounClass.OTHER), // + // new TestCase("fr", "milliampere", NounClass.OTHER), // PREFIX, ampere + // new TestCase("fr", "ohm", NounClass.OTHER), // + // new TestCase("fr", "calorie", NounClass.OTHER), // + // new TestCase("fr", "kilojoule", NounClass.OTHER), // PREFIX, joule + // new TestCase("fr", "joule", NounClass.OTHER), // + // new TestCase("fr", "kilowatt-hour", NounClass.OTHER), // COMPOUND + // new TestCase("fr", "electronvolt", NounClass.OTHER), // + // new TestCase("fr", "british-thermal-unit", NounClass.OTHER), // + // new TestCase("fr", "therm-us", NounClass.OTHER), // + // new TestCase("fr", "pound-force", NounClass.OTHER), // + // new TestCase("fr", "newton", NounClass.OTHER), // + // new TestCase("fr", "gigahertz", NounClass.OTHER), // PREFIX, hertz + // new TestCase("fr", "megahertz", NounClass.OTHER), // PREFIX, hertz + // new TestCase("fr", "kilohertz", NounClass.OTHER), // PREFIX, hertz + // new TestCase("fr", "hertz", NounClass.OTHER), // PREFIX, hertz + // new TestCase("fr", "em", NounClass.OTHER), // + // new TestCase("fr", "pixel", NounClass.OTHER), // + // new TestCase("fr", "megapixel", NounClass.OTHER), // + // new TestCase("fr", "pixel-per-centimeter", NounClass.OTHER), // COMPOUND, pixel + // new TestCase("fr", "pixel-per-inch", NounClass.OTHER), // COMPOUND, pixel + // new TestCase("fr", "dot-per-centimeter", NounClass.OTHER), // COMPOUND, dot + // new TestCase("fr", "dot-per-inch", NounClass.OTHER), // COMPOUND, dot + // new TestCase("fr", "dot", NounClass.OTHER), // + // new TestCase("fr", "earth-radius", NounClass.OTHER), // + new TestCase("fr", "decimeter", NounClass.MASCULINE), // PREFIX + new 
TestCase("fr", "micrometer", NounClass.MASCULINE), // PREFIX + new TestCase("fr", "nanometer", NounClass.MASCULINE), // PREFIX + // new TestCase("fr", "light-year", NounClass.OTHER), // + // new TestCase("fr", "astronomical-unit", NounClass.OTHER), // + // new TestCase("fr", "furlong", NounClass.OTHER), // + // new TestCase("fr", "fathom", NounClass.OTHER), // + // new TestCase("fr", "nautical-mile", NounClass.OTHER), // + // new TestCase("fr", "mile-scandinavian", NounClass.OTHER), // + // new TestCase("fr", "point", NounClass.OTHER), // + // new TestCase("fr", "lux", NounClass.OTHER), // + // new TestCase("fr", "candela", NounClass.OTHER), // + // new TestCase("fr", "lumen", NounClass.OTHER), // + // new TestCase("fr", "metric-ton", NounClass.OTHER), // + new TestCase("fr", "microgram", NounClass.MASCULINE), // PREFIX + // new TestCase("fr", "ton", NounClass.OTHER), // + // new TestCase("fr", "stone", NounClass.OTHER), // + // new TestCase("fr", "ounce-troy", NounClass.OTHER), // + // new TestCase("fr", "carat", NounClass.OTHER), // + // new TestCase("fr", "gigawatt", NounClass.OTHER), // PREFIX + // new TestCase("fr", "milliwatt", NounClass.OTHER), // + // new TestCase("fr", "horsepower", NounClass.OTHER), // + new TestCase("fr", "millimeter-ofhg", NounClass.MASCULINE), // + // new TestCase("fr", "pound-force-per-square-inch", NounClass.OTHER), // COMPOUND, pound-force + new TestCase("fr", "inch-ofhg", NounClass.MASCULINE), // + // new TestCase("fr", "bar", NounClass.OTHER), // + // new TestCase("fr", "millibar", NounClass.OTHER), // PREFIX, bar + // new TestCase("fr", "atmosphere", NounClass.OTHER), // + // new TestCase("fr", "pascal", NounClass.OTHER), // PREFIX, kilopascal? 
+ // new TestCase("fr", "hectopascal", NounClass.OTHER), // PREFIX, pascal + // new TestCase("fr", "megapascal", NounClass.OTHER), // PREFIX, pascal + // new TestCase("fr", "knot", NounClass.OTHER), // + // new TestCase("fr", "pound-force-foot", NounClass.OTHER), // + // new TestCase("fr", "newton-meter", NounClass.OTHER), // + new TestCase("fr", "cubic-kilometer", NounClass.MASCULINE), // POWER + new TestCase("fr", "cubic-yard", NounClass.MASCULINE), // POWER + new TestCase("fr", "cubic-inch", NounClass.MASCULINE), // POWER + new TestCase("fr", "megaliter", NounClass.MASCULINE), // PREFIX + new TestCase("fr", "hectoliter", NounClass.MASCULINE), // PREFIX + // new TestCase("fr", "pint-metric", NounClass.OTHER), // + // new TestCase("fr", "cup-metric", NounClass.OTHER), // + new TestCase("fr", "acre-foot", NounClass.FEMININE), // COMPOUND + // new TestCase("fr", "bushel", NounClass.OTHER), // + // new TestCase("fr", "barrel", NounClass.OTHER), // + // Some more French units missing gender: + // new TestCase("fr", "degree", NounClass.OTHER), // + new TestCase("fr", "square-meter", NounClass.MASCULINE), // COMPOUND + // new TestCase("fr", "terabyte", NounClass.OTHER), // PREFIX, byte + // new TestCase("fr", "gigabyte", NounClass.OTHER), // PREFIX, byte + // new TestCase("fr", "gigabit", NounClass.OTHER), // PREFIX, bit + // new TestCase("fr", "megabyte", NounClass.OTHER), // PREFIX, byte + // new TestCase("fr", "megabit", NounClass.OTHER), // PREFIX, bit + // new TestCase("fr", "kilobyte", NounClass.OTHER), // PREFIX, byte + // new TestCase("fr", "kilobit", NounClass.OTHER), // PREFIX, bit + // new TestCase("fr", "byte", NounClass.OTHER), // + // new TestCase("fr", "bit", NounClass.OTHER), // + // new TestCase("fr", "volt", NounClass.OTHER), // + new TestCase("fr", "cubic-meter", NounClass.MASCULINE), // POWER + + // gender-lacking builtins within compound units + new TestCase("de", "newton-meter-per-second", NounClass.MASCULINE), + + // TODO(ICU-21494): determine 
whether list genders behave as follows, + // and implement proper getListGender support (covering more than just + // two genders): + // // gender rule for lists of people: de "neutral", fr "maleTaints" + // new TestCase("de", "day-and-hour-and-minute", NounClass.NEUTER), + // new TestCase("de", "hour-and-minute", NounClass.FEMININE), + // new TestCase("fr", "day-and-hour-and-minute", NounClass.MASCULINE), + // new TestCase("fr", "hour-and-minute", NounClass.FEMININE), + }; + + LocalizedNumberFormatter formatter; + FormattedNumber fn; + for (TestCase t : cases) { + formatter = NumberFormatter.with() + .unit(MeasureUnit.forIdentifier(t.unitIdentifier)) + .locale(new ULocale(t.locale)); + fn = formatter.format(1.1); + assertEquals("Testing noun classes with default width, unit: " + t.unitIdentifier + + ", locale: " + t.locale, + t.expectedNounClass, fn.getNounClass()); + + formatter = NumberFormatter.with() + .unit(MeasureUnit.forIdentifier(t.unitIdentifier)) + .unitWidth(UnitWidth.FULL_NAME) + .locale(new ULocale(t.locale)); + fn = formatter.format(1.1); + assertEquals("Testing noun classes with UnitWidth.FULL_NAME, unit: " + t.unitIdentifier + + ", locale: " + t.locale, + t.expectedNounClass, fn.getNounClass()); + } + + // Make sure getNounClass does not return garbage for genderless languages + formatter = NumberFormatter.with().locale(ULocale.ENGLISH); + fn = formatter.format(1.1); + assertEquals("getNounClass for not supported language", NounClass.OTHER, fn.getNounClass()); + + } + @Test public void unitGender() { class TestCase {