diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index e543c745681..82811190079 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -24,7 +24,6 @@ #include "unicode/bytestrie.h" #include "unicode/bytestriebuilder.h" #include "unicode/localpointer.h" -#include "unicode/measunit.h" #include "unicode/stringpiece.h" #include "unicode/stringtriebuilder.h" #include "unicode/ures.h" @@ -335,9 +334,6 @@ char *gSerializedUnitExtrasStemTrie = nullptr; const UChar **gCategories = nullptr; // Number of items in `gCategories`. int32_t gCategoriesCount = 0; -// TODO: rather save an index into gCategories? -const char *kConsumption = "consumption"; -size_t kConsumptionLen = strlen("consumption"); // Serialized BytesTrie for mapping from base units to indices into gCategories. char *gSerializedUnitCategoriesTrie = nullptr; @@ -809,17 +805,13 @@ compareSingleUnits(const void* /*context*/, const void* left, const void* right) // Returns an index into the gCategories array, for the "unitQuantity" (aka // "type" or "category") associated with the given base unit identifier. Returns // -1 on failure, together with U_UNSUPPORTED_ERROR. -int32_t getUnitCategoryIndex(StringPiece baseUnitIdentifier, UErrorCode &status) { - umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status); - if (U_FAILURE(status)) { - return -1; - } - BytesTrie trie(gSerializedUnitCategoriesTrie); - UStringTrieResult result = trie.next(baseUnitIdentifier.data(), baseUnitIdentifier.length()); +int32_t getUnitCategoryIndex(BytesTrie &trie, StringPiece baseUnitIdentifier, UErrorCode &status) { + UStringTrieResult result = trie.reset().next(baseUnitIdentifier.data(), baseUnitIdentifier.length()); if (!USTRINGTRIE_HAS_VALUE(result)) { status = U_UNSUPPORTED_ERROR; return -1; } + return trie.getValue(); } @@ -847,29 +839,76 @@ umeas_getPrefixBase(UMeasurePrefix unitPrefix) { return 10; } -CharString U_I18N_API getUnitQuantity(StringPiece baseUnitIdentifier, UErrorCode &status) { +CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status) { CharString result; - U_ASSERT(result.length() == 0); + MeasureUnitImpl baseUnitImpl = baseMeasureUnitImpl.copy(status); + UErrorCode localStatus = U_ZERO_ERROR; + umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status); if (U_FAILURE(status)) { return result; } - UErrorCode localStatus = U_ZERO_ERROR; - int32_t idx = getUnitCategoryIndex(baseUnitIdentifier, localStatus); + BytesTrie trie(gSerializedUnitCategoriesTrie); + + baseUnitImpl.serialize(status); + StringPiece identifier = baseUnitImpl.identifier.data(); + int32_t idx = getUnitCategoryIndex(trie, identifier, localStatus); + if (U_FAILURE(status)) { + return result; + } + + // In case the base unit identifier did not match any entry. if (U_FAILURE(localStatus)) { - // TODO(icu-units#130): support inverting any unit, with correct - // fallback logic: inversion and fallback may depend on presence or - // absence of a usage for that category. - if (uprv_strcmp(baseUnitIdentifier.data(), "meter-per-cubic-meter") == 0) { - result.append(kConsumption, (int32_t)kConsumptionLen, status); + localStatus = U_ZERO_ERROR; + baseUnitImpl.takeReciprocal(status); + baseUnitImpl.serialize(status); + identifier.set(baseUnitImpl.identifier.data()); + idx = getUnitCategoryIndex(trie, identifier, localStatus); + + if (U_FAILURE(status)) { return result; } + } + + // In case the reciprocal of the base unit identifier did not match any entry. + MeasureUnitImpl simplifiedUnit = baseMeasureUnitImpl.copyAndSimplify(status); + if (U_FAILURE(status)) { + return result; + } + if (U_FAILURE(localStatus)) { + localStatus = U_ZERO_ERROR; + simplifiedUnit.serialize(status); + identifier.set(simplifiedUnit.identifier.data()); + idx = getUnitCategoryIndex(trie, identifier, localStatus); + + if (U_FAILURE(status)) { + return result; + } + } + + // In case the simplified base unit identifier did not match any entry. + if (U_FAILURE(localStatus)) { + localStatus = U_ZERO_ERROR; + simplifiedUnit.takeReciprocal(status); + simplifiedUnit.serialize(status); + identifier.set(simplifiedUnit.identifier.data()); + idx = getUnitCategoryIndex(trie, identifier, localStatus); + + if (U_FAILURE(status)) { + return result; + } + } + + // If there is no match at all, throw an exception. + if (U_FAILURE(localStatus)) { status = U_INVALID_FORMAT_ERROR; return result; } + if (idx < 0 || idx >= gCategoriesCount) { status = U_INVALID_FORMAT_ERROR; return result; } + result.appendInvariantChars(gCategories[idx], u_strlen(gCategories[idx]), status); return result; } @@ -989,6 +1028,33 @@ void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) { } } +MeasureUnitImpl MeasureUnitImpl::copyAndSimplify(UErrorCode &status) const { + MeasureUnitImpl result; + for (int32_t i = 0; i < singleUnits.length(); i++) { + const SingleUnitImpl &singleUnit = *this->singleUnits[i]; + + // The following `for` loop will cause time complexity to be O(n^2). + // However, n is very small (number of units, generally, at maximum equal to 10) + bool unitExist = false; + for (int32_t j = 0; j < result.singleUnits.length(); j++) { + if (uprv_strcmp(result.singleUnits[j]->getSimpleUnitID(), singleUnit.getSimpleUnitID()) == + 0 && + result.singleUnits[j]->unitPrefix == singleUnit.unitPrefix) { + unitExist = true; + result.singleUnits[j]->dimensionality = + result.singleUnits[j]->dimensionality + singleUnit.dimensionality; + break; + } + } + + if (!unitExist) { + result.appendSingleUnit(singleUnit, status); + } + } + + return result; +} + bool MeasureUnitImpl::appendSingleUnit(const SingleUnitImpl &singleUnit, UErrorCode &status) { identifier.clear(); diff --git a/icu4c/source/i18n/measunit_impl.h b/icu4c/source/i18n/measunit_impl.h index 888dc04496b..c60ff2fc33b 100644 --- a/icu4c/source/i18n/measunit_impl.h +++ b/icu4c/source/i18n/measunit_impl.h @@ -29,13 +29,14 @@ static const char kDefaultCurrency8[] = "XXX"; * empty. * * This only supports base units: other units must be resolved to base units - * before passing to this function, otherwise U_UNSUPPORTED_ERROR status will be + * before passing to this function, otherwise U_UNSUPPORTED_ERROR status may be * returned. * * Categories are found in `unitQuantities` in the `units` resource (see * `units.txt`). */ -CharString U_I18N_API getUnitQuantity(StringPiece baseUnitIdentifier, UErrorCode &status); +// TODO: make this function accepts any `MeasureUnit` as Java and move it to the `UnitsData` class. +CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status); /** * A struct representing a single unit (optional SI or binary prefix, and dimensionality). @@ -289,6 +290,16 @@ class U_I18N_API MeasureUnitImpl : public UMemory { /** Mutates this MeasureUnitImpl to take the reciprocal. */ void takeReciprocal(UErrorCode& status); + /** + * Returns a simplified version of the unit. + * NOTE: the simplification happen when there are two units equals in their base unit and their + * prefixes. + * + * Example 1: "square-meter-per-meter" --> "meter" + * Example 2: "square-millimeter-per-meter" --> "square-millimeter-per-meter" + */ + MeasureUnitImpl copyAndSimplify(UErrorCode &status) const; + /** * Mutates this MeasureUnitImpl to append a single unit. * @@ -297,6 +308,11 @@ class U_I18N_API MeasureUnitImpl : public UMemory { */ bool appendSingleUnit(const SingleUnitImpl& singleUnit, UErrorCode& status); + /** + * Normalizes a MeasureUnitImpl and generate the identifier string in place. + */ + void serialize(UErrorCode &status); + /** The complexity, either SINGLE, COMPOUND, or MIXED. */ UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE; @@ -314,12 +330,6 @@ class U_I18N_API MeasureUnitImpl : public UMemory { */ CharString identifier; - private: - /** - * Normalizes a MeasureUnitImpl and generate the identifier string in place. - */ - void serialize(UErrorCode &status); - // For calling serialize // TODO(icu-units#147): revisit serialization friend class number::impl::LongNameHandler; diff --git a/icu4c/source/i18n/units_router.cpp b/icu4c/source/i18n/units_router.cpp index 51f66bfa892..0e6082fae5c 100644 --- a/icu4c/source/i18n/units_router.cpp +++ b/icu4c/source/i18n/units_router.cpp @@ -66,9 +66,9 @@ void UnitsRouter::init(const MeasureUnit &inputUnit, StringPiece region, StringP UnitPreferences prefs(status); MeasureUnitImpl inputUnitImpl = MeasureUnitImpl::forMeasureUnitMaybeCopy(inputUnit, status); - MeasureUnit baseUnit = - (extractCompoundBaseUnit(inputUnitImpl, conversionRates, status)).build(status); - CharString category = getUnitQuantity(baseUnit.getIdentifier(), status); + MeasureUnitImpl baseUnitImpl = + (extractCompoundBaseUnit(inputUnitImpl, conversionRates, status)); + CharString category = getUnitQuantity(baseUnitImpl, status); if (U_FAILURE(status)) { return; } diff --git a/icu4c/source/test/intltest/numbertest_api.cpp b/icu4c/source/test/intltest/numbertest_api.cpp index 17207d96dd7..8d26b902a6b 100644 --- a/icu4c/source/test/intltest/numbertest_api.cpp +++ b/icu4c/source/test/intltest/numbertest_api.cpp @@ -1783,6 +1783,76 @@ void NumberFormatterApiTest::unitUsage() { 3048, // u"3,048 cm"); + assertFormatSingle(u"kilometer-per-liter match the correct category", // + u"unit/kilometer-per-liter usage/default", // + u"unit/kilometer-per-liter usage/default", // + NumberFormatter::with() // + .unit(MeasureUnit::forIdentifier("kilometer-per-liter", status)) // + .usage("default"), // + Locale("en-US"), // + 1, // + u"100 L/100 km"); + + assertFormatSingle(u"gallon-per-mile match the correct category", // + u"unit/gallon-per-mile usage/default", // + u"unit/gallon-per-mile usage/default", // + NumberFormatter::with() // + .unit(MeasureUnit::forIdentifier("gallon-per-mile", status)) // + .usage("default"), // + Locale("en-US"), // + 1, // + u"235 L/100 km"); + + assertFormatSingle(u"psi match the correct category", // + u"unit/megapascal usage/default", // + u"unit/megapascal usage/default", // + NumberFormatter::with() // + .unit(MeasureUnit::forIdentifier("megapascal", status)) // + .usage("default"), // + Locale("en-US"), // + 1, // + "145 psi"); + + assertFormatSingle(u"millibar match the correct category", // + u"unit/millibar usage/default", // + u"unit/millibar usage/default", // + NumberFormatter::with() // + .unit(MeasureUnit::forIdentifier("millibar", status)) // + .usage("default"), // + Locale("en-US"), // + 1, // + "0.015 psi"); + + assertFormatSingle(u"pound-force-per-square-inch match the correct category", // + u"unit/pound-force-per-square-inch usage/default", // + u"unit/pound-force-per-square-inch usage/default", // + NumberFormatter::with() // + .unit(MeasureUnit::forIdentifier("pound-force-per-square-inch", status)) // + .usage("default"), // + Locale("en-US"), // + 1, // + "1 psi"); // + + assertFormatSingle(u"inch-ofhg match the correct category", // + u"unit/inch-ofhg usage/default", // + u"unit/inch-ofhg usage/default", // + NumberFormatter::with() // + .unit(MeasureUnit::forIdentifier("inch-ofhg", status)) // + .usage("default"), // + Locale("en-US"), // + 1, // + "0.49 psi"); + + assertFormatSingle(u"millimeter-ofhg match the correct category", // + u"unit/millimeter-ofhg usage/default", // + u"unit/millimeter-ofhg usage/default", // + NumberFormatter::with() // + .unit(MeasureUnit::forIdentifier("millimeter-ofhg", status)) // + .usage("default"), // + Locale("en-US"), // + 1, // + "0.019 psi"); + // TODO(icu-units#38): improve unit testing coverage. E.g. add vehicle-fuel // triggering inversion conversion code. Test with 0 too, to see // divide-by-zero behaviour. diff --git a/icu4c/source/test/intltest/units_data_test.cpp b/icu4c/source/test/intltest/units_data_test.cpp index b0ca7e2173e..ece7235ca68 100644 --- a/icu4c/source/test/intltest/units_data_test.cpp +++ b/icu4c/source/test/intltest/units_data_test.cpp @@ -44,17 +44,20 @@ void UnitsDataTest::testGetUnitCategory() { {"kilogram-per-cubic-meter", "mass-density"}, {"cubic-meter-per-kilogram", "specific-volume"}, {"meter-per-second", "speed"}, - // TODO(icu-units#130): inverse-speed - // {"second-per-meter", "speed"}, - // Consumption specifically supports inverse units (mile-per-galon, - // liter-per-100-kilometer): + {"second-per-meter", "speed"}, + // TODO: add this test cases once the `getUnitCategory` accepts any `MeasureUnit` and not only + // base units. + // Tests are: + // {"liter-per-100-kilometer", "consumption"}, + // {"mile-per-gallon", "consumption"}, {"cubic-meter-per-meter", "consumption"}, {"meter-per-cubic-meter", "consumption"}, + {"kilogram-meter-per-square-meter-square-second", "pressure"}, }; IcuTestErrorCode status(*this, "testGetUnitCategory"); for (const auto &t : testCases) { - CharString category = getUnitQuantity(t.unit, status); + CharString category = getUnitQuantity(MeasureUnitImpl::forIdentifier(t.unit, status), status); if (!status.errIfFailureAndReset("getUnitCategory(%s)", t.unit)) { assertEquals("category", t.expectedCategory, category.data()); } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/units/MeasureUnitImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/units/MeasureUnitImpl.java index b5d1c009639..59789b12a7b 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/units/MeasureUnitImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/units/MeasureUnitImpl.java @@ -6,6 +6,9 @@ import java.math.BigDecimal; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; import com.ibm.icu.util.BytesTrie; import com.ibm.icu.util.CharsTrie; @@ -67,12 +70,45 @@ public class MeasureUnitImpl { MeasureUnitImpl result = new MeasureUnitImpl(); result.complexity = this.complexity; result.identifier = this.identifier; - for (SingleUnitImpl single : this.singleUnits) { - result.singleUnits.add(single.copy()); + for (SingleUnitImpl singleUnit : this.singleUnits) { + result.singleUnits.add(singleUnit.copy()); } return result; } + /** + * Returns a simplified version of the unit. + * NOTE: the simplification happen when there are two units equals in their base unit and their + * prefixes. + * + * Example 1: "square-meter-per-meter" --> "meter" + * Example 2: "square-millimeter-per-meter" --> "square-millimeter-per-meter" + */ + public MeasureUnitImpl copyAndSimplify() { + MeasureUnitImpl result = new MeasureUnitImpl(); + for (SingleUnitImpl singleUnit : this.getSingleUnits()) { + // This `for` loop will cause time complexity to be O(n^2). + // However, n is very small (number of units, generally, at maximum equal to 10) + boolean unitExist = false; + for (SingleUnitImpl resultSingleUnit : result.getSingleUnits()) { + if(resultSingleUnit.getSimpleUnitID().compareTo(singleUnit.getSimpleUnitID()) == 0 + && + resultSingleUnit.getPrefix().getIdentifier().compareTo(singleUnit.getPrefix().getIdentifier()) == 0 + ) { + unitExist = true; + resultSingleUnit.setDimensionality(resultSingleUnit.getDimensionality() + singleUnit.getDimensionality()); + break; + } + } + + if(!unitExist) { + result.appendSingleUnit(singleUnit); + } + } + + return result; + } + /** * Returns the list of simple units. */ diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/units/UnitsData.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/units/UnitsData.java index 910fa92aa48..bd090d2a550 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/units/UnitsData.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/units/UnitsData.java @@ -9,8 +9,8 @@ import java.util.Iterator; import com.ibm.icu.impl.ICUData; import com.ibm.icu.impl.ICUResourceBundle; +import com.ibm.icu.impl.IllegalIcuArgumentException; import com.ibm.icu.impl.UResource; -import com.ibm.icu.util.MeasureUnit; import com.ibm.icu.util.UResourceBundle; /** @@ -68,18 +68,44 @@ public class UnitsData { * @return the corresponding category. */ public String getCategory(MeasureUnitImpl measureUnit) { - MeasureUnitImpl baseMeasureUnit + MeasureUnitImpl baseMeasureUnitImpl = this.getConversionRates().extractCompoundBaseUnit(measureUnit); - String baseUnitIdentifier = MeasureUnit.fromMeasureUnitImpl(baseMeasureUnit).getIdentifier(); + baseMeasureUnitImpl.serialize(); + String identifier = baseMeasureUnitImpl.getIdentifier(); - if (baseUnitIdentifier.equals("meter-per-cubic-meter")) { - // TODO(icu-units#130): support inverting any unit, with correct - // fallback logic: inversion and fallback may depend on presence or - // absence of a usage for that category. - return "consumption"; + + Integer index = Categories.baseUnitToIndex.get(identifier); + + // In case the base unit identifier did not match any entry. + if (index == null) { + baseMeasureUnitImpl.takeReciprocal(); + baseMeasureUnitImpl.serialize(); + identifier = baseMeasureUnitImpl.getIdentifier(); + index = Categories.baseUnitToIndex.get(identifier); + } + + // In case the reciprocal of the base unit identifier did not match any entry. + baseMeasureUnitImpl.takeReciprocal(); // return to original form + MeasureUnitImpl simplifiedUnit = baseMeasureUnitImpl.copyAndSimplify(); + if (index == null) { + simplifiedUnit.serialize(); + identifier = simplifiedUnit.getIdentifier(); + index = Categories.baseUnitToIndex.get(identifier); + } + + // In case the simplified base unit identifier did not match any entry. + if (index == null) { + simplifiedUnit.takeReciprocal(); + simplifiedUnit.serialize(); + identifier = simplifiedUnit.getIdentifier(); + index = Categories.baseUnitToIndex.get(identifier); + } + + // If there is no match at all, throw an exception. + if (index == null) { + throw new IllegalIcuArgumentException("This unit does not has a category" + measureUnit.getIdentifier()); } - int index = Categories.baseUnitToIndex.get(baseUnitIdentifier); return Categories.indexToCategory[index]; } diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/impl/UnitsTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/impl/UnitsTest.java index 37679e95bb8..ba15eab7f18 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/impl/UnitsTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/impl/UnitsTest.java @@ -370,6 +370,36 @@ public class UnitsTest { } } + @Test + public void testGetUnitCategory() { + class TestCase { + final MeasureUnitImpl unit; + final String expectedCategory; + + TestCase(String unitId, String expectedCategory) { + this.unit = MeasureUnitImpl.forIdentifier(unitId); + this.expectedCategory = expectedCategory; + } + } + + TestCase testCases[] = { + new TestCase("kilogram-per-cubic-meter", "mass-density"), + new TestCase("cubic-meter-per-kilogram", "specific-volume"), + new TestCase("meter-per-second", "speed"), + new TestCase("second-per-meter", "speed"), + new TestCase("mile-per-gallon", "consumption"), + new TestCase("liter-per-100-kilometer", "consumption"), + new TestCase("cubic-meter-per-meter", "consumption"), + new TestCase("meter-per-cubic-meter", "consumption"), + new TestCase("kilogram-meter-per-square-meter-square-second", "pressure"), + }; + + UnitsData data = new UnitsData(); + for (TestCase test : testCases) { + assertEquals(test.expectedCategory, data.getCategory(test.unit)); + } + } + @Test public void testConverter() { class TestData { diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java index 68817dbf13a..9c2a0918ab1 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/number/NumberFormatterApiTest.java @@ -1721,6 +1721,75 @@ public class NumberFormatterApiTest extends TestFmwk { 3048, "3,048 cm"); + assertFormatSingle("kilometer-per-liter match the correct category", + "unit/kilometer-per-liter usage/default", + "unit/kilometer-per-liter usage/default", + NumberFormatter.with() + .unit(MeasureUnit.forIdentifier("kilometer-per-liter")) + .usage("default"), + new ULocale("en-US"), + 1, + "100 L/100 km"); + + assertFormatSingle("gallon-per-mile match the correct category", + "unit/gallon-per-mile usage/default", + "unit/gallon-per-mile usage/default", + NumberFormatter.with() + .unit(MeasureUnit.forIdentifier("gallon-per-mile")) + .usage("default"), + new ULocale("en-US"), + 1, + "235 L/100 km"); + + assertFormatSingle("psi match the correct category", + "unit/megapascal usage/default", + "unit/megapascal usage/default", + NumberFormatter.with() + .unit(MeasureUnit.forIdentifier("megapascal")) + .usage("default"), + new ULocale("en-US"), + 1, + "145 psi"); + + assertFormatSingle("millibar match the correct category", + "unit/millibar usage/default", + "unit/millibar usage/default", + NumberFormatter.with() + .unit(MeasureUnit.forIdentifier("millibar")) + .usage("default"), + new ULocale("en-US"), + 1, + "0.015 psi"); + + assertFormatSingle("pound-force-per-square-inch match the correct category", + "unit/pound-force-per-square-inch usage/default", + "unit/pound-force-per-square-inch usage/default", + NumberFormatter.with() + .unit(MeasureUnit.forIdentifier("pound-force-per-square-inch")) + .usage("default"), + new ULocale("en-US"), + 1, + "1 psi"); + + assertFormatSingle("inch-ofhg match the correct category", + "unit/inch-ofhg usage/default", + "unit/inch-ofhg usage/default", + NumberFormatter.with() + .unit(MeasureUnit.forIdentifier("inch-ofhg")) + .usage("default"), + new ULocale("en-US"), + 1, + "0.49 psi"); + + assertFormatSingle("millimeter-ofhg match the correct category", + "unit/millimeter-ofhg usage/default", + "unit/millimeter-ofhg usage/default", + NumberFormatter.with() + .unit(MeasureUnit.forIdentifier("millimeter-ofhg")) + .usage("default"), + new ULocale("en-US"), + 1, + "0.019 psi"); // TODO(icu-units#38): improve unit testing coverage. E.g. add // vehicle-fuel triggering inversion conversion code. Test with 0 too,