ICU-21656 fix wrong matching categories.

See #1839
This commit is contained in:
Younies 2021-09-16 18:04:34 +00:00 committed by Younies Mahmoud
parent f026e967f6
commit 9d26c917f2
9 changed files with 357 additions and 47 deletions

View file

@ -24,7 +24,6 @@
#include "unicode/bytestrie.h"
#include "unicode/bytestriebuilder.h"
#include "unicode/localpointer.h"
#include "unicode/measunit.h"
#include "unicode/stringpiece.h"
#include "unicode/stringtriebuilder.h"
#include "unicode/ures.h"
@ -335,9 +334,6 @@ char *gSerializedUnitExtrasStemTrie = nullptr;
const UChar **gCategories = nullptr;
// Number of items in `gCategories`.
int32_t gCategoriesCount = 0;
// TODO: rather save an index into gCategories?
const char *kConsumption = "consumption";
size_t kConsumptionLen = strlen("consumption");
// Serialized BytesTrie for mapping from base units to indices into gCategories.
char *gSerializedUnitCategoriesTrie = nullptr;
@ -809,17 +805,13 @@ compareSingleUnits(const void* /*context*/, const void* left, const void* right)
// Returns an index into the gCategories array, for the "unitQuantity" (aka
// "type" or "category") associated with the given base unit identifier. Returns
// -1 on failure, together with U_UNSUPPORTED_ERROR.
int32_t getUnitCategoryIndex(StringPiece baseUnitIdentifier, UErrorCode &status) {
umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
if (U_FAILURE(status)) {
return -1;
}
BytesTrie trie(gSerializedUnitCategoriesTrie);
UStringTrieResult result = trie.next(baseUnitIdentifier.data(), baseUnitIdentifier.length());
int32_t getUnitCategoryIndex(BytesTrie &trie, StringPiece baseUnitIdentifier, UErrorCode &status) {
UStringTrieResult result = trie.reset().next(baseUnitIdentifier.data(), baseUnitIdentifier.length());
if (!USTRINGTRIE_HAS_VALUE(result)) {
status = U_UNSUPPORTED_ERROR;
return -1;
}
return trie.getValue();
}
@ -847,29 +839,76 @@ umeas_getPrefixBase(UMeasurePrefix unitPrefix) {
return 10;
}
CharString U_I18N_API getUnitQuantity(StringPiece baseUnitIdentifier, UErrorCode &status) {
CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status) {
CharString result;
U_ASSERT(result.length() == 0);
MeasureUnitImpl baseUnitImpl = baseMeasureUnitImpl.copy(status);
UErrorCode localStatus = U_ZERO_ERROR;
umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
if (U_FAILURE(status)) {
return result;
}
UErrorCode localStatus = U_ZERO_ERROR;
int32_t idx = getUnitCategoryIndex(baseUnitIdentifier, localStatus);
BytesTrie trie(gSerializedUnitCategoriesTrie);
baseUnitImpl.serialize(status);
StringPiece identifier = baseUnitImpl.identifier.data();
int32_t idx = getUnitCategoryIndex(trie, identifier, localStatus);
if (U_FAILURE(status)) {
return result;
}
// In case the base unit identifier did not match any entry.
if (U_FAILURE(localStatus)) {
// TODO(icu-units#130): support inverting any unit, with correct
// fallback logic: inversion and fallback may depend on presence or
// absence of a usage for that category.
if (uprv_strcmp(baseUnitIdentifier.data(), "meter-per-cubic-meter") == 0) {
result.append(kConsumption, (int32_t)kConsumptionLen, status);
localStatus = U_ZERO_ERROR;
baseUnitImpl.takeReciprocal(status);
baseUnitImpl.serialize(status);
identifier.set(baseUnitImpl.identifier.data());
idx = getUnitCategoryIndex(trie, identifier, localStatus);
if (U_FAILURE(status)) {
return result;
}
}
// In case the reciprocal of the base unit identifier did not match any entry.
MeasureUnitImpl simplifiedUnit = baseMeasureUnitImpl.copyAndSimplify(status);
if (U_FAILURE(status)) {
return result;
}
if (U_FAILURE(localStatus)) {
localStatus = U_ZERO_ERROR;
simplifiedUnit.serialize(status);
identifier.set(simplifiedUnit.identifier.data());
idx = getUnitCategoryIndex(trie, identifier, localStatus);
if (U_FAILURE(status)) {
return result;
}
}
// In case the simplified base unit identifier did not match any entry.
if (U_FAILURE(localStatus)) {
localStatus = U_ZERO_ERROR;
simplifiedUnit.takeReciprocal(status);
simplifiedUnit.serialize(status);
identifier.set(simplifiedUnit.identifier.data());
idx = getUnitCategoryIndex(trie, identifier, localStatus);
if (U_FAILURE(status)) {
return result;
}
}
// If there is no match at all, throw an exception.
if (U_FAILURE(localStatus)) {
status = U_INVALID_FORMAT_ERROR;
return result;
}
if (idx < 0 || idx >= gCategoriesCount) {
status = U_INVALID_FORMAT_ERROR;
return result;
}
result.appendInvariantChars(gCategories[idx], u_strlen(gCategories[idx]), status);
return result;
}
@ -989,6 +1028,33 @@ void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) {
}
}
MeasureUnitImpl MeasureUnitImpl::copyAndSimplify(UErrorCode &status) const {
MeasureUnitImpl result;
for (int32_t i = 0; i < singleUnits.length(); i++) {
const SingleUnitImpl &singleUnit = *this->singleUnits[i];
// The following `for` loop will cause time complexity to be O(n^2).
// However, n is very small (number of units, generally, at maximum equal to 10)
bool unitExist = false;
for (int32_t j = 0; j < result.singleUnits.length(); j++) {
if (uprv_strcmp(result.singleUnits[j]->getSimpleUnitID(), singleUnit.getSimpleUnitID()) ==
0 &&
result.singleUnits[j]->unitPrefix == singleUnit.unitPrefix) {
unitExist = true;
result.singleUnits[j]->dimensionality =
result.singleUnits[j]->dimensionality + singleUnit.dimensionality;
break;
}
}
if (!unitExist) {
result.appendSingleUnit(singleUnit, status);
}
}
return result;
}
bool MeasureUnitImpl::appendSingleUnit(const SingleUnitImpl &singleUnit, UErrorCode &status) {
identifier.clear();

View file

@ -29,13 +29,14 @@ static const char kDefaultCurrency8[] = "XXX";
* empty.
*
* This only supports base units: other units must be resolved to base units
* before passing to this function, otherwise U_UNSUPPORTED_ERROR status will be
* before passing to this function, otherwise U_UNSUPPORTED_ERROR status may be
* returned.
*
* Categories are found in `unitQuantities` in the `units` resource (see
* `units.txt`).
*/
CharString U_I18N_API getUnitQuantity(StringPiece baseUnitIdentifier, UErrorCode &status);
// TODO: make this function accepts any `MeasureUnit` as Java and move it to the `UnitsData` class.
CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status);
/**
* A struct representing a single unit (optional SI or binary prefix, and dimensionality).
@ -289,6 +290,16 @@ class U_I18N_API MeasureUnitImpl : public UMemory {
/** Mutates this MeasureUnitImpl to take the reciprocal. */
void takeReciprocal(UErrorCode& status);
/**
* Returns a simplified version of the unit.
* NOTE: the simplification happen when there are two units equals in their base unit and their
* prefixes.
*
* Example 1: "square-meter-per-meter" --> "meter"
* Example 2: "square-millimeter-per-meter" --> "square-millimeter-per-meter"
*/
MeasureUnitImpl copyAndSimplify(UErrorCode &status) const;
/**
* Mutates this MeasureUnitImpl to append a single unit.
*
@ -297,6 +308,11 @@ class U_I18N_API MeasureUnitImpl : public UMemory {
*/
bool appendSingleUnit(const SingleUnitImpl& singleUnit, UErrorCode& status);
/**
* Normalizes a MeasureUnitImpl and generate the identifier string in place.
*/
void serialize(UErrorCode &status);
/** The complexity, either SINGLE, COMPOUND, or MIXED. */
UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE;
@ -314,12 +330,6 @@ class U_I18N_API MeasureUnitImpl : public UMemory {
*/
CharString identifier;
private:
/**
* Normalizes a MeasureUnitImpl and generate the identifier string in place.
*/
void serialize(UErrorCode &status);
// For calling serialize
// TODO(icu-units#147): revisit serialization
friend class number::impl::LongNameHandler;

View file

@ -66,9 +66,9 @@ void UnitsRouter::init(const MeasureUnit &inputUnit, StringPiece region, StringP
UnitPreferences prefs(status);
MeasureUnitImpl inputUnitImpl = MeasureUnitImpl::forMeasureUnitMaybeCopy(inputUnit, status);
MeasureUnit baseUnit =
(extractCompoundBaseUnit(inputUnitImpl, conversionRates, status)).build(status);
CharString category = getUnitQuantity(baseUnit.getIdentifier(), status);
MeasureUnitImpl baseUnitImpl =
(extractCompoundBaseUnit(inputUnitImpl, conversionRates, status));
CharString category = getUnitQuantity(baseUnitImpl, status);
if (U_FAILURE(status)) {
return;
}

View file

@ -1783,6 +1783,76 @@ void NumberFormatterApiTest::unitUsage() {
3048, //
u"3,048 cm");
assertFormatSingle(u"kilometer-per-liter match the correct category", //
u"unit/kilometer-per-liter usage/default", //
u"unit/kilometer-per-liter usage/default", //
NumberFormatter::with() //
.unit(MeasureUnit::forIdentifier("kilometer-per-liter", status)) //
.usage("default"), //
Locale("en-US"), //
1, //
u"100 L/100 km");
assertFormatSingle(u"gallon-per-mile match the correct category", //
u"unit/gallon-per-mile usage/default", //
u"unit/gallon-per-mile usage/default", //
NumberFormatter::with() //
.unit(MeasureUnit::forIdentifier("gallon-per-mile", status)) //
.usage("default"), //
Locale("en-US"), //
1, //
u"235 L/100 km");
assertFormatSingle(u"psi match the correct category", //
u"unit/megapascal usage/default", //
u"unit/megapascal usage/default", //
NumberFormatter::with() //
.unit(MeasureUnit::forIdentifier("megapascal", status)) //
.usage("default"), //
Locale("en-US"), //
1, //
"145 psi");
assertFormatSingle(u"millibar match the correct category", //
u"unit/millibar usage/default", //
u"unit/millibar usage/default", //
NumberFormatter::with() //
.unit(MeasureUnit::forIdentifier("millibar", status)) //
.usage("default"), //
Locale("en-US"), //
1, //
"0.015 psi");
assertFormatSingle(u"pound-force-per-square-inch match the correct category", //
u"unit/pound-force-per-square-inch usage/default", //
u"unit/pound-force-per-square-inch usage/default", //
NumberFormatter::with() //
.unit(MeasureUnit::forIdentifier("pound-force-per-square-inch", status)) //
.usage("default"), //
Locale("en-US"), //
1, //
"1 psi"); //
assertFormatSingle(u"inch-ofhg match the correct category", //
u"unit/inch-ofhg usage/default", //
u"unit/inch-ofhg usage/default", //
NumberFormatter::with() //
.unit(MeasureUnit::forIdentifier("inch-ofhg", status)) //
.usage("default"), //
Locale("en-US"), //
1, //
"0.49 psi");
assertFormatSingle(u"millimeter-ofhg match the correct category", //
u"unit/millimeter-ofhg usage/default", //
u"unit/millimeter-ofhg usage/default", //
NumberFormatter::with() //
.unit(MeasureUnit::forIdentifier("millimeter-ofhg", status)) //
.usage("default"), //
Locale("en-US"), //
1, //
"0.019 psi");
// TODO(icu-units#38): improve unit testing coverage. E.g. add vehicle-fuel
// triggering inversion conversion code. Test with 0 too, to see
// divide-by-zero behaviour.

View file

@ -44,17 +44,20 @@ void UnitsDataTest::testGetUnitCategory() {
{"kilogram-per-cubic-meter", "mass-density"},
{"cubic-meter-per-kilogram", "specific-volume"},
{"meter-per-second", "speed"},
// TODO(icu-units#130): inverse-speed
// {"second-per-meter", "speed"},
// Consumption specifically supports inverse units (mile-per-galon,
// liter-per-100-kilometer):
{"second-per-meter", "speed"},
// TODO: add this test cases once the `getUnitCategory` accepts any `MeasureUnit` and not only
// base units.
// Tests are:
// {"liter-per-100-kilometer", "consumption"},
// {"mile-per-gallon", "consumption"},
{"cubic-meter-per-meter", "consumption"},
{"meter-per-cubic-meter", "consumption"},
{"kilogram-meter-per-square-meter-square-second", "pressure"},
};
IcuTestErrorCode status(*this, "testGetUnitCategory");
for (const auto &t : testCases) {
CharString category = getUnitQuantity(t.unit, status);
CharString category = getUnitQuantity(MeasureUnitImpl::forIdentifier(t.unit, status), status);
if (!status.errIfFailureAndReset("getUnitCategory(%s)", t.unit)) {
assertEquals("category", t.expectedCategory, category.data());
}

View file

@ -6,6 +6,9 @@ import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import com.ibm.icu.util.BytesTrie;
import com.ibm.icu.util.CharsTrie;
@ -67,12 +70,45 @@ public class MeasureUnitImpl {
MeasureUnitImpl result = new MeasureUnitImpl();
result.complexity = this.complexity;
result.identifier = this.identifier;
for (SingleUnitImpl single : this.singleUnits) {
result.singleUnits.add(single.copy());
for (SingleUnitImpl singleUnit : this.singleUnits) {
result.singleUnits.add(singleUnit.copy());
}
return result;
}
/**
* Returns a simplified version of the unit.
* NOTE: the simplification happen when there are two units equals in their base unit and their
* prefixes.
*
* Example 1: "square-meter-per-meter" --> "meter"
* Example 2: "square-millimeter-per-meter" --> "square-millimeter-per-meter"
*/
public MeasureUnitImpl copyAndSimplify() {
MeasureUnitImpl result = new MeasureUnitImpl();
for (SingleUnitImpl singleUnit : this.getSingleUnits()) {
// This `for` loop will cause time complexity to be O(n^2).
// However, n is very small (number of units, generally, at maximum equal to 10)
boolean unitExist = false;
for (SingleUnitImpl resultSingleUnit : result.getSingleUnits()) {
if(resultSingleUnit.getSimpleUnitID().compareTo(singleUnit.getSimpleUnitID()) == 0
&&
resultSingleUnit.getPrefix().getIdentifier().compareTo(singleUnit.getPrefix().getIdentifier()) == 0
) {
unitExist = true;
resultSingleUnit.setDimensionality(resultSingleUnit.getDimensionality() + singleUnit.getDimensionality());
break;
}
}
if(!unitExist) {
result.appendSingleUnit(singleUnit);
}
}
return result;
}
/**
* Returns the list of simple units.
*/

View file

@ -9,8 +9,8 @@ import java.util.Iterator;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.IllegalIcuArgumentException;
import com.ibm.icu.impl.UResource;
import com.ibm.icu.util.MeasureUnit;
import com.ibm.icu.util.UResourceBundle;
/**
@ -68,18 +68,44 @@ public class UnitsData {
* @return the corresponding category.
*/
public String getCategory(MeasureUnitImpl measureUnit) {
MeasureUnitImpl baseMeasureUnit
MeasureUnitImpl baseMeasureUnitImpl
= this.getConversionRates().extractCompoundBaseUnit(measureUnit);
String baseUnitIdentifier = MeasureUnit.fromMeasureUnitImpl(baseMeasureUnit).getIdentifier();
baseMeasureUnitImpl.serialize();
String identifier = baseMeasureUnitImpl.getIdentifier();
if (baseUnitIdentifier.equals("meter-per-cubic-meter")) {
// TODO(icu-units#130): support inverting any unit, with correct
// fallback logic: inversion and fallback may depend on presence or
// absence of a usage for that category.
return "consumption";
Integer index = Categories.baseUnitToIndex.get(identifier);
// In case the base unit identifier did not match any entry.
if (index == null) {
baseMeasureUnitImpl.takeReciprocal();
baseMeasureUnitImpl.serialize();
identifier = baseMeasureUnitImpl.getIdentifier();
index = Categories.baseUnitToIndex.get(identifier);
}
// In case the reciprocal of the base unit identifier did not match any entry.
baseMeasureUnitImpl.takeReciprocal(); // return to original form
MeasureUnitImpl simplifiedUnit = baseMeasureUnitImpl.copyAndSimplify();
if (index == null) {
simplifiedUnit.serialize();
identifier = simplifiedUnit.getIdentifier();
index = Categories.baseUnitToIndex.get(identifier);
}
// In case the simplified base unit identifier did not match any entry.
if (index == null) {
simplifiedUnit.takeReciprocal();
simplifiedUnit.serialize();
identifier = simplifiedUnit.getIdentifier();
index = Categories.baseUnitToIndex.get(identifier);
}
// If there is no match at all, throw an exception.
if (index == null) {
throw new IllegalIcuArgumentException("This unit does not has a category" + measureUnit.getIdentifier());
}
int index = Categories.baseUnitToIndex.get(baseUnitIdentifier);
return Categories.indexToCategory[index];
}

View file

@ -370,6 +370,36 @@ public class UnitsTest {
}
}
@Test
public void testGetUnitCategory() {
class TestCase {
final MeasureUnitImpl unit;
final String expectedCategory;
TestCase(String unitId, String expectedCategory) {
this.unit = MeasureUnitImpl.forIdentifier(unitId);
this.expectedCategory = expectedCategory;
}
}
TestCase testCases[] = {
new TestCase("kilogram-per-cubic-meter", "mass-density"),
new TestCase("cubic-meter-per-kilogram", "specific-volume"),
new TestCase("meter-per-second", "speed"),
new TestCase("second-per-meter", "speed"),
new TestCase("mile-per-gallon", "consumption"),
new TestCase("liter-per-100-kilometer", "consumption"),
new TestCase("cubic-meter-per-meter", "consumption"),
new TestCase("meter-per-cubic-meter", "consumption"),
new TestCase("kilogram-meter-per-square-meter-square-second", "pressure"),
};
UnitsData data = new UnitsData();
for (TestCase test : testCases) {
assertEquals(test.expectedCategory, data.getCategory(test.unit));
}
}
@Test
public void testConverter() {
class TestData {

View file

@ -1721,6 +1721,75 @@ public class NumberFormatterApiTest extends TestFmwk {
3048,
"3,048 cm");
assertFormatSingle("kilometer-per-liter match the correct category",
"unit/kilometer-per-liter usage/default",
"unit/kilometer-per-liter usage/default",
NumberFormatter.with()
.unit(MeasureUnit.forIdentifier("kilometer-per-liter"))
.usage("default"),
new ULocale("en-US"),
1,
"100 L/100 km");
assertFormatSingle("gallon-per-mile match the correct category",
"unit/gallon-per-mile usage/default",
"unit/gallon-per-mile usage/default",
NumberFormatter.with()
.unit(MeasureUnit.forIdentifier("gallon-per-mile"))
.usage("default"),
new ULocale("en-US"),
1,
"235 L/100 km");
assertFormatSingle("psi match the correct category",
"unit/megapascal usage/default",
"unit/megapascal usage/default",
NumberFormatter.with()
.unit(MeasureUnit.forIdentifier("megapascal"))
.usage("default"),
new ULocale("en-US"),
1,
"145 psi");
assertFormatSingle("millibar match the correct category",
"unit/millibar usage/default",
"unit/millibar usage/default",
NumberFormatter.with()
.unit(MeasureUnit.forIdentifier("millibar"))
.usage("default"),
new ULocale("en-US"),
1,
"0.015 psi");
assertFormatSingle("pound-force-per-square-inch match the correct category",
"unit/pound-force-per-square-inch usage/default",
"unit/pound-force-per-square-inch usage/default",
NumberFormatter.with()
.unit(MeasureUnit.forIdentifier("pound-force-per-square-inch"))
.usage("default"),
new ULocale("en-US"),
1,
"1 psi");
assertFormatSingle("inch-ofhg match the correct category",
"unit/inch-ofhg usage/default",
"unit/inch-ofhg usage/default",
NumberFormatter.with()
.unit(MeasureUnit.forIdentifier("inch-ofhg"))
.usage("default"),
new ULocale("en-US"),
1,
"0.49 psi");
assertFormatSingle("millimeter-ofhg match the correct category",
"unit/millimeter-ofhg usage/default",
"unit/millimeter-ofhg usage/default",
NumberFormatter.with()
.unit(MeasureUnit.forIdentifier("millimeter-ofhg"))
.usage("default"),
new ULocale("en-US"),
1,
"0.019 psi");
// TODO(icu-units#38): improve unit testing coverage. E.g. add
// vehicle-fuel triggering inversion conversion code. Test with 0 too,