mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-21551 Fixing unit prefix normalization order to match LDML spec
See #1754
This commit is contained in:
parent
e2fe1aa8b7
commit
33e1d87041
6 changed files with 84 additions and 38 deletions
|
@ -96,6 +96,7 @@ struct U_I18N_API SingleUnitImpl : public UMemory {
|
|||
if (dimensionality > 0 && other.dimensionality < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Sort by official quantity order
|
||||
int32_t thisQuantity = this->getUnitCategoryIndex();
|
||||
int32_t otherQuantity = other.getUnitCategoryIndex();
|
||||
|
@ -105,6 +106,7 @@ struct U_I18N_API SingleUnitImpl : public UMemory {
|
|||
if (thisQuantity > otherQuantity) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// If quantity order didn't help, then we go by index.
|
||||
if (index < other.index) {
|
||||
return -1;
|
||||
|
@ -112,15 +114,39 @@ struct U_I18N_API SingleUnitImpl : public UMemory {
|
|||
if (index > other.index) {
|
||||
return 1;
|
||||
}
|
||||
// TODO: revisit if the spec dictates prefix sort order - it doesn't
|
||||
// currently. For now we're sorting binary prefixes before SI prefixes,
|
||||
// as per enum values order.
|
||||
if (unitPrefix < other.unitPrefix) {
|
||||
return -1;
|
||||
}
|
||||
if (unitPrefix > other.unitPrefix) {
|
||||
|
||||
// When comparing binary prefixes vs SI prefixes, instead of comparing the actual values, we can
|
||||
// multiply the binary prefix power by 3 and compare the powers. if they are equal, we can can
|
||||
// compare the bases.
|
||||
// NOTE: this methodology will fail if the binary prefix more than or equal 98.
|
||||
int32_t unitBase = umeas_getPrefixBase(unitPrefix);
|
||||
int32_t otherUnitBase = umeas_getPrefixBase(other.unitPrefix);
|
||||
|
||||
// Values for comparison purposes only.
|
||||
int32_t unitPower = unitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(unitPrefix) * 3
|
||||
: umeas_getPrefixPower(unitPrefix);
|
||||
int32_t otherUnitPower =
|
||||
otherUnitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(other.unitPrefix) * 3
|
||||
: umeas_getPrefixPower(other.unitPrefix);
|
||||
|
||||
// NOTE: if the unitPower is less than the other,
|
||||
// we return 1 not -1. Thus because we want th sorting order
|
||||
// for the bigger prefix to be before the smaller.
|
||||
// Example: megabyte should come before kilobyte.
|
||||
if (unitPower < otherUnitPower) {
|
||||
return 1;
|
||||
}
|
||||
if (unitPower > otherUnitPower) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (unitBase < otherUnitBase) {
|
||||
return 1;
|
||||
}
|
||||
if (unitBase > otherUnitBase) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -4480,10 +4480,14 @@ void MeasureFormatTest::TestIdentifiers() {
|
|||
{"dot", "dot"},
|
||||
|
||||
// Testing sort order of prefixes.
|
||||
//
|
||||
// TODO(icu-units#70): revisit when fixing normalization. For now we're
|
||||
// just checking some consistency between C&J.
|
||||
{"megafoot-mebifoot-kibifoot-kilofoot", "kibifoot-mebifoot-kilofoot-megafoot"},
|
||||
{"megafoot-mebifoot-kibifoot-kilofoot", "mebifoot-megafoot-kibifoot-kilofoot"},
|
||||
{"per-megafoot-mebifoot-kibifoot-kilofoot", "per-mebifoot-megafoot-kibifoot-kilofoot"},
|
||||
{"megafoot-mebifoot-kibifoot-kilofoot-per-megafoot-mebifoot-kibifoot-kilofoot",
|
||||
"mebifoot-megafoot-kibifoot-kilofoot-per-mebifoot-megafoot-kibifoot-kilofoot"},
|
||||
{"microfoot-millifoot-megafoot-mebifoot-kibifoot-kilofoot",
|
||||
"mebifoot-megafoot-kibifoot-kilofoot-millifoot-microfoot"},
|
||||
{"per-microfoot-millifoot-megafoot-mebifoot-kibifoot-kilofoot",
|
||||
"per-mebifoot-megafoot-kibifoot-kilofoot-millifoot-microfoot"},
|
||||
};
|
||||
for (const auto &cas : cases) {
|
||||
status.setScope(cas.id);
|
||||
|
@ -5045,7 +5049,7 @@ void MeasureFormatTest::TestInternalMeasureUnitImpl() {
|
|||
assertEquals("append meter & centimeter: units[1]", "meter",
|
||||
mcm.singleUnits[1]->getSimpleUnitID());
|
||||
}
|
||||
assertEquals("append meter & centimeter: identifier", "centimeter-meter",
|
||||
assertEquals("append meter & centimeter: identifier", "meter-centimeter",
|
||||
std::move(mcm).build(status).getIdentifier());
|
||||
|
||||
MeasureUnitImpl m2m = MeasureUnitImpl::forIdentifier("meter-square-meter", status);
|
||||
|
|
|
@ -2283,13 +2283,13 @@ void NumberFormatterApiTest::unitInflections() {
|
|||
// tests can't test value0 of the following two rules:
|
||||
// <deriveComponent feature="plural" structure="power" value0="one" value1="compound"/>
|
||||
// <deriveComponent feature="case" structure="power" value0="nominative" value1="compound"/>
|
||||
{"square-decimeter-dekameter", "de", nullptr, 1, u"1 Quadratdezimeter⋅Dekameter"},
|
||||
{"square-decimeter-dekameter", "de", "genitive", 1, u"1 Quadratdezimeter⋅Dekameters"},
|
||||
{"square-decimeter-dekameter", "de", nullptr, 2, u"2 Quadratdezimeter⋅Dekameter"},
|
||||
{"square-decimeter-dekameter", "de", "dative", 2, u"2 Quadratdezimeter⋅Dekametern"},
|
||||
{"square-decimeter-dekameter", "de", nullptr, 1, u"1 Dekameter⋅Quadratdezimeter"},
|
||||
{"square-decimeter-dekameter", "de", "genitive", 1, u"1 Dekameter⋅Quadratdezimeter"},
|
||||
{"square-decimeter-dekameter", "de", nullptr, 2, u"2 Dekameter⋅Quadratdezimeter"},
|
||||
{"square-decimeter-dekameter", "de", "dative", 2, u"2 Dekameter⋅Quadratdezimeter"},
|
||||
// Feminine "Meile" better demonstrates singular-vs-plural form:
|
||||
{"cubic-mile-dekamile", "de", nullptr, 1, u"1 Kubikmeile⋅Dekameile"},
|
||||
{"cubic-mile-dekamile", "de", nullptr, 2, u"2 Kubikmeile⋅Dekameilen"},
|
||||
{"cubic-mile-dekamile", "de", nullptr, 1, u"1 Dekameile⋅Kubikmeile"},
|
||||
{"cubic-mile-dekamile", "de", nullptr, 2, u"2 Dekameile⋅Kubikmeilen"},
|
||||
|
||||
// French handles plural "times" and "power" structures differently:
|
||||
// plural form impacts all "numerator" units (denominator remains
|
||||
|
|
|
@ -122,21 +122,35 @@ public class SingleUnitImpl {
|
|||
if (index > other.index) {
|
||||
return 1;
|
||||
}
|
||||
// TODO: revisit if the spec dictates prefix sort order - it doesn't
|
||||
// currently. For now we're sorting binary prefixes before SI prefixes,
|
||||
// as per ICU4C's enum values order.
|
||||
if (this.getPrefix().getBase() < other.getPrefix().getBase()) {
|
||||
|
||||
// When comparing binary prefixes vs SI prefixes, instead of comparing the actual values, we can
|
||||
// multiply the binary prefix power by 3 and compare the powers. if they are equal, we can can
|
||||
// compare the bases.
|
||||
// NOTE: this methodology will fail if the binary prefix more than or equal 98.
|
||||
int unitBase = this.unitPrefix.getBase();
|
||||
int otherUnitBase = other.unitPrefix.getBase();
|
||||
// Values for comparison purposes only.
|
||||
int unitPowerComp =
|
||||
unitBase == 1024 /* Binary Prefix */ ? this.unitPrefix.getPower() * 3
|
||||
: this.unitPrefix.getPower();
|
||||
int otherUnitPowerComp =
|
||||
otherUnitBase == 1024 /* Binary Prefix */ ? other.unitPrefix.getPower() * 3
|
||||
: other.unitPrefix.getPower();
|
||||
|
||||
if (unitPowerComp < otherUnitPowerComp) {
|
||||
return 1;
|
||||
}
|
||||
if (this.getPrefix().getBase() > other.getPrefix().getBase()) {
|
||||
if (unitPowerComp > otherUnitPowerComp) {
|
||||
return -1;
|
||||
}
|
||||
if (this.getPrefix().getPower() < other.getPrefix().getPower()) {
|
||||
return -1;
|
||||
}
|
||||
if (this.getPrefix().getPower() > other.getPrefix().getPower()) {
|
||||
|
||||
if (unitBase < otherUnitBase) {
|
||||
return 1;
|
||||
}
|
||||
if (unitBase > otherUnitBase) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -3923,6 +3923,7 @@ public class MeasureUnitTest extends TestFmwk {
|
|||
new TestCase("exbibyte", "exbibyte"),
|
||||
new TestCase("zebibyte", "zebibyte"),
|
||||
new TestCase("yobibyte", "yobibyte"),
|
||||
|
||||
// Testing aliases
|
||||
new TestCase("foodcalorie", "foodcalorie"),
|
||||
new TestCase("dot-per-centimeter", "dot-per-centimeter"),
|
||||
|
@ -3930,10 +3931,11 @@ public class MeasureUnitTest extends TestFmwk {
|
|||
new TestCase("dot", "dot"),
|
||||
|
||||
// Testing sort order of prefixes.
|
||||
//
|
||||
// TODO(icu-units#70): revisit when fixing normalization. For now we're
|
||||
// just checking some consistency between C&J.
|
||||
new TestCase("megafoot-mebifoot-kibifoot-kilofoot", "kibifoot-mebifoot-kilofoot-megafoot"),
|
||||
new TestCase("megafoot-mebifoot-kibifoot-kilofoot", "mebifoot-megafoot-kibifoot-kilofoot"),
|
||||
new TestCase("per-megafoot-mebifoot-kibifoot-kilofoot", "per-mebifoot-megafoot-kibifoot-kilofoot"),
|
||||
new TestCase("megafoot-mebifoot-kibifoot-kilofoot-per-megafoot-mebifoot-kibifoot-kilofoot", "mebifoot-megafoot-kibifoot-kilofoot-per-mebifoot-megafoot-kibifoot-kilofoot"),
|
||||
new TestCase("microfoot-millifoot-megafoot-mebifoot-kibifoot-kilofoot", "mebifoot-megafoot-kibifoot-kilofoot-millifoot-microfoot"),
|
||||
new TestCase("per-microfoot-millifoot-megafoot-mebifoot-kibifoot-kilofoot", "per-mebifoot-megafoot-kibifoot-kilofoot-millifoot-microfoot"),
|
||||
};
|
||||
|
||||
for (TestCase testCase : cases) {
|
||||
|
@ -4409,7 +4411,7 @@ public class MeasureUnitTest extends TestFmwk {
|
|||
assertEquals("append meter & centimeter: units length", 2, mcm.getSingleUnits().size());
|
||||
assertEquals("append meter & centimeter: units[0]", "meter", mcm.getSingleUnits().get(0).getSimpleUnitID());
|
||||
assertEquals("append meter & centimeter: units[1]", "meter", mcm.getSingleUnits().get(1).getSimpleUnitID());
|
||||
assertEquals("append meter & centimeter: identifier", "centimeter-meter", mcm.build().getIdentifier());
|
||||
assertEquals("append meter & centimeter: identifier", "meter-centimeter", mcm.build().getIdentifier());
|
||||
|
||||
MeasureUnitImpl m2m = MeasureUnitImpl.forIdentifier("meter-square-meter");
|
||||
assertEquals("meter-square-meter: complexity", MeasureUnit.Complexity.SINGLE, m2m.getComplexity());
|
||||
|
|
|
@ -2255,18 +2255,18 @@ public class NumberFormatterApiTest extends TestFmwk {
|
|||
// <deriveComponent feature="case" structure="power" value0="nominative" value1="compound"/>
|
||||
|
||||
new UnitInflectionTestCase("square-decimeter-dekameter", "de", null, 1,
|
||||
"1 Quadratdezimeter⋅Dekameter"),
|
||||
"1 Dekameter⋅Quadratdezimeter"),
|
||||
new UnitInflectionTestCase("square-decimeter-dekameter", "de", "genitive", 1,
|
||||
"1 Quadratdezimeter⋅Dekameters"),
|
||||
"1 Dekameter⋅Quadratdezimeter"),
|
||||
new UnitInflectionTestCase("square-decimeter-dekameter", "de", null, 2,
|
||||
"2 Quadratdezimeter⋅Dekameter"),
|
||||
"2 Dekameter⋅Quadratdezimeter"),
|
||||
new UnitInflectionTestCase("square-decimeter-dekameter", "de", "dative", 2,
|
||||
"2 Quadratdezimeter⋅Dekametern"),
|
||||
"2 Dekameter⋅Quadratdezimeter"),
|
||||
// Feminine "Meile" better demonstrates singular-vs-plural form:
|
||||
new UnitInflectionTestCase("cubic-mile-dekamile", "de", null, 1,
|
||||
"1 Kubikmeile⋅Dekameile"),
|
||||
"1 Dekameile⋅Kubikmeile"),
|
||||
new UnitInflectionTestCase("cubic-mile-dekamile", "de", null, 2,
|
||||
"2 Kubikmeile⋅Dekameilen"),
|
||||
"2 Dekameile⋅Kubikmeilen"),
|
||||
|
||||
// French handles plural "times" and "power" structures differently:
|
||||
// plural form impacts all "numerator" units (denominator remains
|
||||
|
|
Loading…
Add table
Reference in a new issue