mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-21246 Handle kilogram SI prefix correctly
Fix kilogram parsing: ignore 'kilogram' as a stem, we have 'gram'. Failures in the added unit test before the fix: withSIPrefix resulted in 'microkilogram', and kilogram's prefix was considered to be "ONE" (i.e. 10^0).
This commit is contained in:
parent
0dbe1ae013
commit
35eae09a7c
2 changed files with 83 additions and 12 deletions
|
@ -14,6 +14,7 @@
|
|||
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "measunit_impl.h"
|
||||
#include "resource.h"
|
||||
#include "uarrsort.h"
|
||||
|
@ -115,20 +116,37 @@ const struct SIPrefixStrings {
|
|||
};
|
||||
|
||||
/**
|
||||
* A ResourceSink that collects table keys from a resource.
|
||||
* A ResourceSink that collects simple unit identifiers from the keys of the
|
||||
* convertUnits table into an array, and adds these values to a TrieBuilder,
|
||||
* with associated values being their index into this array plus a specified
|
||||
* offset, to a trie.
|
||||
*
|
||||
* This class is for use by ures_getAllItemsWithFallback. Example code:
|
||||
* Example code:
|
||||
*
|
||||
* UErrorCode status = U_ZERO_ERROR;
|
||||
* const char* unitIdentifiers[200];
|
||||
* TableKeysSink identifierSink(unitIdentifiers, 200);
|
||||
* BytesTrieBuilder b(status);
|
||||
* const char *unitIdentifiers[200];
|
||||
* SimpleUnitIdentifiersSink identifierSink(unitIdentifiers, 200, b, kTrieValueOffset);
|
||||
* LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
|
||||
* ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);
|
||||
*/
|
||||
class TableKeysSink : public icu::ResourceSink {
|
||||
class SimpleUnitIdentifiersSink : public icu::ResourceSink {
|
||||
public:
|
||||
explicit TableKeysSink(const char **out, int32_t outSize)
|
||||
: outArray(out), outSize(outSize), outIndex(0) {
|
||||
/**
|
||||
* Constructor.
|
||||
* @param out Array of char* to which the simple unit identifiers will be
|
||||
* saved.
|
||||
* @param outSize The size of `out`.
|
||||
* @param trieBuilder The trie builder to which the simple unit identifier
|
||||
* should be added. The trie builder must outlive this resource sink.
|
||||
* @param trieValueOffset This is added to the index of the identifier in
|
||||
* the `out` array, before adding to `trieBuilder` as the value
|
||||
* associated with the identifier.
|
||||
*/
|
||||
explicit SimpleUnitIdentifiersSink(const char **out, int32_t outSize, BytesTrieBuilder &trieBuilder,
|
||||
int32_t trieValueOffset)
|
||||
: outArray(out), outSize(outSize), trieBuilder(trieBuilder), trieValueOffset(trieValueOffset),
|
||||
outIndex(0) {
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -154,13 +172,25 @@ class TableKeysSink : public icu::ResourceSink {
|
|||
for (int32_t i = 0; table.getKeyAndValue(i, key, value); ++i) {
|
||||
U_ASSERT(i < table.getSize());
|
||||
U_ASSERT(outIndex < outSize);
|
||||
outArray[outIndex++] = key;
|
||||
if (uprv_strcmp(key, "kilogram") == 0) {
|
||||
// For parsing, we use "gram", the prefixless metric mass unit. We
|
||||
// thus ignore the SI Base Unit of Mass: it exists due to being the
|
||||
// mass conversion target unit, but not needed for MeasureUnit
|
||||
// parsing.
|
||||
continue;
|
||||
}
|
||||
outArray[outIndex] = key;
|
||||
trieBuilder.add(key, trieValueOffset + outIndex, status);
|
||||
outIndex++;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const char **outArray;
|
||||
int32_t outSize;
|
||||
BytesTrieBuilder &trieBuilder;
|
||||
int32_t trieValueOffset;
|
||||
|
||||
int32_t outIndex;
|
||||
};
|
||||
|
||||
|
@ -228,6 +258,8 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) {
|
|||
ures_getByKey(unitsBundle.getAlias(), "convertUnits", NULL, &status));
|
||||
if (U_FAILURE(status)) { return; }
|
||||
|
||||
// Allocate enough space: with identifierSink below skipping kilogram, we're
|
||||
// probably allocating one more than needed.
|
||||
int32_t simpleUnitsCount = convertUnits.getAlias()->fSize;
|
||||
int32_t arrayMallocSize = sizeof(char *) * simpleUnitsCount;
|
||||
gSimpleUnits = static_cast<const char **>(uprv_malloc(arrayMallocSize));
|
||||
|
@ -237,11 +269,9 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) {
|
|||
}
|
||||
uprv_memset(gSimpleUnits, 0, arrayMallocSize);
|
||||
|
||||
TableKeysSink identifierSink(gSimpleUnits, simpleUnitsCount);
|
||||
// Populate gSimpleUnits and build the associated trie.
|
||||
SimpleUnitIdentifiersSink identifierSink(gSimpleUnits, simpleUnitsCount, b, kSimpleUnitOffset);
|
||||
ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);
|
||||
for (int i = 0; i < simpleUnitsCount; i++) {
|
||||
b.add(gSimpleUnits[i], kSimpleUnitOffset + i, status);
|
||||
}
|
||||
|
||||
// Build the CharsTrie
|
||||
// TODO: Use SLOW or FAST here?
|
||||
|
|
|
@ -81,6 +81,7 @@ private:
|
|||
void TestNumericTimeSomeSpecialFormats();
|
||||
void TestIdentifiers();
|
||||
void TestInvalidIdentifiers();
|
||||
void TestKilogramIdentifier();
|
||||
void TestCompoundUnitOperations();
|
||||
void TestDimensionlessBehaviour();
|
||||
void Test21060_AddressSanitizerProblem();
|
||||
|
@ -206,6 +207,7 @@ void MeasureFormatTest::runIndexedTest(
|
|||
TESTCASE_AUTO(TestNumericTimeSomeSpecialFormats);
|
||||
TESTCASE_AUTO(TestIdentifiers);
|
||||
TESTCASE_AUTO(TestInvalidIdentifiers);
|
||||
TESTCASE_AUTO(TestKilogramIdentifier);
|
||||
TESTCASE_AUTO(TestCompoundUnitOperations);
|
||||
TESTCASE_AUTO(TestDimensionlessBehaviour);
|
||||
TESTCASE_AUTO(Test21060_AddressSanitizerProblem);
|
||||
|
@ -3323,6 +3325,45 @@ void MeasureFormatTest::TestInvalidIdentifiers() {
|
|||
}
|
||||
}
|
||||
|
||||
// Kilogram is a "base unit", although it's also "gram" with a kilo- prefix.
|
||||
// This tests that it is handled in the preferred manner.
|
||||
void MeasureFormatTest::TestKilogramIdentifier() {
|
||||
IcuTestErrorCode status(*this, "TestKilogramIdentifier");
|
||||
|
||||
// SI unit of mass
|
||||
MeasureUnit kilogram = MeasureUnit::forIdentifier("kilogram", status);
|
||||
// Metric mass unit
|
||||
MeasureUnit gram = MeasureUnit::forIdentifier("gram", status);
|
||||
// Microgram: still a built-in type
|
||||
MeasureUnit microgram = MeasureUnit::forIdentifier("microgram", status);
|
||||
// Nanogram: not a built-in type at this time
|
||||
MeasureUnit nanogram = MeasureUnit::forIdentifier("nanogram", status);
|
||||
status.assertSuccess();
|
||||
|
||||
assertEquals("parsed kilogram equals built-in kilogram", MeasureUnit::getKilogram().getType(),
|
||||
kilogram.getType());
|
||||
assertEquals("parsed kilogram equals built-in kilogram", MeasureUnit::getKilogram().getSubtype(),
|
||||
kilogram.getSubtype());
|
||||
assertEquals("parsed gram equals built-in gram", MeasureUnit::getGram().getType(), gram.getType());
|
||||
assertEquals("parsed gram equals built-in gram", MeasureUnit::getGram().getSubtype(),
|
||||
gram.getSubtype());
|
||||
assertEquals("parsed microgram equals built-in microgram", MeasureUnit::getMicrogram().getType(),
|
||||
microgram.getType());
|
||||
assertEquals("parsed microgram equals built-in microgram", MeasureUnit::getMicrogram().getSubtype(),
|
||||
microgram.getSubtype());
|
||||
assertEquals("nanogram", "", nanogram.getType());
|
||||
assertEquals("nanogram", "nanogram", nanogram.getIdentifier());
|
||||
|
||||
assertEquals("prefix of kilogram", UMEASURE_SI_PREFIX_KILO, kilogram.getSIPrefix(status));
|
||||
assertEquals("prefix of gram", UMEASURE_SI_PREFIX_ONE, gram.getSIPrefix(status));
|
||||
assertEquals("prefix of microgram", UMEASURE_SI_PREFIX_MICRO, microgram.getSIPrefix(status));
|
||||
assertEquals("prefix of nanogram", UMEASURE_SI_PREFIX_NANO, nanogram.getSIPrefix(status));
|
||||
|
||||
MeasureUnit tmp = kilogram.withSIPrefix(UMEASURE_SI_PREFIX_MILLI, status);
|
||||
assertEquals(UnicodeString("Kilogram + milli should be milligram, got: ") + tmp.getIdentifier(),
|
||||
MeasureUnit::getMilligram().getIdentifier(), tmp.getIdentifier());
|
||||
}
|
||||
|
||||
void MeasureFormatTest::TestCompoundUnitOperations() {
|
||||
IcuTestErrorCode status(*this, "TestCompoundUnitOperations");
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue