mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 13:35:32 +00:00
1540 lines
54 KiB
C++
1540 lines
54 KiB
C++
// © 2020 and later: Unicode, Inc. and others.
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
|
|
|
// Extra functions for MeasureUnit not needed for all clients.
|
|
// Separate .o file so that it can be removed for modularity.
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#if !UCONFIG_NO_FORMATTING
|
|
|
|
// Allow implicit conversion from char16_t* to UnicodeString for this file:
|
|
// Helpful in toString methods and elsewhere.
|
|
#define UNISTR_FROM_STRING_EXPLICIT
|
|
|
|
#include "charstr.h"
|
|
#include "cmemory.h"
|
|
#include "cstring.h"
|
|
#include "double-conversion-string-to-double.h"
|
|
#include "measunit_impl.h"
|
|
#include "resource.h"
|
|
#include "uarrsort.h"
|
|
#include "uassert.h"
|
|
#include "ucln_in.h"
|
|
#include "umutex.h"
|
|
#include "unicode/bytestrie.h"
|
|
#include "unicode/bytestriebuilder.h"
|
|
#include "unicode/localpointer.h"
|
|
#include "unicode/stringpiece.h"
|
|
#include "unicode/stringtriebuilder.h"
|
|
#include "unicode/ures.h"
|
|
#include "unicode/ustringtrie.h"
|
|
#include "uresimp.h"
|
|
#include "util.h"
|
|
#include <limits.h>
|
|
#include <cstdlib>
|
|
U_NAMESPACE_BEGIN
|
|
|
|
|
|
namespace {
|
|
|
|
using icu::double_conversion::StringToDoubleConverter;
|
|
|
|
// TODO: Propose a new error code for this?
|
|
constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR;
|
|
|
|
// Trie value offset for SI or binary prefixes. This is big enough to ensure we only
|
|
// insert positive integers into the trie.
|
|
constexpr int32_t kPrefixOffset = 64;
|
|
static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_BIN > 0,
|
|
"kPrefixOffset is too small for minimum UMeasurePrefix value");
|
|
static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_SI > 0,
|
|
"kPrefixOffset is too small for minimum UMeasurePrefix value");
|
|
|
|
// Trie value offset for compound parts, e.g. "-per-", "-", "-and-".
|
|
constexpr int32_t kCompoundPartOffset = 128;
|
|
static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_BIN,
|
|
"Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens");
|
|
static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_SI,
|
|
"Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens");
|
|
|
|
enum CompoundPart {
|
|
// Represents "-per-"
|
|
COMPOUND_PART_PER = kCompoundPartOffset,
|
|
// Represents "-"
|
|
COMPOUND_PART_TIMES,
|
|
// Represents "-and-"
|
|
COMPOUND_PART_AND,
|
|
};
|
|
|
|
// Offset at which the trie value for "per-" is stored.
constexpr int32_t kInitialCompoundPartOffset{192};

enum InitialCompoundPart {
    // "per-": the one compound part that is allowed to open an identifier.
    INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset,
};
|
|
|
|
// Offset for power tokens such as "square-", "cubic-", "pow2-", etc. The
// exponent is recovered by subtracting this offset from the trie value.
constexpr int32_t kPowerPartOffset{256};

enum PowerPart {
    POWER_PART_P2 = kPowerPartOffset + 2,
    POWER_PART_P3 = kPowerPartOffset + 3,
    POWER_PART_P4 = kPowerPartOffset + 4,
    POWER_PART_P5 = kPowerPartOffset + 5,
    POWER_PART_P6 = kPowerPartOffset + 6,
    POWER_PART_P7 = kPowerPartOffset + 7,
    POWER_PART_P8 = kPowerPartOffset + 8,
    POWER_PART_P9 = kPowerPartOffset + 9,
    POWER_PART_P10 = kPowerPartOffset + 10,
    POWER_PART_P11 = kPowerPartOffset + 11,
    POWER_PART_P12 = kPowerPartOffset + 12,
    POWER_PART_P13 = kPowerPartOffset + 13,
    POWER_PART_P14 = kPowerPartOffset + 14,
    POWER_PART_P15 = kPowerPartOffset + 15,
};
|
|
|
|
// Offset at which trie values for simple units begin, e.g. "gram",
// "nautical-mile", "fluid-ounce-imperial".
constexpr int32_t kSimpleUnitOffset{512};
|
|
|
|
const struct UnitPrefixStrings {
|
|
const char* const string;
|
|
UMeasurePrefix value;
|
|
} gUnitPrefixStrings[] = {
|
|
// SI prefixes
|
|
{ "quetta", UMEASURE_PREFIX_QUETTA },
|
|
{ "ronna", UMEASURE_PREFIX_RONNA },
|
|
{ "yotta", UMEASURE_PREFIX_YOTTA },
|
|
{ "zetta", UMEASURE_PREFIX_ZETTA },
|
|
{ "exa", UMEASURE_PREFIX_EXA },
|
|
{ "peta", UMEASURE_PREFIX_PETA },
|
|
{ "tera", UMEASURE_PREFIX_TERA },
|
|
{ "giga", UMEASURE_PREFIX_GIGA },
|
|
{ "mega", UMEASURE_PREFIX_MEGA },
|
|
{ "kilo", UMEASURE_PREFIX_KILO },
|
|
{ "hecto", UMEASURE_PREFIX_HECTO },
|
|
{ "deka", UMEASURE_PREFIX_DEKA },
|
|
{ "deci", UMEASURE_PREFIX_DECI },
|
|
{ "centi", UMEASURE_PREFIX_CENTI },
|
|
{ "milli", UMEASURE_PREFIX_MILLI },
|
|
{ "micro", UMEASURE_PREFIX_MICRO },
|
|
{ "nano", UMEASURE_PREFIX_NANO },
|
|
{ "pico", UMEASURE_PREFIX_PICO },
|
|
{ "femto", UMEASURE_PREFIX_FEMTO },
|
|
{ "atto", UMEASURE_PREFIX_ATTO },
|
|
{ "zepto", UMEASURE_PREFIX_ZEPTO },
|
|
{ "yocto", UMEASURE_PREFIX_YOCTO },
|
|
{ "ronto", UMEASURE_PREFIX_RONTO },
|
|
{ "quecto", UMEASURE_PREFIX_QUECTO },
|
|
// Binary prefixes
|
|
{ "yobi", UMEASURE_PREFIX_YOBI },
|
|
{ "zebi", UMEASURE_PREFIX_ZEBI },
|
|
{ "exbi", UMEASURE_PREFIX_EXBI },
|
|
{ "pebi", UMEASURE_PREFIX_PEBI },
|
|
{ "tebi", UMEASURE_PREFIX_TEBI },
|
|
{ "gibi", UMEASURE_PREFIX_GIBI },
|
|
{ "mebi", UMEASURE_PREFIX_MEBI },
|
|
{ "kibi", UMEASURE_PREFIX_KIBI },
|
|
};
|
|
|
|
/**
|
|
* A ResourceSink that collects simple unit identifiers from the keys of the
|
|
* convertUnits table into an array, and adds these values to a TrieBuilder,
|
|
* with associated values being their index into this array plus a specified
|
|
* offset.
|
|
*
|
|
* Example code:
|
|
*
|
|
* UErrorCode status = U_ZERO_ERROR;
|
|
* BytesTrieBuilder b(status);
|
|
* int32_t ARR_SIZE = 200;
|
|
* const char *unitIdentifiers[ARR_SIZE];
|
|
* int32_t unitCategories[ARR_SIZE];
|
|
* SimpleUnitIdentifiersSink identifierSink(gSerializedUnitCategoriesTrie, unitIdentifiers,
|
|
* unitCategories, ARR_SIZE, b, kTrieValueOffset);
|
|
* LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
|
|
* ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);
|
|
*/
|
|
class SimpleUnitIdentifiersSink : public icu::ResourceSink {
|
|
public:
|
|
/**
|
|
* Constructor.
|
|
* @param quantitiesTrieData The data for constructing a quantitiesTrie,
|
|
* which maps from a simple unit identifier to an index into the
|
|
* gCategories array.
|
|
* @param out Array of char* to which pointers to the simple unit
|
|
* identifiers will be saved. (Does not take ownership.)
|
|
* @param outCategories Array of int32_t to which category indexes will be
|
|
* saved: this corresponds to simple unit IDs saved to `out`, mapping
|
|
* from the ID to the value produced by the quantitiesTrie (which is an
|
|
* index into the gCategories array).
|
|
* @param outSize The size of `out` and `outCategories`.
|
|
* @param trieBuilder The trie builder to which the simple unit identifier
|
|
* should be added. The trie builder must outlive this resource sink.
|
|
* @param trieValueOffset This is added to the index of the identifier in
|
|
* the `out` array, before adding to `trieBuilder` as the value
|
|
* associated with the identifier.
|
|
*/
|
|
explicit SimpleUnitIdentifiersSink(StringPiece quantitiesTrieData, const char **out,
|
|
int32_t *outCategories, int32_t outSize,
|
|
BytesTrieBuilder &trieBuilder, int32_t trieValueOffset)
|
|
: outArray(out), outCategories(outCategories), outSize(outSize), trieBuilder(trieBuilder),
|
|
trieValueOffset(trieValueOffset), quantitiesTrieData(quantitiesTrieData), outIndex(0) {}
|
|
|
|
/**
|
|
* Adds the table keys found in value to the output vector.
|
|
* @param key The key of the resource passed to `value`: the second
|
|
* parameter of the ures_getAllItemsWithFallback() call.
|
|
* @param value Should be a ResourceTable value, if
|
|
* ures_getAllItemsWithFallback() was called correctly for this sink.
|
|
* @param noFallback Ignored.
|
|
* @param status The standard ICU error code output parameter.
|
|
*/
|
|
void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
|
|
ResourceTable table = value.getTable(status);
|
|
if (U_FAILURE(status)) return;
|
|
|
|
if (outIndex + table.getSize() > outSize) {
|
|
status = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
return;
|
|
}
|
|
|
|
BytesTrie quantitiesTrie(quantitiesTrieData.data());
|
|
|
|
// Collect keys from the table resource.
|
|
const char *simpleUnitID;
|
|
for (int32_t i = 0; table.getKeyAndValue(i, simpleUnitID, value); ++i) {
|
|
U_ASSERT(i < table.getSize());
|
|
U_ASSERT(outIndex < outSize);
|
|
if (uprv_strcmp(simpleUnitID, "kilogram") == 0) {
|
|
// For parsing, we use "gram", the prefixless metric mass unit. We
|
|
// thus ignore the SI Base Unit of Mass: it exists due to being the
|
|
// mass conversion target unit, but not needed for MeasureUnit
|
|
// parsing.
|
|
continue;
|
|
}
|
|
outArray[outIndex] = simpleUnitID;
|
|
trieBuilder.add(simpleUnitID, trieValueOffset + outIndex, status);
|
|
|
|
// Find the base target unit for this simple unit
|
|
ResourceTable table = value.getTable(status);
|
|
if (U_FAILURE(status)) { return; }
|
|
if (!table.findValue("target", value)) {
|
|
status = U_INVALID_FORMAT_ERROR;
|
|
break;
|
|
}
|
|
int32_t len;
|
|
const char16_t* uTarget = value.getString(len, status);
|
|
CharString target;
|
|
target.appendInvariantChars(uTarget, len, status);
|
|
if (U_FAILURE(status)) { return; }
|
|
quantitiesTrie.reset();
|
|
UStringTrieResult result = quantitiesTrie.next(target.data(), target.length());
|
|
if (!USTRINGTRIE_HAS_VALUE(result)) {
|
|
status = U_INVALID_FORMAT_ERROR;
|
|
break;
|
|
}
|
|
outCategories[outIndex] = quantitiesTrie.getValue();
|
|
|
|
outIndex++;
|
|
}
|
|
}
|
|
|
|
private:
|
|
const char **outArray;
|
|
int32_t *outCategories;
|
|
int32_t outSize;
|
|
BytesTrieBuilder &trieBuilder;
|
|
int32_t trieValueOffset;
|
|
|
|
StringPiece quantitiesTrieData;
|
|
|
|
int32_t outIndex;
|
|
};
|
|
|
|
/**
|
|
* A ResourceSink that collects information from `unitQuantities` in the `units`
|
|
* resource to provide key->value lookups from base unit to category, as well as
|
|
* preserving ordering information for these categories. See `units.txt`.
|
|
*
|
|
* For example: "kilogram" -> "mass", "meter-per-second" -> "speed".
|
|
*
|
|
* In C++ unitQuantity values are collected in order into a char16_t* array, while
|
|
* unitQuantity keys are added to a TrieBuilder, with associated values
|
|
* being the index into the aforementioned char16_t* array.
|
|
*/
|
|
class CategoriesSink : public icu::ResourceSink {
|
|
public:
|
|
/**
|
|
* Constructor.
|
|
* @param out Array of char16_t* to which unitQuantity values will be saved.
|
|
* The pointers returned not owned: they point directly at the resource
|
|
* strings in static memory.
|
|
* @param outSize The size of the `out` array.
|
|
* @param trieBuilder The trie builder to which the keys (base units) of
|
|
* each unitQuantity will be added, each with value being the offset
|
|
* into `out`.
|
|
*/
|
|
explicit CategoriesSink(const char16_t **out, int32_t &outSize, BytesTrieBuilder &trieBuilder)
|
|
: outQuantitiesArray(out), outSize(outSize), trieBuilder(trieBuilder), outIndex(0) {}
|
|
|
|
void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
|
|
ResourceArray array = value.getArray(status);
|
|
if (U_FAILURE(status)) {
|
|
return;
|
|
}
|
|
|
|
if (outIndex + array.getSize() > outSize) {
|
|
status = U_INDEX_OUTOFBOUNDS_ERROR;
|
|
return;
|
|
}
|
|
|
|
for (int32_t i = 0; array.getValue(i, value); ++i) {
|
|
U_ASSERT(outIndex < outSize);
|
|
ResourceTable table = value.getTable(status);
|
|
if (U_FAILURE(status)) {
|
|
return;
|
|
}
|
|
if (table.getSize() != 1) {
|
|
status = U_INVALID_FORMAT_ERROR;
|
|
return;
|
|
}
|
|
const char *key;
|
|
table.getKeyAndValue(0, key, value);
|
|
int32_t uTmpLen;
|
|
outQuantitiesArray[outIndex] = value.getString(uTmpLen, status);
|
|
trieBuilder.add(key, outIndex, status);
|
|
outIndex++;
|
|
}
|
|
}
|
|
|
|
private:
|
|
const char16_t **outQuantitiesArray;
|
|
int32_t &outSize;
|
|
BytesTrieBuilder &trieBuilder;
|
|
|
|
int32_t outIndex;
|
|
};
|
|
|
|
// Init-once flag passed to umtx_initOnce() together with initUnitExtras().
icu::UInitOnce gUnitExtrasInitOnce {};

// Array of simple unit IDs.
//
// This pointer owns the array memory itself; the individual char* stored in
// the array point at static memory. (These char* are also what
// SingleUnitImpl::getSimpleUnitID() returns.)
const char **gSimpleUnits{nullptr};

// For each simple unit ID (by its associated value), an index into the
// gCategories array.
int32_t *gSimpleUnitCategories{nullptr};

// Serialized BytesTrie that Parser uses to tokenize unit identifiers.
char *gSerializedUnitExtrasStemTrie{nullptr};

// Array of char16_t* pointing at the unit categories (aka "quantities", aka
// "types"), as found in the `unitQuantities` resource. This pointer owns the
// array memory itself; the individual char16_t* stored in the array point at
// static memory.
const char16_t **gCategories{nullptr};
// Number of items in `gCategories`.
int32_t gCategoriesCount{0};
// Serialized BytesTrie for mapping from base units to indices into gCategories.
char *gSerializedUnitCategoriesTrie{nullptr};
|
|
|
|
// Cleanup callback: frees every buffer allocated by initUnitExtras(), clears
// the corresponding globals, and resets the init-once flag. Always returns true.
UBool U_CALLCONV cleanupUnitExtras() {
    // Data used by the identifier parser.
    uprv_free(gSimpleUnits);
    gSimpleUnits = nullptr;
    uprv_free(gSimpleUnitCategories);
    gSimpleUnitCategories = nullptr;
    uprv_free(gSerializedUnitExtrasStemTrie);
    gSerializedUnitExtrasStemTrie = nullptr;

    // Data describing unit categories.
    uprv_free(gCategories);
    gCategories = nullptr;
    uprv_free(gSerializedUnitCategoriesTrie);
    gSerializedUnitCategoriesTrie = nullptr;

    gUnitExtrasInitOnce.reset();
    return true;
}
|
|
|
|
// Initializer run through umtx_initOnce(). Loads the "units" resource bundle
// and fills the globals above:
//   - gCategories / gCategoriesCount / gSerializedUnitCategoriesTrie from the
//     "unitQuantities" resource;
//   - gSimpleUnits / gSimpleUnitCategories / gSerializedUnitExtrasStemTrie from
//     the prefix table, the fixed syntax tokens and the "convertUnits" resource.
// On failure `status` is set and the function returns early; buffers allocated
// up to that point are released later by cleanupUnitExtras().
void U_CALLCONV initUnitExtras(UErrorCode& status) {
    ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras);
    LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));

    // Part 1: collect unitQuantities information into
    // gSerializedUnitCategoriesTrie and gCategories.
    const char *const kQuantitiesTable = "unitQuantities";
    LocalUResourceBundlePointer unitQuantities(
        ures_getByKey(unitsBundle.getAlias(), kQuantitiesTable, nullptr, &status));
    if (U_FAILURE(status)) {
        return;
    }
    gCategoriesCount = unitQuantities.getAlias()->fSize;
    const size_t categoriesBytes = sizeof(char16_t *) * gCategoriesCount;
    gCategories = static_cast<const char16_t **>(uprv_malloc(categoriesBytes));
    if (gCategories == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    uprv_memset(gCategories, 0, categoriesBytes);

    BytesTrieBuilder quantitiesBuilder(status);
    CategoriesSink categoriesSink(gCategories, gCategoriesCount, quantitiesBuilder);
    ures_getAllItemsWithFallback(unitsBundle.getAlias(), kQuantitiesTable, categoriesSink, status);
    StringPiece quantitiesTrieBytes =
        quantitiesBuilder.buildStringPiece(USTRINGTRIE_BUILD_FAST, status);
    if (U_FAILURE(status)) {
        return;
    }

    // The builder owns the bytes it produced, so keep a copy in the global.
    const size_t quantitiesTrieSize = quantitiesTrieBytes.length();
    gSerializedUnitCategoriesTrie = static_cast<char *>(uprv_malloc(quantitiesTrieSize));
    if (gSerializedUnitCategoriesTrie == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    uprv_memcpy(gSerializedUnitCategoriesTrie, quantitiesTrieBytes.data(), quantitiesTrieSize);

    // Part 2: build the BytesTrie that Parser needs for parsing unit identifiers.
    BytesTrieBuilder stemBuilder(status);
    if (U_FAILURE(status)) {
        return;
    }

    // SI and binary prefixes.
    for (const UnitPrefixStrings &prefix : gUnitPrefixStrings) {
        stemBuilder.add(prefix.string, prefix.value + kPrefixOffset, status);
    }
    if (U_FAILURE(status)) {
        return;
    }

    // Syntax parts: compound separators and power prefixes.
    const struct {
        const char *text;
        int32_t trieValue;
    } syntaxParts[] = {
        {"-per-", COMPOUND_PART_PER},
        {"-", COMPOUND_PART_TIMES},
        {"-and-", COMPOUND_PART_AND},
        {"per-", INITIAL_COMPOUND_PART_PER},
        {"square-", POWER_PART_P2},
        {"cubic-", POWER_PART_P3},
        {"pow2-", POWER_PART_P2},
        {"pow3-", POWER_PART_P3},
        {"pow4-", POWER_PART_P4},
        {"pow5-", POWER_PART_P5},
        {"pow6-", POWER_PART_P6},
        {"pow7-", POWER_PART_P7},
        {"pow8-", POWER_PART_P8},
        {"pow9-", POWER_PART_P9},
        {"pow10-", POWER_PART_P10},
        {"pow11-", POWER_PART_P11},
        {"pow12-", POWER_PART_P12},
        {"pow13-", POWER_PART_P13},
        {"pow14-", POWER_PART_P14},
        {"pow15-", POWER_PART_P15},
    };
    for (const auto &part : syntaxParts) {
        stemBuilder.add(part.text, part.trieValue, status);
    }
    if (U_FAILURE(status)) {
        return;
    }

    // Sanctioned simple units are added by offset: simple units all have
    // entries in the units/convertUnits resources.
    LocalUResourceBundlePointer convertUnits(
        ures_getByKey(unitsBundle.getAlias(), "convertUnits", nullptr, &status));
    if (U_FAILURE(status)) {
        return;
    }

    // Allocate enough space: since the identifier sink below skips kilogram,
    // this is probably one slot more than needed.
    const int32_t simpleUnitsCount = convertUnits.getAlias()->fSize;

    const int32_t unitIdBytes = sizeof(char *) * simpleUnitsCount;
    gSimpleUnits = static_cast<const char **>(uprv_malloc(unitIdBytes));
    if (gSimpleUnits == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    uprv_memset(gSimpleUnits, 0, unitIdBytes);

    const int32_t unitCategoryBytes = sizeof(int32_t) * simpleUnitsCount;
    gSimpleUnitCategories = static_cast<int32_t *>(uprv_malloc(unitCategoryBytes));
    if (gSimpleUnitCategories == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    uprv_memset(gSimpleUnitCategories, 0, unitCategoryBytes);

    // Populate gSimpleUnits and add the identifiers to the trie builder.
    SimpleUnitIdentifiersSink identifierSink(quantitiesTrieBytes, gSimpleUnits,
                                             gSimpleUnitCategories, simpleUnitsCount, stemBuilder,
                                             kSimpleUnitOffset);
    ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);

    // Serialize the BytesTrie.
    // TODO: Use SLOW or FAST here?
    StringPiece stemTrieBytes = stemBuilder.buildStringPiece(USTRINGTRIE_BUILD_FAST, status);
    if (U_FAILURE(status)) {
        return;
    }

    // Keep a copy of the serialized trie in the global.
    const size_t stemTrieSize = stemTrieBytes.length();
    gSerializedUnitExtrasStemTrie = static_cast<char *>(uprv_malloc(stemTrieSize));
    if (gSerializedUnitExtrasStemTrie == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    uprv_memcpy(gSerializedUnitExtrasStemTrie, stemTrieBytes.data(), stemTrieSize);
}
|
|
|
|
class Token {
|
|
public:
|
|
Token(int64_t match) : fMatch(match) {
|
|
if (fMatch < kCompoundPartOffset) {
|
|
this->fType = TYPE_PREFIX;
|
|
} else if (fMatch < kInitialCompoundPartOffset) {
|
|
this->fType = TYPE_COMPOUND_PART;
|
|
} else if (fMatch < kPowerPartOffset) {
|
|
this->fType = TYPE_INITIAL_COMPOUND_PART;
|
|
} else if (fMatch < kSimpleUnitOffset) {
|
|
this->fType = TYPE_POWER_PART;
|
|
} else {
|
|
this->fType = TYPE_SIMPLE_UNIT;
|
|
}
|
|
}
|
|
|
|
static Token constantToken(StringPiece str, UErrorCode &status) {
|
|
Token result;
|
|
auto value = Token::parseStringToLong(str, status);
|
|
if (U_FAILURE(status)) {
|
|
return result;
|
|
}
|
|
result.fMatch = value;
|
|
result.fType = TYPE_CONSTANT_DENOMINATOR;
|
|
return result;
|
|
}
|
|
|
|
enum Type {
|
|
TYPE_UNDEFINED,
|
|
TYPE_PREFIX,
|
|
// Token type for "-per-", "-", and "-and-".
|
|
TYPE_COMPOUND_PART,
|
|
// Token type for "per-".
|
|
TYPE_INITIAL_COMPOUND_PART,
|
|
TYPE_POWER_PART,
|
|
TYPE_SIMPLE_UNIT,
|
|
TYPE_CONSTANT_DENOMINATOR,
|
|
};
|
|
|
|
// Calling getType() is invalid, resulting in an assertion failure, if Token
|
|
// value isn't positive.
|
|
Type getType() const {
|
|
U_ASSERT(fMatch >= 0);
|
|
return this->fType;
|
|
}
|
|
|
|
// Retrieve the value of the constant denominator if the token is of type TYPE_CONSTANT_DENOMINATOR.
|
|
uint64_t getConstantDenominator() const {
|
|
U_ASSERT(getType() == TYPE_CONSTANT_DENOMINATOR);
|
|
return static_cast<uint64_t>(fMatch);
|
|
}
|
|
|
|
UMeasurePrefix getUnitPrefix() const {
|
|
U_ASSERT(getType() == TYPE_PREFIX);
|
|
return static_cast<UMeasurePrefix>(fMatch - kPrefixOffset);
|
|
}
|
|
|
|
// Valid only for tokens with type TYPE_COMPOUND_PART.
|
|
int32_t getMatch() const {
|
|
U_ASSERT(getType() == TYPE_COMPOUND_PART);
|
|
return fMatch;
|
|
}
|
|
|
|
int32_t getInitialCompoundPart() const {
|
|
// Even if there is only one InitialCompoundPart value, we have this
|
|
// function for the simplicity of code consistency.
|
|
U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART);
|
|
// Defensive: if this assert fails, code using this function also needs
|
|
// to change.
|
|
U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER);
|
|
return fMatch;
|
|
}
|
|
|
|
int8_t getPower() const {
|
|
U_ASSERT(getType() == TYPE_POWER_PART);
|
|
return static_cast<int8_t>(fMatch - kPowerPartOffset);
|
|
}
|
|
|
|
int32_t getSimpleUnitIndex() const {
|
|
U_ASSERT(getType() == TYPE_SIMPLE_UNIT);
|
|
return fMatch - kSimpleUnitOffset;
|
|
}
|
|
|
|
// TODO: Consider moving this to a separate utility class.
|
|
// Utility function to parse a string into an unsigned long value.
|
|
// The value must be a positive integer within the range [1, INT64_MAX].
|
|
// The input can be in integer or scientific notation.
|
|
static uint64_t parseStringToLong(const StringPiece strNum, UErrorCode &status) {
|
|
// We are processing well-formed input, so we don't need any special options to
|
|
// StringToDoubleConverter.
|
|
StringToDoubleConverter converter(0, 0, 0, "", "");
|
|
int32_t count;
|
|
double double_result = converter.StringToDouble(strNum.data(), strNum.length(), &count);
|
|
if (count != strNum.length()) {
|
|
status = kUnitIdentifierSyntaxError;
|
|
return 0;
|
|
}
|
|
|
|
if (U_FAILURE(status) || double_result < 1.0 || double_result > static_cast<double>(INT64_MAX)) {
|
|
status = kUnitIdentifierSyntaxError;
|
|
return 0;
|
|
}
|
|
|
|
// Check if the value is integer.
|
|
uint64_t int_result = static_cast<uint64_t>(double_result);
|
|
const double kTolerance = 1e-9;
|
|
if (abs(double_result - int_result) > kTolerance) {
|
|
status = kUnitIdentifierSyntaxError;
|
|
return 0;
|
|
}
|
|
|
|
return int_result;
|
|
}
|
|
|
|
private:
|
|
Token() = default;
|
|
int64_t fMatch;
|
|
Type fType = TYPE_UNDEFINED;
|
|
};
|
|
|
|
class Parser {
|
|
public:
|
|
/**
|
|
* Factory function for parsing the given identifier.
|
|
*
|
|
* @param source The identifier to parse. This function does not make a copy
|
|
* of source: the underlying string that source points at, must outlive the
|
|
* parser.
|
|
* @param status ICU error code.
|
|
*/
|
|
static Parser from(StringPiece source, UErrorCode& status) {
|
|
if (U_FAILURE(status)) {
|
|
return {};
|
|
}
|
|
umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
|
|
if (U_FAILURE(status)) {
|
|
return {};
|
|
}
|
|
return {source};
|
|
}
|
|
|
|
/**
|
|
* A single unit or a constant denominator.
|
|
*/
|
|
struct SingleUnitOrConstant {
|
|
enum ValueType {
|
|
kSingleUnit,
|
|
kConstantDenominator,
|
|
};
|
|
|
|
ValueType type = kSingleUnit;
|
|
SingleUnitImpl singleUnit;
|
|
uint64_t constantDenominator;
|
|
|
|
static SingleUnitOrConstant singleUnitValue(SingleUnitImpl singleUnit) {
|
|
SingleUnitOrConstant result;
|
|
result.type = kSingleUnit;
|
|
result.singleUnit = singleUnit;
|
|
result.constantDenominator = 0;
|
|
return result;
|
|
}
|
|
|
|
static SingleUnitOrConstant constantDenominatorValue(uint64_t constant) {
|
|
SingleUnitOrConstant result;
|
|
result.type = kConstantDenominator;
|
|
result.singleUnit = {};
|
|
result.constantDenominator = constant;
|
|
return result;
|
|
}
|
|
|
|
uint64_t getConstantDenominator() const {
|
|
U_ASSERT(type == kConstantDenominator);
|
|
return constantDenominator;
|
|
}
|
|
|
|
SingleUnitImpl getSingleUnit() const {
|
|
U_ASSERT(type == kSingleUnit);
|
|
return singleUnit;
|
|
}
|
|
|
|
bool isSingleUnit() const { return type == kSingleUnit; }
|
|
|
|
bool isConstantDenominator() const { return type == kConstantDenominator; }
|
|
};
|
|
|
|
MeasureUnitImpl parse(UErrorCode& status) {
|
|
MeasureUnitImpl result;
|
|
|
|
if (U_FAILURE(status)) {
|
|
return result;
|
|
}
|
|
if (fSource.empty()) {
|
|
// The dimenionless unit: nothing to parse. leave result as is.
|
|
return result;
|
|
}
|
|
|
|
while (hasNext()) {
|
|
bool sawAnd = false;
|
|
|
|
auto singleUnitOrConstant = nextSingleUnitOrConstant(sawAnd, status);
|
|
if (U_FAILURE(status)) {
|
|
return result;
|
|
}
|
|
|
|
if (singleUnitOrConstant.isConstantDenominator()) {
|
|
result.constantDenominator = singleUnitOrConstant.getConstantDenominator();
|
|
result.complexity = UMEASURE_UNIT_COMPOUND;
|
|
continue;
|
|
}
|
|
|
|
U_ASSERT(singleUnitOrConstant.isSingleUnit());
|
|
bool added = result.appendSingleUnit(singleUnitOrConstant.getSingleUnit(), status);
|
|
if (U_FAILURE(status)) {
|
|
return result;
|
|
}
|
|
|
|
if (sawAnd && !added) {
|
|
// Two similar units are not allowed in a mixed unit.
|
|
status = kUnitIdentifierSyntaxError;
|
|
return result;
|
|
}
|
|
|
|
if (result.singleUnits.length() >= 2) {
|
|
// nextSingleUnit fails appropriately for "per" and "and" in the
|
|
// same identifier. It doesn't fail for other compound units
|
|
// (COMPOUND_PART_TIMES). Consequently we take care of that
|
|
// here.
|
|
UMeasureUnitComplexity complexity =
|
|
sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND;
|
|
if (result.singleUnits.length() == 2) {
|
|
// After appending two singleUnits, the complexity will be `UMEASURE_UNIT_COMPOUND`
|
|
U_ASSERT(result.complexity == UMEASURE_UNIT_COMPOUND);
|
|
result.complexity = complexity;
|
|
} else if (result.complexity != complexity) {
|
|
// Can't have mixed compound units
|
|
status = kUnitIdentifierSyntaxError;
|
|
return result;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (result.singleUnits.length() == 0) {
|
|
// The identifier was empty or only had a constant denominator.
|
|
status = kUnitIdentifierSyntaxError;
|
|
return result; // add it for code consistency.
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
private:
|
|
// Tracks parser progress: the offset into fSource.
|
|
int32_t fIndex = 0;
|
|
|
|
// Since we're not owning this memory, whatever is passed to the constructor
|
|
// should live longer than this Parser - and the parser shouldn't return any
|
|
// references to that string.
|
|
StringPiece fSource;
|
|
BytesTrie fTrie;
|
|
|
|
// Set to true when we've seen a "-per-" or a "per-", after which all units
|
|
// are in the denominator. Until we find an "-and-", at which point the
|
|
// identifier is invalid pending TODO(CLDR-13701).
|
|
bool fAfterPer = false;
|
|
|
|
// Set to true when we've just seen a "per-". This is used to determine if
|
|
// the next token can be a constant denominator token.
|
|
bool fJustSawPer = false;
|
|
|
|
Parser() : fSource(""), fTrie(u"") {}
|
|
|
|
Parser(StringPiece source)
|
|
: fSource(source), fTrie(gSerializedUnitExtrasStemTrie) {}
|
|
|
|
inline bool hasNext() const {
|
|
return fIndex < fSource.length();
|
|
}
|
|
|
|
// Returns the next Token parsed from fSource, advancing fIndex to the end
|
|
// of that token in fSource. In case of U_FAILURE(status), the token
|
|
// returned will cause an abort if getType() is called on it.
|
|
Token nextToken(UErrorCode& status) {
|
|
fTrie.reset();
|
|
int32_t match = -1;
|
|
// Saves the position in the fSource string for the end of the most
|
|
// recent matching token.
|
|
int32_t previ = -1;
|
|
|
|
// Saves the position in the fSource string for later use in case of unit constant found.
|
|
int32_t currentFIndex = fIndex;
|
|
|
|
// Find the longest token that matches a value in the trie:
|
|
while (fIndex < fSource.length()) {
|
|
auto result = fTrie.next(fSource.data()[fIndex++]);
|
|
if (result == USTRINGTRIE_NO_MATCH) {
|
|
break;
|
|
} else if (result == USTRINGTRIE_NO_VALUE) {
|
|
continue;
|
|
}
|
|
U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
|
|
match = fTrie.getValue();
|
|
previ = fIndex;
|
|
if (result == USTRINGTRIE_FINAL_VALUE) {
|
|
break;
|
|
}
|
|
U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE);
|
|
// continue;
|
|
}
|
|
|
|
if (match >= 0) {
|
|
fIndex = previ;
|
|
return {match};
|
|
}
|
|
|
|
// If no match was found, we check if the token is a constant denominator.
|
|
// 1. We find the index of the start of the next token or the end of the string.
|
|
int32_t endOfConstantIndex = fSource.find("-", currentFIndex);
|
|
endOfConstantIndex = (endOfConstantIndex == -1) ? fSource.length() : endOfConstantIndex;
|
|
if (endOfConstantIndex <= currentFIndex) {
|
|
status = kUnitIdentifierSyntaxError;
|
|
return {match};
|
|
}
|
|
|
|
// 2. We extract the substring from the start of the constant to the end of the constant.
|
|
StringPiece constantDenominatorStr =
|
|
fSource.substr(currentFIndex, endOfConstantIndex - currentFIndex);
|
|
fIndex = endOfConstantIndex;
|
|
return Token::constantToken(constantDenominatorStr, status);
|
|
}
|
|
|
|
/**
|
|
* Returns the next "single unit" via result.
|
|
*
|
|
* If a "-per-" was parsed, the result will have appropriate negative
|
|
* dimensionality.
|
|
*
|
|
* Returns an error if we parse both compound units and "-and-", since mixed
|
|
* compound units are not yet supported - TODO(CLDR-13701).
|
|
*
|
|
* @param result Will be overwritten by the result, if status shows success.
|
|
* @param sawAnd If an "-and-" was parsed prior to finding the "single
|
|
* unit", sawAnd is set to true. If not, it is left as is.
|
|
* @param status ICU error code.
|
|
*/
|
|
SingleUnitOrConstant nextSingleUnitOrConstant(bool &sawAnd, UErrorCode &status) {
|
|
SingleUnitImpl singleUnitResult;
|
|
if (U_FAILURE(status)) {
|
|
return {};
|
|
}
|
|
|
|
// state:
|
|
// 0 = no tokens seen yet (will accept power, SI or binary prefix, or simple unit)
|
|
// 1 = power token seen (will not accept another power token)
|
|
// 2 = SI or binary prefix token seen (will not accept a power, or SI or binary prefix token)
|
|
int32_t state = 0;
|
|
|
|
bool atStart = fIndex == 0;
|
|
Token token = nextToken(status);
|
|
if (U_FAILURE(status)) {
|
|
return {};
|
|
}
|
|
|
|
fJustSawPer = false;
|
|
|
|
if (atStart) {
|
|
// Identifiers optionally start with "per-".
|
|
if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) {
|
|
U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER);
|
|
fAfterPer = true;
|
|
fJustSawPer = true;
|
|
singleUnitResult.dimensionality = -1;
|
|
|
|
token = nextToken(status);
|
|
if (U_FAILURE(status)) {
|
|
return {};
|
|
}
|
|
}
|
|
} else {
|
|
// All other SingleUnit's are separated from previous SingleUnit's
|
|
// via a compound part:
|
|
if (token.getType() != Token::TYPE_COMPOUND_PART) {
|
|
status = kUnitIdentifierSyntaxError;
|
|
return {};
|
|
}
|
|
|
|
switch (token.getMatch()) {
|
|
case COMPOUND_PART_PER:
|
|
if (sawAnd) {
|
|
// Mixed compound units not yet supported,
|
|
// TODO(CLDR-13701).
|
|
status = kUnitIdentifierSyntaxError;
|
|
return {};
|
|
}
|
|
fAfterPer = true;
|
|
fJustSawPer = true;
|
|
singleUnitResult.dimensionality = -1;
|
|
break;
|
|
|
|
case COMPOUND_PART_TIMES:
|
|
if (fAfterPer) {
|
|
singleUnitResult.dimensionality = -1;
|
|
}
|
|
break;
|
|
|
|
case COMPOUND_PART_AND:
|
|
if (fAfterPer) {
|
|
// Can't start with "-and-", and mixed compound units
|
|
// not yet supported, TODO(CLDR-13701).
|
|
status = kUnitIdentifierSyntaxError;
|
|
return {};
|
|
}
|
|
sawAnd = true;
|
|
break;
|
|
}
|
|
|
|
token = nextToken(status);
|
|
if (U_FAILURE(status)) {
|
|
return {};
|
|
}
|
|
}
|
|
|
|
if (token.getType() == Token::TYPE_CONSTANT_DENOMINATOR) {
|
|
if (!fJustSawPer) {
|
|
status = kUnitIdentifierSyntaxError;
|
|
return {};
|
|
}
|
|
|
|
return SingleUnitOrConstant::constantDenominatorValue(token.getConstantDenominator());
|
|
}
|
|
|
|
// Read tokens until we have a complete SingleUnit or we reach the end.
|
|
while (true) {
|
|
switch (token.getType()) {
|
|
case Token::TYPE_POWER_PART:
|
|
if (state > 0) {
|
|
status = kUnitIdentifierSyntaxError;
|
|
return {};
|
|
}
|
|
singleUnitResult.dimensionality *= token.getPower();
|
|
state = 1;
|
|
break;
|
|
|
|
case Token::TYPE_PREFIX:
|
|
if (state > 1) {
|
|
status = kUnitIdentifierSyntaxError;
|
|
return {};
|
|
}
|
|
singleUnitResult.unitPrefix = token.getUnitPrefix();
|
|
state = 2;
|
|
break;
|
|
|
|
case Token::TYPE_SIMPLE_UNIT:
|
|
singleUnitResult.index = token.getSimpleUnitIndex();
|
|
break;
|
|
|
|
default:
|
|
status = kUnitIdentifierSyntaxError;
|
|
return {};
|
|
}
|
|
|
|
if (token.getType() == Token::TYPE_SIMPLE_UNIT) {
|
|
break;
|
|
}
|
|
|
|
if (!hasNext()) {
|
|
// We ran out of tokens before finding a complete single unit.
|
|
status = kUnitIdentifierSyntaxError;
|
|
return {};
|
|
}
|
|
token = nextToken(status);
|
|
if (U_FAILURE(status)) {
|
|
return {};
|
|
}
|
|
}
|
|
|
|
return SingleUnitOrConstant::singleUnitValue(singleUnitResult);
|
|
}
|
|
};
|
|
|
|
// Sorting function wrapping SingleUnitImpl::compareTo for use with uprv_sortArray.
|
|
int32_t U_CALLCONV
|
|
compareSingleUnits(const void* /*context*/, const void* left, const void* right) {
|
|
const auto* realLeft = static_cast<const SingleUnitImpl* const*>(left);
|
|
const auto* realRight = static_cast<const SingleUnitImpl* const*>(right);
|
|
return (*realLeft)->compareTo(**realRight);
|
|
}
|
|
|
|
// Returns an index into the gCategories array, for the "unitQuantity" (aka
|
|
// "type" or "category") associated with the given base unit identifier. Returns
|
|
// -1 on failure, together with U_UNSUPPORTED_ERROR.
|
|
int32_t getUnitCategoryIndex(BytesTrie &trie, StringPiece baseUnitIdentifier, UErrorCode &status) {
|
|
UStringTrieResult result = trie.reset().next(baseUnitIdentifier.data(), baseUnitIdentifier.length());
|
|
if (!USTRINGTRIE_HAS_VALUE(result)) {
|
|
status = U_UNSUPPORTED_ERROR;
|
|
return -1;
|
|
}
|
|
|
|
return trie.getValue();
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// Returns the offset of `unitPrefix` from the "one" prefix of its family:
// relative to UMEASURE_PREFIX_INTERNAL_ONE_BIN when the prefix lies in the
// binary range, otherwise relative to UMEASURE_PREFIX_ONE (asserting that the
// prefix lies in the SI range).
U_CAPI int32_t U_EXPORT2
umeas_getPrefixPower(UMeasurePrefix unitPrefix) {
    const bool isBinaryPrefix = unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN &&
                                unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN;
    if (!isBinaryPrefix) {
        U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI &&
                 unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI);
        return unitPrefix - UMEASURE_PREFIX_ONE;
    }
    return unitPrefix - UMEASURE_PREFIX_INTERNAL_ONE_BIN;
}
|
|
|
|
// Returns the numeric base associated with `unitPrefix`: 1024 when the prefix
// lies in the binary range, otherwise 10 (asserting that the prefix lies in the
// SI range).
U_CAPI int32_t U_EXPORT2
umeas_getPrefixBase(UMeasurePrefix unitPrefix) {
    const bool isBinaryPrefix = unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN &&
                                unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN;
    if (!isBinaryPrefix) {
        U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI &&
                 unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI);
        return 10;
    }
    return 1024;
}
|
|
|
|
// Returns the category ("unit quantity") string from gCategories that matches
// the given base unit.
//
// Up to four identifiers are tried against the unit-categories trie, in order,
// stopping at the first hit:
//   1. the unit as given,
//   2. its reciprocal,
//   3. the simplified unit (see MeasureUnitImpl::copyAndSimplify),
//   4. the reciprocal of the simplified unit.
//
// If none of them matches, or the matched index is outside gCategories,
// `status` is set to U_INVALID_FORMAT_ERROR and an empty string is returned.
// Any failure reported through `status` by the helpers also yields an empty
// string.
CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status) {
    CharString quantity;
    MeasureUnitImpl workingUnit = baseMeasureUnitImpl.copy(status);
    umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
    if (U_FAILURE(status)) {
        return quantity;
    }
    BytesTrie categoriesTrie(gSerializedUnitCategoriesTrie);

    int32_t categoryIndex = -1;
    // Serializes `unit`, looks its identifier up in the trie, stores the lookup
    // result in categoryIndex, and reports whether the lookup found a value.
    // Lookup misses are tracked separately from `status`, which only carries
    // failures from serialization.
    auto lookUp = [&](MeasureUnitImpl &unit) -> bool {
        UErrorCode lookupStatus = U_ZERO_ERROR;
        unit.serialize(status);
        StringPiece unitId(unit.identifier.data());
        categoryIndex = getUnitCategoryIndex(categoriesTrie, unitId, lookupStatus);
        return U_SUCCESS(lookupStatus);
    };

    // Attempt 1: the unit as given.
    bool matched = lookUp(workingUnit);
    if (U_FAILURE(status)) {
        return quantity;
    }

    // Attempt 2: the reciprocal of the unit.
    if (!matched) {
        workingUnit.takeReciprocal(status);
        matched = lookUp(workingUnit);
        if (U_FAILURE(status)) {
            return quantity;
        }
    }

    // The simplified unit is built regardless of whether a match was already
    // found; a failure here aborts the whole lookup.
    MeasureUnitImpl simplifiedUnit = baseMeasureUnitImpl.copyAndSimplify(status);
    if (U_FAILURE(status)) {
        return quantity;
    }

    // Attempt 3: the simplified unit.
    if (!matched) {
        matched = lookUp(simplifiedUnit);
        if (U_FAILURE(status)) {
            return quantity;
        }
    }

    // Attempt 4: the reciprocal of the simplified unit.
    if (!matched) {
        simplifiedUnit.takeReciprocal(status);
        matched = lookUp(simplifiedUnit);
        if (U_FAILURE(status)) {
            return quantity;
        }
    }

    // No match at all, or a match that does not index into gCategories.
    if (!matched || categoryIndex < 0 || categoryIndex >= gCategoriesCount) {
        status = U_INVALID_FORMAT_ERROR;
        return quantity;
    }

    quantity.appendInvariantChars(gCategories[categoryIndex], u_strlen(gCategories[categoryIndex]),
                                  status);
    return quantity;
}
|
|
|
|
// In ICU4J, this is MeasureUnit.getSingleUnitImpl().
|
|
SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) {
|
|
MeasureUnitImpl temp;
|
|
const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status);
|
|
if (U_FAILURE(status)) {
|
|
return {};
|
|
}
|
|
if (impl.singleUnits.length() == 0) {
|
|
return {};
|
|
}
|
|
if (impl.singleUnits.length() == 1) {
|
|
return *impl.singleUnits[0];
|
|
}
|
|
status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return {};
|
|
}
|
|
|
|
// Builds a MeasureUnit from this single unit by appending it to a fresh
// MeasureUnitImpl and building that.
MeasureUnit SingleUnitImpl::build(UErrorCode& status) const {
    MeasureUnitImpl wrapper;
    wrapper.appendSingleUnit(*this, status);
    // TODO(icu-units#28): the MeasureUnitImpl::build() method uses
    // findBySubtype, which is relatively slow.
    // - At the time of loading the simple unit IDs, we could also save a
    //   mapping to the builtin MeasureUnit type and subtype they correspond to.
    // - This method could then check dimensionality and index, and if both are
    //   1, directly return MeasureUnit instances very quickly.
    return std::move(wrapper).build(status);
}
|
|
|
|
const char *SingleUnitImpl::getSimpleUnitID() const {
|
|
return gSimpleUnits[index];
|
|
}
|
|
|
|
// Appends this unit's identifier to `result`, ignoring the sign of its
// dimensionality: an optional power part ("square-", "cubic-", "powN-"), then
// an optional prefix string from gUnitPrefixStrings, then the simple unit ID.
//
// Sets U_ILLEGAL_ARGUMENT_ERROR when the absolute power exceeds 15, and
// U_UNSUPPORTED_ERROR when the prefix is not found in gUnitPrefixStrings.
void SingleUnitImpl::appendNeutralIdentifier(CharString &result, UErrorCode &status) const UPRV_NO_SANITIZE_UNDEFINED {
    const int32_t magnitude = std::abs(this->dimensionality);

    U_ASSERT(magnitude > 0); // "this function does not support the dimensionless single units";

    if (magnitude > 15) {
        status = U_ILLEGAL_ARGUMENT_ERROR; // Unit Identifier Syntax Error
        return;
    }

    switch (magnitude) {
    case 1:
        // A power of one has no power part.
        break;
    case 2:
        result.append(StringPiece("square-"), status);
        break;
    case 3:
        result.append(StringPiece("cubic-"), status);
        break;
    default:
        result.append(StringPiece("pow"), status);
        result.appendNumber(magnitude, status);
        result.append(StringPiece("-"), status);
        break;
    }

    if (U_FAILURE(status)) {
        return;
    }

    if (this->unitPrefix != UMEASURE_PREFIX_ONE) {
        bool prefixAppended = false;
        // TODO: consider using binary search? If we do this, add a unit
        // test to ensure gUnitPrefixStrings is sorted?
        for (const auto &entry : gUnitPrefixStrings) {
            if (entry.value != this->unitPrefix) {
                continue;
            }
            result.append(entry.string, status);
            prefixAppended = true;
            break;
        }
        if (!prefixAppended) {
            status = U_UNSUPPORTED_ERROR;
            return;
        }
    }

    result.append(StringPiece(this->getSimpleUnitID()), status);
}
|
|
|
|
// Returns the gSimpleUnitCategories entry selected by this unit's `index`.
int32_t SingleUnitImpl::getUnitCategoryIndex() const {
    const int32_t categoryIndex = gSimpleUnitCategories[this->index];
    return categoryIndex;
}
|
|
|
|
// Constructs a MeasureUnitImpl and appends `singleUnit` to it; see
// appendSingleUnit() for how `status` is used.
MeasureUnitImpl::MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status) {
    appendSingleUnit(singleUnit, status);
}
|
|
|
|
// Parses `identifier` with a Parser and returns the resulting MeasureUnitImpl.
MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) {
    auto parser = Parser::from(identifier, status);
    return parser.parse(status);
}
|
|
|
|
// Returns a reference to the MeasureUnitImpl for `measureUnit`. If the unit
// already carries one (fImpl), that object is returned directly; otherwise the
// unit's identifier is parsed into `memory` and `memory` is returned.
const MeasureUnitImpl& MeasureUnitImpl::forMeasureUnit(
    const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status) {
    if (!measureUnit.fImpl) {
        memory = Parser::from(measureUnit.getIdentifier(), status).parse(status);
        return memory;
    }
    return *measureUnit.fImpl;
}
|
|
|
|
// Returns a MeasureUnitImpl for `measureUnit` by value: a copy of the unit's
// existing fImpl when it has one, otherwise the result of parsing the unit's
// identifier.
MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy(
    const MeasureUnit& measureUnit, UErrorCode& status) {
    if (!measureUnit.fImpl) {
        return Parser::from(measureUnit.getIdentifier(), status).parse(status);
    }
    return measureUnit.fImpl->copy(status);
}
|
|
|
|
// Negates the dimensionality of every single unit in place and clears the
// cached identifier. The status argument is unused.
void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) {
    identifier.clear();
    const int32_t unitCount = singleUnits.length();
    for (int32_t pos = 0; pos < unitCount; ++pos) {
        SingleUnitImpl *unit = singleUnits[pos];
        unit->dimensionality *= -1;
    }
}
|
|
|
|
// Returns a new MeasureUnitImpl in which single units that share both the
// simple unit ID and the prefix have been merged by summing their
// dimensionalities. Units without such a counterpart are appended via
// appendSingleUnit().
MeasureUnitImpl MeasureUnitImpl::copyAndSimplify(UErrorCode &status) const {
    MeasureUnitImpl simplified;
    for (int32_t srcPos = 0; srcPos < singleUnits.length(); srcPos++) {
        const SingleUnitImpl &incoming = *this->singleUnits[srcPos];

        // The nested scan makes this O(n^2), but n (the number of single
        // units) is very small, generally at most about 10.
        SingleUnitImpl *existing = nullptr;
        for (int32_t dstPos = 0; dstPos < simplified.singleUnits.length(); dstPos++) {
            auto *candidate = simplified.singleUnits[dstPos];
            const bool sameSimpleUnit =
                uprv_strcmp(candidate->getSimpleUnitID(), incoming.getSimpleUnitID()) == 0;
            if (sameSimpleUnit && candidate->unitPrefix == incoming.unitPrefix) {
                existing = candidate;
                break;
            }
        }

        if (existing != nullptr) {
            existing->dimensionality = existing->dimensionality + incoming.dimensionality;
        } else {
            simplified.appendSingleUnit(incoming, status);
        }
    }

    return simplified;
}
|
|
|
|
bool MeasureUnitImpl::appendSingleUnit(const SingleUnitImpl &singleUnit, UErrorCode &status) {
|
|
identifier.clear();
|
|
|
|
if (singleUnit.isDimensionless()) {
|
|
// Do not append dimensionless units.
|
|
return false;
|
|
}
|
|
|
|
// Find a similar unit that already exists, to attempt to coalesce
|
|
SingleUnitImpl *oldUnit = nullptr;
|
|
for (int32_t i = 0; i < this->singleUnits.length(); i++) {
|
|
auto *candidate = this->singleUnits[i];
|
|
if (candidate->isCompatibleWith(singleUnit)) {
|
|
oldUnit = candidate;
|
|
}
|
|
}
|
|
|
|
if (oldUnit) {
|
|
// Both dimensionalities will be positive, or both will be negative, by
|
|
// virtue of isCompatibleWith().
|
|
oldUnit->dimensionality += singleUnit.dimensionality;
|
|
|
|
return false;
|
|
}
|
|
|
|
// Add a copy of singleUnit
|
|
// NOTE: MaybeStackVector::emplaceBackAndCheckErrorCode creates new copy of singleUnit.
|
|
this->singleUnits.emplaceBackAndCheckErrorCode(status, singleUnit);
|
|
if (U_FAILURE(status)) {
|
|
return false;
|
|
}
|
|
|
|
// If the MeasureUnitImpl is `UMEASURE_UNIT_SINGLE` and after the appending a unit, the `singleUnits`
|
|
// contains more than one. thus means the complexity should be `UMEASURE_UNIT_COMPOUND`
|
|
if (this->singleUnits.length() > 1 &&
|
|
this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_SINGLE) {
|
|
this->complexity = UMeasureUnitComplexity::UMEASURE_UNIT_COMPOUND;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Splits this unit into MeasureUnitImplWithIndex entries.
//
// For a non-MIXED unit the result holds one entry, built from index 0 and the
// whole unit. For a MIXED unit there is one entry per single unit, built from
// that unit's position in `singleUnits`; the loop stops at the first failure
// and returns what has been collected so far.
MaybeStackVector<MeasureUnitImplWithIndex>
MeasureUnitImpl::extractIndividualUnitsWithIndices(UErrorCode &status) const {
    MaybeStackVector<MeasureUnitImplWithIndex> extracted;

    if (this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) {
        for (int32_t pos = 0; pos < singleUnits.length(); ++pos) {
            extracted.emplaceBackAndCheckErrorCode(status, pos, *singleUnits[pos], status);
            if (U_FAILURE(status)) {
                break;
            }
        }
    } else {
        extracted.emplaceBackAndCheckErrorCode(status, 0, *this, status);
    }

    return extracted;
}
|
|
|
|
// Returns how many characters of `str` are equal to `c`.
int32_t countCharacter(const CharString &str, char c) {
    int32_t occurrences = 0;
    const int32_t length = str.length();
    for (int32_t pos = 0; pos < length; ++pos) {
        occurrences += (str[pos] == c) ? 1 : 0;
    }
    return occurrences;
}
|
|
|
|
/**
|
|
* Internal function that returns a string of the constants in the correct
|
|
* format.
|
|
*
|
|
* Example:
|
|
* 1000 --> "-per-1000"
|
|
* 1000000 --> "-per-1e6"
|
|
*
|
|
* NOTE: this function is only used when the constant denominator is greater
|
|
* than 0.
|
|
*/
|
|
CharString getConstantsString(uint64_t constantDenominator, UErrorCode &status) {
|
|
U_ASSERT(constantDenominator > 0 && constantDenominator <= LLONG_MAX);
|
|
|
|
CharString result;
|
|
result.appendNumber(constantDenominator, status);
|
|
if (U_FAILURE(status)) {
|
|
return result;
|
|
}
|
|
|
|
if (constantDenominator <= 1000) {
|
|
return result;
|
|
}
|
|
|
|
// Check if the constant is a power of 10.
|
|
int32_t zeros = countCharacter(result, '0');
|
|
if (zeros == result.length() - 1 && result[0] == '1') {
|
|
result.clear();
|
|
result.append(StringPiece("1e"), status);
|
|
result.appendNumber(zeros, status);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Normalize a MeasureUnitImpl and generate the identifier string in place.
|
|
*/
|
|
void MeasureUnitImpl::serialize(UErrorCode &status) {
|
|
if (U_FAILURE(status)) {
|
|
return;
|
|
}
|
|
|
|
if (this->singleUnits.length() == 0 && this->constantDenominator == 0) {
|
|
// Dimensionless, constructed by the default constructor.
|
|
return;
|
|
}
|
|
|
|
if (this->complexity == UMEASURE_UNIT_COMPOUND) {
|
|
// Note: don't sort a MIXED unit
|
|
uprv_sortArray(this->singleUnits.getAlias(), this->singleUnits.length(),
|
|
sizeof(this->singleUnits[0]), compareSingleUnits, nullptr, false, &status);
|
|
if (U_FAILURE(status)) {
|
|
return;
|
|
}
|
|
}
|
|
|
|
CharString result;
|
|
bool beforePer = true;
|
|
bool firstTimeNegativeDimension = false;
|
|
bool constantDenominatorAppended = false;
|
|
for (int32_t i = 0; i < this->singleUnits.length(); i++) {
|
|
if (beforePer && (*this->singleUnits[i]).dimensionality < 0) {
|
|
beforePer = false;
|
|
firstTimeNegativeDimension = true;
|
|
} else if ((*this->singleUnits[i]).dimensionality < 0) {
|
|
firstTimeNegativeDimension = false;
|
|
}
|
|
|
|
if (U_FAILURE(status)) {
|
|
return;
|
|
}
|
|
|
|
if (this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) {
|
|
if (result.length() != 0) {
|
|
result.append(StringPiece("-and-"), status);
|
|
}
|
|
} else {
|
|
if (firstTimeNegativeDimension) {
|
|
if (result.length() == 0) {
|
|
result.append(StringPiece("per-"), status);
|
|
} else {
|
|
result.append(StringPiece("-per-"), status);
|
|
}
|
|
|
|
if (this->constantDenominator > 0) {
|
|
result.append(getConstantsString(this->constantDenominator, status), status);
|
|
result.append(StringPiece("-"), status);
|
|
constantDenominatorAppended = true;
|
|
}
|
|
|
|
} else if (result.length() != 0) {
|
|
result.append(StringPiece("-"), status);
|
|
}
|
|
}
|
|
|
|
this->singleUnits[i]->appendNeutralIdentifier(result, status);
|
|
}
|
|
|
|
if (!constantDenominatorAppended && this->constantDenominator > 0) {
|
|
result.append(StringPiece("-per-"), status);
|
|
result.append(getConstantsString(this->constantDenominator, status), status);
|
|
}
|
|
|
|
if (U_FAILURE(status)) {
|
|
return;
|
|
}
|
|
this->identifier = CharString(result, status);
|
|
}
|
|
|
|
// Serializes this impl (generating its identifier) and then moves it into a
// new MeasureUnit. Callable on rvalues only.
MeasureUnit MeasureUnitImpl::build(UErrorCode &status) && {
    serialize(status);
    return MeasureUnit(std::move(*this));
}
|
|
|
|
// Parses `identifier` into a MeasureUnitImpl and builds a MeasureUnit from it.
MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode &status) {
    MeasureUnitImpl parsed = Parser::from(identifier, status).parse(status);
    return std::move(parsed).build(status);
}
|
|
|
|
// Returns the complexity recorded on this unit's MeasureUnitImpl (the existing
// fImpl, or one parsed from the identifier into a temporary).
UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode &status) const {
    MeasureUnitImpl scratch;
    const MeasureUnitImpl &resolved = MeasureUnitImpl::forMeasureUnit(*this, scratch, status);
    return resolved.complexity;
}
|
|
|
|
// Returns the prefix of the SingleUnitImpl obtained from this unit via
// SingleUnitImpl::forMeasureUnit(), which reports errors through `status`.
UMeasurePrefix MeasureUnit::getPrefix(UErrorCode &status) const {
    const SingleUnitImpl single = SingleUnitImpl::forMeasureUnit(*this, status);
    return single.unitPrefix;
}
|
|
|
|
// Returns a MeasureUnit built from this unit's single unit with its prefix
// replaced by `prefix`. Errors from extracting the single unit or from
// building are reported through `status`.
MeasureUnit MeasureUnit::withPrefix(UMeasurePrefix prefix,
                                    UErrorCode &status) const UPRV_NO_SANITIZE_UNDEFINED {
    SingleUnitImpl prefixed = SingleUnitImpl::forMeasureUnit(*this, status);
    prefixed.unitPrefix = prefix;
    return prefixed.build(status);
}
|
|
|
|
// Returns this unit's constant denominator, or 0 when the unit has no fImpl.
//
// Only SINGLE and COMPOUND units are accepted; for any other complexity
// `status` is set to U_ILLEGAL_ARGUMENT_ERROR and 0 is returned. 0 is also
// returned when determining the complexity fails.
uint64_t MeasureUnit::getConstantDenominator(UErrorCode &status) const {
    const auto unitComplexity = this->getComplexity(status);
    if (U_FAILURE(status)) {
        return 0;
    }

    const bool supportsConstant =
        unitComplexity == UMEASURE_UNIT_SINGLE || unitComplexity == UMEASURE_UNIT_COMPOUND;
    if (!supportsConstant) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    if (this->fImpl != nullptr) {
        return this->fImpl->constantDenominator;
    }
    return 0;
}
|
|
|
|
// Returns a copy of this unit with its constant denominator set to
// `denominator` (0 removes the constant denominator).
//
// Fails with U_ILLEGAL_ARGUMENT_ERROR, returning a default-constructed
// MeasureUnit, when `denominator` exceeds LLONG_MAX or when this unit is
// neither SINGLE nor COMPOUND. The resulting unit is SINGLE only if it has
// fewer than two single units and no constant denominator; otherwise it is
// COMPOUND.
MeasureUnit MeasureUnit::withConstantDenominator(uint64_t denominator, UErrorCode &status) const {
    // To match the behavior of the Java API, we do not allow a constant denominator
    // bigger than the maximum value of a signed 64-bit integer. LLONG_MAX is used
    // rather than LONG_MAX because `long` is only 32 bits wide on some platforms
    // (e.g. LLP64), which would make the accepted range platform-dependent.
    if (denominator > static_cast<uint64_t>(LLONG_MAX)) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return {};
    }

    auto complexity = this->getComplexity(status);
    if (U_FAILURE(status)) {
        return {};
    }
    if (complexity != UMEASURE_UNIT_SINGLE && complexity != UMEASURE_UNIT_COMPOUND) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return {};
    }

    MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
    if (U_FAILURE(status)) {
        return {};
    }

    impl.constantDenominator = denominator;
    // A constant denominator always makes the unit COMPOUND, even with a
    // single unit.
    impl.complexity = (impl.singleUnits.length() < 2 && denominator == 0) ? UMEASURE_UNIT_SINGLE
                                                                          : UMEASURE_UNIT_COMPOUND;
    return std::move(impl).build(status);
}
|
|
|
|
// Returns the dimensionality of this unit's single unit, or 0 when extracting
// the single unit fails or the single unit is dimensionless.
int32_t MeasureUnit::getDimensionality(UErrorCode& status) const {
    const SingleUnitImpl single = SingleUnitImpl::forMeasureUnit(*this, status);
    if (U_FAILURE(status) || single.isDimensionless()) {
        return 0;
    }
    return single.dimensionality;
}
|
|
|
|
// Returns a MeasureUnit built from this unit's single unit with its
// dimensionality replaced by `dimensionality`. Errors from extracting the
// single unit or from building are reported through `status`.
MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const {
    SingleUnitImpl adjusted = SingleUnitImpl::forMeasureUnit(*this, status);
    adjusted.dimensionality = dimensionality;
    return adjusted.build(status);
}
|
|
|
|
// Returns the reciprocal of this unit, i.e. a unit whose single units all have
// negated dimensionalities.
//
// Units with a constant denominator cannot be inverted: `status` is set to
// U_ILLEGAL_ARGUMENT_ERROR and a default-constructed MeasureUnit is returned.
MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const {
    MeasureUnitImpl inverted = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
    // The reciprocal of a unit that has a constant denominator is not allowed.
    const bool hasConstantDenominator = inverted.constantDenominator != 0;
    if (hasConstantDenominator) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return {};
    }
    inverted.takeReciprocal(status);
    return std::move(inverted).build(status);
}
|
|
|
|
// Returns the product of this unit and `other`: the single units of `other`
// are appended to (or coalesced into) a copy of this unit.
//
// Fails with U_ILLEGAL_ARGUMENT_ERROR, returning a default-constructed
// MeasureUnit, when either operand is MIXED or when both operands carry a
// constant denominator. The result is marked COMPOUND when it ends up with
// more than one single unit or with a constant denominator.
MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const {
    MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
    MeasureUnitImpl temp;
    const MeasureUnitImpl& otherImpl = MeasureUnitImpl::forMeasureUnit(other, temp, status);
    if (impl.complexity == UMEASURE_UNIT_MIXED || otherImpl.complexity == UMEASURE_UNIT_MIXED) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return {};
    }
    for (int32_t i = 0; i < otherImpl.singleUnits.length(); i++) {
        impl.appendSingleUnit(*otherImpl.singleUnits[i], status);
    }

    const uint64_t currentConstantDenominator = this->getConstantDenominator(status);
    const uint64_t otherConstantDenominator = other.getConstantDenominator(status);

    // TODO: we can also multiply the constant denominators instead of returning an error.
    if (currentConstantDenominator != 0 && otherConstantDenominator != 0) {
        // There is only one constant denominator in a compound unit, so two
        // units that both have a constant denominator cannot be multiplied.
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return {};
    }

    // At most one of the two constant denominators is non-zero here, so take
    // whichever one that is. This is done with a plain selection rather than
    // uprv_max so the full 64-bit value is preserved.
    // NOTE(review): uprv_max appears to be declared for int32_t operands in
    // putilimp.h, which would truncate large denominators - confirm.
    impl.constantDenominator =
        (currentConstantDenominator != 0) ? currentConstantDenominator : otherConstantDenominator;

    if (impl.singleUnits.length() > 1 || impl.constantDenominator > 0) {
        impl.complexity = UMEASURE_UNIT_COMPOUND;
    }

    return std::move(impl).build(status);
}
|
|
|
|
// Builds one MeasureUnit per single unit of this unit and returns them as a
// LocalArray; `outCount` receives the number of single units.
//
// If the array cannot be allocated, `status` is set to
// U_MEMORY_ALLOCATION_ERROR and an empty LocalArray is returned.
LocalArray<MeasureUnit> MeasureUnit::splitToSingleUnitsImpl(int32_t& outCount, UErrorCode& status) const {
    MeasureUnitImpl scratch;
    const MeasureUnitImpl& resolved = MeasureUnitImpl::forMeasureUnit(*this, scratch, status);
    outCount = resolved.singleUnits.length();

    MeasureUnit* units = new MeasureUnit[outCount];
    if (units == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return LocalArray<MeasureUnit>();
    }

    int32_t pos = 0;
    while (pos < outCount) {
        units[pos] = resolved.singleUnits[pos]->build(status);
        ++pos;
    }
    return LocalArray<MeasureUnit>(units, status);
}
|
|
|
|
|
|
U_NAMESPACE_END
|
|
|
|
#endif /* !UCONFIG_NO_FORMATTING */
|