Additional WIP

This commit is contained in:
Shane F. Carr 2020-01-14 23:25:36 +01:00
parent 9a6caa01af
commit fb1129a188
3 changed files with 331 additions and 48 deletions

View file

@ -2025,6 +2025,10 @@ MeasureUnit::MeasureUnit(MeasureUnit &&other) noexcept
other.fId = nullptr;
}
// Constructs a unit directly from an identifier string. The pointer is stored
// as-is in fId (no copy is made here), and both the type id and the sub-type
// id are set to -1.
// NOTE(review): the parameter name suggests this object takes ownership of
// the buffer; confirm against the destructor.
MeasureUnit::MeasureUnit(char* idToAdopt)
        : fId(idToAdopt),
          fSubTypeId(-1),
          fTypeId(-1) {}
MeasureUnit &MeasureUnit::operator=(const MeasureUnit &other) {
if (this == &other) {
return *this;

View file

@ -13,6 +13,7 @@
#define UNISTR_FROM_STRING_EXPLICIT
#include "cstring.h"
#include "uassert.h"
#include "ucln_in.h"
#include "umutex.h"
#include "unicode/errorcode.h"
@ -20,6 +21,8 @@
#include "unicode/ucharstrie.h"
#include "unicode/ucharstriebuilder.h"
#include "cstr.h"
U_NAMESPACE_BEGIN
@ -28,27 +31,64 @@ namespace {
// This is to ensure we only insert positive integers into the trie
constexpr int32_t kSIPrefixOffset = 64;
constexpr int32_t kSyntaxPartOffset = 256;
constexpr int32_t kCompoundPartOffset = 128;
enum SyntaxPart {
SYNTAX_PART_PER = kSyntaxPartOffset,
SYNTAX_PART_SQUARE,
SYNTAX_PART_CUBIC,
SYNTAX_PART_P1,
SYNTAX_PART_P2,
SYNTAX_PART_P3,
SYNTAX_PART_P4,
SYNTAX_PART_P5,
SYNTAX_PART_P6,
SYNTAX_PART_P7,
SYNTAX_PART_P8,
SYNTAX_PART_P9,
// Trie values for the separators that join the pieces of a compound unit
// identifier. They are registered in the stem trie under the strings
// "-per-", "-", "one-per-" and "+" respectively.
enum CompoundPart {
    COMPOUND_PART_PER     = kCompoundPartOffset,
    COMPOUND_PART_TIMES   = kCompoundPartOffset + 1,
    COMPOUND_PART_ONE_PER = kCompoundPartOffset + 2,
    COMPOUND_PART_PLUS    = kCompoundPartOffset + 3,
};
// Base value for power-prefix tokens stored in the stem trie.
constexpr int32_t kPowerPartOffset = 256;

// Trie values for power prefixes ("square-", "cubic-", "p2-" ... "p15-").
// Each enumerator equals kPowerPartOffset plus the exponent it denotes, so
// the exponent can be recovered as (value - kPowerPartOffset).
enum PowerPart {
    POWER_PART_P2  = kPowerPartOffset + 2,
    POWER_PART_P3  = kPowerPartOffset + 3,
    POWER_PART_P4  = kPowerPartOffset + 4,
    POWER_PART_P5  = kPowerPartOffset + 5,
    POWER_PART_P6  = kPowerPartOffset + 6,
    POWER_PART_P7  = kPowerPartOffset + 7,
    POWER_PART_P8  = kPowerPartOffset + 8,
    POWER_PART_P9  = kPowerPartOffset + 9,
    POWER_PART_P10 = kPowerPartOffset + 10,
    POWER_PART_P11 = kPowerPartOffset + 11,
    POWER_PART_P12 = kPowerPartOffset + 12,
    POWER_PART_P13 = kPowerPartOffset + 13,
    POWER_PART_P14 = kPowerPartOffset + 14,
    POWER_PART_P15 = kPowerPartOffset + 15,
};
// Lower bound of the trie-value range treated as "simple unit" tokens: the
// accessors in this file classify any token value >= kSimpleUnitOffset as
// being past the power-part range.
// NOTE(review): presumably (value - kSimpleUnitOffset) indexes gSimpleUnits;
// confirm against the trie-building code.
constexpr int32_t kSimpleUnitOffset = 512;
// One row of the SI prefix table: the spelling used inside unit identifiers
// and the matching UMeasureSIPrefix enumerator.
struct SIPrefixStrings {
    const char* const string;
    UMeasureSIPrefix value;
};

// Table of SI prefix spellings. It is read both when the stem trie is built
// and when a prefix enumerator has to be turned back into its spelling.
const SIPrefixStrings gSIPrefixStrings[] = {
    { "yotta", UMEASURE_SI_PREFIX_YOTTA },
    { "zetta", UMEASURE_SI_PREFIX_ZETTA },
    { "exa",   UMEASURE_SI_PREFIX_EXA },
    { "peta",  UMEASURE_SI_PREFIX_PETA },
    { "tera",  UMEASURE_SI_PREFIX_TERA },
    { "giga",  UMEASURE_SI_PREFIX_GIGA },
    { "mega",  UMEASURE_SI_PREFIX_MEGA },
    { "kilo",  UMEASURE_SI_PREFIX_KILO },
    { "hecto", UMEASURE_SI_PREFIX_HECTO },
    { "deka",  UMEASURE_SI_PREFIX_DEKA },
    { "deci",  UMEASURE_SI_PREFIX_DECI },
    { "centi", UMEASURE_SI_PREFIX_CENTI },
    { "milli", UMEASURE_SI_PREFIX_MILLI },
    { "micro", UMEASURE_SI_PREFIX_MICRO },
    { "nano",  UMEASURE_SI_PREFIX_NANO },
    { "pico",  UMEASURE_SI_PREFIX_PICO },
    { "femto", UMEASURE_SI_PREFIX_FEMTO },
    { "atto",  UMEASURE_SI_PREFIX_ATTO },
    { "zepto", UMEASURE_SI_PREFIX_ZEPTO },
    { "yocto", UMEASURE_SI_PREFIX_YOCTO },
};
// FIXME: Get this list from data
const char16_t* gSimpleUnits[] = {
const char16_t* const gSimpleUnits[] = {
u"100kilometer",
u"acre",
u"ampere",
@ -168,41 +208,33 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) {
if (U_FAILURE(status)) { return; }
// Add SI prefixes
b.add(u"yotta", kSIPrefixOffset + UMEASURE_SI_PREFIX_YOTTA, status);
b.add(u"zetta", kSIPrefixOffset + UMEASURE_SI_PREFIX_ZETTA, status);
b.add(u"exa", kSIPrefixOffset + UMEASURE_SI_PREFIX_EXA, status);
b.add(u"peta", kSIPrefixOffset + UMEASURE_SI_PREFIX_PETA, status);
b.add(u"tera", kSIPrefixOffset + UMEASURE_SI_PREFIX_TERA, status);
b.add(u"giga", kSIPrefixOffset + UMEASURE_SI_PREFIX_GIGA, status);
b.add(u"mega", kSIPrefixOffset + UMEASURE_SI_PREFIX_MEGA, status);
b.add(u"kilo", kSIPrefixOffset + UMEASURE_SI_PREFIX_KILO, status);
b.add(u"hecto", kSIPrefixOffset + UMEASURE_SI_PREFIX_HECTO, status);
b.add(u"deka", kSIPrefixOffset + UMEASURE_SI_PREFIX_DEKA, status);
b.add(u"deci", kSIPrefixOffset + UMEASURE_SI_PREFIX_DECI, status);
b.add(u"centi", kSIPrefixOffset + UMEASURE_SI_PREFIX_CENTI, status);
b.add(u"milli", kSIPrefixOffset + UMEASURE_SI_PREFIX_MILLI, status);
b.add(u"micro", kSIPrefixOffset + UMEASURE_SI_PREFIX_MICRO, status);
b.add(u"nano", kSIPrefixOffset + UMEASURE_SI_PREFIX_NANO, status);
b.add(u"pico", kSIPrefixOffset + UMEASURE_SI_PREFIX_PICO, status);
b.add(u"femto", kSIPrefixOffset + UMEASURE_SI_PREFIX_FEMTO, status);
b.add(u"atto", kSIPrefixOffset + UMEASURE_SI_PREFIX_ATTO, status);
b.add(u"zepto", kSIPrefixOffset + UMEASURE_SI_PREFIX_ZEPTO, status);
b.add(u"yocto", kSIPrefixOffset + UMEASURE_SI_PREFIX_YOCTO, status);
for (const auto& siPrefixInfo : gSIPrefixStrings) {
UnicodeString uSIPrefix(siPrefixInfo.string, -1, US_INV);
b.add(uSIPrefix, siPrefixInfo.value + kSIPrefixOffset, status);
}
if (U_FAILURE(status)) { return; }
// Add syntax parts (per, power prefixes)
b.add(u"-per-", SYNTAX_PART_PER, status);
b.add(u"square-", SYNTAX_PART_SQUARE, status);
b.add(u"cubic-", SYNTAX_PART_CUBIC, status);
b.add(u"p1", SYNTAX_PART_P1, status);
b.add(u"p2", SYNTAX_PART_P2, status);
b.add(u"p3", SYNTAX_PART_P3, status);
b.add(u"p4", SYNTAX_PART_P4, status);
b.add(u"p5", SYNTAX_PART_P5, status);
b.add(u"p6", SYNTAX_PART_P6, status);
b.add(u"p7", SYNTAX_PART_P7, status);
b.add(u"p8", SYNTAX_PART_P8, status);
b.add(u"p9", SYNTAX_PART_P9, status);
// Add syntax parts (compound, power prefixes)
b.add(u"-per-", COMPOUND_PART_PER, status);
b.add(u"-", COMPOUND_PART_TIMES, status);
b.add(u"one-per-", COMPOUND_PART_ONE_PER, status);
b.add(u"+", COMPOUND_PART_PLUS, status);
b.add(u"square-", POWER_PART_P2, status);
b.add(u"cubic-", POWER_PART_P3, status);
b.add(u"p2-", POWER_PART_P2, status);
b.add(u"p3-", POWER_PART_P3, status);
b.add(u"p4-", POWER_PART_P4, status);
b.add(u"p5-", POWER_PART_P5, status);
b.add(u"p6-", POWER_PART_P6, status);
b.add(u"p7-", POWER_PART_P7, status);
b.add(u"p8-", POWER_PART_P8, status);
b.add(u"p9-", POWER_PART_P9, status);
b.add(u"p10-", POWER_PART_P10, status);
b.add(u"p11-", POWER_PART_P11, status);
b.add(u"p12-", POWER_PART_P12, status);
b.add(u"p13-", POWER_PART_P13, status);
b.add(u"p14-", POWER_PART_P14, status);
b.add(u"p15-", POWER_PART_P15, status);
if (U_FAILURE(status)) { return; }
// Add sanctioned simple units by offset
@ -223,9 +255,255 @@ void U_CALLCONV initUnitExtras(UErrorCode& status) {
uprv_memcpy(kSerializedUnitExtrasStemTrie, result.getBuffer(), numBytes);
}
class UnitIdentifierParser {
public:
static UnitIdentifierParser from(StringPiece source, UErrorCode& status) {
umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
if (U_FAILURE(status)) {
return UnitIdentifierParser();
}
return UnitIdentifierParser(source);
}
int32_t nextToken(UErrorCode& status) {
fTrie.reset();
int32_t match = -1;
int32_t previ = -1;
do {
fTrie.next(fSource.data()[fIndex++]);
if (fTrie.current() == USTRINGTRIE_NO_MATCH) {
break;
} else if (fTrie.current() == USTRINGTRIE_NO_VALUE) {
continue;
} else if (fTrie.current() == USTRINGTRIE_FINAL_VALUE) {
match = fTrie.getValue();
previ = fIndex;
break;
} else if (fTrie.current() == USTRINGTRIE_INTERMEDIATE_VALUE) {
match = fTrie.getValue();
previ = fIndex;
continue;
} else {
UPRV_UNREACHABLE;
}
} while (fIndex < fSource.length());
if (match < 0) {
// TODO: Make a new status code?
status = U_ILLEGAL_ARGUMENT_ERROR;
} else {
fIndex = previ;
}
return match;
}
bool hasNext() const {
return fIndex < fSource.length();
}
int32_t currentIndex() const {
return fIndex;
}
private:
int32_t fIndex = 0;
StringPiece fSource;
UCharsTrie fTrie;
UnitIdentifierParser() : fSource(""), fTrie(u"") {}
UnitIdentifierParser(StringPiece source)
: fSource(source), fTrie(kSerializedUnitExtrasStemTrie) {}
};
} // namespace
/**
 * Builds a MeasureUnit from a unit identifier string.
 *
 * The identifier is tokenized with UnitIdentifierParser; every token must be
 * a key in the stem trie. Only token validity is checked here, not the order
 * in which tokens appear.
 *
 * @param identifier NUL-terminated identifier; must not be null.
 * @param status     Set to U_ILLEGAL_ARGUMENT_ERROR if `identifier` is null
 *                   or contains text that is not a known token, and to
 *                   U_MEMORY_ALLOCATION_ERROR if the identifier cannot be
 *                   copied.
 * @return A unit holding a private copy of `identifier`, or a
 *         default-constructed MeasureUnit on failure.
 */
MeasureUnit MeasureUnit::forIdentifier(const char* identifier, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return MeasureUnit();
    }
    if (identifier == nullptr) {
        // uprv_strdup() below would dereference a null pointer.
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return MeasureUnit();
    }

    UnitIdentifierParser parser = UnitIdentifierParser::from(identifier, status);
    if (U_FAILURE(status)) {
        // Unrecoverable error
        return MeasureUnit();
    }

    while (parser.hasNext()) {
        parser.nextToken(status);
        if (U_FAILURE(status)) {
            // Invalid syntax
            return MeasureUnit();
        }

        // if (match < kCompoundPartOffset) {
        //     // SI Prefix
        //     auto prefix = static_cast<UMeasureSIPrefix>(match - kSIPrefixOffset);
        // } else if (match < kPowerPartOffset) {
        //     // Compound part
        //     const char* operation = (match == COMPOUND_PART_PER) ? "per" : "times/plus";
        // } else if (match < kSimpleUnitOffset) {
        //     // Power part
        //     int32_t power = match - kPowerPartOffset;
        // } else {
        //     // Simple unit
        //     const char16_t* simpleUnit = gSimpleUnits[match - kSimpleUnitOffset];
        // }
    }

    // Success: hand a private copy of the identifier to the new unit.
    char* idCopy = uprv_strdup(identifier);
    if (idCopy == nullptr) {
        // Do not build a unit around a null id while reporting success.
        status = U_MEMORY_ALLOCATION_ERROR;
        return MeasureUnit();
    }
    return MeasureUnit(idCopy);
}
/**
 * Returns the SI prefix at the front of this unit's identifier.
 *
 * A leading power prefix (e.g. "square-"), if any, is skipped first. If the
 * token that follows is not an SI prefix, or if the identifier cannot be
 * tokenized, UMEASURE_SI_PREFIX_ONE is returned.
 */
UMeasureSIPrefix MeasureUnit::getSIPrefix() const {
    ErrorCode errorCode;
    const char* identifier = toString();
    UnitIdentifierParser tokens = UnitIdentifierParser::from(identifier, errorCode);
    if (errorCode.isFailure()) {
        // Parser could not be set up.
        return UMEASURE_SI_PREFIX_ONE;
    }

    int32_t token = tokens.nextToken(errorCode);
    if (errorCode.isFailure()) {
        // Identifier does not start with a known token.
        return UMEASURE_SI_PREFIX_ONE;
    }

    const bool startsWithPower = (kPowerPartOffset <= token) && (token < kSimpleUnitOffset);
    if (startsWithPower) {
        // Step over the power prefix and look at what follows it.
        token = tokens.nextToken(errorCode);
        if (errorCode.isFailure()) {
            return UMEASURE_SI_PREFIX_ONE;
        }
    }

    // Values below kCompoundPartOffset are SI prefixes stored with a bias of
    // kSIPrefixOffset; anything else means there is no prefix.
    return (token < kCompoundPartOffset)
        ? static_cast<UMeasureSIPrefix>(token - kSIPrefixOffset)
        : UMEASURE_SI_PREFIX_ONE;
}
/**
 * Returns a copy of this unit whose leading SI prefix is replaced by
 * `prefix`.
 *
 * A leading power prefix (e.g. "square-") is preserved and the new SI prefix
 * is inserted after it. An existing SI prefix is removed. If `prefix` has no
 * entry in gSIPrefixStrings, no prefix text is inserted.
 *
 * @param prefix The SI prefix to apply.
 * @return The re-prefixed unit, or a copy of *this if the identifier cannot
 *         be tokenized or memory cannot be allocated.
 */
MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix) const {
    ErrorCode status;
    const char* id = toString();
    UnitIdentifierParser parser = UnitIdentifierParser::from(id, status);
    if (status.isFailure()) {
        // Unrecoverable error
        return *this;
    }

    int32_t match = parser.nextToken(status);
    if (status.isFailure()) {
        // Invalid syntax
        return *this;
    }

    CharString builder;
    int32_t unitStart = 0;
    if (match >= kPowerPartOffset && match < kSimpleUnitOffset) {
        // Keep the power part verbatim, then look at the token after it.
        unitStart = parser.currentIndex();
        builder.append(id, unitStart, status);
        match = parser.nextToken(status);
        if (status.isFailure()) {
            // Nothing valid follows the power part; do not act on match == -1.
            return *this;
        }
    }

    // Append the new SI prefix
    for (const auto& siPrefixInfo : gSIPrefixStrings) {
        if (siPrefixInfo.value == prefix) {
            builder.append(siPrefixInfo.string, status);
            break;
        }
    }

    if (match < kCompoundPartOffset) {
        // Remove the old SI prefix
        unitStart = parser.currentIndex();
    }
    builder.append(id + unitStart, status);

    // cloneData() returns nullptr and sets status on failure; never hand a
    // null id to the adopting constructor.
    char* newId = builder.cloneData(status);
    if (status.isFailure()) {
        // Unrecoverable error
        return *this;
    }
    return MeasureUnit(newId);
}
int8_t MeasureUnit::getPower() const {
ErrorCode status;
const char* id = toString();
UnitIdentifierParser parser = UnitIdentifierParser::from(id, status);
if (status.isFailure()) {
// Unrecoverable error
return 0;
}
int32_t match = parser.nextToken(status);
if (status.isFailure()) {
// Invalid syntax
return 0;
}
if (match < kPowerPartOffset || match >= kSimpleUnitOffset) {
// No power part
return 0;
}
return static_cast<int8_t>(match - kPowerPartOffset);
}
/**
 * Returns a copy of this unit with its leading power prefix set to `power`.
 *
 * An existing power prefix ("square-", "cubic-", "pN-") is replaced; the rest
 * of the identifier, including any SI prefix, is kept. Power 2 is spelled
 * "square-", 3 is "cubic-", 4..15 are "pN-", and 1 is spelled with no prefix.
 *
 * @param power The desired exponent. Only 1..15 have a spelling the
 *              identifier tokenizer recognizes; other values leave the unit
 *              unchanged (negative values also trip a debug assertion).
 * @return The re-powered unit, or a copy of *this if `power` is unsupported,
 *         the identifier cannot be tokenized, or memory cannot be allocated.
 */
MeasureUnit MeasureUnit::withPower(int8_t power) const {
    if (power < 0) {
        // Don't know how to handle this yet
        U_ASSERT(FALSE);
    }
    if (power < 1 || power > 15) {
        // No token exists for these exponents; building "p0-", "p-3-" or a
        // truncated "p1N-" would yield an identifier that cannot be parsed.
        return *this;
    }

    ErrorCode status;
    const char* id = toString();
    UnitIdentifierParser parser = UnitIdentifierParser::from(id, status);
    if (status.isFailure()) {
        // Unrecoverable error
        return *this;
    }

    int32_t match = parser.nextToken(status);
    if (status.isFailure()) {
        // Invalid syntax
        return *this;
    }

    // Append the new power
    CharString builder;
    if (power == 2) {
        builder.append("square-", status);
    } else if (power == 3) {
        builder.append("cubic-", status);
    } else if (power != 1) {
        // "p4-" .. "p15-"; power 1 is expressed by the absence of a prefix.
        builder.append('p', status);
        if (power >= 10) {
            builder.append('1', status);
        }
        builder.append(static_cast<char>('0' + (power % 10)), status);
        builder.append('-', status);
    }

    if (match >= kPowerPartOffset && match < kSimpleUnitOffset) {
        // The identifier starts with a power prefix: drop it and keep
        // everything after it. (Testing for the SI-prefix range here would
        // strip the SI prefix and leave the old power in place.)
        builder.append(id + parser.currentIndex(), status);
    } else {
        // No old power prefix: append the whole identifier
        builder.append(id, status);
    }

    // cloneData() returns nullptr and sets status on failure; never hand a
    // null id to the adopting constructor.
    char* newId = builder.cloneData(status);
    if (status.isFailure()) {
        // Unrecoverable error
        return *this;
    }
    return MeasureUnit(newId);
}
U_NAMESPACE_END
#endif /* !UNCONFIG_NO_FORMATTING */

View file

@ -3704,6 +3704,7 @@ private:
int8_t fTypeId;
MeasureUnit(int32_t typeId, int32_t subTypeId);
MeasureUnit(char* idToAdopt);
void setTo(int32_t typeId, int32_t subTypeId);
int32_t getOffset() const;
static MeasureUnit *create(int typeId, int subTypeId, UErrorCode &status);