ICU-21349 Adjust C++ MeasureUnitImpl::serialize to be as same as the Java one

See #1496
This commit is contained in:
Younies Mahmoud 2020-12-11 13:56:22 +00:00 committed by Younies Mahmoud
parent 3b1ec5032c
commit 0895a223a3
10 changed files with 196 additions and 119 deletions

View file

@ -141,6 +141,38 @@ CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &error
return *this;
}
CharString &CharString::appendNumber(int32_t number, UErrorCode &status) {
if (number < 0) {
this->append('-', status);
if (U_FAILURE(status)) {
return *this;
}
}
if (number == 0) {
this->append('0', status);
return *this;
}
int32_t numLen = 0;
while (number != 0) {
int32_t residue = number % 10;
number /= 10;
this->append(std::abs(residue) + '0', status);
numLen++;
if (U_FAILURE(status)) {
return *this;
}
}
int32_t start = this->length() - numLen, end = this->length() - 1;
while(start < end) {
std::swap(this->data()[start++], this->data()[end--]);
}
return *this;
}
char *CharString::getAppendBuffer(int32_t minCapacity,
int32_t desiredCapacityHint,
int32_t &resultCapacity,

View file

@ -127,6 +127,9 @@ public:
return append(s.data(), s.length(), errorCode);
}
CharString &append(const char *s, int32_t sLength, UErrorCode &status);
CharString &appendNumber(int32_t number, UErrorCode &status);
/**
* Returns a writable buffer for appending and writes the buffer's capacity to
* resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS().

View file

@ -13,10 +13,10 @@
#ifndef ICU_UTIL_H
#define ICU_UTIL_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "charstr.h"
#include "unicode/unistr.h"
#include "unicode/uobject.h"
#include "unicode/utypes.h"
//--------------------------------------------------------------------
// class ICU_Utility
// i18n utility functions, scoped into the class ICU_Utility.

View file

@ -30,6 +30,7 @@
#include "unicode/ures.h"
#include "unicode/ustringtrie.h"
#include "uresimp.h"
#include "util.h"
#include <cstdlib>
U_NAMESPACE_BEGIN
@ -628,107 +629,6 @@ compareSingleUnits(const void* /*context*/, const void* left, const void* right)
return (*realLeft)->compareTo(**realRight);
}
/**
* Generate the identifier string for a single unit in place.
*
* Does not support the dimensionless SingleUnitImpl: calling serializeSingle
* with the dimensionless unit results in an U_INTERNAL_PROGRAM_ERROR.
*
* @param first If singleUnit is part of a compound unit, and not its first
* single unit, set this to false. Otherwise: set to true.
*/
void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& output, UErrorCode& status) {
if (first && singleUnit.dimensionality < 0) {
// Essentially the "unary per". For compound units with a numerator, the
// caller takes care of the "binary per".
output.append("per-", status);
}
if (singleUnit.isDimensionless()) {
status = U_INTERNAL_PROGRAM_ERROR;
return;
}
int8_t posPower = std::abs(singleUnit.dimensionality);
if (posPower == 0) {
status = U_INTERNAL_PROGRAM_ERROR;
} else if (posPower == 1) {
// no-op
} else if (posPower == 2) {
output.append("square-", status);
} else if (posPower == 3) {
output.append("cubic-", status);
} else if (posPower < 10) {
output.append("pow", status);
output.append(posPower + '0', status);
output.append('-', status);
} else if (posPower <= 15) {
output.append("pow1", status);
output.append('0' + (posPower % 10), status);
output.append('-', status);
} else {
status = kUnitIdentifierSyntaxError;
}
if (U_FAILURE(status)) {
return;
}
if (singleUnit.siPrefix != UMEASURE_SI_PREFIX_ONE) {
for (const auto& siPrefixInfo : gSIPrefixStrings) {
if (siPrefixInfo.value == singleUnit.siPrefix) {
output.append(siPrefixInfo.string, status);
break;
}
}
}
if (U_FAILURE(status)) {
return;
}
output.append(singleUnit.getSimpleUnitID(), status);
}
/**
* Normalize a MeasureUnitImpl and generate the identifier string in place.
*/
void serialize(MeasureUnitImpl& impl, UErrorCode& status) {
if (U_FAILURE(status)) {
return;
}
U_ASSERT(impl.identifier.isEmpty());
if (impl.singleUnits.length() == 0) {
// Dimensionless, constructed by the default constructor: no appending
// to impl.identifier, we wish it to contain the zero-length string.
return;
}
if (impl.complexity == UMEASURE_UNIT_COMPOUND) {
// Note: don't sort a MIXED unit
uprv_sortArray(impl.singleUnits.getAlias(), impl.singleUnits.length(),
sizeof(impl.singleUnits[0]), compareSingleUnits, nullptr, false, &status);
if (U_FAILURE(status)) {
return;
}
}
serializeSingle(*impl.singleUnits[0], true, impl.identifier, status);
if (impl.singleUnits.length() == 1) {
return;
}
for (int32_t i = 1; i < impl.singleUnits.length(); i++) {
const SingleUnitImpl &prev = *impl.singleUnits[i - 1];
const SingleUnitImpl &curr = *impl.singleUnits[i];
if (impl.complexity == UMEASURE_UNIT_MIXED) {
impl.identifier.append("-and-", status);
serializeSingle(curr, true, impl.identifier, status);
} else {
if (prev.dimensionality > 0 && curr.dimensionality < 0) {
impl.identifier.append("-per-", status);
} else {
impl.identifier.append('-', status);
}
serializeSingle(curr, false, impl.identifier, status);
}
}
}
} // namespace
@ -758,6 +658,42 @@ const char *SingleUnitImpl::getSimpleUnitID() const {
return gSimpleUnits[index];
}
void SingleUnitImpl::appendNeutralIdentifier(CharString &result, UErrorCode &status) const {
int32_t absPower = std::abs(this->dimensionality);
U_ASSERT(absPower > 0); // "this function does not support the dimensionless single units";
if (absPower == 1) {
// no-op
} else if (absPower == 2) {
result.append(StringPiece("square-"), status);
} else if (absPower == 3) {
result.append(StringPiece("cubic-"), status);
} else if (absPower <= 15) {
result.append(StringPiece("pow"), status);
result.appendNumber(absPower, status);
result.append(StringPiece("-"), status);
} else {
status = U_ILLEGAL_ARGUMENT_ERROR; // Unit Identifier Syntax Error
return;
}
if (U_FAILURE(status)) {
return;
}
if (this->siPrefix != UMEASURE_SI_PREFIX_ONE) {
for (const auto &siPrefixInfo : gSIPrefixStrings) {
if (siPrefixInfo.value == this->siPrefix) {
result.append(siPrefixInfo.string, status);
break;
}
}
}
result.append(StringPiece(this->getSimpleUnitID()), status);
}
MeasureUnitImpl::MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status) {
*this = other.copy(status);
}
@ -853,8 +789,69 @@ MaybeStackVector<MeasureUnitImpl> MeasureUnitImpl::extractIndividualUnits(UError
return result;
}
/**
* Normalize a MeasureUnitImpl and generate the identifier string in place.
*/
void MeasureUnitImpl::serialize(UErrorCode &status) {
if (U_FAILURE(status)) {
return;
}
if (this->singleUnits.length() == 0) {
// Dimensionless, constructed by the default constructor.
return;
}
if (this->complexity == UMEASURE_UNIT_COMPOUND) {
// Note: don't sort a MIXED unit
uprv_sortArray(this->singleUnits.getAlias(), this->singleUnits.length(),
sizeof(this->singleUnits[0]), compareSingleUnits, nullptr, false, &status);
if (U_FAILURE(status)) {
return;
}
}
CharString result;
bool beforePer = true;
bool firstTimeNegativeDimension = false;
for (int32_t i = 0; i < this->singleUnits.length(); i++) {
if (beforePer && (*this->singleUnits[i]).dimensionality < 0) {
beforePer = false;
firstTimeNegativeDimension = true;
} else if ((*this->singleUnits[i]).dimensionality < 0) {
firstTimeNegativeDimension = false;
}
if (U_FAILURE(status)) {
return;
}
if (this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) {
if (result.length() != 0) {
result.append(StringPiece("-and-"), status);
}
} else {
if (firstTimeNegativeDimension) {
if (result.length() == 0) {
result.append(StringPiece("per-"), status);
} else {
result.append(StringPiece("-per-"), status);
}
} else {
if (result.length() != 0) {
result.append(StringPiece("-"), status);
}
}
}
this->singleUnits[i]->appendNeutralIdentifier(result, status);
}
this->identifier = CharString(result, status);
}
MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && {
serialize(*this, status);
this->serialize(status);
return MeasureUnit(std::move(*this));
}

View file

@ -42,6 +42,12 @@ struct U_I18N_API SingleUnitImpl : public UMemory {
*/
const char *getSimpleUnitID() const;
/**
* Generates and append a neutral identifier string for a single unit which means we do not include
* the dimension signal.
*/
void appendNeutralIdentifier(CharString &result, UErrorCode &status) const;
/**
* Compare this SingleUnitImpl to another SingleUnitImpl for the sake of
* sorting and coalescing.
@ -133,7 +139,8 @@ template class U_I18N_API MaybeStackVector<SingleUnitImpl, 8>;
* Internal representation of measurement units. Capable of representing all complexities of units,
* including mixed and compound units.
*/
struct U_I18N_API MeasureUnitImpl : public UMemory {
class U_I18N_API MeasureUnitImpl : public UMemory {
public:
MeasureUnitImpl() = default;
MeasureUnitImpl(MeasureUnitImpl &&other) = default;
MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status);
@ -233,6 +240,12 @@ struct U_I18N_API MeasureUnitImpl : public UMemory {
* The full unit identifier. Owned by the MeasureUnitImpl. Empty if not computed.
*/
CharString identifier;
private:
/**
* Normalizes a MeasureUnitImpl and generate the identifier string in place.
*/
void serialize(UErrorCode &status);
};
U_NAMESPACE_END

View file

@ -30,7 +30,7 @@
U_NAMESPACE_BEGIN
class StringEnumeration;
struct MeasureUnitImpl;
class MeasureUnitImpl;
#ifndef U_HIDE_DRAFT_API
/**
@ -3568,7 +3568,7 @@ private:
/** Internal version of public API */
LocalArray<MeasureUnit> splitToSingleUnitsImpl(int32_t& outCount, UErrorCode& status) const;
friend struct MeasureUnitImpl;
friend class MeasureUnitImpl;
};
#ifndef U_HIDE_DRAFT_API // @draft ICU 68

View file

@ -255,6 +255,7 @@ void StringTest::runIndexedTest(int32_t index, UBool exec, const char *&name, ch
TESTCASE_AUTO(TestStringByteSinkAppendU8);
TESTCASE_AUTO(TestCharString);
TESTCASE_AUTO(TestCStr);
TESTCASE_AUTO(TestCharStrAppendNumber);
TESTCASE_AUTO(Testctou);
TESTCASE_AUTO_END;
}
@ -842,6 +843,36 @@ StringTest::TestCStr() {
}
}
void StringTest::TestCharStrAppendNumber() {
IcuTestErrorCode errorCode(*this, "TestCharStrAppendNumber()");
CharString testString;
testString.appendNumber(1, errorCode);
assertEquals("TestAppendNumber 1", "1", testString.data());
testString.clear();
testString.appendNumber(-1, errorCode);
assertEquals("TestAppendNumber -1", "-1", testString.data());
testString.clear();
testString.appendNumber(12345, errorCode);
assertEquals("TestAppendNumber 12345", "12345", testString.data());
testString.appendNumber(123, errorCode);
assertEquals("TestAppendNumber 12345 and then 123", "12345123", testString.data());
testString.clear();
testString.appendNumber(2147483647, errorCode);
assertEquals("TestAppendNumber when appending the biggest int32", "2147483647", testString.data());
testString.clear();
testString.appendNumber(-2147483648, errorCode);
assertEquals("TestAppendNumber when appending the smallest int32", "-2147483648", testString.data());
testString.clear();
testString.appendNumber(0, errorCode);
assertEquals("TestAppendNumber when appending zero", "0", testString.data());
}
void
StringTest::Testctou() {
const char *cs = "Fa\\u0127mu";

View file

@ -57,6 +57,7 @@ private:
void TestSTLCompatibility();
void TestCharString();
void TestCStr();
void TestCharStrAppendNumber();
void Testctou();
};

View file

@ -223,6 +223,8 @@ public class MeasureUnitImpl {
// to this.result, we wish it to contain the zero-length string.
return;
}
if (this.complexity == MeasureUnit.Complexity.COMPOUND) {
// Note: don't sort a MIXED unit
Collections.sort(this.getSingleUnits(), new SingleUnitComparator());
@ -240,7 +242,6 @@ public class MeasureUnitImpl {
firstTimeNegativeDimension = false;
}
String singleUnitIdentifier = singleUnit.getNeutralIdentifier();
if (this.getComplexity() == MeasureUnit.Complexity.MIXED) {
if (result.length() != 0) {
result.append("-and-");
@ -259,7 +260,7 @@ public class MeasureUnitImpl {
}
}
result.append(singleUnitIdentifier);
result.append(singleUnit.getNeutralIdentifier());
}
this.identifier = result.toString();

View file

@ -49,26 +49,25 @@ public class SingleUnitImpl {
}
/**
* Generates an neutral identifier string for a single unit which means we do not include the dimension signal.
* Generates a neutral identifier string for a single unit which means we do not include the dimension signal.
*/
public String getNeutralIdentifier() {
StringBuilder result = new StringBuilder();
int posPower = Math.abs(this.getDimensionality());
int absPower = Math.abs(this.getDimensionality());
assert posPower > 0 : "getIdentifier does not support the dimensionless";
assert absPower > 0 : "this function does not support the dimensionless single units";
if (posPower == 1) {
if (absPower == 1) {
// no-op
} else if (posPower == 2) {
} else if (absPower == 2) {
result.append("square-");
} else if (posPower == 3) {
} else if (absPower == 3) {
result.append("cubic-");
} else if (posPower <= 15) {
} else if (absPower <= 15) {
result.append("pow");
result.append(posPower);
result.append(absPower);
result.append('-');
} else {
// TODO: IllegalArgumentException might not be appropriate here
throw new IllegalArgumentException("Unit Identifier Syntax Error");
}