ICU-13574 Checkpoint commit. AffixMatcher is mostly implemented.

X-SVN-Rev: 40894
This commit is contained in:
Shane Carr 2018-02-10 14:29:26 +00:00
parent a335b723c7
commit afbb37febd
13 changed files with 663 additions and 297 deletions

View file

@ -7,6 +7,7 @@
#include "number_affixutils.h"
#include "unicode/utf16.h"
#include "unicode/uniset.h"
using namespace icu;
using namespace icu::number;
@ -239,6 +240,22 @@ UnicodeString AffixUtils::replaceType(const CharSequence &affixPattern, AffixPat
return output;
}
/**
 * Returns whether every literal code point token in the affix pattern is a member of the
 * given ignorables set. Tokens whose type is not TYPE_CODEPOINT are accepted without
 * consulting the set.
 *
 * @param affixPattern The affix pattern to scan.
 * @param ignorables   The set of code points that are allowed to appear as literal tokens.
 * @param status       Passed through to nextToken; if it reports a failure, scanning stops
 *                     and false is returned.
 * @return true if the pattern is empty or contains no literal code point outside of
 *         ignorables; false otherwise, or if tokenizing failed.
 */
bool AffixUtils::containsOnlySymbolsAndIgnorables(const CharSequence& affixPattern,
                                                  const UnicodeSet& ignorables, UErrorCode& status) {
    // An empty pattern trivially satisfies the condition.
    if (affixPattern.length() == 0) {
        return true;
    }

    AffixTag tag;
    while (hasNext(tag, affixPattern)) {
        tag = nextToken(tag, affixPattern, status);
        if (U_FAILURE(status)) { return false; }
        // Only literal code points are checked against the set.
        if (tag.type == TYPE_CODEPOINT && !ignorables.contains(tag.codePoint)) {
            return false;
        }
    }
    return true;
}
void AffixUtils::iterateWithConsumer(const CharSequence& affixPattern, TokenConsumer& consumer,
UErrorCode& status) {
if (affixPattern.length() == 0) {

View file

@ -37,13 +37,14 @@ struct AffixTag {
AffixPatternState state;
AffixPatternType type;
AffixTag() : offset(0), state(STATE_BASE) {}
AffixTag()
: offset(0), state(STATE_BASE) {}
AffixTag(int32_t offset) : offset(offset) {}
AffixTag(int32_t offset)
: offset(offset) {}
AffixTag(int32_t offset, UChar32 codePoint, AffixPatternState state, AffixPatternType type)
: offset(offset), codePoint(codePoint), state(state), type(type)
{}
: offset(offset), codePoint(codePoint), state(state), type(type) {}
};
class TokenConsumer {
@ -112,7 +113,7 @@ class U_I18N_API AffixUtils {
* @param patternString The original string whose width will be estimated.
* @return The length of the unescaped string.
*/
static int32_t estimateLength(const CharSequence &patternString, UErrorCode &status);
static int32_t estimateLength(const CharSequence& patternString, UErrorCode& status);
/**
* Takes a string and escapes (quotes) characters that have special meaning in the affix pattern
@ -123,7 +124,7 @@ class U_I18N_API AffixUtils {
* @param input The string to be escaped.
* @return The resulting UnicodeString.
*/
static UnicodeString escape(const CharSequence &input);
static UnicodeString escape(const CharSequence& input);
static Field getFieldForType(AffixPatternType type);
@ -139,9 +140,8 @@ class U_I18N_API AffixUtils {
* @param position The index into the NumberStringBuilder to insert the string.
* @param provider An object to generate locale symbols.
*/
static int32_t
unescape(const CharSequence &affixPattern, NumberStringBuilder &output, int32_t position,
const SymbolProvider &provider, UErrorCode &status);
static int32_t unescape(const CharSequence& affixPattern, NumberStringBuilder& output,
int32_t position, const SymbolProvider& provider, UErrorCode& status);
/**
* Same as {@link #unescape}, but only calculates the code point count. More efficient than {@link #unescape}
@ -151,8 +151,8 @@ class U_I18N_API AffixUtils {
* @param provider An object to generate locale symbols.
* @return The same return value as if you called {@link #unescape}.
*/
static int32_t unescapedCodePointCount(const CharSequence &affixPattern,
const SymbolProvider &provider, UErrorCode &status);
static int32_t unescapedCodePointCount(const CharSequence& affixPattern,
const SymbolProvider& provider, UErrorCode& status);
/**
* Checks whether the given affix pattern contains at least one token of the given type, which is
@ -162,8 +162,7 @@ class U_I18N_API AffixUtils {
* @param type The token type.
* @return true if the affix pattern contains the given token type; false otherwise.
*/
static bool
containsType(const CharSequence &affixPattern, AffixPatternType type, UErrorCode &status);
static bool containsType(const CharSequence& affixPattern, AffixPatternType type, UErrorCode& status);
/**
* Checks whether the specified affix pattern has any unquoted currency symbols ("¤").
@ -171,7 +170,7 @@ class U_I18N_API AffixUtils {
* @param affixPattern The string to check for currency symbols.
* @return true if the literal has at least one unquoted currency symbol; false otherwise.
*/
static bool hasCurrencySymbols(const CharSequence &affixPattern, UErrorCode &status);
static bool hasCurrencySymbols(const CharSequence& affixPattern, UErrorCode& status);
/**
* Replaces all occurrences of tokens with the given type with the given replacement char.
@ -181,9 +180,15 @@ class U_I18N_API AffixUtils {
* @param replacementChar The char to substitute in place of chars of the given token type.
* @return A string containing the new affix pattern.
*/
static UnicodeString
replaceType(const CharSequence &affixPattern, AffixPatternType type, char16_t replacementChar,
UErrorCode &status);
static UnicodeString replaceType(const CharSequence& affixPattern, AffixPatternType type,
char16_t replacementChar, UErrorCode& status);
/**
* Returns whether the given affix pattern contains only symbols and ignorables as defined by the
* given ignorables set.
*/
static bool containsOnlySymbolsAndIgnorables(const CharSequence& affixPattern,
const UnicodeSet& ignorables, UErrorCode& status);
/**
* Iterates over the affix pattern, calling the TokenConsumer for each token.
@ -201,7 +206,7 @@ class U_I18N_API AffixUtils {
* (never negative), or -1 if there were no more tokens in the affix pattern.
* @see #hasNext
*/
static AffixTag nextToken(AffixTag tag, const CharSequence &patternString, UErrorCode &status);
static AffixTag nextToken(AffixTag tag, const CharSequence& patternString, UErrorCode& status);
/**
* Returns whether the affix pattern string has any more tokens to be retrieved from a call to
@ -211,7 +216,7 @@ class U_I18N_API AffixUtils {
* @param string The affix pattern.
* @return true if there are more tokens to consume; false otherwise.
*/
static bool hasNext(const AffixTag &tag, const CharSequence &string);
static bool hasNext(const AffixTag& tag, const CharSequence& string);
private:
/**
@ -219,8 +224,8 @@ class U_I18N_API AffixUtils {
* The order of the arguments is consistent with Java, but the order of the stored
* fields is not necessarily the same.
*/
static inline AffixTag
makeTag(int32_t offset, AffixPatternType type, AffixPatternState state, UChar32 cp) {
static inline AffixTag makeTag(int32_t offset, AffixPatternType type, AffixPatternState state,
UChar32 cp) {
return {offset, cp, state, type};
}
};

View file

@ -15,9 +15,10 @@ using namespace icu;
using namespace icu::number;
using namespace icu::number::impl;
MutablePatternModifier::MutablePatternModifier(bool isStrong) : fStrong(isStrong) {}
MutablePatternModifier::MutablePatternModifier(bool isStrong)
: fStrong(isStrong) {}
void MutablePatternModifier::setPatternInfo(const AffixPatternProvider *patternInfo) {
void MutablePatternModifier::setPatternInfo(const AffixPatternProvider* patternInfo) {
this->patternInfo = patternInfo;
}
@ -26,12 +27,11 @@ void MutablePatternModifier::setPatternAttributes(UNumberSignDisplay signDisplay
this->perMilleReplacesPercent = perMille;
}
void
MutablePatternModifier::setSymbols(const DecimalFormatSymbols *symbols, const CurrencyUnit &currency,
const UNumberUnitWidth unitWidth, const PluralRules *rules) {
void MutablePatternModifier::setSymbols(const DecimalFormatSymbols* symbols, const CurrencyUnit& currency,
const UNumberUnitWidth unitWidth, const PluralRules* rules) {
U_ASSERT((rules != nullptr) == needsPlurals());
this->symbols = symbols;
uprv_memcpy(static_cast<char16_t *>(this->currencyCode),
uprv_memcpy(static_cast<char16_t*>(this->currencyCode),
currency.getISOCurrency(),
sizeof(char16_t) * 4);
this->unitWidth = unitWidth;
@ -49,12 +49,12 @@ bool MutablePatternModifier::needsPlurals() const {
// Silently ignore any error codes.
}
ImmutablePatternModifier *MutablePatternModifier::createImmutable(UErrorCode &status) {
ImmutablePatternModifier* MutablePatternModifier::createImmutable(UErrorCode& status) {
return createImmutableAndChain(nullptr, status);
}
ImmutablePatternModifier *
MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator *parent, UErrorCode &status) {
ImmutablePatternModifier*
MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator* parent, UErrorCode& status) {
// TODO: Move StandardPlural VALUES to standardplural.h
static const StandardPlural::Form STANDARD_PLURAL_VALUES[] = {
@ -89,11 +89,11 @@ MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator *paren
} else {
// Faster path when plural keyword is not needed.
setNumberProperties(1, StandardPlural::Form::COUNT);
Modifier *positive = createConstantModifier(status);
Modifier* positive = createConstantModifier(status);
setNumberProperties(0, StandardPlural::Form::COUNT);
Modifier *zero = createConstantModifier(status);
Modifier* zero = createConstantModifier(status);
setNumberProperties(-1, StandardPlural::Form::COUNT);
Modifier *negative = createConstantModifier(status);
Modifier* negative = createConstantModifier(status);
pm->adoptPositiveNegativeModifiers(positive, zero, negative);
if (U_FAILURE(status)) {
delete pm;
@ -103,29 +103,30 @@ MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator *paren
}
}
ConstantMultiFieldModifier *MutablePatternModifier::createConstantModifier(UErrorCode &status) {
ConstantMultiFieldModifier* MutablePatternModifier::createConstantModifier(UErrorCode& status) {
NumberStringBuilder a;
NumberStringBuilder b;
insertPrefix(a, 0, status);
insertSuffix(b, 0, status);
if (patternInfo->hasCurrencySign()) {
return new CurrencySpacingEnabledModifier(a, b, !patternInfo->hasBody(), fStrong, *symbols, status);
return new CurrencySpacingEnabledModifier(
a, b, !patternInfo->hasBody(), fStrong, *symbols, status);
} else {
return new ConstantMultiFieldModifier(a, b, !patternInfo->hasBody(), fStrong);
}
}
ImmutablePatternModifier::ImmutablePatternModifier(ParameterizedModifier *pm, const PluralRules *rules,
const MicroPropsGenerator *parent)
ImmutablePatternModifier::ImmutablePatternModifier(ParameterizedModifier* pm, const PluralRules* rules,
const MicroPropsGenerator* parent)
: pm(pm), rules(rules), parent(parent) {}
void ImmutablePatternModifier::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
UErrorCode &status) const {
void ImmutablePatternModifier::processQuantity(DecimalQuantity& quantity, MicroProps& micros,
UErrorCode& status) const {
parent->processQuantity(quantity, micros, status);
applyToMicros(micros, quantity);
}
void ImmutablePatternModifier::applyToMicros(MicroProps &micros, DecimalQuantity &quantity) const {
void ImmutablePatternModifier::applyToMicros(MicroProps& micros, DecimalQuantity& quantity) const {
if (rules == nullptr) {
micros.modMiddle = pm->getModifier(quantity.signum());
} else {
@ -138,17 +139,17 @@ void ImmutablePatternModifier::applyToMicros(MicroProps &micros, DecimalQuantity
}
/** Used by the unsafe code path. */
MicroPropsGenerator &MutablePatternModifier::addToChain(const MicroPropsGenerator *parent) {
MicroPropsGenerator& MutablePatternModifier::addToChain(const MicroPropsGenerator* parent) {
this->parent = parent;
return *this;
}
void MutablePatternModifier::processQuantity(DecimalQuantity &fq, MicroProps &micros,
UErrorCode &status) const {
void MutablePatternModifier::processQuantity(DecimalQuantity& fq, MicroProps& micros,
UErrorCode& status) const {
parent->processQuantity(fq, micros, status);
// The unsafe code path performs self-mutation, so we need a const_cast.
// This method needs to be const because it overrides a const method in the parent class.
auto nonConstThis = const_cast<MutablePatternModifier *>(this);
auto nonConstThis = const_cast<MutablePatternModifier*>(this);
if (needsPlurals()) {
// TODO: Fix this. Avoid the copy.
DecimalQuantity copy(fq);
@ -160,20 +161,24 @@ void MutablePatternModifier::processQuantity(DecimalQuantity &fq, MicroProps &mi
micros.modMiddle = this;
}
int32_t MutablePatternModifier::apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
UErrorCode &status) const {
int32_t MutablePatternModifier::apply(NumberStringBuilder& output, int32_t leftIndex, int32_t rightIndex,
UErrorCode& status) const {
// The unsafe code path performs self-mutation, so we need a const_cast.
// This method needs to be const because it overrides a const method in the parent class.
auto nonConstThis = const_cast<MutablePatternModifier *>(this);
auto nonConstThis = const_cast<MutablePatternModifier*>(this);
int32_t prefixLen = nonConstThis->insertPrefix(output, leftIndex, status);
int32_t suffixLen = nonConstThis->insertSuffix(output, rightIndex + prefixLen, status);
// If the pattern had no decimal stem body (like #,##0.00), overwrite the value.
int32_t overwriteLen = 0;
if (!patternInfo->hasBody()) {
overwriteLen = output.splice(
leftIndex + prefixLen, rightIndex + prefixLen,
UnicodeString(), 0, 0, UNUM_FIELD_COUNT,
status);
leftIndex + prefixLen,
rightIndex + prefixLen,
UnicodeString(),
0,
0,
UNUM_FIELD_COUNT,
status);
}
CurrencySpacingEnabledModifier::applyCurrencySpacing(
output,
@ -186,30 +191,36 @@ int32_t MutablePatternModifier::apply(NumberStringBuilder &output, int32_t leftI
return prefixLen + overwriteLen + suffixLen;
}
int32_t MutablePatternModifier::getPrefixLength(UErrorCode &status) const {
int32_t MutablePatternModifier::getPrefixLength(UErrorCode& status) const {
// The unsafe code path performs self-mutation, so we need a const_cast.
// This method needs to be const because it overrides a const method in the parent class.
auto nonConstThis = const_cast<MutablePatternModifier *>(this);
auto nonConstThis = const_cast<MutablePatternModifier*>(this);
// Enter and exit CharSequence Mode to get the length.
nonConstThis->enterCharSequenceMode(true);
int result = AffixUtils::unescapedCodePointCount(*this, *this, status); // prefix length
nonConstThis->exitCharSequenceMode();
nonConstThis->prepareAffix(true);
int result = AffixUtils::unescapedCodePointCount(
UnicodeStringCharSequence(currentAffix),
*this,
status); // prefix length
return result;
}
int32_t MutablePatternModifier::getCodePointCount(UErrorCode &status) const {
int32_t MutablePatternModifier::getCodePointCount(UErrorCode& status) const {
// The unsafe code path performs self-mutation, so we need a const_cast.
// This method needs to be const because it overrides a const method in the parent class.
auto nonConstThis = const_cast<MutablePatternModifier *>(this);
auto nonConstThis = const_cast<MutablePatternModifier*>(this);
// Enter and exit CharSequence Mode to get the length.
nonConstThis->enterCharSequenceMode(true);
int result = AffixUtils::unescapedCodePointCount(*this, *this, status); // prefix length
nonConstThis->exitCharSequenceMode();
nonConstThis->enterCharSequenceMode(false);
result += AffixUtils::unescapedCodePointCount(*this, *this, status); // suffix length
nonConstThis->exitCharSequenceMode();
// Render the affixes to get the length
nonConstThis->prepareAffix(true);
int result = AffixUtils::unescapedCodePointCount(
UnicodeStringCharSequence(currentAffix),
*this,
status); // prefix length
nonConstThis->prepareAffix(false);
result += AffixUtils::unescapedCodePointCount(
UnicodeStringCharSequence(currentAffix),
*this,
status); // suffix length
return result;
}
@ -217,20 +228,26 @@ bool MutablePatternModifier::isStrong() const {
return fStrong;
}
int32_t MutablePatternModifier::insertPrefix(NumberStringBuilder &sb, int position, UErrorCode &status) {
enterCharSequenceMode(true);
int length = AffixUtils::unescape(*this, sb, position, *this, status);
exitCharSequenceMode();
int32_t MutablePatternModifier::insertPrefix(NumberStringBuilder& sb, int position, UErrorCode& status) {
prepareAffix(true);
int length = AffixUtils::unescape(
UnicodeStringCharSequence(currentAffix), sb, position, *this, status);
return length;
}
int32_t MutablePatternModifier::insertSuffix(NumberStringBuilder &sb, int position, UErrorCode &status) {
enterCharSequenceMode(false);
int length = AffixUtils::unescape(*this, sb, position, *this, status);
exitCharSequenceMode();
int32_t MutablePatternModifier::insertSuffix(NumberStringBuilder& sb, int position, UErrorCode& status) {
prepareAffix(false);
int length = AffixUtils::unescape(
UnicodeStringCharSequence(currentAffix), sb, position, *this, status);
return length;
}
/**
 * Prepares the affix string for the prefix or the suffix.
 *
 * Delegates to PatternStringUtils::patternInfoToStringBuilder, passing the current
 * patternInfo, signum, signDisplay, plural and perMilleReplacesPercent settings, with the
 * currentAffix member as the output string. Callers in this file (insertPrefix,
 * insertSuffix, getPrefixLength, getCodePointCount) read currentAffix right after this call.
 *
 * @param isPrefix true to prepare the prefix; false to prepare the suffix.
 */
void MutablePatternModifier::prepareAffix(bool isPrefix) {
PatternStringUtils::patternInfoToStringBuilder(
*patternInfo, isPrefix, signum, signDisplay, plural, perMilleReplacesPercent, currentAffix);
}
UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const {
switch (type) {
case AffixPatternType::TYPE_MINUS_SIGN:
@ -249,12 +266,12 @@ UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const {
return UnicodeString();
} else {
UCurrNameStyle selector = (unitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_NARROW)
? UCurrNameStyle::UCURR_NARROW_SYMBOL_NAME
: UCurrNameStyle::UCURR_SYMBOL_NAME;
? UCurrNameStyle::UCURR_NARROW_SYMBOL_NAME
: UCurrNameStyle::UCURR_SYMBOL_NAME;
UErrorCode status = U_ZERO_ERROR;
UBool isChoiceFormat = FALSE;
int32_t symbolLen = 0;
const char16_t *symbol = ucurr_getName(
const char16_t* symbol = ucurr_getName(
currencyCode,
symbols->getLocale().getName(),
selector,
@ -274,7 +291,7 @@ UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const {
UErrorCode status = U_ZERO_ERROR;
UBool isChoiceFormat = FALSE;
int32_t symbolLen = 0;
const char16_t *symbol = ucurr_getPluralName(
const char16_t* symbol = ucurr_getPluralName(
currencyCode,
symbols->getLocale().getName(),
&isChoiceFormat,
@ -293,79 +310,6 @@ UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const {
}
}
/** This method contains the heart of the logic for rendering LDML affix strings. */
void MutablePatternModifier::enterCharSequenceMode(bool isPrefix) {
U_ASSERT(!inCharSequenceMode);
inCharSequenceMode = true;
// Should the output render '+' where '-' would normally appear in the pattern?
plusReplacesMinusSign = signum != -1
&& (signDisplay == UNUM_SIGN_ALWAYS
|| signDisplay == UNUM_SIGN_ACCOUNTING_ALWAYS
|| (signum == 1
&& (signDisplay == UNUM_SIGN_EXCEPT_ZERO
|| signDisplay == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO)))
&& patternInfo->positiveHasPlusSign() == false;
// Should we use the affix from the negative subpattern? (If not, we will use the positive subpattern.)
bool useNegativeAffixPattern = patternInfo->hasNegativeSubpattern() && (
signum == -1 || (patternInfo->negativeHasMinusSign() && plusReplacesMinusSign));
// Resolve the flags for the affix pattern.
fFlags = 0;
if (useNegativeAffixPattern) {
fFlags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN;
}
if (isPrefix) {
fFlags |= AffixPatternProvider::AFFIX_PREFIX;
}
if (plural != StandardPlural::Form::COUNT) {
U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural));
fFlags |= plural;
}
// Should we prepend a sign to the pattern?
if (!isPrefix || useNegativeAffixPattern) {
prependSign = false;
} else if (signum == -1) {
prependSign = signDisplay != UNUM_SIGN_NEVER;
} else {
prependSign = plusReplacesMinusSign;
}
// Finally, compute the length of the affix pattern.
fLength = patternInfo->length(fFlags) + (prependSign ? 1 : 0);
}
/** Leaves CharSequence mode: asserts that the mode is currently active, then clears the flag. */
void MutablePatternModifier::exitCharSequenceMode() {
U_ASSERT(inCharSequenceMode);
inCharSequenceMode = false;
}
/**
 * Returns the affix length stored in fLength, which enterCharSequenceMode() computes.
 * Asserts that CharSequence mode is active.
 */
int32_t MutablePatternModifier::length() const {
U_ASSERT(inCharSequenceMode);
return fLength;
}
/**
 * Returns the UTF-16 code unit at the given index of the affix exposed in CharSequence mode.
 *
 * When a sign is prepended, index 0 is a synthetic '-' and the remaining indices are shifted
 * by one into the underlying pattern. The code unit is then substituted if needed: '-'
 * becomes '+' when plusReplacesMinusSign is set, and '%' becomes the per-mille sign
 * (U+2030) when perMilleReplacesPercent is set.
 *
 * @param index Zero-based index into the affix.
 * @return The (possibly substituted) code unit at that index.
 */
char16_t MutablePatternModifier::charAt(int32_t index) const {
    U_ASSERT(inCharSequenceMode);
    char16_t candidate;
    if (prependSign && index == 0) {
        candidate = u'-';
    } else if (prependSign) {
        // Skip over the synthetic leading sign.
        candidate = patternInfo->charAt(fFlags, index - 1);
    } else {
        candidate = patternInfo->charAt(fFlags, index);
    }
    if (plusReplacesMinusSign && candidate == u'-') {
        return u'+';
    }
    if (perMilleReplacesPercent && candidate == u'%') {
        // Written as an escape so the per-mille sign survives encoding changes.
        return u'\u2030';
    }
    return candidate;
}
UnicodeString MutablePatternModifier::toUnicodeString() const {
// Never called by AffixUtils
U_ASSERT(false);

View file

@ -35,20 +35,21 @@ class MutablePatternModifier;
// Exported as U_I18N_API because it is needed for the unit test PatternModifierTest
class U_I18N_API ImmutablePatternModifier : public MicroPropsGenerator, public UMemory {
public:
~ImmutablePatternModifier() U_OVERRIDE = default;
~ImmutablePatternModifier() U_OVERRIDE = default;
void processQuantity(DecimalQuantity &, MicroProps &micros, UErrorCode &status) const U_OVERRIDE;
void processQuantity(DecimalQuantity&, MicroProps& micros, UErrorCode& status) const U_OVERRIDE;
void applyToMicros(MicroProps &micros, DecimalQuantity &quantity) const;
void applyToMicros(MicroProps& micros, DecimalQuantity& quantity) const;
private:
ImmutablePatternModifier(ParameterizedModifier *pm, const PluralRules *rules, const MicroPropsGenerator *parent);
ImmutablePatternModifier(ParameterizedModifier* pm, const PluralRules* rules,
const MicroPropsGenerator* parent);
const LocalPointer<ParameterizedModifier> pm;
const PluralRules *rules;
const MicroPropsGenerator *parent;
const PluralRules* rules;
const MicroPropsGenerator* parent;
friend class MutablePatternModifier;
friend class MutablePatternModifier;
};
/**
@ -74,7 +75,6 @@ class U_I18N_API MutablePatternModifier
: public MicroPropsGenerator,
public Modifier,
public SymbolProvider,
public CharSequence,
public UMemory {
public:
@ -187,13 +187,7 @@ class U_I18N_API MutablePatternModifier
*/
UnicodeString getSymbol(AffixPatternType type) const U_OVERRIDE;
int32_t length() const U_OVERRIDE;
char16_t charAt(int32_t index) const U_OVERRIDE;
// Use default implementation of codePointAt
UnicodeString toUnicodeString() const U_OVERRIDE;
UnicodeString toUnicodeString() const;
private:
// Modifier details (initialized in constructor)
@ -217,12 +211,8 @@ class U_I18N_API MutablePatternModifier
// QuantityChain details (initialized in addToChain)
const MicroPropsGenerator *parent;
// Transient CharSequence fields (initialized in enterCharSequenceMode)
bool inCharSequenceMode = false;
int32_t fFlags;
int32_t fLength;
bool prependSign;
bool plusReplacesMinusSign;
// Transient fields for rendering
UnicodeString currentAffix;
/**
* Uses the current properties to create a single {@link ConstantMultiFieldModifier} with currency spacing support
@ -244,9 +234,7 @@ class U_I18N_API MutablePatternModifier
int32_t insertSuffix(NumberStringBuilder &sb, int position, UErrorCode &status);
void enterCharSequenceMode(bool isPrefix);
void exitCharSequenceMode();
void prepareAffix(bool isPrefix);
};

View file

@ -14,25 +14,27 @@ using namespace icu;
using namespace icu::number;
using namespace icu::number::impl;
void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo, UErrorCode &status) {
void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo,
UErrorCode& status) {
patternInfo.consumePattern(patternString, status);
}
DecimalFormatProperties
PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding,
UErrorCode &status) {
UErrorCode& status) {
DecimalFormatProperties properties;
parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
return properties;
}
void PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties,
IgnoreRounding ignoreRounding, UErrorCode &status) {
void
PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties,
IgnoreRounding ignoreRounding, UErrorCode& status) {
parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
}
char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const {
const Endpoints &endpoints = getEndpoints(flags);
const Endpoints& endpoints = getEndpoints(flags);
if (index < 0 || index >= endpoints.end - endpoints.start) {
U_ASSERT(false);
}
@ -43,12 +45,12 @@ int32_t ParsedPatternInfo::length(int32_t flags) const {
return getLengthFromEndpoints(getEndpoints(flags));
}
int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints &endpoints) {
int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints& endpoints) {
return endpoints.end - endpoints.start;
}
UnicodeString ParsedPatternInfo::getString(int32_t flags) const {
const Endpoints &endpoints = getEndpoints(flags);
const Endpoints& endpoints = getEndpoints(flags);
if (endpoints.start == endpoints.end) {
return UnicodeString();
}
@ -56,7 +58,7 @@ UnicodeString ParsedPatternInfo::getString(int32_t flags) const {
return UnicodeString(pattern, endpoints.start, endpoints.end - endpoints.start);
}
const Endpoints &ParsedPatternInfo::getEndpoints(int32_t flags) const {
const Endpoints& ParsedPatternInfo::getEndpoints(int32_t flags) const {
bool prefix = (flags & AFFIX_PREFIX) != 0;
bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0;
bool padding = (flags & AFFIX_PADDING) != 0;
@ -91,7 +93,7 @@ bool ParsedPatternInfo::hasCurrencySign() const {
return positive.hasCurrencySign || (fHasNegativeSubpattern && negative.hasCurrencySign);
}
bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode &status) const {
bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode& status) const {
return AffixUtils::containsType(UnicodeStringCharSequence(pattern), type, status);
}
@ -117,7 +119,7 @@ UChar32 ParsedPatternInfo::ParserState::next() {
return codePoint;
}
void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode &status) {
void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode& status) {
if (U_FAILURE(status)) { return; }
this->pattern = patternString;
@ -141,7 +143,7 @@ void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErro
}
}
void ParsedPatternInfo::consumeSubpattern(UErrorCode &status) {
void ParsedPatternInfo::consumeSubpattern(UErrorCode& status) {
// subpattern := literals? number exponent? literals?
consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status);
if (U_FAILURE(status)) { return; }
@ -161,7 +163,7 @@ void ParsedPatternInfo::consumeSubpattern(UErrorCode &status) {
if (U_FAILURE(status)) { return; }
}
void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode &status) {
void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode& status) {
if (state.peek() != u'*') {
return;
}
@ -177,7 +179,7 @@ void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode &
currentSubpattern->paddingEndpoints.end = state.offset;
}
void ParsedPatternInfo::consumeAffix(Endpoints &endpoints, UErrorCode &status) {
void ParsedPatternInfo::consumeAffix(Endpoints& endpoints, UErrorCode& status) {
// literals := { literal }
endpoints.start = state.offset;
while (true) {
@ -233,7 +235,7 @@ void ParsedPatternInfo::consumeAffix(Endpoints &endpoints, UErrorCode &status) {
endpoints.end = state.offset;
}
void ParsedPatternInfo::consumeLiteral(UErrorCode &status) {
void ParsedPatternInfo::consumeLiteral(UErrorCode& status) {
if (state.peek() == -1) {
state.toParseException(u"Expected unquoted literal but found EOL");
status = U_PATTERN_SYNTAX_ERROR;
@ -256,7 +258,7 @@ void ParsedPatternInfo::consumeLiteral(UErrorCode &status) {
}
}
void ParsedPatternInfo::consumeFormat(UErrorCode &status) {
void ParsedPatternInfo::consumeFormat(UErrorCode& status) {
consumeIntegerFormat(status);
if (U_FAILURE(status)) { return; }
if (state.peek() == u'.') {
@ -268,9 +270,9 @@ void ParsedPatternInfo::consumeFormat(UErrorCode &status) {
}
}
void ParsedPatternInfo::consumeIntegerFormat(UErrorCode &status) {
void ParsedPatternInfo::consumeIntegerFormat(UErrorCode& status) {
// Convenience reference:
ParsedSubpatternInfo &result = *currentSubpattern;
ParsedSubpatternInfo& result = *currentSubpattern;
while (true) {
switch (state.peek()) {
@ -359,9 +361,9 @@ void ParsedPatternInfo::consumeIntegerFormat(UErrorCode &status) {
}
}
void ParsedPatternInfo::consumeFractionFormat(UErrorCode &status) {
void ParsedPatternInfo::consumeFractionFormat(UErrorCode& status) {
// Convenience reference:
ParsedSubpatternInfo &result = *currentSubpattern;
ParsedSubpatternInfo& result = *currentSubpattern;
int32_t zeroCounter = 0;
while (true) {
@ -407,9 +409,9 @@ void ParsedPatternInfo::consumeFractionFormat(UErrorCode &status) {
}
}
void ParsedPatternInfo::consumeExponent(UErrorCode &status) {
void ParsedPatternInfo::consumeExponent(UErrorCode& status) {
// Convenience reference:
ParsedSubpatternInfo &result = *currentSubpattern;
ParsedSubpatternInfo& result = *currentSubpattern;
if (state.peek() != u'E') {
return;
@ -437,9 +439,9 @@ void ParsedPatternInfo::consumeExponent(UErrorCode &status) {
/// END RECURSIVE DESCENT PARSER IMPLEMENTATION ///
///////////////////////////////////////////////////
void
PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern, DecimalFormatProperties &properties,
IgnoreRounding ignoreRounding, UErrorCode &status) {
void PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern,
DecimalFormatProperties& properties,
IgnoreRounding ignoreRounding, UErrorCode& status) {
if (pattern.length() == 0) {
// Backwards compatibility requires that we reset to the default values.
// TODO: Only overwrite the properties that "saveToProperties" normally touches?
@ -453,13 +455,13 @@ PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern, Decim
patternInfoToProperties(properties, patternInfo, ignoreRounding, status);
}
void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties,
ParsedPatternInfo& patternInfo,
IgnoreRounding _ignoreRounding, UErrorCode &status) {
void
PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, ParsedPatternInfo& patternInfo,
IgnoreRounding _ignoreRounding, UErrorCode& status) {
// Translate from PatternParseResult to Properties.
// Note that most data from "negative" is ignored per the specification of DecimalFormat.
const ParsedSubpatternInfo &positive = patternInfo.positive;
const ParsedSubpatternInfo& positive = patternInfo.positive;
bool ignoreRounding;
if (_ignoreRounding == IGNORE_ROUNDING_NEVER) {
@ -508,8 +510,7 @@ void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties,
properties.maximumFractionDigits = -1;
properties.roundingIncrement = 0.0;
properties.minimumSignificantDigits = positive.integerAtSigns;
properties.maximumSignificantDigits =
positive.integerAtSigns + positive.integerTrailingHashSigns;
properties.maximumSignificantDigits = positive.integerAtSigns + positive.integerTrailingHashSigns;
} else if (!positive.rounding.isZero()) {
if (!ignoreRounding) {
properties.minimumFractionDigits = minFrac;
@ -570,9 +571,9 @@ void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties,
// Padding settings
if (!positive.paddingLocation.isNull()) {
// The width of the positive prefix and suffix templates are included in the padding
int paddingWidth =
positive.widthExceptAffixes + AffixUtils::estimateLength(UnicodeStringCharSequence(posPrefix), status) +
AffixUtils::estimateLength(UnicodeStringCharSequence(posSuffix), status);
int paddingWidth = positive.widthExceptAffixes +
AffixUtils::estimateLength(UnicodeStringCharSequence(posPrefix), status) +
AffixUtils::estimateLength(UnicodeStringCharSequence(posSuffix), status);
properties.formatWidth = paddingWidth;
UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING);
if (rawPaddingString.length() == 1) {
@ -622,8 +623,8 @@ void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties,
/// End PatternStringParser.java; begin PatternStringUtils.java ///
///////////////////////////////////////////////////////////////////
UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties &properties,
UErrorCode &status) {
UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties& properties,
UErrorCode& status) {
UnicodeString sb;
// Convenience references
@ -632,7 +633,7 @@ UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatP
int groupingSize = uprv_min(properties.secondaryGroupingSize, dosMax);
int firstGroupingSize = uprv_min(properties.groupingSize, dosMax);
int paddingWidth = uprv_min(properties.formatWidth, dosMax);
NullableValue<PadPosition> paddingLocation = properties.padPosition;
NullableValue <PadPosition> paddingLocation = properties.padPosition;
UnicodeString paddingString = properties.padString;
int minInt = uprv_max(uprv_min(properties.minimumIntegerDigits, dosMax), 0);
int maxInt = uprv_min(properties.maximumIntegerDigits, dosMax);
@ -809,8 +810,8 @@ UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatP
}
int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex,
UErrorCode &status) {
(void)status;
UErrorCode& status) {
(void) status;
if (input.length() == 0) {
input.setTo(kFallbackPaddingString, -1);
}
@ -840,4 +841,69 @@ int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString&
return output.length() - startLength;
}
/**
 * Renders one affix (prefix or suffix) of an affix pattern provider into `output`.
 *
 * The function decides whether the positive or the negative subpattern is used, whether a
 * sign has to be prepended, and performs the '-' -> '+' and '%' -> per-mille substitutions.
 *
 * @param patternInfo Provider of the affix pattern characters.
 * @param isPrefix true to render the prefix, false to render the suffix.
 * @param signum -1 selects the negative form; 1 is treated as strictly positive; any other
 *               value is neither (relevant for the "except zero" sign displays).
 * @param signDisplay Sign display strategy to resolve.
 * @param plural Plural form to OR into the flags; StandardPlural::Form::COUNT adds nothing.
 * @param perMilleReplacesPercent If true, every '%' in the affix is emitted as U+2030.
 * @param output Receives the rendered affix; any previous content is discarded.
 */
void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
                                                    int8_t signum, UNumberSignDisplay signDisplay,
                                                    StandardPlural::Form plural,
                                                    bool perMilleReplacesPercent, UnicodeString& output) {

    // Should the output render '+' where '-' would normally appear in the pattern?
    bool plusReplacesMinusSign = signum != -1 && (
            signDisplay == UNUM_SIGN_ALWAYS || signDisplay == UNUM_SIGN_ACCOUNTING_ALWAYS || (
                    signum == 1 && (
                            signDisplay == UNUM_SIGN_EXCEPT_ZERO ||
                            signDisplay == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO))) &&
                                 !patternInfo.positiveHasPlusSign();

    // Should we use the affix from the negative subpattern? (If not, we will use the positive
    // subpattern.)
    bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern() && (
            signum == -1 || (patternInfo.negativeHasMinusSign() && plusReplacesMinusSign));

    // Resolve the flags for the affix pattern.
    int flags = 0;
    if (useNegativeAffixPattern) {
        flags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN;
    }
    if (isPrefix) {
        flags |= AffixPatternProvider::AFFIX_PREFIX;
    }
    if (plural != StandardPlural::Form::COUNT) {
        // The plural form must fit inside the bits reserved for it in the flags.
        U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural));
        flags |= plural;
    }

    // Should we prepend a sign to the pattern?
    bool prependSign;
    if (!isPrefix || useNegativeAffixPattern) {
        prependSign = false;
    } else if (signum == -1) {
        prependSign = signDisplay != UNUM_SIGN_NEVER;
    } else {
        prependSign = plusReplacesMinusSign;
    }

    // Compute the length of the affix pattern.
    int length = patternInfo.length(flags) + (prependSign ? 1 : 0);

    // Finally, set the result into the output string.
    output.remove();
    for (int index = 0; index < length; index++) {
        char16_t candidate;
        if (prependSign && index == 0) {
            // The synthetic sign is always emitted as '-' first; it may become '+' below.
            candidate = u'-';
        } else if (prependSign) {
            // Shift by one to account for the synthetic sign at position 0.
            candidate = patternInfo.charAt(flags, index - 1);
        } else {
            candidate = patternInfo.charAt(flags, index);
        }
        if (plusReplacesMinusSign && candidate == u'-') {
            candidate = u'+';
        }
        if (perMilleReplacesPercent && candidate == u'%') {
            // U+2030 PER MILLE SIGN, written as an escape so the source stays ASCII-safe.
            candidate = u'\u2030';
        }
        output.append(candidate);
    }
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -62,17 +62,18 @@ struct U_I18N_API ParsedPatternInfo : public AffixPatternProvider, public UMemor
ParsedSubpatternInfo positive;
ParsedSubpatternInfo negative;
ParsedPatternInfo() : state(this->pattern), currentSubpattern(nullptr) {}
ParsedPatternInfo()
: state(this->pattern), currentSubpattern(nullptr) {}
~ParsedPatternInfo() U_OVERRIDE = default;
static int32_t getLengthFromEndpoints(const Endpoints &endpoints);
static int32_t getLengthFromEndpoints(const Endpoints& endpoints);
char16_t charAt(int32_t flags, int32_t index) const U_OVERRIDE;
int32_t length(int32_t flags) const U_OVERRIDE;
UnicodeString getString(int32_t flags) const;
UnicodeString getString(int32_t flags) const U_OVERRIDE;
bool positiveHasPlusSign() const U_OVERRIDE;
@ -82,16 +83,17 @@ struct U_I18N_API ParsedPatternInfo : public AffixPatternProvider, public UMemor
bool hasCurrencySign() const U_OVERRIDE;
bool containsSymbolType(AffixPatternType type, UErrorCode &status) const U_OVERRIDE;
bool containsSymbolType(AffixPatternType type, UErrorCode& status) const U_OVERRIDE;
bool hasBody() const U_OVERRIDE;
private:
struct U_I18N_API ParserState {
const UnicodeString &pattern; // reference to the parent
const UnicodeString& pattern; // reference to the parent
int32_t offset = 0;
explicit ParserState(const UnicodeString &_pattern) : pattern(_pattern) {};
explicit ParserState(const UnicodeString& _pattern)
: pattern(_pattern) {};
UChar32 peek();
@ -99,41 +101,40 @@ struct U_I18N_API ParsedPatternInfo : public AffixPatternProvider, public UMemor
// TODO: We don't currently do anything with the message string.
// This method is here as a shell for Java compatibility.
inline void toParseException(const char16_t *message) { (void)message; }
}
state;
inline void toParseException(const char16_t* message) { (void) message; }
} state;
// NOTE: In Java, these are written as pure functions.
// In C++, they're written as methods.
// The behavior is the same.
// Mutable transient pointer:
ParsedSubpatternInfo *currentSubpattern;
ParsedSubpatternInfo* currentSubpattern;
// In Java, "negative == null" tells us whether or not we had a negative subpattern.
// In C++, we need to remember in another boolean.
bool fHasNegativeSubpattern = false;
const Endpoints &getEndpoints(int32_t flags) const;
const Endpoints& getEndpoints(int32_t flags) const;
/** Run the recursive descent parser. */
void consumePattern(const UnicodeString &patternString, UErrorCode &status);
void consumePattern(const UnicodeString& patternString, UErrorCode& status);
void consumeSubpattern(UErrorCode &status);
void consumeSubpattern(UErrorCode& status);
void consumePadding(PadPosition paddingLocation, UErrorCode &status);
void consumePadding(PadPosition paddingLocation, UErrorCode& status);
void consumeAffix(Endpoints &endpoints, UErrorCode &status);
void consumeAffix(Endpoints& endpoints, UErrorCode& status);
void consumeLiteral(UErrorCode &status);
void consumeLiteral(UErrorCode& status);
void consumeFormat(UErrorCode &status);
void consumeFormat(UErrorCode& status);
void consumeIntegerFormat(UErrorCode &status);
void consumeIntegerFormat(UErrorCode& status);
void consumeFractionFormat(UErrorCode &status);
void consumeFractionFormat(UErrorCode& status);
void consumeExponent(UErrorCode &status);
void consumeExponent(UErrorCode& status);
friend class PatternParser;
};
@ -153,8 +154,8 @@ class U_I18N_API PatternParser {
* The LDML decimal format pattern (Excel-style pattern) to parse.
* @return The results of the parse.
*/
static void
parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo &patternInfo, UErrorCode &status);
static void parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo,
UErrorCode& status);
enum IgnoreRounding {
IGNORE_ROUNDING_NEVER = 0, IGNORE_ROUNDING_IF_CURRENCY = 1, IGNORE_ROUNDING_ALWAYS = 2
@ -173,8 +174,8 @@ class U_I18N_API PatternParser {
* @throws IllegalArgumentException
* If there is a syntax error in the pattern string.
*/
static DecimalFormatProperties
parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding, UErrorCode &status);
static DecimalFormatProperties parseToProperties(const UnicodeString& pattern,
IgnoreRounding ignoreRounding, UErrorCode& status);
/**
* Parses a pattern string into an existing property bag. All properties that can be encoded into a pattern string
@ -190,18 +191,19 @@ class U_I18N_API PatternParser {
* @throws IllegalArgumentException
* If there was a syntax error in the pattern string.
*/
static void parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties,
IgnoreRounding ignoreRounding, UErrorCode &status);
static void parseToExistingProperties(const UnicodeString& pattern,
DecimalFormatProperties& properties,
IgnoreRounding ignoreRounding, UErrorCode& status);
private:
static void
parseToExistingPropertiesImpl(const UnicodeString& pattern, DecimalFormatProperties &properties,
IgnoreRounding ignoreRounding, UErrorCode &status);
static void parseToExistingPropertiesImpl(const UnicodeString& pattern,
DecimalFormatProperties& properties,
IgnoreRounding ignoreRounding, UErrorCode& status);
/** Finalizes the temporary data stored in the ParsedPatternInfo to the Properties. */
static void
patternInfoToProperties(DecimalFormatProperties &properties, ParsedPatternInfo& patternInfo,
IgnoreRounding _ignoreRounding, UErrorCode &status);
static void patternInfoToProperties(DecimalFormatProperties& properties,
ParsedPatternInfo& patternInfo, IgnoreRounding _ignoreRounding,
UErrorCode& status);
};
class U_I18N_API PatternStringUtils {
@ -217,8 +219,8 @@ class U_I18N_API PatternStringUtils {
* The property bag to serialize.
* @return A pattern string approximately serializing the property bag.
*/
static UnicodeString
propertiesToPatternString(const DecimalFormatProperties &properties, UErrorCode &status);
static UnicodeString propertiesToPatternString(const DecimalFormatProperties& properties,
UErrorCode& status);
/**
@ -248,14 +250,23 @@ class U_I18N_API PatternStringUtils {
* notation.
* @return The pattern expressed in the other notation.
*/
static UnicodeString
convertLocalized(UnicodeString input, DecimalFormatSymbols symbols, bool toLocalized,
UErrorCode &status);
static UnicodeString convertLocalized(UnicodeString input, DecimalFormatSymbols symbols,
bool toLocalized, UErrorCode& status);
/**
* This method contains the heart of the logic for rendering LDML affix strings. It handles
* sign-always-shown resolution, whether to use the positive or negative subpattern, permille
* substitution, and plural forms for CurrencyPluralInfo.
*/
static void patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
int8_t signum, UNumberSignDisplay signDisplay,
StandardPlural::Form plural, bool perMilleReplacesPercent,
UnicodeString& output);
private:
/** @return The number of chars inserted. */
static int
escapePaddingString(UnicodeString input, UnicodeString &output, int startIndex, UErrorCode &status);
static int escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex,
UErrorCode& status);
};
} // namespace impl

View file

@ -16,8 +16,7 @@
#include "uassert.h"
#include "unicode/platform.h"
U_NAMESPACE_BEGIN
namespace number {
U_NAMESPACE_BEGIN namespace number {
namespace impl {
// Typedef several enums for brevity and for easier comparison to Java.
@ -87,15 +86,14 @@ enum AffixPatternType {
};
enum CompactType {
TYPE_DECIMAL,
TYPE_CURRENCY
TYPE_DECIMAL, TYPE_CURRENCY
};
// TODO: Should this be moved somewhere else, maybe where other ICU classes can use it?
// Exported as U_I18N_API because it is a base class for other exported types
class U_I18N_API CharSequence {
public:
public:
virtual ~CharSequence() = default;
virtual int32_t length() const = 0;
@ -123,12 +121,20 @@ class U_I18N_API AffixPatternProvider {
static const int32_t AFFIX_NEGATIVE_SUBPATTERN = 0x200;
static const int32_t AFFIX_PADDING = 0x400;
// Convenience compound flags
static const int32_t AFFIX_POS_PREFIX = AFFIX_PREFIX;
static const int32_t AFFIX_POS_SUFFIX = 0;
static const int32_t AFFIX_NEG_PREFIX = AFFIX_PREFIX | AFFIX_NEGATIVE_SUBPATTERN;
static const int32_t AFFIX_NEG_SUFFIX = AFFIX_NEGATIVE_SUBPATTERN;
virtual ~AffixPatternProvider() = default;
virtual char16_t charAt(int flags, int i) const = 0;
virtual int length(int flags) const = 0;
virtual UnicodeString getString(int flags) const = 0;
virtual bool hasCurrencySign() const = 0;
virtual bool positiveHasPlusSign() const = 0;
@ -137,7 +143,7 @@ class U_I18N_API AffixPatternProvider {
virtual bool negativeHasMinusSign() const = 0;
virtual bool containsSymbolType(AffixPatternType, UErrorCode &) const = 0;
virtual bool containsSymbolType(AffixPatternType, UErrorCode&) const = 0;
/**
* True if the pattern has a number placeholder like "0" or "#,##0.00"; false if the pattern does not
@ -173,8 +179,8 @@ class U_I18N_API Modifier {
* formatted.
* @return The number of characters (UTF-16 code units) that were added to the string builder.
*/
virtual int32_t
apply(NumberStringBuilder &output, int leftIndex, int rightIndex, UErrorCode &status) const = 0;
virtual int32_t apply(NumberStringBuilder& output, int leftIndex, int rightIndex,
UErrorCode& status) const = 0;
/**
* Gets the length of the prefix. This information can be used in combination with {@link #apply} to extract the
@ -187,7 +193,7 @@ class U_I18N_API Modifier {
/**
* Returns the number of code points in the modifier, prefix plus suffix.
*/
virtual int32_t getCodePointCount(UErrorCode &status) const = 0;
virtual int32_t getCodePointCount(UErrorCode& status) const = 0;
/**
* Whether this modifier is strong. If a modifier is strong, it should always be applied immediately and not allowed
@ -230,7 +236,8 @@ class U_I18N_API MicroPropsGenerator {
* The MicroProps instance to populate.
* @return A MicroProps instance resolved for the quantity.
*/
virtual void processQuantity(DecimalQuantity& quantity, MicroProps& micros, UErrorCode& status) const = 0;
virtual void processQuantity(DecimalQuantity& quantity, MicroProps& micros,
UErrorCode& status) const = 0;
};
/**
@ -255,24 +262,25 @@ class MultiplierProducer {
template<typename T>
class U_I18N_API NullableValue {
public:
NullableValue() : fNull(true) {}
NullableValue()
: fNull(true) {}
NullableValue(const NullableValue<T> &other) = default;
NullableValue(const NullableValue<T>& other) = default;
explicit NullableValue(const T &other) {
explicit NullableValue(const T& other) {
fValue = other;
fNull = false;
}
NullableValue<T> &operator=(const NullableValue<T> &other) = default;
NullableValue<T>& operator=(const NullableValue<T>& other) = default;
NullableValue<T> &operator=(const T &other) {
NullableValue<T>& operator=(const T& other) {
fValue = other;
fNull = false;
return *this;
}
bool operator==(const NullableValue &other) const {
bool operator==(const NullableValue& other) const {
// "fValue == other.fValue" returns UBool, not bool (causes compiler warnings)
return fNull ? other.fNull : (other.fNull ? false : static_cast<bool>(fValue == other.fValue));
}
@ -286,7 +294,7 @@ class U_I18N_API NullableValue {
return fNull;
}
T get(UErrorCode &status) const {
T get(UErrorCode& status) const {
if (fNull) {
status = U_UNDEFINED_VARIABLE;
}

View file

@ -87,10 +87,10 @@ AffixPatternMatcher AffixPatternMatcherBuilder::build() {
AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(const UChar* currencyCode,
const UnicodeString& currency1,
const UnicodeString& currency2,
const DecimalFormatSymbols& dfs,
IgnorablesMatcher* ignorables, const Locale& locale)
const UnicodeString* currency1,
const UnicodeString* currency2,
const DecimalFormatSymbols* dfs,
IgnorablesMatcher* ignorables, const Locale* locale)
: currency1(currency1),
currency2(currency2),
dfs(dfs),
@ -109,23 +109,23 @@ AffixTokenMatcherWarehouse::~AffixTokenMatcherWarehouse() {
}
NumberParseMatcher& AffixTokenMatcherWarehouse::minusSign() {
return fMinusSign = {dfs, true};
return fMinusSign = {*dfs, true};
}
NumberParseMatcher& AffixTokenMatcherWarehouse::plusSign() {
return fPlusSign = {dfs, true};
return fPlusSign = {*dfs, true};
}
NumberParseMatcher& AffixTokenMatcherWarehouse::percent() {
return fPercent = {dfs};
return fPercent = {*dfs};
}
NumberParseMatcher& AffixTokenMatcherWarehouse::permille() {
return fPermille = {dfs};
return fPermille = {*dfs};
}
NumberParseMatcher& AffixTokenMatcherWarehouse::currency(UErrorCode& status) {
return fCurrency = {{locale, status}, {currencyCode, currency1, currency2}};
return fCurrency = {{*locale, status}, {currencyCode, *currency1, *currency2}};
}
NumberParseMatcher& AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp) {
@ -193,8 +193,232 @@ AffixPatternMatcher AffixPatternMatcher::fromAffixPattern(const UnicodeString& a
AffixPatternMatcher::AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen,
const UnicodeString& pattern)
: ArraySeriesMatcher(matchers, matchersLen), fPattern(pattern) {
: ArraySeriesMatcher(matchers, matchersLen), fPattern(pattern) {}
// Returns the affix pattern string stored in fPattern.
// NOTE(review): the value comes from CompactUnicodeString::toAliasedUnicodeString(), which
// builds a UnicodeString aliasing the internal buffer; presumably the returned object must be
// copied if it has to outlive this matcher -- confirm against UnicodeString aliasing rules.
UnicodeString AffixPatternMatcher::getPattern() const {
return fPattern.toAliasedUnicodeString();
}
// Two AffixPatternMatchers compare equal when their stored pattern strings (fPattern) are
// equal; the matcher arrays themselves are not compared.
bool AffixPatternMatcher::operator==(const AffixPatternMatcher& other) const {
return fPattern == other.fPattern;
}
/**
 * Builds the AffixPatternMatchers and AffixMatchers for the given pattern and stores them in
 * this warehouse. (In Java, this is AffixMatcher#createMatchers().)
 *
 * @param patternInfo Provider of the positive/negative prefix and suffix patterns.
 * @param output The parser that is meant to receive the matchers. Currently unused: adding
 *               the sorted matchers to the parser is still a TODO below.
 * @param warehouse Token matcher warehouse; it is moved into fAffixTokenMatcherWarehouse.
 * @param ignorables Ignorable-character matcher consulted by isInteresting().
 * @param parseFlags Bit set of PARSE_FLAG_* values.
 * @param status Receives errors reported while building the affix pattern matchers.
 */
AffixMatcherWarehouse::AffixMatcherWarehouse(const AffixPatternProvider& patternInfo,
                                             NumberParserImpl& output,
                                             AffixTokenMatcherWarehouse& warehouse,
                                             const IgnorablesMatcher& ignorables, parse_flags_t parseFlags,
                                             UErrorCode& status)
        : fAffixTokenMatcherWarehouse(std::move(warehouse)) {
    if (!isInteresting(patternInfo, ignorables, parseFlags, status)) {
        return;
    }

    // The affixes have interesting characters, or we are in strict mode.
    // At most 6 AffixPatternMatchers and 9 AffixMatchers are produced (see the array members).
    UnicodeString sb;
    bool includeUnpaired = 0 != (parseFlags & PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES);
    UNumberSignDisplay signDisplay = (0 != (parseFlags & PARSE_FLAG_PLUS_SIGN_ALLOWED)) ? UNUM_SIGN_ALWAYS
                                                                                         : UNUM_SIGN_NEVER;

    int32_t numAffixMatchers = 0;
    int32_t numAffixPatternMatchers = 0;

    AffixPatternMatcher* posPrefix = nullptr;
    AffixPatternMatcher* posSuffix = nullptr;

    // Pre-process the affix strings to resolve LDML rules like sign display.
    for (int8_t signum = 1; signum >= -1; signum--) {
        // Generate Prefix.
        // The token matchers must be created in the member warehouse: `warehouse` has been
        // moved from in the initializer list and does not share this object's lifetime.
        bool hasPrefix = false;
        PatternStringUtils::patternInfoToStringBuilder(
                patternInfo, true, signum, signDisplay, StandardPlural::OTHER, false, sb);
        fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
                sb, fAffixTokenMatcherWarehouse, parseFlags, &hasPrefix, status);
        // The slot is only "committed" (index advanced) when a matcher was actually produced.
        AffixPatternMatcher* prefix = hasPrefix ? &fAffixPatternMatchers[numAffixPatternMatchers++]
                                                : nullptr;

        // Generate Suffix
        bool hasSuffix = false;
        PatternStringUtils::patternInfoToStringBuilder(
                patternInfo, false, signum, signDisplay, StandardPlural::OTHER, false, sb);
        fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
                sb, fAffixTokenMatcherWarehouse, parseFlags, &hasSuffix, status);
        AffixPatternMatcher* suffix = hasSuffix ? &fAffixPatternMatchers[numAffixPatternMatchers++]
                                                : nullptr;

        if (signum == 1) {
            posPrefix = prefix;
            posSuffix = suffix;
        } else if (equals(prefix, posPrefix) && equals(suffix, posSuffix)) {
            // Skip adding these matchers (we already have equivalents)
            continue;
        }

        // Flags for setting in the ParsedNumber
        int flags = (signum == -1) ? FLAG_NEGATIVE : 0;

        // Note: it is indeed possible for posPrefix and posSuffix to both be null.
        // We still need to add that matcher for strict mode to work.
        fAffixMatchers[numAffixMatchers++] = {prefix, suffix, flags};
        if (includeUnpaired && prefix != nullptr && suffix != nullptr) {
            // The following if statements are designed to prevent adding two identical matchers.
            if (signum == 1 || equals(prefix, posPrefix)) {
                fAffixMatchers[numAffixMatchers++] = {prefix, nullptr, flags};
            }
            if (signum == 1 || equals(suffix, posSuffix)) {
                fAffixMatchers[numAffixMatchers++] = {nullptr, suffix, flags};
            }
        }
    }

    // Put the AffixMatchers in order, and then add them to the output.
    // TODO
    // Collections.sort(matchers, COMPARATOR);
    // output.addMatchers(matchers);
}
bool AffixMatcherWarehouse::isInteresting(const AffixPatternProvider& patternInfo,
const IgnorablesMatcher& ignorables, parse_flags_t parseFlags,
UErrorCode& status) {
UnicodeStringCharSequence posPrefixString(patternInfo.getString(AffixPatternProvider::AFFIX_POS_PREFIX));
UnicodeStringCharSequence posSuffixString(patternInfo.getString(AffixPatternProvider::AFFIX_POS_SUFFIX));
UnicodeStringCharSequence negPrefixString(UnicodeString(u""));
UnicodeStringCharSequence negSuffixString(UnicodeString(u""));
if (patternInfo.hasNegativeSubpattern()) {
negPrefixString = UnicodeStringCharSequence(patternInfo.getString(AffixPatternProvider::AFFIX_NEG_PREFIX));
negSuffixString = UnicodeStringCharSequence(patternInfo.getString(AffixPatternProvider::AFFIX_NEG_SUFFIX));
}
if (0 == (parseFlags & PARSE_FLAG_USE_FULL_AFFIXES) &&
AffixUtils::containsOnlySymbolsAndIgnorables(posPrefixString, *ignorables.getSet(), status) &&
AffixUtils::containsOnlySymbolsAndIgnorables(posSuffixString, *ignorables.getSet(), status) &&
AffixUtils::containsOnlySymbolsAndIgnorables(negPrefixString, *ignorables.getSet(), status) &&
AffixUtils::containsOnlySymbolsAndIgnorables(negSuffixString, *ignorables.getSet(), status)
// HACK: Plus and minus sign are a special case: we accept them trailing only if they are
// trailing in the pattern string.
&& !AffixUtils::containsType(posSuffixString, TYPE_PLUS_SIGN, status) &&
!AffixUtils::containsType(posSuffixString, TYPE_MINUS_SIGN, status) &&
!AffixUtils::containsType(negSuffixString, TYPE_PLUS_SIGN, status) &&
!AffixUtils::containsType(negSuffixString, TYPE_MINUS_SIGN, status)) {
// The affixes contain only symbols and ignorables.
// No need to generate affix matchers.
return false;
}
return true;
}
/**
 * Null-tolerant equality: two null pointers are equal, a null and a non-null pointer are
 * not, and two non-null pointers are compared through AffixPatternMatcher::operator==.
 */
bool AffixMatcherWarehouse::equals(const AffixPatternMatcher* lhs, const AffixPatternMatcher* rhs) {
    if (lhs != nullptr && rhs != nullptr) {
        return *lhs == *rhs;
    }
    // At least one side is null: equal only when both are.
    return lhs == nullptr && rhs == nullptr;
}
// Stores the given prefix/suffix matcher pointers and result flags as-is. A null prefix or
// suffix is allowed; match() and matched() treat nullptr as "no affix on that side".
AffixMatcher::AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags)
: fPrefix(prefix), fSuffix(suffix), fFlags(flags) {}
/**
 * Attempts to match this matcher's prefix (before a number has been seen) or suffix (after a
 * number has been seen) at the current position of the segment.
 *
 * If the segment offset advanced, the matched pattern string is recorded in result.prefix or
 * result.suffix respectively. The return value is whatever the underlying affix pattern
 * matcher returned, or false when no match was attempted.
 */
bool AffixMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
    // Before the number we are looking for a prefix; afterwards for a suffix.
    const bool beforeNumber = !result.seenNumber();
    AffixPatternMatcher* const affix = beforeNumber ? fPrefix : fSuffix;
    auto& recorded = beforeNumber ? result.prefix : result.suffix;

    // Do not match if this side was already matched (the recorded string is no longer bogus)
    // or if this AffixMatcher has no affix on this side.
    if (affix == nullptr || !recorded.isBogus()) {
        return false;
    }
    // A suffix additionally requires that the previously matched prefix is this matcher's prefix.
    if (!beforeNumber && !matched(fPrefix, result.prefix)) {
        return false;
    }

    // Attempt the match; record the pattern only if something was consumed.
    const int offsetBefore = segment.getOffset();
    const bool maybeMore = affix->match(segment, result, status);
    if (segment.getOffset() != offsetBefore) {
        recorded = affix->getPattern();
    }
    return maybeMore;
}
/**
 * Returns the union of the lead code points of the prefix and suffix matchers.
 * The set is built on first use, frozen, and cached in fLocalLeadCodePoints.
 */
const UnicodeSet& AffixMatcher::getLeadCodePoints() {
    // Fast path: the set was already computed by an earlier call.
    if (!fLocalLeadCodePoints.isNull()) {
        return *fLocalLeadCodePoints;
    }

    UnicodeSet* combined = new UnicodeSet();
    if (fPrefix != nullptr) {
        combined->addAll(fPrefix->getLeadCodePoints());
    }
    if (fSuffix != nullptr) {
        combined->addAll(fSuffix->getLeadCodePoints());
    }
    combined->freeze();

    // Hand ownership to the cache member.
    fLocalLeadCodePoints.adoptInstead(combined);
    return *fLocalLeadCodePoints;
}
/**
 * If the prefix and suffix recorded in the result are exactly this matcher's affixes,
 * normalizes bogus prefix/suffix strings to empty strings and ORs this matcher's flags into
 * the result. Otherwise the result is left untouched.
 */
void AffixMatcher::postProcess(ParsedNumber& result) const {
    // Check to see if our affix is the one that was matched.
    const bool isOurAffixPair = matched(fPrefix, result.prefix) && matched(fSuffix, result.suffix);
    if (!isOurAffixPair) {
        return;
    }

    // Fill in the result prefix and suffix with non-null values (empty string).
    // Used by strict mode to determine whether an entire affix pair was matched.
    if (result.prefix.isBogus()) {
        result.prefix = UnicodeString();
    }
    if (result.suffix.isBogus()) {
        result.suffix = UnicodeString();
    }

    result.flags |= fFlags;
}
/**
 * Returns whether the given affix matcher corresponds to the given recorded pattern string:
 * a null matcher corresponds to a bogus string, a non-null matcher to a string equal to its
 * pattern.
 */
bool AffixMatcher::matched(const AffixPatternMatcher* affix, const UnicodeString& patternString) {
    if (affix == nullptr) {
        // No affix on this side: matched only if nothing was recorded.
        return static_cast<bool>(patternString.isBogus());
    }
    return static_cast<bool>(affix->getPattern() == patternString);
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -19,6 +19,9 @@ namespace impl {
class AffixPatternMatcherBuilder;
class AffixPatternMatcher;
using ::icu::number::impl::AffixPatternProvider;
using ::icu::number::impl::TokenConsumer;
class CodePointMatcher : public NumberParseMatcher, public UMemory {
public:
@ -51,9 +54,13 @@ class AffixTokenMatcherWarehouse {
static constexpr int32_t CODE_POINT_BATCH_SIZE = 10; // Number of entries per heap allocation
public:
AffixTokenMatcherWarehouse(const UChar* currencyCode, const UnicodeString& currency1,
const UnicodeString& currency2, const DecimalFormatSymbols& dfs,
IgnorablesMatcher* ignorables, const Locale& locale);
AffixTokenMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state
AffixTokenMatcherWarehouse(const UChar* currencyCode, const UnicodeString* currency1,
const UnicodeString* currency2, const DecimalFormatSymbols* dfs,
IgnorablesMatcher* ignorables, const Locale* locale);
AffixTokenMatcherWarehouse(AffixTokenMatcherWarehouse&& src) = default;
~AffixTokenMatcherWarehouse();
@ -70,12 +77,13 @@ class AffixTokenMatcherWarehouse {
NumberParseMatcher& nextCodePointMatcher(UChar32 cp);
private:
// NOTE: The following fields may be unsafe to access after construction is done!
UChar currencyCode[4];
const UnicodeString& currency1;
const UnicodeString& currency2;
const DecimalFormatSymbols& dfs;
const UnicodeString* currency1;
const UnicodeString* currency2;
const DecimalFormatSymbols* dfs;
IgnorablesMatcher* ignorables;
const Locale locale;
const Locale* locale;
// NOTE: These are default-constructed and should not be used until initialized.
MinusSignMatcher fMinusSign;
@ -94,7 +102,7 @@ class AffixTokenMatcherWarehouse {
};
class AffixPatternMatcherBuilder : public ::icu::number::impl::TokenConsumer {
class AffixPatternMatcherBuilder : public TokenConsumer {
public:
AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherWarehouse& warehouse,
IgnorablesMatcher* ignorables);
@ -119,15 +127,19 @@ class AffixPatternMatcherBuilder : public ::icu::number::impl::TokenConsumer {
class AffixPatternMatcher : public ArraySeriesMatcher {
public:
AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state
static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern,
AffixTokenMatcherWarehouse& warehouse,
parse_flags_t parseFlags, bool* success,
UErrorCode& status);
private:
UnicodeString fPattern;
UnicodeString getPattern() const;
AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state
bool operator==(const AffixPatternMatcher& other) const;
private:
CompactUnicodeString<4> fPattern;
AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern);
@ -135,6 +147,65 @@ class AffixPatternMatcher : public ArraySeriesMatcher {
};
/**
 * A matcher for one prefix/suffix pair together with the result flags that apply when that
 * pair was the one matched. Either side may be null, meaning "no affix on that side".
 */
class AffixMatcher : public NumberParseMatcher, public UMemory {
public:
AffixMatcher() = default; // WARNING: Leaves the object in an unusable state
/** Stores the given matcher pointers and flags; see the member comments below. */
AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags);
// static void createMatchers() is the constructor for AffixMatcherWarehouse in C++
/**
 * Tries to match the prefix if no number has been seen yet in the result, otherwise the
 * suffix; records the matched pattern string in the result when input was consumed.
 */
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
/** Applies fFlags to the result if the result's recorded prefix and suffix are this pair. */
void postProcess(ParsedNumber& result) const override;
/** Returns the union of the prefix and suffix lead code points. */
const UnicodeSet& getLeadCodePoints() override;
private:
// Prefix and suffix matchers; nullptr means there is no affix on that side.
// NOTE(review): these appear to point into AffixMatcherWarehouse::fAffixPatternMatchers and
// are never deleted here -- confirm ownership with the warehouse.
AffixPatternMatcher* fPrefix;
AffixPatternMatcher* fSuffix;
// Flags OR-ed into ParsedNumber::flags by postProcess().
result_flags_t fFlags;
/**
* Helper method to return whether the given AffixPatternMatcher equals the given pattern string.
* Either both arguments must be null or the pattern string inside the AffixPatternMatcher must equal
* the given pattern string.
*/
static bool matched(const AffixPatternMatcher* affix, const UnicodeString& patternString);
};
/**
* A C++-only class to retain ownership of the AffixMatchers needed for parsing.
*/
// Holds, by value, the AffixMatchers, the AffixPatternMatchers they point to, and the token
// matcher warehouse. NOTE(review): because AffixMatcher stores raw pointers to elements of
// fAffixPatternMatchers, copying or moving an instance would presumably leave those pointers
// referring to the source object -- confirm before relocating instances.
class AffixMatcherWarehouse {
public:
AffixMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state
// in Java, this is AffixMatcher#createMatchers()
// Builds all matchers for patternInfo; `warehouse` is moved into this object.
AffixMatcherWarehouse(const AffixPatternProvider& patternInfo, NumberParserImpl& output,
AffixTokenMatcherWarehouse& warehouse, const IgnorablesMatcher& ignorables,
parse_flags_t parseFlags, UErrorCode& status);
private:
// 9 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix
AffixMatcher fAffixMatchers[9];
// 6 is the limit: positive, zero, and negative, a prefix and a suffix for each
AffixPatternMatcher fAffixPatternMatchers[6];
// Store all the tokens used by the AffixPatternMatchers
AffixTokenMatcherWarehouse fAffixTokenMatcherWarehouse;
// Returns false only when the affixes contain nothing but symbols and ignorables (and
// full-affix parsing was not requested), in which case no affix matchers are generated.
static bool isInteresting(const AffixPatternProvider& patternInfo, const IgnorablesMatcher& ignorables,
parse_flags_t parseFlags, UErrorCode& status);
/**
* Helper method to return whether (1) both lhs and rhs are null/invalid, or (2) if they are both
* valid, whether they are equal according to operator==. Similar to Java Objects.equals()
*/
static bool equals(const AffixPatternMatcher* lhs, const AffixPatternMatcher* rhs);
};
} // namespace impl
} // namespace numparse
U_NAMESPACE_END

View file

@ -23,7 +23,7 @@ SymbolMatcher::SymbolMatcher(const UnicodeString& symbolString, unisets::Key key
}
}
const UnicodeSet* SymbolMatcher::getSet() {
const UnicodeSet* SymbolMatcher::getSet() const {
return fUniSet;
}

View file

@ -24,7 +24,7 @@ class SymbolMatcher : public NumberParseMatcher, public UMemory {
public:
SymbolMatcher() = default; // WARNING: Leaves the object in an unusable state
const UnicodeSet* getSet();
const UnicodeSet* getSet() const;
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;

View file

@ -48,11 +48,35 @@ enum ParseFlags {
PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400,
};
//template<typename T>
//struct MaybeNeedsAdoption {
// T* ptr;
// bool needsAdoption;
//};
// TODO: Is this class worthwhile?
template<int32_t stackCapacity>
class CompactUnicodeString {
public:
CompactUnicodeString() {
static_assert(stackCapacity > 0, "cannot have zero space on stack");
fBuffer[0] = 0;
}
CompactUnicodeString(const UnicodeString& text)
: fBuffer(text.length() + 1) {
memcpy(fBuffer.getAlias(), text.getBuffer(), sizeof(UChar) * text.length());
fBuffer[text.length()] = 0;
}
inline UnicodeString toAliasedUnicodeString() const {
return UnicodeString(TRUE, fBuffer.getAlias(), -1);
}
bool operator==(const CompactUnicodeString& other) const {
// Use the alias-only constructor and then call UnicodeString operator==
return toAliasedUnicodeString() == other.toAliasedUnicodeString();
}
private:
MaybeStackArray<UChar, stackCapacity> fBuffer;
};
/**
* Struct-like class to hold the results of a parsing routine.

View file

@ -215,8 +215,12 @@ void NumberParserTest::testSeriesMatcher() {
void NumberParserTest::testCurrencyAnyMatcher() {
IcuTestErrorCode status(*this, "testCurrencyAnyMatcher");
UnicodeString currency1(u"IU$");
UnicodeString currency2(u"ICU");
DecimalFormatSymbols symbols("en", status);
IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
AffixTokenMatcherWarehouse warehouse(u"ICU", u"IU$", u"ICU", {"en",status}, &ignorables, "en");
Locale locale("en");
AffixTokenMatcherWarehouse warehouse(u"ICU", &currency1, &currency2, &symbols, &ignorables, &locale);
NumberParseMatcher& matcher = warehouse.currency(status);
static const struct TestCase{
@ -248,8 +252,12 @@ void NumberParserTest::testCurrencyAnyMatcher() {
void NumberParserTest::testAffixPatternMatcher() {
IcuTestErrorCode status(*this, "testAffixPatternMatcher");
UnicodeString currency1(u"foo");
UnicodeString currency2(u"bar");
DecimalFormatSymbols symbols("en", status);
IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
AffixTokenMatcherWarehouse warehouse(u"EUR", u"foo", u"bar", {"en", status}, &ignorables, "en");
Locale locale("en");
AffixTokenMatcherWarehouse warehouse(u"EUR", &currency1, &currency2, &symbols, &ignorables, &locale);
static const struct TestCase {
bool exactMatch;