mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 06:25:30 +00:00
ICU-13574 Checkpoint commit. AffixMatcher is mostly implemented.
X-SVN-Rev: 40894
This commit is contained in:
parent
a335b723c7
commit
afbb37febd
13 changed files with 663 additions and 297 deletions
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include "number_affixutils.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::number;
|
||||
|
@ -239,6 +240,22 @@ UnicodeString AffixUtils::replaceType(const CharSequence &affixPattern, AffixPat
|
|||
return output;
|
||||
}
|
||||
|
||||
bool AffixUtils::containsOnlySymbolsAndIgnorables(const CharSequence& affixPattern,
|
||||
const UnicodeSet& ignorables, UErrorCode& status) {
|
||||
if (affixPattern.length() == 0) {
|
||||
return true;
|
||||
};
|
||||
AffixTag tag;
|
||||
while (hasNext(tag, affixPattern)) {
|
||||
tag = nextToken(tag, affixPattern, status);
|
||||
if (U_FAILURE(status)) { return false; }
|
||||
if (tag.type == TYPE_CODEPOINT && !ignorables.contains(tag.codePoint)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void AffixUtils::iterateWithConsumer(const CharSequence& affixPattern, TokenConsumer& consumer,
|
||||
UErrorCode& status) {
|
||||
if (affixPattern.length() == 0) {
|
||||
|
|
|
@ -37,13 +37,14 @@ struct AffixTag {
|
|||
AffixPatternState state;
|
||||
AffixPatternType type;
|
||||
|
||||
AffixTag() : offset(0), state(STATE_BASE) {}
|
||||
AffixTag()
|
||||
: offset(0), state(STATE_BASE) {}
|
||||
|
||||
AffixTag(int32_t offset) : offset(offset) {}
|
||||
AffixTag(int32_t offset)
|
||||
: offset(offset) {}
|
||||
|
||||
AffixTag(int32_t offset, UChar32 codePoint, AffixPatternState state, AffixPatternType type)
|
||||
: offset(offset), codePoint(codePoint), state(state), type(type)
|
||||
{}
|
||||
: offset(offset), codePoint(codePoint), state(state), type(type) {}
|
||||
};
|
||||
|
||||
class TokenConsumer {
|
||||
|
@ -112,7 +113,7 @@ class U_I18N_API AffixUtils {
|
|||
* @param patternString The original string whose width will be estimated.
|
||||
* @return The length of the unescaped string.
|
||||
*/
|
||||
static int32_t estimateLength(const CharSequence &patternString, UErrorCode &status);
|
||||
static int32_t estimateLength(const CharSequence& patternString, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Takes a string and escapes (quotes) characters that have special meaning in the affix pattern
|
||||
|
@ -123,7 +124,7 @@ class U_I18N_API AffixUtils {
|
|||
* @param input The string to be escaped.
|
||||
* @return The resulting UnicodeString.
|
||||
*/
|
||||
static UnicodeString escape(const CharSequence &input);
|
||||
static UnicodeString escape(const CharSequence& input);
|
||||
|
||||
static Field getFieldForType(AffixPatternType type);
|
||||
|
||||
|
@ -139,9 +140,8 @@ class U_I18N_API AffixUtils {
|
|||
* @param position The index into the NumberStringBuilder to insert the string.
|
||||
* @param provider An object to generate locale symbols.
|
||||
*/
|
||||
static int32_t
|
||||
unescape(const CharSequence &affixPattern, NumberStringBuilder &output, int32_t position,
|
||||
const SymbolProvider &provider, UErrorCode &status);
|
||||
static int32_t unescape(const CharSequence& affixPattern, NumberStringBuilder& output,
|
||||
int32_t position, const SymbolProvider& provider, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Same as {@link #unescape}, but only calculates the code point count. More efficient than {@link #unescape}
|
||||
|
@ -151,8 +151,8 @@ class U_I18N_API AffixUtils {
|
|||
* @param provider An object to generate locale symbols.
|
||||
* @return The same return value as if you called {@link #unescape}.
|
||||
*/
|
||||
static int32_t unescapedCodePointCount(const CharSequence &affixPattern,
|
||||
const SymbolProvider &provider, UErrorCode &status);
|
||||
static int32_t unescapedCodePointCount(const CharSequence& affixPattern,
|
||||
const SymbolProvider& provider, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Checks whether the given affix pattern contains at least one token of the given type, which is
|
||||
|
@ -162,8 +162,7 @@ class U_I18N_API AffixUtils {
|
|||
* @param type The token type.
|
||||
* @return true if the affix pattern contains the given token type; false otherwise.
|
||||
*/
|
||||
static bool
|
||||
containsType(const CharSequence &affixPattern, AffixPatternType type, UErrorCode &status);
|
||||
static bool containsType(const CharSequence& affixPattern, AffixPatternType type, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Checks whether the specified affix pattern has any unquoted currency symbols ("¤").
|
||||
|
@ -171,7 +170,7 @@ class U_I18N_API AffixUtils {
|
|||
* @param affixPattern The string to check for currency symbols.
|
||||
* @return true if the literal has at least one unquoted currency symbol; false otherwise.
|
||||
*/
|
||||
static bool hasCurrencySymbols(const CharSequence &affixPattern, UErrorCode &status);
|
||||
static bool hasCurrencySymbols(const CharSequence& affixPattern, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Replaces all occurrences of tokens with the given type with the given replacement char.
|
||||
|
@ -181,9 +180,15 @@ class U_I18N_API AffixUtils {
|
|||
* @param replacementChar The char to substitute in place of chars of the given token type.
|
||||
* @return A string containing the new affix pattern.
|
||||
*/
|
||||
static UnicodeString
|
||||
replaceType(const CharSequence &affixPattern, AffixPatternType type, char16_t replacementChar,
|
||||
UErrorCode &status);
|
||||
static UnicodeString replaceType(const CharSequence& affixPattern, AffixPatternType type,
|
||||
char16_t replacementChar, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Returns whether the given affix pattern contains only symbols and ignorables as defined by the
|
||||
* given ignorables set.
|
||||
*/
|
||||
static bool containsOnlySymbolsAndIgnorables(const CharSequence& affixPattern,
|
||||
const UnicodeSet& ignorables, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Iterates over the affix pattern, calling the TokenConsumer for each token.
|
||||
|
@ -201,7 +206,7 @@ class U_I18N_API AffixUtils {
|
|||
* (never negative), or -1 if there were no more tokens in the affix pattern.
|
||||
* @see #hasNext
|
||||
*/
|
||||
static AffixTag nextToken(AffixTag tag, const CharSequence &patternString, UErrorCode &status);
|
||||
static AffixTag nextToken(AffixTag tag, const CharSequence& patternString, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Returns whether the affix pattern string has any more tokens to be retrieved from a call to
|
||||
|
@ -211,7 +216,7 @@ class U_I18N_API AffixUtils {
|
|||
* @param string The affix pattern.
|
||||
* @return true if there are more tokens to consume; false otherwise.
|
||||
*/
|
||||
static bool hasNext(const AffixTag &tag, const CharSequence &string);
|
||||
static bool hasNext(const AffixTag& tag, const CharSequence& string);
|
||||
|
||||
private:
|
||||
/**
|
||||
|
@ -219,8 +224,8 @@ class U_I18N_API AffixUtils {
|
|||
* The order of the arguments is consistent with Java, but the order of the stored
|
||||
* fields is not necessarily the same.
|
||||
*/
|
||||
static inline AffixTag
|
||||
makeTag(int32_t offset, AffixPatternType type, AffixPatternState state, UChar32 cp) {
|
||||
static inline AffixTag makeTag(int32_t offset, AffixPatternType type, AffixPatternState state,
|
||||
UChar32 cp) {
|
||||
return {offset, cp, state, type};
|
||||
}
|
||||
};
|
||||
|
|
|
@ -15,9 +15,10 @@ using namespace icu;
|
|||
using namespace icu::number;
|
||||
using namespace icu::number::impl;
|
||||
|
||||
MutablePatternModifier::MutablePatternModifier(bool isStrong) : fStrong(isStrong) {}
|
||||
MutablePatternModifier::MutablePatternModifier(bool isStrong)
|
||||
: fStrong(isStrong) {}
|
||||
|
||||
void MutablePatternModifier::setPatternInfo(const AffixPatternProvider *patternInfo) {
|
||||
void MutablePatternModifier::setPatternInfo(const AffixPatternProvider* patternInfo) {
|
||||
this->patternInfo = patternInfo;
|
||||
}
|
||||
|
||||
|
@ -26,12 +27,11 @@ void MutablePatternModifier::setPatternAttributes(UNumberSignDisplay signDisplay
|
|||
this->perMilleReplacesPercent = perMille;
|
||||
}
|
||||
|
||||
void
|
||||
MutablePatternModifier::setSymbols(const DecimalFormatSymbols *symbols, const CurrencyUnit &currency,
|
||||
const UNumberUnitWidth unitWidth, const PluralRules *rules) {
|
||||
void MutablePatternModifier::setSymbols(const DecimalFormatSymbols* symbols, const CurrencyUnit& currency,
|
||||
const UNumberUnitWidth unitWidth, const PluralRules* rules) {
|
||||
U_ASSERT((rules != nullptr) == needsPlurals());
|
||||
this->symbols = symbols;
|
||||
uprv_memcpy(static_cast<char16_t *>(this->currencyCode),
|
||||
uprv_memcpy(static_cast<char16_t*>(this->currencyCode),
|
||||
currency.getISOCurrency(),
|
||||
sizeof(char16_t) * 4);
|
||||
this->unitWidth = unitWidth;
|
||||
|
@ -49,12 +49,12 @@ bool MutablePatternModifier::needsPlurals() const {
|
|||
// Silently ignore any error codes.
|
||||
}
|
||||
|
||||
ImmutablePatternModifier *MutablePatternModifier::createImmutable(UErrorCode &status) {
|
||||
ImmutablePatternModifier* MutablePatternModifier::createImmutable(UErrorCode& status) {
|
||||
return createImmutableAndChain(nullptr, status);
|
||||
}
|
||||
|
||||
ImmutablePatternModifier *
|
||||
MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator *parent, UErrorCode &status) {
|
||||
ImmutablePatternModifier*
|
||||
MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator* parent, UErrorCode& status) {
|
||||
|
||||
// TODO: Move StandardPlural VALUES to standardplural.h
|
||||
static const StandardPlural::Form STANDARD_PLURAL_VALUES[] = {
|
||||
|
@ -89,11 +89,11 @@ MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator *paren
|
|||
} else {
|
||||
// Faster path when plural keyword is not needed.
|
||||
setNumberProperties(1, StandardPlural::Form::COUNT);
|
||||
Modifier *positive = createConstantModifier(status);
|
||||
Modifier* positive = createConstantModifier(status);
|
||||
setNumberProperties(0, StandardPlural::Form::COUNT);
|
||||
Modifier *zero = createConstantModifier(status);
|
||||
Modifier* zero = createConstantModifier(status);
|
||||
setNumberProperties(-1, StandardPlural::Form::COUNT);
|
||||
Modifier *negative = createConstantModifier(status);
|
||||
Modifier* negative = createConstantModifier(status);
|
||||
pm->adoptPositiveNegativeModifiers(positive, zero, negative);
|
||||
if (U_FAILURE(status)) {
|
||||
delete pm;
|
||||
|
@ -103,29 +103,30 @@ MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator *paren
|
|||
}
|
||||
}
|
||||
|
||||
ConstantMultiFieldModifier *MutablePatternModifier::createConstantModifier(UErrorCode &status) {
|
||||
ConstantMultiFieldModifier* MutablePatternModifier::createConstantModifier(UErrorCode& status) {
|
||||
NumberStringBuilder a;
|
||||
NumberStringBuilder b;
|
||||
insertPrefix(a, 0, status);
|
||||
insertSuffix(b, 0, status);
|
||||
if (patternInfo->hasCurrencySign()) {
|
||||
return new CurrencySpacingEnabledModifier(a, b, !patternInfo->hasBody(), fStrong, *symbols, status);
|
||||
return new CurrencySpacingEnabledModifier(
|
||||
a, b, !patternInfo->hasBody(), fStrong, *symbols, status);
|
||||
} else {
|
||||
return new ConstantMultiFieldModifier(a, b, !patternInfo->hasBody(), fStrong);
|
||||
}
|
||||
}
|
||||
|
||||
ImmutablePatternModifier::ImmutablePatternModifier(ParameterizedModifier *pm, const PluralRules *rules,
|
||||
const MicroPropsGenerator *parent)
|
||||
ImmutablePatternModifier::ImmutablePatternModifier(ParameterizedModifier* pm, const PluralRules* rules,
|
||||
const MicroPropsGenerator* parent)
|
||||
: pm(pm), rules(rules), parent(parent) {}
|
||||
|
||||
void ImmutablePatternModifier::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
|
||||
UErrorCode &status) const {
|
||||
void ImmutablePatternModifier::processQuantity(DecimalQuantity& quantity, MicroProps& micros,
|
||||
UErrorCode& status) const {
|
||||
parent->processQuantity(quantity, micros, status);
|
||||
applyToMicros(micros, quantity);
|
||||
}
|
||||
|
||||
void ImmutablePatternModifier::applyToMicros(MicroProps &micros, DecimalQuantity &quantity) const {
|
||||
void ImmutablePatternModifier::applyToMicros(MicroProps& micros, DecimalQuantity& quantity) const {
|
||||
if (rules == nullptr) {
|
||||
micros.modMiddle = pm->getModifier(quantity.signum());
|
||||
} else {
|
||||
|
@ -138,17 +139,17 @@ void ImmutablePatternModifier::applyToMicros(MicroProps &micros, DecimalQuantity
|
|||
}
|
||||
|
||||
/** Used by the unsafe code path. */
|
||||
MicroPropsGenerator &MutablePatternModifier::addToChain(const MicroPropsGenerator *parent) {
|
||||
MicroPropsGenerator& MutablePatternModifier::addToChain(const MicroPropsGenerator* parent) {
|
||||
this->parent = parent;
|
||||
return *this;
|
||||
}
|
||||
|
||||
void MutablePatternModifier::processQuantity(DecimalQuantity &fq, MicroProps &micros,
|
||||
UErrorCode &status) const {
|
||||
void MutablePatternModifier::processQuantity(DecimalQuantity& fq, MicroProps& micros,
|
||||
UErrorCode& status) const {
|
||||
parent->processQuantity(fq, micros, status);
|
||||
// The unsafe code path performs self-mutation, so we need a const_cast.
|
||||
// This method needs to be const because it overrides a const method in the parent class.
|
||||
auto nonConstThis = const_cast<MutablePatternModifier *>(this);
|
||||
auto nonConstThis = const_cast<MutablePatternModifier*>(this);
|
||||
if (needsPlurals()) {
|
||||
// TODO: Fix this. Avoid the copy.
|
||||
DecimalQuantity copy(fq);
|
||||
|
@ -160,20 +161,24 @@ void MutablePatternModifier::processQuantity(DecimalQuantity &fq, MicroProps &mi
|
|||
micros.modMiddle = this;
|
||||
}
|
||||
|
||||
int32_t MutablePatternModifier::apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
|
||||
UErrorCode &status) const {
|
||||
int32_t MutablePatternModifier::apply(NumberStringBuilder& output, int32_t leftIndex, int32_t rightIndex,
|
||||
UErrorCode& status) const {
|
||||
// The unsafe code path performs self-mutation, so we need a const_cast.
|
||||
// This method needs to be const because it overrides a const method in the parent class.
|
||||
auto nonConstThis = const_cast<MutablePatternModifier *>(this);
|
||||
auto nonConstThis = const_cast<MutablePatternModifier*>(this);
|
||||
int32_t prefixLen = nonConstThis->insertPrefix(output, leftIndex, status);
|
||||
int32_t suffixLen = nonConstThis->insertSuffix(output, rightIndex + prefixLen, status);
|
||||
// If the pattern had no decimal stem body (like #,##0.00), overwrite the value.
|
||||
int32_t overwriteLen = 0;
|
||||
if (!patternInfo->hasBody()) {
|
||||
overwriteLen = output.splice(
|
||||
leftIndex + prefixLen, rightIndex + prefixLen,
|
||||
UnicodeString(), 0, 0, UNUM_FIELD_COUNT,
|
||||
status);
|
||||
leftIndex + prefixLen,
|
||||
rightIndex + prefixLen,
|
||||
UnicodeString(),
|
||||
0,
|
||||
0,
|
||||
UNUM_FIELD_COUNT,
|
||||
status);
|
||||
}
|
||||
CurrencySpacingEnabledModifier::applyCurrencySpacing(
|
||||
output,
|
||||
|
@ -186,30 +191,36 @@ int32_t MutablePatternModifier::apply(NumberStringBuilder &output, int32_t leftI
|
|||
return prefixLen + overwriteLen + suffixLen;
|
||||
}
|
||||
|
||||
int32_t MutablePatternModifier::getPrefixLength(UErrorCode &status) const {
|
||||
int32_t MutablePatternModifier::getPrefixLength(UErrorCode& status) const {
|
||||
// The unsafe code path performs self-mutation, so we need a const_cast.
|
||||
// This method needs to be const because it overrides a const method in the parent class.
|
||||
auto nonConstThis = const_cast<MutablePatternModifier *>(this);
|
||||
auto nonConstThis = const_cast<MutablePatternModifier*>(this);
|
||||
|
||||
// Enter and exit CharSequence Mode to get the length.
|
||||
nonConstThis->enterCharSequenceMode(true);
|
||||
int result = AffixUtils::unescapedCodePointCount(*this, *this, status); // prefix length
|
||||
nonConstThis->exitCharSequenceMode();
|
||||
nonConstThis->prepareAffix(true);
|
||||
int result = AffixUtils::unescapedCodePointCount(
|
||||
UnicodeStringCharSequence(currentAffix),
|
||||
*this,
|
||||
status); // prefix length
|
||||
return result;
|
||||
}
|
||||
|
||||
int32_t MutablePatternModifier::getCodePointCount(UErrorCode &status) const {
|
||||
int32_t MutablePatternModifier::getCodePointCount(UErrorCode& status) const {
|
||||
// The unsafe code path performs self-mutation, so we need a const_cast.
|
||||
// This method needs to be const because it overrides a const method in the parent class.
|
||||
auto nonConstThis = const_cast<MutablePatternModifier *>(this);
|
||||
auto nonConstThis = const_cast<MutablePatternModifier*>(this);
|
||||
|
||||
// Enter and exit CharSequence Mode to get the length.
|
||||
nonConstThis->enterCharSequenceMode(true);
|
||||
int result = AffixUtils::unescapedCodePointCount(*this, *this, status); // prefix length
|
||||
nonConstThis->exitCharSequenceMode();
|
||||
nonConstThis->enterCharSequenceMode(false);
|
||||
result += AffixUtils::unescapedCodePointCount(*this, *this, status); // suffix length
|
||||
nonConstThis->exitCharSequenceMode();
|
||||
// Render the affixes to get the length
|
||||
nonConstThis->prepareAffix(true);
|
||||
int result = AffixUtils::unescapedCodePointCount(
|
||||
UnicodeStringCharSequence(currentAffix),
|
||||
*this,
|
||||
status); // prefix length
|
||||
nonConstThis->prepareAffix(false);
|
||||
result += AffixUtils::unescapedCodePointCount(
|
||||
UnicodeStringCharSequence(currentAffix),
|
||||
*this,
|
||||
status); // suffix length
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -217,20 +228,26 @@ bool MutablePatternModifier::isStrong() const {
|
|||
return fStrong;
|
||||
}
|
||||
|
||||
int32_t MutablePatternModifier::insertPrefix(NumberStringBuilder &sb, int position, UErrorCode &status) {
|
||||
enterCharSequenceMode(true);
|
||||
int length = AffixUtils::unescape(*this, sb, position, *this, status);
|
||||
exitCharSequenceMode();
|
||||
int32_t MutablePatternModifier::insertPrefix(NumberStringBuilder& sb, int position, UErrorCode& status) {
|
||||
prepareAffix(true);
|
||||
int length = AffixUtils::unescape(
|
||||
UnicodeStringCharSequence(currentAffix), sb, position, *this, status);
|
||||
return length;
|
||||
}
|
||||
|
||||
int32_t MutablePatternModifier::insertSuffix(NumberStringBuilder &sb, int position, UErrorCode &status) {
|
||||
enterCharSequenceMode(false);
|
||||
int length = AffixUtils::unescape(*this, sb, position, *this, status);
|
||||
exitCharSequenceMode();
|
||||
int32_t MutablePatternModifier::insertSuffix(NumberStringBuilder& sb, int position, UErrorCode& status) {
|
||||
prepareAffix(false);
|
||||
int length = AffixUtils::unescape(
|
||||
UnicodeStringCharSequence(currentAffix), sb, position, *this, status);
|
||||
return length;
|
||||
}
|
||||
|
||||
/** This method contains the heart of the logic for rendering LDML affix strings. */
|
||||
void MutablePatternModifier::prepareAffix(bool isPrefix) {
|
||||
PatternStringUtils::patternInfoToStringBuilder(
|
||||
*patternInfo, isPrefix, signum, signDisplay, plural, perMilleReplacesPercent, currentAffix);
|
||||
}
|
||||
|
||||
UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const {
|
||||
switch (type) {
|
||||
case AffixPatternType::TYPE_MINUS_SIGN:
|
||||
|
@ -249,12 +266,12 @@ UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const {
|
|||
return UnicodeString();
|
||||
} else {
|
||||
UCurrNameStyle selector = (unitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_NARROW)
|
||||
? UCurrNameStyle::UCURR_NARROW_SYMBOL_NAME
|
||||
: UCurrNameStyle::UCURR_SYMBOL_NAME;
|
||||
? UCurrNameStyle::UCURR_NARROW_SYMBOL_NAME
|
||||
: UCurrNameStyle::UCURR_SYMBOL_NAME;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UBool isChoiceFormat = FALSE;
|
||||
int32_t symbolLen = 0;
|
||||
const char16_t *symbol = ucurr_getName(
|
||||
const char16_t* symbol = ucurr_getName(
|
||||
currencyCode,
|
||||
symbols->getLocale().getName(),
|
||||
selector,
|
||||
|
@ -274,7 +291,7 @@ UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const {
|
|||
UErrorCode status = U_ZERO_ERROR;
|
||||
UBool isChoiceFormat = FALSE;
|
||||
int32_t symbolLen = 0;
|
||||
const char16_t *symbol = ucurr_getPluralName(
|
||||
const char16_t* symbol = ucurr_getPluralName(
|
||||
currencyCode,
|
||||
symbols->getLocale().getName(),
|
||||
&isChoiceFormat,
|
||||
|
@ -293,79 +310,6 @@ UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const {
|
|||
}
|
||||
}
|
||||
|
||||
/** This method contains the heart of the logic for rendering LDML affix strings. */
|
||||
void MutablePatternModifier::enterCharSequenceMode(bool isPrefix) {
|
||||
U_ASSERT(!inCharSequenceMode);
|
||||
inCharSequenceMode = true;
|
||||
|
||||
// Should the output render '+' where '-' would normally appear in the pattern?
|
||||
plusReplacesMinusSign = signum != -1
|
||||
&& (signDisplay == UNUM_SIGN_ALWAYS
|
||||
|| signDisplay == UNUM_SIGN_ACCOUNTING_ALWAYS
|
||||
|| (signum == 1
|
||||
&& (signDisplay == UNUM_SIGN_EXCEPT_ZERO
|
||||
|| signDisplay == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO)))
|
||||
&& patternInfo->positiveHasPlusSign() == false;
|
||||
|
||||
// Should we use the affix from the negative subpattern? (If not, we will use the positive subpattern.)
|
||||
bool useNegativeAffixPattern = patternInfo->hasNegativeSubpattern() && (
|
||||
signum == -1 || (patternInfo->negativeHasMinusSign() && plusReplacesMinusSign));
|
||||
|
||||
// Resolve the flags for the affix pattern.
|
||||
fFlags = 0;
|
||||
if (useNegativeAffixPattern) {
|
||||
fFlags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN;
|
||||
}
|
||||
if (isPrefix) {
|
||||
fFlags |= AffixPatternProvider::AFFIX_PREFIX;
|
||||
}
|
||||
if (plural != StandardPlural::Form::COUNT) {
|
||||
U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural));
|
||||
fFlags |= plural;
|
||||
}
|
||||
|
||||
// Should we prepend a sign to the pattern?
|
||||
if (!isPrefix || useNegativeAffixPattern) {
|
||||
prependSign = false;
|
||||
} else if (signum == -1) {
|
||||
prependSign = signDisplay != UNUM_SIGN_NEVER;
|
||||
} else {
|
||||
prependSign = plusReplacesMinusSign;
|
||||
}
|
||||
|
||||
// Finally, compute the length of the affix pattern.
|
||||
fLength = patternInfo->length(fFlags) + (prependSign ? 1 : 0);
|
||||
}
|
||||
|
||||
void MutablePatternModifier::exitCharSequenceMode() {
|
||||
U_ASSERT(inCharSequenceMode);
|
||||
inCharSequenceMode = false;
|
||||
}
|
||||
|
||||
int32_t MutablePatternModifier::length() const {
|
||||
U_ASSERT(inCharSequenceMode);
|
||||
return fLength;
|
||||
}
|
||||
|
||||
char16_t MutablePatternModifier::charAt(int32_t index) const {
|
||||
U_ASSERT(inCharSequenceMode);
|
||||
char16_t candidate;
|
||||
if (prependSign && index == 0) {
|
||||
candidate = u'-';
|
||||
} else if (prependSign) {
|
||||
candidate = patternInfo->charAt(fFlags, index - 1);
|
||||
} else {
|
||||
candidate = patternInfo->charAt(fFlags, index);
|
||||
}
|
||||
if (plusReplacesMinusSign && candidate == u'-') {
|
||||
return u'+';
|
||||
}
|
||||
if (perMilleReplacesPercent && candidate == u'%') {
|
||||
return u'‰';
|
||||
}
|
||||
return candidate;
|
||||
}
|
||||
|
||||
UnicodeString MutablePatternModifier::toUnicodeString() const {
|
||||
// Never called by AffixUtils
|
||||
U_ASSERT(false);
|
||||
|
|
|
@ -35,20 +35,21 @@ class MutablePatternModifier;
|
|||
// Exported as U_I18N_API because it is needed for the unit test PatternModifierTest
|
||||
class U_I18N_API ImmutablePatternModifier : public MicroPropsGenerator, public UMemory {
|
||||
public:
|
||||
~ImmutablePatternModifier() U_OVERRIDE = default;
|
||||
~ImmutablePatternModifier() U_OVERRIDE = default;
|
||||
|
||||
void processQuantity(DecimalQuantity &, MicroProps &micros, UErrorCode &status) const U_OVERRIDE;
|
||||
void processQuantity(DecimalQuantity&, MicroProps& micros, UErrorCode& status) const U_OVERRIDE;
|
||||
|
||||
void applyToMicros(MicroProps &micros, DecimalQuantity &quantity) const;
|
||||
void applyToMicros(MicroProps& micros, DecimalQuantity& quantity) const;
|
||||
|
||||
private:
|
||||
ImmutablePatternModifier(ParameterizedModifier *pm, const PluralRules *rules, const MicroPropsGenerator *parent);
|
||||
ImmutablePatternModifier(ParameterizedModifier* pm, const PluralRules* rules,
|
||||
const MicroPropsGenerator* parent);
|
||||
|
||||
const LocalPointer<ParameterizedModifier> pm;
|
||||
const PluralRules *rules;
|
||||
const MicroPropsGenerator *parent;
|
||||
const PluralRules* rules;
|
||||
const MicroPropsGenerator* parent;
|
||||
|
||||
friend class MutablePatternModifier;
|
||||
friend class MutablePatternModifier;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -74,7 +75,6 @@ class U_I18N_API MutablePatternModifier
|
|||
: public MicroPropsGenerator,
|
||||
public Modifier,
|
||||
public SymbolProvider,
|
||||
public CharSequence,
|
||||
public UMemory {
|
||||
public:
|
||||
|
||||
|
@ -187,13 +187,7 @@ class U_I18N_API MutablePatternModifier
|
|||
*/
|
||||
UnicodeString getSymbol(AffixPatternType type) const U_OVERRIDE;
|
||||
|
||||
int32_t length() const U_OVERRIDE;
|
||||
|
||||
char16_t charAt(int32_t index) const U_OVERRIDE;
|
||||
|
||||
// Use default implementation of codePointAt
|
||||
|
||||
UnicodeString toUnicodeString() const U_OVERRIDE;
|
||||
UnicodeString toUnicodeString() const;
|
||||
|
||||
private:
|
||||
// Modifier details (initialized in constructor)
|
||||
|
@ -217,12 +211,8 @@ class U_I18N_API MutablePatternModifier
|
|||
// QuantityChain details (initialized in addToChain)
|
||||
const MicroPropsGenerator *parent;
|
||||
|
||||
// Transient CharSequence fields (initialized in enterCharSequenceMode)
|
||||
bool inCharSequenceMode = false;
|
||||
int32_t fFlags;
|
||||
int32_t fLength;
|
||||
bool prependSign;
|
||||
bool plusReplacesMinusSign;
|
||||
// Transient fields for rendering
|
||||
UnicodeString currentAffix;
|
||||
|
||||
/**
|
||||
* Uses the current properties to create a single {@link ConstantMultiFieldModifier} with currency spacing support
|
||||
|
@ -244,9 +234,7 @@ class U_I18N_API MutablePatternModifier
|
|||
|
||||
int32_t insertSuffix(NumberStringBuilder &sb, int position, UErrorCode &status);
|
||||
|
||||
void enterCharSequenceMode(bool isPrefix);
|
||||
|
||||
void exitCharSequenceMode();
|
||||
void prepareAffix(bool isPrefix);
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -14,25 +14,27 @@ using namespace icu;
|
|||
using namespace icu::number;
|
||||
using namespace icu::number::impl;
|
||||
|
||||
void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo, UErrorCode &status) {
|
||||
void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo,
|
||||
UErrorCode& status) {
|
||||
patternInfo.consumePattern(patternString, status);
|
||||
}
|
||||
|
||||
DecimalFormatProperties
|
||||
PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding,
|
||||
UErrorCode &status) {
|
||||
UErrorCode& status) {
|
||||
DecimalFormatProperties properties;
|
||||
parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
|
||||
return properties;
|
||||
}
|
||||
|
||||
void PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties,
|
||||
IgnoreRounding ignoreRounding, UErrorCode &status) {
|
||||
void
|
||||
PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties,
|
||||
IgnoreRounding ignoreRounding, UErrorCode& status) {
|
||||
parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status);
|
||||
}
|
||||
|
||||
char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const {
|
||||
const Endpoints &endpoints = getEndpoints(flags);
|
||||
const Endpoints& endpoints = getEndpoints(flags);
|
||||
if (index < 0 || index >= endpoints.end - endpoints.start) {
|
||||
U_ASSERT(false);
|
||||
}
|
||||
|
@ -43,12 +45,12 @@ int32_t ParsedPatternInfo::length(int32_t flags) const {
|
|||
return getLengthFromEndpoints(getEndpoints(flags));
|
||||
}
|
||||
|
||||
int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints &endpoints) {
|
||||
int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints& endpoints) {
|
||||
return endpoints.end - endpoints.start;
|
||||
}
|
||||
|
||||
UnicodeString ParsedPatternInfo::getString(int32_t flags) const {
|
||||
const Endpoints &endpoints = getEndpoints(flags);
|
||||
const Endpoints& endpoints = getEndpoints(flags);
|
||||
if (endpoints.start == endpoints.end) {
|
||||
return UnicodeString();
|
||||
}
|
||||
|
@ -56,7 +58,7 @@ UnicodeString ParsedPatternInfo::getString(int32_t flags) const {
|
|||
return UnicodeString(pattern, endpoints.start, endpoints.end - endpoints.start);
|
||||
}
|
||||
|
||||
const Endpoints &ParsedPatternInfo::getEndpoints(int32_t flags) const {
|
||||
const Endpoints& ParsedPatternInfo::getEndpoints(int32_t flags) const {
|
||||
bool prefix = (flags & AFFIX_PREFIX) != 0;
|
||||
bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0;
|
||||
bool padding = (flags & AFFIX_PADDING) != 0;
|
||||
|
@ -91,7 +93,7 @@ bool ParsedPatternInfo::hasCurrencySign() const {
|
|||
return positive.hasCurrencySign || (fHasNegativeSubpattern && negative.hasCurrencySign);
|
||||
}
|
||||
|
||||
bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode &status) const {
|
||||
bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode& status) const {
|
||||
return AffixUtils::containsType(UnicodeStringCharSequence(pattern), type, status);
|
||||
}
|
||||
|
||||
|
@ -117,7 +119,7 @@ UChar32 ParsedPatternInfo::ParserState::next() {
|
|||
return codePoint;
|
||||
}
|
||||
|
||||
void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode &status) {
|
||||
void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode& status) {
|
||||
if (U_FAILURE(status)) { return; }
|
||||
this->pattern = patternString;
|
||||
|
||||
|
@ -141,7 +143,7 @@ void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErro
|
|||
}
|
||||
}
|
||||
|
||||
void ParsedPatternInfo::consumeSubpattern(UErrorCode &status) {
|
||||
void ParsedPatternInfo::consumeSubpattern(UErrorCode& status) {
|
||||
// subpattern := literals? number exponent? literals?
|
||||
consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status);
|
||||
if (U_FAILURE(status)) { return; }
|
||||
|
@ -161,7 +163,7 @@ void ParsedPatternInfo::consumeSubpattern(UErrorCode &status) {
|
|||
if (U_FAILURE(status)) { return; }
|
||||
}
|
||||
|
||||
void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode &status) {
|
||||
void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode& status) {
|
||||
if (state.peek() != u'*') {
|
||||
return;
|
||||
}
|
||||
|
@ -177,7 +179,7 @@ void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode &
|
|||
currentSubpattern->paddingEndpoints.end = state.offset;
|
||||
}
|
||||
|
||||
void ParsedPatternInfo::consumeAffix(Endpoints &endpoints, UErrorCode &status) {
|
||||
void ParsedPatternInfo::consumeAffix(Endpoints& endpoints, UErrorCode& status) {
|
||||
// literals := { literal }
|
||||
endpoints.start = state.offset;
|
||||
while (true) {
|
||||
|
@ -233,7 +235,7 @@ void ParsedPatternInfo::consumeAffix(Endpoints &endpoints, UErrorCode &status) {
|
|||
endpoints.end = state.offset;
|
||||
}
|
||||
|
||||
void ParsedPatternInfo::consumeLiteral(UErrorCode &status) {
|
||||
void ParsedPatternInfo::consumeLiteral(UErrorCode& status) {
|
||||
if (state.peek() == -1) {
|
||||
state.toParseException(u"Expected unquoted literal but found EOL");
|
||||
status = U_PATTERN_SYNTAX_ERROR;
|
||||
|
@ -256,7 +258,7 @@ void ParsedPatternInfo::consumeLiteral(UErrorCode &status) {
|
|||
}
|
||||
}
|
||||
|
||||
void ParsedPatternInfo::consumeFormat(UErrorCode &status) {
|
||||
void ParsedPatternInfo::consumeFormat(UErrorCode& status) {
|
||||
consumeIntegerFormat(status);
|
||||
if (U_FAILURE(status)) { return; }
|
||||
if (state.peek() == u'.') {
|
||||
|
@ -268,9 +270,9 @@ void ParsedPatternInfo::consumeFormat(UErrorCode &status) {
|
|||
}
|
||||
}
|
||||
|
||||
void ParsedPatternInfo::consumeIntegerFormat(UErrorCode &status) {
|
||||
void ParsedPatternInfo::consumeIntegerFormat(UErrorCode& status) {
|
||||
// Convenience reference:
|
||||
ParsedSubpatternInfo &result = *currentSubpattern;
|
||||
ParsedSubpatternInfo& result = *currentSubpattern;
|
||||
|
||||
while (true) {
|
||||
switch (state.peek()) {
|
||||
|
@ -359,9 +361,9 @@ void ParsedPatternInfo::consumeIntegerFormat(UErrorCode &status) {
|
|||
}
|
||||
}
|
||||
|
||||
void ParsedPatternInfo::consumeFractionFormat(UErrorCode &status) {
|
||||
void ParsedPatternInfo::consumeFractionFormat(UErrorCode& status) {
|
||||
// Convenience reference:
|
||||
ParsedSubpatternInfo &result = *currentSubpattern;
|
||||
ParsedSubpatternInfo& result = *currentSubpattern;
|
||||
|
||||
int32_t zeroCounter = 0;
|
||||
while (true) {
|
||||
|
@ -407,9 +409,9 @@ void ParsedPatternInfo::consumeFractionFormat(UErrorCode &status) {
|
|||
}
|
||||
}
|
||||
|
||||
void ParsedPatternInfo::consumeExponent(UErrorCode &status) {
|
||||
void ParsedPatternInfo::consumeExponent(UErrorCode& status) {
|
||||
// Convenience reference:
|
||||
ParsedSubpatternInfo &result = *currentSubpattern;
|
||||
ParsedSubpatternInfo& result = *currentSubpattern;
|
||||
|
||||
if (state.peek() != u'E') {
|
||||
return;
|
||||
|
@ -437,9 +439,9 @@ void ParsedPatternInfo::consumeExponent(UErrorCode &status) {
|
|||
/// END RECURSIVE DESCENT PARSER IMPLEMENTATION ///
|
||||
///////////////////////////////////////////////////
|
||||
|
||||
void
|
||||
PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern, DecimalFormatProperties &properties,
|
||||
IgnoreRounding ignoreRounding, UErrorCode &status) {
|
||||
void PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern,
|
||||
DecimalFormatProperties& properties,
|
||||
IgnoreRounding ignoreRounding, UErrorCode& status) {
|
||||
if (pattern.length() == 0) {
|
||||
// Backwards compatibility requires that we reset to the default values.
|
||||
// TODO: Only overwrite the properties that "saveToProperties" normally touches?
|
||||
|
@ -453,13 +455,13 @@ PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern, Decim
|
|||
patternInfoToProperties(properties, patternInfo, ignoreRounding, status);
|
||||
}
|
||||
|
||||
void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties,
|
||||
ParsedPatternInfo& patternInfo,
|
||||
IgnoreRounding _ignoreRounding, UErrorCode &status) {
|
||||
void
|
||||
PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, ParsedPatternInfo& patternInfo,
|
||||
IgnoreRounding _ignoreRounding, UErrorCode& status) {
|
||||
// Translate from PatternParseResult to Properties.
|
||||
// Note that most data from "negative" is ignored per the specification of DecimalFormat.
|
||||
|
||||
const ParsedSubpatternInfo &positive = patternInfo.positive;
|
||||
const ParsedSubpatternInfo& positive = patternInfo.positive;
|
||||
|
||||
bool ignoreRounding;
|
||||
if (_ignoreRounding == IGNORE_ROUNDING_NEVER) {
|
||||
|
@ -508,8 +510,7 @@ void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties,
|
|||
properties.maximumFractionDigits = -1;
|
||||
properties.roundingIncrement = 0.0;
|
||||
properties.minimumSignificantDigits = positive.integerAtSigns;
|
||||
properties.maximumSignificantDigits =
|
||||
positive.integerAtSigns + positive.integerTrailingHashSigns;
|
||||
properties.maximumSignificantDigits = positive.integerAtSigns + positive.integerTrailingHashSigns;
|
||||
} else if (!positive.rounding.isZero()) {
|
||||
if (!ignoreRounding) {
|
||||
properties.minimumFractionDigits = minFrac;
|
||||
|
@ -570,9 +571,9 @@ void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties,
|
|||
// Padding settings
|
||||
if (!positive.paddingLocation.isNull()) {
|
||||
// The width of the positive prefix and suffix templates are included in the padding
|
||||
int paddingWidth =
|
||||
positive.widthExceptAffixes + AffixUtils::estimateLength(UnicodeStringCharSequence(posPrefix), status) +
|
||||
AffixUtils::estimateLength(UnicodeStringCharSequence(posSuffix), status);
|
||||
int paddingWidth = positive.widthExceptAffixes +
|
||||
AffixUtils::estimateLength(UnicodeStringCharSequence(posPrefix), status) +
|
||||
AffixUtils::estimateLength(UnicodeStringCharSequence(posSuffix), status);
|
||||
properties.formatWidth = paddingWidth;
|
||||
UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING);
|
||||
if (rawPaddingString.length() == 1) {
|
||||
|
@ -622,8 +623,8 @@ void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties,
|
|||
/// End PatternStringParser.java; begin PatternStringUtils.java ///
|
||||
///////////////////////////////////////////////////////////////////
|
||||
|
||||
UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties &properties,
|
||||
UErrorCode &status) {
|
||||
UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties& properties,
|
||||
UErrorCode& status) {
|
||||
UnicodeString sb;
|
||||
|
||||
// Convenience references
|
||||
|
@ -632,7 +633,7 @@ UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatP
|
|||
int groupingSize = uprv_min(properties.secondaryGroupingSize, dosMax);
|
||||
int firstGroupingSize = uprv_min(properties.groupingSize, dosMax);
|
||||
int paddingWidth = uprv_min(properties.formatWidth, dosMax);
|
||||
NullableValue<PadPosition> paddingLocation = properties.padPosition;
|
||||
NullableValue <PadPosition> paddingLocation = properties.padPosition;
|
||||
UnicodeString paddingString = properties.padString;
|
||||
int minInt = uprv_max(uprv_min(properties.minimumIntegerDigits, dosMax), 0);
|
||||
int maxInt = uprv_min(properties.maximumIntegerDigits, dosMax);
|
||||
|
@ -809,8 +810,8 @@ UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatP
|
|||
}
|
||||
|
||||
int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex,
|
||||
UErrorCode &status) {
|
||||
(void)status;
|
||||
UErrorCode& status) {
|
||||
(void) status;
|
||||
if (input.length() == 0) {
|
||||
input.setTo(kFallbackPaddingString, -1);
|
||||
}
|
||||
|
@ -840,4 +841,69 @@ int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString&
|
|||
return output.length() - startLength;
|
||||
}
|
||||
|
||||
void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
|
||||
int8_t signum, UNumberSignDisplay signDisplay,
|
||||
StandardPlural::Form plural,
|
||||
bool perMilleReplacesPercent, UnicodeString& output) {
|
||||
|
||||
// Should the output render '+' where '-' would normally appear in the pattern?
|
||||
bool plusReplacesMinusSign = signum != -1 && (
|
||||
signDisplay == UNUM_SIGN_ALWAYS || signDisplay == UNUM_SIGN_ACCOUNTING_ALWAYS || (
|
||||
signum == 1 && (
|
||||
signDisplay == UNUM_SIGN_EXCEPT_ZERO ||
|
||||
signDisplay == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO))) &&
|
||||
patternInfo.positiveHasPlusSign() == false;
|
||||
|
||||
// Should we use the affix from the negative subpattern? (If not, we will use the positive
|
||||
// subpattern.)
|
||||
bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern() && (
|
||||
signum == -1 || (patternInfo.negativeHasMinusSign() && plusReplacesMinusSign));
|
||||
|
||||
// Resolve the flags for the affix pattern.
|
||||
int flags = 0;
|
||||
if (useNegativeAffixPattern) {
|
||||
flags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN;
|
||||
}
|
||||
if (isPrefix) {
|
||||
flags |= AffixPatternProvider::AFFIX_PREFIX;
|
||||
}
|
||||
if (plural != StandardPlural::Form::COUNT) {
|
||||
U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural));
|
||||
flags |= plural;
|
||||
}
|
||||
|
||||
// Should we prepend a sign to the pattern?
|
||||
bool prependSign;
|
||||
if (!isPrefix || useNegativeAffixPattern) {
|
||||
prependSign = false;
|
||||
} else if (signum == -1) {
|
||||
prependSign = signDisplay != UNUM_SIGN_NEVER;
|
||||
} else {
|
||||
prependSign = plusReplacesMinusSign;
|
||||
}
|
||||
|
||||
// Compute the length of the affix pattern.
|
||||
int length = patternInfo.length(flags) + (prependSign ? 1 : 0);
|
||||
|
||||
// Finally, set the result into the StringBuilder.
|
||||
output.remove();
|
||||
for (int index = 0; index < length; index++) {
|
||||
char16_t candidate;
|
||||
if (prependSign && index == 0) {
|
||||
candidate = u'-';
|
||||
} else if (prependSign) {
|
||||
candidate = patternInfo.charAt(flags, index - 1);
|
||||
} else {
|
||||
candidate = patternInfo.charAt(flags, index);
|
||||
}
|
||||
if (plusReplacesMinusSign && candidate == u'-') {
|
||||
candidate = u'+';
|
||||
}
|
||||
if (perMilleReplacesPercent && candidate == u'%') {
|
||||
candidate = u'‰';
|
||||
}
|
||||
output.append(candidate);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
|
|
@ -62,17 +62,18 @@ struct U_I18N_API ParsedPatternInfo : public AffixPatternProvider, public UMemor
|
|||
ParsedSubpatternInfo positive;
|
||||
ParsedSubpatternInfo negative;
|
||||
|
||||
ParsedPatternInfo() : state(this->pattern), currentSubpattern(nullptr) {}
|
||||
ParsedPatternInfo()
|
||||
: state(this->pattern), currentSubpattern(nullptr) {}
|
||||
|
||||
~ParsedPatternInfo() U_OVERRIDE = default;
|
||||
|
||||
static int32_t getLengthFromEndpoints(const Endpoints &endpoints);
|
||||
static int32_t getLengthFromEndpoints(const Endpoints& endpoints);
|
||||
|
||||
char16_t charAt(int32_t flags, int32_t index) const U_OVERRIDE;
|
||||
|
||||
int32_t length(int32_t flags) const U_OVERRIDE;
|
||||
|
||||
UnicodeString getString(int32_t flags) const;
|
||||
UnicodeString getString(int32_t flags) const U_OVERRIDE;
|
||||
|
||||
bool positiveHasPlusSign() const U_OVERRIDE;
|
||||
|
||||
|
@ -82,16 +83,17 @@ struct U_I18N_API ParsedPatternInfo : public AffixPatternProvider, public UMemor
|
|||
|
||||
bool hasCurrencySign() const U_OVERRIDE;
|
||||
|
||||
bool containsSymbolType(AffixPatternType type, UErrorCode &status) const U_OVERRIDE;
|
||||
bool containsSymbolType(AffixPatternType type, UErrorCode& status) const U_OVERRIDE;
|
||||
|
||||
bool hasBody() const U_OVERRIDE;
|
||||
|
||||
private:
|
||||
struct U_I18N_API ParserState {
|
||||
const UnicodeString &pattern; // reference to the parent
|
||||
const UnicodeString& pattern; // reference to the parent
|
||||
int32_t offset = 0;
|
||||
|
||||
explicit ParserState(const UnicodeString &_pattern) : pattern(_pattern) {};
|
||||
explicit ParserState(const UnicodeString& _pattern)
|
||||
: pattern(_pattern) {};
|
||||
|
||||
UChar32 peek();
|
||||
|
||||
|
@ -99,41 +101,40 @@ struct U_I18N_API ParsedPatternInfo : public AffixPatternProvider, public UMemor
|
|||
|
||||
// TODO: We don't currently do anything with the message string.
|
||||
// This method is here as a shell for Java compatibility.
|
||||
inline void toParseException(const char16_t *message) { (void)message; }
|
||||
}
|
||||
state;
|
||||
inline void toParseException(const char16_t* message) { (void) message; }
|
||||
} state;
|
||||
|
||||
// NOTE: In Java, these are written as pure functions.
|
||||
// In C++, they're written as methods.
|
||||
// The behavior is the same.
|
||||
|
||||
// Mutable transient pointer:
|
||||
ParsedSubpatternInfo *currentSubpattern;
|
||||
ParsedSubpatternInfo* currentSubpattern;
|
||||
|
||||
// In Java, "negative == null" tells us whether or not we had a negative subpattern.
|
||||
// In C++, we need to remember in another boolean.
|
||||
bool fHasNegativeSubpattern = false;
|
||||
|
||||
const Endpoints &getEndpoints(int32_t flags) const;
|
||||
const Endpoints& getEndpoints(int32_t flags) const;
|
||||
|
||||
/** Run the recursive descent parser. */
|
||||
void consumePattern(const UnicodeString &patternString, UErrorCode &status);
|
||||
void consumePattern(const UnicodeString& patternString, UErrorCode& status);
|
||||
|
||||
void consumeSubpattern(UErrorCode &status);
|
||||
void consumeSubpattern(UErrorCode& status);
|
||||
|
||||
void consumePadding(PadPosition paddingLocation, UErrorCode &status);
|
||||
void consumePadding(PadPosition paddingLocation, UErrorCode& status);
|
||||
|
||||
void consumeAffix(Endpoints &endpoints, UErrorCode &status);
|
||||
void consumeAffix(Endpoints& endpoints, UErrorCode& status);
|
||||
|
||||
void consumeLiteral(UErrorCode &status);
|
||||
void consumeLiteral(UErrorCode& status);
|
||||
|
||||
void consumeFormat(UErrorCode &status);
|
||||
void consumeFormat(UErrorCode& status);
|
||||
|
||||
void consumeIntegerFormat(UErrorCode &status);
|
||||
void consumeIntegerFormat(UErrorCode& status);
|
||||
|
||||
void consumeFractionFormat(UErrorCode &status);
|
||||
void consumeFractionFormat(UErrorCode& status);
|
||||
|
||||
void consumeExponent(UErrorCode &status);
|
||||
void consumeExponent(UErrorCode& status);
|
||||
|
||||
friend class PatternParser;
|
||||
};
|
||||
|
@ -153,8 +154,8 @@ class U_I18N_API PatternParser {
|
|||
* The LDML decimal format pattern (Excel-style pattern) to parse.
|
||||
* @return The results of the parse.
|
||||
*/
|
||||
static void
|
||||
parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo &patternInfo, UErrorCode &status);
|
||||
static void parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo,
|
||||
UErrorCode& status);
|
||||
|
||||
enum IgnoreRounding {
|
||||
IGNORE_ROUNDING_NEVER = 0, IGNORE_ROUNDING_IF_CURRENCY = 1, IGNORE_ROUNDING_ALWAYS = 2
|
||||
|
@ -173,8 +174,8 @@ class U_I18N_API PatternParser {
|
|||
* @throws IllegalArgumentException
|
||||
* If there is a syntax error in the pattern string.
|
||||
*/
|
||||
static DecimalFormatProperties
|
||||
parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding, UErrorCode &status);
|
||||
static DecimalFormatProperties parseToProperties(const UnicodeString& pattern,
|
||||
IgnoreRounding ignoreRounding, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Parses a pattern string into an existing property bag. All properties that can be encoded into a pattern string
|
||||
|
@ -190,18 +191,19 @@ class U_I18N_API PatternParser {
|
|||
* @throws IllegalArgumentException
|
||||
* If there was a syntax error in the pattern string.
|
||||
*/
|
||||
static void parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties,
|
||||
IgnoreRounding ignoreRounding, UErrorCode &status);
|
||||
static void parseToExistingProperties(const UnicodeString& pattern,
|
||||
DecimalFormatProperties& properties,
|
||||
IgnoreRounding ignoreRounding, UErrorCode& status);
|
||||
|
||||
private:
|
||||
static void
|
||||
parseToExistingPropertiesImpl(const UnicodeString& pattern, DecimalFormatProperties &properties,
|
||||
IgnoreRounding ignoreRounding, UErrorCode &status);
|
||||
static void parseToExistingPropertiesImpl(const UnicodeString& pattern,
|
||||
DecimalFormatProperties& properties,
|
||||
IgnoreRounding ignoreRounding, UErrorCode& status);
|
||||
|
||||
/** Finalizes the temporary data stored in the ParsedPatternInfo to the Properties. */
|
||||
static void
|
||||
patternInfoToProperties(DecimalFormatProperties &properties, ParsedPatternInfo& patternInfo,
|
||||
IgnoreRounding _ignoreRounding, UErrorCode &status);
|
||||
static void patternInfoToProperties(DecimalFormatProperties& properties,
|
||||
ParsedPatternInfo& patternInfo, IgnoreRounding _ignoreRounding,
|
||||
UErrorCode& status);
|
||||
};
|
||||
|
||||
class U_I18N_API PatternStringUtils {
|
||||
|
@ -217,8 +219,8 @@ class U_I18N_API PatternStringUtils {
|
|||
* The property bag to serialize.
|
||||
* @return A pattern string approximately serializing the property bag.
|
||||
*/
|
||||
static UnicodeString
|
||||
propertiesToPatternString(const DecimalFormatProperties &properties, UErrorCode &status);
|
||||
static UnicodeString propertiesToPatternString(const DecimalFormatProperties& properties,
|
||||
UErrorCode& status);
|
||||
|
||||
|
||||
/**
|
||||
|
@ -248,14 +250,23 @@ class U_I18N_API PatternStringUtils {
|
|||
* notation.
|
||||
* @return The pattern expressed in the other notation.
|
||||
*/
|
||||
static UnicodeString
|
||||
convertLocalized(UnicodeString input, DecimalFormatSymbols symbols, bool toLocalized,
|
||||
UErrorCode &status);
|
||||
static UnicodeString convertLocalized(UnicodeString input, DecimalFormatSymbols symbols,
|
||||
bool toLocalized, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* This method contains the heart of the logic for rendering LDML affix strings. It handles
|
||||
* sign-always-shown resolution, whether to use the positive or negative subpattern, permille
|
||||
* substitution, and plural forms for CurrencyPluralInfo.
|
||||
*/
|
||||
static void patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
|
||||
int8_t signum, UNumberSignDisplay signDisplay,
|
||||
StandardPlural::Form plural, bool perMilleReplacesPercent,
|
||||
UnicodeString& output);
|
||||
|
||||
private:
|
||||
/** @return The number of chars inserted. */
|
||||
static int
|
||||
escapePaddingString(UnicodeString input, UnicodeString &output, int startIndex, UErrorCode &status);
|
||||
static int escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex,
|
||||
UErrorCode& status);
|
||||
};
|
||||
|
||||
} // namespace impl
|
||||
|
|
|
@ -16,8 +16,7 @@
|
|||
#include "uassert.h"
|
||||
#include "unicode/platform.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
namespace number {
|
||||
U_NAMESPACE_BEGIN namespace number {
|
||||
namespace impl {
|
||||
|
||||
// Typedef several enums for brevity and for easier comparison to Java.
|
||||
|
@ -87,15 +86,14 @@ enum AffixPatternType {
|
|||
};
|
||||
|
||||
enum CompactType {
|
||||
TYPE_DECIMAL,
|
||||
TYPE_CURRENCY
|
||||
TYPE_DECIMAL, TYPE_CURRENCY
|
||||
};
|
||||
|
||||
|
||||
// TODO: Should this be moved somewhere else, maybe where other ICU classes can use it?
|
||||
// Exported as U_I18N_API because it is a base class for other exported types
|
||||
class U_I18N_API CharSequence {
|
||||
public:
|
||||
public:
|
||||
virtual ~CharSequence() = default;
|
||||
|
||||
virtual int32_t length() const = 0;
|
||||
|
@ -123,12 +121,20 @@ class U_I18N_API AffixPatternProvider {
|
|||
static const int32_t AFFIX_NEGATIVE_SUBPATTERN = 0x200;
|
||||
static const int32_t AFFIX_PADDING = 0x400;
|
||||
|
||||
// Convenience compound flags
|
||||
static const int32_t AFFIX_POS_PREFIX = AFFIX_PREFIX;
|
||||
static const int32_t AFFIX_POS_SUFFIX = 0;
|
||||
static const int32_t AFFIX_NEG_PREFIX = AFFIX_PREFIX | AFFIX_NEGATIVE_SUBPATTERN;
|
||||
static const int32_t AFFIX_NEG_SUFFIX = AFFIX_NEGATIVE_SUBPATTERN;
|
||||
|
||||
virtual ~AffixPatternProvider() = default;
|
||||
|
||||
virtual char16_t charAt(int flags, int i) const = 0;
|
||||
|
||||
virtual int length(int flags) const = 0;
|
||||
|
||||
virtual UnicodeString getString(int flags) const = 0;
|
||||
|
||||
virtual bool hasCurrencySign() const = 0;
|
||||
|
||||
virtual bool positiveHasPlusSign() const = 0;
|
||||
|
@ -137,7 +143,7 @@ class U_I18N_API AffixPatternProvider {
|
|||
|
||||
virtual bool negativeHasMinusSign() const = 0;
|
||||
|
||||
virtual bool containsSymbolType(AffixPatternType, UErrorCode &) const = 0;
|
||||
virtual bool containsSymbolType(AffixPatternType, UErrorCode&) const = 0;
|
||||
|
||||
/**
|
||||
* True if the pattern has a number placeholder like "0" or "#,##0.00"; false if the pattern does not
|
||||
|
@ -173,8 +179,8 @@ class U_I18N_API Modifier {
|
|||
* formatted.
|
||||
* @return The number of characters (UTF-16 code units) that were added to the string builder.
|
||||
*/
|
||||
virtual int32_t
|
||||
apply(NumberStringBuilder &output, int leftIndex, int rightIndex, UErrorCode &status) const = 0;
|
||||
virtual int32_t apply(NumberStringBuilder& output, int leftIndex, int rightIndex,
|
||||
UErrorCode& status) const = 0;
|
||||
|
||||
/**
|
||||
* Gets the length of the prefix. This information can be used in combination with {@link #apply} to extract the
|
||||
|
@ -187,7 +193,7 @@ class U_I18N_API Modifier {
|
|||
/**
|
||||
* Returns the number of code points in the modifier, prefix plus suffix.
|
||||
*/
|
||||
virtual int32_t getCodePointCount(UErrorCode &status) const = 0;
|
||||
virtual int32_t getCodePointCount(UErrorCode& status) const = 0;
|
||||
|
||||
/**
|
||||
* Whether this modifier is strong. If a modifier is strong, it should always be applied immediately and not allowed
|
||||
|
@ -230,7 +236,8 @@ class U_I18N_API MicroPropsGenerator {
|
|||
* The MicroProps instance to populate.
|
||||
* @return A MicroProps instance resolved for the quantity.
|
||||
*/
|
||||
virtual void processQuantity(DecimalQuantity& quantity, MicroProps& micros, UErrorCode& status) const = 0;
|
||||
virtual void processQuantity(DecimalQuantity& quantity, MicroProps& micros,
|
||||
UErrorCode& status) const = 0;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -255,24 +262,25 @@ class MultiplierProducer {
|
|||
template<typename T>
|
||||
class U_I18N_API NullableValue {
|
||||
public:
|
||||
NullableValue() : fNull(true) {}
|
||||
NullableValue()
|
||||
: fNull(true) {}
|
||||
|
||||
NullableValue(const NullableValue<T> &other) = default;
|
||||
NullableValue(const NullableValue<T>& other) = default;
|
||||
|
||||
explicit NullableValue(const T &other) {
|
||||
explicit NullableValue(const T& other) {
|
||||
fValue = other;
|
||||
fNull = false;
|
||||
}
|
||||
|
||||
NullableValue<T> &operator=(const NullableValue<T> &other) = default;
|
||||
NullableValue<T>& operator=(const NullableValue<T>& other) = default;
|
||||
|
||||
NullableValue<T> &operator=(const T &other) {
|
||||
NullableValue<T>& operator=(const T& other) {
|
||||
fValue = other;
|
||||
fNull = false;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool operator==(const NullableValue &other) const {
|
||||
bool operator==(const NullableValue& other) const {
|
||||
// "fValue == other.fValue" returns UBool, not bool (causes compiler warnings)
|
||||
return fNull ? other.fNull : (other.fNull ? false : static_cast<bool>(fValue == other.fValue));
|
||||
}
|
||||
|
@ -286,7 +294,7 @@ class U_I18N_API NullableValue {
|
|||
return fNull;
|
||||
}
|
||||
|
||||
T get(UErrorCode &status) const {
|
||||
T get(UErrorCode& status) const {
|
||||
if (fNull) {
|
||||
status = U_UNDEFINED_VARIABLE;
|
||||
}
|
||||
|
|
|
@ -87,10 +87,10 @@ AffixPatternMatcher AffixPatternMatcherBuilder::build() {
|
|||
|
||||
|
||||
AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(const UChar* currencyCode,
|
||||
const UnicodeString& currency1,
|
||||
const UnicodeString& currency2,
|
||||
const DecimalFormatSymbols& dfs,
|
||||
IgnorablesMatcher* ignorables, const Locale& locale)
|
||||
const UnicodeString* currency1,
|
||||
const UnicodeString* currency2,
|
||||
const DecimalFormatSymbols* dfs,
|
||||
IgnorablesMatcher* ignorables, const Locale* locale)
|
||||
: currency1(currency1),
|
||||
currency2(currency2),
|
||||
dfs(dfs),
|
||||
|
@ -109,23 +109,23 @@ AffixTokenMatcherWarehouse::~AffixTokenMatcherWarehouse() {
|
|||
}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::minusSign() {
|
||||
return fMinusSign = {dfs, true};
|
||||
return fMinusSign = {*dfs, true};
|
||||
}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::plusSign() {
|
||||
return fPlusSign = {dfs, true};
|
||||
return fPlusSign = {*dfs, true};
|
||||
}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::percent() {
|
||||
return fPercent = {dfs};
|
||||
return fPercent = {*dfs};
|
||||
}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::permille() {
|
||||
return fPermille = {dfs};
|
||||
return fPermille = {*dfs};
|
||||
}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::currency(UErrorCode& status) {
|
||||
return fCurrency = {{locale, status}, {currencyCode, currency1, currency2}};
|
||||
return fCurrency = {{*locale, status}, {currencyCode, *currency1, *currency2}};
|
||||
}
|
||||
|
||||
NumberParseMatcher& AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp) {
|
||||
|
@ -193,8 +193,232 @@ AffixPatternMatcher AffixPatternMatcher::fromAffixPattern(const UnicodeString& a
|
|||
|
||||
AffixPatternMatcher::AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen,
|
||||
const UnicodeString& pattern)
|
||||
: ArraySeriesMatcher(matchers, matchersLen), fPattern(pattern) {
|
||||
: ArraySeriesMatcher(matchers, matchersLen), fPattern(pattern) {}
|
||||
|
||||
/**
 * Returns the affix pattern string this matcher was built from.
 *
 * NOTE(review): the result comes from toAliasedUnicodeString(), so it presumably aliases
 * fPattern's storage and should not outlive this matcher -- confirm against that helper.
 */
UnicodeString AffixPatternMatcher::getPattern() const {
    UnicodeString aliasedPattern = fPattern.toAliasedUnicodeString();
    return aliasedPattern;
}
|
||||
|
||||
/**
 * Two AffixPatternMatchers compare equal when their stored pattern strings compare equal;
 * the matcher arrays themselves are not compared.
 */
bool AffixPatternMatcher::operator==(const AffixPatternMatcher& other) const {
    const bool samePattern = (fPattern == other.fPattern);
    return samePattern;
}
|
||||
|
||||
|
||||
/**
 * Builds the AffixPatternMatchers and AffixMatchers needed to parse the affixes described by
 * patternInfo, storing them in fAffixPatternMatchers and fAffixMatchers.
 *
 * Nothing is built when isInteresting() reports that the affixes contain only symbols and
 * ignorables (and full affixes were not requested).
 *
 * @param patternInfo Provider of the positive/negative prefix and suffix patterns.
 * @param output The parser that should eventually receive the matchers (not yet used; see TODO).
 * @param warehouse Token matcher warehouse; moved into fAffixTokenMatcherWarehouse.
 * @param ignorables Matcher whose set defines which code points are ignorable.
 * @param parseFlags Bit set of PARSE_FLAG_* options.
 * @param status ICU error code, forwarded to the helpers that can fail.
 */
AffixMatcherWarehouse::AffixMatcherWarehouse(const AffixPatternProvider& patternInfo,
                                             NumberParserImpl& output,
                                             AffixTokenMatcherWarehouse& warehouse,
                                             const IgnorablesMatcher& ignorables, parse_flags_t parseFlags,
                                             UErrorCode& status)
        : fAffixTokenMatcherWarehouse(std::move(warehouse)) {
    if (!isInteresting(patternInfo, ignorables, parseFlags, status)) {
        return;
    }

    // The affixes have interesting characters, or we are in strict mode.
    // `sb` is a scratch buffer that is re-filled with each resolved affix pattern in turn.
    UnicodeString sb;
    bool includeUnpaired = 0 != (parseFlags & PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES);
    UNumberSignDisplay signDisplay = (0 != (parseFlags & PARSE_FLAG_PLUS_SIGN_ALLOWED)) ? UNUM_SIGN_ALWAYS
                                                                                        : UNUM_SIGN_NEVER;

    int32_t numAffixMatchers = 0;
    int32_t numAffixPatternMatchers = 0;

    AffixPatternMatcher* posPrefix = nullptr;
    AffixPatternMatcher* posSuffix = nullptr;

    // NOTE(review): `warehouse` was passed through std::move() in the initializer list above but
    // is still handed to fromAffixPattern() below. If fAffixTokenMatcherWarehouse is held by
    // value, these calls operate on a moved-from object -- confirm and switch to the member.

    // Pre-process the affix strings to resolve LDML rules like sign display.
    for (int8_t signum = 1; signum >= -1; signum--) {
        // Generate Prefix. The slot is only "claimed" (index advanced) if a prefix exists.
        bool hasPrefix = false;
        PatternStringUtils::patternInfoToStringBuilder(
                patternInfo, true, signum, signDisplay, StandardPlural::OTHER, false, sb);
        fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
                sb, warehouse, parseFlags, &hasPrefix, status);
        AffixPatternMatcher* prefix = hasPrefix ? &fAffixPatternMatchers[numAffixPatternMatchers++]
                                                : nullptr;

        // Generate Suffix, with the same slot-claiming scheme.
        bool hasSuffix = false;
        PatternStringUtils::patternInfoToStringBuilder(
                patternInfo, false, signum, signDisplay, StandardPlural::OTHER, false, sb);
        fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern(
                sb, warehouse, parseFlags, &hasSuffix, status);
        AffixPatternMatcher* suffix = hasSuffix ? &fAffixPatternMatchers[numAffixPatternMatchers++]
                                                : nullptr;

        if (signum == 1) {
            posPrefix = prefix;
            posSuffix = suffix;
        } else if (equals(prefix, posPrefix) && equals(suffix, posSuffix)) {
            // Skip adding these matchers (we already have equivalents)
            continue;
        }

        // Flags for setting in the ParsedNumber
        int flags = (signum == -1) ? FLAG_NEGATIVE : 0;

        // Note: it is indeed possible for posPrefix and posSuffix to both be null.
        // We still need to add that matcher for strict mode to work.
        fAffixMatchers[numAffixMatchers++] = {prefix, suffix, flags};
        if (includeUnpaired && prefix != nullptr && suffix != nullptr) {
            // The following if statements are designed to prevent adding two identical matchers:
            // for a non-positive signum, a prefix-only (or suffix-only) matcher is added only
            // when it DIFFERS from the one already added for the positive signum.
            if (signum == 1 || !equals(prefix, posPrefix)) {
                fAffixMatchers[numAffixMatchers++] = {prefix, nullptr, flags};
            }
            if (signum == 1 || !equals(suffix, posSuffix)) {
                fAffixMatchers[numAffixMatchers++] = {nullptr, suffix, flags};
            }
        }
    }

    // Put the AffixMatchers in order, and then add them to the output.
    // TODO
    // Collections.sort(matchers, COMPARATOR);
    // output.addMatchers(matchers);
}
|
||||
|
||||
bool AffixMatcherWarehouse::isInteresting(const AffixPatternProvider& patternInfo,
|
||||
const IgnorablesMatcher& ignorables, parse_flags_t parseFlags,
|
||||
UErrorCode& status) {
|
||||
UnicodeStringCharSequence posPrefixString(patternInfo.getString(AffixPatternProvider::AFFIX_POS_PREFIX));
|
||||
UnicodeStringCharSequence posSuffixString(patternInfo.getString(AffixPatternProvider::AFFIX_POS_SUFFIX));
|
||||
UnicodeStringCharSequence negPrefixString(UnicodeString(u""));
|
||||
UnicodeStringCharSequence negSuffixString(UnicodeString(u""));
|
||||
if (patternInfo.hasNegativeSubpattern()) {
|
||||
negPrefixString = UnicodeStringCharSequence(patternInfo.getString(AffixPatternProvider::AFFIX_NEG_PREFIX));
|
||||
negSuffixString = UnicodeStringCharSequence(patternInfo.getString(AffixPatternProvider::AFFIX_NEG_SUFFIX));
|
||||
}
|
||||
|
||||
if (0 == (parseFlags & PARSE_FLAG_USE_FULL_AFFIXES) &&
|
||||
AffixUtils::containsOnlySymbolsAndIgnorables(posPrefixString, *ignorables.getSet(), status) &&
|
||||
AffixUtils::containsOnlySymbolsAndIgnorables(posSuffixString, *ignorables.getSet(), status) &&
|
||||
AffixUtils::containsOnlySymbolsAndIgnorables(negPrefixString, *ignorables.getSet(), status) &&
|
||||
AffixUtils::containsOnlySymbolsAndIgnorables(negSuffixString, *ignorables.getSet(), status)
|
||||
// HACK: Plus and minus sign are a special case: we accept them trailing only if they are
|
||||
// trailing in the pattern string.
|
||||
&& !AffixUtils::containsType(posSuffixString, TYPE_PLUS_SIGN, status) &&
|
||||
!AffixUtils::containsType(posSuffixString, TYPE_MINUS_SIGN, status) &&
|
||||
!AffixUtils::containsType(negSuffixString, TYPE_PLUS_SIGN, status) &&
|
||||
!AffixUtils::containsType(negSuffixString, TYPE_MINUS_SIGN, status)) {
|
||||
// The affixes contain only symbols and ignorables.
|
||||
// No need to generate affix matchers.
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Null-safe equality for two optional AffixPatternMatchers: two null pointers are equal, a null
 * and a non-null pointer are not, and two non-null pointers are compared with operator==.
 */
bool AffixMatcherWarehouse::equals(const AffixPatternMatcher* lhs, const AffixPatternMatcher* rhs) {
    if (lhs == nullptr || rhs == nullptr) {
        // At least one side is absent: equal only if both are.
        return lhs == rhs;
    }
    return *lhs == *rhs;
}
|
||||
|
||||
|
||||
/**
 * Stores the given prefix/suffix matcher pointers and result flags.
 *
 * @param prefix Matcher for the prefix, or nullptr if this affix pair has no prefix.
 * @param suffix Matcher for the suffix, or nullptr if this affix pair has no suffix.
 * @param flags Flags OR-ed into the ParsedNumber by postProcess() when this pair was matched.
 */
AffixMatcher::AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags)
        : fPrefix(prefix),
          fSuffix(suffix),
          fFlags(flags) {
}
|
||||
|
||||
/**
 * Attempts to match this pair's prefix (before a number has been seen) or suffix (after a number
 * has been seen) at the current position of the segment.
 *
 * When the underlying matcher advances the segment offset, the matched affix pattern is recorded
 * in result.prefix or result.suffix respectively.
 *
 * @return The value returned by the underlying AffixPatternMatcher::match call, or false if no
 *         match was attempted.
 */
bool AffixMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
    // Before the number we are looking for a prefix; after it, for a suffix.
    const bool prefixPhase = !result.seenNumber();
    auto* affix = prefixPhase ? fPrefix : fSuffix;
    auto& seenAffix = prefixPhase ? result.prefix : result.suffix;

    // Do not match if:
    // 1. We have already seen an affix of this kind (the result field is no longer bogus)
    // 2. This AffixMatcher has no affix of this kind (null pointer)
    if (!seenAffix.isBogus() || affix == nullptr) {
        return false;
    }
    // 3. (Suffix only) The matched prefix does not equal this AffixMatcher's prefix
    if (!prefixPhase && !matched(fPrefix, result.prefix)) {
        return false;
    }

    // Attempt the match; record the pattern only if some input was actually consumed.
    int initialOffset = segment.getOffset();
    bool maybeMore = affix->match(segment, result, status);
    if (initialOffset != segment.getOffset()) {
        seenAffix = affix->getPattern();
    }
    return maybeMore;
}
|
||||
|
||||
/**
 * Returns the union of the lead code points of the prefix and suffix matchers.
 *
 * The set is built on the first call, frozen, and handed to fLocalLeadCodePoints; later calls
 * return that cached set. A missing (nullptr) prefix or suffix contributes nothing.
 *
 * NOTE(review): the result of `new UnicodeSet()` is used without a null check, and this
 * signature offers no way to report an allocation failure -- confirm whether that is acceptable.
 */
const UnicodeSet& AffixMatcher::getLeadCodePoints() {
    if (!fLocalLeadCodePoints.isNull()) {
        // Already computed on an earlier call.
        return *fLocalLeadCodePoints;
    }

    auto* combined = new UnicodeSet();
    if (fPrefix != nullptr) {
        combined->addAll(fPrefix->getLeadCodePoints());
    }
    if (fSuffix != nullptr) {
        combined->addAll(fSuffix->getLeadCodePoints());
    }
    combined->freeze();
    fLocalLeadCodePoints.adoptInstead(combined);

    return *fLocalLeadCodePoints;
}
|
||||
|
||||
/**
 * Applies this matcher's flags to the result if this prefix/suffix pair is the one that was
 * matched during parsing.
 *
 * When the pair matches, a still-bogus result.prefix / result.suffix is replaced by an empty
 * string, so that an entire matched affix pair can be recognised (used by strict mode), and
 * fFlags is OR-ed into result.flags. Otherwise the result is left untouched.
 *
 * @param result The parse state to update.
 */
void AffixMatcher::postProcess(ParsedNumber& result) const {
    const bool isOurPair = matched(fPrefix, result.prefix) && matched(fSuffix, result.suffix);
    if (!isOurPair) {
        return;
    }

    // Turn "no affix recorded" (bogus) into an explicit empty string.
    if (result.prefix.isBogus()) {
        result.prefix = UnicodeString();
    }
    if (result.suffix.isBogus()) {
        result.suffix = UnicodeString();
    }

    result.flags |= fFlags;
}
|
||||
|
||||
bool AffixMatcher::matched(const AffixPatternMatcher* affix, const UnicodeString& patternString) {
|
||||
return (affix == nullptr && patternString.isBogus()) ||
|
||||
(affix != nullptr && affix->getPattern() == patternString);
|
||||
}
|
||||
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -19,6 +19,9 @@ namespace impl {
|
|||
class AffixPatternMatcherBuilder;
|
||||
class AffixPatternMatcher;
|
||||
|
||||
using ::icu::number::impl::AffixPatternProvider;
|
||||
using ::icu::number::impl::TokenConsumer;
|
||||
|
||||
|
||||
class CodePointMatcher : public NumberParseMatcher, public UMemory {
|
||||
public:
|
||||
|
@ -51,9 +54,13 @@ class AffixTokenMatcherWarehouse {
|
|||
static constexpr int32_t CODE_POINT_BATCH_SIZE = 10; // Number of entries per heap allocation
|
||||
|
||||
public:
|
||||
AffixTokenMatcherWarehouse(const UChar* currencyCode, const UnicodeString& currency1,
|
||||
const UnicodeString& currency2, const DecimalFormatSymbols& dfs,
|
||||
IgnorablesMatcher* ignorables, const Locale& locale);
|
||||
AffixTokenMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
||||
AffixTokenMatcherWarehouse(const UChar* currencyCode, const UnicodeString* currency1,
|
||||
const UnicodeString* currency2, const DecimalFormatSymbols* dfs,
|
||||
IgnorablesMatcher* ignorables, const Locale* locale);
|
||||
|
||||
AffixTokenMatcherWarehouse(AffixTokenMatcherWarehouse&& src) = default;
|
||||
|
||||
~AffixTokenMatcherWarehouse();
|
||||
|
||||
|
@ -70,12 +77,13 @@ class AffixTokenMatcherWarehouse {
|
|||
NumberParseMatcher& nextCodePointMatcher(UChar32 cp);
|
||||
|
||||
private:
|
||||
// NOTE: The following fields may be unsafe to access after construction is done!
|
||||
UChar currencyCode[4];
|
||||
const UnicodeString& currency1;
|
||||
const UnicodeString& currency2;
|
||||
const DecimalFormatSymbols& dfs;
|
||||
const UnicodeString* currency1;
|
||||
const UnicodeString* currency2;
|
||||
const DecimalFormatSymbols* dfs;
|
||||
IgnorablesMatcher* ignorables;
|
||||
const Locale locale;
|
||||
const Locale* locale;
|
||||
|
||||
// NOTE: These are default-constructed and should not be used until initialized.
|
||||
MinusSignMatcher fMinusSign;
|
||||
|
@ -94,7 +102,7 @@ class AffixTokenMatcherWarehouse {
|
|||
};
|
||||
|
||||
|
||||
class AffixPatternMatcherBuilder : public ::icu::number::impl::TokenConsumer {
|
||||
class AffixPatternMatcherBuilder : public TokenConsumer {
|
||||
public:
|
||||
AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherWarehouse& warehouse,
|
||||
IgnorablesMatcher* ignorables);
|
||||
|
@ -119,15 +127,19 @@ class AffixPatternMatcherBuilder : public ::icu::number::impl::TokenConsumer {
|
|||
|
||||
class AffixPatternMatcher : public ArraySeriesMatcher {
|
||||
public:
|
||||
AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
||||
static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern,
|
||||
AffixTokenMatcherWarehouse& warehouse,
|
||||
parse_flags_t parseFlags, bool* success,
|
||||
UErrorCode& status);
|
||||
|
||||
private:
|
||||
UnicodeString fPattern;
|
||||
UnicodeString getPattern() const;
|
||||
|
||||
AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state
|
||||
bool operator==(const AffixPatternMatcher& other) const;
|
||||
|
||||
private:
|
||||
CompactUnicodeString<4> fPattern;
|
||||
|
||||
AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern);
|
||||
|
||||
|
@ -135,6 +147,65 @@ class AffixPatternMatcher : public ArraySeriesMatcher {
|
|||
};
|
||||
|
||||
|
||||
class AffixMatcher : public NumberParseMatcher, public UMemory {
|
||||
public:
|
||||
AffixMatcher() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
||||
AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags);
|
||||
|
||||
// static void createMatchers() is the constructor for AffixMatcherWarehouse in C++
|
||||
|
||||
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
|
||||
|
||||
void postProcess(ParsedNumber& result) const override;
|
||||
|
||||
const UnicodeSet& getLeadCodePoints() override;
|
||||
|
||||
private:
|
||||
AffixPatternMatcher* fPrefix;
|
||||
AffixPatternMatcher* fSuffix;
|
||||
result_flags_t fFlags;
|
||||
|
||||
/**
|
||||
* Helper method to return whether the given AffixPatternMatcher equals the given pattern string.
|
||||
* Either both arguments must be null or the pattern string inside the AffixPatternMatcher must equal
|
||||
* the given pattern string.
|
||||
*/
|
||||
static bool matched(const AffixPatternMatcher* affix, const UnicodeString& patternString);
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* A C++-only class to retain ownership of the AffixMatchers needed for parsing.
|
||||
*/
|
||||
class AffixMatcherWarehouse {
|
||||
public:
|
||||
AffixMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
||||
// in Java, this is AffixMatcher#createMatchers()
|
||||
AffixMatcherWarehouse(const AffixPatternProvider& patternInfo, NumberParserImpl& output,
|
||||
AffixTokenMatcherWarehouse& warehouse, const IgnorablesMatcher& ignorables,
|
||||
parse_flags_t parseFlags, UErrorCode& status);
|
||||
|
||||
private:
|
||||
// 9 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix
|
||||
AffixMatcher fAffixMatchers[9];
|
||||
// 6 is the limit: positive, zero, and negative, a prefix and a suffix for each
|
||||
AffixPatternMatcher fAffixPatternMatchers[6];
|
||||
// Store all the tokens used by the AffixPatternMatchers
|
||||
AffixTokenMatcherWarehouse fAffixTokenMatcherWarehouse;
|
||||
|
||||
static bool isInteresting(const AffixPatternProvider& patternInfo, const IgnorablesMatcher& ignorables,
|
||||
parse_flags_t parseFlags, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Helper method to return whether (1) both lhs and rhs are null/invalid, or (2) if they are both
|
||||
* valid, whether they are equal according to operator==. Similar to Java Objects.equals()
|
||||
*/
|
||||
static bool equals(const AffixPatternMatcher* lhs, const AffixPatternMatcher* rhs);
|
||||
};
|
||||
|
||||
|
||||
} // namespace impl
|
||||
} // namespace numparse
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -23,7 +23,7 @@ SymbolMatcher::SymbolMatcher(const UnicodeString& symbolString, unisets::Key key
|
|||
}
|
||||
}
|
||||
|
||||
const UnicodeSet* SymbolMatcher::getSet() {
|
||||
const UnicodeSet* SymbolMatcher::getSet() const {
|
||||
return fUniSet;
|
||||
}
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ class SymbolMatcher : public NumberParseMatcher, public UMemory {
|
|||
public:
|
||||
SymbolMatcher() = default; // WARNING: Leaves the object in an unusable state
|
||||
|
||||
const UnicodeSet* getSet();
|
||||
const UnicodeSet* getSet() const;
|
||||
|
||||
bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override;
|
||||
|
||||
|
|
|
@ -48,11 +48,35 @@ enum ParseFlags {
|
|||
PARSE_FLAG_PLUS_SIGN_ALLOWED = 0x0400,
|
||||
};
|
||||
|
||||
//template<typename T>
|
||||
//struct MaybeNeedsAdoption {
|
||||
// T* ptr;
|
||||
// bool needsAdoption;
|
||||
//};
|
||||
|
||||
// TODO: Is this class worthwhile?
|
||||
template<int32_t stackCapacity>
|
||||
class CompactUnicodeString {
|
||||
public:
|
||||
CompactUnicodeString() {
|
||||
static_assert(stackCapacity > 0, "cannot have zero space on stack");
|
||||
fBuffer[0] = 0;
|
||||
}
|
||||
|
||||
CompactUnicodeString(const UnicodeString& text)
|
||||
: fBuffer(text.length() + 1) {
|
||||
memcpy(fBuffer.getAlias(), text.getBuffer(), sizeof(UChar) * text.length());
|
||||
fBuffer[text.length()] = 0;
|
||||
}
|
||||
|
||||
inline UnicodeString toAliasedUnicodeString() const {
|
||||
return UnicodeString(TRUE, fBuffer.getAlias(), -1);
|
||||
}
|
||||
|
||||
bool operator==(const CompactUnicodeString& other) const {
|
||||
// Use the alias-only constructor and then call UnicodeString operator==
|
||||
return toAliasedUnicodeString() == other.toAliasedUnicodeString();
|
||||
}
|
||||
|
||||
private:
|
||||
MaybeStackArray<UChar, stackCapacity> fBuffer;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Struct-like class to hold the results of a parsing routine.
|
||||
|
|
|
@ -215,8 +215,12 @@ void NumberParserTest::testSeriesMatcher() {
|
|||
void NumberParserTest::testCurrencyAnyMatcher() {
|
||||
IcuTestErrorCode status(*this, "testCurrencyAnyMatcher");
|
||||
|
||||
UnicodeString currency1(u"IU$");
|
||||
UnicodeString currency2(u"ICU");
|
||||
DecimalFormatSymbols symbols("en", status);
|
||||
IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
|
||||
AffixTokenMatcherWarehouse warehouse(u"ICU", u"IU$", u"ICU", {"en",status}, &ignorables, "en");
|
||||
Locale locale("en");
|
||||
AffixTokenMatcherWarehouse warehouse(u"ICU", &currency1, &currency2, &symbols, &ignorables, &locale);
|
||||
NumberParseMatcher& matcher = warehouse.currency(status);
|
||||
|
||||
static const struct TestCase{
|
||||
|
@ -248,8 +252,12 @@ void NumberParserTest::testCurrencyAnyMatcher() {
|
|||
void NumberParserTest::testAffixPatternMatcher() {
|
||||
IcuTestErrorCode status(*this, "testAffixPatternMatcher");
|
||||
|
||||
UnicodeString currency1(u"foo");
|
||||
UnicodeString currency2(u"bar");
|
||||
DecimalFormatSymbols symbols("en", status);
|
||||
IgnorablesMatcher ignorables(unisets::DEFAULT_IGNORABLES);
|
||||
AffixTokenMatcherWarehouse warehouse(u"EUR", u"foo", u"bar", {"en", status}, &ignorables, "en");
|
||||
Locale locale("en");
|
||||
AffixTokenMatcherWarehouse warehouse(u"EUR", &currency1, &currency2, &symbols, &ignorables, &locale);
|
||||
|
||||
static const struct TestCase {
|
||||
bool exactMatch;
|
||||
|
|
Loading…
Add table
Reference in a new issue