ICU-11276 Plural ranges loaded from data; first implementations of ModifierStore.

This commit is contained in:
Shane Carr 2018-09-14 00:53:32 -07:00
parent e8d2ec8531
commit 18431084c2
No known key found for this signature in database
GPG key ID: FCED3B24AAB18B5C
8 changed files with 220 additions and 32 deletions

View file

@ -265,4 +265,8 @@ void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &mic
micros.modOuter = &fModifiers[utils::getStandardPlural(rules, copy)];
}
/**
 * ModifierStore lookup: hands back the long-name modifier stored for the
 * requested plural form. The signum argument is ignored by this store.
 */
const Modifier* LongNameHandler::getModifier(int8_t /*signum*/, StandardPlural::Form plural) const {
    const SimpleModifier& selected = fModifiers[plural];
    return &selected;
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -14,7 +14,7 @@
U_NAMESPACE_BEGIN namespace number {
namespace impl {
class LongNameHandler : public MicroPropsGenerator, public UMemory {
class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public UMemory {
public:
static LongNameHandler
forCurrencyLongNames(const Locale &loc, const CurrencyUnit &currency, const PluralRules *rules,
@ -28,6 +28,8 @@ class LongNameHandler : public MicroPropsGenerator, public UMemory {
void
processQuantity(DecimalQuantity &quantity, MicroProps &micros, UErrorCode &status) const U_OVERRIDE;
const Modifier* getModifier(int8_t signum, StandardPlural::Form plural) const U_OVERRIDE;
private:
SimpleModifier fModifiers[StandardPlural::Form::COUNT];
const PluralRules *rules;

View file

@ -53,6 +53,12 @@ void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) {
Modifier::~Modifier() = default;
/**
 * Builds a "bogus" Parameters value, i.e. one whose `obj` is nullptr.
 *
 * All fields are given deterministic values so that the struct can be copied
 * and stored without carrying indeterminate `signum`/`plural` members around;
 * only `obj == nullptr` is meaningful to callers.
 *
 * @return A Parameters instance that is not attached to any ModifierStore.
 */
Modifier::Parameters Modifier::Parameters::getBogus() {
    Modifier::Parameters result;
    result.obj = nullptr;
    // Placeholder values; they carry no meaning while obj is nullptr.
    result.signum = 0;
    result.plural = StandardPlural::OTHER;
    return result;
}
ModifierStore::~ModifierStore() = default;
AdoptingModifierStore::~AdoptingModifierStore() {
@ -62,6 +68,14 @@ AdoptingModifierStore::~AdoptingModifierStore() {
}
/**
 * Stores a copy of the supplied parameters so that getParameters() can
 * report them later.
 */
ModifierWithParameters::ModifierWithParameters(const Modifier::Parameters& parameters)
        : fParameters(parameters) {
}
/**
 * Writes the parameters captured at construction time into `output`.
 */
void ModifierWithParameters::getParameters(Parameters& output) const {
    const Parameters& stored = fParameters;
    output = stored;
}
int32_t ConstantAffixModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
UErrorCode &status) const {
// Insert the suffix first since inserting the prefix will change the rightIndex
@ -108,7 +122,12 @@ bool ConstantAffixModifier::semanticallyEquivalent(const Modifier& other) const
SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong)
: fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong) {
: SimpleModifier(simpleFormatter, field, strong, Modifier::Parameters::getBogus()) {}
SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong,
const Modifier::Parameters parameters)
: ModifierWithParameters(parameters),
fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong) {
int32_t argLimit = SimpleFormatter::getArgumentLimit(
fCompiledPattern.getBuffer(), fCompiledPattern.length());
if (argLimit == 0) {
@ -140,7 +159,8 @@ SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field fie
}
SimpleModifier::SimpleModifier()
: fField(UNUM_FIELD_COUNT), fStrong(false), fPrefixLength(0), fSuffixLength(0) {
: ModifierWithParameters(Modifier::Parameters::getBogus()),
fField(UNUM_FIELD_COUNT), fStrong(false), fPrefixLength(0), fSuffixLength(0) {
}
int32_t SimpleModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
@ -174,12 +194,6 @@ bool SimpleModifier::containsField(UNumberFormatFields field) const {
return false;
}
/**
 * Reports the parameters this modifier was constructed with.
 *
 * NumberRangeFormatterImpl::resolveModifierPlurals() calls getParameters() on
 * the modifiers it collapses, so this must not assert. The parameters are
 * held by the ModifierWithParameters base class; forward to it.
 *
 * @param output Receives the stored parameters.
 */
void SimpleModifier::getParameters(Parameters& output) const {
    ModifierWithParameters::getParameters(output);
}
bool SimpleModifier::semanticallyEquivalent(const Modifier& other) const {
auto* _other = dynamic_cast<const SimpleModifier*>(&other);
if (_other == nullptr) {
@ -303,12 +317,6 @@ bool ConstantMultiFieldModifier::containsField(UNumberFormatFields field) const
return fPrefix.containsField(field) || fSuffix.containsField(field);
}
/**
 * Reports the parameters this modifier was constructed with.
 *
 * NumberRangeFormatterImpl::resolveModifierPlurals() calls getParameters() on
 * the modifiers it collapses, so this must not assert. The parameters are
 * held by the ModifierWithParameters base class; forward to it.
 *
 * @param output Receives the stored parameters.
 */
void ConstantMultiFieldModifier::getParameters(Parameters& output) const {
    ModifierWithParameters::getParameters(output);
}
bool ConstantMultiFieldModifier::semanticallyEquivalent(const Modifier& other) const {
auto* _other = dynamic_cast<const ConstantMultiFieldModifier*>(&other);
if (_other == nullptr) {

View file

@ -18,6 +18,19 @@
U_NAMESPACE_BEGIN namespace number {
namespace impl {
/**
 * A base class for modifiers that need to be able to keep a reference to a ModifierStore.
 *
 * The Modifier::Parameters passed to the constructor are copied into the
 * object and handed back unchanged by getParameters().
 */
class U_I18N_API ModifierWithParameters : public Modifier, public UMemory {
  public:
    /**
     * @param parameters The parameters to remember. Marked explicit so that a
     *                   Parameters value is never implicitly converted.
     */
    explicit ModifierWithParameters(const Modifier::Parameters& parameters);

    /** Copies the stored parameters into `output`. */
    void getParameters(Parameters& output) const U_OVERRIDE;

  private:
    /** Copy of the parameters supplied at construction. */
    Modifier::Parameters fParameters;
};
/**
* The canonical implementation of {@link Modifier}, containing a prefix and suffix string.
* TODO: This is not currently being used by real code and could be removed.
@ -54,10 +67,13 @@ class U_I18N_API ConstantAffixModifier : public Modifier, public UObject {
* The second primary implementation of {@link Modifier}, this one consuming a {@link SimpleFormatter}
* pattern.
*/
class U_I18N_API SimpleModifier : public Modifier, public UMemory {
class U_I18N_API SimpleModifier : public ModifierWithParameters {
public:
SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong);
SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong,
const Modifier::Parameters parameters);
// Default constructor for LongNameHandler.h
SimpleModifier();
@ -72,8 +88,6 @@ class U_I18N_API SimpleModifier : public Modifier, public UMemory {
bool containsField(UNumberFormatFields field) const U_OVERRIDE;
void getParameters(Parameters& output) const U_OVERRIDE;
bool semanticallyEquivalent(const Modifier& other) const U_OVERRIDE;
/**
@ -129,18 +143,27 @@ class U_I18N_API SimpleModifier : public Modifier, public UMemory {
* An implementation of {@link Modifier} that allows for multiple types of fields in the same modifier. Constructed
* based on the contents of two {@link NumberStringBuilder} instances (one for the prefix, one for the suffix).
*/
class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory {
class U_I18N_API ConstantMultiFieldModifier :public ModifierWithParameters {
public:
ConstantMultiFieldModifier(
const NumberStringBuilder &prefix,
const NumberStringBuilder &suffix,
bool overwrite,
bool strong)
: fPrefix(prefix),
bool strong,
const Modifier::Parameters parameters)
: ModifierWithParameters(parameters),
fPrefix(prefix),
fSuffix(suffix),
fOverwrite(overwrite),
fStrong(strong) {}
ConstantMultiFieldModifier(
const NumberStringBuilder &prefix,
const NumberStringBuilder &suffix,
bool overwrite,
bool strong)
: ConstantMultiFieldModifier(prefix, suffix, overwrite, strong, Modifier::Parameters::getBogus()) {}
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
UErrorCode &status) const U_OVERRIDE;
@ -152,8 +175,6 @@ class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory {
bool containsField(UNumberFormatFields field) const U_OVERRIDE;
void getParameters(Parameters& output) const U_OVERRIDE;
bool semanticallyEquivalent(const Modifier& other) const U_OVERRIDE;
protected:

View file

@ -101,9 +101,8 @@ bool ScientificModifier::containsField(UNumberFormatFields field) const {
}
/**
 * Reports that this modifier has no plural-dependent variants.
 *
 * NumberRangeFormatterImpl::resolveModifierPlurals() queries the inner
 * modifier through this method and treats `obj == nullptr` as "no plural
 * form", so the method must return normally rather than assert.
 *
 * @param output Receives the parameters; only `obj` is written.
 */
void ScientificModifier::getParameters(Parameters& output) const {
    // Not part of any plural sets
    output.obj = nullptr;
}
bool ScientificModifier::semanticallyEquivalent(const Modifier& other) const {

View file

@ -194,6 +194,8 @@ class U_I18N_API Modifier {
const ModifierStore* obj = nullptr;
int8_t signum;
StandardPlural::Form plural;
static Parameters getBogus();
};
/**

View file

@ -26,6 +26,11 @@ constexpr int8_t identity2d(UNumberRangeIdentityFallback a, UNumberRangeIdentity
}
// Holds the range-formatting patterns for one locale.
// NumberRangeFormatterImpl's constructor copies rangePattern into its
// fRangeFormatter and builds fApproximatelyModifier from approximatelyPattern.
struct NumberRangeData {
// Pattern used to format a range.
SimpleFormatter rangePattern;
// Pattern used to build the "approximately" modifier.
SimpleFormatter approximatelyPattern;
};
class NumberRangeDataSink : public ResourceSink {
public:
NumberRangeDataSink(NumberRangeData& data) : fData(data) {}
@ -77,9 +82,94 @@ void getNumberRangeData(const char* localeName, const char* nsName, NumberRangeD
}
}
class PluralRangesDataSink : public ResourceSink {
public:
PluralRangesDataSink(StandardPluralRanges& output) : fOutput(output) {}
void put(const char* /*key*/, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE {
ResourceArray entriesArray = value.getArray(status);
if (U_FAILURE(status)) { return; }
fOutput.setCapacity(entriesArray.getSize());
for (int i = 0; entriesArray.getValue(i, value); i++) {
ResourceArray pluralFormsArray = value.getArray(status);
if (U_FAILURE(status)) { return; }
pluralFormsArray.getValue(0, value);
StandardPlural::Form first = StandardPlural::fromString(value.getUnicodeString(status), status);
if (U_FAILURE(status)) { return; }
pluralFormsArray.getValue(1, value);
StandardPlural::Form second = StandardPlural::fromString(value.getUnicodeString(status), status);
if (U_FAILURE(status)) { return; }
pluralFormsArray.getValue(2, value);
StandardPlural::Form result = StandardPlural::fromString(value.getUnicodeString(status), status);
if (U_FAILURE(status)) { return; }
fOutput.addPluralRange(first, second, result);
}
}
private:
StandardPluralRanges& fOutput;
};
/**
 * Loads the plural-range triples for the language of `locale` from the
 * "pluralRanges" resource bundle into `output`.
 *
 * The lookup is two-step: "locales/<language>" yields the name of a rule
 * set, and "rules/<set>" holds the entries consumed by PluralRangesDataSink.
 * If the language has no entry, the function returns without touching
 * `output` and without setting `status`.
 *
 * @param locale Locale whose language selects the rule set.
 * @param output Receives the loaded triples.
 * @param status Set on failure to open the bundle, build the path, or read
 *               the rule set.
 */
void getPluralRangesData(const Locale& locale, StandardPluralRanges& output, UErrorCode& status) {
    if (U_FAILURE(status)) { return; }
    LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "pluralRanges", &status));
    if (U_FAILURE(status)) { return; }

    CharString dataPath;
    dataPath.append("locales/", -1, status);
    dataPath.append(locale.getLanguage(), -1, status);
    if (U_FAILURE(status)) { return; }
    int32_t setLen = 0;
    // Not all languages are covered: fail gracefully
    UErrorCode internalStatus = U_ZERO_ERROR;
    // The key is a '/'-separated path, so the path-aware lookup is required;
    // a plain key lookup would search for a literal "locales/<language>" key.
    const UChar* set = ures_getStringByKeyWithFallback(
            rb.getAlias(), dataPath.data(), &setLen, &internalStatus);
    if (U_FAILURE(internalStatus)) { return; }

    dataPath.clear();
    dataPath.append("rules/", -1, status);
    dataPath.appendInvariantChars(set, setLen, status);
    if (U_FAILURE(status)) { return; }
    PluralRangesDataSink sink(output);
    ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, status);
}
} // namespace
void StandardPluralRanges::initialize(const Locale& locale, UErrorCode& status) {
getPluralRangesData(locale, *this, status);
}
/**
 * Appends one (first, second) -> result mapping to the list of triples.
 *
 * Callers are expected to reserve room with setCapacity() first. If the
 * array is nevertheless full, it is grown here so that builds without
 * assertions never write past the end of the storage. If growing fails the
 * entry is dropped, since this method has no way to report an error.
 */
void StandardPluralRanges::addPluralRange(
        StandardPlural::Form first,
        StandardPlural::Form second,
        StandardPlural::Form result) {
    if (fTriplesLen >= fTriples.getCapacity()) {
        // Preserve the fTriplesLen entries already stored while growing.
        int32_t newCapacity = fTriples.getCapacity() * 2 + 1;
        if (fTriples.resize(newCapacity, fTriplesLen) == nullptr) {
            return;
        }
    }
    U_ASSERT(fTriplesLen < fTriples.getCapacity());
    fTriples[fTriplesLen] = {first, second, result};
    fTriplesLen++;
}
/**
 * Ensures the triple array can hold at least `length` entries.
 *
 * Entries that were already added are carried over when the array grows, so
 * calling this after addPluralRange() does not discard data. The capacity is
 * never reduced.
 *
 * @param length Minimum number of entries the array must be able to hold.
 */
void StandardPluralRanges::setCapacity(int32_t length) {
    if (length > fTriples.getCapacity()) {
        // Second argument: number of existing items to copy into the new array.
        fTriples.resize(length, fTriplesLen);
    }
}
/**
 * Looks up the plural form to use for a range whose endpoints have the given
 * plural forms. Scans the stored triples in insertion order and returns the
 * result of the first match; StandardPlural::OTHER when nothing matches.
 */
StandardPlural::Form
StandardPluralRanges::resolve(StandardPlural::Form first, StandardPlural::Form second) const {
    const auto* entry = fTriples.getAlias();
    const auto* const limit = entry + fTriplesLen;
    for (; entry != limit; ++entry) {
        if (entry->first != first || entry->second != second) {
            continue;
        }
        return entry->result;
    }
    // Default fallback
    return StandardPlural::OTHER;
}
NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros, UErrorCode& status)
: formatterImpl1(macros.formatter1.fMacros, status),
formatterImpl2(macros.formatter2.fMacros, status),
@ -98,6 +188,10 @@ NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros
if (U_FAILURE(status)) { return; }
fRangeFormatter = data.rangePattern;
fApproximatelyModifier = {data.approximatelyPattern, UNUM_FIELD_COUNT, false};
// TODO: Get locale from PluralRules instead?
fPluralRanges.initialize(macros.locale, status);
if (U_FAILURE(status)) { return; }
}
void NumberRangeFormatterImpl::format(UFormattedNumberRangeData& data, bool equalBeforeRounding, UErrorCode& status) const {
@ -320,7 +414,8 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
if (collapseInner) {
// Note: this is actually a mix of prefix and suffix, but adding to infix length works
lengthInfix += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
const Modifier& mod = resolveModifierPlurals(*micros1.modInner, *micros2.modInner);
lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
} else {
length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
@ -328,7 +423,8 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
if (collapseMiddle) {
// Note: this is actually a mix of prefix and suffix, but adding to infix length works
lengthInfix += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
const Modifier& mod = resolveModifierPlurals(*micros1.modMiddle, *micros2.modMiddle);
lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
} else {
length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
@ -336,7 +432,8 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
if (collapseOuter) {
// Note: this is actually a mix of prefix and suffix, but adding to infix length works
lengthInfix += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
const Modifier& mod = resolveModifierPlurals(*micros1.modOuter, *micros2.modOuter);
lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
} else {
length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
@ -344,5 +441,32 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
}
/**
 * Chooses the modifier to apply when the modifiers of both range endpoints
 * are collapsed into one.
 *
 * If either modifier reports no ModifierStore (obj == nullptr), `first` is
 * returned unchanged. Otherwise the two plural forms are combined through
 * fPluralRanges and the matching modifier is fetched from the second
 * modifier's store, using the second modifier's signum.
 */
const Modifier&
NumberRangeFormatterImpl::resolveModifierPlurals(const Modifier& first, const Modifier& second) const {
    Modifier::Parameters firstParams;
    first.getParameters(firstParams);
    if (firstParams.obj == nullptr) {
        // First endpoint has no plural form: fall back to it as-is
        return first;
    }

    Modifier::Parameters secondParams;
    second.getParameters(secondParams);
    if (secondParams.obj == nullptr) {
        // Second endpoint has no plural form: fall back to the first
        return first;
    }

    // Ask the locale data which plural form the combined range takes
    const StandardPlural::Form rangePlural =
            fPluralRanges.resolve(firstParams.plural, secondParams.plural);

    // Fetch the modifier for that plural form from the store
    const Modifier* resolved = secondParams.obj->getModifier(secondParams.signum, rangePlural);
    U_ASSERT(resolved != nullptr);
    return *resolved;
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -44,9 +44,33 @@ struct UFormattedNumberRangeData : public UMemory {
};
struct NumberRangeData {
SimpleFormatter rangePattern;
SimpleFormatter approximatelyPattern;
/**
 * Table of plural-range rules: maps the plural forms of the two endpoints of
 * a range to the plural form that the range as a whole should use.
 */
class StandardPluralRanges : public UMemory {
public:
/** Loads the rules for the given locale (delegates to getPluralRangesData). */
void initialize(const Locale& locale, UErrorCode& status);
/**
 * Returns the stored result for the (first, second) pair, or
 * StandardPlural::OTHER when no triple matches.
 */
StandardPlural::Form resolve(StandardPlural::Form first, StandardPlural::Form second) const;
/** Used for data loading. */
void addPluralRange(
StandardPlural::Form first,
StandardPlural::Form second,
StandardPlural::Form result);
/** Used for data loading. */
void setCapacity(int32_t length);
private:
// One rule: endpoints with plural forms `first` and `second` yield `result`.
struct StandardPluralRangeTriple {
StandardPlural::Form first;
StandardPlural::Form second;
StandardPlural::Form result;
};
// TODO: An array is simple here, but it results in linear lookup time.
// Certain locales have 20-30 entries in this list.
// Consider changing to a smarter data structure.
typedef MaybeStackArray<StandardPluralRangeTriple, 3> PluralRangeTriples;
// Storage for the triples; only the first fTriplesLen slots are in use.
PluralRangeTriples fTriples;
// Number of triples added so far via addPluralRange().
int32_t fTriplesLen = 0;
};
@ -67,6 +91,8 @@ class NumberRangeFormatterImpl : public UMemory {
SimpleFormatter fRangeFormatter;
SimpleModifier fApproximatelyModifier;
StandardPluralRanges fPluralRanges;
void formatSingleValue(UFormattedNumberRangeData& data,
MicroProps& micros1, MicroProps& micros2,
UErrorCode& status) const;
@ -78,6 +104,8 @@ class NumberRangeFormatterImpl : public UMemory {
void formatRange(UFormattedNumberRangeData& data,
MicroProps& micros1, MicroProps& micros2,
UErrorCode& status) const;
const Modifier& resolveModifierPlurals(const Modifier& first, const Modifier& second) const;
};