From eda8266715192abf101ffa0d08b1379e85429ef9 Mon Sep 17 00:00:00 2001 From: George Rhoten Date: Mon, 18 Aug 2014 20:47:36 +0000 Subject: [PATCH] ICU-11064 Add syntax for PluralFormat in RBNF X-SVN-Rev: 36197 --- icu4c/source/i18n/nfrs.cpp | 10 +- icu4c/source/i18n/nfrs.h | 64 +++---- icu4c/source/i18n/nfrule.cpp | 266 ++++++++++++++------------ icu4c/source/i18n/nfrule.h | 15 +- icu4c/source/i18n/nfsubs.cpp | 60 +++--- icu4c/source/i18n/nfsubs.h | 6 +- icu4c/source/i18n/plurfmt.cpp | 74 ++++++- icu4c/source/i18n/rbnf.cpp | 24 ++- icu4c/source/i18n/unicode/plurfmt.h | 7 +- icu4c/source/i18n/unicode/rbnf.h | 19 ++ icu4c/source/test/intltest/itrbnf.cpp | 140 +++++++++++--- icu4c/source/test/intltest/itrbnf.h | 7 +- 12 files changed, 464 insertions(+), 228 deletions(-) diff --git a/icu4c/source/i18n/nfrs.cpp b/icu4c/source/i18n/nfrs.cpp index 6cc0c638188..1a4bf493f3e 100644 --- a/icu4c/source/i18n/nfrs.cpp +++ b/icu4c/source/i18n/nfrs.cpp @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 1997-2012, International Business Machines +* Copyright (C) 1997-2014, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** * file name: nfrs.cpp @@ -335,7 +335,7 @@ NFRuleSet::operator==(const NFRuleSet& rhs) const #define RECURSION_LIMIT 50 void -NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos) const +NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, UErrorCode& status) const { NFRule *rule = findNormalRule(number); if (rule) { // else error, but can't report it @@ -344,14 +344,14 @@ NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos) const // stop recursion ncThis->fRecursionCount = 0; } else { - rule->doFormat(number, toAppendTo, pos); + rule->doFormat(number, toAppendTo, pos, status); ncThis->fRecursionCount--; } } } void -NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const +NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, UErrorCode& status) const { NFRule *rule = findDoubleRule(number); if (rule) { // else error, but can't report it @@ -360,7 +360,7 @@ NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const // stop recursion ncThis->fRecursionCount = 0; } else { - rule->doFormat(number, toAppendTo, pos); + rule->doFormat(number, toAppendTo, pos, status); ncThis->fRecursionCount--; } } diff --git a/icu4c/source/i18n/nfrs.h b/icu4c/source/i18n/nfrs.h index 9bac043f76f..93498ee2461 100644 --- a/icu4c/source/i18n/nfrs.h +++ b/icu4c/source/i18n/nfrs.h @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 1997-2012, International Business Machines +* Copyright (C) 1997-2014, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** * file name: nfrs.h @@ -29,49 +29,49 @@ U_NAMESPACE_BEGIN class NFRuleSet : public UMemory { - public: - NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status); - void parseRules(UnicodeString& rules, const RuleBasedNumberFormat* owner, UErrorCode& status); - void makeIntoFractionRuleSet() { fIsFractionRuleSet = TRUE; } +public: + NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status); + void parseRules(UnicodeString& rules, const RuleBasedNumberFormat* owner, UErrorCode& status); + void makeIntoFractionRuleSet() { fIsFractionRuleSet = TRUE; } - ~NFRuleSet(); + ~NFRuleSet(); - UBool operator==(const NFRuleSet& rhs) const; - UBool operator!=(const NFRuleSet& rhs) const { return !operator==(rhs); } + UBool operator==(const NFRuleSet& rhs) const; + UBool operator!=(const NFRuleSet& rhs) const { return !operator==(rhs); } - UBool isPublic() const { return fIsPublic; } + UBool isPublic() const { return fIsPublic; } - UBool isParseable() const { return fIsParseable; } + UBool isParseable() const { return fIsParseable; } - UBool isFractionRuleSet() const { return fIsFractionRuleSet; } + UBool isFractionRuleSet() const { return fIsFractionRuleSet; } - void getName(UnicodeString& result) const { result.setTo(name); } - UBool isNamed(const UnicodeString& _name) const { return this->name == _name; } + void getName(UnicodeString& result) const { result.setTo(name); } + UBool isNamed(const UnicodeString& _name) const { return this->name == _name; } - void format(int64_t number, UnicodeString& toAppendTo, int32_t pos) const; - void format(double number, UnicodeString& toAppendTo, int32_t pos) const; + void format(int64_t number, UnicodeString& toAppendTo, int32_t pos, UErrorCode& status) const; + void format(double number, UnicodeString& toAppendTo, int32_t pos, UErrorCode& status) const; - UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const; + UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const; - void appendRules(UnicodeString& result) const; // toString + void appendRules(UnicodeString& result) const; // toString - private: - NFRule * findNormalRule(int64_t number) const; - NFRule * findDoubleRule(double number) const; - NFRule * findFractionRuleSetRule(double number) const; +private: + NFRule * findNormalRule(int64_t number) const; + NFRule * findDoubleRule(double number) const; + NFRule * findFractionRuleSetRule(double number) const; - private: - UnicodeString name; - NFRuleList rules; - NFRule *negativeNumberRule; - NFRule *fractionRules[3]; - UBool fIsFractionRuleSet; - UBool fIsPublic; - UBool fIsParseable; - int32_t fRecursionCount; +private: + UnicodeString name; + NFRuleList rules; + NFRule *negativeNumberRule; + NFRule *fractionRules[3]; + UBool fIsFractionRuleSet; + UBool fIsPublic; + UBool fIsParseable; + int32_t fRecursionCount; - NFRuleSet(const NFRuleSet &other); // forbid copying of this class - NFRuleSet &operator=(const NFRuleSet &other); // forbid copying of this class + NFRuleSet(const NFRuleSet &other); // forbid copying of this class + NFRuleSet &operator=(const NFRuleSet &other); // forbid copying of this class }; // utilities from old llong.h diff --git a/icu4c/source/i18n/nfrule.cpp b/icu4c/source/i18n/nfrule.cpp index ddf86c18591..c51774b92c9 100644 --- a/icu4c/source/i18n/nfrule.cpp +++ b/icu4c/source/i18n/nfrule.cpp @@ -20,6 +20,8 @@ #include "unicode/localpointer.h" #include "unicode/rbnf.h" #include "unicode/tblcoll.h" +#include "unicode/plurfmt.h" +#include "unicode/upluralrules.h" #include "unicode/coleitr.h" #include "unicode/uchar.h" #include "nfrs.h" @@ -37,13 +39,17 @@ NFRule::NFRule(const RuleBasedNumberFormat* _rbnf) , sub1(NULL) , sub2(NULL) , formatter(_rbnf) + , rulePatternFormat(NULL) { } NFRule::~NFRule() { - delete sub1; - delete sub2; + if (sub1 != sub2) { + delete sub2; + } + delete sub1; + delete rulePatternFormat; } static const UChar gLeftBracket = 0x005b; @@ -66,6 +72,9 @@ static const UChar gXDotX[] = {0x78, 0x2E, 0x78, 0}; /* "x.x" static const UChar gXDotZero[] = {0x78, 0x2E, 0x30, 0}; /* "x.0" */ static const UChar gZeroDotX[] = {0x30, 0x2E, 0x78, 0}; /* "0.x" */ +static const UChar gDollarOpenParenthesis[] = {0x24, 0x28, 0}; /* "$(" */ +static const UChar gClosedParenthesis[] = {0x29, 0}; /* ")" */ + static const UChar gLessLess[] = {0x3C, 0x3C, 0}; /* "<<" */ static const UChar gLessPercent[] = {0x3C, 0x25, 0}; /* "<%" */ static const UChar gLessHash[] = {0x3C, 0x23, 0}; /* "<#" */ @@ -117,8 +126,7 @@ NFRule::makeRules(UnicodeString& description, if (brack1 == -1 || brack2 == -1 || brack1 > brack2 || rule1->getType() == kProperFractionRule || rule1->getType() == kNegativeNumberRule) { - rule1->ruleText = description; - rule1->extractSubstitutions(ruleSet, predecessor, rbnf, status); + rule1->extractSubstitutions(ruleSet, description, predecessor, status); rules.add(rule1); } else { // if the description does contain a matched pair of brackets, @@ -178,8 +186,7 @@ NFRule::makeRules(UnicodeString& description, if (brack2 + 1 < description.length()) { sbuf.append(description, brack2 + 1, description.length() - brack2 - 1); } - rule2->ruleText.setTo(sbuf); - rule2->extractSubstitutions(ruleSet, predecessor, rbnf, status); + rule2->extractSubstitutions(ruleSet, sbuf, predecessor, status); } // rule1's text includes the text in the brackets but omits @@ -190,8 +197,7 @@ NFRule::makeRules(UnicodeString& description, if (brack2 + 1 < description.length()) { sbuf.append(description, brack2 + 1, description.length() - brack2 - 1); } - rule1->ruleText.setTo(sbuf); - rule1->extractSubstitutions(ruleSet, predecessor, rbnf, status); + rule1->extractSubstitutions(ruleSet, sbuf, predecessor, status); // if we only have one rule, return it; if we have two, return // a two-element array containing them (notice that rule2 goes @@ -370,13 +376,43 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status) */ void NFRule::extractSubstitutions(const NFRuleSet* ruleSet, + const UnicodeString &ruleText, const NFRule* predecessor, - const RuleBasedNumberFormat* rbnf, UErrorCode& status) { - if (U_SUCCESS(status)) { - sub1 = extractSubstitution(ruleSet, predecessor, rbnf, status); - sub2 = extractSubstitution(ruleSet, predecessor, rbnf, status); + if (U_FAILURE(status)) { + return; + } + this->ruleText = ruleText; + this->rulePatternFormat = NULL; + sub1 = extractSubstitution(ruleSet, predecessor, status); + if (sub1 == NULL || sub1->isNullSubstitution()) { + // Small optimization. There is no need to create a redundant NullSubstitution. + sub2 = sub1; + } + else { + sub2 = extractSubstitution(ruleSet, predecessor, status); + } + if (this->ruleText.startsWith(gDollarOpenParenthesis, -1) && this->ruleText.endsWith(gClosedParenthesis, -1)) { + int32_t endType = this->ruleText.indexOf(gComma); + if (endType < 0) { + status = U_PARSE_ERROR; + return; + } + UnicodeString type(this->ruleText.tempSubString(2, endType - 2)); + UPluralType pluralType; + if (type.startsWith(UNICODE_STRING_SIMPLE("cardinal"))) { + pluralType = UPLURAL_TYPE_CARDINAL; + } + else if (type.startsWith(UNICODE_STRING_SIMPLE("ordinal"))) { + pluralType = UPLURAL_TYPE_ORDINAL; + } + else { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + rulePatternFormat = formatter->createPluralFormat(pluralType, + this->ruleText.tempSubString(endType + 1, this->ruleText.length() - 2 - endType), status); } } @@ -395,7 +431,6 @@ NFRule::extractSubstitutions(const NFRuleSet* ruleSet, NFSubstitution * NFRule::extractSubstitution(const NFRuleSet* ruleSet, const NFRule* predecessor, - const RuleBasedNumberFormat* rbnf, UErrorCode& status) { NFSubstitution* result = NULL; @@ -409,7 +444,7 @@ NFRule::extractSubstitution(const NFRuleSet* ruleSet, // at the end of the rule text if (subStart == -1) { return NFSubstitution::makeSubstitution(ruleText.length(), this, predecessor, - ruleSet, rbnf, UnicodeString(), status); + ruleSet, this->formatter, UnicodeString(), status); } // special-case the ">>>" token, since searching for the > at the @@ -437,7 +472,7 @@ NFRule::extractSubstitution(const NFRuleSet* ruleSet, // at the end of the rule if (subEnd == -1) { return NFSubstitution::makeSubstitution(ruleText.length(), this, predecessor, - ruleSet, rbnf, UnicodeString(), status); + ruleSet, this->formatter, UnicodeString(), status); } // if we get here, we have a real substitution token (or at least @@ -446,7 +481,7 @@ NFRule::extractSubstitution(const NFRuleSet* ruleSet, UnicodeString subToken; subToken.setTo(ruleText, subStart, subEnd + 1 - subStart); result = NFSubstitution::makeSubstitution(subStart, this, predecessor, ruleSet, - rbnf, subToken, status); + this->formatter, subToken, status); // remove the substitution from the rule text ruleText.removeBetween(subStart, subEnd+1); @@ -645,16 +680,27 @@ NFRule::_appendRuleText(UnicodeString& result) const * should be inserted */ void -NFRule::doFormat(int64_t number, UnicodeString& toInsertInto, int32_t pos) const +NFRule::doFormat(int64_t number, UnicodeString& toInsertInto, int32_t pos, UErrorCode& status) const { // first, insert the rule's rule text into toInsertInto at the // specified position, then insert the results of the substitutions // into the right places in toInsertInto (notice we do the // substitutions in reverse order so that the offsets don't get // messed up) - toInsertInto.insert(pos, ruleText); - sub2->doSubstitution(number, toInsertInto, pos); - sub1->doSubstitution(number, toInsertInto, pos); + if (!rulePatternFormat) { + toInsertInto.insert(pos, ruleText); + } + else { + toInsertInto.insert(pos, + rulePatternFormat->format((double)(baseValue == 0 ? number : number/baseValue), status)); + } + + if (!sub2->isNullSubstitution()) { + sub2->doSubstitution(number, toInsertInto, pos, status); + } + if (!sub1->isNullSubstitution()) { + sub1->doSubstitution(number, toInsertInto, pos, status); + } } /** @@ -667,7 +713,7 @@ NFRule::doFormat(int64_t number, UnicodeString& toInsertInto, int32_t pos) const * should be inserted */ void -NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos) const +NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos, UErrorCode& status) const { // first, insert the rule's rule text into toInsertInto at the // specified position, then insert the results of the substitutions @@ -675,9 +721,19 @@ NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos) const // [again, we have two copies of this routine that do the same thing // so that we don't sacrifice precision in a long by casting it // to a double] - toInsertInto.insert(pos, ruleText); - sub2->doSubstitution(number, toInsertInto, pos); - sub1->doSubstitution(number, toInsertInto, pos); + if (!rulePatternFormat) { + toInsertInto.insert(pos, ruleText); + } + else { + toInsertInto.insert(pos, + rulePatternFormat->format(baseValue == 0 ? number : number/baseValue, status)); + } + if (!sub2->isNullSubstitution()) { + sub2->doSubstitution(number, toInsertInto, pos, status); + } + if (!sub1->isNullSubstitution()) { + sub1->doSubstitution(number, toInsertInto, pos, status); + } } /** @@ -1309,107 +1365,75 @@ NFRule::findText(const UnicodeString& str, int32_t startingAt, int32_t* length) const { -#if !UCONFIG_NO_COLLATION - // if lenient parsing is turned off, this is easy: just call - // String.indexOf() and we're done - if (!formatter->isLenient()) { - *length = key.length(); - return str.indexOf(key, startingAt); - - // but if lenient parsing is turned ON, we've got some work - // ahead of us - } else -#endif - { - //---------------------------------------------------------------- - // JDK 1.1 HACK (take out of 1.2-specific code) - - // in JDK 1.2, CollationElementIterator provides us with an - // API to map between character offsets and collation elements - // and we can do this by marching through the string comparing - // collation elements. We can't do that in JDK 1.1. Insted, - // we have to go through this horrible slow mess: - int32_t p = startingAt; - int32_t keyLen = 0; - - // basically just isolate smaller and smaller substrings of - // the target string (each running to the end of the string, - // and with the first one running from startingAt to the end) - // and then use prefixLength() to see if the search key is at - // the beginning of each substring. This is excruciatingly - // slow, but it will locate the key and tell use how long the - // matching text was. - UnicodeString temp; - UErrorCode status = U_ZERO_ERROR; - while (p < str.length() && keyLen == 0) { - temp.setTo(str, p, str.length() - p); - keyLen = prefixLength(temp, key, status); - if (U_FAILURE(status)) { - break; - } - if (keyLen != 0) { - *length = keyLen; - return p; - } - ++p; + if (rulePatternFormat) { + Formattable result; + FieldPosition position(UNUM_INTEGER_FIELD); + position.setBeginIndex(startingAt); + rulePatternFormat->parseType(str, this, result, position); + int start = position.getBeginIndex(); + if (start >= 0) { + *length = position.getEndIndex() - start; + return start; } - // if we make it to here, we didn't find it. Return -1 for the - // location. The length should be ignored, but set it to 0, - // which should be "safe" *length = 0; return -1; - - //---------------------------------------------------------------- - // JDK 1.2 version of this routine - //RuleBasedCollator collator = (RuleBasedCollator)formatter.getCollator(); - // - //CollationElementIterator strIter = collator.getCollationElementIterator(str); - //CollationElementIterator keyIter = collator.getCollationElementIterator(key); - // - //int keyStart = -1; - // - //str.setOffset(startingAt); - // - //int oStr = strIter.next(); - //int oKey = keyIter.next(); - //while (oKey != CollationElementIterator.NULLORDER) { - // while (oStr != CollationElementIterator.NULLORDER && - // CollationElementIterator.primaryOrder(oStr) == 0) - // oStr = strIter.next(); - // - // while (oKey != CollationElementIterator.NULLORDER && - // CollationElementIterator.primaryOrder(oKey) == 0) - // oKey = keyIter.next(); - // - // if (oStr == CollationElementIterator.NULLORDER) { - // return new int[] { -1, 0 }; - // } - // - // if (oKey == CollationElementIterator.NULLORDER) { - // break; - // } - // - // if (CollationElementIterator.primaryOrder(oStr) == - // CollationElementIterator.primaryOrder(oKey)) { - // keyStart = strIter.getOffset(); - // oStr = strIter.next(); - // oKey = keyIter.next(); - // } else { - // if (keyStart != -1) { - // keyStart = -1; - // keyIter.reset(); - // } else { - // oStr = strIter.next(); - // } - // } - //} - // - //if (oKey == CollationElementIterator.NULLORDER) { - // return new int[] { keyStart, strIter.getOffset() - keyStart }; - //} else { - // return new int[] { -1, 0 }; - //} } + if (!formatter->isLenient()) { + // if lenient parsing is turned off, this is easy: just call + // String.indexOf() and we're done + *length = key.length(); + return str.indexOf(key, startingAt); + } + else { + // but if lenient parsing is turned ON, we've got some work + // ahead of us + return findTextLenient(str, key, startingAt, length); + } +} + +int32_t +NFRule::findTextLenient(const UnicodeString& str, + const UnicodeString& key, + int32_t startingAt, + int32_t* length) const +{ + //---------------------------------------------------------------- + // JDK 1.1 HACK (take out of 1.2-specific code) + + // in JDK 1.2, CollationElementIterator provides us with an + // API to map between character offsets and collation elements + // and we can do this by marching through the string comparing + // collation elements. We can't do that in JDK 1.1. Insted, + // we have to go through this horrible slow mess: + int32_t p = startingAt; + int32_t keyLen = 0; + + // basically just isolate smaller and smaller substrings of + // the target string (each running to the end of the string, + // and with the first one running from startingAt to the end) + // and then use prefixLength() to see if the search key is at + // the beginning of each substring. This is excruciatingly + // slow, but it will locate the key and tell use how long the + // matching text was. + UnicodeString temp; + UErrorCode status = U_ZERO_ERROR; + while (p < str.length() && keyLen == 0) { + temp.setTo(str, p, str.length() - p); + keyLen = prefixLength(temp, key, status); + if (U_FAILURE(status)) { + break; + } + if (keyLen != 0) { + *length = keyLen; + return p; + } + ++p; + } + // if we make it to here, we didn't find it. Return -1 for the + // location. The length should be ignored, but set it to 0, + // which should be "safe" + *length = 0; + return -1; } /** diff --git a/icu4c/source/i18n/nfrule.h b/icu4c/source/i18n/nfrule.h index 8422a20c1d6..96474630a45 100644 --- a/icu4c/source/i18n/nfrule.h +++ b/icu4c/source/i18n/nfrule.h @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 1997-2008, International Business Machines +* Copyright (C) 1997-2014, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* */ @@ -25,6 +25,7 @@ class NFRuleList; class NFRuleSet; class NFSubstitution; class ParsePosition; +class PluralFormat; class RuleBasedNumberFormat; class UnicodeString; @@ -61,8 +62,8 @@ public: double getDivisor() const { return uprv_pow(radix, exponent); } - void doFormat(int64_t number, UnicodeString& toAppendTo, int32_t pos) const; - void doFormat(double number, UnicodeString& toAppendTo, int32_t pos) const; + void doFormat(int64_t number, UnicodeString& toAppendTo, int32_t pos, UErrorCode& status) const; + void doFormat(double number, UnicodeString& toAppendTo, int32_t pos, UErrorCode& status) const; UBool doParse(const UnicodeString& text, ParsePosition& pos, @@ -74,10 +75,13 @@ public: void _appendRuleText(UnicodeString& result) const; + int32_t findTextLenient(const UnicodeString& str, const UnicodeString& key, + int32_t startingAt, int32_t* resultCount) const; + private: void parseRuleDescriptor(UnicodeString& descriptor, UErrorCode& status); - void extractSubstitutions(const NFRuleSet* ruleSet, const NFRule* predecessor, const RuleBasedNumberFormat* rbnf, UErrorCode& status); - NFSubstitution* extractSubstitution(const NFRuleSet* ruleSet, const NFRule* predecessor, const RuleBasedNumberFormat* rbnf, UErrorCode& status); + void extractSubstitutions(const NFRuleSet* ruleSet, const UnicodeString &ruleText, const NFRule* predecessor, UErrorCode& status); + NFSubstitution* extractSubstitution(const NFRuleSet* ruleSet, const NFRule* predecessor, UErrorCode& status); int16_t expectedExponent() const; int32_t indexOfAny(const UChar* const strings[]) const; @@ -99,6 +103,7 @@ private: NFSubstitution* sub1; NFSubstitution* sub2; const RuleBasedNumberFormat* formatter; + const PluralFormat* rulePatternFormat; NFRule(const NFRule &other); // forbid copying of this class NFRule &operator=(const NFRule &other); // forbid copying of this class diff --git a/icu4c/source/i18n/nfsubs.cpp b/icu4c/source/i18n/nfsubs.cpp index 35c9ce1a3c8..62646003f69 100644 --- a/icu4c/source/i18n/nfsubs.cpp +++ b/icu4c/source/i18n/nfsubs.cpp @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 1997-2012, International Business Machines +* Copyright (C) 1997-2014, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** * file name: nfsubs.cpp @@ -149,8 +149,8 @@ public: virtual UBool operator==(const NFSubstitution& rhs) const; - virtual void doSubstitution(int64_t number, UnicodeString& toInsertInto, int32_t pos) const; - virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos) const; + virtual void doSubstitution(int64_t number, UnicodeString& toInsertInto, int32_t pos, UErrorCode& status) const; + virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos, UErrorCode& status) const; virtual int64_t transformNumber(int64_t number) const { return number % ldivisor; } virtual double transformNumber(double number) const { return uprv_fmod(number, divisor); } @@ -218,8 +218,8 @@ public: virtual UBool operator==(const NFSubstitution& rhs) const; - virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos) const; - virtual void doSubstitution(int64_t /*number*/, UnicodeString& /*toInsertInto*/, int32_t /*_pos*/) const {} + virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos, UErrorCode& status) const; + virtual void doSubstitution(int64_t /*number*/, UnicodeString& /*toInsertInto*/, int32_t /*_pos*/, UErrorCode& /*status*/) const {} virtual int64_t transformNumber(int64_t /*number*/) const { return 0; } virtual double transformNumber(double number) const { return number - uprv_floor(number); } @@ -294,8 +294,8 @@ public: virtual int64_t transformNumber(int64_t number) const { return number * ldenominator; } virtual double transformNumber(double number) const { return uprv_round(number * denominator); } - virtual void doSubstitution(int64_t /*number*/, UnicodeString& /*toInsertInto*/, int32_t /*_pos*/) const {} - virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos) const; + virtual void doSubstitution(int64_t /*number*/, UnicodeString& /*toInsertInto*/, int32_t /*_pos*/, UErrorCode& /*status*/) const {} + virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos, UErrorCode& status) const; virtual UBool doParse(const UnicodeString& text, ParsePosition& parsePosition, double baseValue, @@ -327,8 +327,8 @@ public: virtual ~NullSubstitution(); virtual void toString(UnicodeString& /*result*/) const {} - virtual void doSubstitution(double /*number*/, UnicodeString& /*toInsertInto*/, int32_t /*_pos*/) const {} - virtual void doSubstitution(int64_t /*number*/, UnicodeString& /*toInsertInto*/, int32_t /*_pos*/) const {} + virtual void doSubstitution(double /*number*/, UnicodeString& /*toInsertInto*/, int32_t /*_pos*/, UErrorCode& /*status*/) const {} + virtual void doSubstitution(int64_t /*number*/, UnicodeString& /*toInsertInto*/, int32_t /*_pos*/, UErrorCode& /*status*/) const {} virtual int64_t transformNumber(int64_t /*number*/) const { return 0; } virtual double transformNumber(double /*number*/) const { return 0; } virtual UBool doParse(const UnicodeString& /*text*/, @@ -602,13 +602,13 @@ NFSubstitution::toString(UnicodeString& text) const * position to determine exactly where to insert the new text) */ void -NFSubstitution::doSubstitution(int64_t number, UnicodeString& toInsertInto, int32_t _pos) const +NFSubstitution::doSubstitution(int64_t number, UnicodeString& toInsertInto, int32_t _pos, UErrorCode& status) const { if (ruleSet != NULL) { // perform a transformation on the number that is dependent // on the type of substitution this is, then just call its // rule set's format() method to format the result - ruleSet->format(transformNumber(number), toInsertInto, _pos + this->pos); + ruleSet->format(transformNumber(number), toInsertInto, _pos + this->pos, status); } else if (numberFormat != NULL) { // or perform the transformation on the number (preserving // the result's fractional part if the formatter it set @@ -620,7 +620,7 @@ NFSubstitution::doSubstitution(int64_t number, UnicodeString& toInsertInto, int3 } UnicodeString temp; - numberFormat->format(numberToFormat, temp); + numberFormat->format(numberToFormat, temp, status); toInsertInto.insert(_pos + this->pos, temp); } } @@ -636,7 +636,7 @@ NFSubstitution::doSubstitution(int64_t number, UnicodeString& toInsertInto, int3 * position to determine exactly where to insert the new text) */ void -NFSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, int32_t _pos) const { +NFSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, int32_t _pos, UErrorCode& status) const { // perform a transformation on the number being formatted that // is dependent on the type of substitution this is double numberToFormat = transformNumber(number); @@ -644,14 +644,14 @@ NFSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, int32 // if the result is an integer, from here on out we work in integer // space (saving time and memory and preserving accuracy) if (numberToFormat == uprv_floor(numberToFormat) && ruleSet != NULL) { - ruleSet->format(util64_fromDouble(numberToFormat), toInsertInto, _pos + this->pos); + ruleSet->format(util64_fromDouble(numberToFormat), toInsertInto, _pos + this->pos, status); // if the result isn't an integer, then call either our rule set's // format() method or our DecimalFormat's format() method to // format the result } else { if (ruleSet != NULL) { - ruleSet->format(numberToFormat, toInsertInto, _pos + this->pos); + ruleSet->format(numberToFormat, toInsertInto, _pos + this->pos, status); } else if (numberFormat != NULL) { UnicodeString temp; numberFormat->format(numberToFormat, temp); @@ -894,19 +894,19 @@ UBool ModulusSubstitution::operator==(const NFSubstitution& rhs) const * @param pos The position of the rule text in toInsertInto */ void -ModulusSubstitution::doSubstitution(int64_t number, UnicodeString& toInsertInto, int32_t _pos) const +ModulusSubstitution::doSubstitution(int64_t number, UnicodeString& toInsertInto, int32_t _pos, UErrorCode& status) const { // if this isn't a >>> substitution, just use the inherited version // of this function (which uses either a rule set or a DecimalFormat // to format its substitution value) if (ruleToUse == NULL) { - NFSubstitution::doSubstitution(number, toInsertInto, _pos); + NFSubstitution::doSubstitution(number, toInsertInto, _pos, status); // a >>> substitution goes straight to a particular rule to // format the substitution value } else { int64_t numberToFormat = transformNumber(number); - ruleToUse->doFormat(numberToFormat, toInsertInto, _pos + getPos()); + ruleToUse->doFormat(numberToFormat, toInsertInto, _pos + getPos(), status); } } @@ -919,20 +919,20 @@ ModulusSubstitution::doSubstitution(int64_t number, UnicodeString& toInsertInto, * @param pos The position of the rule text in toInsertInto */ void -ModulusSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, int32_t _pos) const +ModulusSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, int32_t _pos, UErrorCode& status) const { // if this isn't a >>> substitution, just use the inherited version // of this function (which uses either a rule set or a DecimalFormat // to format its substitution value) if (ruleToUse == NULL) { - NFSubstitution::doSubstitution(number, toInsertInto, _pos); + NFSubstitution::doSubstitution(number, toInsertInto, _pos, status); // a >>> substitution goes straight to a particular rule to // format the substitution value } else { double numberToFormat = transformNumber(number); - ruleToUse->doFormat(numberToFormat, toInsertInto, _pos + getPos()); + ruleToUse->doFormat(numberToFormat, toInsertInto, _pos + getPos(), status); } } @@ -1057,12 +1057,13 @@ FractionalPartSubstitution::FractionalPartSubstitution(int32_t _pos, * toInsertInto */ void -FractionalPartSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, int32_t _pos) const +FractionalPartSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, + int32_t _pos, UErrorCode& status) const { // if we're not in "byDigits" mode, just use the inherited // doSubstitution() routine if (!byDigits) { - NFSubstitution::doSubstitution(number, toInsertInto, _pos); + NFSubstitution::doSubstitution(number, toInsertInto, _pos, status); // if we're in "byDigits" mode, transform the value into an integer // by moving the decimal point eight places to the right and @@ -1104,13 +1105,13 @@ FractionalPartSubstitution::doSubstitution(double number, UnicodeString& toInser pad = TRUE; } int64_t digit = didx>=0 ? dl.getDigit(didx) - '0' : 0; - getRuleSet()->format(digit, toInsertInto, _pos + getPos()); + getRuleSet()->format(digit, toInsertInto, _pos + getPos(), status); } if (!pad) { // hack around lack of precision in digitlist. if we would end up with // "foo point" make sure we add a " zero" to the end. - getRuleSet()->format((int64_t)0, toInsertInto, _pos + getPos()); + getRuleSet()->format((int64_t)0, toInsertInto, _pos + getPos(), status); } } } @@ -1229,7 +1230,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(AbsoluteValueSubstitution) //=================================================================== void -NumeratorSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, int32_t apos) const { +NumeratorSubstitution::doSubstitution(double number, UnicodeString& toInsertInto, int32_t apos, UErrorCode& status) const { // perform a transformation on the number being formatted that // is dependent on the type of substitution this is @@ -1243,7 +1244,7 @@ NumeratorSubstitution::doSubstitution(double number, UnicodeString& toInsertInto int32_t len = toInsertInto.length(); while ((nf *= 10) < denominator) { toInsertInto.insert(apos + getPos(), gSpace); - aruleSet->format((int64_t)0, toInsertInto, apos + getPos()); + aruleSet->format((int64_t)0, toInsertInto, apos + getPos(), status); } apos += toInsertInto.length() - len; } @@ -1251,16 +1252,15 @@ NumeratorSubstitution::doSubstitution(double number, UnicodeString& toInsertInto // if the result is an integer, from here on out we work in integer // space (saving time and memory and preserving accuracy) if (numberToFormat == longNF && aruleSet != NULL) { - aruleSet->format(longNF, toInsertInto, apos + getPos()); + aruleSet->format(longNF, toInsertInto, apos + getPos(), status); // if the result isn't an integer, then call either our rule set's // format() method or our DecimalFormat's format() method to // format the result } else { if (aruleSet != NULL) { - aruleSet->format(numberToFormat, toInsertInto, apos + getPos()); + aruleSet->format(numberToFormat, toInsertInto, apos + getPos(), status); } else { - UErrorCode status = U_ZERO_ERROR; UnicodeString temp; getNumberFormat()->format(numberToFormat, temp, status); toInsertInto.insert(apos + getPos(), temp); diff --git a/icu4c/source/i18n/nfsubs.h b/icu4c/source/i18n/nfsubs.h index 644b4b2f6e1..c32164c551a 100644 --- a/icu4c/source/i18n/nfsubs.h +++ b/icu4c/source/i18n/nfsubs.h @@ -1,6 +1,6 @@ /* ****************************************************************************** -* Copyright (C) 1997-2007, International Business Machines +* Copyright (C) 1997-2014, International Business Machines * Corporation and others. All Rights Reserved. ****************************************************************************** * file name: nfsubs.h @@ -112,7 +112,7 @@ public: * rule text begins (this value is added to this substitution's * position to determine exactly where to insert the new text) */ - virtual void doSubstitution(int64_t number, UnicodeString& toInsertInto, int32_t pos) const; + virtual void doSubstitution(int64_t number, UnicodeString& toInsertInto, int32_t pos, UErrorCode& status) const; /** * Performs a mathematical operation on the number, formats it using @@ -124,7 +124,7 @@ public: * rule text begins (this value is added to this substitution's * position to determine exactly where to insert the new text) */ - virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos) const; + virtual void doSubstitution(double number, UnicodeString& toInsertInto, int32_t pos, UErrorCode& status) const; protected: /** diff --git a/icu4c/source/i18n/plurfmt.cpp b/icu4c/source/i18n/plurfmt.cpp index 6ffd8206a8a..b20bc2ed83f 100644 --- a/icu4c/source/i18n/plurfmt.cpp +++ b/icu4c/source/i18n/plurfmt.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 2009-2013, International Business Machines Corporation and +* Copyright (C) 2009-2014, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* * @@ -15,6 +15,7 @@ #include "unicode/utypes.h" #include "cmemory.h" #include "messageimpl.h" +#include "nfrule.h" #include "plurrule_impl.h" #include "uassert.h" #include "uhash.h" @@ -481,6 +482,77 @@ int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t part return msgStart; } +void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const { + // If no pattern was applied, return null. + if (msgPattern.countParts() == 0) { + pos.setBeginIndex(-1); + pos.setEndIndex(-1); + return; + } + int partIndex = 0; + int currMatchIndex; + int count=msgPattern.countParts(); + int startingAt = pos.getBeginIndex(); + if (startingAt < 0) { + startingAt = 0; + } + + // The keyword is null until we need to match against a non-explicit, not-"other" value. + // Then we get the keyword from the selector. + // (In other words, we never call the selector if we match against an explicit value, + // or if the only non-explicit keyword is "other".) + UnicodeString keyword; + UnicodeString matchedWord; + const UnicodeString& pattern = msgPattern.getPatternString(); + int matchedIndex = -1; + // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples + // until the end of the plural-only pattern. + while (partIndex < count) { + const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++); + if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) { + // Bad format + continue; + } + + const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++); + if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) { + // Bad format + continue; + } + + const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++); + if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) { + // Bad format + continue; + } + + UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit()); + if (rbnfLenientScanner != NULL) { + // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us. + int32_t length = -1; + currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length); + } + else { + currMatchIndex = source.indexOf(currArg); + } + if (currMatchIndex > matchedIndex && currArg.length() > matchedWord.length()) { + matchedIndex = currMatchIndex; + matchedWord = currArg; + keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit()); + } + } + if (matchedIndex >= 0) { + pos.setBeginIndex(matchedIndex); + pos.setEndIndex(matchedIndex + matchedWord.length()); + result.setString(keyword); + return; + } + + // Not found! + pos.setBeginIndex(-1); + pos.setEndIndex(-1); +} + PluralFormat::PluralSelector::~PluralSelector() {} PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() { diff --git a/icu4c/source/i18n/rbnf.cpp b/icu4c/source/i18n/rbnf.cpp index 4bd7c2ccd5b..bdfd71a4dc3 100644 --- a/icu4c/source/i18n/rbnf.cpp +++ b/icu4c/source/i18n/rbnf.cpp @@ -13,6 +13,7 @@ #if U_HAVE_RBNF #include "unicode/normlzr.h" +#include "unicode/plurfmt.h" #include "unicode/tblcoll.h" #include "unicode/uchar.h" #include "unicode/ucol.h" @@ -1065,8 +1066,9 @@ RuleBasedNumberFormat::format(int32_t number, FieldPosition& /* pos */) const { if (defaultRuleSet) { + UErrorCode status = U_ZERO_ERROR; int32_t startPos = toAppendTo.length(); - defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length()); + defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length(), status); adjustForCapitalizationContext(startPos, toAppendTo); } return toAppendTo; @@ -1079,8 +1081,9 @@ RuleBasedNumberFormat::format(int64_t number, FieldPosition& /* pos */) const { if (defaultRuleSet) { + UErrorCode status = U_ZERO_ERROR; int32_t startPos = toAppendTo.length(); - defaultRuleSet->format(number, toAppendTo, toAppendTo.length()); + defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), status); adjustForCapitalizationContext(startPos, toAppendTo); } return toAppendTo; @@ -1100,7 +1103,8 @@ RuleBasedNumberFormat::format(double number, toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol); } } else if (defaultRuleSet) { - defaultRuleSet->format(number, toAppendTo, toAppendTo.length()); + UErrorCode status = U_ZERO_ERROR; + defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), status); } return adjustForCapitalizationContext(startPos, toAppendTo); } @@ -1122,7 +1126,7 @@ RuleBasedNumberFormat::format(int32_t number, NFRuleSet *rs = findRuleSet(ruleSetName, status); if (rs) { int32_t startPos = toAppendTo.length(); - rs->format((int64_t)number, toAppendTo, toAppendTo.length()); + rs->format((int64_t)number, toAppendTo, toAppendTo.length(), status); adjustForCapitalizationContext(startPos, toAppendTo); } } @@ -1146,7 +1150,7 @@ RuleBasedNumberFormat::format(int64_t number, NFRuleSet *rs = findRuleSet(ruleSetName, status); if (rs) { int32_t startPos = toAppendTo.length(); - rs->format(number, toAppendTo, toAppendTo.length()); + rs->format(number, toAppendTo, toAppendTo.length(), status); adjustForCapitalizationContext(startPos, toAppendTo); } } @@ -1170,7 +1174,7 @@ RuleBasedNumberFormat::format(double number, NFRuleSet *rs = findRuleSet(ruleSetName, status); if (rs) { int32_t startPos = toAppendTo.length(); - rs->format(number, toAppendTo, toAppendTo.length()); + rs->format(number, toAppendTo, toAppendTo.length(), status); adjustForCapitalizationContext(startPos, toAppendTo); } } @@ -1743,6 +1747,14 @@ RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbo adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols)); } +PluralFormat * +RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType, + const UnicodeString &pattern, + UErrorCode& status) const +{ + return new PluralFormat(locale, pluralType, pattern, status); +} + U_NAMESPACE_END /* U_HAVE_RBNF */ diff --git a/icu4c/source/i18n/unicode/plurfmt.h b/icu4c/source/i18n/unicode/plurfmt.h index bc5aff01d6d..f7099d81e23 100644 --- a/icu4c/source/i18n/unicode/plurfmt.h +++ b/icu4c/source/i18n/unicode/plurfmt.h @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 2007-2013, International Business Machines Corporation and +* Copyright (C) 2007-2014, International Business Machines Corporation and * others. All Rights Reserved. ******************************************************************************* * @@ -28,6 +28,7 @@ U_NAMESPACE_BEGIN class Hashtable; +class NFRule; /** *

@@ -599,7 +600,11 @@ private: const MessagePattern& pattern, int32_t partIndex, const PluralSelector& selector, void *context, double number, UErrorCode& ec); /**< @internal */ + void parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, + Formattable& result, FieldPosition& pos) const; + friend class MessageFormat; + friend class NFRule; }; U_NAMESPACE_END diff --git a/icu4c/source/i18n/unicode/rbnf.h b/icu4c/source/i18n/unicode/rbnf.h index 5e4f3b33148..5d079a15d82 100644 --- a/icu4c/source/i18n/unicode/rbnf.h +++ b/icu4c/source/i18n/unicode/rbnf.h @@ -34,11 +34,13 @@ #include "unicode/unistr.h" #include "unicode/strenum.h" #include "unicode/brkiter.h" +#include "unicode/upluralrules.h" U_NAMESPACE_BEGIN class NFRuleSet; class LocalizationInfo; +class PluralFormat; class RuleBasedCollator; /** @@ -428,6 +430,20 @@ enum URBNFRuleSetTag { * in rule in fraction rule set * Omit the optional text if multiplying the number by the rule's base value yields 1. * + * + * $(cardinal,plural syntax) + * + * in all rule sets + * This provides the ability to choose a word based on the number divided by the base value for the specified locale. + * This uses the cardinal plural rules from PluralFormat. All strings used in the plural format are treated as the same base value for parsing. + * + * + * $(ordinal,plural syntax) + * + * in all rule sets + * This provides the ability to choose a word based on the number divided by the base value for the specified locale. + * This uses the ordinal plural rules from PluralFormat. All strings used in the plural format are treated as the same base value for parsing. + * * * *

The substitution descriptor (i.e., the text between the token characters) may take one @@ -495,6 +511,8 @@ enum URBNFRuleSetTag { * @author Richard Gillam * @see NumberFormat * @see DecimalFormat + * @see PluralFormat + * @see PluralRules * @stable ICU 2.0 */ class U_I18N_API RuleBasedNumberFormat : public NumberFormat { @@ -964,6 +982,7 @@ private: inline NFRuleSet * getDefaultRuleSet() const; const RuleBasedCollator * getCollator() const; DecimalFormatSymbols * getDecimalFormatSymbols() const; + PluralFormat *createPluralFormat(UPluralType pluralType, const UnicodeString &pattern, UErrorCode& status) const; UnicodeString& adjustForCapitalizationContext(int32_t startPos, UnicodeString& currentResult) const; private: diff --git a/icu4c/source/test/intltest/itrbnf.cpp b/icu4c/source/test/intltest/itrbnf.cpp index c608869d6a2..64fd81c3377 100644 --- a/icu4c/source/test/intltest/itrbnf.cpp +++ b/icu4c/source/test/intltest/itrbnf.cpp @@ -21,8 +21,6 @@ #include "unicode/udata.h" #include "testutil.h" -//#include "llong.h" - #include // import com.ibm.text.RuleBasedNumberFormat; @@ -67,6 +65,7 @@ void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name, TESTCASE(17, TestPortugueseSpellout); TESTCASE(18, TestMultiplierSubstitution); TESTCASE(19, TestSetDecimalFormatSymbols); + TESTCASE(20, TestPluralRules); #else TESTCASE(0, TestRBNFDisabled); #endif @@ -1831,8 +1830,10 @@ IntlTestRBNF::TestAllLocales() UnicodeString str; f->format(n, str); - logln(UnicodeString(loc->getName()) + names[j] - + "success: " + n + " -> " + str); + if (verbose) { + logln(UnicodeString(loc->getName()) + names[j] + + "success: " + n + " -> " + str); + } // We do not validate the result in this test case, // because there are cases which do not round trip by design. @@ -1893,27 +1894,27 @@ IntlTestRBNF::TestAllLocales() void IntlTestRBNF::TestMultiplierSubstitution(void) { - UnicodeString rules("=#,##0=;1,000,000: <##0.###< million;"); - UErrorCode status = U_ZERO_ERROR; - UParseError parse_error; - RuleBasedNumberFormat *rbnf = - new RuleBasedNumberFormat(rules, Locale::getUS(), parse_error, status); - if (U_SUCCESS(status)) { - UnicodeString res; - FieldPosition pos; - double n = 1234000.0; - rbnf->format(n, res, pos); - delete rbnf; + UnicodeString rules("=#,##0=;1,000,000: <##0.###< million;"); + UErrorCode status = U_ZERO_ERROR; + UParseError parse_error; + RuleBasedNumberFormat *rbnf = + new RuleBasedNumberFormat(rules, Locale::getUS(), parse_error, status); + if (U_SUCCESS(status)) { + UnicodeString res; + FieldPosition pos; + double n = 1234000.0; + rbnf->format(n, res, pos); + delete rbnf; - UnicodeString expected = UNICODE_STRING_SIMPLE("1.234 million"); - if (expected != res) { - UnicodeString msg = "Expected: "; - msg.append(expected); - msg.append(" but got "); - msg.append(res); - errln(msg); + UnicodeString expected(UNICODE_STRING_SIMPLE("1.234 million")); + if (expected != res) { + UnicodeString msg = "Expected: "; + msg.append(expected); + msg.append(" but got "); + msg.append(res); + errln(msg); + } } - } } void @@ -1958,6 +1959,99 @@ IntlTestRBNF::TestSetDecimalFormatSymbols() { } } +void IntlTestRBNF::TestPluralRules() { + UErrorCode status = U_ZERO_ERROR; + UnicodeString enRules("%digits-ordinal:-x: −>>;0: =#,##0=$(ordinal,one{st}two{nd}few{rd}other{th});"); + UParseError parseError; + RuleBasedNumberFormat enFormatter(enRules, Locale::getEnglish(), parseError, status); + if (U_FAILURE(status)) { + errln("Unable to create RuleBasedNumberFormat - " + UnicodeString(u_errorName(status))); + return; + } + const char* const enTestData[][2] = { + { "1", "1st" }, + { "2", "2nd" }, + { "3", "3rd" }, + { "4", "4th" }, + { "11", "11th" }, + { "12", "12th" }, + { "13", "13th" }, + { "14", "14th" }, + { "21", "21st" }, + { "22", "22nd" }, + { "23", "23rd" }, + { "24", "24th" }, + { NULL, NULL } + }; + + doTest(&enFormatter, enTestData, TRUE); + + // This is trying to model the feminine form, but don't worry about the details too much. + // We're trying to test the plural rules. + UnicodeString ruRules("%spellout-numbering:" + "-x: minus >>;" + "x.x: << point >>;" + "0: zero;" + "1: one;" + "2: two;" + "3: three;" + "4: four;" + "5: five;" + "6: six;" + "7: seven;" + "8: eight;" + "9: nine;" + "10: ten;" + "11: eleven;" + "12: twelve;" + "13: thirteen;" + "14: fourteen;" + "15: fifteen;" + "16: sixteen;" + "17: seventeen;" + "18: eighteen;" + "19: nineteen;" + "20: twenty[->>];" + "30: thirty[->>];" + "40: forty[->>];" + "50: fifty[->>];" + "60: sixty[->>];" + "70: seventy[->>];" + "80: eighty[->>];" + "90: ninety[->>];" + "100: hundred[ >>];" + "200: << hundred[ >>];" + "300: << hundreds[ >>];" + "500: << hundredss[ >>];" + "1000: <<$(cardinal,one{ thousand}few{ thousands}other{ thousandss})[ >>];"); + RuleBasedNumberFormat ruFormatter(ruRules, Locale("ru"), parseError, status); + const char* const ruTestData[][2] = { + { "1", "one" }, + { "100", "hundred" }, + { "125", "hundred twenty-five" }, + { "399", "three hundreds ninety-nine" }, + { "1,000", "one thousand" }, + { "2,000", "two thousands" }, + { "5,000", "five thousandss" }, + { "21,000", "twenty-one thousand" }, + { "22,000", "twenty-two thousands" }, + { NULL, NULL } + }; + + if (U_FAILURE(status)) { + errln("Unable to create RuleBasedNumberFormat - " + UnicodeString(u_errorName(status))); + return; + } + doTest(&ruFormatter, ruTestData, TRUE); + + // Make sure there are no divide by 0 errors. + UnicodeString result; + RuleBasedNumberFormat(ruRules, Locale("ru"), parseError, status).format(21000, result); + if (result.compare(UNICODE_STRING_SIMPLE("twenty-one thousand")) != 0) { + errln("Got " + result + " for 21000"); + } + +} void IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing) diff --git a/icu4c/source/test/intltest/itrbnf.h b/icu4c/source/test/intltest/itrbnf.h index c1965249e2a..cc4076c0b6c 100644 --- a/icu4c/source/test/intltest/itrbnf.h +++ b/icu4c/source/test/intltest/itrbnf.h @@ -1,6 +1,6 @@ /* ******************************************************************************* - * Copyright (C) 1996-2012, International Business Machines Corporation and * + * Copyright (C) 1996-2014, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ @@ -133,6 +133,11 @@ class IntlTestRBNF : public IntlTest { */ virtual void TestSetDecimalFormatSymbols(); + /** + * Test the plural rules in RBNF + */ + virtual void TestPluralRules(); + protected: virtual void doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing); virtual void doLenientParseTest(RuleBasedNumberFormat* formatter, const char* testData[][2]);