diff --git a/icu4c/source/i18n/nfrule.cpp b/icu4c/source/i18n/nfrule.cpp index c51774b92c9..a2eb9d01cc8 100644 --- a/icu4c/source/i18n/nfrule.cpp +++ b/icu4c/source/i18n/nfrule.cpp @@ -54,6 +54,7 @@ NFRule::~NFRule() static const UChar gLeftBracket = 0x005b; static const UChar gRightBracket = 0x005d; +static const UChar gClosedParenthesis = 0x0029; static const UChar gColon = 0x003a; static const UChar gZero = 0x0030; static const UChar gNine = 0x0039; @@ -73,7 +74,6 @@ static const UChar gXDotZero[] = {0x78, 0x2E, 0x30, 0}; /* "x.0" static const UChar gZeroDotX[] = {0x30, 0x2E, 0x78, 0}; /* "0.x" */ static const UChar gDollarOpenParenthesis[] = {0x24, 0x28, 0}; /* "$(" */ -static const UChar gClosedParenthesis[] = {0x29, 0}; /* ")" */ static const UChar gLessLess[] = {0x3C, 0x3C, 0}; /* "<<" */ static const UChar gLessPercent[] = {0x3C, 0x25, 0}; /* "<%" */ @@ -393,13 +393,15 @@ NFRule::extractSubstitutions(const NFRuleSet* ruleSet, else { sub2 = extractSubstitution(ruleSet, predecessor, status); } - if (this->ruleText.startsWith(gDollarOpenParenthesis, -1) && this->ruleText.endsWith(gClosedParenthesis, -1)) { - int32_t endType = this->ruleText.indexOf(gComma); + int32_t pluralRuleStart = this->ruleText.indexOf(gDollarOpenParenthesis, -1, 0); + int32_t pluralRuleEnd = (pluralRuleStart >= 0 ? this->ruleText.indexOf(gClosedParenthesis, pluralRuleStart) : -1); + if (pluralRuleEnd >= 0) { + int32_t endType = this->ruleText.indexOf(gComma, pluralRuleStart); if (endType < 0) { status = U_PARSE_ERROR; return; } - UnicodeString type(this->ruleText.tempSubString(2, endType - 2)); + UnicodeString type(this->ruleText.tempSubString(pluralRuleStart + 2, endType - pluralRuleStart - 2)); UPluralType pluralType; if (type.startsWith(UNICODE_STRING_SIMPLE("cardinal"))) { pluralType = UPLURAL_TYPE_CARDINAL; @@ -412,7 +414,7 @@ NFRule::extractSubstitutions(const NFRuleSet* ruleSet, return; } rulePatternFormat = formatter->createPluralFormat(pluralType, - this->ruleText.tempSubString(endType + 1, this->ruleText.length() - 2 - endType), status); + this->ruleText.tempSubString(endType + 1, pluralRuleEnd - endType - 1), status); } } @@ -687,19 +689,31 @@ NFRule::doFormat(int64_t number, UnicodeString& toInsertInto, int32_t pos, UErro // into the right places in toInsertInto (notice we do the // substitutions in reverse order so that the offsets don't get // messed up) + int32_t pluralRuleStart = ruleText.length(); + int32_t lengthOffset = 0; if (!rulePatternFormat) { toInsertInto.insert(pos, ruleText); } else { + pluralRuleStart = ruleText.indexOf(gDollarOpenParenthesis, -1, 0); + int pluralRuleEnd = ruleText.indexOf(gClosedParenthesis, pluralRuleStart); + int initialLength = toInsertInto.length(); + if (pluralRuleEnd < ruleText.length() - 1) { + toInsertInto.insert(pos, ruleText.tempSubString(pluralRuleEnd + 1)); + } toInsertInto.insert(pos, - rulePatternFormat->format((double)(baseValue == 0 ? number : number/baseValue), status)); + rulePatternFormat->format((int32_t)(number/uprv_pow(radix, exponent)), status)); + if (pluralRuleStart > 0) { + toInsertInto.insert(pos, ruleText.tempSubString(0, pluralRuleStart)); + } + lengthOffset = ruleText.length() - (toInsertInto.length() - initialLength); } if (!sub2->isNullSubstitution()) { - sub2->doSubstitution(number, toInsertInto, pos, status); + sub2->doSubstitution(number, toInsertInto, pos - (sub2->getPos() > pluralRuleStart ? lengthOffset : 0), status); } if (!sub1->isNullSubstitution()) { - sub1->doSubstitution(number, toInsertInto, pos, status); + sub1->doSubstitution(number, toInsertInto, pos - (sub1->getPos() > pluralRuleStart ? lengthOffset : 0), status); } } @@ -721,18 +735,31 @@ NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos, UError // [again, we have two copies of this routine that do the same thing // so that we don't sacrifice precision in a long by casting it // to a double] + int32_t pluralRuleStart = ruleText.length(); + int32_t lengthOffset = 0; if (!rulePatternFormat) { toInsertInto.insert(pos, ruleText); } else { + pluralRuleStart = ruleText.indexOf(gDollarOpenParenthesis, -1, 0); + int pluralRuleEnd = ruleText.indexOf(gClosedParenthesis, pluralRuleStart); + int initialLength = toInsertInto.length(); + if (pluralRuleEnd < ruleText.length() - 1) { + toInsertInto.insert(pos, ruleText.tempSubString(pluralRuleEnd + 1)); + } toInsertInto.insert(pos, - rulePatternFormat->format(baseValue == 0 ? number : number/baseValue, status)); + rulePatternFormat->format((int32_t)(number/uprv_pow(radix, exponent)), status)); + if (pluralRuleStart > 0) { + toInsertInto.insert(pos, ruleText.tempSubString(0, pluralRuleStart)); + } + lengthOffset = ruleText.length() - (toInsertInto.length() - initialLength); } + if (!sub2->isNullSubstitution()) { - sub2->doSubstitution(number, toInsertInto, pos, status); + sub2->doSubstitution(number, toInsertInto, pos - (sub2->getPos() > pluralRuleStart ? lengthOffset : 0), status); } if (!sub1->isNullSubstitution()) { - sub1->doSubstitution(number, toInsertInto, pos, status); + sub1->doSubstitution(number, toInsertInto, pos - (sub1->getPos() > pluralRuleStart ? lengthOffset : 0), status); } } @@ -1372,8 +1399,17 @@ NFRule::findText(const UnicodeString& str, rulePatternFormat->parseType(str, this, result, position); int start = position.getBeginIndex(); if (start >= 0) { - *length = position.getEndIndex() - start; - return start; + int32_t pluralRuleStart = ruleText.indexOf(gDollarOpenParenthesis, -1, 0); + int32_t pluralRuleSuffix = ruleText.indexOf(gClosedParenthesis, pluralRuleStart) + 1; + int32_t matchLen = position.getEndIndex() - start; + UnicodeString prefix(ruleText.tempSubString(0, pluralRuleStart)); + UnicodeString suffix(ruleText.tempSubString(pluralRuleSuffix)); + if (str.compare(start - prefix.length(), prefix.length(), prefix, 0, prefix.length()) == 0 + && str.compare(start + matchLen, suffix.length(), suffix, 0, suffix.length()) == 0) + { + *length = matchLen + prefix.length() + suffix.length(); + return start - prefix.length(); + } } *length = 0; return -1; diff --git a/icu4c/source/i18n/plurfmt.cpp b/icu4c/source/i18n/plurfmt.cpp index b20bc2ed83f..7f6cd86e4c8 100644 --- a/icu4c/source/i18n/plurfmt.cpp +++ b/icu4c/source/i18n/plurfmt.cpp @@ -535,7 +535,7 @@ void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLeni else { currMatchIndex = source.indexOf(currArg); } - if (currMatchIndex > matchedIndex && currArg.length() > matchedWord.length()) { + if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) { matchedIndex = currMatchIndex; matchedWord = currArg; keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit()); diff --git a/icu4c/source/i18n/unicode/rbnf.h b/icu4c/source/i18n/unicode/rbnf.h index 5d079a15d82..f3b4ff103b1 100644 --- a/icu4c/source/i18n/unicode/rbnf.h +++ b/icu4c/source/i18n/unicode/rbnf.h @@ -434,15 +434,19 @@ enum URBNFRuleSetTag { * $(cardinal,plural syntax) * * in all rule sets - * This provides the ability to choose a word based on the number divided by the base value for the specified locale. - * This uses the cardinal plural rules from PluralFormat. All strings used in the plural format are treated as the same base value for parsing. + * This provides the ability to choose a word based on the number divided by the radix to the power of the + * exponent of the base value for the specified locale, which is normally equivalent to the << value. + * This uses the cardinal plural rules from PluralFormat. All strings used in the plural format are treated + * as the same base value for parsing. * * * $(ordinal,plural syntax) * * in all rule sets - * This provides the ability to choose a word based on the number divided by the base value for the specified locale. - * This uses the ordinal plural rules from PluralFormat. All strings used in the plural format are treated as the same base value for parsing. + * This provides the ability to choose a word based on the number divided by the radix to the power of the + * exponent of the base value for the specified locale, which is normally equivalent to the << value. + * This uses the ordinal plural rules from PluralFormat. All strings used in the plural format are treated + * as the same base value for parsing. * * * diff --git a/icu4c/source/test/intltest/itrbnf.cpp b/icu4c/source/test/intltest/itrbnf.cpp index 64fd81c3377..8830258a61f 100644 --- a/icu4c/source/test/intltest/itrbnf.cpp +++ b/icu4c/source/test/intltest/itrbnf.cpp @@ -2023,7 +2023,8 @@ void IntlTestRBNF::TestPluralRules() { "200: << hundred[ >>];" "300: << hundreds[ >>];" "500: << hundredss[ >>];" - "1000: <<$(cardinal,one{ thousand}few{ thousands}other{ thousandss})[ >>];"); + "1000: << $(cardinal,one{thousand}few{thousands}other{thousandss})[ >>];" + "1000000: << $(cardinal,one{million}few{millions}other{millionss})[ >>];"); RuleBasedNumberFormat ruFormatter(ruRules, Locale("ru"), parseError, status); const char* const ruTestData[][2] = { { "1", "one" }, @@ -2031,8 +2032,13 @@ void IntlTestRBNF::TestPluralRules() { { "125", "hundred twenty-five" }, { "399", "three hundreds ninety-nine" }, { "1,000", "one thousand" }, + { "1,001", "one thousand one" }, { "2,000", "two thousands" }, + { "2,001", "two thousands one" }, + { "2,002", "two thousands two" }, + { "3,333", "three thousands three hundreds thirty-three" }, { "5,000", "five thousandss" }, + { "11,000", "eleven thousandss" }, { "21,000", "twenty-one thousand" }, { "22,000", "twenty-two thousands" }, { NULL, NULL }