From c11cd154da9c2962904e0863c15078e018560f08 Mon Sep 17 00:00:00 2001 From: Claire Ho Date: Mon, 17 Dec 2007 01:39:55 +0000 Subject: [PATCH] ICU-5794 merge from feature branch. X-SVN-Rev: 23095 --- icu4c/source/common/unicode/utypes.h | 4 + icu4c/source/common/utypes.c | 3 +- icu4c/source/i18n/Makefile.in | 3 +- icu4c/source/i18n/i18n.vcproj | 20 + icu4c/source/i18n/msgfmt.cpp | 388 ++++++- icu4c/source/i18n/msgfmt_impl.h | 33 + icu4c/source/i18n/plurfmt.cpp | 462 ++++++++ icu4c/source/i18n/plurrule.cpp | 1101 ++++++++++++++++++++ icu4c/source/i18n/plurrule_impl.h | 192 ++++ icu4c/source/i18n/unicode/msgfmt.h | 194 +++- icu4c/source/i18n/unicode/plurfmt.h | 507 +++++++++ icu4c/source/i18n/unicode/plurrule.h | 262 +++++ icu4c/source/test/intltest/Makefile.in | 2 +- icu4c/source/test/intltest/intltest.vcproj | 12 + icu4c/source/test/intltest/itformat.cpp | 6 +- icu4c/source/test/intltest/plurfmts.cpp | 506 +++++++++ icu4c/source/test/intltest/plurfmts.h | 48 + icu4c/source/test/intltest/plurults.cpp | 261 +++++ icu4c/source/test/intltest/plurults.h | 31 + icu4c/source/test/intltest/tmsgfmt.cpp | 95 ++ icu4c/source/test/intltest/tmsgfmt.h | 4 + 21 files changed, 4064 insertions(+), 70 deletions(-) create mode 100644 icu4c/source/i18n/msgfmt_impl.h create mode 100644 icu4c/source/i18n/plurfmt.cpp create mode 100644 icu4c/source/i18n/plurrule.cpp create mode 100644 icu4c/source/i18n/plurrule_impl.h create mode 100644 icu4c/source/i18n/unicode/plurfmt.h create mode 100644 icu4c/source/i18n/unicode/plurrule.h create mode 100644 icu4c/source/test/intltest/plurfmts.cpp create mode 100644 icu4c/source/test/intltest/plurfmts.h create mode 100644 icu4c/source/test/intltest/plurults.cpp create mode 100644 icu4c/source/test/intltest/plurults.h diff --git a/icu4c/source/common/unicode/utypes.h b/icu4c/source/common/unicode/utypes.h index 4c07c101105..61b27e35210 100644 --- a/icu4c/source/common/unicode/utypes.h +++ b/icu4c/source/common/unicode/utypes.h @@ -678,6 +678,10 @@ 
typedef enum UErrorCode { U_UNMATCHED_BRACES, /**< Braces do not match in message pattern */ U_UNSUPPORTED_PROPERTY, /**< UNUSED as of ICU 2.4 */ U_UNSUPPORTED_ATTRIBUTE, /**< UNUSED as of ICU 2.4 */ + U_ARGUMENT_TYPE_MISMATCH, /**< Argument name and argument index mismatch in MessageFormat functions */ + U_DUPLICATE_KEYWORD, /**< Duplicate keyword in PluralFormat */ + U_UNDEFINED_KEYWORD, /**< Undefined Plural keyword */ + U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */ U_FMT_PARSE_ERROR_LIMIT, /**< The limit for format library errors */ /* diff --git a/icu4c/source/common/utypes.c b/icu4c/source/common/utypes.c index f37dd0db534..1674a737d8c 100644 --- a/icu4c/source/common/utypes.c +++ b/icu4c/source/common/utypes.c @@ -116,7 +116,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = { "U_ILLEGAL_PAD_POSITION", "U_UNMATCHED_BRACES", "U_UNSUPPORTED_PROPERTY", - "U_UNSUPPORTED_ATTRIBUTE" + "U_UNSUPPORTED_ATTRIBUTE", + "U_ARGUMENT_TYPE_MISMATCH" }; static const char * const diff --git a/icu4c/source/i18n/Makefile.in b/icu4c/source/i18n/Makefile.in index 4b394134d90..9a74850502e 100644 --- a/icu4c/source/i18n/Makefile.in +++ b/icu4c/source/i18n/Makefile.in @@ -78,7 +78,8 @@ name2uni.o uni2name.o nortrans.o quant.o transreg.o \ regexcmp.o rematch.o repattrn.o regexst.o udatpg.o uregex.o uregexc.o \ ulocdata.o measfmt.o currfmt.o curramt.o currunit.o measure.o utmscale.o \ csdetect.o csmatch.o csr2022.o csrecog.o csrmbcs.o csrsbcs.o csrucode.o csrutf8.o inputext.o \ -windtfmt.o winnmfmt.o basictz.o dtrule.o rbtz.o tzrule.o tztrans.o vtzone.o zonemeta.o zstrfmt.o +windtfmt.o winnmfmt.o basictz.o dtrule.o rbtz.o tzrule.o tztrans.o vtzone.o \ +zonemeta.o zstrfmt.o plurrule.o plurfmt.o ## Header files to install HEADERS = $(srcdir)/unicode/*.h diff --git a/icu4c/source/i18n/i18n.vcproj b/icu4c/source/i18n/i18n.vcproj index e4a8dcaefb9..0ae3f0104ca 100644 --- a/icu4c/source/i18n/i18n.vcproj +++ b/icu4c/source/i18n/i18n.vcproj @@ 
-1062,6 +1062,10 @@ /> + + @@ -1158,6 +1162,22 @@ RelativePath=".\persncal.h" > + + + + + + + + diff --git a/icu4c/source/i18n/msgfmt.cpp b/icu4c/source/i18n/msgfmt.cpp index 9eb7bcb303a..c1186f45242 100644 --- a/icu4c/source/i18n/msgfmt.cpp +++ b/icu4c/source/i18n/msgfmt.cpp @@ -1,6 +1,6 @@ /* ******************************************************************************* -* Copyright (C) 1997-2006, International Business Machines Corporation and * +* Copyright (C) 2007, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* * @@ -28,15 +28,18 @@ #include "unicode/datefmt.h" #include "unicode/smpdtfmt.h" #include "unicode/choicfmt.h" +#include "unicode/plurfmt.h" #include "unicode/ustring.h" #include "unicode/ucnv_err.h" #include "unicode/uchar.h" #include "unicode/umsg.h" #include "unicode/rbnf.h" -#include "ustrfmt.h" #include "cmemory.h" +#include "msgfmt_impl.h" #include "util.h" #include "uassert.h" +#include "ustrfmt.h" +#include "uvector.h" // ***************************************************************************** // class MessageFormat @@ -75,6 +78,9 @@ static const UChar ID_ORDINAL[] = { static const UChar ID_DURATION[] = { 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */ }; +static const UChar ID_PLURAL[] = { + 0x70, 0x6c, 0x75, 0x72, 0x61, 0x6c, 0 /* "plural" */ +}; // MessageFormat Type List Number, Date, Time or Choice static const UChar * const TYPE_IDS[] = { @@ -86,6 +92,7 @@ static const UChar * const TYPE_IDS[] = { ID_SPELLOUT, ID_ORDINAL, ID_DURATION, + ID_PLURAL, NULL, }; @@ -145,6 +152,7 @@ U_NAMESPACE_BEGIN // ------------------------------------- UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat) +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration) //-------------------------------------------------------------------- @@ -197,7 +205,10 @@ MessageFormat::MessageFormat(const UnicodeString& pattern, 
argTypeCount(0), argTypeCapacity(0), defaultNumberFormat(NULL), - defaultDateFormat(NULL) + defaultDateFormat(NULL), + isArgNumeric(TRUE), + idStart(UCHAR_ID_START), + idContinue(UCHAR_ID_CONTINUE) { if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) || !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) { @@ -221,7 +232,10 @@ MessageFormat::MessageFormat(const UnicodeString& pattern, argTypeCount(0), argTypeCapacity(0), defaultNumberFormat(NULL), - defaultDateFormat(NULL) + defaultDateFormat(NULL), + isArgNumeric(TRUE), + idStart(UCHAR_ID_START), + idContinue(UCHAR_ID_CONTINUE) { if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) || !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) { @@ -246,7 +260,10 @@ MessageFormat::MessageFormat(const UnicodeString& pattern, argTypeCount(0), argTypeCapacity(0), defaultNumberFormat(NULL), - defaultDateFormat(NULL) + defaultDateFormat(NULL), + isArgNumeric(TRUE), + idStart(UCHAR_ID_START), + idContinue(UCHAR_ID_CONTINUE) { if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) || !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) { @@ -268,7 +285,9 @@ MessageFormat::MessageFormat(const MessageFormat& that) argTypeCount(0), argTypeCapacity(0), defaultNumberFormat(NULL), - defaultDateFormat(NULL) + defaultDateFormat(NULL), + isArgNumeric(TRUE), + idStart(UCHAR_ID_START) { *this = that; } @@ -278,6 +297,7 @@ MessageFormat::~MessageFormat() int32_t idx; for (idx = 0; idx < subformatCount; idx++) { delete subformats[idx].format; + delete subformats[idx].argName; } uprv_free(subformats); subformats = NULL; @@ -380,7 +400,7 @@ MessageFormat::operator=(const MessageFormat& that) fPattern = that.fPattern; setLocale(that.fLocale); - + isArgNumeric = that.isArgNumeric; int32_t j; for (j=0; jtoPattern(buffer); + } + else if (fmt->getDynamicClassID() == PluralFormat::getStaticClassID()) { + UnicodeString buffer; + appendTo += ((PluralFormat*)fmt)->toPattern(buffer); } else { //appendTo += ", unknown"; @@ -781,7 +811,7 @@ MessageFormat::adoptFormats(Format** 
newFormats, } } - // TODO: What about the .offset and .arg fields? + // TODO: What about the .offset and .argNum fields? } // ------------------------------------- @@ -812,8 +842,8 @@ MessageFormat::setFormats(const Format** newFormats, } // ------------------------------------- -// Adopt a single format. -// Do nothing is the format number is not less than the array count. +// Adopt a single format by format number. +// Do nothing if the format number is not less than the array count. void MessageFormat::adoptFormat(int32_t n, Format *newFormat) { @@ -825,9 +855,38 @@ MessageFormat::adoptFormat(int32_t n, Format *newFormat) { } } +// ------------------------------------- +// Adopt a single format by format name. +// Do nothing if there is no match of formatName. +void +MessageFormat::adoptFormat(const UnicodeString& formatName, + Format* formatToAdopt, + UErrorCode& status) { + if (isArgNumeric ) { + int32_t argumentNumber = stou(formatName); + if (argumentNumber<0) { + status = U_ARGUMENT_TYPE_MISMATCH; + return; + } + adoptFormat(argumentNumber, formatToAdopt); + return; + } + for (int32_t i=0; i= subformatCount) { + return subformats[argumentNumber].format; + } + else { + return NULL; + } + } + + for (int32_t i=0; iaddElement(new UnicodeString(*subformats[i].argName), status); + } + + StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status); + return nameEnumerator; +} + // ------------------------------------- // Formats the source Formattable array and copy into the result buffer. // Ignore the FieldPosition result for error checking. @@ -937,10 +1075,17 @@ MessageFormat::format(const Formattable& source, return format(tmpPtr, cnt, appendTo, ignore, 0, success); } - -// ------------------------------------- -// Formats the arguments Formattable array and copy into the appendTo buffer. -// Ignore the FieldPosition result for error checking. 
+ + +UnicodeString& +MessageFormat::format(const UnicodeString* argumentNames, + const Formattable* arguments, + int32_t count, + UnicodeString& appendTo, + UErrorCode& success) const { + FieldPosition ignore(0); + return format(arguments, argumentNames, count, appendTo, ignore, 0, success); +} UnicodeString& MessageFormat::format(const Formattable* arguments, @@ -950,49 +1095,95 @@ MessageFormat::format(const Formattable* arguments, int32_t recursionProtection, UErrorCode& success) const { - // Allow NULL array only if cnt == 0 + return format(arguments, NULL, cnt, appendTo, status, recursionProtection, success); +} + +// ------------------------------------- +// Formats the arguments Formattable array and copy into the appendTo buffer. +// Ignore the FieldPosition result for error checking. + +UnicodeString& +MessageFormat::format(const Formattable* arguments, + const UnicodeString *argumentNames, + int32_t cnt, + UnicodeString& appendTo, + FieldPosition& status, + int32_t recursionProtection, + UErrorCode& success) const +{ + int32_t lastOffset = 0; + int32_t argumentNumber=0; if (cnt < 0 || (cnt && arguments == NULL)) { success = U_ILLEGAL_ARGUMENT_ERROR; return appendTo; } - - int32_t lastOffset = 0; + + if ( !isArgNumeric && argumentNames== NULL ) { + success = U_ILLEGAL_ARGUMENT_ERROR; + return appendTo; + } + + const Formattable *obj=NULL; for (int32_t i=0; i= cnt) { - appendTo += LEFT_CURLY_BRACE; - itos(argumentNumber, appendTo); - appendTo += RIGHT_CURLY_BRACE; - continue; - } + obj = NULL; + if (isArgNumeric) { + argumentNumber = subformats[i].argNum; - const Formattable *obj = arguments + argumentNumber; + // Checks the scope of the argument number. 
+ if (argumentNumber >= cnt) { + appendTo += LEFT_CURLY_BRACE; + itos(argumentNumber, appendTo); + appendTo += RIGHT_CURLY_BRACE; + continue; + } + obj = arguments+argumentNumber; + } + else { + for (int32_t j=0; jgetType(); // Recursively calling the format process only if the current // format argument refers to a ChoiceFormat object. Format* fmt = subformats[i].format; if (fmt != NULL) { - UnicodeString arg; - fmt->format(*obj, arg, success); + UnicodeString argNum; + fmt->format(*obj, argNum, success); // Needs to reprocess the ChoiceFormat option by using the // MessageFormat pattern application. - if (fmt->getDynamicClassID() == ChoiceFormat::getStaticClassID() && - arg.indexOf(LEFT_CURLY_BRACE) >= 0) { - MessageFormat temp(arg, fLocale, success); + if ((fmt->getDynamicClassID() == ChoiceFormat::getStaticClassID() || + fmt->getDynamicClassID() == PluralFormat::getStaticClassID()) && + argNum.indexOf(LEFT_CURLY_BRACE) >= 0) { + MessageFormat temp(argNum, fLocale, success); // TODO: Implement recursion protection - temp.format(arguments, cnt, appendTo, status, recursionProtection, success); + if ( isArgNumeric ) { + temp.format(arguments, NULL, cnt, appendTo, status, recursionProtection, success); + } + else { + temp.format(arguments, argumentNames, cnt, appendTo, status, recursionProtection, success); + } if (U_FAILURE(success)) { return appendTo; } } else { - appendTo += arg; + appendTo += argNum; } } // If the obj data type is a number, use a NumberFormat instance. 
@@ -1066,7 +1257,7 @@ MessageFormat::parse(const UnicodeString& source, // now use format Format* fmt = subformats[i].format; - int32_t arg = subformats[i].arg; + int32_t argNum = subformats[i].argNum; if (fmt == NULL) { // string format // if at end, use longest possible match // otherwise uses first match to intervening string @@ -1093,14 +1284,19 @@ MessageFormat::parse(const UnicodeString& source, UnicodeString strValue = buffer; UnicodeString temp(LEFT_CURLY_BRACE); // {sfb} check this later - itos(arg, temp); + if (isArgNumeric) { + itos(argNum, temp); + } + else { + temp+=(*subformats[i].argName); + } temp += RIGHT_CURLY_BRACE; if (strValue != temp) { source.extract(sourceOffset,next - sourceOffset, buffer); - resultArray[arg].setString(buffer); + resultArray[argNum].setString(buffer); // {sfb} not sure about this - if ((arg + 1) > count) { - count = arg + 1; + if ((argNum + 1) > count) { + count = argNum + 1; } } sourceOffset = next; @@ -1108,13 +1304,13 @@ MessageFormat::parse(const UnicodeString& source, } else { tempPos.setIndex(sourceOffset); - fmt->parseObject(source, resultArray[arg], tempPos); + fmt->parseObject(source, resultArray[argNum], tempPos); if (tempPos.getIndex() == sourceOffset) { goto PARSE_ERROR; } - if ((arg + 1) > count) { - count = arg + 1; + if ((argNum + 1) > count) { + count = argNum + 1; } sourceOffset = tempPos.getIndex(); // update } @@ -1144,6 +1340,10 @@ MessageFormat::parse(const UnicodeString& source, int32_t& cnt, UErrorCode& success) const { + if (!isArgNumeric ) { + success = U_ARGUMENT_TYPE_MISMATCH; + return NULL; + } ParsePosition status(0); // Calls the actual implementation method and starts // from zero offset of the source text. @@ -1226,9 +1426,21 @@ MessageFormat::makeFormat(int32_t formatNumber, // Parse the argument number int32_t argumentNumber = stou(segments[1]); // always unlocalized! 
+ UnicodeString argumentName; if (argumentNumber < 0) { - ec = U_INVALID_FORMAT_ERROR; - return; + if ( (isArgNumeric==TRUE) && (formatNumber !=0) ) { + ec = U_INVALID_FORMAT_ERROR; + return; + } + isArgNumeric = FALSE; + argumentNumber=formatNumber; + } + if (!isArgNumeric) { + if ( !isLegalArgName(segments[1]) ) { + ec = U_INVALID_FORMAT_ERROR; + return; + } + argumentName = segments[1]; } // Parse the format, recording the argument type and creating a @@ -1237,6 +1449,8 @@ MessageFormat::makeFormat(int32_t formatNumber, Format *fmt = NULL; int32_t typeID, styleID; DateFormat::EStyle style; + UnicodeString unquotedPattern, quotedPattern; + UBool inQuote = FALSE; switch (typeID = findKeyword(segments[2], TYPE_IDS)) { @@ -1308,6 +1522,25 @@ MessageFormat::makeFormat(int32_t formatNumber, argType = Formattable::kDouble; fmt = makeRBNF(URBNF_DURATION, fLocale, segments[3], ec); break; + case 8: // plural + quotedPattern = segments[3]; + for (int32_t i = 0; i < quotedPattern.length(); ++i) { + UChar ch = quotedPattern.charAt(i); + if (ch == SINGLE_QUOTE) { + if (i+1 < quotedPattern.length() && quotedPattern.charAt(i+1)==SINGLE_QUOTE) { + unquotedPattern+=ch; + ++i; + } + else { + inQuote = !inQuote; + } + } + else { + unquotedPattern += ch; + } + } + fmt = new PluralFormat(fLocale, unquotedPattern, ec); + break; default: argType = Formattable::kString; ec = U_ILLEGAL_ARGUMENT_ERROR; @@ -1331,7 +1564,14 @@ MessageFormat::makeFormat(int32_t formatNumber, // Parse succeeded; record results in our arrays subformats[formatNumber].format = fmt; subformats[formatNumber].offset = segments[0].length(); - subformats[formatNumber].arg = argumentNumber; + if (isArgNumeric) { + subformats[formatNumber].argName = NULL; + subformats[formatNumber].argNum = argumentNumber; + } + else { + subformats[formatNumber].argName = new UnicodeString(argumentName); + subformats[formatNumber].argNum = -1; + } subformatCount = formatNumber+1; // Careful here: argumentNumber may in general arrive 
out of @@ -1459,6 +1699,56 @@ const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const { return defaultDateFormat; } +UBool +MessageFormat::usesNamedArguments() const { + return !isArgNumeric; +} + +UBool +MessageFormat::isLegalArgName(const UnicodeString& argName) const { + if(!u_hasBinaryProperty(argName.charAt(0), idStart)) { + return FALSE; + } + for (int32_t i=1; isize()) { + return (const UnicodeString*)fFormatNames->elementAt(pos++); + } + return NULL; +} + +void +FormatNameEnumeration::reset(UErrorCode& /*status*/) { + pos=0; +} + +int32_t +FormatNameEnumeration::count(UErrorCode& /*status*/) const { + return (fFormatNames==NULL) ? 0 : fFormatNames->size(); +} + +FormatNameEnumeration::~FormatNameEnumeration() { + UnicodeString *s; + for (int32_t i=0; isize(); ++i) { + if ((s=(UnicodeString *)fFormatNames->elementAt(i))!=NULL) { + delete s; + } + } + delete fFormatNames; +} U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/msgfmt_impl.h b/icu4c/source/i18n/msgfmt_impl.h new file mode 100644 index 00000000000..340f9a1487c --- /dev/null +++ b/icu4c/source/i18n/msgfmt_impl.h @@ -0,0 +1,33 @@ +/* +******************************************************************************* +* Copyright (C) 2007, International Business Machines Corporation and +* others. All Rights Reserved. 
* +******************************************************************************* +* +* File MSGFMT.H +* +******************************************************************************* +*/ + +#ifndef __MSGFMT_IMPL_H__ +#define __MSGFMT_IMPL_H__ + +U_NAMESPACE_BEGIN + +class U_I18N_API FormatNameEnumeration : public StringEnumeration { +public: + FormatNameEnumeration(UVector *fFormatNames, UErrorCode& status); + virtual ~FormatNameEnumeration(); + static UClassID U_EXPORT2 getStaticClassID(void); + virtual UClassID getDynamicClassID(void) const; + virtual const UnicodeString* snext(UErrorCode& status); + virtual void reset(UErrorCode& status); + virtual int32_t count(UErrorCode& status) const; +private: + int32_t pos; + UVector *fFormatNames; +}; + +U_NAMESPACE_END + +#endif diff --git a/icu4c/source/i18n/plurfmt.cpp b/icu4c/source/i18n/plurfmt.cpp new file mode 100644 index 00000000000..24dae0af9d3 --- /dev/null +++ b/icu4c/source/i18n/plurfmt.cpp @@ -0,0 +1,462 @@ +/* +******************************************************************************* +* Copyright (C) 2007, International Business Machines Corporation and +* others. All Rights Reserved. 
+******************************************************************************* +* +* File PLURFMT.CPP +* +* Modification History: +* +* Date Name Description +******************************************************************************* +*/ + + +#include "mutex.h" +#include "plurrule_impl.h" +#include "unicode/utypes.h" +#include "unicode/plurfmt.h" +#include "unicode/plurrule.h" + +#include +#include + +#if !UCONFIG_NO_FORMATTING + +U_CDECL_BEGIN +static void U_CALLCONV +deleteHashStrings(void *obj) { + delete (UnicodeString *)obj; +} +U_CDECL_END + +U_NAMESPACE_BEGIN +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat); + +#define MAX_KEYWORD_SIZE 30 + +PluralFormat::PluralFormat(UErrorCode& status) { + init(NULL, Locale::getDefault(), status); +} + +PluralFormat::PluralFormat(const Locale& locale, UErrorCode& status) { + init(NULL, locale, status); +} + +PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status) { + init(&rules, Locale::getDefault(), status); +} + +PluralFormat::PluralFormat(const Locale& locale, const PluralRules& rules, UErrorCode& status) { + init(&rules, Locale::getDefault(), status); +} + +PluralFormat::PluralFormat(const UnicodeString& pattern, UErrorCode& status) { + init(NULL, Locale::getDefault(), status); + applyPattern(pattern, status); +} + +PluralFormat::PluralFormat(const Locale& locale, const UnicodeString& pattern, UErrorCode& status) { + init(NULL, locale, status); + applyPattern(pattern, status); +} + +PluralFormat::PluralFormat(const PluralRules& rules, const UnicodeString& pattern, UErrorCode& status) { + init(NULL, locale, status); + applyPattern(pattern, status); +} + +PluralFormat::PluralFormat(const Locale& locale, const PluralRules& rules, const UnicodeString& pattern, UErrorCode& status) { + init(NULL, locale, status); + applyPattern(pattern, status); +} + +PluralFormat::PluralFormat(const PluralFormat& other) { + UErrorCode status = U_ZERO_ERROR; + locale = other.locale; + pluralRules = 
other.pluralRules->clone(); + pattern = other.pattern; + copyHashtable(other.fParsedValuesHash, status); + numberFormat=NumberFormat::createInstance(locale, status); + replacedNumberFormat=other.replacedNumberFormat; +} + +PluralFormat::~PluralFormat() { + delete pluralRules; + delete fParsedValuesHash; + delete numberFormat; +} + +void +PluralFormat::init(const PluralRules* rules, const Locale& curLocale, UErrorCode& status) { + status = U_ZERO_ERROR; + locale = curLocale; + if ( rules==NULL) { + pluralRules = PluralRules::forLocale(locale, status); + } + else { + pluralRules = rules->clone(); + } + fParsedValuesHash=NULL; + pattern.remove(); + numberFormat= NumberFormat::createInstance(curLocale, status); + replacedNumberFormat=NULL; +} + +void +PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) { + this->pattern = newPattern; + UnicodeString token; + int32_t braceCount=0; + fmtToken type; + UBool spaceIncluded=FALSE; + + if (fParsedValuesHash==NULL) { + Mutex mutex; + fParsedValuesHash = new Hashtable(TRUE, status); + if (U_FAILURE(status)) { + return; + } + fParsedValuesHash->setValueDeleter(deleteHashStrings); + } + + UBool getKeyword=TRUE; + UnicodeString hashKeyword; + UnicodeString *hashPattern; + + for (int32_t i=0; iget(token)!= NULL) { + status = U_DUPLICATE_KEYWORD; + return; + } + if (token.length()==0) { + status = U_PATTERN_SYNTAX_ERROR; + return; + } + if (!pluralRules->isKeyword(token) && + pluralRules->getKeywordOther()!=token) { + status = U_UNDEFINED_KEYWORD; + return; + } + hashKeyword = token; + getKeyword = FALSE; + token.remove(); + } + else { + if (braceCount==0) { + status = U_UNEXPECTED_TOKEN; + return; + } + else { + token += ch; + } + } + braceCount++; + spaceIncluded = FALSE; + break; + case tRightBrace: + if ( getKeyword ) { + status = U_UNEXPECTED_TOKEN; + return; + } + else { + hashPattern = new UnicodeString(token); + fParsedValuesHash->put(hashKeyword, hashPattern, status); + braceCount--; + if ( 
braceCount==0 ) { + getKeyword=TRUE; + hashKeyword.remove(); + hashPattern=NULL; + token.remove(); + } + else { + token += ch; + } + } + spaceIncluded = FALSE; + break; + case tLetter: + case tNumberSign: + if (spaceIncluded) { + status = U_PATTERN_SYNTAX_ERROR; + return; + } + default: + token+=ch; + break; + } + } + if ( checkSufficientDefinition() ) { + return; + } + else { + status = U_DEFAULT_KEYWORD_MISSING; + return; + } +} + +UnicodeString& +PluralFormat::format(const Formattable& obj, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const +{ + if (U_FAILURE(status)) return appendTo; + int32_t number; + + switch (obj.getType()) + { + case Formattable::kDouble: + return format((int32_t)obj.getDouble(), appendTo, pos, status); + break; + case Formattable::kLong: + number = (int32_t)obj.getLong(); + return format(number, appendTo, pos, status); + break; + case Formattable::kInt64: + return format((int32_t)obj.getInt64(), appendTo, pos, status); + default: + status = U_ILLEGAL_ARGUMENT_ERROR; + return appendTo; + } +} + +UnicodeString +PluralFormat::format(int32_t number, UErrorCode& status) const { + FieldPosition fpos(0); + UnicodeString result; + + return format(number, result, fpos, status); +} + +UnicodeString& +PluralFormat::format(int32_t number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const{ + + if (fParsedValuesHash==NULL) { + if ( replacedNumberFormat== NULL ) { + return numberFormat->format(number, appendTo, pos); + } + else { + replacedNumberFormat->format(number, appendTo, pos); + } + } + UnicodeString selectedRule = pluralRules->select(number); + UnicodeString *selectedPattern = (UnicodeString *)fParsedValuesHash->get(selectedRule); + if (selectedPattern==NULL) { + selectedPattern = (UnicodeString *)fParsedValuesHash->get(pluralRules->getKeywordOther()); + } + appendTo = insertFormattedNumber(number, *selectedPattern, appendTo, pos); + + return appendTo; +} + +UnicodeString& 
+PluralFormat::toPattern(UnicodeString& appendTo) { + appendTo+= pattern; + return appendTo; +} + +UBool +PluralFormat::inRange(UChar ch, fmtToken& type) { + if ((ch>=CAP_A) and (ch<=CAP_Z)) { + // we assume all characters are in lower case already. + return FALSE; + } + if ((ch>=LOW_A) and (ch<=LOW_Z)) { + type = tLetter; + return TRUE; + } + switch (ch) { + case LEFTBRACE: + type = tLeftBrace; + return TRUE; + case SPACE: + type = tSpace; + return TRUE; + case RIGHTBRACE: + type = tRightBrace; + return TRUE; + case NUMBER_SIGN: + type = tNumberSign; + return TRUE; + default : + type = none; + return FALSE; + } +} + +UBool +PluralFormat::checkSufficientDefinition() { + // Check that at least the default rule is defined. + if (fParsedValuesHash==NULL) return FALSE; + if (fParsedValuesHash->get(pluralRules->getKeywordOther()) == NULL) { + return FALSE; + } + else { + return TRUE; + } +} + +void +PluralFormat::setLocale(const Locale& locale, UErrorCode& status) { + if (pluralRules!=NULL) { + delete pluralRules; + pluralRules=NULL; + } + if (fParsedValuesHash!= NULL) { + delete fParsedValuesHash; + fParsedValuesHash = NULL; + } + if (numberFormat!=NULL) { + delete numberFormat; + numberFormat = NULL; + replacedNumberFormat=NULL; + } + init(NULL, locale, status); +} + +void +PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) { + // TODO: The copy constructor and assignment op of NumberFormat class are protected. + // create a pointer as the workaround. + replacedNumberFormat = (NumberFormat *)format; + return; +} + +Format* +PluralFormat::clone() const +{ + return new PluralFormat(*this); +} + +/* +Format* +PluralFormat::clone() const { +} +*/ + +UBool +PluralFormat::operator==(const Format& other) const { + // This protected comparison operator should only be called by subclasses + // which have confirmed that the other object being compared against is + // an instance of a sublcass of PluralFormat. THIS IS IMPORTANT. 
+ // Format::operator== guarantees that this cast is safe + PluralFormat* fmt = (PluralFormat*)&other; + return ((*pluralRules == *(fmt->pluralRules)) && + (*numberFormat == *(fmt->numberFormat))); +} + +UBool +PluralFormat::operator!=(const Format& other) const { + return !operator==(other); +} + +void +PluralFormat::parseObject(const UnicodeString& source, + Formattable& result, + ParsePosition& pos) const +{ + // TODO: not yet supported in icu4j and icu4c +} + +UnicodeString +PluralFormat::insertFormattedNumber(int32_t number, + UnicodeString& message, + UnicodeString& appendTo, + FieldPosition& pos) const { + UnicodeString result; + int32_t braceStack=0; + int32_t startIndex=0; + + if (message.length()==0) { + return result; + } + appendTo = numberFormat->format(number, appendTo, pos); + for(int32_t i=0; isetValueDeleter(deleteHashStrings); + int32_t pos = -1; + const UHashElement* elem = NULL; + // walk through the hash table and create a deep clone + while((elem = other->nextElement(pos))!= NULL){ + const UHashTok otherKeyTok = elem->key; + UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer; + const UHashTok otherKeyToVal = elem->value; + UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer; + fParsedValuesHash->put(*otherKey, new UnicodeString(*otherValue), status); + if(U_FAILURE(status)){ + return; + } + } +} + + +U_NAMESPACE_END + + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +//eof diff --git a/icu4c/source/i18n/plurrule.cpp b/icu4c/source/i18n/plurrule.cpp new file mode 100644 index 00000000000..8ad2d4a81fa --- /dev/null +++ b/icu4c/source/i18n/plurrule.cpp @@ -0,0 +1,1101 @@ +/* +******************************************************************************* +* Copyright (C) 2007, International Business Machines Corporation and +* others. All Rights Reserved. 
+******************************************************************************* +* +* File PLURRULE.CPP +* +* Modification History: +* +* Date Name Description +******************************************************************************* +*/ + + +#include "unicode/uniset.h" +#include "unicode/utypes.h" +#include "unicode/plurrule.h" +#include "cmemory.h" +#include "cstring.h" +#include "hash.h" +#include "mutex.h" +#include "plurrule_impl.h" +#include "putilimp.h" +#include "ustrfmt.h" +#include +#include + +#if !UCONFIG_NO_FORMATTING + +U_NAMESPACE_BEGIN + + +// Plural rule data - will move to ResourceBundle. +static const UnicodeString PLURAL_RULE_DATA[] = { + UNICODE_STRING_SIMPLE("other: n/ja,ko,tr,vi"), + UNICODE_STRING_SIMPLE("one: n is 1/da,de,el,en,eo,es,et,fi,fo,he,hu,it,nb,nl,nn,no,pt,sv"), + UNICODE_STRING_SIMPLE("one: n in 0..1/fr,pt_BR"), + UNICODE_STRING_SIMPLE("zero: n is 0; one: n mod 10 is 1 and n mod 100 is not 11/lv"), + UNICODE_STRING_SIMPLE("one: n is 1; two: n is 2/ga"), + UNICODE_STRING_SIMPLE("zero: n is 0; one: n is 1; zero: n mod 100 in 1..19/ro"), + UNICODE_STRING_SIMPLE("other: n mod 100 in 11..19; one: n mod 10 is 1; few: n mod 10 in 2..9/lt"), + UNICODE_STRING_SIMPLE("one: n mod 10 is 1 and n mod 100 is not 11; few: n mod 10 in 2..4 ") + + UNICODE_STRING_SIMPLE("and n mod 100 not in 12..14/hr,ru,sr,uk"), + UNICODE_STRING_SIMPLE("one: n is 1; few: n in 2..4/cs,sk"), + UNICODE_STRING_SIMPLE("one: n is 1; few: n mod 10 in 2..4 and n mod 100 not in 12..14/pl"), + UNICODE_STRING_SIMPLE("one: n mod 100 is 1; two: n mod 100 is 2; few: n mod 100 in 3..4/sl"), + UNICODE_STRING_SIMPLE("zero: n is 0; one: n is 1; two: n is 2; few: n is 3..10; many: n in 11..99/ar"), + "" +}; + +static Hashtable *fPluralRuleLocaleHash=NULL; + +static const UnicodeString PLURAL_KEYWORD_ZERO = UNICODE_STRING_SIMPLE("zero"); +static const UnicodeString PLURAL_KEYWORD_ONE = UNICODE_STRING_SIMPLE("one"); +static const UnicodeString PLURAL_KEYWORD_TWO = 
UNICODE_STRING_SIMPLE("two"); +static const UnicodeString PLURAL_KEYWORD_FEW = UNICODE_STRING_SIMPLE("few"); +static const UnicodeString PLURAL_KEYWORD_MANY = UNICODE_STRING_SIMPLE("many"); +static const UnicodeString PLURAL_KEYWORD_OTHER = UNICODE_STRING_SIMPLE("other"); +static const UnicodeString PLURAL_DEFAULT_RULE = UNICODE_STRING_SIMPLE("other: n"); + +static const UnicodeString PK_IN=UNICODE_STRING_SIMPLE("in"); +static const UnicodeString PK_NOT=UNICODE_STRING_SIMPLE("not"); +static const UnicodeString PK_IS=UNICODE_STRING_SIMPLE("is"); +static const UnicodeString PK_MOD=UNICODE_STRING_SIMPLE("mod"); +static const UnicodeString PK_AND=UNICODE_STRING_SIMPLE("and"); +static const UnicodeString PK_OR=UNICODE_STRING_SIMPLE("or"); +static const UnicodeString PK_VAR_N=UNICODE_STRING_SIMPLE("n"); + + + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules); +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration) + +PluralRules::PluralRules(UErrorCode& status) { + fLocaleStringsHash=NULL; + rules = NULL; + parser = new RuleParser(); + initHashtable(status); + if (U_SUCCESS(status)) { + getRuleData(status); + } +} + +PluralRules::PluralRules(const PluralRules& other) { + *this=other; +} + +PluralRules::~PluralRules() { + delete rules; + delete parser; +} + + +PluralRules* +PluralRules::clone() const { + return new PluralRules(*this); +} + +PluralRules& +PluralRules:: operator=(const PluralRules& other) { + fLocaleStringsHash=other.fLocaleStringsHash; + rules = new RuleChain(*other.rules); + parser = new RuleParser(); + + return *this; +} + +PluralRules* U_EXPORT2 +PluralRules::createRules(const UnicodeString& description, UErrorCode& status) { + RuleChain rules; + + PluralRules *newRules = new PluralRules(status); + if ( (newRules != NULL)&& U_SUCCESS(status) ) { + status = newRules->parseDescription((UnicodeString &)description, rules); + if (U_SUCCESS(status)) { + newRules->addRules(rules, status); + } + } + if (U_FAILURE(status)) { + delete newRules; + return 
NULL; + } + else { + return newRules; + } +} + +PluralRules* U_EXPORT2 +PluralRules::createDefaultRules(UErrorCode& status) { + return createRules(PLURAL_DEFAULT_RULE, status); +} + +PluralRules* U_EXPORT2 +PluralRules::forLocale(const Locale& locale, UErrorCode& status) { + RuleChain *locRules; + + PluralRules *newRules = new PluralRules(status); + UnicodeString localeName=UnicodeString(locale.getName()); + locRules = (RuleChain *) (fPluralRuleLocaleHash->get(localeName)); + if ( locRules==NULL ) { + // Check parent locales. + char parentLocale[50]; + const char *curLocaleName=locale.getName(); + int32_t localeNameLen=0; + uprv_strcpy(parentLocale, curLocaleName); + while((localeNameLen=uloc_getParent(parentLocale, parentLocale, 50, &status))>=0 ) { + locRules = (RuleChain *) (fPluralRuleLocaleHash->get(localeName)); + if ( locRules != NULL ) { + break; + } + } + } + if (locRules==NULL) { + return createRules(PLURAL_DEFAULT_RULE, status); + } + + newRules->addRules(*locRules, status); + return newRules; +} + +UnicodeString +PluralRules::select(int32_t number) const { + if (rules == NULL) { + return PLURAL_DEFAULT_RULE; + } + else { + return rules->select(number); + } +} + +StringEnumeration* +PluralRules::getKeywords(UErrorCode& status) const { + if (U_FAILURE(status)) return NULL; + StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(status); + return nameEnumerator; +} + + +UBool +PluralRules::isKeyword(const UnicodeString& keyword) const { + if ( rules == NULL) { + if ( keyword != PLURAL_DEFAULT_RULE ) { + return FALSE; + } + else { + return TRUE; + } + } + else { + return rules->isKeyword(keyword); + } +} + +UnicodeString +PluralRules::getKeywordOther() const { + return PLURAL_KEYWORD_OTHER; + +} + +UBool +PluralRules::operator==(const PluralRules& other) const { + int32_t limit; + UBool sameList = TRUE; + const UnicodeString *ptrKeyword; + UErrorCode status= U_ZERO_ERROR; + + if ( this == &other ) { + return TRUE; + } + StringEnumeration* 
myKeywordList = getKeywords(status); + StringEnumeration* otherKeywordList =other.getKeywords(status); + + if (myKeywordList->count(status)!=otherKeywordList->count(status)) { + sameList = FALSE; + } + else { + myKeywordList->reset(status); + while (sameList && (ptrKeyword=myKeywordList->snext(status))!=NULL) { + if (!other.isKeyword(*ptrKeyword)) { + sameList = FALSE; + } + } + otherKeywordList->reset(status); + while (sameList && (ptrKeyword=otherKeywordList->snext(status))!=NULL) { + if (!this->isKeyword(*ptrKeyword)) { + sameList = FALSE; + } + } + delete myKeywordList; + delete otherKeywordList; + if (!sameList) { + return FALSE; + } + } + + if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) { + return FALSE; + } + UnicodeString myKeyword, otherKeyword; + for (int32_t i=0; iselect(i); + otherKeyword = other.select(i); + if (myKeyword!=otherKeyword) { + return FALSE; + } + } + return TRUE; +} + +void +PluralRules::getRuleData(UErrorCode& status) { + UnicodeString ruleData; + UnicodeString localeData; + UnicodeString localeName; + int32_t i=0; + UChar cSlash = (UChar)0x002F; + status=U_ZERO_ERROR; + + + while ( (PLURAL_RULE_DATA[i].length() > 0) && U_SUCCESS(status) ) { + RuleChain rules; + int32_t slashIndex = PLURAL_RULE_DATA[i].indexOf(cSlash); + if ( slashIndex < 0 ) { + break; + } + ruleData=UnicodeString(PLURAL_RULE_DATA[i], 0, slashIndex); + localeData=UnicodeString(PLURAL_RULE_DATA[i], slashIndex+1); + status = parseDescription(ruleData, rules); + int32_t curIndex=0; + while (curIndex < localeData.length() && U_SUCCESS(status)) { + getNextLocale(localeData, &curIndex, localeName); + addRules(localeName, rules, TRUE, status); + } + + i++; + } +} + +UErrorCode +PluralRules::parseDescription(UnicodeString& data, RuleChain& rules) { + UErrorCode status=U_ZERO_ERROR; + int32_t ruleIndex=0; + UnicodeString token; + tokenType type; + tokenType prevType=none; + RuleChain *ruleChain=NULL; + AndConstraint *curAndConstraint=NULL; + OrConstraint 
*orNode=NULL; + + UnicodeString ruleData = data.toLower(); + while (ruleIndex< ruleData.length()) { + if ((status=parser->getNextToken(ruleData, &ruleIndex, token, type))!=U_ZERO_ERROR) { + return status; + } + if ((status=parser->checkSyntax(prevType, type))!=U_ZERO_ERROR) { + return status; + } + switch (type) { + case tAnd: + curAndConstraint = curAndConstraint->add(); + break; + case tOr: + orNode=rules.ruleHeader; + while (orNode->next != NULL) { + orNode = orNode->next; + } + orNode->next= new OrConstraint(); + orNode=orNode->next; + orNode->next=NULL; + curAndConstraint = orNode->add(); + break; + case tIs: + curAndConstraint->rangeHigh=-1; + break; + case tNot: + curAndConstraint->notIn=TRUE; + break; + case tIn: + curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH; + break; + case tNumber: + if ( (curAndConstraint->op==AndConstraint::MOD)&& + (curAndConstraint->opNum == -1 ) ) { + curAndConstraint->opNum=getNumberValue(token); + } + else { + if (curAndConstraint->rangeLow == -1) { + curAndConstraint->rangeLow=getNumberValue(token); + } + else { + curAndConstraint->rangeHigh=getNumberValue(token); + } + } + break; + case tMod: + curAndConstraint->op=AndConstraint::MOD; + break; + case tKeyword: + if (ruleChain==NULL) { + ruleChain = &rules; + } + else { + while (ruleChain->next!=NULL){ + ruleChain=ruleChain->next; + } + ruleChain=ruleChain->next=new RuleChain(); + } + orNode = ruleChain->ruleHeader = new OrConstraint(); + curAndConstraint = orNode->add(); + ruleChain->keyword = token; + break; + } + prevType=type; + } + + return status; +} + +int32_t +PluralRules::getNumberValue(const UnicodeString& token) const { + int32_t i; + char digits[128]; + + for (i=0; igetRepeatLimit(); +} + +void +PluralRules::initHashtable(UErrorCode& status) { + if (fLocaleStringsHash!=NULL) { + return; + } + if ( fPluralRuleLocaleHash == NULL ) { + Mutex mutex; + // This static PluralRule hashtable residents in memory until end of application. 
+ if ((fPluralRuleLocaleHash = new Hashtable(TRUE, status))!=NULL) { + fLocaleStringsHash = fPluralRuleLocaleHash; + return; + } + } + else { + fLocaleStringsHash = fPluralRuleLocaleHash; + } +} + +void +PluralRules::addRules(RuleChain& rules, UErrorCode& status) { + addRules(localeName, rules, FALSE, status); +} + +void +PluralRules::addRules(const UnicodeString& localeName, RuleChain& rules, UBool addToHash, UErrorCode& status) { + RuleChain *newRule = new RuleChain(rules); + if ( addToHash ) + { + { + Mutex mutex; + if ( (RuleChain *)fLocaleStringsHash->get(localeName) == NULL ) { + fLocaleStringsHash->put(localeName, newRule, status); + this->rules=newRule; + } + else { + delete newRule; + return; + } + } + } + else { + this->rules=newRule; + } + newRule->setRepeatLimit(); +} + +AndConstraint::AndConstraint() { + op = AndConstraint::NONE; + opNum=-1; + rangeLow=-1; + rangeHigh=-1; + notIn=FALSE; + next=NULL; +} + + +AndConstraint::AndConstraint(const AndConstraint& other) { + this->op = other.op; + this->opNum=other.opNum; + this->rangeLow=other.rangeLow; + this->rangeHigh=other.rangeHigh; + this->notIn=other.notIn; + if (other.next==NULL) { + this->next=NULL; + } + else { + this->next = new AndConstraint(*other.next); + } +} + +AndConstraint::~AndConstraint() { + if (next!=NULL) { + delete next; + } +}; + + +UBool +AndConstraint::isFulfilled(int32_t number) { + UBool result=TRUE; + int32_t value=number; + + if ( op == MOD ) { + value = value % opNum; + } + if ( rangeHigh == -1 ) { + if ( rangeLow == -1 ) { + result = TRUE; // empty rule + } + else { + if ( value == rangeLow ) { + result = TRUE; + } + else { + result = FALSE; + } + } + } + else { + if ((rangeLow <= value) && (value <= rangeHigh)) { + result = TRUE; + } + else { + result = FALSE; + } + } + if (notIn) { + return !result; + } + else { + return result; + } +} + +int32_t +AndConstraint::updateRepeatLimit(int32_t maxLimit) { + + if ( op == MOD ) { + return uprv_max(opNum, maxLimit); + } + else { + if 
( rangeHigh == -1 ) { + return(rangeLow>maxLimit? rangeLow : maxLimit); + return uprv_max(rangeLow, maxLimit); + } + else{ + return uprv_max(rangeHigh, maxLimit); + } + } +} + + +AndConstraint* +AndConstraint::add() +{ + this->next = new AndConstraint(); + return this->next; +} + +OrConstraint::OrConstraint() { + childNode=NULL; + next=NULL; +} + +OrConstraint::OrConstraint(const OrConstraint& other) { + if ( other.childNode == NULL ) { + this->childNode = NULL; + } + else { + this->childNode = new AndConstraint(*(other.childNode)); + } + if (other.next == NULL ) { + this->next = NULL; + } + else { + this->next = new OrConstraint(*(other.next)); + } +} + +OrConstraint::~OrConstraint() { + if (childNode!=NULL) { + delete childNode; + } + if (next!=NULL) { + delete next; + } +} + +AndConstraint* +OrConstraint::add() +{ + OrConstraint *curOrConstraint=this; + { + Mutex mutex; + + while (curOrConstraint->next!=NULL) { + curOrConstraint = curOrConstraint->next; + } + curOrConstraint->next = NULL; + curOrConstraint->childNode = new AndConstraint(); + } + return curOrConstraint->childNode; +} + +UBool +OrConstraint::isFulfilled(int32_t number) { + OrConstraint* orRule=this; + UBool result=FALSE; + + while (orRule!=NULL && !result) { + result=TRUE; + AndConstraint* andRule = orRule->childNode; + while (andRule!=NULL && result) { + result = andRule->isFulfilled(number); + andRule=andRule->next; + } + orRule = orRule->next; + } + + return result; +} + + +RuleChain::RuleChain() { + ruleHeader=NULL; + next = NULL; + repeatLimit=0; +} + +RuleChain::RuleChain(const RuleChain& other) { + + this->repeatLimit = other.repeatLimit; + this->keyword=other.keyword; + if (other.ruleHeader != NULL) { + this->ruleHeader = new OrConstraint(*(other.ruleHeader)); + } + else { + this->ruleHeader = NULL; + } + if (other.next != NULL ) { + this->next = new RuleChain(*other.next); + } + else + { + this->next = NULL; + } +} + +RuleChain::~RuleChain() { + if (next != NULL) { + delete next; + } + if 
( ruleHeader != NULL ) { + delete ruleHeader; + } +} + +UnicodeString +RuleChain::select(int32_t number) const { + + if ( ruleHeader != NULL ) { + if (ruleHeader->isFulfilled(number)) { + return keyword; + } + } + if ( next != NULL ) { + return next->select(number); + } + else { + return PLURAL_KEYWORD_OTHER; + } + +} + +void +RuleChain::dumpRules(UnicodeString& result) { + UChar digitString[16]; + + if ( ruleHeader != NULL ) { + result += keyword; + OrConstraint* orRule=ruleHeader; + while ( orRule != NULL ) { + AndConstraint* andRule=orRule->childNode; + while ( andRule != NULL ) { + if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) { + result += UNICODE_STRING_SIMPLE(" n is "); + if (andRule->notIn) { + result += UNICODE_STRING_SIMPLE("not "); + } + uprv_itou(digitString,16, andRule->rangeLow,10,0); + result += UnicodeString(digitString); + } + else { + if (andRule->op==AndConstraint::MOD) { + result += UNICODE_STRING_SIMPLE(" n mod "); + uprv_itou(digitString,16, andRule->opNum,10,0); + result += UnicodeString(digitString); + } + else { + result += UNICODE_STRING_SIMPLE(" n "); + } + + if (andRule->rangeHigh==-1) { + if (andRule->notIn) { + result += UNICODE_STRING_SIMPLE(" is not "); + uprv_itou(digitString,16, andRule->rangeLow,10,0); + result += UnicodeString(digitString); + } + else { + result += UNICODE_STRING_SIMPLE(" is "); + uprv_itou(digitString,16, andRule->rangeLow,10,0); + result += UnicodeString(digitString); + } + } + else { + if (andRule->notIn) { + result += UNICODE_STRING_SIMPLE(" not in "); + uprv_itou(digitString,16, andRule->rangeLow,10,0); + result += UnicodeString(digitString); + result += UNICODE_STRING_SIMPLE(" .. "); + uprv_itou(digitString,16, andRule->rangeHigh,10,0); + result += UnicodeString(digitString); + } + else { + result += UNICODE_STRING_SIMPLE(" in "); + uprv_itou(digitString,16, andRule->rangeLow,10,0); + result += UnicodeString(digitString); + result += UNICODE_STRING_SIMPLE(" .. 
"); + uprv_itou(digitString,16, andRule->rangeHigh,10,0); + } + } + } + + if ( (andRule=andRule->next) != NULL) { + result += PK_AND; + } + } + if ( (orRule = orRule->next) != NULL ) { + result += PK_OR; + } + } + } + + if ( next != NULL ) { + next->dumpRules(result); + } +} + +int32_t +RuleChain::getRepeatLimit () { + return repeatLimit; +} + +void +RuleChain::setRepeatLimit () { + int32_t limit=0; + + if ( next != NULL ) { + next->setRepeatLimit(); + limit = next->repeatLimit; + } + + if ( ruleHeader != NULL ) { + OrConstraint* orRule=ruleHeader; + while ( orRule != NULL ) { + AndConstraint* andRule=orRule->childNode; + while ( andRule != NULL ) { + limit = andRule->updateRepeatLimit(limit); + andRule = andRule->next; + } + orRule = orRule->next; + } + } + + repeatLimit = limit; +} + +UErrorCode +RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const { + if ( arraySize < capacityOfKeywords-1 ) { + keywords[arraySize++]=keyword; + } + else { + return U_BUFFER_OVERFLOW_ERROR; + } + + if ( next != NULL ) { + return next->getKeywords(capacityOfKeywords, keywords, arraySize); + } + else { + return U_ZERO_ERROR; + } +} + +UBool +RuleChain::isKeyword(const UnicodeString& keyword) const { + if ( this->keyword == keyword ) { + return TRUE; + } + + if ( next != NULL ) { + return next->isKeyword(keyword); + } + else { + return FALSE; + } +} + + +RuleParser::RuleParser() { + UErrorCode err=U_ZERO_ERROR; + const UnicodeString idStart=UNICODE_STRING_SIMPLE("[[a-z]]"); + const UnicodeString idContinue=UNICODE_STRING_SIMPLE("[[a-z][A-Z][_][0-9]]"); + idStartFilter = new UnicodeSet(idStart, err); + idContinueFilter = new UnicodeSet(idContinue, err); +} + +RuleParser::~RuleParser() { + delete idStartFilter; + delete idContinueFilter; +} + +UErrorCode +RuleParser::checkSyntax(tokenType prevType, tokenType curType ) { + UErrorCode status=U_ZERO_ERROR; + + switch(prevType) { + case none: + case tSemiColon: + if (curType!=tKeyword) { + 
return U_UNEXPECTED_TOKEN; + } + else { + return U_ZERO_ERROR; + } + case tVariableN : + if ( (curType == tIs) || (curType == tMod) || (curType == tIn) || (curType == tNot) ) { + return U_ZERO_ERROR; + } + else { + return U_UNEXPECTED_TOKEN; + } + case tZero: + case tOne: + case tTwo: + case tFew: + case tMany: + case tOther: + case tKeyword: + if ( curType == tColon ) { + return U_ZERO_ERROR; + } + else { + return U_UNEXPECTED_TOKEN; + } + case tColon : + if ( curType == tVariableN ) { + return U_ZERO_ERROR; + } + else { + return U_UNEXPECTED_TOKEN; + } + case tIs: + if ( (curType == tNumber) || (curType == tNot)) { + return U_ZERO_ERROR; + } + else { + return U_UNEXPECTED_TOKEN; + } + case tNot: + if ((curType == tNumber) || (curType == tIn)){ + return U_ZERO_ERROR; + } + else { + return U_UNEXPECTED_TOKEN; + } + case tMod: + case tDot: + case tIn: + case tAnd: + case tOr: + if ( (curType == tNumber) || (curType == tVariableN) ){ + return U_ZERO_ERROR; + } + else { + return U_UNEXPECTED_TOKEN; + } + case tNumber: + if ((curType == tDot) || (curType == tSemiColon) || (curType == tIs) || (curType == tNot) || + (curType == tIn) || (curType == tAnd) || (curType == tOr) ){ + return U_ZERO_ERROR; + } + else { + return U_UNEXPECTED_TOKEN; + } + default: + return U_UNEXPECTED_TOKEN; + } +} + +UErrorCode +RuleParser::getNextToken(const UnicodeString& ruleData, + int32_t *ruleIndex, + UnicodeString& token, + tokenType& type) { + UErrorCode status=U_ZERO_ERROR; + int32_t curIndex= *ruleIndex; + UChar ch; + tokenType prevType=none; + + while (curIndex=ruleData.length() ) { + if ( (type == tLetter)||(type == tNumber) ) { + token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex); + status=getKeyType(token, type); + } + *ruleIndex = ruleData.length(); + } + return status; +} + +UBool +RuleParser::inRange(UChar ch, tokenType& type) { + if ((ch>=CAP_A) and (ch<=CAP_Z)) { + // we assume all characters are in lower case already. 
+ return FALSE; + } + if ((ch>=LOW_A) and (ch<=LOW_Z)) { + type = tLetter; + return TRUE; + } + if ((ch>=U_ZERO) and (ch<=U_NINE)) { + type = tNumber; + return TRUE; + } + switch (ch) { + case COLON: + type = tColon; + return TRUE; + case SPACE: + type = tSpace; + return TRUE; + case SEMI_COLON: + type = tSemiColon; + return TRUE; + case DOT: + type = tDot; + return TRUE; + default : + type = none; + return FALSE; + } +} + + +UErrorCode +RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType) { + if ( keyType==tNumber) { + return U_ZERO_ERROR; + } + if (token==PK_VAR_N) { + keyType = tVariableN; + return U_ZERO_ERROR; + } + if (token==PK_IS) { + keyType = tIs; + return U_ZERO_ERROR; + } + if (token==PK_AND) { + keyType = tAnd; + return U_ZERO_ERROR; + } + if (token==PK_IN) { + keyType = tIn; + return U_ZERO_ERROR; + } + if (token==PK_NOT) { + keyType = tNot; + return U_ZERO_ERROR; + } + if (token==PK_MOD) { + keyType = tMod; + return U_ZERO_ERROR; + } + if (token==PK_OR) { + keyType = tOr; + return U_ZERO_ERROR; + } + + if ( isValidKeyword(token) ) { + keyType = tKeyword; + return U_ZERO_ERROR; + } + + return U_UNEXPECTED_TOKEN; +} + +UBool +RuleParser::isValidKeyword(const UnicodeString& token) { + if ( token.length()==0 ) { + return FALSE; + } + if ( idStartFilter->contains(token.charAt(0) )==TRUE ) { + int32_t i; + for (i=1; i< token.length(); i++) { + if (idContinueFilter->contains(token.charAt(i))== FALSE) { + return FALSE; + } + } + return TRUE; + } + else { + return FALSE; + } +} + +PluralKeywordEnumeration::PluralKeywordEnumeration(UErrorCode& status) : +fKeywordNames(status) +{ + pos=0; +} + +const UnicodeString* +PluralKeywordEnumeration::snext(UErrorCode& status) { + if (U_SUCCESS(status) && pos < fKeywordNames.size()) { + return (const UnicodeString*)fKeywordNames.elementAt(pos++); + } + return NULL; +} + +void +PluralKeywordEnumeration::reset(UErrorCode& /*status*/) { + pos=0; +} + +int32_t +PluralKeywordEnumeration::count(UErrorCode& 
/*status*/) const { + return fKeywordNames.size(); +} + +PluralKeywordEnumeration::~PluralKeywordEnumeration() { + UnicodeString *s; + for (int32_t i=0; i + * Note: + * In ICU 4.0 MessageFormat supports named arguments. If a named argument + * is used, all arguments must be named. Names start with a character in + * UCHAR_ID_START and continue with characters in + * UCHARID_CONTINUE, in particular they do not start with a digit. + * If named arguments are used, {@link #usesNamedArguments()} will return true. + *

+ * The other new methods supporting named arguments are + * {@link #getFormatNames(UErrorCode& status)}, + * {@link #getFormat(const UnicodeString& formatName, UErrorCode& status)}, + * {@link #setFormat(const UnicodeString& formatName, const Format& format, UErrorCode& status)}, + * {@link #adoptFormat(const UnicodeString& formatName, Format* formatToAdopt, UErrorCode& status)}, + * {@link #format(const Formattable* arguments, const UnicodeString *argumentNames, int32_t cnt, UnicodeString& appendTo, FieldPosition& status, int32_t recursionProtection, UErrorCode& success)}, + * {@link #format(const UnicodeString* argumentNames, const Formattable* arguments, int32_t count, UnicodeString& appendTo,UErrorCode& status)}. + * These methods are all compatible with patterns that do not use named arguments-- + * in these cases the keys in the input or output use UnicodeStrings + * that name the argument indices, e.g. "0", "1", "2"... etc. + *

+ * When named arguments are used, certain methods on MessageFormat that take or + * return arrays do not perform any action, since it is not possible to + * identify positions in an array using a name. UErrorCode is set to + * U_ARGUMENT_TYPE_MISMATCH if there is a status/success field in the method. + * These methods are + * {@link #adoptFormats(Format** newFormats, int32_t count)}, + * {@link #setFormats(const Format** newFormats,int32_t count)}, + * {@link #adoptFormat(int32_t n, Format *newFormat)}, + * {@link #getFormats(int32_t& cnt)}, + * {@link #format(const Formattable* source,int32_t cnt,UnicodeString& appendTo, FieldPosition& ignore, UErrorCode& success)}, + * {@link #format(const UnicodeString& pattern,const Formattable* arguments,int32_t cnt,UnicodeString& appendTo,UErrorCode& success)}, + * {@link #format(const Formattable& source, UnicodeString& appendTo,FieldPosition& ignore, UErrorCode& success)}, + * {@link #format(const Formattable* arguments, int32_t cnt, UnicodeString& appendTo, FieldPosition& status, int32_t recursionProtection,UErrorCode& success)}, + * {@link #parse(const UnicodeString& source, ParsePosition& pos,int32_t& count)}, + * {@link #parse(const UnicodeString& source, int32_t& cnt, UErrorCode& status)} + *

+ * *

* During parsing, an input string is matched against the string * template of the MessageFormat to produce an array of Formattable @@ -147,7 +184,7 @@ class DateFormat; * \code * messageFormatPattern := string ( "{" messageFormatElement "}" string )* * - * messageFormatElement := argumentIndex { "," elementFormat } + * messageFormatElement := argumentIndex | argumentName { "," elementFormat } * * elementFormat := "time" { "," datetimeStyle } * | "date" { "," datetimeStyle } @@ -166,6 +203,8 @@ class DateFormat; * | numberFormatPattern * * choiceStyle := choiceFormatPattern + * + * pluralStyle := pluralFormatPattern * \endcode * * If there is no elementFormat, then the argument must be a string, @@ -427,7 +466,7 @@ public: * @param newFormats the new format to be set. * @param cnt the size of the array. */ - virtual void setFormats(const Format** newFormats,int32_t cnt); + virtual void setFormats(const Format** newFormats, int32_t cnt); /** @@ -453,6 +492,62 @@ public: */ virtual void setFormat(int32_t formatNumber, const Format& format); + /** + * Gets format names. This function returns formatNames in StringEnumerations + * which can be used with getFormat() and setFormat() to export formattable + * array from current MessageFormat to another. It is caller's resposibility + * to delete the returned formatNames. + * @param status output param set to success/failure code. + * @draft ICU 4.0 + */ + virtual StringEnumeration* getFormatNames(UErrorCode& status); + + /** + * Gets subformat pointer for given format name. + * This function supports both named and numbered + * arguments-- if numbered, the formatName is the + * corresponding UnicodeStrings (e.g. "0", "1", "2"...). + * The returned Format object should not be deleted by the caller, + * nor should the ponter of other object . The pointer and its + * contents remain valid only until the next call to any method + * of this class is made with this object. 
+ * @param status output param set to success/failure code. + * @draft ICU 4.0 + */ + virtual Format* getFormat(const UnicodeString& formatName, UErrorCode& status); + + /** + * Sets one subformat for given format name. + * See the class description about format name. + * This function supports both named and numbered + * arguments-- if numbered, the formatName is the + * corresponding UnicodeStrings (e.g. "0", "1", "2"...). + * If there is no matched formatName or wrong type, + * the item will be ignored. + * @param formatName Name of the subformat. + * @param format the format to be set. + * @param status output param set to success/failure code. + * @draft ICU 4.0 + */ + virtual void setFormat(const UnicodeString& formatName, const Format& format, UErrorCode& status); + + /** + * Sets one subformat for given format name. + * See the class description about format name. + * This function supports both named and numbered + * arguments-- if numbered, the formatName is the + * corresponding UnicodeStrings (e.g. "0", "1", "2"...). + * If there is no matched formatName or wrong type, + * the item will be ignored. + * The caller should not delete the Format object after this call. + * @param formatName Name of the subformat. + * @param format Format to be adopted. + * @param status output param set to success/failure code. + * @draft ICU 4.0 + */ + virtual void adoptFormat(const UnicodeString& formatName, Format* formatToAdopt, UErrorCode& status); + + /** * Gets an array of subformats of this object. The returned array * should not be deleted by the caller, nor should the pointers @@ -500,11 +595,11 @@ public: * @return Reference to 'appendTo' parameter. 
* @stable ICU 2.0 */ - static UnicodeString& format( const UnicodeString& pattern, - const Formattable* arguments, - int32_t count, - UnicodeString& appendTo, - UErrorCode& status); + static UnicodeString& format(const UnicodeString& pattern, + const Formattable* arguments, + int32_t count, + UnicodeString& appendTo, + UErrorCode& status); /** * Formats the given array of arguments into a user-readable @@ -545,7 +640,31 @@ public: UnicodeString& format(const Formattable& obj, UnicodeString& appendTo, UErrorCode& status) const; + + /** + * Formats the given array of arguments into a user-defined argument name + * array. This function supports both named and numbered + * arguments-- if numbered, the formatName is the + * corresponding UnicodeStrings (e.g. "0", "1", "2"...). + * + * @param argumentNames argument name array + * @param arguments An array of objects to be formatted. + * @param count The number of elements of 'argumentNames' and + * arguments. The number of argumentNames and arguments + * must be the same. + * @param appendTo Output parameter to receive result. + * Result is appended to existing contents. + * @param status Input/output error code. If the + * pattern cannot be parsed, set to failure code. + * @return Reference to 'appendTo' parameter. + * @stable ICU 4.0 + */ + UnicodeString& format(const UnicodeString* argumentNames, + const Formattable* arguments, + int32_t count, + UnicodeString& appendTo, + UErrorCode& status) const; /** * Parses the given string into an array of output arguments. * @@ -569,9 +688,12 @@ public: * @param source String to be parsed. * @param count Output param to receive size of returned array. * @param status Input/output error code. If the - * pattern cannot be parsed, set to failure code. + * pattern cannot be parsed, set to failure code. + * If the MessageFormat is named argument, the status is + * set to U_ARGUMENT_TYPE_MISMATCH. * @return an array of parsed arguments. 
The caller owns both - * the array and its contents. + * the array and its contents. Return NULL if status is not U_ZERO_ERROR. + * * @stable ICU 2.0 */ virtual Formattable* parse( const UnicodeString& source, @@ -615,7 +737,16 @@ public: */ static UnicodeString autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status); - + + /** + * Returns true if this MessageFormat uses named arguments, + * and false otherwise. See class description. + * + * @return true if named arguments are used. + * @draft ICU 4.0 + */ + UBool usesNamedArguments() const; + /** * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. * This method is to implement a simple version of RTTI, since not all @@ -648,6 +779,8 @@ private: UnicodeString fPattern; Format** formatAliases; // see getFormats int32_t formatAliasesCapacity; + UProperty idStart; + UProperty idContinue; MessageFormat(); // default constructor not implemented @@ -671,8 +804,13 @@ private: /** * @internal */ - int32_t arg; // 0-based argument number - + // TODO (claireho) or save the number to argName and use itos to convert to number.=> we need this number + int32_t argNum; // 0-based argument number + /** + * @internal + */ + UnicodeString* argName; // argument name or number + /** * Clone that.format and assign it to this.format * Do NOT delete this.format @@ -681,7 +819,8 @@ private: Subformat& operator=(const Subformat& that) { format = that.format ? that.format->clone() : NULL; offset = that.offset; - arg = that.arg; + argNum = that.argNum; + argName = (that.argNum==-1) ? 
new UnicodeString(*that.argName): NULL; return *this; } @@ -691,7 +830,9 @@ private: UBool operator==(const Subformat& that) const { // Do cheap comparisons first return offset == that.offset && - arg == that.arg && + argNum == that.argNum && + ((argName == that.argName) || + (*argName == *that.argName)) && ((format == that.format) || // handles NULL (*format == *that.format)); } @@ -724,6 +865,12 @@ private: int32_t argTypeCount; int32_t argTypeCapacity; + /** + * Is true iff all argument names are non-negative numbers. + * + */ + UBool isArgNumeric; + // Variable-size array management UBool allocateSubformats(int32_t capacity); UBool allocateArgTypes(int32_t capacity); @@ -776,6 +923,14 @@ private: FieldPosition& status, int32_t recursionProtection, UErrorCode& success) const; + + UnicodeString& format( const Formattable* arguments, + const UnicodeString *argumentNames, + int32_t cnt, + UnicodeString& appendTo, + FieldPosition& status, + int32_t recursionProtection, + UErrorCode& success) const; void makeFormat(int32_t offsetNumber, UnicodeString* segments, @@ -810,7 +965,14 @@ private: listCount = argTypeCount; return argTypes; } - + + /** + * Returns FALSE if the argument name is not legal. + * @param argName argument name. + * @return TRUE if the argument name is legal, otherwise return FALSE. + */ + UBool isLegalArgName(const UnicodeString& argName) const; + friend class MessageFormatAdapter; // getFormatTypeList() access }; diff --git a/icu4c/source/i18n/unicode/plurfmt.h b/icu4c/source/i18n/unicode/plurfmt.h new file mode 100644 index 00000000000..101412f6476 --- /dev/null +++ b/icu4c/source/i18n/unicode/plurfmt.h @@ -0,0 +1,507 @@ +/* +******************************************************************************* +* Copyright (C) 2007, International Business Machines Corporation and +* others. All Rights Reserved. 
+******************************************************************************* +* + +* File PLURFMT.H +* +* Modification History:* +* Date Name Description +* +******************************************************************************** +*/ + +#ifndef PLURFMT +#define PLURFMT + +/** + * \file + * \brief C++ API: PluralFormat object + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/numfmt.h" +#include "unicode/plurfmt.h" +#include "unicode/plurrule.h" +#include "unicode/utypes.h" + +U_NAMESPACE_BEGIN + +class Hashtable; +class PluralRules; +class NumberFormat; +/** + *

+ * PluralFormat supports the creation of internationalized + * messages with plural inflection. It is based on plural + * selection, i.e. the caller specifies messages for each + * plural case that can appear in the user's language and the + * PluralFormat selects the appropriate message based on + * the number. + *

+ *

The Problem of Plural Forms in Internationalized Messages

+ *

+ * Different languages have different ways to inflect + * plurals. Creating internationalized messages that include plural + * forms is only feasible when the framework is able to handle plural + * forms of all languages correctly. ChoiceFormat + * doesn't handle this well, because it attaches a number interval to + * each message and selects the message whose interval contains a + * given number. This can only handle a finite number of + * intervals. But in some languages, like Polish, one plural case + * applies to infinitely many intervals (e.g., paucal applies to + * numbers ending with 2, 3, or 4 except those ending with 12, 13, or + * 14). Thus ChoiceFormat is not adequate. + *

+ * PluralFormat deals with this by breaking the problem + * into two parts: + *

    + *
  • It uses PluralRules that can define more complex + * conditions for a plural case than just a single interval. These plural + * rules define both what plural cases exist in a language, and to + * which numbers these cases apply. + *
  • It provides predefined plural rules for many locales. Thus, the programmer + * need not worry about the plural cases of a language. On the flip side, + * the localizer does not have to specify the plural cases; he can simply + * use the predefined keywords. The whole plural formatting of messages can + * be done using localized patterns from resource bundles. + *
+ *

+ *

Usage of PluralFormat

+ *

+ * This discussion assumes that you use PluralFormat with + * a predefined set of plural rules. You can create one using one of + * the constructors that take a locale object. To + * specify the message pattern, you can either pass it to the + * constructor or set it explicitly using the + * applyPattern() method. The format() + * method takes a number object and selects the message of the + * matching plural case. This message will be returned. + *

+ *
Patterns and Their Interpretation
+ *

+ * The pattern text defines the message output for each plural case of the + * used locale. The pattern is a sequence of + * caseKeyword{message} clauses, separated by white + * space characters. Each clause assigns the message message + * to the plural case identified by caseKeyword. + *

+ * You always have to define a message text for the default plural case + * "other" which is contained in every rule set. If the plural + * rules of the PluralFormat object do not contain a plural case + * identified by caseKeyword, U_DEFAULT_KEYWORD_MISSING + * will be set to status. + * If you do not specify a message text for a particular plural case, the + * message text of the plural case "other" gets assigned to this + * plural case. If you specify more than one message for the same plural case, + * U_DUPLICATE_KEYWORD will be set to status. + *
+ * Spaces between caseKeyword and + * message will be ignored; spaces within + * message will be preserved. + *

+ * The message text for a particular plural case may contain other message + * format patterns. PluralFormat preserves these so that you + * can use the strings produced by PluralFormat with other + * formatters. If you are using PluralFormat inside a + * MessageFormat pattern, MessageFormat will + * automatically evaluate the resulting format pattern.
+ * Thus, curly braces ({, }) are only allowed + * in message texts to define a nested format pattern.
+ * The pound sign (#) will be interpreted as the number placeholder + * in the message text, if it is not contained in curly braces (to preserve + * NumberFormat patterns). PluralFormat will + * replace each of those pound signs by the number passed to the + * format() method. It will be formatted using a + * NumberFormat for the PluralFormat's locale. If you + * need special number formatting, you have to explicitly specify a + * NumberFormat for the PluralFormat to use. + *

+ * Example + *
+ * UErrorCode status = U_ZERO_ERROR;
+ * MessageFormat* msgFmt = new MessageFormat(UnicodeString("{0, plural, 
+ *   one{0, number, C''est #,##0.0#  fichier} other {Ce sont # fichiers}} dans la liste."),
+ *   Locale("fr"), status);
+ * if (U_FAILURE(status)) {
+ *     return;
+ * }
+ * Formattable args1[] = {(int32_t)0};
+ * Formattable args2[] = {(int32_t)3};
+ * FieldPosition ignore(FieldPosition::DONT_CARE);
+ * UnicodeString result;
+ * msgFmt->format(args1, 1, result, ignore, status);
+ * cout << result << endl;
+ * result.remove();
+ * msgFmt->format(args2, 1, result, ignore, status);
+ * cout << result << endl;
+ * 
+ * Produces the output:
+ * C'est 0,0 fichier dans la liste.
+ * Ce sont 3 fichiers dans la liste. + *

+ * Note:
+ * Currently PluralFormat + * does not make use of quotes like MessageFormat. + * If you use plural format strings with MessageFormat and want + * to use a quote sign "'", you have to write "''". + * MessageFormat unquotes this pattern and passes the unquoted + * pattern to PluralFormat. It's a bit trickier if you use + * nested formats that do quoting. In the example above, we wanted to insert + * "'" in the number format pattern. Since + * NumberFormat supports quotes, we had to insert + * "''". But since MessageFormat unquotes the + * pattern before it gets passed to PluralFormat, we have to + * double these quotes, i.e. write "''''". + *

+ *

Defining Custom Plural Rules

+ *

If you need to use PluralFormat with custom rules, you can + * create a PluralRules object and pass it to + * PluralFormat's constructor. If you also specify a locale in this + * constructor, this locale will be used to format the number in the message + * texts. + *

+ * For more information about PluralRules, see + * {@link PluralRules}. + *

+ * + * ported from Java + * @draft ICU 4.0 + */ + +class U_I18N_API PluralFormat : public Format { +public: + + /** + * Creates a new PluralFormat for the default locale. + * This locale will be used to get the set of plural rules and for standard + * number formatting. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @draft ICU 4.0 + */ + PluralFormat(UErrorCode& status); + + /** + * Creates a new PluralFormat for a given locale. + * @param locale the PluralFormat will be configured with + * rules for this locale. This locale will also be used for + * standard number formatting. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @draft ICU 4.0 + */ + PluralFormat(const Locale& locale, UErrorCode& status); + + /** + * Creates a new PluralFormat for a given set of rules. + * The standard number formatting will be done using the default locale. + * @param rules defines the behavior of the PluralFormat + * object. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @draft ICU 4.0 + */ + PluralFormat(const PluralRules& rules, UErrorCode& status); + + /** + * Creates a new PluralFormat for a given set of rules. + * The standard number formatting will be done using the given locale. + * @param locale the default number formatting will be done using this + * locale. + * @param rules defines the behavior of the PluralFormat + * object. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @draft ICU 4.0 + */ + PluralFormat(const Locale& locale, const PluralRules& rules, UErrorCode& status); + + /** + * Creates a new PluralFormat for a given pattern string. 
+ * The default locale will be used to get the set of plural rules and for + * standard number formatting. + * @param pattern the pattern for this PluralFormat. + * errors are returned to status if the pattern is invalid. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @draft ICU 4.0 + */ + PluralFormat(const UnicodeString& pattern, UErrorCode& status); + + /** + * Creates a new PluralFormat for a given pattern string and + * locale. + * The locale will be used to get the set of plural rules and for + * standard number formatting. + * @param locale the PluralFormat will be configured with + * rules for this locale. This locale will also be used for + * standard number formatting. + * @param pattern the pattern for this PluralFormat. + * errors are returned to status if the pattern is invalid. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @draft ICU 4.0 + */ + PluralFormat(const Locale& locale, const UnicodeString& pattern, UErrorCode& status); + + /** + * Creates a new PluralFormat for a given set of rules, a + * pattern and a locale. + * @param rules defines the behavior of the PluralFormat + * object. + * @param pattern the pattern for this PluralFormat. + * errors are returned to status if the pattern is invalid. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @draft ICU 4.0 + */ + PluralFormat(const PluralRules& rules, + const UnicodeString& pattern, + UErrorCode& status); + + /** + * Creates a new PluralFormat for a given set of rules, a + * pattern and a locale. + * @param locale the PluralFormat will be configured with + * rules for this locale. This locale will also be used for + * standard number formatting. + * @param rules defines the behavior of the PluralFormat + * object. 
+ * @param pattern the pattern for this PluralFormat. + * errors are returned to status if the pattern is invalid. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @draft ICU 4.0 + */ + PluralFormat(const Locale& locale, + const PluralRules& rules, + const UnicodeString& pattern, + UErrorCode& status); + + /** + * copy constructor. + * @draft ICU 4.0 + */ + PluralFormat(const PluralFormat& other); + + /** + * Destructor. + * @draft ICU 4.0 + */ + virtual ~PluralFormat(); + + /** + * Sets the pattern used by this plural format. + * The method parses the pattern and creates a map of format strings + * for the plural rules. + * Patterns and their interpretation are specified in the class description. + * + * @param pattern the pattern for this plural format + * errors are returned to status if the pattern is invalid. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @draft ICU 4.0 + */ + void applyPattern(const UnicodeString& pattern, UErrorCode& status); + + /** + * Formats a plural message for a given number. + * + * @param number a number for which the plural message should be formatted + * for. If no pattern has been applied to this + * PluralFormat object yet, the formatted number + * will be returned. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @return the string containing the formatted plural message. + * @draft ICU 4.0 + */ + UnicodeString format(int32_t number, UErrorCode& status) const; + + /** + * Formats a plural message for a given number. + * + * @param number a number for which the plural message should be formatted + * for. If no pattern has been applied to this + * PluralFormat object yet, the formatted number + * will be returned. + * @param appendTo output parameter to receive result. 
+ * result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @return the string containing the formatted plural message. + * @draft ICU 4.0 + */ + UnicodeString& format(int32_t number, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; + + /** + * Sets the locale used by this PluraFormat object. + * Note: Calling this method resets this PluraFormat object, + * i.e., a pattern that was applied previously will be removed, + * and the NumberFormat is set to the default number format for + * the locale. The resulting format behaves the same as one + * constructed from {@link #PluralFormat(locale)}. + * @param locale the locale to use to configure the formatter. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @draft ICU 4.0 + */ + void setLocale(const Locale& locale, UErrorCode& status); + + /** + * Sets the number format used by this formatter. You only need to + * call this if you want a different number format than the default + * formatter for the locale. + * @param format the number format to use. + * @param status output param set to success/failure code on exit, which + * must not indicate a failure before the function call. + * @draft ICU 4.0 + */ + void setNumberFormat(const NumberFormat* format, UErrorCode& status); + + /** + * Assignment operator + * + * @param other the PluralFormat object to copy from. + * @draft ICU 4.0 + */ + PluralFormat& operator=(const PluralFormat& other); + + /** + * Return true if another object is semantically equal to this one. + * + * @param other the PluralFormat object to be compared with. + * @return true if other is semantically equal to this. 
+ * @draft ICU 4.0 + */ + virtual UBool operator==(const Format& other) const; + + /** + * Return true if another object is semantically unequal to this one. + * + * @param other the PluralFormat object to be compared with. + * @return true if other is semantically unequal to this. + * @draft ICU 4.0 + */ + virtual UBool operator!=(const Format& other) const; + + /** + * Clones this Format object polymorphically. The caller owns the + * result and should delete it when done. + * @draft ICU 4.0 + */ + virtual Format* clone(void) const; + + /** + * Redeclared Format method. + * + * @param obj The object to be formatted into a string. + * @param appendTo output parameter to receive result. + * Result is appended to existing contents. + * @param pos On input: an alignment field, if desired. + * On output: the offsets of the alignment field. + * @param status output param filled with success/failure status. + * @return Reference to 'appendTo' parameter. + * @draft ICU 4.0 + */ + UnicodeString& format(const Formattable& obj, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const; + + /** + * Returns the pattern from applyPattern() or constructor(). + * + * @param appendTo output parameter to receive result. + * Result is appended to existing contents. + * @return the UnicodeString with inserted pattern. + * @draft ICU 4.0 + */ + UnicodeString& toPattern(UnicodeString& appendTo); + + /** + * This method is not yet supported by PluralFormat. + *

+ * Before calling, set parse_pos.index to the offset you want to start + * parsing at in the source. After calling, parse_pos.index is the end of + * the text you parsed. If error occurs, index is unchanged. + *

+ * When parsing, leading whitespace is discarded (with a successful parse), + * while trailing whitespace is left as is. + *

+ * See Format::parseObject() for more. + * + * @param source The string to be parsed into an object. + * @param result Formattable to be set to the parse result. + * If parse fails, return contents are undefined. + * @param parse_pos The position to start parsing at. Upon return + * this param is set to the position after the + * last character successfully parsed. If the + * source is not parsed successfully, this param + * will remain unchanged. + * @draft ICU 4.0 + */ + virtual void parseObject(const UnicodeString& source, + Formattable& result, + ParsePosition& parse_pos) const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @draft ICU 4.0 + * + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @draft ICU 4.0 + */ + virtual UClassID getDynamicClassID() const; + +private: +typedef enum fmtToken { + none, + tLetter, + tNumber, + tSpace, + tNumberSign, + tLeftBrace, + tRightBrace, + }fmtToken; + + Locale locale; + PluralRules* pluralRules; + UnicodeString pattern; + Hashtable *fParsedValuesHash; + NumberFormat* numberFormat; + NumberFormat* replacedNumberFormat; + + PluralFormat(); // default constructor not implemented + void init(const PluralRules* rules, const Locale& curlocale, UErrorCode& status); + UBool inRange(UChar ch, fmtToken& type); + UBool checkSufficientDefinition(); + void parsingFailure(); + UnicodeString insertFormattedNumber(int32_t number, + UnicodeString& message, + UnicodeString& appendTo, + FieldPosition& pos) const; + void copyHashtable(Hashtable *other, UErrorCode& status); +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // _PLURFMT +//eof diff --git a/icu4c/source/i18n/unicode/plurrule.h b/icu4c/source/i18n/unicode/plurrule.h new file mode 100644 index 00000000000..8b635aea526 --- /dev/null +++ b/icu4c/source/i18n/unicode/plurrule.h @@ -0,0 +1,262 @@ +/* 
+******************************************************************************* +* Copyright (C) 2007, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +* +* +* File PLURRULE.H +* +* Modification History:* +* Date Name Description +* +******************************************************************************** +*/ + +#ifndef PLURRULE +#define PLURRULE + +/** + * \file + * \brief C++ API: PluralRules object + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/format.h" +#include "unicode/utypes.h" + +U_NAMESPACE_BEGIN + +class Hashtable; +class RuleChain; +class RuleParser; + + /** + * Defines rules for mapping positive long values onto a small set of + * keywords. Rules are constructed from a text description, consisting + * of a series of keywords and conditions. The {@link #select} method + * examines each condition in order and returns the keyword for the + * first condition that matches the number. If none match, + * default rule(other) is returned. + * + * Examples:

+    *   "one: n is 1; few: n in 2..4"
+ * This defines two rules, for 'one' and 'few'. The condition for + * 'one' is "n is 1" which means that the number must be equal to + * 1 for this condition to pass. The condition for 'few' is + * "n in 2..4" which means that the number must be between 2 and + * 4 inclusive for this condition to pass. All other numbers + * are assigned the keyword "other" by the default rule. + *

+    *    "zero: n is 0; one: n is 1; zero: n mod 100 in 1..19"
+ * This illustrates that the same keyword can be defined multiple times. + * Each rule is examined in order, and the first keyword whose condition + * passes is the one returned. Also note that a modulus is applied + * to n in the last rule. Thus its condition holds for 119, 219, 319... + *

+    *    "one: n is 1; few: n mod 10 in 2..4 and n mod 100 not in 12..14"
+ * This illustrates conjunction and negation. The condition for 'few' + * has two parts, both of which must be met: "n mod 10 in 2..4" and + * "n mod 100 not in 12..14". The first part applies a modulus to n + * before the test as in the previous example. The second part applies + * a different modulus and also uses negation, thus it matches all + * numbers _not_ in 12, 13, 14, 112, 113, 114, 212, 213, 214... + *

+    * 
+    *  Keywords
+    *  could be defined by users or from ICU locale data. There are 6
+    *  predefined values in ICU - 'zero', 'one', 'two', 'few', 'many' and
+    *  'other'. Callers need to check the value of keyword returned by
+    *  {@link #select} method.
+    *  

+    * 
+    * Examples:
+    * UnicodeString keyword = pl->select(number);
+    * if (keyword == UnicodeString("one")) {
+    *     ...
+    * } 
+    * else if ( ... ) 
+    * 
+    */
+class U_I18N_API PluralRules : public UObject {
+public:
+    
+    /**
+     * Constructor.
+     * @param status  Output param set to success/failure code on exit, which
+     *                must not indicate a failure before the function call.
+     * 
+     * @draft ICU 4.0
+     */
+    PluralRules(UErrorCode& status);
+    
+    /**
+     * Copy constructor.
+     * @param other the PluralRules object to copy from.
+     * @draft ICU 4.0
+     */
+    PluralRules(const PluralRules& other);
+
+    /**
+     * Destructor.
+     * @draft ICU 4.0
+     */
+    virtual ~PluralRules();
+    
+    /**
+     * Creates a copy of this PluralRules object.
+     * @return a pointer to the copy.
+     *         NOTE(review): presumably the caller owns the result and must
+     *         delete it -- confirm against the implementation.
+     * @draft ICU 4.0
+     */
+    PluralRules* clone() const;
+
+    /**
+      * Assignment operator.
+      * @draft ICU 4.0
+      */
+    PluralRules& operator=(const PluralRules&);
+    
+    /**
+     * Creates a PluralRules from a description if it is parsable, otherwise 
+     * returns NULL.
+     * 
+     * @param description rule description
+     * @param status      Output param set to success/failure code on exit, which
+     *                    must not indicate a failure before the function call.
+     * @return            new PluralRules pointer. NULL if there is an error.
+     * @draft ICU 4.0
+     */
+    static PluralRules* U_EXPORT2 createRules(const UnicodeString& description,
+                                              UErrorCode& status);
+    
+    /**
+     * Creates the default rules, which accept any number.
+     * 
+     * @param status  Output param set to success/failure code on exit, which
+     *                must not indicate a failure before the function call.
+     * @return        new PluralRules pointer. NULL if there is an error.
+     * @draft ICU 4.0
+     */
+    static PluralRules* U_EXPORT2 createDefaultRules(UErrorCode& status);
+    
+    /**
+     * Provides access to the predefined PluralRules for a given
+     * locale.
+     * 
+     * @param locale  The locale for which a PluralRules object is
+     *                returned.
+     * @param status  Output param set to success/failure code on exit, which
+     *                must not indicate a failure before the function call.
+     * @return        The predefined PluralRules object pointer for
+     *                this locale. If there are no predefined rules for this locale,
+     *                the rules for the closest parent in the locale hierarchy
+     *                that has one will be returned.  The final fallback always
+     *                returns the default 'other' rules.
+     * @draft ICU 4.0
+     */
+    static PluralRules* U_EXPORT2 forLocale(const Locale& locale, UErrorCode& status);
+    
+    /**
+     * Given a number, returns the keyword of the first rule that applies to
+     * the number.  This function can be used with isKeyword* functions to
+     * determine the keyword for default plural rules.
+     * 
+     * @param number  The number for which the rule has to be determined.
+     * @return        The keyword of the selected rule.
+     * @draft ICU 4.0
+     */
+     UnicodeString select(int32_t number) const;
+
+     /**
+      * Returns a list of all rule keywords used in this PluralRules
+      * object.  The rule 'other' is always present by default.
+      * 
+      * @param status Output param set to success/failure code on exit, which
+      *               must not indicate a failure before the function call.
+      * @return       StringEnumeration with the keywords.
+      *               The caller must delete the object.
+      * @draft ICU 4.0
+      */
+      StringEnumeration* getKeywords(UErrorCode& status) const;
+  
+      /**
+       * Returns TRUE if the given keyword is defined in this 
+       * PluralRules object.
+       * 
+       * @param keyword  the input keyword.
+       * @return         TRUE if the input keyword is defined.
+       *                 Otherwise, return FALSE.
+       * @draft ICU 4.0
+       */    
+      UBool isKeyword(const UnicodeString& keyword) const;
+      
+
+      /**
+       * Returns keyword for default plural form.
+       * 
+       * @return         keyword for default plural form.
+       * @internal 4.0
+       * @draft ICU 4.0
+       */    
+      UnicodeString getKeywordOther() const;
+
+      /**
+       * Compares the equality of two PluralRules objects.
+       *
+       * @param other The other PluralRules object to be compared with.
+       * @return      True if the given PluralRules is the same as this 
+       *              PluralRules; false otherwise.
+       * @draft ICU 4.0
+       */     
+      virtual UBool operator==(const PluralRules& other) const;
+      
+      /**
+       * Compares the inequality of two PluralRules objects.
+       * Implemented inline as the negation of operator==().
+       *
+       * @param other The PluralRules object to be compared with.
+       * @return      True if the given PluralRules is not the same as this 
+       *              PluralRules; false otherwise.
+       * @draft ICU 4.0
+       */
+      UBool operator!=(const PluralRules& other) const  {return !operator==(other);}
+
+      
+      /**
+       * ICU "poor man's RTTI", returns a UClassID for this class.
+       *
+       * @draft ICU 4.0
+       *
+      */
+      static UClassID U_EXPORT2 getStaticClassID(void);
+
+      /**
+       * ICU "poor man's RTTI", returns a UClassID for the actual class.
+       *
+       * @draft ICU 4.0
+       */
+      virtual UClassID getDynamicClassID() const;
+      
+
+private: 
+    Hashtable       *fLocaleStringsHash;
+    UnicodeString   localeName;
+    RuleChain       *rules;
+    RuleParser      *parser;
+    
+    PluralRules();   // default constructor not implemented
+    void getRuleData(UErrorCode& status);   
+    int32_t getRepeatLimit() const; 
+    UErrorCode parseDescription(UnicodeString& ruleData, RuleChain& rules);
+    void getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName);
+    void addRules(RuleChain& rules, UErrorCode& err);
+    void addRules(const UnicodeString& localeName, RuleChain& rules, UBool addToHash, UErrorCode& err);
+    void initHashtable(UErrorCode& err);
+    int32_t getNumberValue(const UnicodeString& token) const;
+   
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // PLURRULE
+//eof
diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in
index 27a0424b3bd..0ba2939bce4 100644
--- a/icu4c/source/test/intltest/Makefile.in
+++ b/icu4c/source/test/intltest/Makefile.in
@@ -45,7 +45,7 @@ fldset.o dadrfmt.o dadrcal.o dadrcoll.o dcfmapts.o decoll.o dtfmapts.o dtfmrgts.
 dtptngts.o encoll.o escoll.o ficoll.o frcoll.o g7coll.o intltest.o	\
 itercoll.o itformat.o itmajor.o itutil.o jacoll.o lcukocol.o	\
 loctest.o miscdtfm.o mnkytst.o msfmrgts.o nmfmapts.o nmfmtrt.o		\
-numfmtst.o numrgts.o pptest.o regcoll.o restest.o restsnew.o \
+numfmtst.o numrgts.o  plurults.o plurfmts.o pptest.o regcoll.o restest.o restsnew.o \
 sdtfmtts.o svccoll.o tchcfmt.o	\
 tfsmalls.o tmsgfmt.o trcoll.o tscoll.o tsdate.o tsdcfmsy.o tsdtfmsy.o	\
 tsmthred.o tsnmfmt.o tsputil.o tstnrapi.o tstnorm.o tzbdtest.o		\
diff --git a/icu4c/source/test/intltest/intltest.vcproj b/icu4c/source/test/intltest/intltest.vcproj
index 7284d2e15e1..ec52ac8e1c3 100644
--- a/icu4c/source/test/intltest/intltest.vcproj
+++ b/icu4c/source/test/intltest/intltest.vcproj
@@ -717,6 +717,18 @@
 				RelativePath=".\pptest.h"
 				>
 			
+			
+			
+			
+			
+			
+			
+			
+			
 			
diff --git a/icu4c/source/test/intltest/itformat.cpp b/icu4c/source/test/intltest/itformat.cpp
index eb80bad7c14..1ce15d3887d 100644
--- a/icu4c/source/test/intltest/itformat.cpp
+++ b/icu4c/source/test/intltest/itformat.cpp
@@ -48,7 +48,8 @@
 #include "dtptngts.h"       // IntlTestDateTimePatternGeneratorAPI
 #include "tzoffloc.h"       // TimeZoneOffsetLocalTest
 #include "tzfmttst.h"       // TimeZoneFormatTest
-
+#include "plurults.h"       // PluralRulesTest
+#include "plurfmts.h"       // PluralFormatTest
 
 #define TESTCLASS(id, TestClass)          \
     case id:                              \
@@ -114,7 +115,8 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam
         TESTCLASS(32,IntlTestDateTimePatternGeneratorAPI);
         TESTCLASS(33,TimeZoneOffsetLocalTest);
         TESTCLASS(34,TimeZoneFormatTest);
-
+        TESTCLASS(35,PluralRulesTest);
+        TESTCLASS(36,PluralFormatTest);
 
         default: name = ""; break; //needed to end loop
     }
diff --git a/icu4c/source/test/intltest/plurfmts.cpp b/icu4c/source/test/intltest/plurfmts.cpp
new file mode 100644
index 00000000000..dbc271a88b5
--- /dev/null
+++ b/icu4c/source/test/intltest/plurfmts.cpp
@@ -0,0 +1,506 @@
+
+/********************************************************************
+ * COPYRIGHT: 
+ * Copyright (c) 1997-2006, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "plurults.h"
+#include "plurfmts.h"
+#include "cmemory.h"
+#include "unicode/plurrule.h"
+#include "unicode/plurfmt.h"
+
+
+#if defined( U_DEBUG_PLURFMT ) 
+#include 
+#endif
+
+const UnicodeString oddAndEvenRule = UNICODE_STRING_SIMPLE("odd: n mod 2 is 1"); // single rule: keyword "odd" when n mod 2 is 1
+#define PLURAL_PATTERN_DATA 4 // number of entries in patternTestData and in the two expected-result tables below
+#define PLURAL_TEST_ARRAY_SIZE 256
+
+const UnicodeString patternTestData[PLURAL_PATTERN_DATA] = { // PluralFormat patterns written with the "odd" and "other" keywords
+    UNICODE_STRING_SIMPLE("odd {# is odd.} other{# is even.}"),
+    UNICODE_STRING_SIMPLE("other{# is odd or even.}"),
+    UNICODE_STRING_SIMPLE("odd{The number {0, number, #.#0} is odd.}other{The number {0, number, #.#0} is even.}"),
+    UNICODE_STRING_SIMPLE("odd{The number {#} is odd.}other{The number {#} is even.}"),
+};
+
+const UnicodeString patternOddTestResult[PLURAL_PATTERN_DATA] = { // expected text for odd numbers, entry i pairs with patternTestData[i]; NOTE(review): presumably appended to, or substituted for, the formatted number -- confirm in the caller
+    UNICODE_STRING_SIMPLE(" is odd."),
+    UNICODE_STRING_SIMPLE(" is odd or even."),
+    UNICODE_STRING_SIMPLE("The number {0, number, #.#0} is odd."),
+    UNICODE_STRING_SIMPLE("The number {#} is odd."),
+};
+
+const UnicodeString patternEvenTestResult[PLURAL_PATTERN_DATA] = { // expected text for even numbers, same layout as patternOddTestResult
+    UNICODE_STRING_SIMPLE(" is even."),
+    UNICODE_STRING_SIMPLE(" is odd or even."),
+    UNICODE_STRING_SIMPLE("The number {0, number, #.#0} is even."),
+    UNICODE_STRING_SIMPLE("The number {#} is even."),
+};
+
+#define PLURAL_SYNTAX_DATA 8 // number of entries in checkSyntaxtData
+const UnicodeString checkSyntaxtData[PLURAL_SYNTAX_DATA] = { // NOTE(review): presumably patterns that applyPattern() is expected to reject -- confirm in the unit test that consumes them
+    UNICODE_STRING_SIMPLE("odd{foo} odd{bar} other{foobar}"),
+    UNICODE_STRING_SIMPLE("odd{foo} other{bar} other{foobar}"),
+    UNICODE_STRING_SIMPLE("odd{foo}"),
+    UNICODE_STRING_SIMPLE("otto{foo} other{bar}"),
+    UNICODE_STRING_SIMPLE("1odd{foo} other{bar}"),
+    UNICODE_STRING_SIMPLE("odd{foo},other{bar}"),
+    UNICODE_STRING_SIMPLE("od d{foo} other{bar}"),
+    UNICODE_STRING_SIMPLE("odd{foo}{foobar}other{foo}"),
+};
+
+const UnicodeString PLKeywordLookups[6] = { // keyword strings, indexed by the PFT_* constants defined below
+    UNICODE_STRING_SIMPLE("zero"),
+    UNICODE_STRING_SIMPLE("one"),
+    UNICODE_STRING_SIMPLE("two"),
+    UNICODE_STRING_SIMPLE("few"),
+    UNICODE_STRING_SIMPLE("many"),
+    UNICODE_STRING_SIMPLE("other"),
+};
+
+// Each PFT_* value is the index of the matching keyword in PLKeywordLookups[],
+// so the two lists must be kept in the same order.
+#define PFT_ZERO   0
+#define PFT_ONE    1
+#define PFT_TWO    2
+#define PFT_FEW    3
+#define PFT_MANY   4
+#define PFT_OTHER  5
+
+void PluralFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
+{
+    // Names of the available tests, indexed by test number.  Any index
+    // outside this table reports an empty name.
+    static const char* const kTestNames[] = {
+        "PluralFormat basic test",
+        "PluralFormat unit tests",
+        "PluralFormat locale test",
+    };
+    const int32_t kTestCount = (int32_t)(sizeof(kTestNames) / sizeof(kTestNames[0]));
+
+    if (exec) {
+        logln("TestSuite PluralFormat");
+    }
+
+    if (index < 0 || index >= kTestCount) {
+        name = "";
+        return;
+    }
+
+    // The name is reported even when the test is only being enumerated.
+    name = kTestNames[index];
+    if (!exec) {
+        return;
+    }
+
+    if (index == 0) {
+        pluralFormatBasicTest();
+    }
+    else if (index == 1) {
+        pluralFormatUnitTest();
+    }
+    else {
+        pluralFormatLocaleTest();
+    }
+}
+
+/**
+ * Test various generic API methods of PluralFormat for Basic usage.
+ *
+ * Builds one PluralFormat through each of the eight constructor overloads
+ * and hands every successfully constructed formatter to numberFormatTest(),
+ * together with a default NumberFormat, once with the arguments (1, 12) and
+ * once with (100, 112).
+ *
+ * The test reports a data error and returns early if the default
+ * PluralRules or the NumberFormat cannot be created: the rules object is
+ * dereferenced by four of the constructor calls, and the NumberFormat is
+ * passed on to numberFormatTest().
+ */
+void PluralFormatTest::pluralFormatBasicTest(/*char *par*/)
+{
+    UErrorCode status[8];
+    PluralFormat* plFmt[8];
+    Locale        locale = Locale::getDefault();
+    UnicodeString otherPattern = UnicodeString("other{#}");
+    UnicodeString message=UnicodeString("ERROR: PluralFormat basic test");
+
+    // ========= Test constructors
+    logln(" Testing PluralFormat constructors ...");
+    status[0] = U_ZERO_ERROR;
+    PluralRules*  plRules = PluralRules::createDefaultRules(status[0]);
+    if (U_FAILURE(status[0]) || plRules == NULL) {
+        // *plRules is dereferenced below; never continue with a NULL pointer.
+        dataerrln("ERROR: Could not create default PluralRules instance ");
+        delete plRules;
+        return;
+    }
+
+    status[0] = U_ZERO_ERROR;
+    NumberFormat *numFmt = NumberFormat::createInstance(status[0]);
+    if (U_FAILURE(status[0]) || numFmt == NULL) {
+        // numFmt is the reference formatter given to numberFormatTest().
+        // NOTE(review): numberFormatTest() presumably dereferences it, so
+        // stop here instead of passing a NULL pointer along.
+        dataerrln("ERROR: Could not create NumberFormat instance with default locale ");
+        delete numFmt;
+        delete plRules;
+        return;
+    }
+
+    // Every constructor gets its own, freshly reset, status slot.
+    for (int32_t i=0; i< 8; ++i) {
+        status[i] = U_ZERO_ERROR;
+    }
+    plFmt[0] = new PluralFormat(status[0]);
+    plFmt[1] = new PluralFormat(*plRules, status[1]);
+    plFmt[2] = new PluralFormat(locale, status[2]);
+    plFmt[3] = new PluralFormat(locale, *plRules, status[3]);
+    plFmt[4] = new PluralFormat(otherPattern, status[4]);
+    plFmt[5] = new PluralFormat(*plRules, otherPattern, status[5]);
+    plFmt[6] = new PluralFormat(locale, otherPattern, status[6]);
+    plFmt[7] = new PluralFormat(locale, *plRules, otherPattern, status[7]);
+
+    for (int32_t i=0; i< 8; ++i) {
+        // A NULL formatter (failed allocation) is treated like a failed
+        // constructor so that it is never passed to numberFormatTest().
+        if (plFmt[i] != NULL && U_SUCCESS(status[i])) {
+            numberFormatTest(plFmt[i], numFmt, 1, 12, NULL, NULL, FALSE, &message);
+            numberFormatTest(plFmt[i], numFmt, 100, 112, NULL, NULL, FALSE, &message);
+        }
+        else {
+            dataerrln("ERROR: PluralFormat constructor failed!");
+        }
+        delete plFmt[i];
+    }
+    delete numFmt;
+    delete plRules;
+}
+
+/**
+ * Unit tests of PluralFormat class.
+ */
+void PluralFormatTest::pluralFormatUnitTest(/*char *par*/)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    PluralRules*  plRules = PluralRules::createRules(oddAndEvenRule, status);
+    if (U_FAILURE(status)) {
+        dataerrln("ERROR:  create PluralRules instance failed in unit tests.- exitting");
+        return;
+    }
+    
+    // ======= Test PluralRules pattern syntax.
+    logln("Testing PluralRules pattern syntax.");
+    for (int32_t i=0; iformat(i, numResult);
+        plResult = plFmt->format(i, status);
+        if ((numOddAppendStr!= NULL)&&(numEvenAppendStr!=NULL)) {
+            if (overwrite) {
+                if (i&1) {
+                    numResult = *numOddAppendStr;
+                }
+                else {
+                    numResult = *numEvenAppendStr;
+                }
+            }
+            else {  // Append the string
+                if (i&1) {
+                    numResult += *numOddAppendStr;
+                }
+                else{
+                    numResult += *numEvenAppendStr;
+                }
+            }
+        }
+        if ( (numResult!=plResult) || U_FAILURE(status) ) {
+            if ( message == NULL ) {
+                errln("ERROR: Unexpected plural format - got:"+plResult+ UnicodeString("  expecting:")+numResult);
+            }
+            else {
+                errln( *message+UnicodeString("  got:")+plResult+UnicodeString("  expecting:")+numResult);
+                
+            }
+        }
+    }
+    return;
+}
+
+
+void
+PluralFormatTest::helperTestRusults(const char** localeArray, 
+                                    int32_t capacityOfArray, 
+                                    UnicodeString& testPattern,
+                                    int8_t *expResults) {
+    UErrorCode status;
+    UnicodeString plResult;
+    
+    for (int32_t i=0; i
+#endif
+
+#define PLURAL_TEST_NUM    13 // number of entries in pluralTestData; also the first dimension of pluralTestResult
+const UnicodeString pluralTestData[PLURAL_TEST_NUM] = { // rule descriptions, each defining the single keyword "a"; the last entry is an empty description
+        UNICODE_STRING_SIMPLE("a: n is 1"),
+        UNICODE_STRING_SIMPLE("a: n mod 10 is 2"),
+        UNICODE_STRING_SIMPLE("a: n is not 1"),
+        UNICODE_STRING_SIMPLE("a: n mod 3 is not 1"),
+        UNICODE_STRING_SIMPLE("a: n in 2..5"),
+        UNICODE_STRING_SIMPLE("a: n not in 2..5"),
+        UNICODE_STRING_SIMPLE("a: n mod 10 in 2..5"),
+        UNICODE_STRING_SIMPLE("a: n mod 10 is 2 and n is not 12"),
+        UNICODE_STRING_SIMPLE("a: n mod 10 in 2..3 or n mod 10 is 5"),
+        UNICODE_STRING_SIMPLE("a: n is 1 or n is 4 or n is 23"),
+        UNICODE_STRING_SIMPLE("a: n mod 2 is 1 and n is not 3 and n in 1..11"),
+        UNICODE_STRING_SIMPLE("a: n mod 2 is 1 or n mod 5 is 1 and n is not 6"),
+        "",
+};
+ 
+
+const int32_t pluralTestResult[PLURAL_TEST_NUM][30] = { // row i: numbers expected to select "a" under pluralTestData[i]; setupResult() reads a row until the first non-positive entry after the first one. Only 12 rows are listed, so the 13th is zero-initialized.
+    {1, 0},
+    {2,12,22, 0},
+    {0,2,3,4,5,0},
+    {0,2,3,5,6,8,9,0},
+    {2,3,4,5,0},
+    {0,1,6,7,8, 0},
+    {2,3,4,5,12,13,14,15,22,23,24,25,0},
+    {2,22,32,42,0},
+    {2,3,5,12,13,15,22,23,25,0},
+    {1,4,23,0},
+    {1,5,7,9,11,0},
+    {1,3,5,7,9,11,13,15,16,0},
+};
+
+
+#define MAX_EQ_ROW  2 // rows of testEquRules
+#define MAX_EQ_COL  5 // capacity of each row of testEquRules
+UnicodeString testEquRules[MAX_EQ_ROW][MAX_EQ_COL] = { // NOTE(review): each row presumably lists rule descriptions expected to select the same keywords, ended by an empty string -- confirm in testEquality()
+    {   UNICODE_STRING_SIMPLE("a: n in 2..3"),
+        UNICODE_STRING_SIMPLE("a: n is 2 or n is 3"), 
+        UNICODE_STRING_SIMPLE( "a:n is 3 and n in 2..5 or n is 2"),
+        "",
+    },
+    {   UNICODE_STRING_SIMPLE("a: n is 12; b:n mod 10 in 2..3"),
+        UNICODE_STRING_SIMPLE("b: n mod 10 in 2..3 and n is not 12; a: n in 12..12"),
+        UNICODE_STRING_SIMPLE("b: n is 13; a: n in 12..13; b: n mod 10 is 2 or n mod 10 is 3"),
+        "",
+    }
+};
+void setupResult(const int32_t testSource[], char result[], int32_t* max); // fills result with expected keyword initials ('a' or 'o') and stores the filled length in *max
+UBool checkEqual(PluralRules *test, char *result, int32_t max); // compares the first character of select(i) against result[i]
+UBool testEquality(PluralRules *test); // helper driven by testEquRules
+
+// This is an API test, not a unit test.  It doesn't test very many cases, and doesn't
+// try to test the full functionality.  It just calls each function in the class and
+// verifies that it works on a basic level.
+
+void PluralRulesTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
+{
+    if (exec) {
+        logln("TestSuite PluralRulesAPI");
+    }
+
+    // Test 0 is the only test; every other index reports an empty name.
+    if (index != 0) {
+        name = "";
+        return;
+    }
+
+    name = "PluralRules API test";
+    if (!exec) {
+        return;
+    }
+
+    logln("PluralRules API test---");
+    logln("");
+
+    // Run the API test with English as the default locale.  previousDefault
+    // is default-constructed before the switch and is used afterwards to
+    // set the default locale back.
+    UErrorCode errorCode = U_ZERO_ERROR;
+    Locale previousDefault;
+    Locale::setDefault(Locale::getEnglish(), errorCode);
+    if (U_FAILURE(errorCode)) {
+        errln("ERROR: Could not set default locale, test may not give correct results");
+    }
+    testAPI(/*par*/);
+    Locale::setDefault(previousDefault, errorCode);
+}
+
+/**
+ * Test various generic API methods of PluralRules for API coverage.
+ */
+void PluralRulesTest::testAPI(/*char *par*/)
+{
+    UErrorCode status = U_ZERO_ERROR;
+
+    // ======= Test constructors
+    logln("Testing PluralRules constructors");
+    
+        
+    logln("\n start default locale test case ..\n");
+        
+    PluralRules defRule(status); 
+    PluralRules* test=new PluralRules(status);
+    PluralRules* newEnPlural= test->forLocale(Locale::getEnglish(), status);
+    if(U_FAILURE(status)) {
+        dataerrln("ERROR: Could not create PluralRules (default) - exitting");
+        delete test;
+        return;
+    }
+    delete newEnPlural;
+
+    // ======= Test empty plural rules   
+    logln("Testing Simple PluralRules");
+      
+    PluralRules* empRule = test->createRules(UNICODE_STRING_SIMPLE("a:n"), status);
+    UnicodeString key;
+    for (int32_t i=0; i<10; ++i) {
+        key = empRule->select(i);
+        if ( key.charAt(0)!='a' ) {
+            errln("ERROR:  empty plural rules test failed! - exitting");
+        }
+    }
+    if (empRule!=NULL) {
+        delete empRule;
+    }
+    
+    // ======= Test simple plural rules   
+    logln("Testing Simple PluralRules");
+        
+    char result[100];
+    int32_t max;
+        
+    for (int32_t i=0; icreateRules(pluralTestData[i], status);
+       setupResult(pluralTestResult[i], result, &max);
+       if ( !checkEqual(newRules, result, max) ) {
+            errln("ERROR:  simple plural rules failed! - exitting");
+            delete test;
+            return;
+        }
+       if (newRules!=NULL) {
+           delete newRules;
+       }
+    }
+       
+
+    // ======= Test complex plural rules   
+    logln("Testing Complex PluralRules");
+    // TODO: the complex test data is hard coded. It's better to implement 
+    // a parser to parse the test data.
+    UnicodeString complexRule = UNICODE_STRING_SIMPLE("a: n in 2..5; b: n in 5..8; c: n mod 2 is 1"); 
+    char cRuleResult[] = {
+        'o','c','a','a','a','a','b','b','b','c',
+        'o','c'};
+    PluralRules *newRules = test->createRules(complexRule, status);
+    if ( !checkEqual(newRules, cRuleResult, 12) ) {
+         errln("ERROR:  complex plural rules failed! - exitting");
+         delete test;
+         return;
+     }
+    if (newRules!=NULL) {
+        delete newRules;
+        newRules=NULL;
+    }
+    
+    // ======= Test Equality
+    logln("Testing Equality of PluralRules");
+    
+
+    if ( !testEquality(test) ) {
+         errln("ERROR:  complex plural rules failed! - exitting");
+         delete test;
+         return;
+     }
+
+  
+// ======= Test getStaticClassID()
+    logln("Testing getStaticClassID()");
+    
+    if(test->getDynamicClassID() != PluralRules::getStaticClassID()) {
+        errln("ERROR: getDynamicClassID() didn't return the expected value");
+    }
+    delete test;
+}
+
+void setupResult(const int32_t testSource[], char result[], int32_t* max) { // Expands a list of numbers into a table of expected keyword initials: 'a' at each listed number, 'o' in the gaps before it.
+    int32_t i=0; // index into testSource
+    int32_t curIndex=0; // next slot of result to fill
+    
+    do { // do/while: testSource[0] is always consumed, so a leading 0 is a value, not the terminator
+        while (curIndex < testSource[i]) {
+            result[curIndex++]='o'; //other
+        }
+        result[curIndex++]='a'; // with ascending entries this is slot testSource[i]
+        
+    } while(testSource[++i]>0); // any later non-positive entry ends the list
+    *max=curIndex; // number of slots written. NOTE(review): nothing here bounds curIndex by the size of result -- the caller must supply a large enough buffer
+}
+
+
+UBool checkEqual(PluralRules *test, char *result, int32_t max) {
+    UnicodeString key;
+    for (int32_t i=0; iselect(i);
+        if ( key.charAt(0)!=result[i] ) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+UBool testEquality(PluralRules *test) {
+    UErrorCode status = U_ZERO_ERROR;
+    UnicodeString key[MAX_EQ_COL];
+    UBool ret=TRUE;
+    for (int32_t i=0; i0) ) {
+            rules[totalRules]=test->createRules(testEquRules[i][totalRules], status);
+            totalRules++;
+        }
+        for (int32_t n=0; n<300 && ret ; ++n) {
+            for(int32_t j=0; jselect(n);
+            }
+            for(int32_t j=0; jformat(testArgs1, 1, numResult1, ignore, err);
+   
+    MessageFormat* mfAlpha = new MessageFormat(t2, Locale("fr"), err);
+    UnicodeString argName[] = {UnicodeString("argument")};
+    UnicodeString argNameResult;
+    mfAlpha->format(argName, testArgs1, 1, argNameResult, err);
+    if (U_FAILURE(err)) {
+        errln("TestMessageFormat::testMsgFormatPlural #1 - argumentName");
+        logln(UnicodeString("TestMessageFormat::testMsgFormatPlural #1 with error code ")+(int32_t)err);
+        delete mfNum;
+        return;
+    }
+    if ( numResult1 != argNameResult){
+        errln("TestMessageFormat::testMsgFormatPlural #1");
+        logln(UnicodeString("The results of argumentName and argumentIndex are not the same."));
+    }
+    if ( numResult1 != UnicodeString("C\'est 0 fichier dans la liste.")) {
+        errln("TestMessageFormat::testMsgFormatPlural #1");
+        logln(UnicodeString("The results of argumentName and argumentIndex are not the same."));
+    }
+    err = U_ZERO_ERROR;
+    int32_t fmtsCnt=0;
+  
+    delete mfNum;
+    delete mfAlpha;
+
+    MessageFormat* mfNum2 = new MessageFormat(t3, Locale("ru"), err);
+    numResult1.remove();
+    Formattable testArgs2[] = {(int32_t)4};
+    mfNum2->format(testArgs2, 1, numResult1, ignore, err);
+    MessageFormat* mfAlpha2 = new MessageFormat(t4, Locale("ru"), err);
+    argNameResult.remove();
+    mfAlpha2->format(argName, testArgs2, 1, argNameResult, err);
+
+    if (U_FAILURE(err)) {
+        errln("TestMessageFormat::testMsgFormatPlural #2 - argumentName");
+        logln(UnicodeString("TestMessageFormat::testMsgFormatPlural #2 with error code ")+(int32_t)err);
+        delete mfNum2;
+        return;
+    }
+    if ( numResult1 != argNameResult){
+        errln("TestMessageFormat::testMsgFormatPlural #2");
+        logln(UnicodeString("The results of argumentName and argumentIndex are not the same."));
+    }
+    if ( numResult1 != UnicodeString("There are 4,0 zavoda in the directory.")) {
+        errln("TestMessageFormat::testMsgFormatPlural #2");
+        logln(UnicodeString("The results of argumentName and argumentIndex are not the same."));
+    }
+
+    delete mfNum2;
+    delete mfAlpha2;
+    
+    // nested formats
+    err = U_ZERO_ERROR;
+    MessageFormat* msgFmt = new MessageFormat(t5, Locale("fr"), err);
+    if (U_FAILURE(err)) {
+        errln("TestMessageFormat::test nested PluralFormat with argumentName");
+        logln(UnicodeString("TestMessageFormat::test nested PluralFormat with error code ")+(int32_t)err);
+        delete msgFmt;
+        return;
+    }
+    Formattable testArgs3[] = {(int32_t)0};
+    argNameResult.remove();
+    msgFmt->format(testArgs3, 1, argNameResult, ignore, err);
+    if (U_FAILURE(err)) {
+        errln("TestMessageFormat::test nested PluralFormat with argumentName");
+    }
+    if ( argNameResult!= UnicodeString("C'est 0,0 fichier dans la liste.")) {
+        errln(UnicodeString("TestMessageFormat::test nested named PluralFormat."));
+        logln(UnicodeString("The unexpected nested named PluralFormat."));
+    }
+    delete msgFmt;
+}
+
+
 //---------------------------------
 //  API Tests
 //---------------------------------
diff --git a/icu4c/source/test/intltest/tmsgfmt.h b/icu4c/source/test/intltest/tmsgfmt.h
index 5534dadfe14..65308060220 100644
--- a/icu4c/source/test/intltest/tmsgfmt.h
+++ b/icu4c/source/test/intltest/tmsgfmt.h
@@ -56,6 +56,10 @@ public:
      * tests MesageFormat functionality with a format including a ChoiceFormat
      **/
     void testMsgFormatChoice(/* char* par */);
+    /** 
+     * tests MessageFormat functionality with a PluralFormat.
+     **/
+    void testMsgFormatPlural(/* char* par */);
 
     /**
      * Verify that MessageFormat accomodates more than 10 arguments