diff --git a/.gitattributes b/.gitattributes index 1723a1a2666..722c419b305 100644 --- a/.gitattributes +++ b/.gitattributes @@ -54,7 +54,12 @@ icu4c/source/data/in/nfkc.nrm -text icu4c/source/data/in/nfkc_cf.nrm -text icu4c/source/data/in/unorm.icu -text icu4c/source/data/locales/pool.res -text +icu4c/source/i18n/selfmt.cpp -text +icu4c/source/i18n/selfmtimpl.h -text +icu4c/source/i18n/unicode/selfmt.h -text icu4c/source/samples/ucnv/data02.bin -text +icu4c/source/test/intltest/selfmts.cpp -text +icu4c/source/test/intltest/selfmts.h -text icu4c/source/test/perf/README -text icu4c/source/test/testdata/TestFont1.otf -text icu4c/source/test/testdata/icu26_testtypes.res -text diff --git a/icu4c/source/i18n/Makefile.in b/icu4c/source/i18n/Makefile.in index 076d8f38318..ca19b7c63d4 100644 --- a/icu4c/source/i18n/Makefile.in +++ b/icu4c/source/i18n/Makefile.in @@ -80,7 +80,7 @@ regexcmp.o rematch.o repattrn.o regexst.o udatpg.o uregex.o uregexc.o \ ulocdata.o measfmt.o currfmt.o curramt.o currunit.o measure.o utmscale.o \ csdetect.o csmatch.o csr2022.o csrecog.o csrmbcs.o csrsbcs.o csrucode.o csrutf8.o inputext.o \ wintzimpl.o windtfmt.o winnmfmt.o basictz.o dtrule.o rbtz.o tzrule.o tztrans.o vtzone.o \ -zonemeta.o zstrfmt.o plurrule.o plurfmt.o dtitvfmt.o dtitvinf.o \ +zonemeta.o zstrfmt.o plurrule.o plurfmt.o selfmt.o dtitvfmt.o dtitvinf.o \ tmunit.o tmutamt.o tmutfmt.o colldata.o bmsearch.o bms.o currpinf.o \ uspoof.o uspoof_impl.o uspoof_build.o uspoof_conf.o uspoof_wsconf.o \ ztrans.o zrule.o vzone.o fphdlimp.o fpositer.o locdspnm.o \ diff --git a/icu4c/source/i18n/msgfmt.cpp b/icu4c/source/i18n/msgfmt.cpp index c664738db68..013ca7676bb 100644 --- a/icu4c/source/i18n/msgfmt.cpp +++ b/icu4c/source/i18n/msgfmt.cpp @@ -1,23 +1,23 @@ -/* -******************************************************************************* -* Copyright (C) 2007-2008, International Business Machines Corporation and * -* others. All Rights Reserved. 
* -******************************************************************************* -* -* File MSGFMT.CPP -* -* Modification History: -* -* Date Name Description -* 02/19/97 aliu Converted from java. -* 03/20/97 helena Finished first cut of implementation. -* 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi. -* 06/11/97 helena Fixed addPattern to take the pattern correctly. -* 06/17/97 helena Fixed the getPattern to return the correct pattern. -* 07/09/97 helena Made ParsePosition into a class. -* 02/22/99 stephen Removed character literals for EBCDIC safety -******************************************************************************** -*/ +/******************************************************************** + * COPYRIGHT: + * Copyright (c) 1997-2010, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************** + * + * File MSGFMT.CPP + * + * Modification History: + * + * Date Name Description + * 02/19/97 aliu Converted from java. + * 03/20/97 helena Finished first cut of implementation. + * 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi. + * 06/11/97 helena Fixed addPattern to take the pattern correctly. + * 06/17/97 helena Fixed the getPattern to return the correct pattern. + * 07/09/97 helena Made ParsePosition into a class. 
+ * 02/22/99 stephen Removed character literals for EBCDIC safety + * 11/01/09 kirtig Added SelectFormat + ********************************************************************/ #include "unicode/utypes.h" @@ -29,6 +29,7 @@ #include "unicode/smpdtfmt.h" #include "unicode/choicfmt.h" #include "unicode/plurfmt.h" +#include "unicode/selfmt.h" #include "unicode/ustring.h" #include "unicode/ucnv_err.h" #include "unicode/uchar.h" @@ -41,6 +42,10 @@ #include "ustrfmt.h" #include "uvector.h" +//Todo:remove stdio +#include "stdio.h" + + // ***************************************************************************** // class MessageFormat // ***************************************************************************** @@ -81,6 +86,9 @@ static const UChar ID_DURATION[] = { static const UChar ID_PLURAL[] = { 0x70, 0x6c, 0x75, 0x72, 0x61, 0x6c, 0 /* "plural" */ }; +static const UChar ID_SELECT[] = { + 0x73, 0x65, 0x6C, 0x65, 0x63, 0x74, 0 /* "select" */ +}; // MessageFormat Type List Number, Date, Time or Choice static const UChar * const TYPE_IDS[] = { @@ -93,6 +101,7 @@ static const UChar * const TYPE_IDS[] = { ID_ORDINAL, ID_DURATION, ID_PLURAL, + ID_SELECT, NULL, }; @@ -602,7 +611,7 @@ MessageFormat::applyPattern(const UnicodeString& pattern, parseError.preContext[0] = parseError.postContext[0] = (UChar)0; int32_t patLen = pattern.length(); int32_t i; - + for (i=0; itoPattern(buffer); } + else if (fmt->getDynamicClassID() == SelectFormat::getStaticClassID()) { + UnicodeString buffer; + appendTo += ((SelectFormat*)fmt)->toPattern(buffer); + } else { //appendTo += ", unknown"; } @@ -1223,16 +1237,19 @@ MessageFormat::format(const Formattable* arguments, Formattable::Type type = obj->getType(); // Recursively calling the format process only if the current - // format argument refers to a ChoiceFormat object. + // format argument refers to either of the following: + // a ChoiceFormat object ,a PluralFormat object, a SelectFormat object. 
Format* fmt = subformats[i].format; if (fmt != NULL) { UnicodeString argNum; fmt->format(*obj, argNum, success); - // Needs to reprocess the ChoiceFormat option by using the + // Needs to reprocess the ChoiceFormat and PluralFormat and SelectFormat option by using the // MessageFormat pattern application. if ((fmt->getDynamicClassID() == ChoiceFormat::getStaticClassID() || - fmt->getDynamicClassID() == PluralFormat::getStaticClassID()) && + fmt->getDynamicClassID() == PluralFormat::getStaticClassID() || + fmt->getDynamicClassID() == SelectFormat::getStaticClassID() + ) && argNum.indexOf(LEFT_CURLY_BRACE) >= 0) { MessageFormat temp(argNum, fLocale, success); // TODO: Implement recursion protection @@ -1592,7 +1609,11 @@ MessageFormat::makeFormat(int32_t formatNumber, fmt = makeRBNF(URBNF_DURATION, fLocale, segments[3], ec); break; case 8: // plural - argType = Formattable::kDouble; + case 9: // Select + if(typeID == 8) + argType = Formattable::kDouble; + else + argType = Formattable::kString; quotedPattern = segments[3]; for (int32_t i = 0; i < quotedPattern.length(); ++i) { UChar ch = quotedPattern.charAt(i); @@ -1609,7 +1630,10 @@ MessageFormat::makeFormat(int32_t formatNumber, unquotedPattern += ch; } } - fmt = new PluralFormat(fLocale, unquotedPattern, ec); + if(typeID == 8) + fmt = new PluralFormat(fLocale, unquotedPattern, ec); + else + fmt = new SelectFormat(unquotedPattern, ec); break; default: argType = Formattable::kString; @@ -1790,8 +1814,7 @@ MessageFormat::isLegalArgName(const UnicodeString& argName) const { int32_t MessageFormat::getArgTypeCount() const { return argTypeCount; -} - +} FormatNameEnumeration::FormatNameEnumeration(UVector *fNameList, UErrorCode& /*status*/) { pos=0; @@ -1825,8 +1848,6 @@ FormatNameEnumeration::~FormatNameEnumeration() { } delete fFormatNames; } - - U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/i18n/selfmt.cpp b/icu4c/source/i18n/selfmt.cpp new file mode 100755 index 
00000000000..6be17643c75
--- /dev/null
+++ b/icu4c/source/i18n/selfmt.cpp
@@ -0,0 +1,577 @@
+/********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 1997-2010, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ * Copyright (C) 2010 , Yahoo! Inc.
+ ********************************************************************
+ *
+ * File SELFMT.CPP
+ *
+ * Modification History:
+ *
+ * Date Name Description
+ * 11/11/09 kirtig Finished first cut of implementation.
+ * 11/16/09 kirtig Improved version
+ ********************************************************************/
+
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+#include "unicode/ucnv_err.h"
+#include "unicode/uchar.h"
+#include "unicode/umsg.h"
+#include "unicode/rbnf.h"
+#include "cmemory.h"
+#include "util.h"
+#include "uassert.h"
+#include "ustrfmt.h"
+#include "uvector.h"
+
+#include "unicode/selfmt.h"
+#include "selfmtimpl.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+U_NAMESPACE_BEGIN
+
+U_CDECL_BEGIN
+
+// Value deleter installed on parsedValuesHash: frees one stored phrase.
+static void U_CALLCONV
+deleteHashStrings(void *obj) {
+    delete (UnicodeString *)obj;
+}
+
+U_CDECL_END
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SelectFormat)
+
+#define MAX_KEYWORD_SIZE 30
+// Mandatory default keyword "other", used when no other keyword matches.
+static const UChar SELECT_KEYWORD_OTHER[] = {LOW_O, LOW_T, LOW_H, LOW_E, LOW_R, 0};
+
+// Creates an empty SelectFormat. The hash pointer is initialised up front so
+// that the destructor is safe even when the incoming status already failed.
+SelectFormat::SelectFormat(UErrorCode& status) : parsedValuesHash(NULL) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+    init(status);
+}
+
+// Creates a SelectFormat and parses 'pat'; parse errors are reported in status.
+SelectFormat::SelectFormat(const UnicodeString& pat, UErrorCode& status) : parsedValuesHash(NULL) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+    init(status);
+    applyPattern(pat, status);
+}
+
+// Copy constructor: deep-copies the pattern and the keyword/phrase table.
+SelectFormat::SelectFormat(const SelectFormat& other) : Format(other), parsedValuesHash(NULL) {
+    UErrorCode status = U_ZERO_ERROR;
+    pattern = other.pattern;
+    copyHashtable(other.parsedValuesHash, status);
+}
+
+// Destructor: releases the keyword/phrase table (NULL when nothing was parsed).
+SelectFormat::~SelectFormat() {
+    delete parsedValuesHash;
+}
+
+// Resets the object to the empty state and sets status to U_ZERO_ERROR.
+void
+SelectFormat::init(UErrorCode& status) {
+    parsedValuesHash = NULL;
+    pattern.remove();
+    status = U_ZERO_ERROR;
+}
+
+
+// Parses newPattern, a sequence of "keyword{phrase}" clauses, into
+// parsedValuesHash. Sets U_PATTERN_SYNTAX_ERROR for malformed input,
+// U_DUPLICATE_KEYWORD for a repeated keyword and U_DEFAULT_KEYWORD_MISSING
+// when no "other" clause is present.
+void
+SelectFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    //Release the table of any previously applied pattern instead of leaking it
+    delete parsedValuesHash;
+    parsedValuesHash = NULL;
+    this->pattern = newPattern;
+    enum State{ startState, keywordState, pastKeywordState, phraseState};
+
+    //Initialization
+    UnicodeString keyword = UnicodeString();
+    UnicodeString phrase = UnicodeString();
+    UnicodeString* ptrPhrase = NULL;
+    int32_t braceCount = 0;
+
+    //Allocate exactly one table and check the allocation before using it
+    parsedValuesHash = new Hashtable(TRUE, status);
+    if (parsedValuesHash == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    if (U_FAILURE(status)) {
+        return;
+    }
+    parsedValuesHash->setValueDeleter(deleteHashStrings);
+
+    //Process the state machine
+    State state = startState;
+    for (int32_t i = 0; i < pattern.length(); ++i) {
+        //Get the character and check its type
+        UChar ch = pattern.charAt(i);
+        characterClass type;
+        classifyCharacter(ch, type);
+
+        //Allow any character in phrase but nowhere else
+        if ( type == tOther ) {
+            if ( state == phraseState ){
+                phrase += ch;
+                continue;
+            }else {
+                status = U_PATTERN_SYNTAX_ERROR;
+                return;
+            }
+        }
+
+        //Process the state machine
+        switch (state) {
+            //At the start of pattern
+            case startState:
+                switch (type) {
+                    case tSpace:
+                        break;
+                    case tStartKeyword:
+                        state = keywordState;
+                        keyword += ch;
+                        break;
+                    //If anything else is encountered, it's a syntax error
+                    default:
+                        status = U_PATTERN_SYNTAX_ERROR;
+                        return;
+                }//end of switch(type)
+                break;
+
+            //Handle the keyword state
+            case keywordState:
+                switch (type) {
+                    case tSpace:
+                        state = pastKeywordState;
+                        break;
+                    case tStartKeyword:
+                    case tContinueKeyword:
+                        keyword += ch;
+                        break;
+                    case tLeftBrace:
+                        state = phraseState;
+                        break;
+                    //If anything else is encountered, it's a syntax error
+                    default:
+                        status = U_PATTERN_SYNTAX_ERROR;
+                        return;
+                }//end of switch(type)
+                break;
+
+            //Handle the pastkeyword state
+            case pastKeywordState:
+                switch (type) {
+                    case tSpace:
+                        break;
+                    case tLeftBrace:
+                        state = phraseState;
+                        break;
+                    //If anything else is encountered, it's a syntax error
+                    default:
+                        status = U_PATTERN_SYNTAX_ERROR;
+                        return;
+                }//end of switch(type)
+                break;
+
+            //Handle the phrase state
+            case phraseState:
+                switch (type) {
+                    case tLeftBrace:
+                        braceCount++;
+                        phrase += ch;
+                        break;
+                    case tRightBrace:
+                        //Matching keyword, phrase pair found
+                        if (braceCount == 0){
+                            //Check validity of keyword
+                            if (parsedValuesHash->get(keyword) != NULL) {
+                                status = U_DUPLICATE_KEYWORD;
+                                return;
+                            }
+                            if (keyword.length() == 0) {
+                                status = U_PATTERN_SYNTAX_ERROR;
+                                return;
+                            }
+
+                            //Store the keyword, phrase pair in hashTable
+                            ptrPhrase = new UnicodeString(phrase);
+                            if (ptrPhrase == NULL) {
+                                status = U_MEMORY_ALLOCATION_ERROR;
+                                return;
+                            }
+                            parsedValuesHash->put( keyword, ptrPhrase, status);
+                            if (U_FAILURE(status)) {
+                                return;
+                            }
+
+                            //Reinitialize
+                            keyword.remove();
+                            phrase.remove();
+                            ptrPhrase = NULL;
+                            state = startState;
+                        }
+
+                        if (braceCount > 0){
+                            braceCount-- ;
+                            phrase += ch;
+                        }
+                        break;
+                    default:
+                        phrase += ch;
+                }//end of switch(type)
+                break;
+
+            //Handle the default case of switch(state)
+            default:
+                status = U_PATTERN_SYNTAX_ERROR;
+                return;
+
+        }//end of switch(state)
+    }
+
+    //Check if the state machine is back to startState
+    if ( state != startState){
+        status = U_PATTERN_SYNTAX_ERROR;
+        return;
+    }
+
+    //Check if "other" keyword is present
+    if ( !checkSufficientDefinition() ) {
+        status = U_DEFAULT_KEYWORD_MISSING;
+    }
+    return;
+}
+
+// Formats a Formattable keyword. Only kString is accepted; any other type
+// sets U_ILLEGAL_ARGUMENT_ERROR and leaves appendTo untouched.
+UnicodeString&
+SelectFormat::format(const Formattable& obj,
+                     UnicodeString& appendTo,
+                     FieldPosition& pos,
+                     UErrorCode& status) const
+{
+    if (U_FAILURE(status)) return appendTo;
+
+    switch (obj.getType())
+    {
+    case Formattable::kString:
+        return format(obj.getString(), appendTo, pos, status);
+    default:
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return appendTo;
+    }
+}
+
+// Appends the phrase stored for keyword sInput, or the "other" phrase when the
+// keyword is unknown. Sets U_ILLEGAL_ARGUMENT_ERROR for a malformed keyword
+// and U_INVALID_FORMAT_ERROR when no usable pattern has been applied.
+UnicodeString&
+SelectFormat::format(const UnicodeString& sInput,
+                     UnicodeString& appendTo,
+                     FieldPosition& pos,
+                     UErrorCode& status) const {
+
+    if (U_FAILURE(status)) return appendTo;
+
+    //Check for the validity of the keyword
+    if ( !checkValidKeyword(sInput) ){
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return appendTo;
+    }
+
+    if (parsedValuesHash == NULL) {
+        status = U_INVALID_FORMAT_ERROR;
+        return appendTo;
+    }
+
+    UnicodeString *selectedPattern = (UnicodeString *)parsedValuesHash->get(sInput);
+    if (selectedPattern == NULL) {
+        selectedPattern = (UnicodeString *)parsedValuesHash->get(SELECT_KEYWORD_OTHER);
+    }
+
+    //The table has no "other" entry when applyPattern() failed with
+    //U_DEFAULT_KEYWORD_MISSING; never dereference a missing phrase.
+    if (selectedPattern == NULL) {
+        status = U_INVALID_FORMAT_ERROR;
+        return appendTo;
+    }
+
+    return appendTo += *selectedPattern;
+}
+
+// Appends the pattern string last passed to applyPattern() or the constructor.
+UnicodeString&
+SelectFormat::toPattern(UnicodeString& appendTo) {
+    appendTo += pattern;
+    return appendTo;
+}
+
+// Maps one UTF-16 code unit to the character class used by the parsers:
+// letters start a keyword; digits, '-' and '_' may only continue one.
+void
+SelectFormat::classifyCharacter(UChar ch, characterClass& type) const{
+    if ((ch >= CAP_A) && (ch <= CAP_Z)) {
+        type = tStartKeyword;
+        return;
+    }
+    if ((ch >= LOW_A) && (ch <= LOW_Z)) {
+        type = tStartKeyword;
+        return;
+    }
+    if ((ch >= U_ZERO) && (ch <= U_NINE)) {
+        type = tContinueKeyword;
+        return;
+    }
+    switch (ch) {
+        case LEFTBRACE:
+            type = tLeftBrace;
+            break;
+        case RIGHTBRACE:
+            type = tRightBrace;
+            break;
+        case SPACE:
+        case TAB:
+            type = tSpace;
+            break;
+        case HYPHEN:
+        case LOWLINE:
+            type = tContinueKeyword;
+            break;
+        default :
+            type = tOther;
+    }
+}
+
+// Returns TRUE when the parsed table contains a phrase for "other".
+UBool
+SelectFormat::checkSufficientDefinition() {
+    // Check that at least the default rule is defined.
+    if (parsedValuesHash == NULL) return FALSE;
+    if (parsedValuesHash->get(SELECT_KEYWORD_OTHER) == NULL) {
+        return FALSE;
+    }
+    else {
+        return TRUE;
+    }
+}
+
+// Returns TRUE when argKeyword is optional spaces, then a keyword made of the
+// characters accepted by classifyCharacter(), then optional spaces.
+UBool
+SelectFormat::checkValidKeyword(const UnicodeString& argKeyword ) const{
+    enum State{ startState, keywordState, pastKeywordState };
+
+    //Initialize
+    State state = startState;
+
+    //Start the processing
+    for (int32_t i = 0; i < argKeyword.length(); ++i) {
+        //Get the character and check its type
+        UChar ch = argKeyword.charAt(i);
+        characterClass type;
+        classifyCharacter(ch, type);
+
+        //Any character that is not allowed
+        if ( type == tOther ) {
+            return FALSE;
+        }
+
+        //Process the state machine
+        switch (state) {
+            //At the start of pattern
+            case startState:
+                switch (type) {
+                    case tSpace:
+                        break;
+                    case tStartKeyword:
+                        state = keywordState;
+                        break;
+                    //If anything else is encountered, it's a syntax error
+                    default:
+                        return FALSE;
+                }//end of switch(type)
+                break;
+
+            //Handle the keyword state
+            case keywordState:
+                switch (type) {
+                    case tSpace:
+                        state = pastKeywordState;
+                        break;
+                    case tStartKeyword:
+                    case tContinueKeyword:
+                        break;
+                    //If anything else is encountered,it's a syntax error
+                    default:
+                        return FALSE;
+                }//end of switch(type)
+                break;
+
+            //Handle the pastkeyword state
+            case pastKeywordState:
+                switch (type) {
+                    case tSpace:
+                        break;
+                    //If anything else is encountered,it's a syntax error
+                    default:
+                        return FALSE;
+                }//end of switch(type)
+                break;
+
+            default:
+                return FALSE;
+        }//end of switch(state)
+
+    }//end of loop of argKeyword
+
+    return TRUE;
+}
+
+// Returns a heap-allocated copy of this object; the caller owns it.
+Format* SelectFormat::clone() const
+{
+    return new SelectFormat(*this);
+}
+
+// Assignment: replaces the pattern and table with deep copies of other's.
+SelectFormat&
+SelectFormat::operator=(const SelectFormat& other) {
+    if (this != &other) {
+        UErrorCode status = U_ZERO_ERROR;
+        delete parsedValuesHash;
+        pattern = other.pattern;
+        copyHashtable(other.parsedValuesHash, status);
+    }
+    return *this;
+}
+
+// Compares the keyword/phrase tables of two SelectFormat objects.
+UBool
+SelectFormat::operator==(const Format& other) const {
+    // This operator is public and virtual, so 'other' may be any Format.
+    // Check the dynamic type before down-casting; an object of a different
+    // class is never equal to a SelectFormat.
+    if (this == &other) {
+        return TRUE;
+    }
+    if (other.getDynamicClassID() != SelectFormat::getStaticClassID()) {
+        return FALSE;
+    }
+    const SelectFormat* fmt = (const SelectFormat*)&other;
+    Hashtable* hashOther = fmt->parsedValuesHash;
+    if ( parsedValuesHash == NULL && hashOther == NULL)
+        return TRUE;
+    if ( parsedValuesHash == NULL || hashOther == NULL)
+        return FALSE;
+    if ( hashOther->count() != parsedValuesHash->count() ){
+        return FALSE;
+    }
+
+    const UHashElement* elem = NULL;
+    int32_t pos = -1;
+    while ((elem = hashOther->nextElement(pos)) != NULL) {
+        const UHashTok otherKeyTok = elem->key;
+        UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer;
+        const UHashTok otherKeyToVal = elem->value;
+        UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer;
+
+        UnicodeString* thisElemValue = (UnicodeString*)parsedValuesHash->get(*otherKey);
+        if ( thisElemValue == NULL ){
+            return FALSE;
+        }
+        if ( *thisElemValue != *otherValue){
+            return FALSE;
+        }
+
+    }
+    pos = -1;
+    while ((elem = parsedValuesHash->nextElement(pos)) != NULL) {
+        const UHashTok thisKeyTok = elem->key;
+        UnicodeString* thisKey = (UnicodeString*)thisKeyTok.pointer;
+        const UHashTok thisKeyToVal = elem->value;
+        UnicodeString* thisValue = (UnicodeString*)thisKeyToVal.pointer;
+
+        UnicodeString* otherElemValue = (UnicodeString*)hashOther->get(*thisKey);
+        if ( otherElemValue == NULL ){
+            return FALSE;
+        }
+        if ( *otherElemValue != *thisValue){
+            return FALSE;
+        }
+
+    }
+    return TRUE;
+}
+
+// Logical negation of operator==.
+UBool
+SelectFormat::operator!=(const Format& other) const {
+    return !operator==(other);
+}
+
+void
+SelectFormat::parseObject(const UnicodeString& /*source*/,
+                        Formattable& /*result*/,
+                        ParsePosition& /*pos*/) const
+{
+    // TODO: not yet supported in icu4j and icu4c
+}
+
+// Replaces parsedValuesHash with a deep copy of 'other' (NULL stays NULL).
+// The caller must already have released any table previously owned.
+void
+SelectFormat::copyHashtable(Hashtable *other, UErrorCode& status) {
+    if (other == NULL) {
+        parsedValuesHash = NULL;
+        return;
+    }
+    parsedValuesHash = new Hashtable(TRUE, status);
+    if (parsedValuesHash == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    if (U_FAILURE(status)){
+        return;
+    }
+    parsedValuesHash->setValueDeleter(deleteHashStrings);
+
+    int32_t pos = -1;
+    const UHashElement* elem = NULL;
+
+    // walk through the hash table and create a deep clone
+    while ((elem = other->nextElement(pos)) != NULL){
+        const UHashTok otherKeyTok = elem->key;
+        UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer;
+        const UHashTok otherKeyToVal = elem->value;
+        UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer;
+        parsedValuesHash->put(*otherKey, new UnicodeString(*otherValue), status);
+        if (U_FAILURE(status)){
+            return;
+        }
+    }
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+//eof
diff --git a/icu4c/source/i18n/selfmtimpl.h b/icu4c/source/i18n/selfmtimpl.h
new file mode 100755
index 00000000000..39d631390a0
--- /dev/null
+++ b/icu4c/source/i18n/selfmtimpl.h
@@ -0,0 +1,97 @@
+/********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 1997-2010, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ * Copyright (C) 2010 , Yahoo! Inc.
+ ********************************************************************
+ * File SELECTFMT_IMPL.H
+ *
+ * Date Name Description
+ * 11/11/09 kirtig Finished first cut of implementation.
+ *********************************************************************/
+
+
+#ifndef SELFMTIMPL
+#define SELFMTIMPL
+
+/**
+ * \file
+ * \brief Internal character constants used by the SelectFormat pattern parser.
+ */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/format.h" +#include "unicode/locid.h" +#include "unicode/parseerr.h" +#include "unicode/utypes.h" +#include "uvector.h" +#include "hash.h" + +U_NAMESPACE_BEGIN + +#define DOT ((UChar)0x002E) +#define SINGLE_QUOTE ((UChar)0x0027) +#define SLASH ((UChar)0x002F) +#define BACKSLASH ((UChar)0x005C) +#define SPACE ((UChar)0x0020) +#define TAB ((UChar)0x0009) +#define QUOTATION_MARK ((UChar)0x0022) +#define ASTERISK ((UChar)0x002A) +#define COMMA ((UChar)0x002C) +#define HYPHEN ((UChar)0x002D) +#define U_ZERO ((UChar)0x0030) +#define U_ONE ((UChar)0x0031) +#define U_TWO ((UChar)0x0032) +#define U_THREE ((UChar)0x0033) +#define U_FOUR ((UChar)0x0034) +#define U_FIVE ((UChar)0x0035) +#define U_SIX ((UChar)0x0036) +#define U_SEVEN ((UChar)0x0037) +#define U_EIGHT ((UChar)0x0038) +#define U_NINE ((UChar)0x0039) +#define COLON ((UChar)0x003A) +#define SEMI_COLON ((UChar)0x003B) +#define CAP_A ((UChar)0x0041) +#define CAP_B ((UChar)0x0042) +#define CAP_R ((UChar)0x0052) +#define CAP_Z ((UChar)0x005A) +#define LOWLINE ((UChar)0x005F) +#define LEFTBRACE ((UChar)0x007B) +#define RIGHTBRACE ((UChar)0x007D) + +#define LOW_A ((UChar)0x0061) +#define LOW_B ((UChar)0x0062) +#define LOW_C ((UChar)0x0063) +#define LOW_D ((UChar)0x0064) +#define LOW_E ((UChar)0x0065) +#define LOW_F ((UChar)0x0066) +#define LOW_G ((UChar)0x0067) +#define LOW_H ((UChar)0x0068) +#define LOW_I ((UChar)0x0069) +#define LOW_J ((UChar)0x006a) +#define LOW_K ((UChar)0x006B) +#define LOW_L ((UChar)0x006C) +#define LOW_M ((UChar)0x006D) +#define LOW_N ((UChar)0x006E) +#define LOW_O ((UChar)0x006F) +#define LOW_P ((UChar)0x0070) +#define LOW_Q ((UChar)0x0071) +#define LOW_R ((UChar)0x0072) +#define LOW_S ((UChar)0x0073) +#define LOW_T ((UChar)0x0074) +#define LOW_U ((UChar)0x0075) +#define LOW_V ((UChar)0x0076) +#define LOW_W ((UChar)0x0077) +#define LOW_X ((UChar)0x0078) +#define LOW_Y ((UChar)0x0079) +#define LOW_Z ((UChar)0x007A) + +class UnicodeSet; + 
+U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // SELFMTIMPL +//eof diff --git a/icu4c/source/i18n/unicode/msgfmt.h b/icu4c/source/i18n/unicode/msgfmt.h index 30979de5e20..beed2b2af27 100644 --- a/icu4c/source/i18n/unicode/msgfmt.h +++ b/icu4c/source/i18n/unicode/msgfmt.h @@ -12,8 +12,7 @@ * 03/20/97 helena Finished first cut of implementation. * 07/22/98 stephen Removed operator!= (defined in Format) * 08/19/2002 srl Removing Javaisms -******************************************************************************** -*/ +*******************************************************************************/ #ifndef MSGFMT_H #define MSGFMT_H @@ -195,6 +194,7 @@ class DateFormat; * | "ordinal" { "," spelloutStyle } * | "duration" { "," spelloutStyle } * | "plural" "," pluralStyle + * | "select" "," selectStyle * * datetimeStyle := "short" * | "medium" @@ -211,6 +211,8 @@ class DateFormat; * * pluralStyle := pluralFormatPattern * + * selectStyle := selectFormatPattern + * * spelloutStyle := ruleSetName * \endcode * @@ -220,7 +222,7 @@ class DateFormat; * DateFormat::createTimeInstance(DateFormat::kDefault, ...) or * DateFormat::createDateInstance(DateFormat::kDefault, ...). For * a RuleBasedNumberFormat, if there is no ruleSetName, the default - * rule set is used. For a ChoiceFormat or PluralFormat, the pattern + * rule set is used. For a ChoiceFormat or PluralFormat or SelectFormat, the pattern * must always be specified, since there is no default. *

* In strings, single quotes can be used to quote syntax characters. diff --git a/icu4c/source/i18n/unicode/selfmt.h b/icu4c/source/i18n/unicode/selfmt.h new file mode 100755 index 00000000000..9b714015e04 --- /dev/null +++ b/icu4c/source/i18n/unicode/selfmt.h @@ -0,0 +1,373 @@ +/******************************************************************** + * COPYRIGHT: + * Copyright (c) 1997-2010, International Business Machines Corporation and + * others. All Rights Reserved. + * Copyright (C) 2010 , Yahoo! Inc. + ******************************************************************** + * + * File SELFMT.H + * + * Modification History: + * + * Date Name Description + * 11/11/09 kirtig Finished first cut of implementation. + ********************************************************************/ + +#ifndef SELFMT +#define SELFMT + +#include "unicode/utypes.h" +#include "unicode/numfmt.h" + +/** + * \file + * \brief C++ API: SelectFormat object + */ + +#if !UCONFIG_NO_FORMATTING + +U_NAMESPACE_BEGIN + +class Hashtable; + +/** + *

SelectFormat supports the creation of internationalized + * messages by selecting phrases based on keywords. The pattern specifies + * how to map keywords to phrases and provides a default phrase. The + * object provided to the format method is a string that's matched + * against the keywords. If there is a match, the corresponding phrase + * is selected; otherwise, the default phrase is used.

+ * + *

Using SelectFormat for Gender Agreement

+ * + *

The main use case for the select format is gender based inflection. + * When names or nouns are inserted into sentences, their gender can affect pronouns, + * verb forms, articles, and adjectives. Special care needs to be + * taken for the case where the gender cannot be determined. + * The impact varies between languages:

+ * + * + * + *

Some other languages have noun classes that are not related to gender, + * but similar in grammatical use. + * Some African languages have around 20 noun classes.

+ * + *

To enable localizers to create sentence patterns that take their + * language's gender dependencies into consideration, software has to provide + * information about the gender associated with a noun or name to + * MessageFormat. + * Two main cases can be distinguished:

+ * + * + * + *

The resulting keyword is provided to MessageFormat as a + * parameter separate from the name or noun it's associated with. For example, + * to generate a message such as "Jean went to Paris", three separate arguments + * would be provided: The name of the person as argument 0, the gender of + * the person as argument 1, and the name of the city as argument 2. + * The sentence pattern for English, where the gender of the person has + * no impact on this simple sentence, would not refer to argument 1 at all:

+ * + *
{0} went to {2}.
+ * + *

The sentence pattern for French, where the gender of the person affects + * the form of the participle, uses a select format based on argument 1:

+ * + *
{0} est {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; {2}.
+ * + *

Patterns can be nested, so that it's possible to handle interactions of + * number and gender where necessary. For example, if the above sentence should + * allow for the names of several people to be inserted, the following sentence + * pattern can be used (with argument 0 the list of people's names, + * argument 1 the number of people, argument 2 their combined gender, and + * argument 3 the city name):

+ * + *
{0} {1, plural, 
+  *                 one {est {2, select, female {all&#x00E9;e} other  {all&#x00E9;}}}
+  *                 other {sont {2, select, female {all&#x00E9;es} other {all&#x00E9;s}}}
+  *          }&#x00E0; {3}.
+ * + *

Patterns and Their Interpretation

+ * + *

The SelectFormat pattern text defines the phrase output + * for each user-defined keyword. + * The pattern is a sequence of keyword{phrase} + * clauses. + * Each clause assigns the phrase phrase + * to the user-defined keyword.

+ * + *

Keywords must match the pattern [a-zA-Z][a-zA-Z0-9_-]*; keywords + * that don't match this pattern result in the error code + * U_PATTERN_SYNTAX_ERROR. + * You always have to define a phrase for the default keyword + * other; this phrase is returned when the keyword + * provided to + * the format method matches no other keyword. + * If a pattern does not provide a phrase for other, the method + * it's provided to returns the error U_DEFAULT_KEYWORD_MISSING. + * If a pattern provides more than one phrase for the same keyword, the + * error U_DUPLICATE_KEYWORD is returned. + *
+ * Spaces between keyword and + * {phrase} will be ignored; spaces within + * {phrase} will be preserved.

+ * + *

The phrase for a particular select case may contain other message + * format patterns. SelectFormat preserves these so that you + * can use the strings produced by SelectFormat with other + * formatters. If you are using SelectFormat inside a + * MessageFormat pattern, MessageFormat will + * automatically evaluate the resulting format pattern. + * Thus, curly braces ({, }) are only allowed + * in phrases to define a nested format pattern.

+ * + *

Example: + *

+  *
+  * UErrorCode status = U_ZERO_ERROR;
+  * MessageFormat *msgFmt = new MessageFormat(UnicodeString("{0} est  {1, select, female {all&#x00E9;e} other {all&#x00E9;}} &#x00E0; Paris."), Locale("fr"),  status);
+  * if (U_FAILURE(status)) {
+  *       return;
+  * }
+  * FieldPosition ignore(FieldPosition::DONT_CARE);
+  * UnicodeString result;
+  *
+  * char* str1= "Kirti,female";
+  * Formattable args1[] = {"Kirti","female"};
+  * msgFmt->format(args1, 2, result, ignore, status);
+  * cout << "Input is " << str1 << " and result is: " << result << endl;
+  * delete msgFmt;
+  *
+  * 
+ * Produces the output:
+ * Input is Kirti,female and result is: Kirti est all&#x00E9;e &#x00E0; Paris.
+ *
+ * @draft ICU 4.4
+ */
+
+class U_I18N_API SelectFormat : public Format {
+public:
+
+    /**
+     * Creates a new SelectFormat with an empty pattern.
+     * @param status output param set to success/failure code on exit, which
+     * must not indicate a failure before the function call.
+     * @draft ICU 4.4
+     */
+    SelectFormat(UErrorCode& status);
+
+    /**
+     * Creates a new SelectFormat for a given pattern string.
+     * @param pattern the pattern for this SelectFormat.
+     * errors are returned to status if the pattern is invalid.
+     * @param status output param set to success/failure code on exit, which
+     * must not indicate a failure before the function call.
+     * @draft ICU 4.4
+     */
+    SelectFormat(const UnicodeString& pattern, UErrorCode& status);
+
+    /**
+     * Copy constructor.
+     * @draft ICU 4.4
+     */
+    SelectFormat(const SelectFormat& other);
+
+    /**
+     * Destructor.
+     * @draft ICU 4.4
+     */
+    virtual ~SelectFormat();
+
+    /**
+     * Sets the pattern used by this select format,
+     * which defines the keyword rules.
+     * Patterns and their interpretation are specified in the class description.
+     *
+     * @param pattern the pattern for this select format
+     * errors are returned to status if the pattern is invalid.
+     * @param status output param set to success/failure code on exit, which
+     * must not indicate a failure before the function call.
+     * @draft ICU 4.4
+     */
+    void applyPattern(const UnicodeString& pattern, UErrorCode& status);
+
+    /**
+     * Selects the phrase for the given keyword.
+     *
+     * @param keyword The keyword that is used to select an alternative.
+     * @param appendTo output parameter to receive result.
+     * result is appended to existing contents.
+     * @param pos On input: an alignment field, if desired.
+     * On output: the offsets of the alignment field.
+     * @param status output param set to success/failure code on exit, which
+     * must not indicate a failure before the function call.
+     * @return Reference to 'appendTo' parameter.
+     * @draft ICU 4.4
+     */
+    UnicodeString& format(const UnicodeString& keyword,
+                          UnicodeString& appendTo,
+                          FieldPosition& pos,
+                          UErrorCode& status) const;
+
+    /**
+     * Assignment operator
+     *
+     * @param other the SelectFormat object to copy from.
+     * @draft ICU 4.4
+     */
+    SelectFormat& operator=(const SelectFormat& other);
+
+    /**
+     * Return true if another object is semantically equal to this one.
+     *
+     * @param other the SelectFormat object to be compared with.
+     * @return true if other is semantically equal to this.
+     * @draft ICU 4.4
+     */
+    virtual UBool operator==(const Format& other) const;
+
+    /**
+     * Return true if another object is semantically unequal to this one.
+     *
+     * @param other the SelectFormat object to be compared with.
+     * @return true if other is semantically unequal to this.
+     * @draft ICU 4.4
+     */
+    virtual UBool operator!=(const Format& other) const;
+
+    /**
+     * Clones this Format object polymorphically. The caller owns the
+     * result and should delete it when done.
+     * @draft ICU 4.4
+     */
+    virtual Format* clone(void) const;
+
+    /**
+     * Format an object to produce a string.
+     * This method handles keyword strings.
+     * If the Formattable object is not a UnicodeString,
+     * then status is set to U_ILLEGAL_ARGUMENT_ERROR.
+     *
+     * @param obj A keyword string that is used to select an alternative.
+     * @param appendTo output parameter to receive result.
+     * Result is appended to existing contents.
+     * @param pos On input: an alignment field, if desired.
+     * On output: the offsets of the alignment field.
+     * @param status output param filled with success/failure status.
+     * @return Reference to 'appendTo' parameter.
+     * @draft ICU 4.4
+     */
+    UnicodeString& format(const Formattable& obj,
+                          UnicodeString& appendTo,
+                          FieldPosition& pos,
+                          UErrorCode& status) const;
+
+    /**
+     * Returns the pattern from applyPattern() or constructor.
+     *
+     * @param appendTo output parameter to receive result.
+     * Result is appended to existing contents.
+     * @return the UnicodeString with inserted pattern.
+     * @draft ICU 4.4
+     */
+    UnicodeString& toPattern(UnicodeString& appendTo);
+
+    /**
+     * This method is not yet supported by SelectFormat; the implementation does nothing.
+     *
+     * Before calling, set parse_pos.index to the offset you want to start
+     * parsing at in the source. After calling, parse_pos.index is the end of
+     * the text you parsed. If error occurs, index is unchanged.
+     *
+     * When parsing, leading whitespace is discarded (with a successful parse),
+     * while trailing whitespace is left as is.
+     *
+     * See Format::parseObject() for more.
+     *
+     * @param source The string to be parsed into an object.
+     * @param result Formattable to be set to the parse result.
+     * If parse fails, return contents are undefined.
+     * @param parse_pos The position to start parsing at. Upon return
+     * this param is set to the position after the
+     * last character successfully parsed. If the
+     * source is not parsed successfully, this param
+     * will remain unchanged.
+     * @draft ICU 4.4
+     */
+    virtual void parseObject(const UnicodeString& source,
+                             Formattable& result,
+                             ParsePosition& parse_pos) const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     */
+    static UClassID U_EXPORT2 getStaticClassID(void);
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     * @draft ICU 4.4
+     */
+    virtual UClassID getDynamicClassID() const;
+
+private:
+    typedef enum characterClass{
+        tStartKeyword,
+        tContinueKeyword,
+        tLeftBrace,
+        tRightBrace,
+        tSpace,
+        tOther
+    }characterClass;
+
+    UnicodeString pattern;
+    //Hash mapping each keyword to its phrase (owned; NULL until a pattern is applied)
+    Hashtable *parsedValuesHash;
+
+    SelectFormat(); // default constructor not implemented
+    void init(UErrorCode& status);
+    //Used by applyPattern() and checkValidKeyword(): maps ch to one characterClass value
+    void classifyCharacter(UChar ch, characterClass& type) const;
+    //Checks if the "other" keyword is present in pattern
+    UBool checkSufficientDefinition();
+    //Checks if the keyword passed is valid
+    UBool checkValidKeyword(const UnicodeString& argKeyword) const;
+    void parsingFailure(); // NOTE(review): no definition appears in selfmt.cpp - confirm it is still needed
+    void copyHashtable(Hashtable *other, UErrorCode& status);
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // SELFMT
+//eof
diff --git a/icu4c/source/i18n/unicode/umsg.h b/icu4c/source/i18n/unicode/umsg.h
index 813302376d1..c2b162910ad 100644
--- a/icu4c/source/i18n/unicode/umsg.h
+++ b/icu4c/source/i18n/unicode/umsg.h
@@ -1,20 +1,20 @@
-/*
-******************************************************************************* -* Copyright (C) 1996-2009, International Business Machines Corporation -* and others. All Rights Reserved. -******************************************************************************* -* -* file name: umsg.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* Change history: -* -* 08/5/2001 Ram Added C wrappers for C++ API. -* -* -*/ +/******************************************************************** + * COPYRIGHT: + * Copyright (c) 1997-2010, International Business Machines Corporation and + * others. All Rights Reserved. + * Copyright (C) 2010 , Yahoo! Inc. + ******************************************************************** + * + * file name: umsg.h + * encoding: US-ASCII + * tab size: 8 (not used) + * indentation:4 + * + * Change history: + * + * 08/5/2001 Ram Added C wrappers for C++ API. + * + ********************************************************************/ #ifndef UMSG_H #define UMSG_H @@ -88,8 +88,8 @@ * u_uastrcpy(str, "MyDisk"); * u_uastrcpy(pattern, "The disk {1} contains {0,choice,0#no files|1#one file|1<{0,number,integer} files}"); * for(i=0; i<3; i++){ - * resultlength=0; - * resultLengthOut=u_formatMessage( "en_US", pattern, u_strlen(pattern), NULL, resultlength, &status, testArgs[i], str); + * resultlength=0; + * resultLengthOut=u_formatMessage( "en_US", pattern, u_strlen(pattern), NULL, resultlength, &status, testArgs[i], str); * if(status==U_BUFFER_OVERFLOW_ERROR){ * status=U_ZERO_ERROR; * resultlength=resultLengthOut+1; @@ -106,6 +106,45 @@ * \endcode * * + * + * Example 3: + *

+ * \code
+ * UChar* str;
+ * UChar* str1;
+ * UErrorCode status = U_ZERO_ERROR;
+ * UChar *result;
+ * UChar pattern[100];
+ * UChar expected[100];
+ * int32_t resultlength,resultLengthOut;
+ *
+ * str=(UChar*)malloc(sizeof(UChar) * 25);
+ * u_uastrcpy(str, "Kirti");
+ * str1=(UChar*)malloc(sizeof(UChar) * 25);
+ * u_uastrcpy(str1, "female");
+ * log_verbose("Testing message format with Select test #1:\n");
+ * u_uastrcpy(pattern, "{0} est {1, select, female {all\\u00E9e} other {all\\u00E9}} \\u00E0 Paris.");
+ * u_uastrcpy(expected, "Kirti est all\\u00E9e \\u00E0 Paris.");
+ * resultlength=0;
+ * resultLengthOut=u_formatMessage( "fr", pattern, u_strlen(pattern), NULL, resultlength, &status, str , str1);
+ * if(status==U_BUFFER_OVERFLOW_ERROR)
+ *  {
+ *      status=U_ZERO_ERROR;
+ *      resultlength=resultLengthOut+1;
+ *      result=(UChar*)malloc(sizeof(UChar) * resultlength);
+ *      u_formatMessage( "fr", pattern, u_strlen(pattern), result, resultlength, &status, str , str1);
+ *      if(u_strcmp(result, expected)==0)
+ *          log_verbose("PASS: MessageFormat successful on Select test#1\n");
+ *      else{
+ *          log_err("FAIL: Error in MessageFormat on Select test#1\n GOT %s EXPECTED %s\n", austrdup(result),
+ *          austrdup(expected) );
+ *      }
+ *      free(result);
+ * }
+ * \endcode
+ *  
+ * + * The pattern is of the following form. Legend: *
  * \code
@@ -125,6 +164,7 @@
  *                      | "date" { "," datetimeStyle }
  *                      | "number" { "," numberStyle }
  *                      | "choice" "," choiceStyle
+ *                      | "select" "," selectStyle
  *
  *       datetimeStyle := "short"
  *                      | "medium"
@@ -138,6 +178,8 @@
  *                      | numberFormatPattern
  *
  *       choiceStyle :=   choiceFormatPattern
+ *
+ *       selectStyle :=   selectFormatPattern
  * \endcode
  * 
* If there is no elementFormat, then the argument must be a string, diff --git a/icu4c/source/test/cintltst/cmsgtst.c b/icu4c/source/test/cintltst/cmsgtst.c index c1d4a8d21d2..1a8a6ff6769 100644 --- a/icu4c/source/test/cintltst/cmsgtst.c +++ b/icu4c/source/test/cintltst/cmsgtst.c @@ -1,17 +1,15 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2009, International Business Machines Corporation and + * Copyright (c) 1997-2010, International Business Machines Corporation and * others. All Rights Reserved. + ******************************************************************** + * + * File CMSGTST.C + * + * Modification History: + * Name Description + * Madhu Katragadda Creation ********************************************************************/ -/******************************************************************************** -* -* File CMSGTST.C -* -* Modification History: -* Name Description -* Madhu Katragadda Creation -********************************************************************************* -*/ /* C API TEST FOR MESSAGE FORMAT */ #include "unicode/utypes.h" @@ -646,6 +644,74 @@ static void TestSampleFormatAndParse(void) ctest_resetTimeZone(); } +/* Test message format with a Select option */ +static void TestMsgFormatSelect(void) +{ + UChar* str; + UChar* str1; + UErrorCode status = U_ZERO_ERROR; + UChar *result; + UChar pattern[100]; + UChar expected[100]; + int32_t resultlength,resultLengthOut; + + str=(UChar*)malloc(sizeof(UChar) * 25); + u_uastrcpy(str, "Kirti"); + str1=(UChar*)malloc(sizeof(UChar) * 25); + u_uastrcpy(str1, "female"); + log_verbose("Testing message format with Select test #1\n:"); + u_uastrcpy(pattern, "{0} est {1, select, female {all\\u00E9e} other {all\\u00E9}} \\u00E0 Paris."); + u_uastrcpy(expected, "Kirti est all\\u00E9e \\u00E0 Paris."); + resultlength=0; + resultLengthOut=u_formatMessage( "fr", pattern, u_strlen(pattern), NULL, resultlength, &status, str , str1); + 
if(status==U_BUFFER_OVERFLOW_ERROR) + { + status=U_ZERO_ERROR; + resultlength=resultLengthOut+1; + result=(UChar*)malloc(sizeof(UChar) * resultlength); + u_formatMessage( "fr", pattern, u_strlen(pattern), result, resultlength, &status, str , str1); + if(u_strcmp(result, expected)==0) + log_verbose("PASS: MessagFormat successful on Select test#1\n"); + else{ + log_err("FAIL: Error in MessageFormat on Select test#1\n GOT %s EXPECTED %s\n", austrdup(result), + austrdup(expected) ); + } + free(result); + } + if(U_FAILURE(status)){ + log_data_err("ERROR: failure in message format on Select test#1 : %s \n", myErrorName(status)); + } + + /*Test a nested pattern*/ + str=(UChar*)malloc(sizeof(UChar) * 25); + u_uastrcpy(str, "Noname"); + str1=(UChar*)malloc(sizeof(UChar) * 25); + u_uastrcpy(str1, "other"); + log_verbose("Testing message format with Select test #2\n:"); + u_uastrcpy(pattern, "{0} est {1, select, female {{2,number,integer} all\\u00E9e} other {all\\u00E9}} \\u00E0 Paris."); + u_uastrcpy(expected, "Noname est all\\u00E9 \\u00E0 Paris."); + resultlength=0; + resultLengthOut=u_formatMessage( "fr", pattern, u_strlen(pattern), NULL, resultlength, &status, str , str1,6); + if(status==U_BUFFER_OVERFLOW_ERROR) + { + status=U_ZERO_ERROR; + resultlength=resultLengthOut+1; + result=(UChar*)malloc(sizeof(UChar) * resultlength); + u_formatMessage( "fr", pattern, u_strlen(pattern), result, resultlength, &status, str , str1); + if(u_strcmp(result, expected)==0) + log_verbose("PASS: MessagFormat successful on Select test#2\n"); + else{ + log_err("FAIL: Error in MessageFormat on Select test#2\n GOT %s EXPECTED %s\n", austrdup(result), + austrdup(expected) ); + } + free(result); + } + if(U_FAILURE(status)){ + log_data_err("ERROR: failure in message format on Select test#2 : %s \n", myErrorName(status)); + } + +} + /* test message format with a choice option */ static void TestMsgFormatChoice(void) { @@ -1082,6 +1148,7 @@ void addMsgForTest(TestNode** root) addTest(root, 
&TestJ904, "tsformat/cmsgtst/TestJ904"); addTest(root, &MessageLength, "tsformat/cmsgtst/MessageLength"); addTest(root, &TestErrorChaining, "tsformat/cmsgtst/TestErrorChaining"); + addTest(root, &TestMsgFormatSelect, "tsformat/cmsgtst/TestMsgFormatSelect"); } #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/cintltst/cmsgtst.h b/icu4c/source/test/cintltst/cmsgtst.h index 6936283980e..70ba6c6c115 100644 --- a/icu4c/source/test/cintltst/cmsgtst.h +++ b/icu4c/source/test/cintltst/cmsgtst.h @@ -1,17 +1,15 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2001, International Business Machines Corporation and + * Copyright (c) 1997-2010, International Business Machines Corporation and * others. All Rights Reserved. + ******************************************************************** + * + * File CMSGTST.H + * + * Modification History: + * Name Description + * Madhu Katragadda Creation ********************************************************************/ -/******************************************************************************** -* -* File CMSGTST.H -* -* Modification History: -* Name Description -* Madhu Katragadda Creation -********************************************************************************* -*/ /* C API TEST FOR MESSAGE FORMAT */ #ifndef _CMSGFRMTST #define _CMSGFRMTST @@ -41,6 +39,10 @@ * Test u_formatMessage() with choice option **/ static void TestMsgFormatChoice(void); + /** + * Test u_formatMessage() with Select option + **/ + static void TestMsgFormatSelect(void); /** * Test u_parseMessage() with various test patterns() **/ diff --git a/icu4c/source/test/intltest/Makefile.in b/icu4c/source/test/intltest/Makefile.in index 6ac9a8412fd..f8100033453 100644 --- a/icu4c/source/test/intltest/Makefile.in +++ b/icu4c/source/test/intltest/Makefile.in @@ -46,7 +46,7 @@ dtptngts.o encoll.o escoll.o ficoll.o frcoll.o g7coll.o intltest.o \ itercoll.o itformat.o itmajor.o 
itutil.o jacoll.o lcukocol.o \ loctest.o miscdtfm.o mnkytst.o msfmrgts.o nmfmapts.o nmfmtrt.o \ numfmtst.o numrgts.o plurults.o plurfmts.o pptest.o regcoll.o restest.o restsnew.o \ -sdtfmtts.o svccoll.o tchcfmt.o \ +sdtfmtts.o svccoll.o tchcfmt.o selfmts.o \ tfsmalls.o tmsgfmt.o trcoll.o tscoll.o tsdate.o tsdcfmsy.o tsdtfmsy.o \ tsmthred.o tsnmfmt.o tsputil.o tstnrapi.o tstnorm.o tzbdtest.o \ tzregts.o tztest.o ucdtest.o usettest.o ustrtest.o strcase.o transtst.o strtest.o thcoll.o \ diff --git a/icu4c/source/test/intltest/itformat.cpp b/icu4c/source/test/intltest/itformat.cpp index 45676ef0487..0de98ecba8f 100644 --- a/icu4c/source/test/intltest/itformat.cpp +++ b/icu4c/source/test/intltest/itformat.cpp @@ -50,6 +50,7 @@ #include "tzfmttst.h" // TimeZoneFormatTest #include "plurults.h" // PluralRulesTest #include "plurfmts.h" // PluralFormatTest +#include "selfmts.h" // PluralFormatTest #include "dtifmtts.h" // DateIntervalFormatTest #include "tufmtts.h" // TimeUnitTest #include "locnmtst.h" // LocaleDisplayNamesTest @@ -124,7 +125,8 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam TESTCLASS(36,PluralFormatTest); TESTCLASS(37,DateIntervalFormatTest); TESTCLASS(38,TimeUnitTest); - TESTCLASS(39,LocaleDisplayNamesTest); + TESTCLASS(39,SelectFormatTest); + TESTCLASS(40,LocaleDisplayNamesTest); default: name = ""; break; //needed to end loop } diff --git a/icu4c/source/test/intltest/selfmts.cpp b/icu4c/source/test/intltest/selfmts.cpp new file mode 100644 index 00000000000..d8b7852f5bd --- /dev/null +++ b/icu4c/source/test/intltest/selfmts.cpp @@ -0,0 +1,308 @@ +/******************************************************************** + * COPYRIGHT: + * Copyright (c) 1997-2010, International Business Machines Corporation and + * others. All Rights Reserved. + * Copyright (C) 2010 , Yahoo! Inc. 
+ ********************************************************************/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "selfmts.h" +#include "cmemory.h" +#include "unicode/selfmt.h" +#include "stdio.h" + +const UnicodeString SIMPLE_PATTERN = UnicodeString("feminine {feminineVerbValue} other{otherVerbValue}"); +#define SELECT_PATTERN_DATA 4 +#define SELECT_SYNTAX_DATA 10 +#define EXP_FORMAT_RESULT_DATA 12 +#define NUM_OF_FORMAT_ARGS 3 + +void SelectFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) +{ + if (exec) logln("TestSuite SelectFormat"); + switch (index) { + TESTCASE(0, selectFormatAPITest); + TESTCASE(1, selectFormatUnitTest); + default: name = ""; + break; + } +} + +/** + * Unit tests of SelectFormat class. + */ +void SelectFormatTest::selectFormatUnitTest(/*char *par*/) +{ + UnicodeString patternTestData[SELECT_PATTERN_DATA] = { + UNICODE_STRING_SIMPLE("fem {femValue} other{even}"), + UNICODE_STRING_SIMPLE("other{odd or even}"), + UNICODE_STRING_SIMPLE("odd{The number {0, number, integer} is odd.}other{The number {0, number, integer} is even.}"), + UNICODE_STRING_SIMPLE("odd{The number {1} is odd}other{The number {1} is even}"), + }; + + UnicodeString formatArgs[NUM_OF_FORMAT_ARGS] = { + UNICODE_STRING_SIMPLE("fem"), + UNICODE_STRING_SIMPLE("other"), + UNICODE_STRING_SIMPLE("odd") + }; + + UnicodeString expFormatResult[EXP_FORMAT_RESULT_DATA][NUM_OF_FORMAT_ARGS] = { + { + UNICODE_STRING_SIMPLE("femValue"), + UNICODE_STRING_SIMPLE("even"), + UNICODE_STRING_SIMPLE("even") + }, + { + UNICODE_STRING_SIMPLE("odd or even"), + UNICODE_STRING_SIMPLE("odd or even"), + UNICODE_STRING_SIMPLE("odd or even"), + }, + { + UNICODE_STRING_SIMPLE("The number {0, number, integer} is even."), + UNICODE_STRING_SIMPLE("The number {0, number, integer} is even."), + UNICODE_STRING_SIMPLE("The number {0, number, integer} is odd."), + }, + { + UNICODE_STRING_SIMPLE("The number {1} is even"), + 
UNICODE_STRING_SIMPLE("The number {1} is even"), + UNICODE_STRING_SIMPLE("The number {1} is odd"), + } + }; + + UnicodeString checkSyntaxData[SELECT_SYNTAX_DATA] = { + UNICODE_STRING_SIMPLE("odd{foo} odd{bar} other{foobar}"), + UNICODE_STRING_SIMPLE("odd{foo} other{bar} other{foobar}"), + UNICODE_STRING_SIMPLE("odd{foo}"), + UNICODE_STRING_SIMPLE("1odd{foo} other{bar}"), + UNICODE_STRING_SIMPLE("odd{foo},other{bar}"), + UNICODE_STRING_SIMPLE("od d{foo} other{bar}"), + UNICODE_STRING_SIMPLE("odd{foo}{foobar}other{foo}"), + UNICODE_STRING_SIMPLE("odd{foo1}other{foo2}}"), + UNICODE_STRING_SIMPLE("odd{foo1}other{{foo2}"), + UNICODE_STRING_SIMPLE("odd{fo{o1}other{foo2}}") + }; + + UErrorCode expErrorCodes[SELECT_SYNTAX_DATA]={ + U_DUPLICATE_KEYWORD, + U_DUPLICATE_KEYWORD, + U_DEFAULT_KEYWORD_MISSING, + U_PATTERN_SYNTAX_ERROR, + U_PATTERN_SYNTAX_ERROR, + U_PATTERN_SYNTAX_ERROR, + U_PATTERN_SYNTAX_ERROR, + U_PATTERN_SYNTAX_ERROR, + U_PATTERN_SYNTAX_ERROR, + U_DEFAULT_KEYWORD_MISSING + }; + + UErrorCode status = U_ZERO_ERROR; + SelectFormat* selFmt = new SelectFormat( SIMPLE_PATTERN , status); + if (U_FAILURE(status)) { + dataerrln("ERROR: SelectFormat Unit Test constructor failed in unit tests.- exitting"); + return; + } + + // ======= Test SelectFormat pattern syntax. 
+ logln("SelectFormat Unit Test : Testing SelectFormat pattern syntax."); + for (int32_t i=0; iapplyPattern(checkSyntaxData[i], status); + if( status!= expErrorCodes[i] ){ + errln("\nERROR: Unexpected result - SelectFormat Unit Test failed to detect syntax error with pattern: "+checkSyntaxData[i]+" and expected status="+ u_errorName(expErrorCodes[i]) + " and resulted status="+u_errorName(status)); + } + } + + logln("SelectFormat Unit Test : Creaing format object for Testing applying various patterns"); + status = U_ZERO_ERROR; + selFmt = new SelectFormat( SIMPLE_PATTERN , status); + //SelectFormat* selFmt1 = new SelectFormat( SIMPLE_PATTERN , status); + if (U_FAILURE(status)) { + errln("ERROR: SelectFormat Unit Test constructor failed in unit tests.- exitting"); + return; + } + + // ======= Test applying and formatting with various pattern + logln("SelectFormat Unit test: Testing applyPattern() and format() ..."); + UnicodeString result; + FieldPosition ignore(FieldPosition::DONT_CARE); + + for(int32_t i=0; iapplyPattern(patternTestData[i], status); + if (U_FAILURE(status)) { + errln("ERROR: SelectFormat Unit Test failed to apply pattern- "+patternTestData[i] ); + continue; + } + + //Format with the keyword array + for(int32_t j=0; j<3; j++) { + result.remove(); + selFmt->format( formatArgs[j], result , ignore , status); + if (U_FAILURE(status)) { + errln("ERROR: SelectFormat Unit test failed in format() with argument: "+ formatArgs[j] + " and error is " + u_errorName(status) ); + }else{ + if( result != expFormatResult[i][j] ){ + errln("ERROR: SelectFormat Unit test failed in format() with unexpected result\n with argument: "+ formatArgs[j] + "\n result obtained: " + result + "\n and expected is: " + expFormatResult[i][j] ); + } + } + } + } + +/* + //Test with an invalid keyword + logln("SelectFormat Unit test: Testing format() with keyword method and with invalid keywords..."); + status = U_ZERO_ERROR; + result.remove(); + UnicodeString keywords[] = { + 
"9Keyword-_", //Starts with a digit + "-Keyword-_", //Starts with a hyphen + "_Keyword-_", //Starts with a underscore + "\\u00E9Keyword-_", //Starts with non-ASCII character + "Key*word-_" //Contains a sepial character not allowed + "*Keyword-_" //Starts with a sepial character not allowed + }; + + selFmt = new SelectFormat( SIMPLE_PATTERN , status); + for (int i = 0; i< 6; i++ ){ + selFmt->format( keywords[i], result , ignore , status); + if (!U_FAILURE(status)) { + errln("ERROR: SelectFormat Unit test failed in format() with keyWord and with an invalid keyword as : "+ keywords[i]); + } + } +*/ + + delete selFmt; +} + +/** + * Test various generic API methods of SelectFormat for Basic API usage. + * This is to make sure the API test coverage is 100% . + */ +void SelectFormatTest::selectFormatAPITest(/*char *par*/) +{ + int numOfConstructors =3; + UErrorCode status[3]; + SelectFormat* selFmt[3]; + + // ========= Test constructors + logln("SelectFormat API test: Testing SelectFormat constructors ..."); + for (int32_t i=0; i< numOfConstructors; ++i) { + status[i] = U_ZERO_ERROR; + } + selFmt[0]= new SelectFormat(status[0]); + if ( U_FAILURE(status[0]) ) { + errln("ERROR: SelectFormat API test constructor with status failed!"); + return; + } + selFmt[1]= new SelectFormat(SIMPLE_PATTERN, status[1]); + if ( U_FAILURE(status[1]) ) { + errln("ERROR: SelectFormat API test constructor with pattern and status failed!"); + return; + } + + // =========== Test copy constructor + logln("SelectFormat API test: Testing copy constructor and == operator ..."); + SelectFormat fmt = *selFmt[1]; + SelectFormat* dupPFmt = new SelectFormat(fmt); + if ((*selFmt[1]) != (*dupPFmt)) { + errln("ERROR: SelectFormat API test Failed in copy constructor or == operator!"); + } + delete dupPFmt; + + // ======= Test clone && == operator. 
+ logln("SelectFormat API test: Testing clone and == operator ..."); + if ( U_SUCCESS(status[0]) && U_SUCCESS(status[1]) ) { + selFmt[1] = (SelectFormat*)selFmt[0]->clone(); + if (selFmt[1]!=NULL) { + if ( *selFmt[1] != *selFmt[0] ) { + errln("ERROR: SelectFormat API test clone test failed!"); + } + } + } + + // ======= Test assignment operator && == operator. + logln("SelectFormat API test: Testing assignment operator and == operator ..."); + selFmt[2]= new SelectFormat(SIMPLE_PATTERN, status[2]); + if ( U_SUCCESS(status[2]) ) { + *selFmt[1] = *selFmt[2]; + if (selFmt[1]!=NULL) { + if ( (*selFmt[1] != *selFmt[2]) ) { + errln("ERROR: SelectFormat API test assignment operator test failed!"); + } + } + delete selFmt[1]; + } + else { + errln("ERROR: SelectFormat constructor failed in assignment operator!"); + } + delete selFmt[0]; + delete selFmt[2]; + + // ======= Test getStaticClassID() and getStaticClassID() + logln("SelectFormat API test: Testing getStaticClassID() and getStaticClassID() ..."); + UErrorCode status1 = U_ZERO_ERROR; + SelectFormat* selFmt1 = new SelectFormat( SIMPLE_PATTERN , status1); + if( U_FAILURE(status1)) { + errln("ERROR: SelectFormat constructor failed in staticClassID test! 
Exitting"); + return; + } + + logln("Testing getStaticClassID()"); + if(selFmt1->getDynamicClassID() !=SelectFormat::getStaticClassID()) { + errln("ERROR: SelectFormat API test getDynamicClassID() didn't return the expected value"); + } + + // ======= Test applyPattern() and toPattern() + logln("SelectFormat API test: Testing applyPattern() and toPattern() ..."); + UnicodeString pattern = UnicodeString("masculine{masculineVerbValue} other{otherVerbValue}"); + status1 = U_ZERO_ERROR; + selFmt1->applyPattern( pattern, status1); + if (U_FAILURE(status1)) { + errln("ERROR: SelectFormat API test failed in applyPattern() with pattern: "+ pattern); + }else{ + UnicodeString checkPattern; + selFmt1->toPattern( checkPattern); + if( checkPattern != pattern ){ + errln("ERROR: SelectFormat API test failed in toPattern() with unexpected result with pattern: "+ pattern); + } + } + + // ======= Test different format() methods + logln("SelectFormat API test: Testing format() with keyword method ..."); + status1 = U_ZERO_ERROR; + UnicodeString result; + FieldPosition ignore(FieldPosition::DONT_CARE); + UnicodeString keyWord = UnicodeString("masculine"); + + selFmt1->format( keyWord, result , ignore , status1); + if (U_FAILURE(status1)) { + errln("ERROR: SelectFormat API test failed in format() with keyWord: "+ keyWord); + }else{ + UnicodeString expected=UnicodeString("masculineVerbValue"); + if( result != expected ){ + errln("ERROR: SelectFormat API test failed in format() with unexpected result with keyWord: "+ keyWord); + } + } + + logln("SelectFormat API test: Testing format() with Formattable obj method ..."); + status1 = U_ZERO_ERROR; + result.remove(); + UnicodeString result1; + Formattable testArgs = Formattable("other"); + selFmt1->format( testArgs, result1 , ignore , status1); + if (U_FAILURE(status1)) { + errln("ERROR: SelectFormat API test failed in format() with Formattable"); + }else{ + UnicodeString expected=UnicodeString("otherVerbValue"); + if( result1 != expected ){ 
+ errln("ERROR: SelectFormat API test failed in format() with unexpected result with Formattable"); + } + } + + + delete selFmt1; +} +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icu4c/source/test/intltest/selfmts.h b/icu4c/source/test/intltest/selfmts.h new file mode 100644 index 00000000000..c5b640de591 --- /dev/null +++ b/icu4c/source/test/intltest/selfmts.h @@ -0,0 +1,35 @@ +/******************************************************************** + * COPYRIGHT: + * Copyright (c) 1997-2010, International Business Machines Corporation and + * others. All Rights Reserved. + * Copyright (C) 2010 , Yahoo! Inc. + ********************************************************************/ + +#ifndef _SELECTFORMATTEST +#define _SELECTFORMATTEST + +#include "unicode/utypes.h" +#include "unicode/selfmt.h" + + +#if !UCONFIG_NO_FORMATTING + +#include "intltest.h" + +/** + * Test basic functionality of various API functions + **/ +class SelectFormatTest : public IntlTest { + void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = NULL ); + +private: + /** + * Performs tests on many API functions, see detailed comments in source code + **/ + void selectFormatAPITest(/* char* par */); + void selectFormatUnitTest(/* char* par */); +}; + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif diff --git a/icu4c/source/test/intltest/tmsgfmt.cpp b/icu4c/source/test/intltest/tmsgfmt.cpp index c435d13244f..aefbadd8956 100644 --- a/icu4c/source/test/intltest/tmsgfmt.cpp +++ b/icu4c/source/test/intltest/tmsgfmt.cpp @@ -1,6 +1,7 @@ /******************************************************************** - * Copyright (c) 1997-2009, International Business Machines - * Corporation and others. All Rights Reserved. + * COPYRIGHT: + * Copyright (c) 1997-2010, International Business Machines Corporation and + * others. All Rights Reserved. 
******************************************************************** * File TMSGFMT.CPP * @@ -24,9 +25,14 @@ #include "unicode/msgfmt.h" #include "unicode/numfmt.h" #include "unicode/choicfmt.h" +#include "unicode/selfmt.h" #include "unicode/gregocal.h" #include +#define E_WITH_ACUTE ((UChar)0x00E9) +#define A_WITH_GRAVE ((UChar)0x00E0) +static const char E_ACCENTED[]={E_WITH_ACUTE,0}; + void TestMessageFormat::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/) { @@ -54,6 +60,7 @@ TestMessageFormat::runIndexedTest(int32_t index, UBool exec, TESTCASE(20,testAutoQuoteApostrophe); TESTCASE(21,testMsgFormatPlural); TESTCASE(22,testCoverage); + TESTCASE(23,testMsgFormatSelect); default: name = ""; break; } } @@ -611,6 +618,207 @@ void TestMessageFormat::testMsgFormatPlural(/* char* par */) delete msgFmt; } +void TestMessageFormat::internalFormat(MessageFormat* msgFmt , + Formattable* args , int32_t numOfArgs , + UnicodeString expected ,char* errMsg) +{ + UnicodeString result; + FieldPosition ignore(FieldPosition::DONT_CARE); + UErrorCode status = U_ZERO_ERROR; + + //Format with passed arguments + msgFmt->format( args , numOfArgs , result, ignore, status); + if (U_FAILURE(status)) { + errln( "%serror while formatting with ErrorCode as %s" ,errMsg, u_errorName(status) ); + } + //Compare expected with obtained result + if ( result!= expected ) { + UnicodeString err = UnicodeString(errMsg); + err+= UnicodeString(":Unexpected Result \n Expected: " + expected + "\n Obtained: " + result); + errln(err); + } +} + +MessageFormat* TestMessageFormat::internalCreate( + UnicodeString pattern ,Locale locale ,UErrorCode &status , char* errMsg) +{ + //Create the MessageFormat with simple SelectFormat + MessageFormat* msgFmt = new MessageFormat(pattern, locale, status); + if (U_FAILURE(status)) { + errln( "%serror while constructing with ErrorCode as %s" ,errMsg, u_errorName(status) ); + logln(UnicodeString("TestMessageFormat::testMsgFormatSelect #1 with error 
code ")+(int32_t)status); + return NULL; + } + return msgFmt; +} + +void TestMessageFormat::testMsgFormatSelect(/* char* par */) +{ + logln("running TestMessageFormat::testMsgFormatSelect"); + + UErrorCode err = U_ZERO_ERROR; + //French Pattern + UnicodeString t1("{0} est {1, select, female {all\\u00E9e} other {all\\u00E9}} \\u00E0 Paris."); + + err = U_ZERO_ERROR; + //Create the MessageFormat with simple French pattern + MessageFormat* msgFmt1 = internalCreate(t1.unescape(), Locale("fr"),err,(char*)"From TestMessageFormat::TestSelectFormat create t1"); + if (!U_FAILURE(err)) { + //Arguments + Formattable testArgs10[] = {"Kirti","female"}; + Formattable testArgs11[] = {"Victor","other"}; + Formattable testArgs12[] = {"Ash","unknown"}; + Formattable* testArgs[] = {testArgs10,testArgs11,testArgs12}; + UnicodeString exp[] = { + "Kirti est all\\u00E9e \\u00E0 Paris." , + "Victor est all\\u00E9 \\u00E0 Paris.", + "Ash est all\\u00E9 \\u00E0 Paris."}; + //Format + for( int i=0; i< 3; i++){ + internalFormat( msgFmt1 , testArgs[i], 2, exp[i].unescape() ,(char*)"From TestMessageFormat::testSelectFormat format t1"); + } + } + delete msgFmt1; + + //Quoted French Pattern + UnicodeString t2("{0} est {1, select, female {all\\u00E9e c''est} other {all\\u00E9 c''est}} \\u00E0 Paris."); + err = U_ZERO_ERROR; + //Create the MessageFormat with Quoted French pattern + MessageFormat* msgFmt2 = internalCreate(t2.unescape(), Locale("fr"),err,(char*)"From TestMessageFormat::TestSelectFormat create t2"); + if (!U_FAILURE(err)) { + //Arguments + Formattable testArgs10[] = {"Kirti","female"}; + Formattable testArgs11[] = {"Victor","other"}; + Formattable testArgs12[] = {"Ash","male"}; + Formattable* testArgs[] = {testArgs10,testArgs11,testArgs12}; + UnicodeString exp[] = { + "Kirti est all\\u00E9e c'est \\u00E0 Paris." 
, + "Victor est all\\u00E9 c'est \\u00E0 Paris.", + "Ash est all\\u00E9 c'est \\u00E0 Paris."}; + //Format + for( int i=0; i< 3; i++){ + internalFormat( msgFmt2 , testArgs[i], 2, exp[i].unescape() ,(char*)"From TestMessageFormat::testSelectFormat format t2"); + } + } + delete msgFmt2; + + //English Pattern + UnicodeString t3("{0, select , male {MALE FR company} female {FEMALE FR company} other {FR otherValue}} published new books."); + err = U_ZERO_ERROR; + //Create the MessageFormat with English pattern + MessageFormat* msgFmt3 = internalCreate(t3, Locale("en"),err,(char*)"From TestMessageFormat::TestSelectFormat create t3"); + if (!U_FAILURE(err)) { + //Arguments + Formattable testArgs10[] = {"female"}; + Formattable testArgs11[] = {"other"}; + Formattable testArgs12[] = {"male"}; + Formattable* testArgs[] = {testArgs10,testArgs11,testArgs12}; + UnicodeString exp[] = { + "FEMALE FR company published new books." , + "FR otherValue published new books.", + "MALE FR company published new books."}; + //Format + for( int i=0; i< 3; i++){ + internalFormat( msgFmt3 , testArgs[i], 1, exp[i] ,(char*)"From TestMessageFormat::testSelectFormat format t3"); + } + } + delete msgFmt3; + + //Nested patterns with plural, number ,choice ,select format etc. + //Select Format with embedded number format + UnicodeString t4("{0} est {1, select, female {{2,number,integer} all\\u00E9e} other {all\\u00E9}} \\u00E0 Paris."); + //Create the MessageFormat with Select Format with embedded number format (nested pattern) + MessageFormat* msgFmt4 = internalCreate(t4.unescape(), Locale("fr"),err,(char*)"From TestMessageFormat::TestSelectFormat create t4"); + if (!U_FAILURE(err)) { + //Arguments + Formattable testArgs10[] = {"Kirti","female",6}; + Formattable testArgs11[] = {"Kirti","female",100.100}; + Formattable testArgs12[] = {"Kirti","other",6}; + Formattable* testArgs[] = {testArgs10,testArgs11,testArgs12}; + UnicodeString exp[] = { + "Kirti est 6 all\\u00E9e \\u00E0 Paris." 
, + "Kirti est 100 all\\u00E9e \\u00E0 Paris.", + "Kirti est all\\u00E9 \\u00E0 Paris."}; + //Format + for( int i=0; i< 3; i++){ + internalFormat( msgFmt4 , testArgs[i], 3, exp[i].unescape() ,(char*)"From TestMessageFormat::testSelectFormat format t4"); + } + } + delete msgFmt4; + + err = U_ZERO_ERROR; + //Plural format with embedded select format + UnicodeString t5("{0} {1, plural, one {est {2, select, female {all\\u00E9e} other {all\\u00E9}}} other {sont {2, select, female {all\\u00E9es} other {all\\u00E9s}}}} \\u00E0 Paris."); + err = U_ZERO_ERROR; + //Create the MessageFormat with Plural format with embedded select format(nested pattern) + MessageFormat* msgFmt5 = internalCreate(t5.unescape(), Locale("fr"),err,(char*)"From TestMessageFormat::TestSelectFormat create t5"); + if (!U_FAILURE(err)) { + //Arguments + Formattable testArgs10[] = {"Kirti",6,"female"}; + Formattable testArgs11[] = {"Kirti",1,"female"}; + Formattable testArgs12[] = {"Ash",1,"other"}; + Formattable testArgs13[] = {"Ash",5,"other"}; + Formattable* testArgs[] = {testArgs10,testArgs11,testArgs12,testArgs13}; + UnicodeString exp[] = { + "Kirti sont all\\u00E9es \\u00E0 Paris." 
, + "Kirti est all\\u00E9e \\u00E0 Paris.", + "Ash est all\\u00E9 \\u00E0 Paris.", + "Ash sont all\\u00E9s \\u00E0 Paris."}; + //Format + for( int i=0; i< 4; i++){ + internalFormat( msgFmt5 , testArgs[i], 3, exp[i].unescape() ,(char*)"From TestMessageFormat::testSelectFormat format t5"); + } + } + delete msgFmt5; + + err = U_ZERO_ERROR; + //Select, plural, and number formats heavily nested + UnicodeString t6("{0} und {1, select, female {{2, plural, one {{3, select, female {ihre Freundin} other {ihr Freund}} } other {ihre {2, number, integer} {3, select, female {Freundinnen} other {Freunde}} } }} other{{2, plural, one {{3, select, female {seine Freundin} other {sein Freund}}} other {seine {2, number, integer} {3, select, female {Freundinnen} other {Freunde}}}}} } gingen nach Paris."); + //Create the MessageFormat with Select, plural, and number formats heavily nested + MessageFormat* msgFmt6 = internalCreate(t6, Locale("de"),err,(char*)"From TestMessageFormat::TestSelectFormat create t6"); + if (!U_FAILURE(err)) { + //Arguments + Formattable testArgs10[] = {"Kirti","other",1,"other"}; + Formattable testArgs11[] = {"Kirti","other",6,"other"}; + Formattable testArgs12[] = {"Kirti","other",1,"female"}; + Formattable testArgs13[] = {"Kirti","other",3,"female"}; + Formattable testArgs14[] = {"Kirti","female",1,"female"}; + Formattable testArgs15[] = {"Kirti","female",5,"female"}; + Formattable testArgs16[] = {"Kirti","female",1,"other"}; + Formattable testArgs17[] = {"Kirti","female",5,"other"}; + Formattable testArgs18[] = {"Kirti","mixed",1,"mixed"}; + Formattable testArgs19[] = {"Kirti","mixed",1,"other"}; + Formattable testArgs20[] = {"Kirti","female",1,"mixed"}; + Formattable testArgs21[] = {"Kirti","mixed",5,"mixed"}; + Formattable testArgs22[] = {"Kirti","mixed",5,"other"}; + Formattable testArgs23[] = {"Kirti","female",5,"mixed"}; + Formattable* testArgs[] = {testArgs10,testArgs11,testArgs12,testArgs13, + testArgs14,testArgs15,testArgs16,testArgs17, + 
testArgs18,testArgs19,testArgs20,testArgs21, + testArgs22,testArgs23 }; + UnicodeString exp[] = { + "Kirti und sein Freund gingen nach Paris." , + "Kirti und seine 6 Freunde gingen nach Paris." , + "Kirti und seine Freundin gingen nach Paris.", + "Kirti und seine 3 Freundinnen gingen nach Paris.", + "Kirti und ihre Freundin gingen nach Paris.", + "Kirti und ihre 5 Freundinnen gingen nach Paris.", + "Kirti und ihr Freund gingen nach Paris.", + "Kirti und ihre 5 Freunde gingen nach Paris.", + "Kirti und sein Freund gingen nach Paris.", + "Kirti und sein Freund gingen nach Paris.", + "Kirti und ihr Freund gingen nach Paris.", + "Kirti und seine 5 Freunde gingen nach Paris." , + "Kirti und seine 5 Freunde gingen nach Paris." , + "Kirti und ihre 5 Freunde gingen nach Paris." + }; + //Format + for( int i=0; i< 14; i++){ + internalFormat( msgFmt6 , testArgs[i], 4, exp[i] ,(char*)"From TestMessageFormat::testSelectFormat format t6"); + } + } + delete msgFmt6; +} //--------------------------------- // API Tests diff --git a/icu4c/source/test/intltest/tmsgfmt.h b/icu4c/source/test/intltest/tmsgfmt.h index 59c71a738a3..9d47c860b83 100644 --- a/icu4c/source/test/intltest/tmsgfmt.h +++ b/icu4c/source/test/intltest/tmsgfmt.h @@ -1,10 +1,8 @@ -/*********************************************************************** +/******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2009, International Business Machines Corporation - * and others. All Rights Reserved. - ***********************************************************************/ - - + * Copyright (c) 1997-2010, International Business Machines Corporation and + * others. All Rights Reserved. 
+ ********************************************************************/ #ifndef _TESTMESSAGEFORMAT #define _TESTMESSAGEFORMAT @@ -14,6 +12,7 @@ #include "unicode/unistr.h" #include "unicode/fmtable.h" +#include "unicode/msgfmt.h" #include "intltest.h" /** @@ -61,6 +60,24 @@ public: **/ void testMsgFormatPlural(/* char* par */); + /** + * tests MessageFormat functionality with a SelectFormat. + **/ + void testMsgFormatSelect(/* char* par */); + + /** + * Internal method to format a MessageFormat object with passed args + **/ + void internalFormat(MessageFormat* msgFmt , + Formattable* args , int32_t numOfArgs , + UnicodeString expected ,char* errMsg); + + /** + * Internal method to create a MessageFormat object with passed args + **/ + MessageFormat* internalCreate( + UnicodeString pattern ,Locale locale , UErrorCode& err, char* errMsg); + /** * Verify that MessageFormat accomodates more than 10 arguments * and more than 10 subformats. diff --git a/icu4c/source/test/intltest/uobjtest.cpp b/icu4c/source/test/intltest/uobjtest.cpp index 7e1c8b36c92..cbd456fd4d2 100644 --- a/icu4c/source/test/intltest/uobjtest.cpp +++ b/icu4c/source/test/intltest/uobjtest.cpp @@ -1,7 +1,8 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 2002-2009, International Business Machines Corporation and + * Copyright (c) 1997-2010, International Business Machines Corporation and * others. All Rights Reserved. + * Copyright (C) 2010 , Yahoo! Inc. 
********************************************************************/ #include "uobjtest.h" @@ -192,6 +193,7 @@ UObject *UObjectTest::testClass(UObject *obj, #include "unicode/parsepos.h" #include "unicode/plurrule.h" #include "unicode/plurfmt.h" +#include "unicode/selfmt.h" #include "unicode/rbbi.h" #include "unicode/rbnf.h" #include "unicode/regex.h" @@ -258,6 +260,7 @@ void UObjectTest::testIDs() TESTCLASSID_CTOR(DateFormatSymbols, (status)); TESTCLASSID_CTOR(PluralFormat, (status)); TESTCLASSID_CTOR(PluralRules, (status)); + TESTCLASSID_CTOR(SelectFormat, (status)); TESTCLASSID_FACTORY(DateTimePatternGenerator, DateTimePatternGenerator::createInstance(status)); TESTCLASSID_FACTORY(RelativeDateFormat, DateFormat::createDateInstance(DateFormat::kFullRelative, Locale::getUS())); TESTCLASSID_CTOR(DecimalFormatSymbols, (status));