diff --git a/.gitattributes b/.gitattributes index 9ee5e949672..537dfa5a216 100644 --- a/.gitattributes +++ b/.gitattributes @@ -54,12 +54,7 @@ icu4c/source/data/in/nfkc.nrm -text icu4c/source/data/in/nfkc_cf.nrm -text icu4c/source/data/in/unorm.icu -text icu4c/source/data/locales/pool.res -text -icu4c/source/i18n/selfmt.cpp -text -icu4c/source/i18n/selfmtimpl.h -text -icu4c/source/i18n/unicode/selfmt.h -text icu4c/source/samples/ucnv/data02.bin -text -icu4c/source/test/intltest/selfmts.cpp -text -icu4c/source/test/intltest/selfmts.h -text icu4c/source/test/perf/README -text icu4c/source/test/testdata/TestFont1.otf -text icu4c/source/test/testdata/importtest.bin -text @@ -67,9 +62,6 @@ icu4c/source/test/testdata/iscii.bin -text icu4c/source/test/testdata/old_e_testtypes.res -text icu4c/source/test/testdata/old_l_testtypes.res -text icu4c/source/test/testdata/uni-text.bin -text -icu4c/source/tools/icuinfo/icuinfo.vcproj -text -icu4c/source/tools/icuinfo/icuplugins_windows_sample.txt -text -icu4c/source/tools/icuinfo/testplug.vcproj -text icu4j/build.properties -text icu4j/demos/manifest.stub -text icu4j/icu-eclipse/misc/ICUConfig.properties -text diff --git a/icu4c/source/i18n/selfmt.cpp b/icu4c/source/i18n/selfmt.cpp index 6be17643c75..de1f8da3a41 100755 --- a/icu4c/source/i18n/selfmt.cpp +++ b/icu4c/source/i18n/selfmt.cpp @@ -1,526 +1,526 @@ -/******************************************************************** - * COPYRIGHT: - * Copyright (c) 1997-2010, International Business Machines Corporation and - * others. All Rights Reserved. - * Copyright (C) 2010 , Yahoo! Inc. - ******************************************************************** - * - * File SELFMT.CPP - * - * Modification History: - * - * Date Name Description - * 11/11/09 kirtig Finished first cut of implementation. 
- * 11/16/09 kirtig Improved version - ********************************************************************/ - -#include "unicode/utypes.h" -#include "unicode/ustring.h" -#include "unicode/ucnv_err.h" -#include "unicode/uchar.h" -#include "unicode/umsg.h" -#include "unicode/rbnf.h" -#include "cmemory.h" -#include "util.h" -#include "uassert.h" -#include "ustrfmt.h" -#include "uvector.h" - -#include "unicode/selfmt.h" -#include "selfmtimpl.h" - -#if !UCONFIG_NO_FORMATTING - -U_NAMESPACE_BEGIN - -U_CDECL_BEGIN - -static void U_CALLCONV -deleteHashStrings(void *obj) { - delete (UnicodeString *)obj; -} - -U_CDECL_END - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SelectFormat) - -#define MAX_KEYWORD_SIZE 30 -static const UChar SELECT_KEYWORD_OTHER[] = {LOW_O, LOW_T, LOW_H, LOW_E, LOW_R, 0}; - -SelectFormat::SelectFormat(UErrorCode& status) { - if (U_FAILURE(status)) { - return; - } - init(status); -} - -SelectFormat::SelectFormat(const UnicodeString& pat, UErrorCode& status) { - if (U_FAILURE(status)) { - return; - } - init(status); - applyPattern(pat, status); -} - -SelectFormat::SelectFormat(const SelectFormat& other) : Format(other) { - UErrorCode status = U_ZERO_ERROR; - pattern = other.pattern; - copyHashtable(other.parsedValuesHash, status); -} - -SelectFormat::~SelectFormat() { - delete parsedValuesHash; -} - -void -SelectFormat::init(UErrorCode& status) { - parsedValuesHash = NULL; - pattern.remove(); - status = U_ZERO_ERROR; -} - - -void -SelectFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) { - if (U_FAILURE(status)) { - return; - } - - this->parsedValuesHash = NULL; - this->pattern = newPattern; - enum State{ startState, keywordState, pastKeywordState, phraseState}; - - //Initialization - UnicodeString keyword = UnicodeString(); - UnicodeString phrase = UnicodeString(); - UnicodeString* ptrPhrase ; - int32_t braceCount = 0; - - if (parsedValuesHash == NULL) { - parsedValuesHash = new Hashtable(TRUE, status); - parsedValuesHash = new 
Hashtable(TRUE, status); - if (U_FAILURE(status)) { - return; - } - parsedValuesHash->setValueDeleter(deleteHashStrings); - } - - //Process the state machine - State state = startState; - for (int32_t i = 0; i < pattern.length(); ++i) { - //Get the character and check its type - UChar ch = pattern.charAt(i); - characterClass type; - classifyCharacter(ch, type); - - //Allow any character in phrase but nowhere else - if ( type == tOther ) { - if ( state == phraseState ){ - phrase += ch; - continue; - }else { - status = U_PATTERN_SYNTAX_ERROR; - return; - } - } - - //Process the state machine - switch (state) { - //At the start of pattern - case startState: - switch (type) { - case tSpace: - break; - case tStartKeyword: - state = keywordState; - keyword += ch; - break; - //If anything else is encountered, it's a syntax error - default: - status = U_PATTERN_SYNTAX_ERROR; - return; - }//end of switch(type) - break; - - //Handle the keyword state - case keywordState: - switch (type) { - case tSpace: - state = pastKeywordState; - break; - case tStartKeyword: - case tContinueKeyword: - keyword += ch; - break; - case tLeftBrace: - state = phraseState; - break; - //If anything else is encountered, it's a syntax error - default: - status = U_PATTERN_SYNTAX_ERROR; - return; - }//end of switch(type) - break; - - //Handle the pastkeyword state - case pastKeywordState: - switch (type) { - case tSpace: - break; - case tLeftBrace: - state = phraseState; - break; - //If anything else is encountered, it's a syntax error - default: - status = U_PATTERN_SYNTAX_ERROR; - return; - }//end of switch(type) - break; - - //Handle the phrase state - case phraseState: - switch (type) { - case tLeftBrace: - braceCount++; - phrase += ch; - break; - case tRightBrace: - //Matching keyword, phrase pair found - if (braceCount == 0){ - //Check validity of keyword - if (parsedValuesHash->get(keyword) != NULL) { - status = U_DUPLICATE_KEYWORD; - return; - } - if (keyword.length() == 0) { - status = 
U_PATTERN_SYNTAX_ERROR; - return; - } - - //Store the keyword, phrase pair in hashTable - ptrPhrase = new UnicodeString(phrase); - parsedValuesHash->put( keyword, ptrPhrase, status); - - //Reinitialize - keyword.remove(); - phrase.remove(); - ptrPhrase = NULL; - state = startState; - } - - if (braceCount > 0){ - braceCount-- ; - phrase += ch; - } - break; - default: - phrase += ch; - }//end of switch(type) - break; - - //Handle the default case of switch(state) - default: - status = U_PATTERN_SYNTAX_ERROR; - return; - - }//end of switch(state) - } - - //Check if the stae machine is back to startState - if ( state != startState){ - status = U_PATTERN_SYNTAX_ERROR; - return; - } - - //Check if "other" keyword is present - if ( !checkSufficientDefinition() ) { - status = U_DEFAULT_KEYWORD_MISSING; - } - return; -} - -UnicodeString& -SelectFormat::format(const Formattable& obj, - UnicodeString& appendTo, - FieldPosition& pos, - UErrorCode& status) const -{ - if (U_FAILURE(status)) return appendTo; - - switch (obj.getType()) - { - case Formattable::kString: - return format((UnicodeString)obj.getString(), appendTo, pos, status); - default: - status = U_ILLEGAL_ARGUMENT_ERROR; - return appendTo; - } -} - -UnicodeString& -SelectFormat::format(const UnicodeString& sInput, - UnicodeString& appendTo, - FieldPosition& pos, - UErrorCode& status) const { - - if (U_FAILURE(status)) return appendTo; - - //Check for the validity of the keyword - if ( !checkValidKeyword(sInput) ){ - status = U_ILLEGAL_ARGUMENT_ERROR; - return appendTo; - } - - if (parsedValuesHash == NULL) { - status = U_INVALID_FORMAT_ERROR; - return appendTo; - } - - UnicodeString *selectedPattern = (UnicodeString *)parsedValuesHash->get(sInput); - if (selectedPattern == NULL) { - selectedPattern = (UnicodeString *)parsedValuesHash->get(SELECT_KEYWORD_OTHER); - } - - return appendTo += *selectedPattern; -} - -UnicodeString& -SelectFormat::toPattern(UnicodeString& appendTo) { - appendTo += pattern; - return 
appendTo; -} - -void -SelectFormat::classifyCharacter(UChar ch, characterClass& type) const{ - if ((ch >= CAP_A) && (ch <= CAP_Z)) { - type = tStartKeyword; - return; - } - if ((ch >= LOW_A) && (ch <= LOW_Z)) { - type = tStartKeyword; - return; - } - if ((ch >= U_ZERO) && (ch <= U_NINE)) { - type = tContinueKeyword; - return; - } - switch (ch) { - case LEFTBRACE: - type = tLeftBrace; - break; - case RIGHTBRACE: - type = tRightBrace; - break; - case SPACE: - case TAB: - type = tSpace; - break; - case HYPHEN: - case LOWLINE: - type = tContinueKeyword; - break; - default : - type = tOther; - } -} - -UBool -SelectFormat::checkSufficientDefinition() { - // Check that at least the default rule is defined. - if (parsedValuesHash == NULL) return FALSE; - if (parsedValuesHash->get(SELECT_KEYWORD_OTHER) == NULL) { - return FALSE; - } - else { - return TRUE; - } -} - -UBool -SelectFormat::checkValidKeyword(const UnicodeString& argKeyword ) const{ - UnicodeString keyword = UnicodeString(); - enum State{ startState, keywordState, pastKeywordState }; - - //Initialize - State state = startState; - keyword.remove(); - - //Start the processing - for (int32_t i = 0; i < argKeyword.length(); ++i) { - //Get the character and check its type - UChar ch = argKeyword.charAt(i); - characterClass type; - classifyCharacter(ch, type); - - //Any character that is not allowed - if ( type == tOther ) { - return FALSE; - } - - //Process the state machine - switch (state) { - //At the start of pattern - case startState: - switch (type) { - case tSpace: - break; - case tStartKeyword: - state = keywordState; - keyword += ch; - break; - //If anything else is encountered, it's a syntax error - default: - return FALSE; - }//end of switch(type) - break; - - //Handle the keyword state - case keywordState: - switch (type) { - case tSpace: - state = pastKeywordState; - break; - case tStartKeyword: - case tContinueKeyword: - keyword += ch; - break; - //If anything else is encountered,it's a syntax error - 
default: - return FALSE; - }//end of switch(type) - break; - - //Handle the pastkeyword state - case pastKeywordState: - switch (type) { - case tSpace: - break; - //If anything else is encountered,it's a syntax error - default: - return FALSE; - }//end of switch(type) - break; - - default: - return FALSE; - }//end of switch(state) - - }//end of loop of argKeyword - - return TRUE; -} - -Format* SelectFormat::clone() const -{ - return new SelectFormat(*this); -} - -SelectFormat& -SelectFormat::operator=(const SelectFormat& other) { - if (this != &other) { - UErrorCode status = U_ZERO_ERROR; - delete parsedValuesHash; - pattern = other.pattern; - copyHashtable(other.parsedValuesHash, status); - } - return *this; -} - -UBool -SelectFormat::operator==(const Format& other) const { - // This protected comparison operator should only be called by subclasses - // which have confirmed that the other object being compared against is - // an instance of a sublcass of SelectFormat. THIS IS IMPORTANT. 
- // Format::operator== guarantees that this cast is safe - SelectFormat* fmt = (SelectFormat*)&other; - Hashtable* hashOther = fmt->parsedValuesHash; - if ( parsedValuesHash == NULL && hashOther == NULL) - return TRUE; - if ( parsedValuesHash == NULL || hashOther == NULL) - return FALSE; - if ( hashOther->count() != parsedValuesHash->count() ){ - return FALSE; - } - - const UHashElement* elem = NULL; - int32_t pos = -1; - while ((elem = hashOther->nextElement(pos)) != NULL) { - const UHashTok otherKeyTok = elem->key; - UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer; - const UHashTok otherKeyToVal = elem->value; - UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer; - - UnicodeString* thisElemValue = (UnicodeString*)parsedValuesHash->get(*otherKey); - if ( thisElemValue == NULL ){ - return FALSE; - } - if ( *thisElemValue != *otherValue){ - return FALSE; - } - - } - pos = -1; - while ((elem = parsedValuesHash->nextElement(pos)) != NULL) { - const UHashTok thisKeyTok = elem->key; - UnicodeString* thisKey = (UnicodeString*)thisKeyTok.pointer; - const UHashTok thisKeyToVal = elem->value; - UnicodeString* thisValue = (UnicodeString*)thisKeyToVal.pointer; - - UnicodeString* otherElemValue = (UnicodeString*)hashOther->get(*thisKey); - if ( otherElemValue == NULL ){ - return FALSE; - } - if ( *otherElemValue != *thisValue){ - return FALSE; - } - - } - return TRUE; -} - -UBool -SelectFormat::operator!=(const Format& other) const { - return !operator==(other); -} - -void -SelectFormat::parseObject(const UnicodeString& /*source*/, - Formattable& /*result*/, - ParsePosition& /*pos*/) const -{ - // TODO: not yet supported in icu4j and icu4c -} - -void -SelectFormat::copyHashtable(Hashtable *other, UErrorCode& status) { - if (other == NULL) { - parsedValuesHash = NULL; - return; - } - parsedValuesHash = new Hashtable(TRUE, status); - if (U_FAILURE(status)){ - return; - } - parsedValuesHash->setValueDeleter(deleteHashStrings); - - int32_t pos = -1; - 
const UHashElement* elem = NULL; - - // walk through the hash table and create a deep clone - while ((elem = other->nextElement(pos)) != NULL){ - const UHashTok otherKeyTok = elem->key; - UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer; - const UHashTok otherKeyToVal = elem->value; - UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer; - parsedValuesHash->put(*otherKey, new UnicodeString(*otherValue), status); - if (U_FAILURE(status)){ - return; - } - } -} - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_FORMATTING */ - -//eof +/******************************************************************** + * COPYRIGHT: + * Copyright (c) 1997-2010, International Business Machines Corporation and + * others. All Rights Reserved. + * Copyright (C) 2010 , Yahoo! Inc. + ******************************************************************** + * + * File SELFMT.CPP + * + * Modification History: + * + * Date Name Description + * 11/11/09 kirtig Finished first cut of implementation. 
+ * 11/16/09 kirtig Improved version + ********************************************************************/ + +#include "unicode/utypes.h" +#include "unicode/ustring.h" +#include "unicode/ucnv_err.h" +#include "unicode/uchar.h" +#include "unicode/umsg.h" +#include "unicode/rbnf.h" +#include "cmemory.h" +#include "util.h" +#include "uassert.h" +#include "ustrfmt.h" +#include "uvector.h" + +#include "unicode/selfmt.h" +#include "selfmtimpl.h" + +#if !UCONFIG_NO_FORMATTING + +U_NAMESPACE_BEGIN + +U_CDECL_BEGIN + +static void U_CALLCONV +deleteHashStrings(void *obj) { + delete (UnicodeString *)obj; +} + +U_CDECL_END + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SelectFormat) + +#define MAX_KEYWORD_SIZE 30 +static const UChar SELECT_KEYWORD_OTHER[] = {LOW_O, LOW_T, LOW_H, LOW_E, LOW_R, 0}; + +SelectFormat::SelectFormat(UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + init(status); +} + +SelectFormat::SelectFormat(const UnicodeString& pat, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + init(status); + applyPattern(pat, status); +} + +SelectFormat::SelectFormat(const SelectFormat& other) : Format(other) { + UErrorCode status = U_ZERO_ERROR; + pattern = other.pattern; + copyHashtable(other.parsedValuesHash, status); +} + +SelectFormat::~SelectFormat() { + delete parsedValuesHash; +} + +void +SelectFormat::init(UErrorCode& status) { + parsedValuesHash = NULL; + pattern.remove(); + status = U_ZERO_ERROR; +} + + +void +SelectFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + + this->parsedValuesHash = NULL; + this->pattern = newPattern; + enum State{ startState, keywordState, pastKeywordState, phraseState}; + + //Initialization + UnicodeString keyword = UnicodeString(); + UnicodeString phrase = UnicodeString(); + UnicodeString* ptrPhrase ; + int32_t braceCount = 0; + + if (parsedValuesHash == NULL) { + parsedValuesHash = new Hashtable(TRUE, status); + parsedValuesHash = new 
Hashtable(TRUE, status); + if (U_FAILURE(status)) { + return; + } + parsedValuesHash->setValueDeleter(deleteHashStrings); + } + + //Process the state machine + State state = startState; + for (int32_t i = 0; i < pattern.length(); ++i) { + //Get the character and check its type + UChar ch = pattern.charAt(i); + characterClass type; + classifyCharacter(ch, type); + + //Allow any character in phrase but nowhere else + if ( type == tOther ) { + if ( state == phraseState ){ + phrase += ch; + continue; + }else { + status = U_PATTERN_SYNTAX_ERROR; + return; + } + } + + //Process the state machine + switch (state) { + //At the start of pattern + case startState: + switch (type) { + case tSpace: + break; + case tStartKeyword: + state = keywordState; + keyword += ch; + break; + //If anything else is encountered, it's a syntax error + default: + status = U_PATTERN_SYNTAX_ERROR; + return; + }//end of switch(type) + break; + + //Handle the keyword state + case keywordState: + switch (type) { + case tSpace: + state = pastKeywordState; + break; + case tStartKeyword: + case tContinueKeyword: + keyword += ch; + break; + case tLeftBrace: + state = phraseState; + break; + //If anything else is encountered, it's a syntax error + default: + status = U_PATTERN_SYNTAX_ERROR; + return; + }//end of switch(type) + break; + + //Handle the pastkeyword state + case pastKeywordState: + switch (type) { + case tSpace: + break; + case tLeftBrace: + state = phraseState; + break; + //If anything else is encountered, it's a syntax error + default: + status = U_PATTERN_SYNTAX_ERROR; + return; + }//end of switch(type) + break; + + //Handle the phrase state + case phraseState: + switch (type) { + case tLeftBrace: + braceCount++; + phrase += ch; + break; + case tRightBrace: + //Matching keyword, phrase pair found + if (braceCount == 0){ + //Check validity of keyword + if (parsedValuesHash->get(keyword) != NULL) { + status = U_DUPLICATE_KEYWORD; + return; + } + if (keyword.length() == 0) { + status = 
U_PATTERN_SYNTAX_ERROR; + return; + } + + //Store the keyword, phrase pair in hashTable + ptrPhrase = new UnicodeString(phrase); + parsedValuesHash->put( keyword, ptrPhrase, status); + + //Reinitialize + keyword.remove(); + phrase.remove(); + ptrPhrase = NULL; + state = startState; + } + + if (braceCount > 0){ + braceCount-- ; + phrase += ch; + } + break; + default: + phrase += ch; + }//end of switch(type) + break; + + //Handle the default case of switch(state) + default: + status = U_PATTERN_SYNTAX_ERROR; + return; + + }//end of switch(state) + } + + //Check if the stae machine is back to startState + if ( state != startState){ + status = U_PATTERN_SYNTAX_ERROR; + return; + } + + //Check if "other" keyword is present + if ( !checkSufficientDefinition() ) { + status = U_DEFAULT_KEYWORD_MISSING; + } + return; +} + +UnicodeString& +SelectFormat::format(const Formattable& obj, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const +{ + if (U_FAILURE(status)) return appendTo; + + switch (obj.getType()) + { + case Formattable::kString: + return format((UnicodeString)obj.getString(), appendTo, pos, status); + default: + status = U_ILLEGAL_ARGUMENT_ERROR; + return appendTo; + } +} + +UnicodeString& +SelectFormat::format(const UnicodeString& sInput, + UnicodeString& appendTo, + FieldPosition& pos, + UErrorCode& status) const { + + if (U_FAILURE(status)) return appendTo; + + //Check for the validity of the keyword + if ( !checkValidKeyword(sInput) ){ + status = U_ILLEGAL_ARGUMENT_ERROR; + return appendTo; + } + + if (parsedValuesHash == NULL) { + status = U_INVALID_FORMAT_ERROR; + return appendTo; + } + + UnicodeString *selectedPattern = (UnicodeString *)parsedValuesHash->get(sInput); + if (selectedPattern == NULL) { + selectedPattern = (UnicodeString *)parsedValuesHash->get(SELECT_KEYWORD_OTHER); + } + + return appendTo += *selectedPattern; +} + +UnicodeString& +SelectFormat::toPattern(UnicodeString& appendTo) { + appendTo += pattern; + return 
appendTo; +} + +void +SelectFormat::classifyCharacter(UChar ch, characterClass& type) const{ + if ((ch >= CAP_A) && (ch <= CAP_Z)) { + type = tStartKeyword; + return; + } + if ((ch >= LOW_A) && (ch <= LOW_Z)) { + type = tStartKeyword; + return; + } + if ((ch >= U_ZERO) && (ch <= U_NINE)) { + type = tContinueKeyword; + return; + } + switch (ch) { + case LEFTBRACE: + type = tLeftBrace; + break; + case RIGHTBRACE: + type = tRightBrace; + break; + case SPACE: + case TAB: + type = tSpace; + break; + case HYPHEN: + case LOWLINE: + type = tContinueKeyword; + break; + default : + type = tOther; + } +} + +UBool +SelectFormat::checkSufficientDefinition() { + // Check that at least the default rule is defined. + if (parsedValuesHash == NULL) return FALSE; + if (parsedValuesHash->get(SELECT_KEYWORD_OTHER) == NULL) { + return FALSE; + } + else { + return TRUE; + } +} + +UBool +SelectFormat::checkValidKeyword(const UnicodeString& argKeyword ) const{ + UnicodeString keyword = UnicodeString(); + enum State{ startState, keywordState, pastKeywordState }; + + //Initialize + State state = startState; + keyword.remove(); + + //Start the processing + for (int32_t i = 0; i < argKeyword.length(); ++i) { + //Get the character and check its type + UChar ch = argKeyword.charAt(i); + characterClass type; + classifyCharacter(ch, type); + + //Any character that is not allowed + if ( type == tOther ) { + return FALSE; + } + + //Process the state machine + switch (state) { + //At the start of pattern + case startState: + switch (type) { + case tSpace: + break; + case tStartKeyword: + state = keywordState; + keyword += ch; + break; + //If anything else is encountered, it's a syntax error + default: + return FALSE; + }//end of switch(type) + break; + + //Handle the keyword state + case keywordState: + switch (type) { + case tSpace: + state = pastKeywordState; + break; + case tStartKeyword: + case tContinueKeyword: + keyword += ch; + break; + //If anything else is encountered,it's a syntax error + 
default: + return FALSE; + }//end of switch(type) + break; + + //Handle the pastkeyword state + case pastKeywordState: + switch (type) { + case tSpace: + break; + //If anything else is encountered,it's a syntax error + default: + return FALSE; + }//end of switch(type) + break; + + default: + return FALSE; + }//end of switch(state) + + }//end of loop of argKeyword + + return TRUE; +} + +Format* SelectFormat::clone() const +{ + return new SelectFormat(*this); +} + +SelectFormat& +SelectFormat::operator=(const SelectFormat& other) { + if (this != &other) { + UErrorCode status = U_ZERO_ERROR; + delete parsedValuesHash; + pattern = other.pattern; + copyHashtable(other.parsedValuesHash, status); + } + return *this; +} + +UBool +SelectFormat::operator==(const Format& other) const { + // This protected comparison operator should only be called by subclasses + // which have confirmed that the other object being compared against is + // an instance of a sublcass of SelectFormat. THIS IS IMPORTANT. 
+ // Format::operator== guarantees that this cast is safe + SelectFormat* fmt = (SelectFormat*)&other; + Hashtable* hashOther = fmt->parsedValuesHash; + if ( parsedValuesHash == NULL && hashOther == NULL) + return TRUE; + if ( parsedValuesHash == NULL || hashOther == NULL) + return FALSE; + if ( hashOther->count() != parsedValuesHash->count() ){ + return FALSE; + } + + const UHashElement* elem = NULL; + int32_t pos = -1; + while ((elem = hashOther->nextElement(pos)) != NULL) { + const UHashTok otherKeyTok = elem->key; + UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer; + const UHashTok otherKeyToVal = elem->value; + UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer; + + UnicodeString* thisElemValue = (UnicodeString*)parsedValuesHash->get(*otherKey); + if ( thisElemValue == NULL ){ + return FALSE; + } + if ( *thisElemValue != *otherValue){ + return FALSE; + } + + } + pos = -1; + while ((elem = parsedValuesHash->nextElement(pos)) != NULL) { + const UHashTok thisKeyTok = elem->key; + UnicodeString* thisKey = (UnicodeString*)thisKeyTok.pointer; + const UHashTok thisKeyToVal = elem->value; + UnicodeString* thisValue = (UnicodeString*)thisKeyToVal.pointer; + + UnicodeString* otherElemValue = (UnicodeString*)hashOther->get(*thisKey); + if ( otherElemValue == NULL ){ + return FALSE; + } + if ( *otherElemValue != *thisValue){ + return FALSE; + } + + } + return TRUE; +} + +UBool +SelectFormat::operator!=(const Format& other) const { + return !operator==(other); +} + +void +SelectFormat::parseObject(const UnicodeString& /*source*/, + Formattable& /*result*/, + ParsePosition& /*pos*/) const +{ + // TODO: not yet supported in icu4j and icu4c +} + +void +SelectFormat::copyHashtable(Hashtable *other, UErrorCode& status) { + if (other == NULL) { + parsedValuesHash = NULL; + return; + } + parsedValuesHash = new Hashtable(TRUE, status); + if (U_FAILURE(status)){ + return; + } + parsedValuesHash->setValueDeleter(deleteHashStrings); + + int32_t pos = -1; + 
const UHashElement* elem = NULL; + + // walk through the hash table and create a deep clone + while ((elem = other->nextElement(pos)) != NULL){ + const UHashTok otherKeyTok = elem->key; + UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer; + const UHashTok otherKeyToVal = elem->value; + UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer; + parsedValuesHash->put(*otherKey, new UnicodeString(*otherValue), status); + if (U_FAILURE(status)){ + return; + } + } +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +//eof diff --git a/icu4c/source/i18n/selfmtimpl.h b/icu4c/source/i18n/selfmtimpl.h index 39d631390a0..208a6599a2e 100755 --- a/icu4c/source/i18n/selfmtimpl.h +++ b/icu4c/source/i18n/selfmtimpl.h @@ -1,97 +1,97 @@ -/******************************************************************** - * COPYRIGHT: - * Copyright (c) 1997-2010, International Business Machines Corporation and - * others. All Rights Reserved. - * Copyright (C) 2010 , Yahoo! Inc. - ******************************************************************** - * File SELECTFMT_IMPL.H - * - * Date Name Description - * 11/11/09 kirtig Finished first cut of implementation. - *********************************************************************/ - - -#ifndef SELFMTIMPL -#define SELFMTIMPL - -/** - * \file - * \brief C++ API: Defines rules for mapping positive long values onto a small set of keywords. 
- */ - -#if !UCONFIG_NO_FORMATTING - -#include "unicode/format.h" -#include "unicode/locid.h" -#include "unicode/parseerr.h" -#include "unicode/utypes.h" -#include "uvector.h" -#include "hash.h" - -U_NAMESPACE_BEGIN - -#define DOT ((UChar)0x002E) -#define SINGLE_QUOTE ((UChar)0x0027) -#define SLASH ((UChar)0x002F) -#define BACKSLASH ((UChar)0x005C) -#define SPACE ((UChar)0x0020) -#define TAB ((UChar)0x0009) -#define QUOTATION_MARK ((UChar)0x0022) -#define ASTERISK ((UChar)0x002A) -#define COMMA ((UChar)0x002C) -#define HYPHEN ((UChar)0x002D) -#define U_ZERO ((UChar)0x0030) -#define U_ONE ((UChar)0x0031) -#define U_TWO ((UChar)0x0032) -#define U_THREE ((UChar)0x0033) -#define U_FOUR ((UChar)0x0034) -#define U_FIVE ((UChar)0x0035) -#define U_SIX ((UChar)0x0036) -#define U_SEVEN ((UChar)0x0037) -#define U_EIGHT ((UChar)0x0038) -#define U_NINE ((UChar)0x0039) -#define COLON ((UChar)0x003A) -#define SEMI_COLON ((UChar)0x003B) -#define CAP_A ((UChar)0x0041) -#define CAP_B ((UChar)0x0042) -#define CAP_R ((UChar)0x0052) -#define CAP_Z ((UChar)0x005A) -#define LOWLINE ((UChar)0x005F) -#define LEFTBRACE ((UChar)0x007B) -#define RIGHTBRACE ((UChar)0x007D) - -#define LOW_A ((UChar)0x0061) -#define LOW_B ((UChar)0x0062) -#define LOW_C ((UChar)0x0063) -#define LOW_D ((UChar)0x0064) -#define LOW_E ((UChar)0x0065) -#define LOW_F ((UChar)0x0066) -#define LOW_G ((UChar)0x0067) -#define LOW_H ((UChar)0x0068) -#define LOW_I ((UChar)0x0069) -#define LOW_J ((UChar)0x006a) -#define LOW_K ((UChar)0x006B) -#define LOW_L ((UChar)0x006C) -#define LOW_M ((UChar)0x006D) -#define LOW_N ((UChar)0x006E) -#define LOW_O ((UChar)0x006F) -#define LOW_P ((UChar)0x0070) -#define LOW_Q ((UChar)0x0071) -#define LOW_R ((UChar)0x0072) -#define LOW_S ((UChar)0x0073) -#define LOW_T ((UChar)0x0074) -#define LOW_U ((UChar)0x0075) -#define LOW_V ((UChar)0x0076) -#define LOW_W ((UChar)0x0077) -#define LOW_X ((UChar)0x0078) -#define LOW_Y ((UChar)0x0079) -#define LOW_Z ((UChar)0x007A) - -class UnicodeSet; - 
-U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_FORMATTING */ - -#endif // SELFMTIMPL -//eof +/******************************************************************** + * COPYRIGHT: + * Copyright (c) 1997-2010, International Business Machines Corporation and + * others. All Rights Reserved. + * Copyright (C) 2010 , Yahoo! Inc. + ******************************************************************** + * File SELECTFMT_IMPL.H + * + * Date Name Description + * 11/11/09 kirtig Finished first cut of implementation. + *********************************************************************/ + + +#ifndef SELFMTIMPL +#define SELFMTIMPL + +/** + * \file + * \brief C++ API: Defines rules for mapping positive long values onto a small set of keywords. + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/format.h" +#include "unicode/locid.h" +#include "unicode/parseerr.h" +#include "unicode/utypes.h" +#include "uvector.h" +#include "hash.h" + +U_NAMESPACE_BEGIN + +#define DOT ((UChar)0x002E) +#define SINGLE_QUOTE ((UChar)0x0027) +#define SLASH ((UChar)0x002F) +#define BACKSLASH ((UChar)0x005C) +#define SPACE ((UChar)0x0020) +#define TAB ((UChar)0x0009) +#define QUOTATION_MARK ((UChar)0x0022) +#define ASTERISK ((UChar)0x002A) +#define COMMA ((UChar)0x002C) +#define HYPHEN ((UChar)0x002D) +#define U_ZERO ((UChar)0x0030) +#define U_ONE ((UChar)0x0031) +#define U_TWO ((UChar)0x0032) +#define U_THREE ((UChar)0x0033) +#define U_FOUR ((UChar)0x0034) +#define U_FIVE ((UChar)0x0035) +#define U_SIX ((UChar)0x0036) +#define U_SEVEN ((UChar)0x0037) +#define U_EIGHT ((UChar)0x0038) +#define U_NINE ((UChar)0x0039) +#define COLON ((UChar)0x003A) +#define SEMI_COLON ((UChar)0x003B) +#define CAP_A ((UChar)0x0041) +#define CAP_B ((UChar)0x0042) +#define CAP_R ((UChar)0x0052) +#define CAP_Z ((UChar)0x005A) +#define LOWLINE ((UChar)0x005F) +#define LEFTBRACE ((UChar)0x007B) +#define RIGHTBRACE ((UChar)0x007D) + +#define LOW_A ((UChar)0x0061) +#define LOW_B ((UChar)0x0062) +#define LOW_C ((UChar)0x0063) 
+#define LOW_D ((UChar)0x0064) +#define LOW_E ((UChar)0x0065) +#define LOW_F ((UChar)0x0066) +#define LOW_G ((UChar)0x0067) +#define LOW_H ((UChar)0x0068) +#define LOW_I ((UChar)0x0069) +#define LOW_J ((UChar)0x006a) +#define LOW_K ((UChar)0x006B) +#define LOW_L ((UChar)0x006C) +#define LOW_M ((UChar)0x006D) +#define LOW_N ((UChar)0x006E) +#define LOW_O ((UChar)0x006F) +#define LOW_P ((UChar)0x0070) +#define LOW_Q ((UChar)0x0071) +#define LOW_R ((UChar)0x0072) +#define LOW_S ((UChar)0x0073) +#define LOW_T ((UChar)0x0074) +#define LOW_U ((UChar)0x0075) +#define LOW_V ((UChar)0x0076) +#define LOW_W ((UChar)0x0077) +#define LOW_X ((UChar)0x0078) +#define LOW_Y ((UChar)0x0079) +#define LOW_Z ((UChar)0x007A) + +class UnicodeSet; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // SELFMTIMPL +//eof diff --git a/icu4c/source/i18n/unicode/selfmt.h b/icu4c/source/i18n/unicode/selfmt.h index 9b714015e04..817b31be0a1 100755 --- a/icu4c/source/i18n/unicode/selfmt.h +++ b/icu4c/source/i18n/unicode/selfmt.h @@ -1,373 +1,373 @@ -/******************************************************************** - * COPYRIGHT: - * Copyright (c) 1997-2010, International Business Machines Corporation and - * others. All Rights Reserved. - * Copyright (C) 2010 , Yahoo! Inc. - ******************************************************************** - * - * File SELFMT.H - * - * Modification History: - * - * Date Name Description - * 11/11/09 kirtig Finished first cut of implementation. - ********************************************************************/ - -#ifndef SELFMT -#define SELFMT - -#include "unicode/utypes.h" -#include "unicode/numfmt.h" - -/** - * \file - * \brief C++ API: SelectFormat object - */ - -#if !UCONFIG_NO_FORMATTING - -U_NAMESPACE_BEGIN - -class Hashtable; - -/** - *
SelectFormat
supports the creation of internationalized
- * messages by selecting phrases based on keywords. The pattern specifies
- * how to map keywords to phrases and provides a default phrase. The
- * object provided to the format method is a string that's matched
- * against the keywords. If there is a match, the corresponding phrase
- * is selected; otherwise, the default phrase is used.
SelectFormat
for Gender AgreementThe main use case for the select format is gender based inflection. - * When names or nouns are inserted into sentences, their gender can affect pronouns, - * verb forms, articles, and adjectives. Special care needs to be - * taken for the case where the gender cannot be determined. - * The impact varies between languages:
- * - *Some other languages have noun classes that are not related to gender, - * but similar in grammatical use. - * Some African languages have around 20 noun classes.
- * - *To enable localizers to create sentence patterns that take their
- * language's gender dependencies into consideration, software has to provide
- * information about the gender associated with a noun or name to
- * MessageFormat
.
- * Two main cases can be distinguished:
The resulting keyword is provided to MessageFormat
as a
- * parameter separate from the name or noun it's associated with. For example,
- * to generate a message such as "Jean went to Paris", three separate arguments
- * would be provided: The name of the person as argument 0, the gender of
- * the person as argument 1, and the name of the city as argument 2.
- * The sentence pattern for English, where the gender of the person has
- * no impact on this simple sentence, would not refer to argument 1 at all:
{0} went to {2}.- * - *
The sentence pattern for French, where the gender of the person affects - * the form of the participle, uses a select format based on argument 1:
- * - *{0} est {1, select, female {allu00E9;e} other {allu00E9;}} u00E0; {2}.- * - *
Patterns can be nested, so that it's possible to handle interactions of - * number and gender where necessary. For example, if the above sentence should - * allow for the names of several people to be inserted, the following sentence - * pattern can be used (with argument 0 the list of people's names, - * argument 1 the number of people, argument 2 their combined gender, and - * argument 3 the city name):
- * - *{0} {1, plural, - * one {est {2, select, female {allu00E9;e} other {allu00E9;}}} - * other {sont {2, select, female {allu00E9;es} other {allu00E9;s}}} - * }u00E0; {3}.- * - *
The SelectFormat
pattern text defines the phrase output
- * for each user-defined keyword.
- * The pattern is a sequence of keyword{phrase}
- * clauses.
- * Each clause assigns the phrase phrase
- * to the user-defined keyword
.
Keywords must match the pattern [a-zA-Z][a-zA-Z0-9_-]*; keywords
- * that don't match this pattern result in the error code
- * U_ILLEGAL_CHARACTER
.
- * You always have to define a phrase for the default keyword
- * other
; this phrase is returned when the keyword
- * provided to
- * the format
method matches no other keyword.
- * If a pattern does not provide a phrase for other
, the method
- * it's provided to returns the error U_DEFAULT_KEYWORD_MISSING
.
- * If a pattern provides more than one phrase for the same keyword, the
- * error U_DUPLICATE_KEYWORD
is returned.
- *
- * Spaces between keyword
and
- * {phrase}
will be ignored; spaces within
- * {phrase}
will be preserved.
- * - *
The phrase for a particular select case may contain other message
- * format patterns. SelectFormat
preserves these so that you
- * can use the strings produced by SelectFormat
with other
- * formatters. If you are using SelectFormat
inside a
- * MessageFormat
pattern, MessageFormat
will
- * automatically evaluate the resulting format pattern.
- * Thus, curly braces ({
, }
) are only allowed
- * in phrases to define a nested format pattern.
Example: - *
- * - * UErrorCode status = U_ZERO_ERROR; - * MessageFormat *msgFmt = new MessageFormat(UnicodeString("{0} est {1, select, female {allu00E9;e} other {allu00E9;}} u00E0; Paris."), Locale("fr"), status); - * if (U_FAILURE(status)) { - * return; - * } - * FieldPosition ignore(FieldPosition::DONT_CARE); - * UnicodeString result; - * - * char* str1= "Kirti,female"; - * Formattable args1[] = {"Kirti","female"}; - * msgFmt->format(args1, 2, result, ignore, status); - * cout << "Input is " << str1 << " and result is: " << result << endl; - * delete msgFmt; - * - *- * Produces the output:
Input is Kirti,female and result is: Kirti est allu00E9;e u00E0; Paris.
- *
- * @draft ICU 4.4
- */
-
-class U_I18N_API SelectFormat : public Format {
-public:
-
- /**
- * Creates a new SelectFormat
.
- * @param status output param set to success/failure code on exit, which
- * must not indicate a failure before the function call.
- * @draft ICU 4.4
- */
- SelectFormat(UErrorCode& status);
-
- /**
- * Creates a new SelectFormat
for a given pattern string.
- * @param pattern the pattern for this SelectFormat
.
- * errors are returned to status if the pattern is invalid.
- * @param status output param set to success/failure code on exit, which
- * must not indicate a failure before the function call.
- * @draft ICU 4.4
- */
- SelectFormat(const UnicodeString& pattern, UErrorCode& status);
-
- /**
- * copy constructor.
- * @draft ICU 4.4
- */
- SelectFormat(const SelectFormat& other);
-
- /**
- * Destructor.
- * @draft ICU 4.4
- */
- virtual ~SelectFormat();
-
- /**
- * Sets the pattern used by this select format.
- * for the keyword rules.
- * Patterns and their interpretation are specified in the class description.
- *
- * @param pattern the pattern for this select format
- * errors are returned to status if the pattern is invalid.
- * @param status output param set to success/failure code on exit, which
- * must not indicate a failure before the function call.
- * @draft ICU 4.4
- */
- void applyPattern(const UnicodeString& pattern, UErrorCode& status);
-
- /**
- * Selects the phrase for the given keyword
- *
- * @param keyword The keyword that is used to select an alternative.
- * @param appendTo output parameter to receive result.
- * result is appended to existing contents.
- * @param pos On input: an alignment field, if desired.
- * On output: the offsets of the alignment field.
- * @param status output param set to success/failure code on exit, which
- * must not indicate a failure before the function call.
- * @return Reference to 'appendTo' parameter.
- * @draft ICU 4.4
- */
- UnicodeString& format(const UnicodeString& keyword,
- UnicodeString& appendTo,
- FieldPosition& pos,
- UErrorCode& status) const;
-
- /**
- * Assignment operator
- *
- * @param other the SelectFormat object to copy from.
- * @draft ICU 4.4
- */
- SelectFormat& operator=(const SelectFormat& other);
-
- /**
- * Return true if another object is semantically equal to this one.
- *
- * @param other the SelectFormat object to be compared with.
- * @return true if other is semantically equal to this.
- * @draft ICU 4.4
- */
- virtual UBool operator==(const Format& other) const;
-
- /**
- * Return true if another object is semantically unequal to this one.
- *
- * @param other the SelectFormat object to be compared with.
- * @return true if other is semantically unequal to this.
- * @draft ICU 4.4
- */
- virtual UBool operator!=(const Format& other) const;
-
- /**
- * Clones this Format object polymorphically. The caller owns the
- * result and should delete it when done.
- * @draft ICU 4.4
- */
- virtual Format* clone(void) const;
-
- /**
- * Format an object to produce a string.
- * This method handles keyword strings.
- * If the Formattable object is not a UnicodeString code>,
- * then it returns a failing UErrorCode.
- *
- * @param obj A keyword string that is used to select an alternative.
- * @param appendTo output parameter to receive result.
- * Result is appended to existing contents.
- * @param pos On input: an alignment field, if desired.
- * On output: the offsets of the alignment field.
- * @param status output param filled with success/failure status.
- * @return Reference to 'appendTo' parameter.
- * @draft ICU 4.4
- */
- UnicodeString& format(const Formattable& obj,
- UnicodeString& appendTo,
- FieldPosition& pos,
- UErrorCode& status) const;
-
- /**
- * Returns the pattern from applyPattern() or constructor.
- *
- * @param appendTo output parameter to receive result.
- * Result is appended to existing contents.
- * @return the UnicodeString with inserted pattern.
- * @draft ICU 4.4
- */
- UnicodeString& toPattern(UnicodeString& appendTo);
-
- /**
- * This method is not yet supported by SelectFormat
.
- *
- * Before calling, set parse_pos.index to the offset you want to start
- * parsing at in the source. After calling, parse_pos.index is the end of
- * the text you parsed. If error occurs, index is unchanged.
- *
- * When parsing, leading whitespace is discarded (with a successful parse),
- * while trailing whitespace is left as is.
- *
- * See Format::parseObject() for more.
- *
- * @param source The string to be parsed into an object.
- * @param result Formattable to be set to the parse result.
- * If parse fails, return contents are undefined.
- * @param parse_pos The position to start parsing at. Upon return
- * this param is set to the position after the
- * last character successfully parsed. If the
- * source is not parsed successfully, this param
- * will remain unchanged.
- * @draft ICU 4.4
- */
- virtual void parseObject(const UnicodeString& source,
- Formattable& result,
- ParsePosition& parse_pos) const;
-
- /**
- * ICU "poor man's RTTI", returns a UClassID for this class.
- */
- static UClassID U_EXPORT2 getStaticClassID(void);
-
- /**
- * ICU "poor man's RTTI", returns a UClassID for the actual class.
- * @draft ICU 4.4
- */
- virtual UClassID getDynamicClassID() const;
-
-private:
- typedef enum characterClass{
- tStartKeyword,
- tContinueKeyword,
- tLeftBrace,
- tRightBrace,
- tSpace,
- tOther
- }characterClass;
-
- UnicodeString pattern;
- //Hash to store the keyword, phrase pairs
- Hashtable *parsedValuesHash;
-
- SelectFormat(); // default constructor not implemented
- void init(UErrorCode& status);
- //For the applyPattern , classifies char.s in one of the characterClass
- void classifyCharacter(UChar ch, characterClass& type) const;
- //Checks if the "other" keyword is present in pattern
- UBool checkSufficientDefinition();
- //Checks if the keyword passed is valid
- UBool checkValidKeyword(const UnicodeString& argKeyword) const;
- void parsingFailure();
- void copyHashtable(Hashtable *other, UErrorCode& status);
-};
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // _SELFMT
-//eof
+/********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 1997-2010, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ * Copyright (C) 2010 , Yahoo! Inc.
+ ********************************************************************
+ *
+ * File SELFMT.H
+ *
+ * Modification History:
+ *
+ * Date Name Description
+ * 11/11/09 kirtig Finished first cut of implementation.
+ ********************************************************************/
+
+#ifndef SELFMT
+#define SELFMT
+
+#include "unicode/utypes.h"
+#include "unicode/numfmt.h"
+
+/**
+ * \file
+ * \brief C++ API: SelectFormat object
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+U_NAMESPACE_BEGIN
+
+class Hashtable;
+
+/**
+ * <code>SelectFormat</code> supports the creation of internationalized
+ * messages by selecting phrases based on keywords. The pattern specifies
+ * how to map keywords to phrases and provides a default phrase. The
+ * object provided to the format method is a string that's matched
+ * against the keywords. If there is a match, the corresponding phrase
+ * is selected; otherwise, the default phrase is used.
+ *
+ * Using <code>SelectFormat</code> for Gender Agreement
+ *
+ * The main use case for the select format is gender based inflection.
+ * When names or nouns are inserted into sentences, their gender can affect pronouns,
+ * verb forms, articles, and adjectives. Special care needs to be
+ * taken for the case where the gender cannot be determined.
+ * The impact varies between languages:
+ *
+ * <ul>
+ * <li>English has three genders, and unknown gender is handled as a special
+ * case. Names use the gender of the named person (if known), nouns referring
+ * to people use natural gender, and inanimate objects are usually neutral.
+ * The gender only affects pronouns: "he", "she", "it", "they".
+ * </li>
+ * <li>German differs from English in that the gender of nouns is rather
+ * arbitrary, even for nouns referring to people ("M&auml;dchen", girl, is neutral).
+ * The gender affects pronouns ("er", "sie", "es"), articles ("der", "die",
+ * "das"), and adjective forms ("guter Mann", "gute Frau", "gutes M&auml;dchen").
+ * </li>
+ * <li>French has only two genders; as in German the gender of nouns
+ * is rather arbitrary – for sun and moon, the genders
+ * are the opposite of those in German. The gender affects
+ * pronouns ("il", "elle"), articles ("le", "la"),
+ * adjective forms ("bon", "bonne"), and sometimes
+ * verb forms ("all&eacute;", "all&eacute;e").
+ * </li>
+ * <li>Polish distinguishes five genders (or noun classes),
+ * human masculine, animate non-human masculine, inanimate masculine,
+ * feminine, and neuter.
+ * </li>
+ * </ul>
+ * Some other languages have noun classes that are not related to gender,
+ * but similar in grammatical use.
+ * Some African languages have around 20 noun classes.
+ *
+ * To enable localizers to create sentence patterns that take their
+ * language's gender dependencies into consideration, software has to provide
+ * information about the gender associated with a noun or name to
+ * <code>MessageFormat</code>.
+ * Two main cases can be distinguished:
+ *
+ * <ul>
+ * <li>For people, natural gender information should be maintained for each person.
+ * The keywords "male", "female", "mixed" (for groups of people)
+ * and "unknown" are used.
+ * </li>
+ * <li>For nouns, grammatical gender information should be maintained for
+ * each noun and per language, e.g., in resource bundles.
+ * The keywords "masculine", "feminine", and "neuter" are commonly used,
+ * but some languages may require other keywords.
+ * </li>
+ * </ul>
+ * The resulting keyword is provided to <code>MessageFormat</code> as a
+ * parameter separate from the name or noun it's associated with. For example,
+ * to generate a message such as "Jean went to Paris", three separate arguments
+ * would be provided: The name of the person as argument 0, the gender of
+ * the person as argument 1, and the name of the city as argument 2.
+ * The sentence pattern for English, where the gender of the person has
+ * no impact on this simple sentence, would not refer to argument 1 at all:
+ *
+ * {0} went to {2}.
+ *
+ * The sentence pattern for French, where the gender of the person affects
+ * the form of the participle, uses a select format based on argument 1:
+ *
+ * {0} est {1, select, female {all&eacute;e} other {all&eacute;}} &agrave; {2}.
+ *
+ * Patterns can be nested, so that it's possible to handle interactions of
+ * number and gender where necessary. For example, if the above sentence should
+ * allow for the names of several people to be inserted, the following sentence
+ * pattern can be used (with argument 0 the list of people's names,
+ * argument 1 the number of people, argument 2 their combined gender, and
+ * argument 3 the city name):
+ *
+ * {0} {1, plural,
+ * one {est {2, select, female {all&eacute;e} other {all&eacute;}}}
+ * other {sont {2, select, female {all&eacute;es} other {all&eacute;s}}}
+ * }&agrave; {3}.
+ *
+ * Patterns and Their Interpretation
+ *
+ * The <code>SelectFormat</code> pattern text defines the phrase output
+ * for each user-defined keyword.
+ * The pattern is a sequence of <code><i>keyword</i>{<i>phrase</i>}</code>
+ * clauses.
+ * Each clause assigns the phrase <code><i>phrase</i></code>
+ * to the user-defined <code><i>keyword</i></code>.
+ *
+ * Keywords must match the pattern [a-zA-Z][a-zA-Z0-9_-]*; keywords
+ * that don't match this pattern result in the error code
+ * <code>U_ILLEGAL_CHARACTER</code>.
+ * You always have to define a phrase for the default keyword
+ * <code>other</code>; this phrase is returned when the keyword
+ * provided to
+ * the <code>format</code> method matches no other keyword.
+ * If a pattern does not provide a phrase for <code>other</code>, the method
+ * it's provided to returns the error <code>U_DEFAULT_KEYWORD_MISSING</code>.
+ * If a pattern provides more than one phrase for the same keyword, the
+ * error <code>U_DUPLICATE_KEYWORD</code> is returned.
+ *
+ * Spaces between <code><i>keyword</i></code> and
+ * <code>{<i>phrase</i>}</code> will be ignored; spaces within
+ * <code>{<i>phrase</i>}</code> will be preserved.
+ *
+ * The phrase for a particular select case may contain other message
+ * format patterns. <code>SelectFormat</code> preserves these so that you
+ * can use the strings produced by <code>SelectFormat</code> with other
+ * formatters. If you are using <code>SelectFormat</code> inside a
+ * <code>MessageFormat</code> pattern, <code>MessageFormat</code> will
+ * automatically evaluate the resulting format pattern.
+ * Thus, curly braces (<code>{</code>, <code>}</code>) are only allowed
+ * in phrases to define a nested format pattern.
+ *
+ * Example:
+ *
+ *
+ * UErrorCode status = U_ZERO_ERROR;
+ * MessageFormat *msgFmt = new MessageFormat(UnicodeString("{0} est {1, select, female {all&eacute;e} other {all&eacute;}} &agrave; Paris."), Locale("fr"), status);
+ * if (U_FAILURE(status)) {
+ * return;
+ * }
+ * FieldPosition ignore(FieldPosition::DONT_CARE);
+ * UnicodeString result;
+ *
+ * char* str1= "Kirti,female";
+ * Formattable args1[] = {"Kirti","female"};
+ * msgFmt->format(args1, 2, result, ignore, status);
+ * cout << "Input is " << str1 << " and result is: " << result << endl;
+ * delete msgFmt;
+ *
+ *
+ * Produces the output:
+ * Input is Kirti,female and result is: Kirti est all&eacute;e &agrave; Paris.
+ *
+ * @draft ICU 4.4
+ */
+
+class U_I18N_API SelectFormat : public Format {
+public:
+
+ /**
+ * Creates a new <code>SelectFormat</code>.
+ * @param status output param set to success/failure code on exit, which
+ * must not indicate a failure before the function call.
+ * @draft ICU 4.4
+ */
+ SelectFormat(UErrorCode& status);
+
+ /**
+ * Creates a new <code>SelectFormat</code> for a given pattern string.
+ * @param pattern the pattern for this <code>SelectFormat</code>.
+ * errors are returned to status if the pattern is invalid.
+ * @param status output param set to success/failure code on exit, which
+ * must not indicate a failure before the function call.
+ * @draft ICU 4.4
+ */
+ SelectFormat(const UnicodeString& pattern, UErrorCode& status);
+
+ /**
+ * Copy constructor. Copies the pattern string and makes a deep copy of
+ * the parsed keyword/phrase table of <code>other</code>.
+ * @param other the SelectFormat object to copy from.
+ * @draft ICU 4.4
+ */
+ SelectFormat(const SelectFormat& other);
+
+ /**
+ * Destructor. Deletes the parsed keyword/phrase table.
+ * @draft ICU 4.4
+ */
+ virtual ~SelectFormat();
+
+ /**
+ * Sets the pattern used by this select format for the keyword rules.
+ * Patterns and their interpretation are specified in the class description.
+ *
+ * @param pattern the pattern for this select format
+ * errors are returned to status if the pattern is invalid.
+ * @param status output param set to success/failure code on exit, which
+ * must not indicate a failure before the function call.
+ * @draft ICU 4.4
+ */
+ void applyPattern(const UnicodeString& pattern, UErrorCode& status);
+
+ /**
+ * Selects the phrase for the given keyword
+ *
+ * @param keyword The keyword that is used to select an alternative.
+ * @param appendTo output parameter to receive result.
+ * result is appended to existing contents.
+ * @param pos On input: an alignment field, if desired.
+ * On output: the offsets of the alignment field.
+ * @param status output param set to success/failure code on exit, which
+ * must not indicate a failure before the function call.
+ * @return Reference to 'appendTo' parameter.
+ * @draft ICU 4.4
+ */
+ UnicodeString& format(const UnicodeString& keyword,
+ UnicodeString& appendTo,
+ FieldPosition& pos,
+ UErrorCode& status) const;
+
+ /**
+ * Assignment operator
+ *
+ * @param other the SelectFormat object to copy from.
+ * @draft ICU 4.4
+ */
+ SelectFormat& operator=(const SelectFormat& other);
+
+ /**
+ * Return true if another object is semantically equal to this one.
+ *
+ * @param other the SelectFormat object to be compared with.
+ * @return true if other is semantically equal to this.
+ * @draft ICU 4.4
+ */
+ virtual UBool operator==(const Format& other) const;
+
+ /**
+ * Return true if another object is semantically unequal to this one.
+ * Implemented as the negation of operator==.
+ *
+ * @param other the SelectFormat object to be compared with.
+ * @return true if other is semantically unequal to this.
+ * @draft ICU 4.4
+ */
+ virtual UBool operator!=(const Format& other) const;
+
+ /**
+ * Clones this Format object polymorphically. The caller owns the
+ * result and should delete it when done.
+ * @return a newly allocated copy of this object.
+ * @draft ICU 4.4
+ */
+ virtual Format* clone(void) const;
+
+ /**
+ * Format an object to produce a string.
+ * This method handles keyword strings.
+ * If the Formattable object is not a <code>UnicodeString</code>,
+ * then it returns a failing UErrorCode.
+ *
+ * @param obj A keyword string that is used to select an alternative.
+ * @param appendTo output parameter to receive result.
+ * Result is appended to existing contents.
+ * @param pos On input: an alignment field, if desired.
+ * On output: the offsets of the alignment field.
+ * @param status output param filled with success/failure status.
+ * @return Reference to 'appendTo' parameter.
+ * @draft ICU 4.4
+ */
+ UnicodeString& format(const Formattable& obj,
+ UnicodeString& appendTo,
+ FieldPosition& pos,
+ UErrorCode& status) const;
+
+ /**
+ * Returns the pattern from applyPattern() or constructor.
+ *
+ * @param appendTo output parameter to receive result.
+ * Result is appended to existing contents.
+ * @return the UnicodeString with inserted pattern.
+ * @draft ICU 4.4
+ */
+ UnicodeString& toPattern(UnicodeString& appendTo);
+
+ /**
+ * This method is not yet supported by <code>SelectFormat</code>: the
+ * current implementation is an empty stub that leaves
+ * <code>result</code> and <code>parse_pos</code> unchanged. The text
+ * below describes parsing behavior that the empty implementation does
+ * not perform.
+ *
+ * Before calling, set parse_pos.index to the offset you want to start
+ * parsing at in the source. After calling, parse_pos.index is the end of
+ * the text you parsed. If error occurs, index is unchanged.
+ *
+ * When parsing, leading whitespace is discarded (with a successful parse),
+ * while trailing whitespace is left as is.
+ *
+ * See Format::parseObject() for more.
+ *
+ * @param source The string to be parsed into an object.
+ * @param result Formattable to be set to the parse result.
+ * If parse fails, return contents are undefined.
+ * @param parse_pos The position to start parsing at. Upon return
+ * this param is set to the position after the
+ * last character successfully parsed. If the
+ * source is not parsed successfully, this param
+ * will remain unchanged.
+ * @draft ICU 4.4
+ */
+ virtual void parseObject(const UnicodeString& source,
+ Formattable& result,
+ ParsePosition& parse_pos) const;
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ * @draft ICU 4.4
+ */
+ virtual UClassID getDynamicClassID() const;
+
+private:
+ typedef enum characterClass{ // character categories reported by classifyCharacter()
+ tStartKeyword,
+ tContinueKeyword,
+ tLeftBrace,
+ tRightBrace,
+ tSpace,
+ tOther
+ }characterClass;
+
+ UnicodeString pattern; // pattern text as set by applyPattern() or copied from another instance
+ //Hash to store the keyword, phrase pairs; values are UnicodeString objects owned by the table (see copyHashtable). NULL after init().
+ Hashtable *parsedValuesHash;
+
+ SelectFormat(); // default constructor not implemented
+ void init(UErrorCode& status); // sets parsedValuesHash to NULL, clears pattern, resets status to U_ZERO_ERROR
+ //Used by applyPattern: classifies a character as one of the characterClass values
+ void classifyCharacter(UChar ch, characterClass& type) const;
+ //Checks if the "other" keyword is present in pattern
+ UBool checkSufficientDefinition();
+ //Checks if the keyword passed is valid
+ UBool checkValidKeyword(const UnicodeString& argKeyword) const;
+ void parsingFailure(); // NOTE(review): body not visible here; presumably cleanup after a failed parse - confirm
+ void copyHashtable(Hashtable *other, UErrorCode& status); // replaces parsedValuesHash with a deep copy of other (NULL if other is NULL); the old table is not deleted here
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif // _SELFMT
+//eof
diff --git a/icu4c/source/tools/icuinfo/icuinfo.vcproj b/icu4c/source/tools/icuinfo/icuinfo.vcproj
index 5796474c47c..952dbe3bf0e 100644
--- a/icu4c/source/tools/icuinfo/icuinfo.vcproj
+++ b/icu4c/source/tools/icuinfo/icuinfo.vcproj
@@ -1,395 +1,395 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/icu4c/source/tools/icuinfo/icuplugins_windows_sample.txt b/icu4c/source/tools/icuinfo/icuplugins_windows_sample.txt
index 5f82c21f515..598de01228d 100644
--- a/icu4c/source/tools/icuinfo/icuplugins_windows_sample.txt
+++ b/icu4c/source/tools/icuinfo/icuplugins_windows_sample.txt
@@ -1,57 +1,57 @@
-# Copyright (C) 2009-2010 IBM Corporation and Others. All Rights Reserved.
-#
-# This is a sample ICU Plugins control file for Windows.
-# It's also an example control file for any platform.
-#
-# This file can be copied to, for example, C:\SOMEDIRECTORY\icuplugins##.txt
-# where ## is the major and minor ICU versions (i.e. just 96 for version 9.6.3)
-# and C:\SOMEDIRECTORY is any directory.
-#
-# Then, set the variable ICU_PLUGINS to C:\SOMEDIRECTORY
-#
-# Then, ICU will load the test plugin from either the debug or non-debug
-# plugin DLL (depending on whether ICU is in debug or non-debug state).
-#
-# To see the results, run the command "icuinfo -v -L"
-#
-# The format of this file is pretty simple.
-# These lines are comments.
-#
-# Non-comment lines have two or three elements in them, and look like this:
-#
-# LIBRARYNAME ENTRYPOINT [ CONFIGURATION .. ]
-#
-# Tabs or spaces separate the three items.
-#
-# LIBRARYNAME is the name of a shared library, either a short name if it is on the PATH,
-# or a full pathname.
-#
-# ENTRYPOINT is the short (undecorated) symbol name of the plugin's entrypoint.
-# see unicode/icuplug.h for information.
-#
-# CONFIGURATION is the entire rest of the line. It's passed as-is to the plugin.
-#
-#
-# This sample file tries to load 'myPlugin'.
-# It is in the testplug project. (You will need to rebuild either the debug or release version of this DLL.)
-# The configuration string isn't used, but is just an example
-
-## A high level test plugin that does nothing.
-testplug.dll myPlugin hello=world
-
-## A "bad" plugin that is low level but performs a malloc.
-## Sometimes this is desired, but, note that it may cause
-## later plugins to fail to load.
-#testplug.dll myPluginBad hello=world
-
-## A "high-level" plugin that does nothing.
-## It will be loaded after the low level plugins.
-#testplug.dll myPluginHigh
-
-## A "low-level" plugin that does nothing.
-## It will be loaded before the high level plugins.
-#testplug.dll myPluginLow
-
-## A low level plugin that just prints a message when uprv_malloc and related functions are called
-## Note, it cannot be unloaded.
-#testplug.dll debugMemoryPlugin
+# Copyright (C) 2009-2010 IBM Corporation and Others. All Rights Reserved.
+#
+# This is a sample ICU Plugins control file for Windows.
+# It's also an example control file for any platform.
+#
+# This file can be copied to, for example, C:\SOMEDIRECTORY\icuplugins##.txt
+# where ## is the major and minor ICU versions (e.g. just 96 for version 9.6.3)
+# and C:\SOMEDIRECTORY is any directory.
+#
+# Then, set the variable ICU_PLUGINS to C:\SOMEDIRECTORY
+#
+# Then, ICU will load the test plugin from either the debug or non-debug
+# plugin DLL (depending on whether ICU is in debug or non-debug state).
+#
+# To see the results, run the command "icuinfo -v -L"
+#
+# The format of this file is pretty simple.
+# These lines are comments.
+#
+# Non-comment lines have two or three elements in them, and look like this:
+#
+# LIBRARYNAME ENTRYPOINT [ CONFIGURATION .. ]
+#
+# Tabs or spaces separate the three items.
+#
+# LIBRARYNAME is the name of a shared library, either a short name if it is on the PATH,
+# or a full pathname.
+#
+# ENTRYPOINT is the short (undecorated) symbol name of the plugin's entrypoint.
+# see unicode/icuplug.h for information.
+#
+# CONFIGURATION is the entire rest of the line. It's passed as-is to the plugin.
+#
+#
+# This sample file tries to load 'myPlugin'.
+# It is in the testplug project. (You will need to rebuild either the debug or release version of this DLL.)
+# The configuration string isn't used; it is just an example.
+
+## A high level test plugin that does nothing.
+testplug.dll myPlugin hello=world
+
+## A "bad" plugin that is low level but performs a malloc.
+## Sometimes this is desired, but, note that it may cause
+## later plugins to fail to load.
+#testplug.dll myPluginBad hello=world
+
+## A "high-level" plugin that does nothing.
+## It will be loaded after the low level plugins.
+#testplug.dll myPluginHigh
+
+## A "low-level" plugin that does nothing.
+## It will be loaded before the high level plugins.
+#testplug.dll myPluginLow
+
+## A low level plugin that just prints a message when uprv_malloc and related functions are called
+## Note, it cannot be unloaded.
+#testplug.dll debugMemoryPlugin
diff --git a/icu4c/source/tools/icuinfo/testplug.vcproj b/icu4c/source/tools/icuinfo/testplug.vcproj
index 64e4374abd0..2c70b0f6378 100644
--- a/icu4c/source/tools/icuinfo/testplug.vcproj
+++ b/icu4c/source/tools/icuinfo/testplug.vcproj
@@ -1,424 +1,424 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+