From eb31068f2a79799ba24eb1490c2e88432c151687 Mon Sep 17 00:00:00 2001 From: Doug Felt Date: Fri, 24 Sep 2004 22:58:00 +0000 Subject: [PATCH] ICU-3634 localized RBNF rule set names - text description X-SVN-Rev: 16357 --- icu4c/source/i18n/rbnf.cpp | 776 +++++++++++++++++++++++++- icu4c/source/i18n/unicode/rbnf.h | 183 +++++- icu4c/source/test/intltest/itrbnf.cpp | 169 +++++- icu4c/source/test/intltest/itrbnf.h | 5 + 4 files changed, 1085 insertions(+), 48 deletions(-) diff --git a/icu4c/source/i18n/rbnf.cpp b/icu4c/source/i18n/rbnf.cpp index 54c07dd4bc5..d0c2217c6a4 100644 --- a/icu4c/source/i18n/rbnf.cpp +++ b/icu4c/source/i18n/rbnf.cpp @@ -24,6 +24,13 @@ #include "cstring.h" #include "uprops.h" +// debugging +// #define DEBUG + +#ifdef DEBUG +#include "stdio.h" +#endif + #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf" static const UChar gPercentPercent[] = @@ -51,7 +58,574 @@ U_NAMESPACE_BEGIN UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat) -RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, const Locale& alocale, UParseError& perror, UErrorCode& status) +class LocalizationInfo : public UObject { +protected: + virtual ~LocalizationInfo() {}; + uint32_t refcount; + +public: + LocalizationInfo() : refcount(0) {} + + LocalizationInfo* ref(void) { + ++refcount; + return this; + } + + LocalizationInfo* unref(void) { + if (refcount && --refcount == 0) { + delete this; + } + return NULL; + } + + virtual UBool operator==(const LocalizationInfo* rhs) const; + inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); } + + virtual int32_t getNumberOfRuleSets(void) const = 0; + virtual const UChar* getRuleSetName(int32_t index) const = 0; + virtual int32_t getNumberOfDisplayLocales(void) const = 0; + virtual const UChar* getLocaleName(int32_t index) const = 0; + virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0; + + virtual int32_t indexForLocale(const UChar* locale) const; + virtual int32_t indexForRuleSet(const UChar* ruleset) const; + + virtual UClassID getDynamicClassID() const = 0; + static UClassID getStaticClassID(void); +}; + +UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo) + +// if both strings are NULL, this returns TRUE +UBool +streq(const UChar* lhs, const UChar* rhs) { + if (rhs == lhs) { + return TRUE; + } + if (lhs && rhs) { + return u_strcmp(lhs, rhs) == 0; + } + return FALSE; +} + +UBool +LocalizationInfo::operator==(const LocalizationInfo* rhs) const { + if (rhs) { + if (this == rhs) { + return TRUE; + } + + int32_t rsc = getNumberOfRuleSets(); + if (rsc == rhs->getNumberOfRuleSets()) { + for (int i = 0; i < rsc; ++i) { + if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) { + return FALSE; + } + } + int32_t dlc = getNumberOfDisplayLocales(); + if (dlc == rhs->getNumberOfDisplayLocales()) { + for (int i = 0; i < dlc; ++i) { + const UChar* locale = getLocaleName(i); + int32_t ix = rhs->indexForLocale(locale); + // if no locale, ix is -1, getLocaleName returns null, so streq returns false + if (!streq(locale, rhs->getLocaleName(ix))) { + return FALSE; + } + for (int j = 0; j < rsc; ++j) { + if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) { + return FALSE; + } + } + } + return TRUE; + } + } + } + return FALSE; +} + +int32_t +LocalizationInfo::indexForLocale(const UChar* locale) const { + for (int i = 0; i < getNumberOfDisplayLocales(); ++i) { + if (streq(locale, getLocaleName(i))) { + return i; + } + } + return -1; +} + +int32_t +LocalizationInfo::indexForRuleSet(const UChar* ruleset) const { + if (ruleset) { + for (int i = 0; i < getNumberOfRuleSets(); ++i) { + if (streq(ruleset, getRuleSetName(i))) { + return i; + } + } + } + return -1; +} + + +typedef void (*Fn_Deleter)(void*); + +class VArray { + void** buf; + int32_t cap; + int32_t size; + Fn_Deleter deleter; +public: + VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {} + + VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {} + + ~VArray() { + if (deleter) { + for (int i = 0; i < size; ++i) { + (*deleter)(buf[i]); + } + } + uprv_free(buf); + } + + int32_t length() { + return size; + } + + void add(void* elem, UErrorCode& status) { + if (U_SUCCESS(status)) { + if (size == cap) { + if (cap == 0) { + cap = 1; + } else if (cap < 256) { + cap *= 2; + } else { + cap += 256; + } + if (buf == NULL) { + buf = (void**)uprv_malloc(cap * sizeof(void*)); + } else { + buf = (void**)uprv_realloc(buf, cap * sizeof(void*)); + } + if (buf == NULL) { + // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memset(buf + cap / 2, 0, cap / 2 * sizeof(void*)); + } + buf[size++] = elem; + } + } + + void** release(void) { + void** result = buf; + buf = NULL; + cap = 0; + size = 0; + return result; + } +}; + +class LocDataParser; + +class StringLocalizationInfo : public LocalizationInfo { + UChar* info; + UChar*** data; + int32_t numRuleSets; + int32_t numLocales; + +friend class LocDataParser; + + StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs) + : info(i), data(d), numRuleSets(numRS), numLocales(numLocs) + { + } + +public: + static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status); + + virtual ~StringLocalizationInfo(); + virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; } + virtual const UChar* getRuleSetName(int32_t index) const; + virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; } + virtual const UChar* getLocaleName(int32_t index) const; + virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const; + + virtual UClassID getDynamicClassID() const; + static UClassID getStaticClassID(void); + +private: + void init(UErrorCode& status) const; +}; + + +/** + * Utility for parsing a localization string and returning a StringLocalizationInfo*. + */ +class LocDataParser { + UChar* data; + const UChar* e; + UChar* p; + UChar ch; + UParseError& pe; + UErrorCode& ec; + +public: + LocDataParser(UParseError& parseError, UErrorCode& status) + : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {} + ~LocDataParser() {} + + /* + * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status, + * and return NULL. The StringLocalizationInfo will adopt locData if it is created. + */ + StringLocalizationInfo* parse(UChar* data, int32_t len); + +private: + enum { + OPEN_ANGLE = 0x003c, /* '<' */ + CLOSE_ANGLE = 0x003e, /* '>' */ + COMMA = 0x002c, + TICK = 0x0027, + QUOTE = 0x0022, + SPACE = 0x0020, + }; + + void inc(void) { ++p; ch = 0xffff; } + UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; } + UBool check(UChar c) { return p < e && (ch == c || *p == c); } + void skipWhitespace(void) { while (p < e && uprv_isRuleWhiteSpace(ch != 0xffff ? ch : *p)) inc();} + UBool inList(UChar c, const UChar* list) const { + if (*list == SPACE && uprv_isRuleWhiteSpace(c)) return TRUE; + while (*list && *list != c) ++list; return *list == c; + } + void parseError(const char* msg); + + StringLocalizationInfo* doParse(void); + + UChar** nextArray(int32_t& requiredLength); + UChar* nextString(void); + + static UChar DQUOTE_STOPLIST[]; + static UChar SQUOTE_STOPLIST[]; + static UChar NOQUOTE_STOPLIST[]; +}; + +#ifdef DEBUG +#define ERROR(msg) parseError(msg); return NULL; +#else +#define ERROR(msg) parseError(NULL); return NULL; +#endif + + +UChar LocDataParser::DQUOTE_STOPLIST[] = { + LocDataParser::QUOTE, NULL +}; + +UChar LocDataParser::SQUOTE_STOPLIST[] = { + LocDataParser::TICK, NULL +}; + +UChar LocDataParser::NOQUOTE_STOPLIST[] = { + LocDataParser::SPACE, LocDataParser::COMMA, LocDataParser::CLOSE_ANGLE, LocDataParser::OPEN_ANGLE, + LocDataParser::TICK, LocDataParser::QUOTE, NULL +}; + +void +DeleteFn(void* p) { + uprv_free(p); +} + +StringLocalizationInfo* +LocDataParser::parse(UChar* _data, int32_t len) { + if (U_FAILURE(ec)) { + if (_data) uprv_free(_data); + return NULL; + } + + pe.line = 0; + pe.offset = -1; + pe.postContext[0] = 0; + pe.preContext[0] = 0; + + if (_data == NULL) { + ec = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + if (len <= 0) { + ec = U_ILLEGAL_ARGUMENT_ERROR; + uprv_free(_data); + return NULL; + } + + data = _data; + e = data + len; + p = _data; + ch = 0xffff; + + return doParse(); +} + + +StringLocalizationInfo* +LocDataParser::doParse(void) { + skipWhitespace(); + if (!checkInc(OPEN_ANGLE)) { + ERROR("Missing open angle"); + } else { + VArray array(DeleteFn); + UBool mightHaveNext = TRUE; + int32_t requiredLength = -1; + while (mightHaveNext) { + mightHaveNext = FALSE; + UChar** elem = nextArray(requiredLength); + skipWhitespace(); + UBool haveComma = check(COMMA); + if (elem) { + array.add(elem, ec); + if (haveComma) { + inc(); + mightHaveNext = TRUE; + } + } else if (haveComma) { + ERROR("Unexpected character"); + } + } + + skipWhitespace(); + if (!checkInc(CLOSE_ANGLE)) { + if (check(OPEN_ANGLE)) { + ERROR("Missing comma in outer array"); + } else { + ERROR("Missing close angle bracket in outer array"); + } + } + + skipWhitespace(); + if (p != e) { + ERROR("Extra text after close of localization data"); + } + + array.add(NULL, ec); + if (U_SUCCESS(ec)) { + int32_t numLocs = array.length() - 2; // subtract first, NULL + UChar*** result = (UChar***)array.release(); + + return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL + } + } + + ERROR("Unknown error"); +} + +UChar** +LocDataParser::nextArray(int32_t& requiredLength) { + if (U_FAILURE(ec)) { + return NULL; + } + + skipWhitespace(); + if (!checkInc(OPEN_ANGLE)) { + ERROR("Missing open angle"); + } + + VArray array; + UBool mightHaveNext = TRUE; + while (mightHaveNext) { + mightHaveNext = FALSE; + UChar* elem = nextString(); + skipWhitespace(); + UBool haveComma = check(COMMA); + if (elem) { + array.add(elem, ec); + if (haveComma) { + inc(); + mightHaveNext = TRUE; + } + } else if (haveComma) { + ERROR("Unexpected comma"); + } + } + skipWhitespace(); + if (!checkInc(CLOSE_ANGLE)) { + if (check(OPEN_ANGLE)) { + ERROR("Missing close angle bracket in inner array"); + } else { + ERROR("Missing comma in inner array"); + } + } + + array.add(NULL, ec); + if (U_SUCCESS(ec)) { + if (requiredLength == -1) { + requiredLength = array.length() + 1; + } else if (array.length() != requiredLength) { + ec = U_ILLEGAL_ARGUMENT_ERROR; + ERROR("Array not of required length"); + } + + return (UChar**)array.release(); + } + ERROR("Unknown Error"); +} + +UChar* +LocDataParser::nextString() { + UChar* result = NULL; + + skipWhitespace(); + if (p < e) { + const UChar* terminators; + UChar c = *p; + UBool haveQuote = c == QUOTE || c == TICK; + if (haveQuote) { + inc(); + terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST; + } else { + terminators = NOQUOTE_STOPLIST; + } + UChar* start = p; + while (p < e && !inList(*p, terminators)) ++p; + if (p == e) { + ERROR("Unexpected end of data"); + } + + UChar x = *p; + if (p > start) { + ch = x; + *p = 0x0; // terminate by writing to data + result = start; // just point into data + } + if (haveQuote) { + if (x != c) { + ERROR("Missing matching quote"); + } else if (p == start) { + ERROR("Empty string"); + } + inc(); + } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) { + ERROR("Unexpected character in string"); + } + } + + // ok for there to be no next string + return result; +} + +void +LocDataParser::parseError(const char* str) { + if (!data) { + return; + } + + const UChar* start = p - U_PARSE_CONTEXT_LEN - 1; + if (start < data) start = data; + for (UChar* x = p; --x >= start;) if (!*x) { start = x+1; break; } + const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1; + if (limit > e) limit = e; + u_strncpy(pe.preContext, start, p-start); + pe.preContext[p-start] = 0; + u_strncpy(pe.postContext, p, limit-p); + pe.postContext[limit-p] = 0; + pe.offset = p - data; + +#ifdef DEBUG + fprintf(stderr, "%s at or near character %d: ", str, p-data); + + UnicodeString msg; + msg.append(start, p - start); + msg.append((UChar)0x002f); /* SOLIDUS/SLASH */ + msg.append(p, limit-p); + msg.append("'"); + + char buf[128]; + int32_t len = msg.extract(0, msg.length(), buf, 128); + if (len >= 128) { + buf[127] = 0; + } else { + buf[len] = 0; + } + fprintf(stderr, "%s\n", buf); + fflush(stderr); +#endif + + uprv_free(data); + data = NULL; + + if (U_SUCCESS(ec)) { + ec = U_PARSE_ERROR; + } +} + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo) + +StringLocalizationInfo* +StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) { + if (U_FAILURE(status)) { + return NULL; + } + + int32_t len = info.length(); + if (len == 0) { + return NULL; // no error; + } + + UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar)); + if (!p) { + status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + info.extract(p, len, status); + if (!U_FAILURE(status)) { + status = U_ZERO_ERROR; // clear warning about non-termination + } + + LocDataParser parser(perror, status); + return parser.parse(p, len); +} + +StringLocalizationInfo::~StringLocalizationInfo() { + for (UChar*** p = (UChar***)data; *p; ++p) { + // remaining data is simply pointer into our unicode string data. + if (*p) uprv_free(*p); + } + if (data) uprv_free(data); + if (info) uprv_free(info); +} + + +const UChar* +StringLocalizationInfo::getRuleSetName(int32_t index) const { + if (index >= 0 && index < getNumberOfRuleSets()) { + return data[0][index]; + } + return NULL; +} + +const UChar* +StringLocalizationInfo::getLocaleName(int32_t index) const { + if (index >= 0 && index < getNumberOfDisplayLocales()) { + return data[index+1][0]; + } + return NULL; +} + +const UChar* +StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const { + if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() && + ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) { + return data[localeIndex+1][ruleIndex+1]; + } + return NULL; +} + +// ---------- + +RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, + const UnicodeString& locs, + const Locale& alocale, UParseError& perror, UErrorCode& status) : ruleSets(NULL) , defaultRuleSet(NULL) , locale(alocale) @@ -59,8 +633,41 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, c , decimalFormatSymbols(NULL) , lenient(FALSE) , lenientParseRules(NULL) + , localizations(NULL) { - init(description, perror, status); + LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); + init(description, locinfo, perror, status); +} + +RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, + const UnicodeString& locs, + UParseError& perror, UErrorCode& status) + : ruleSets(NULL) + , defaultRuleSet(NULL) + , locale(Locale::getDefault()) + , collator(NULL) + , decimalFormatSymbols(NULL) + , lenient(FALSE) + , lenientParseRules(NULL) + , localizations(NULL) +{ + LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status); + init(description, locinfo, perror, status); +} + +RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, + LocalizationInfo* info, + const Locale& alocale, UParseError& perror, UErrorCode& status) + : ruleSets(NULL) + , defaultRuleSet(NULL) + , locale(alocale) + , collator(NULL) + , decimalFormatSymbols(NULL) + , lenient(FALSE) + , lenientParseRules(NULL) + , localizations(NULL) +{ + init(description, info, perror, status); } RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status) @@ -71,6 +678,7 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& , decimalFormatSymbols(NULL) , lenient(FALSE) , lenientParseRules(NULL) + , localizations(NULL) { if (U_FAILURE(status)) { return; @@ -84,21 +692,19 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& default: status = U_ILLEGAL_ARGUMENT_ERROR; return; } - // the following didn't work for aliased resources, but Vladimir supposedly fixed it... - // const UChar* description = ures_getStringByKey(nfrb, fmt_tag, &len, &status); + // TODO: read localization info from resource + LocalizationInfo* locinfo = NULL; + int32_t len = 0; UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status); - // UResourceBundle* yuck = ures_getByKey(nfrb, fmt_tag, NULL, &status); - // const UChar* description = ures_getString(yuck, &len, &status); if (U_SUCCESS(status)) { setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status), ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status)); const UChar* description = ures_getStringByKey(nfrb, fmt_tag, &len, &status); UnicodeString desc(description, len); UParseError perror; - init (desc, perror, status); + init (desc, locinfo, perror, status); } - // ures_close(yuck); ures_close(nfrb); } @@ -111,20 +717,25 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs) , decimalFormatSymbols(NULL) , lenient(FALSE) , lenientParseRules(NULL) + , localizations(NULL) { this->operator=(rhs); } + // -------- + RuleBasedNumberFormat& RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs) { UErrorCode status = U_ZERO_ERROR; dispose(); locale = rhs.locale; + lenient = rhs.lenient; + UnicodeString rules = rhs.getRules(); UParseError perror; - init(rules, perror, status); - lenient = rhs.lenient; + init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status); + return *this; } @@ -140,7 +751,7 @@ RuleBasedNumberFormat::clone(void) const UnicodeString rules = getRules(); UErrorCode status = U_ZERO_ERROR; UParseError perror; - result = new RuleBasedNumberFormat(rules, locale, perror, status); + result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status); /* test for NULL */ if (result == 0) { status = U_MEMORY_ALLOCATION_ERROR; @@ -165,13 +776,18 @@ RuleBasedNumberFormat::operator==(const Format& other) const if (other.getDynamicClassID() == getStaticClassID()) { const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other; if (locale == rhs.locale && - lenient == rhs.lenient) { + lenient == rhs.lenient && + (localizations == NULL + ? rhs.localizations == NULL + : (rhs.localizations == NULL + ? FALSE + : *localizations == rhs.localizations))) { + NFRuleSet** p = ruleSets; NFRuleSet** q = rhs.ruleSets; - if ((p == NULL) != (q == NULL)) { - return TRUE; - } if (p == NULL) { + return q == NULL; + } else if (q == NULL) { return FALSE; } while (*p && *q && (**p == **q)) { @@ -200,8 +816,11 @@ RuleBasedNumberFormat::getRules() const UnicodeString RuleBasedNumberFormat::getRuleSetName(int32_t index) const { - UnicodeString result; - if (ruleSets) { + if (localizations) { + UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1); + return string; + } else if (ruleSets) { + UnicodeString result; for (NFRuleSet** p = ruleSets; *p; ++p) { NFRuleSet* rs = *p; if (rs->isPublic()) { @@ -212,14 +831,17 @@ RuleBasedNumberFormat::getRuleSetName(int32_t index) const } } } - return result; + UnicodeString empty; + return empty; } int32_t RuleBasedNumberFormat::getNumberOfRuleSetNames() const { int32_t result = 0; - if (ruleSets) { + if (localizations) { + result = localizations->getNumberOfRuleSets(); + } else if (ruleSets) { for (NFRuleSet** p = ruleSets; *p; ++p) { if ((**p).isPublic()) { ++result; @@ -229,6 +851,74 @@ RuleBasedNumberFormat::getNumberOfRuleSetNames() const return result; } +int32_t +RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const { + if (localizations) { + return localizations->getNumberOfDisplayLocales(); + } + return 0; +} + +Locale +RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const { + if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) { + UnicodeString name(TRUE, localizations->getLocaleName(index), -1); + char buffer[64]; + int32_t cap = name.length() + 1; + char* bp = buffer; + if (cap > 64) { + bp = new char[cap]; + } + int32_t len = name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant); + Locale locale(bp); + if (bp != buffer) { + delete[] bp; + } + return locale; + } + status = U_ILLEGAL_ARGUMENT_ERROR; + Locale locale; + return locale; +} + +UnicodeString +RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& locale) { + if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) { + UnicodeString localeName(locale.getBaseName(), -1, UnicodeString::kInvariant); + int32_t len = localeName.length(); + UChar* locale = localeName.getBuffer(len + 1); + while (len >= 0) { + locale[len] = 0; + int32_t ix = localizations->indexForLocale(locale); + if (ix >= 0) { + UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1); + return name; + } + + // trim trailing portion, skipping over ommitted sections + do { --len;} while (len > 0 && locale[len] != 0x005f); // underscore + while (len > 0 && locale[len-1] == 0x005F) --len; + } + UnicodeString name(TRUE, localizations->getRuleSetName(index), -1); + return name; + } + UnicodeString bogus; + bogus.setToBogus(); + return bogus; +} + +UnicodeString +RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& locale) { + if (localizations) { + UnicodeString rsn(ruleSetName); + int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer()); + return getRuleSetDisplayName(ix, locale); + } + UnicodeString bogus; + bogus.setToBogus(); + return bogus; +} + NFRuleSet* RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const { @@ -415,7 +1105,12 @@ void RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) { if (U_SUCCESS(status)) { if (ruleSetName.isEmpty()) { + if (localizations) { + UnicodeString name(TRUE, localizations->getRuleSetName(0), -1); + defaultRuleSet = findRuleSet(name, status); + } else { initDefaultRuleSet(); + } } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) { status = U_ILLEGAL_ARGUMENT_ERROR; } else { @@ -441,10 +1136,12 @@ RuleBasedNumberFormat::getDefaultRuleSetName() const { void RuleBasedNumberFormat::initDefaultRuleSet() { + defaultRuleSet = NULL; if (!ruleSets) { - defaultRuleSet = NULL; + return; } - NFRuleSet**p = &ruleSets[1]; + + NFRuleSet**p = &ruleSets[0]; while (*p) { ++p; } @@ -462,7 +1159,8 @@ RuleBasedNumberFormat::initDefaultRuleSet() void -RuleBasedNumberFormat::init(const UnicodeString& rules, UParseError& /* pErr */, UErrorCode& status) +RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizations, + UParseError& /* pErr */, UErrorCode& status) { // TODO: implement UParseError // Note: this can leave ruleSets == NULL, so remaining code should check @@ -470,6 +1168,8 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, UParseError& /* pErr */, return; } + this->localizations = localizations == NULL ? NULL : localizations->ref(); + UnicodeString description(rules); if (!description.length()) { status = U_MEMORY_ALLOCATION_ERROR; @@ -588,7 +1288,11 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, UParseError& /* pErr */, // rather than the first so that a user can create a new formatter // from an existing formatter and change its default behavior just // by appending more rule sets to the end) - initDefaultRuleSet(); + + // {dlf} Initialization of a fraction rule set requires the default rule + // set to be known. For purposes of initialization, this is always the + // last public rule set, no matter what the localization data says. + initDefaultRuleSet(); // finally, we can go back through the temporary descriptions // list and finish seting up the substructure (and we throw @@ -600,6 +1304,30 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, UParseError& /* pErr */, } delete[] ruleSetDescriptions; + + // Now that the rules are initialized, the 'real' default rule + // set can be adjusted by the localization data. + + // The C code keeps the localization array as is, rather than building + // a separate array of the public rule set names, so we have less work + // to do here-- but we still need to check the names. + + if (localizations) { + // confirm the names, if any aren't in the rules, that's an error + // it is ok if the rules contain public rule sets that are not in this list + for (int32_t i = 0; i < localizations->getNumberOfRuleSets(); ++i) { + UnicodeString name(TRUE, localizations->getRuleSetName(i), -1); + NFRuleSet* rs = findRuleSet(name, status); + if (rs == NULL) { + break; // error + } + if (i == 0) { + defaultRuleSet = rs; + } + } + } else { + defaultRuleSet = getDefaultRuleSet(); + } } void @@ -664,6 +1392,8 @@ RuleBasedNumberFormat::dispose() delete lenientParseRules; lenientParseRules = NULL; + + if (localizations) localizations = localizations->unref(); } diff --git a/icu4c/source/i18n/unicode/rbnf.h b/icu4c/source/i18n/unicode/rbnf.h index a64f3550165..6d809f6dd00 100644 --- a/icu4c/source/i18n/unicode/rbnf.h +++ b/icu4c/source/i18n/unicode/rbnf.h @@ -28,10 +28,12 @@ #include "unicode/locid.h" #include "unicode/numfmt.h" #include "unicode/unistr.h" +#include "unicode/strenum.h" U_NAMESPACE_BEGIN class NFRuleSet; +class LocalizationInfo; /** * Tags for the predefined rulesets. @@ -473,6 +475,24 @@ class Collator; * subclasses, such code will not necessarily work and will not be * guaranteed to work stably from release to release. * + *

Localizations

+ *

Constructors are available that allow the specification of localizations for the + * public rule sets (and also allow more control over what public rule sets are available). + * Localization data is represented as a textual description. The description represents + * an array of arrays of string. The first element is an array of the public rule set names, + * each of these must be one of the public rule set names that appear in the rules. Only + * names in this array will be treated as public rule set names by the API. Each subsequent + * element is an array of localizations of these names. The first element of one of these + * subarrays is the locale name, and the remaining elements are localizations of the + * public rule set names, in the same order as they were listed in the first arrray.

+ *

In the syntax, angle brackets '<', '>' are used to delimit the arrays, and comma ',' is used + * to separate elements of an array. Whitespace is ignored, unless quoted.

+ *

For example:

+ * < < %foo, %bar, %baz >, + * < en, Foo, Bar, Baz >, + * < fr, 'le Foo', 'le Bar', 'le Baz' > + * < zh, \u7532, \u4e59, \u4e19 > > + *

* @author Richard Gillam * @see NumberFormat * @see DecimalFormat @@ -485,6 +505,44 @@ public: // constructors //----------------------------------------------------------------------- + /** + * Creates a RuleBasedNumberFormat that behaves according to the description + * passed in. The formatter uses the default locale. + * @param rules A description of the formatter's desired behavior. + * See the class documentation for a complete explanation of the description + * syntax. + * @param perror The parse error if an error was encountered. + * @param status The status indicating whether the constructor succeeded. + * @draft ICU 3.2 + */ + RuleBasedNumberFormat(const UnicodeString& rules, UParseError& perror, UErrorCode& status); + + /** + * Creates a RuleBasedNumberFormat that behaves according to the description + * passed in. The formatter uses the default locale. + *

+ * The localizations data provides information about the public + * rule sets and their localized display names for different + * locales. The first element in the list is an array of the names + * of the public rule sets. The first element in this array is + * the initial default ruleset. The remaining elements in the + * list are arrays of localizations of the names of the public + * rule sets. Each of these is one longer than the initial array, + * with the first String being the ULocale ID, and the remaining + * Strings being the localizations of the rule set names, in the + * same order as the initial array. Arrays are NULL-terminated. + * @param description A description of the formatter's desired behavior. + * See the class documentation for a complete explanation of the description + * syntax. + * @param localizations the localization information. + * names in the description. These will be copied by the constructor. + * @param perror The parse error if an error was encountered. + * @param status The status indicating whether the constructor succeeded. + * @draft ICU 3.2 + */ + RuleBasedNumberFormat(const UnicodeString& rules, const UnicodeString& localizations, + UParseError& perror, UErrorCode& status); + /** * Creates a RuleBasedNumberFormat that behaves according to the rules * passed in. The formatter uses the specified locale to determine the @@ -493,8 +551,8 @@ public: * @param rules The formatter rules. * See the class documentation for a complete explanation of the rule * syntax. - * @param locale A locale, that governs which characters are used for - * formatting values in numerals, and which characters are equivalent in + * @param locale A locale that governs which characters are used for + * formatting values in numerals and which characters are equivalent in * lenient parsing. * @param perror The parse error if an error was encountered. * @param status The status indicating whether the constructor succeeded. @@ -503,6 +561,35 @@ public: RuleBasedNumberFormat(const UnicodeString& rules, const Locale& locale, UParseError& perror, UErrorCode& status); + /** + * Creates a RuleBasedNumberFormat that behaves according to the description + * passed in. The formatter uses the default locale. + *

+ * The localizations data provides information about the public + * rule sets and their localized display names for different + * locales. The first element in the list is an array of the names + * of the public rule sets. The first element in this array is + * the initial default ruleset. The remaining elements in the + * list are arrays of localizations of the names of the public + * rule sets. Each of these is one longer than the initial array, + * with the first String being the ULocale ID, and the remaining + * Strings being the localizations of the rule set names, in the + * same order as the initial array. Arrays are NULL-terminated. + * @param rules A description of the formatter's desired behavior. + * See the class documentation for a complete explanation of the description + * syntax. + * @param localizations a list of localizations for the rule set + * names in the description. These will be copied by the constructor. + * @param locale A locale that governs which characters are used for + * formatting values in numerals and which characters are equivalent in + * lenient parsing. + * @param perror The parse error if an error was encountered. + * @param status The status indicating whether the constructor succeeded. + * @draft ICU 3.2 + */ + RuleBasedNumberFormat(const UnicodeString& rules, const UnicodeString& localizations, + const Locale& locale, UParseError& perror, UErrorCode& status); + /** * Creates a RuleBasedNumberFormat from a predefined ruleset. The selector * code choosed among three possible predefined formats: spellout, ordinal, @@ -570,6 +657,13 @@ public: */ virtual UnicodeString getRules() const; + /** + * Return the number of public rule set names. + * @return the number of public rule set names. + * @stable ICU 2.0 + */ + virtual int32_t getNumberOfRuleSetNames() const; + /** * Return the name of the index'th public ruleSet. If index is not valid, * the function returns null. @@ -580,11 +674,47 @@ public: virtual UnicodeString getRuleSetName(int32_t index) const; /** - * Return the number of public rule set names. - * @return the number of public rule set names. - * @stable ICU 2.0 + * Return the number of locales for which we have localized rule set display names. + * @return the number of locales for which we have localized rule set display names. + * @draft ICU 3.2 */ - virtual int32_t getNumberOfRuleSetNames() const; + virtual int32_t getNumberOfRuleSetDisplayNameLocales(void) const; + + /** + * Return the index'th display name locale. + * @param index the index of the locale + * @param return the locale + * @see #getNumberOfRuleSetDisplayNameLocales + * @draft ICU 3.2 + */ + virtual Locale getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const; + + /** + * Return the rule set display names for the provided locale. These are in the same order + * as those returned by getRuleSetName. The locale is matched against the locales for + * which there is display name data, using normal fallback rules. If no locale matches, + * the default display names are returned. (These are the internal rule set names minus + * the leading '%'.) + * @param index the index of the rule set + * @param locale the locale (returned by getRuleSetDisplayNameLocales) for which the localized + * display name is desired + * @return the display name for the given index, which might be bogus if there is an error + * @see #getRuleSetName + * @draft ICU 3.2 + */ + virtual UnicodeString getRuleSetDisplayName(int32_t index, + const Locale& locale = Locale::getDefault()); + + /** + * Return the rule set display name for the provided rule set and locale. + * The locale is matched against the locales for which there is display name data, using + * normal fallback rules. If no locale matches, the default display name is returned. + * @return the display name for the rule set + * @draft ICU 3.2 + * @see #getRuleSetDisplayNames + */ + virtual UnicodeString getRuleSetDisplayName(const UnicodeString& ruleSetName, + const Locale& locale = Locale::getDefault()); /** * Formats the specified 32-bit number using the default ruleset. @@ -829,7 +959,12 @@ public: private: RuleBasedNumberFormat(); // default constructor not implemented - void init(const UnicodeString& rules, UParseError& perror, UErrorCode& status); + // this will ref the localizations if they are not NULL + // caller must deref to get adoption + RuleBasedNumberFormat(const UnicodeString& description, LocalizationInfo* localizations, + const Locale& locale, UParseError& perror, UErrorCode& status); + + void init(const UnicodeString& rules, LocalizationInfo* localizations, UParseError& perror, UErrorCode& status); void dispose(); void stripWhitespace(UnicodeString& src); void initDefaultRuleSet(); @@ -853,10 +988,44 @@ private: DecimalFormatSymbols* decimalFormatSymbols; UBool lenient; UnicodeString* lenientParseRules; + LocalizationInfo* localizations; }; // --------------- +inline +RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, + UParseError& perror, + UErrorCode& status) + : ruleSets(NULL) + , defaultRuleSet(NULL) + , locale(Locale::getDefault()) + , collator(NULL) + , decimalFormatSymbols(NULL) + , lenient(FALSE) + , lenientParseRules(NULL) + , localizations(NULL) +{ + init(description, NULL, perror, status); +} + +inline +RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, + const Locale& aLocale, + UParseError& perror, + UErrorCode& status) + : ruleSets(NULL) + , defaultRuleSet(NULL) + , locale(aLocale) + , collator(NULL) + , decimalFormatSymbols(NULL) + , lenient(FALSE) + , lenientParseRules(NULL) + , localizations(NULL) +{ + init(description, NULL, perror, status); +} + inline UnicodeString& RuleBasedNumberFormat::format(const Formattable& obj, UnicodeString& result, diff --git a/icu4c/source/test/intltest/itrbnf.cpp b/icu4c/source/test/intltest/itrbnf.cpp index e42f9bc1cbd..570a464c346 100644 --- a/icu4c/source/test/intltest/itrbnf.cpp +++ b/icu4c/source/test/intltest/itrbnf.cpp @@ -59,6 +59,7 @@ void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name, TESTCASE(11, TestSwedishSpellout); TESTCASE(12, TestBelgianFrenchSpellout); TESTCASE(13, TestSmallValues); + TESTCASE(14, TestLocalizations); #else TESTCASE(0, TestRBNFDisabled); #endif @@ -1476,6 +1477,135 @@ IntlTestRBNF::TestSmallValues() } } +void +IntlTestRBNF::TestLocalizations(void) +{ + UnicodeString rules("%main:0:no;1:some;100:a lot;1000:tons;\n" + "%other:0:nada;1:yah, some;100:plenty;1000:more'n you'll ever need"); + + UErrorCode status = U_ZERO_ERROR; + UParseError perror; + RuleBasedNumberFormat formatter(rules, perror, status); + if (U_FAILURE(status)) { + errln("FAIL: could not construct formatter"); + } else { + { + static const char* testData[][2] = { + { "0", "nada" }, + { "5", "yah, some" }, + { "423", "plenty" }, + { "12345", "more'n you'll ever need" }, + { NULL, NULL } + }; + doTest(&formatter, testData, FALSE); + } + + { + UnicodeString loc("<<%main, %other>,,,>"); + static const char* testData[][2] = { + { "0", "no" }, + { "5", "some" }, + { "423", "a lot" }, + { "12345", "tons" }, + { NULL, NULL } + }; + RuleBasedNumberFormat formatter0(rules, loc, perror, status); + if (U_FAILURE(status)) { + errln("failed to build second formatter"); + } else { + doTest(&formatter0, testData, FALSE); + + { + // exercise localization info + Locale locale0("en__VALLEY@turkey=gobblegobble"); + Locale locale1("de_DE_FOO"); + Locale locale2("ja_JP"); + logln(formatter0.getRuleSetDisplayName(0, locale0)); + logln(formatter0.getRuleSetDisplayName(0, locale1)); + logln(formatter0.getRuleSetDisplayName(0, locale2)); + // TODO: check against intended result + } + + for (int i = 0; i < formatter0.getNumberOfRuleSetDisplayNameLocales(); ++i) { + Locale locale = formatter0.getRuleSetDisplayNameLocale(i, status); + if (U_SUCCESS(status)) { + for (int j = 0; j < formatter0.getNumberOfRuleSetNames(); ++j) { + UnicodeString name = formatter0.getRuleSetName(j); + UnicodeString lname = formatter0.getRuleSetDisplayName(j, locale); + UnicodeString msg = locale.getName(); + msg.append(": "); + msg.append(name); + msg.append(" = "); + msg.append(lname); + logln(msg); + } + } + } + } + } + + { + static const char* goodLocs[] = { + "", // zero-length ok, same as providing no localization data + "<<>>", // no public rule sets ok + "<<%main>>", // no localizations ok + "<<%main,>,>", // comma before close angle ok + "<<%main>,\" '>>", // quotes everything until next quote + "<<%main>,<'en', \"it's ok\">>", // double quotes work too + " \n <\n <\n %main\n >\n , \t <\t en\t , \tfoo \t\t > \n\n > \n ", // rule whitespace ok + }; + int32_t goodLocsLen = sizeof(goodLocs)/sizeof(goodLocs[0]); + + static const char* badLocs[] = { + " ", // non-zero length + "<>", // empty array + "<", // unclosed outer array + "<<", // unclosed inner array + "<<,>>", // unexpected comma + "<<''>>", // empty string + " x<<%main>>", // first non space char not open angle bracket + "<%main>", // missing inner array + "<<%main %other>>", // elements missing separating commma (spaces must be quoted) + "<<%main>>", // arrays missing separating comma + "<<%main>,>", // too many elements in locale data + "<<%main>,>", // too few elements in locale data + "<<<%main>>>", // unexpected open angle + "<<%main<>>>", // unexpected open angle + "<<%main, %other>,>", // implicit empty strings + "<<%main>,>", // empty string + "<<%main>, < en, '>>", // unterminated quote + "<<%main>, < en, \"<>>", // unterminated quote + "<<%main\">>", // quote in string + "<<%main'>>", // quote in string + "<<%main<>>", // open angle in string + "<<%main>> x", // extra non-space text at end + + }; + int32_t badLocsLen = sizeof(badLocs)/sizeof(badLocs[0]); + + for (int i = 0; i < goodLocsLen; ++i) { + logln("[%d] '%s'", i, goodLocs[i]); + UErrorCode status = U_ZERO_ERROR; + UnicodeString loc(goodLocs[i]); + RuleBasedNumberFormat fmt(rules, loc, perror, status); + if (U_FAILURE(status)) { + errln("Failed parse of good localization string: '%s'", goodLocs[i]); + } + } + + for (int i = 0; i < badLocsLen; ++i) { + logln("[%d] '%s'", i, badLocs[i]); + UErrorCode status = U_ZERO_ERROR; + UnicodeString loc(badLocs[i]); + RuleBasedNumberFormat fmt(rules, loc, perror, status); + if (U_SUCCESS(status)) { + errln("Successful parse of bad localization string: '%s'", badLocs[i]); + } + } + } + } +} + void IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* testData[][2], UBool testParsing) { @@ -1491,7 +1621,7 @@ IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* testData[][2] const char* numString = testData[i][0]; const char* expectedWords = testData[i][1]; - logln("%i: %s\n", i, numString); + log("[%i] %s = ", i, numString); Formattable expectedNumber; decFmt->parse(numString, expectedNumber, status); if (U_FAILURE(status)) { @@ -1517,26 +1647,29 @@ IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* testData[][2] msg.append(actualString); errln(msg); break; - } else if (testParsing) { - Formattable parsedNumber; - formatter->parse(actualString, parsedNumber, status); - if (U_FAILURE(status)) { - UnicodeString msg = "FAIL: formatter could not parse "; - msg.append(actualString); - msg.append(" status code: " ); - msg.append(u_errorName(status)); - errln(msg); - break; - } else { - if (parsedNumber != expectedNumber) { - UnicodeString msg = "FAIL: parse failed for "; + } else { + logln(actualString); + if (testParsing) { + Formattable parsedNumber; + formatter->parse(actualString, parsedNumber, status); + if (U_FAILURE(status)) { + UnicodeString msg = "FAIL: formatter could not parse "; msg.append(actualString); - msg.append(", expected "); - decFmt->format(expectedNumber, msg, status); - msg.append(", but got "); - decFmt->format(parsedNumber, msg, status); + msg.append(" status code: " ); + msg.append(u_errorName(status)); errln(msg); break; + } else { + if (parsedNumber != expectedNumber) { + UnicodeString msg = "FAIL: parse failed for "; + msg.append(actualString); + msg.append(", expected "); + decFmt->format(expectedNumber, msg, status); + msg.append(", but got "); + decFmt->format(parsedNumber, msg, status); + errln(msg); + break; + } } } } diff --git a/icu4c/source/test/intltest/itrbnf.h b/icu4c/source/test/intltest/itrbnf.h index 1ad636d8532..db4e118fb42 100644 --- a/icu4c/source/test/intltest/itrbnf.h +++ b/icu4c/source/test/intltest/itrbnf.h @@ -102,6 +102,11 @@ class IntlTestRBNF : public IntlTest { */ virtual void TestSmallValues(); + /** + * Test localizations using string data. + */ + virtual void TestLocalizations(); + protected: virtual void doTest(RuleBasedNumberFormat* formatter, const char* testData[][2], UBool testParsing); virtual void doLenientParseTest(RuleBasedNumberFormat* formatter, const char* testData[][2]);