From 435623bc0529404e8a3d30d57cc7be3f3f77b4fc Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Wed, 13 Aug 2014 22:58:14 +0000 Subject: [PATCH] ICU-11058 support nested collation rule imports; make the importer stateless X-SVN-Rev: 36157 --- icu4c/source/data/coll/es.txt | 112 +--------------------- icu4c/source/i18n/collationbuilder.cpp | 18 ++-- icu4c/source/i18n/collationruleparser.cpp | 9 +- icu4c/source/i18n/collationruleparser.h | 3 +- icu4c/source/i18n/ucol_imp.h | 4 +- icu4c/source/i18n/ucol_res.cpp | 17 ++-- icu4c/source/tools/genrb/parse.cpp | 19 ++-- 7 files changed, 35 insertions(+), 147 deletions(-) diff --git a/icu4c/source/data/coll/es.txt b/icu4c/source/data/coll/es.txt index 7d47f7af495..4e2cf9c448b 100644 --- a/icu4c/source/data/coll/es.txt +++ b/icu4c/source/data/coll/es.txt @@ -14,117 +14,7 @@ es{ collations{ search{ Sequence{ - "[normalization on][suppressContractions [เ-ไ ເ-ໄ ꪵ ꪶ ꪹ ꪻ ꪼ]]" - "&'='<'≠'" - "&ا" - "<<<ﺎ<<<ﺍ" - "<<آ" - "<<<ﺂ<<<ﺁ" - "<<أ" - "<<<ﺄ<<<ﺃ" - "<<إ" - "<<<ﺈ<<<ﺇ" - "&و" - "<<<ۥ" - "<<<ﻮ<<<ﻭ" - "<<ؤ" - "<<<ﺆ<<<ﺅ" - "&ي" - "<<<ۦ" - "<<<ﻳ<<<ﻴ<<<ﻲ<<<ﻱ" - "<<ئ" - "<<<ﺋ<<<ﺌ<<<ﺊ<<<ﺉ" - "<<ى" - "<<<ﯨ<<<ﯩ" - "<<<ﻰ<<<ﻯ" - "&ه" - "<<<ﻫ<<<ﻬ<<<ﻪ<<<ﻩ" - "<<ة" - "<<<ﺔ<<<ﺓ" - "&[last primary ignorable]<<׳" - "<<״" - "<<ـ" - "<<ฺ" - "&ᄀ" - "=ᆨ" - "&ᄀᄀ" - "=ᄁ=ᆩ" - "&ᄀᄉ" - "=ᆪ" - "&ᄂ" - "=ᆫ" - "&ᄂᄌ" - "=ᆬ" - "&ᄂᄒ" - "=ᆭ" - "&ᄃ" - "=ᆮ" - "&ᄃᄃ" - "=ᄄ" - "&ᄅ" - "=ᆯ" - "&ᄅᄀ" - "=ᆰ" - "&ᄅᄆ" - "=ᆱ" - "&ᄅᄇ" - "=ᆲ" - "&ᄅᄉ" - "=ᆳ" - "&ᄅᄐ" - "=ᆴ" - "&ᄅᄑ" - "=ᆵ" - "&ᄅᄒ" - "=ᆶ" - "&ᄆ" - "=ᆷ" - "&ᄇ" - "=ᆸ" - "&ᄇᄇ" - "=ᄈ" - "&ᄇᄉ" - "=ᆹ" - "&ᄉ" - "=ᆺ" - "&ᄉᄉ" - "=ᄊ=ᆻ" - "&ᄋ" - "=ᆼ" - "&ᄌ" - "=ᆽ" - "&ᄌᄌ" - "=ᄍ" - "&ᄎ" - "=ᆾ" - "&ᄏ" - "=ᆿ" - "&ᄐ" - "=ᇀ" - "&ᄑ" - "=ᇁ" - "&ᄒ" - "=ᇂ" - "&ᅡᅵ" - "=ᅢ" - "&ᅣᅵ" - "=ᅤ" - "&ᅥᅵ" - "=ᅦ" - "&ᅧᅵ" - "=ᅨ" - "&ᅩᅡ" - "=ᅪ" - "&ᅩᅡᅵ" - "=ᅫ" - "&ᅩᅵ" - "=ᅬ" - "&ᅮᅴ" - "=ᅯ" - "&ᅮᅴᅵ" - "=ᅰ" - "&ᅮᅵ" - "=ᅱ" + "[import und-u-co-search]" "&NgetRules(baseID, - length > 0 ? collationType : "standard", - errorReason, errorCode); + UnicodeString importedRules; + importer->getRules(baseID, length > 0 ? collationType : "standard", + importedRules, errorReason, errorCode); if(U_FAILURE(errorCode)) { if(errorReason == NULL) { errorReason = "[import langTag] failed"; @@ -651,7 +650,7 @@ CollationRuleParser::parseSetting(UErrorCode &errorCode) { } const UnicodeString *outerRules = rules; int32_t outerRuleIndex = ruleIndex; - parse(*importedRules, errorCode); + parse(importedRules, errorCode); if(U_FAILURE(errorCode)) { if(parseError != NULL) { parseError->offset = outerRuleIndex; diff --git a/icu4c/source/i18n/collationruleparser.h b/icu4c/source/i18n/collationruleparser.h index 8df82bcb6a0..3c2b22c9dc4 100644 --- a/icu4c/source/i18n/collationruleparser.h +++ b/icu4c/source/i18n/collationruleparser.h @@ -93,8 +93,9 @@ public: class U_I18N_API Importer : public UObject { public: virtual ~Importer(); - virtual const UnicodeString *getRules( + virtual void getRules( const char *localeID, const char *collationType, + UnicodeString &rules, const char *&errorReason, UErrorCode &errorCode) = 0; }; diff --git a/icu4c/source/i18n/ucol_imp.h b/icu4c/source/i18n/ucol_imp.h index 39d1b28cbe7..dd1c85a0b0d 100644 --- a/icu4c/source/i18n/ucol_imp.h +++ b/icu4c/source/i18n/ucol_imp.h @@ -61,8 +61,8 @@ class UnicodeString; class CollationLoader { public: static void appendRootRules(UnicodeString &s); - static UnicodeString *loadRules(const char *localeID, const char *collationType, - UErrorCode &errorCode); + static void loadRules(const char *localeID, const char *collationType, + UnicodeString &rules, UErrorCode &errorCode); static const CollationTailoring *loadTailoring(const Locale &locale, Locale &validLocale, UErrorCode &errorCode); diff --git a/icu4c/source/i18n/ucol_res.cpp b/icu4c/source/i18n/ucol_res.cpp index a01c12127df..d7fd26da478 100644 --- a/icu4c/source/i18n/ucol_res.cpp +++ b/icu4c/source/i18n/ucol_res.cpp @@ -100,16 +100,17 @@ CollationLoader::appendRootRules(UnicodeString &s) { } } -UnicodeString * -CollationLoader::loadRules(const char *localeID, const char *collationType, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { return NULL; } +void +CollationLoader::loadRules(const char *localeID, const char *collationType, + UnicodeString &rules, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return; } U_ASSERT(collationType != NULL && *collationType != 0); // Copy the type for lowercasing. char type[16]; int32_t typeLength = uprv_strlen(collationType); if(typeLength >= LENGTHOF(type)) { errorCode = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; + return; } uprv_memcpy(type, collationType, typeLength + 1); T_CString_toLowerCase(type); @@ -121,15 +122,13 @@ CollationLoader::loadRules(const char *localeID, const char *collationType, UErr ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode)); int32_t length; const UChar *s = ures_getStringByKey(data.getAlias(), "Sequence", &length, &errorCode); - if(U_FAILURE(errorCode)) { return NULL; } + if(U_FAILURE(errorCode)) { return; } // No string pointer aliasing so that we need not hold onto the resource bundle. - UnicodeString *rules = new UnicodeString(s, length); - if(rules == NULL) { + rules.setTo(s, length); + if(rules.isBogus()) { errorCode = U_MEMORY_ALLOCATION_ERROR; - return NULL; } - return rules; } const CollationTailoring * diff --git a/icu4c/source/tools/genrb/parse.cpp b/icu4c/source/tools/genrb/parse.cpp index cb4db51e4b8..26573d424ae 100644 --- a/icu4c/source/tools/genrb/parse.cpp +++ b/icu4c/source/tools/genrb/parse.cpp @@ -673,21 +673,22 @@ class GenrbImporter : public icu::CollationRuleParser::Importer { public: GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {} virtual ~GenrbImporter(); - virtual const UnicodeString *getRules( + virtual void getRules( const char *localeID, const char *collationType, + UnicodeString &rules, const char *&errorReason, UErrorCode &errorCode); private: const char *inputDir; const char *outputDir; - UnicodeString rules; }; GenrbImporter::~GenrbImporter() {} -const UnicodeString * +void GenrbImporter::getRules( const char *localeID, const char *collationType, + UnicodeString &rules, const char *& /*errorReason*/, UErrorCode &errorCode) { struct SRBRoot *data = NULL; UCHARBUF *ucbuf = NULL; @@ -718,11 +719,11 @@ GenrbImporter::getRules( if (U_FAILURE(errorCode)) { - return NULL; + return; } if(filename==NULL){ errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return NULL; + return; }else{ filelen = (int32_t)uprv_strlen(filename); } @@ -810,6 +811,9 @@ GenrbImporter::getRules( /* Parse the data into an SRBRoot */ data = parse(ucbuf, inputDir, outputDir, filename, FALSE, FALSE, &errorCode); + if (U_FAILURE(errorCode)) { + goto finish; + } root = data->fRoot; collations = resLookup(root, "collations"); @@ -818,7 +822,8 @@ GenrbImporter::getRules( if (collation != NULL) { sequence = resLookup(collation, "Sequence"); if (sequence != NULL) { - rules.setTo(FALSE, sequence->u.fString.fChars, sequence->u.fString.fLength); + // No string pointer aliasing so that we need not hold onto the resource bundle. + rules.setTo(sequence->u.fString.fChars, sequence->u.fString.fLength); } } } @@ -835,8 +840,6 @@ finish: if(ucbuf) { ucbuf_close(ucbuf); } - - return &rules; } // Quick-and-dirty escaping function.