From 6d1999fbb9fd8dbca901b692cda6acb147982673 Mon Sep 17 00:00:00 2001 From: Fredrik Roubert Date: Wed, 2 Sep 2020 21:27:55 +0200 Subject: [PATCH] ICU-21289 Switch to using CharString for calling uloc_getKeywordValue(). --- icu4c/source/common/loclikely.cpp | 16 +++++-- icu4c/source/common/ucurr.cpp | 17 ++++--- icu4c/source/common/uresbund.cpp | 56 ++++++++++++----------- icu4c/source/i18n/calendar.cpp | 23 +++++----- icu4c/source/i18n/collationruleparser.cpp | 16 ++++--- icu4c/source/i18n/dtitvinf.cpp | 14 ++++-- icu4c/source/i18n/dtptngen.cpp | 23 +++++----- icu4c/source/i18n/rulebasedcollator.cpp | 10 +++- icu4c/source/i18n/ucol_sit.cpp | 21 ++++----- 9 files changed, 111 insertions(+), 85 deletions(-) diff --git a/icu4c/source/common/loclikely.cpp b/icu4c/source/common/loclikely.cpp index 92ae4e09ae3..99551e6cf1e 100644 --- a/icu4c/source/common/loclikely.cpp +++ b/icu4c/source/common/loclikely.cpp @@ -819,19 +819,25 @@ ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, UErrorCode rgStatus = U_ZERO_ERROR; // First check for rg keyword value - int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus); + icu::CharString rg; + { + icu::CharStringByteSink sink(&rg); + ulocimp_getKeywordValue(localeID, "rg", sink, &rgStatus); + } + int32_t rgLen = rg.length(); if (U_FAILURE(rgStatus) || rgLen < 3 || rgLen > 7) { rgLen = 0; } else { - // rgBuf guaranteed to be zero terminated here, with text len 6 // chop off the subdivision code (which will generally be "zzzz" anyway) - if (uprv_isASCIILetter(rgBuf[0])) { + const char* const data = rg.data(); + if (uprv_isASCIILetter(data[0])) { rgLen = 2; - rgBuf[0] = uprv_toupper(rgBuf[0]); - rgBuf[1] = uprv_toupper(rgBuf[1]); + rgBuf[0] = uprv_toupper(data[0]); + rgBuf[1] = uprv_toupper(data[1]); } else { // assume three-digit region code rgLen = 3; + uprv_memcpy(rgBuf, data, rgLen); } } diff --git a/icu4c/source/common/ucurr.cpp b/icu4c/source/common/ucurr.cpp index 
ffca8aac5f1..0f254a6f267 100644 --- a/icu4c/source/common/ucurr.cpp +++ b/icu4c/source/common/ucurr.cpp @@ -20,6 +20,7 @@ #include "unicode/usetiter.h" #include "unicode/utf16.h" #include "ustr_imp.h" +#include "bytesinkutil.h" #include "charstr.h" #include "cmemory.h" #include "cstring.h" @@ -520,14 +521,18 @@ ucurr_forLocale(const char* locale, return 0; } - char currency[4]; // ISO currency codes are alpha3 codes. UErrorCode localStatus = U_ZERO_ERROR; - int32_t resLen = uloc_getKeywordValue(locale, "currency", - currency, UPRV_LENGTHOF(currency), &localStatus); - if (U_SUCCESS(localStatus) && resLen == 3 && uprv_isInvariantString(currency, resLen)) { + CharString currency; + { + CharStringByteSink sink(&currency); + ulocimp_getKeywordValue(locale, "currency", sink, &localStatus); + } + int32_t resLen = currency.length(); + + if (U_SUCCESS(localStatus) && resLen == 3 && uprv_isInvariantString(currency.data(), resLen)) { if (resLen < buffCapacity) { - T_CString_toUpperCase(currency); - u_charsToUChars(currency, buff, resLen); + T_CString_toUpperCase(currency.data()); + u_charsToUChars(currency.data(), buff, resLen); } return u_terminateUChars(buff, buffCapacity, resLen, ec); } diff --git a/icu4c/source/common/uresbund.cpp b/icu4c/source/common/uresbund.cpp index 6652efe066b..416b42205e5 100644 --- a/icu4c/source/common/uresbund.cpp +++ b/icu4c/source/common/uresbund.cpp @@ -24,6 +24,7 @@ #include "unicode/ures.h" #include "unicode/ustring.h" #include "unicode/ucnv.h" +#include "bytesinkutil.h" #include "charstr.h" #include "uresimp.h" #include "ustr_imp.h" @@ -3063,7 +3064,6 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, const char *path, const char *resName, const char *keyword, const char *locid, UBool *isAvailable, UBool omitDefault, UErrorCode *status) { - char kwVal[1024] = ""; /* value of keyword 'keyword' */ char defVal[1024] = ""; /* default value for given locale */ char defLoc[1024] = ""; /* default value for given locale */ char 
base[1024] = ""; /* base locale */ @@ -3075,14 +3075,18 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, UErrorCode subStatus = U_ZERO_ERROR; int32_t length = 0; if(U_FAILURE(*status)) return 0; - uloc_getKeywordValue(locid, keyword, kwVal, 1024-1,&subStatus); - if(!uprv_strcmp(kwVal, DEFAULT_TAG)) { - kwVal[0]=0; + CharString kwVal; + { + CharStringByteSink sink(&kwVal); + ulocimp_getKeywordValue(locid, keyword, sink, &subStatus); + } + if(kwVal == DEFAULT_TAG) { + kwVal.clear(); } uloc_getBaseName(locid, base, 1024-1,&subStatus); #if defined(URES_TREE_DEBUG) fprintf(stderr, "getFunctionalEquivalent: \"%s\" [%s=%s] in %s - %s\n", - locid, keyword, kwVal, base, u_errorName(subStatus)); + locid, keyword, kwVal.data(), base, u_errorName(subStatus)); #endif ures_initStackObject(&bund1); ures_initStackObject(&bund2); @@ -3138,11 +3142,11 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, path?path:"ICUDATA", parent, keyword, defVal, u_errorName(subStatus)); #endif uprv_strcpy(defLoc, parent); - if(kwVal[0]==0) { - uprv_strcpy(kwVal, defVal); + if(kwVal.isEmpty()) { + kwVal.append(defVal, defLen, subStatus); #if defined(URES_TREE_DEBUG) fprintf(stderr, "%s;%s -> kwVal = %s\n", - path?path:"ICUDATA", parent, keyword, kwVal); + path?path:"ICUDATA", parent, keyword, kwVal.data()); #endif } } @@ -3177,7 +3181,7 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, #if defined(URES_TREE_DEBUG) fprintf(stderr, "%s;%s -> %s (looking for %s)\n", - path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal); + path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal.data()); #endif if(U_FAILURE(subStatus)) { *status = subStatus; @@ -3187,14 +3191,14 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, /**/ fprintf(stderr,"@%d [%s] %s\n", __LINE__, resName, u_errorName(subStatus)); #endif if(subStatus == U_ZERO_ERROR) { - ures_getByKey(&bund1, kwVal, &bund2, &subStatus); + ures_getByKey(&bund1, 
kwVal.data(), &bund2, &subStatus); #if defined(URES_TREE_DEBUG) -/**/ fprintf(stderr,"@%d [%s] %s\n", __LINE__, kwVal, u_errorName(subStatus)); +/**/ fprintf(stderr,"@%d [%s] %s\n", __LINE__, kwVal.data(), u_errorName(subStatus)); #endif if(subStatus == U_ZERO_ERROR) { #if defined(URES_TREE_DEBUG) fprintf(stderr, "%s;%s -> full0 %s=%s, %s\n", - path?path:"ICUDATA", parent, keyword, kwVal, u_errorName(subStatus)); + path?path:"ICUDATA", parent, keyword, kwVal.data(), u_errorName(subStatus)); #endif uprv_strcpy(full, parent); if(*full == 0) { @@ -3227,7 +3231,7 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, } else { #if defined(URES_TREE_DEBUG) fprintf(stderr, "err=%s in %s looking for %s\n", - u_errorName(subStatus), parent, kwVal); + u_errorName(subStatus), parent, kwVal.data()); #endif } } @@ -3262,12 +3266,12 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, getParentForFunctionalEquivalent(found,res,&bund1,parent,1023); ures_close(res); } while(!full[0] && *found && U_SUCCESS(*status)); - - if((full[0]==0) && uprv_strcmp(kwVal, defVal)) { + + if((full[0]==0) && kwVal != defVal) { #if defined(URES_TREE_DEBUG) - fprintf(stderr, "Failed to locate kw %s - try default %s\n", kwVal, defVal); + fprintf(stderr, "Failed to locate kw %s - try default %s\n", kwVal.data(), defVal); #endif - uprv_strcpy(kwVal, defVal); + kwVal.clear().append(defVal, subStatus); uprv_strcpy(parent, base); uprv_strcpy(found, base); @@ -3281,18 +3285,18 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, #if defined(URES_TREE_DEBUG) fprintf(stderr, "%s;%s -> %s (looking for default %s)\n", - path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal); + path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal.data()); #endif if(U_FAILURE(subStatus)) { *status = subStatus; } else if(subStatus == U_ZERO_ERROR) { ures_getByKey(res,resName,&bund1, &subStatus); if(subStatus == U_ZERO_ERROR) { - ures_getByKey(&bund1, kwVal, &bund2, 
&subStatus); + ures_getByKey(&bund1, kwVal.data(), &bund2, &subStatus); if(subStatus == U_ZERO_ERROR) { #if defined(URES_TREE_DEBUG) fprintf(stderr, "%s;%s -> full1 %s=%s, %s\n", path?path:"ICUDATA", - parent, keyword, kwVal, u_errorName(subStatus)); + parent, keyword, kwVal.data(), u_errorName(subStatus)); #endif uprv_strcpy(full, parent); if(*full == 0) { @@ -3336,7 +3340,7 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, if(U_SUCCESS(*status)) { if(!full[0]) { #if defined(URES_TREE_DEBUG) - fprintf(stderr, "Still could not load keyword %s=%s\n", keyword, kwVal); + fprintf(stderr, "Still could not load keyword %s=%s\n", keyword, kwVal.data()); #endif *status = U_MISSING_RESOURCE_ERROR; } else if(omitDefault) { @@ -3345,21 +3349,21 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, #endif if(uprv_strlen(defLoc) <= uprv_strlen(full)) { /* found the keyword in a *child* of where the default tag was present. */ - if(!uprv_strcmp(kwVal, defVal)) { /* if the requested kw is default, */ + if(kwVal == defVal) { /* if the requested kw is default, */ /* and the default is in or in an ancestor of the current locale */ #if defined(URES_TREE_DEBUG) - fprintf(stderr, "Removing unneeded var %s=%s\n", keyword, kwVal); + fprintf(stderr, "Removing unneeded var %s=%s\n", keyword, kwVal.data()); #endif - kwVal[0]=0; + kwVal.clear(); } } } uprv_strcpy(found, full); - if(kwVal[0]) { + if(!kwVal.isEmpty()) { uprv_strcat(found, "@"); uprv_strcat(found, keyword); uprv_strcat(found, "="); - uprv_strcat(found, kwVal); + uprv_strcat(found, kwVal.data()); } else if(!omitDefault) { uprv_strcat(found, "@"); uprv_strcat(found, keyword); diff --git a/icu4c/source/i18n/calendar.cpp b/icu4c/source/i18n/calendar.cpp index deb878ed243..5612fe9acc3 100644 --- a/icu4c/source/i18n/calendar.cpp +++ b/icu4c/source/i18n/calendar.cpp @@ -63,6 +63,8 @@ #include "sharedcalendar.h" #include "unifiedcache.h" #include "ulocimp.h" +#include "bytesinkutil.h" +#include 
"charstr.h" #if !UCONFIG_NO_SERVICE static icu::ICULocaleService* gService = nullptr; @@ -266,13 +268,13 @@ static ECalType getCalendarTypeForLocale(const char *locid) { } canonicalName[canonicalLen] = 0; // terminate - char calTypeBuf[32]; - int32_t calTypeBufLen; - - calTypeBufLen = uloc_getKeywordValue(canonicalName, "calendar", calTypeBuf, sizeof(calTypeBuf) - 1, &status); + CharString calTypeBuf; + { + CharStringByteSink sink(&calTypeBuf); + ulocimp_getKeywordValue(canonicalName, "calendar", sink, &status); + } if (U_SUCCESS(status)) { - calTypeBuf[calTypeBufLen] = 0; - calType = getCalendarType(calTypeBuf); + calType = getCalendarType(calTypeBuf.data()); if (calType != CALTYPE_UNKNOWN) { return calType; } @@ -296,16 +298,13 @@ static ECalType getCalendarTypeForLocale(const char *locid) { order = ures_getByKey(rb, "001", nullptr, &status); } - calTypeBuf[0] = 0; + calTypeBuf.clear(); if (U_SUCCESS(status) && order != nullptr) { // the first calendar type is the default for the region int32_t len = 0; const char16_t *uCalType = ures_getStringByIndex(order, 0, &len, &status); - if (len < (int32_t)sizeof(calTypeBuf)) { - u_UCharsToChars(uCalType, calTypeBuf, len); - *(calTypeBuf + len) = 0; // terminate; - calType = getCalendarType(calTypeBuf); - } + calTypeBuf.appendInvariantChars(uCalType, len, status); + calType = getCalendarType(calTypeBuf.data()); } ures_close(order); diff --git a/icu4c/source/i18n/collationruleparser.cpp b/icu4c/source/i18n/collationruleparser.cpp index 4cc25a1f5ce..d46004d489a 100644 --- a/icu4c/source/i18n/collationruleparser.cpp +++ b/icu4c/source/i18n/collationruleparser.cpp @@ -24,6 +24,7 @@ #include "unicode/uloc.h" #include "unicode/unistr.h" #include "unicode/utf16.h" +#include "bytesinkutil.h" #include "charstr.h" #include "cmemory.h" #include "collation.h" @@ -34,6 +35,7 @@ #include "cstring.h" #include "patternprops.h" #include "uassert.h" +#include "ulocimp.h" #include "uvectr32.h" U_NAMESPACE_BEGIN @@ -629,11 +631,12 @@ 
CollationRuleParser::parseSetting(UErrorCode &errorCode) { uprv_memcpy(baseID, "und", 3); } // @collation=type, or length=0 if not specified - char collationType[ULOC_KEYWORDS_CAPACITY]; - length = uloc_getKeywordValue(localeID, "collation", - collationType, ULOC_KEYWORDS_CAPACITY, - &errorCode); - if(U_FAILURE(errorCode) || length >= ULOC_KEYWORDS_CAPACITY) { + CharString collationType; + { + CharStringByteSink sink(&collationType); + ulocimp_getKeywordValue(localeID, "collation", sink, &errorCode); + } + if(U_FAILURE(errorCode)) { errorCode = U_ZERO_ERROR; setParseError("expected language tag in [import langTag]", errorCode); return; @@ -642,7 +645,8 @@ CollationRuleParser::parseSetting(UErrorCode &errorCode) { setParseError("[import langTag] is not supported", errorCode); } else { UnicodeString importedRules; - importer->getRules(baseID, length > 0 ? collationType : "standard", + importer->getRules(baseID, + !collationType.isEmpty() ? collationType.data() : "standard", importedRules, errorReason, errorCode); if(U_FAILURE(errorCode)) { if(errorReason == nullptr) { diff --git a/icu4c/source/i18n/dtitvinf.cpp b/icu4c/source/i18n/dtitvinf.cpp index 3733d04518d..c4b6bbcf401 100644 --- a/icu4c/source/i18n/dtitvinf.cpp +++ b/icu4c/source/i18n/dtitvinf.cpp @@ -23,6 +23,7 @@ #include <iostream> #endif +#include "bytesinkutil.h" #include "cmemory.h" #include "cstring.h" #include "unicode/msgfmt.h" @@ -35,6 +36,7 @@ #include "uresimp.h" #include "hash.h" #include "gregoimp.h" +#include "ulocimp.h" #include "uresimp.h" @@ -397,17 +399,19 @@ DateIntervalInfo::initializeData(const Locale& locale, UErrorCode& status) // Get the correct calendar type const char * calendarTypeToUse = gGregorianTag; // initial default - char calendarType[ULOC_KEYWORDS_CAPACITY]; // to be filled in with the type to use, if all goes well char localeWithCalendarKey[ULOC_LOCALE_IDENTIFIER_CAPACITY]; // obtain a locale that always has the calendar key value that should be used 
(void)ures_getFunctionalEquivalent(localeWithCalendarKey, ULOC_LOCALE_IDENTIFIER_CAPACITY, nullptr, "calendar", "calendar", locName, nullptr, false, &status); localeWithCalendarKey[ULOC_LOCALE_IDENTIFIER_CAPACITY-1] = 0; // ensure null termination // now get the calendar key value from that locale - int32_t calendarTypeLen = uloc_getKeywordValue(localeWithCalendarKey, "calendar", calendarType, - ULOC_KEYWORDS_CAPACITY, &status); - if (U_SUCCESS(status) && calendarTypeLen < ULOC_KEYWORDS_CAPACITY) { - calendarTypeToUse = calendarType; + CharString calendarType; + { + CharStringByteSink sink(&calendarType); + ulocimp_getKeywordValue(localeWithCalendarKey, "calendar", sink, &status); + } + if (U_SUCCESS(status)) { + calendarTypeToUse = calendarType.data(); } status = U_ZERO_ERROR; diff --git a/icu4c/source/i18n/dtptngen.cpp b/icu4c/source/i18n/dtptngen.cpp index ea5b2455104..2947672013e 100644 --- a/icu4c/source/i18n/dtptngen.cpp +++ b/icu4c/source/i18n/dtptngen.cpp @@ -29,6 +29,7 @@ #include "unicode/ustring.h" #include "unicode/rep.h" #include "unicode/region.h" +#include "bytesinkutil.h" #include "cpputils.h" #include "mutex.h" #include "umutex.h" @@ -37,6 +38,7 @@ #include "locbased.h" #include "hash.h" #include "uhash.h" +#include "ulocimp.h" #include "uresimp.h" #include "ulocimp.h" #include "dtptngen_impl.h" @@ -903,22 +905,19 @@ DateTimePatternGenerator::getCalendarTypeToUse(const Locale& locale, CharString& &localStatus); localeWithCalendarKey[ULOC_LOCALE_IDENTIFIER_CAPACITY-1] = 0; // ensure null termination // now get the calendar key value from that locale - char calendarType[ULOC_KEYWORDS_CAPACITY]; - int32_t calendarTypeLen = uloc_getKeywordValue( - localeWithCalendarKey, - "calendar", - calendarType, - ULOC_KEYWORDS_CAPACITY, - &localStatus); + destination.clear(); + { + CharStringByteSink sink(&destination); + ulocimp_getKeywordValue( + localeWithCalendarKey, + "calendar", + sink, + &localStatus); + } // If the input locale was invalid, don't fail with 
missing resource error, instead // continue with default of Gregorian. if (U_FAILURE(localStatus) && localStatus != U_MISSING_RESOURCE_ERROR) { err = localStatus; - return; - } - if (calendarTypeLen > 0 && calendarTypeLen < ULOC_KEYWORDS_CAPACITY) { - destination.clear().append(calendarType, -1, err); - if (U_FAILURE(err)) { return; } } } } diff --git a/icu4c/source/i18n/rulebasedcollator.cpp b/icu4c/source/i18n/rulebasedcollator.cpp index e9482628d9b..cf4cfc87f2a 100644 --- a/icu4c/source/i18n/rulebasedcollator.cpp +++ b/icu4c/source/i18n/rulebasedcollator.cpp @@ -32,6 +32,7 @@ #include "unicode/utf8.h" #include "unicode/uversion.h" #include "bocsu.h" +#include "bytesinkutil.h" #include "charstr.h" #include "cmemory.h" #include "collation.h" @@ -50,6 +51,7 @@ #include "ucol_imp.h" #include "uhash.h" #include "uitercollationiterator.h" +#include "ulocimp.h" #include "ustr_imp.h" #include "utf16collationiterator.h" #include "utf8collationiterator.h" @@ -1579,8 +1581,12 @@ RuleBasedCollator::internalGetShortDefinitionString(const char *locale, appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode); } // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default. 
- length = uloc_getKeywordValue(resultLocale, "collation", subtag, UPRV_LENGTHOF(subtag), &errorCode); - appendSubtag(result, 'K', subtag, length, errorCode); + { + CharString collation; + CharStringByteSink sink(&collation); + ulocimp_getKeywordValue(resultLocale, "collation", sink, &errorCode); + appendSubtag(result, 'K', collation.data(), collation.length(), errorCode); + } length = uloc_getLanguage(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode); if (length == 0) { appendSubtag(result, 'L', "root", 4, errorCode); diff --git a/icu4c/source/i18n/ucol_sit.cpp b/icu4c/source/i18n/ucol_sit.cpp index a740286d79e..2cbe54332d2 100644 --- a/icu4c/source/i18n/ucol_sit.cpp +++ b/icu4c/source/i18n/ucol_sit.cpp @@ -20,6 +20,8 @@ #include "unicode/utf16.h" #include "utracimp.h" #include "ucol_imp.h" +#include "ulocimp.h" +#include "bytesinkutil.h" #include "cmemory.h" #include "cstring.h" #include "uresimp.h" @@ -458,30 +460,27 @@ ucol_prepareShortStringOpen( const char *definition, /* we try to find stuff from keyword */ UResourceBundle *collations = ures_getByKey(b, "collations", nullptr, status); UResourceBundle *collElem = nullptr; - char keyBuffer[256]; - // if there is a keyword, we pick it up and try to get elements - int32_t keyLen = uloc_getKeywordValue(buffer, "collation", keyBuffer, sizeof(keyBuffer), status); - // Treat too long a value as no keyword. 
- if(keyLen >= (int32_t)sizeof(keyBuffer)) { - keyLen = 0; - *status = U_ZERO_ERROR; + CharString keyBuffer; + { + // if there is a keyword, we pick it up and try to get elements + CharStringByteSink sink(&keyBuffer); + ulocimp_getKeywordValue(buffer, "collation", sink, status); } - if(keyLen == 0) { + if(keyBuffer.isEmpty()) { // no keyword // we try to find the default setting, which will give us the keyword value UResourceBundle *defaultColl = ures_getByKeyWithFallback(collations, "default", nullptr, status); if(U_SUCCESS(*status)) { int32_t defaultKeyLen = 0; const char16_t *defaultKey = ures_getString(defaultColl, &defaultKeyLen, status); - u_UCharsToChars(defaultKey, keyBuffer, defaultKeyLen); - keyBuffer[defaultKeyLen] = 0; + keyBuffer.appendInvariantChars(defaultKey, defaultKeyLen, *status); } else { *status = U_INTERNAL_PROGRAM_ERROR; return; } ures_close(defaultColl); } - collElem = ures_getByKeyWithFallback(collations, keyBuffer, collElem, status); + collElem = ures_getByKeyWithFallback(collations, keyBuffer.data(), collElem, status); ures_close(collElem); ures_close(collations); ures_close(b);