diff --git a/icu4c/source/common/locid.cpp b/icu4c/source/common/locid.cpp index b0766165604..04723dcd5a5 100644 --- a/icu4c/source/common/locid.cpp +++ b/icu4c/source/common/locid.cpp @@ -105,7 +105,6 @@ typedef enum ELocalePos { U_CFUNC int32_t locale_getKeywords(const char *localeID, char prev, char *keywords, int32_t keywordCapacity, - char *values, int32_t valuesCapacity, int32_t *valLen, UBool valuesToo, UErrorCode *status); @@ -185,17 +184,16 @@ Locale *locale_set_default_internal(const char *id, UErrorCode& status) { canonicalize = TRUE; // always canonicalize host ID } - char localeNameBuf[512]; - - if (canonicalize) { - uloc_canonicalize(id, localeNameBuf, sizeof(localeNameBuf)-1, &status); - } else { - uloc_getName(id, localeNameBuf, sizeof(localeNameBuf)-1, &status); + CharString localeNameBuf; + { + CharStringByteSink sink(&localeNameBuf); + if (canonicalize) { + ulocimp_canonicalize(id, sink, &status); + } else { + ulocimp_getName(id, sink, &status); + } } - localeNameBuf[sizeof(localeNameBuf)-1] = 0; // Force null termination in event of - // a long name filling the buffer. - // (long names are truncated.) - // + if (U_FAILURE(status)) { return gDefaultLocale; } @@ -209,14 +207,14 @@ Locale *locale_set_default_internal(const char *id, UErrorCode& status) { ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup); } - Locale *newDefault = (Locale *)uhash_get(gDefaultLocalesHashT, localeNameBuf); + Locale *newDefault = (Locale *)uhash_get(gDefaultLocalesHashT, localeNameBuf.data()); if (newDefault == NULL) { newDefault = new Locale(Locale::eBOGUS); if (newDefault == NULL) { status = U_MEMORY_ALLOCATION_ERROR; return gDefaultLocale; } - newDefault->init(localeNameBuf, FALSE); + newDefault->init(localeNameBuf.data(), FALSE); uhash_put(gDefaultLocalesHashT, (char*) newDefault->getName(), newDefault, &status); if (U_FAILURE(status)) { return gDefaultLocale; @@ -1428,7 +1426,7 @@ Locale::createKeywords(UErrorCode &status) const const char* assignment = uprv_strchr(fullName, '='); if(variantStart) { if(assignment > variantStart) { - int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status); + int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, FALSE, &status); if(U_SUCCESS(status) && keyLen) { result = new KeywordEnumeration(keywords, keyLen, 0, status); if (!result) { @@ -1457,7 +1455,7 @@ Locale::createUnicodeKeywords(UErrorCode &status) const const char* assignment = uprv_strchr(fullName, '='); if(variantStart) { if(assignment > variantStart) { - int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status); + int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, FALSE, &status); if(U_SUCCESS(status) && keyLen) { result = new UnicodeKeywordEnumeration(keywords, keyLen, 0, status); if (!result) { diff --git a/icu4c/source/common/uloc.cpp b/icu4c/source/common/uloc.cpp index 28cee286e20..0e235d7958c 100644 --- a/icu4c/source/common/uloc.cpp +++ b/icu4c/source/common/uloc.cpp @@ -30,10 +30,14 @@ l = lang, C = ctry, M = charmap, V = variant */ +#include "unicode/bytestream.h" +#include "unicode/errorcode.h" +#include "unicode/stringpiece.h" #include "unicode/utypes.h" #include "unicode/ustring.h" #include "unicode/uloc.h" +#include "bytesinkutil.h" #include "putilimp.h" #include "ustr_imp.h" #include "ulocimp.h" @@ -46,6 +50,7 @@ #include "uassert.h" #include "charstr.h" +#include #include /* for sprintf */ U_NAMESPACE_USE @@ -59,7 +64,6 @@ U_CFUNC int32_t locale_getKeywords(const char *localeID, char prev, char *keywords, int32_t keywordCapacity, - char *values, int32_t valuesCapacity, int32_t *valLen, UBool valuesToo, UErrorCode *status); @@ -597,11 +601,10 @@ compareKeywordStructs(const void * /*context*/, const void *left, const void *ri return uprv_strcmp(leftString, rightString); } -static int32_t +static void _getKeywords(const char *localeID, char prev, - char *keywords, int32_t keywordCapacity, - char *values, int32_t valuesCapacity, int32_t *valLen, + ByteSink& sink, UBool valuesToo, UErrorCode *status) { @@ -613,8 +616,6 @@ _getKeywords(const char *localeID, const char* equalSign = NULL; const char* semicolon = NULL; int32_t i = 0, j, n; - int32_t keywordsLen = 0; - int32_t valuesLen = 0; if(prev == '@') { /* start of keyword definition */ /* we will grab pairs, trim spaces, lowercase keywords, sort and return */ @@ -629,7 +630,7 @@ _getKeywords(const char *localeID, } if(numKeywords == maxKeywords) { *status = U_INTERNAL_PROGRAM_ERROR; - return 0; + return; } equalSign = uprv_strchr(pos, '='); semicolon = uprv_strchr(pos, ';'); @@ -637,13 +638,13 @@ _getKeywords(const char *localeID, /* ';' before '=' [foo@currency;collation=pinyin] is illegal */ if(!equalSign || (semicolon && semicolon= ULOC_KEYWORD_BUFFER_LEN) { /* keyword name too long for internal buffer */ *status = U_INTERNAL_PROGRAM_ERROR; - return 0; + return; } for(i = 0, n = 0; i < equalSign - pos; ++i) { if (pos[i] != ' ') { @@ -654,7 +655,7 @@ _getKeywords(const char *localeID, /* zero-length keyword is an error. */ if (n == 0) { *status = U_INVALID_FORMAT_ERROR; - return 0; + return; } keywordList[numKeywords].keyword[n] = 0; @@ -669,7 +670,7 @@ _getKeywords(const char *localeID, /* Premature end or zero-length value */ if (!*equalSign || equalSign == semicolon) { *status = U_INVALID_FORMAT_ERROR; - return 0; + return; } keywordList[numKeywords].valueStart = equalSign; @@ -707,45 +708,17 @@ _getKeywords(const char *localeID, /* Now construct the keyword part */ for(i = 0; i < numKeywords; i++) { - if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) { - uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword); - if(valuesToo) { - keywords[keywordsLen + keywordList[i].keywordLen] = '='; - } else { - keywords[keywordsLen + keywordList[i].keywordLen] = 0; - } - } - keywordsLen += keywordList[i].keywordLen + 1; + sink.Append(keywordList[i].keyword, keywordList[i].keywordLen); if(valuesToo) { - if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) { - uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen); - } - keywordsLen += keywordList[i].valueLen; - + sink.Append("=", 1); + sink.Append(keywordList[i].valueStart, keywordList[i].valueLen); if(i < numKeywords - 1) { - if(keywordsLen < keywordCapacity) { - keywords[keywordsLen] = ';'; - } - keywordsLen++; + sink.Append(";", 1); } - } - if(values) { - if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) { - uprv_strcpy(values+valuesLen, keywordList[i].valueStart); - values[valuesLen + keywordList[i].valueLen] = 0; - } - valuesLen += keywordList[i].valueLen + 1; + } else { + sink.Append("\0", 1); } } - if(values) { - values[valuesLen] = 0; - if(valLen) { - *valLen = valuesLen; - } - } - return u_terminateChars(keywords, keywordCapacity, keywordsLen, status); - } else { - return 0; } } @@ -753,12 +726,28 @@ U_CFUNC int32_t locale_getKeywords(const char *localeID, char prev, char *keywords, int32_t keywordCapacity, - char *values, int32_t valuesCapacity, int32_t *valLen, UBool valuesToo, UErrorCode *status) { - return _getKeywords(localeID, prev, keywords, keywordCapacity, - values, valuesCapacity, valLen, valuesToo, - status); + if (U_FAILURE(*status)) { + return 0; + } + + CheckedArrayByteSink sink(keywords, keywordCapacity); + _getKeywords(localeID, prev, sink, valuesToo, status); + + int32_t reslen = sink.NumberOfBytesAppended(); + + if (U_FAILURE(*status)) { + return reslen; + } + + if (sink.Overflowed()) { + *status = U_BUFFER_OVERFLOW_ERROR; + } else { + u_terminateChars(keywords, keywordCapacity, reslen, status); + } + + return reslen; } U_CAPI int32_t U_EXPORT2 @@ -1135,26 +1124,6 @@ static int16_t _findIndex(const char* const* list, const char* key) return -1; } -/* count the length of src while copying it to dest; return strlen(src) */ -static inline int32_t -_copyCount(char *dest, int32_t destCapacity, const char *src) { - const char *anchor; - char c; - - anchor=src; - for(;;) { - if((c=*src)==0) { - return (int32_t)(src-anchor); - } - if(destCapacity<=0) { - return (int32_t)((src-anchor)+uprv_strlen(src)); - } - ++src; - *dest++=c; - --destCapacity; - } -} - U_CFUNC const char* uloc_getCurrentCountryID(const char* oldID){ int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID); @@ -1179,13 +1148,11 @@ uloc_getCurrentLanguageID(const char* oldID){ * * TODO try to use this in Locale */ -U_CFUNC int32_t +static CharString ulocimp_getLanguage(const char *localeID, - char *language, int32_t languageCapacity, - const char **pEnd) { - int32_t i=0; - int32_t offset; - char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */ + const char **pEnd, + UErrorCode &status) { + CharString result; if (uprv_stricmp(localeID, "root") == 0) { localeID += 4; @@ -1199,48 +1166,52 @@ ulocimp_getLanguage(const char *localeID, /* if it starts with i- or x- then copy that prefix */ if(_isIDPrefix(localeID)) { - if(i=0); - lang[i]=(char)uprv_tolower(*localeID); - } - i++; + result.append((char)uprv_tolower(*localeID), status); localeID++; } - if(i==3) { + if(result.length()==3) { /* convert 3 character code to 2 character code if possible *CWB*/ - offset=_findIndex(LANGUAGES_3, lang); + int32_t offset = _findIndex(LANGUAGES_3, result.data()); if(offset>=0) { - i=_copyCount(language, languageCapacity, LANGUAGES[offset]); + result.clear(); + result.append(LANGUAGES[offset], status); } } if(pEnd!=NULL) { *pEnd=localeID; } - return i; + + return result; } U_CFUNC int32_t +ulocimp_getLanguage(const char *localeID, + char *language, int32_t languageCapacity, + const char **pEnd) { + ErrorCode status; + CharString result = ulocimp_getLanguage(localeID, pEnd, status); + if (status.isFailure()) { + return 0; + } + int32_t reslen = result.length(); + uprv_memcpy(language, result.data(), std::min(reslen, languageCapacity)); + return reslen; +} + +static CharString ulocimp_getScript(const char *localeID, - char *script, int32_t scriptCapacity, - const char **pEnd) -{ + const char **pEnd, + UErrorCode &status) { + CharString result; int32_t idLen = 0; if (pEnd != NULL) { @@ -1259,132 +1230,137 @@ ulocimp_getScript(const char *localeID, if (pEnd != NULL) { *pEnd = localeID+idLen; } - if(idLen > scriptCapacity) { - idLen = scriptCapacity; - } if (idLen >= 1) { - script[0]=(char)uprv_toupper(*(localeID++)); + result.append((char)uprv_toupper(*(localeID++)), status); } for (i = 1; i < idLen; i++) { - script[i]=(char)uprv_tolower(*(localeID++)); + result.append((char)uprv_tolower(*(localeID++)), status); } } - else { - idLen = 0; - } - return idLen; + + return result; } U_CFUNC int32_t +ulocimp_getScript(const char *localeID, + char *script, int32_t scriptCapacity, + const char **pEnd) { + ErrorCode status; + CharString result = ulocimp_getScript(localeID, pEnd, status); + if (status.isFailure()) { + return 0; + } + int32_t reslen = result.length(); + uprv_memcpy(script, result.data(), std::min(reslen, scriptCapacity)); + return reslen; +} + +static CharString ulocimp_getCountry(const char *localeID, - char *country, int32_t countryCapacity, - const char **pEnd) -{ + const char **pEnd, + UErrorCode &status) { + CharString result; int32_t idLen=0; - char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 }; - int32_t offset; /* copy the country as far as possible and count its length */ while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) { - if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/ - cnty[idLen]=(char)uprv_toupper(localeID[idLen]); - } + result.append((char)uprv_toupper(localeID[idLen]), status); idLen++; } /* the country should be either length 2 or 3 */ if (idLen == 2 || idLen == 3) { - UBool gotCountry = FALSE; /* convert 3 character code to 2 character code if possible *CWB*/ if(idLen==3) { - offset=_findIndex(COUNTRIES_3, cnty); + int32_t offset = _findIndex(COUNTRIES_3, result.data()); if(offset>=0) { - idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]); - gotCountry = TRUE; - } - } - if (!gotCountry) { - int32_t i = 0; - for (i = 0; i < idLen; i++) { - if (i < countryCapacity) { - country[i]=(char)uprv_toupper(localeID[i]); - } + result.clear(); + result.append(COUNTRIES[offset], status); } } localeID+=idLen; } else { - idLen = 0; + result.clear(); } if(pEnd!=NULL) { *pEnd=localeID; } - return idLen; + return result; +} + +U_CFUNC int32_t +ulocimp_getCountry(const char *localeID, + char *country, int32_t countryCapacity, + const char **pEnd) { + ErrorCode status; + CharString result = ulocimp_getCountry(localeID, pEnd, status); + if (status.isFailure()) { + return 0; + } + int32_t reslen = result.length(); + uprv_memcpy(country, result.data(), std::min(reslen, countryCapacity)); + return reslen; } /** * @param needSeparator if true, then add leading '_' if any variants * are added to 'variant' */ -static int32_t +static void _getVariantEx(const char *localeID, char prev, - char *variant, int32_t variantCapacity, + ByteSink& sink, UBool needSeparator) { - int32_t i=0; + UBool hasVariant = FALSE; /* get one or more variant tags and separate them with '_' */ if(_isIDSeparator(prev)) { /* get a variant string after a '-' or '_' */ while(!_isTerminator(*localeID)) { if (needSeparator) { - if (i 0) { /* Found optional script */ tmpLocaleID = scriptID; ++fieldCount; - len+=scriptSize; if (_isIDSeparator(*tmpLocaleID)) { /* If there is something else, then we add the _ */ - if(len 0) { + + CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err); + tag.append(country, *err); + if (!country.isEmpty()) { /* Found optional country */ tmpLocaleID = cntryID; - len+=cntrySize; } if(_isIDSeparator(*tmpLocaleID)) { /* If there is something else, then we add the _ if we found country before. */ - if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) { + if (!_isIDSeparator(*(tmpLocaleID+1))) { ++fieldCount; - if(len 0) { - variant = len 0)) { do { - if(len 0)); + + int32_t posixVariantSize = -tag.length(); + { + CharStringByteSink s(&tag); + _getVariantEx(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0)); + } + posixVariantSize += tag.length(); if (posixVariantSize > 0) { - if (variant == NULL) { - variant = name+len; - } - len += posixVariantSize; variantSize += posixVariantSize; } } /* Look up the ID in the canonicalization map */ for (j=0; j keywordAssign)) { - if(len resultCapacity) ? resultCapacity : len); - } - - return u_terminateChars(result, resultCapacity, len, err); } /* ### ID parsing API **************************************************/ @@ -1950,7 +1883,34 @@ uloc_getName(const char* localeID, int32_t nameCapacity, UErrorCode* err) { - return _canonicalize(localeID, name, nameCapacity, 0, err); + if (U_FAILURE(*err)) { + return 0; + } + + CheckedArrayByteSink sink(name, nameCapacity); + ulocimp_getName(localeID, sink, err); + + int32_t reslen = sink.NumberOfBytesAppended(); + + if (U_FAILURE(*err)) { + return reslen; + } + + if (sink.Overflowed()) { + *err = U_BUFFER_OVERFLOW_ERROR; + } else { + u_terminateChars(name, nameCapacity, reslen, err); + } + + return reslen; +} + +U_STABLE void U_EXPORT2 +ulocimp_getName(const char* localeID, + ByteSink& sink, + UErrorCode* err) +{ + _canonicalize(localeID, sink, 0, err); } U_CAPI int32_t U_EXPORT2 @@ -1959,7 +1919,34 @@ uloc_getBaseName(const char* localeID, int32_t nameCapacity, UErrorCode* err) { - return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err); + if (U_FAILURE(*err)) { + return 0; + } + + CheckedArrayByteSink sink(name, nameCapacity); + ulocimp_getBaseName(localeID, sink, err); + + int32_t reslen = sink.NumberOfBytesAppended(); + + if (U_FAILURE(*err)) { + return reslen; + } + + if (sink.Overflowed()) { + *err = U_BUFFER_OVERFLOW_ERROR; + } else { + u_terminateChars(name, nameCapacity, reslen, err); + } + + return reslen; +} + +U_STABLE void U_EXPORT2 +ulocimp_getBaseName(const char* localeID, + ByteSink& sink, + UErrorCode* err) +{ + _canonicalize(localeID, sink, _ULOC_STRIP_KEYWORDS, err); } U_CAPI int32_t U_EXPORT2 @@ -1968,7 +1955,34 @@ uloc_canonicalize(const char* localeID, int32_t nameCapacity, UErrorCode* err) { - return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err); + if (U_FAILURE(*err)) { + return 0; + } + + CheckedArrayByteSink sink(name, nameCapacity); + ulocimp_canonicalize(localeID, sink, err); + + int32_t reslen = sink.NumberOfBytesAppended(); + + if (U_FAILURE(*err)) { + return reslen; + } + + if (sink.Overflowed()) { + *err = U_BUFFER_OVERFLOW_ERROR; + } else { + u_terminateChars(name, nameCapacity, reslen, err); + } + + return reslen; +} + +U_STABLE void U_EXPORT2 +ulocimp_canonicalize(const char* localeID, + ByteSink& sink, + UErrorCode* err) +{ + _canonicalize(localeID, sink, _ULOC_CANONICALIZE, err); } U_CAPI const char* U_EXPORT2 diff --git a/icu4c/source/common/ulocimp.h b/icu4c/source/common/ulocimp.h index 31d3c44d0be..b9e2eb4bfeb 100644 --- a/icu4c/source/common/ulocimp.h +++ b/icu4c/source/common/ulocimp.h @@ -62,6 +62,21 @@ ulocimp_getCountry(const char *localeID, char *country, int32_t countryCapacity, const char **pEnd); +U_STABLE void U_EXPORT2 +ulocimp_getName(const char* localeID, + icu::ByteSink& sink, + UErrorCode* err); + +U_STABLE void U_EXPORT2 +ulocimp_getBaseName(const char* localeID, + icu::ByteSink& sink, + UErrorCode* err); + +U_STABLE void U_EXPORT2 +ulocimp_canonicalize(const char* localeID, + icu::ByteSink& sink, + UErrorCode* err); + /** * Writes a well-formed language tag for this locale ID. *