ICU-20803 Pass ByteSink to _canonicalize().

This eliminates the need for the fixed-size scratch buffer inside
locale_set_default_internal() and also eliminates the need for counting
bytes, which ByteSink and CharString now handle correctly
when needed.

None of this should have any externally visible effect (apart from
removing the arbitrary size limit imposed by the fixed-size scratch
buffer); it's all about cleaning up implementation internals.
This commit is contained in:
Fredrik Roubert 2020-03-12 22:45:00 +01:00 committed by Fredrik Roubert
parent 20c29becd6
commit 879f6728f0
3 changed files with 290 additions and 263 deletions
icu4c/source/common

View file

@ -105,7 +105,6 @@ typedef enum ELocalePos {
U_CFUNC int32_t locale_getKeywords(const char *localeID,
char prev,
char *keywords, int32_t keywordCapacity,
char *values, int32_t valuesCapacity, int32_t *valLen,
UBool valuesToo,
UErrorCode *status);
@ -185,17 +184,16 @@ Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
canonicalize = TRUE; // always canonicalize host ID
}
char localeNameBuf[512];
if (canonicalize) {
uloc_canonicalize(id, localeNameBuf, sizeof(localeNameBuf)-1, &status);
} else {
uloc_getName(id, localeNameBuf, sizeof(localeNameBuf)-1, &status);
CharString localeNameBuf;
{
CharStringByteSink sink(&localeNameBuf);
if (canonicalize) {
ulocimp_canonicalize(id, sink, &status);
} else {
ulocimp_getName(id, sink, &status);
}
}
localeNameBuf[sizeof(localeNameBuf)-1] = 0; // Force null termination in event of
// a long name filling the buffer.
// (long names are truncated.)
//
if (U_FAILURE(status)) {
return gDefaultLocale;
}
@ -209,14 +207,14 @@ Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
}
Locale *newDefault = (Locale *)uhash_get(gDefaultLocalesHashT, localeNameBuf);
Locale *newDefault = (Locale *)uhash_get(gDefaultLocalesHashT, localeNameBuf.data());
if (newDefault == NULL) {
newDefault = new Locale(Locale::eBOGUS);
if (newDefault == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return gDefaultLocale;
}
newDefault->init(localeNameBuf, FALSE);
newDefault->init(localeNameBuf.data(), FALSE);
uhash_put(gDefaultLocalesHashT, (char*) newDefault->getName(), newDefault, &status);
if (U_FAILURE(status)) {
return gDefaultLocale;
@ -1428,7 +1426,7 @@ Locale::createKeywords(UErrorCode &status) const
const char* assignment = uprv_strchr(fullName, '=');
if(variantStart) {
if(assignment > variantStart) {
int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status);
int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, FALSE, &status);
if(U_SUCCESS(status) && keyLen) {
result = new KeywordEnumeration(keywords, keyLen, 0, status);
if (!result) {
@ -1457,7 +1455,7 @@ Locale::createUnicodeKeywords(UErrorCode &status) const
const char* assignment = uprv_strchr(fullName, '=');
if(variantStart) {
if(assignment > variantStart) {
int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status);
int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, FALSE, &status);
if(U_SUCCESS(status) && keyLen) {
result = new UnicodeKeywordEnumeration(keywords, keyLen, 0, status);
if (!result) {

View file

@ -30,10 +30,14 @@
l = lang, C = ctry, M = charmap, V = variant
*/
#include "unicode/bytestream.h"
#include "unicode/errorcode.h"
#include "unicode/stringpiece.h"
#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "unicode/uloc.h"
#include "bytesinkutil.h"
#include "putilimp.h"
#include "ustr_imp.h"
#include "ulocimp.h"
@ -46,6 +50,7 @@
#include "uassert.h"
#include "charstr.h"
#include <algorithm>
#include <stdio.h> /* for sprintf */
U_NAMESPACE_USE
@ -59,7 +64,6 @@ U_CFUNC int32_t
locale_getKeywords(const char *localeID,
char prev,
char *keywords, int32_t keywordCapacity,
char *values, int32_t valuesCapacity, int32_t *valLen,
UBool valuesToo,
UErrorCode *status);
@ -597,11 +601,10 @@ compareKeywordStructs(const void * /*context*/, const void *left, const void *ri
return uprv_strcmp(leftString, rightString);
}
static int32_t
static void
_getKeywords(const char *localeID,
char prev,
char *keywords, int32_t keywordCapacity,
char *values, int32_t valuesCapacity, int32_t *valLen,
ByteSink& sink,
UBool valuesToo,
UErrorCode *status)
{
@ -613,8 +616,6 @@ _getKeywords(const char *localeID,
const char* equalSign = NULL;
const char* semicolon = NULL;
int32_t i = 0, j, n;
int32_t keywordsLen = 0;
int32_t valuesLen = 0;
if(prev == '@') { /* start of keyword definition */
/* we will grab pairs, trim spaces, lowercase keywords, sort and return */
@ -629,7 +630,7 @@ _getKeywords(const char *localeID,
}
if(numKeywords == maxKeywords) {
*status = U_INTERNAL_PROGRAM_ERROR;
return 0;
return;
}
equalSign = uprv_strchr(pos, '=');
semicolon = uprv_strchr(pos, ';');
@ -637,13 +638,13 @@ _getKeywords(const char *localeID,
/* ';' before '=' [foo@currency;collation=pinyin] is illegal */
if(!equalSign || (semicolon && semicolon<equalSign)) {
*status = U_INVALID_FORMAT_ERROR;
return 0;
return;
}
/* need to normalize both keyword and keyword name */
if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
/* keyword name too long for internal buffer */
*status = U_INTERNAL_PROGRAM_ERROR;
return 0;
return;
}
for(i = 0, n = 0; i < equalSign - pos; ++i) {
if (pos[i] != ' ') {
@ -654,7 +655,7 @@ _getKeywords(const char *localeID,
/* zero-length keyword is an error. */
if (n == 0) {
*status = U_INVALID_FORMAT_ERROR;
return 0;
return;
}
keywordList[numKeywords].keyword[n] = 0;
@ -669,7 +670,7 @@ _getKeywords(const char *localeID,
/* Premature end or zero-length value */
if (!*equalSign || equalSign == semicolon) {
*status = U_INVALID_FORMAT_ERROR;
return 0;
return;
}
keywordList[numKeywords].valueStart = equalSign;
@ -707,45 +708,17 @@ _getKeywords(const char *localeID,
/* Now construct the keyword part */
for(i = 0; i < numKeywords; i++) {
if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
if(valuesToo) {
keywords[keywordsLen + keywordList[i].keywordLen] = '=';
} else {
keywords[keywordsLen + keywordList[i].keywordLen] = 0;
}
}
keywordsLen += keywordList[i].keywordLen + 1;
sink.Append(keywordList[i].keyword, keywordList[i].keywordLen);
if(valuesToo) {
if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) {
uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
}
keywordsLen += keywordList[i].valueLen;
sink.Append("=", 1);
sink.Append(keywordList[i].valueStart, keywordList[i].valueLen);
if(i < numKeywords - 1) {
if(keywordsLen < keywordCapacity) {
keywords[keywordsLen] = ';';
}
keywordsLen++;
sink.Append(";", 1);
}
}
if(values) {
if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
values[valuesLen + keywordList[i].valueLen] = 0;
}
valuesLen += keywordList[i].valueLen + 1;
} else {
sink.Append("\0", 1);
}
}
if(values) {
values[valuesLen] = 0;
if(valLen) {
*valLen = valuesLen;
}
}
return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
} else {
return 0;
}
}
@ -753,12 +726,28 @@ U_CFUNC int32_t
locale_getKeywords(const char *localeID,
char prev,
char *keywords, int32_t keywordCapacity,
char *values, int32_t valuesCapacity, int32_t *valLen,
UBool valuesToo,
UErrorCode *status) {
return _getKeywords(localeID, prev, keywords, keywordCapacity,
values, valuesCapacity, valLen, valuesToo,
status);
if (U_FAILURE(*status)) {
return 0;
}
CheckedArrayByteSink sink(keywords, keywordCapacity);
_getKeywords(localeID, prev, sink, valuesToo, status);
int32_t reslen = sink.NumberOfBytesAppended();
if (U_FAILURE(*status)) {
return reslen;
}
if (sink.Overflowed()) {
*status = U_BUFFER_OVERFLOW_ERROR;
} else {
u_terminateChars(keywords, keywordCapacity, reslen, status);
}
return reslen;
}
U_CAPI int32_t U_EXPORT2
@ -1135,26 +1124,6 @@ static int16_t _findIndex(const char* const* list, const char* key)
return -1;
}
/* count the length of src while copying it to dest; return strlen(src) */
static inline int32_t
_copyCount(char *dest, int32_t destCapacity, const char *src) {
const char *anchor;
char c;
anchor=src;
for(;;) {
if((c=*src)==0) {
return (int32_t)(src-anchor);
}
if(destCapacity<=0) {
return (int32_t)((src-anchor)+uprv_strlen(src));
}
++src;
*dest++=c;
--destCapacity;
}
}
U_CFUNC const char*
uloc_getCurrentCountryID(const char* oldID){
int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
@ -1179,13 +1148,11 @@ uloc_getCurrentLanguageID(const char* oldID){
*
* TODO try to use this in Locale
*/
U_CFUNC int32_t
static CharString
ulocimp_getLanguage(const char *localeID,
char *language, int32_t languageCapacity,
const char **pEnd) {
int32_t i=0;
int32_t offset;
char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
const char **pEnd,
UErrorCode &status) {
CharString result;
if (uprv_stricmp(localeID, "root") == 0) {
localeID += 4;
@ -1199,48 +1166,52 @@ ulocimp_getLanguage(const char *localeID,
/* if it starts with i- or x- then copy that prefix */
if(_isIDPrefix(localeID)) {
if(i<languageCapacity) {
language[i]=(char)uprv_tolower(*localeID);
}
if(i<languageCapacity) {
language[i+1]='-';
}
i+=2;
result.append((char)uprv_tolower(*localeID), status);
result.append('-', status);
localeID+=2;
}
/* copy the language as far as possible and count its length */
while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
if(i<languageCapacity) {
language[i]=(char)uprv_tolower(*localeID);
}
if(i<3) {
U_ASSERT(i>=0);
lang[i]=(char)uprv_tolower(*localeID);
}
i++;
result.append((char)uprv_tolower(*localeID), status);
localeID++;
}
if(i==3) {
if(result.length()==3) {
/* convert 3 character code to 2 character code if possible *CWB*/
offset=_findIndex(LANGUAGES_3, lang);
int32_t offset = _findIndex(LANGUAGES_3, result.data());
if(offset>=0) {
i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
result.clear();
result.append(LANGUAGES[offset], status);
}
}
if(pEnd!=NULL) {
*pEnd=localeID;
}
return i;
return result;
}
/**
 * Fixed-buffer wrapper around the CharString-returning ulocimp_getLanguage().
 *
 * Parses the language subtag of localeID and copies as much of it as fits
 * into the caller's buffer. This function does not write a terminating NUL.
 *
 * @param localeID         the locale ID to parse
 * @param language         destination buffer; may be NULL when only the
 *                         length is wanted (nothing is copied in that case)
 * @param languageCapacity size of the destination buffer in bytes; values
 *                         <= 0 mean nothing is copied
 * @param pEnd             forwarded unchanged to the CharString overload
 * @return the full length of the language subtag (which may exceed
 *         languageCapacity), or 0 if the CharString overload reported failure
 */
U_CFUNC int32_t
ulocimp_getLanguage(const char *localeID,
                    char *language, int32_t languageCapacity,
                    const char **pEnd) {
    ErrorCode status;
    CharString result = ulocimp_getLanguage(localeID, pEnd, status);
    if (status.isFailure()) {
        return 0;
    }
    int32_t reslen = result.length();
    int32_t copyLen = std::min(reslen, languageCapacity);
    /* Only copy when there is a real destination and something to copy:
       memcpy must not be given a NULL pointer, and a negative capacity
       would otherwise be converted to a huge unsigned size. */
    if (language != NULL && copyLen > 0) {
        uprv_memcpy(language, result.data(), copyLen);
    }
    return reslen;
}
static CharString
ulocimp_getScript(const char *localeID,
char *script, int32_t scriptCapacity,
const char **pEnd)
{
const char **pEnd,
UErrorCode &status) {
CharString result;
int32_t idLen = 0;
if (pEnd != NULL) {
@ -1259,132 +1230,137 @@ ulocimp_getScript(const char *localeID,
if (pEnd != NULL) {
*pEnd = localeID+idLen;
}
if(idLen > scriptCapacity) {
idLen = scriptCapacity;
}
if (idLen >= 1) {
script[0]=(char)uprv_toupper(*(localeID++));
result.append((char)uprv_toupper(*(localeID++)), status);
}
for (i = 1; i < idLen; i++) {
script[i]=(char)uprv_tolower(*(localeID++));
result.append((char)uprv_tolower(*(localeID++)), status);
}
}
else {
idLen = 0;
}
return idLen;
return result;
}
/**
 * Fixed-buffer wrapper around the CharString-returning ulocimp_getScript().
 *
 * Parses the script subtag of localeID and copies as much of it as fits
 * into the caller's buffer. This function does not write a terminating NUL.
 *
 * @param localeID       the locale ID text to parse
 * @param script         destination buffer; may be NULL when only the
 *                       length is wanted (nothing is copied in that case)
 * @param scriptCapacity size of the destination buffer in bytes; values
 *                       <= 0 mean nothing is copied
 * @param pEnd           forwarded unchanged to the CharString overload
 * @return the full length of the script subtag (which may exceed
 *         scriptCapacity), or 0 if the CharString overload reported failure
 */
U_CFUNC int32_t
ulocimp_getScript(const char *localeID,
                  char *script, int32_t scriptCapacity,
                  const char **pEnd) {
    ErrorCode status;
    CharString result = ulocimp_getScript(localeID, pEnd, status);
    if (status.isFailure()) {
        return 0;
    }
    int32_t reslen = result.length();
    int32_t copyLen = std::min(reslen, scriptCapacity);
    /* Only copy when there is a real destination and something to copy:
       memcpy must not be given a NULL pointer, and a negative capacity
       would otherwise be converted to a huge unsigned size. */
    if (script != NULL && copyLen > 0) {
        uprv_memcpy(script, result.data(), copyLen);
    }
    return reslen;
}
static CharString
ulocimp_getCountry(const char *localeID,
char *country, int32_t countryCapacity,
const char **pEnd)
{
const char **pEnd,
UErrorCode &status) {
CharString result;
int32_t idLen=0;
char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
int32_t offset;
/* copy the country as far as possible and count its length */
while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
if(idLen<(ULOC_COUNTRY_CAPACITY-1)) { /*CWB*/
cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
}
result.append((char)uprv_toupper(localeID[idLen]), status);
idLen++;
}
/* the country should be either length 2 or 3 */
if (idLen == 2 || idLen == 3) {
UBool gotCountry = FALSE;
/* convert 3 character code to 2 character code if possible *CWB*/
if(idLen==3) {
offset=_findIndex(COUNTRIES_3, cnty);
int32_t offset = _findIndex(COUNTRIES_3, result.data());
if(offset>=0) {
idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
gotCountry = TRUE;
}
}
if (!gotCountry) {
int32_t i = 0;
for (i = 0; i < idLen; i++) {
if (i < countryCapacity) {
country[i]=(char)uprv_toupper(localeID[i]);
}
result.clear();
result.append(COUNTRIES[offset], status);
}
}
localeID+=idLen;
} else {
idLen = 0;
result.clear();
}
if(pEnd!=NULL) {
*pEnd=localeID;
}
return idLen;
return result;
}
/**
 * Fixed-buffer wrapper around the CharString-returning ulocimp_getCountry().
 *
 * Parses the country subtag of localeID and copies as much of it as fits
 * into the caller's buffer. This function does not write a terminating NUL.
 *
 * @param localeID        the locale ID text to parse
 * @param country         destination buffer; may be NULL when only the
 *                        length is wanted (nothing is copied in that case)
 * @param countryCapacity size of the destination buffer in bytes; values
 *                        <= 0 mean nothing is copied
 * @param pEnd            forwarded unchanged to the CharString overload
 * @return the full length of the country subtag (which may exceed
 *         countryCapacity), or 0 if the CharString overload reported failure
 */
U_CFUNC int32_t
ulocimp_getCountry(const char *localeID,
                   char *country, int32_t countryCapacity,
                   const char **pEnd) {
    ErrorCode status;
    CharString result = ulocimp_getCountry(localeID, pEnd, status);
    if (status.isFailure()) {
        return 0;
    }
    int32_t reslen = result.length();
    int32_t copyLen = std::min(reslen, countryCapacity);
    /* Only copy when there is a real destination and something to copy:
       memcpy must not be given a NULL pointer, and a negative capacity
       would otherwise be converted to a huge unsigned size. */
    if (country != NULL && copyLen > 0) {
        uprv_memcpy(country, result.data(), copyLen);
    }
    return reslen;
}
/**
* @param needSeparator if true, then add leading '_' if any variants
* are added to 'variant'
*/
static int32_t
static void
_getVariantEx(const char *localeID,
char prev,
char *variant, int32_t variantCapacity,
ByteSink& sink,
UBool needSeparator) {
int32_t i=0;
UBool hasVariant = FALSE;
/* get one or more variant tags and separate them with '_' */
if(_isIDSeparator(prev)) {
/* get a variant string after a '-' or '_' */
while(!_isTerminator(*localeID)) {
if (needSeparator) {
if (i<variantCapacity) {
variant[i] = '_';
}
++i;
sink.Append("_", 1);
needSeparator = FALSE;
}
if(i<variantCapacity) {
variant[i]=(char)uprv_toupper(*localeID);
if(variant[i]=='-') {
variant[i]='_';
}
}
i++;
char c = (char)uprv_toupper(*localeID);
if (c == '-') c = '_';
sink.Append(&c, 1);
hasVariant = TRUE;
localeID++;
}
}
/* if there is no variant tag after a '-' or '_' then look for '@' */
if(i==0) {
if(!hasVariant) {
if(prev=='@') {
/* keep localeID */
} else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
++localeID; /* point after the '@' */
} else {
return 0;
return;
}
while(!_isTerminator(*localeID)) {
if (needSeparator) {
if (i<variantCapacity) {
variant[i] = '_';
}
++i;
sink.Append("_", 1);
needSeparator = FALSE;
}
if(i<variantCapacity) {
variant[i]=(char)uprv_toupper(*localeID);
if(variant[i]=='-' || variant[i]==',') {
variant[i]='_';
}
}
i++;
char c = (char)uprv_toupper(*localeID);
if (c == '-' || c == ',') c = '_';
sink.Append(&c, 1);
localeID++;
}
}
}
return i;
/**
 * Fixed-buffer overload of _getVariantEx().
 *
 * Wraps the caller's array in a CheckedArrayByteSink, forwards to the
 * ByteSink overload with the same localeID, prev and needSeparator, and
 * returns the sink's NumberOfBytesAppended() count. This wrapper itself
 * writes no terminating NUL.
 *
 * NOTE(review): the return value is presumably the full variant length even
 * when it exceeds variantCapacity -- confirm against the
 * CheckedArrayByteSink documentation.
 */
static int32_t
_getVariantEx(const char *localeID,
char prev,
char *variant, int32_t variantCapacity,
UBool needSeparator) {
/* The sink bounds all writes to variant[0..variantCapacity). */
CheckedArrayByteSink sink(variant, variantCapacity);
_getVariantEx(localeID, prev, sink, needSeparator);
return sink.NumberOfBytesAppended();
}
static int32_t
@ -1530,7 +1506,7 @@ uloc_openKeywords(const char* localeID,
/* keywords are located after '@' */
if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, FALSE, status);
}
if(i) {
@ -1557,24 +1533,20 @@ static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
*
* This is the code underlying uloc_getName and uloc_canonicalize.
*/
static int32_t
static void
_canonicalize(const char* localeID,
char* result,
int32_t resultCapacity,
ByteSink& sink,
uint32_t options,
UErrorCode* err) {
int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
char localeBuffer[ULOC_FULLNAME_CAPACITY];
int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
char tempBuffer[ULOC_FULLNAME_CAPACITY];
const char* origLocaleID;
const char* tmpLocaleID;
const char* keywordAssign = NULL;
const char* separatorIndicator = NULL;
char* name;
char* variant = NULL; /* pointer into name, or NULL */
if (U_FAILURE(*err)) {
return 0;
return;
}
if (_hasBCP47Extension(localeID)) {
@ -1588,77 +1560,55 @@ _canonicalize(const char* localeID,
origLocaleID=tmpLocaleID;
/* if we are doing a full canonicalization, then put results in
localeBuffer, if necessary; otherwise send them to result. */
if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
(result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
name = localeBuffer;
nameCapacity = (int32_t)sizeof(localeBuffer);
} else {
name = result;
nameCapacity = resultCapacity;
}
/* get all pieces, one after another, and separate with '_' */
len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
CharString tag = ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
const char *d = uloc_getDefault();
len = (int32_t)uprv_strlen(d);
if (name != NULL) {
uprv_memcpy(name, d, len);
}
if (tag.length() == I_DEFAULT_LENGTH &&
uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) {
tag.clear();
tag.append(uloc_getDefault(), *err);
} else if(_isIDSeparator(*tmpLocaleID)) {
const char *scriptID;
++fieldCount;
if(len<nameCapacity) {
name[len]='_';
}
++len;
tag.append('_', *err);
scriptSize=ulocimp_getScript(tmpLocaleID+1,
(len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
CharString script = ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
tag.append(script, *err);
scriptSize = script.length();
if(scriptSize > 0) {
/* Found optional script */
tmpLocaleID = scriptID;
++fieldCount;
len+=scriptSize;
if (_isIDSeparator(*tmpLocaleID)) {
/* If there is something else, then we add the _ */
if(len<nameCapacity) {
name[len]='_';
}
++len;
tag.append('_', *err);
}
}
if (_isIDSeparator(*tmpLocaleID)) {
const char *cntryID;
int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
(len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
if (cntrySize > 0) {
CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
tag.append(country, *err);
if (!country.isEmpty()) {
/* Found optional country */
tmpLocaleID = cntryID;
len+=cntrySize;
}
if(_isIDSeparator(*tmpLocaleID)) {
/* If there is something else, then we add the _ if we found country before. */
if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
if (!_isIDSeparator(*(tmpLocaleID+1))) {
++fieldCount;
if(len<nameCapacity) {
name[len]='_';
}
++len;
tag.append('_', *err);
}
variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
(len<nameCapacity ? name+len : NULL), nameCapacity-len);
variantSize = -tag.length();
{
CharStringByteSink s(&tag);
_getVariantEx(tmpLocaleID+1, *tmpLocaleID, s, FALSE);
}
variantSize += tag.length();
if (variantSize > 0) {
variant = len<nameCapacity ? name+len : NULL;
len += variantSize;
tmpLocaleID += variantSize + 1; /* skip '_' and variant */
}
}
@ -1676,10 +1626,7 @@ _canonicalize(const char* localeID,
done = TRUE;
break;
default:
if (len<nameCapacity) {
name[len] = c;
}
++len;
tag.append(c, *err);
++tmpLocaleID;
break;
}
@ -1701,10 +1648,7 @@ _canonicalize(const char* localeID,
if (c == 0) {
break;
}
if (len<nameCapacity) {
name[len] = c;
}
++len;
tag.append(c, *err);
++tmpLocaleID;
}
}
@ -1712,60 +1656,49 @@ _canonicalize(const char* localeID,
if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
/* Handle @FOO variant if @ is present and not followed by = */
if (tmpLocaleID!=NULL && keywordAssign==NULL) {
int32_t posixVariantSize;
/* Add missing '_' if needed */
if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
do {
if(len<nameCapacity) {
name[len]='_';
}
++len;
tag.append('_', *err);
++fieldCount;
} while(fieldCount<2);
}
posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
(UBool)(variantSize > 0));
int32_t posixVariantSize = -tag.length();
{
CharStringByteSink s(&tag);
_getVariantEx(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0));
}
posixVariantSize += tag.length();
if (posixVariantSize > 0) {
if (variant == NULL) {
variant = name+len;
}
len += posixVariantSize;
variantSize += posixVariantSize;
}
}
/* Look up the ID in the canonicalization map */
for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
const char* id = CANONICALIZE_MAP[j].id;
int32_t n = (int32_t)uprv_strlen(id);
if (len == n && uprv_strncmp(name, id, n) == 0) {
if (n == 0 && tmpLocaleID != NULL) {
StringPiece id(CANONICALIZE_MAP[j].id);
if (tag == id) {
if (id.empty() && tmpLocaleID != NULL) {
break; /* Don't remap "" if keywords present */
}
len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
tag.clear();
tag.append(CANONICALIZE_MAP[j].canonicalID, *err);
break;
}
}
}
sink.Append(tag.data(), tag.length());
if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
(!separatorIndicator || separatorIndicator > keywordAssign)) {
if(len<nameCapacity) {
name[len]='@';
}
++len;
sink.Append("@", 1);
++fieldCount;
len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
NULL, 0, NULL, TRUE, err);
_getKeywords(tmpLocaleID+1, '@', sink, TRUE, err);
}
}
if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
}
return u_terminateChars(result, resultCapacity, len, err);
}
/* ### ID parsing API **************************************************/
@ -1950,7 +1883,34 @@ uloc_getName(const char* localeID,
int32_t nameCapacity,
UErrorCode* err)
{
return _canonicalize(localeID, name, nameCapacity, 0, err);
if (U_FAILURE(*err)) {
return 0;
}
CheckedArrayByteSink sink(name, nameCapacity);
ulocimp_getName(localeID, sink, err);
int32_t reslen = sink.NumberOfBytesAppended();
if (U_FAILURE(*err)) {
return reslen;
}
if (sink.Overflowed()) {
*err = U_BUFFER_OVERFLOW_ERROR;
} else {
u_terminateChars(name, nameCapacity, reslen, err);
}
return reslen;
}
/**
 * ByteSink-based variant of uloc_getName(): forwards to _canonicalize()
 * with no option bits set (options == 0).
 *
 * @param localeID the locale ID passed through to _canonicalize()
 * @param sink     receives the output produced by _canonicalize()
 * @param err      in/out error code, passed through to _canonicalize()
 */
U_STABLE void U_EXPORT2
ulocimp_getName(const char* localeID,
ByteSink& sink,
UErrorCode* err)
{
_canonicalize(localeID, sink, 0, err);
}
U_CAPI int32_t U_EXPORT2
@ -1959,7 +1919,34 @@ uloc_getBaseName(const char* localeID,
int32_t nameCapacity,
UErrorCode* err)
{
return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
if (U_FAILURE(*err)) {
return 0;
}
CheckedArrayByteSink sink(name, nameCapacity);
ulocimp_getBaseName(localeID, sink, err);
int32_t reslen = sink.NumberOfBytesAppended();
if (U_FAILURE(*err)) {
return reslen;
}
if (sink.Overflowed()) {
*err = U_BUFFER_OVERFLOW_ERROR;
} else {
u_terminateChars(name, nameCapacity, reslen, err);
}
return reslen;
}
/**
 * ByteSink-based variant of uloc_getBaseName(): forwards to _canonicalize()
 * with the _ULOC_STRIP_KEYWORDS option.
 *
 * @param localeID the locale ID passed through to _canonicalize()
 * @param sink     receives the output produced by _canonicalize()
 * @param err      in/out error code, passed through to _canonicalize()
 */
U_STABLE void U_EXPORT2
ulocimp_getBaseName(const char* localeID,
ByteSink& sink,
UErrorCode* err)
{
_canonicalize(localeID, sink, _ULOC_STRIP_KEYWORDS, err);
}
U_CAPI int32_t U_EXPORT2
@ -1968,7 +1955,34 @@ uloc_canonicalize(const char* localeID,
int32_t nameCapacity,
UErrorCode* err)
{
return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
if (U_FAILURE(*err)) {
return 0;
}
CheckedArrayByteSink sink(name, nameCapacity);
ulocimp_canonicalize(localeID, sink, err);
int32_t reslen = sink.NumberOfBytesAppended();
if (U_FAILURE(*err)) {
return reslen;
}
if (sink.Overflowed()) {
*err = U_BUFFER_OVERFLOW_ERROR;
} else {
u_terminateChars(name, nameCapacity, reslen, err);
}
return reslen;
}
/**
 * ByteSink-based variant of uloc_canonicalize(): forwards to _canonicalize()
 * with the _ULOC_CANONICALIZE option.
 *
 * @param localeID the locale ID passed through to _canonicalize()
 * @param sink     receives the output produced by _canonicalize()
 * @param err      in/out error code, passed through to _canonicalize()
 */
U_STABLE void U_EXPORT2
ulocimp_canonicalize(const char* localeID,
ByteSink& sink,
UErrorCode* err)
{
_canonicalize(localeID, sink, _ULOC_CANONICALIZE, err);
}
U_CAPI const char* U_EXPORT2

View file

@ -62,6 +62,21 @@ ulocimp_getCountry(const char *localeID,
char *country, int32_t countryCapacity,
const char **pEnd);
U_STABLE void U_EXPORT2
ulocimp_getName(const char* localeID,
icu::ByteSink& sink,
UErrorCode* err);
U_STABLE void U_EXPORT2
ulocimp_getBaseName(const char* localeID,
icu::ByteSink& sink,
UErrorCode* err);
U_STABLE void U_EXPORT2
ulocimp_canonicalize(const char* localeID,
icu::ByteSink& sink,
UErrorCode* err);
/**
* Writes a well-formed language tag for this locale ID.
*