mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 05:25:34 +00:00
ICU-7882 Enable BCP47 ids and update a few test cases in ICU4C.
X-SVN-Rev: 28503
This commit is contained in:
parent
e9ef072967
commit
24055f8585
4 changed files with 106 additions and 59 deletions
|
@ -527,6 +527,17 @@ static const VariantMap VARIANT_MAP[] = {
|
|||
{ "STROKE", "collation", "stroke" } /* Solaris variant */
|
||||
};
|
||||
|
||||
/* ### BCP47 Conversion *******************************************/
|
||||
/* Test if the locale id has BCP47 u extension and does not have '@' */
|
||||
#define _hasBCP47Extension(id) (id && uprv_strstr(id, "-u-") != NULL && uprv_strstr(id, "@") == NULL)
|
||||
/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
|
||||
#define _ConvertBCP47(finalID, id, buffer, length,err) \
|
||||
if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || U_FAILURE(*err)) { \
|
||||
finalID=id; \
|
||||
} else { \
|
||||
finalID=buffer; \
|
||||
}
|
||||
|
||||
/* ### Keywords **************************************************/
|
||||
|
||||
#define ULOC_KEYWORD_BUFFER_LEN 25
|
||||
|
@ -794,8 +805,16 @@ uloc_getKeywordValue(const char* localeID,
|
|||
int32_t result = 0;
|
||||
|
||||
if(status && U_SUCCESS(*status) && localeID) {
|
||||
char tempBuffer[ULOC_FULLNAME_CAPACITY];
|
||||
const char* tmpLocaleID;
|
||||
|
||||
if (_hasBCP47Extension(localeID)) {
|
||||
_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
|
||||
} else {
|
||||
tmpLocaleID=localeID;
|
||||
}
|
||||
|
||||
const char* startSearchHere = uprv_strchr(localeID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
|
||||
const char* startSearchHere = uprv_strchr(tmpLocaleID, '@'); /* TODO: REVISIT: shouldn't this be locale_getKeywordsStart ? */
|
||||
if(startSearchHere == NULL) {
|
||||
/* no keywords, return at once */
|
||||
return 0;
|
||||
|
@ -1504,36 +1523,44 @@ uloc_openKeywords(const char* localeID,
|
|||
int32_t i=0;
|
||||
char keywords[256];
|
||||
int32_t keywordsCapacity = 256;
|
||||
char tempBuffer[ULOC_FULLNAME_CAPACITY];
|
||||
const char* tmpLocaleID;
|
||||
|
||||
if(status==NULL || U_FAILURE(*status)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(localeID==NULL) {
|
||||
localeID=uloc_getDefault();
|
||||
if (_hasBCP47Extension(localeID)) {
|
||||
_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
|
||||
} else {
|
||||
if (localeID==NULL) {
|
||||
localeID=uloc_getDefault();
|
||||
}
|
||||
tmpLocaleID=localeID;
|
||||
}
|
||||
|
||||
/* Skip the language */
|
||||
ulocimp_getLanguage(localeID, NULL, 0, &localeID);
|
||||
if(_isIDSeparator(*localeID)) {
|
||||
ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
|
||||
if(_isIDSeparator(*tmpLocaleID)) {
|
||||
const char *scriptID;
|
||||
/* Skip the script if available */
|
||||
ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
|
||||
if(scriptID != localeID+1) {
|
||||
ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
|
||||
if(scriptID != tmpLocaleID+1) {
|
||||
/* Found optional script */
|
||||
localeID = scriptID;
|
||||
tmpLocaleID = scriptID;
|
||||
}
|
||||
/* Skip the Country */
|
||||
if (_isIDSeparator(*localeID)) {
|
||||
ulocimp_getCountry(localeID+1, NULL, 0, &localeID);
|
||||
if(_isIDSeparator(*localeID)) {
|
||||
_getVariant(localeID+1, *localeID, NULL, 0);
|
||||
if (_isIDSeparator(*tmpLocaleID)) {
|
||||
ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
|
||||
if(_isIDSeparator(*tmpLocaleID)) {
|
||||
_getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* keywords are located after '@' */
|
||||
if((localeID = locale_getKeywordsStart(localeID)) != NULL) {
|
||||
i=locale_getKeywords(localeID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
|
||||
if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
|
||||
i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
|
||||
}
|
||||
|
||||
if(i) {
|
||||
|
@ -1568,7 +1595,9 @@ _canonicalize(const char* localeID,
|
|||
UErrorCode* err) {
|
||||
int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
|
||||
char localeBuffer[ULOC_FULLNAME_CAPACITY];
|
||||
char tempBuffer[ULOC_FULLNAME_CAPACITY];
|
||||
const char* origLocaleID;
|
||||
const char* tmpLocaleID;
|
||||
const char* keywordAssign = NULL;
|
||||
const char* separatorIndicator = NULL;
|
||||
const char* addKeyword = NULL;
|
||||
|
@ -1580,10 +1609,16 @@ _canonicalize(const char* localeID,
|
|||
return 0;
|
||||
}
|
||||
|
||||
if (localeID==NULL) {
|
||||
localeID=uloc_getDefault();
|
||||
if (_hasBCP47Extension(localeID)) {
|
||||
_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
|
||||
} else {
|
||||
if (localeID==NULL) {
|
||||
localeID=uloc_getDefault();
|
||||
}
|
||||
tmpLocaleID=localeID;
|
||||
}
|
||||
origLocaleID=localeID;
|
||||
|
||||
origLocaleID=tmpLocaleID;
|
||||
|
||||
/* if we are doing a full canonicalization, then put results in
|
||||
localeBuffer, if necessary; otherwise send them to result. */
|
||||
|
@ -1597,7 +1632,7 @@ _canonicalize(const char* localeID,
|
|||
}
|
||||
|
||||
/* get all pieces, one after another, and separate with '_' */
|
||||
len=ulocimp_getLanguage(localeID, name, nameCapacity, &localeID);
|
||||
len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
|
||||
|
||||
if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
|
||||
const char *d = uloc_getDefault();
|
||||
|
@ -1607,7 +1642,7 @@ _canonicalize(const char* localeID,
|
|||
if (name != NULL) {
|
||||
uprv_strncpy(name, d, len);
|
||||
}
|
||||
} else if(_isIDSeparator(*localeID)) {
|
||||
} else if(_isIDSeparator(*tmpLocaleID)) {
|
||||
const char *scriptID;
|
||||
|
||||
++fieldCount;
|
||||
|
@ -1616,13 +1651,13 @@ _canonicalize(const char* localeID,
|
|||
}
|
||||
++len;
|
||||
|
||||
scriptSize=ulocimp_getScript(localeID+1, name+len, nameCapacity-len, &scriptID);
|
||||
scriptSize=ulocimp_getScript(tmpLocaleID+1, name+len, nameCapacity-len, &scriptID);
|
||||
if(scriptSize > 0) {
|
||||
/* Found optional script */
|
||||
localeID = scriptID;
|
||||
tmpLocaleID = scriptID;
|
||||
++fieldCount;
|
||||
len+=scriptSize;
|
||||
if (_isIDSeparator(*localeID)) {
|
||||
if (_isIDSeparator(*tmpLocaleID)) {
|
||||
/* If there is something else, then we add the _ */
|
||||
if(len<nameCapacity) {
|
||||
name[len]='_';
|
||||
|
@ -1631,15 +1666,15 @@ _canonicalize(const char* localeID,
|
|||
}
|
||||
}
|
||||
|
||||
if (_isIDSeparator(*localeID)) {
|
||||
if (_isIDSeparator(*tmpLocaleID)) {
|
||||
const char *cntryID;
|
||||
int32_t cntrySize = ulocimp_getCountry(localeID+1, name+len, nameCapacity-len, &cntryID);
|
||||
int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1, name+len, nameCapacity-len, &cntryID);
|
||||
if (cntrySize > 0) {
|
||||
/* Found optional country */
|
||||
localeID = cntryID;
|
||||
tmpLocaleID = cntryID;
|
||||
len+=cntrySize;
|
||||
}
|
||||
if(_isIDSeparator(*localeID)) {
|
||||
if(_isIDSeparator(*tmpLocaleID)) {
|
||||
/* If there is something else, then we add the _ if we found country before.*/
|
||||
if (cntrySize > 0) {
|
||||
++fieldCount;
|
||||
|
@ -1649,21 +1684,21 @@ _canonicalize(const char* localeID,
|
|||
++len;
|
||||
}
|
||||
|
||||
variantSize = _getVariant(localeID+1, *localeID, name+len, nameCapacity-len);
|
||||
variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID, name+len, nameCapacity-len);
|
||||
if (variantSize > 0) {
|
||||
variant = name+len;
|
||||
len += variantSize;
|
||||
localeID += variantSize + 1; /* skip '_' and variant */
|
||||
tmpLocaleID += variantSize + 1; /* skip '_' and variant */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy POSIX-style charset specifier, if any [mr.utf8] */
|
||||
if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *localeID == '.') {
|
||||
if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
|
||||
UBool done = FALSE;
|
||||
do {
|
||||
char c = *localeID;
|
||||
char c = *tmpLocaleID;
|
||||
switch (c) {
|
||||
case 0:
|
||||
case '@':
|
||||
|
@ -1674,24 +1709,24 @@ _canonicalize(const char* localeID,
|
|||
name[len] = c;
|
||||
}
|
||||
++len;
|
||||
++localeID;
|
||||
++tmpLocaleID;
|
||||
break;
|
||||
}
|
||||
} while (!done);
|
||||
}
|
||||
|
||||
/* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
|
||||
After this, localeID either points to '@' or is NULL */
|
||||
if ((localeID=locale_getKeywordsStart(localeID))!=NULL) {
|
||||
keywordAssign = uprv_strchr(localeID, '=');
|
||||
separatorIndicator = uprv_strchr(localeID, ';');
|
||||
After this, tmpLocaleID either points to '@' or is NULL */
|
||||
if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
|
||||
keywordAssign = uprv_strchr(tmpLocaleID, '=');
|
||||
separatorIndicator = uprv_strchr(tmpLocaleID, ';');
|
||||
}
|
||||
|
||||
/* Copy POSIX-style variant, if any [mr@FOO] */
|
||||
if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
|
||||
localeID != NULL && keywordAssign == NULL) {
|
||||
tmpLocaleID != NULL && keywordAssign == NULL) {
|
||||
for (;;) {
|
||||
char c = *localeID;
|
||||
char c = *tmpLocaleID;
|
||||
if (c == 0) {
|
||||
break;
|
||||
}
|
||||
|
@ -1699,13 +1734,13 @@ _canonicalize(const char* localeID,
|
|||
name[len] = c;
|
||||
}
|
||||
++len;
|
||||
++localeID;
|
||||
++tmpLocaleID;
|
||||
}
|
||||
}
|
||||
|
||||
if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
|
||||
/* Handle @FOO variant if @ is present and not followed by = */
|
||||
if (localeID!=NULL && keywordAssign==NULL) {
|
||||
if (tmpLocaleID!=NULL && keywordAssign==NULL) {
|
||||
int32_t posixVariantSize;
|
||||
/* Add missing '_' if needed */
|
||||
if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
|
||||
|
@ -1717,7 +1752,7 @@ _canonicalize(const char* localeID,
|
|||
++fieldCount;
|
||||
} while(fieldCount<2);
|
||||
}
|
||||
posixVariantSize = _getVariantEx(localeID+1, '@', name+len, nameCapacity-len,
|
||||
posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
|
||||
(UBool)(variantSize > 0));
|
||||
if (posixVariantSize > 0) {
|
||||
if (variant == NULL) {
|
||||
|
@ -1754,7 +1789,7 @@ _canonicalize(const char* localeID,
|
|||
const char* id = CANONICALIZE_MAP[j].id;
|
||||
int32_t n = (int32_t)uprv_strlen(id);
|
||||
if (len == n && uprv_strncmp(name, id, n) == 0) {
|
||||
if (n == 0 && localeID != NULL) {
|
||||
if (n == 0 && tmpLocaleID != NULL) {
|
||||
break; /* Don't remap "" if keywords present */
|
||||
}
|
||||
len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
|
||||
|
@ -1768,14 +1803,14 @@ _canonicalize(const char* localeID,
|
|||
}
|
||||
|
||||
if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
|
||||
if (localeID!=NULL && keywordAssign!=NULL &&
|
||||
if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
|
||||
(!separatorIndicator || separatorIndicator > keywordAssign)) {
|
||||
if(len<nameCapacity) {
|
||||
name[len]='@';
|
||||
}
|
||||
++len;
|
||||
++fieldCount;
|
||||
len += _getKeywords(localeID+1, '@', name+len, nameCapacity-len, NULL, 0, NULL, TRUE,
|
||||
len += _getKeywords(tmpLocaleID+1, '@', name+len, nameCapacity-len, NULL, 0, NULL, TRUE,
|
||||
addKeyword, addValue, err);
|
||||
} else if (addKeyword != NULL) {
|
||||
U_ASSERT(addValue != NULL);
|
||||
|
@ -1908,40 +1943,47 @@ uloc_getVariant(const char* localeID,
|
|||
int32_t variantCapacity,
|
||||
UErrorCode* err)
|
||||
{
|
||||
char tempBuffer[ULOC_FULLNAME_CAPACITY];
|
||||
const char* tmpLocaleID;
|
||||
int32_t i=0;
|
||||
|
||||
if(err==NULL || U_FAILURE(*err)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(localeID==NULL) {
|
||||
localeID=uloc_getDefault();
|
||||
if (_hasBCP47Extension(localeID)) {
|
||||
_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
|
||||
} else {
|
||||
if (localeID==NULL) {
|
||||
localeID=uloc_getDefault();
|
||||
}
|
||||
tmpLocaleID=localeID;
|
||||
}
|
||||
|
||||
/* Skip the language */
|
||||
ulocimp_getLanguage(localeID, NULL, 0, &localeID);
|
||||
if(_isIDSeparator(*localeID)) {
|
||||
ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
|
||||
if(_isIDSeparator(*tmpLocaleID)) {
|
||||
const char *scriptID;
|
||||
/* Skip the script if available */
|
||||
ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
|
||||
if(scriptID != localeID+1) {
|
||||
ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
|
||||
if(scriptID != tmpLocaleID+1) {
|
||||
/* Found optional script */
|
||||
localeID = scriptID;
|
||||
tmpLocaleID = scriptID;
|
||||
}
|
||||
/* Skip the Country */
|
||||
if (_isIDSeparator(*localeID)) {
|
||||
if (_isIDSeparator(*tmpLocaleID)) {
|
||||
const char *cntryID;
|
||||
ulocimp_getCountry(localeID+1, NULL, 0, &cntryID);
|
||||
if (cntryID != localeID+1) {
|
||||
ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
|
||||
if (cntryID != tmpLocaleID+1) {
|
||||
/* Found optional country */
|
||||
localeID = cntryID;
|
||||
tmpLocaleID = cntryID;
|
||||
}
|
||||
if(_isIDSeparator(*localeID)) {
|
||||
if(_isIDSeparator(*tmpLocaleID)) {
|
||||
/* If there was no country ID, skip a possible extra IDSeparator */
|
||||
if (localeID != cntryID && _isIDSeparator(localeID[1])) {
|
||||
localeID++;
|
||||
if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
|
||||
tmpLocaleID++;
|
||||
}
|
||||
i=_getVariant(localeID+1, *localeID, variant, variantCapacity);
|
||||
i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -71,6 +71,7 @@ static const UCalGetTypeTest ucalGetTypeTests[] = {
|
|||
{ "ja_JP@calendar=japanese", UCAL_DEFAULT, "japanese" },
|
||||
{ "th_TH", UCAL_GREGORIAN, "gregorian" },
|
||||
{ "th_TH", UCAL_DEFAULT, "buddhist" },
|
||||
{ "th-TH-u-ca-gregory", UCAL_DEFAULT, "gregorian" },
|
||||
{ "ja_JP@calendar=japanese", UCAL_GREGORIAN, "gregorian" },
|
||||
{ "", UCAL_GREGORIAN, "gregorian" },
|
||||
{ NULL, UCAL_GREGORIAN, "gregorian" },
|
||||
|
|
|
@ -108,6 +108,7 @@ IntlCalendarTest::TestTypes()
|
|||
"th_TH", // Default calendar for th_TH is buddhist
|
||||
"th", // th's default region is TH and buddhist is used as default for TH
|
||||
"en_TH", // Default calendar for any locales with region TH is buddhist
|
||||
"en-TH-u-ca-gregory",
|
||||
NULL };
|
||||
const char *types[40] = { "gregorian",
|
||||
"japanese",
|
||||
|
@ -121,6 +122,7 @@ IntlCalendarTest::TestTypes()
|
|||
"buddhist",
|
||||
"buddhist",
|
||||
"buddhist",
|
||||
"gregorian",
|
||||
NULL };
|
||||
|
||||
for(j=0;locs[j];j++) {
|
||||
|
|
|
@ -694,7 +694,9 @@ static const char* testCases[][2]= {
|
|||
{"gl_ES_PREEURO", "1.150\\u00A0\\u20A7" },
|
||||
{"it_IT_PREEURO", "IT\\u20A4\\u00A01.150" },
|
||||
{"pt_PT_PREEURO", "1,150$50\\u00A0Esc."},
|
||||
{"en_US@currency=JPY", "\\u00A51,150"}
|
||||
{"en_US@currency=JPY", "\\u00A51,150"},
|
||||
{"en_US@currency=jpy", "\\u00A51,150"},
|
||||
{"en-US-u-cu-jpy", "\\u00A51,150"}
|
||||
};
|
||||
/**
|
||||
* Test localized currency patterns.
|
||||
|
|
Loading…
Add table
Reference in a new issue