ICU-7945 Merge BCP47 update in branch into trunk

X-SVN-Rev: 29813
This commit is contained in:
Michael Ow 2011-04-17 22:41:14 +00:00
parent 6d21ef8ca3
commit b33c3a62d0
2 changed files with 576 additions and 119 deletions

View file

@ -22,6 +22,12 @@ typedef struct VariantListEntry {
struct VariantListEntry *next;
} VariantListEntry;
/*
 * Singly linked list node holding one Unicode locale attribute value.
 * Nodes are linked through 'next'; the list is terminated by a NULL 'next'.
 */
typedef struct AttributeListEntry {
/* NUL-terminated attribute text. The node does not uniformly own this string:
   _appendKeywordsToLanguageTag stores a separately allocated copy and frees it
   itself, while ultag_close frees only the nodes, not the strings. */
const char *attribute;
/* next entry in the list, or NULL at the end */
struct AttributeListEntry *next;
} AttributeListEntry;
/* struct holding a single extension */
typedef struct ExtensionListEntry {
const char *key;
@ -38,6 +44,7 @@ typedef struct ULanguageTag {
const char *region;
VariantListEntry *variants;
ExtensionListEntry *extensions;
AttributeListEntry *attributes;
const char *privateuse;
const char *grandfathered;
} ULanguageTag;
@ -61,22 +68,24 @@ static const char* PRIVATEUSE_KEY = "x";
static const char* _POSIX = "_POSIX";
static const char* POSIX_KEY = "va";
static const char* POSIX_VALUE = "posix";
static const char* LOCALE_ATTRIBUTE_KEY = "attribute";
static const char* PRIVUSE_VARIANT_PREFIX = "lvariant";
#define LANG_UND_LEN 3
static const char* GRANDFATHERED[] = {
/* grandfathered preferred */
"art-lojban", "jbo",
"cel-gaulish", "",
"en-GB-oed", "",
"cel-gaulish", "xtg-x-cel-gaulish",
"en-GB-oed", "en-GB-x-oed",
"i-ami", "ami",
"i-bnn", "bnn",
"i-default", "",
"i-enochian", "",
"i-default", "en-x-i-default",
"i-enochian", "und-x-i-enochian",
"i-hak", "hak",
"i-klingon", "tlh",
"i-lux", "lb",
"i-mingo", "",
"i-mingo", "see-x-i-mingo",
"i-navajo", "nv",
"i-pwn", "pwn",
"i-tao", "tao",
@ -89,7 +98,7 @@ static const char* GRANDFATHERED[] = {
"sgn-ch-de", "sgg",
"zh-guoyu", "cmn",
"zh-hakka", "hak",
"zh-min", "",
"zh-min", "nan-x-zh-min",
"zh-min-nan", "nan",
"zh-xiang", "hsn",
NULL, NULL
@ -143,6 +152,15 @@ ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
static int32_t
ultag_getVariantsSize(const ULanguageTag* langtag);
#if 0
/* Currently not being used. */
static const char*
ultag_getAttribute(const ULanguageTag* langtag, int32_t idx);
#endif
static int32_t
ultag_getAttributesSize(const ULanguageTag* langtag);
static const char*
ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
@ -275,7 +293,7 @@ _isVariantSubtag(const char* s, int32_t len) {
if (len < 0) {
len = (int32_t)uprv_strlen(s);
}
if (len >= 5 && len <= 8 && _isAlphaString(s, len)) {
if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
return TRUE;
}
if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
@ -284,6 +302,35 @@ _isVariantSubtag(const char* s, int32_t len) {
return FALSE;
}
/*
 * Checks whether a subtag is acceptable as a variant carried inside the
 * private use section:
 *
 *   privateuse variant = 1*8alphanum
 *
 * s   - start of the subtag
 * len - length of the subtag, or a negative value if s is NUL-terminated
 *
 * Returns TRUE when the subtag is 1 to 8 alphanumeric characters long.
 */
static UBool
_isPrivateuseVariantSubtag(const char* s, int32_t len) {
    int32_t subtagLen = (len < 0) ? (int32_t)uprv_strlen(s) : len;

    if (subtagLen < 1 || subtagLen > 8) {
        return FALSE;
    }
    return _isAlphaNumericString(s, subtagLen) ? TRUE : FALSE;
}
/*
 * Checks whether a subtag is a well-formed Unicode locale attribute:
 *
 *   attribute = 3*8alphanum
 *
 * s   - start of the subtag
 * len - length of the subtag, or a negative value if s is NUL-terminated
 *
 * Returns TRUE when the subtag is 3 to 8 alphanumeric characters long.
 */
static UBool
_isAttributeSubtag(const char* s, int32_t len) {
    int32_t subtagLen = (len < 0) ? (int32_t)uprv_strlen(s) : len;

    if (subtagLen < 3 || subtagLen > 8) {
        return FALSE;
    }
    return _isAlphaNumericString(s, subtagLen) ? TRUE : FALSE;
}
static UBool
_isExtensionSingleton(const char* s, int32_t len) {
/*
@ -425,7 +472,7 @@ _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
VariantListEntry *prev, *cur;
int32_t cmp;
/* reorder variants in alphabetical order */
/* variants order should be preserved */
prev = NULL;
cur = *first;
while (TRUE) {
@ -434,14 +481,50 @@ _addVariantToList(VariantListEntry **first, VariantListEntry *var) {
var->next = NULL;
break;
}
/* Checking for duplicate variant */
cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant);
if (cmp == 0) {
/* duplicated variant */
bAdded = FALSE;
break;
}
prev = cur;
cur = cur->next;
}
}
return bAdded;
}
static UBool
_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
UBool bAdded = TRUE;
if (*first == NULL) {
attr->next = NULL;
*first = attr;
} else {
AttributeListEntry *prev, *cur;
int32_t cmp;
/* keep attributes sorted in alphabetical order */
prev = NULL;
cur = *first;
while (TRUE) {
if (cur == NULL) {
prev->next = attr;
attr->next = NULL;
break;
}
cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute);
if (cmp < 0) {
if (prev == NULL) {
*first = var;
*first = attr;
} else {
prev->next = var;
prev->next = attr;
}
var->next = cur;
attr->next = cur;
break;
}
if (cmp == 0) {
@ -544,6 +627,8 @@ _initializeULanguageTag(ULanguageTag* langtag) {
langtag->variants = NULL;
langtag->extensions = NULL;
langtag->attributes = NULL;
langtag->grandfathered = EMPTY;
langtag->privateuse = EMPTY;
}
@ -819,9 +904,9 @@ _bcp47ToLDMLType(const char* key, int32_t keyLen,
UErrorCode *status) {
UResourceBundle *rb;
char keyBuf[MAX_LDML_KEY_LEN];
char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
char bcpTypeBuf[ULOC_KEYWORDS_CAPACITY]; /* ensure buffter is large enough for multiple values (e.g. buddhist-greg) */
int32_t resultLen = 0;
int32_t i;
int32_t i, typeSize;
const char *resType = NULL;
UResourceBundle *mapData;
UErrorCode tmpStatus = U_ZERO_ERROR;
@ -849,9 +934,17 @@ _bcp47ToLDMLType(const char* key, int32_t keyLen,
bcpTypeLen = (int32_t)uprv_strlen(bcpType);
}
if (bcpTypeLen >= sizeof(bcpTypeBuf)) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
typeSize = 0;
for (i = 0; i < bcpTypeLen; i++) {
if (bcpType[i] == SEP) {
if (typeSize >= MAX_BCP47_SUBTAG_LEN) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
typeSize = 0;
} else {
typeSize++;
}
}
uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen);
@ -1108,7 +1201,7 @@ _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
/* validate */
if (_isVariantSubtag(pVar, -1)) {
if (uprv_strcmp(pVar,POSIX_VALUE)) {
if (uprv_strcmp(pVar,POSIX_VALUE) || len != uprv_strlen(POSIX_VALUE)) {
/* emit the variant to the list */
var = uprv_malloc(sizeof(VariantListEntry));
if (var == NULL) {
@ -1132,6 +1225,9 @@ _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
} else if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
} else if (_isPrivateuseValueSubtag(pVar, -1)) {
/* Handle private use subtags separately */
break;
}
}
/* reset variant starting position */
@ -1146,7 +1242,7 @@ _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
if (varFirst != NULL) {
int32_t varLen;
/* write out sorted/validated/normalized variants to the target */
/* write out validated/normalized variants to the target */
var = varFirst;
while (var != NULL) {
if (reslen < capacity) {
@ -1183,6 +1279,9 @@ _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
static int32_t
_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
int32_t attrBufLength = 0;
UBool isAttribute = FALSE;
UEnumeration *keywordEnum = NULL;
int32_t reslen = 0;
@ -1197,6 +1296,9 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
const char *key;
ExtensionListEntry *firstExt = NULL;
ExtensionListEntry *ext;
AttributeListEntry *firstAttr = NULL;
AttributeListEntry *attr;
char *attrValue;
char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
char *pExtBuf = extBuf;
int32_t extBufCapacity = sizeof(extBuf);
@ -1206,6 +1308,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
UBool isLDMLKeyword;
while (TRUE) {
isAttribute = FALSE;
key = uenum_next(keywordEnum, NULL, status);
if (key == NULL) {
break;
@ -1224,7 +1327,53 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
keylen = (int32_t)uprv_strlen(key);
isLDMLKeyword = (keylen > 1);
if (isLDMLKeyword) {
/* special keyword used for representing Unicode locale attributes */
if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
isAttribute = TRUE;
if (len > 0) {
int32_t i = 0;
while (TRUE) {
attrBufLength = 0;
for (; i < len; i++) {
if (buf[i] != '-') {
attrBuf[attrBufLength++] = buf[i];
} else {
i++;
break;
}
}
if (attrBufLength > 0) {
attrBuf[attrBufLength] = 0;
} else if (i >= len){
break;
}
/* create AttributeListEntry */
attr = uprv_malloc(sizeof(AttributeListEntry));
if (attr == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
attrValue = uprv_malloc(attrBufLength + 1);
if (attrValue == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
uprv_strcpy(attrValue, attrBuf);
attr->attribute = attrValue;
if (!_addAttributeToList(&firstAttr, attr)) {
uprv_free(attr);
uprv_free(attrValue);
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
}
}
}
} else if (isLDMLKeyword) {
int32_t modKeyLen;
/* transform key and value to bcp47 style */
@ -1289,21 +1438,23 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
}
/* create ExtensionListEntry */
ext = uprv_malloc(sizeof(ExtensionListEntry));
if (ext == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
ext->key = bcpKey;
ext->value = bcpValue;
if (!_addExtensionToList(&firstExt, ext, TRUE)) {
uprv_free(ext);
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
if (!isAttribute) {
/* create ExtensionListEntry */
ext = uprv_malloc(sizeof(ExtensionListEntry));
if (ext == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
break;
}
ext->key = bcpKey;
ext->value = bcpValue;
if (!_addExtensionToList(&firstExt, ext, TRUE)) {
uprv_free(ext);
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
}
}
}
@ -1322,46 +1473,63 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
}
if (U_SUCCESS(*status) && (firstExt != NULL)) {
if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
UBool startLDMLExtension = FALSE;
/* write out the sorted BCP47 extensions and private use */
attr = firstAttr;
ext = firstExt;
while (ext != NULL) {
if ((int32_t)uprv_strlen(ext->key) > 1 && !startLDMLExtension) {
/* write LDML singleton extension */
do {
if (!startLDMLExtension && (ext && uprv_strlen(ext->key) > 1)) {
/* write LDML singleton extension */
if (reslen < capacity) {
*(appendAt + reslen) = SEP;
}
reslen++;
if (reslen < capacity) {
*(appendAt + reslen) = LDMLEXT;
}
reslen++;
startLDMLExtension = TRUE;
}
/* write out the sorted BCP47 attributes, extensions and private use */
if (ext && (uprv_strlen(ext->key) == 1 || attr == NULL)) {
if (reslen < capacity) {
*(appendAt + reslen) = SEP;
}
reslen++;
len = (int32_t)uprv_strlen(ext->key);
if (reslen < capacity) {
*(appendAt + reslen) = LDMLEXT;
uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
}
reslen += len;
if (reslen < capacity) {
*(appendAt + reslen) = SEP;
}
reslen++;
startLDMLExtension = TRUE;
}
len = (int32_t)uprv_strlen(ext->value);
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
}
reslen += len;
if (reslen < capacity) {
*(appendAt + reslen) = SEP;
}
reslen++;
len = (int32_t)uprv_strlen(ext->key);
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
}
reslen += len;
if (reslen < capacity) {
*(appendAt + reslen) = SEP;
}
reslen++;
len = (int32_t)uprv_strlen(ext->value);
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
}
reslen += len;
ext = ext->next;
} else if (attr) {
/* write the value for the attributes */
if (reslen < capacity) {
*(appendAt + reslen) = SEP;
}
reslen++;
len = (int32_t)uprv_strlen(attr->attribute);
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
}
reslen += len;
ext = ext->next;
}
attr = attr->next;
}
} while (attr != NULL || ext != NULL);
}
/* clean up */
ext = firstExt;
@ -1371,6 +1539,15 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
ext = tmpExt;
}
attr = firstAttr;
while (attr != NULL) {
AttributeListEntry *tmpAttr = attr->next;
char *pValue = (char *)attr->attribute;
uprv_free(pValue);
uprv_free(attr);
attr = tmpAttr;
}
uenum_close(keywordEnum);
if (U_FAILURE(*status)) {
@ -1388,7 +1565,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
*/
static void
_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
const char *p, *pNext, *pSep;
const char *p, *pNext, *pSep, *pTmp, *pTmpStart;
const char *pBcpKey, *pBcpType;
const char *pKey, *pType;
int32_t bcpKeyLen = 0, bcpTypeLen;
@ -1396,6 +1573,11 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
ExtensionListEntry *kwdFirst = NULL;
int32_t bufIdx = 0;
int32_t len;
UBool variantExists = *posixVariant;
UBool searchFurther;
/* Reset the posixVariant value */
*posixVariant = FALSE;
pNext = ldmlext;
pBcpKey = pBcpType = NULL;
@ -1405,7 +1587,27 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
/* locate next separator char */
while (*pSep) {
if (*pSep == SEP) {
break;
searchFurther = FALSE;
if (pBcpKey != NULL) {
pTmpStart = (pSep + 1);
pTmp = pTmpStart;
/* Look at the next subtag and see if it is part of the previous subtag or the start of new keyword */
while (*pTmp) {
if (*pTmp == SEP || *(pTmp + 1) == 0) {
if (!_isLDMLKey(pTmpStart, (int32_t)(pTmp - pTmpStart))) {
searchFurther = TRUE;
}
break;
}
pTmp++;
}
}
if (searchFurther) {
pSep++;
continue;
} else {
break;
}
}
pSep++;
}
@ -1446,7 +1648,7 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
/* Special handling for u-va-posix, since we want to treat this as a variant, not */
/* as a keyword. */
if ( !uprv_strcmp(pKey,POSIX_KEY) && !uprv_strcmp(pType,POSIX_VALUE) ) {
if (!variantExists && !uprv_strcmp(pKey,POSIX_KEY) && !uprv_strcmp(pType,POSIX_VALUE) ) {
*posixVariant = TRUE;
} else {
/* create an ExtensionListEntry for this keyword */
@ -1503,6 +1705,8 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
int32_t len;
ExtensionListEntry *kwdFirst = NULL;
ExtensionListEntry *kwd;
AttributeListEntry *attrFirst = NULL;
AttributeListEntry *attr;
const char *key, *type;
char kwdBuf[ULOC_KEYWORDS_CAPACITY];
UBool posixVariant = FALSE;
@ -1511,6 +1715,11 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
return 0;
}
/* Determine if variants already exists */
if (ultag_getVariantsSize(langtag)) {
posixVariant = TRUE;
}
n = ultag_getExtensionsSize(langtag);
/* resolve locale keywords and reordering keys */
@ -1566,43 +1775,78 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
reslen += len;
}
if (U_SUCCESS(*status) && kwdFirst != NULL) {
attrFirst = langtag->attributes;
if (U_SUCCESS(*status) && (kwdFirst != NULL || attrFirst != NULL)) {
/* write out the sorted keywords */
UBool firstValue = TRUE;
UBool firstAttr = TRUE;
kwd = kwdFirst;
while (kwd != NULL) {
attr = attrFirst;
do {
if (reslen < capacity) {
if (kwd == kwdFirst) {
if (firstValue) {
/* '@' */
*(appendAt + reslen) = LOCALE_EXT_SEP;
} else {
firstValue = FALSE;
} else if (attr) {
/* '-' */
*(appendAt + reslen) = SEP;
}else {
/* ';' */
*(appendAt + reslen) = LOCALE_KEYWORD_SEP;
}
}
reslen++;
/* key */
len = (int32_t)uprv_strlen(kwd->key);
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
}
reslen += len;
if (attr) {
if (firstAttr) {
len = (int32_t)uprv_strlen(LOCALE_ATTRIBUTE_KEY);
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, LOCALE_ATTRIBUTE_KEY, uprv_min(len, capacity - reslen));
}
reslen += len;
/* '=' */
if (reslen < capacity) {
*(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
}
reslen++;
/* '=' */
if (reslen < capacity) {
*(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
}
/* type */
len = (int32_t)uprv_strlen(kwd->value);
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
}
reslen += len;
reslen++;
kwd = kwd->next;
}
firstAttr = FALSE;
}
len = (int32_t)uprv_strlen(attr->attribute);
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
}
reslen += len;
attr = attr->next;
} else if (kwd) {
/* key */
len = (int32_t)uprv_strlen(kwd->key);
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
}
reslen += len;
/* '=' */
if (reslen < capacity) {
*(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
}
reslen++;
/* type */
len = (int32_t)uprv_strlen(kwd->value);
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
}
reslen += len;
kwd = kwd->next;
}
} while (kwd || attr);
}
/* clean up */
@ -1620,6 +1864,123 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
return u_terminateChars(appendAt, capacity, reslen, status);
}
/*
 * Appends the private use representation of locale variant subtags that are
 * not well-formed BCP47 variants, in the form "-x-lvariant-<sub1>-<sub2>...".
 *
 * Leading subtags that are valid BCP47 variants are skipped. Once the first
 * subtag that is only valid as a private use value is seen, it and every
 * following subtag are emitted (lowercased) after the "x-lvariant" marker.
 *
 * localeID  - locale ID whose variant field is examined
 * appendAt  - destination for the generated text
 * capacity  - number of chars available at appendAt
 * strict    - if TRUE, an ill-formed subtag (or a variant that cannot be
 *             extracted) sets *status to U_ILLEGAL_ARGUMENT_ERROR; otherwise
 *             processing simply stops at the first ill-formed subtag
 * hadPosix  - not used by this function
 * status    - in/out error code; nothing is done if it already indicates failure
 *
 * Returns the full length of the text to append, even when it does not fit
 * into capacity, so the value can be used for preflighting. On error, 0 is
 * returned and appendAt is left untouched.
 */
static int32_t
_appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
    char buf[ULOC_FULLNAME_CAPACITY];
    /*
     * Staging buffer, so that appendAt is only modified on success.
     * The output is at most the variant text (< ULOC_FULLNAME_CAPACITY chars)
     * plus the 12 chars of "-x-lvariant-", hence the extra room. All writes are
     * bounded by the staging buffer's own size, not by the caller's capacity.
     */
    char tmpAppend[ULOC_FULLNAME_CAPACITY + 16];
    const int32_t tmpCapacity = (int32_t)sizeof(tmpAppend);
    UErrorCode tmpStatus = U_ZERO_ERROR;
    int32_t len, i;
    int32_t reslen = 0;

    (void)hadPosix; /* kept for signature parity with the other appenders */

    if (U_FAILURE(*status)) {
        return 0;
    }

    len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
        if (strict) {
            *status = U_ILLEGAL_ARGUMENT_ERROR;
        }
        return 0;
    }

    if (len > 0) {
        char *p, *pPriv;
        UBool bNext = TRUE;
        UBool firstValue = TRUE;
        UBool writeValue;

        pPriv = NULL;
        p = buf;
        while (bNext) {
            writeValue = FALSE;
            if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
                if (*p == 0) {
                    bNext = FALSE;
                } else {
                    *p = 0; /* terminate */
                }
                if (pPriv != NULL) {
                    /* Private use in the canonical format is lowercase in BCP47 */
                    for (i = 0; *(pPriv + i) != 0; i++) {
                        *(pPriv + i) = uprv_tolower(*(pPriv + i));
                    }

                    /* validate */
                    if (_isPrivateuseValueSubtag(pPriv, -1)) {
                        if (firstValue) {
                            /* well-formed variants before the first private use
                               subtag are not emitted here */
                            if (!_isVariantSubtag(pPriv, -1)) {
                                writeValue = TRUE;
                            }
                        } else {
                            writeValue = TRUE;
                        }
                    } else if (strict) {
                        *status = U_ILLEGAL_ARGUMENT_ERROR;
                        break;
                    } else {
                        break;
                    }

                    if (writeValue) {
                        /* Always advance reslen, even if a char does not fit,
                           so the returned length stays exact. */
                        if (reslen < tmpCapacity) {
                            tmpAppend[reslen] = SEP;
                        }
                        reslen++;

                        if (firstValue) {
                            /* "x-lvariant-" marker, written once */
                            if (reslen < tmpCapacity) {
                                tmpAppend[reslen] = *PRIVATEUSE_KEY;
                            }
                            reslen++;

                            if (reslen < tmpCapacity) {
                                tmpAppend[reslen] = SEP;
                            }
                            reslen++;

                            len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
                            if (reslen < tmpCapacity) {
                                uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, tmpCapacity - reslen));
                            }
                            reslen += len;

                            if (reslen < tmpCapacity) {
                                tmpAppend[reslen] = SEP;
                            }
                            reslen++;

                            firstValue = FALSE;
                        }

                        len = (int32_t)uprv_strlen(pPriv);
                        if (reslen < tmpCapacity) {
                            uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, tmpCapacity - reslen));
                        }
                        reslen += len;
                    }
                }
                /* reset private use starting position */
                pPriv = NULL;
            } else if (pPriv == NULL) {
                pPriv = p;
            }
            p++;
        }

        if (U_FAILURE(*status)) {
            return 0;
        }
    }

    if (U_SUCCESS(*status)) {
        /* copy as much of the staged text as fits into the destination */
        len = uprv_min(reslen, uprv_min(capacity, tmpCapacity));
        if (len > 0) {
            uprv_memcpy(appendAt, tmpAppend, len);
        }
    }

    /* terminates the output if possible and reports truncation through status */
    u_terminateChars(appendAt, capacity, reslen, status);

    return reslen;
}
/*
* -------------------------------------------------
*
@ -1637,6 +1998,7 @@ _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorC
#define EXTS 0x0020
#define EXTV 0x0040
#define PRIV 0x0080
#define ATTR 0x0100
static ULanguageTag*
ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
@ -1647,9 +2009,11 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
int32_t subtagLen;
int32_t extlangIdx;
ExtensionListEntry *pExtension;
AttributeListEntry *pAttribute;
char *pExtValueSubtag, *pExtValueSubtagEnd;
char *pAttrValue;
int32_t i;
UBool isLDMLExtension, reqLDMLType;
UBool isLDMLExtension, reqLDMLType, privateuseVar = FALSE;
if (parsedLen != NULL) {
*parsedLen = 0;
@ -1692,11 +2056,6 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
if (T_CString_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
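/* NOTE(review): a preferred mapping may be longer than its grandfathered tag
   (e.g. "cel-gaulish" -> "xtg-x-cel-gaulish"); confirm that t->buf is large
   enough for the copy below */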
uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
t->language = t->buf;
if (parsedLen != NULL) {
*parsedLen = tagLen;
}
return t;
}
}
@ -1715,6 +2074,8 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
pExtension = NULL;
pExtValueSubtag = NULL;
pExtValueSubtagEnd = NULL;
pAttribute = NULL;
pAttrValue = NULL;
isLDMLExtension = FALSE;
reqLDMLType = FALSE;
@ -1794,7 +2155,8 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
}
}
if (next & VART) {
if (_isVariantSubtag(pSubtag, subtagLen)) {
if (_isVariantSubtag(pSubtag, subtagLen) ||
(privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
VariantListEntry *var;
UBool isAdded;
@ -1886,12 +2248,25 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
} else {
/* LDML key */
if (!_isLDMLKey(pSubtag, subtagLen)) {
/* stop parsing here and let the valid LDML extension key/type
pairs processed by the code out of this while loop */
break;
/* May be part of incomplete type */
if (pExtValueSubtag != NULL) {
if (_isLDMLType(pSubtag, subtagLen)) {
pExtValueSubtagEnd = pSep;
reqLDMLType = FALSE;
next = EXTS | EXTV | PRIV;
}
} else if (pExtValueSubtag == NULL && _isAttributeSubtag(pSubtag, subtagLen)) {
/* Get attribute */
next = ATTR;
} else {
/* stop parsing here and let the valid LDML extension key/type
pairs processed by the code out of this while loop */
break;
}
} else {
reqLDMLType = TRUE;
next = EXTV;
}
reqLDMLType = TRUE;
next = EXTV;
}
} else {
/* Mark the end of this subtag */
@ -1899,12 +2274,15 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
next = EXTS | EXTV | PRIV;
}
if (pExtValueSubtag == NULL) {
/* if the start postion of this extension's value is not yet,
this one is the first value subtag */
pExtValueSubtag = pSubtag;
if (next != ATTR) {
if (pExtValueSubtag == NULL) {
/* if the start position of this extension's value is not yet set,
this one is the first value subtag */
pExtValueSubtag = pSubtag;
}
continue;
}
continue;
}
}
if (next & PRIV) {
@ -1962,12 +2340,22 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
}
subtagLen = (int32_t)(pSep - pSubtag);
if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
*pSep = 0;
next = VART;
privateuseVar = TRUE;
break;
} else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
pLastGoodPosition = pSep;
} else {
break;
}
}
if (next == VART) {
continue;
}
if (pLastGoodPosition - pPrivuseVal > 0) {
*pLastGoodPosition = 0;
t->privateuse = T_CString_toLowerCase(pPrivuseVal);
@ -1977,6 +2365,25 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
}
break;
}
if (next & ATTR) {
/* create a new attribute */
pAttribute = uprv_malloc(sizeof(AttributeListEntry));
if (pAttribute == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
goto error;
}
*pSep = 0;
pAttribute->attribute =T_CString_toLowerCase(pSubtag);
if (!_addAttributeToList(&(t->attributes), pAttribute)) {
uprv_free(pAttribute);
}
next = EXTS | EXTV | PRIV;
continue;
}
/* If we fell through here, it means this subtag is illegal - quit parsing */
break;
}
@ -2037,6 +2444,15 @@ ultag_close(ULanguageTag* langtag) {
}
}
if (langtag->attributes) {
AttributeListEntry *curAttr = langtag->attributes;
while (curAttr) {
AttributeListEntry *nextAttr = curAttr->next;
uprv_free(curAttr);
curAttr = nextAttr;
}
}
uprv_free(langtag);
}
@ -2118,6 +2534,39 @@ ultag_getVariantsSize(const ULanguageTag* langtag) {
return size;
}
#if 0
/* Currently not being used. */
/*
 * Returns the attribute at position idx (0-based) in the language tag's
 * attribute list, or NULL if the list has no entry at that position.
 */
static const char*
ultag_getAttribute(const ULanguageTag* langtag, int32_t idx) {
    const AttributeListEntry *entry;
    int32_t pos;

    for (entry = langtag->attributes, pos = 0; entry != NULL; entry = entry->next, pos++) {
        if (pos == idx) {
            return entry->attribute;
        }
    }
    return NULL;
}
#endif
/*
 * Returns the number of entries in the language tag's attribute list
 * (0 when the list is empty).
 */
static int32_t
ultag_getAttributesSize(const ULanguageTag* langtag) {
    const AttributeListEntry *entry;
    int32_t count = 0;

    for (entry = langtag->attributes; entry != NULL; entry = entry->next) {
        count++;
    }
    return count;
}
static const char*
ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
const char *key = NULL;
@ -2257,6 +2706,7 @@ uloc_toLanguageTag(const char* localeID,
reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
return reslen;
}
@ -2272,7 +2722,7 @@ uloc_forLanguageTag(const char* langtag,
int32_t reslen = 0;
const char *subtag, *p;
int32_t len;
int32_t i, n;
int32_t i, n, m;
UBool noRegion = TRUE;
lt = ultag_parse(langtag, -1, parsedLength, status);
@ -2366,9 +2816,10 @@ uloc_forLanguageTag(const char* langtag,
/* keywords */
n = ultag_getExtensionsSize(lt);
m = ultag_getAttributesSize(lt);
subtag = ultag_getPrivateUse(lt);
if (n > 0 || uprv_strlen(subtag) > 0) {
if (reslen == 0 && n > 0) {
if (n > 0 || m > 0 || uprv_strlen(subtag) > 0) {
if (reslen == 0 && (n > 0 || m > 0)) {
/* need a language */
if (reslen < localeIDCapacity) {
uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen));

View file

@ -584,7 +584,7 @@ static int32_t UCharsToEscapedAscii(const UChar* utext, int32_t len, char* resul
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66
};
int32_t i, j, v;
int32_t i, j;
int32_t resultLen = 0;
const int32_t limit = len<0 ? buflen : len; /* buflen is long enough to hit the buffer limit */
const int32_t escapeLimit1 = buflen-2;
@ -717,7 +717,7 @@ static void TestDisplayNames()
/* test that we properly preflight and return data when there's a non-default pattern,
see ticket #8262. */
{
int32_t i, j, v;
int32_t i;
static const char *locale="az_Cyrl";
static const char *displayLocale="ja";
static const char *expectedChars =
@ -1982,7 +1982,7 @@ static void TestCanonicalization(void)
{ "ja_JP", "ja_JP", "ja_JP" },
/* test case for "i-default" */
{ "i-default", NULL, NULL }
{ "i-default", "en@x=i-default", "en@x=i-default" }
};
static const char* label[] = { "getName", "canonicalize" };
@ -5566,6 +5566,9 @@ const char* const locale_to_langtag[][3] = {
{"sr_Latn_SR", "sr-Latn-SR", "sr-Latn-SR"},
{"en__POSIX", "en-u-va-posix", "en-u-va-posix"},
{"en_POSIX", "en-u-va-posix", "en-u-va-posix"},
{"en_US_POSIX_VAR", "en-US-posix-x-lvariant-var", NULL}, /* variant POSIX_VAR is processed as regular variant */
{"en_US_VAR_POSIX", "en-US-x-lvariant-var-posix", NULL}, /* variant VAR_POSIX is processed as regular variant */
{"en_US_POSIX@va=posix2", "en-US-u-va-posix2", "en-US-u-va-posix2"}, /* if keyword va=xxx already exists, variant POSIX is simply dropped */
{"en_US_POSIX@ca=japanese", "en-US-u-ca-japanese-va-posix", "en-US-u-ca-japanese-va-posix"},
{"und_555", "und-555", "und-555"},
{"123", "und", NULL},
@ -5573,16 +5576,15 @@ const char* const locale_to_langtag[][3] = {
{"_Latn", "und-Latn", "und-Latn"},
{"_DE", "und-DE", "und-DE"},
{"und_FR", "und-FR", "und-FR"},
{"th_TH_TH", "th-TH", NULL},
{"th_TH_TH", "th-TH-x-lvariant-th", NULL},
{"bogus", "bogus", "bogus"},
{"foooobarrr", "und", NULL},
{"az_AZ_CYRL", "az-Cyrl-AZ", "az-Cyrl-AZ"},
{"aa_BB_CYRL", "aa-BB", NULL},
{"aa_BB_CYRL", "aa-BB-x-lvariant-cyrl", NULL},
{"en_US_1234", "en-US-1234", "en-US-1234"},
{"en_US_VARIANTA_VARIANTB", "en-US-varianta-variantb", "en-US-varianta-variantb"},
{"en_US_VARIANTB_VARIANTA", "en-US-varianta-variantb", "en-US-varianta-variantb"},
{"ja__9876_5432", "ja-5432-9876", "ja-5432-9876"},
{"zh_Hant__VAR", "zh-Hant", NULL},
{"ja__9876_5432", "ja-9876-5432", "ja-9876-5432"},
{"zh_Hant__VAR", "zh-Hant-x-lvariant-var", NULL},
{"es__BADVARIANT_GOODVAR", "es-goodvar", NULL},
{"en@calendar=gregorian", "en-u-ca-gregory", "en-u-ca-gregory"},
{"de@collation=phonebook;calendar=gregorian", "de-u-ca-gregory-co-phonebk", "de-u-ca-gregory-co-phonebk"},
@ -5670,22 +5672,26 @@ static const struct {
{"123", "", 0},
{"en_us", "", 0},
{"en-latn-x", "en_Latn", 7},
{"art-lojban", "jbo", 10},
{"zh-hakka", "hak", 8},
{"art-lojban", "jbo", 3},
{"zh-hakka", "hak", 3},
{"zh-cmn-CH", "cmn_CH", 9},
{"xxx-yy", "xxx_YY", 6},
{"fr-234", "fr_234", 6},
{"i-default", "", 9},
{"i-default", "en@x=i-default", 14},
{"i-test", "", 0},
{"ja-jp-jp", "ja_JP", 5},
{"bogus", "bogus", 5},
{"boguslang", "", 0},
{"EN-lATN-us", "en_Latn_US", 10},
{"und-variant-1234", "__1234_VARIANT", 16},
{"und-variant-1234", "__VARIANT_1234", 16},
{"und-varzero-var1-vartwo", "__VARZERO", 11},
{"en-u-ca-gregory", "en@calendar=gregorian", 15},
{"en-U-cu-USD", "en@currency=usd", 11},
{"en-US-u-va-posix", "en_US_POSIX", 16},
{"en-us-u-ca-gregory-va-posix", "en_US_POSIX@calendar=gregorian", 27},
{"en-us-posix-u-va-posix", "en_US_POSIX@va=posix", 22},
{"en-us-u-va-posix2", "en_US@va=posix2", 17},
{"en-us-vari1-u-va-posix", "en_US_VARI1@va=posix", 22},
{"ar-x-1-2-3", "ar@x=1-2-3", 10},
{"fr-u-nu-latn-cu-eur", "fr@currency=eur;numbers=latn", 19},
{"de-k-kext-u-co-phonebk-nu-latn", "de@collation=phonebook;k=kext;numbers=latn", 30},