ICU-8951 BCP 47 key/type converter APIs merged from the work branch.

X-SVN-Rev: 36320
This commit is contained in:
Yoshito Umaoka 2014-09-03 04:42:21 +00:00
parent 6dc89ee267
commit 8885f1ef78
12 changed files with 1129 additions and 469 deletions

1
.gitattributes vendored
View file

@ -53,6 +53,7 @@ icu4c/source/aclocal.m4 -text
icu4c/source/allinone/icucheck.bat -text
icu4c/source/common/common.vcxproj -text
icu4c/source/common/common.vcxproj.filters -text
icu4c/source/common/uloc_keytype.cpp -text
icu4c/source/common/unifiedcache.cpp -text
icu4c/source/common/unifiedcache.h -text
icu4c/source/data/coll/dsb.txt -text

View file

@ -105,7 +105,7 @@ serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
uidna.o usprep.o uts46.o punycode.o \
util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o dtintrv.o ucnvsel.o propsvec.o \
ulist.o uloc_tag.o icudataver.o icuplug.o listformatter.o \
sharedobject.o simplepatternformatter.o unifiedcache.o
sharedobject.o simplepatternformatter.o unifiedcache.o uloc_keytype.o
## Header files to install
HEADERS = $(srcdir)/unicode/*.h

View file

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
@ -236,6 +236,7 @@
<ClCompile Include="ubidi_props.c" />
<ClCompile Include="ubidiln.c" />
<ClCompile Include="ubidiwrt.c" />
<ClCompile Include="uloc_keytype.cpp" />
<ClCompile Include="ushape.cpp" />
<ClCompile Include="brkeng.cpp">
</ClCompile>
@ -1754,4 +1755,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View file

@ -421,9 +421,6 @@
<ClCompile Include="usetiter.cpp">
<Filter>properties &amp; sets</Filter>
</ClCompile>
<ClCompile Include="icuplug.c">
<Filter>registration</Filter>
</ClCompile>
<ClCompile Include="serv.cpp">
<Filter>registration</Filter>
</ClCompile>
@ -568,6 +565,10 @@
<ClCompile Include="stringtriebuilder.cpp">
<Filter>collections</Filter>
</ClCompile>
<ClCompile Include="icuplug.cpp" />
<ClCompile Include="uloc_keytype.cpp">
<Filter>locales &amp; resources</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="ubidi_props.h">
@ -1112,4 +1113,4 @@
<Filter>collections</Filter>
</CustomBuild>
</ItemGroup>
</Project>
</Project>

View file

@ -37,6 +37,7 @@ typedef enum ECleanupCommonType {
UCLN_COMMON_BREAKITERATOR,
UCLN_COMMON_BREAKITERATOR_DICT,
UCLN_COMMON_SERVICE,
UCLN_COMMON_LOCALE_KEY_TYPE,
UCLN_COMMON_LOCALE,
UCLN_COMMON_LOCALE_AVAILABLE,
UCLN_COMMON_ULOC,

View file

@ -2524,4 +2524,103 @@ uloc_acceptLanguage(char *result, int32_t resultAvailable,
return -1;
}
/**
 * Converts a locale keyword key to its BCP 47 Unicode locale extension key.
 *
 * @param keyword  the locale keyword key to convert.
 * @return the mapped BCP 47 key when ulocimp_toBcpKey() knows the keyword;
 *         otherwise the input pointer itself when
 *         ultag_isUnicodeLocaleKey() accepts it; otherwise NULL.
 */
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleKey(const char* keyword)
{
    const char* mapped = ulocimp_toBcpKey(keyword);
    if (mapped != NULL) {
        return mapped;
    }
    // No known mapping: pass the keyword through only if it already has
    // the syntax of a Unicode locale key.
    return ultag_isUnicodeLocaleKey(keyword, -1) ? keyword : NULL;
}
/**
 * Converts a locale keyword value to its BCP 47 Unicode locale extension
 * type for the given keyword.
 *
 * @param keyword  the locale keyword key the value belongs to.
 * @param value    the locale keyword value to convert.
 * @return the mapped BCP 47 type when ulocimp_toBcpType() resolves it;
 *         otherwise the input value pointer itself when
 *         ultag_isUnicodeLocaleType() accepts it; otherwise NULL.
 */
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleType(const char* keyword, const char* value)
{
    const char* mapped = ulocimp_toBcpType(keyword, value, NULL, NULL);
    if (mapped != NULL) {
        return mapped;
    }
    // Unknown type: pass the value through only if its syntax is already
    // that of a Unicode locale type.
    return ultag_isUnicodeLocaleType(value, -1) ? value : NULL;
}
// True when c is an ASCII digit or an ASCII letter of either case.
// Note: the argument is evaluated more than once.
#define ISALPHANUM(c) ( ((c) >= '0' && (c) <= '9') || ((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') )

/**
 * Checks that a legacy key consists solely of ASCII letters and digits.
 * An empty string passes, because no offending character is found.
 *
 * @param legacyKey  NUL-terminated key to check.
 * @return TRUE when every character satisfies ISALPHANUM, FALSE otherwise.
 */
static UBool
isWellFormedLegacyKey(const char* legacyKey)
{
    for (const char* cur = legacyKey; *cur != 0; ++cur) {
        if (!ISALPHANUM(*cur)) {
            return FALSE;
        }
    }
    return TRUE;
}
/**
 * Checks that a legacy type is made of one or more non-empty runs of ASCII
 * letters/digits separated by single '_', '/' or '-' characters. The string
 * may neither start nor end with a separator, and two separators may not
 * be adjacent.
 *
 * @param legacyType  NUL-terminated type to check.
 * @return TRUE when the string matches that shape, FALSE otherwise
 *         (including for the empty string).
 */
static UBool
isWellFormedLegacyType(const char* legacyType)
{
    int32_t runLen = 0;   // length of the alphanumeric run currently being read
    for (const char* cur = legacyType; *cur != 0; ++cur) {
        const char ch = *cur;
        if (ISALPHANUM(ch)) {
            ++runLen;
            continue;
        }
        if (ch != '_' && ch != '/' && ch != '-') {
            return FALSE;   // not alphanumeric and not a separator
        }
        if (runLen == 0) {
            return FALSE;   // separator with nothing before it
        }
        runLen = 0;
    }
    return (runLen != 0);
}
/**
 * Converts a locale keyword key to its legacy key.
 *
 * @param keyword  the locale keyword key to convert.
 * @return the mapped legacy key when ulocimp_toLegacyKey() knows the
 *         keyword; otherwise the input pointer itself when the keyword is
 *         well-formed as a legacy key; otherwise NULL.
 */
U_CAPI const char* U_EXPORT2
uloc_toLegacyKey(const char* keyword)
{
    const char* mapped = ulocimp_toLegacyKey(keyword);
    if (mapped != NULL) {
        return mapped;
    }
    // Fall back to a syntax check for keys that have no known mapping.
    //
    // Neither ICU nor LDML/CLDR defines the legacy keyword syntax. A key
    // obviously cannot contain '=', all existing keys use ASCII letters
    // only, and no key incompatible with BCP 47 syntax will be added, so a
    // valid key is assumed to consist of [0-9a-zA-Z] with no symbols.
    return isWellFormedLegacyKey(keyword) ? keyword : NULL;
}
/**
 * Converts a locale keyword value to its legacy type for the given keyword.
 *
 * @param keyword  the locale keyword key the value belongs to.
 * @param value    the locale keyword value to convert.
 * @return the mapped legacy type when ulocimp_toLegacyType() resolves it;
 *         otherwise the input value pointer itself when the value is
 *         well-formed as a legacy type; otherwise NULL.
 */
U_CAPI const char* U_EXPORT2
uloc_toLegacyType(const char* keyword, const char* value)
{
    const char* mapped = ulocimp_toLegacyType(keyword, value, NULL, NULL);
    if (mapped != NULL) {
        return mapped;
    }
    // Fall back to a syntax check for types that have no known mapping.
    //
    // Neither ICU nor LDML/CLDR defines the legacy keyword syntax. A type
    // obviously cannot contain '='. Existing types use ASCII letters plus
    // a few symbols, and apart from time zone IDs no type incompatible with
    // BCP 47 syntax will be added. A valid type is therefore assumed to
    // start with [0-9a-zA-Z] and may contain '-', '_' or '/' in the middle.
    return isWellFormedLegacyType(value) ? value : NULL;
}
/*eof*/

View file

@ -0,0 +1,577 @@
/*
**********************************************************************
* Copyright (C) 2014, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
#include "unicode/utypes.h"
#include "cstring.h"
#include "uassert.h"
#include "ucln_cmn.h"
#include "uhash.h"
#include "umutex.h"
#include "uresimp.h"
#include "uvector.h"
// Lookup table from a key id string to its LocExtKeyData. Both the legacy
// id and (when different) the BCP id of each key are inserted by
// initFromResourceBundle().
static UHashtable* gLocExtKeyMap = NULL;
// Guards the one-time initialization performed through init().
static icu::UInitOnce gLocExtKeyMapInitOnce = U_INITONCE_INITIALIZER;
// Holds the strings allocated with uprv_malloc() during initialization;
// its element deleter releases them with uprv_free().
static icu::UVector* gKeyTypeStringPool = NULL;
// Holds every LocExtKeyData allocated during initialization.
static icu::UVector* gLocExtKeyDataEntries = NULL;
// Holds every LocExtType allocated during initialization.
static icu::UVector* gLocExtTypeEntries = NULL;
// bit flags for special types
// A flag is set for a key when its type map contains an entry literally
// named "CODEPOINTS" or "REORDER_CODE".
typedef enum {
SPECIALTYPE_NONE = 0,
SPECIALTYPE_CODEPOINTS = 1,
SPECIALTYPE_REORDER_CODE = 2
} SpecialType;
// Per-key record: both spellings of the key, the map of its known types,
// and the SpecialType bit flags that apply to it.
typedef struct LocExtKeyData {
const char* legacyId;
const char* bcpId;
UHashtable* typeMap;
uint32_t specialTypes;
} LocExtKeyData;
// Per-type record: the legacy and BCP spellings of one type value.
typedef struct LocExtType {
const char* legacyId;
const char* bcpId;
} LocExtType;
U_CDECL_BEGIN
/**
 * Cleanup callback registered with ucln_common_registerCleanup().
 * Closes the key map, deletes the three owning vectors, clears all the
 * globals and resets the init-once guard so initialization can run again.
 *
 * @return always TRUE.
 */
static UBool U_CALLCONV
uloc_key_type_cleanup(void) {
    // The key map is closed before the vectors holding its entries go away.
    if (gLocExtKeyMap != NULL) {
        uhash_close(gLocExtKeyMap);
    }
    gLocExtKeyMap = NULL;

    delete gLocExtKeyDataEntries;
    delete gLocExtTypeEntries;
    delete gKeyTypeStringPool;
    gLocExtKeyDataEntries = NULL;
    gLocExtTypeEntries = NULL;
    gKeyTypeStringPool = NULL;

    gLocExtKeyMapInitOnce.reset();
    return TRUE;
}
/**
 * Element deleter for gKeyTypeStringPool: releases one pooled string
 * buffer with uprv_free().
 */
static void U_CALLCONV
uloc_deleteKeyTypeStringPoolEntry(void* obj)
{
    uprv_free(obj);
}
/**
 * Element deleter for gLocExtKeyDataEntries: closes the entry's type map
 * (when it has one) and then frees the LocExtKeyData itself.
 */
static void U_CALLCONV
uloc_deleteKeyDataEntry(void* obj)
{
    LocExtKeyData* entry = (LocExtKeyData*)obj;
    UHashtable* types = entry->typeMap;
    if (types != NULL) {
        uhash_close(types);
    }
    uprv_free(entry);
}
/**
 * Element deleter for gLocExtTypeEntries: frees one LocExtType record
 * with uprv_free().
 */
static void U_CALLCONV
uloc_deleteTypeEntry(void* obj)
{
    uprv_free(obj);
}
U_CDECL_END
// Builds the global key/type lookup tables from the "keyTypeData" resource
// bundle. Invoked through umtx_initOnce() from init().
//
// For every entry of "keyMap" a LocExtKeyData is created and registered in
// gLocExtKeyMap under its legacy id and, when different, its BCP id. Each
// key gets its own type hashtable filled from "typeMap", plus the optional
// "typeAlias" and "bcpTypeAlias" tables.
//
// sts: in/out error code; set on the first failure, after which the loops
//      stop and the bundles are closed.
//
// NOTE(review): objects already stored in the globals when a failure occurs
// are left in place here; releasing them appears to be left to
// uloc_key_type_cleanup() - confirm.
static void U_CALLCONV
initFromResourceBundle(UErrorCode& sts) {
ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup);
// presumably a case-insensitive string hash/compare pair - confirm against uhash.h
gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
if (U_FAILURE(sts)) {
return;
}
UResourceBundle *keyTypeDataRes = NULL;
UResourceBundle *keyMapRes = NULL;
UResourceBundle *typeMapRes = NULL;
UResourceBundle *typeAliasRes = NULL;
UResourceBundle *bcpTypeAliasRes = NULL;
// keyMap and typeMap are required: their failures go into sts and are
// picked up by the U_FAILURE(sts) checks that follow the vector creation.
keyTypeDataRes = ures_openDirect(NULL, "keyTypeData", &sts);
keyMapRes = ures_getByKey(keyTypeDataRes, "keyMap", NULL, &sts);
typeMapRes = ures_getByKey(keyTypeDataRes, "typeMap", NULL, &sts);
// The two alias tables are optional: failures are recorded only in tmpSts
// and turned into a NULL bundle pointer.
UErrorCode tmpSts = U_ZERO_ERROR;
typeAliasRes = ures_getByKey(keyTypeDataRes, "typeAlias", NULL, &tmpSts);
if (U_FAILURE(tmpSts)) {
typeAliasRes = NULL;
tmpSts = U_ZERO_ERROR;
}
bcpTypeAliasRes = ures_getByKey(keyTypeDataRes, "bcpTypeAlias", NULL, &tmpSts);
if (U_FAILURE(tmpSts)) {
bcpTypeAliasRes = NULL;
tmpSts = U_ZERO_ERROR;
}
// initialize vectors storing dynamically allocated objects
gKeyTypeStringPool = new UVector(uloc_deleteKeyTypeStringPoolEntry, NULL, sts);
if (gKeyTypeStringPool == NULL || U_FAILURE(sts)) {
goto close_bundles;
}
gLocExtKeyDataEntries = new UVector(uloc_deleteKeyDataEntry, NULL, sts);
if (gLocExtKeyDataEntries == NULL || U_FAILURE(sts)) {
goto close_bundles;
}
gLocExtTypeEntries = new UVector(uloc_deleteTypeEntry, NULL, sts);
if (gLocExtTypeEntries == NULL || U_FAILURE(sts)) {
goto close_bundles;
}
// iterate through keyMap resource
// keyMapEntry is a stack bundle reused for every iteration and closed
// once after the loop.
UResourceBundle keyMapEntry;
ures_initStackObject(&keyMapEntry);
while (ures_hasNext(keyMapRes)) {
ures_getNextResource(keyMapRes, &keyMapEntry, &sts);
if (U_FAILURE(sts)) {
break;
}
// The resource key is the legacy key id; the string value is the BCP id.
const char* legacyKeyId = ures_getKey(&keyMapEntry);
int32_t bcpKeyIdLen = 0;
const UChar* uBcpKeyId = ures_getString(&keyMapEntry, &bcpKeyIdLen, &sts);
if (U_FAILURE(sts)) {
break;
}
// empty value indicates that BCP key is same with the legacy key.
const char* bcpKeyId = legacyKeyId;
if (bcpKeyIdLen > 0) {
// Convert the UChar value into a char buffer kept in the string pool.
char* bcpKeyIdBuf = (char*)uprv_malloc(bcpKeyIdLen + 1);
if (bcpKeyIdBuf == NULL) {
sts = U_MEMORY_ALLOCATION_ERROR;
break;
}
u_UCharsToChars(uBcpKeyId, bcpKeyIdBuf, bcpKeyIdLen);
bcpKeyIdBuf[bcpKeyIdLen] = 0;
gKeyTypeStringPool->addElement(bcpKeyIdBuf, sts);
if (U_FAILURE(sts)) {
break;
}
bcpKeyId = bcpKeyIdBuf;
}
UBool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0;
// Per-key map from type id (legacy, BCP and aliases) to LocExtType.
// NOTE(review): typeDataMap is a local that is only handed over to keyData
// further down; the error 'break's between here and that point leave the
// loop without calling uhash_close() on it - looks like a leak, confirm.
UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
if (U_FAILURE(sts)) {
break;
}
uint32_t specialTypes = SPECIALTYPE_NONE;
UResourceBundle* typeAliasResByKey = NULL;
UResourceBundle* bcpTypeAliasResByKey = NULL;
if (typeAliasRes != NULL) {
typeAliasResByKey = ures_getByKey(typeAliasRes, legacyKeyId, NULL, &tmpSts);
if (U_FAILURE(tmpSts)) {
// only a few keys have type alias mapping
typeAliasResByKey = NULL;
tmpSts = U_ZERO_ERROR;
}
}
if (bcpTypeAliasRes != NULL) {
// Note: this table is looked up by the BCP key id, not the legacy one.
bcpTypeAliasResByKey = ures_getByKey(bcpTypeAliasRes, bcpKeyId, NULL, &tmpSts);
if (U_FAILURE(tmpSts)) {
// only a few keys have BCP type alias mapping
bcpTypeAliasResByKey = NULL;
tmpSts = U_ZERO_ERROR;
}
}
// look up type map for the key, and walk through the mapping data
UResourceBundle* typeMapResByKey = ures_getByKey(typeMapRes, legacyKeyId, NULL, &tmpSts);
if (U_FAILURE(tmpSts)) {
// type map for each key must exist
U_ASSERT(FALSE);
tmpSts = U_ZERO_ERROR;
} else {
UResourceBundle typeMapEntry;
ures_initStackObject(&typeMapEntry);
while (ures_hasNext(typeMapResByKey)) {
ures_getNextResource(typeMapResByKey, &typeMapEntry, &sts);
if (U_FAILURE(sts)) {
break;
}
const char* legacyTypeId = ures_getKey(&typeMapEntry);
// special types
// These entries only set a flag on the key; they get no LocExtType.
if (uprv_strcmp(legacyTypeId, "CODEPOINTS") == 0) {
specialTypes |= SPECIALTYPE_CODEPOINTS;
continue;
}
if (uprv_strcmp(legacyTypeId, "REORDER_CODE") == 0) {
specialTypes |= SPECIALTYPE_REORDER_CODE;
continue;
}
if (isTZ) {
// a timezone key uses a colon instead of a slash in the resource.
// e.g. America:Los_Angeles
if (uprv_strchr(legacyTypeId, ':') != NULL) {
// Make a pooled copy with every ':' replaced by '/'.
int32_t legacyTypeIdLen = uprv_strlen(legacyTypeId);
char* legacyTypeIdBuf = (char*)uprv_malloc(legacyTypeIdLen + 1);
if (legacyTypeIdBuf == NULL) {
sts = U_MEMORY_ALLOCATION_ERROR;
break;
}
const char* p = legacyTypeId;
char* q = legacyTypeIdBuf;
while (*p) {
if (*p == ':') {
*q++ = '/';
} else {
*q++ = *p;
}
p++;
}
*q = 0;
gKeyTypeStringPool->addElement(legacyTypeIdBuf, sts);
if (U_FAILURE(sts)) {
break;
}
legacyTypeId = legacyTypeIdBuf;
}
}
int32_t bcpTypeIdLen = 0;
const UChar* uBcpTypeId = ures_getString(&typeMapEntry, &bcpTypeIdLen, &sts);
if (U_FAILURE(sts)) {
break;
}
// empty value indicates that BCP type is same with the legacy type.
const char* bcpTypeId = legacyTypeId;
if (bcpTypeIdLen > 0) {
char* bcpTypeIdBuf = (char*)uprv_malloc(bcpTypeIdLen + 1);
if (bcpTypeIdBuf == NULL) {
sts = U_MEMORY_ALLOCATION_ERROR;
break;
}
u_UCharsToChars(uBcpTypeId, bcpTypeIdBuf, bcpTypeIdLen);
bcpTypeIdBuf[bcpTypeIdLen] = 0;
gKeyTypeStringPool->addElement(bcpTypeIdBuf, sts);
if (U_FAILURE(sts)) {
break;
}
bcpTypeId = bcpTypeIdBuf;
}
// Note: legacy type value should never be
// equivalent to bcp type value of a different
// type under the same key. So we use a single
// map for lookup.
LocExtType* t = (LocExtType*)uprv_malloc(sizeof(LocExtType));
if (t == NULL) {
sts = U_MEMORY_ALLOCATION_ERROR;
break;
}
t->bcpId = bcpTypeId;
t->legacyId = legacyTypeId;
gLocExtTypeEntries->addElement((void*)t, sts);
if (U_FAILURE(sts)) {
break;
}
// Register the record under both spellings; the pointer comparison
// is enough because bcpTypeId was initialized from legacyTypeId.
uhash_put(typeDataMap, (void*)legacyTypeId, t, &sts);
if (bcpTypeId != legacyTypeId) {
// different type value
uhash_put(typeDataMap, (void*)bcpTypeId, t, &sts);
}
if (U_FAILURE(sts)) {
break;
}
// also put aliases in the map
// The whole alias table is rescanned for every type, keeping the
// entries whose target string equals this type's legacy id.
if (typeAliasResByKey != NULL) {
UResourceBundle typeAliasDataEntry;
ures_initStackObject(&typeAliasDataEntry);
ures_resetIterator(typeAliasResByKey);
while (ures_hasNext(typeAliasResByKey) && U_SUCCESS(sts)) {
int32_t toLen;
ures_getNextResource(typeAliasResByKey, &typeAliasDataEntry, &sts);
const UChar* to = ures_getString(&typeAliasDataEntry, &toLen, &sts);
if (U_FAILURE(sts)) {
break;
}
// check if this is an alias of canonical legacy type
if (uprv_compareInvAscii(NULL, legacyTypeId, -1, to, toLen) == 0) {
const char* from = ures_getKey(&typeAliasDataEntry);
if (isTZ) {
// replace colon with slash if necessary
if (uprv_strchr(from, ':') != NULL) {
int32_t fromLen = uprv_strlen(from);
char* fromBuf = (char*)uprv_malloc(fromLen + 1);
if (fromBuf == NULL) {
sts = U_MEMORY_ALLOCATION_ERROR;
break;
}
const char* p = from;
char* q = fromBuf;
while (*p) {
if (*p == ':') {
*q++ = '/';
} else {
*q++ = *p;
}
p++;
}
*q = 0;
gKeyTypeStringPool->addElement(fromBuf, sts);
if (U_FAILURE(sts)) {
break;
}
from = fromBuf;
}
}
uhash_put(typeDataMap, (void*)from, t, &sts);
}
}
ures_close(&typeAliasDataEntry);
if (U_FAILURE(sts)) {
break;
}
}
// Same scan over the BCP alias table, matched against the BCP type id.
if (bcpTypeAliasResByKey != NULL) {
UResourceBundle bcpTypeAliasDataEntry;
ures_initStackObject(&bcpTypeAliasDataEntry);
ures_resetIterator(bcpTypeAliasResByKey);
while (ures_hasNext(bcpTypeAliasResByKey) && U_SUCCESS(sts)) {
int32_t toLen;
ures_getNextResource(bcpTypeAliasResByKey, &bcpTypeAliasDataEntry, &sts);
const UChar* to = ures_getString(&bcpTypeAliasDataEntry, &toLen, &sts);
if (U_FAILURE(sts)) {
break;
}
// check if this is an alias of bcp type
if (uprv_compareInvAscii(NULL, bcpTypeId, -1, to, toLen) == 0) {
const char* from = ures_getKey(&bcpTypeAliasDataEntry);
uhash_put(typeDataMap, (void*)from, t, &sts);
}
}
ures_close(&bcpTypeAliasDataEntry);
if (U_FAILURE(sts)) {
break;
}
}
}
ures_close(&typeMapEntry);
}
// The per-key bundles are closed on success and failure alike, before
// sts is examined.
ures_close(typeMapResByKey);
ures_close(typeAliasResByKey);
ures_close(bcpTypeAliasResByKey);
if (U_FAILURE(sts)) {
break;
}
LocExtKeyData* keyData = (LocExtKeyData*)uprv_malloc(sizeof(LocExtKeyData));
if (keyData == NULL) {
sts = U_MEMORY_ALLOCATION_ERROR;
break;
}
keyData->bcpId = bcpKeyId;
keyData->legacyId = legacyKeyId;
keyData->specialTypes = specialTypes;
keyData->typeMap = typeDataMap;
gLocExtKeyDataEntries->addElement((void*)keyData, sts);
if (U_FAILURE(sts)) {
break;
}
// Register the key record under both spellings.
uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts);
if (legacyKeyId != bcpKeyId) {
// different key value
uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts);
}
if (U_FAILURE(sts)) {
break;
}
}
ures_close(&keyMapEntry);
// Common exit: close the top-level bundles in reverse order of opening.
close_bundles:
ures_close(bcpTypeAliasRes);
ures_close(typeAliasRes);
ures_close(typeMapRes);
ures_close(keyMapRes);
ures_close(keyTypeDataRes);
}
/**
 * Makes sure the key/type tables have been loaded, running
 * initFromResourceBundle() through the init-once guard.
 *
 * @return TRUE when the init-once call reports success, FALSE otherwise.
 */
static UBool
init() {
    UErrorCode initStatus = U_ZERO_ERROR;
    umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, initStatus);
    return U_FAILURE(initStatus) ? FALSE : TRUE;
}
/**
 * Checks whether val is a '-'-separated list of hexadecimal subtags, each
 * 4 to 6 digits long (upper or lower case A-F accepted).
 *
 * @param val  NUL-terminated value to check.
 * @return TRUE when every subtag, including the last one, is 4-6 hex
 *         digits; FALSE otherwise.
 */
static UBool
isSpecialTypeCodepoints(const char* val) {
    int32_t digits = 0;   // hex digits seen in the current subtag
    for (const char* cur = val; *cur != 0; ++cur) {
        const char ch = *cur;
        if (('0' <= ch && ch <= '9') ||
            ('A' <= ch && ch <= 'F') || ('a' <= ch && ch <= 'f')) {
            ++digits;
        } else if (ch == '-' && digits >= 4 && digits <= 6) {
            digits = 0;   // subtag finished with a valid length
        } else {
            // Illegal character, or a separator after a bad-length subtag.
            return FALSE;
        }
    }
    return (digits >= 4 && digits <= 6);
}
/**
 * Checks whether val is a '-'-separated list of alphabetic subtags, each
 * 3 to 8 ASCII letters long.
 *
 * @param val  NUL-terminated value to check.
 * @return TRUE when every subtag, including the last one, is 3-8 letters;
 *         FALSE otherwise.
 */
static UBool
isSpecialTypeReorderCode(const char* val) {
    int32_t letters = 0;   // letters seen in the current subtag
    for (const char* cur = val; *cur != 0; ++cur) {
        const char ch = *cur;
        if (('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z')) {
            ++letters;
        } else if (ch == '-' && letters >= 3 && letters <= 8) {
            letters = 0;   // subtag finished with a valid length
        } else {
            // Illegal character, or a separator after a bad-length subtag.
            return FALSE;
        }
    }
    return (letters >=3 && letters <=8);
}
/**
 * Looks up the BCP id recorded for a key.
 *
 * @param key  key id to look up in gLocExtKeyMap.
 * @return the bcpId of the matching entry, or NULL when initialization
 *         failed or the key is not in the map.
 */
U_CFUNC const char*
ulocimp_toBcpKey(const char* key) {
    if (!init()) {
        return NULL;
    }
    const LocExtKeyData* entry = (const LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
    return (entry != NULL) ? entry->bcpId : NULL;
}
/**
 * Looks up the legacy id recorded for a key.
 *
 * @param key  key id to look up in gLocExtKeyMap.
 * @return the legacyId of the matching entry, or NULL when initialization
 *         failed or the key is not in the map.
 */
U_CFUNC const char*
ulocimp_toLegacyKey(const char* key) {
    if (!init()) {
        return NULL;
    }
    const LocExtKeyData* entry = (const LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
    return (entry != NULL) ? entry->legacyId : NULL;
}
/**
 * Resolves the BCP id of a type under the given key.
 *
 * @param key            key id the type belongs to.
 * @param type           type id to resolve.
 * @param isKnownKey     optional out flag; set to TRUE when the key is in
 *                       the key map, FALSE otherwise.
 * @param isSpecialType  optional out flag; set to TRUE when the type was
 *                       accepted by a special-type syntax check rather
 *                       than found in the key's type map.
 * @return the bcpId of the matching type entry; the input type pointer
 *         itself for an accepted special type; NULL when initialization
 *         failed, the key is unknown, or the type is not recognized.
 */
U_CFUNC const char*
ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
    // Both out flags start as FALSE, even if initialization fails below.
    if (isKnownKey != NULL) {
        *isKnownKey = FALSE;
    }
    if (isSpecialType != NULL) {
        *isSpecialType = FALSE;
    }
    if (!init()) {
        return NULL;
    }

    const LocExtKeyData* keyEntry = (const LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
    if (keyEntry == NULL) {
        return NULL;
    }
    if (isKnownKey != NULL) {
        *isKnownKey = TRUE;
    }

    const LocExtType* typeEntry = (const LocExtType*)uhash_get(keyEntry->typeMap, type);
    if (typeEntry != NULL) {
        return typeEntry->bcpId;
    }

    // Not a listed type: try the special-type syntaxes enabled for the key,
    // code points first, reorder codes only if that did not match.
    const uint32_t flags = keyEntry->specialTypes;
    UBool special = FALSE;
    if ((flags & SPECIALTYPE_CODEPOINTS) != 0) {
        special = isSpecialTypeCodepoints(type);
    }
    if (!special && (flags & SPECIALTYPE_REORDER_CODE) != 0) {
        special = isSpecialTypeReorderCode(type);
    }
    if (!special) {
        return NULL;
    }
    if (isSpecialType != NULL) {
        *isSpecialType = TRUE;
    }
    return type;
}
/**
 * Resolves the legacy id of a type under the given key.
 *
 * @param key            key id the type belongs to.
 * @param type           type id to resolve.
 * @param isKnownKey     optional out flag; set to TRUE when the key is in
 *                       the key map, FALSE otherwise.
 * @param isSpecialType  optional out flag; set to TRUE when the type was
 *                       accepted by a special-type syntax check rather
 *                       than found in the key's type map.
 * @return the legacyId of the matching type entry; the input type pointer
 *         itself for an accepted special type; NULL when initialization
 *         failed, the key is unknown, or the type is not recognized.
 */
U_CFUNC const char*
ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
    // Both out flags start as FALSE, even if initialization fails below.
    if (isKnownKey != NULL) {
        *isKnownKey = FALSE;
    }
    if (isSpecialType != NULL) {
        *isSpecialType = FALSE;
    }
    if (!init()) {
        return NULL;
    }

    const LocExtKeyData* keyEntry = (const LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
    if (keyEntry == NULL) {
        return NULL;
    }
    if (isKnownKey != NULL) {
        *isKnownKey = TRUE;
    }

    const LocExtType* typeEntry = (const LocExtType*)uhash_get(keyEntry->typeMap, type);
    if (typeEntry != NULL) {
        return typeEntry->legacyId;
    }

    // Not a listed type: try the special-type syntaxes enabled for the key,
    // code points first, reorder codes only if that did not match.
    const uint32_t flags = keyEntry->specialTypes;
    UBool special = FALSE;
    if ((flags & SPECIALTYPE_CODEPOINTS) != 0) {
        special = isSpecialTypeCodepoints(type);
    }
    if (!special && (flags & SPECIALTYPE_REORDER_CODE) != 0) {
        special = isSpecialTypeReorderCode(type);
    }
    if (!special) {
        return NULL;
    }
    if (isSpecialType != NULL) {
        *isSpecialType = TRUE;
    }
    return type;
}

View file

@ -408,8 +408,8 @@ _isPrivateuseValueSubtags(const char* s, int32_t len) {
return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
}
static UBool
_isLDMLKey(const char* s, int32_t len) {
U_CFUNC UBool
ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
if (len < 0) {
len = (int32_t)uprv_strlen(s);
}
@ -419,17 +419,33 @@ _isLDMLKey(const char* s, int32_t len) {
return FALSE;
}
static UBool
_isLDMLType(const char* s, int32_t len) {
U_CFUNC UBool
ultag_isUnicodeLocaleType(const char*s, int32_t len) {
const char* p;
int32_t subtagLen = 0;
if (len < 0) {
len = (int32_t)uprv_strlen(s);
}
if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) {
return TRUE;
}
return FALSE;
}
for (p = s; len > 0; p++, len--) {
if (*p == SEP) {
if (subtagLen < 3) {
return FALSE;
}
subtagLen = 0;
} else if (ISALPHA(*p) || ISNUMERIC(*p)) {
subtagLen++;
if (subtagLen > 8) {
return FALSE;
}
} else {
return FALSE;
}
}
return (subtagLen >= 3);
}
/*
* -------------------------------------------------
*
@ -608,417 +624,6 @@ _initializeULanguageTag(ULanguageTag* langtag) {
langtag->privateuse = EMPTY;
}
#define KEYTYPEDATA "keyTypeData"
#define KEYMAP "keyMap"
#define TYPEMAP "typeMap"
#define TYPEALIAS "typeAlias"
#define MAX_BCP47_SUBTAG_LEN 9 /* including null terminator */
#define MAX_LDML_KEY_LEN 22
#define MAX_LDML_TYPE_LEN 32
/*
 * Converts a legacy (LDML) keyword key to its BCP 47 key using the
 * keyTypeData/keyMap resource.
 *
 * key, keyLen            - legacy key; keyLen < 0 means NUL-terminated.
 * bcpKey, bcpKeyCapacity - output buffer receiving the BCP 47 key.
 * status                 - set to U_ILLEGAL_ARGUMENT_ERROR when the key is
 *                          too long, when it has no mapping and is not
 *                          itself a well-formed BCP 47 key, or when the
 *                          result does not fit the internal subtag buffer;
 *                          resource errors are passed through.
 *
 * Returns the result of u_terminateChars() for the converted key, or 0 on
 * failure.
 */
static int32_t
_ldmlKeyToBCP47(const char* key, int32_t keyLen,
                char* bcpKey, int32_t bcpKeyCapacity,
                UErrorCode *status) {
    UResourceBundle *rb;
    char keyBuf[MAX_LDML_KEY_LEN];
    char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
    int32_t resultLen = 0;
    int32_t i;
    UErrorCode tmpStatus = U_ZERO_ERROR;
    const UChar *uBcpKey;
    int32_t bcpKeyLen;

    if (keyLen < 0) {
        keyLen = (int32_t)uprv_strlen(key);
    }
    /* compare as signed values; sizeof() alone would force an unsigned compare */
    if (keyLen >= (int32_t)sizeof(keyBuf)) {
        /* no known valid LDML key exceeding 21 */
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    uprv_memcpy(keyBuf, key, keyLen);
    keyBuf[keyLen] = 0;
    /* to lower case */
    for (i = 0; i < keyLen; i++) {
        keyBuf[i] = uprv_tolower(keyBuf[i]);
    }

    rb = ures_openDirect(NULL, KEYTYPEDATA, status);
    ures_getByKey(rb, KEYMAP, rb, status);
    if (U_FAILURE(*status)) {
        ures_close(rb);
        return 0;
    }

    uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus);
    if (U_SUCCESS(tmpStatus)) {
        if (bcpKeyLen == 0) {
            /* empty value indicates the BCP47 key is same with the legacy key */
            if (keyLen >= (int32_t)sizeof(bcpKeyBuf)) {
                /* a legacy key may be longer than a BCP 47 subtag buffer */
                *status = U_ILLEGAL_ARGUMENT_ERROR;
            } else {
                uprv_memcpy(bcpKeyBuf, key, keyLen);
                bcpKeyBuf[keyLen] = 0;
                resultLen = keyLen;
            }
        } else if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) {
            /* mapped value from the resource does not fit the buffer */
            *status = U_ILLEGAL_ARGUMENT_ERROR;
        } else {
            u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen);
            bcpKeyBuf[bcpKeyLen] = 0;
            resultLen = bcpKeyLen;
        }
    } else {
        /* _isLDMLKey() is expected to bound keyLen to a BCP 47 key length */
        if (_isLDMLKey(key, keyLen) && keyLen < (int32_t)sizeof(bcpKeyBuf)) {
            uprv_memcpy(bcpKeyBuf, key, keyLen);
            bcpKeyBuf[keyLen] = 0;
            resultLen = keyLen;
        } else {
            /* mapping not available */
            *status = U_ILLEGAL_ARGUMENT_ERROR;
        }
    }
    ures_close(rb);

    if (U_FAILURE(*status)) {
        return 0;
    }

    uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity));
    return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status);
}
/*
 * Converts a BCP 47 Unicode extension key to its legacy (LDML) key by
 * scanning the keyTypeData/keyMap resource for an entry whose BCP value
 * equals the lower-cased input.
 *
 * bcpKey, bcpKeyLen - BCP 47 key; bcpKeyLen < 0 means NUL-terminated.
 * key, keyCapacity  - output buffer receiving the legacy key.
 * status            - set to U_ILLEGAL_ARGUMENT_ERROR when the input is
 *                     too long; resource errors are passed through.
 *
 * When no entry matches, the lower-cased input itself is returned as the
 * legacy key. Returns the result of u_terminateChars(), or 0 on failure.
 */
static int32_t
_bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen,
                char* key, int32_t keyCapacity,
                UErrorCode *status) {
    UResourceBundle *rb;
    char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
    int32_t resultLen = 0;
    int32_t i;
    const char *resKey = NULL;
    UResourceBundle *mapData;

    if (bcpKeyLen < 0) {
        bcpKeyLen = (int32_t)uprv_strlen(bcpKey);
    }
    /* compare as signed values; sizeof() alone would force an unsigned compare */
    if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen);
    bcpKeyBuf[bcpKeyLen] = 0;
    /* to lower case */
    for (i = 0; i < bcpKeyLen; i++) {
        bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]);
    }

    rb = ures_openDirect(NULL, KEYTYPEDATA, status);
    ures_getByKey(rb, KEYMAP, rb, status);
    if (U_FAILURE(*status)) {
        ures_close(rb);
        return 0;
    }

    mapData = ures_getNextResource(rb, NULL, status);
    while (U_SUCCESS(*status)) {
        const UChar *uBcpKey;
        char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
        int32_t tmpBcpKeyLen;
        const char *tmpBcpKey = tmpBcpKeyBuf;

        uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status);
        if (U_FAILURE(*status)) {
            break;
        }
        if (tmpBcpKeyLen == 0) {
            /* empty value indicates the BCP47 key is same with the legacy key */
            tmpBcpKey = ures_getKey(mapData);
        } else if (tmpBcpKeyLen < (int32_t)sizeof(tmpBcpKeyBuf)) {
            u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen);
            tmpBcpKeyBuf[tmpBcpKeyLen] = 0;
        } else {
            /*
             * The value does not fit the subtag buffer. It also cannot equal
             * the input, which is shorter than the buffer, so skip it
             * rather than overrun tmpBcpKeyBuf.
             */
            tmpBcpKey = NULL;
        }
        if (tmpBcpKey != NULL && uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKey) == 0) {
            /* found a matching BCP47 key */
            resKey = ures_getKey(mapData);
            resultLen = (int32_t)uprv_strlen(resKey);
            break;
        }
        if (!ures_hasNext(rb)) {
            break;
        }
        ures_getNextResource(rb, mapData, status);
    }
    ures_close(mapData);
    ures_close(rb);

    if (U_FAILURE(*status)) {
        return 0;
    }

    if (resKey == NULL) {
        /* no mapping found: fall back to the lower-cased input */
        resKey = bcpKeyBuf;
        resultLen = bcpKeyLen;
    }

    uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity));
    return u_terminateChars(key, keyCapacity, resultLen, status);
}
/*
 * Converts a legacy (LDML) keyword type to its BCP 47 type for the given
 * key, using keyTypeData/typeMap and, when the type is not listed there,
 * keyTypeData/typeAlias.
 *
 * key, keyLen              - legacy key; keyLen < 0 means NUL-terminated.
 * type, typeLen            - legacy type; typeLen < 0 means NUL-terminated.
 * bcpType, bcpTypeCapacity - output buffer receiving the BCP 47 type.
 * status                   - set to U_ILLEGAL_ARGUMENT_ERROR when key or
 *                            type is too long, when no mapping exists and
 *                            the type is not a well-formed BCP 47 type, or
 *                            when a value from the resource does not fit
 *                            the internal buffers; other lookup errors are
 *                            passed through.
 *
 * For the "timezone" key, '/' in the type is replaced with ':' before the
 * lookup, because the resource uses ':' in its keys.
 *
 * Returns the result of u_terminateChars(), or 0 on failure.
 */
static int32_t
_ldmlTypeToBCP47(const char* key, int32_t keyLen,
                 const char* type, int32_t typeLen,
                 char* bcpType, int32_t bcpTypeCapacity,
                 UErrorCode *status) {
    UResourceBundle *rb, *keyTypeData, *typeMapForKey;
    char keyBuf[MAX_LDML_KEY_LEN];
    char typeBuf[MAX_LDML_TYPE_LEN];
    /*
     * Sized like typeBuf, not like a single subtag: when the mapping value
     * is empty the legacy type itself (up to MAX_LDML_TYPE_LEN - 1 bytes)
     * is copied in here. No NUL terminator is stored in this buffer.
     */
    char bcpTypeBuf[MAX_LDML_TYPE_LEN];
    int32_t resultLen = 0;
    int32_t i;
    UErrorCode tmpStatus = U_ZERO_ERROR;
    const UChar *uBcpType, *uCanonicalType;
    int32_t bcpTypeLen, canonicalTypeLen;
    UBool isTimezone = FALSE;

    if (keyLen < 0) {
        keyLen = (int32_t)uprv_strlen(key);
    }
    if (keyLen >= (int32_t)sizeof(keyBuf)) {
        /* no known valid LDML key exceeding 21 */
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    uprv_memcpy(keyBuf, key, keyLen);
    keyBuf[keyLen] = 0;
    /* to lower case */
    for (i = 0; i < keyLen; i++) {
        keyBuf[i] = uprv_tolower(keyBuf[i]);
    }
    if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
        isTimezone = TRUE;
    }

    if (typeLen < 0) {
        typeLen = (int32_t)uprv_strlen(type);
    }
    if (typeLen >= (int32_t)sizeof(typeBuf)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    if (isTimezone) {
        /* replace '/' with ':' */
        for (i = 0; i < typeLen; i++) {
            if (*(type + i) == '/') {
                typeBuf[i] = ':';
            } else {
                typeBuf[i] = *(type + i);
            }
        }
        typeBuf[typeLen] = 0;
        type = &typeBuf[0];
    }

    keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status);
    rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status);
    if (U_FAILURE(*status)) {
        ures_close(rb);
        ures_close(keyTypeData);
        return 0;
    }

    typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus);
    uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus);
    if (U_SUCCESS(tmpStatus)) {
        if (bcpTypeLen == 0) {
            /* empty value indicates the BCP47 type is same with the legacy type */
            uprv_memcpy(bcpTypeBuf, type, typeLen);
            resultLen = typeLen;
        } else if (bcpTypeLen > (int32_t)sizeof(bcpTypeBuf)) {
            /* mapped value from the resource does not fit the buffer */
            *status = U_ILLEGAL_ARGUMENT_ERROR;
        } else {
            u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
            resultLen = bcpTypeLen;
        }
    } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
        /* is this type alias? */
        tmpStatus = U_ZERO_ERROR;
        ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus);
        ures_getByKey(rb, keyBuf, rb, &tmpStatus);
        uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpStatus);
        if (U_SUCCESS(tmpStatus)) {
            if (canonicalTypeLen >= (int32_t)sizeof(typeBuf)) {
                /* canonical type from the resource does not fit typeBuf */
                *status = U_ILLEGAL_ARGUMENT_ERROR;
            } else {
                u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen);
                if (isTimezone) {
                    /* replace '/' with ':' */
                    for (i = 0; i < canonicalTypeLen; i++) {
                        if (typeBuf[i] == '/') {
                            typeBuf[i] = ':';
                        }
                    }
                }
                typeBuf[canonicalTypeLen] = 0;

                /* look up the canonical type */
                uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus);
                if (U_SUCCESS(tmpStatus)) {
                    if (bcpTypeLen == 0) {
                        /* empty value indicates the BCP47 type is same with the legacy type */
                        uprv_memcpy(bcpTypeBuf, typeBuf, canonicalTypeLen);
                        resultLen = canonicalTypeLen;
                    } else if (bcpTypeLen > (int32_t)sizeof(bcpTypeBuf)) {
                        /* mapped value from the resource does not fit the buffer */
                        *status = U_ILLEGAL_ARGUMENT_ERROR;
                    } else {
                        u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
                        resultLen = bcpTypeLen;
                    }
                }
            }
        }
        if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
            /* neither a listed type nor an alias: accept well-formed BCP 47 types as is */
            if (_isLDMLType(type, typeLen)) {
                uprv_memcpy(bcpTypeBuf, type, typeLen);
                resultLen = typeLen;
            } else {
                /* mapping not available */
                *status = U_ILLEGAL_ARGUMENT_ERROR;
            }
        }
    } else {
        *status = tmpStatus;
    }
    ures_close(rb);
    ures_close(typeMapForKey);
    ures_close(keyTypeData);

    if (U_FAILURE(*status)) {
        return 0;
    }

    uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity));
    return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status);
}
/*
 * Converts a BCP 47 Unicode extension type to its legacy (LDML) type for
 * the given legacy key by scanning keyTypeData/typeMap/<key> for an entry
 * whose BCP value equals the lower-cased input.
 *
 * key, keyLen         - legacy key; keyLen < 0 means NUL-terminated.
 * bcpType, bcpTypeLen - BCP 47 type, possibly several '-'-separated
 *                       subtags; bcpTypeLen < 0 means NUL-terminated.
 * type, typeCapacity  - output buffer receiving the legacy type.
 * status              - set to U_ILLEGAL_ARGUMENT_ERROR when key or type
 *                       is too long or a subtag followed by a separator
 *                       exceeds the BCP 47 subtag length; lookup errors
 *                       other than a missing resource are passed through.
 *
 * When no entry matches, the lower-cased input is returned as the legacy
 * type. For the "timezone" key, ':' in the result is replaced with '/'.
 *
 * Returns the result of u_terminateChars(), or 0 on failure.
 */
static int32_t
_bcp47ToLDMLType(const char* key, int32_t keyLen,
                 const char* bcpType, int32_t bcpTypeLen,
                 char* type, int32_t typeCapacity,
                 UErrorCode *status) {
    UResourceBundle *rb;
    char keyBuf[MAX_LDML_KEY_LEN];
    char bcpTypeBuf[ULOC_KEYWORDS_CAPACITY]; /* ensure buffer is large enough for multiple values (e.g. buddhist-greg) */
    int32_t resultLen = 0;
    int32_t i, typeSize;
    const char *resType = NULL;
    UResourceBundle *mapData;
    UErrorCode tmpStatus = U_ZERO_ERROR;
    int32_t copyLen;

    if (keyLen < 0) {
        keyLen = (int32_t)uprv_strlen(key);
    }
    if (keyLen >= (int32_t)sizeof(keyBuf)) {
        /* no known valid LDML key exceeding 21 */
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    uprv_memcpy(keyBuf, key, keyLen);
    keyBuf[keyLen] = 0;
    /* to lower case */
    for (i = 0; i < keyLen; i++) {
        keyBuf[i] = uprv_tolower(keyBuf[i]);
    }

    if (bcpTypeLen < 0) {
        bcpTypeLen = (int32_t)uprv_strlen(bcpType);
    }
    if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) {
        /* the input plus its terminator must fit bcpTypeBuf */
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /*
     * Reject over-long subtags.
     * NOTE(review): the length is only tested when a separator is reached,
     * so the final subtag is not validated here - confirm whether that is
     * intended before tightening it.
     */
    typeSize = 0;
    for (i = 0; i < bcpTypeLen; i++) {
        if (bcpType[i] == SEP) {
            if (typeSize >= MAX_BCP47_SUBTAG_LEN) {
                *status = U_ILLEGAL_ARGUMENT_ERROR;
                return 0;
            }
            typeSize = 0;
        } else {
            typeSize++;
        }
    }

    uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen);
    bcpTypeBuf[bcpTypeLen] = 0;
    /* to lower case */
    for (i = 0; i < bcpTypeLen; i++) {
        bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]);
    }

    rb = ures_openDirect(NULL, KEYTYPEDATA, status);
    ures_getByKey(rb, TYPEMAP, rb, status);
    if (U_FAILURE(*status)) {
        ures_close(rb);
        return 0;
    }

    ures_getByKey(rb, keyBuf, rb, &tmpStatus);
    mapData = ures_getNextResource(rb, NULL, &tmpStatus);
    while (U_SUCCESS(tmpStatus)) {
        const UChar *uBcpType;
        /* same capacity as bcpTypeBuf: anything longer cannot match the input */
        char tmpBcpTypeBuf[ULOC_KEYWORDS_CAPACITY];
        int32_t tmpBcpTypeLen;
        const char *tmpBcpType = tmpBcpTypeBuf;

        uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus);
        if (U_FAILURE(tmpStatus)) {
            break;
        }
        if (tmpBcpTypeLen == 0) {
            /* empty value indicates the BCP47 type is same with the legacy type */
            tmpBcpType = ures_getKey(mapData);
        } else if (tmpBcpTypeLen < (int32_t)sizeof(tmpBcpTypeBuf)) {
            u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen);
            tmpBcpTypeBuf[tmpBcpTypeLen] = 0;
        } else {
            /* too long to fit and therefore too long to match: skip it */
            tmpBcpType = NULL;
        }
        if (tmpBcpType != NULL && uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpType) == 0) {
            /* found a matching BCP47 type */
            resType = ures_getKey(mapData);
            resultLen = (int32_t)uprv_strlen(resType);
            break;
        }
        if (!ures_hasNext(rb)) {
            break;
        }
        ures_getNextResource(rb, mapData, &tmpStatus);
    }
    ures_close(mapData);
    ures_close(rb);

    /* a missing type map for the key is not an error; fall back below */
    if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) {
        *status = tmpStatus;
        return 0;
    }

    if (resType == NULL) {
        resType = bcpTypeBuf;
        resultLen = bcpTypeLen;
    }

    copyLen = uprv_min(resultLen, typeCapacity);
    uprv_memcpy(type, resType, copyLen);

    if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
        /* the resource stores time zone ids with ':' in place of '/' */
        for (i = 0; i < copyLen; i++) {
            if (*(type + i) == ':') {
                *(type + i) = '/';
            }
        }
    }

    return u_terminateChars(type, typeCapacity, resultLen, status);
}
static int32_t
_appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
char buf[ULOC_LANG_CAPACITY];
@ -1311,7 +916,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
const char *bcpKey, *bcpValue;
UErrorCode tmpStatus = U_ZERO_ERROR;
int32_t keylen;
UBool isLDMLKeyword;
UBool isBcpUExt;
while (TRUE) {
isAttribute = FALSE;
@ -1320,7 +925,8 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
break;
}
len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
if (U_FAILURE(tmpStatus)) {
/* buf must be null-terminated */
if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
@ -1331,7 +937,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
keylen = (int32_t)uprv_strlen(key);
isLDMLKeyword = (keylen > 1);
isBcpUExt = (keylen > 1);
/* special keyword used for representing Unicode locale attributes */
if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
@ -1379,36 +985,49 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
}
}
} else if (isLDMLKeyword) {
int32_t modKeyLen;
/* transform key and value to bcp47 style */
modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus);
if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
} else if (isBcpUExt) {
bcpKey = uloc_toUnicodeLocaleKey(key);
if (bcpKey == NULL) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
tmpStatus = U_ZERO_ERROR;
continue;
}
bcpKey = pExtBuf;
pExtBuf += (modKeyLen + 1);
extBufCapacity -= (modKeyLen + 1);
len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus);
if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
/* we've checked buf is null-terminated above */
bcpValue = uloc_toUnicodeLocaleType(key, buf);
if (bcpValue == NULL) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
tmpStatus = U_ZERO_ERROR;
continue;
}
bcpValue = pExtBuf;
pExtBuf += (len + 1);
extBufCapacity -= (len + 1);
if (bcpValue == buf) {
/*
When uloc_toUnicodeLocaleType(key, buf) returns the
input value as is, the value is well-formed, but has
no known mapping. This implementation normalizes the
the value to lower case
*/
int32_t bcpValueLen = uprv_strlen(bcpValue);
if (bcpValueLen < extBufCapacity) {
uprv_strcpy(pExtBuf, bcpValue);
T_CString_toLowerCase(pExtBuf);
bcpValue = pExtBuf;
pExtBuf += (bcpValueLen + 1);
extBufCapacity -= (bcpValueLen + 1);
} else {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
continue;
}
}
} else {
if (*key == PRIVATEUSE) {
if (!_isPrivateuseValueSubtags(buf, len)) {
@ -1600,7 +1219,7 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
/* locate next separator char */
for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
if (_isLDMLKey(pTag, len)) {
if (ultag_isUnicodeLocaleKey(pTag, len)) {
pKwds = pTag;
break;
}
@ -1708,7 +1327,7 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
/* locate next separator char */
for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
if (_isLDMLKey(pTag, len)) {
if (ultag_isUnicodeLocaleKey(pTag, len)) {
if (pBcpKey) {
emitKeyword = TRUE;
pNextBcpKey = pTag;
@ -1744,28 +1363,78 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
const char *pKey = NULL; /* LDML key */
const char *pType = NULL; /* LDML type */
char bcpKeyBuf[9]; /* BCP key length is always 2 for now */
U_ASSERT(pBcpKey != NULL);
/* u extension key to LDML key */
len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status);
if (U_FAILURE(*status)) {
if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
/* the BCP key is invalid */
*status = U_ILLEGAL_ARGUMENT_ERROR;
goto cleanup;
}
pKey = buf + bufIdx;
bufIdx += len;
*(buf + bufIdx) = 0;
bufIdx++;
if (pBcpType) {
/* BCP type to locale type */
len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
if (U_FAILURE(*status)) {
uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
bcpKeyBuf[bcpKeyLen] = 0;
/* u extension key to LDML key */
pKey = uloc_toLegacyKey(bcpKeyBuf);
if (pKey == NULL) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
goto cleanup;
}
if (pKey == bcpKeyBuf) {
/*
The key returned by toLegacyKey points to the input buffer.
We normalize the result key to lower case.
*/
T_CString_toLowerCase(bcpKeyBuf);
if (bufSize - bufIdx - 1 >= bcpKeyLen) {
uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen);
pKey = buf + bufIdx;
bufIdx += bcpKeyLen;
*(buf + bufIdx) = 0;
bufIdx++;
} else {
*status = U_BUFFER_OVERFLOW_ERROR;
goto cleanup;
}
pType = buf + bufIdx;
bufIdx += len;
*(buf + bufIdx) = 0;
bufIdx++;
}
if (pBcpType) {
char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */
if (bcpTypeLen >= sizeof(bcpTypeBuf)) {
/* the BCP type is too long */
*status = U_ILLEGAL_ARGUMENT_ERROR;
goto cleanup;
}
uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
bcpTypeBuf[bcpTypeLen] = 0;
/* BCP type to locale type */
pType = uloc_toLegacyType(pKey, bcpTypeBuf);
if (pType == NULL) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
goto cleanup;
}
if (pType == bcpTypeBuf) {
/*
The type returned by toLegacyType points to the input buffer.
We normalize the result type to lower case.
*/
/* normalize to lower case */
T_CString_toLowerCase(bcpTypeBuf);
if (bufSize - bufIdx - 1 >= bcpTypeLen) {
uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen);
pType = buf + bufIdx;
bufIdx += bcpTypeLen;
*(buf + bufIdx) = 0;
bufIdx++;
} else {
*status = U_BUFFER_OVERFLOW_ERROR;
goto cleanup;
}
}
} else {
/* typeless - default type value is "yes" */
pType = LOCALE_TYPE_YES;

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2004-2010, International Business Machines
* Copyright (C) 2004-2014, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
@ -62,4 +62,23 @@ ulocimp_getCountry(const char *localeID,
U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char *localeID);
/*
 * Syntax check on the first len characters of s.  The language tag parser
 * calls this on a subtag delimited by the next separator, so s is not
 * required to be NUL-terminated at len.
 * NOTE(review): presumably returns TRUE when the span is a well-formed
 * Unicode locale extension key - confirm against the implementation.
 */
U_CFUNC UBool
ultag_isUnicodeLocaleKey(const char* s, int32_t len);
/*
 * Same calling convention as ultag_isUnicodeLocaleKey.
 * NOTE(review): presumably checks Unicode locale extension *type* syntax -
 * confirm against the implementation.
 */
U_CFUNC UBool
ultag_isUnicodeLocaleType(const char* s, int32_t len);
/*
 * Key converters.
 * NOTE(review): the names parallel the public uloc_toUnicodeLocaleKey /
 * uloc_toLegacyKey APIs; the exact contract (e.g. what is returned for an
 * unknown key) is not visible from this header - confirm in the
 * implementation file.
 */
U_CFUNC const char*
ulocimp_toBcpKey(const char* key);
U_CFUNC const char*
ulocimp_toLegacyKey(const char* key);
/*
 * Type converters for a given key.  isKnownKey and isSpecialType are
 * pointers to UBool.
 * NOTE(review): they look like optional output flags; whether NULL may be
 * passed and when each flag is set must be confirmed in the implementation.
 */
U_CFUNC const char*
ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
U_CFUNC const char*
ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
#endif

View file

@ -1149,4 +1149,106 @@ uloc_toLanguageTag(const char* localeID,
UBool strict,
UErrorCode* err);
#ifndef U_HIDE_DRAFT_API
/**
* Converts the specified keyword (legacy key, or BCP 47 Unicode locale
* extension key) to the equivalent BCP 47 Unicode locale extension key.
* For example, BCP 47 Unicode locale extension key "co" is returned for
* the input keyword "collation".
* <p>
* When the specified keyword is unknown, but satisfies the BCP syntax,
* then the pointer to the input keyword itself will be returned.
* For example,
* <code>uloc_toUnicodeLocaleKey("ZZ")</code> returns "ZZ".
*
* @param keyword the input locale keyword (either legacy key
* such as "collation" or BCP 47 Unicode locale extension
* key such as "co").
* @return the well-formed BCP 47 Unicode locale extension key,
* or NULL if the specified locale keyword cannot be
* mapped to a well-formed BCP 47 Unicode locale extension
* key.
* @see uloc_toLegacyKey
* @draft ICU 54
*/
U_DRAFT const char* U_EXPORT2
uloc_toUnicodeLocaleKey(const char* keyword);
/**
* Converts the specified keyword value (legacy type, or BCP 47
* Unicode locale extension type) to the well-formed BCP 47 Unicode locale
* extension type for the specified keyword (category). For example, BCP 47
* Unicode locale extension type "phonebk" is returned for the input
* keyword value "phonebook", with the keyword "collation" (or "co").
* <p>
* When the specified keyword is not recognized, but the specified value
* satisfies the syntax of the BCP 47 Unicode locale extension type,
* or when the specified keyword allows 'variable' type and the specified
* value satisfies the syntax, then the pointer to the input type value itself
* will be returned.
* For example,
* <code>uloc_toUnicodeLocaleType("Foo", "Bar")</code> returns "Bar",
* <code>uloc_toUnicodeLocaleType("variableTop", "00A4")</code> returns "00A4".
*
* @param keyword the locale keyword (either legacy key such as
* "collation" or BCP 47 Unicode locale extension
* key such as "co").
* @param value the locale keyword value (either legacy type
* such as "phonebook" or BCP 47 Unicode locale extension
* type such as "phonebk").
* @return the well-formed BCP 47 Unicode locale extension type,
* or NULL if the locale keyword value cannot be mapped to
* a well-formed BCP 47 Unicode locale extension type.
* @see uloc_toLegacyType
* @draft ICU 54
*/
U_DRAFT const char* U_EXPORT2
uloc_toUnicodeLocaleType(const char* keyword, const char* value);
/**
* Converts the specified keyword (BCP 47 Unicode locale extension key, or
* legacy key) to the legacy key. For example, legacy key "collation" is
* returned for the input BCP 47 Unicode locale extension key "co".
*
* @param keyword the input locale keyword (either BCP 47 Unicode locale
* extension key or legacy key).
* @return the well-formed legacy key, or NULL if the specified
* keyword cannot be mapped to a well-formed legacy key.
* @see uloc_toUnicodeLocaleKey
* @draft ICU 54
*/
U_DRAFT const char* U_EXPORT2
uloc_toLegacyKey(const char* keyword);
/**
* Converts the specified keyword value (BCP 47 Unicode locale extension type,
* or legacy type or type alias) to the canonical legacy type. For example,
* the legacy type "phonebook" is returned for the input BCP 47 Unicode
* locale extension type "phonebk" with the keyword "collation" (or "co").
* <p>
* When the specified keyword is not recognized, but the specified value
* satisfies the syntax of legacy key, or when the specified keyword
* allows 'variable' type and the specified value satisfies the syntax,
* then the pointer to the input type value itself will be returned.
* For example,
* <code>uloc_toLegacyType("Foo", "Bar")</code> returns "Bar",
* <code>uloc_toLegacyType("vt", "00A4")</code> returns "00A4".
*
* @param keyword the locale keyword (either legacy keyword such as
* "collation" or BCP 47 Unicode locale extension
* key such as "co").
* @param value the locale keyword value (either BCP 47 Unicode locale
* extension type such as "phonebk" or legacy keyword value
* such as "phonebook").
* @return the well-formed legacy type, or NULL if the specified
* keyword value cannot be mapped to a well-formed legacy
* type.
* @see uloc_toUnicodeLocaleType
* @draft ICU 54
*/
U_DRAFT const char* U_EXPORT2
uloc_toLegacyType(const char* keyword, const char* value);
#endif /* U_HIDE_DRAFT_API */
#endif /*_ULOC*/

View file

@ -250,6 +250,10 @@ void addLocaleTest(TestNode** root)
TESTCASE(TestEnglishExemplarCharacters);
TESTCASE(TestDisplayNameBrackets);
TESTCASE(TestIsRightToLeft);
TESTCASE(TestToUnicodeLocaleKey);
TESTCASE(TestToLegacyKey);
TESTCASE(TestToUnicodeLocaleType);
TESTCASE(TestToLegacyType);
}
@ -5673,7 +5677,6 @@ static void TestLikelySubtags()
}
const char* const locale_to_langtag[][3] = {
{"@x=elmer", "x-elmer", "x-elmer"},
{"", "und", "und"},
{"en", "en", "en"},
{"en_US", "en-US", "en-US"},
@ -5707,9 +5710,9 @@ const char* const locale_to_langtag[][3] = {
{"en@timezone=America/New_York;calendar=japanese", "en-u-ca-japanese-tz-usnyc", "en-u-ca-japanese-tz-usnyc"},
{"en@timezone=US/Eastern", "en-u-tz-usnyc", "en-u-tz-usnyc"},
{"en@x=x-y-z;a=a-b-c", "en-x-x-y-z", NULL},
{"it@collation=badcollationtype;colStrength=identical;cu=usd-eur", "it-u-ks-identic", NULL},
{"it@collation=badcollationtype;colStrength=identical;cu=usd-eur", "it-u-cu-usd-eur-ks-identic", NULL},
{"en_US_POSIX", "en-US-u-va-posix", "en-US-u-va-posix"},
{"en_US_POSIX@calendar=japanese;currency=EUR","en-US-u-ca-japanese-cu-EUR-va-posix", "en-US-u-ca-japanese-cu-EUR-va-posix"},
{"en_US_POSIX@calendar=japanese;currency=EUR","en-US-u-ca-japanese-cu-eur-va-posix", "en-US-u-ca-japanese-cu-eur-va-posix"},
{"@x=elmer", "x-elmer", "x-elmer"},
{"en@x=elmer", "en-x-elmer", "en-x-elmer"},
{"@x=elmer;a=exta", "und-a-exta-x-elmer", "und-a-exta-x-elmer"},
@ -5779,6 +5782,7 @@ static const struct {
const char *locID;
int32_t len;
} langtag_to_locale[] = {
{"ja-u-ijkl-efgh-abcd-ca-japanese-xx-yyy-zzz-kn", "ja@attribute=abcd-efgh-ijkl;calendar=japanese;colnumeric=yes;xx=yyy-zzz", FULL_LENGTH},
{"en", "en", FULL_LENGTH},
{"en-us", "en_US", FULL_LENGTH},
{"und-US", "_US", FULL_LENGTH},
@ -5859,6 +5863,187 @@ static void TestForLanguageTag(void) {
}
}
static void TestToUnicodeLocaleKey(void)
{
/* $IN specifies the result should be the input pointer itself */
static const char* DATA[][2] = {
{"calendar", "ca"},
{"CALEndar", "ca"}, /* difference casing */
{"ca", "ca"}, /* bcp key itself */
{"kv", "kv"}, /* no difference between legacy and bcp */
{"foo", NULL}, /* unknown, bcp ill-formed */
{"ZZ", "$IN"}, /* unknown, bcp well-formed - */
{NULL, NULL}
};
int32_t i;
for (i = 0; DATA[i][0] != NULL; i++) {
const char* keyword = DATA[i][0];
const char* expected = DATA[i][1];
const char* bcpKey = NULL;
bcpKey = uloc_toUnicodeLocaleKey(keyword);
if (expected == NULL) {
if (bcpKey != NULL) {
log_err("toUnicodeLocaleKey: keyword=%s => %s, expected=NULL\n", keyword, bcpKey);
}
} else if (bcpKey == NULL) {
log_err("toUnicodeLocaleKey: keyword=%s => NULL, expected=%s\n", keyword, expected);
} else if (uprv_strcmp(expected, "$IN") == 0) {
if (bcpKey != keyword) {
log_err("toUnicodeLocaleKey: keyword=%s => %s, expected=%s(input pointer)\n", keyword, bcpKey, keyword);
}
} else if (uprv_strcmp(bcpKey, expected) != 0) {
log_err("toUnicodeLocaleKey: keyword=%s => %s, expected=%s\n", keyword, bcpKey, expected);
}
}
}
/*
 * Table-driven check of uloc_toLegacyKey().
 *
 * Each row of DATA is {input keyword, expected result}.  The expected
 * result is either a plain string (the returned key must compare equal),
 * "$IN" (the API must return the input pointer itself), or NULL (the API
 * must return NULL).  The table is terminated by a {NULL, NULL} row.
 * Every mismatch is reported through log_err().
 */
static void TestToLegacyKey(void)
{
    /* $IN specifies the result should be the input pointer itself */
    static const char* DATA[][2] = {
        {"kb",          "colbackwards"},
        {"kB",          "colbackwards"},    /* different casing */
        {"Collation",   "collation"},       /* keyword itself with different casing */
        {"kv",          "kv"},              /* no difference between legacy and bcp */
        {"foo",         "$IN"},             /* unknown, bcp ill-formed */
        {"ZZ",          "$IN"},             /* unknown, bcp well-formed */
        {"e=mc2",       NULL},              /* unknown, bcp/legacy ill-formed */
        {NULL,          NULL}
    };
    int32_t i;

    for (i = 0; DATA[i][0] != NULL; i++) {
        const char* keyword = DATA[i][0];
        const char* expected = DATA[i][1];
        const char* legacyKey = NULL;

        legacyKey = uloc_toLegacyKey(keyword);
        if (expected == NULL) {
            if (legacyKey != NULL) {
                log_err("toLegacyKey: keyword=%s => %s, expected=NULL\n", keyword, legacyKey);
            }
        } else if (legacyKey == NULL) {
            log_err("toLegacyKey: keyword=%s => NULL, expected=%s\n", keyword, expected);
        } else if (uprv_strcmp(expected, "$IN") == 0) {
            /* pointer identity, not just string equality */
            if (legacyKey != keyword) {
                log_err("toLegacyKey: keyword=%s => %s, expected=%s(input pointer)\n", keyword, legacyKey, keyword);
            }
        } else if (uprv_strcmp(legacyKey, expected) != 0) {
            /* Label the failure with this test's API name (it was
               previously reported as "toUnicodeLocaleKey"). */
            log_err("toLegacyKey: keyword=%s => %s, expected=%s\n", keyword, legacyKey, expected);
        }
    }
}
static void TestToUnicodeLocaleType(void)
{
/* $IN specifies the result should be the input pointer itself */
static const char* DATA[][3] = {
{"tz", "Asia/Kolkata", "inccu"},
{"calendar", "gregorian", "gregory"},
{"ca", "gregorian", "gregory"},
{"ca", "Gregorian", "gregory"},
{"ca", "buddhist", "buddhist"},
{"Calendar", "Japanese", "japanese"},
{"calendar", "Islamic-Civil", "islamic-civil"},
{"calendar", "islamicc", "islamic-civil"}, /* bcp type alias */
{"colalternate", "NON-IGNORABLE", "noignore"},
{"colcaselevel", "yes", "true"},
{"tz", "america/new_york", "usnyc"},
{"tz", "Asia/Kolkata", "inccu"},
{"timezone", "navajo", "usden"},
{"ca", "aaaa", "$IN"}, /* unknown type, well-formed type */
{"ca", "gregory-japanese-islamic", "$IN"}, /* unknown type, well-formed type */
{"zz", "gregorian", NULL}, /* unknown key, ill-formed type */
{"co", "foo-", NULL}, /* unknown type, ill-formed type */
{"variableTop", "00A0", "$IN"}, /* valid codepoints type */
{"variableTop", "wxyz", "$IN"}, /* invalid codepoints type - return as is for now */
{"kr", "space-punct", "space-punct"}, /* valid reordercode type */
{"kr", "digit-spacepunct", NULL}, /* invalid (bcp ill-formed) reordercode type */
{NULL, NULL, NULL}
};
int32_t i;
for (i = 0; DATA[i][0] != NULL; i++) {
const char* keyword = DATA[i][0];
const char* value = DATA[i][1];
const char* expected = DATA[i][2];
const char* bcpType = NULL;
bcpType = uloc_toUnicodeLocaleType(keyword, value);
if (expected == NULL) {
if (bcpType != NULL) {
log_err("toUnicodeLocaleType: keyword=%s, value=%s => %s, expected=NULL\n", keyword, value, bcpType);
}
} else if (bcpType == NULL) {
log_err("toUnicodeLocaleType: keyword=%s, value=%s => NULL, expected=%s\n", keyword, value, expected);
} else if (uprv_strcmp(expected, "$IN") == 0) {
if (bcpType != value) {
log_err("toUnicodeLocaleType: keyword=%s, value=%s => %s, expected=%s(input pointer)\n", keyword, value, bcpType, value);
}
} else if (uprv_strcmp(bcpType, expected) != 0) {
log_err("toUnicodeLocaleType: keyword=%s, value=%s => %s, expected=%s\n", keyword, value, bcpType, expected);
}
}
}
static void TestToLegacyType(void)
{
/* $IN specifies the result should be the input pointer itself */
static const char* DATA[][3] = {
{"calendar", "gregory", "gregorian"},
{"ca", "gregory", "gregorian"},
{"ca", "Gregory", "gregorian"},
{"ca", "buddhist", "buddhist"},
{"Calendar", "Japanese", "japanese"},
{"calendar", "Islamic-Civil", "islamic-civil"},
{"calendar", "islamicc", "islamic-civil"}, /* bcp type alias */
{"colalternate", "noignore", "non-ignorable"},
{"colcaselevel", "true", "yes"},
{"tz", "usnyc", "America/New_York"},
{"tz", "inccu", "Asia/Calcutta"},
{"timezone", "usden", "America/Denver"},
{"timezone", "usnavajo", "America/Denver"}, /* bcp type alias */
{"colstrength", "quarternary", "quaternary"}, /* type alias */
{"ca", "aaaa", "$IN"}, /* unknown type */
{"calendar", "gregory-japanese-islamic", "$IN"}, /* unknown type, well-formed type */
{"zz", "gregorian", "$IN"}, /* unknown key, bcp ill-formed type */
{"ca", "gregorian-calendar", "$IN"}, /* known key, bcp ill-formed type */
{"co", "e=mc2", NULL}, /* known key, ill-formed bcp/legacy type */
{"variableTop", "00A0", "$IN"}, /* valid codepoints type */
{"variableTop", "wxyz", "$IN"}, /* invalid codepoints type - return as is for now */
{"kr", "space-punct", "space-punct"}, /* valid reordercode type */
{"kr", "digit-spacepunct", "digit-spacepunct"}, /* invalid reordercode type, bad ok for legacy syntax */
{NULL, NULL, NULL}
};
int32_t i;
for (i = 0; DATA[i][0] != NULL; i++) {
const char* keyword = DATA[i][0];
const char* value = DATA[i][1];
const char* expected = DATA[i][2];
const char* legacyType = NULL;
legacyType = uloc_toLegacyType(keyword, value);
if (expected == NULL) {
if (legacyType != NULL) {
log_err("toLegacyType: keyword=%s, value=%s => %s, expected=NULL\n", keyword, value, legacyType);
}
} else if (legacyType == NULL) {
log_err("toLegacyType: keyword=%s, value=%s => NULL, expected=%s\n", keyword, value, expected);
} else if (uprv_strcmp(expected, "$IN") == 0) {
if (legacyType != value) {
log_err("toLegacyType: keyword=%s, value=%s => %s, expected=%s(input pointer)\n", keyword, value, legacyType, value);
}
} else if (uprv_strcmp(legacyType, expected) != 0) {
log_err("toLegacyType: keyword=%s, value=%s => %s, expected=%s\n", keyword, value, legacyType, expected);
}
}
}
static void test_unicode_define(const char *namech, char ch, const char *nameu, UChar uch)
{
UChar asUch[1];

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2013, International Business Machines Corporation and
* Copyright (c) 1997-2014, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/********************************************************************************
@ -123,6 +123,11 @@ static void TestLikelySubtags(void);
static void TestForLanguageTag(void);
static void TestToLanguageTag(void);
/* Tests for the BCP 47 / legacy locale keyword key and type converters
   (uloc_toUnicodeLocaleKey, uloc_toLegacyKey, uloc_toUnicodeLocaleType,
   uloc_toLegacyType). */
static void TestToUnicodeLocaleKey(void);
static void TestToLegacyKey(void);
static void TestToUnicodeLocaleType(void);
static void TestToLegacyType(void);
/**
* locale data
*/