diff --git a/.gitattributes b/.gitattributes
index 2aa831b9bdc..ca69ef34e5e 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -53,6 +53,7 @@ icu4c/source/aclocal.m4 -text
icu4c/source/allinone/icucheck.bat -text
icu4c/source/common/common.vcxproj -text
icu4c/source/common/common.vcxproj.filters -text
+icu4c/source/common/uloc_keytype.cpp -text
icu4c/source/common/unifiedcache.cpp -text
icu4c/source/common/unifiedcache.h -text
icu4c/source/data/coll/dsb.txt -text
diff --git a/icu4c/source/common/Makefile.in b/icu4c/source/common/Makefile.in
index 2d8b12c0ab0..635b3f2630f 100644
--- a/icu4c/source/common/Makefile.in
+++ b/icu4c/source/common/Makefile.in
@@ -105,7 +105,7 @@ serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
uidna.o usprep.o uts46.o punycode.o \
util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o dtintrv.o ucnvsel.o propsvec.o \
ulist.o uloc_tag.o icudataver.o icuplug.o listformatter.o \
-sharedobject.o simplepatternformatter.o unifiedcache.o
+sharedobject.o simplepatternformatter.o unifiedcache.o uloc_keytype.o
## Header files to install
HEADERS = $(srcdir)/unicode/*.h
diff --git a/icu4c/source/common/common.vcxproj b/icu4c/source/common/common.vcxproj
index 12f993feb41..5bf22962f6f 100644
--- a/icu4c/source/common/common.vcxproj
+++ b/icu4c/source/common/common.vcxproj
@@ -1,4 +1,4 @@
-
+
@@ -236,6 +236,7 @@
+
@@ -1754,4 +1755,4 @@
-
+
\ No newline at end of file
diff --git a/icu4c/source/common/common.vcxproj.filters b/icu4c/source/common/common.vcxproj.filters
index a4571cf92e8..9befe71777f 100644
--- a/icu4c/source/common/common.vcxproj.filters
+++ b/icu4c/source/common/common.vcxproj.filters
@@ -421,9 +421,6 @@
properties & sets
-
- registration
-
registration
@@ -568,6 +565,10 @@
collections
+
+
+ locales & resources
+
@@ -1112,4 +1113,4 @@
collections
-
+
\ No newline at end of file
diff --git a/icu4c/source/common/ucln_cmn.h b/icu4c/source/common/ucln_cmn.h
index 0e2abc6a520..2290de868ba 100644
--- a/icu4c/source/common/ucln_cmn.h
+++ b/icu4c/source/common/ucln_cmn.h
@@ -37,6 +37,7 @@ typedef enum ECleanupCommonType {
UCLN_COMMON_BREAKITERATOR,
UCLN_COMMON_BREAKITERATOR_DICT,
UCLN_COMMON_SERVICE,
+ UCLN_COMMON_LOCALE_KEY_TYPE,
UCLN_COMMON_LOCALE,
UCLN_COMMON_LOCALE_AVAILABLE,
UCLN_COMMON_ULOC,
diff --git a/icu4c/source/common/uloc.cpp b/icu4c/source/common/uloc.cpp
index d8d8ad28cdf..13083b0c8e3 100644
--- a/icu4c/source/common/uloc.cpp
+++ b/icu4c/source/common/uloc.cpp
@@ -2524,4 +2524,103 @@ uloc_acceptLanguage(char *result, int32_t resultAvailable,
return -1;
}
+/**
+ * Maps a locale keyword to its BCP 47 Unicode locale extension key.
+ *
+ * @param keyword  NUL-terminated keyword; may be NULL.
+ * @return the key reported by ulocimp_toBcpKey(); otherwise the keyword
+ *         pointer itself when ultag_isUnicodeLocaleKey() accepts its syntax;
+ *         otherwise NULL. A NULL keyword yields NULL.
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toUnicodeLocaleKey(const char* keyword)
+{
+    if (keyword == NULL) {
+        // The syntax check below measures the string with uprv_strlen(),
+        // so a NULL keyword must not get that far.
+        return NULL;
+    }
+    const char* bcpKey = ulocimp_toBcpKey(keyword);
+    if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
+        // unknown keyword, but syntax is fine..
+        return keyword;
+    }
+    return bcpKey;
+}
+
+/**
+ * Maps a keyword value to its BCP 47 Unicode locale extension type.
+ *
+ * @param keyword  keyword the value belongs to; forwarded unchanged to
+ *                 ulocimp_toBcpType().
+ * @param value    NUL-terminated type value; may be NULL.
+ * @return the type reported by ulocimp_toBcpType(); otherwise the value
+ *         pointer itself when ultag_isUnicodeLocaleType() accepts its syntax;
+ *         otherwise NULL. A NULL value yields NULL.
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toUnicodeLocaleType(const char* keyword, const char* value)
+{
+    if (value == NULL) {
+        // Both the special-type checks and the syntax check read the
+        // characters of value, so a NULL value must not get that far.
+        return NULL;
+    }
+    const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
+    if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
+        // unknown type, but syntax is fine..
+        return value;
+    }
+    return bcpType;
+}
+
+// Non-zero when c is an ASCII digit or an ASCII letter.
+// The argument is evaluated several times, so it must be free of side effects.
+#define ISALPHANUM(c) ( ((c) >= '0' && (c) <= '9') || ((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z') )
+
+// Returns TRUE when every character of legacyKey satisfies ISALPHANUM (an
+// empty string therefore passes), FALSE at the first character that does not.
+static UBool
+isWellFormedLegacyKey(const char* legacyKey)
+{
+    for (const char* cur = legacyKey; *cur != 0; ++cur) {
+        if (ISALPHANUM(*cur)) {
+            continue;
+        }
+        return FALSE;
+    }
+    return TRUE;
+}
+
+// Returns TRUE when legacyType is made of one or more non-empty runs of
+// ISALPHANUM characters separated by single '_', '/' or '-' characters.
+// A leading, trailing or doubled separator, any other character, and the
+// empty string all yield FALSE.
+static UBool
+isWellFormedLegacyType(const char* legacyType)
+{
+    int32_t runLen = 0;  // letters/digits seen since the last separator
+    for (const char* cur = legacyType; *cur != 0; ++cur) {
+        const char ch = *cur;
+        if (ISALPHANUM(ch)) {
+            ++runLen;
+        } else if (ch == '_' || ch == '/' || ch == '-') {
+            if (runLen == 0) {
+                return FALSE;
+            }
+            runLen = 0;
+        } else {
+            return FALSE;
+        }
+    }
+    return (runLen != 0);
+}
+
+/**
+ * Maps a locale keyword to its legacy (ICU) key.
+ *
+ * @param keyword  NUL-terminated keyword; may be NULL.
+ * @return the key reported by ulocimp_toLegacyKey(); otherwise the keyword
+ *         pointer itself when it is well-formed per isWellFormedLegacyKey();
+ *         otherwise NULL. A NULL keyword yields NULL.
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toLegacyKey(const char* keyword)
+{
+    if (keyword == NULL) {
+        // isWellFormedLegacyKey() walks the characters of the keyword.
+        return NULL;
+    }
+    const char* legacyKey = ulocimp_toLegacyKey(keyword);
+    if (legacyKey == NULL) {
+        // Checks if the specified locale key is well-formed with the legacy locale syntax.
+        //
+        // Note:
+        // Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
+        // However, a key obviously should not contain '='. For now, all existing
+        // keys use ASCII alphabetic letters only. We won't add any new key
+        // that is not compatible with the BCP 47 syntax. Therefore, we assume
+        // a valid key consists of [0-9a-zA-Z] only, with no symbols.
+        if (isWellFormedLegacyKey(keyword)) {
+            return keyword;
+        }
+    }
+    return legacyKey;
+}
+
+/**
+ * Maps a keyword value to its legacy (ICU) type.
+ *
+ * @param keyword  keyword the value belongs to; forwarded unchanged to
+ *                 ulocimp_toLegacyType().
+ * @param value    NUL-terminated type value; may be NULL.
+ * @return the type reported by ulocimp_toLegacyType(); otherwise the value
+ *         pointer itself when it is well-formed per isWellFormedLegacyType();
+ *         otherwise NULL. A NULL value yields NULL.
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toLegacyType(const char* keyword, const char* value)
+{
+    if (value == NULL) {
+        // Both the special-type checks and isWellFormedLegacyType() walk the
+        // characters of value.
+        return NULL;
+    }
+    const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
+    if (legacyType == NULL) {
+        // Checks if the specified locale type is well-formed with the legacy locale syntax.
+        //
+        // Note:
+        // Neither ICU nor LDML/CLDR provides the definition of keyword syntax.
+        // However, a type obviously should not contain '='. For now, all existing
+        // types use ASCII alphabetic letters with a few symbol letters. We won't
+        // add any new type that is not compatible with the BCP 47 syntax except timezone
+        // IDs. For now, we assume a valid type starts with [0-9a-zA-Z], but may contain
+        // '-' '_' '/' in the middle.
+        if (isWellFormedLegacyType(value)) {
+            return value;
+        }
+    }
+    return legacyType;
+}
+
/*eof*/
diff --git a/icu4c/source/common/uloc_keytype.cpp b/icu4c/source/common/uloc_keytype.cpp
new file mode 100644
index 00000000000..896ea9ee794
--- /dev/null
+++ b/icu4c/source/common/uloc_keytype.cpp
@@ -0,0 +1,577 @@
+/*
+**********************************************************************
+* Copyright (C) 2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+#include "unicode/utypes.h"
+
+#include "cstring.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "uhash.h"
+#include "umutex.h"
+#include "uresimp.h"
+#include "uvector.h"
+
+// Lookup table filled by initFromResourceBundle(): each key entry is stored
+// under its legacy key id and, when different, under its BCP key id. Values
+// are LocExtKeyData pointers held by gLocExtKeyDataEntries.
+static UHashtable* gLocExtKeyMap = NULL;
+// Guards the one-time run of initFromResourceBundle(); reset by the cleanup hook.
+static icu::UInitOnce gLocExtKeyMapInitOnce = U_INITONCE_INITIALIZER;
+// Heap strings created during initialization (converted BCP ids and
+// timezone ids with ':' replaced by '/'); released via
+// uloc_deleteKeyTypeStringPoolEntry.
+static icu::UVector* gKeyTypeStringPool = NULL;
+// All LocExtKeyData records; released via uloc_deleteKeyDataEntry.
+static icu::UVector* gLocExtKeyDataEntries = NULL;
+// All LocExtType records; released via uloc_deleteTypeEntry.
+static icu::UVector* gLocExtTypeEntries = NULL;
+
+// Bit flags recorded in LocExtKeyData::specialTypes. A flag is set when the
+// key's type map contains the pseudo type "CODEPOINTS" / "REORDER_CODE";
+// values of such keys are then also accepted when they pass
+// isSpecialTypeCodepoints() / isSpecialTypeReorderCode().
+typedef enum {
+ SPECIALTYPE_NONE = 0,
+ SPECIALTYPE_CODEPOINTS = 1,
+ SPECIALTYPE_REORDER_CODE = 2
+} SpecialType;
+
+// One locale extension key.
+typedef struct LocExtKeyData {
+ // key id as it appears as the resource key in keyMap
+ const char* legacyId;
+ // BCP key id; the same pointer as legacyId when the resource value is empty
+ const char* bcpId;
+ // type ids (legacy, BCP and aliases) -> LocExtType*; closed by uloc_deleteKeyDataEntry
+ UHashtable* typeMap;
+ // OR-ed SpecialType flags
+ uint32_t specialTypes;
+} LocExtKeyData;
+
+// One type (value) of a locale extension key.
+typedef struct LocExtType {
+ // legacy type id (for the "timezone" key, with ':' replaced by '/')
+ const char* legacyId;
+ // BCP type id; the same pointer as legacyId when the resource value is empty
+ const char* bcpId;
+} LocExtType;
+
+U_CDECL_BEGIN
+
+// Cleanup hook registered by initFromResourceBundle(). Closes the key map,
+// deletes the three owning vectors, clears every global pointer and resets
+// the init-once guard. Always returns TRUE.
+static UBool U_CALLCONV
+uloc_key_type_cleanup(void) {
+    UHashtable* keyMap = gLocExtKeyMap;
+    if (keyMap != NULL) {
+        uhash_close(keyMap);
+        gLocExtKeyMap = NULL;
+    }
+
+    // deleting a vector runs its element deleter on every stored entry
+    delete gLocExtKeyDataEntries;
+    delete gLocExtTypeEntries;
+    delete gKeyTypeStringPool;
+    gLocExtKeyDataEntries = NULL;
+    gLocExtTypeEntries = NULL;
+    gKeyTypeStringPool = NULL;
+
+    gLocExtKeyMapInitOnce.reset();
+    return TRUE;
+}
+
+// Element deleter installed on gKeyTypeStringPool: releases one pooled
+// string that was allocated with uprv_malloc().
+static void U_CALLCONV
+uloc_deleteKeyTypeStringPoolEntry(void* obj) {
+ uprv_free(obj);
+}
+
+// Element deleter installed on gLocExtKeyDataEntries: closes the record's
+// type map (when it has one) and then frees the LocExtKeyData itself.
+static void U_CALLCONV
+uloc_deleteKeyDataEntry(void* obj) {
+    LocExtKeyData* entry = (LocExtKeyData*)obj;
+    UHashtable* types = entry->typeMap;
+    if (types != NULL) {
+        uhash_close(types);
+    }
+    uprv_free(entry);
+}
+
+// Element deleter installed on gLocExtTypeEntries: frees one LocExtType
+// record. The id strings it points to are not freed here.
+static void U_CALLCONV
+uloc_deleteTypeEntry(void* obj) {
+ uprv_free(obj);
+}
+
+U_CDECL_END
+
+
+// Converts the invariant-character string src (len UChars) into a newly
+// allocated NUL-terminated char string and stores it in gKeyTypeStringPool.
+// Returns the stored string; on failure returns NULL with sts set.
+static const char*
+poolInvariantChars(const UChar* src, int32_t len, UErrorCode& sts) {
+    char* buf = (char*)uprv_malloc(len + 1);
+    if (buf == NULL) {
+        sts = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    u_UCharsToChars(src, buf, len);
+    buf[len] = 0;
+    gKeyTypeStringPool->addElement(buf, sts);
+    if (U_FAILURE(sts)) {
+        // NOTE(review): relies on UVector::addElement() not keeping the
+        // element when it fails - confirm against uvector.cpp.
+        uprv_free(buf);
+        return NULL;
+    }
+    return buf;
+}
+
+// Copies src into a newly allocated string with every ':' replaced by '/'
+// (timezone ids use a colon instead of a slash in the resource, e.g.
+// America:Los_Angeles) and stores the copy in gKeyTypeStringPool.
+// Returns the stored string; on failure returns NULL with sts set.
+static const char*
+poolColonToSlashCopy(const char* src, UErrorCode& sts) {
+    int32_t len = (int32_t)uprv_strlen(src);
+    char* buf = (char*)uprv_malloc(len + 1);
+    if (buf == NULL) {
+        sts = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    char* q = buf;
+    for (const char* p = src; *p; p++) {
+        *q++ = (*p == ':') ? '/' : *p;
+    }
+    *q = 0;
+    gKeyTypeStringPool->addElement(buf, sts);
+    if (U_FAILURE(sts)) {
+        // NOTE(review): same ownership assumption as in poolInvariantChars().
+        uprv_free(buf);
+        return NULL;
+    }
+    return buf;
+}
+
+// One-time initializer (run through umtx_initOnce by init()).
+// Reads the "keyTypeData" resource bundle and builds:
+//   - gLocExtKeyMap: legacy key id / BCP key id -> LocExtKeyData
+//   - per key, a type map: legacy type id / BCP type id / aliases -> LocExtType
+// All records and converted strings are held by the three global vectors so
+// that uloc_key_type_cleanup() can release them.
+// On error sts is set and the tables may be left partially filled.
+static void U_CALLCONV
+initFromResourceBundle(UErrorCode& sts) {
+    ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup);
+
+    gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
+    if (U_FAILURE(sts)) {
+        return;
+    }
+
+    UResourceBundle *keyTypeDataRes = NULL;
+    UResourceBundle *keyMapRes = NULL;
+    UResourceBundle *typeMapRes = NULL;
+    UResourceBundle *typeAliasRes = NULL;
+    UResourceBundle *bcpTypeAliasRes = NULL;
+
+    keyTypeDataRes = ures_openDirect(NULL, "keyTypeData", &sts);
+    keyMapRes = ures_getByKey(keyTypeDataRes, "keyMap", NULL, &sts);
+    typeMapRes = ures_getByKey(keyTypeDataRes, "typeMap", NULL, &sts);
+
+    // the two alias tables are optional: failures go to tmpSts, not sts
+    UErrorCode tmpSts = U_ZERO_ERROR;
+    typeAliasRes = ures_getByKey(keyTypeDataRes, "typeAlias", NULL, &tmpSts);
+    if (U_FAILURE(tmpSts)) {
+        typeAliasRes = NULL;
+        tmpSts = U_ZERO_ERROR;
+    }
+    bcpTypeAliasRes = ures_getByKey(keyTypeDataRes, "bcpTypeAlias", NULL, &tmpSts);
+    if (U_FAILURE(tmpSts)) {
+        bcpTypeAliasRes = NULL;
+        tmpSts = U_ZERO_ERROR;
+    }
+
+    // initialize vectors storing dynamically allocated objects
+    gKeyTypeStringPool = new icu::UVector(uloc_deleteKeyTypeStringPoolEntry, NULL, sts);
+    if (gKeyTypeStringPool == NULL || U_FAILURE(sts)) {
+        goto close_bundles;
+    }
+    gLocExtKeyDataEntries = new icu::UVector(uloc_deleteKeyDataEntry, NULL, sts);
+    if (gLocExtKeyDataEntries == NULL || U_FAILURE(sts)) {
+        goto close_bundles;
+    }
+    gLocExtTypeEntries = new icu::UVector(uloc_deleteTypeEntry, NULL, sts);
+    if (gLocExtTypeEntries == NULL || U_FAILURE(sts)) {
+        goto close_bundles;
+    }
+
+    // iterate through keyMap resource
+    UResourceBundle keyMapEntry;
+    ures_initStackObject(&keyMapEntry);
+
+    while (ures_hasNext(keyMapRes)) {
+        ures_getNextResource(keyMapRes, &keyMapEntry, &sts);
+        if (U_FAILURE(sts)) {
+            break;
+        }
+        const char* legacyKeyId = ures_getKey(&keyMapEntry);
+        int32_t bcpKeyIdLen = 0;
+        const UChar* uBcpKeyId = ures_getString(&keyMapEntry, &bcpKeyIdLen, &sts);
+        if (U_FAILURE(sts)) {
+            break;
+        }
+
+        // empty value indicates that BCP key is same with the legacy key.
+        const char* bcpKeyId = legacyKeyId;
+        if (bcpKeyIdLen > 0) {
+            bcpKeyId = poolInvariantChars(uBcpKeyId, bcpKeyIdLen, sts);
+            if (U_FAILURE(sts)) {
+                break;
+            }
+        }
+
+        UBool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0;
+
+        // Owned locally until it is handed to keyData below; every error
+        // exit before that point has to close it.
+        UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts);
+        if (U_FAILURE(sts)) {
+            break;
+        }
+        uint32_t specialTypes = SPECIALTYPE_NONE;
+
+        UResourceBundle* typeAliasResByKey = NULL;
+        UResourceBundle* bcpTypeAliasResByKey = NULL;
+
+        if (typeAliasRes != NULL) {
+            typeAliasResByKey = ures_getByKey(typeAliasRes, legacyKeyId, NULL, &tmpSts);
+            if (U_FAILURE(tmpSts)) {
+                // only a few keys have type alias mapping
+                typeAliasResByKey = NULL;
+                tmpSts = U_ZERO_ERROR;
+            }
+        }
+        if (bcpTypeAliasRes != NULL) {
+            bcpTypeAliasResByKey = ures_getByKey(bcpTypeAliasRes, bcpKeyId, NULL, &tmpSts);
+            if (U_FAILURE(tmpSts)) {
+                // only a few keys have BCP type alias mapping
+                bcpTypeAliasResByKey = NULL;
+                tmpSts = U_ZERO_ERROR;
+            }
+        }
+
+        // look up type map for the key, and walk through the mapping data
+        UResourceBundle* typeMapResByKey = ures_getByKey(typeMapRes, legacyKeyId, NULL, &tmpSts);
+        if (U_FAILURE(tmpSts)) {
+            // type map for each key must exist
+            U_ASSERT(FALSE);
+            tmpSts = U_ZERO_ERROR;
+        } else {
+            UResourceBundle typeMapEntry;
+            ures_initStackObject(&typeMapEntry);
+
+            while (ures_hasNext(typeMapResByKey)) {
+                ures_getNextResource(typeMapResByKey, &typeMapEntry, &sts);
+                if (U_FAILURE(sts)) {
+                    break;
+                }
+                const char* legacyTypeId = ures_getKey(&typeMapEntry);
+
+                // special types
+                if (uprv_strcmp(legacyTypeId, "CODEPOINTS") == 0) {
+                    specialTypes |= SPECIALTYPE_CODEPOINTS;
+                    continue;
+                }
+                if (uprv_strcmp(legacyTypeId, "REORDER_CODE") == 0) {
+                    specialTypes |= SPECIALTYPE_REORDER_CODE;
+                    continue;
+                }
+
+                // a timezone key uses a colon instead of a slash in the resource.
+                // e.g. America:Los_Angeles
+                if (isTZ && uprv_strchr(legacyTypeId, ':') != NULL) {
+                    legacyTypeId = poolColonToSlashCopy(legacyTypeId, sts);
+                    if (U_FAILURE(sts)) {
+                        break;
+                    }
+                }
+
+                int32_t bcpTypeIdLen = 0;
+                const UChar* uBcpTypeId = ures_getString(&typeMapEntry, &bcpTypeIdLen, &sts);
+                if (U_FAILURE(sts)) {
+                    break;
+                }
+
+                // empty value indicates that BCP type is same with the legacy type.
+                const char* bcpTypeId = legacyTypeId;
+                if (bcpTypeIdLen > 0) {
+                    bcpTypeId = poolInvariantChars(uBcpTypeId, bcpTypeIdLen, sts);
+                    if (U_FAILURE(sts)) {
+                        break;
+                    }
+                }
+
+                // Note: legacy type value should never be
+                // equivalent to bcp type value of a different
+                // type under the same key. So we use a single
+                // map for lookup.
+                LocExtType* t = (LocExtType*)uprv_malloc(sizeof(LocExtType));
+                if (t == NULL) {
+                    sts = U_MEMORY_ALLOCATION_ERROR;
+                    break;
+                }
+                t->bcpId = bcpTypeId;
+                t->legacyId = legacyTypeId;
+                gLocExtTypeEntries->addElement((void*)t, sts);
+                if (U_FAILURE(sts)) {
+                    // not stored anywhere yet, so release it here
+                    // NOTE(review): same addElement() ownership assumption as above.
+                    uprv_free(t);
+                    break;
+                }
+
+                uhash_put(typeDataMap, (void*)legacyTypeId, t, &sts);
+                if (bcpTypeId != legacyTypeId) {
+                    // different type value
+                    uhash_put(typeDataMap, (void*)bcpTypeId, t, &sts);
+                }
+                if (U_FAILURE(sts)) {
+                    break;
+                }
+
+                // also put aliases in the map
+                if (typeAliasResByKey != NULL) {
+                    UResourceBundle typeAliasDataEntry;
+                    ures_initStackObject(&typeAliasDataEntry);
+
+                    ures_resetIterator(typeAliasResByKey);
+                    while (ures_hasNext(typeAliasResByKey) && U_SUCCESS(sts)) {
+                        int32_t toLen;
+                        ures_getNextResource(typeAliasResByKey, &typeAliasDataEntry, &sts);
+                        const UChar* to = ures_getString(&typeAliasDataEntry, &toLen, &sts);
+                        if (U_FAILURE(sts)) {
+                            break;
+                        }
+                        // check if this is an alias of canonical legacy type
+                        if (uprv_compareInvAscii(NULL, legacyTypeId, -1, to, toLen) == 0) {
+                            const char* from = ures_getKey(&typeAliasDataEntry);
+                            // replace colon with slash if necessary
+                            if (isTZ && uprv_strchr(from, ':') != NULL) {
+                                from = poolColonToSlashCopy(from, sts);
+                                if (U_FAILURE(sts)) {
+                                    break;
+                                }
+                            }
+                            uhash_put(typeDataMap, (void*)from, t, &sts);
+                        }
+                    }
+                    ures_close(&typeAliasDataEntry);
+                    if (U_FAILURE(sts)) {
+                        break;
+                    }
+                }
+
+                if (bcpTypeAliasResByKey != NULL) {
+                    UResourceBundle bcpTypeAliasDataEntry;
+                    ures_initStackObject(&bcpTypeAliasDataEntry);
+
+                    ures_resetIterator(bcpTypeAliasResByKey);
+                    while (ures_hasNext(bcpTypeAliasResByKey) && U_SUCCESS(sts)) {
+                        int32_t toLen;
+                        ures_getNextResource(bcpTypeAliasResByKey, &bcpTypeAliasDataEntry, &sts);
+                        const UChar* to = ures_getString(&bcpTypeAliasDataEntry, &toLen, &sts);
+                        if (U_FAILURE(sts)) {
+                            break;
+                        }
+                        // check if this is an alias of bcp type
+                        if (uprv_compareInvAscii(NULL, bcpTypeId, -1, to, toLen) == 0) {
+                            const char* from = ures_getKey(&bcpTypeAliasDataEntry);
+                            uhash_put(typeDataMap, (void*)from, t, &sts);
+                        }
+                    }
+                    ures_close(&bcpTypeAliasDataEntry);
+                    if (U_FAILURE(sts)) {
+                        break;
+                    }
+                }
+            }
+            ures_close(&typeMapEntry);
+        }
+        ures_close(typeMapResByKey);
+        ures_close(typeAliasResByKey);
+        ures_close(bcpTypeAliasResByKey);
+        if (U_FAILURE(sts)) {
+            uhash_close(typeDataMap);
+            break;
+        }
+
+        LocExtKeyData* keyData = (LocExtKeyData*)uprv_malloc(sizeof(LocExtKeyData));
+        if (keyData == NULL) {
+            uhash_close(typeDataMap);
+            sts = U_MEMORY_ALLOCATION_ERROR;
+            break;
+        }
+        keyData->bcpId = bcpKeyId;
+        keyData->legacyId = legacyKeyId;
+        keyData->specialTypes = specialTypes;
+        keyData->typeMap = typeDataMap;  // keyData now owns the type map
+
+        gLocExtKeyDataEntries->addElement((void*)keyData, sts);
+        if (U_FAILURE(sts)) {
+            // not stored anywhere yet: release the record and its type map
+            // NOTE(review): same addElement() ownership assumption as above.
+            uloc_deleteKeyDataEntry(keyData);
+            break;
+        }
+
+        uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts);
+        if (legacyKeyId != bcpKeyId) {
+            // different key value
+            uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts);
+        }
+        if (U_FAILURE(sts)) {
+            break;
+        }
+    }
+
+    ures_close(&keyMapEntry);
+
+close_bundles:
+    ures_close(bcpTypeAliasRes);
+    ures_close(typeAliasRes);
+    ures_close(typeMapRes);
+    ures_close(keyMapRes);
+    ures_close(keyTypeDataRes);
+}
+
+// Runs initFromResourceBundle() at most once through gLocExtKeyMapInitOnce.
+// Returns TRUE when the status reported by umtx_initOnce() is not a failure.
+static UBool
+init() {
+    UErrorCode initStatus = U_ZERO_ERROR;
+    umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, initStatus);
+    return U_FAILURE(initStatus) ? FALSE : TRUE;
+}
+
+// Returns TRUE when val consists of one or more '-'-separated groups, each
+// made of 4 to 6 hexadecimal digits (0-9, A-F, a-f). Any other character, a
+// group of the wrong length, or an empty string yields FALSE.
+static UBool
+isSpecialTypeCodepoints(const char* val) {
+    int32_t digits = 0;  // hex digits seen in the current group
+    for (const char* cur = val; *cur != 0; ++cur) {
+        const char ch = *cur;
+        if (ch == '-') {
+            if (digits < 4 || digits > 6) {
+                return FALSE;
+            }
+            digits = 0;
+            continue;
+        }
+        const UBool isHex = ('0' <= ch && ch <= '9') ||
+                            ('A' <= ch && ch <= 'F') ||
+                            ('a' <= ch && ch <= 'f');
+        if (!isHex) {
+            return FALSE;
+        }
+        ++digits;
+    }
+    return (digits >= 4 && digits <= 6);
+}
+
+// Returns TRUE when val consists of one or more '-'-separated groups, each
+// made of 3 to 8 ASCII letters. Any other character, a group of the wrong
+// length, or an empty string yields FALSE.
+static UBool
+isSpecialTypeReorderCode(const char* val) {
+    int32_t letters = 0;  // letters seen in the current group
+    for (const char* cur = val; *cur != 0; ++cur) {
+        const char ch = *cur;
+        if (ch == '-') {
+            if (letters < 3 || letters > 8) {
+                return FALSE;
+            }
+            letters = 0;
+            continue;
+        }
+        const UBool isLetter = ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
+        if (!isLetter) {
+            return FALSE;
+        }
+        ++letters;
+    }
+    return (letters >= 3 && letters <= 8);
+}
+
+// Looks key up in gLocExtKeyMap and returns the BCP key id of the matching
+// entry. Returns NULL when initialization failed or the key is not in the map.
+U_CFUNC const char*
+ulocimp_toBcpKey(const char* key) {
+    if (!init()) {
+        return NULL;
+    }
+
+    const LocExtKeyData* entry = (const LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
+    return (entry == NULL) ? NULL : entry->bcpId;
+}
+
+// Looks key up in gLocExtKeyMap and returns the legacy key id of the matching
+// entry. Returns NULL when initialization failed or the key is not in the map.
+U_CFUNC const char*
+ulocimp_toLegacyKey(const char* key) {
+    if (!init()) {
+        return NULL;
+    }
+
+    const LocExtKeyData* entry = (const LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
+    return (entry == NULL) ? NULL : entry->legacyId;
+}
+
+// Maps a type value of the given key to its BCP type id.
+//   key, type      - ids to look up
+//   isKnownKey     - optional out: TRUE when key is found in gLocExtKeyMap
+//   isSpecialType  - optional out: TRUE when type was accepted by one of the
+//                    special-type syntax checks rather than found in the map
+// Returns the mapped BCP id, or the type pointer itself for an accepted
+// special type, or NULL (also when initialization failed).
+U_CFUNC const char*
+ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
+    if (isKnownKey != NULL) {
+        *isKnownKey = FALSE;
+    }
+    if (isSpecialType != NULL) {
+        *isSpecialType = FALSE;
+    }
+
+    if (!init()) {
+        return NULL;
+    }
+
+    const LocExtKeyData* keyData = (const LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
+    if (keyData == NULL) {
+        return NULL;
+    }
+    if (isKnownKey != NULL) {
+        *isKnownKey = TRUE;
+    }
+
+    const LocExtType* known = (const LocExtType*)uhash_get(keyData->typeMap, type);
+    if (known != NULL) {
+        return known->bcpId;
+    }
+
+    // code point syntax is tried first; reorder-code syntax only if that fails
+    const uint32_t flags = keyData->specialTypes;
+    const UBool matched =
+        ((flags & SPECIALTYPE_CODEPOINTS) != 0 && isSpecialTypeCodepoints(type)) ||
+        ((flags & SPECIALTYPE_REORDER_CODE) != 0 && isSpecialTypeReorderCode(type));
+    if (!matched) {
+        return NULL;
+    }
+    if (isSpecialType != NULL) {
+        *isSpecialType = TRUE;
+    }
+    return type;
+}
+
+
+// Maps a type value of the given key to its legacy type id.
+//   key, type      - ids to look up
+//   isKnownKey     - optional out: TRUE when key is found in gLocExtKeyMap
+//   isSpecialType  - optional out: TRUE when type was accepted by one of the
+//                    special-type syntax checks rather than found in the map
+// Returns the mapped legacy id, or the type pointer itself for an accepted
+// special type, or NULL (also when initialization failed).
+U_CFUNC const char*
+ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
+    if (isKnownKey != NULL) {
+        *isKnownKey = FALSE;
+    }
+    if (isSpecialType != NULL) {
+        *isSpecialType = FALSE;
+    }
+
+    if (!init()) {
+        return NULL;
+    }
+
+    const LocExtKeyData* keyData = (const LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
+    if (keyData == NULL) {
+        return NULL;
+    }
+    if (isKnownKey != NULL) {
+        *isKnownKey = TRUE;
+    }
+
+    const LocExtType* known = (const LocExtType*)uhash_get(keyData->typeMap, type);
+    if (known != NULL) {
+        return known->legacyId;
+    }
+
+    // code point syntax is tried first; reorder-code syntax only if that fails
+    const uint32_t flags = keyData->specialTypes;
+    const UBool matched =
+        ((flags & SPECIALTYPE_CODEPOINTS) != 0 && isSpecialTypeCodepoints(type)) ||
+        ((flags & SPECIALTYPE_REORDER_CODE) != 0 && isSpecialTypeReorderCode(type));
+    if (!matched) {
+        return NULL;
+    }
+    if (isSpecialType != NULL) {
+        *isSpecialType = TRUE;
+    }
+    return type;
+}
+
diff --git a/icu4c/source/common/uloc_tag.c b/icu4c/source/common/uloc_tag.c
index 3725955b361..c038026790d 100644
--- a/icu4c/source/common/uloc_tag.c
+++ b/icu4c/source/common/uloc_tag.c
@@ -408,8 +408,8 @@ _isPrivateuseValueSubtags(const char* s, int32_t len) {
return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag));
}
-static UBool
-_isLDMLKey(const char* s, int32_t len) {
+U_CFUNC UBool
+ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
if (len < 0) {
len = (int32_t)uprv_strlen(s);
}
@@ -419,17 +419,33 @@ _isLDMLKey(const char* s, int32_t len) {
return FALSE;
}
-static UBool
-_isLDMLType(const char* s, int32_t len) {
+/*
+ * Checks whether s has the shape of a Unicode locale extension type:
+ * one or more subtags separated by SEP, where every subtag is 3 to 8
+ * characters long and made only of characters accepted by ISALPHA or
+ * ISNUMERIC.
+ *
+ * s    string to check
+ * len  number of characters to check; a negative value means s is
+ *      NUL-terminated and its length is taken with uprv_strlen()
+ *
+ * Returns TRUE when the whole string matches, FALSE otherwise (including
+ * for an empty string).
+ */
+U_CFUNC UBool
+ultag_isUnicodeLocaleType(const char*s, int32_t len) {
+ const char* p;
+ /* number of characters seen in the current subtag */
+ int32_t subtagLen = 0;
+
if (len < 0) {
len = (int32_t)uprv_strlen(s);
}
- if (len >= 3 && len <= 8 && _isAlphaNumericString(s, len)) {
- return TRUE;
- }
- return FALSE;
-}
+ for (p = s; len > 0; p++, len--) {
+ if (*p == SEP) {
+ /* a separator may only follow a subtag of at least 3 characters */
+ if (subtagLen < 3) {
+ return FALSE;
+ }
+ subtagLen = 0;
+ } else if (ISALPHA(*p) || ISNUMERIC(*p)) {
+ subtagLen++;
+ if (subtagLen > 8) {
+ return FALSE;
+ }
+ } else {
+ return FALSE;
+ }
+ }
+
+ /* the last subtag must be complete as well */
+ return (subtagLen >= 3);
+}
/*
* -------------------------------------------------
*
@@ -608,417 +624,6 @@ _initializeULanguageTag(ULanguageTag* langtag) {
langtag->privateuse = EMPTY;
}
-#define KEYTYPEDATA "keyTypeData"
-#define KEYMAP "keyMap"
-#define TYPEMAP "typeMap"
-#define TYPEALIAS "typeAlias"
-#define MAX_BCP47_SUBTAG_LEN 9 /* including null terminator */
-#define MAX_LDML_KEY_LEN 22
-#define MAX_LDML_TYPE_LEN 32
-
-static int32_t
-_ldmlKeyToBCP47(const char* key, int32_t keyLen,
- char* bcpKey, int32_t bcpKeyCapacity,
- UErrorCode *status) {
- UResourceBundle *rb;
- char keyBuf[MAX_LDML_KEY_LEN];
- char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
- int32_t resultLen = 0;
- int32_t i;
- UErrorCode tmpStatus = U_ZERO_ERROR;
- const UChar *uBcpKey;
- int32_t bcpKeyLen;
-
- if (keyLen < 0) {
- keyLen = (int32_t)uprv_strlen(key);
- }
-
- if (keyLen >= sizeof(keyBuf)) {
- /* no known valid LDML key exceeding 21 */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- uprv_memcpy(keyBuf, key, keyLen);
- keyBuf[keyLen] = 0;
-
- /* to lower case */
- for (i = 0; i < keyLen; i++) {
- keyBuf[i] = uprv_tolower(keyBuf[i]);
- }
-
- rb = ures_openDirect(NULL, KEYTYPEDATA, status);
- ures_getByKey(rb, KEYMAP, rb, status);
-
- if (U_FAILURE(*status)) {
- ures_close(rb);
- return 0;
- }
-
- uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus);
- if (U_SUCCESS(tmpStatus)) {
- if (bcpKeyLen == 0) {
- /* empty value indicates the BCP47 key is same with the legacy key */
- uprv_memcpy(bcpKeyBuf, key, keyLen);
- bcpKeyBuf[keyLen] = 0;
- resultLen = keyLen;
- } else {
- u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen);
- bcpKeyBuf[bcpKeyLen] = 0;
- resultLen = bcpKeyLen;
- }
- } else {
- if (_isLDMLKey(key, keyLen)) {
- uprv_memcpy(bcpKeyBuf, key, keyLen);
- bcpKeyBuf[keyLen] = 0;
- resultLen = keyLen;
- } else {
- /* mapping not availabe */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- }
- }
- ures_close(rb);
-
- if (U_FAILURE(*status)) {
- return 0;
- }
-
- uprv_memcpy(bcpKey, bcpKeyBuf, uprv_min(resultLen, bcpKeyCapacity));
- return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status);
-}
-
-static int32_t
-_bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen,
- char* key, int32_t keyCapacity,
- UErrorCode *status) {
- UResourceBundle *rb;
- char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
- int32_t resultLen = 0;
- int32_t i;
- const char *resKey = NULL;
- UResourceBundle *mapData;
-
- if (bcpKeyLen < 0) {
- bcpKeyLen = (int32_t)uprv_strlen(bcpKey);
- }
-
- if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen);
- bcpKeyBuf[bcpKeyLen] = 0;
-
- /* to lower case */
- for (i = 0; i < bcpKeyLen; i++) {
- bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]);
- }
-
- rb = ures_openDirect(NULL, KEYTYPEDATA, status);
- ures_getByKey(rb, KEYMAP, rb, status);
- if (U_FAILURE(*status)) {
- ures_close(rb);
- return 0;
- }
-
- mapData = ures_getNextResource(rb, NULL, status);
- while (U_SUCCESS(*status)) {
- const UChar *uBcpKey;
- char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
- int32_t tmpBcpKeyLen;
- const char *tmpBcpKey = tmpBcpKeyBuf;
-
- uBcpKey = ures_getString(mapData, &tmpBcpKeyLen, status);
- if (U_FAILURE(*status)) {
- break;
- }
- if (tmpBcpKeyLen == 0) {
- /* empty value indicates the BCP47 key is same with the legacy key */
- tmpBcpKey = ures_getKey(mapData);
- } else {
- u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen);
- tmpBcpKeyBuf[tmpBcpKeyLen] = 0;
- }
- if (uprv_compareInvCharsAsAscii(bcpKeyBuf, tmpBcpKey) == 0) {
- /* found a matching BCP47 key */
- resKey = ures_getKey(mapData);
- resultLen = (int32_t)uprv_strlen(resKey);
- break;
- }
- if (!ures_hasNext(rb)) {
- break;
- }
- ures_getNextResource(rb, mapData, status);
- }
- ures_close(mapData);
- ures_close(rb);
-
- if (U_FAILURE(*status)) {
- return 0;
- }
-
- if (resKey == NULL) {
- resKey = bcpKeyBuf;
- resultLen = bcpKeyLen;
- }
-
- uprv_memcpy(key, resKey, uprv_min(resultLen, keyCapacity));
- return u_terminateChars(key, keyCapacity, resultLen, status);
-}
-
-static int32_t
-_ldmlTypeToBCP47(const char* key, int32_t keyLen,
- const char* type, int32_t typeLen,
- char* bcpType, int32_t bcpTypeCapacity,
- UErrorCode *status) {
- UResourceBundle *rb, *keyTypeData, *typeMapForKey;
- char keyBuf[MAX_LDML_KEY_LEN];
- char typeBuf[MAX_LDML_TYPE_LEN];
- char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
- int32_t resultLen = 0;
- int32_t i;
- UErrorCode tmpStatus = U_ZERO_ERROR;
- const UChar *uBcpType, *uCanonicalType;
- int32_t bcpTypeLen, canonicalTypeLen;
- UBool isTimezone = FALSE;
-
- if (keyLen < 0) {
- keyLen = (int32_t)uprv_strlen(key);
- }
- if (keyLen >= sizeof(keyBuf)) {
- /* no known valid LDML key exceeding 21 */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- uprv_memcpy(keyBuf, key, keyLen);
- keyBuf[keyLen] = 0;
-
- /* to lower case */
- for (i = 0; i < keyLen; i++) {
- keyBuf[i] = uprv_tolower(keyBuf[i]);
- }
- if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
- isTimezone = TRUE;
- }
-
- if (typeLen < 0) {
- typeLen = (int32_t)uprv_strlen(type);
- }
- if (typeLen >= sizeof(typeBuf)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- if (isTimezone) {
- /* replace '/' with ':' */
- for (i = 0; i < typeLen; i++) {
- if (*(type + i) == '/') {
- typeBuf[i] = ':';
- } else {
- typeBuf[i] = *(type + i);
- }
- }
- typeBuf[typeLen] = 0;
- type = &typeBuf[0];
- }
-
- keyTypeData = ures_openDirect(NULL, KEYTYPEDATA, status);
- rb = ures_getByKey(keyTypeData, TYPEMAP, NULL, status);
- if (U_FAILURE(*status)) {
- ures_close(rb);
- ures_close(keyTypeData);
- return 0;
- }
-
- typeMapForKey = ures_getByKey(rb, keyBuf, NULL, &tmpStatus);
- uBcpType = ures_getStringByKey(typeMapForKey, type, &bcpTypeLen, &tmpStatus);
- if (U_SUCCESS(tmpStatus)) {
- if (bcpTypeLen == 0) {
- /* empty value indicates the BCP47 type is same with the legacy type */
- uprv_memcpy(bcpTypeBuf, type, typeLen);
- resultLen = typeLen;
- } else {
- u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
- resultLen = bcpTypeLen;
- }
- } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
- /* is this type alias? */
- tmpStatus = U_ZERO_ERROR;
- ures_getByKey(keyTypeData, TYPEALIAS, rb, &tmpStatus);
- ures_getByKey(rb, keyBuf, rb, &tmpStatus);
- uCanonicalType = ures_getStringByKey(rb, type, &canonicalTypeLen, &tmpStatus);
- if (U_SUCCESS(tmpStatus)) {
- u_UCharsToChars(uCanonicalType, typeBuf, canonicalTypeLen);
- if (isTimezone) {
- /* replace '/' with ':' */
- for (i = 0; i < canonicalTypeLen; i++) {
- if (typeBuf[i] == '/') {
- typeBuf[i] = ':';
- }
- }
- }
- typeBuf[canonicalTypeLen] = 0;
-
- /* look up the canonical type */
- uBcpType = ures_getStringByKey(typeMapForKey, typeBuf, &bcpTypeLen, &tmpStatus);
- if (U_SUCCESS(tmpStatus)) {
- if (bcpTypeLen == 0) {
- /* empty value indicates the BCP47 type is same with the legacy type */
- uprv_memcpy(bcpTypeBuf, typeBuf, canonicalTypeLen);
- resultLen = canonicalTypeLen;
- } else {
- u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
- resultLen = bcpTypeLen;
- }
- }
- }
- if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
- if (_isLDMLType(type, typeLen)) {
- uprv_memcpy(bcpTypeBuf, type, typeLen);
- resultLen = typeLen;
- } else {
- /* mapping not availabe */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- }
- }
- } else {
- *status = tmpStatus;
- }
- ures_close(rb);
- ures_close(typeMapForKey);
- ures_close(keyTypeData);
-
- if (U_FAILURE(*status)) {
- return 0;
- }
-
- uprv_memcpy(bcpType, bcpTypeBuf, uprv_min(resultLen, bcpTypeCapacity));
- return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status);
-}
-
-static int32_t
-_bcp47ToLDMLType(const char* key, int32_t keyLen,
- const char* bcpType, int32_t bcpTypeLen,
- char* type, int32_t typeCapacity,
- UErrorCode *status) {
- UResourceBundle *rb;
- char keyBuf[MAX_LDML_KEY_LEN];
- char bcpTypeBuf[ULOC_KEYWORDS_CAPACITY]; /* ensure buffter is large enough for multiple values (e.g. buddhist-greg) */
- int32_t resultLen = 0;
- int32_t i, typeSize;
- const char *resType = NULL;
- UResourceBundle *mapData;
- UErrorCode tmpStatus = U_ZERO_ERROR;
- int32_t copyLen;
-
- if (keyLen < 0) {
- keyLen = (int32_t)uprv_strlen(key);
- }
-
- if (keyLen >= sizeof(keyBuf)) {
- /* no known valid LDML key exceeding 21 */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- uprv_memcpy(keyBuf, key, keyLen);
- keyBuf[keyLen] = 0;
-
- /* to lower case */
- for (i = 0; i < keyLen; i++) {
- keyBuf[i] = uprv_tolower(keyBuf[i]);
- }
-
-
- if (bcpTypeLen < 0) {
- bcpTypeLen = (int32_t)uprv_strlen(bcpType);
- }
-
- typeSize = 0;
- for (i = 0; i < bcpTypeLen; i++) {
- if (bcpType[i] == SEP) {
- if (typeSize >= MAX_BCP47_SUBTAG_LEN) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- typeSize = 0;
- } else {
- typeSize++;
- }
- }
-
- uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen);
- bcpTypeBuf[bcpTypeLen] = 0;
-
- /* to lower case */
- for (i = 0; i < bcpTypeLen; i++) {
- bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]);
- }
-
- rb = ures_openDirect(NULL, KEYTYPEDATA, status);
- ures_getByKey(rb, TYPEMAP, rb, status);
- if (U_FAILURE(*status)) {
- ures_close(rb);
- return 0;
- }
-
- ures_getByKey(rb, keyBuf, rb, &tmpStatus);
- mapData = ures_getNextResource(rb, NULL, &tmpStatus);
- while (U_SUCCESS(tmpStatus)) {
- const UChar *uBcpType;
- char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
- int32_t tmpBcpTypeLen;
- const char *tmpBcpType = tmpBcpTypeBuf;
-
- uBcpType = ures_getString(mapData, &tmpBcpTypeLen, &tmpStatus);
- if (U_FAILURE(tmpStatus)) {
- break;
- }
- if (tmpBcpTypeLen == 0) {
- /* empty value indicates the BCP47 type is same with the legacy type */
- tmpBcpType = ures_getKey(mapData);
- } else {
- u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen);
- tmpBcpTypeBuf[tmpBcpTypeLen] = 0;
- }
- if (uprv_compareInvCharsAsAscii(bcpTypeBuf, tmpBcpType) == 0) {
- /* found a matching BCP47 type */
- resType = ures_getKey(mapData);
- resultLen = (int32_t)uprv_strlen(resType);
- break;
- }
- if (!ures_hasNext(rb)) {
- break;
- }
- ures_getNextResource(rb, mapData, &tmpStatus);
- }
- ures_close(mapData);
- ures_close(rb);
-
- if (U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR) {
- *status = tmpStatus;
- return 0;
- }
-
- if (resType == NULL) {
- resType = bcpTypeBuf;
- resultLen = bcpTypeLen;
- }
-
- copyLen = uprv_min(resultLen, typeCapacity);
- uprv_memcpy(type, resType, copyLen);
-
- if (uprv_compareInvCharsAsAscii(keyBuf, "timezone") == 0) {
- for (i = 0; i < copyLen; i++) {
- if (*(type + i) == ':') {
- *(type + i) = '/';
- }
- }
- }
-
- return u_terminateChars(type, typeCapacity, resultLen, status);
-}
-
static int32_t
_appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
char buf[ULOC_LANG_CAPACITY];
@@ -1311,7 +916,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
const char *bcpKey, *bcpValue;
UErrorCode tmpStatus = U_ZERO_ERROR;
int32_t keylen;
- UBool isLDMLKeyword;
+ UBool isBcpUExt;
while (TRUE) {
isAttribute = FALSE;
@@ -1320,7 +925,8 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
break;
}
len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
- if (U_FAILURE(tmpStatus)) {
+ /* buf must be null-terminated */
+ if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
@@ -1331,7 +937,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
keylen = (int32_t)uprv_strlen(key);
- isLDMLKeyword = (keylen > 1);
+ isBcpUExt = (keylen > 1);
/* special keyword used for representing Unicode locale attributes */
if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
@@ -1379,36 +985,49 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
}
}
- } else if (isLDMLKeyword) {
- int32_t modKeyLen;
-
- /* transform key and value to bcp47 style */
- modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus);
- if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+ } else if (isBcpUExt) {
+ bcpKey = uloc_toUnicodeLocaleKey(key);
+ if (bcpKey == NULL) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
- tmpStatus = U_ZERO_ERROR;
continue;
}
- bcpKey = pExtBuf;
- pExtBuf += (modKeyLen + 1);
- extBufCapacity -= (modKeyLen + 1);
-
- len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus);
- if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+ /* we've checked buf is null-terminated above */
+ bcpValue = uloc_toUnicodeLocaleType(key, buf);
+ if (bcpValue == NULL) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
}
- tmpStatus = U_ZERO_ERROR;
continue;
}
- bcpValue = pExtBuf;
- pExtBuf += (len + 1);
- extBufCapacity -= (len + 1);
+ if (bcpValue == buf) {
+ /*
+ When uloc_toUnicodeLocaleType(key, buf) returns the
+ input value as is, the value is well-formed, but has
+ no known mapping. This implementation normalizes
+ the value to lower case.
+ */
+ int32_t bcpValueLen = uprv_strlen(bcpValue);
+ if (bcpValueLen < extBufCapacity) {
+ uprv_strcpy(pExtBuf, bcpValue);
+ T_CString_toLowerCase(pExtBuf);
+
+ bcpValue = pExtBuf;
+
+ pExtBuf += (bcpValueLen + 1);
+ extBufCapacity -= (bcpValueLen + 1);
+ } else {
+ if (strict) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ break;
+ }
+ continue;
+ }
+ }
} else {
if (*key == PRIVATEUSE) {
if (!_isPrivateuseValueSubtags(buf, len)) {
@@ -1600,7 +1219,7 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
/* locate next separator char */
for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
- if (_isLDMLKey(pTag, len)) {
+ if (ultag_isUnicodeLocaleKey(pTag, len)) {
pKwds = pTag;
break;
}
@@ -1708,7 +1327,7 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
/* locate next separator char */
for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
- if (_isLDMLKey(pTag, len)) {
+ if (ultag_isUnicodeLocaleKey(pTag, len)) {
if (pBcpKey) {
emitKeyword = TRUE;
pNextBcpKey = pTag;
@@ -1744,28 +1363,78 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
const char *pKey = NULL; /* LDML key */
const char *pType = NULL; /* LDML type */
+ char bcpKeyBuf[9]; /* BCP key length is always 2 for now */
+
U_ASSERT(pBcpKey != NULL);
- /* u extension key to LDML key */
- len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status);
- if (U_FAILURE(*status)) {
+ if (bcpKeyLen >= sizeof(bcpKeyBuf)) {
+ /* the BCP key is invalid */
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
goto cleanup;
}
- pKey = buf + bufIdx;
- bufIdx += len;
- *(buf + bufIdx) = 0;
- bufIdx++;
- if (pBcpType) {
- /* BCP type to locale type */
- len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
- if (U_FAILURE(*status)) {
+ uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
+ bcpKeyBuf[bcpKeyLen] = 0;
+
+ /* u extension key to LDML key */
+ pKey = uloc_toLegacyKey(bcpKeyBuf);
+ if (pKey == NULL) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ goto cleanup;
+ }
+ if (pKey == bcpKeyBuf) {
+ /*
+ The key returned by toLegacyKey points to the input buffer.
+ We normalize the result key to lower case.
+ */
+ T_CString_toLowerCase(bcpKeyBuf);
+ if (bufSize - bufIdx - 1 >= bcpKeyLen) {
+ uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen);
+ pKey = buf + bufIdx;
+ bufIdx += bcpKeyLen;
+ *(buf + bufIdx) = 0;
+ bufIdx++;
+ } else {
+ *status = U_BUFFER_OVERFLOW_ERROR;
goto cleanup;
}
- pType = buf + bufIdx;
- bufIdx += len;
- *(buf + bufIdx) = 0;
- bufIdx++;
+ }
+
+ if (pBcpType) {
+ char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */
+ if (bcpTypeLen >= sizeof(bcpTypeBuf)) {
+ /* the BCP type is too long */
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ goto cleanup;
+ }
+
+ uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
+ bcpTypeBuf[bcpTypeLen] = 0;
+
+ /* BCP type to locale type */
+ pType = uloc_toLegacyType(pKey, bcpTypeBuf);
+ if (pType == NULL) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ goto cleanup;
+ }
+ if (pType == bcpTypeBuf) {
+ /*
+ The type returned by toLegacyType points to the input buffer.
+ We normalize the result type to lower case.
+ */
+ /* normalize to lower case */
+ T_CString_toLowerCase(bcpTypeBuf);
+ if (bufSize - bufIdx - 1 >= bcpTypeLen) {
+ uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen);
+ pType = buf + bufIdx;
+ bufIdx += bcpTypeLen;
+ *(buf + bufIdx) = 0;
+ bufIdx++;
+ } else {
+ *status = U_BUFFER_OVERFLOW_ERROR;
+ goto cleanup;
+ }
+ }
} else {
/* typeless - default type value is "yes" */
pType = LOCALE_TYPE_YES;
diff --git a/icu4c/source/common/ulocimp.h b/icu4c/source/common/ulocimp.h
index ebc525ef9de..164a730c005 100644
--- a/icu4c/source/common/ulocimp.h
+++ b/icu4c/source/common/ulocimp.h
@@ -1,6 +1,6 @@
/*
**********************************************************************
-* Copyright (C) 2004-2010, International Business Machines
+* Copyright (C) 2004-2014, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
@@ -62,4 +62,23 @@ ulocimp_getCountry(const char *localeID,
U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char *localeID);
+
+U_CFUNC UBool
+ultag_isUnicodeLocaleKey(const char* s, int32_t len);
+
+U_CFUNC UBool
+ultag_isUnicodeLocaleType(const char* s, int32_t len);
+
+U_CFUNC const char*
+ulocimp_toBcpKey(const char* key);
+
+U_CFUNC const char*
+ulocimp_toLegacyKey(const char* key);
+
+U_CFUNC const char*
+ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
+
+U_CFUNC const char*
+ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
+
#endif
diff --git a/icu4c/source/common/unicode/uloc.h b/icu4c/source/common/unicode/uloc.h
index 5d9e1e7dc88..f16380f0378 100644
--- a/icu4c/source/common/unicode/uloc.h
+++ b/icu4c/source/common/unicode/uloc.h
@@ -1149,4 +1149,106 @@ uloc_toLanguageTag(const char* localeID,
UBool strict,
UErrorCode* err);
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Converts the specified keyword (legacy key, or BCP 47 Unicode locale
+ * extension key) to the equivalent BCP 47 Unicode locale extension key.
+ * For example, BCP 47 Unicode locale extension key "co" is returned for
+ * the input keyword "collation".
+ *
+ * When the specified keyword is unknown, but satisfies the BCP syntax,
+ * then the pointer to the input keyword itself will be returned.
+ * For example,
+ * uloc_toUnicodeLocaleKey("ZZ") returns "ZZ".
+ *
+ * @param keyword the input locale keyword (either legacy key
+ * such as "collation" or BCP 47 Unicode locale extension
+ * key such as "co").
+ * @return the well-formed BCP 47 Unicode locale extension key,
+ * or NULL if the specified locale keyword cannot be
+ * mapped to a well-formed BCP 47 Unicode locale extension
+ * key.
+ * @see uloc_toLegacyKey
+ * @draft ICU 54
+ */
+U_DRAFT const char* U_EXPORT2
+uloc_toUnicodeLocaleKey(const char* keyword);
+
+/**
+ * Converts the specified keyword value (legacy type, or BCP 47
+ * Unicode locale extension type) to the well-formed BCP 47 Unicode locale
+ * extension type for the specified keyword (category). For example, BCP 47
+ * Unicode locale extension type "phonebk" is returned for the input
+ * keyword value "phonebook", with the keyword "collation" (or "co").
+ *
+ * When the specified keyword is not recognized, but the specified value
+ * satisfies the syntax of the BCP 47 Unicode locale extension type,
+ * or when the specified keyword allows 'variable' type and the specified
+ * value satisfies the syntax, then the pointer to the input type value itself
+ * will be returned.
+ * For example,
+ * uloc_toUnicodeLocaleType("Foo", "Bar") returns "Bar",
+ * uloc_toUnicodeLocaleType("variableTop", "00A4") returns "00A4".
+ *
+ * @param keyword the locale keyword (either legacy key such as
+ * "collation" or BCP 47 Unicode locale extension
+ * key such as "co").
+ * @param value the locale keyword value (either legacy type
+ * such as "phonebook" or BCP 47 Unicode locale extension
+ * type such as "phonebk").
+ * @return the well-formed BCP47 Unicode locale extension type,
+ * or NULL if the locale keyword value cannot be mapped to
+ * a well-formed BCP 47 Unicode locale extension type.
+ * @see uloc_toLegacyType
+ * @draft ICU 54
+ */
+U_DRAFT const char* U_EXPORT2
+uloc_toUnicodeLocaleType(const char* keyword, const char* value);
+
+/**
+ * Converts the specified keyword (BCP 47 Unicode locale extension key, or
+ * legacy key) to the legacy key. For example, legacy key "collation" is
+ * returned for the input BCP 47 Unicode locale extension key "co".
+ *
+ * @param keyword the input locale keyword (either BCP 47 Unicode locale
+ * extension key or legacy key).
+ * @return the well-formed legacy key, or NULL if the specified
+ * keyword cannot be mapped to a well-formed legacy key.
+ * @see uloc_toUnicodeLocaleKey
+ * @draft ICU 54
+ */
+U_DRAFT const char* U_EXPORT2
+uloc_toLegacyKey(const char* keyword);
+
+/**
+ * Converts the specified keyword value (BCP 47 Unicode locale extension type,
+ * or legacy type or type alias) to the canonical legacy type. For example,
+ * the legacy type "phonebook" is returned for the input BCP 47 Unicode
+ * locale extension type "phonebk" with the keyword "collation" (or "co").
+ *
+ * When the specified keyword is not recognized, but the specified value
+ * satisfies the syntax of legacy key, or when the specified keyword
+ * allows 'variable' type and the specified value satisfies the syntax,
+ * then the pointer to the input type value itself will be returned.
+ * For example,
+ * uloc_toLegacyType("Foo", "Bar") returns "Bar",
+ * uloc_toLegacyType("vt", "00A4") returns "00A4".
+ *
+ * @param keyword the locale keyword (either legacy keyword such as
+ * "collation" or BCP 47 Unicode locale extension
+ * key such as "co").
+ * @param value the locale keyword value (either BCP 47 Unicode locale
+ * extension type such as "phonebk" or legacy keyword value
+ * such as "phonebook").
+ * @return the well-formed legacy type, or NULL if the specified
+ * keyword value cannot be mapped to a well-formed legacy
+ * type.
+ * @see uloc_toUnicodeLocaleType
+ * @draft ICU 54
+ */
+U_DRAFT const char* U_EXPORT2
+uloc_toLegacyType(const char* keyword, const char* value);
+
+#endif /* U_HIDE_DRAFT_API */
+
#endif /*_ULOC*/
diff --git a/icu4c/source/test/cintltst/cloctst.c b/icu4c/source/test/cintltst/cloctst.c
index 3c06c40e7d1..f3691867716 100644
--- a/icu4c/source/test/cintltst/cloctst.c
+++ b/icu4c/source/test/cintltst/cloctst.c
@@ -250,6 +250,10 @@ void addLocaleTest(TestNode** root)
TESTCASE(TestEnglishExemplarCharacters);
TESTCASE(TestDisplayNameBrackets);
TESTCASE(TestIsRightToLeft);
+ TESTCASE(TestToUnicodeLocaleKey);
+ TESTCASE(TestToLegacyKey);
+ TESTCASE(TestToUnicodeLocaleType);
+ TESTCASE(TestToLegacyType);
}
@@ -5673,7 +5677,6 @@ static void TestLikelySubtags()
}
const char* const locale_to_langtag[][3] = {
- {"@x=elmer", "x-elmer", "x-elmer"},
{"", "und", "und"},
{"en", "en", "en"},
{"en_US", "en-US", "en-US"},
@@ -5707,9 +5710,9 @@ const char* const locale_to_langtag[][3] = {
{"en@timezone=America/New_York;calendar=japanese", "en-u-ca-japanese-tz-usnyc", "en-u-ca-japanese-tz-usnyc"},
{"en@timezone=US/Eastern", "en-u-tz-usnyc", "en-u-tz-usnyc"},
{"en@x=x-y-z;a=a-b-c", "en-x-x-y-z", NULL},
- {"it@collation=badcollationtype;colStrength=identical;cu=usd-eur", "it-u-ks-identic", NULL},
+ {"it@collation=badcollationtype;colStrength=identical;cu=usd-eur", "it-u-cu-usd-eur-ks-identic", NULL},
{"en_US_POSIX", "en-US-u-va-posix", "en-US-u-va-posix"},
- {"en_US_POSIX@calendar=japanese;currency=EUR","en-US-u-ca-japanese-cu-EUR-va-posix", "en-US-u-ca-japanese-cu-EUR-va-posix"},
+ {"en_US_POSIX@calendar=japanese;currency=EUR","en-US-u-ca-japanese-cu-eur-va-posix", "en-US-u-ca-japanese-cu-eur-va-posix"},
{"@x=elmer", "x-elmer", "x-elmer"},
{"en@x=elmer", "en-x-elmer", "en-x-elmer"},
{"@x=elmer;a=exta", "und-a-exta-x-elmer", "und-a-exta-x-elmer"},
@@ -5779,6 +5782,7 @@ static const struct {
const char *locID;
int32_t len;
} langtag_to_locale[] = {
+ {"ja-u-ijkl-efgh-abcd-ca-japanese-xx-yyy-zzz-kn", "ja@attribute=abcd-efgh-ijkl;calendar=japanese;colnumeric=yes;xx=yyy-zzz", FULL_LENGTH},
{"en", "en", FULL_LENGTH},
{"en-us", "en_US", FULL_LENGTH},
{"und-US", "_US", FULL_LENGTH},
@@ -5859,6 +5863,187 @@ static void TestForLanguageTag(void) {
}
}
+static void TestToUnicodeLocaleKey(void) /* table-driven check of uloc_toUnicodeLocaleKey(); mismatches are reported via log_err() */
+{
+ /* Each row is {input keyword, expected result}. "$IN" means the result must be the input pointer itself; a NULL expected value means NULL must be returned. The {NULL, NULL} row terminates the table. */
+ static const char* DATA[][2] = {
+ {"calendar", "ca"},
+ {"CALEndar", "ca"}, /* different casing */
+ {"ca", "ca"}, /* bcp key itself */
+ {"kv", "kv"}, /* no difference between legacy and bcp */
+ {"foo", NULL}, /* unknown, bcp ill-formed */
+ {"ZZ", "$IN"}, /* unknown, bcp well-formed - input pointer is expected back */
+ {NULL, NULL}
+ };
+
+ int32_t i;
+ for (i = 0; DATA[i][0] != NULL; i++) {
+ const char* keyword = DATA[i][0];
+ const char* expected = DATA[i][1];
+ const char* bcpKey = NULL;
+
+ bcpKey = uloc_toUnicodeLocaleKey(keyword);
+ if (expected == NULL) {
+ if (bcpKey != NULL) {
+ log_err("toUnicodeLocaleKey: keyword=%s => %s, expected=NULL\n", keyword, bcpKey);
+ }
+ } else if (bcpKey == NULL) {
+ log_err("toUnicodeLocaleKey: keyword=%s => NULL, expected=%s\n", keyword, expected);
+ } else if (uprv_strcmp(expected, "$IN") == 0) { /* "$IN" rows are checked by pointer identity, not string equality */
+ if (bcpKey != keyword) {
+ log_err("toUnicodeLocaleKey: keyword=%s => %s, expected=%s(input pointer)\n", keyword, bcpKey, keyword);
+ }
+ } else if (uprv_strcmp(bcpKey, expected) != 0) { /* all other rows are compared as strings */
+ log_err("toUnicodeLocaleKey: keyword=%s => %s, expected=%s\n", keyword, bcpKey, expected);
+ }
+ }
+}
+
+static void TestToLegacyKey(void) /* table-driven check of uloc_toLegacyKey(); mismatches are reported via log_err() */
+{
+ /* Each row is {input keyword, expected result}. "$IN" means the result must be the input pointer itself; a NULL expected value means NULL must be returned. The {NULL, NULL} row terminates the table. */
+ static const char* DATA[][2] = {
+ {"kb", "colbackwards"},
+ {"kB", "colbackwards"}, /* different casing */
+ {"Collation", "collation"}, /* keyword itself with different casing */
+ {"kv", "kv"}, /* no difference between legacy and bcp */
+ {"foo", "$IN"}, /* unknown, bcp ill-formed */
+ {"ZZ", "$IN"}, /* unknown, bcp well-formed */
+ {"e=mc2", NULL}, /* unknown, bcp/legacy ill-formed */
+ {NULL, NULL}
+ };
+
+ int32_t i;
+ for (i = 0; DATA[i][0] != NULL; i++) {
+ const char* keyword = DATA[i][0];
+ const char* expected = DATA[i][1];
+ const char* legacyKey = NULL;
+
+ legacyKey = uloc_toLegacyKey(keyword);
+ if (expected == NULL) {
+ if (legacyKey != NULL) {
+ log_err("toLegacyKey: keyword=%s => %s, expected=NULL\n", keyword, legacyKey);
+ }
+ } else if (legacyKey == NULL) {
+ log_err("toLegacyKey: keyword=%s => NULL, expected=%s\n", keyword, expected);
+ } else if (uprv_strcmp(expected, "$IN") == 0) { /* "$IN" rows are checked by pointer identity, not string equality */
+ if (legacyKey != keyword) {
+ log_err("toLegacyKey: keyword=%s => %s, expected=%s(input pointer)\n", keyword, legacyKey, keyword);
+ }
+ } else if (uprv_strcmp(legacyKey, expected) != 0) { /* all other rows are compared as strings */
+ log_err("toLegacyKey: keyword=%s => %s, expected=%s\n", keyword, legacyKey, expected); /* message names the API under test, in the same format as the messages above */
+ }
+ }
+}
+
+static void TestToUnicodeLocaleType(void) /* table-driven check of uloc_toUnicodeLocaleType(); mismatches are reported via log_err() */
+{
+ /* Each row is {keyword, input value, expected result}. "$IN" means the result must be the input value pointer itself; a NULL expected value means NULL must be returned. The {NULL, NULL, NULL} row terminates the table. */
+ static const char* DATA[][3] = {
+ {"tz", "Asia/Kolkata", "inccu"},
+ {"calendar", "gregorian", "gregory"},
+ {"ca", "gregorian", "gregory"},
+ {"ca", "Gregorian", "gregory"},
+ {"ca", "buddhist", "buddhist"},
+ {"Calendar", "Japanese", "japanese"},
+ {"calendar", "Islamic-Civil", "islamic-civil"},
+ {"calendar", "islamicc", "islamic-civil"}, /* bcp type alias */
+ {"colalternate", "NON-IGNORABLE", "noignore"},
+ {"colcaselevel", "yes", "true"},
+ {"tz", "america/new_york", "usnyc"},
+ {"tz", "Asia/Kolkata", "inccu"}, /* same row as the first entry of this table */
+ {"timezone", "navajo", "usden"},
+ {"ca", "aaaa", "$IN"}, /* unknown type, well-formed type */
+ {"ca", "gregory-japanese-islamic", "$IN"}, /* unknown type, well-formed type */
+ {"zz", "gregorian", NULL}, /* unknown key, ill-formed type */
+ {"co", "foo-", NULL}, /* unknown type, ill-formed type */
+ {"variableTop", "00A0", "$IN"}, /* valid codepoints type */
+ {"variableTop", "wxyz", "$IN"}, /* invalid codepoints type - return as is for now */
+ {"kr", "space-punct", "space-punct"}, /* valid reordercode type */
+ {"kr", "digit-spacepunct", NULL}, /* invalid (bcp ill-formed) reordercode type */
+ {NULL, NULL, NULL}
+ };
+
+ int32_t i;
+ for (i = 0; DATA[i][0] != NULL; i++) {
+ const char* keyword = DATA[i][0];
+ const char* value = DATA[i][1];
+ const char* expected = DATA[i][2];
+ const char* bcpType = NULL;
+
+ bcpType = uloc_toUnicodeLocaleType(keyword, value);
+ if (expected == NULL) {
+ if (bcpType != NULL) {
+ log_err("toUnicodeLocaleType: keyword=%s, value=%s => %s, expected=NULL\n", keyword, value, bcpType);
+ }
+ } else if (bcpType == NULL) {
+ log_err("toUnicodeLocaleType: keyword=%s, value=%s => NULL, expected=%s\n", keyword, value, expected);
+ } else if (uprv_strcmp(expected, "$IN") == 0) { /* "$IN" rows are checked by pointer identity against value, not string equality */
+ if (bcpType != value) {
+ log_err("toUnicodeLocaleType: keyword=%s, value=%s => %s, expected=%s(input pointer)\n", keyword, value, bcpType, value);
+ }
+ } else if (uprv_strcmp(bcpType, expected) != 0) { /* all other rows are compared as strings */
+ log_err("toUnicodeLocaleType: keyword=%s, value=%s => %s, expected=%s\n", keyword, value, bcpType, expected);
+ }
+ }
+}
+
+static void TestToLegacyType(void) /* table-driven check of uloc_toLegacyType(); mismatches are reported via log_err() */
+{
+ /* Each row is {keyword, input value, expected result}. "$IN" means the result must be the input value pointer itself; a NULL expected value means NULL must be returned. The {NULL, NULL, NULL} row terminates the table. */
+ static const char* DATA[][3] = {
+ {"calendar", "gregory", "gregorian"},
+ {"ca", "gregory", "gregorian"},
+ {"ca", "Gregory", "gregorian"},
+ {"ca", "buddhist", "buddhist"},
+ {"Calendar", "Japanese", "japanese"},
+ {"calendar", "Islamic-Civil", "islamic-civil"},
+ {"calendar", "islamicc", "islamic-civil"}, /* bcp type alias */
+ {"colalternate", "noignore", "non-ignorable"},
+ {"colcaselevel", "true", "yes"},
+ {"tz", "usnyc", "America/New_York"},
+ {"tz", "inccu", "Asia/Calcutta"},
+ {"timezone", "usden", "America/Denver"},
+ {"timezone", "usnavajo", "America/Denver"}, /* bcp type alias */
+ {"colstrength", "quarternary", "quaternary"}, /* type alias */
+ {"ca", "aaaa", "$IN"}, /* unknown type */
+ {"calendar", "gregory-japanese-islamic", "$IN"}, /* unknown type, well-formed type */
+ {"zz", "gregorian", "$IN"}, /* unknown key, bcp ill-formed type */
+ {"ca", "gregorian-calendar", "$IN"}, /* known key, bcp ill-formed type */
+ {"co", "e=mc2", NULL}, /* known key, ill-formed bcp/legacy type */
+ {"variableTop", "00A0", "$IN"}, /* valid codepoints type */
+ {"variableTop", "wxyz", "$IN"}, /* invalid codepoints type - return as is for now */
+ {"kr", "space-punct", "space-punct"}, /* valid reordercode type */
+ {"kr", "digit-spacepunct", "digit-spacepunct"}, /* invalid reordercode type, but ok for legacy syntax */
+ {NULL, NULL, NULL}
+ };
+
+ int32_t i;
+ for (i = 0; DATA[i][0] != NULL; i++) {
+ const char* keyword = DATA[i][0];
+ const char* value = DATA[i][1];
+ const char* expected = DATA[i][2];
+ const char* legacyType = NULL;
+
+ legacyType = uloc_toLegacyType(keyword, value);
+ if (expected == NULL) {
+ if (legacyType != NULL) {
+ log_err("toLegacyType: keyword=%s, value=%s => %s, expected=NULL\n", keyword, value, legacyType);
+ }
+ } else if (legacyType == NULL) {
+ log_err("toLegacyType: keyword=%s, value=%s => NULL, expected=%s\n", keyword, value, expected);
+ } else if (uprv_strcmp(expected, "$IN") == 0) { /* "$IN" rows are checked by pointer identity against value, not string equality */
+ if (legacyType != value) {
+ log_err("toLegacyType: keyword=%s, value=%s => %s, expected=%s(input pointer)\n", keyword, value, legacyType, value);
+ }
+ } else if (uprv_strcmp(legacyType, expected) != 0) { /* all other rows are compared as strings */
+ log_err("toLegacyType: keyword=%s, value=%s => %s, expected=%s\n", keyword, value, legacyType, expected);
+ }
+ }
+}
+
+
+
static void test_unicode_define(const char *namech, char ch, const char *nameu, UChar uch)
{
UChar asUch[1];
diff --git a/icu4c/source/test/cintltst/cloctst.h b/icu4c/source/test/cintltst/cloctst.h
index ac313b3eb9a..8ae243f9dba 100644
--- a/icu4c/source/test/cintltst/cloctst.h
+++ b/icu4c/source/test/cintltst/cloctst.h
@@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 1997-2013, International Business Machines Corporation and
+ * Copyright (c) 1997-2014, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/********************************************************************************
@@ -123,6 +123,11 @@ static void TestLikelySubtags(void);
static void TestForLanguageTag(void);
static void TestToLanguageTag(void);
+static void TestToUnicodeLocaleKey(void);
+static void TestToLegacyKey(void);
+static void TestToUnicodeLocaleType(void);
+static void TestToLegacyType(void);
+
/**
* locale data
*/