diff --git a/icu4c/source/common/Makefile.in b/icu4c/source/common/Makefile.in
index 45bb9813d00..b46e2c645a0 100644
--- a/icu4c/source/common/Makefile.in
+++ b/icu4c/source/common/Makefile.in
@@ -87,7 +87,7 @@ rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.
serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
uidna.o usprep.o punycode.o \
util.o util_props.o parsepos.o locbased.o cwchar.o wintz.o mutex.o dtintrv.o ucnvsel.o propsvec.o \
-ulist.o
+ulist.o ultag.o
## Header files to install
HEADERS = $(srcdir)/unicode/*.h unicode/*.h
diff --git a/icu4c/source/common/common.vcproj b/icu4c/source/common/common.vcproj
index 3ef8763211b..eb6863d3d83 100644
--- a/icu4c/source/common/common.vcproj
+++ b/icu4c/source/common/common.vcproj
@@ -2677,6 +2677,14 @@
RelativePath=".\ulocimp.h"
>
+
+
+
+
diff --git a/icu4c/source/common/uloc.c b/icu4c/source/common/uloc.c
index 8b31bd800d6..692287fcb53 100644
--- a/icu4c/source/common/uloc.c
+++ b/icu4c/source/common/uloc.c
@@ -45,6 +45,7 @@
#include "uarrsort.h"
#include "uenumimp.h"
#include "uassert.h"
+#include "ultag.h"
#include /* for sprintf */
@@ -4381,8 +4382,7 @@ uloc_forLanguageTag(const char* langtag,
int32_t* parsedLength,
UErrorCode* err)
{
- /* TODO */
- return 0;
+ return ultag_languageTagToLocale(langtag, localeID, localeIDCapacity, parsedLength, err);
}
U_DRAFT int32_t U_EXPORT2
@@ -4392,8 +4392,7 @@ uloc_toLanguageTag(const char* localeID,
UBool strict,
UErrorCode* err)
{
- /* TODO */
- return 0;
+ return ultag_localeToLanguageTag(localeID, langtag, langtagCapacity, strict, err);
}
/*eof*/
diff --git a/icu4c/source/common/ultag.c b/icu4c/source/common/ultag.c
new file mode 100644
index 00000000000..6c28986186d
--- /dev/null
+++ b/icu4c/source/common/ultag.c
@@ -0,0 +1,2190 @@
+/*
+**********************************************************************
+* Copyright (C) 2009, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/ures.h"
+#include "unicode/putil.h"
+#include "ustr_imp.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "putilimp.h"
+#include "ultag.h"
+
+/*
+ * Node of a singly linked list of variant subtags.
+ * "variant" points at the subtag text; "next" is the following node
+ * (NULL for the last node).
+ */
+struct VariantListEntry {
+    const char              *variant;
+    struct VariantListEntry *next;
+};
+typedef struct VariantListEntry VariantListEntry;
+
+/*
+ * Node of a singly linked list of extensions (or keywords).
+ * "key" and "value" point at the two strings of the pair; "next" is the
+ * following node (NULL for the last node).
+ */
+struct ExtensionListEntry {
+    const char                *key;
+    const char                *value;
+    struct ExtensionListEntry *next;
+};
+typedef struct ExtensionListEntry ExtensionListEntry;
+
+/* Number of extlang slots kept in a ULanguageTag. */
+#define MAXEXTLANG 3
+
+/*
+ * Parsed form of a language tag.
+ * _initializeULanguageTag() sets the plain string fields to the shared empty
+ * string and the buffer, extlang slots and both lists to NULL.
+ */
+struct ULanguageTag {
+    char                *buf;                   /* holding parsed subtags */
+    const char          *language;              /* language subtag */
+    const char          *extlang[MAXEXTLANG];   /* extlang subtags, NULL when unused */
+    const char          *script;                /* script subtag */
+    const char          *region;                /* region subtag */
+    VariantListEntry    *variants;              /* head of the variant list */
+    ExtensionListEntry  *extensions;            /* head of the extension list */
+    const char          *privateuse;            /* private use value */
+    const char          *grandfathered;         /* grandfathered tag */
+};
+
+/* Language tag syntax characters */
+#define MINLEN              2
+#define SEP                 '-'     /* subtag separator */
+#define PRIVATEUSE          'x'     /* private use singleton */
+#define LDMLEXT             'u'     /* singleton carrying LDML keywords */
+
+/* Locale ID syntax characters */
+#define LOCALE_SEP          '_'     /* separates language/script/region/variant */
+#define LOCALE_EXT_SEP      '@'     /* starts the keyword list */
+#define LOCALE_KEYWORD_SEP  ';'     /* separates keywords */
+#define LOCALE_KEY_TYPE_SEP '='     /* separates a keyword key from its type */
+
+/*
+ * Character class tests.
+ * Note: both macros evaluate their argument more than once, so the argument
+ * must not have side effects.
+ */
+#define ISALPHA(c) (((c)>='A' && (c)<='Z') || ((c)>='a' && (c)<='z'))
+#define ISNUMERIC(c) ((c)>='0' && (c)<='9')
+
+/* Shared constant strings */
+static const char* EMPTY = "";
+static const char* LANG_UND = "und";        /* written when no usable language is available */
+static const char* PRIVATEUSE_KEY = "x";    /* keyword key used for the private use value */
+
+/* Length of LANG_UND, not counting the terminator */
+#define LANG_UND_LEN 3
+
+/*
+ * Table of grandfathered tags, stored as consecutive pairs:
+ * even index = grandfathered tag, odd index = preferred value.
+ * An empty preferred value presumably means that no replacement is defined.
+ * The table ends with a NULL, NULL pair.
+ */
+static const char* GRANDFATHERED[] = {
+/*  grandfathered   preferred */
+    "art-lojban",   "jbo",
+    "cel-gaulish",  "",
+    "en-GB-oed",    "",
+    "i-ami",        "ami",
+    "i-bnn",        "bnn",
+    "i-default",    "",
+    "i-enochian",   "",
+    "i-hak",        "hak",
+    "i-klingon",    "tlh",
+    "i-lux",        "lb",
+    "i-mingo",      "",
+    "i-navajo",     "nv",
+    "i-pwn",        "pwn",
+    "i-tao",        "tao",
+    "i-tay",        "tay",
+    "i-tsu",        "tsu",
+    "no-bok",       "nb",
+    "no-nyn",       "nn",
+    "sgn-be-fr",    "sfb",
+    "sgn-be-nl",    "vgt",
+    "sgn-ch-de",    "sgg",
+    "zh-guoyu",     "cmn",
+    "zh-hakka",     "hak",
+    "zh-min",       "",
+    "zh-min-nan",   "nan",
+    "zh-xiang",     "hsn",
+    NULL,           NULL
+};
+
+/*
+ * Table of deprecated language codes, stored as consecutive pairs:
+ * even index = deprecated code, odd index = code that replaces it.
+ * The table ends with a NULL, NULL pair.
+ */
+static const char* DEPRECATEDLANGS[] = {
+/*  deprecated  new */
+    "iw",       "he",
+    "ji",       "yi",
+    "in",       "id",
+    NULL,       NULL
+};
+
+/*
+* -------------------------------------------------
+*
+* Language subtag syntax validation functions
+*
+* -------------------------------------------------
+*/
+
+/*
+ * Returns TRUE when each of the first len chars of s satisfies ISALPHA.
+ * Nothing is inspected (and TRUE is returned) when len is zero or negative.
+ */
+static UBool
+_isAlphaString(const char* s, int32_t len) {
+    int32_t pos = 0;
+
+    while (pos < len) {
+        const char c = s[pos++];
+        if (!ISALPHA(c)) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+/*
+ * Returns TRUE when each of the first len chars of s satisfies ISNUMERIC.
+ * Nothing is inspected (and TRUE is returned) when len is zero or negative.
+ */
+static UBool
+_isNumericString(const char* s, int32_t len) {
+    int32_t pos = 0;
+
+    while (pos < len) {
+        const char c = s[pos++];
+        if (!ISNUMERIC(c)) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+/*
+ * Returns TRUE when each of the first len chars of s satisfies ISALPHA or
+ * ISNUMERIC.  Nothing is inspected (and TRUE is returned) when len is zero
+ * or negative.
+ */
+static UBool
+_isAlphaNumericString(const char* s, int32_t len) {
+    int32_t pos = 0;
+
+    while (pos < len) {
+        const char c = s[pos++];
+        if (!(ISALPHA(c) || ISNUMERIC(c))) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+/*
+ * Syntax check for a language subtag.
+ *
+ * language      = 2*3ALPHA            ; shortest ISO 639 code
+ *                 ["-" extlang]       ; sometimes followed by
+ *                                     ;   extended language subtags
+ *               / 4ALPHA              ; or reserved for future use
+ *               / 5*8ALPHA            ; or registered language subtag
+ *
+ * Accepts 2 to 8 letters.  A negative len means s is NUL-terminated.
+ */
+static UBool
+_isLanguageSubtag(const char* s, int32_t len) {
+    int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+
+    return (UBool)(n >= 2 && n <= 8 && _isAlphaString(s, n));
+}
+
+/*
+ * Syntax check for a single extlang subtag.
+ *
+ * extlang       = 3ALPHA              ; selected ISO 639 codes
+ *                 *2("-" 3ALPHA)      ; permanently reserved
+ *
+ * Accepts exactly 3 letters.  A negative len means s is NUL-terminated.
+ */
+static UBool
+_isExtlangSubtag(const char* s, int32_t len) {
+    int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+
+    return (UBool)(n == 3 && _isAlphaString(s, n));
+}
+
+/*
+ * Syntax check for a script subtag.
+ *
+ * script        = 4ALPHA              ; ISO 15924 code
+ *
+ * Accepts exactly 4 letters.  A negative len means s is NUL-terminated.
+ */
+static UBool
+_isScriptSubtag(const char* s, int32_t len) {
+    int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+
+    return (UBool)(n == 4 && _isAlphaString(s, n));
+}
+
+/*
+ * Syntax check for a region subtag.
+ *
+ * region        = 2ALPHA              ; ISO 3166-1 code
+ *               / 3DIGIT              ; UN M.49 code
+ *
+ * A negative len means s is NUL-terminated.
+ */
+static UBool
+_isRegionSubtag(const char* s, int32_t len) {
+    int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+
+    switch (n) {
+    case 2:
+        /* two letters */
+        return _isAlphaString(s, n);
+    case 3:
+        /* three digits */
+        return _isNumericString(s, n);
+    default:
+        return FALSE;
+    }
+}
+
+/*
+ * Syntax check for a variant subtag.
+ *
+ * variant       = 5*8alphanum         ; registered variants
+ *               / (DIGIT 3alphanum)
+ *
+ * Accepts either 5 to 8 letters/digits, or a digit followed by exactly
+ * 3 letters/digits.  A negative len means s is NUL-terminated.
+ * Returns TRUE when s matches, FALSE otherwise.
+ */
+static UBool
+_isVariantSubtag(const char* s, int32_t len) {
+    if (len < 0) {
+        len = (int32_t)uprv_strlen(s);
+    }
+    /* the long form is alphanumeric, not letters only (e.g. "fonipa1") */
+    if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) {
+        return TRUE;
+    }
+    if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) {
+        return TRUE;
+    }
+    return FALSE;
+}
+
+/*
+ * Syntax check for the singleton that introduces an extension.
+ *
+ * extension     = singleton 1*("-" (2*8alphanum))
+ *
+ * Accepts a single letter other than the private use letter (compared after
+ * lower-casing).  A negative len means s is NUL-terminated.
+ */
+static UBool
+_isExtensionSingleton(const char* s, int32_t len) {
+    int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+
+    if (n != 1) {
+        return FALSE;
+    }
+    if (!ISALPHA(*s)) {
+        return FALSE;
+    }
+    return (UBool)(uprv_tolower(*s) != PRIVATEUSE);
+}
+
+/*
+ * Syntax check for one subtag of an extension value.
+ *
+ * extension     = singleton 1*("-" (2*8alphanum))
+ *
+ * Accepts 2 to 8 letters/digits.  A negative len means s is NUL-terminated.
+ */
+static UBool
+_isExtensionSubtag(const char* s, int32_t len) {
+    int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+
+    return (UBool)(n >= 2 && n <= 8 && _isAlphaNumericString(s, n));
+}
+
+/*
+ * Syntax check for a complete extension value: one or more extension subtags
+ * joined by SEP.  Empty input, a leading or trailing SEP and two SEPs in a
+ * row are all rejected.  A negative len means s is NUL-terminated.
+ */
+static UBool
+_isExtensionSubtags(const char* s, int32_t len) {
+    int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+    int32_t start = 0;      /* index where the current subtag begins */
+    int32_t i;
+
+    /* i == n acts as a virtual separator that closes the last subtag */
+    for (i = 0; i <= n; i++) {
+        if (i == n || s[i] == SEP) {
+            if (i == start) {
+                /* empty subtag */
+                return FALSE;
+            }
+            if (!_isExtensionSubtag(s + start, i - start)) {
+                return FALSE;
+            }
+            start = i + 1;
+        }
+    }
+    return TRUE;
+}
+
+/*
+ * Syntax check for one subtag of a private use value.
+ *
+ * privateuse    = "x" 1*("-" (1*8alphanum))
+ *
+ * Accepts 1 to 8 letters/digits.  A negative len means s is NUL-terminated.
+ */
+static UBool
+_isPrivateuseValueSubtag(const char* s, int32_t len) {
+    int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+
+    return (UBool)(n >= 1 && n <= 8 && _isAlphaNumericString(s, n));
+}
+
+/*
+ * Syntax check for a complete private use value: one or more private use
+ * subtags joined by SEP.  Empty input, a leading or trailing SEP and two SEPs
+ * in a row are all rejected.  A negative len means s is NUL-terminated.
+ */
+static UBool
+_isPrivateuseValueSubtags(const char* s, int32_t len) {
+    int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+    int32_t start = 0;      /* index where the current subtag begins */
+    int32_t i;
+
+    /* i == n acts as a virtual separator that closes the last subtag */
+    for (i = 0; i <= n; i++) {
+        if (i == n || s[i] == SEP) {
+            if (i == start) {
+                /* empty subtag */
+                return FALSE;
+            }
+            if (!_isPrivateuseValueSubtag(s + start, i - start)) {
+                return FALSE;
+            }
+            start = i + 1;
+        }
+    }
+    return TRUE;
+}
+
+/*
+ * Returns TRUE when s has the shape of a key inside the LDML extension:
+ * exactly 2 letters/digits.  A negative len means s is NUL-terminated.
+ */
+static UBool
+_isLDMLKey(const char* s, int32_t len) {
+    int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+
+    return (UBool)(n == 2 && _isAlphaNumericString(s, n));
+}
+
+/*
+ * Returns TRUE when s has the shape of a type inside the LDML extension:
+ * 3 to 8 letters/digits.  A negative len means s is NUL-terminated.
+ */
+static UBool
+_isLDMLType(const char* s, int32_t len) {
+    int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
+
+    return (UBool)(n >= 3 && n <= 8 && _isAlphaNumericString(s, n));
+}
+
+/*
+* -------------------------------------------------
+*
+* Helper functions
+*
+* -------------------------------------------------
+*/
+
+/*
+ * Inserts var into the list headed by *first, keeping the list sorted in
+ * ascending uprv_strcmp() order of the variant strings.
+ *
+ * Returns TRUE when var was linked in.  Returns FALSE when an entry with an
+ * equal variant string is already present; in that case neither the list nor
+ * var is modified and the caller keeps ownership of var.
+ */
+static UBool
+_addVariantToList(VariantListEntry **first, VariantListEntry *var) {
+    /* address of the pointer that will have to point at var */
+    VariantListEntry **link = first;
+
+    while (*link != NULL) {
+        int32_t cmp = uprv_strcmp(var->variant, (*link)->variant);
+        if (cmp == 0) {
+            /* duplicated variant */
+            return FALSE;
+        }
+        if (cmp < 0) {
+            /* var sorts in front of the current entry */
+            break;
+        }
+        link = &(*link)->next;
+    }
+
+    var->next = *link;
+    *link = var;
+    return TRUE;
+}
+
+
+/*
+ * Inserts ext into the list headed by *first, keeping the list sorted by key.
+ *
+ * With localeToBCP == FALSE the keys are ordered by plain uprv_strcmp().
+ * With localeToBCP == TRUE (locale ID to language tag conversion):
+ *   - two one-char keys are ordered by character, except that the private
+ *     use key always sorts last;
+ *   - a one-char key is ordered against a longer key as if the longer key
+ *     were the LDML singleton LDMLEXT, so all longer keys end up grouped at
+ *     the position of that singleton;
+ *   - all other pairs are ordered by uprv_strcmp().
+ *
+ * Returns TRUE when ext was linked in.  Returns FALSE when the comparison
+ * reports an equal key already in the list; in that case neither the list
+ * nor ext is modified and the caller keeps ownership of ext.
+ */
+static UBool
+_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
+    /* address of the pointer that will have to point at ext */
+    ExtensionListEntry **link = first;
+
+    while (*link != NULL) {
+        const ExtensionListEntry *cur = *link;
+        int32_t cmp;
+
+        if (!localeToBCP) {
+            cmp = uprv_strcmp(ext->key, cur->key);
+        } else {
+            /* special handling for locale to bcp conversion */
+            int32_t len = uprv_strlen(ext->key);
+            int32_t curlen = uprv_strlen(cur->key);
+
+            if (len == 1 && curlen == 1) {
+                const char extCh = *(ext->key);
+                const char curCh = *(cur->key);
+
+                if (extCh == curCh) {
+                    cmp = 0;
+                } else if (extCh == PRIVATEUSE) {
+                    cmp = 1;
+                } else if (curCh == PRIVATEUSE) {
+                    cmp = -1;
+                } else {
+                    cmp = extCh - curCh;
+                }
+            } else if (len == 1) {
+                cmp = *(ext->key) - LDMLEXT;
+            } else if (curlen == 1) {
+                cmp = LDMLEXT - *(cur->key);
+            } else {
+                cmp = uprv_strcmp(ext->key, cur->key);
+            }
+        }
+
+        if (cmp == 0) {
+            /* duplicated extension key */
+            return FALSE;
+        }
+        if (cmp < 0) {
+            /* ext sorts in front of the current entry */
+            break;
+        }
+        link = &(*link)->next;
+    }
+
+    ext->next = *link;
+    *link = ext;
+    return TRUE;
+}
+
+/*
+ * Puts every field of *langtag into its "nothing parsed yet" state:
+ * pointer-owning fields and extlang slots become NULL, the plain string
+ * fields point at the shared empty string.
+ */
+static void
+_initializeULanguageTag(ULanguageTag* langtag) {
+    int32_t slot = MAXEXTLANG;
+
+    /* buffer and lists: nothing allocated */
+    langtag->buf = NULL;
+    langtag->variants = NULL;
+    langtag->extensions = NULL;
+
+    while (slot-- > 0) {
+        langtag->extlang[slot] = NULL;
+    }
+
+    /* string fields: empty */
+    langtag->language = EMPTY;
+    langtag->script = EMPTY;
+    langtag->region = EMPTY;
+    langtag->privateuse = EMPTY;
+    langtag->grandfathered = EMPTY;
+}
+
+/* Resource bundle and table holding the keyword mapping data */
+#define SUPPLEMENTAL            "supplementalData"
+#define BCP47MAPPINGS           "bcp47KeywordMappings"
+
+/* Sizes of the local conversion buffers, each including the terminator */
+#define MAX_BCP47_SUBTAG_LEN    9   /* including null terminator */
+#define MAX_LDML_KEY_LEN        22
+#define MAX_LDML_TYPE_LEN       32
+
+/*
+ * Converts an LDML keyword key (e.g. "calendar") into the key used inside the
+ * BCP 47 LDML extension (e.g. "ca").
+ *
+ * The lower-cased key is looked up in the "key" table of the keyword mapping
+ * data.  When no mapping is found, the key is passed through unchanged
+ * provided it already has the shape of an LDML extension key.
+ *
+ * key, keyLen      - LDML key; a negative keyLen means NUL-terminated
+ * bcpKey           - receives the result
+ * bcpKeyCapacity   - size of bcpKey
+ * status           - set to U_ILLEGAL_ARGUMENT_ERROR when the key is too long,
+ *                    no mapping is available, or the mapped value does not
+ *                    fit a BCP 47 subtag; resource and termination codes are
+ *                    reported as set by the called functions
+ *
+ * Returns the full length of the result (as returned by u_terminateChars),
+ * or 0 on failure.
+ */
+static int32_t
+_ldmlKeyToBCP47(const char* key, int32_t keyLen,
+                char* bcpKey, int32_t bcpKeyCapacity,
+                UErrorCode *status) {
+    UResourceBundle *rb;
+    char keyBuf[MAX_LDML_KEY_LEN];
+    char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
+    int32_t resultLen = 0;
+    int32_t copyLen;
+    int32_t i;
+    UErrorCode tmpStatus = U_ZERO_ERROR;
+    const UChar *uBcpKey;
+    int32_t bcpKeyLen = 0;
+
+    if (keyLen < 0) {
+        keyLen = (int32_t)uprv_strlen(key);
+    }
+
+    if (keyLen >= (int32_t)sizeof(keyBuf)) {
+        /* no known valid LDML key exceeding 21 */
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    uprv_memcpy(keyBuf, key, keyLen);
+    keyBuf[keyLen] = 0;
+
+    /* to lower case */
+    for (i = 0; i < keyLen; i++) {
+        keyBuf[i] = uprv_tolower(keyBuf[i]);
+    }
+
+    rb = ures_openDirect(NULL, SUPPLEMENTAL, status);
+    ures_getByKey(rb, BCP47MAPPINGS, rb, status);
+    ures_getByKey(rb, "key", rb, status);
+
+    if (U_FAILURE(*status)) {
+        ures_close(rb);
+        return 0;
+    }
+
+    uBcpKey = ures_getStringByKey(rb, keyBuf, &bcpKeyLen, &tmpStatus);
+    if (U_SUCCESS(tmpStatus)) {
+        if (bcpKeyLen < 0 || bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) {
+            /* mapped value would overrun the local buffer - treat as unusable */
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+        } else {
+            u_UCharsToChars(uBcpKey, bcpKeyBuf, bcpKeyLen);
+            bcpKeyBuf[bcpKeyLen] = 0;
+            resultLen = bcpKeyLen;
+        }
+    } else {
+        if (_isLDMLKey(key, keyLen)) {
+            /* _isLDMLKey guarantees 2 chars, so this always fits */
+            uprv_memcpy(bcpKeyBuf, key, keyLen);
+            bcpKeyBuf[keyLen] = 0;
+            resultLen = keyLen;
+        } else {
+            /* mapping not available */
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+    }
+    ures_close(rb);
+
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+
+    /* never hand a negative count to memcpy when the capacity is negative */
+    copyLen = uprv_min(resultLen, bcpKeyCapacity);
+    if (copyLen > 0) {
+        uprv_memcpy(bcpKey, bcpKeyBuf, copyLen);
+    }
+    return u_terminateChars(bcpKey, bcpKeyCapacity, resultLen, status);
+}
+
+/*
+ * Converts a key from the BCP 47 LDML extension (e.g. "ca") into the
+ * corresponding LDML keyword key (e.g. "calendar").
+ *
+ * The "key" table of the keyword mapping data is scanned for an entry whose
+ * value equals the lower-cased BCP 47 key; the resource key of that entry is
+ * the result.  When no entry matches, the lower-cased input is returned.
+ *
+ * bcpKey, bcpKeyLen - BCP 47 key; a negative bcpKeyLen means NUL-terminated
+ * key               - receives the result
+ * keyCapacity       - size of key
+ * status            - set to U_ILLEGAL_ARGUMENT_ERROR when the input is too
+ *                     long for a BCP 47 subtag; resource and termination
+ *                     codes are reported as set by the called functions
+ *
+ * Returns the full length of the result (as returned by u_terminateChars),
+ * or 0 on failure.
+ */
+static int32_t
+_bcp47ToLDMLKey(const char* bcpKey, int32_t bcpKeyLen,
+                char* key, int32_t keyCapacity,
+                UErrorCode *status) {
+    UResourceBundle *rb;
+    char bcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
+    int32_t resultLen = 0;
+    int32_t copyLen;
+    int32_t i;
+    const char *resKey = NULL;
+    UResourceBundle *keyMap;
+
+    if (bcpKeyLen < 0) {
+        bcpKeyLen = (int32_t)uprv_strlen(bcpKey);
+    }
+
+    if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    uprv_memcpy(bcpKeyBuf, bcpKey, bcpKeyLen);
+    bcpKeyBuf[bcpKeyLen] = 0;
+
+    /* to lower case */
+    for (i = 0; i < bcpKeyLen; i++) {
+        bcpKeyBuf[i] = uprv_tolower(bcpKeyBuf[i]);
+    }
+
+    rb = ures_openDirect(NULL, SUPPLEMENTAL, status);
+    ures_getByKey(rb, BCP47MAPPINGS, rb, status);
+    ures_getByKey(rb, "key", rb, status);
+    if (U_FAILURE(*status)) {
+        ures_close(rb);
+        return 0;
+    }
+
+    keyMap = ures_getNextResource(rb, NULL, status);
+    while (U_SUCCESS(*status)) {
+        const UChar *uBcpKey;
+        char tmpBcpKeyBuf[MAX_BCP47_SUBTAG_LEN];
+        int32_t tmpBcpKeyLen = 0;
+
+        uBcpKey = ures_getString(keyMap, &tmpBcpKeyLen, status);
+        if (U_FAILURE(*status)) {
+            break;
+        }
+        /*
+         * A value that does not fit the local buffer is longer than any
+         * accepted input, so it cannot match - skip it instead of
+         * overrunning the buffer.
+         */
+        if (tmpBcpKeyLen >= 0 && tmpBcpKeyLen < (int32_t)sizeof(tmpBcpKeyBuf)) {
+            u_UCharsToChars(uBcpKey, tmpBcpKeyBuf, tmpBcpKeyLen);
+            tmpBcpKeyBuf[tmpBcpKeyLen] = 0;
+            if (uprv_strcmp(bcpKeyBuf, tmpBcpKeyBuf) == 0) {
+                /* found a matching BCP47 key */
+                resKey = ures_getKey(keyMap);
+                resultLen = (int32_t)uprv_strlen(resKey);
+                break;
+            }
+        }
+        if (!ures_hasNext(rb)) {
+            break;
+        }
+        ures_getNextResource(rb, keyMap, status);
+    }
+
+    if (U_SUCCESS(*status)) {
+        if (resKey == NULL) {
+            /* no mapping - pass the lower-cased input through */
+            resKey = bcpKeyBuf;
+            resultLen = bcpKeyLen;
+        }
+        /*
+         * Copy while the bundles are still open: resKey may point at
+         * storage belonging to keyMap.  Also never hand a negative count
+         * to memcpy when the capacity is negative.
+         */
+        copyLen = uprv_min(resultLen, keyCapacity);
+        if (copyLen > 0) {
+            uprv_memcpy(key, resKey, copyLen);
+        }
+    }
+
+    ures_close(keyMap);
+    ures_close(rb);
+
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+
+    return u_terminateChars(key, keyCapacity, resultLen, status);
+}
+
+/*
+ * Converts the type (value) of an LDML keyword into the type used inside the
+ * BCP 47 LDML extension.
+ *
+ * Key and type are lower-cased; for the "timezone" key every '/' in the type
+ * is replaced by ':' (and the other characters are lower-cased) before the
+ * lookup.  The type is then looked up in the table named after the key
+ * inside the keyword mapping data.  When no mapping is found, the type is
+ * passed through unchanged provided it already has the shape of an LDML
+ * extension type.
+ *
+ * key, keyLen      - LDML key; a negative keyLen means NUL-terminated
+ * type, typeLen    - LDML type; a negative typeLen means NUL-terminated
+ * bcpType          - receives the result
+ * bcpTypeCapacity  - size of bcpType
+ * status           - set to U_ILLEGAL_ARGUMENT_ERROR when key or type is too
+ *                    long, no mapping is available, or the mapped value does
+ *                    not fit a BCP 47 subtag; other lookup failures and
+ *                    termination codes are reported as set by the called
+ *                    functions
+ *
+ * Returns the full length of the result (as returned by u_terminateChars),
+ * or 0 on failure.
+ */
+static int32_t
+_ldmlTypeToBCP47(const char* key, int32_t keyLen,
+                 const char* type, int32_t typeLen,
+                 char* bcpType, int32_t bcpTypeCapacity,
+                 UErrorCode *status) {
+    UResourceBundle *rb;
+    char keyBuf[MAX_LDML_KEY_LEN];
+    char typeBuf[MAX_LDML_TYPE_LEN];
+    char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
+    int32_t resultLen = 0;
+    int32_t copyLen;
+    int32_t i;
+    UErrorCode tmpStatus = U_ZERO_ERROR;
+    const UChar *uBcpType;
+    int32_t bcpTypeLen = 0;
+    UBool isTimezone = FALSE;
+
+    if (keyLen < 0) {
+        keyLen = (int32_t)uprv_strlen(key);
+    }
+    if (keyLen >= (int32_t)sizeof(keyBuf)) {
+        /* no known valid LDML key exceeding 21 */
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    uprv_memcpy(keyBuf, key, keyLen);
+    keyBuf[keyLen] = 0;
+
+    /* to lower case */
+    for (i = 0; i < keyLen; i++) {
+        keyBuf[i] = uprv_tolower(keyBuf[i]);
+    }
+    if (uprv_strcmp(keyBuf, "timezone") == 0) {
+        isTimezone = TRUE;
+    }
+
+    if (typeLen < 0) {
+        typeLen = (int32_t)uprv_strlen(type);
+    }
+    if (typeLen >= (int32_t)sizeof(typeBuf)) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    uprv_memcpy(typeBuf, type, typeLen);
+    typeBuf[typeLen] = 0;
+
+    /* normalize the type into the form used for the lookup */
+    for (i = 0; i < typeLen; i++) {
+        if (isTimezone && typeBuf[i] == '/') {
+            typeBuf[i] = ':';
+        } else {
+            typeBuf[i] = uprv_tolower(typeBuf[i]);
+        }
+    }
+
+    rb = ures_openDirect(NULL, SUPPLEMENTAL, status);
+    ures_getByKey(rb, BCP47MAPPINGS, rb, status);
+    if (U_FAILURE(*status)) {
+        ures_close(rb);
+        return 0;
+    }
+
+    ures_getByKey(rb, keyBuf, rb, &tmpStatus);
+    uBcpType = ures_getStringByKey(rb, typeBuf, &bcpTypeLen, &tmpStatus);
+    if (U_SUCCESS(tmpStatus)) {
+        if (bcpTypeLen < 0 || bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) {
+            /* mapped value would overrun the local buffer - treat as unusable */
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+        } else {
+            u_UCharsToChars(uBcpType, bcpTypeBuf, bcpTypeLen);
+            resultLen = bcpTypeLen;
+        }
+    } else if (tmpStatus == U_MISSING_RESOURCE_ERROR) {
+        if (_isLDMLType(type, typeLen)) {
+            /* _isLDMLType guarantees 3 to 8 chars, so this always fits */
+            uprv_memcpy(bcpTypeBuf, type, typeLen);
+            resultLen = typeLen;
+        } else {
+            /* mapping not available */
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+    } else {
+        *status = tmpStatus;
+    }
+    ures_close(rb);
+
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+
+    /* never hand a negative count to memcpy when the capacity is negative */
+    copyLen = uprv_min(resultLen, bcpTypeCapacity);
+    if (copyLen > 0) {
+        uprv_memcpy(bcpType, bcpTypeBuf, copyLen);
+    }
+    return u_terminateChars(bcpType, bcpTypeCapacity, resultLen, status);
+}
+
+/*
+ * Converts a type from the BCP 47 LDML extension into the corresponding LDML
+ * keyword type.
+ *
+ * The table named after the (lower-cased) LDML key inside the keyword mapping
+ * data is scanned for an entry whose value equals the lower-cased BCP 47
+ * type; the resource key of that entry is the result.  When nothing matches,
+ * or the table does not exist, the lower-cased input is returned.  For the
+ * "timezone" key every ':' in the copied result is replaced by '/'.
+ *
+ * key, keyLen          - LDML key; a negative keyLen means NUL-terminated
+ * bcpType, bcpTypeLen  - BCP 47 type; a negative length means NUL-terminated
+ * type                 - receives the result
+ * typeCapacity         - size of type
+ * status               - set to U_ILLEGAL_ARGUMENT_ERROR when key or type is
+ *                        too long; lookup failures other than
+ *                        U_MISSING_RESOURCE_ERROR and termination codes are
+ *                        reported as set by the called functions
+ *
+ * Returns the full length of the result (as returned by u_terminateChars),
+ * or 0 on failure.
+ */
+static int32_t
+_bcp47ToLDMLType(const char* key, int32_t keyLen,
+                 const char* bcpType, int32_t bcpTypeLen,
+                 char* type, int32_t typeCapacity,
+                 UErrorCode *status) {
+    UResourceBundle *rb;
+    char keyBuf[MAX_LDML_KEY_LEN];
+    char bcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
+    int32_t resultLen = 0;
+    int32_t i;
+    const char *resType = NULL;
+    UResourceBundle *typeMap;
+    UErrorCode tmpStatus = U_ZERO_ERROR;
+    int32_t copyLen;
+    UBool failed;
+
+    if (keyLen < 0) {
+        keyLen = (int32_t)uprv_strlen(key);
+    }
+
+    if (keyLen >= (int32_t)sizeof(keyBuf)) {
+        /* no known valid LDML key exceeding 21 */
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    uprv_memcpy(keyBuf, key, keyLen);
+    keyBuf[keyLen] = 0;
+
+    /* to lower case */
+    for (i = 0; i < keyLen; i++) {
+        keyBuf[i] = uprv_tolower(keyBuf[i]);
+    }
+
+    if (bcpTypeLen < 0) {
+        bcpTypeLen = (int32_t)uprv_strlen(bcpType);
+    }
+
+    if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    uprv_memcpy(bcpTypeBuf, bcpType, bcpTypeLen);
+    bcpTypeBuf[bcpTypeLen] = 0;
+
+    /* to lower case */
+    for (i = 0; i < bcpTypeLen; i++) {
+        bcpTypeBuf[i] = uprv_tolower(bcpTypeBuf[i]);
+    }
+
+    rb = ures_openDirect(NULL, SUPPLEMENTAL, status);
+    ures_getByKey(rb, BCP47MAPPINGS, rb, status);
+    if (U_FAILURE(*status)) {
+        ures_close(rb);
+        return 0;
+    }
+
+    ures_getByKey(rb, keyBuf, rb, &tmpStatus);
+    typeMap = ures_getNextResource(rb, NULL, &tmpStatus);
+    while (U_SUCCESS(tmpStatus)) {
+        const UChar *uBcpType;
+        char tmpBcpTypeBuf[MAX_BCP47_SUBTAG_LEN];
+        int32_t tmpBcpTypeLen = 0;
+
+        uBcpType = ures_getString(typeMap, &tmpBcpTypeLen, &tmpStatus);
+        if (U_FAILURE(tmpStatus)) {
+            break;
+        }
+        /*
+         * A value that does not fit the local buffer is longer than any
+         * accepted input, so it cannot match - skip it instead of
+         * overrunning the buffer.
+         */
+        if (tmpBcpTypeLen >= 0 && tmpBcpTypeLen < (int32_t)sizeof(tmpBcpTypeBuf)) {
+            u_UCharsToChars(uBcpType, tmpBcpTypeBuf, tmpBcpTypeLen);
+            tmpBcpTypeBuf[tmpBcpTypeLen] = 0;
+            if (uprv_strcmp(bcpTypeBuf, tmpBcpTypeBuf) == 0) {
+                /* found a matching BCP47 type */
+                resType = ures_getKey(typeMap);
+                resultLen = (int32_t)uprv_strlen(resType);
+                break;
+            }
+        }
+        if (!ures_hasNext(rb)) {
+            break;
+        }
+        ures_getNextResource(rb, typeMap, &tmpStatus);
+    }
+
+    /* a missing table or entry is not an error - the input is passed through */
+    failed = (UBool)(U_FAILURE(tmpStatus) && tmpStatus != U_MISSING_RESOURCE_ERROR);
+
+    if (!failed) {
+        if (resType == NULL) {
+            resType = bcpTypeBuf;
+            resultLen = bcpTypeLen;
+        }
+
+        /*
+         * Copy while the bundles are still open: resType may point at
+         * storage belonging to typeMap.  Also never hand a negative count
+         * to memcpy when the capacity is negative.
+         */
+        copyLen = uprv_min(resultLen, typeCapacity);
+        if (copyLen > 0) {
+            uprv_memcpy(type, resType, copyLen);
+
+            if (uprv_strcmp(keyBuf, "timezone") == 0) {
+                for (i = 0; i < copyLen; i++) {
+                    if (*(type + i) == ':') {
+                        *(type + i) = '/';
+                    }
+                }
+            }
+        }
+    }
+
+    ures_close(typeMap);
+    ures_close(rb);
+
+    if (failed) {
+        *status = tmpStatus;
+        return 0;
+    }
+
+    return u_terminateChars(type, typeCapacity, resultLen, status);
+}
+
+/*
+ * Writes the language subtag for localeID at appendAt.
+ *
+ * The language is obtained with uloc_getLanguage().  LANG_UND is written
+ * when the language is empty, and - in non-strict mode only - when it could
+ * not be retrieved or is not a well-formed language subtag.  In strict mode
+ * those two cases set *status to U_ILLEGAL_ARGUMENT_ERROR and return 0.
+ * A language listed in DEPRECATEDLANGS is replaced by its new code.
+ *
+ * At most capacity chars are written, but the returned length is always the
+ * full length of the subtag.  u_terminateChars() is called with that length
+ * and status.
+ */
+static int32_t
+_appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
+    char buf[ULOC_LANG_CAPACITY];
+    UErrorCode tmpStatus = U_ZERO_ERROR;
+    const char *src;
+    int32_t srcLen;
+    int32_t k;
+
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+
+    srcLen = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus);
+    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+        if (strict) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+        /* treat as "no language" */
+        srcLen = 0;
+    }
+
+    /* Note: returned language code is in lower case letters */
+
+    if (srcLen != 0 && !_isLanguageSubtag(buf, srcLen)) {
+        /* invalid language code */
+        if (strict) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+        srcLen = 0;
+    }
+
+    if (srcLen == 0) {
+        src = LANG_UND;
+        srcLen = LANG_UND_LEN;
+    } else {
+        /* resolve deprecated */
+        for (k = 0; DEPRECATEDLANGS[k] != NULL; k += 2) {
+            if (uprv_strcmp(buf, DEPRECATEDLANGS[k]) == 0) {
+                uprv_strcpy(buf, DEPRECATEDLANGS[k + 1]);
+                srcLen = uprv_strlen(buf);
+                break;
+            }
+        }
+        src = buf;
+    }
+
+    if (capacity > 0) {
+        uprv_memcpy(appendAt, src, uprv_min(srcLen, capacity));
+    }
+    u_terminateChars(appendAt, capacity, srcLen, status);
+    return srcLen;
+}
+
+/*
+ * Writes SEP followed by the lower-cased script subtag of localeID at
+ * appendAt, or nothing when the locale has no script.
+ *
+ * When the script cannot be retrieved with uloc_getScript() or is not a
+ * well-formed script subtag, the function returns 0 without writing; in
+ * strict mode it also sets *status to U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * At most capacity chars are written, but the returned length is always the
+ * full length needed.  u_terminateChars() is called with that length and
+ * status.
+ */
+static int32_t
+_appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
+    char buf[ULOC_SCRIPT_CAPACITY];
+    UErrorCode tmpStatus = U_ZERO_ERROR;
+    int32_t scriptLen, k;
+    int32_t written = 0;
+
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+
+    scriptLen = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus);
+    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING
+            || (scriptLen > 0 && !_isScriptSubtag(buf, scriptLen))) {
+        /* unavailable or invalid script code */
+        if (strict) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+        return 0;
+    }
+
+    if (scriptLen > 0) {
+        /* to lowercase */
+        for (k = 0; k < scriptLen; k++) {
+            buf[k] = uprv_tolower(buf[k]);
+        }
+
+        /* separator first, then the subtag itself */
+        if (capacity > 0) {
+            appendAt[0] = SEP;
+        }
+        if (capacity > 1) {
+            uprv_memcpy(appendAt + 1, buf, uprv_min(scriptLen, capacity - 1));
+        }
+        written = scriptLen + 1;
+    }
+
+    u_terminateChars(appendAt, capacity, written, status);
+    return written;
+}
+
+/*
+ * Writes SEP followed by the lower-cased region subtag of localeID at
+ * appendAt, or nothing when the locale has no region.
+ *
+ * When the region cannot be retrieved with uloc_getCountry() or is not a
+ * well-formed region subtag, the function returns 0 without writing; in
+ * strict mode it also sets *status to U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * At most capacity chars are written, but the returned length is always the
+ * full length needed.  u_terminateChars() is called with that length and
+ * status.
+ */
+static int32_t
+_appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
+    char buf[ULOC_COUNTRY_CAPACITY];
+    UErrorCode tmpStatus = U_ZERO_ERROR;
+    int32_t regionLen, k;
+    int32_t written = 0;
+
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+
+    regionLen = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus);
+    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING
+            || (regionLen > 0 && !_isRegionSubtag(buf, regionLen))) {
+        /* unavailable or invalid region code */
+        if (strict) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+        return 0;
+    }
+
+    if (regionLen > 0) {
+        /* to lowercase */
+        for (k = 0; k < regionLen; k++) {
+            buf[k] = uprv_tolower(buf[k]);
+        }
+
+        /* separator first, then the subtag itself */
+        if (capacity > 0) {
+            appendAt[0] = SEP;
+        }
+        if (capacity > 1) {
+            uprv_memcpy(appendAt + 1, buf, uprv_min(regionLen, capacity - 1));
+        }
+        written = regionLen + 1;
+    }
+
+    u_terminateChars(appendAt, capacity, written, status);
+    return written;
+}
+
+/*
+ * Writes the variant subtags of localeID at appendAt, each one preceded by SEP.
+ *
+ * The variant string from uloc_getVariant() is split at SEP / LOCALE_SEP,
+ * every piece is lower-cased and checked with _isVariantSubtag(), and the
+ * accepted pieces are written in the sorted order kept by _addVariantToList().
+ *
+ * strict == TRUE : an empty, malformed or duplicated variant (or a failure to
+ *                  fetch the variant string) sets *status to
+ *                  U_ILLEGAL_ARGUMENT_ERROR and the function returns 0.
+ * strict == FALSE: such variants are silently dropped.
+ *
+ * At most capacity chars are written, but the returned length is always the
+ * full length needed. Returns 0 when *status is a failure on entry or on exit.
+ */
+static int32_t
+_appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
+ char buf[ULOC_FULLNAME_CAPACITY];
+ UErrorCode tmpStatus = U_ZERO_ERROR;
+ int32_t len, i;
+ int32_t reslen = 0;
+
+ if (U_FAILURE(*status)) {
+ return 0;
+ }
+
+ len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
+ if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+ if (strict) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ }
+ return 0;
+ }
+
+ if (len > 0) {
+ char *p, *pVar;
+ UBool bNext = TRUE;
+ VariantListEntry *var;
+ VariantListEntry *varFirst = NULL;
+
+ /* pVar marks the start of the piece being scanned, NULL between pieces */
+ pVar = NULL;
+ p = buf;
+ while (bNext) {
+ if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
+ if (*p == 0) {
+ bNext = FALSE;
+ } else {
+ *p = 0; /* terminate */
+ }
+ if (pVar == NULL) {
+ if (strict) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ break;
+ }
+ /* ignore empty variant */
+ } else {
+ /* to lowercase */
+ for (i = 0; *(pVar + i) != 0; i++) {
+ *(pVar + i) = uprv_tolower(*(pVar + i));
+ }
+
+ /* validate */
+ if (_isVariantSubtag(pVar, -1)) {
+ /* emit the variant to the list */
+ /* the entry only points into buf; the text itself is not copied */
+ var = uprv_malloc(sizeof(VariantListEntry));
+ if (var == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ break;
+ }
+ var->variant = pVar;
+ if (!_addVariantToList(&varFirst, var)) {
+ /* duplicated variant */
+ uprv_free(var);
+ if (strict) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ break;
+ }
+ }
+ } else if (strict) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ break;
+ }
+ }
+ /* reset variant starting position */
+ pVar = NULL;
+ } else if (pVar == NULL) {
+ pVar = p;
+ }
+ p++;
+ }
+
+ if (U_SUCCESS(*status)) {
+ if (varFirst != NULL) {
+ int32_t varLen;
+
+ /* write out sorted/validated/normalized variants to the target */
+ /* reslen keeps counting even after the output no longer fits */
+ var = varFirst;
+ while (var != NULL) {
+ if (reslen < capacity) {
+ *(appendAt + reslen) = SEP;
+ }
+ reslen++;
+ varLen = uprv_strlen(var->variant);
+ if (reslen < capacity) {
+ uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
+ }
+ reslen += varLen;
+ var = var->next;
+ }
+ }
+ }
+
+ /* clean up */
+ /* runs on the error paths too, so the list nodes are always released */
+ var = varFirst;
+ while (var != NULL) {
+ VariantListEntry *tmpVar = var->next;
+ uprv_free(var);
+ var = tmpVar;
+ }
+
+ if (U_FAILURE(*status)) {
+ return 0;
+ }
+ }
+
+ u_terminateChars(appendAt, capacity, reslen, status);
+ return reslen;
+}
+
+/*
+ * Writes the keywords of localeID at appendAt as BCP 47 extensions and
+ * private use.
+ *
+ * Keywords whose key is longer than one char are treated as LDML keywords:
+ * key and value are converted with _ldmlKeyToBCP47() / _ldmlTypeToBCP47() and
+ * written together behind a single "-u" singleton. One-char keys are written
+ * as they are after their value passed the private use (key PRIVATEUSE) or
+ * extension syntax check. The output order is the one produced by
+ * _addExtensionToList() in locale-to-BCP mode.
+ *
+ * strict == TRUE : a keyword that cannot be read, converted or validated, or
+ *                  a duplicated key, sets *status to U_ILLEGAL_ARGUMENT_ERROR.
+ * strict == FALSE: such keywords are skipped.
+ * Running out of scratch space for a one-char key's value is an error in
+ * both modes.
+ *
+ * At most capacity chars are written, but the returned value is based on the
+ * full length needed (it is the result of u_terminateChars()). Returns 0 when
+ * *status is a failure.
+ */
+static int32_t
+_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
+ char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
+ UEnumeration *keywordEnum = NULL;
+ int32_t reslen = 0;
+
+ keywordEnum = uloc_openKeywords(localeID, status);
+ if (U_FAILURE(*status)) {
+ uenum_close(keywordEnum);
+ return 0;
+ }
+ if (keywordEnum != NULL) {
+ /* reorder extensions */
+ int32_t len;
+ const char *key;
+ ExtensionListEntry *firstExt = NULL;
+ ExtensionListEntry *ext;
+ /* extBuf stores the converted keys and values that the list entries point at */
+ char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
+ char *pExtBuf = extBuf;
+ int32_t extBufCapacity = sizeof(extBuf);
+ const char *bcpKey, *bcpValue;
+ UErrorCode tmpStatus = U_ZERO_ERROR;
+ int32_t keylen;
+ UBool isLDMLKeyword;
+
+ while (TRUE) {
+ key = uenum_next(keywordEnum, NULL, status);
+ if (key == NULL) {
+ break;
+ }
+ len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
+ if (U_FAILURE(tmpStatus)) {
+ if (strict) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ break;
+ }
+ /* ignore this keyword */
+ tmpStatus = U_ZERO_ERROR;
+ continue;
+ }
+
+ keylen = uprv_strlen(key);
+ /* keys longer than one char are LDML keywords, one-char keys are singletons */
+ isLDMLKeyword = (keylen > 1);
+
+ if (isLDMLKeyword) {
+ int32_t modKeyLen;
+
+ /* transform key and value to bcp47 style */
+ modKeyLen = _ldmlKeyToBCP47(key, keylen, pExtBuf, extBufCapacity, &tmpStatus);
+ if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+ if (strict) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ break;
+ }
+ tmpStatus = U_ZERO_ERROR;
+ continue;
+ }
+
+ /* claim the space of the converted key, including its terminator */
+ bcpKey = pExtBuf;
+ pExtBuf += (modKeyLen + 1);
+ extBufCapacity -= (modKeyLen + 1);
+
+ len = _ldmlTypeToBCP47(key, keylen, buf, len, pExtBuf, extBufCapacity, &tmpStatus);
+ if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+ if (strict) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ break;
+ }
+ tmpStatus = U_ZERO_ERROR;
+ continue;
+ }
+ /* claim the space of the converted value, including its terminator */
+ bcpValue = pExtBuf;
+ pExtBuf += (len + 1);
+ extBufCapacity -= (len + 1);
+ } else {
+ if (*key == PRIVATEUSE) {
+ if (!_isPrivateuseValueSubtags(buf, len)) {
+ if (strict) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ break;
+ }
+ continue;
+ }
+ } else {
+ if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
+ if (strict) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ break;
+ }
+ continue;
+ }
+ }
+ /* the key is used as is; only the value is copied into extBuf */
+ bcpKey = key;
+ if ((len + 1) < extBufCapacity) {
+ uprv_memcpy(pExtBuf, buf, len);
+ bcpValue = pExtBuf;
+
+ pExtBuf += len;
+
+ *pExtBuf = 0;
+ pExtBuf++;
+
+ extBufCapacity -= (len + 1);
+ } else {
+ /* out of scratch space: reported even when strict is FALSE */
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ break;
+ }
+ }
+
+ /* create ExtensionListEntry */
+ ext = uprv_malloc(sizeof(ExtensionListEntry));
+ if (ext == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ break;
+ }
+ ext->key = bcpKey;
+ ext->value = bcpValue;
+
+ if (!_addExtensionToList(&firstExt, ext, TRUE)) {
+ /* key already in the list: the entry was not linked in */
+ uprv_free(ext);
+ if (strict) {
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ break;
+ }
+ }
+ }
+ if (U_SUCCESS(*status) && (firstExt != NULL)) {
+ UBool startLDMLExtension = FALSE;
+
+ /* write out the sorted BCP47 extensions and private use */
+ /* reslen keeps counting even after the output no longer fits */
+ ext = firstExt;
+ while (ext != NULL) {
+ if (uprv_strlen(ext->key) > 1 && !startLDMLExtension) {
+ /* write LDML singleton extension */
+ if (reslen < capacity) {
+ *(appendAt + reslen) = SEP;
+ }
+ reslen++;
+ if (reslen < capacity) {
+ *(appendAt + reslen) = LDMLEXT;
+ }
+ reslen++;
+ startLDMLExtension = TRUE;
+ }
+
+ /* "-key-value" */
+ if (reslen < capacity) {
+ *(appendAt + reslen) = SEP;
+ }
+ reslen++;
+ len = uprv_strlen(ext->key);
+ if (reslen < capacity) {
+ uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
+ }
+ reslen += len;
+ if (reslen < capacity) {
+ *(appendAt + reslen) = SEP;
+ }
+ reslen++;
+ len = uprv_strlen(ext->value);
+ if (reslen < capacity) {
+ uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
+ }
+ reslen += len;
+
+ ext = ext->next;
+ }
+ }
+ /* clean up */
+ /* runs on the error paths too, so the list nodes are always released */
+ ext = firstExt;
+ while (ext != NULL) {
+ ExtensionListEntry *tmpExt = ext->next;
+ uprv_free(ext);
+ ext = tmpExt;
+ }
+
+ uenum_close(keywordEnum);
+
+ if (U_FAILURE(*status)) {
+ return 0;
+ }
+ }
+
+ return u_terminateChars(appendAt, capacity, reslen, status);
+}
+
+/*
+ * Append keywords parsed from LDML extension value
+ * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
+ * Note: char* buf is used for storing keywords
+ *
+ * ldmlext  - extension value: BCP 47 key/type subtags alternating, joined by SEP
+ * appendTo - list that receives one entry per key/type pair
+ * buf      - storage for the converted key and type strings; the entries
+ *            added to appendTo point into it
+ * bufSize  - size of buf
+ * status   - U_ILLEGAL_ARGUMENT_ERROR when a key has no type or a key occurs
+ *            twice inside ldmlext; U_BUFFER_OVERFLOW_ERROR when buf is too
+ *            small; U_MEMORY_ALLOCATION_ERROR when an entry cannot be
+ *            allocated; conversion errors are passed through
+ *
+ * On failure nothing is added to appendTo.  An entry whose key is already
+ * present in appendTo is dropped (and released) without raising an error.
+ */
+static void
+_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UErrorCode *status) {
+    const char *p, *pNext, *pSep;
+    const char *pBcpKey, *pBcpType;
+    const char *pKey, *pType;
+    int32_t bcpKeyLen = 0, bcpTypeLen = 0;
+    ExtensionListEntry *kwd, *nextKwd;
+    ExtensionListEntry *kwdFirst = NULL;
+    int32_t bufIdx = 0;
+    int32_t len;
+
+    if (U_FAILURE(*status)) {
+        return;
+    }
+
+    pNext = ldmlext;
+    pBcpKey = pBcpType = NULL;
+    while (pNext) {
+        p = pSep = pNext;
+
+        /* locate next separator char */
+        while (*pSep) {
+            if (*pSep == SEP) {
+                break;
+            }
+            pSep++;
+        }
+        if (*pSep == 0) {
+            /* last subtag */
+            pNext = NULL;
+        } else {
+            pNext = pSep + 1;
+        }
+
+        if (pBcpKey == NULL) {
+            /* first subtag of a pair - remember it and wait for the type */
+            pBcpKey = p;
+            bcpKeyLen = (int32_t)(pSep - p);
+        } else {
+            pBcpType = p;
+            bcpTypeLen = (int32_t)(pSep - p);
+
+            /*
+             * One byte is always held back for the terminator written below,
+             * so the buffer must have at least one free byte; otherwise the
+             * capacity passed on would be negative.
+             */
+            if (bufIdx >= bufSize) {
+                *status = U_BUFFER_OVERFLOW_ERROR;
+                goto cleanup;
+            }
+
+            /* BCP key to locale key */
+            len = _bcp47ToLDMLKey(pBcpKey, bcpKeyLen, buf + bufIdx, bufSize - bufIdx - 1, status);
+            if (U_FAILURE(*status)) {
+                goto cleanup;
+            }
+            pKey = buf + bufIdx;
+            bufIdx += len;
+            *(buf + bufIdx) = 0;
+            bufIdx++;
+
+            if (bufIdx >= bufSize) {
+                *status = U_BUFFER_OVERFLOW_ERROR;
+                goto cleanup;
+            }
+
+            /* BCP type to locale type */
+            len = _bcp47ToLDMLType(pKey, -1, pBcpType, bcpTypeLen, buf + bufIdx, bufSize - bufIdx - 1, status);
+            if (U_FAILURE(*status)) {
+                goto cleanup;
+            }
+            pType = buf + bufIdx;
+            bufIdx += len;
+            *(buf + bufIdx) = 0;
+            bufIdx++;
+
+            /* create an ExtensionListEntry for this keyword */
+            kwd = uprv_malloc(sizeof(ExtensionListEntry));
+            if (kwd == NULL) {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                goto cleanup;
+            }
+
+            kwd->key = pKey;
+            kwd->value = pType;
+
+            /* collect in a private list first so that a failure leaves appendTo untouched */
+            if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
+                *status = U_ILLEGAL_ARGUMENT_ERROR;
+                uprv_free(kwd);
+                goto cleanup;
+            }
+
+            /* for next pair */
+            pBcpKey = NULL;
+            pBcpType = NULL;
+        }
+    }
+
+    if (pBcpKey != NULL) {
+        /* a key without a type */
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        goto cleanup;
+    }
+
+    /* move the collected entries over to the caller's list */
+    kwd = kwdFirst;
+    while (kwd != NULL) {
+        nextKwd = kwd->next;
+        if (!_addExtensionToList(appendTo, kwd, FALSE)) {
+            /* key already present in the target list: the entry was not linked in, so release it */
+            uprv_free(kwd);
+        }
+        kwd = nextKwd;
+    }
+
+    return;
+
+cleanup:
+    kwd = kwdFirst;
+    while (kwd != NULL) {
+        nextKwd = kwd->next;
+        uprv_free(kwd);
+        kwd = nextKwd;
+    }
+}
+
+
+/*
+ * Writes the ICU keyword section ("@key=type;key=type...") for a parsed
+ * language tag into appendAt.
+ *
+ * Every extension of the tag becomes a keyword: the LDML extension is expanded
+ * through _appendLDMLExtensionAsKeywords(), any other extension is used as-is,
+ * and a non-empty private use value is added under PRIVATEUSE_KEY. Entries are
+ * collected with _addExtensionToList() and written in the resulting list order.
+ *
+ * Returns 0 if status is (or becomes) a failure; otherwise the value returned
+ * by u_terminateChars() for the number of characters the output requires.
+ * Characters that do not fit into capacity are counted but not written.
+ */
+static int32_t
+_appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
+    char ldmlBuf[ULOC_KEYWORDS_CAPACITY];
+    ExtensionListEntry *head = NULL;
+    ExtensionListEntry *entry;
+    ExtensionListEntry *following;
+    const char *extKey, *extValue, *privuse;
+    int32_t extCount;
+    int32_t idx;
+    int32_t partLen;
+    int32_t written = 0;
+
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+
+    /* turn every extension into one or more keyword entries; stop on the first error */
+    extCount = ultag_getExtensionsSize(langtag);
+    for (idx = 0; idx < extCount && U_SUCCESS(*status); idx++) {
+        extKey = ultag_getExtensionKey(langtag, idx);
+        extValue = ultag_getExtensionValue(langtag, idx);
+
+        if (*extKey == LDMLEXT) {
+            /* the LDML extension carries key/type pairs of its own */
+            _appendLDMLExtensionAsKeywords(extValue, &head, ldmlBuf, sizeof(ldmlBuf), status);
+            continue;
+        }
+
+        entry = uprv_malloc(sizeof(ExtensionListEntry));
+        if (entry == NULL) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            continue;
+        }
+        entry->key = extKey;
+        entry->value = extValue;
+        if (!_addExtensionToList(&head, entry, FALSE)) {
+            /* the list did not take ownership of the entry */
+            uprv_free(entry);
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+    }
+
+    /* the private use value, if any, is exposed as one more keyword */
+    if (U_SUCCESS(*status)) {
+        privuse = ultag_getPrivateUse(langtag);
+        if (uprv_strlen(privuse) > 0) {
+            entry = uprv_malloc(sizeof(ExtensionListEntry));
+            if (entry != NULL) {
+                entry->key = PRIVATEUSE_KEY;
+                entry->value = privuse;
+                if (!_addExtensionToList(&head, entry, FALSE)) {
+                    uprv_free(entry);
+                    *status = U_ILLEGAL_ARGUMENT_ERROR;
+                }
+            } else {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+            }
+        }
+    }
+
+    /* emit the collected keywords, preflighting anything beyond capacity */
+    if (U_SUCCESS(*status)) {
+        for (entry = head; entry != NULL; entry = entry->next) {
+            /* '@' introduces the first keyword, ';' separates the others */
+            if (written < capacity) {
+                appendAt[written] = (entry == head) ? LOCALE_EXT_SEP : LOCALE_KEYWORD_SEP;
+            }
+            written++;
+
+            /* key */
+            partLen = uprv_strlen(entry->key);
+            if (written < capacity) {
+                uprv_memcpy(appendAt + written, entry->key, uprv_min(partLen, capacity - written));
+            }
+            written += partLen;
+
+            /* '=' */
+            if (written < capacity) {
+                appendAt[written] = LOCALE_KEY_TYPE_SEP;
+            }
+            written++;
+
+            /* type */
+            partLen = uprv_strlen(entry->value);
+            if (written < capacity) {
+                uprv_memcpy(appendAt + written, entry->value, uprv_min(partLen, capacity - written));
+            }
+            written += partLen;
+        }
+    }
+
+    /* the list entries are temporary; release them on every path */
+    for (entry = head; entry != NULL; entry = following) {
+        following = entry->next;
+        uprv_free(entry);
+    }
+
+    return U_FAILURE(*status) ? 0 : u_terminateChars(appendAt, capacity, written, status);
+}
+
+/*
+* -------------------------------------------------
+*
+* ultag_ APIs
+*
+* -------------------------------------------------
+*/
+
+/*
+ * Bit flags used by the parser.
+ * ultag_parse() keeps a mask of these in its `next` variable; a set bit means
+ * that kind of subtag is acceptable at the current position.
+ */
+#define LANG 0x0001 /* primary language subtag */
+#define EXTL 0x0002 /* extended language subtag */
+#define SCRT 0x0004 /* script subtag */
+#define REGN 0x0008 /* region subtag */
+#define VART 0x0010 /* variant subtag */
+#define EXTS 0x0020 /* extension singleton */
+#define EXTV 0x0040 /* extension value subtag */
+#define PRIV 0x0080 /* private use singleton */
+
+/*
+ * Parses a BCP 47 language tag into a newly allocated ULanguageTag.
+ *
+ * The input is copied and lower-cased; the subtag pointers stored in the
+ * result point into that private copy. Parsing is lenient: it stops at the
+ * first subtag that is not acceptable at its position and keeps whatever was
+ * recognized up to that point, so a partially well-formed tag still yields
+ * a result rather than an error.
+ *
+ * @param tag       the language tag text
+ * @param tagLen    length of tag, or a negative value if tag is NUL-terminated
+ * @param parsedLen if not NULL, receives the number of leading characters of
+ *                  tag that were successfully parsed (0 on failure or when
+ *                  the tag is shorter than MINLEN)
+ * @param status    ICU error code; set to U_MEMORY_ALLOCATION_ERROR when an
+ *                  allocation fails
+ * @return the parsed tag, to be released with ultag_close(), or NULL if
+ *         status indicates a failure on entry or an allocation failed
+ */
+U_CFUNC ULanguageTag*
+ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
+    ULanguageTag *t;
+    char *tagBuf;
+    int16_t next;
+    char *pSubtag, *pNext, *pLastGoodPosition;
+    int32_t subtagLen;
+    int32_t extlangIdx;
+    ExtensionListEntry *pExtension;
+    char *pExtValueSubtagEnd;
+    int32_t i;
+    UBool isLDMLExtension, reqLDMLType;
+
+    if (parsedLen != NULL) {
+        *parsedLen = 0;
+    }
+
+    if (U_FAILURE(*status)) {
+        return NULL;
+    }
+
+    if (tagLen < 0) {
+        tagLen = uprv_strlen(tag);
+    }
+
+    /* copy the entire string */
+    tagBuf = (char*)uprv_malloc(tagLen + 1);
+    if (tagBuf == NULL) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    uprv_memcpy(tagBuf, tag, tagLen);
+    *(tagBuf + tagLen) = 0;
+
+    /* to lower case */
+    for (i = 0; i < tagLen; i++) {
+        tagBuf[i] = uprv_tolower(tagBuf[i]);
+    }
+
+    /* create a ULanguageTag */
+    t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
+    if (t == NULL) {
+        uprv_free(tagBuf);
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+    /* only touch the struct once the allocation is known to have succeeded */
+    _initializeULanguageTag(t);
+    t->buf = tagBuf;
+
+    if (tagLen < MINLEN) {
+        /* the input tag is too short - return empty ULanguageTag */
+        return t;
+    }
+
+    /* check if the tag is grandfathered */
+    for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
+        if (uprv_strcmp(GRANDFATHERED[i], tagBuf) == 0) {
+            /* a grandfathered tag is always longer than its preferred mapping */
+            uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
+            t->language = t->buf;
+            if (parsedLen != NULL) {
+                *parsedLen = tagLen;
+            }
+            return t;
+        }
+    }
+
+    /*
+     * langtag = language
+     *           ["-" script]
+     *           ["-" region]
+     *           *("-" variant)
+     *           *("-" extension)
+     *           ["-" privateuse]
+     */
+
+    next = LANG | PRIV;
+    pNext = pLastGoodPosition = tagBuf;
+    extlangIdx = 0;
+    pExtension = NULL;
+    pExtValueSubtagEnd = NULL;
+    isLDMLExtension = FALSE;
+    reqLDMLType = FALSE;
+
+    while (pNext) {
+        char *pSep;
+
+        pSubtag = pNext;
+
+        /* locate next separator char */
+        pSep = pSubtag;
+        while (*pSep) {
+            if (*pSep == SEP) {
+                break;
+            }
+            pSep++;
+        }
+        if (*pSep == 0) {
+            /* last subtag */
+            pNext = NULL;
+        } else {
+            pNext = pSep + 1;
+        }
+        subtagLen = pSep - pSubtag;
+
+        if (next & LANG) {
+            if (_isLanguageSubtag(pSubtag, subtagLen)) {
+                *pSep = 0;  /* terminate */
+                t->language = pSubtag;
+
+                pLastGoodPosition = pSep;
+                next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
+                continue;
+            }
+        }
+        if (next & EXTL) {
+            if (_isExtlangSubtag(pSubtag, subtagLen)) {
+                *pSep = 0;
+                t->extlang[extlangIdx++] = pSubtag;
+
+                pLastGoodPosition = pSep;
+                if (extlangIdx < 3) {
+                    next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
+                } else {
+                    next = SCRT | REGN | VART | EXTS | PRIV;
+                }
+                continue;
+            }
+        }
+        if (next & SCRT) {
+            if (_isScriptSubtag(pSubtag, subtagLen)) {
+                *pSep = 0;
+                t->script = pSubtag;
+
+                pLastGoodPosition = pSep;
+                next = REGN | VART | EXTS | PRIV;
+                continue;
+            }
+        }
+        if (next & REGN) {
+            if (_isRegionSubtag(pSubtag, subtagLen)) {
+                *pSep = 0;
+                t->region = pSubtag;
+
+                pLastGoodPosition = pSep;
+                next = VART | EXTS | PRIV;
+                continue;
+            }
+        }
+        if (next & VART) {
+            if (_isVariantSubtag(pSubtag, subtagLen)) {
+                VariantListEntry *var;
+                UBool isAdded;
+
+                var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
+                if (var == NULL) {
+                    *status = U_MEMORY_ALLOCATION_ERROR;
+                    goto error;
+                }
+                *pSep = 0;
+                var->variant = pSubtag;
+                isAdded = _addVariantToList(&(t->variants), var);
+                if (!isAdded) {
+                    /* duplicated variant entry */
+                    uprv_free(var);
+                    break;
+                }
+                pLastGoodPosition = pSep;
+                next = VART | EXTS | PRIV;
+                continue;
+            }
+        }
+        if (next & EXTS) {
+            if (_isExtensionSingleton(pSubtag, subtagLen)) {
+                if (pExtension != NULL) {
+                    if (pExtValueSubtagEnd == NULL) {
+                        /* the previous extension is incomplete */
+                        uprv_free(pExtension);
+                        /* keep the post-loop handling from seeing a freed entry */
+                        pExtension = NULL;
+                        break;
+                    }
+
+                    /* terminate the previous extension value */
+                    *pExtValueSubtagEnd = 0;
+
+                    /* insert the extension to the list */
+                    if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
+                        pLastGoodPosition = pExtValueSubtagEnd;
+                    } else {
+                        /* stop parsing here */
+                        uprv_free(pExtension);
+                        pExtension = NULL;
+                        break;
+                    }
+
+                    if (isLDMLExtension && reqLDMLType) {
+                        /* incomplete LDML extension key and type pair */
+                        pExtension = NULL;
+                        break;
+                    }
+                }
+
+                isLDMLExtension = (*pSubtag == LDMLEXT);
+
+                /* create a new extension */
+                pExtension = uprv_malloc(sizeof(ExtensionListEntry));
+                if (pExtension == NULL) {
+                    *status = U_MEMORY_ALLOCATION_ERROR;
+                    goto error;
+                }
+                *pSep = 0;
+                pExtension->key = pSubtag;
+                pExtension->value = NULL;   /* will be set later */
+
+                /*
+                 * reset the end location of extension value
+                 * subtags for this extension
+                 */
+                pExtValueSubtagEnd = NULL;
+
+                next = EXTV;
+                continue;
+            }
+        }
+        if (next & EXTV) {
+            if (_isExtensionSubtag(pSubtag, subtagLen)) {
+                if (isLDMLExtension) {
+                    if (reqLDMLType) {
+                        /* already saw an LDML key */
+                        if (!_isLDMLType(pSubtag, subtagLen)) {
+                            /* stop parsing here and let the valid LDML extension key/type
+                               pairs be processed by the code after this while loop */
+                            break;
+                        }
+                        pExtValueSubtagEnd = pSep;
+                        reqLDMLType = FALSE;
+                        next = EXTS | EXTV | PRIV;
+                    } else {
+                        /* LDML key */
+                        if (!_isLDMLKey(pSubtag, subtagLen)) {
+                            /* stop parsing here and let the valid LDML extension key/type
+                               pairs be processed by the code after this while loop */
+                            break;
+                        }
+                        reqLDMLType = TRUE;
+                        next = EXTV;
+                    }
+                } else {
+                    /* Mark the end of this subtag */
+                    pExtValueSubtagEnd = pSep;
+                    next = EXTS | EXTV | PRIV;
+                }
+
+                if (pExtension->value == NULL) {
+                    /* if the start position of this extension's value is not yet set,
+                       this one is the first value subtag */
+                    pExtension->value = pSubtag;
+                }
+                continue;
+            }
+        }
+        if (next & PRIV) {
+            if (*pSubtag == PRIVATEUSE) {
+                char *pPrivuseVal;
+
+                if (pExtension != NULL) {
+                    /* Process the last extension */
+                    if (pExtValueSubtagEnd == NULL) {
+                        /* the previous extension is incomplete */
+                        uprv_free(pExtension);
+                        /* keep the post-loop handling from seeing a freed entry */
+                        pExtension = NULL;
+                        break;
+                    } else {
+                        /* terminate the previous extension value */
+                        *pExtValueSubtagEnd = 0;
+
+                        /* insert the extension to the list */
+                        if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
+                            pLastGoodPosition = pExtValueSubtagEnd;
+                            pExtension = NULL;
+                            pExtValueSubtagEnd = NULL;
+                        } else {
+                            /* stop parsing here */
+                            uprv_free(pExtension);
+                            pExtension = NULL;
+                            pExtValueSubtagEnd = NULL;
+                            break;
+                        }
+                    }
+                }
+
+                /* The rest of part will be private use value subtags */
+                if (pNext == NULL) {
+                    /* empty private use subtag */
+                    break;
+                }
+                /* back up the private use value start position */
+                pPrivuseVal = pNext;
+
+                /* validate private use value subtags */
+                while (pNext) {
+                    pSubtag = pNext;
+                    pSep = pSubtag;
+                    while (*pSep) {
+                        if (*pSep == SEP) {
+                            break;
+                        }
+                        pSep++;
+                    }
+                    if (*pSep == 0) {
+                        /* last subtag */
+                        pNext = NULL;
+                    } else {
+                        pNext = pSep + 1;
+                    }
+                    subtagLen = pSep - pSubtag;
+
+                    if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
+                        pLastGoodPosition = pSep;
+                    } else {
+                        break;
+                    }
+                }
+                if (pLastGoodPosition - pPrivuseVal > 0) {
+                    *pLastGoodPosition = 0;
+                    t->privateuse = pPrivuseVal;
+                }
+                /* No more subtags, exiting the parse loop */
+                break;
+            }
+            break;
+        }
+        /* If we fell through here, it means this subtag is illegal - quit parsing */
+        break;
+    }
+
+    if (pExtension != NULL) {
+        /* Process the last extension */
+        if (pExtValueSubtagEnd == NULL) {
+            /* the previous extension is incomplete */
+            uprv_free(pExtension);
+        } else {
+            /* terminate the previous extension value */
+            *pExtValueSubtagEnd = 0;
+            /* insert the extension to the list */
+            if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
+                pLastGoodPosition = pExtValueSubtagEnd;
+            } else {
+                uprv_free(pExtension);
+            }
+        }
+    }
+
+    if (parsedLen != NULL) {
+        *parsedLen = pLastGoodPosition - t->buf;
+    }
+
+    return t;
+
+error:
+    /*
+     * ultag_close() releases the tag text copy and every variant/extension
+     * entry already linked into t, not just the struct itself.
+     */
+    ultag_close(t);
+    return NULL;
+}
+
+/*
+ * Releases a ULanguageTag: its text buffer, every entry on its variant and
+ * extension lists, and finally the struct itself. A NULL argument is ignored.
+ */
+U_CFUNC void
+ultag_close(ULanguageTag* langtag) {
+    VariantListEntry *var, *varNext;
+    ExtensionListEntry *ext, *extNext;
+
+    if (langtag != NULL) {
+        uprv_free(langtag->buf);
+
+        /* remember the successor before freeing each node */
+        for (var = langtag->variants; var != NULL; var = varNext) {
+            varNext = var->next;
+            uprv_free(var);
+        }
+
+        for (ext = langtag->extensions; ext != NULL; ext = extNext) {
+            extNext = ext->next;
+            uprv_free(ext);
+        }
+
+        uprv_free(langtag);
+    }
+}
+
+/*
+ * Returns the language field of the parsed tag. The pointer refers to storage
+ * owned by langtag; it is not a copy.
+ */
+U_CFUNC const char*
+ultag_getLanguage(const ULanguageTag* langtag) {
+ return langtag->language;
+}
+
+/*
+ * Returns the tag's language after mapping it through the DEPRECATEDLANGS
+ * table: when the language equals the first string of a pair, the second
+ * string of that pair is returned; otherwise the language itself is returned.
+ */
+U_CFUNC const char*
+ultag_getJDKLanguage(const ULanguageTag* langtag) {
+    const char *lang = langtag->language;
+    int32_t pairIdx = 0;
+
+    /* the table holds consecutive pairs and ends with a NULL */
+    while (DEPRECATEDLANGS[pairIdx] != NULL) {
+        if (uprv_strcmp(DEPRECATEDLANGS[pairIdx], lang) == 0) {
+            return DEPRECATEDLANGS[pairIdx + 1];
+        }
+        pairIdx += 2;
+    }
+    return lang;
+}
+
+/*
+ * Returns the extended language subtag stored in slot idx, or NULL when idx
+ * is outside 0..MAXEXTLANG-1.
+ */
+U_CFUNC const char*
+ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
+    if (idx < 0 || idx >= MAXEXTLANG) {
+        return NULL;
+    }
+    return langtag->extlang[idx];
+}
+
+/* Returns how many of the MAXEXTLANG extlang slots of the tag are non-NULL. */
+U_CFUNC int32_t
+ultag_getExtlangSize(const ULanguageTag* langtag) {
+    int32_t slot = MAXEXTLANG;
+    int32_t used = 0;
+
+    while (slot-- > 0) {
+        if (langtag->extlang[slot] != NULL) {
+            used++;
+        }
+    }
+    return used;
+}
+
+/*
+ * Returns the script field of the parsed tag. The pointer refers to storage
+ * owned by langtag; it is not a copy.
+ */
+U_CFUNC const char*
+ultag_getScript(const ULanguageTag* langtag) {
+ return langtag->script;
+}
+
+/*
+ * Returns the region field of the parsed tag. The pointer refers to storage
+ * owned by langtag; it is not a copy.
+ */
+U_CFUNC const char*
+ultag_getRegion(const ULanguageTag* langtag) {
+ return langtag->region;
+}
+
+/*
+ * Returns the variant at position idx (0-based) on the tag's variant list,
+ * or NULL when the list has no entry at that position.
+ */
+U_CFUNC const char*
+ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
+    const VariantListEntry *entry = langtag->variants;
+    int32_t pos;
+
+    for (pos = 0; entry != NULL; pos++, entry = entry->next) {
+        if (pos == idx) {
+            return entry->variant;
+        }
+    }
+    return NULL;
+}
+
+/* Returns the number of entries on the tag's variant list. */
+U_CFUNC int32_t
+ultag_getVariantsSize(const ULanguageTag* langtag) {
+    const VariantListEntry *entry;
+    int32_t count = 0;
+
+    /* walk the singly linked list up to its NULL terminator */
+    for (entry = langtag->variants; entry != NULL; entry = entry->next) {
+        count++;
+    }
+    return count;
+}
+
+/*
+ * Returns the key of the extension at position idx (0-based) on the tag's
+ * extension list, or NULL when the list has no entry at that position.
+ */
+U_CFUNC const char*
+ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
+    const ExtensionListEntry *entry = langtag->extensions;
+    int32_t pos;
+
+    for (pos = 0; entry != NULL; pos++, entry = entry->next) {
+        if (pos == idx) {
+            return entry->key;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Returns the value of the extension at position idx (0-based) on the tag's
+ * extension list, or NULL when the list has no entry at that position.
+ */
+U_CFUNC const char*
+ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
+    const ExtensionListEntry *entry = langtag->extensions;
+    int32_t pos;
+
+    for (pos = 0; entry != NULL; pos++, entry = entry->next) {
+        if (pos == idx) {
+            return entry->value;
+        }
+    }
+    return NULL;
+}
+
+/* Returns the number of entries on the tag's extension list. */
+U_CFUNC int32_t
+ultag_getExtensionsSize(const ULanguageTag* langtag) {
+    const ExtensionListEntry *entry;
+    int32_t count = 0;
+
+    /* walk the singly linked list up to its NULL terminator */
+    for (entry = langtag->extensions; entry != NULL; entry = entry->next) {
+        count++;
+    }
+    return count;
+}
+
+/*
+ * Returns the private use field of the parsed tag. The pointer refers to
+ * storage owned by langtag; it is not a copy.
+ */
+U_CFUNC const char*
+ultag_getPrivateUse(const ULanguageTag* langtag) {
+ return langtag->privateuse;
+}
+
+/*
+ * Returns the grandfathered field of the tag.
+ * NOTE(review): ultag_parse() stores the preferred mapping of a grandfathered
+ * tag in the language field and does not assign this field itself, so this
+ * presumably returns whatever _initializeULanguageTag() put there - confirm.
+ */
+U_CFUNC const char*
+ultag_getGrandfathered(const ULanguageTag* langtag) {
+ return langtag->grandfathered;
+}
+
+/*
+ * Converts an ICU locale ID into a language tag.
+ *
+ * The locale ID is canonicalized first (an empty ID is used as-is), then the
+ * language, script, region, variant and keyword parts are appended to langtag
+ * in that order by the _append*ToLanguageTag() helpers.
+ *
+ * @param localeID        the input locale ID
+ * @param langtag         output buffer for the language tag
+ * @param langtagCapacity capacity of langtag
+ * @param strict          passed through to the helpers that build each part
+ * @param status          ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR when
+ *                        canonicalization does not finish with U_ZERO_ERROR
+ * @return the sum of the lengths reported by the helpers, or 0 when status
+ *         is already a failure on entry or canonicalization fails
+ */
+U_CFUNC int32_t
+ultag_localeToLanguageTag(const char* localeID,
+                          char* langtag,
+                          int32_t langtagCapacity,
+                          UBool strict,
+                          UErrorCode* status) {
+    /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
+    char canonical[256];
+    int32_t reslen = 0;
+    UErrorCode tmpStatus = U_ZERO_ERROR;
+
+    /* a failure passed in by the caller must be left untouched, not replaced below */
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+
+    /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
+    canonical[0] = 0;
+    if (uprv_strlen(localeID) > 0) {
+        uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
+        /* warnings are rejected too: a truncated or unterminated result cannot be used */
+        if (tmpStatus != U_ZERO_ERROR) {
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+    }
+
+    reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
+    reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
+    reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
+    reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
+    reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
+
+    return reslen;
+}
+
+/*
+ * Converts a language tag into an ICU locale ID.
+ *
+ * The tag is parsed with ultag_parse(); the language is written as-is (and
+ * omitted when it equals LANG_UND), the script in title case, the region and
+ * variants in upper case, each preceded by LOCALE_SEP, and extensions plus
+ * private use are appended as keywords by _appendKeywords(). Characters that
+ * do not fit into localeIDCapacity are counted but not written.
+ *
+ * @param langtag          the input language tag (NUL-terminated)
+ * @param localeID         output buffer for the locale ID
+ * @param localeIDCapacity capacity of localeID
+ * @param parsedLength     passed to ultag_parse() to receive the parsed length
+ * @param status           ICU error code
+ * @return 0 when parsing fails, otherwise the result of u_terminateChars()
+ *         for the required output length
+ */
+U_CFUNC int32_t
+ultag_languageTagToLocale(const char* langtag,
+                          char* localeID,
+                          int32_t localeIDCapacity,
+                          int32_t* parsedLength,
+                          UErrorCode* status) {
+    ULanguageTag *parsed;
+    const char *part, *ch;
+    int32_t out = 0;
+    int32_t partLen;
+    int32_t variantCount, extCount, v;
+    UBool hasRegion = FALSE;
+
+    parsed = ultag_parse(langtag, -1, parsedLength, status);
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+
+    /* language - the undetermined language is left out */
+    part = ultag_getLanguage(parsed);
+    if (uprv_strcmp(part, LANG_UND) != 0) {
+        partLen = uprv_strlen(part);
+        if (partLen > 0) {
+            if (out < localeIDCapacity) {
+                uprv_memcpy(localeID, part, uprv_min(partLen, localeIDCapacity - out));
+            }
+            out += partLen;
+        }
+    }
+
+    /* script - first letter upper case, the rest unchanged */
+    part = ultag_getScript(parsed);
+    if (uprv_strlen(part) > 0) {
+        if (out < localeIDCapacity) {
+            localeID[out] = LOCALE_SEP;
+        }
+        out++;
+
+        for (ch = part; *ch != 0; ch++, out++) {
+            if (out < localeIDCapacity) {
+                localeID[out] = (ch == part) ? uprv_toupper(*ch) : *ch;
+            }
+        }
+    }
+
+    /* region - upper case */
+    part = ultag_getRegion(parsed);
+    if (uprv_strlen(part) > 0) {
+        if (out < localeIDCapacity) {
+            localeID[out] = LOCALE_SEP;
+        }
+        out++;
+
+        for (ch = part; *ch != 0; ch++, out++) {
+            if (out < localeIDCapacity) {
+                localeID[out] = uprv_toupper(*ch);
+            }
+        }
+        hasRegion = TRUE;
+    }
+
+    /* variants - upper case; an extra separator stands in for a missing region */
+    variantCount = ultag_getVariantsSize(parsed);
+    if (variantCount > 0 && !hasRegion) {
+        if (out < localeIDCapacity) {
+            localeID[out] = LOCALE_SEP;
+        }
+        out++;
+    }
+    for (v = 0; v < variantCount; v++) {
+        part = ultag_getVariant(parsed, v);
+        if (out < localeIDCapacity) {
+            localeID[out] = LOCALE_SEP;
+        }
+        out++;
+
+        for (ch = part; *ch != 0; ch++, out++) {
+            if (out < localeIDCapacity) {
+                localeID[out] = uprv_toupper(*ch);
+            }
+        }
+    }
+
+    /* keywords - built from the extensions and the private use value */
+    extCount = ultag_getExtensionsSize(parsed);
+    part = ultag_getPrivateUse(parsed);
+    if (extCount > 0 || uprv_strlen(part) > 0) {
+        if (out == 0) {
+            /* keywords need a language in front of them */
+            if (out < localeIDCapacity) {
+                uprv_memcpy(localeID + out, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - out));
+            }
+            out += LANG_UND_LEN;
+        }
+        partLen = _appendKeywords(parsed, localeID + out, localeIDCapacity - out, status);
+        out += partLen;
+    }
+
+    ultag_close(parsed);
+    return u_terminateChars(localeID, localeIDCapacity, out, status);
+}
+
+
diff --git a/icu4c/source/common/ultag.h b/icu4c/source/common/ultag.h
new file mode 100644
index 00000000000..eacaca2eade
--- /dev/null
+++ b/icu4c/source/common/ultag.h
@@ -0,0 +1,74 @@
+/*
+**********************************************************************
+* Copyright (C) 2009, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+#ifndef ULTAG_H
+#define ULTAG_H
+
+#include "unicode/utypes.h"
+
+/* Opaque parsed language tag; created by ultag_parse(), released by ultag_close(). */
+typedef struct ULanguageTag ULanguageTag;
+
+/*
+ * Parses a language tag. A negative tagLen means tag is NUL-terminated.
+ * If parsedLen is not NULL it receives the number of leading characters that
+ * were parsed successfully. Returns NULL when status reports a failure.
+ */
+U_CFUNC ULanguageTag*
+ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
+
+/* Frees a tag returned by ultag_parse(); NULL is ignored. */
+U_CFUNC void
+ultag_close(ULanguageTag* langtag);
+
+/* Returns the language subtag of the parsed tag. */
+U_CFUNC const char*
+ultag_getLanguage(const ULanguageTag* langtag);
+
+/* Returns the language subtag after mapping it through the implementation's replacement table. */
+U_CFUNC const char*
+ultag_getJDKLanguage(const ULanguageTag* langtag);
+
+/* Returns the extended language subtag in slot idx, or NULL if idx is out of range. */
+U_CFUNC const char*
+ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
+
+/* Returns the number of extended language subtags that are set. */
+U_CFUNC int32_t
+ultag_getExtlangSize(const ULanguageTag* langtag);
+
+/* Returns the script subtag of the parsed tag. */
+U_CFUNC const char*
+ultag_getScript(const ULanguageTag* langtag);
+
+/* Returns the region subtag of the parsed tag. */
+U_CFUNC const char*
+ultag_getRegion(const ULanguageTag* langtag);
+
+/* Returns the variant at list position idx, or NULL if there is none. */
+U_CFUNC const char*
+ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
+
+/* Returns the number of variants. */
+U_CFUNC int32_t
+ultag_getVariantsSize(const ULanguageTag* langtag);
+
+/* Returns the key of the extension at list position idx, or NULL if there is none. */
+U_CFUNC const char*
+ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
+
+/* Returns the value of the extension at list position idx, or NULL if there is none. */
+U_CFUNC const char*
+ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
+
+/* Returns the number of extensions. */
+U_CFUNC int32_t
+ultag_getExtensionsSize(const ULanguageTag* langtag);
+
+/* Returns the private use value of the parsed tag. */
+U_CFUNC const char*
+ultag_getPrivateUse(const ULanguageTag* langtag);
+
+/* Returns the grandfathered field of the parsed tag. */
+U_CFUNC const char*
+ultag_getGrandfathered(const ULanguageTag* langtag);
+
+/*
+ * Converts a language tag to an ICU locale ID written into localeID.
+ * parsedLength is handed to ultag_parse() and receives the parsed length.
+ * Returns the length of the locale ID; this is the implementation behind
+ * uloc_forLanguageTag().
+ */
+U_CFUNC int32_t
+ultag_languageTagToLocale(const char* langtag,
+ char* localeID,
+ int32_t localeIDCapacity,
+ int32_t* parsedLength,
+ UErrorCode* status);
+
+/*
+ * Converts an ICU locale ID to a language tag written into langtag.
+ * Returns the length of the language tag; this is the implementation behind
+ * uloc_toLanguageTag().
+ * NOTE(review): strict presumably makes ill-formed parts an error instead of
+ * dropping them (the cintltst expectations suggest so) - confirm against the
+ * _append*ToLanguageTag helpers.
+ */
+U_CFUNC int32_t
+ultag_localeToLanguageTag(const char* localeID,
+ char* langtag,
+ int32_t langtagCapacity,
+ UBool strict,
+ UErrorCode* status);
+
+
+#endif /* ULTAG_H */
diff --git a/icu4c/source/test/cintltst/cloctst.c b/icu4c/source/test/cintltst/cloctst.c
index 23313d2f5cc..96b6ed3d25a 100644
--- a/icu4c/source/test/cintltst/cloctst.c
+++ b/icu4c/source/test/cintltst/cloctst.c
@@ -233,6 +233,8 @@ void addLocaleTest(TestNode** root)
TESTCASE(TestGetLocaleForLCID);
TESTCASE(TestOrientation);
TESTCASE(TestLikelySubtags);
+ TESTCASE(TestToLanguageTag);
+ TESTCASE(TestForLanguageTag);
}
@@ -5343,3 +5345,158 @@ static void TestLikelySubtags()
}
}
}
+
+/*
+ * Test data for TestToLanguageTag, terminated by an all-NULL row.
+ * Column 0: input locale ID.
+ * Column 1: expected language tag in non-strict mode.
+ * Column 2: expected language tag in strict mode.
+ * A NULL in column 1 or 2 means the call is expected to report an error.
+ */
+const char* const locale_to_langtag[][3] = {
+ {"", "und", "und"},
+ {"en", "en", "en"},
+ {"en_US", "en-us", "en-us"},
+ {"iw_IL", "he-il", "he-il"},
+ {"sr_Latn_SR", "sr-latn-sr", "sr-latn-sr"},
+ {"en__POSIX", "en-posix", "en-posix"},
+ {"en_POSIX", "en", NULL},
+ {"und_555", "und-555", "und-555"},
+ {"123", "und", NULL},
+ {"%$#&", "und", NULL},
+ {"_Latn", "und-latn", "und-latn"},
+ {"_DE", "und-de", "und-de"},
+ {"und_FR", "und-fr", "und-fr"},
+ {"th_TH_TH", "th-th", NULL},
+ {"bogus", "bogus", "bogus"},
+ {"foooobarrr", "und", NULL},
+ {"az_AZ_CYRL", "az-cyrl-az", "az-cyrl-az"},
+ {"aa_BB_CYRL", "aa-bb", NULL},
+ {"en_US_1234", "en-us-1234", "en-us-1234"},
+ {"en_US_VARIANTA_VARIANTB", "en-us-varianta-variantb", "en-us-varianta-variantb"},
+ {"en_US_VARIANTB_VARIANTA", "en-us-varianta-variantb", "en-us-varianta-variantb"},
+ {"ja__9876_5432", "ja-5432-9876", "ja-5432-9876"},
+ {"zh_Hant__VAR", "zh-hant", NULL},
+ {"es__BADVARIANT_GOODVAR", "es-goodvar", NULL},
+ {"en@calendar=gregorian", "en-u-ca-gregory", "en-u-ca-gregory"},
+ {"de@collation=phonebook;calendar=gregorian", "de-u-ca-gregory-co-phonebk", "de-u-ca-gregory-co-phonebk"},
+ {"th@numbers=thai;z=extz;x=priv-use;a=exta", "th-a-exta-u-nu-thai-z-extz-x-priv-use", "th-a-exta-u-nu-thai-z-extz-x-priv-use"},
+ {"en@timezone=America/New_York;calendar=japanese", "en-u-ca-japanese-tz-usnyc", "en-u-ca-japanese-tz-usnyc"},
+ {"en@x=x-y-z;a=a-b-c", "en-x-x-y-z", NULL},
+ {"it@collation=badcollationtype;colStrength=identical;cu=usd-eur", "it-u-ks-identic", NULL},
+ {NULL, NULL, NULL}
+};
+
+/*
+ * Runs uloc_toLanguageTag() over every row of locale_to_langtag, first in
+ * non-strict and then in strict mode, and compares the result with the
+ * expectation for that mode. A NULL expectation means the call must fail.
+ */
+static void TestToLanguageTag(void) {
+    /* inserted after the function name in messages so both modes share one code path */
+    static const char* const modeLabel[2] = { "", " {strict}" };
+    char langtag[256];
+    int32_t i, mode;
+    UErrorCode status;
+    const char *inloc;
+    const char *expected;
+
+    for (i = 0; locale_to_langtag[i][0] != NULL; i++) {
+        inloc = locale_to_langtag[i][0];
+
+        /* mode 0: non-strict (column 1), mode 1: strict (column 2) */
+        for (mode = 0; mode < 2; mode++) {
+            status = U_ZERO_ERROR;
+            langtag[0] = 0;
+            expected = locale_to_langtag[i][mode + 1];
+
+            uloc_toLanguageTag(inloc, langtag, sizeof(langtag), (UBool)(mode == 1), &status);
+            if (U_FAILURE(status)) {
+                if (expected != NULL) {
+                    log_err("Error returned by uloc_toLanguageTag%s for locale id [%s] - error: %s\n",
+                        modeLabel[mode], inloc, u_errorName(status));
+                }
+            } else {
+                if (expected == NULL) {
+                    log_err("Error should be returned by uloc_toLanguageTag%s for locale id [%s], but [%s] is returned without errors\n",
+                        modeLabel[mode], inloc, langtag);
+                } else if (uprv_strcmp(langtag, expected) != 0) {
+                    log_err("uloc_toLanguageTag%s returned language tag [%s] for input locale [%s] - expected: [%s]\n",
+                        modeLabel[mode], langtag, inloc, expected);
+                }
+            }
+        }
+    }
+}
+
+/*
+ * Test data for TestForLanguageTag, terminated by a row whose bcpID is NULL.
+ * bcpID: input language tag.
+ * locID: locale ID uloc_forLanguageTag() is expected to produce.
+ * len:   expected parsed length reported for the input tag.
+ */
+static const struct {
+ const char *bcpID;
+ const char *locID;
+ int32_t len;
+} langtag_to_locale[] = {
+ {"en", "en", 2},
+ {"en-us", "en_US", 5},
+ {"und-us", "_US", 6},
+ {"und-latn", "_Latn", 8},
+ {"en-us-posix", "en_US_POSIX", 11},
+ {"de-de_euro", "de", 2},
+ {"kok-in", "kok_IN", 6},
+ {"123", "", 0},
+ {"en_us", "", 0},
+ {"en-latn-x", "en_Latn", 7},
+ {"art-lojban", "jbo", 10},
+ {"zh-hakka", "hak", 8},
+ {"xxx-yy", "xxx_YY", 6},
+ {"fr-234", "fr_234", 6},
+ {"i-default", "", 9},
+ {"i-test", "", 0},
+ {"ja-jp-jp", "ja_JP", 5},
+ {"bogus", "bogus", 5},
+ {"boguslang", "", 0},
+ {"EN-lATN-us", "en_Latn_US", 10},
+ {"und-variant-1234", "__1234_VARIANT", 16},
+ {"und-varzero-var1-vartwo", "__VARZERO", 11},
+ {"en-u-ca-gregory", "en@calendar=gregorian", 15},
+ {"en-U-cu-USD", "en@currency=usd", 11},
+ {"ar-x-1-2-3", "ar@x=1-2-3", 10},
+ {"fr-u-nu-latn-cu-eur", "fr@currency=eur;numbers=latn", 19},
+ {"de-k-kext-u-co-phonebk-nu-latn", "de@collation=phonebook;k=kext;numbers=latn", 30},
+ {"ja-u-cu-jpy-ca-jp", "ja@currency=jpy", 11},
+ {"en-us-u-tz-usnyc", "en_US@timezone=america/new_york", 16},
+ {"und-a-abc-def", "und@a=abc-def", 13},
+ {"zh-u-ca-chinese-x-u-ca-chinese", "zh@calendar=chinese;x=u-ca-chinese", 30},
+ {NULL, NULL, 0}
+};
+
+/*
+ * Runs uloc_forLanguageTag() over every row of langtag_to_locale and checks
+ * both the resulting locale ID and the reported parsed length. Any error
+ * status from the call is itself logged as a test failure.
+ */
+static void TestForLanguageTag(void) {
+    char locale[256];
+    UErrorCode status;
+    int32_t parsedLen;
+    int32_t row;
+
+    for (row = 0; langtag_to_locale[row].bcpID != NULL; row++) {
+        const char *tag = langtag_to_locale[row].bcpID;
+        const char *wantLocale = langtag_to_locale[row].locID;
+        int32_t wantLen = langtag_to_locale[row].len;
+
+        status = U_ZERO_ERROR;
+        locale[0] = 0;
+        uloc_forLanguageTag(tag, locale, sizeof(locale), &parsedLen, &status);
+
+        if (U_FAILURE(status)) {
+            log_err("Error returned by uloc_forLanguageTag for language tag [%s] - error: %s\n",
+                tag, u_errorName(status));
+            continue;
+        }
+
+        if (uprv_strcmp(wantLocale, locale) != 0) {
+            log_err("uloc_forLanguageTag returned locale [%s] for input language tag [%s] - expected: [%s]\n",
+                locale, tag, wantLocale);
+        }
+        if (parsedLen != wantLen) {
+            log_err("uloc_forLanguageTag parsed length of %d for input language tag [%s] - expected parsed length: %d\n",
+                parsedLen, tag, wantLen);
+        }
+    }
+}
+
diff --git a/icu4c/source/test/cintltst/cloctst.h b/icu4c/source/test/cintltst/cloctst.h
index 52d6c45b32d..8c5e73c5386 100644
--- a/icu4c/source/test/cintltst/cloctst.h
+++ b/icu4c/source/test/cintltst/cloctst.h
@@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
- * Copyright (c) 1997-2008, International Business Machines Corporation and
+ * Copyright (c) 1997-2009, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/********************************************************************************
@@ -115,4 +115,9 @@ static void TestOrientation(void);
static void TestLikelySubtags(void);
+/**
+ * language tag
+ */
+static void TestForLanguageTag(void);
+static void TestToLanguageTag(void);
#endif