mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 23:10:40 +00:00
ICU-3748 implement canonicalize and getName for 3.0
X-SVN-Rev: 15320
This commit is contained in:
parent
eb4237ec2d
commit
b2d2fcd9c3
3 changed files with 541 additions and 245 deletions
|
@ -16,6 +16,8 @@
|
|||
* 04/06/99 stephen changed setDefault() to realloc and copy
|
||||
* 06/14/99 stephen Changed calls to ures_open for new params
|
||||
* 07/21/99 stephen Modified setDefault() to propagate to C++
|
||||
* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
|
||||
* brought canonicalization code into line with spec
|
||||
*****************************************************************************/
|
||||
|
||||
/*
|
||||
|
@ -26,7 +28,6 @@
|
|||
l = lang, C = ctry, M = charmap, V = variant
|
||||
*/
|
||||
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/uloc.h"
|
||||
|
@ -41,11 +42,9 @@
|
|||
#include "locmap.h"
|
||||
#include "uarrsort.h"
|
||||
#include "uenumimp.h"
|
||||
#include "uassert.h"
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
Global variable and type definitions
|
||||
*****************************************************************************/
|
||||
/* ### Declarations **************************************************/
|
||||
|
||||
/* Locale stuff from locid.cpp */
|
||||
U_CFUNC void locale_set_default(const char *id);
|
||||
|
@ -61,6 +60,17 @@ locale_getKeywords(const char *localeID,
|
|||
UErrorCode *status);
|
||||
static const char *
|
||||
locale_getKeywordsStart(const char *localeID);
|
||||
static int32_t
|
||||
_getKeywords(const char *localeID,
|
||||
char prev,
|
||||
char *keywords, int32_t keywordCapacity,
|
||||
char *values, int32_t valuesCapacity, int32_t *valLen,
|
||||
UBool valuesToo,
|
||||
const char* addKeyword,
|
||||
const char* addValue,
|
||||
UErrorCode *status);
|
||||
|
||||
/* ### Constants **************************************************/
|
||||
|
||||
/* These strings describe the resources we attempt to load from
|
||||
the locale ResourceBundle data file.*/
|
||||
|
@ -452,68 +462,69 @@ NULL
|
|||
};
|
||||
|
||||
typedef struct CanonicalizationMap {
|
||||
const char *id;
|
||||
const char *canonicalID;
|
||||
const char *id; /* input ID */
|
||||
const char *canonicalID; /* canonicalized output ID */
|
||||
const char *keyword; /* keyword, or NULL if none */
|
||||
const char *value; /* keyword value, or NULL if kw==NULL */
|
||||
} CanonicalizationMap;
|
||||
|
||||
/**
|
||||
* A map to canonicalize locale IDs. This handles a variety of
|
||||
* different semantic kinds of transformations. The left column is
|
||||
* the ID after processing by uloc_getName. The right column is its
|
||||
* canonicalized form.
|
||||
* different semantic kinds of transformations.
|
||||
*/
|
||||
static const CanonicalizationMap CANONICALIZATION_MAP[] = {
|
||||
{ "", "en_US_POSIX" }, /* .NET name */
|
||||
{ "C", "en_US_POSIX" }, /* POSIX name */
|
||||
{ "art_LOJBAN", "jbo" }, /* registered name */
|
||||
{ "az_AZ_CYRL", "az_Cyrl_AZ" }, /* .NET name */
|
||||
{ "az_AZ_LATN", "az_Latn_AZ" }, /* .NET name */
|
||||
{ "ca_ES_PREEURO", "ca_ES@currency=ESP" },
|
||||
{ "cel_GAULISH", "cel__GAULISH" }, /* registered name */
|
||||
{ "de_1901", "de__1901" }, /* registered name */
|
||||
{ "de_1906", "de__1906" }, /* registered name */
|
||||
{ "de__PHONEBOOK", "de@collation=phonebook" },
|
||||
{ "de_AT_PREEURO", "de_AT@currency=ATS" },
|
||||
{ "de_DE_PREEURO", "de_DE@currency=DEM" },
|
||||
{ "de_LU_PREEURO", "de_LU@currency=EUR" },
|
||||
{ "el_GR_PREEURO", "el_GR@currency=GRD" },
|
||||
{ "en_BOONT", "en__BOONT" }, /* registered name */
|
||||
{ "en_SCOUSE", "en__SCOUSE" }, /* registered name */
|
||||
{ "en_BE_PREEURO", "en_BE@currency=BEF" },
|
||||
{ "en_IE_PREEURO", "en_IE@currency=IEP" },
|
||||
{ "es__TRADITIONAL", "es@collation=traditional" },
|
||||
{ "es_ES_PREEURO", "es_ES@currency=ESP" },
|
||||
{ "eu_ES_PREEURO", "eu_ES@currency=ESP" },
|
||||
{ "fi_FI_PREEURO", "fi_FI@currency=FIM" },
|
||||
{ "fr_BE_PREEURO", "fr_BE@currency=BEF" },
|
||||
{ "fr_FR_PREEURO", "fr_FR@currency=FRF" },
|
||||
{ "fr_LU_PREEURO", "fr_LU@currency=LUF" },
|
||||
{ "ga_IE_PREEURO", "ga_IE@currency=IEP" },
|
||||
{ "gl_ES_PREEURO", "gl_ES@currency=ESP" },
|
||||
{ "hi__DIRECT", "hi@collation=direct" },
|
||||
{ "it_IT_PREEURO", "it_IT@currency=ITL" },
|
||||
{ "ja_JP_TRADITIONAL", "ja_JP@calendar=japanese" },
|
||||
{ "nl_BE_PREEURO", "nl_BE@currency=BEF" },
|
||||
{ "nl_NL_PREEURO", "nl_NL@currency=NLG" },
|
||||
{ "pt_PT_PREEURO", "pt_PT@currency=PTE" },
|
||||
{ "sl_ROZAJ", "sl__ROZAJ" }, /* registered name */
|
||||
{ "sr_SP_CYRL", "sr_Cyrl_SP" }, /* .NET name */
|
||||
{ "sr_SP_LATN", "sr_Latn_SP" }, /* .NET name */
|
||||
{ "uz_UZ_CYRL", "uz_Cyrl_UZ" }, /* .NET name */
|
||||
{ "uz_UZ_LATN", "uz_Latn_UZ" }, /* .NET name */
|
||||
{ "zh_CHS", "zh_Hans" }, /* .NET name */
|
||||
{ "zh_CHT", "zh_TW" }, /* .NET name TODO: This should really be zh_Hant once the locale structure is fixed. */
|
||||
{ "zh_GAN", "zh__GAN" }, /* registered name */
|
||||
{ "zh_GUOYU", "zh" }, /* registered name */
|
||||
{ "zh_HAKKA", "zh__HAKKA" }, /* registered name */
|
||||
{ "zh_MIN", "zh__MIN" }, /* registered name */
|
||||
{ "zh_MIN_NAN", "zh__MINNAN" }, /* registered name */
|
||||
{ "zh_WUU", "zh__WUU" }, /* registered name */
|
||||
{ "zh_XIANG", "zh__XIANG" }, /* registered name */
|
||||
{ "zh_YUE", "zh__YUE" }, /* registered name */
|
||||
{ "th_TH_TRADITIONAL", "th_TH@calendar=buddhist" },
|
||||
{ "zh_TW_STROKE", "zh_TW@collation=stroke" },
|
||||
{ "zh__PINYIN", "zh@collation=pinyin" }
|
||||
static const CanonicalizationMap CANONICALIZE_MAP[] = {
|
||||
{ "", "en_US_POSIX", NULL, NULL }, /* .NET name */
|
||||
{ "C", "en_US_POSIX", NULL, NULL }, /* POSIX name */
|
||||
{ "art_LOJBAN", "jbo", NULL, NULL }, /* registered name */
|
||||
{ "az_AZ_CYRL", "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
|
||||
{ "az_AZ_LATN", "az_Latn_AZ", NULL, NULL }, /* .NET name */
|
||||
{ "ca_ES_PREEURO", "ca_ES", "currency", "ESP" },
|
||||
{ "cel_GAULISH", "cel__GAULISH", NULL, NULL }, /* registered name */
|
||||
{ "de_1901", "de__1901", NULL, NULL }, /* registered name */
|
||||
{ "de_1906", "de__1906", NULL, NULL }, /* registered name */
|
||||
{ "de__PHONEBOOK", "de", "collation", "phonebook" },
|
||||
{ "de_AT_PREEURO", "de_AT", "currency", "ATS" },
|
||||
{ "de_DE_PREEURO", "de_DE", "currency", "DEM" },
|
||||
{ "de_LU_PREEURO", "de_LU", "currency", "EUR" },
|
||||
{ "el_GR_PREEURO", "el_GR", "currency", "GRD" },
|
||||
{ "en_BOONT", "en__BOONT", NULL, NULL }, /* registered name */
|
||||
{ "en_SCOUSE", "en__SCOUSE", NULL, NULL }, /* registered name */
|
||||
{ "en_BE_PREEURO", "en_BE", "currency", "BEF" },
|
||||
{ "en_IE_PREEURO", "en_IE", "currency", "IEP" },
|
||||
{ "es__TRADITIONAL", "es", "collation", "traditional" },
|
||||
{ "es_ES_PREEURO", "es_ES", "currency", "ESP" },
|
||||
{ "eu_ES_PREEURO", "eu_ES", "currency", "ESP" },
|
||||
{ "fi_FI_PREEURO", "fi_FI", "currency", "FIM" },
|
||||
{ "fr_BE_PREEURO", "fr_BE", "currency", "BEF" },
|
||||
{ "fr_FR_PREEURO", "fr_FR", "currency", "FRF" },
|
||||
{ "fr_LU_PREEURO", "fr_LU", "currency", "LUF" },
|
||||
{ "ga_IE_PREEURO", "ga_IE", "currency", "IEP" },
|
||||
{ "gl_ES_PREEURO", "gl_ES", "currency", "ESP" },
|
||||
{ "hi__DIRECT", "hi", "collation", "direct" },
|
||||
{ "it_IT_PREEURO", "it_IT", "currency", "ITL" },
|
||||
{ "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" },
|
||||
{ "nb_NO_NY", "nn_NO", NULL, NULL }, /* "markus said this was ok" :-) */
|
||||
{ "nl_BE_PREEURO", "nl_BE", "currency", "BEF" },
|
||||
{ "nl_NL_PREEURO", "nl_NL", "currency", "NLG" },
|
||||
{ "pt_PT_PREEURO", "pt_PT", "currency", "PTE" },
|
||||
{ "sl_ROZAJ", "sl__ROZAJ", NULL, NULL }, /* registered name */
|
||||
{ "sr_SP_CYRL", "sr_Cyrl_SP", NULL, NULL }, /* .NET name */
|
||||
{ "sr_SP_LATN", "sr_Latn_SP", NULL, NULL }, /* .NET name */
|
||||
{ "uz_UZ_CYRL", "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
|
||||
{ "uz_UZ_LATN", "uz_Latn_UZ", NULL, NULL }, /* .NET name */
|
||||
{ "zh_CHS", "zh_Hans", NULL, NULL }, /* .NET name */
|
||||
{ "zh_CHT", "zh_TW", NULL, NULL }, /* .NET name TODO: This should be zh_Hant once the locale structure is fixed. */
|
||||
{ "zh_GAN", "zh__GAN", NULL, NULL }, /* registered name */
|
||||
{ "zh_GUOYU", "zh", NULL, NULL }, /* registered name */
|
||||
{ "zh_HAKKA", "zh__HAKKA", NULL, NULL }, /* registered name */
|
||||
{ "zh_MIN", "zh__MIN", NULL, NULL }, /* registered name */
|
||||
{ "zh_MIN_NAN", "zh__MINNAN", NULL, NULL }, /* registered name */
|
||||
{ "zh_WUU", "zh__WUU", NULL, NULL }, /* registered name */
|
||||
{ "zh_XIANG", "zh__XIANG", NULL, NULL }, /* registered name */
|
||||
{ "zh_YUE", "zh__YUE", NULL, NULL }, /* registered name */
|
||||
{ "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" },
|
||||
{ "zh_TW_STROKE", "zh_TW", "collation", "stroke" },
|
||||
{ "zh__PINYIN", "zh", "collation", "pinyin" }
|
||||
};
|
||||
|
||||
/* ### ID parsing implementation **************************************************/
|
||||
|
@ -704,16 +715,28 @@ _getCountry(const char *localeID,
|
|||
return i;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param needSeparator if true, then add leading '_' if any variants
|
||||
* are added to 'variant'
|
||||
*/
|
||||
static int32_t
|
||||
_getVariant(const char *localeID,
|
||||
char prev,
|
||||
char *variant, int32_t variantCapacity) {
|
||||
_getVariantEx(const char *localeID,
|
||||
char prev,
|
||||
char *variant, int32_t variantCapacity,
|
||||
UBool needSeparator) {
|
||||
int32_t i=0;
|
||||
|
||||
/* get one or more variant tags and separate them with '_' */
|
||||
if(_isIDSeparator(prev)) {
|
||||
/* get a variant string after a '-' or '_' */
|
||||
while(!_isTerminator(*localeID)) {
|
||||
if (needSeparator) {
|
||||
if (i<variantCapacity) {
|
||||
variant[i] = '_';
|
||||
}
|
||||
++i;
|
||||
needSeparator = FALSE;
|
||||
}
|
||||
if(i<variantCapacity) {
|
||||
variant[i]=(char)uprv_toupper(*localeID);
|
||||
if(variant[i]=='-') {
|
||||
|
@ -735,6 +758,13 @@ _getVariant(const char *localeID,
|
|||
return 0;
|
||||
}
|
||||
while(!_isTerminator(*localeID)) {
|
||||
if (needSeparator) {
|
||||
if (i<variantCapacity) {
|
||||
variant[i] = '_';
|
||||
}
|
||||
++i;
|
||||
needSeparator = FALSE;
|
||||
}
|
||||
if(i<variantCapacity) {
|
||||
variant[i]=(char)uprv_toupper(*localeID);
|
||||
if(variant[i]=='-' || variant[i]==',') {
|
||||
|
@ -745,27 +775,79 @@ _getVariant(const char *localeID,
|
|||
localeID++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
static int32_t
|
||||
_getVariant(const char *localeID,
|
||||
char prev,
|
||||
char *variant, int32_t variantCapacity) {
|
||||
return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete ALL instances of a variant from the given list of one or
|
||||
* more variants. Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
|
||||
* @param variants the source string of one or more variants,
|
||||
* separated by '_'. This will be MODIFIED IN PLACE. Not zero
|
||||
* terminated; if it is, trailing zero will NOT be maintained.
|
||||
* @param variantsLen length of variants
|
||||
* @param toDelete variant to delete, without separators, e.g. "EURO"
|
||||
* or "PREEURO"; not zero terminated
|
||||
* @param toDeleteLen length of toDelete
|
||||
* @return number of characters deleted from variants
|
||||
*/
|
||||
static int32_t
|
||||
_deleteVariant(char* variants, int32_t variantsLen,
|
||||
const char* toDelete, int32_t toDeleteLen) {
|
||||
int32_t delta = 0; /* number of chars deleted */
|
||||
for (;;) {
|
||||
UBool flag = FALSE;
|
||||
if (variantsLen < toDeleteLen) {
|
||||
return delta;
|
||||
}
|
||||
if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
|
||||
(variantsLen == toDeleteLen ||
|
||||
(flag=(variants[toDeleteLen] == '_')))) {
|
||||
int32_t d = toDeleteLen + (flag?1:0);
|
||||
variantsLen -= d;
|
||||
delta += d;
|
||||
uprv_memmove(variants, variants+d, variantsLen);
|
||||
} else {
|
||||
char* p = uprv_strchr(variants, '_');
|
||||
if (p == NULL) {
|
||||
return delta;
|
||||
}
|
||||
++p;
|
||||
variantsLen -= p - variants;
|
||||
variants = p;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* bit-flags for 'options' parameter of _canonicalize */
|
||||
#define _ULOC_STRIP_KEYWORDS 0x8
|
||||
#define _ULOC_STRIP_KEYWORDS 0x2
|
||||
#define _ULOC_CANONICALIZE 0x1
|
||||
|
||||
#define OPTION_SET(options, mask) ((options & mask) != 0)
|
||||
|
||||
static int32_t
|
||||
_canonicalize(const char* localeID,
|
||||
char* _name,
|
||||
int32_t _nameCapacity,
|
||||
char* result,
|
||||
int32_t resultCapacity,
|
||||
uint32_t options,
|
||||
UErrorCode* err) {
|
||||
int32_t i, fieldCount, scriptSize;
|
||||
UBool alreadyAddedAKeyword = FALSE;
|
||||
int32_t i, j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
|
||||
char localeBuffer[ULOC_FULLNAME_CAPACITY];
|
||||
int32_t idx, len;
|
||||
char* name;
|
||||
const char* original = localeID;
|
||||
int32_t nameCapacity;
|
||||
const char* keywordAssign = NULL;
|
||||
const char* separatorIndicator = NULL;
|
||||
const char* addKeyword = NULL;
|
||||
const char* addValue = NULL;
|
||||
char* name;
|
||||
char* variant = NULL; /* pointer into name, or NULL */
|
||||
int32_t sawEuro = 0;
|
||||
|
||||
if (U_FAILURE(*err)) {
|
||||
return 0;
|
||||
|
@ -775,19 +857,17 @@ _canonicalize(const char* localeID,
|
|||
localeID=uloc_getDefault();
|
||||
}
|
||||
|
||||
/* if we are doing a full canonicalization, then put results in localeBuffer;
|
||||
otherwise send them to _name. */
|
||||
if ((options & _ULOC_CANONICALIZE) != 0) {
|
||||
/* if we are doing a full canonicalization, then put results in
|
||||
localeBuffer; otherwise send them to result. */
|
||||
if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
|
||||
name = localeBuffer;
|
||||
nameCapacity = sizeof(localeBuffer);
|
||||
} else {
|
||||
name = _name;
|
||||
nameCapacity = _nameCapacity;
|
||||
name = result;
|
||||
nameCapacity = resultCapacity;
|
||||
}
|
||||
|
||||
/* get all pieces, one after another, and separate with '_' */
|
||||
fieldCount=0;
|
||||
scriptSize=0;
|
||||
i=_getLanguage(localeID, name, nameCapacity, &localeID);
|
||||
if(_isIDSeparator(*localeID)) {
|
||||
const char *scriptID;
|
||||
|
@ -821,28 +901,65 @@ _canonicalize(const char* localeID,
|
|||
name[i]='_';
|
||||
}
|
||||
++i;
|
||||
i+=_getVariant(localeID+1, *localeID, name+i, nameCapacity-i);
|
||||
variantSize = _getVariant(localeID+1, *localeID, name+i, nameCapacity-i);
|
||||
if (variantSize > 0) {
|
||||
variant = name+i;
|
||||
i += variantSize;
|
||||
localeID += variantSize + 1; /* skip '_' and variant */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((options & _ULOC_STRIP_KEYWORDS) == 0) {
|
||||
/* if we do not have a variant tag yet then try a POSIX variant after '@' */
|
||||
if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
|
||||
const char *keywordIndicator = uprv_strchr(localeID, ULOC_KEYWORD_ASSIGN);
|
||||
const char *separatorIndicator = uprv_strchr(localeID, ULOC_KEYWORD_ITEM_SEPARATOR);
|
||||
if(keywordIndicator && (!separatorIndicator || separatorIndicator > keywordIndicator)) {
|
||||
if(i<nameCapacity) {
|
||||
if(alreadyAddedAKeyword) {
|
||||
name[i]=';';
|
||||
} else {
|
||||
name[i]='@';
|
||||
}
|
||||
/* Copy POSIX-style charset specifier, if any [mr.utf8] */
|
||||
if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *localeID == '.') {
|
||||
UBool done = FALSE;
|
||||
do {
|
||||
char c = *localeID;
|
||||
switch (c) {
|
||||
case 0:
|
||||
case '@':
|
||||
done = TRUE;
|
||||
break;
|
||||
default:
|
||||
if (i<nameCapacity) {
|
||||
name[i] = c;
|
||||
}
|
||||
++i;
|
||||
++fieldCount;
|
||||
i += locale_getKeywords(localeID+1, '@', name+i, nameCapacity-i, NULL, 0, NULL, TRUE, err);
|
||||
} else if(fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
|
||||
++localeID;
|
||||
break;
|
||||
}
|
||||
} while (!done);
|
||||
}
|
||||
|
||||
/* Scan ahead to next '@' and determine if it is followed by '=' and/or ';' */
|
||||
if ((localeID=locale_getKeywordsStart(localeID))!=NULL) {
|
||||
keywordAssign = uprv_strchr(localeID, '=');
|
||||
separatorIndicator = uprv_strchr(localeID, ';');
|
||||
}
|
||||
|
||||
/* Copy POSIX-style variant, if any [mr@FOO] */
|
||||
if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
|
||||
localeID != NULL && keywordAssign == NULL) {
|
||||
for (;;) {
|
||||
char c = *localeID;
|
||||
if (c == 0) {
|
||||
break;
|
||||
}
|
||||
if (i<nameCapacity) {
|
||||
name[i] = c;
|
||||
}
|
||||
++i;
|
||||
++localeID;
|
||||
}
|
||||
}
|
||||
|
||||
if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
|
||||
/* Handle @FOO variant if @ is present and not followed by = */
|
||||
if (localeID!=NULL && keywordAssign==NULL) {
|
||||
int32_t posixVariantSize;
|
||||
/* Add missing '_' if needed */
|
||||
if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
|
||||
do {
|
||||
if(i<nameCapacity) {
|
||||
name[i]='_';
|
||||
|
@ -850,43 +967,79 @@ _canonicalize(const char* localeID,
|
|||
++i;
|
||||
++fieldCount;
|
||||
} while(fieldCount<2);
|
||||
i+=_getVariant(localeID+1, '@', name+i, nameCapacity-i);
|
||||
}
|
||||
posixVariantSize = _getVariantEx(localeID+1, '@', name+i, nameCapacity-i,
|
||||
variantSize > 0);
|
||||
if (posixVariantSize > 0) {
|
||||
if (variant == NULL) {
|
||||
variant = name+i;
|
||||
}
|
||||
i += posixVariantSize;
|
||||
variantSize += posixVariantSize;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check for EURO variants. */
|
||||
sawEuro = _deleteVariant(variant, variantSize, "EURO", 4);
|
||||
i -= sawEuro;
|
||||
if (sawEuro > 0 && name[i-1] == '_') { /* delete trailing '_' */
|
||||
--i;
|
||||
}
|
||||
|
||||
/* Look up the ID in the canonicalization map */
|
||||
for (j=0; j<(int32_t)(sizeof(CANONICALIZE_MAP)/sizeof(CANONICALIZE_MAP[0])); j++) {
|
||||
const char* id = CANONICALIZE_MAP[j].id;
|
||||
int32_t n = uprv_strlen(id);
|
||||
if (i == n && uprv_strncmp(name, id, n) == 0) {
|
||||
i = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
|
||||
addKeyword = CANONICALIZE_MAP[j].keyword;
|
||||
addValue = CANONICALIZE_MAP[j].value;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Explicit EURO variant overrides keyword in CANONICALIZE_MAP */
|
||||
if (sawEuro > 0) {
|
||||
addKeyword = "currency";
|
||||
addValue = "EUR";
|
||||
}
|
||||
}
|
||||
|
||||
if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
|
||||
if (localeID!=NULL && keywordAssign!=NULL &&
|
||||
(!separatorIndicator || separatorIndicator > keywordAssign)) {
|
||||
if(i<nameCapacity) {
|
||||
name[i]='@';
|
||||
}
|
||||
++i;
|
||||
++fieldCount;
|
||||
i += _getKeywords(localeID+1, '@', name+i, nameCapacity-i, NULL, 0, NULL, TRUE,
|
||||
addKeyword, addValue, err);
|
||||
} else if (addKeyword != NULL) {
|
||||
U_ASSERT(addValue != NULL);
|
||||
/* inelegant but works -- later make _getKeywords do this? */
|
||||
i += _copyCount(name+i, nameCapacity-i, "@");
|
||||
i += _copyCount(name+i, nameCapacity-i, addKeyword);
|
||||
i += _copyCount(name+i, nameCapacity-i, "=");
|
||||
i += _copyCount(name+i, nameCapacity-i, addValue);
|
||||
}
|
||||
}
|
||||
len = u_terminateChars(name, nameCapacity, i, err);
|
||||
|
||||
if ((options & _ULOC_CANONICALIZE) != 0) {
|
||||
if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
|
||||
if (U_SUCCESS(*err) && *err != U_STRING_NOT_TERMINATED_WARNING) {
|
||||
char *euroVariant;
|
||||
/* See if this is an already known locale */
|
||||
for (idx = 0; idx < (int32_t)(sizeof(CANONICALIZATION_MAP)/sizeof(CANONICALIZATION_MAP[0])); idx++) {
|
||||
if (uprv_strncmp(localeBuffer, CANONICALIZATION_MAP[idx].id, len) == 0) {
|
||||
int32_t nameLen = uprv_strlen(CANONICALIZATION_MAP[idx].canonicalID);
|
||||
uprv_strncpy(localeBuffer, CANONICALIZATION_MAP[idx].canonicalID, nameLen);
|
||||
u_terminateChars(localeBuffer, sizeof(localeBuffer), nameLen, err);
|
||||
len = nameLen;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* convert the POSIX euro variant */
|
||||
euroVariant = (char *)uprv_strstr(localeBuffer, "_EURO");
|
||||
if (euroVariant && uprv_strlen(euroVariant) == 5) {
|
||||
int32_t euroKeyLen = 13; /* strlen("@currency=EUR")13 */
|
||||
int32_t euroDiff = 8; /* strlen("@currency=EUR")13 - strlen("_EURO")5 */
|
||||
len += euroDiff;
|
||||
if (euroDiff > (_nameCapacity - len)) {
|
||||
euroDiff -= (_nameCapacity - len);
|
||||
}
|
||||
uprv_strncpy(euroVariant, "@currency=EUR", euroKeyLen);
|
||||
u_terminateChars(localeBuffer, sizeof(localeBuffer), len, err);
|
||||
}
|
||||
uprv_strncpy(_name, localeBuffer, (len > _nameCapacity) ? _nameCapacity : len);
|
||||
u_terminateChars(_name, _nameCapacity, len, err);
|
||||
} else {
|
||||
/* It's too long. We can't convert anything meaningful out of this. */
|
||||
uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
|
||||
u_terminateChars(result, resultCapacity, len, err);
|
||||
|
||||
/* I consider the following a bad idea. If the user asks for full
|
||||
canonicalization (canonicalize()), and it fails, we shouldn't
|
||||
silently return 'lite' canonicalization (getName()) with no error
|
||||
result. We should just fail! [alan] */
|
||||
|
||||
/*} else {
|
||||
*err = U_ZERO_ERROR;
|
||||
len = _canonicalize(original, _name, _nameCapacity, 0, err);
|
||||
return _canonicalize(original, result, resultCapacity, 0, err);
|
||||
*/
|
||||
}
|
||||
}
|
||||
return len;
|
||||
|
@ -1270,7 +1423,7 @@ uloc_getKeywordValue(const char* localeID,
|
|||
|
||||
if(status && U_SUCCESS(*status) && localeID) {
|
||||
|
||||
const char* startSearchHere = uprv_strchr(localeID, ULOC_KEYWORD_SEPARATOR);
|
||||
const char* startSearchHere = uprv_strchr(localeID, '@');
|
||||
if(startSearchHere == NULL) {
|
||||
/* no keywords, return at once */
|
||||
return 0;
|
||||
|
@ -1295,7 +1448,7 @@ uloc_getKeywordValue(const char* localeID,
|
|||
while(*startSearchHere == ' ') {
|
||||
startSearchHere++;
|
||||
}
|
||||
nextSeparator = uprv_strchr(startSearchHere, ULOC_KEYWORD_ASSIGN);
|
||||
nextSeparator = uprv_strchr(startSearchHere, '=');
|
||||
/* need to normalize both keyword and keyword name */
|
||||
if(!nextSeparator) {
|
||||
break;
|
||||
|
@ -1314,7 +1467,7 @@ uloc_getKeywordValue(const char* localeID,
|
|||
}
|
||||
localeKeywordNameBuffer[i] = 0;
|
||||
|
||||
startSearchHere = uprv_strchr(nextSeparator, ULOC_KEYWORD_ITEM_SEPARATOR);
|
||||
startSearchHere = uprv_strchr(nextSeparator, ';');
|
||||
|
||||
if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
|
||||
nextSeparator++;
|
||||
|
@ -1351,22 +1504,11 @@ uloc_getKeywordValue(const char* localeID,
|
|||
return 0;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
char keyword[ULOC_KEYWORD_BUFFER_LEN];
|
||||
int32_t keywordLen;
|
||||
const char *valueStart;
|
||||
int32_t valueLen;
|
||||
} keywordStruct;
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
compareKeywordStructs(const void *context, const void *left, const void *right) {
|
||||
const char* leftString = ((const keywordStruct *)left)->keyword;
|
||||
const char* rightString = ((const keywordStruct *)right)->keyword;
|
||||
return uprv_strcmp(leftString, rightString);
|
||||
}
|
||||
|
||||
static const char *
|
||||
locale_getKeywordsStart(const char *localeID) {
|
||||
/* TODO This seems odd. No matter what charset we're on, won't '@'
|
||||
be '@'? Or are we building on one EBCDIC machine and moving the
|
||||
library to another? */
|
||||
const char *result = NULL;
|
||||
static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
|
||||
if((result = uprv_strchr(localeID, '@')) != NULL) {
|
||||
|
@ -1385,81 +1527,158 @@ locale_getKeywordsStart(const char *localeID) {
|
|||
|
||||
U_CFUNC int32_t
|
||||
locale_getKeywords(const char *localeID,
|
||||
char prev,
|
||||
char *keywords, int32_t keywordCapacity,
|
||||
char *values, int32_t valuesCapacity, int32_t *valLen,
|
||||
UBool valuesToo,
|
||||
UErrorCode *status)
|
||||
char prev,
|
||||
char *keywords, int32_t keywordCapacity,
|
||||
char *values, int32_t valuesCapacity, int32_t *valLen,
|
||||
UBool valuesToo,
|
||||
UErrorCode *status) {
|
||||
return _getKeywords(localeID, prev, keywords, keywordCapacity,
|
||||
values, valuesCapacity, valLen, valuesToo,
|
||||
NULL, NULL, status);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
char keyword[ULOC_KEYWORD_BUFFER_LEN];
|
||||
int32_t keywordLen;
|
||||
const char *valueStart;
|
||||
int32_t valueLen;
|
||||
} KeywordStruct;
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
compareKeywordStructs(const void *context, const void *left, const void *right) {
|
||||
const char* leftString = ((const KeywordStruct *)left)->keyword;
|
||||
const char* rightString = ((const KeywordStruct *)right)->keyword;
|
||||
return uprv_strcmp(leftString, rightString);
|
||||
}
|
||||
|
||||
/**
|
||||
* Both addKeyword and addValue must already be in canonical form.
|
||||
* Either both addKeyword and addValue are NULL, or neither is NULL.
|
||||
* If they are not NULL they must be zero terminated.
|
||||
* If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
|
||||
*/
|
||||
static int32_t
|
||||
_getKeywords(const char *localeID,
|
||||
char prev,
|
||||
char *keywords, int32_t keywordCapacity,
|
||||
char *values, int32_t valuesCapacity, int32_t *valLen,
|
||||
UBool valuesToo,
|
||||
const char* addKeyword,
|
||||
const char* addValue,
|
||||
UErrorCode *status)
|
||||
{
|
||||
keywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
|
||||
KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
|
||||
|
||||
int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
|
||||
int32_t numKeywords = 0;
|
||||
const char* startSearchHere = localeID;
|
||||
const char* nextSeparator = NULL;
|
||||
int32_t i = 0;
|
||||
const char* pos = localeID;
|
||||
const char* equalSign = NULL;
|
||||
const char* semicolon = NULL;
|
||||
int32_t i = 0, j, n;
|
||||
int32_t keywordsLen = 0;
|
||||
int32_t valuesLen = 0;
|
||||
|
||||
|
||||
if(prev == '@') { /* start of keyword definition */
|
||||
/* we will grab pairs, trim spaces, lowercase keywords, sort and return */
|
||||
do {
|
||||
UBool duplicate = FALSE;
|
||||
/* skip leading spaces */
|
||||
while(*pos == ' ') {
|
||||
pos++;
|
||||
}
|
||||
if (!*pos) { /* handle trailing "; " */
|
||||
break;
|
||||
}
|
||||
if(numKeywords == maxKeywords) {
|
||||
*status = U_INTERNAL_PROGRAM_ERROR;
|
||||
return 0;
|
||||
}
|
||||
/* skip leading spaces (allowed?) */
|
||||
while(*startSearchHere == ' ') {
|
||||
startSearchHere++;
|
||||
}
|
||||
nextSeparator = uprv_strchr(startSearchHere, ULOC_KEYWORD_ASSIGN);
|
||||
/* need to normalize both keyword and keyword name */
|
||||
if(!nextSeparator) {
|
||||
equalSign = uprv_strchr(pos, '=');
|
||||
semicolon = uprv_strchr(pos, ';');
|
||||
/* lack of '=' [foo@currency] is illegal */
|
||||
/* ';' before '=' [foo@currency;collation=pinyin] is illegal */
|
||||
if(!equalSign || (semicolon && semicolon<equalSign)) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
if(nextSeparator - startSearchHere >= ULOC_KEYWORD_BUFFER_LEN) {
|
||||
/* need to normalize both keyword and keyword name */
|
||||
if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
|
||||
/* keyword name too long for internal buffer */
|
||||
*status = U_INTERNAL_PROGRAM_ERROR;
|
||||
return 0;
|
||||
}
|
||||
for(i = 0; i < nextSeparator - startSearchHere; i++) {
|
||||
keywordList[numKeywords].keyword[i] = uprv_tolower(startSearchHere[i]);
|
||||
for(i = 0, n = 0; i < equalSign - pos; ++i) {
|
||||
if (pos[i] != ' ') {
|
||||
keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
|
||||
}
|
||||
}
|
||||
/* trim trailing spaces */
|
||||
while(keywordList[numKeywords].keyword[i-1] == ' ') {
|
||||
i--;
|
||||
}
|
||||
keywordList[numKeywords].keyword[i] = 0;
|
||||
keywordList[numKeywords].keywordLen = i;
|
||||
keywordList[numKeywords].keyword[n] = 0;
|
||||
keywordList[numKeywords].keywordLen = n;
|
||||
/* now grab the value part. First we skip the '=' */
|
||||
nextSeparator++;
|
||||
equalSign++;
|
||||
/* then we leading spaces */
|
||||
while(*nextSeparator == ' ') {
|
||||
nextSeparator++;
|
||||
while(*equalSign == ' ') {
|
||||
equalSign++;
|
||||
}
|
||||
keywordList[numKeywords].valueStart = nextSeparator;
|
||||
keywordList[numKeywords].valueStart = equalSign;
|
||||
|
||||
startSearchHere = uprv_strchr(nextSeparator, ULOC_KEYWORD_ITEM_SEPARATOR);
|
||||
pos = semicolon;
|
||||
i = 0;
|
||||
if(startSearchHere) {
|
||||
while(*(startSearchHere - i - 1) == ' ') {
|
||||
if(pos) {
|
||||
while(*(pos - i - 1) == ' ') {
|
||||
i++;
|
||||
}
|
||||
keywordList[numKeywords].valueLen = startSearchHere - nextSeparator - i;
|
||||
startSearchHere++;
|
||||
keywordList[numKeywords].valueLen = pos - equalSign - i;
|
||||
pos++;
|
||||
} else {
|
||||
i = uprv_strlen(nextSeparator);
|
||||
while(nextSeparator[i-1] == ' ') {
|
||||
i = uprv_strlen(equalSign);
|
||||
while(equalSign[i-1] == ' ') {
|
||||
i--;
|
||||
}
|
||||
keywordList[numKeywords].valueLen = i;
|
||||
}
|
||||
numKeywords++;
|
||||
} while(startSearchHere);
|
||||
/* If this is a duplicate keyword, then ignore it */
|
||||
for (j=0; j<numKeywords; ++j) {
|
||||
if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
|
||||
duplicate = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!duplicate) {
|
||||
++numKeywords;
|
||||
}
|
||||
} while(pos);
|
||||
|
||||
/* Handle addKeyword/addValue. */
|
||||
if (addKeyword != NULL) {
|
||||
UBool duplicate = FALSE;
|
||||
U_ASSERT(addValue != NULL);
|
||||
/* Search for duplicate; if found, do nothing. Explicit keyword
|
||||
overrides addKeyword. */
|
||||
for (j=0; j<numKeywords; ++j) {
|
||||
if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
|
||||
duplicate = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!duplicate) {
|
||||
if (numKeywords == maxKeywords) {
|
||||
*status = U_INTERNAL_PROGRAM_ERROR;
|
||||
return 0;
|
||||
}
|
||||
uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
|
||||
keywordList[numKeywords].keywordLen = uprv_strlen(addKeyword);
|
||||
keywordList[numKeywords].valueStart = addValue;
|
||||
keywordList[numKeywords].valueLen = uprv_strlen(addValue);
|
||||
++numKeywords;
|
||||
}
|
||||
} else {
|
||||
U_ASSERT(addValue == NULL);
|
||||
}
|
||||
|
||||
/* now we have a list of keywords */
|
||||
/* we need to sort it */
|
||||
uprv_sortArray(keywordList, numKeywords, sizeof(keywordStruct), compareKeywordStructs, NULL, FALSE, status);
|
||||
uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
|
||||
|
||||
/* Now construct the keyword part */
|
||||
for(i = 0; i < numKeywords; i++) {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2003, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2004, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/********************************************************************************
|
||||
|
@ -358,51 +358,60 @@ static void TestNullDefault() {
|
|||
static void TestPrefixes() {
|
||||
int row = 0;
|
||||
int n;
|
||||
const char *loc;
|
||||
const char *loc, *expected;
|
||||
|
||||
const char *testData[][6] =
|
||||
const char *testData[][7] =
|
||||
{
|
||||
{"sv", "", "FI", "AL", "sv-fi-al", "sv_FI_AL" },
|
||||
{"en", "", "GB", "", "en-gb", "en_GB" },
|
||||
{"i-hakka", "", "MT", "XEMXIJA", "i-hakka_MT_XEMXIJA", "i-hakka_MT_XEMXIJA"},
|
||||
{"i-hakka", "", "CN", "", "i-hakka_CN", "i-hakka_CN"},
|
||||
{"i-hakka", "", "MX", "", "I-hakka_MX", "i-hakka_MX"},
|
||||
{"x-klingon", "", "US", "SANJOSE", "X-KLINGON_us_SANJOSE", "x-klingon_US_SANJOSE"},
|
||||
/* NULL canonicalize() column means "expect same as getName()" */
|
||||
{"sv", "", "FI", "AL", "sv-fi-al", "sv_FI_AL", NULL},
|
||||
{"en", "", "GB", "", "en-gb", "en_GB", NULL},
|
||||
{"i-hakka", "", "MT", "XEMXIJA", "i-hakka_MT_XEMXIJA", "i-hakka_MT_XEMXIJA", NULL},
|
||||
{"i-hakka", "", "CN", "", "i-hakka_CN", "i-hakka_CN", NULL},
|
||||
{"i-hakka", "", "MX", "", "I-hakka_MX", "i-hakka_MX", NULL},
|
||||
{"x-klingon", "", "US", "SANJOSE", "X-KLINGON_us_SANJOSE", "x-klingon_US_SANJOSE", NULL},
|
||||
|
||||
{"mr", "", "", "", "mr.utf8", "mr"},
|
||||
{"de", "", "TV", "", "de-tv.koi8r", "de_TV"},
|
||||
{"x-piglatin", "", "ML", "", "x-piglatin_ML.MBE", "x-piglatin_ML"}, /* Multibyte English */
|
||||
{"i-cherokee", "","US", "", "i-Cherokee_US.utf7", "i-cherokee_US"},
|
||||
{"x-filfli", "", "MT", "FILFLA", "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA"},
|
||||
{"no", "", "NO", "NY", "no-no-ny.utf32@B", "no_NO_NY"}, /* @ ignored unless variant is empty */
|
||||
{"no", "", "NO", "", "no-no.utf32@B", "no_NO_B" },
|
||||
{"no", "", "", "NY", "no__ny", "no__NY" },
|
||||
{"no", "", "", "", "no@ny", "no__NY" },
|
||||
{"el", "Latn", "", "", "el-latn", "el_Latn" },
|
||||
{"en", "Cyrl", "RU", "", "en-cyrl-ru", "en_Cyrl_RU" },
|
||||
{"zh", "Hant", "TW", "STROKE", "zh-hant_TW_STROKE", "zh_Hant_TW_STROKE" },
|
||||
{"qq", "Qqqq", "QQ", "QQ", "qq_Qqqq_QQ_QQ", "qq_Qqqq_QQ_QQ" },
|
||||
{"qq", "Qqqq", "", "QQ", "qq_Qqqq__QQ", "qq_Qqqq__QQ" },
|
||||
{"12", "3456", "78", "90", "12_3456_78_90", "12_3456_78_90" }, /* total garbage */
|
||||
{"mr", "", "", "", "mr.utf8", "mr.utf8", "mr"},
|
||||
{"de", "", "TV", "", "de-tv.koi8r", "de_TV.koi8r", "de_TV"},
|
||||
{"x-piglatin", "", "ML", "", "x-piglatin_ML.MBE", "x-piglatin_ML.MBE", "x-piglatin_ML"}, /* Multibyte English */
|
||||
{"i-cherokee", "","US", "", "i-Cherokee_US.utf7", "i-cherokee_US.utf7", "i-cherokee_US"},
|
||||
{"x-filfli", "", "MT", "FILFLA", "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA"},
|
||||
{"no", "", "NO", "NY", "no-no-ny.utf32@B", "no_NO_NY.utf32@B", "no_NO_NY_B"},
|
||||
{"no", "", "NO", "", "no-no.utf32@B", "no_NO.utf32@B", "no_NO_B"},
|
||||
{"no", "", "", "NY", "no__ny", "no__NY", NULL},
|
||||
{"no", "", "", "", "no@ny", "no@ny", "no__NY"},
|
||||
{"el", "Latn", "", "", "el-latn", "el_Latn", NULL},
|
||||
{"en", "Cyrl", "RU", "", "en-cyrl-ru", "en_Cyrl_RU", NULL},
|
||||
{"zh", "Hant", "TW", "STROKE", "zh-hant_TW_STROKE", "zh_Hant_TW_STROKE", NULL},
|
||||
{"qq", "Qqqq", "QQ", "QQ", "qq_Qqqq_QQ_QQ", "qq_Qqqq_QQ_QQ", NULL},
|
||||
{"qq", "Qqqq", "", "QQ", "qq_Qqqq__QQ", "qq_Qqqq__QQ", NULL},
|
||||
{"12", "3456", "78", "90", "12_3456_78_90", "12_3456_78_90", NULL}, /* total garbage */
|
||||
|
||||
{ "","","","",""}
|
||||
{NULL,NULL,NULL,NULL,NULL,NULL,NULL}
|
||||
};
|
||||
|
||||
const char *testTitles[] = { "uloc_getLanguage()", "uloc_getScript()", "uloc_getCountry()", "uloc_getVariant()", "name", "uloc_getName()", "country3" };
|
||||
const char *testTitles[] = {
|
||||
"uloc_getLanguage()",
|
||||
"uloc_getScript()",
|
||||
"uloc_getCountry()",
|
||||
"uloc_getVariant()",
|
||||
"name",
|
||||
"uloc_getName()",
|
||||
"uloc_canonicalize()"
|
||||
};
|
||||
|
||||
char buf[PREFIXBUFSIZ];
|
||||
int32_t len;
|
||||
UErrorCode err;
|
||||
|
||||
|
||||
for(row=0;testData[row][0][0] != 0;row++) {
|
||||
for(row=0;testData[row][0] != NULL;row++) {
|
||||
loc = testData[row][NAME];
|
||||
log_verbose("Test #%d: %s\n", row, loc);
|
||||
|
||||
err = U_ZERO_ERROR;
|
||||
len=0;
|
||||
buf[0]=0;
|
||||
for(n=0;n<=(NAME+1);n++) {
|
||||
for(n=0;n<=(NAME+2);n++) {
|
||||
if(n==NAME) continue;
|
||||
|
||||
for(len=0;len<PREFIXBUFSIZ;len++) {
|
||||
|
@ -431,6 +440,10 @@ static void TestPrefixes() {
|
|||
len = uloc_getName(loc, buf, PREFIXBUFSIZ, &err);
|
||||
break;
|
||||
|
||||
case NAME+2:
|
||||
len = uloc_canonicalize(loc, buf, PREFIXBUFSIZ, &err);
|
||||
break;
|
||||
|
||||
default:
|
||||
strcpy(buf, "**??");
|
||||
len=4;
|
||||
|
@ -455,9 +468,14 @@ static void TestPrefixes() {
|
|||
row, testTitles[n], loc, buf, buf[len+1]);
|
||||
}
|
||||
|
||||
if(strcmp(buf, testData[row][n])) {
|
||||
expected = testData[row][n];
|
||||
if (expected == NULL && n == (NAME+2)) {
|
||||
/* NULL expected canonicalize() means "expect same as getName()" */
|
||||
expected = testData[row][NAME+1];
|
||||
}
|
||||
if(strcmp(buf, expected)) {
|
||||
log_err("#%d: %s on %s: -> [%s] (expected '%s'!)\n",
|
||||
row, testTitles[n], loc, buf, testData[row][n]);
|
||||
row, testTitles[n], loc, buf, expected);
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -2279,20 +2297,38 @@ static void TestKeywordVariants(void)
|
|||
const char *localeID;
|
||||
const char *expectedLocaleID;
|
||||
const char *expectedLocaleIDNoKeywords;
|
||||
const char *expectedCanonicalID;
|
||||
const char *expectedKeywords[10];
|
||||
int32_t numKeywords;
|
||||
UErrorCode expectedStatus;
|
||||
UErrorCode expectedStatus; /* from uloc_openKeywords */
|
||||
} testCases[] = {
|
||||
{
|
||||
"de_DE@ currency = euro; C o ll A t i o n = Phonebook ; C alen dar = budhist ",
|
||||
"de_DE@c alen dar=budhist;c o ll a t i o n=Phonebook;currency=euro",
|
||||
"de_DE@ currency = euro; C o ll A t i o n = Phonebook ; C alen dar = buddhist ",
|
||||
"de_DE@calendar=buddhist;collation=Phonebook;currency=euro",
|
||||
"de_DE",
|
||||
{"c alen dar", "c o ll a t i o n", "currency"},
|
||||
"de_DE@calendar=buddhist;collation=Phonebook;currency=euro",
|
||||
{"calendar", "collation", "currency"},
|
||||
3,
|
||||
U_ZERO_ERROR
|
||||
},
|
||||
{ "de_DE@euro", "de_DE_EURO", "de_DE", {""}, 0, U_INVALID_FORMAT_ERROR},
|
||||
/*{ "de_DE@euro;collation=phonebook", "", "", U_INVALID_FORMAT_ERROR}*/
|
||||
{
|
||||
"de_DE@euro",
|
||||
"de_DE@euro",
|
||||
"de_DE",
|
||||
"de_DE@currency=EUR",
|
||||
{"","","","","","",""},
|
||||
0,
|
||||
U_INVALID_FORMAT_ERROR /* must have '=' after '@' */
|
||||
},
|
||||
{
|
||||
"de_DE@euro;collation=phonebook",
|
||||
"de_DE", /* error result; bad format */
|
||||
"de_DE", /* error result; bad format */
|
||||
"de_DE", /* error result; bad format */
|
||||
{"","","","","","",""},
|
||||
0,
|
||||
U_INVALID_FORMAT_ERROR
|
||||
}
|
||||
};
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
|
@ -2310,8 +2346,9 @@ static void TestKeywordVariants(void)
|
|||
keywords = uloc_openKeywords(testCases[i].localeID, &status);
|
||||
|
||||
if(status != testCases[i].expectedStatus) {
|
||||
log_err("Expected to get status %s. Got %s instead\n",
|
||||
u_errorName(testCases[i].expectedStatus), u_errorName(status));
|
||||
log_err("Expected to uloc_openKeywords(\"%s\") => status %s. Got %s instead\n",
|
||||
testCases[i].localeID,
|
||||
u_errorName(testCases[i].expectedStatus), u_errorName(status));
|
||||
}
|
||||
status = U_ZERO_ERROR;
|
||||
if(keywords) {
|
||||
|
@ -2330,11 +2367,15 @@ static void TestKeywordVariants(void)
|
|||
uenum_close(keywords);
|
||||
}
|
||||
resultLen = uloc_getName(testCases[i].localeID, buffer, 256, &status);
|
||||
if(uprv_strcmp(testCases[i].expectedLocaleID, buffer) != 0) {
|
||||
log_err("Expected to get \"%s\" from \"%s\". Got \"%s\" instead\n",
|
||||
testCases[i].expectedLocaleID, testCases[i].localeID, buffer);
|
||||
if (uprv_strcmp(testCases[i].expectedLocaleID, buffer) != 0) {
|
||||
log_err("Expected uloc_getName(\"%s\") => \"%s\"; got \"%s\"\n",
|
||||
testCases[i].localeID, testCases[i].expectedLocaleID, buffer);
|
||||
}
|
||||
|
||||
resultLen = uloc_canonicalize(testCases[i].localeID, buffer, 256, &status);
|
||||
if (uprv_strcmp(testCases[i].expectedCanonicalID, buffer) != 0) {
|
||||
log_err("Expected uloc_canonicalize(\"%s\") => \"%s\"; got \"%s\"\n",
|
||||
testCases[i].localeID, testCases[i].expectedCanonicalID, buffer);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -2374,8 +2415,8 @@ static void TestCanonicalization(void)
|
|||
const char *localeID;
|
||||
const char *expectedValue;
|
||||
} testCases[] = {
|
||||
{ "ca_ES_PREEURO-with-extra-stuff-that really doesn't make any sense-unless-you're trying to increase code coverage",
|
||||
"ca_ES_PREEURO_WITH_EXTRA_STUFF_THAT REALLY DOESN'T MAKE ANY SENSE_UNLESS_YOU'RE TRYING TO INCREASE CODE COVERAGE"},
|
||||
/* { "ca_ES_PREEURO-with-extra-stuff-that really doesn't make any sense-unless-you're trying to increase code coverage",
|
||||
"ca_ES_PREEURO_WITH_EXTRA_STUFF_THAT REALLY DOESN'T MAKE ANY SENSE_UNLESS_YOU'RE TRYING TO INCREASE CODE COVERAGE"}, */
|
||||
{ "ca_ES_PREEURO", "ca_ES@currency=ESP" },
|
||||
{ "de_AT_PREEURO", "de_AT@currency=ATS" },
|
||||
{ "de_DE_PREEURO", "de_DE@currency=DEM" },
|
||||
|
@ -2397,7 +2438,7 @@ static void TestCanonicalization(void)
|
|||
{ "pt_PT_PREEURO", "pt_PT@currency=PTE" },
|
||||
{ "de__PHONEBOOK", "de@collation=phonebook" },
|
||||
{ "en_GB_EURO", "en_GB@currency=EUR" },
|
||||
{ "en_GB@EURO", "en_GB@currency=EUR" },
|
||||
{ "en_GB@EURO", "en_GB@currency=EUR" }, /* POSIX ID */
|
||||
{ "es__TRADITIONAL", "es@collation=traditional" },
|
||||
{ "hi__DIRECT", "hi@collation=direct" },
|
||||
{ "ja_JP_TRADITIONAL", "ja_JP@calendar=japanese" },
|
||||
|
@ -2409,9 +2450,9 @@ static void TestCanonicalization(void)
|
|||
{ "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin" },
|
||||
{ "en_US_POSIX", "en_US_POSIX" },
|
||||
{ "hy_AM_REVISED", "hy_AM_REVISED" },
|
||||
{ "no_NO_NY", "no_NO_NY" },
|
||||
{ "no@ny", "no__NY" },
|
||||
{ "no-no.utf32@B", "no_NO_B" },
|
||||
{ "no_NO_NY", "no_NO_NY" /* not: "nn_NO" [alan ICU3.0] */ },
|
||||
{ "no@ny", "no__NY" /* not: "nn" [alan ICU3.0] */ }, /* POSIX ID */
|
||||
{ "no-no.utf32@B", "no_NO_B" /* not: "nb_NO_B" [alan ICU3.0] */ }, /* POSIX ID */
|
||||
{ "qz-qz@Euro", "qz_QZ@currency=EUR" }, /* qz-qz uses private use iso codes */
|
||||
{ "en-BOONT", "en__BOONT" }, /* registered name */
|
||||
{ "de-1901", "de__1901" }, /* registered name */
|
||||
|
@ -2422,6 +2463,28 @@ static void TestCanonicalization(void)
|
|||
{ "uz-UZ-Latn", "uz_Latn_UZ" }, /* .NET name */
|
||||
{ "zh-CHS", "zh_Hans" }, /* .NET name */
|
||||
{ "zh-CHT", "zh_TW" }, /* .NET name This may change back to zh_Hant */
|
||||
|
||||
/* posix behavior that used to be performed by getName */
|
||||
{ "mr.utf8", "mr" },
|
||||
{ "de-tv.koi8r", "de_TV" },
|
||||
{ "x-piglatin_ML.MBE", "x-piglatin_ML" },
|
||||
{ "i-cherokee_US.utf7", "i-cherokee_US" },
|
||||
{ "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA" },
|
||||
{ "no-no-ny.utf8@B", "no_NO_NY_B" /* not: "nn_NO" [alan ICU3.0] */ }, /* @ ignored unless variant is empty */
|
||||
|
||||
/* fleshing out canonicalization */
|
||||
/* trim space and sort keywords, ';' is separator so not present at end in canonical form */
|
||||
{ "en_Hant_IL_VALLEY_GIRL@ currency = EUR; calendar = Japanese ;", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR" },
|
||||
/* already-canonical ids are not changed */
|
||||
{ "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR" },
|
||||
/* PRE_EURO and EURO conversions don't affect other keywords */
|
||||
{ "es_ES_PREEURO@CALendar=Japanese", "es_ES@calendar=Japanese;currency=ESP" },
|
||||
{ "es_ES_EURO@SHOUT=zipeedeedoodah", "es_ES@currency=EUR;shout=zipeedeedoodah" },
|
||||
/* currency keyword overrides PRE_EURO and EURO currency */
|
||||
{ "es_ES_PREEURO@currency=EUR", "es_ES@currency=EUR" },
|
||||
{ "es_ES_EURO@currency=ESP", "es_ES@currency=ESP" },
|
||||
/* norwegian is just too weird, if we handle things in their full generality */
|
||||
{ "no-Hant-GB_NY@currency=$$$", "no_Hant_GB_NY@currency=$$$" /* not: "nn_Hant_GB@currency=$$$" [alan ICU3.0] */ },
|
||||
};
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
|
|
@ -311,8 +311,11 @@ void LocaleTest::TestBasicGetters() {
|
|||
errln("Country code mismatch: " + temp + " versus \"ZX\"");
|
||||
|
||||
temp = test8.getVariant();
|
||||
if (temp != UnicodeString("SPECIAL") )
|
||||
errln("Variant code mismatch: " + temp + " versus \"SPECIAL\"");
|
||||
//if (temp != UnicodeString("SPECIAL") )
|
||||
// errln("Variant code mismatch: " + temp + " versus \"SPECIAL\"");
|
||||
// As of 3.0, the "@special" will *not* be parsed by uloc_getName()
|
||||
if (temp != UnicodeString("") )
|
||||
errln("Variant code mismatch: " + temp + " versus \"\"");
|
||||
|
||||
if (Locale::getDefault() != Locale::createFromName(NULL))
|
||||
errln("Locale::getDefault() == Locale::createFromName(NULL)");
|
||||
|
@ -528,7 +531,7 @@ void LocaleTest::TestSimpleObjectStuff() {
|
|||
Locale test5("aa", "AA", "");
|
||||
Locale test6("aa", "AA", "ANTARES");
|
||||
Locale test7("aa", "AA", "JUPITER");
|
||||
Locale test8 = Locale::createFromName("aa-aa.utf8@jupiter");
|
||||
Locale test8 = Locale::createFromName("aa-aa-jupiTER"); // was "aa-aa.utf8@jupiter" but in 3.0 getName won't normalize that
|
||||
|
||||
// now list them all for debugging usage.
|
||||
test_dumpLocale(test1);
|
||||
|
@ -1547,19 +1550,30 @@ LocaleTest::TestKeywordVariants(void) {
|
|||
struct {
|
||||
const char *localeID;
|
||||
const char *expectedLocaleID;
|
||||
//const char *expectedLocaleIDNoKeywords;
|
||||
//const char *expectedCanonicalID;
|
||||
const char *expectedKeywords[10];
|
||||
int32_t numKeywords;
|
||||
UErrorCode expectedStatus;
|
||||
} testCases[] = {
|
||||
{ "de_DE@ currency = euro; C o ll A t i o n = Phonebook ; C alen dar = budhist ",
|
||||
"de_DE@c alen dar=budhist;c o ll a t i o n=Phonebook;currency=euro",
|
||||
{ "c alen dar", "c o ll a t i o n", "currency"},
|
||||
3,
|
||||
U_ZERO_ERROR
|
||||
{
|
||||
"de_DE@ currency = euro; C o ll A t i o n = Phonebook ; C alen dar = buddhist ",
|
||||
"de_DE@calendar=buddhist;collation=Phonebook;currency=euro",
|
||||
//"de_DE",
|
||||
//"de_DE@calendar=buddhist;collation=Phonebook;currency=euro",
|
||||
{"calendar", "collation", "currency"},
|
||||
3,
|
||||
U_ZERO_ERROR
|
||||
},
|
||||
{ "de_DE@euro", "de_DE_EURO", {""}, 0, U_ZERO_ERROR}, // In C++, locale name gets canonicalized first.
|
||||
// therefore, getKeywords will not encounter the POSIX variant
|
||||
/*{ "de_DE@euro;collation=phonebook", "", "", U_INVALID_FORMAT_ERROR}*/
|
||||
{
|
||||
"de_DE@euro",
|
||||
"de_DE@euro",
|
||||
//"de_DE",
|
||||
//"de_DE@currency=EUR",
|
||||
{"","","","","","",""},
|
||||
0,
|
||||
U_INVALID_FORMAT_ERROR /* must have '=' after '@' */
|
||||
}
|
||||
};
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
|
@ -1635,10 +1649,10 @@ LocaleTest::TestKeywordVariantParsing(void) {
|
|||
const char *keyword;
|
||||
const char *expectedValue;
|
||||
} testCases[] = {
|
||||
{ "de_DE@ C o ll A t i o n = Phonebook ", "c o ll a t i o n", "Phonebook" },
|
||||
{ "de_DE@ C o ll A t i o n = Phonebook ", "collation", "Phonebook" },
|
||||
{ "de_DE", "collation", ""},
|
||||
{ "de_DE@collation= PHONEBOOK", "collation", "PHONEBOOK" },
|
||||
{ "de_DE@ currency = euro ; CoLLaTion = PHONEBOOk ", "collatiON", "PHONEBOOk" },
|
||||
{ "de_DE@ currency = euro ; CoLLaTion = PHONEBOOk ", "collation", "PHONEBOOk" },
|
||||
};
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
|
Loading…
Add table
Reference in a new issue