ICU-860 support for i-,x-,@,. locales, Locale::createFromName(), fix bugs in uloc_getName

X-SVN-Rev: 3667
This commit is contained in:
Steven R. Loomis 2001-02-17 13:33:57 +00:00
parent 4f1c21c090
commit 8e5d16290f
4 changed files with 228 additions and 45 deletions

View file

@ -395,6 +395,32 @@ Locale::setDefault( const Locale& newLocale,
fgDefaultLocale = newLocale;
}
/**
 * Builds a Locale from a locale ID after passing it through uloc_getName(),
 * so POSIX / RFC1766 style IDs end up in ICU form.
 *
 * The name is first written into a small stack buffer. uloc_getName()
 * returns the total size it needs (terminator included), so the heap is
 * only used when that size exceeds the stack buffer. Sizing from
 * strlen(name) is not sufficient because the canonical form may be longer
 * than the input (e.g. "en@EURO" becomes "en__EURO").
 *
 * @param name The locale ID to convert. NULL is passed straight through to
 *             uloc_getName(), which substitutes the default locale.
 * @return A Locale constructed from the converted name. If the heap buffer
 *         cannot be allocated, the (terminated) truncated name already in
 *         the stack buffer is used instead.
 */
Locale
Locale::createFromName (const char *name)
{
    char stackBuf[128];
    char *heapBuf = NULL;
    char *buf = stackBuf;
    int32_t capacity = (int32_t)sizeof(stackBuf);
    int32_t needed;
    UErrorCode status = U_ZERO_ERROR;

    /* First attempt: this also tells us how much room is really required. */
    needed = uloc_getName(name, buf, capacity, &status);

    if(needed > capacity) {
        /* The stack buffer was too small; retry with an exact-sized one. */
        heapBuf = (char*)uprv_malloc(needed);
        if(heapBuf != NULL) {
            status = U_ZERO_ERROR;
            uloc_getName(name, heapBuf, needed, &status);
            buf = heapBuf;
        }
        /* On allocation failure keep using the truncated stack copy. */
    }

    Locale l(buf);

    /* Release with the allocator family that produced the block. */
    if(heapBuf != NULL) {
        uprv_free(heapBuf);
    }
    return l;
}
const char *
Locale::getCountry() const
{

View file

@ -18,6 +18,14 @@
* 07/21/99 stephen Modified setDefault() to propagate to C++
******************************************************************************/
/*
POSIX's locale format, from putil.c: [no spaces]
ll [ _CC ] [ . MM ] [ @ VV]
l = lang, C = ctry, M = charmap, V = variant
*/
#include "unicode/uloc.h"
#include "unicode/locid.h"
@ -171,6 +179,17 @@ static void _lazyEvaluate_installedLocales(void);
/*
 * Character classification helpers for locale ID parsing.
 * NOTE: these are macros and mention their argument more than once, so the
 * argument must be free of side effects (no *p++ and the like).
 * Every use of a parameter is parenthesized so that expressions such as
 * (c & 0x7f) or (p + 1) can be passed safely.
 */

/*returns TRUE if a is an ID separator FALSE otherwise*/
#define _isIDSeparator(a) (((a) == '_') || ((a) == '-'))

/*returns TRUE if a is a letter that may start a special prefix
  ('x' or 'i', in either case) FALSE otherwise*/
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/*returns TRUE if a ends the part of the ID being scanned: end of string,
 * '.' or '@'.
 * Dot terminates it because of the POSIX form, where the dot precedes the
 * codepage; '@' terminates it because in that form it precedes the variant.
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
/*******************************************************************************
API function definitions
@ -300,11 +319,29 @@ uloc_getLanguage(const char* localeID,
if (localeID == NULL) localeID = uloc_getDefault();
/* If it starts with i- or x- */
if(_isIDPrefix(localeID))
{
if(languageCapacity > i)
{
language[i] = (char)uprv_tolower(*localeID);
}
i++;
localeID++;
if(languageCapacity > i)
{
language[i] = '-';
}
i++;
localeID++;
}
/*Loop updates i to the size of the language
but only copies into the buffer as much as the buffer can bear*/
while ((*localeID != '\0') && !_isIDSeparator(*localeID))
while (!_isTerminator(*localeID) && !_isIDSeparator(*localeID))
{
if (languageCapacity > i) language[i] = (char)tolower(*localeID);
if (languageCapacity > i) language[i] = (char)uprv_tolower(*localeID);
i++;
localeID++;
}
@ -333,6 +370,13 @@ int32_t uloc_getCountry(const char* localeID,
if (U_FAILURE(*err)) return 0;
if (localeID == NULL) localeID = uloc_getDefault();
/* skip over i- or x- */
if(_isIDPrefix(localeID))
{
localeID += 2;
}
localeID = _findCharSeparator(localeID);
/*Loop updates i to the size of the language
@ -340,9 +384,9 @@ int32_t uloc_getCountry(const char* localeID,
if (localeID)
{
++localeID;
while ((*localeID != '\0') && !_isIDSeparator(*localeID))
while (!_isTerminator(*localeID) && !_isIDSeparator(*localeID))
{
if (countryCapacity > i) country[i] = (char)toupper(*localeID);
if (countryCapacity > i) country[i] = (char)uprv_toupper(*localeID);
i++;
localeID++;
}
@ -363,31 +407,59 @@ int32_t uloc_getVariant(const char* localeID,
UErrorCode* err)
{
int i=0;
const char *p = localeID;
if (U_FAILURE(*err)) return 0;
if (localeID == NULL) localeID = uloc_getDefault();
/* skip over i- or x- */
if(_isIDPrefix(localeID))
{
localeID += 2;
}
localeID = _findCharSeparator(localeID);
if (localeID) localeID = _findCharSeparator(++localeID);
if (localeID)
{
localeID = _findCharSeparator(++localeID);
}
if (localeID)
{
{
++localeID;
/*Loop updates i to the size of the language
but only copies into the buffer as much as the buffer can bare*/
while (*localeID != '\0')
but only copies into the buffer as much as the buffer can bear*/
while (!_isTerminator(*localeID))
{
if (variantCapacity > i) variant[i] = (char)toupper(*localeID);
if (variantCapacity > i) variant[i] = (char)uprv_toupper(*localeID);
i++;
localeID++;
}
}
/* But wait, there's more!
**IFF** no variant was otherwise found, take one from @...
*/
if ( (i == 0) && /* Found nothing (zero chars copied) */
(localeID = uprv_strrchr(p, '@')))
{
localeID++; /* point after the @ */
/* Note that we will stop at a period if the user accidentally
put a period after the @ sign */
/* repeat above copying loop */
while (!_isTerminator(*localeID))
{
if (variantCapacity > i) variant[i] = (char)uprv_toupper(*localeID);
i++;
localeID++;
}
}
if (i >= variantCapacity )
{
{
*err = U_BUFFER_OVERFLOW_ERROR;
}
}
if (variantCapacity>0) {variant[uprv_min(i,variantCapacity-1)] = '\0';}
@ -399,12 +471,16 @@ int32_t uloc_getName(const char* localeID,
int32_t nameCapacity,
UErrorCode* err)
{
int i= 0;
int varSze = 0;
int cntSze = 0;
int i= 0; /* total required size */
int n= 0; /* How much has been copied currently */
int varSze = 0; /* How big the variant is */
int cntSze = 0; /* How big the country is */
UErrorCode int_err = U_ZERO_ERROR;
int remainingCapacity;
if (U_FAILURE(*err)) return 0;
/*First we preflight the components in order to ensure a valid return value*/
if (localeID == NULL) localeID = uloc_getDefault();
@ -423,47 +499,92 @@ int32_t uloc_getName(const char* localeID,
NULL,
0,
&int_err);
/*Adjust for the zero terminators*/
--varSze;
--cntSze;
if (cntSze) i++;
if (varSze) i++;
/*Adjust for the zero terminators*/
--varSze;
--cntSze;
/* i is still languagesize+1 for the terminator */
/* Add space for underscores */
if (varSze)
{
i+= 2; /* if theres a variant, it will ALWAYS contain two underscores. */
}
else
{
if (cntSze)
{
i++; /* Otherwise - only language _ country. */
}
}
/* Update i (total req'd size) */
i += cntSze + varSze;
int_err = U_ZERO_ERROR;
uloc_getLanguage(localeID,
name,
nameCapacity,
&int_err);
/*We fill in the users buffer*/
if ((nameCapacity>0) && cntSze)
if(nameCapacity) /* If size is zero, skip the actual copy */
{
/* Now, the real copying */
int_err = U_ZERO_ERROR;
uloc_getLanguage(localeID,
name,
nameCapacity /* -(n=0) */,
&int_err);
n += uprv_strlen(name);
/*We fill in the users buffer*/
if ((n<nameCapacity) && cntSze)
{
if (U_SUCCESS(int_err)) uprv_strcat(name, "_");
if(U_SUCCESS(int_err))
{
name[n++] = '_';
}
uloc_getCountry(localeID,
name + uprv_strlen(name),
nameCapacity - uprv_strlen(name),
&int_err);
if (varSze)
name + n,
nameCapacity - n,
&int_err);
n += cntSze;
if (varSze && (n<nameCapacity))
{
if(U_SUCCESS(int_err))
{
name[n++] = '_';
}
uloc_getVariant(localeID,
name + n,
nameCapacity - n,
&int_err);
}
}
else if((n<nameCapacity) && varSze)
{
if (U_SUCCESS(int_err)) uprv_strcat(name, "_");
if (U_SUCCESS(int_err))
{
name[n++] = '_';
if(n<nameCapacity)
name[n++] = '_';
}
uloc_getVariant(localeID,
name + uprv_strlen(name),
nameCapacity - uprv_strlen(name),
&int_err);
name + n,
nameCapacity - n,
&int_err);
}
/* Tie it off */
name[uprv_min(i,nameCapacity-1)] = '\0';
} /* end (if nameCapacity > 0) */
}
*err = int_err;
return i;
}
const char* uloc_getISO3Language(const char* localeID)
{
int16_t offset;
@ -541,7 +662,10 @@ int32_t uloc_getDisplayLanguage(const char* locale,
inLocale = uloc_getDefault();
isDefaultLocale = TRUE;
}
else if (uprv_strcmp(inLocale, uloc_getDefault()) == 0) isDefaultLocale = TRUE;
else if (uprv_strcmp(inLocale, uloc_getDefault()) == 0)
{
isDefaultLocale = TRUE;
}
/*truncates the fallback mechanism if we start out with a defaultLocale*/
if (locale == NULL) locale = uloc_getDefault();

View file

@ -329,6 +329,19 @@ public:
static void setDefault(const Locale& newLocale,
UErrorCode& success);
/**
* Creates a locale which has had minimal canonicalization
* as per uloc_getName().
* @param name The name to create from
* @return new locale object
* @draft
* @see uloc_getName
*/
static Locale createFromName(const char *name);
/**
* Returns the locale's two-letter ISO-639 language code.
* @return An alias to the code

View file

@ -177,6 +177,21 @@
* \endcode
* </pre>
* </blockquote>
* <P>
* Concerning POSIX/RFC1766 Locale IDs,
* the getLanguage/getCountry/getVariant/getName functions do understand
* the POSIX type form of language_COUNTRY.ENCODING@VARIANT
* and if there is not an ICU-style variant, uloc_getVariant() for example
* will return the one listed after the @ sign. As well, the hyphen
* "-" is recognized as a country/variant separator similarly to RFC1766.
* So for example, "en-us" will be interpreted as en_US.
* As a result, uloc_getName() is far from a no-op, and will have the
* effect of converting POSIX/RFC1766 IDs into ICU form, although it does
* NOT map any of the actual codes (i.e. russian->ru) in any way.
* Applications should call uloc_getName() at the point where a locale ID
* is coming from an external source (user entry, OS, web browser)
* and pass the resulting string to other ICU functions. For example,
* don't use de-de@EURO as an argument to resourcebundle.
*/
/*
@ -297,6 +312,11 @@ uloc_getVariant(const char* localeID,
UErrorCode* err);
/**
* Gets the full name for the specified locale.
* Note: This has the effect of 'canonicalizing' the string to
* a certain extent. Upper and lower case are set as needed,
* and if the components were in 'POSIX' format they are changed to
* ICU format. It does NOT map aliased names in any way.
* See the top of this header file.
*
* @param localeID the locale to get the full name with
* @param name the full name for localeID