From 8e5d16290f0832fde715921a48f55b2db4cee096 Mon Sep 17 00:00:00 2001 From: "Steven R. Loomis" Date: Sat, 17 Feb 2001 13:33:57 +0000 Subject: [PATCH] ICU-860 support for i-,x-,@,. locales, Locale::createFromName(), fix bugs in uloc_getName X-SVN-Rev: 3667 --- icu4c/source/common/locid.cpp | 26 ++++ icu4c/source/common/uloc.c | 214 ++++++++++++++++++++++------ icu4c/source/common/unicode/locid.h | 13 ++ icu4c/source/common/unicode/uloc.h | 20 +++ 4 files changed, 228 insertions(+), 45 deletions(-) diff --git a/icu4c/source/common/locid.cpp b/icu4c/source/common/locid.cpp index 0dc80833704..e1af5b94b63 100644 --- a/icu4c/source/common/locid.cpp +++ b/icu4c/source/common/locid.cpp @@ -395,6 +395,32 @@ Locale::setDefault( const Locale& newLocale, fgDefaultLocale = newLocale; } +Locale +Locale::createFromName (const char *name) +{ + char stack[128]; + char *heap = NULL; + char *buf = stack; + int buflen = 128; + int n; + UErrorCode status; + + status = U_ZERO_ERROR; + + /* for some reason */ + if(uprv_strlen(name) > buflen) { + buflen = uprv_strlen(name)+1; + heap = (char*)uprv_malloc(buflen); + buf = heap; + } + + n = uloc_getName(name, buf, buflen, &status); + + Locale l(buf); + free(heap); + return l; +} + const char * Locale::getCountry() const { diff --git a/icu4c/source/common/uloc.c b/icu4c/source/common/uloc.c index 75a0905a631..9ef47ac6551 100644 --- a/icu4c/source/common/uloc.c +++ b/icu4c/source/common/uloc.c @@ -18,6 +18,14 @@ * 07/21/99 stephen Modified setDefault() to propagate to C++ ******************************************************************************/ +/* + POSIX's locale format, from putil.c: [no spaces] + + ll [ _CC ] [ . 
MM ] [ @ VV] + + l = lang, C = ctry, M = charmap, V = variant +*/ + #include "unicode/uloc.h" #include "unicode/locid.h" @@ -171,6 +179,17 @@ static void _lazyEvaluate_installedLocales(void); /*returns TRUE if a is an ID separator FALSE otherwise*/ #define _isIDSeparator(a) (a == '_' || a == '-') +#define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I')) +/*returns TRUE if one of the special prefixes is here (s=string) + 'x-' or 'i-' */ +#define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1])) + +/* Dot terminates it because of POSIX form where dot precedes the codepage + * except for variant + */ +#define _isTerminator(a) ((a==0)||(a=='.')||(a=='@')) + + /******************************************************************************* API function definitions @@ -300,11 +319,29 @@ uloc_getLanguage(const char* localeID, if (localeID == NULL) localeID = uloc_getDefault(); + /* If it starts with i- or x- */ + if(_isIDPrefix(localeID)) + { + if(languageCapacity > i) + { + language[i] = (char)uprv_tolower(*localeID); + } + i++; + localeID++; + + if(languageCapacity > i) + { + language[i] = '-'; + } + i++; + localeID++; + } + /*Loop updates i to the size of the language but only copies into the buffer as much as the buffer can bare*/ - while ((*localeID != '\0') && !_isIDSeparator(*localeID)) + while (!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) { - if (languageCapacity > i) language[i] = (char)tolower(*localeID); + if (languageCapacity > i) language[i] = (char)uprv_tolower(*localeID); i++; localeID++; } @@ -333,6 +370,13 @@ int32_t uloc_getCountry(const char* localeID, if (U_FAILURE(*err)) return 0; if (localeID == NULL) localeID = uloc_getDefault(); + + /* skip over i- or x- */ + if(_isIDPrefix(localeID)) + { + localeID += 2; + } + localeID = _findCharSeparator(localeID); /*Loop updates i to the size of the language @@ -340,9 +384,9 @@ int32_t uloc_getCountry(const char* localeID, if (localeID) { ++localeID; - while ((*localeID != 
'\0') && !_isIDSeparator(*localeID)) + while (!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) { - if (countryCapacity > i) country[i] = (char)toupper(*localeID); + if (countryCapacity > i) country[i] = (char)uprv_toupper(*localeID); i++; localeID++; } @@ -363,31 +407,59 @@ int32_t uloc_getVariant(const char* localeID, UErrorCode* err) { int i=0; + const char *p = localeID; if (U_FAILURE(*err)) return 0; if (localeID == NULL) localeID = uloc_getDefault(); + /* skip over i- or x- */ + if(_isIDPrefix(localeID)) + { + localeID += 2; + } + localeID = _findCharSeparator(localeID); - if (localeID) localeID = _findCharSeparator(++localeID); + if (localeID) + { + localeID = _findCharSeparator(++localeID); + } if (localeID) - { + { ++localeID; /*Loop updates i to the size of the language - but only copies into the buffer as much as the buffer can bare*/ - while (*localeID != '\0') + but only copies into the buffer as much as the buffer can bear*/ + while (!_isTerminator(*localeID)) { - if (variantCapacity > i) variant[i] = (char)toupper(*localeID); + if (variantCapacity > i) variant[i] = (char)uprv_toupper(*localeID); i++; localeID++; } - + } + + /* But wait, there's more! + **IFF** no variant was otherwise found, take one from @... 
+ */ + if ( (i == 0) && /* Found nothing (zero chars copied) */ + (localeID = uprv_strrchr(p, '@'))) + { + localeID++; /* point after the @ */ + /* Note that we will stop at a period if the user accidentally + put a period after the @ sign */ + + /* repeat above copying loop */ + while (!_isTerminator(*localeID)) + { + if (variantCapacity > i) variant[i] = (char)uprv_toupper(*localeID); + i++; + localeID++; } + } if (i >= variantCapacity ) - { + { *err = U_BUFFER_OVERFLOW_ERROR; - } + } if (variantCapacity>0) {variant[uprv_min(i,variantCapacity-1)] = '\0';} @@ -399,12 +471,16 @@ int32_t uloc_getName(const char* localeID, int32_t nameCapacity, UErrorCode* err) { - int i= 0; - int varSze = 0; - int cntSze = 0; + int i= 0; /* total required size */ + int n= 0; /* How much has been copied currently */ + int varSze = 0; /* How big the variant is */ + int cntSze = 0; /* How big the country is */ + UErrorCode int_err = U_ZERO_ERROR; + int remainingCapacity; if (U_FAILURE(*err)) return 0; + /*First we preflight the components in order to ensure a valid return value*/ if (localeID == NULL) localeID = uloc_getDefault(); @@ -423,47 +499,92 @@ int32_t uloc_getName(const char* localeID, NULL, 0, &int_err); - /*Adjust for the zero terminators*/ - --varSze; - --cntSze; - if (cntSze) i++; - if (varSze) i++; + /*Adjust for the zero terminators*/ + --varSze; + --cntSze; + /* i is still languagesize+1 for the terminator */ + + /* Add space for underscores */ + if (varSze) + { + i+= 2; /* if theres a variant, it will ALWAYS contain two underscores. */ + } + else + { + if (cntSze) + { + i++; /* Otherwise - only language _ country. 
*/ + } + } + + /* Update i (total req'd size) */ i += cntSze + varSze; - int_err = U_ZERO_ERROR; - - uloc_getLanguage(localeID, - name, - nameCapacity, - &int_err); - - /*We fill in the users buffer*/ - if ((nameCapacity>0) && cntSze) + if(nameCapacity) /* If size is zero, skip the actual copy */ + { + /* Now, the real copying */ + int_err = U_ZERO_ERROR; + + uloc_getLanguage(localeID, + name, + nameCapacity /* -(n=0) */, + &int_err); + + n += uprv_strlen(name); + + /*We fill in the users buffer*/ + if ((n 0) */ - } *err = int_err; - + return i; } - + const char* uloc_getISO3Language(const char* localeID) { int16_t offset; @@ -541,7 +662,10 @@ int32_t uloc_getDisplayLanguage(const char* locale, inLocale = uloc_getDefault(); isDefaultLocale = TRUE; } - else if (uprv_strcmp(inLocale, uloc_getDefault()) == 0) isDefaultLocale = TRUE; + else if (uprv_strcmp(inLocale, uloc_getDefault()) == 0) + { + isDefaultLocale = TRUE; + } /*truncates the fallback mechanism if we start out with a defaultLocale*/ if (locale == NULL) locale = uloc_getDefault(); diff --git a/icu4c/source/common/unicode/locid.h b/icu4c/source/common/unicode/locid.h index 644e02464d7..936122344e1 100644 --- a/icu4c/source/common/unicode/locid.h +++ b/icu4c/source/common/unicode/locid.h @@ -329,6 +329,19 @@ public: static void setDefault(const Locale& newLocale, UErrorCode& success); + + /** + * Creates a locale which has had minimal canonicalization + * as per uloc_getName(). + * @param name The name to create from + * @return new locale object + * @draft + * @see uloc_getName + */ + + static Locale createFromName(const char *name); + + /** * Returns the locale's two-letter ISO-639 language code. * @return An alias to the code diff --git a/icu4c/source/common/unicode/uloc.h b/icu4c/source/common/unicode/uloc.h index c9f544243cb..db60db25907 100644 --- a/icu4c/source/common/unicode/uloc.h +++ b/icu4c/source/common/unicode/uloc.h @@ -177,6 +177,21 @@ * \endcode * * + *

+ * Concerning POSIX/RFC1766 Locale IDs, + * the getLanguage/getCountry/getVariant/getName functions do understand + * the POSIX type form of language_COUNTRY.ENCODING@VARIANT + * and if there is not an ICU-style variant, uloc_getVariant() for example + * will return the one listed after the @ sign. As well, the hyphen + * "-" is recognized as a country/variant separator similarly to RFC1766. + * So for example, "en-us" will be interpreted as en_US. + * As a result, uloc_getName() is far from a no-op, and will have the + * effect of converting POSIX/RFC1766 IDs into ICU form, although it does + * NOT map any of the actual codes (e.g. russian->ru) in any way. + * Applications should call uloc_getName() at the point where a locale ID + * is coming from an external source (user entry, OS, web browser) + * and pass the resulting string to other ICU functions. For example, + * don't use de-de@EURO as an argument to resourcebundle. */ /* @@ -297,6 +312,11 @@ uloc_getVariant(const char* localeID, UErrorCode* err); /** * Gets the full name for the specified locale. + * Note: This has the effect of 'canonicalizing' the string to + * a certain extent. Upper and lower case are set as needed, + * and if the components were in 'POSIX' format they are changed to + * ICU format. It does NOT map aliased names in any way. + * See the top of this header file. * * @param localeID the locale to get the full name with * @param name the full name for localeID