ICU-860 support for i-,x-,@,. locales, Locale::createFromName(), fix bugs in uloc_getName

X-SVN-Rev: 3667
2025-04-08 06:53:45 +00:00 · 2001-02-17 13:33:57 +00:00 · 2001-02-17 13:33:57 +00:00 · 8e5d16290f
commit 8e5d16290f
parent 4f1c21c090
4 changed files with 228 additions and 45 deletions
--- a/icu4c/source/common/locid.cpp
+++ b/icu4c/source/common/locid.cpp
@ -395,6 +395,32 @@ Locale::setDefault( const   Locale&     newLocale,
    fgDefaultLocale = newLocale;
 }

+Locale
+Locale::createFromName (const char *name)
+{
+  char stack[128];
+  char *heap = NULL;
+  char *buf = stack;
+  int buflen = 128;
+  int   n;
+  UErrorCode status;
+
+  status = U_ZERO_ERROR;
+
+  /* for some reason */
+  if(uprv_strlen(name) > buflen) {
+    buflen = uprv_strlen(name)+1;
+    heap = (char*)uprv_malloc(buflen);
+    buf = heap;
+  }
+  
+  n = uloc_getName(name, buf, buflen, &status);
+  
+  Locale l(buf);
+  free(heap);
+  return l;
+}
+
 const char *
 Locale::getCountry() const
 {
--- a/icu4c/source/common/uloc.c
+++ b/icu4c/source/common/uloc.c
@ -18,6 +18,14 @@
 *   07/21/99    stephen     Modified setDefault() to propagate to C++
 ******************************************************************************/

+/*
+   POSIX's locale format, from putil.c: [no spaces]
+
+     ll [ _CC ] [ . MM ] [ @ VV]
+
+     l = lang, C = ctry, M = charmap, V = variant
+*/
+

 #include "unicode/uloc.h"
 #include "unicode/locid.h"
@ -171,6 +179,17 @@ static void _lazyEvaluate_installedLocales(void);
 /*returns TRUE if a is an ID separator FALSE otherwise*/
 #define _isIDSeparator(a) (a == '_' || a == '-')

+#define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I'))
+/*returns TRUE if one of the special prefixes is here (s=string)
+  'x-' or 'i-' */
+#define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1]))
+
+/* Dot terminates it because of POSIX form  where dot precedes the codepage
+ * except for variant
+ */
+#define _isTerminator(a)  ((a==0)||(a=='.')||(a=='@'))
+
+

 /*******************************************************************************
  API function definitions
@ -300,11 +319,29 @@ uloc_getLanguage(const char*    localeID,

  if (localeID == NULL)    localeID = uloc_getDefault();

+  /* If it starts with i- or x- */
+  if(_isIDPrefix(localeID))
+  {
+    if(languageCapacity > i)
+    {
+      language[i] = (char)uprv_tolower(*localeID);
+    }
+    i++;
+    localeID++;
+
+    if(languageCapacity > i)
+    {
+      language[i] = '-';
+    }
+    i++;
+    localeID++;
+  }
+
  /*Loop updates i to the size of the language
    but only copies into the buffer as much as the buffer can bare*/
-  while ((*localeID != '\0') && !_isIDSeparator(*localeID))
+  while (!_isTerminator(*localeID) && !_isIDSeparator(*localeID))
    {
-      if (languageCapacity > i) language[i] = (char)tolower(*localeID);
+      if (languageCapacity > i) language[i] = (char)uprv_tolower(*localeID);
      i++;
      localeID++;
    }
@ -333,6 +370,13 @@ int32_t uloc_getCountry(const char* localeID,
  if (U_FAILURE(*err)) return 0;
  if (localeID == NULL)    localeID = uloc_getDefault();
  
+
+  /* skip over i- or x- */
+  if(_isIDPrefix(localeID))
+  {
+    localeID += 2;
+  }
+
  localeID = _findCharSeparator(localeID);
  
  /*Loop updates i to the size of the language
@ -340,9 +384,9 @@ int32_t uloc_getCountry(const char* localeID,
  if (localeID)
    {
      ++localeID;
-      while ((*localeID != '\0') && !_isIDSeparator(*localeID))
+      while (!_isTerminator(*localeID) && !_isIDSeparator(*localeID))
      {
-        if (countryCapacity > i) country[i] = (char)toupper(*localeID);
+        if (countryCapacity > i) country[i] = (char)uprv_toupper(*localeID);
        i++;
        localeID++;
      }
@ -363,31 +407,59 @@ int32_t uloc_getVariant(const char* localeID,
                        UErrorCode* err) 
 {
  int i=0;
+  const char *p = localeID;

  if (U_FAILURE(*err)) return 0;
  if (localeID == NULL)    localeID = uloc_getDefault();

+  /* skip over i- or x- */
+  if(_isIDPrefix(localeID))
+  {
+    localeID += 2;
+  }
+
  localeID = _findCharSeparator(localeID);
-  if (localeID)    localeID = _findCharSeparator(++localeID);
+  if (localeID)
+  {
+    localeID = _findCharSeparator(++localeID);
+  }

  if (localeID)
-    {
+  {
      ++localeID;
      /*Loop updates i to the size of the language
-    but only copies into the buffer as much as the buffer can bare*/
-      while (*localeID != '\0')
+    but only copies into the buffer as much as the buffer can bear*/
+      while (!_isTerminator(*localeID))
    {
-      if (variantCapacity > i) variant[i] = (char)toupper(*localeID);
+      if (variantCapacity > i) variant[i] = (char)uprv_toupper(*localeID);
      i++;
      localeID++;
    }
-
+  }
+  
+  /* But wait, there's more! 
+     **IFF** no variant was otherwise found, take one from @...
+   */
+  if ( (i == 0) &&  /* Found nothing (zero chars copied) */
+       (localeID = uprv_strrchr(p, '@')))
+  {
+    localeID++; /* point after the @ */
+    /* Note that we will stop at a period if the user accidentally
+       put a period after the @ sign */
+    
+    /* repeat above copying loop */
+    while (!_isTerminator(*localeID))
+    {
+      if (variantCapacity > i) variant[i] = (char)uprv_toupper(*localeID);
+      i++;
+      localeID++;
    }
+  }

  if (i >= variantCapacity )
-    {
+  {
      *err = U_BUFFER_OVERFLOW_ERROR;
-    }
+  }


  if (variantCapacity>0) {variant[uprv_min(i,variantCapacity-1)] = '\0';}
@ -399,12 +471,16 @@ int32_t uloc_getName(const char* localeID,
             int32_t nameCapacity,
             UErrorCode* err)  
 {
-  int i= 0;
-  int varSze = 0;
-  int cntSze = 0;
+  int i= 0;       /* total required size */
+  int n= 0;       /* How much has been copied currently */
+  int varSze = 0; /* How big the variant is */
+  int cntSze = 0; /* How big the country is */
+
  UErrorCode int_err = U_ZERO_ERROR;
+  int remainingCapacity;

  if (U_FAILURE(*err)) return 0;
+
  /*First we preflight the components in order to ensure a valid return value*/
  if (localeID == NULL)    localeID = uloc_getDefault();

@ -423,47 +499,92 @@ int32_t uloc_getName(const char* localeID,
               NULL,
               0, 
               &int_err);
-  /*Adjust for the zero terminators*/
-  --varSze; 
-  --cntSze;

-  if (cntSze) i++;
-  if (varSze) i++;
+  /*Adjust for the zero terminators*/
+  --varSze;
+  --cntSze;
+  /* i is still languagesize+1 for the terminator */
+
+  /* Add space for underscores */
+  if (varSze)
+  {
+    i+= 2;  /* if theres a variant, it will ALWAYS contain two underscores. */
+  }
+  else
+  {
+    if (cntSze)
+    {
+      i++; /* Otherwise - only language _ country. */
+    }
+  }
+
+  /* Update i (total req'd size) */
  i += cntSze + varSze;

-  int_err = U_ZERO_ERROR;
-
-  uloc_getLanguage(localeID, 
-           name,
-           nameCapacity, 
-           &int_err);
-
-  /*We fill in the users buffer*/
-  if ((nameCapacity>0) && cntSze)
+  if(nameCapacity)  /* If size is zero, skip the actual copy */
+  {
+    /* Now, the real copying */
+    int_err = U_ZERO_ERROR;
+    
+    uloc_getLanguage(localeID, 
+                     name,
+                     nameCapacity /* -(n=0) */,  
+                     &int_err);
+    
+    n += uprv_strlen(name);
+    
+    /*We fill in the users buffer*/
+    if ((n<nameCapacity) && cntSze)
    {
-      if (U_SUCCESS(int_err)) uprv_strcat(name, "_");
-
+      if(U_SUCCESS(int_err))
+      {
+        name[n++] = '_';
+      }
+      
      uloc_getCountry(localeID,
-          name + uprv_strlen(name),
-              nameCapacity - uprv_strlen(name),
-              &int_err);
-
-      if (varSze)
+                      name + n,
+                      nameCapacity - n,
+                      &int_err);
+      n += cntSze;
+      
+      if (varSze && (n<nameCapacity))
+      {
+        if(U_SUCCESS(int_err))
+        {
+          name[n++] = '_';
+        }
+        
+        uloc_getVariant(localeID,
+                        name + n,
+                        nameCapacity - n,
+                        &int_err);
+      }
+      
+    }
+    else if((n<nameCapacity) && varSze)
    {
-      if (U_SUCCESS(int_err)) uprv_strcat(name, "_");
-
+      if (U_SUCCESS(int_err))
+      {
+        name[n++] = '_';
+        if(n<nameCapacity)
+          name[n++] = '_';
+      }
+      
      uloc_getVariant(localeID,
-                   name + uprv_strlen(name),
-                   nameCapacity - uprv_strlen(name), 
-                   &int_err);
+                      name + n,
+                      nameCapacity - n,
+                      &int_err);
    }
+    
+    /* Tie it off */
+    name[uprv_min(i,nameCapacity-1)] = '\0';
+  }   /* end (if nameCapacity > 0) */

-    }
  *err  = int_err;
-
+  
  return i;
 }
-
+       
 const char* uloc_getISO3Language(const char* localeID) 
 {
  int16_t offset;
@ -541,7 +662,10 @@ int32_t uloc_getDisplayLanguage(const char* locale,
      inLocale = uloc_getDefault();
      isDefaultLocale = TRUE;
    }
-  else if (uprv_strcmp(inLocale, uloc_getDefault()) == 0) isDefaultLocale = TRUE;
+  else if (uprv_strcmp(inLocale, uloc_getDefault()) == 0)
+  {
+    isDefaultLocale = TRUE;
+  }
  /*truncates the fallback mechanism if we start out with a defaultLocale*/

  if (locale == NULL) locale = uloc_getDefault();
--- a/icu4c/source/common/unicode/locid.h
+++ b/icu4c/source/common/unicode/locid.h
@ -329,6 +329,19 @@ public:
    static      void            setDefault(const    Locale&     newLocale,
                                                    UErrorCode&  success);

+    
+    /**
+     * Creates a locale which has had minimal canonicalization 
+     * as per uloc_getName(). 
+     * @param name The name to create from
+     * @return new locale object
+     * @draft
+     * @see uloc_getName
+     */
+     
+    static Locale createFromName(const char *name);
+
+    
    /**
     * Returns the locale's two-letter ISO-639 language code.
     * @return      An alias to the code
--- a/icu4c/source/common/unicode/uloc.h
+++ b/icu4c/source/common/unicode/uloc.h
@ -177,6 +177,21 @@
 * \endcode
 * </pre>
 * </blockquote>
+ * <P>
+ * Concerning POSIX/RFC1766 Locale IDs, 
+ *  the getLanguage/getCountry/getVariant/getName functions do understand
+ * the POSIX type form of  language_COUNTRY.ENCODING@VARIANT
+ * and if there is not an ICU-stype variant, uloc_getVariant() for example
+ * will return the one listed after the @at sign. As well, the hyphen
+ * "-" is recognized as a country/variant separator similarly to RFC1766.
+ * So for example, "en-us" will be interpreted as en_US.  
+ * As a result, uloc_getName() is far from a no-op, and will have the
+ * effect of converting POSIX/RFC1766 IDs into ICU form, although it does
+ * NOT map any of the actual codes (i.e. russian->ru) in any way.
+ * Applications should call uloc_getName() at the point where a locale ID
+ * is coming from an external source (user entry, OS, web browser)
+ * and pass the resulting string to other ICU functions.  For example,
+ * don't use de-de@EURO as an argument to resourcebundle.
 */

 /*
@ -297,6 +312,11 @@ uloc_getVariant(const char*    localeID,
        UErrorCode* err);
 /**
 * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the string to
+ * a certain extent. Upper and lower case are set as needed,
+ * and if the components were in 'POSIX' format they are changed to
+ * ICU format.  It does NOT map aliased names in any way.
+ * See the top of this header file.
 *
 * @param localeID the locale to get the full name with
 * @param name the full name for localeID