ICU-2438 uloc_getNameNoKeywords (for resource bundles), support for canonicalizing ICU variants to keywords (disabled).

X-SVN-Rev: 13473
2025-04-08 06:53:45 +00:00 · 2003-10-22 18:53:54 +00:00 · 2003-10-22 18:53:54 +00:00 · 3d2215268d
commit 3d2215268d
parent 0866e20efb
4 changed files with 203 additions and 24 deletions
--- a/icu4c/source/common/uloc.c
+++ b/icu4c/source/common/uloc.c
@ -1042,13 +1042,47 @@ uloc_getKeywords(const char* localeID,
    /*return u_terminateChars(keywords, keywordsCapacity, i, status);*/
 }

+/*
+ * Maps locale IDs that express a preference through an ICU variant to the
+ * equivalent locale ID written with '@keyword=value' syntax.
+ * Consulted by uloc_getNameInternal() when canonizeICU is set.
+ * The table is read-only, hence const.
+ */
+static const keywordConv variantsToKeywords[] = {
+  { "ca_ES_PREEURO", "ca_ES@currency=ESP" },
+  { "de_AT_PREEURO", "de_AT@currency=ATS" },
+  { "de_DE_PREEURO", "de_DE@currency=DEM" },
+  { "de_LU_PREEURO", "de_LU@currency=LUF" }, /* Luxembourg franc, same as fr_LU_PREEURO */
+  { "el_GR_PREEURO", "el_GR@currency=GRD" },
+  { "en_BE_PREEURO", "en_BE@currency=BEF" },
+  { "en_IE_PREEURO", "en_IE@currency=IEP" },
+  { "es_ES_PREEURO", "es_ES@currency=ESP" },
+  { "eu_ES_PREEURO", "eu_ES@currency=ESP" },
+  { "fi_FI_PREEURO", "fi_FI@currency=FIM" },
+  { "fr_BE_PREEURO", "fr_BE@currency=BEF" },
+  { "fr_FR_PREEURO", "fr_FR@currency=FRF" },
+  { "fr_LU_PREEURO", "fr_LU@currency=LUF" },
+  { "ga_IE_PREEURO", "ga_IE@currency=IEP" },
+  { "gl_ES_PREEURO", "gl_ES@currency=ESP" },
+  { "it_IT_PREEURO", "it_IT@currency=ITL" },
+  { "nl_BE_PREEURO", "nl_BE@currency=BEF" },
+  { "nl_NL_PREEURO", "nl_NL@currency=NLG" },
+  { "pt_PT_PREEURO", "pt_PT@currency=PTE" },
+  { "de__PHONEBOOK", "de@collation=phonebook" },
+  { "en_GB_EURO", "en_GB@currency=EUR" },
+  { "es__TRADITIONAL", "es@collation=traditional" },
+  { "hi__DIRECT", "hi@collation=direct" },
+  { "ja_JP_TRADITIONAL", "ja_JP@calendar=japanese" },
+  { "th_TH_TRADITIONAL", "th_TH@calendar=buddhist" },
+  { "zh_TW_STROKE", "zh_TW@collation=stroke" },
+  { "zh__PINYIN", "zh@collation=pinyin" }
+};
+
+
 U_CAPI int32_t  U_EXPORT2
-uloc_getName(const char* localeID,
+uloc_getNameInternal(const char* localeID,
             char* name,
             int32_t nameCapacity,
+             UBool stripKeywords,
+             UBool canonizeICU,
             UErrorCode* err)  
 {
-    int32_t i, fieldCount;
+    int32_t i, j, fieldCount;
+    UBool alreadyAddedAKeyword = FALSE;

    if(err==NULL || U_FAILURE(*err)) {
        return 0;
@ -1095,34 +1129,77 @@ uloc_getName(const char* localeID,
                }
                ++i;
                i+=_getVariant(localeID+1, *localeID, name+i, nameCapacity-i);
+                if(canonizeICU) {
+                  /* there is a variant. Maybe it's a variant that should be expressed as a keyword */
+                  for(j = 0; j < sizeof(variantsToKeywords)/sizeof(variantsToKeywords[0]); j++) {
+                    if(nameCapacity > i && uprv_strncmp(name, variantsToKeywords[j].variant, i) == 0) {
+                      int32_t nameLen = 0;
+                      if(stripKeywords) {
+                        nameLen = uprv_strchr(variantsToKeywords[j].name, '@') - variantsToKeywords[j].name;
+                      } else {
+                        nameLen = uprv_strlen(variantsToKeywords[j].name);
+                      }
+                      if(nameCapacity > nameLen) {
+                        uprv_strncpy(name, variantsToKeywords[j].name, nameLen);
+                        name[nameLen] = 0;
+                      }
+                      i = nameLen;
+                      alreadyAddedAKeyword = TRUE;
+                    }
+                  }
+                }
            }
        }
    }

-    /* if we do not have a variant tag yet then try a POSIX variant after '@' */
-    if((localeID=uprv_strrchr(localeID, '@'))!=NULL) {
-      const char *keywordIndicator = uprv_strchr(localeID, ULOC_KEYWORD_ASSIGN);
-      const char *separatorIndicator = uprv_strchr(localeID, ULOC_KEYWORD_ITEM_SEPARATOR);
-      if(keywordIndicator && (!separatorIndicator || separatorIndicator > keywordIndicator)) {
-        if(i<nameCapacity) {
-            name[i]='@';
-        }
-        ++i;
-        ++fieldCount;
-        i += locale_getKeywords(localeID+1, '@', name+i, nameCapacity-i, NULL, 0, NULL, TRUE, err);
-      } else if(fieldCount < 2) {
-        do {
-            if(i<nameCapacity) {
-                name[i]='_';
+    if(!stripKeywords) {
+      /* if we do not have a variant tag yet then try a POSIX variant after '@' */
+      if((localeID=uprv_strrchr(localeID, '@'))!=NULL) {
+        const char *keywordIndicator = uprv_strchr(localeID, ULOC_KEYWORD_ASSIGN);
+        const char *separatorIndicator = uprv_strchr(localeID, ULOC_KEYWORD_ITEM_SEPARATOR);
+        if(keywordIndicator && (!separatorIndicator || separatorIndicator > keywordIndicator)) {
+          if(i<nameCapacity) {
+            if(alreadyAddedAKeyword) {
+              name[i]=';';
+            } else {
+              name[i]='@';
            }
-            ++i;
-            ++fieldCount;
-        } while(fieldCount<2);
-        i+=_getVariant(localeID+1, '@', name+i, nameCapacity-i);
+          }
+          ++i;
+          ++fieldCount;
+          i += locale_getKeywords(localeID+1, '@', name+i, nameCapacity-i, NULL, 0, NULL, TRUE, err);
+        } else if(fieldCount < 2) {
+          do {
+              if(i<nameCapacity) {
+                  name[i]='_';
+              }
+              ++i;
+              ++fieldCount;
+          } while(fieldCount<2);
+          i+=_getVariant(localeID+1, '@', name+i, nameCapacity-i);
+        }
      }
    }
    return u_terminateChars(name, nameCapacity, i, err);
 }
+
+/*
+ * Public entry point for the full locale name: delegates to
+ * uloc_getNameInternal() with keyword stripping and ICU variant
+ * canonicalization both switched off.
+ */
+U_CAPI int32_t  U_EXPORT2
+uloc_getName(const char* localeID,
+             char* name,
+             int32_t nameCapacity,
+             UErrorCode* err)  
+{
+  const UBool dropKeywords = FALSE;
+  const UBool mapVariants = FALSE;
+  int32_t length;
+
+  length = uloc_getNameInternal(localeID, name, nameCapacity, dropKeywords, mapVariants, err);
+  return length;
+}
+
+/*
+ * Same as uloc_getName() except that the '@' part of the locale ID is not
+ * processed: delegates to uloc_getNameInternal() with keyword stripping
+ * switched on and ICU variant canonicalization switched off.
+ */
+U_CAPI int32_t  U_EXPORT2
+uloc_getNameNoKeywords(const char* localeID,
+             char* name,
+             int32_t nameCapacity,
+             UErrorCode* err)  
+{
+  const UBool dropKeywords = TRUE;
+  const UBool mapVariants = FALSE;
+  int32_t length;
+
+  length = uloc_getNameInternal(localeID, name, nameCapacity, dropKeywords, mapVariants, err);
+  return length;
+}
       
 U_CAPI const char*  U_EXPORT2
 uloc_getISO3Language(const char* localeID) 
--- a/icu4c/source/common/unicode/uloc.h
+++ b/icu4c/source/common/unicode/uloc.h
@ -662,6 +662,31 @@ uloc_getParent(const char*    localeID,
 */
 #define ULOC_KEYWORD_ITEM_SEPARATOR ';'

+/**
+ * Gets the full name for the specified locale, without its keywords.
+ * Note: This has the effect of 'canonicalizing' the string to
+ * a certain extent. Upper and lower case are set as needed,
+ * and if the components were in 'POSIX' format they are changed to
+ * ICU format.  It does NOT map aliased names in any way.
+ * See the top of this header file.
+ * This API strips off the keyword part, so "de_DE@collation=phonebook"
+ * becomes "de_DE".
+ *
+ * @param localeID the locale whose full name is requested
+ * @param name the buffer that receives the full name for localeID
+ * @param nameCapacity the capacity of the name buffer
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name.  If it is greater
+ * than nameCapacity, the returned full name will be truncated.
+ * @draft ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getNameNoKeywords(const char*    localeID,
+         char* name,
+         int32_t nameCapacity,
+         UErrorCode* err);
+
 /**
 * Gets an enumeration of keywords for the specified locale. Enumeration
 * must get disposed of by the client using uenum_close function.
@ -692,6 +717,16 @@ uloc_getKeywordValue(const char* localeID,
                     char* buffer, int32_t bufferCapacity,
                     UErrorCode* status);

+/**
+ * Structure for going from variants to keywords: pairs a locale ID written
+ * with an ICU variant with the equivalent locale ID written with keywords.
+ * The members point at the strings instead of holding fixed-size copies, so
+ * an over-long initializer can never be left without its terminating NUL.
+ *  @draft ICU 2.8
+ */
+typedef struct keywordConv {
+  const char *variant; /* locale ID using a variant, e.g. "de__PHONEBOOK" */
+  const char *name;    /* keyword form, e.g. "de@collation=phonebook" */
+} keywordConv;
+
+
+
 /*eof*/


--- a/icu4c/source/test/cintltst/cloctst.c
+++ b/icu4c/source/test/cintltst/cloctst.c
@ -187,6 +187,7 @@ void addLocaleTest(TestNode** root)
    /*addTest(root, &MoreVariants,             "tsutil/cloctst/MoreVariants");*/
    addTest(root, &TestKeywordVariants,      "tsutil/cloctst/TestKeywordVariants");
    addTest(root, &TestKeywordVariantParsing,"tsutil/cloctst/TestKeywordVariantParsing");
+    addTest(root, &TestVariantCanonization,  "tsutil/cloctst/TestVariantCanonization");
 }


@ -2117,17 +2118,19 @@ static void TestKeywordVariants(void)
  struct {
    const char *localeID;
    const char *expectedLocaleID;
+    const char *expectedLocaleIDNoKeywords;
    const char *expectedKeywords[10];
    int32_t numKeywords;
    UErrorCode expectedStatus;
  } testCases[] = {
    { "de_DE@  currency = euro; C o ll A t i o n   = Phonebook   ; C alen dar = budhist   ", 
      "de_DE@c alen dar=budhist;c o ll a t i o n=Phonebook;currency=euro", 
+      "de_DE",
    { "c alen dar", "c o ll a t i o n", "currency"},
      3,
      U_ZERO_ERROR
    },
-    { "de_DE@euro", "de_DE_EURO", {""}, 0, U_INVALID_FORMAT_ERROR},
+    { "de_DE@euro", "de_DE_EURO", "de_DE", {""}, 0, U_INVALID_FORMAT_ERROR},
    /*{ "de_DE@euro;collation=phonebook", "", "", U_INVALID_FORMAT_ERROR}*/
  };
  UErrorCode status = U_ZERO_ERROR;
@ -2202,6 +2205,68 @@ static void TestKeywordVariantParsing(void)
        testCases[i].expectedValue, testCases[i].localeID, testCases[i].keyword, buffer);
    }
  }
-
-
 }
+
+/*
+ * Checks that locale IDs using legacy ICU variants come back from
+ * uloc_getName() in keyword form and from uloc_getNameNoKeywords() as the
+ * bare locale, and that other variants are left alone.
+ * The checks are currently skipped by the unconditional return below.
+ */
+static void TestVariantCanonization(void)
+{
+  static const struct {
+    const char *localeID;
+    const char *expectedValue;
+    const char *expectedValueNoKeywords;
+  } testCases[] = {
+    { "ca_ES_PREEURO", "ca_ES@currency=ESP", "ca_ES"},
+    { "de_AT_PREEURO", "de_AT@currency=ATS", "de_AT" },
+    { "de_DE_PREEURO", "de_DE@currency=DEM", "de_DE" },
+    { "de_LU_PREEURO", "de_LU@currency=LUF", "de_LU" }, /* Luxembourg franc, as for fr_LU */
+    { "el_GR_PREEURO", "el_GR@currency=GRD", "el_GR" },
+    { "en_BE_PREEURO", "en_BE@currency=BEF", "en_BE" },
+    { "en_IE_PREEURO", "en_IE@currency=IEP", "en_IE" },
+    { "es_ES_PREEURO", "es_ES@currency=ESP", "es_ES" },
+    { "eu_ES_PREEURO", "eu_ES@currency=ESP", "eu_ES" },
+    { "fi_FI_PREEURO", "fi_FI@currency=FIM", "fi_FI" },
+    { "fr_BE_PREEURO", "fr_BE@currency=BEF", "fr_BE" },
+    { "fr_FR_PREEURO", "fr_FR@currency=FRF", "fr_FR" },
+    { "fr_LU_PREEURO", "fr_LU@currency=LUF", "fr_LU" },
+    { "ga_IE_PREEURO", "ga_IE@currency=IEP", "ga_IE" },
+    { "gl_ES_PREEURO", "gl_ES@currency=ESP", "gl_ES" },
+    { "it_IT_PREEURO", "it_IT@currency=ITL", "it_IT" },
+    { "nl_BE_PREEURO", "nl_BE@currency=BEF", "nl_BE" },
+    { "nl_NL_PREEURO", "nl_NL@currency=NLG", "nl_NL" },
+    { "pt_PT_PREEURO", "pt_PT@currency=PTE", "pt_PT" },
+    { "de__PHONEBOOK", "de@collation=phonebook", "de" },
+    { "en_GB_EURO", "en_GB@currency=EUR", "en_GB" },
+    { "es__TRADITIONAL", "es@collation=traditional", "es" },
+    { "hi__DIRECT", "hi@collation=direct", "hi" },
+    { "ja_JP_TRADITIONAL", "ja_JP@calendar=japanese", "ja_JP" },
+    { "th_TH_TRADITIONAL", "th_TH@calendar=buddhist", "th_TH" },
+    { "zh_TW_STROKE", "zh_TW@collation=stroke", "zh_TW" },
+    { "zh__PINYIN", "zh@collation=pinyin", "zh" },
+    /* variants with no keyword equivalent must pass through unchanged */
+    { "en_US_POSIX", "en_US_POSIX", "en_US_POSIX" }, 
+    { "hy_AM_REVISED", "hy_AM_REVISED", "hy_AM_REVISED" }, 
+    { "no_NO_NY", "no_NO_NY", "no_NO_NY" }
+  };
+  /* signed count so the loop does not compare int32_t against size_t */
+  const int32_t testCount = (int32_t)(sizeof(testCases)/sizeof(testCases[0]));
+
+  UErrorCode status = U_ZERO_ERROR;
+
+  int32_t i = 0;
+  char buffer[256];
+  /* this test should be enabled if we decide to do canonization of variants using uloc_getName */
+  /* otherwise, use this data to test another API */
+  return;
+
+  for(i = 0; i < testCount; i++) {
+    /* start every call from a clean status and an empty buffer, so that a
+       failure cannot leak into the following comparison */
+    status = U_ZERO_ERROR;
+    *buffer = 0;
+    uloc_getName(testCases[i].localeID, buffer, (int32_t)sizeof(buffer), &status);
+    if(U_FAILURE(status)) {
+      log_err("uloc_getName failed for \"%s\" with status %d\n",
+        testCases[i].localeID, (int)status);
+    } else if(uprv_strcmp(testCases[i].expectedValue, buffer) != 0) {
+      log_err("Expected to get \"%s\" from \"%s\". Got \"%s\" instead\n",
+        testCases[i].expectedValue, testCases[i].localeID, buffer);
+    }
+
+    status = U_ZERO_ERROR;
+    *buffer = 0;
+    uloc_getNameNoKeywords(testCases[i].localeID, buffer, (int32_t)sizeof(buffer), &status);
+    if(U_FAILURE(status)) {
+      log_err("uloc_getNameNoKeywords failed for \"%s\" with status %d\n",
+        testCases[i].localeID, (int)status);
+    } else if(uprv_strcmp(testCases[i].expectedValueNoKeywords, buffer) != 0) {
+      log_err("Expected to get \"%s\" from \"%s\". Got \"%s\" instead\n",
+        testCases[i].expectedValueNoKeywords, testCases[i].localeID, buffer);
+    }
+  }
+}
+
--- a/icu4c/source/test/cintltst/cloctst.h
+++ b/icu4c/source/test/cintltst/cloctst.h
@ -81,6 +81,8 @@ static  void TestDisplayNames(void);
 */
 static void doTestDisplayNames(const char* inLocale, int32_t compareIndex);

+/**
+ * Test for rewriting locale IDs that use ICU variants into keyword form
+ */
+static void TestVariantCanonization(void);
+
 /**
 * additional intialization for datatables storing expected values
 */