ICU-899 change uscript_getCode() API to accept fillin buffer and capacity

X-SVN-Rev: 6389
2025-04-15 09:45:26 +00:00 · 2001-10-23 02:00:50 +00:00 · 2001-10-23 02:00:50 +00:00 · 5a3494cf54
commit 5a3494cf54
parent f7a2890b94
5 changed files with 145 additions and 80 deletions
--- a/icu4c/source/common/unicode/uscript.h
+++ b/icu4c/source/common/unicode/uscript.h
@ -67,16 +67,20 @@ typedef enum UScriptCode {


 /**
- * Gets a script code associated with the given locale or ISO 15924 abbreviation or name. 
- * Returns USCRIPT_MALAYAM given "Malayam" OR "Mlym".
- * Returns USCRIPT_LATIN given "en" OR "en_US" 
+ * Gets script codes associated with the given locale or ISO 15924 abbreviation or name. 
+ * Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym".
+ * Fills in USCRIPT_LATIN given "en" OR "en_US" 
+ * If required capacity is greater than capacity of the destination buffer then the error code
+ * is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned
 * @param nameOrAbbrOrLocale name of the script or ISO 15924 code or locale
+ * @param fillIn the UScriptCode buffer to fill in the script code
+ * @param capacity the capacity (size) of the UScriptCode buffer passed in.
 * @param err the error status code.
- * @return The UScriptCode 
+ * @return The number of script codes filled in the buffer passed in 
 * @draft
 */
-U_CAPI UScriptCode 
-uscript_getCode(const char* nameOrAbbrOrLocale,UErrorCode *err);
+U_CAPI int32_t 
+uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);

 /**
 * Gets a script name associated with the given script code. 
--- a/icu4c/source/common/uscript.c
+++ b/icu4c/source/common/uscript.c
@ -210,7 +210,7 @@ static const UScriptCode scriptAbbrCodes[] = {
        USCRIPT_SYRIAC     ,
      /*  USCRIPT_SYRIAC     , */
      /*  USCRIPT_SYRIAC     , */
-        USCRIPT_SYRIAC     ,
+      /*  USCRIPT_SYRIAC     , */
        USCRIPT_TAMIL      ,
        USCRIPT_TELUGU     ,
        USCRIPT_THAANA     ,
@ -768,48 +768,77 @@ findCodeIndex(const UScriptCode unsorted[], const UScriptCode target, int size){
    return -1;
 }

-U_CAPI UScriptCode 
-uscript_getCode(const char* nameOrAbbrOrLocale, UErrorCode* err){
+U_CAPI int32_t 
+uscript_getCode(const char* nameOrAbbrOrLocale,
+                UScriptCode* fillIn,
+                int32_t capacity,
+                UErrorCode* err){
+
    UScriptCode code = USCRIPT_INVALID_CODE;
    int strIndex=0;
-
+    int32_t numFilled=0;
+    int32_t len=0;
    /* check arguments */
    if(U_FAILURE(*err)){
-        return code;
+        return numFilled;
    }
    /* try the Names array first */
    strIndex = findStringIndex(scriptNames, nameOrAbbrOrLocale, USCRIPT_NAMES_ARRAY_SIZE);
    
    if(strIndex>=0 && strIndex < USCRIPT_NAMES_ARRAY_SIZE){ 
        code = (UScriptCode) scriptNameCodes[strIndex];
+        len = 1;
    }
    /* we did not find in names array so try abbr array*/
    if(code ==USCRIPT_INVALID_CODE){
        strIndex = findStringIndex(scriptAbbr, nameOrAbbrOrLocale, USCRIPT_ABBR_ARRAY_SIZE);
        if(strIndex>=0 && strIndex < USCRIPT_NAMES_ARRAY_SIZE){ 
            code = (UScriptCode) scriptAbbrCodes[strIndex];
+            len = 1;
        }
    }
+
    /* we still haven't found it try locale */
    if(code==USCRIPT_INVALID_CODE){
        UResourceBundle* resB = ures_open(u_getDataDirectory(),nameOrAbbrOrLocale,err);
        if(U_SUCCESS(*err)&& *err != U_USING_DEFAULT_ERROR){
-            int32_t len=0;
            UResourceBundle* resD = ures_getByKey(resB,kLocaleScript,NULL,err);
            int index =0;
-            const UChar* name = ures_getStringByIndex(resD,0,&len,err);
            if(U_SUCCESS(*err) ){
-                char cName[50] = {'\0'};
-                u_UCharsToChars(name,cName,len);
-                index = findStringIndex(scriptNames, cName, USCRIPT_NAMES_ARRAY_SIZE);
-                code = (UScriptCode) scriptNameCodes[index];
+                len =0;
+                while(ures_hasNext(resD)){
+                    const UChar* name = ures_getNextString(resD,&len,NULL,err);
+                    if(U_SUCCESS(*err)){
+                        char cName[50] = {'\0'};
+                        u_UCharsToChars(name,cName,len);
+                        index = findStringIndex(scriptAbbr, cName, USCRIPT_NAMES_ARRAY_SIZE);
+                        code = (UScriptCode) scriptAbbrCodes[index];
+                        /* got the script code now fill in the buffer */
+                        if(numFilled<=capacity){ 
+                            *(fillIn)++=code;
+                            numFilled++;
+                        }else{
+                            *err=U_BUFFER_OVERFLOW_ERROR;
+                            return len;
+                        }
+                    }
+                }
            }
            ures_close(resD);
        
        }
        ures_close(resB);
+    }else{
+        /* we found it */
+        if(numFilled<=capacity){ 
+            *(fillIn)++=code;
+            numFilled++;
+        }else{
+            *err=U_BUFFER_OVERFLOW_ERROR;
+            return len;
+        }
    }
-    return code;
+    return numFilled;
 }

 U_CAPI const char* 
--- a/icu4c/source/i18n/transreg.cpp
+++ b/icu4c/source/i18n/transreg.cpp
@ -169,10 +169,10 @@ Spec::Spec(const UnicodeString& theSpec) : top(theSpec) {
    // Canonicalize script name -or- do locale->script mapping
    status = U_ZERO_ERROR;
    CharString spc(top);
-    UScriptCode s = uscript_getCode(spc, &status);
-    if (s != USCRIPT_INVALID_CODE) {
-        scriptName = UnicodeString(uscript_getName(s), "");
-    }
+    const int32_t capacity = 10;
+    UScriptCode script[capacity]={USCRIPT_INVALID_CODE};
+    int32_t num = uscript_getCode(spc,script,capacity, &status);
+

    // Canonicalize top
    char buf[256];
--- a/icu4c/source/i18n/upropset.cpp
+++ b/icu4c/source/i18n/upropset.cpp
@ -4,8 +4,8 @@
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 * $Source: /xsrl/Nsvn/icu/icu/source/i18n/Attic/upropset.cpp,v $
-* $Date: 2001/10/17 19:20:41 $
-* $Revision: 1.1 $
+* $Date: 2001/10/23 02:00:50 $
+* $Revision: 1.2 $
 **********************************************************************
 */
 #include "upropset.h"
@ -272,12 +272,15 @@ UnicodeSet* UnicodePropertySet::createCategorySet(const UnicodeString& valueName
 UnicodeSet* UnicodePropertySet::createScriptSet(const UnicodeString& valueName) {
    _CharString cvalueName(valueName);
    UErrorCode ec = U_ZERO_ERROR;
-    UScriptCode script = uscript_getCode(cvalueName, &ec);
-    if (script == USCRIPT_INVALID_CODE || U_FAILURE(ec)) {
+    const int32_t capacity = 10;
+    UScriptCode script[capacity]={USCRIPT_INVALID_CODE};
+    int32_t num = uscript_getCode(cvalueName,script,capacity, &ec);
+
+    if (script[0] == USCRIPT_INVALID_CODE || U_FAILURE(ec)) {
        // Syntax error; unknown short name
        return NULL;
    }
-    return new UnicodeSet(getScriptSet(script));
+    return new UnicodeSet(getScriptSet(script[0]));
 }

 //----------------------------------------------------------------
--- a/icu4c/source/test/cintltst/cucdtst.c
+++ b/icu4c/source/test/cintltst/cucdtst.c
@ -1778,64 +1778,93 @@ TestCaseCompare() {
 }

 static void TestUScriptCodeAPI(){
-    const char* testNames[]={
-    /* test locale */
-    "en", "en_US", "sr", "ta" , "te_IN",
-    "hi", "he", "ar",
-    /* test abbr */
-    "Hani", "Hang","Hebr","Hira",
-    "Knda","Kana","Khmr","Lao",
-    "Latn",/*"Latf","Latg",*/ 
-    "Mlym", "Mong",
-    
-    /* test names */
-    "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN", 
-    "GOTHIC",  "GREEK",  "GUJARATI", 
-    /* test lower case names */
-    "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
-    "oriya",     "runic",     "sinhala", "syriac","tamil",     
-    "telugu",    "thaana",    "thai",    "tibetan", 
-    /* test the bounds*/
-    "ucas", "arabic",
-    /* test bogus */
-    "asfdasd", "5464", "12235",
-    '\0'  
-    };
-    UScriptCode expected[] ={
-        /* locales should return */
-        USCRIPT_LATIN, USCRIPT_LATIN, USCRIPT_CYRILLIC, USCRIPT_TAMIL, USCRIPT_TELUGU, 
-        USCRIPT_DEVANAGARI, USCRIPT_HEBREW, USCRIPT_ARABIC,
-        /* abbr should return */
-        USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
-        USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
-        USCRIPT_LATIN,/* USCRIPT_LATIN, USCRIPT_LATIN,*/ 
-        USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
-        /* names should return */
-        USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
-        USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI,
-        /* lower case names should return */    
-        USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, USCRIPT_MYANMAR, USCRIPT_OGHAM, USCRIPT_OLD_ITALIC,
-        USCRIPT_ORIYA, USCRIPT_RUNIC, USCRIPT_SINHALA, USCRIPT_SYRIAC, USCRIPT_TAMIL,
-        USCRIPT_TELUGU, USCRIPT_THAANA, USCRIPT_THAI, USCRIPT_TIBETAN,
-        /* bounds */
-        USCRIPT_UCAS, USCRIPT_ARABIC,
-        /* bogus names should return invalid code */
-        USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE,
-    };
    int i =0;
    int numErrors =0;
-    UErrorCode err = U_ZERO_ERROR;
-    for( ; testNames[i]!='\0'; i++){
-        UScriptCode code = uscript_getCode(testNames[i],&err);
-        if( code != expected[i]){
-               log_verbose("Error getting script code Got: %i  Expected: %i for name %s\n",
-                   code,expected[i],testNames[i]);
-               numErrors++;
+    {
+        const char* testNames[]={
+        /* test locale */
+        "en", "en_US", "sr", "ta" , "te_IN",
+        "hi", "he", "ar",
+        /* test abbr */
+        "Hani", "Hang","Hebr","Hira",
+        "Knda","Kana","Khmr","Lao",
+        "Latn",/*"Latf","Latg",*/ 
+        "Mlym", "Mong",
+    
+        /* test names */
+        "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN", 
+        "GOTHIC",  "GREEK",  "GUJARATI", 
+        /* test lower case names */
+        "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
+        "oriya",     "runic",     "sinhala", "syriac","tamil",     
+        "telugu",    "thaana",    "thai",    "tibetan", 
+        /* test the bounds*/
+        "ucas", "arabic",
+        /* test bogus */
+        "asfdasd", "5464", "12235",
+        '\0'  
+        };
+        UScriptCode expected[] ={
+            /* locales should return */
+            USCRIPT_LATIN, USCRIPT_LATIN, USCRIPT_CYRILLIC, USCRIPT_TAMIL, USCRIPT_TELUGU, 
+            USCRIPT_DEVANAGARI, USCRIPT_HEBREW, USCRIPT_ARABIC,
+            /* abbr should return */
+            USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
+            USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
+            USCRIPT_LATIN,/* USCRIPT_LATIN, USCRIPT_LATIN,*/ 
+            USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
+            /* names should return */
+            USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
+            USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI,
+            /* lower case names should return */    
+            USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, USCRIPT_MYANMAR, USCRIPT_OGHAM, USCRIPT_OLD_ITALIC,
+            USCRIPT_ORIYA, USCRIPT_RUNIC, USCRIPT_SINHALA, USCRIPT_SYRIAC, USCRIPT_TAMIL,
+            USCRIPT_TELUGU, USCRIPT_THAANA, USCRIPT_THAI, USCRIPT_TIBETAN,
+            /* bounds */
+            USCRIPT_UCAS, USCRIPT_ARABIC,
+            /* bogus names should return invalid code */
+            USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE,
+        };
+
+        UErrorCode err = U_ZERO_ERROR;
+
+        const int32_t capacity = 10;
+
+        for( ; testNames[i]!='\0'; i++){
+            UScriptCode script[10]={USCRIPT_INVALID_CODE};
+            int32_t num = uscript_getCode(testNames[i],script,capacity, &err);
+            if( script[0] != expected[i]){
+                   log_verbose("Error getting script code Got: %i  Expected: %i for name %s\n",
+                       script[0],expected[i],testNames[i]);
+                   numErrors++;
+            }
+        }
+        if(numErrors >0 ){
+            log_err("Errors uchar_getScriptCode() : %i \n",numErrors);
        }
    }
-    if(numErrors >0 ){
-        log_err("Errors uchar_getScriptCode() : %i \n",numErrors);
+    {
+        UErrorCode err = U_ZERO_ERROR;
+        int32_t capacity=0;
+        UScriptCode jaCode[2]={ USCRIPT_KATAKANA,USCRIPT_HIRAGANA};
+        UScriptCode script[10]={USCRIPT_INVALID_CODE};
+        int32_t num = uscript_getCode("ja",script,capacity, &err);
+        /* preflight */
+        if(err==U_BUFFER_OVERFLOW_ERROR){
+            err = U_ZERO_ERROR;
+            capacity = 10;
+            num = uscript_getCode("ja",script,capacity, &err);
+            if(num!=(sizeof(jaCode)/sizeof(UScriptCode)) || script[0]!=jaCode[0] || script[1]!=jaCode[1]){
+                log_err("Errors uscript_getScriptCode() for Japaneese locale \n");
+            }
+        }else{
+            log_err("Errors in uscript_getScriptCode() expected error : %s got: %s \n", 
+                "U_BUFFER_OVERFLOW_ERROR",
+                 u_errorName(err));
+        }
+
    }
+
    {
        UScriptCode testAbbr[]={
            /* names should return */