From 31ebf423236191eea40154d82f666df163e8bbe8 Mon Sep 17 00:00:00 2001 From: George Rhoten Date: Wed, 30 Oct 2002 22:52:08 +0000 Subject: [PATCH] ICU-1977 add additional Converter Alias functions get all converters get the canonical names from a tagged alias X-SVN-Rev: 10121 --- icu4c/source/common/ucnv_io.c | 184 ++++++++++++++++++++++---- icu4c/source/common/ucnv_io.h | 1 + icu4c/source/common/unicode/ucnv.h | 28 ++++ icu4c/source/test/cintltst/ccapitst.c | 20 ++- icu4c/source/test/cintltst/stdnmtst.c | 56 +++++++- 5 files changed, 260 insertions(+), 29 deletions(-) diff --git a/icu4c/source/common/ucnv_io.c b/icu4c/source/common/ucnv_io.c index feaa86d7d0b..cb53e203946 100644 --- a/icu4c/source/common/ucnv_io.c +++ b/icu4c/source/common/ucnv_io.c @@ -405,7 +405,7 @@ ucnv_compareNames(const char *name1, const char *name2) { * search for an alias * return the converter number index for gConverterList */ -static uint32_t +static U_INLINE uint32_t findConverter(const char *alias, UErrorCode *pErrorCode) { uint32_t mid, start, limit; uint32_t lastMid; @@ -444,6 +444,28 @@ findConverter(const char *alias, UErrorCode *pErrorCode) { return UINT32_MAX; } +/* + * Is this alias in this list? + * alias and listOffset should be non-NULL. + */ +static U_INLINE UBool +isAliasInList(const char *alias, uint32_t listOffset) { + if (listOffset) { + uint32_t currAlias; + uint32_t listCount = gTaggedAliasLists[listOffset]; + /* +1 to skip listCount */ + const uint16_t *currList = gTaggedAliasLists + listOffset + 1; + for (currAlias = 0; currAlias < listCount; currAlias++) { + if (currList[currAlias] + && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0) + { + return TRUE; + } + } + } + return FALSE; +} + /* * Search for an standard name of an alias (what is the default name * that this standard uses?) @@ -464,7 +486,7 @@ findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode * *pErrorCode = myErr; } - if (tagNum < (gTagListSize - UCNV_NUM_RESERVED_TAGS) && convNum < gConverterListSize) { + if (tagNum < (gTagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gConverterListSize) { listOffset = gTaggedAliasArray[tagNum*gConverterListSize + convNum]; if (listOffset && gTaggedAliasLists[listOffset + 1]) { return listOffset; @@ -477,28 +499,17 @@ findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode * */ for (idx = 0; idx < gTaggedAliasArraySize; idx++) { listOffset = gTaggedAliasArray[idx]; - if (listOffset) { - uint32_t currAlias; - uint32_t listCount = gTaggedAliasLists[listOffset]; - /* +1 to skip listCount */ - const uint16_t *currList = gTaggedAliasLists + listOffset + 1; - for (currAlias = 0; currAlias < listCount; currAlias++) { - if (currList[currAlias] - && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0) - { - /*return listOffset;*/ - uint32_t currTagNum = idx/gConverterListSize; - uint32_t currConvNum = (idx - currTagNum*gConverterListSize); - uint32_t tempListOffset = gTaggedAliasArray[tagNum*gConverterListSize + currConvNum]; - if (tempListOffset && gTaggedAliasLists[tempListOffset + 1]) { - return tempListOffset; - } - /* else keep on looking */ - /* We could speed this up by starting on the next row - because an alias is unique per row, right now. - This would change if alias versioning appears. */ - } + if (listOffset && isAliasInList(alias, listOffset)) { + uint32_t currTagNum = idx/gConverterListSize; + uint32_t currConvNum = (idx - currTagNum*gConverterListSize); + uint32_t tempListOffset = gTaggedAliasArray[tagNum*gConverterListSize + currConvNum]; + if (tempListOffset && gTaggedAliasLists[tempListOffset + 1]) { + return tempListOffset; } + /* else keep on looking */ + /* We could speed this up by starting on the next row + because an alias is unique per row, right now. + This would change if alias versioning appears. */ } } /* The standard doesn't know about the alias */ @@ -511,6 +522,51 @@ findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode * return UINT32_MAX; } +/* Return the canonical name */ +static uint32_t +findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) { + uint32_t idx; + uint32_t listOffset; + uint32_t convNum; + UErrorCode myErr = U_ZERO_ERROR; + uint32_t tagNum = getTagNumber(standard); + + /* Make a quick guess. Hopefully they used a TR22 canonical alias. */ + convNum = findConverter(alias, &myErr); + if (myErr != U_ZERO_ERROR) { + *pErrorCode = myErr; + } + + if (tagNum < (gTagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gConverterListSize) { + listOffset = gTaggedAliasArray[tagNum*gConverterListSize + convNum]; + if (listOffset && isAliasInList(alias, listOffset)) { + return convNum; + } + if (myErr == U_AMBIGUOUS_ALIAS_WARNING) { + /* Uh Oh! They used an ambiguous alias. + We have to search one slice of the swiss cheese. + We search only in the requested tag, not the whole thing. + This may take a while. + */ + uint32_t convStart = (tagNum)*gConverterListSize; + uint32_t convLimit = (tagNum+1)*gConverterListSize; + for (idx = convStart; idx < convLimit; idx++) { + listOffset = gTaggedAliasArray[idx]; + if (listOffset && isAliasInList(alias, listOffset)) { + return convNum; + } + } + /* The standard doesn't know about the alias */ + } + /* else no canonical name */ + } + /* else converter or tag not found */ + + return UINT32_MAX; +} + + + U_CFUNC const char * ucnv_io_getConverterName(const char *alias, UErrorCode *pErrorCode) { if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { @@ -609,9 +665,9 @@ ucnv_openStandardNames(const char *convName, uprv_free(myEnum); return NULL; } - myEnum->context = myContext; myContext->listOffset = listOffset; myContext->listIdx = 0; + myEnum->context = myContext; } /* else converter or tag not found */ } @@ -690,7 +746,7 @@ U_CFUNC uint16_t ucnv_io_countStandards(UErrorCode *pErrorCode) { if (haveAliasData(pErrorCode)) { /* Don't include the empty list */ - return (uint16_t)(gTagListSize - UCNV_NUM_RESERVED_TAGS); + return (uint16_t)(gTagListSize - UCNV_NUM_HIDDEN_TAGS); } return 0; @@ -699,7 +755,7 @@ ucnv_io_countStandards(UErrorCode *pErrorCode) { U_CAPI const char * U_EXPORT2 ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) { if (haveAliasData(pErrorCode)) { - if (n < gTagListSize - UCNV_NUM_RESERVED_TAGS) { + if (n < gTagListSize - UCNV_NUM_HIDDEN_TAGS) { return GET_STRING(gTagList[n]); } *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; @@ -728,6 +784,19 @@ ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pError return NULL; } +U_CAPI const char * U_EXPORT2 +ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) { + if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { + uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode); + + if (convNum < gConverterListSize) { + return GET_STRING(gConverterList[convNum]); + } + } + + return NULL; +} + void ucnv_io_flushAvailableConverterCache() { if (gAvailableConverters) { @@ -801,6 +870,69 @@ ucnv_io_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) { return NULL; } +static int32_t U_CALLCONV +ucnv_io_countAllConverters(UEnumeration *enumerator, UErrorCode *pErrorCode) { + return gConverterListSize; +} + +static const char* U_CALLCONV +ucnv_io_nextAllConverters(UEnumeration *enumerator, + int32_t* resultLength, + UErrorCode *pErrorCode) +{ + uint16_t *myContext = (uint16_t *)(enumerator->context); + + if (*myContext < gConverterListSize) { + const char *myStr = GET_STRING(gConverterList[(*myContext)++]); + if (resultLength) { + *resultLength = uprv_strlen(myStr); + } + return myStr; + } + /* Either we accessed a zero length list, or we enumerated too far. */ + *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return NULL; +} + +static void U_CALLCONV +ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode *pErrorCode) { + *((uint16_t *)(enumerator->context)) = 0; +} + +static const UEnumeration gEnumAllConverters = { + NULL, + NULL, + ucnv_io_closeUEnumeration, + ucnv_io_countAllConverters, + uenum_unextDefault, + ucnv_io_nextAllConverters, + ucnv_io_resetAllConverters +}; + +U_CAPI UEnumeration * U_EXPORT2 +ucnv_openAllNames(UErrorCode *pErrorCode) { + UEnumeration *myEnum = NULL; + if (haveAliasData(pErrorCode)) { + uint16_t *myContext; + + myEnum = uprv_malloc(sizeof(UEnumeration)); + if (myEnum == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration)); + myContext = uprv_malloc(sizeof(uint16_t)); + if (myContext == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + uprv_free(myEnum); + return NULL; + } + *myContext = 0; + myEnum->context = myContext; + } + return myEnum; +} + U_CFUNC void ucnv_io_fillAvailableConverters(const char **aliases, UErrorCode *pErrorCode) { if (haveAvailableConverterList(pErrorCode)) { diff --git a/icu4c/source/common/ucnv_io.h b/icu4c/source/common/ucnv_io.h index 359120eae57..8a866425852 100644 --- a/icu4c/source/common/ucnv_io.h +++ b/icu4c/source/common/ucnv_io.h @@ -18,6 +18,7 @@ #define UCNV_AMBIGUOUS_ALIAS_MAP_BIT 0x8000 #define UCNV_CONVERTER_INDEX_MASK 0xFFF #define UCNV_NUM_RESERVED_TAGS 2 +#define UCNV_NUM_HIDDEN_TAGS 1 /** * Remove the underscores, dashes and spaces from the name, and convert diff --git a/icu4c/source/common/unicode/ucnv.h b/icu4c/source/common/unicode/ucnv.h index 8319d56a98c..795ff28bffb 100644 --- a/icu4c/source/common/unicode/ucnv.h +++ b/icu4c/source/common/unicode/ucnv.h @@ -1059,6 +1059,20 @@ ucnv_countAvailable (void); U_CAPI const char* U_EXPORT2 ucnv_getAvailableName (int32_t n); +/** + * Returns a UEnumeration to enumerate all of the canonical converter + * names, as per the alias file, regardless of the ability to open each + * converter. + * + * @return A UEnumeration object for getting all the recognized canonical + * converter names. + * @see ucnv_getAvailableName + * @see uenum_close + * @see uenum_next + */ +U_CAPI UEnumeration * U_EXPORT2 +ucnv_openAllNames(UErrorCode *pErrorCode); + /** * Gives the number of aliases for a given converter or alias name. * If the alias is ambiguous, then the preferred converter is used @@ -1164,6 +1178,20 @@ ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode); U_CAPI const char * U_EXPORT2 ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode); +/** + * This function will return the internal canonical converter name of the + * tagged alias. This is the opposite of ucnv_openStandardNames, which + * returns the tagged alias given the canonical name. + * + * @return returns the canonical converter name; + * if a standard or alias name cannot be determined, + * then NULL is returned. The returned string is + * owned by the library. + * @see ucnv_getStandardName + */ +U_CAPI const char * U_EXPORT2 +ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode); + /** * returns the current default converter name. * diff --git a/icu4c/source/test/cintltst/ccapitst.c b/icu4c/source/test/cintltst/ccapitst.c index 7a316856ad8..7cc483bb5f1 100644 --- a/icu4c/source/test/cintltst/ccapitst.c +++ b/icu4c/source/test/cintltst/ccapitst.c @@ -162,6 +162,8 @@ static void TestConvert() UConverterToUCallback oldToUAction = NULL; const void* oldFromUContext = NULL; const void* oldToUContext = NULL; + UEnumeration *allNamesEnum = NULL; + int32_t allNamesCount = 0; /* Allocate memory */ mytarget = (char*) malloc(MAX_FILE_LEN * sizeof(mytarget[0])); @@ -179,12 +181,28 @@ static void TestConvert() /*Calling all the UnicodeConverterCPP API and checking functionality*/ + log_verbose("Testing ucnv_openAllNames()..."); + allNamesEnum = ucnv_openAllNames(&err); + if(U_FAILURE(err)) { + log_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err)); + } + else { + const char *string = NULL; + int32_t len = 0; + allNamesCount = uenum_count(allNamesEnum, &err); + while ((string = uenum_next(allNamesEnum, &len, &err))) { + log_verbose("read \"%s\", length %i\n", string, len); + } + } + uenum_close(allNamesEnum); + err = U_ZERO_ERROR; + /*Tests ucnv_getAvailableName(), getAvialableCount()*/ log_verbose("Testing ucnv_countAvailable()..."); testLong1=ucnv_countAvailable(); - log_info("Number of available Codepages: %d\n", testLong1); + log_info("Number of available Codepages: %d/%d\n", testLong1, allNamesCount); log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */ diff --git a/icu4c/source/test/cintltst/stdnmtst.c b/icu4c/source/test/cintltst/stdnmtst.c index f8f29546ad9..919ab65376e 100644 --- a/icu4c/source/test/cintltst/stdnmtst.c +++ b/icu4c/source/test/cintltst/stdnmtst.c @@ -22,6 +22,7 @@ static void TestStandardName(void); static void TestStandardNames(void); +static void TestCanonicalName(void); void addStandardNamesTest(TestNode** root); @@ -31,6 +32,7 @@ addStandardNamesTest(TestNode** root) { addTest(root, &TestStandardName, "stdnmtst/TestStandardName"); addTest(root, &TestStandardNames, "stdnmtst/TestStandardNames"); + addTest(root, &TestCanonicalName, "stdnmtst/TestCanonicalName"); } static int dotestname(const char *name, const char *standard, const char *expected) { @@ -61,7 +63,7 @@ static void TestStandardName() /* Iterate over all standards. */ - for (i = 0, count = ucnv_countStandards(); i < count; ++i) { + for (i = 0, count = ucnv_countStandards(); i < count-1; ++i) { const char *standard; err = U_ZERO_ERROR; @@ -76,7 +78,13 @@ static void TestStandardName() } } err = U_ZERO_ERROR; - if (ucnv_getStandard(i, &err)) { + /* "" must be last */ + if (*ucnv_getStandard((uint16_t)(count-1), &err) != 0) { + log_err("FAIL: ucnv_getStandard(%d) should return ""\n", count-1); + res = 0; + } + err = U_ZERO_ERROR; + if (ucnv_getStandard(++i, &err)) { log_err("FAIL: ucnv_getStandard(%d) should return NULL\n", i); res = 0; } @@ -102,6 +110,50 @@ static void TestStandardName() } } +static int dotestconv(const char *name, const char *standard, const char *expected) { + int res = 1; + + UErrorCode error; + const char *tag; + + error = U_ZERO_ERROR; + tag = ucnv_getCanonicalName(name, standard, &error); + if (tag && !expected) { + log_err("FAIL: Unexpectedly found %s canonical name for %s, got %s\n", standard, name, tag); + res = 0; + } else if (!tag && expected) { + log_err("FAIL: could not find %s canonical name for %s\n", (standard ? "\"\"" : standard), name); + res = 0; + } else if (expected && (name == tag || uprv_strcmp(expected, tag))) { + log_err("FAIL: expected %s for %s canonical name for %s, got %s\n", expected, standard, name, tag); + res = 0; + } + + return res; +} + +static void TestCanonicalName() +{ + /* Test for some expected results. */ + + if (dotestconv("UTF-8", "IANA", "UTF-8") && /* default name */ + dotestconv("UTF-8", "MIME", "UTF-8") && /* default name */ + dotestconv("ibm-1208", "IBM", "UTF-8") && /* default name */ + dotestconv("ibm-5305", "IBM", "UTF-8") && /* non-default name */ + dotestconv("ibm-5305", "MIME", NULL) && /* mapping does not exist */ + dotestconv("ascii", "MIME", NULL) && /* mapping does not exist */ + dotestconv("ibm-1208", "IANA", NULL) && /* mapping does not exist */ + dotestconv("ibm-5305", "IANA", NULL) && /* mapping does not exist */ + dotestconv("cp1208", "", "UTF-8") && /* default name due to ordering */ + dotestconv("cp65001", "", "UTF-8") && /* non-default name due to ordering */ + dotestconv("ISO-2022", "MIME", "ISO_2022") &&/* default name */ + dotestconv("crazy", "MIME", NULL) && + dotestconv("ASCII", "crazy", NULL)) + { + log_verbose("PASS: getting IANA and MIME canonical names works\n"); + } +} + static UBool doTestNames(const char *name, const char *standard, const char **expected, int32_t size) { UErrorCode err = U_ZERO_ERROR;