diff --git a/icu4c/source/common/convert.cpp b/icu4c/source/common/convert.cpp index 1d42bffdb08..e36fe2edeb8 100644 --- a/icu4c/source/common/convert.cpp +++ b/icu4c/source/common/convert.cpp @@ -25,7 +25,9 @@ extern "C" { } #include "convert.h" - +/* list of converter and alias names */ +const char **UnicodeConverterCPP::availableConverterNames=NULL; +int32_t UnicodeConverterCPP::availableConverterNamesCount=0; UnicodeConverterCPP::UnicodeConverterCPP() { @@ -397,9 +399,21 @@ void UnicodeConverterCPP::getStarters(bool_t starters[256], const char* const* UnicodeConverterCPP::getAvailableNames(int32_t& num, UErrorCode& err) { - num = ucnv_countAvailable(); - return AVAILABLE_CONVERTERS_NAMES; - + if (availableConverterNames==NULL) { + availableConverterNamesCount = ucnv_io_countAvailableAliases(&err); + if (availableConverterNamesCount > 0) { + availableConverterNames = new const char *[availableConverterNamesCount]; + if (availableConverterNames != NULL) { + ucnv_io_fillAvailableAliases(availableConverterNames, &err); + } else { + num = 0; + err = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + } + } + num = availableConverterNamesCount; + return availableConverterNames; } int32_t UnicodeConverterCPP::flushCache() diff --git a/icu4c/source/common/convert.h b/icu4c/source/common/convert.h index d1382739d0c..c1a3bdaddbd 100644 --- a/icu4c/source/common/convert.h +++ b/icu4c/source/common/convert.h @@ -23,6 +23,10 @@ class U_COMMON_API UnicodeConverterCPP /*Debug method*/ void printRef(void) const; + /* list of converter and alias names */ + static const char **availableConverterNames; + static int32_t availableConverterNamesCount; + public: //Constructors and a destructor diff --git a/icu4c/source/common/ucnv.c b/icu4c/source/common/ucnv.c index a9a043ed648..3205dc1aaed 100644 --- a/icu4c/source/common/ucnv.c +++ b/icu4c/source/common/ucnv.c @@ -275,28 +275,24 @@ int32_t ucnv_flushCache () return tableDeletedNum; } -/*returns a single Name from the static list, will 
return NULL if out of bounds +/*returns a single Name from the list, will return NULL if out of bounds */ const char* ucnv_getAvailableName (int32_t index) { - UErrorCode err = U_ZERO_ERROR; - /*lazy evaluates the list of Available converters */ - if (AVAILABLE_CONVERTERS_NAMES == NULL) - setupAliasTableAndAvailableConverters (&err); - if (index > AVAILABLE_CONVERTERS) - return NULL; - else - return AVAILABLE_CONVERTERS_NAMES[index]; + if (0 <= index && index <= 0xffff) { + UErrorCode err = U_ZERO_ERROR; + const char *name = ucnv_io_getAvailableAlias((uint16_t)index, &err); + if (U_SUCCESS(err)) { + return name; + } + } + return NULL; } int32_t ucnv_countAvailable () { UErrorCode err = U_ZERO_ERROR; - /*lazy evaluates the list of Available converters */ - if (AVAILABLE_CONVERTERS_NAMES == NULL) - setupAliasTableAndAvailableConverters (&err); - - return AVAILABLE_CONVERTERS; + return ucnv_io_countAvailableAliases(&err); } void ucnv_getSubstChars (const UConverter * converter, diff --git a/icu4c/source/common/ucnv_bld.c b/icu4c/source/common/ucnv_bld.c index 14c2740e50c..19063984998 100644 --- a/icu4c/source/common/ucnv_bld.c +++ b/icu4c/source/common/ucnv_bld.c @@ -92,6 +92,70 @@ UHashtable *SHARED_DATA_HASHTABLE = NULL; UHashtable *ALGORITHMIC_CONVERTERS_HASHTABLE = NULL; +/*Returns uppercased string */ +char * + strtoupper (char *name) +{ + int32_t i = 0; + + while (name[i] = icu_toupper (name[i])) + i++; + + return name; +} + +/* Returns true in c is a in set 'setOfChars', false otherwise + */ +bool_t + isInSet (char c, const char *setOfChars) +{ + uint8_t i = 0; + + while (setOfChars[i] != '\0') + { + if (c == setOfChars[i++]) + return TRUE; + } + + return FALSE; +} + +/* Returns pointer to the next non-whitespace (or non-separator) + */ +int32_t + nextTokenOffset (const char *line, const char *separators) +{ + int32_t i = 0; + + while (line[i] && isInSet (line[i], separators)) + i++; + + return i; +} + +/* Returns pointer to the next token based on the set of 
separators + */ +char * + getToken (char *token, char *line, const char *separators) +{ + int32_t i = nextTokenOffset (line, separators); + int8_t j = 0; + + while (line[i] && (!isInSet (line[i], separators))) + token[j++] = line[i++]; + token[j] = '\0'; + + return line + i; +} + +int32_t uhash_hashIString(const void* name) +{ + char myName[UCNV_MAX_CONVERTER_NAME_LENGTH]; + icu_strcpy(myName, (char*)name); + strtoupper(myName); + + return uhash_hashString(myName); +} CompactShortArray* createCompactShortArrayFromFile (FileStream * infile, UErrorCode * err) { @@ -212,10 +276,17 @@ UConverter* createConverterFromFile (const char *fileName, UErrorCode * err) FileStream *infile = NULL; int8_t errorLevel = 0; char throwAway[UCNV_COPYRIGHT_STRING_LENGTH]; - if (U_FAILURE (*err)) - return NULL; + char actualFullFilenameName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; - infile = openConverterFile (fileName); + if (err == NULL || U_FAILURE (*err)) { + return NULL; + } + + icu_strcpy (actualFullFilenameName, u_getDataDirectory ()); + icu_strcat (actualFullFilenameName, fileName); + icu_strcat (actualFullFilenameName, CONVERTER_FILE_EXTENSION); + + infile = T_FileStream_open (actualFullFilenameName, "rb"); if (infile == NULL) { *err = U_FILE_ACCESS_ERROR; @@ -433,7 +504,7 @@ void shareConverterData (UConverterSharedData * data) if (SHARED_DATA_HASHTABLE == NULL) { UHashtable* myHT = uhash_openSize ((UHashFunction) uhash_hashSharedData, - AVAILABLE_CONVERTERS, + ucnv_io_countAvailableAliases(&err), &err); if (U_FAILURE (err)) return; umtx_lock (NULL); @@ -508,14 +579,6 @@ bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData) return TRUE; } -int32_t uhash_hashIString(const void* name) -{ - char myName[UCNV_MAX_CONVERTER_NAME_LENGTH]; - icu_strcpy(myName, (char*)name); - strtoupper(myName); - - return uhash_hashString(myName); -} bool_t isDataBasedConverter (const char *name) { int32_t i = 0; @@ -575,18 +638,22 @@ bool_t isDataBasedConverter (const char *name) 
UConverter * createConverter (const char *converterName, UErrorCode * err) { - char realName[UCNV_MAX_CONVERTER_NAME_LENGTH]; + const char *realName; UConverter *myUConverter = NULL; UConverterSharedData *mySharedConverterData = NULL; if (U_FAILURE (*err)) return NULL; - if (resolveName (realName, converterName) == FALSE) - { - *err = U_INVALID_TABLE_FILE; - return NULL; - } + realName = ucnv_io_getConverterName(converterName, err); + if (U_FAILURE(*err)) { + return NULL; + } + + if (realName == NULL) { + /* set the input name in case the converter was added without updating the alias table */ + realName = converterName; + } if (isDataBasedConverter (realName)) @@ -596,7 +663,7 @@ UConverter * if (mySharedConverterData == NULL) { /*Not cached, we need to stream it in from file */ - myUConverter = createConverterFromFile (converterName, err); + myUConverter = createConverterFromFile (realName, err); if (U_FAILURE (*err) || (myUConverter == NULL)) { diff --git a/icu4c/source/common/ucnv_bld.h b/icu4c/source/common/ucnv_bld.h index 8e859909252..a4e8e9c247f 100644 --- a/icu4c/source/common/ucnv_bld.h +++ b/icu4c/source/common/ucnv_bld.h @@ -216,5 +216,9 @@ typedef struct } UConverterDataISO2022; +#define CONVERTER_FILE_EXTENSION ".cnv" + +/*case insensitive hash key*/ +U_CAPI int32_t U_EXPORT2 uhash_hashIString(const void* name); #endif /* _UCNV_BLD */ diff --git a/icu4c/source/common/ucnv_io.c b/icu4c/source/common/ucnv_io.c index e6bc397bd3e..d4040a0d92e 100644 --- a/icu4c/source/common/ucnv_io.c +++ b/icu4c/source/common/ucnv_io.c @@ -1,273 +1,291 @@ /* - ******************************************************************************** - * * - * COPYRIGHT: * - * (C) Copyright International Business Machines Corporation, 1998 * - * Licensed Material - Program-Property of IBM - All Rights Reserved. * - * US Government Users Restricted Rights - Use, duplication, or disclosure * - * restricted by GSA ADP Schedule Contract with IBM Corp. 
* - * * - ******************************************************************************** - * - * - * uconv_io.c: - * initializes global variables and defines functions pertaining to file access, - * and name resolution aspect of the library. - ******************************************************************************** - */ +******************************************************************************** +* * +* COPYRIGHT: * +* (C) Copyright International Business Machines Corporation, 1998, 1999 * +* Licensed Material - Program-Property of IBM - All Rights Reserved. * +* US Government Users Restricted Rights - Use, duplication, or disclosure * +* restricted by GSA ADP Schedule Contract with IBM Corp. * +* * +******************************************************************************** +* +* +* ucnv_io.c: +* initializes global variables and defines functions pertaining to file access, +* and name resolution aspect of the library. +* +* new implementation: +* +* created on: 1999nov22 +* created by: Markus W. Scherer +* +* Use the binary cnvalias.dat (created from convrtrs.txt) to work +* with aliases for converter names. +******************************************************************************** +*/ + #include "utypes.h" #include "umutex.h" -#include "filestrm.h" #include "cstring.h" -#include "cmemory.h" -#include "uhash.h" -#include "ucmp8.h" -#include "ucmp16.h" -#include "ucnv_bld.h" #include "ucnv_io.h" -#include "uloc.h" +#include "udata.h" -static void doSetupAliasTableAndAvailableConverters (FileStream * converterFile, - UErrorCode * err); - -static char *_convertDataDirectory = NULL; - -/*Initializes Global Variables */ -static UHashtable *ALIASNAMES_HASHTABLE = NULL; -char **AVAILABLE_CONVERTERS_NAMES = NULL; -int32_t AVAILABLE_CONVERTERS = 0; - -/* Remove all characters followed by '#' +/* Format of cnvalias.dat ------------------------------------------------------ + * + * cnvalias.dat is a binary, memory-mappable form of convrtrs.txt . 
+ * It contains two sorted tables and a block of zero-terminated strings. + * Each table is preceded by the number of table entries. + * + * The first table maps from aliases to converter indexes. + * The converter names themselves are listed as aliases in this table. + * Each entry in this table has an offset to the alias and + * an index of the converter in the converter table. + * + * The second table lists only the converters themselves. + * Each entry in this table has an offset to the converter name and + * the number of aliases, including the converter itself. + * A count of 1 means that there is no alias, only the converter name. + * + * In the block of strings after the tables, each converter name is directly + * followed by its aliases. All offsets to strings are offsets from the + * beginning of the data. + * + * More formal file data structure (data format 1.0): + * + * uint16_t aliasCount; + * struct { + * uint16_t aliasOffset; + * uint16_t converterIndex; + * } aliases[aliasCount]; + * + * uint16_t converterCount; + * struct { + * uint16_t converterOffset; + * uint16_t aliasCount; + * } converters[converterCount]; + * + * char strings[]={ + * "Converter0\0Alias1\0Alias2\0...Converter1\0Converter2\0Alias0\0Alias1\0..." 
+ * }; */ -char * - removeComments (char *line) -{ - char *pound = icu_strchr (line, '#'); - if (pound != NULL) - *pound = '\0'; - return line; +#define DATA_NAME "cnvalias" +#define DATA_TYPE "dat" + +static UDataMemory *aliasData=NULL; +static const uint16_t *aliasTable=NULL; + +static bool_t +isAcceptable(void *context, + const char *type, const char *name, + UDataInfo *pInfo) { + return + pInfo->size>=20 && + pInfo->isBigEndian==U_IS_BIG_ENDIAN && + pInfo->charsetFamily==U_CHARSET_FAMILY && + pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */ + pInfo->dataFormat[1]==0x76 && + pInfo->dataFormat[2]==0x41 && + pInfo->dataFormat[3]==0x6c && + pInfo->formatVersion[0]==1; } -/*Returns uppercased string */ -char * - strtoupper (char *name) -{ - int32_t i = 0; - - while (name[i] = icu_toupper (name[i])) - i++; - - return name; -} - -/* Returns true in c is a in set 'setOfChars', false otherwise - */ -bool_t - isInSet (char c, const char *setOfChars) -{ - uint8_t i = 0; - - while (setOfChars[i] != '\0') - { - if (c == setOfChars[i++]) - return TRUE; +static bool_t +haveAliasData(UErrorCode *pErrorCode) { + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return FALSE; } - return FALSE; + /* load converter alias data from file if necessary */ + if(aliasData==NULL) { + UDataMemory *data; + const uint16_t *table=NULL; + + /* open the data outside the mutex block */ + data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return FALSE; + } + + table=(const uint16_t *)udata_getMemory(data); + + /* in the mutex block, set the data for this process */ + umtx_lock(NULL); + if(aliasData==NULL) { + aliasData=data; + data=NULL; + aliasTable=table; + table=NULL; + } + umtx_unlock(NULL); + + /* if a different thread set it first, then close the extra data */ + if(data!=NULL) { + udata_close(data); /* NULL if it was set correctly */ + } + } + + return TRUE; } -/* Returns pointer to the next non-whitespace (or 
non-separator) +static bool_t +isAlias(const char *alias, UErrorCode *pErrorCode) { + if(alias==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } else if(*alias==0) { + return FALSE; + } else { + return TRUE; + } +} + +/* compare lowercase str1 with mixed-case str2, ignoring case */ +static int +strHalfCaseCmp(const char *str1, const char *str2) { + /* compare non-NULL strings lexically with lowercase */ + int rc; + unsigned char c1, c2; + + for(;;) { + c1=(unsigned char)*str1; + c2=(unsigned char)*str2; + if(c1==0) { + if(c2==0) { + return 0; + } else { + return -1; + } + } else if(c2==0) { + return 1; + } else { + /* compare non-zero characters with lowercase */ + rc=(int)c1-(int)(unsigned char)icu_tolower(c2); + if(rc!=0) { + return rc; + } + } + ++str1; + ++str2; + } +} + +/* + * search for an alias + * return NULL or a pointer to the converter table entry */ -int32_t - nextTokenOffset (const char *line, const char *separators) -{ - int32_t i = 0; +static const uint16_t * +findAlias(const char *alias) { + char name[100]; + const uint16_t *p=aliasTable; + uint16_t i, start, limit; - while (line[i] && isInSet (line[i], separators)) - i++; + limit=*p++; + if(limit==0) { + /* there are no aliases */ + return NULL; + } - return i; + /* convert the alias name to lowercase to do case-insensitive comparisons */ + for(i=0; i0) { + /* skip a name, first the canonical converter name */ + aliases+=icu_strlen(aliases)+1; + --index; + } + return aliases; + } + } } - - /*If another thread has already created the hashtable and array, we need to free */ - if ((ALIASNAMES_HASHTABLE != NULL) || (AVAILABLE_CONVERTERS_NAMES != NULL)) - { - while (myAVAILABLE_CONVERTERS > 0) - { - icu_free (myAVAILABLE_CONVERTERS_NAMES[--myAVAILABLE_CONVERTERS]); - } - icu_free (myAVAILABLE_CONVERTERS_NAMES); - uhash_close (myALIASNAMES_HASHTABLE); - } - else - { - umtx_lock (NULL); - ALIASNAMES_HASHTABLE = myALIASNAMES_HASHTABLE; - AVAILABLE_CONVERTERS_NAMES = 
myAVAILABLE_CONVERTERS_NAMES; - AVAILABLE_CONVERTERS = myAVAILABLE_CONVERTERS; - umtx_unlock (NULL); - } - - return; + return NULL; } -/* resolveName takes a table alias name and fills in the actual name used internally. - * it returns a TRUE if the name was found (table supported) returns FALSE otherwise - */ -bool_t - resolveName (char *realName, const char *alias) -{ - int32_t i = 0; - bool_t found = FALSE; - char *actualName = NULL; - UErrorCode err = U_ZERO_ERROR; - - /*Lazy evaluates the Alias hashtable */ - if (ALIASNAMES_HASHTABLE == NULL) - setupAliasTableAndAvailableConverters (&err); - if (U_FAILURE (err)) - return FALSE; - - - actualName = (char *) uhash_get (ALIASNAMES_HASHTABLE, uhash_hashIString (alias)); - - if (actualName != NULL) - { - icu_strcpy (realName, actualName); - found = TRUE; +U_CFUNC uint16_t +ucnv_io_countAvailableAliases(UErrorCode *pErrorCode) { + if(haveAliasData(pErrorCode)) { + return *aliasTable; } - - return found; + return 0; } -/*Higher level function, takes in an alias name - *and returns a file pointer of the table file - *Will return NULL if the file isn't found for - *any given reason (file not there, name not in - *"convrtrs.txt" - */ -FileStream * - openConverterFile (const char *name) -{ - char actualFullFilenameName[UCNV_MAX_FULL_FILE_NAME_LENGTH]; - FileStream *tableFile = NULL; - - icu_strcpy (actualFullFilenameName, uloc_getDataDirectory ()); - - if (resolveName (actualFullFilenameName + icu_strlen (actualFullFilenameName), name)) - { - icu_strcat (actualFullFilenameName, CONVERTER_FILE_EXTENSION); - tableFile = T_FileStream_open (actualFullFilenameName, "rb"); +U_CFUNC const char * +ucnv_io_getAvailableAlias(uint16_t index, UErrorCode *pErrorCode) { + if(haveAliasData(pErrorCode) && index<*aliasTable) { + return (const char *)aliasTable+*(aliasTable+1+2*index); + } + return NULL; +} + +U_CFUNC void +ucnv_io_fillAvailableAliases(const char **aliases, UErrorCode *pErrorCode) { + if(haveAliasData(pErrorCode)) { + 
const uint16_t *p=aliasTable; + uint16_t count=*p++; + while(count>0) { + *aliases++=(const char *)aliasTable+*p; + p+=2; + --count; + } } - - return tableFile; } diff --git a/icu4c/source/common/ucnv_io.h b/icu4c/source/common/ucnv_io.h index 36c0d88de01..11ee2bb4ddd 100644 --- a/icu4c/source/common/ucnv_io.h +++ b/icu4c/source/common/ucnv_io.h @@ -10,7 +10,7 @@ ******************************************************************************** * * - * uconv_io.h: + * ucnv_io.h: * defines variables and functions pertaining to file access, and name resolution * aspect of the library */ @@ -18,47 +18,24 @@ #ifndef UCNV_IO_H #define UCNV_IO_H - #include "utypes.h" -#include "filestrm.h" +U_CFUNC const char * +ucnv_io_getConverterName(const char *alias, UErrorCode *pErrorCode); -/*filename containing aliasing information on the converter names */ -static const char CONVERTER_FILE_NAME[13] = "convrtrs.txt"; -static const char CONVERTER_FILE_EXTENSION[5] = ".cnv"; -static const char SPACE_SEPARATORS[4] = {'\n', '\t', ' ', 0x00}; +U_CFUNC uint16_t +ucnv_io_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode); +U_CFUNC const char * +ucnv_io_getAlias(const char *alias, uint16_t index, UErrorCode *pErrorCode); -extern char **AVAILABLE_CONVERTERS_NAMES; -extern int32_t AVAILABLE_CONVERTERS; +U_CFUNC uint16_t +ucnv_io_countAvailableAliases(UErrorCode *pErrorCode); -/*Checks if c is in the NULL terminated setOfChars */ -bool_t isInSet (char c, const char *setOfChars); +U_CFUNC const char * +ucnv_io_getAvailableAlias(uint16_t index, UErrorCode *pErrorCode); -/*Remove all characters followed by '#' */ -U_CAPI char * U_EXPORT2 removeComments (char *line); +U_CFUNC void +ucnv_io_fillAvailableAliases(const char **aliases, UErrorCode *pErrorCode); - -/*Returns pointer to the next non-whitespace (or non-separators) */ -U_CAPI int32_t U_EXPORT2 nextTokenOffset (const char *line, const char *separators); - -/*Copies the next string in token and returns an updated 
pointer to the next token */ -U_CAPI char * U_EXPORT2 getToken (char *token, char *line, const char *separators); - -/*Takes an alias name and returns a FileStream pointer of the requested converter table or NULL, if not found */ -FileStream * U_EXPORT2 openConverterFile (const char *name); - -/*Fills in the Actual name of a converter based on the convrtrs.txt file - returns TRUE if the name was resolved FALSE otherwise */ -bool_t resolveName (char *realName, const char *alias); - -/*called through lazy evaluation. Sets up a hashtable containg all the aliases and an array with pointers - to the values inside the hashtable for quick indexing */ -void setupAliasTableAndAvailableConverters (UErrorCode * err); - -/*Uppercases a null-terminate string */ -U_CAPI char * U_EXPORT2 strtoupper (char *); - -/*case insensitive hash key*/ -U_CAPI int32_t U_EXPORT2 uhash_hashIString(const void* name); #endif /* _UCNV_IO */