ICU-307 lenient converter name matching

X-SVN-Rev: 1957
This commit is contained in:
Alan Liu 2000-07-19 22:18:43 +00:00
parent 8509d36272
commit 1ab1e529c5
5 changed files with 69 additions and 28 deletions

View file

@ -136,32 +136,48 @@ isAlias(const char *alias, UErrorCode *pErrorCode) {
}
}
/* compare lowercase str1 with mixed-case str2, both being charset names */
static int
charsetNameCmp(const char *str1, const char *str2) {
/**
* Do a fuzzy compare of two converter/alias names. The comparison
* is case-insensitive. It also ignores the characters '-', '_', and
* ' ' (dash, underscore, and space). Thus the strings "UTF-8",
* "utf_8", and "Utf 8" are exactly equivalent.
*
* This is a symmetrical operation: both arguments are treated the same
* way, so swapping them only negates the sign of the result. This is an
* important property for sorting the list (when the list is preprocessed
* into binary form) and for performing binary searches on it at run time.
*
* @param name1 a converter name or alias, zero-terminated
* @param name2 a converter name or alias, zero-terminated
* @return 0 if the names match, or a negative value if name1
* lexically precedes name2, or a positive value if name1
* lexically follows name2.
*/
U_CFUNC int
charsetNameCmp(const char *name1, const char *name2) {
int rc;
unsigned char c1, c2;
for (;;) {
c1 = (unsigned char) *str1;
c2 = (unsigned char) *str2;
if (c1 == 0) {
if(c2 == 0) {
return 0;
} else {
return -1;
}
} else if (c2 == 0) {
return 1;
} else {
/* compare non-zero characters with lowercase */
rc = (int) c1 - (int) (unsigned char) uprv_tolower(c2);
if(rc != 0) {
return rc;
}
/* Ignore delimiters '-', '_', and ' ' */
while ((c1 = (unsigned char)*name1) == '-'
|| c1 == '_' || c1 == ' ') ++name1;
while ((c2 = (unsigned char)*name2) == '-'
|| c2 == '_' || c2 == ' ') ++name2;
/* If we reach the ends of both strings then they match */
if ((c1|c2)==0) {
return 0;
}
++str1;
++str2;
/* Case-insensitive comparison */
rc = (int)(unsigned char)uprv_tolower(c1) -
(int)(unsigned char)uprv_tolower(c2);
if (rc!=0) {
return rc;
}
++name1;
++name2;
}
}

View file

@ -99,4 +99,11 @@ ucnv_io_getDefaultConverterName(void);
U_CFUNC void
ucnv_io_setDefaultConverterName(const char *name);
/**
* Lexically compare two converter names/aliases, ignoring case and
* the delimiters '-', '_', and ' '.
*/
U_CFUNC int
charsetNameCmp(const char *name1, const char *name2);
#endif /* _UCNV_IO */

View file

@ -122,7 +122,10 @@ U_CDECL_END
/**
* Creates a UConverter object with the names specified as a C string.
* The actual name will be resolved with the alias file.
* The actual name will be resolved with the alias file
* using a case-insensitive string comparison that ignores
* the delimiters '-', '_', and ' ' (dash, underscore, and space).
* E.g., the names "UTF8", "utf-8", and "Utf 8" are all equivalent.
* If <code>NULL</code> is passed for the converter name, it will create one with the
* getDefaultName return value.
*
@ -151,8 +154,12 @@ UConverter* U_EXPORT2 ucnv_open (const char *converterName, UErrorCode * err);
/**
* Creates a Unicode converter with the names specified as unicode string. The name should be limited to
* the ASCII-7 alphanumerics range. The actual name will be resolved with the alias file.
* if <TT>NULL</TT> is passed for the converter name, it will create one with the
* the ASCII-7 alphanumerics range.
* The actual name will be resolved with the alias file
* using a case-insensitive string comparison that ignores
* the delimiters '-', '_', and ' ' (dash, underscore, and space).
* E.g., the names "UTF8", "utf-8", and "Utf 8" are all equivalent.
* If <TT>NULL</TT> is passed for the converter name, it will create one with the
* getDefaultName return value.
* @param converterName : name of the uconv table in a zero terminated Unicode string
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND</TT>
@ -714,7 +721,9 @@ U_CAPI int32_t U_EXPORT2 ucnv_countAvailable (void);
/**
* Gives the number of aliases for given converter or alias name
* Gives the number of aliases for given converter or alias name.
* Note that additional aliases are recognized by ucnv_open().
* This method only enumerates the listed entries in the alias file.
* @param alias alias name
* @param pErrorCode result of operation
* @return number of names on alias list
@ -724,7 +733,9 @@ U_CAPI uint16_t
ucnv_countAliases(const char *alias, UErrorCode *pErrorCode);
/**
* Gives the name of the alias at given index of alias list
* Gives the name of the alias at given index of alias list.
* Note that additional aliases are recognized by ucnv_open().
* This method only enumerates the listed entries in the alias file.
* @param alias alias name
* @param n index in alias list
* @param pErrorCode result of operation
@ -735,7 +746,9 @@ U_CAPI const char *
ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
/**
* Fill-up the list of alias names for the given alias
* Fill up the list of alias names for the given alias.
* Note that additional aliases are recognized by ucnv_open().
* This method only enumerates the listed entries in the alias file.
* @param alias alias name
* @param aliases fill-in list, aliases is a pointer to an array of
* <code>ucnv_countAliases()</code> string-pointers

View file

@ -23,6 +23,7 @@
#include "unicode/utypes.h"
#include "cmemory.h"
#include "cstring.h"
#include "ucnv_io.h" /* charsetNameCmp */
#include "filestrm.h"
#include "unewdata.h"
#include "uoptions.h"
@ -326,7 +327,7 @@ allocString(uint32_t length) {
static int
compareAliases(const void *alias1, const void *alias2) {
return uprv_stricmp(((Alias *)alias1)->alias, ((Alias *)alias2)->alias);
return charsetNameCmp(((Alias*)alias1)->alias, ((Alias*)alias2)->alias);
}
/*

View file

@ -89,6 +89,10 @@ LINK32=link.exe
SOURCE=.\gencnval.c
# End Source File
# Begin Source File
SOURCE=..\..\common\ucnv_io.c
# End Source File
# End Group
# Begin Group "Header Files"