mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 22:44:49 +00:00
ICU-307 lenient converter name matching
X-SVN-Rev: 1957
This commit is contained in:
parent
8509d36272
commit
1ab1e529c5
5 changed files with 69 additions and 28 deletions
|
@ -136,32 +136,48 @@ isAlias(const char *alias, UErrorCode *pErrorCode) {
|
|||
}
|
||||
}
|
||||
|
||||
/* compare lowercase str1 with mixed-case str2, both being charset names */
|
||||
static int
|
||||
charsetNameCmp(const char *str1, const char *str2) {
|
||||
/**
|
||||
* Do a fuzzy compare of two converter/alias names. The comparison
|
||||
* is case-insensitive. It also ignores the characters '-', '_', and
|
||||
* ' ' (dash, underscore, and space). Thus the strings "UTF-8",
|
||||
* "utf_8", and "Utf 8" are exactly equivalent.
|
||||
*
|
||||
* This is a symmetrical (commutative) operation; order of arguments
|
||||
* is insignificant. This is an important property for sorting the
|
||||
* list (when the list is preprocessed into binary form) and for
|
||||
* performing binary searches on it at run time.
|
||||
*
|
||||
* @param name1 a converter name or alias, zero-terminated
|
||||
* @param name2 a converter name or alias, zero-terminated
|
||||
* @return 0 if the names match, or a negative value if name1
|
||||
* lexically precedes name2, or a positive value if name1
|
||||
* lexically follows name2.
|
||||
*/
|
||||
U_CFUNC int
|
||||
charsetNameCmp(const char *name1, const char *name2) {
|
||||
int rc;
|
||||
unsigned char c1, c2;
|
||||
|
||||
for (;;) {
|
||||
c1 = (unsigned char) *str1;
|
||||
c2 = (unsigned char) *str2;
|
||||
if (c1 == 0) {
|
||||
if(c2 == 0) {
|
||||
return 0;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
} else if (c2 == 0) {
|
||||
return 1;
|
||||
} else {
|
||||
/* compare non-zero characters with lowercase */
|
||||
rc = (int) c1 - (int) (unsigned char) uprv_tolower(c2);
|
||||
if(rc != 0) {
|
||||
return rc;
|
||||
}
|
||||
/* Ignore delimiters '-', '_', and ' ' */
|
||||
while ((c1 = (unsigned char)*name1) == '-'
|
||||
|| c1 == '_' || c1 == ' ') ++name1;
|
||||
while ((c2 = (unsigned char)*name2) == '-'
|
||||
|| c2 == '_' || c2 == ' ') ++name2;
|
||||
|
||||
/* If we reach the ends of both strings then they match */
|
||||
if ((c1|c2)==0) {
|
||||
return 0;
|
||||
}
|
||||
++str1;
|
||||
++str2;
|
||||
|
||||
/* Case-insensitive comparison */
|
||||
rc = (int)(unsigned char)uprv_tolower(c1) -
|
||||
(int)(unsigned char)uprv_tolower(c2);
|
||||
if (rc!=0) {
|
||||
return rc;
|
||||
}
|
||||
++name1;
|
||||
++name2;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -99,4 +99,11 @@ ucnv_io_getDefaultConverterName(void);
|
|||
U_CFUNC void
|
||||
ucnv_io_setDefaultConverterName(const char *name);
|
||||
|
||||
/**
|
||||
* Lexically compare two converter names/aliases, ignoring case and
|
||||
* the delimiters '-', '_', and ' '.
|
||||
*/
|
||||
U_CFUNC int
|
||||
charsetNameCmp(const char *name1, const char *name2);
|
||||
|
||||
#endif /* _UCNV_IO */
|
||||
|
|
|
@ -122,7 +122,10 @@ U_CDECL_END
|
|||
|
||||
/**
|
||||
* Creates a UConverter object with the names specified as a C string.
|
||||
* The actual name will be resolved with the alias file.
|
||||
* The actual name will be resolved with the alias file
|
||||
* using a case-insensitive string comparison that ignores
|
||||
* the delimiters '-', '_', and ' ' (dash, underscore, and space).
|
||||
* E.g., the names "UTF8", "utf-8", and "Utf 8" are all equivalent.
|
||||
* If <code>NULL</code> is passed for the converter name, it will create one with the
|
||||
* getDefaultName return value.
|
||||
*
|
||||
|
@ -151,8 +154,12 @@ UConverter* U_EXPORT2 ucnv_open (const char *converterName, UErrorCode * err);
|
|||
|
||||
/**
|
||||
* Creates a Unicode converter with the names specified as unicode string. The name should be limited to
|
||||
* the ASCII-7 alphanumerics range. The actual name will be resolved with the alias file.
|
||||
* if <TT>NULL</TT> is passed for the converter name, it will create one with the
|
||||
* the ASCII-7 alphanumerics range.
|
||||
* The actual name will be resolved with the alias file
|
||||
* using a case-insensitive string comparison that ignores
|
||||
* the delimiters '-', '_', and ' ' (dash, underscore, and space).
|
||||
* E.g., the names "UTF8", "utf-8", and "Utf 8" are all equivalent.
|
||||
* If <TT>NULL</TT> is passed for the converter name, it will create one with the
|
||||
* getDefaultName return value.
|
||||
* @param converterName : name of the uconv table in a zero terminated Unicode string
|
||||
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND</TT>
|
||||
|
@ -714,7 +721,9 @@ U_CAPI int32_t U_EXPORT2 ucnv_countAvailable (void);
|
|||
|
||||
|
||||
/**
|
||||
* Gives the number of aliases for given converter or alias name
|
||||
* Gives the number of aliases for given converter or alias name.
|
||||
* Note that additional aliases are recognized by ucnv_open().
|
||||
* This method only enumerates the listed entries in the alias file.
|
||||
* @param alias alias name
|
||||
* @param pErrorCode result of operation
|
||||
* @return number of names on alias list
|
||||
|
@ -724,7 +733,9 @@ U_CAPI uint16_t
|
|||
ucnv_countAliases(const char *alias, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Gives the name of the alias at given index of alias list
|
||||
* Gives the name of the alias at given index of alias list.
|
||||
* Note that additional aliases are recognized by ucnv_open().
|
||||
* This method only enumerates the listed entries in the alias file.
|
||||
* @param alias alias name
|
||||
* @param n index in alias list
|
||||
* @param pErrorCode result of operation
|
||||
|
@ -735,7 +746,9 @@ U_CAPI const char *
|
|||
ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Fill-up the list of alias names for the given alias
|
||||
* Fill-up the list of alias names for the given alias.
|
||||
* Note that additional aliases are recognized by ucnv_open().
|
||||
* This method only enumerates the listed entries in the alias file.
|
||||
* @param alias alias name
|
||||
* @param aliases fill-in list, aliases is a pointer to an array of
|
||||
* <code>ucnv_countAliases()</code> string-pointers
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "unicode/utypes.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "ucnv_io.h" /* charsetNameCmp */
|
||||
#include "filestrm.h"
|
||||
#include "unewdata.h"
|
||||
#include "uoptions.h"
|
||||
|
@ -326,7 +327,7 @@ allocString(uint32_t length) {
|
|||
|
||||
static int
|
||||
compareAliases(const void *alias1, const void *alias2) {
|
||||
return uprv_stricmp(((Alias *)alias1)->alias, ((Alias *)alias2)->alias);
|
||||
return charsetNameCmp(((Alias*)alias1)->alias, ((Alias*)alias2)->alias);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -89,6 +89,10 @@ LINK32=link.exe
|
|||
|
||||
SOURCE=.\gencnval.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=..\..\common\ucnv_io.c
|
||||
# End Source File
|
||||
# End Group
|
||||
# Begin Group "Header Files"
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue