From b373e4dd5a3aee977defe8ffe32a51e409c12d4c Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Fri, 26 Sep 2003 00:29:18 +0000 Subject: [PATCH] ICU-2235 use charset-explicit version of uprv_comparePropertyNames() in pnames.icu swapping X-SVN-Rev: 13218 --- icu4c/source/common/propname.cpp | 21 ++++- icu4c/source/common/unicode/urename.h | 1 - icu4c/source/common/uprops.c | 108 ++++++++++++++++++++------ icu4c/source/common/uprops.h | 19 ++++- 4 files changed, 119 insertions(+), 30 deletions(-) diff --git a/icu4c/source/common/propname.cpp b/icu4c/source/common/propname.cpp index 349c6d81e00..7cc0e13dcd8 100644 --- a/icu4c/source/common/propname.cpp +++ b/icu4c/source/common/propname.cpp @@ -305,11 +305,18 @@ struct NameAndIndex { Offset name, index; }; +typedef int32_t U_CALLCONV PropNameCompareFn(const char *name1, const char *name2); + +struct CompareContext { + const char *chars; + PropNameCompareFn *propCompare; +}; + static int32_t upname_compareRows(const void *context, const void *left, const void *right) { - const char *chars=(const char *)context; - return (int32_t)uprv_strcmp(chars+((const NameAndIndex *)left)->name, - chars+((const NameAndIndex *)right)->name); + CompareContext *cmp=(CompareContext *)context; + return cmp->propCompare(cmp->chars+((const NameAndIndex *)left)->name, + cmp->chars+((const NameAndIndex *)right)->name); } int32_t @@ -327,6 +334,7 @@ NameToEnum::swap(const UDataSwapper *ds, Offset *outNameArray; NameAndIndex *sortArray; + CompareContext cmp; int32_t i, size, oldIndex; @@ -389,8 +397,13 @@ NameToEnum::swap(const UDataSwapper *ds, * use a stable sort to avoid shuffling of equal strings, * which makes testing harder */ + cmp.chars=(const char *)outBytes; + cmp.propCompare= + ds->outCharset==U_ASCII_FAMILY ? 
+ uprv_compareASCIIPropertyNames : + uprv_compareEBCDICPropertyNames; uprv_sortArray(sortArray, tempMap->count, sizeof(NameAndIndex), - upname_compareRows, outBytes, + upname_compareRows, &cmp, TRUE, pErrorCode); if(U_FAILURE(*pErrorCode)) { udata_printError(ds, "upname_swap(NameToEnum).uprv_sortArray(%d items) failed - %s\n", diff --git a/icu4c/source/common/unicode/urename.h b/icu4c/source/common/unicode/urename.h index c5574c1a360..c46d75990fd 100644 --- a/icu4c/source/common/unicode/urename.h +++ b/icu4c/source/common/unicode/urename.h @@ -827,7 +827,6 @@ #define uprv_cnttab_setContraction uprv_cnttab_setContraction_2_8 #define uprv_compareInvAscii uprv_compareInvAscii_2_8 #define uprv_compareInvEbcdic uprv_compareInvEbcdic_2_8 -#define uprv_comparePropertyNames uprv_comparePropertyNames_2_8 #define uprv_convertToLCID uprv_convertToLCID_2_8 #define uprv_convertToPosix uprv_convertToPosix_2_8 #define uprv_copyAscii uprv_copyAscii_2_8 diff --git a/icu4c/source/common/uprops.c b/icu4c/source/common/uprops.c index a6ad85873d6..16398c00232 100644 --- a/icu4c/source/common/uprops.c +++ b/icu4c/source/common/uprops.c @@ -29,51 +29,115 @@ #ifdef DEBUG #include #endif + +/** + * Get the next non-ignorable ASCII character from a property name + * and lowercase it. + * @return ((advance count for the name)<<8)|character + */ +static U_INLINE int32_t +getASCIIPropertyNameChar(const char *name) { + int32_t i; + char c; + + /* Ignore delimiters '-', '_', and ASCII White_Space */ + for(i=0; + (c=name[i++])==0x2d || c==0x5f || + c==0x20 || (0x09<=c && c<=0x0d); + ) {} + + if(c!=0) { + return (i<<8)|(uint8_t)uprv_asciitolower((char)c); + } else { + return i<<8; + } +} + +/** + * Get the next non-ignorable EBCDIC character from a property name + * and lowercase it. 
+ * @return ((advance count for the name)<<8)|character + */ +static U_INLINE int32_t +getEBCDICPropertyNameChar(const char *name) { + int32_t i; + char c; + + /* Ignore delimiters '-', '_', and EBCDIC White_Space */ + for(i=0; + (c=name[i++])==0x60 || c==0x6d || + c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d; + ) {} + + if(c!=0) { + return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c); + } else { + return i<<8; + } +} + /** * Unicode property names and property value names are compared * "loosely". Property[Value]Aliases.txt say: * "With loose matching of property names, the case distinctions, whitespace, * and '_' are ignored." * - * This function does just that, for ASCII (char *) name strings. + * This function does just that, for (char *) name strings. * It is almost identical to ucnv_compareNames() but also ignores - * ASCII White_Space characters (U+0009..U+000d). + * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC). * * @internal */ + U_CAPI int32_t U_EXPORT2 -uprv_comparePropertyNames(const char *name1, const char *name2) { - int32_t rc; - unsigned char c1, c2; +uprv_compareASCIIPropertyNames(const char *name1, const char *name2) { + int32_t rc, r1, r2; for(;;) { - /* Ignore delimiters '-', '_', and ASCII White_Space */ - while((c1=(unsigned char)*name1)=='-' || c1=='_' || - c1==' ' || c1=='\t' || c1=='\n' || c1=='\v' || c1=='\f' || c1=='\r' - ) { - ++name1; - } - while((c2=(unsigned char)*name2)=='-' || c2=='_' || - c2==' ' || c2=='\t' || c2=='\n' || c2=='\v' || c2=='\f' || c2=='\r' - ) { - ++name2; - } + r1=getASCIIPropertyNameChar(name1); + r2=getASCIIPropertyNameChar(name2); /* If we reach the ends of both strings then they match */ - if((c1|c2)==0) { + if(((r1|r2)&0xff)==0) { return 0; } - /* Case-insensitive comparison */ - if(c1!=c2) { - rc=(int32_t)(unsigned char)uprv_tolower(c1)-(int32_t)(unsigned char)uprv_tolower(c2); + /* Compare the lowercased characters */ + if(r1!=r2) { + rc=(r1&0xff)-(r2&0xff); 
if(rc!=0) { return rc; } } - ++name1; - ++name2; + name1+=r1>>8; + name2+=r2>>8; + } +} + +U_CAPI int32_t U_EXPORT2 +uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) { + int32_t rc, r1, r2; + + for(;;) { + r1=getEBCDICPropertyNameChar(name1); + r2=getEBCDICPropertyNameChar(name2); + + /* If we reach the ends of both strings then they match */ + if(((r1|r2)&0xff)==0) { + return 0; + } + + /* Compare the lowercased characters */ + if(r1!=r2) { + rc=(r1&0xff)-(r2&0xff); + if(rc!=0) { + return rc; + } + } + + name1+=r1>>8; + name2+=r2>>8; } } diff --git a/icu4c/source/common/uprops.h b/icu4c/source/common/uprops.h index f55d9419957..ab6907ae6e0 100644 --- a/icu4c/source/common/uprops.h +++ b/icu4c/source/common/uprops.h @@ -202,19 +202,32 @@ U_CFUNC int32_t uprv_getMaxValues(int32_t column); /** + * \var uprv_comparePropertyNames * Unicode property names and property value names are compared * "loosely". Property[Value]Aliases.txt say: * "With loose matching of property names, the case distinctions, whitespace, * and '_' are ignored." * - * This function does just that, for ASCII (char *) name strings. + * This function does just that, for (char *) name strings. * It is almost identical to ucnv_compareNames() but also ignores - * ASCII White_Space characters (U+0009..U+000d). + * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC). * * @internal */ + U_CAPI int32_t U_EXPORT2 -uprv_comparePropertyNames(const char *name1, const char *name2); +uprv_compareASCIIPropertyNames(const char *name1, const char *name2); + +U_CAPI int32_t U_EXPORT2 +uprv_compareEBCDICPropertyNames(const char *name1, const char *name2); + +#if U_CHARSET_FAMILY==U_ASCII_FAMILY +# define uprv_comparePropertyNames uprv_compareASCIIPropertyNames +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY +# define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames +#else +# error U_CHARSET_FAMILY is not valid +#endif /** Turn a bit index into a bit flag. 
@internal */ #define FLAG(n) ((uint32_t)1<<(n))