ICU-2235 use charset-explicit version of uprv_comparePropertyNames() in pnames.icu swapping

X-SVN-Rev: 13218
This commit is contained in:
Markus Scherer 2003-09-26 00:29:18 +00:00
parent 8c4a58cca7
commit b373e4dd5a
4 changed files with 119 additions and 30 deletions

View file

@ -305,11 +305,18 @@ struct NameAndIndex {
Offset name, index;
};
/**
 * Signature of a property-name comparison function:
 * takes two (char *) names and returns an int32_t comparison result.
 */
typedef int32_t U_CALLCONV PropNameCompareFn(const char *name1, const char *name2);
/**
 * Context passed to upname_compareRows() while sorting NameAndIndex rows.
 */
struct CompareContext {
/* base pointer; a row's name offset is added to it to locate the name string */
const char *chars;
/* comparison function applied to the two located name strings */
PropNameCompareFn *propCompare;
};
static int32_t
upname_compareRows(const void *context, const void *left, const void *right) {
const char *chars=(const char *)context;
return (int32_t)uprv_strcmp(chars+((const NameAndIndex *)left)->name,
chars+((const NameAndIndex *)right)->name);
CompareContext *cmp=(CompareContext *)context;
return cmp->propCompare(cmp->chars+((const NameAndIndex *)left)->name,
cmp->chars+((const NameAndIndex *)right)->name);
}
int32_t
@ -327,6 +334,7 @@ NameToEnum::swap(const UDataSwapper *ds,
Offset *outNameArray;
NameAndIndex *sortArray;
CompareContext cmp;
int32_t i, size, oldIndex;
@ -389,8 +397,13 @@ NameToEnum::swap(const UDataSwapper *ds,
* use a stable sort to avoid shuffling of equal strings,
* which makes testing harder
*/
cmp.chars=(const char *)outBytes;
cmp.propCompare=
ds->outCharset==U_ASCII_FAMILY ?
uprv_compareASCIIPropertyNames :
uprv_compareEBCDICPropertyNames;
uprv_sortArray(sortArray, tempMap->count, sizeof(NameAndIndex),
upname_compareRows, outBytes,
upname_compareRows, &cmp,
TRUE, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
udata_printError(ds, "upname_swap(NameToEnum).uprv_sortArray(%d items) failed - %s\n",

View file

@ -827,7 +827,6 @@
#define uprv_cnttab_setContraction uprv_cnttab_setContraction_2_8
#define uprv_compareInvAscii uprv_compareInvAscii_2_8
#define uprv_compareInvEbcdic uprv_compareInvEbcdic_2_8
#define uprv_comparePropertyNames uprv_comparePropertyNames_2_8
#define uprv_convertToLCID uprv_convertToLCID_2_8
#define uprv_convertToPosix uprv_convertToPosix_2_8
#define uprv_copyAscii uprv_copyAscii_2_8

View file

@ -29,51 +29,115 @@
#ifdef DEBUG
#include <stdio.h>
#endif
/**
 * Fetch the next significant character of an ASCII property name.
 *
 * Leading '-' (0x2d), '_' (0x5f), space (0x20) and the control characters
 * 0x09..0x0d are skipped. The first character that is not skipped is
 * lowercased with uprv_asciitolower() and returned in the low byte.
 * If that character is the terminating NUL, the low byte is 0.
 *
 * @param name pointer to the current position in a NUL-terminated name
 * @return ((number of bytes consumed, including the returned one)<<8)|character
 */
static U_INLINE int32_t
getASCIIPropertyNameChar(const char *name) {
    int32_t consumed=0;
    char ch;

    /* Step over delimiters and ASCII White_Space; always consumes at least one byte. */
    do {
        ch=name[consumed++];
    } while(ch==0x2d || ch==0x5f ||
            ch==0x20 || (0x09<=ch && ch<=0x0d));

    if(ch==0) {
        /* end of string: only the advance count is reported */
        return consumed<<8;
    }
    return (consumed<<8)|(uint8_t)uprv_asciitolower((char)ch);
}
/**
 * Fetch the next significant character of an EBCDIC property name.
 *
 * Leading delimiters '-' (0x60) and '_' (0x6d) and the EBCDIC White_Space
 * bytes 0x40, 0x05, 0x15, 0x25, 0x0b, 0x0c and 0x0d are skipped. The first
 * character that is not skipped is lowercased with uprv_ebcdictolower()
 * and returned in the low byte. If that character is the terminating NUL,
 * the low byte is 0.
 *
 * @param name pointer to the current position in a NUL-terminated name
 * @return ((number of bytes consumed, including the returned one)<<8)|character
 */
static U_INLINE int32_t
getEBCDICPropertyNameChar(const char *name) {
    int32_t consumed=0;
    char ch;

    /* Step over delimiters and EBCDIC White_Space; always consumes at least one byte. */
    do {
        ch=name[consumed++];
    } while(ch==0x60 || ch==0x6d ||
            ch==0x40 || ch==0x05 || ch==0x15 || ch==0x25 ||
            ch==0x0b || ch==0x0c || ch==0x0d);

    if(ch==0) {
        /* end of string: only the advance count is reported */
        return consumed<<8;
    }
    return (consumed<<8)|(uint8_t)uprv_ebcdictolower((char)ch);
}
/**
* Unicode property names and property value names are compared
* "loosely". Property[Value]Aliases.txt say:
* "With loose matching of property names, the case distinctions, whitespace,
* and '_' are ignored."
*
* This function does just that, for ASCII (char *) name strings.
* This function does just that, for (char *) name strings.
* It is almost identical to ucnv_compareNames() but also ignores
* ASCII White_Space characters (U+0009..U+000d).
* C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
*
* @internal
*/
U_CAPI int32_t U_EXPORT2
uprv_comparePropertyNames(const char *name1, const char *name2) {
int32_t rc;
unsigned char c1, c2;
uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
int32_t rc, r1, r2;
for(;;) {
/* Ignore delimiters '-', '_', and ASCII White_Space */
while((c1=(unsigned char)*name1)=='-' || c1=='_' ||
c1==' ' || c1=='\t' || c1=='\n' || c1=='\v' || c1=='\f' || c1=='\r'
) {
++name1;
}
while((c2=(unsigned char)*name2)=='-' || c2=='_' ||
c2==' ' || c2=='\t' || c2=='\n' || c2=='\v' || c2=='\f' || c2=='\r'
) {
++name2;
}
r1=getASCIIPropertyNameChar(name1);
r2=getASCIIPropertyNameChar(name2);
/* If we reach the ends of both strings then they match */
if((c1|c2)==0) {
if(((r1|r2)&0xff)==0) {
return 0;
}
/* Case-insensitive comparison */
if(c1!=c2) {
rc=(int32_t)(unsigned char)uprv_tolower(c1)-(int32_t)(unsigned char)uprv_tolower(c2);
/* Compare the lowercased characters */
if(r1!=r2) {
rc=(r1&0xff)-(r2&0xff);
if(rc!=0) {
return rc;
}
}
++name1;
++name2;
name1+=r1>>8;
name2+=r2>>8;
}
}
/**
 * Loosely compare two EBCDIC property (value) names.
 *
 * Each name is walked with getEBCDICPropertyNameChar(), which skips
 * ignorable characters and lowercases the rest, so the comparison ignores
 * case, whitespace, '-' and '_'.
 *
 * @param name1 first NUL-terminated name
 * @param name2 second NUL-terminated name
 * @return 0 if the names match loosely, otherwise the difference between
 *         the first pair of differing lowercased characters
 */
U_CAPI int32_t U_EXPORT2
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
    int32_t packed1, packed2, diff;

    for(;;) {
        /* low byte: lowercased character (0 at the end); upper bits: advance count */
        packed1=getEBCDICPropertyNameChar(name1);
        packed2=getEBCDICPropertyNameChar(name2);

        /* Both names are exhausted at the same time: they match. */
        if(((packed1|packed2)&0xff)==0) {
            return 0;
        }

        /* Only the character bytes decide the order; advance counts may differ. */
        diff=(packed1&0xff)-(packed2&0xff);
        if(diff!=0) {
            return diff;
        }

        name1+=packed1>>8;
        name2+=packed2>>8;
    }
}

View file

@ -202,19 +202,32 @@ U_CFUNC int32_t
uprv_getMaxValues(int32_t column);
/**
* \var uprv_comparePropertyNames
* Unicode property names and property value names are compared
* "loosely". Property[Value]Aliases.txt say:
* "With loose matching of property names, the case distinctions, whitespace,
* and '_' are ignored."
*
* This function does just that, for ASCII (char *) name strings.
* This function does just that, for (char *) name strings.
* It is almost identical to ucnv_compareNames() but also ignores
* ASCII White_Space characters (U+0009..U+000d).
* C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
*
* @internal
*/
U_CAPI int32_t U_EXPORT2
uprv_comparePropertyNames(const char *name1, const char *name2);
uprv_compareASCIIPropertyNames(const char *name1, const char *name2);
U_CAPI int32_t U_EXPORT2
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2);
/*
 * uprv_comparePropertyNames maps to the comparison function that matches
 * the charset family selected by U_CHARSET_FAMILY; any value other than
 * U_ASCII_FAMILY or U_EBCDIC_FAMILY is rejected at compile time.
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
# define uprv_comparePropertyNames uprv_compareASCIIPropertyNames
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
# define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames
#else
# error U_CHARSET_FAMILY is not valid
#endif
/** Turn a bit index into a bit flag. @internal */
#define FLAG(n) ((uint32_t)1<<(n))