mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-2235 use charset-explicit version of uprv_comparePropertyNames() in pnames.icu swapping
X-SVN-Rev: 13218
This commit is contained in:
parent
8c4a58cca7
commit
b373e4dd5a
4 changed files with 119 additions and 30 deletions
|
@ -305,11 +305,18 @@ struct NameAndIndex {
|
|||
Offset name, index;
|
||||
};
|
||||
|
||||
/* Signature of a property-name comparison function: takes two (char *) names, returns an int32_t ordering result. */
typedef int32_t U_CALLCONV PropNameCompareFn(const char *name1, const char *name2);
|
||||
|
||||
/* Context object read by upname_compareRows() when it compares two NameAndIndex rows. */
struct CompareContext {
|
||||
const char *chars; /* base address to which a row's name offset is added to reach its string */
|
||||
PropNameCompareFn *propCompare; /* comparison function applied to the two name strings */
|
||||
};
|
||||
|
||||
/*
 * Row comparator: orders two NameAndIndex rows by the strings found at
 * (base pointer + row->name).
 * NOTE(review): this listing appears to show the previous body (uprv_strcmp()
 * on a bare chars pointer) followed by its replacement (a CompareContext that
 * carries both the base pointer and the comparison function) - confirm against
 * the actual file which of the two is live.
 * NOTE(review): the (CompareContext *) cast drops the const qualifier of
 * context although the context is only read here.
 */
static int32_t
|
||||
upname_compareRows(const void *context, const void *left, const void *right) {
|
||||
const char *chars=(const char *)context;
|
||||
return (int32_t)uprv_strcmp(chars+((const NameAndIndex *)left)->name,
|
||||
chars+((const NameAndIndex *)right)->name);
|
||||
CompareContext *cmp=(CompareContext *)context;
|
||||
return cmp->propCompare(cmp->chars+((const NameAndIndex *)left)->name,
|
||||
cmp->chars+((const NameAndIndex *)right)->name);
|
||||
}
|
||||
|
||||
int32_t
|
||||
|
@ -327,6 +334,7 @@ NameToEnum::swap(const UDataSwapper *ds,
|
|||
Offset *outNameArray;
|
||||
|
||||
NameAndIndex *sortArray;
|
||||
CompareContext cmp;
|
||||
|
||||
int32_t i, size, oldIndex;
|
||||
|
||||
|
@ -389,8 +397,13 @@ NameToEnum::swap(const UDataSwapper *ds,
|
|||
* use a stable sort to avoid shuffling of equal strings,
|
||||
* which makes testing harder
|
||||
*/
|
||||
cmp.chars=(const char *)outBytes;
|
||||
cmp.propCompare=
|
||||
ds->outCharset==U_ASCII_FAMILY ?
|
||||
uprv_compareASCIIPropertyNames :
|
||||
uprv_compareEBCDICPropertyNames;
|
||||
uprv_sortArray(sortArray, tempMap->count, sizeof(NameAndIndex),
|
||||
upname_compareRows, outBytes,
|
||||
upname_compareRows, &cmp,
|
||||
TRUE, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
udata_printError(ds, "upname_swap(NameToEnum).uprv_sortArray(%d items) failed - %s\n",
|
||||
|
|
|
@ -827,7 +827,6 @@
|
|||
#define uprv_cnttab_setContraction uprv_cnttab_setContraction_2_8
|
||||
#define uprv_compareInvAscii uprv_compareInvAscii_2_8
|
||||
#define uprv_compareInvEbcdic uprv_compareInvEbcdic_2_8
|
||||
#define uprv_comparePropertyNames uprv_comparePropertyNames_2_8
|
||||
#define uprv_convertToLCID uprv_convertToLCID_2_8
|
||||
#define uprv_convertToPosix uprv_convertToPosix_2_8
|
||||
#define uprv_copyAscii uprv_copyAscii_2_8
|
||||
|
|
|
@ -29,51 +29,115 @@
|
|||
#ifdef DEBUG
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Get the next non-ignorable ASCII character from a property name
|
||||
* and lowercase it.
* Skipped bytes are '-' (0x2d), '_' (0x5f), space (0x20) and 0x09..0x0d.
* @param name NUL-terminated name whose bytes are tested against ASCII code values
|
||||
* @return ((advance count for the name)<<8)|character
* The advance count is the number of bytes consumed, including the
* returned character; when the terminating NUL is reached the character
* part is 0 and the count includes the NUL.
|
||||
*/
|
||||
static U_INLINE int32_t
|
||||
getASCIIPropertyNameChar(const char *name) {
|
||||
int32_t i;
|
||||
char c;
|
||||
|
||||
/* Ignore delimiters '-', '_', and ASCII White_Space */
/* The loop body is empty: the condition reads each byte into c and post-increments i. */
|
||||
for(i=0;
|
||||
(c=name[i++])==0x2d || c==0x5f ||
|
||||
c==0x20 || (0x09<=c && c<=0x0d);
|
||||
) {}
|
||||
|
||||
if(c!=0) {
|
||||
/* the (uint8_t) cast confines the lowercased byte to the low 8 bits of the result */
return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
|
||||
} else {
|
||||
/* end of string: character part stays 0 */
return i<<8;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the next non-ignorable EBCDIC character from a property name
|
||||
* and lowercase it.
* Skipped bytes are 0x60, 0x6d, 0x40 (per the comment below: '-', '_' and
* space in EBCDIC) and the control codes 0x05, 0x15, 0x25, 0x0b, 0x0c, 0x0d.
* @param name NUL-terminated name whose bytes are tested against EBCDIC code values
|
||||
* @return ((advance count for the name)<<8)|character
* The advance count is the number of bytes consumed, including the
* returned character; when the terminating NUL is reached the character
* part is 0 and the count includes the NUL.
|
||||
*/
|
||||
static U_INLINE int32_t
|
||||
getEBCDICPropertyNameChar(const char *name) {
|
||||
int32_t i;
|
||||
char c;
|
||||
|
||||
/* Ignore delimiters '-', '_', and EBCDIC White_Space */
/* The loop body is empty: the condition reads each byte into c and post-increments i. */
|
||||
for(i=0;
|
||||
(c=name[i++])==0x60 || c==0x6d ||
|
||||
c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
|
||||
) {}
|
||||
|
||||
if(c!=0) {
|
||||
/* the (uint8_t) cast confines the lowercased byte to the low 8 bits of the result */
return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
|
||||
} else {
|
||||
/* end of string: character part stays 0 */
return i<<8;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Unicode property names and property value names are compared
|
||||
* "loosely". Property[Value]Aliases.txt say:
|
||||
* "With loose matching of property names, the case distinctions, whitespace,
|
||||
* and '_' are ignored."
|
||||
*
|
||||
* This function does just that, for ASCII (char *) name strings.
|
||||
* This function does just that, for (char *) name strings.
|
||||
* It is almost identical to ucnv_compareNames() but also ignores
|
||||
* ASCII White_Space characters (U+0009..U+000d).
|
||||
* C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
|
||||
/*
 * NOTE(review): this listing appears to interleave the lines of the former
 * uprv_comparePropertyNames() (c1/c2, inline delimiter-skipping loops,
 * uprv_tolower) with the lines of its replacement
 * uprv_compareASCIIPropertyNames() (r1/r2 obtained from
 * getASCIIPropertyNameChar) - confirm against the actual file which lines
 * are live.
 */
U_CAPI int32_t U_EXPORT2
|
||||
uprv_comparePropertyNames(const char *name1, const char *name2) {
|
||||
int32_t rc;
|
||||
unsigned char c1, c2;
|
||||
uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
|
||||
int32_t rc, r1, r2;
|
||||
|
||||
for(;;) {
|
||||
/* Ignore delimiters '-', '_', and ASCII White_Space */
|
||||
while((c1=(unsigned char)*name1)=='-' || c1=='_' ||
|
||||
c1==' ' || c1=='\t' || c1=='\n' || c1=='\v' || c1=='\f' || c1=='\r'
|
||||
) {
|
||||
++name1;
|
||||
}
|
||||
while((c2=(unsigned char)*name2)=='-' || c2=='_' ||
|
||||
c2==' ' || c2=='\t' || c2=='\n' || c2=='\v' || c2=='\f' || c2=='\r'
|
||||
) {
|
||||
++name2;
|
||||
}
|
||||
r1=getASCIIPropertyNameChar(name1);
|
||||
r2=getASCIIPropertyNameChar(name2);
|
||||
|
||||
/* If we reach the ends of both strings then they match */
|
||||
if((c1|c2)==0) {
|
||||
if(((r1|r2)&0xff)==0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Case-insensitive comparison */
|
||||
if(c1!=c2) {
|
||||
rc=(int32_t)(unsigned char)uprv_tolower(c1)-(int32_t)(unsigned char)uprv_tolower(c2);
|
||||
/* Compare the lowercased characters */
|
||||
if(r1!=r2) {
|
||||
rc=(r1&0xff)-(r2&0xff);
|
||||
if(rc!=0) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
++name1;
|
||||
++name2;
|
||||
/* r>>8 is the advance count: skips the ignored bytes plus the character just compared */
name1+=r1>>8;
|
||||
name2+=r2>>8;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Loosely compare two (char *) property names using EBCDIC code values.
 * Each step fetches the next non-ignorable, lowercased character of each
 * name via getEBCDICPropertyNameChar().
 * @param name1 first NUL-terminated name
 * @param name2 second NUL-terminated name
 * @return 0 if both names end without a differing character; otherwise the
 *         difference (name1 character minus name2 character) of the first
 *         differing pair of lowercased characters, with an ended name
 *         contributing 0
 */
U_CAPI int32_t U_EXPORT2
|
||||
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
|
||||
int32_t rc, r1, r2;
|
||||
|
||||
for(;;) {
|
||||
r1=getEBCDICPropertyNameChar(name1);
|
||||
r2=getEBCDICPropertyNameChar(name2);
|
||||
|
||||
/* If we reach the ends of both strings then they match */
|
||||
if(((r1|r2)&0xff)==0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Compare the lowercased characters */
/* r1!=r2 can also hold when only the advance counts differ; rc==0 then means the characters matched */
|
||||
if(r1!=r2) {
|
||||
rc=(r1&0xff)-(r2&0xff);
|
||||
if(rc!=0) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* r>>8 is the advance count: skips the ignored bytes plus the character just compared */
name1+=r1>>8;
|
||||
name2+=r2>>8;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -202,19 +202,32 @@ U_CFUNC int32_t
|
|||
uprv_getMaxValues(int32_t column);
|
||||
|
||||
/**
|
||||
* \var uprv_comparePropertyNames
|
||||
* Unicode property names and property value names are compared
|
||||
* "loosely". Property[Value]Aliases.txt say:
|
||||
* "With loose matching of property names, the case distinctions, whitespace,
|
||||
* and '_' are ignored."
|
||||
*
|
||||
* This function does just that, for ASCII (char *) name strings.
|
||||
* This function does just that, for (char *) name strings.
|
||||
* It is almost identical to ucnv_compareNames() but also ignores
|
||||
* ASCII White_Space characters (U+0009..U+000d).
|
||||
* C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uprv_comparePropertyNames(const char *name1, const char *name2);
|
||||
uprv_compareASCIIPropertyNames(const char *name1, const char *name2);
|
||||
|
||||
/* EBCDIC counterpart of uprv_compareASCIIPropertyNames(); same parameters and return type. */
U_CAPI int32_t U_EXPORT2
|
||||
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2);
|
||||
|
||||
/*
 * uprv_comparePropertyNames is a macro alias that resolves at compile time to
 * the comparison function matching U_CHARSET_FAMILY; any other charset family
 * value stops the build with an #error.
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
|
||||
# define uprv_comparePropertyNames uprv_compareASCIIPropertyNames
|
||||
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
||||
# define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames
|
||||
#else
|
||||
# error U_CHARSET_FAMILY is not valid
|
||||
#endif
|
||||
|
||||
/** Turn a bit index into a bit flag. @internal */
|
||||
#define FLAG(n) ((uint32_t)1<<(n))
|
||||
|
|
Loading…
Add table
Reference in a new issue