ICU-2235 use charset-explicit version of uprv_comparePropertyNames() in pnames.icu swapping

X-SVN-Rev: 13218
2025-04-13 08:53:20 +00:00 · 2003-09-26 00:29:18 +00:00 · 2003-09-26 00:29:18 +00:00 · b373e4dd5a
commit b373e4dd5a
parent 8c4a58cca7
4 changed files with 119 additions and 30 deletions
--- a/icu4c/source/common/propname.cpp
+++ b/icu4c/source/common/propname.cpp
@ -305,11 +305,18 @@ struct NameAndIndex {
    Offset name, index;
 };

+typedef int32_t U_CALLCONV PropNameCompareFn(const char *name1, const char *name2); /* type of a property name comparison function */
+
+struct CompareContext { /* sort context handed to upname_compareRows() */
+    const char *chars; /* base address to which the NameAndIndex name offsets are added */
+    PropNameCompareFn *propCompare; /* comparison function applied to the two resulting names */
+};
+
 static int32_t
 upname_compareRows(const void *context, const void *left, const void *right) {
-    const char *chars=(const char *)context;
-    return (int32_t)uprv_strcmp(chars+((const NameAndIndex *)left)->name,
-                                chars+((const NameAndIndex *)right)->name);
+    /*
+     * Sort callback: context is a CompareContext, left and right are NameAndIndex rows.
+     * The name offsets of both rows are resolved against cmp->chars and the
+     * two names are compared with cmp->propCompare; its result is returned unchanged.
+     * The context is only read here, so the const qualifier is kept in the cast.
+     */
+    const CompareContext *cmp=(const CompareContext *)context;
+    return cmp->propCompare(cmp->chars+((const NameAndIndex *)left)->name,
+                            cmp->chars+((const NameAndIndex *)right)->name);
 }

 int32_t
@ -327,6 +334,7 @@ NameToEnum::swap(const UDataSwapper *ds,
    Offset *outNameArray;

    NameAndIndex *sortArray;
+    CompareContext cmp;

    int32_t i, size, oldIndex;

@ -389,8 +397,13 @@ NameToEnum::swap(const UDataSwapper *ds,
         * use a stable sort to avoid shuffling of equal strings,
         * which makes testing harder
         */
+        cmp.chars=(const char *)outBytes;
+        cmp.propCompare=
+            ds->outCharset==U_ASCII_FAMILY ?
+                uprv_compareASCIIPropertyNames :
+                uprv_compareEBCDICPropertyNames;
        uprv_sortArray(sortArray, tempMap->count, sizeof(NameAndIndex),
-                       upname_compareRows, outBytes,
+                       upname_compareRows, &cmp,
                       TRUE, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "upname_swap(NameToEnum).uprv_sortArray(%d items) failed - %s\n",
--- a/icu4c/source/common/unicode/urename.h
+++ b/icu4c/source/common/unicode/urename.h
@ -827,7 +827,6 @@
 #define uprv_cnttab_setContraction uprv_cnttab_setContraction_2_8
 #define uprv_compareInvAscii uprv_compareInvAscii_2_8
 #define uprv_compareInvEbcdic uprv_compareInvEbcdic_2_8
-#define uprv_comparePropertyNames uprv_comparePropertyNames_2_8
 #define uprv_convertToLCID uprv_convertToLCID_2_8
 #define uprv_convertToPosix uprv_convertToPosix_2_8
 #define uprv_copyAscii uprv_copyAscii_2_8
--- a/icu4c/source/common/uprops.c
+++ b/icu4c/source/common/uprops.c
@ -29,51 +29,115 @@
 #ifdef DEBUG
 #include <stdio.h>
 #endif
+
+/**
+ * Gets the next non-ignorable ASCII character from a property name
+ * and lowercases it.
+ * Ignorable bytes are '-', '_', space and 0x09..0x0d.
+ * The advance count covers the skipped bytes plus the byte that is returned;
+ * at the end of the string it covers the terminating NUL and the character part is 0.
+ * @param name NUL-terminated property name
+ * @return ((advance count for the name)<<8)|character
+ */
+static U_INLINE int32_t
+getASCIIPropertyNameChar(const char *name) {
+    int32_t i;
+    char c;
+
+    /* Ignore delimiters '-', '_', and ASCII White_Space */
+    for(i=0;
+        (c=name[i++])==0x2d || c==0x5f || /* '-' or '_' */
+        c==0x20 || (0x09<=c && c<=0x0d); /* space, or 0x09..0x0d */
+    ) {} /* on exit, i is one past the byte held in c */
+
+    if(c!=0) {
+        return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
+    } else {
+        return i<<8; /* end of string: character part is 0 */
+    }
+}
+
+/**
+ * Gets the next non-ignorable EBCDIC character from a property name
+ * and lowercases it.
+ * Ignorable bytes are the EBCDIC codes for '-' (0x60), '_' (0x6d), space (0x40)
+ * and the control codes 0x05, 0x15, 0x25, 0x0b, 0x0c, 0x0d.
+ * The advance count covers the skipped bytes plus the byte that is returned;
+ * at the end of the string it covers the terminating NUL and the character part is 0.
+ * @param name NUL-terminated property name
+ * @return ((advance count for the name)<<8)|character
+ */
+static U_INLINE int32_t
+getEBCDICPropertyNameChar(const char *name) {
+    int32_t i;
+    char c;
+
+    /* Ignore delimiters '-', '_', and EBCDIC White_Space */
+    for(i=0;
+        (c=name[i++])==0x60 || c==0x6d || /* '-' or '_' */
+        c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d; /* space and control codes */
+    ) {} /* on exit, i is one past the byte held in c */
+
+    if(c!=0) {
+        return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
+    } else {
+        return i<<8; /* end of string: character part is 0 */
+    }
+}
+
 /**
 * Unicode property names and property value names are compared
 * "loosely". Property[Value]Aliases.txt say:
 *   "With loose matching of property names, the case distinctions, whitespace,
 *    and '_' are ignored."
 *
- * This function does just that, for ASCII (char *) name strings.
+ * This function does just that, for (char *) name strings.
 * It is almost identical to ucnv_compareNames() but also ignores
- * ASCII White_Space characters (U+0009..U+000d).
+ * White_Space control characters (U+0009..U+000d, and U+0085 on EBCDIC).
+ *
+ * @return 0 if both names end without a difference; otherwise the difference
+ *         (name1 minus name2) of the first pair of lowercased characters
+ *         that differ, each taken as a value 0..0xff
 *
 * @internal
 */
+
 U_CAPI int32_t U_EXPORT2
-uprv_comparePropertyNames(const char *name1, const char *name2) {
-    int32_t rc;
-    unsigned char c1, c2;
+uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
+    int32_t rc, r1, r2;

    for(;;) {
-        /* Ignore delimiters '-', '_', and ASCII White_Space */
-        while((c1=(unsigned char)*name1)=='-' || c1=='_' ||
-              c1==' ' || c1=='\t' || c1=='\n' || c1=='\v' || c1=='\f' || c1=='\r'
-        ) {
-            ++name1;
-        }
-        while((c2=(unsigned char)*name2)=='-' || c2=='_' ||
-              c2==' ' || c2=='\t' || c2=='\n' || c2=='\v' || c2=='\f' || c2=='\r'
-        ) {
-            ++name2;
-        }
+        r1=getASCIIPropertyNameChar(name1); /* (advance count<<8)|lowercased character, character 0 at the end */
+        r2=getASCIIPropertyNameChar(name2);

        /* If we reach the ends of both strings then they match */
-        if((c1|c2)==0) {
+        if(((r1|r2)&0xff)==0) {
            return 0;
        }
        
-        /* Case-insensitive comparison */
-        if(c1!=c2) {
-            rc=(int32_t)(unsigned char)uprv_tolower(c1)-(int32_t)(unsigned char)uprv_tolower(c2);
+        /* Compare the lowercased characters */
+        if(r1!=r2) { /* the values may differ in the advance count only */
+            rc=(r1&0xff)-(r2&0xff);
            if(rc!=0) {
                return rc;
            }
        }

-        ++name1;
-        ++name2;
+        name1+=r1>>8; /* step over the ignored characters and the compared one */
+        name2+=r2>>8;
+    }
+}
+
+U_CAPI int32_t U_EXPORT2
+uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) { /* loose comparison of two EBCDIC property names */
+    int32_t rc, r1, r2;
+
+    for(;;) {
+        r1=getEBCDICPropertyNameChar(name1); /* (advance count<<8)|lowercased character, character 0 at the end */
+        r2=getEBCDICPropertyNameChar(name2);
+
+        /* If we reach the ends of both strings then they match */
+        if(((r1|r2)&0xff)==0) {
+            return 0;
+        }
+        
+        /* Compare the lowercased characters */
+        if(r1!=r2) { /* the values may differ in the advance count only */
+            rc=(r1&0xff)-(r2&0xff); /* difference of the byte values, name1 minus name2 */
+            if(rc!=0) {
+                return rc;
+            }
+        }
+
+        name1+=r1>>8; /* step over the ignored characters and the compared one */
+        name2+=r2>>8;
    }
 }

--- a/icu4c/source/common/uprops.h
+++ b/icu4c/source/common/uprops.h
@ -202,19 +202,32 @@ U_CFUNC int32_t
 uprv_getMaxValues(int32_t column);

 /**
+ * \var uprv_comparePropertyNames
 * Unicode property names and property value names are compared
 * "loosely". Property[Value]Aliases.txt say:
 *   "With loose matching of property names, the case distinctions, whitespace,
 *    and '_' are ignored."
 *
- * This function does just that, for ASCII (char *) name strings.
+ * This function does just that, for (char *) name strings.
 * It is almost identical to ucnv_compareNames() but also ignores
- * ASCII White_Space characters (U+0009..U+000d).
+ * White_Space control characters (U+0009..U+000d, and U+0085 on EBCDIC).
+ *
+ * There is one function per charset family; uprv_comparePropertyNames is
+ * a macro that selects the one matching U_CHARSET_FAMILY at compile time.
 *
 * @internal
 */
+
 U_CAPI int32_t U_EXPORT2
-uprv_comparePropertyNames(const char *name1, const char *name2);
+uprv_compareASCIIPropertyNames(const char *name1, const char *name2);
+
+U_CAPI int32_t U_EXPORT2
+uprv_compareEBCDICPropertyNames(const char *name1, const char *name2);
+
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY
+#   define uprv_comparePropertyNames uprv_compareASCIIPropertyNames
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+#   define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames
+#else
+#   error U_CHARSET_FAMILY is not valid
+#endif /* U_CHARSET_FAMILY */

 /** Turn a bit index into a bit flag. @internal */
 #define FLAG(n) ((uint32_t)1<<(n))