ICU-4078 move property name comparison functions from uprops to propname

X-SVN-Rev: 16303
2025-04-08 06:53:45 +00:00 · 2004-09-11 22:02:10 +00:00 · 2004-09-11 22:02:10 +00:00 · bd1f26f937
commit bd1f26f937
parent 248e6ad8fa
5 changed files with 165 additions and 157 deletions
--- a/icu4c/source/common/propname.cpp
+++ b/icu4c/source/common/propname.cpp
@ -16,6 +16,125 @@
 #include "cstring.h"
 #include "uarrsort.h"

+U_CDECL_BEGIN
+
+/**
+ * Fetches the next non-ignorable ASCII character from a property name and
+ * lowercases it with uprv_asciitolower(); '-', '_', space and 0x09..0x0d are skipped.
+ * @return ((number of chars consumed from name)<<8)|character; the character byte is 0 at the end of the string
+ */
+static U_INLINE int32_t
+getASCIIPropertyNameChar(const char *name) {
+    int32_t i;  /* number of chars read from name so far */
+    char c;     /* most recently read char */
+
+    /* Ignore delimiters '-' (0x2d), '_' (0x5f), and ASCII White_Space (0x20, 0x09..0x0d) */
+    for(i=0;
+        (c=name[i++])==0x2d || c==0x5f ||
+        c==0x20 || (0x09<=c && c<=0x0d);
+    ) {}  /* all work is done in the condition; i also counts the char that ended the loop */
+
+    if(c!=0) {
+        return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
+    } else {
+        return i<<8;  /* hit the terminating NUL: the character byte stays 0 */
+    }
+}
+
+/**
+ * Fetches the next non-ignorable EBCDIC character from a property name and
+ * lowercases it with uprv_ebcdictolower(); delimiters and white space are skipped.
+ * @return ((number of chars consumed from name)<<8)|character; the character byte is 0 at the end of the string
+ */
+static U_INLINE int32_t
+getEBCDICPropertyNameChar(const char *name) {
+    int32_t i;  /* number of chars read from name so far */
+    char c;     /* most recently read char */
+
+    /* Ignore delimiters '-' (0x60), '_' (0x6d), and EBCDIC White_Space (0x40, 0x05, 0x15, 0x25, 0x0b..0x0d) */
+    for(i=0;
+        (c=name[i++])==0x60 || c==0x6d ||
+        c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
+    ) {}  /* all work is done in the condition; i also counts the char that ended the loop */
+
+    if(c!=0) {
+        return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
+    } else {
+        return i<<8;  /* hit the terminating NUL: the character byte stays 0 */
+    }
+}
+
+/**
+ * Unicode property names and property value names are compared "loosely".
+ *
+ * UCD.html 4.0.1 says:
+ *   For all property names, property value names, and for property values for
+ *   Enumerated, Binary, or Catalog properties, use the following
+ *   loose matching rule:
+ *
+ *   LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
+ *
+ * The two functions below do just that, for ASCII and EBCDIC (char *) name strings.
+ * They are almost identical to ucnv_compareNames() but also ignore
+ * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
+ * @return 0 if the names match loosely, else (char from name1)-(char from name2) at the first mismatch, after lowercasing
+ * @internal
+ */
+
+U_CAPI int32_t U_EXPORT2
+uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
+    int32_t rc, r1, r2;  /* r1/r2: packed (advance<<8)|char per name; rc: character difference */
+
+    for(;;) {
+        r1=getASCIIPropertyNameChar(name1);
+        r2=getASCIIPropertyNameChar(name2);
+
+        /* If we reach the ends of both strings then they match */
+        if(((r1|r2)&0xff)==0) {
+            return 0;
+        }
+        
+        /* Compare the lowercased characters */
+        if(r1!=r2) {  /* packed values may differ only in the advance count, so the chars are rechecked */
+            rc=(r1&0xff)-(r2&0xff);
+            if(rc!=0) {
+                return rc;  /* also taken when exactly one name has ended (its char byte is 0) */
+            }
+        }
+
+        name1+=r1>>8;  /* step over the skipped chars and the char just compared */
+        name2+=r2>>8;
+    }
+}
+/** Loose (LM3-style) comparison of two EBCDIC property names: returns 0 on a match, else the difference of the first mismatching lowercased chars. @internal */
+U_CAPI int32_t U_EXPORT2
+uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
+    int32_t rc, r1, r2;  /* r1/r2: packed (advance<<8)|char per name; rc: character difference */
+
+    for(;;) {
+        r1=getEBCDICPropertyNameChar(name1);
+        r2=getEBCDICPropertyNameChar(name2);
+
+        /* If we reach the ends of both strings then they match */
+        if(((r1|r2)&0xff)==0) {
+            return 0;
+        }
+        
+        /* Compare the lowercased characters */
+        if(r1!=r2) {  /* packed values may differ only in the advance count, so the chars are rechecked */
+            rc=(r1&0xff)-(r2&0xff);
+            if(rc!=0) {
+                return rc;  /* also taken when exactly one name has ended (its char byte is 0) */
+            }
+        }
+
+        name1+=r1>>8;  /* step over the skipped chars and the char just compared */
+        name2+=r2>>8;
+    }
+}
+
+U_CDECL_END
+
 U_NAMESPACE_BEGIN

 //----------------------------------------------------------------------
--- a/icu4c/source/common/propname.h
+++ b/icu4c/source/common/propname.h
@ -1,6 +1,6 @@
 /*
 **********************************************************************
-* Copyright (c) 2002-2003, International Business Machines
+* Copyright (c) 2002-2004, International Business Machines
 * Corporation and others.  All Rights Reserved.
 **********************************************************************
 * Author: Alan Liu
@ -24,6 +24,44 @@
 *   genpname     - creates data
 */

+/* low-level char * property name comparison -------------------------------- */
+
+U_CDECL_BEGIN
+
+/**
+ * \var uprv_comparePropertyNames
+ * Unicode property names and property value names are compared "loosely".
+ *
+ * UCD.html 4.0.1 says:
+ *   For all property names, property value names, and for property values for
+ *   Enumerated, Binary, or Catalog properties, use the following
+ *   loose matching rule:
+ *
+ *   LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
+ *
+ * This function does just that, for (char *) name strings.
+ * It is almost identical to ucnv_compareNames() but also ignores
+ * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
+ * @return 0 if the two names match loosely, otherwise a non-zero difference of the first mismatching characters
+ * @internal
+ */
+
+U_CAPI int32_t U_EXPORT2
+uprv_compareASCIIPropertyNames(const char *name1, const char *name2);  /* for names in an ASCII-family charset */
+
+U_CAPI int32_t U_EXPORT2
+uprv_compareEBCDICPropertyNames(const char *name1, const char *name2);  /* for names in an EBCDIC-family charset */
+
+#if U_CHARSET_FAMILY==U_ASCII_FAMILY  /* uprv_comparePropertyNames aliases the variant for the build's charset family */
+#   define uprv_comparePropertyNames uprv_compareASCIIPropertyNames
+#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
+#   define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames
+#else  /* neither family is configured: fail the build */
+#   error U_CHARSET_FAMILY is not valid
+#endif
+
+U_CDECL_END
+
 /* UDataMemory structure and signatures ------------------------------------- */

 #define PNAME_DATA_NAME "pnames"
--- a/icu4c/source/common/uniset_props.cpp
+++ b/icu4c/source/common/uniset_props.cpp
@ -30,6 +30,7 @@
 #include "util.h"
 #include "uvector.h"
 #include "uprops.h"
+#include "propname.h"
 #include "charstr.h"
 #include "ustrfmt.h"
 #include "mutex.h"
--- a/icu4c/source/common/uprops.c
+++ b/icu4c/source/common/uprops.c
@ -15,6 +15,10 @@
 *
 *   Implementations for mostly non-core Unicode character properties
 *   stored in uprops.icu.
+*
+*   With the APIs implemented here, almost all properties files and
+*   their associated implementation files are used from this file,
+*   including those for normalization and case mappings.
 */

 #include "unicode/utypes.h"
@ -26,121 +30,6 @@

 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

-/**
- * Get the next non-ignorable ASCII character from a property name
- * and lowercases it.
- * @return ((advance count for the name)<<8)|character
- */
-static U_INLINE int32_t
-getASCIIPropertyNameChar(const char *name) {
-    int32_t i;
-    char c;
-
-    /* Ignore delimiters '-', '_', and ASCII White_Space */
-    for(i=0;
-        (c=name[i++])==0x2d || c==0x5f ||
-        c==0x20 || (0x09<=c && c<=0x0d);
-    ) {}
-
-    if(c!=0) {
-        return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
-    } else {
-        return i<<8;
-    }
-}
-
-/**
- * Get the next non-ignorable EBCDIC character from a property name
- * and lowercases it.
- * @return ((advance count for the name)<<8)|character
- */
-static U_INLINE int32_t
-getEBCDICPropertyNameChar(const char *name) {
-    int32_t i;
-    char c;
-
-    /* Ignore delimiters '-', '_', and EBCDIC White_Space */
-    for(i=0;
-        (c=name[i++])==0x60 || c==0x6d ||
-        c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
-    ) {}
-
-    if(c!=0) {
-        return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
-    } else {
-        return i<<8;
-    }
-}
-
-/**
- * Unicode property names and property value names are compared "loosely".
- *
- * UCD.html 4.0.1 says:
- *   For all property names, property value names, and for property values for
- *   Enumerated, Binary, or Catalog properties, use the following
- *   loose matching rule:
- *
- *   LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
- *
- * This function does just that, for (char *) name strings.
- * It is almost identical to ucnv_compareNames() but also ignores
- * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
- *
- * @internal
- */
-
-U_CAPI int32_t U_EXPORT2
-uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
-    int32_t rc, r1, r2;
-
-    for(;;) {
-        r1=getASCIIPropertyNameChar(name1);
-        r2=getASCIIPropertyNameChar(name2);
-
-        /* If we reach the ends of both strings then they match */
-        if(((r1|r2)&0xff)==0) {
-            return 0;
-        }
-        
-        /* Compare the lowercased characters */
-        if(r1!=r2) {
-            rc=(r1&0xff)-(r2&0xff);
-            if(rc!=0) {
-                return rc;
-            }
-        }
-
-        name1+=r1>>8;
-        name2+=r2>>8;
-    }
-}
-
-U_CAPI int32_t U_EXPORT2
-uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
-    int32_t rc, r1, r2;
-
-    for(;;) {
-        r1=getEBCDICPropertyNameChar(name1);
-        r2=getEBCDICPropertyNameChar(name2);
-
-        /* If we reach the ends of both strings then they match */
-        if(((r1|r2)&0xff)==0) {
-            return 0;
-        }
-        
-        /* Compare the lowercased characters */
-        if(r1!=r2) {
-            rc=(r1&0xff)-(r2&0xff);
-            if(rc!=0) {
-                return rc;
-            }
-        }
-
-        name1+=r1>>8;
-        name2+=r2>>8;
-    }
-}
-
 /* API functions ------------------------------------------------------------ */

 static const struct {
@ -212,7 +101,7 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
        } else if(binProps[which].column==-2) {
            /* case mapping properties */
            UErrorCode errorCode=U_ZERO_ERROR;
-            UCaseProps *csp=uchar_getCaseProps(&errorCode);
+            UCaseProps *csp=ucase_getSingleton(&errorCode);
            if(U_FAILURE(errorCode)) {
                return FALSE;
            }
@ -499,5 +388,5 @@ uprv_getInclusions(USetAdder *sa, UErrorCode *pErrorCode) {
    unorm_addPropertyStarts(sa, pErrorCode);
 #endif
    uchar_addPropertyStarts(sa, pErrorCode);
-    ucase_addPropertyStarts(uchar_getCaseProps(pErrorCode), sa, pErrorCode);
+    ucase_addPropertyStarts(ucase_getSingleton(pErrorCode), sa, pErrorCode);
 }
--- a/icu4c/source/common/uprops.h
+++ b/icu4c/source/common/uprops.h
@ -217,45 +217,6 @@ uprv_getMaxValues(int32_t column);
 U_CFUNC UHangulSyllableType
 uchar_getHST(UChar32 c);

-/**
- * Get internal UCaseProps pointer from uchar.c for uprops.c.
- * Other code should use ucase_getSingleton().
- */
-U_CFUNC UCaseProps *
-uchar_getCaseProps(UErrorCode *pErrorCode);
-
-/**
- * \var uprv_comparePropertyNames
- * Unicode property names and property value names are compared "loosely".
- *
- * UCD.html 4.0.1 says:
- *   For all property names, property value names, and for property values for
- *   Enumerated, Binary, or Catalog properties, use the following
- *   loose matching rule:
- *
- *   LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
- *
- * This function does just that, for (char *) name strings.
- * It is almost identical to ucnv_compareNames() but also ignores
- * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
- *
- * @internal
- */
-
-U_CAPI int32_t U_EXPORT2
-uprv_compareASCIIPropertyNames(const char *name1, const char *name2);
-
-U_CAPI int32_t U_EXPORT2
-uprv_compareEBCDICPropertyNames(const char *name1, const char *name2);
-
-#if U_CHARSET_FAMILY==U_ASCII_FAMILY
-#   define uprv_comparePropertyNames uprv_compareASCIIPropertyNames
-#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
-#   define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames
-#else
-#   error U_CHARSET_FAMILY is not valid
-#endif
-
 /** Turn a bit index into a bit flag. @internal */
 #define FLAG(n) ((uint32_t)1<<(n))