From bd1f26f937c80f3891554b3ac3de09292f652097 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Sat, 11 Sep 2004 22:02:10 +0000 Subject: [PATCH] ICU-4078 move property name comparison functions from uprops to propname X-SVN-Rev: 16303 --- icu4c/source/common/propname.cpp | 119 ++++++++++++++++++++++++++ icu4c/source/common/propname.h | 40 ++++++++- icu4c/source/common/uniset_props.cpp | 1 + icu4c/source/common/uprops.c | 123 ++------------------------- icu4c/source/common/uprops.h | 39 --------- 5 files changed, 165 insertions(+), 157 deletions(-) diff --git a/icu4c/source/common/propname.cpp b/icu4c/source/common/propname.cpp index 7837e3f70ff..1a2e663bc3d 100644 --- a/icu4c/source/common/propname.cpp +++ b/icu4c/source/common/propname.cpp @@ -16,6 +16,125 @@ #include "cstring.h" #include "uarrsort.h" +U_CDECL_BEGIN + +/** + * Get the next non-ignorable ASCII character from a property name + * and lowercases it. + * @return ((advance count for the name)<<8)|character + */ +static U_INLINE int32_t +getASCIIPropertyNameChar(const char *name) { + int32_t i; + char c; + + /* Ignore delimiters '-', '_', and ASCII White_Space */ + for(i=0; + (c=name[i++])==0x2d || c==0x5f || + c==0x20 || (0x09<=c && c<=0x0d); + ) {} + + if(c!=0) { + return (i<<8)|(uint8_t)uprv_asciitolower((char)c); + } else { + return i<<8; + } +} + +/** + * Get the next non-ignorable EBCDIC character from a property name + * and lowercases it. + * @return ((advance count for the name)<<8)|character + */ +static U_INLINE int32_t +getEBCDICPropertyNameChar(const char *name) { + int32_t i; + char c; + + /* Ignore delimiters '-', '_', and EBCDIC White_Space */ + for(i=0; + (c=name[i++])==0x60 || c==0x6d || + c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d; + ) {} + + if(c!=0) { + return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c); + } else { + return i<<8; + } +} + +/** + * Unicode property names and property value names are compared "loosely". + * + * UCD.html 4.0.1 says: + * For all property names, property value names, and for property values for + * Enumerated, Binary, or Catalog properties, use the following + * loose matching rule: + * + * LM3. Ignore case, whitespace, underscore ('_'), and hyphens. + * + * This function does just that, for (char *) name strings. + * It is almost identical to ucnv_compareNames() but also ignores + * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC). + * + * @internal + */ + +U_CAPI int32_t U_EXPORT2 +uprv_compareASCIIPropertyNames(const char *name1, const char *name2) { + int32_t rc, r1, r2; + + for(;;) { + r1=getASCIIPropertyNameChar(name1); + r2=getASCIIPropertyNameChar(name2); + + /* If we reach the ends of both strings then they match */ + if(((r1|r2)&0xff)==0) { + return 0; + } + + /* Compare the lowercased characters */ + if(r1!=r2) { + rc=(r1&0xff)-(r2&0xff); + if(rc!=0) { + return rc; + } + } + + name1+=r1>>8; + name2+=r2>>8; + } +} + +U_CAPI int32_t U_EXPORT2 +uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) { + int32_t rc, r1, r2; + + for(;;) { + r1=getEBCDICPropertyNameChar(name1); + r2=getEBCDICPropertyNameChar(name2); + + /* If we reach the ends of both strings then they match */ + if(((r1|r2)&0xff)==0) { + return 0; + } + + /* Compare the lowercased characters */ + if(r1!=r2) { + rc=(r1&0xff)-(r2&0xff); + if(rc!=0) { + return rc; + } + } + + name1+=r1>>8; + name2+=r2>>8; + } +} + +U_CDECL_END + U_NAMESPACE_BEGIN //---------------------------------------------------------------------- diff --git a/icu4c/source/common/propname.h b/icu4c/source/common/propname.h index cbee61b825e..a3a77181a2b 100644 --- a/icu4c/source/common/propname.h +++ b/icu4c/source/common/propname.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2002-2003, International Business Machines +* Copyright (c) 2002-2004, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Author: Alan Liu @@ -24,6 +24,44 @@ * genpname - creates data */ +/* low-level char * property name comparison -------------------------------- */ + +U_CDECL_BEGIN + +/** + * \var uprv_comparePropertyNames + * Unicode property names and property value names are compared "loosely". + * + * UCD.html 4.0.1 says: + * For all property names, property value names, and for property values for + * Enumerated, Binary, or Catalog properties, use the following + * loose matching rule: + * + * LM3. Ignore case, whitespace, underscore ('_'), and hyphens. + * + * This function does just that, for (char *) name strings. + * It is almost identical to ucnv_compareNames() but also ignores + * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC). + * + * @internal + */ + +U_CAPI int32_t U_EXPORT2 +uprv_compareASCIIPropertyNames(const char *name1, const char *name2); + +U_CAPI int32_t U_EXPORT2 +uprv_compareEBCDICPropertyNames(const char *name1, const char *name2); + +#if U_CHARSET_FAMILY==U_ASCII_FAMILY +# define uprv_comparePropertyNames uprv_compareASCIIPropertyNames +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY +# define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames +#else +# error U_CHARSET_FAMILY is not valid +#endif + +U_CDECL_END + /* UDataMemory structure and signatures ------------------------------------- */ #define PNAME_DATA_NAME "pnames" diff --git a/icu4c/source/common/uniset_props.cpp b/icu4c/source/common/uniset_props.cpp index 39c3289fbb5..da9ad5bd01f 100644 --- a/icu4c/source/common/uniset_props.cpp +++ b/icu4c/source/common/uniset_props.cpp @@ -30,6 +30,7 @@ #include "util.h" #include "uvector.h" #include "uprops.h" +#include "propname.h" #include "charstr.h" #include "ustrfmt.h" #include "mutex.h" diff --git a/icu4c/source/common/uprops.c b/icu4c/source/common/uprops.c index 880e0195da5..1d281345a85 100644 --- a/icu4c/source/common/uprops.c +++ b/icu4c/source/common/uprops.c @@ -15,6 +15,10 @@ * * Implementations for mostly non-core Unicode character properties * stored in uprops.icu. +* +* With the APIs implemented here, almost all properties files and +* their associated implementation files are used from this file, +* including those for normalization and case mappings. */ #include "unicode/utypes.h" @@ -26,121 +30,6 @@ #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) -/** - * Get the next non-ignorable ASCII character from a property name - * and lowercases it. - * @return ((advance count for the name)<<8)|character - */ -static U_INLINE int32_t -getASCIIPropertyNameChar(const char *name) { - int32_t i; - char c; - - /* Ignore delimiters '-', '_', and ASCII White_Space */ - for(i=0; - (c=name[i++])==0x2d || c==0x5f || - c==0x20 || (0x09<=c && c<=0x0d); - ) {} - - if(c!=0) { - return (i<<8)|(uint8_t)uprv_asciitolower((char)c); - } else { - return i<<8; - } -} - -/** - * Get the next non-ignorable EBCDIC character from a property name - * and lowercases it. - * @return ((advance count for the name)<<8)|character - */ -static U_INLINE int32_t -getEBCDICPropertyNameChar(const char *name) { - int32_t i; - char c; - - /* Ignore delimiters '-', '_', and EBCDIC White_Space */ - for(i=0; - (c=name[i++])==0x60 || c==0x6d || - c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d; - ) {} - - if(c!=0) { - return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c); - } else { - return i<<8; - } -} - -/** - * Unicode property names and property value names are compared "loosely". - * - * UCD.html 4.0.1 says: - * For all property names, property value names, and for property values for - * Enumerated, Binary, or Catalog properties, use the following - * loose matching rule: - * - * LM3. Ignore case, whitespace, underscore ('_'), and hyphens. - * - * This function does just that, for (char *) name strings. - * It is almost identical to ucnv_compareNames() but also ignores - * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC). - * - * @internal - */ - -U_CAPI int32_t U_EXPORT2 -uprv_compareASCIIPropertyNames(const char *name1, const char *name2) { - int32_t rc, r1, r2; - - for(;;) { - r1=getASCIIPropertyNameChar(name1); - r2=getASCIIPropertyNameChar(name2); - - /* If we reach the ends of both strings then they match */ - if(((r1|r2)&0xff)==0) { - return 0; - } - - /* Compare the lowercased characters */ - if(r1!=r2) { - rc=(r1&0xff)-(r2&0xff); - if(rc!=0) { - return rc; - } - } - - name1+=r1>>8; - name2+=r2>>8; - } -} - -U_CAPI int32_t U_EXPORT2 -uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) { - int32_t rc, r1, r2; - - for(;;) { - r1=getEBCDICPropertyNameChar(name1); - r2=getEBCDICPropertyNameChar(name2); - - /* If we reach the ends of both strings then they match */ - if(((r1|r2)&0xff)==0) { - return 0; - } - - /* Compare the lowercased characters */ - if(r1!=r2) { - rc=(r1&0xff)-(r2&0xff); - if(rc!=0) { - return rc; - } - } - - name1+=r1>>8; - name2+=r2>>8; - } -} - /* API functions ------------------------------------------------------------ */ static const struct { @@ -212,7 +101,7 @@ u_hasBinaryProperty(UChar32 c, UProperty which) { } else if(binProps[which].column==-2) { /* case mapping properties */ UErrorCode errorCode=U_ZERO_ERROR; - UCaseProps *csp=uchar_getCaseProps(&errorCode); + UCaseProps *csp=ucase_getSingleton(&errorCode); if(U_FAILURE(errorCode)) { return FALSE; } @@ -499,5 +388,5 @@ uprv_getInclusions(USetAdder *sa, UErrorCode *pErrorCode) { unorm_addPropertyStarts(sa, pErrorCode); #endif uchar_addPropertyStarts(sa, pErrorCode); - ucase_addPropertyStarts(uchar_getCaseProps(pErrorCode), sa, pErrorCode); + ucase_addPropertyStarts(ucase_getSingleton(pErrorCode), sa, pErrorCode); } diff --git a/icu4c/source/common/uprops.h b/icu4c/source/common/uprops.h index e4cfde45110..a50a8fbe47a 100644 --- a/icu4c/source/common/uprops.h +++ b/icu4c/source/common/uprops.h @@ -217,45 +217,6 @@ uprv_getMaxValues(int32_t column); U_CFUNC UHangulSyllableType uchar_getHST(UChar32 c); -/** - * Get internal UCaseProps pointer from uchar.c for uprops.c. - * Other code should use ucase_getSingleton(). - */ -U_CFUNC UCaseProps * -uchar_getCaseProps(UErrorCode *pErrorCode); - -/** - * \var uprv_comparePropertyNames - * Unicode property names and property value names are compared "loosely". - * - * UCD.html 4.0.1 says: - * For all property names, property value names, and for property values for - * Enumerated, Binary, or Catalog properties, use the following - * loose matching rule: - * - * LM3. Ignore case, whitespace, underscore ('_'), and hyphens. - * - * This function does just that, for (char *) name strings. - * It is almost identical to ucnv_compareNames() but also ignores - * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC). - * - * @internal - */ - -U_CAPI int32_t U_EXPORT2 -uprv_compareASCIIPropertyNames(const char *name1, const char *name2); - -U_CAPI int32_t U_EXPORT2 -uprv_compareEBCDICPropertyNames(const char *name1, const char *name2); - -#if U_CHARSET_FAMILY==U_ASCII_FAMILY -# define uprv_comparePropertyNames uprv_compareASCIIPropertyNames -#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY -# define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames -#else -# error U_CHARSET_FAMILY is not valid -#endif - /** Turn a bit index into a bit flag. @internal */ #define FLAG(n) ((uint32_t)1<<(n))