ICU-4078 move property name comparison functions from uprops to propname

X-SVN-Rev: 16303
This commit is contained in:
Markus Scherer 2004-09-11 22:02:10 +00:00
parent 248e6ad8fa
commit bd1f26f937
5 changed files with 165 additions and 157 deletions

View file

@ -16,6 +16,125 @@
#include "cstring.h"
#include "uarrsort.h"
U_CDECL_BEGIN
/**
 * Fetch the next significant character of an ASCII property name.
 *
 * Leading ignorable characters are skipped: '-' (0x2d), '_' (0x5f),
 * space (0x20) and the ASCII White_Space controls 0x09..0x0d.
 * The first character that is not ignorable is lowercased.
 *
 * @param name NUL-terminated name, positioned where scanning should start
 * @return ((number of chars consumed)<<8)|lowercased character.
 *         The count includes the returned character itself; at the end of
 *         the string the low byte is 0 and the count includes the NUL.
 */
static U_INLINE int32_t
getASCIIPropertyNameChar(const char *name) {
    int32_t consumed=0;
    char ch;

    /* Read at least one char; keep reading while it is a delimiter. */
    do {
        ch=name[consumed++];
    } while(ch==0x2d || ch==0x5f ||
            ch==0x20 || (0x09<=ch && ch<=0x0d));

    if(ch==0) {
        /* End of string: no character in the low byte. */
        return consumed<<8;
    }
    return (consumed<<8)|(uint8_t)uprv_asciitolower(ch);
}
/**
 * Fetch the next significant character of an EBCDIC property name.
 *
 * Leading ignorable characters are skipped: the EBCDIC codes for
 * '-' (0x60), '_' (0x6d) and space (0x40), plus the EBCDIC White_Space
 * controls 0x05, 0x15, 0x25, 0x0b, 0x0c and 0x0d.
 * The first character that is not ignorable is lowercased.
 *
 * @param name NUL-terminated name, positioned where scanning should start
 * @return ((number of chars consumed)<<8)|lowercased character.
 *         The count includes the returned character itself; at the end of
 *         the string the low byte is 0 and the count includes the NUL.
 */
static U_INLINE int32_t
getEBCDICPropertyNameChar(const char *name) {
    int32_t consumed=0;
    char ch;

    /* Read at least one char; keep reading while it is a delimiter. */
    do {
        ch=name[consumed++];
    } while(ch==0x60 || ch==0x6d ||
            ch==0x40 || ch==0x05 || ch==0x15 || ch==0x25 ||
            ch==0x0b || ch==0x0c || ch==0x0d);

    if(ch==0) {
        /* End of string: no character in the low byte. */
        return consumed<<8;
    }
    return (consumed<<8)|(uint8_t)uprv_ebcdictolower(ch);
}
/**
 * Loose comparison of two ASCII property (value) names.
 *
 * UCD.html 4.0.1 says:
 * For all property names, property value names, and for property values for
 * Enumerated, Binary, or Catalog properties, use the following
 * loose matching rule:
 *
 * LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
 *
 * This function implements that rule for (char *) name strings.
 * It is almost identical to ucnv_compareNames() but also ignores
 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
 *
 * @param name1 first NUL-terminated name
 * @param name2 second NUL-terminated name
 * @return 0 if the names match loosely; otherwise the difference between
 *         the first pair of non-matching lowercased characters
 *         (name1's minus name2's, each taken as a value 0..0xff)
 * @internal
 */
U_CAPI int32_t U_EXPORT2
uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
    int32_t next1, next2, ch1, ch2, diff;

    for(;;) {
        /* Each result packs (advance count)<<8 | lowercased char. */
        next1=getASCIIPropertyNameChar(name1);
        next2=getASCIIPropertyNameChar(name2);
        ch1=next1&0xff;
        ch2=next2&0xff;

        /* Both names are exhausted at the same time: they match. */
        if(ch1==0 && ch2==0) {
            return 0;
        }

        /* A name that ends early yields 0 here and so sorts first. */
        diff=ch1-ch2;
        if(diff!=0) {
            return diff;
        }

        /* Same character: step each name past what it consumed. */
        name1+=next1>>8;
        name2+=next2>>8;
    }
}
/**
 * Loose comparison of two EBCDIC property (value) names.
 *
 * Works like uprv_compareASCIIPropertyNames() but reads the names with
 * getEBCDICPropertyNameChar(), i.e. it ignores case and the EBCDIC codes
 * for hyphen, underscore and White_Space.
 *
 * @param name1 first NUL-terminated name
 * @param name2 second NUL-terminated name
 * @return 0 if the names match loosely; otherwise the difference between
 *         the first pair of non-matching lowercased characters
 *         (name1's minus name2's, each taken as a value 0..0xff)
 * @internal
 */
U_CAPI int32_t U_EXPORT2
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
    int32_t next1, next2, ch1, ch2, diff;

    for(;;) {
        /* Each result packs (advance count)<<8 | lowercased char. */
        next1=getEBCDICPropertyNameChar(name1);
        next2=getEBCDICPropertyNameChar(name2);
        ch1=next1&0xff;
        ch2=next2&0xff;

        /* Both names are exhausted at the same time: they match. */
        if(ch1==0 && ch2==0) {
            return 0;
        }

        /* A name that ends early yields 0 here and so sorts first. */
        diff=ch1-ch2;
        if(diff!=0) {
            return diff;
        }

        /* Same character: step each name past what it consumed. */
        name1+=next1>>8;
        name2+=next2>>8;
    }
}
U_CDECL_END
U_NAMESPACE_BEGIN
//----------------------------------------------------------------------

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (c) 2002-2003, International Business Machines
* Copyright (c) 2002-2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
@ -24,6 +24,44 @@
* genpname - creates data
*/
/* low-level char * property name comparison -------------------------------- */
U_CDECL_BEGIN
/**
 * \var uprv_comparePropertyNames
 * Unicode property names and property value names are compared "loosely".
 *
 * UCD.html 4.0.1 says:
 * For all property names, property value names, and for property values for
 * Enumerated, Binary, or Catalog properties, use the following
 * loose matching rule:
 *
 * LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
 *
 * This function does just that, for (char *) name strings.
 * It is almost identical to ucnv_compareNames() but also ignores
 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
 *
 * uprv_comparePropertyNames is a macro alias for one of the two functions
 * declared below, chosen at compile time from U_CHARSET_FAMILY.
 *
 * @param name1 first NUL-terminated name
 * @param name2 second NUL-terminated name
 * @return 0 if the names match loosely; otherwise the difference between
 *         the first pair of non-matching lowercased characters
 *         (name1's minus name2's)
 *
 * @internal
 */
U_CAPI int32_t U_EXPORT2
uprv_compareASCIIPropertyNames(const char *name1, const char *name2);
U_CAPI int32_t U_EXPORT2
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2);
/* Map the generic name to the variant for this platform's charset family. */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
# define uprv_comparePropertyNames uprv_compareASCIIPropertyNames
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
# define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames
#else
# error U_CHARSET_FAMILY is not valid
#endif
U_CDECL_END
/* UDataMemory structure and signatures ------------------------------------- */
#define PNAME_DATA_NAME "pnames"

View file

@ -30,6 +30,7 @@
#include "util.h"
#include "uvector.h"
#include "uprops.h"
#include "propname.h"
#include "charstr.h"
#include "ustrfmt.h"
#include "mutex.h"

View file

@ -15,6 +15,10 @@
*
* Implementations for mostly non-core Unicode character properties
* stored in uprops.icu.
*
* With the APIs implemented here, almost all properties files and
* their associated implementation files are used from this file,
* including those for normalization and case mappings.
*/
#include "unicode/utypes.h"
@ -26,121 +30,6 @@
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
/**
 * Fetch the next significant character of an ASCII property name.
 *
 * Leading ignorable characters are skipped: '-' (0x2d), '_' (0x5f),
 * space (0x20) and the ASCII White_Space controls 0x09..0x0d.
 * The first character that is not ignorable is lowercased.
 *
 * @param name NUL-terminated name, positioned where scanning should start
 * @return ((number of chars consumed)<<8)|lowercased character.
 *         The count includes the returned character itself; at the end of
 *         the string the low byte is 0 and the count includes the NUL.
 */
static U_INLINE int32_t
getASCIIPropertyNameChar(const char *name) {
    int32_t consumed=0;
    char ch;

    /* Read at least one char; keep reading while it is a delimiter. */
    do {
        ch=name[consumed++];
    } while(ch==0x2d || ch==0x5f ||
            ch==0x20 || (0x09<=ch && ch<=0x0d));

    if(ch==0) {
        /* End of string: no character in the low byte. */
        return consumed<<8;
    }
    return (consumed<<8)|(uint8_t)uprv_asciitolower(ch);
}
/**
 * Fetch the next significant character of an EBCDIC property name.
 *
 * Leading ignorable characters are skipped: the EBCDIC codes for
 * '-' (0x60), '_' (0x6d) and space (0x40), plus the EBCDIC White_Space
 * controls 0x05, 0x15, 0x25, 0x0b, 0x0c and 0x0d.
 * The first character that is not ignorable is lowercased.
 *
 * @param name NUL-terminated name, positioned where scanning should start
 * @return ((number of chars consumed)<<8)|lowercased character.
 *         The count includes the returned character itself; at the end of
 *         the string the low byte is 0 and the count includes the NUL.
 */
static U_INLINE int32_t
getEBCDICPropertyNameChar(const char *name) {
    int32_t consumed=0;
    char ch;

    /* Read at least one char; keep reading while it is a delimiter. */
    do {
        ch=name[consumed++];
    } while(ch==0x60 || ch==0x6d ||
            ch==0x40 || ch==0x05 || ch==0x15 || ch==0x25 ||
            ch==0x0b || ch==0x0c || ch==0x0d);

    if(ch==0) {
        /* End of string: no character in the low byte. */
        return consumed<<8;
    }
    return (consumed<<8)|(uint8_t)uprv_ebcdictolower(ch);
}
/**
 * Loose comparison of two ASCII property (value) names.
 *
 * UCD.html 4.0.1 says:
 * For all property names, property value names, and for property values for
 * Enumerated, Binary, or Catalog properties, use the following
 * loose matching rule:
 *
 * LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
 *
 * This function implements that rule for (char *) name strings.
 * It is almost identical to ucnv_compareNames() but also ignores
 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
 *
 * @param name1 first NUL-terminated name
 * @param name2 second NUL-terminated name
 * @return 0 if the names match loosely; otherwise the difference between
 *         the first pair of non-matching lowercased characters
 *         (name1's minus name2's, each taken as a value 0..0xff)
 * @internal
 */
U_CAPI int32_t U_EXPORT2
uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
    int32_t next1, next2, ch1, ch2, diff;

    for(;;) {
        /* Each result packs (advance count)<<8 | lowercased char. */
        next1=getASCIIPropertyNameChar(name1);
        next2=getASCIIPropertyNameChar(name2);
        ch1=next1&0xff;
        ch2=next2&0xff;

        /* Both names are exhausted at the same time: they match. */
        if(ch1==0 && ch2==0) {
            return 0;
        }

        /* A name that ends early yields 0 here and so sorts first. */
        diff=ch1-ch2;
        if(diff!=0) {
            return diff;
        }

        /* Same character: step each name past what it consumed. */
        name1+=next1>>8;
        name2+=next2>>8;
    }
}
/**
 * Loose comparison of two EBCDIC property (value) names.
 *
 * Works like uprv_compareASCIIPropertyNames() but reads the names with
 * getEBCDICPropertyNameChar(), i.e. it ignores case and the EBCDIC codes
 * for hyphen, underscore and White_Space.
 *
 * @param name1 first NUL-terminated name
 * @param name2 second NUL-terminated name
 * @return 0 if the names match loosely; otherwise the difference between
 *         the first pair of non-matching lowercased characters
 *         (name1's minus name2's, each taken as a value 0..0xff)
 * @internal
 */
U_CAPI int32_t U_EXPORT2
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
    int32_t next1, next2, ch1, ch2, diff;

    for(;;) {
        /* Each result packs (advance count)<<8 | lowercased char. */
        next1=getEBCDICPropertyNameChar(name1);
        next2=getEBCDICPropertyNameChar(name2);
        ch1=next1&0xff;
        ch2=next2&0xff;

        /* Both names are exhausted at the same time: they match. */
        if(ch1==0 && ch2==0) {
            return 0;
        }

        /* A name that ends early yields 0 here and so sorts first. */
        diff=ch1-ch2;
        if(diff!=0) {
            return diff;
        }

        /* Same character: step each name past what it consumed. */
        name1+=next1>>8;
        name2+=next2>>8;
    }
}
/* API functions ------------------------------------------------------------ */
static const struct {
@ -212,7 +101,7 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
} else if(binProps[which].column==-2) {
/* case mapping properties */
UErrorCode errorCode=U_ZERO_ERROR;
UCaseProps *csp=uchar_getCaseProps(&errorCode);
UCaseProps *csp=ucase_getSingleton(&errorCode);
if(U_FAILURE(errorCode)) {
return FALSE;
}
@ -499,5 +388,5 @@ uprv_getInclusions(USetAdder *sa, UErrorCode *pErrorCode) {
unorm_addPropertyStarts(sa, pErrorCode);
#endif
uchar_addPropertyStarts(sa, pErrorCode);
ucase_addPropertyStarts(uchar_getCaseProps(pErrorCode), sa, pErrorCode);
ucase_addPropertyStarts(ucase_getSingleton(pErrorCode), sa, pErrorCode);
}

View file

@ -217,45 +217,6 @@ uprv_getMaxValues(int32_t column);
U_CFUNC UHangulSyllableType
uchar_getHST(UChar32 c);
/**
* Get internal UCaseProps pointer from uchar.c for uprops.c.
* Other code should use ucase_getSingleton().
*/
U_CFUNC UCaseProps *
uchar_getCaseProps(UErrorCode *pErrorCode);
/**
* \var uprv_comparePropertyNames
* Unicode property names and property value names are compared "loosely".
*
* UCD.html 4.0.1 says:
* For all property names, property value names, and for property values for
* Enumerated, Binary, or Catalog properties, use the following
* loose matching rule:
*
* LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
*
* This function does just that, for (char *) name strings.
* It is almost identical to ucnv_compareNames() but also ignores
* C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
*
* @internal
*/
U_CAPI int32_t U_EXPORT2
uprv_compareASCIIPropertyNames(const char *name1, const char *name2);
U_CAPI int32_t U_EXPORT2
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2);
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
# define uprv_comparePropertyNames uprv_compareASCIIPropertyNames
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
# define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames
#else
# error U_CHARSET_FAMILY is not valid
#endif
/** Turn a bit index into a bit flag. @internal */
#define FLAG(n) ((uint32_t)1<<(n))