mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-4078 move property name comparison functions from uprops to propname
X-SVN-Rev: 16303
This commit is contained in:
parent
248e6ad8fa
commit
bd1f26f937
5 changed files with 165 additions and 157 deletions
|
@ -16,6 +16,125 @@
|
|||
#include "cstring.h"
|
||||
#include "uarrsort.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
|
||||
* Get the next non-ignorable ASCII character from a property name
|
||||
* and lowercases it.
|
||||
* @return ((advance count for the name)<<8)|character
|
||||
*/
|
||||
static U_INLINE int32_t
|
||||
getASCIIPropertyNameChar(const char *name) {
|
||||
int32_t i;
|
||||
char c;
|
||||
|
||||
/* Ignore delimiters '-', '_', and ASCII White_Space */
|
||||
for(i=0;
|
||||
(c=name[i++])==0x2d || c==0x5f ||
|
||||
c==0x20 || (0x09<=c && c<=0x0d);
|
||||
) {}
|
||||
|
||||
if(c!=0) {
|
||||
return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
|
||||
} else {
|
||||
return i<<8;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the next non-ignorable EBCDIC character from a property name
|
||||
* and lowercases it.
|
||||
* @return ((advance count for the name)<<8)|character
|
||||
*/
|
||||
static U_INLINE int32_t
|
||||
getEBCDICPropertyNameChar(const char *name) {
|
||||
int32_t i;
|
||||
char c;
|
||||
|
||||
/* Ignore delimiters '-', '_', and EBCDIC White_Space */
|
||||
for(i=0;
|
||||
(c=name[i++])==0x60 || c==0x6d ||
|
||||
c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
|
||||
) {}
|
||||
|
||||
if(c!=0) {
|
||||
return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
|
||||
} else {
|
||||
return i<<8;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Unicode property names and property value names are compared "loosely".
|
||||
*
|
||||
* UCD.html 4.0.1 says:
|
||||
* For all property names, property value names, and for property values for
|
||||
* Enumerated, Binary, or Catalog properties, use the following
|
||||
* loose matching rule:
|
||||
*
|
||||
* LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
|
||||
*
|
||||
* This function does just that, for (char *) name strings.
|
||||
* It is almost identical to ucnv_compareNames() but also ignores
|
||||
* C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
|
||||
int32_t rc, r1, r2;
|
||||
|
||||
for(;;) {
|
||||
r1=getASCIIPropertyNameChar(name1);
|
||||
r2=getASCIIPropertyNameChar(name2);
|
||||
|
||||
/* If we reach the ends of both strings then they match */
|
||||
if(((r1|r2)&0xff)==0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Compare the lowercased characters */
|
||||
if(r1!=r2) {
|
||||
rc=(r1&0xff)-(r2&0xff);
|
||||
if(rc!=0) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
name1+=r1>>8;
|
||||
name2+=r2>>8;
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
|
||||
int32_t rc, r1, r2;
|
||||
|
||||
for(;;) {
|
||||
r1=getEBCDICPropertyNameChar(name1);
|
||||
r2=getEBCDICPropertyNameChar(name2);
|
||||
|
||||
/* If we reach the ends of both strings then they match */
|
||||
if(((r1|r2)&0xff)==0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Compare the lowercased characters */
|
||||
if(r1!=r2) {
|
||||
rc=(r1&0xff)-(r2&0xff);
|
||||
if(rc!=0) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
name1+=r1>>8;
|
||||
name2+=r2>>8;
|
||||
}
|
||||
}
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2002-2003, International Business Machines
|
||||
* Copyright (c) 2002-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Author: Alan Liu
|
||||
|
@ -24,6 +24,44 @@
|
|||
* genpname - creates data
|
||||
*/
|
||||
|
||||
/* low-level char * property name comparison -------------------------------- */
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
|
||||
* \var uprv_comparePropertyNames
|
||||
* Unicode property names and property value names are compared "loosely".
|
||||
*
|
||||
* UCD.html 4.0.1 says:
|
||||
* For all property names, property value names, and for property values for
|
||||
* Enumerated, Binary, or Catalog properties, use the following
|
||||
* loose matching rule:
|
||||
*
|
||||
* LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
|
||||
*
|
||||
* This function does just that, for (char *) name strings.
|
||||
* It is almost identical to ucnv_compareNames() but also ignores
|
||||
* C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uprv_compareASCIIPropertyNames(const char *name1, const char *name2);
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2);
|
||||
|
||||
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
|
||||
# define uprv_comparePropertyNames uprv_compareASCIIPropertyNames
|
||||
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
||||
# define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames
|
||||
#else
|
||||
# error U_CHARSET_FAMILY is not valid
|
||||
#endif
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
/* UDataMemory structure and signatures ------------------------------------- */
|
||||
|
||||
#define PNAME_DATA_NAME "pnames"
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "util.h"
|
||||
#include "uvector.h"
|
||||
#include "uprops.h"
|
||||
#include "propname.h"
|
||||
#include "charstr.h"
|
||||
#include "ustrfmt.h"
|
||||
#include "mutex.h"
|
||||
|
|
|
@ -15,6 +15,10 @@
|
|||
*
|
||||
* Implementations for mostly non-core Unicode character properties
|
||||
* stored in uprops.icu.
|
||||
*
|
||||
* With the APIs implemented here, almost all properties files and
|
||||
* their associated implementation files are used from this file,
|
||||
* including those for normalization and case mappings.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
@ -26,121 +30,6 @@
|
|||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
/**
|
||||
* Get the next non-ignorable ASCII character from a property name
|
||||
* and lowercases it.
|
||||
* @return ((advance count for the name)<<8)|character
|
||||
*/
|
||||
static U_INLINE int32_t
|
||||
getASCIIPropertyNameChar(const char *name) {
|
||||
int32_t i;
|
||||
char c;
|
||||
|
||||
/* Ignore delimiters '-', '_', and ASCII White_Space */
|
||||
for(i=0;
|
||||
(c=name[i++])==0x2d || c==0x5f ||
|
||||
c==0x20 || (0x09<=c && c<=0x0d);
|
||||
) {}
|
||||
|
||||
if(c!=0) {
|
||||
return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
|
||||
} else {
|
||||
return i<<8;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the next non-ignorable EBCDIC character from a property name
|
||||
* and lowercases it.
|
||||
* @return ((advance count for the name)<<8)|character
|
||||
*/
|
||||
static U_INLINE int32_t
|
||||
getEBCDICPropertyNameChar(const char *name) {
|
||||
int32_t i;
|
||||
char c;
|
||||
|
||||
/* Ignore delimiters '-', '_', and EBCDIC White_Space */
|
||||
for(i=0;
|
||||
(c=name[i++])==0x60 || c==0x6d ||
|
||||
c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
|
||||
) {}
|
||||
|
||||
if(c!=0) {
|
||||
return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
|
||||
} else {
|
||||
return i<<8;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Unicode property names and property value names are compared "loosely".
|
||||
*
|
||||
* UCD.html 4.0.1 says:
|
||||
* For all property names, property value names, and for property values for
|
||||
* Enumerated, Binary, or Catalog properties, use the following
|
||||
* loose matching rule:
|
||||
*
|
||||
* LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
|
||||
*
|
||||
* This function does just that, for (char *) name strings.
|
||||
* It is almost identical to ucnv_compareNames() but also ignores
|
||||
* C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
|
||||
int32_t rc, r1, r2;
|
||||
|
||||
for(;;) {
|
||||
r1=getASCIIPropertyNameChar(name1);
|
||||
r2=getASCIIPropertyNameChar(name2);
|
||||
|
||||
/* If we reach the ends of both strings then they match */
|
||||
if(((r1|r2)&0xff)==0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Compare the lowercased characters */
|
||||
if(r1!=r2) {
|
||||
rc=(r1&0xff)-(r2&0xff);
|
||||
if(rc!=0) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
name1+=r1>>8;
|
||||
name2+=r2>>8;
|
||||
}
|
||||
}
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
|
||||
int32_t rc, r1, r2;
|
||||
|
||||
for(;;) {
|
||||
r1=getEBCDICPropertyNameChar(name1);
|
||||
r2=getEBCDICPropertyNameChar(name2);
|
||||
|
||||
/* If we reach the ends of both strings then they match */
|
||||
if(((r1|r2)&0xff)==0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Compare the lowercased characters */
|
||||
if(r1!=r2) {
|
||||
rc=(r1&0xff)-(r2&0xff);
|
||||
if(rc!=0) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
name1+=r1>>8;
|
||||
name2+=r2>>8;
|
||||
}
|
||||
}
|
||||
|
||||
/* API functions ------------------------------------------------------------ */
|
||||
|
||||
static const struct {
|
||||
|
@ -212,7 +101,7 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
|
|||
} else if(binProps[which].column==-2) {
|
||||
/* case mapping properties */
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
UCaseProps *csp=uchar_getCaseProps(&errorCode);
|
||||
UCaseProps *csp=ucase_getSingleton(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -499,5 +388,5 @@ uprv_getInclusions(USetAdder *sa, UErrorCode *pErrorCode) {
|
|||
unorm_addPropertyStarts(sa, pErrorCode);
|
||||
#endif
|
||||
uchar_addPropertyStarts(sa, pErrorCode);
|
||||
ucase_addPropertyStarts(uchar_getCaseProps(pErrorCode), sa, pErrorCode);
|
||||
ucase_addPropertyStarts(ucase_getSingleton(pErrorCode), sa, pErrorCode);
|
||||
}
|
||||
|
|
|
@ -217,45 +217,6 @@ uprv_getMaxValues(int32_t column);
|
|||
U_CFUNC UHangulSyllableType
|
||||
uchar_getHST(UChar32 c);
|
||||
|
||||
/**
|
||||
* Get internal UCaseProps pointer from uchar.c for uprops.c.
|
||||
* Other code should use ucase_getSingleton().
|
||||
*/
|
||||
U_CFUNC UCaseProps *
|
||||
uchar_getCaseProps(UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* \var uprv_comparePropertyNames
|
||||
* Unicode property names and property value names are compared "loosely".
|
||||
*
|
||||
* UCD.html 4.0.1 says:
|
||||
* For all property names, property value names, and for property values for
|
||||
* Enumerated, Binary, or Catalog properties, use the following
|
||||
* loose matching rule:
|
||||
*
|
||||
* LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
|
||||
*
|
||||
* This function does just that, for (char *) name strings.
|
||||
* It is almost identical to ucnv_compareNames() but also ignores
|
||||
* C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uprv_compareASCIIPropertyNames(const char *name1, const char *name2);
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uprv_compareEBCDICPropertyNames(const char *name1, const char *name2);
|
||||
|
||||
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
|
||||
# define uprv_comparePropertyNames uprv_compareASCIIPropertyNames
|
||||
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
|
||||
# define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames
|
||||
#else
|
||||
# error U_CHARSET_FAMILY is not valid
|
||||
#endif
|
||||
|
||||
/**
 * Turn a bit index into a bit flag: FLAG(n) is a uint32_t with only bit n set.
 * The 1 is cast to uint32_t before shifting, so n is expected to be 0..31.
 * @internal
 */
|
||||
#define FLAG(n) ((uint32_t)1<<(n))
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue