ICU-2556 implement UCHAR_GENERAL_CATEGORY_MASK in name api; refine min/max behavior

X-SVN-Rev: 10596
This commit is contained in:
Alan Liu 2002-12-11 01:09:02 +00:00
parent 206c6b4cfd
commit 53406ecc37
4 changed files with 98 additions and 17 deletions

View file

@ -147,23 +147,70 @@ static UBool load() {
// on it. If it cannot obtain a pointer, because valid data is not
// available, then it returns NULL or UCHAR_INVALID_CODE.
// NOTE (ICU 2.4) For the 2.4 release it was decided late in the cycle
// to add a new enum to UProperty, UCHAR_GENERAL_CATEGORY_MASK. This
// enum would specify UCharCategory mask values. Because of time
// constraints, the underlying binary data and genprop scripts were
// not updated. So the PNAME->... API takes UCHAR_GENERAL_CATEGORY
// and associates it with a MASK value. We munge things to make this
// associate with a UCharCategory value, and we make
// UCHAR_GENERAL_CATEGORY_MASK correspond to the mask value.
// We add a synthetic (not in PropertyAliases.txt) pair of property
// names corresponding to UCHAR_GENERAL_CATEGORY_MASK:
// gcm ; General_Category_Mask
// TODO: Remove the munge code, marked "//TODO:munge" below, after the
// script/binary data are updated (probably in ICU 2.6).
// Synthetic short/long names for UCHAR_GENERAL_CATEGORY_MASK. These do not
// appear in PropertyAliases.txt. The pointers themselves are const so that
// the names cannot be reassigned at runtime.
static const char* const SHORT_GCM_NAME = "gcm";
static const char* const LONG_GCM_NAME = "General_Category_Mask";
U_CAPI const char* U_EXPORT2
u_getPropertyName(UProperty property,
                  UPropertyNameChoice nameChoice) {
    // Return the name of a property for the requested name choice.
    //
    // Every property except UCHAR_GENERAL_CATEGORY_MASK is answered by
    // PNAME; NULL is returned if load() fails.
    if (property != UCHAR_GENERAL_CATEGORY_MASK) {
        if (!load()) {
            return NULL;
        }
        return PNAME->getPropertyName(property, nameChoice);
    }

    //TODO:munge
    // UCHAR_GENERAL_CATEGORY_MASK is answered from the synthetic names,
    // without consulting PNAME. Only the short and long choices exist.
    if (nameChoice == U_SHORT_PROPERTY_NAME) {
        return SHORT_GCM_NAME;
    }
    if (nameChoice == U_LONG_PROPERTY_NAME) {
        return LONG_GCM_NAME;
    }
    return NULL;
}
U_CAPI UProperty U_EXPORT2
u_getPropertyEnum(const char* alias) {
    // Map a property name to its UProperty selector.
    //
    // PNAME is consulted first; if load() fails the lookup result is
    // treated as UCHAR_INVALID_CODE. Returns UCHAR_INVALID_CODE when the
    // alias matches neither a PNAME name nor a synthetic name.
    UProperty p = load() ? (UProperty) PNAME->getPropertyEnum(alias)
                         : UCHAR_INVALID_CODE;
    //TODO:munge
    // Fall back to the synthetic names for UCHAR_GENERAL_CATEGORY_MASK.
    // The earlier unconditional return that preceded this code made the
    // fallback unreachable and has been removed.
    if (p == UCHAR_INVALID_CODE) {
        if (0 == uprv_comparePropertyNames(alias, SHORT_GCM_NAME) ||
            0 == uprv_comparePropertyNames(alias, LONG_GCM_NAME)) {
            p = UCHAR_GENERAL_CATEGORY_MASK;
        }
    }
    return p;
}
U_CAPI const char* U_EXPORT2
u_getPropertyValueName(UProperty property,
                       int32_t value,
                       UPropertyNameChoice nameChoice) {
    // Return the name of a property value for the requested name choice,
    // or NULL if load() fails.
    //TODO:munge
    switch (property) {
    case UCHAR_GENERAL_CATEGORY:
        // The lookup below is keyed by mask values for this property, so
        // convert the incoming value to a single-bit mask. Values outside
        // 0..31 map to 0; in particular a negative value must never reach
        // U_MASK, which presumably shifts by its argument (a negative
        // shift count is undefined behavior).
        value = (value >= 0 && value < 32) ? U_MASK(value) : 0;
        break;
    case UCHAR_GENERAL_CATEGORY_MASK:
        // Mask values are looked up under UCHAR_GENERAL_CATEGORY.
        property = UCHAR_GENERAL_CATEGORY;
        break;
    default:
        // All other properties are passed through unchanged.
        break;
    }
    return load() ? PNAME->getPropertyValueName(property, value, nameChoice)
                  : NULL;
}
@ -171,8 +218,28 @@ u_getPropertyValueName(UProperty property,
U_CAPI int32_t U_EXPORT2
u_getPropertyValueEnum(UProperty property,
                       const char* alias) {
    // Map a property value name to its integer value.
    //
    // Returns UCHAR_INVALID_CODE if load() fails. For
    // UCHAR_GENERAL_CATEGORY it also returns UCHAR_INVALID_CODE when the
    // looked-up mask does not have exactly one bit set.
    //
    // The earlier unconditional return that preceded this code made the
    // munge logic unreachable and has been removed.
    //TODO:munge
    // UCHAR_GENERAL_CATEGORY_MASK is looked up under
    // UCHAR_GENERAL_CATEGORY.
    UProperty p = (property == UCHAR_GENERAL_CATEGORY_MASK) ?
        UCHAR_GENERAL_CATEGORY : property;
    int32_t v = load() ? PNAME->getPropertyValueEnum(p, alias)
                       : UCHAR_INVALID_CODE;
    //TODO:munge
    if (property == UCHAR_GENERAL_CATEGORY) {
        // Convert the mask back to a bit index. A non-positive value
        // (lookup failure, or an empty mask) has no single-bit position;
        // rejecting it here also guarantees that the scan below
        // terminates, since v == 0 would otherwise loop forever.
        if (v <= 0) {
            return UCHAR_INVALID_CODE;
        }
        int32_t gc = 0;
        // Skip trailing zero bits, counting the position of the lowest
        // set bit.
        while ((v & 1) == 0) {
            v >>= 1;
            gc += 1;
        }
        // If anything remains above the lowest set bit, more than one
        // bit is set and the mask cannot be mapped to a UCharCategory.
        return (v == 1) ? gc : UCHAR_INVALID_CODE;
    }
    return v;
}
//eof

View file

@ -1462,8 +1462,7 @@ u_getIntPropertyValue(UChar32 c, UProperty which);
*
* @param which UProperty selector constant, identifies which property to check.
* Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
* or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
* or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
* or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
* @return Minimum value returned by u_getIntPropertyValue for a Unicode property.
* 0 if the property selector is out of range.
*
@ -1492,8 +1491,7 @@ u_getIntPropertyMinValue(UProperty which);
*
* @param which UProperty selector constant, identifies which property to check.
* Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
* or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
* or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
* or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
* @return Maximum value returned by u_getIntPropertyValue for a Unicode property.
* <=0 if the property selector is out of range.
*
@ -2066,6 +2064,11 @@ u_enumCharNames(UChar32 start, UChar32 limit,
* Return the Unicode name for a given property, as given in the
* Unicode database file PropertyAliases.txt.
*
* In addition, this function maps the property
* UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
* "General_Category_Mask". These names are not in
* PropertyAliases.txt.
*
* @param property UProperty selector other than UCHAR_INVALID_CODE.
* If out of range, NULL is returned.
*
@ -2097,6 +2100,11 @@ u_getPropertyName(UProperty property,
* in the Unicode database file PropertyAliases.txt. Short, long, and
* any other variants are recognized.
*
* In addition, this function maps the synthetic names "gcm" /
* "General_Category_Mask" to the property
* UCHAR_GENERAL_CATEGORY_MASK. These names are not in
* PropertyAliases.txt.
*
* @param alias the property name to be matched. The name is compared
* using "loose matching" as described in PropertyAliases.txt.
*
@ -2113,6 +2121,12 @@ u_getPropertyEnum(const char* alias);
* Return the Unicode name for a given property value, as given in the
* Unicode database file PropertyValueAliases.txt.
*
* Note: Some of the names in PropertyValueAliases.txt can only be
* retrieved using UCHAR_GENERAL_CATEGORY_MASK, not
* UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" /
* "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
* / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
*
* @param property UProperty selector constant.
* Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
* or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
@ -2125,7 +2139,7 @@ u_getPropertyEnum(const char* alias);
* (1.) UCHAR_BLOCK values begin at the non-zero value
* UBLOCK_BASIC_LATIN. (2.) UCHAR_CANONICAL_COMBINING_CLASS
* values are not contiguous and range from 0..240. (3.)
* UCHAR_GENERAL_CATEGORY values are not values of
* UCHAR_GENERAL_CATEGORY_MASK values are not values of
* UCharCategory, but rather mask values produced by
* U_GET_GC_MASK(). This allows grouped categories such as
* [:L:] to be represented. Mask values range
@ -2160,11 +2174,16 @@ u_getPropertyValueName(UProperty property,
* specified in the Unicode database file PropertyValueAliases.txt.
* Short, long, and any other variants are recognized.
*
* Note: Some of the names in PropertyValueAliases.txt will only be
* recognized with UCHAR_GENERAL_CATEGORY_MASK, not
* UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" /
* "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
* / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
*
* @param property UProperty selector constant.
* Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
* or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
* or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
* Only these properties can be enumerated.
* If out of range, UCHAR_INVALID_CODE is returned.
*
* @param alias the value name to be matched. The name is compared

View file

@ -400,8 +400,6 @@ u_getIntPropertyMaxValue(UProperty which) {
default:
return -1; /* undefined */
}
} else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
return U_MASK(U_CHAR_CATEGORY_COUNT)-1;
} else {
return -1; /* undefined */
}

View file

@ -2355,7 +2355,7 @@ TestPropertyNames(void) {
int32_t max = 0;
if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
max = 255;
} else if (p == UCHAR_GENERAL_CATEGORY) {
} else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
/* it's far too slow to iterate all the way up to
the real max, U_GC_P_MASK */
max = U_GC_NL_MASK;
@ -2396,8 +2396,6 @@ TestPropertyNames(void) {
} else if (p>=UCHAR_MASK_LIMIT) {
p = UCHAR_DOUBLE_START - 1;
} else if (p>=UCHAR_INT_LIMIT) {
/* ### TODO remove this next line */
return;
p = UCHAR_MASK_START - 1;
} else if (p>=UCHAR_BINARY_LIMIT) {
p = UCHAR_INT_START - 1;
@ -2433,8 +2431,7 @@ TestPropertyValues(void) {
}
if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=(int32_t)(U_MASK(U_CHAR_CATEGORY_COUNT)-1)
) {
u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
}