ICU-1611 add and test u_enumCharTypes

X-SVN-Rev: 7434
This commit is contained in:
Markus Scherer 2002-01-12 00:11:09 +00:00
parent 75882c9836
commit f4190c00c7
3 changed files with 118 additions and 4 deletions

View file

@ -423,6 +423,39 @@ u_charType(UChar32 c) {
return (int8_t)GET_CATEGORY(props);
}
/* Enumerate all code points with their general categories. */
/*
 * Pairs the caller's range callback with the caller's context pointer so that
 * u_enumCharTypes() can hand both to utrie_enum() through one context argument.
 */
struct _EnumTypeCallback {
/* the caller's callback; _enumTypeRange() forwards each range to it */
UCharEnumTypeRange *enumRange;
/* the caller's opaque pointer; passed to enumRange unchanged */
const void *context;
};
/*
 * Value callback handed to utrie_enum() by u_enumCharTypes().
 * Turns a 16-bit trie value into the general category that is stored
 * in the 32-bit properties word which the trie value indexes.
 */
static uint32_t U_CALLCONV
_enumTypeValue(const void *context, uint32_t value) {
    (void)context; /* the mapping does not depend on the context */

    /* trie value -> 32-bit properties word -> general category */
    return GET_CATEGORY(props32Table[value]);
}
/*
 * Range callback handed to utrie_enum() by u_enumCharTypes().
 * Unpacks the struct _EnumTypeCallback behind the context pointer and forwards
 * the range to the caller's function together with the caller's own context.
 * Returns whatever the caller's function returns.
 */
static UBool U_CALLCONV
_enumTypeRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
    const struct _EnumTypeCallback *callback=(const struct _EnumTypeCallback *)context;

    /* the value was produced by _enumTypeValue(), so it only needs a cast to UCharCategory */
    return callback->enumRange(callback->context, start, limit, (UCharCategory)value);
}
/*
 * Public entry point: enumerates ranges of code points by general category.
 * Returns without doing anything if enumRange is NULL or if HAVE_DATA is false.
 */
U_CAPI void U_EXPORT2
u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) {
    struct _EnumTypeCallback callback;

    /* nothing to call back into */
    if(enumRange==NULL) {
        return;
    }
    /* only checked once we know there is a callback, as before */
    if(!HAVE_DATA) {
        return;
    }

    /* bundle the caller's function and pointer; _enumTypeRange() unpacks them again */
    callback.context=context;
    callback.enumRange=enumRange;

    utrie_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback);
}
/* Checks if ch is a lower case letter.*/
U_CAPI UBool U_EXPORT2
u_islower(UChar32 c) {

View file

@ -24,6 +24,9 @@
#define UCHAR_H
#include "unicode/utypes.h"
U_CDECL_BEGIN
/*==========================================================================*/
/* Unicode version number */
/*==========================================================================*/
@ -995,6 +998,48 @@ u_charCellWidth(UChar32 c);
U_CAPI int8_t U_EXPORT2
u_charType(UChar32 c);
/**
* Callback from u_enumCharTypes(), is called for each contiguous range
* of code points c (where start<=c<limit)
* with the same Unicode general category ("character type").
*
* The callback function can stop the enumeration by returning FALSE.
*
* @param context an opaque pointer, as passed into u_enumCharTypes()
* @param start the first code point in a contiguous range with the same type
* @param limit one past the last code point in a contiguous range with the same type
* @param type the general category for all code points in [start..limit[
* @return FALSE to stop the enumeration, TRUE to continue with the next range
*
* @draft ICU 2.1
* @see UCharCategory
* @see u_enumCharTypes
*/
typedef UBool U_CALLCONV
UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);
/**
* Efficiently enumerate all code points with their Unicode general categories.
*
* This is useful for building data structures (e.g., UnicodeSets),
* for enumerating all assigned code points (type!=U_UNASSIGNED), etc.
*
* For each contiguous range of code points with a given general category ("character type"),
* the UCharEnumTypeRange function is called.
* Adjacent ranges have different types.
* The Unicode Standard guarantees that the numeric value of the type is 0..31.
*
* The callback function can end the enumeration early by returning FALSE
* (see UCharEnumTypeRange).
* If enumRange is NULL, the function returns without doing anything.
*
* @param enumRange a pointer to a function that is called for each contiguous range
* of code points with the same general category
* @param context an opaque pointer that is passed on to the callback function
*
* @draft ICU 2.1
* @see UCharCategory
* @see UCharEnumTypeRange
*/
U_CAPI void U_EXPORT2
u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);
/**
* Returns the combining class of the code point as specified in UnicodeData.txt.
*
@ -1083,8 +1128,6 @@ u_charFromName(UCharNameChoice nameChoice,
const char *name,
UErrorCode *pErrorCode);
U_CDECL_BEGIN
/**
* Type of a callback function for u_enumCharNames() that gets called
* for each Unicode character with the code point value and
@ -1107,8 +1150,6 @@ typedef UBool UEnumCharNamesFn(void *context,
const char *name,
UTextOffset length);
U_CDECL_END
/**
* Enumerate all assigned Unicode characters between the start and limit
* code points (start inclusive, limit exclusive) and call a function
@ -1429,5 +1470,7 @@ u_getUnicodeVersion(UVersionInfo info);
/** @deprecated Use the enum UCharBlock instead. Remove after Aug,2002*/
typedef UBlockCode UCharScript;
U_CDECL_END
#endif /*_UCHAR*/
/*eof*/

View file

@ -573,6 +573,41 @@ unicodeDataLineFn(void *context,
}
}
/*
 * Callback for the u_enumCharTypes() test.
 *
 * Verifies that the context pointer arrives unchanged, that each range which
 * contains one of the sample code points reports the expected general category,
 * and that the enumeration really stops once this function has returned FALSE.
 *
 * @param context must point to the string "a1", as passed by the test
 * @param start   first code point of the range
 * @param limit   one past the last code point of the range
 * @param type    general category reported for [start..limit[
 * @return FALSE for the range containing the last sample code point (or for
 *         any range after it, which is an error); TRUE otherwise
 */
static UBool U_CALLCONV
enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
    /* { code point, expected general category }, in ascending code point order */
    static const UChar32 test[][2]={
        { 0x41, U_UPPERCASE_LETTER },
        { 0x308, U_NON_SPACING_MARK },
        { 0xfffe, U_GENERAL_OTHER_TYPES },
        { 0xe0041, U_FORMAT_CHAR },
        { 0xeffff, U_UNASSIGNED }
    };

    int i, count;

    if(0!=uprv_strcmp((const char *)context, "a1")) {
        log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
    }

    count=(int)(sizeof(test)/sizeof(test[0]));
    for(i=0; i<count; ++i) {
        if(start<=test[i][0] && test[i][0]<limit) {
            if(type!=(UCharCategory)test[i][1]) {
                /*
                 * UChar32 is not necessarily the same width as long:
                 * cast every argument to the type its conversion expects.
                 */
                log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
                        (unsigned long)start, (unsigned long)limit, (long)type,
                        (unsigned long)test[i][0], (long)test[i][1]);
            }
            /* stop at the range that includes the last test code point */
            return i==(count-1) ? FALSE : TRUE;
        }
    }

    /* a range beyond the last sample means the earlier FALSE was not honored */
    if(start>test[count-1][0]) {
        log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
                (unsigned long)start, (unsigned long)limit, (long)type);
        return FALSE;
    }

    return TRUE;
}
/* tests for several properties */
static void TestUnicodeData()
{
@ -658,6 +693,9 @@ static void TestUnicodeData()
++c;
}
}
/* test u_enumCharTypes() */
u_enumCharTypes(enumTypeRange, "a1");
}
/*internal functions ----*/