mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-1611 add and test u_enumCharTypes
X-SVN-Rev: 7434
This commit is contained in:
parent
75882c9836
commit
f4190c00c7
3 changed files with 118 additions and 4 deletions
|
@ -423,6 +423,39 @@ u_charType(UChar32 c) {
|
|||
return (int8_t)GET_CATEGORY(props);
|
||||
}
|
||||
|
||||
/* Enumerate all code points with their general categories. */
|
||||
struct _EnumTypeCallback {
|
||||
UCharEnumTypeRange *enumRange;
|
||||
const void *context;
|
||||
};
|
||||
|
||||
static uint32_t U_CALLCONV
|
||||
_enumTypeValue(const void *context, uint32_t value) {
|
||||
/* access the general category from the 32-bit properties, and those from the 16-bit trie value */
|
||||
return GET_CATEGORY(props32Table[value]);
|
||||
}
|
||||
|
||||
static UBool U_CALLCONV
|
||||
_enumTypeRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
|
||||
/* just cast the value to UCharCategory */
|
||||
return ((struct _EnumTypeCallback *)context)->
|
||||
enumRange(((struct _EnumTypeCallback *)context)->context,
|
||||
start, limit, (UCharCategory)value);
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) {
|
||||
struct _EnumTypeCallback callback;
|
||||
|
||||
if(enumRange==NULL || !HAVE_DATA) {
|
||||
return;
|
||||
}
|
||||
|
||||
callback.enumRange=enumRange;
|
||||
callback.context=context;
|
||||
utrie_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback);
|
||||
}
|
||||
|
||||
/* Checks if ch is a lower case letter.*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
u_islower(UChar32 c) {
|
||||
|
|
|
@ -24,6 +24,9 @@
|
|||
#define UCHAR_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/*==========================================================================*/
|
||||
/* Unicode version number */
|
||||
/*==========================================================================*/
|
||||
|
@ -995,6 +998,48 @@ u_charCellWidth(UChar32 c);
|
|||
U_CAPI int8_t U_EXPORT2
|
||||
u_charType(UChar32 c);
|
||||
|
||||
/**
|
||||
* Callback from u_enumCharTypes(), is called for each contiguous range
|
||||
* of code points c (where start<=c<limit)
|
||||
* with the same Unicode general category ("character type").
|
||||
*
|
||||
* The callback function can stop the enumeration by returning FALSE.
|
||||
*
|
||||
* @param context an opaque pointer, as passed into u_enumCharTypes()
|
||||
* @param start the first code point in a contiguous range with value
|
||||
* @param limit one past the last code point in a contiguous range with value
|
||||
* @param type the general category for all code points in [start..limit[
|
||||
* @return FALSE to stop the enumeration
|
||||
*
|
||||
* @draft ICU 2.1
|
||||
* @see UCharCategory
|
||||
* @see u_enumCharTypes
|
||||
*/
|
||||
typedef UBool U_CALLCONV
|
||||
UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);
|
||||
|
||||
/**
|
||||
* Enumerate efficiently all code points with their Unicode general categories.
|
||||
*
|
||||
* This is useful for building data structures (e.g., UnicodeSet's),
|
||||
* for enumerating all assigned code points (type!=U_UNASSIGNED), etc.
|
||||
*
|
||||
* For each contiguous range of code points with a given general category ("character type"),
|
||||
* the UCharEnumTypeRange function is called.
|
||||
* Adjacent ranges have different types.
|
||||
* The Unicode Standard guarantees that the numeric value of the type is 0..31.
|
||||
*
|
||||
* @param enumRange a pointer to a function that is called for each contiguous range
|
||||
* of code points with the same general category
|
||||
* @param context an opaque pointer that is passed on to the callback function
|
||||
*
|
||||
* @draft ICU 2.1
|
||||
* @see UCharCategory
|
||||
* @see UCharEnumTypeRange
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);
|
||||
|
||||
/**
|
||||
* Returns the combining class of the code point as specified in UnicodeData.txt.
|
||||
*
|
||||
|
@ -1083,8 +1128,6 @@ u_charFromName(UCharNameChoice nameChoice,
|
|||
const char *name,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/**
|
||||
* Type of a callback function for u_enumCharNames() that gets called
|
||||
* for each Unicode character with the code point value and
|
||||
|
@ -1107,8 +1150,6 @@ typedef UBool UEnumCharNamesFn(void *context,
|
|||
const char *name,
|
||||
UTextOffset length);
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
/**
|
||||
* Enumerate all assigned Unicode characters between the start and limit
|
||||
* code points (start inclusive, limit exclusive) and call a function
|
||||
|
@ -1429,5 +1470,7 @@ u_getUnicodeVersion(UVersionInfo info);
|
|||
/** @deprecated Use the enum UCharBlock instead. Remove after Aug,2002*/
|
||||
typedef UBlockCode UCharScript;
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif /*_UCHAR*/
|
||||
/*eof*/
|
||||
|
|
|
@ -573,6 +573,41 @@ unicodeDataLineFn(void *context,
|
|||
}
|
||||
}
|
||||
|
||||
static UBool U_CALLCONV
|
||||
enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
|
||||
static UChar32 test[][2]={
|
||||
0x41, U_UPPERCASE_LETTER,
|
||||
0x308, U_NON_SPACING_MARK,
|
||||
0xfffe, U_GENERAL_OTHER_TYPES,
|
||||
0xe0041, U_FORMAT_CHAR,
|
||||
0xeffff, U_UNASSIGNED
|
||||
};
|
||||
int i, count;
|
||||
|
||||
if(0!=uprv_strcmp((const char *)context, "a1")) {
|
||||
log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
|
||||
}
|
||||
|
||||
count=sizeof(test)/sizeof(test[0]);
|
||||
for(i=0; i<count; ++i) {
|
||||
if(start<=test[i][0] && test[i][0]<limit) {
|
||||
if(type!=(UCharCategory)test[i][1]) {
|
||||
log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
|
||||
start, limit, (long)type, test[i][0], test[i][1]);
|
||||
}
|
||||
/* stop at the range that includes the last test code point */
|
||||
return i==(count-1) ? FALSE : TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if(start>test[count-1][0]) {
|
||||
log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
|
||||
start, limit, (long)type);
|
||||
return FALSE;
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/* tests for several properties */
|
||||
static void TestUnicodeData()
|
||||
{
|
||||
|
@ -658,6 +693,9 @@ static void TestUnicodeData()
|
|||
++c;
|
||||
}
|
||||
}
|
||||
|
||||
/* test u_enumCharTypes() */
|
||||
u_enumCharTypes(enumTypeRange, "a1");
|
||||
}
|
||||
|
||||
/*internal functions ----*/
|
||||
|
|
Loading…
Add table
Reference in a new issue