mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-09 15:27:38 +00:00
ICU-4078 enable building a UnicodeSet from a property using/loading only the relevant data file
X-SVN-Rev: 16313
This commit is contained in:
parent
1ed0796a99
commit
f7b7183d7a
4 changed files with 271 additions and 162 deletions
|
@ -930,7 +930,7 @@ uprv_getMaxValues(int32_t column) {
|
|||
|
||||
/*
|
||||
* get Hangul Syllable Type
|
||||
* implemented here so that uchar.c (uchar_addPropertyStarts())
|
||||
* implemented here so that uchar.c (uhst_addPropertyStarts())
|
||||
* does not depend on uprops.c (u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE))
|
||||
*/
|
||||
U_CFUNC UHangulSyllableType
|
||||
|
@ -995,6 +995,69 @@ ublock_getCode(UChar32 c) {
|
|||
|
||||
/* property starts for UnicodeSet ------------------------------------------- */
|
||||
|
||||
/* for Hangul_Syllable_Type */
|
||||
U_CAPI void U_EXPORT2
|
||||
uhst_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) {
|
||||
UChar32 c;
|
||||
int32_t value, value2;
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if(!HAVE_DATA) {
|
||||
*pErrorCode=dataErrorCode;
|
||||
return;
|
||||
}
|
||||
|
||||
/* add code points with hardcoded properties, plus the ones following them */
|
||||
|
||||
/*
|
||||
* Add Jamo type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE.
|
||||
* First, we add fixed boundaries for the blocks of Jamos.
|
||||
* Then we check in loops to see where the current Unicode version
|
||||
* actually stops assigning such Jamos. We start each loop
|
||||
* at the end of the per-Jamo-block assignments in Unicode 4 or earlier.
|
||||
* (These have not changed since Unicode 2.)
|
||||
*/
|
||||
sa->add(sa->set, 0x1100);
|
||||
value=U_HST_LEADING_JAMO;
|
||||
for(c=0x115a; c<=0x115f; ++c) {
|
||||
value2=uchar_getHST(c);
|
||||
if(value!=value2) {
|
||||
value=value2;
|
||||
sa->add(sa->set, c);
|
||||
}
|
||||
}
|
||||
|
||||
sa->add(sa->set, 0x1160);
|
||||
value=U_HST_VOWEL_JAMO;
|
||||
for(c=0x11a3; c<=0x11a7; ++c) {
|
||||
value2=uchar_getHST(c);
|
||||
if(value!=value2) {
|
||||
value=value2;
|
||||
sa->add(sa->set, c);
|
||||
}
|
||||
}
|
||||
|
||||
sa->add(sa->set, 0x11a8);
|
||||
value=U_HST_TRAILING_JAMO;
|
||||
for(c=0x11fa; c<=0x11ff; ++c) {
|
||||
value2=uchar_getHST(c);
|
||||
if(value!=value2) {
|
||||
value=value2;
|
||||
sa->add(sa->set, c);
|
||||
}
|
||||
}
|
||||
|
||||
/* Add Hangul type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE. */
|
||||
for(c=HANGUL_BASE; c<(HANGUL_BASE+HANGUL_COUNT); c+=JAMO_T_COUNT) {
|
||||
sa->add(sa->set, c);
|
||||
sa->add(sa->set, c+1);
|
||||
}
|
||||
sa->add(sa->set, c);
|
||||
}
|
||||
|
||||
static UBool U_CALLCONV
|
||||
_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
|
||||
/* add the start code point to the USet */
|
||||
|
@ -1007,8 +1070,9 @@ _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 limit, uint
|
|||
|
||||
U_CAPI void U_EXPORT2
|
||||
uchar_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) {
|
||||
UChar32 c;
|
||||
int32_t value, value2;
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if(!HAVE_DATA) {
|
||||
*pErrorCode=dataErrorCode;
|
||||
|
@ -1072,42 +1136,4 @@ uchar_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode) {
|
|||
/* add for UCHAR_JOINING_TYPE */
|
||||
sa->add(sa->set, ZWNJ); /* range ZWNJ..ZWJ */
|
||||
sa->add(sa->set, ZWJ+1);
|
||||
|
||||
/*
|
||||
* Add Jamo type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE.
|
||||
* First, we add fixed boundaries for the blocks of Jamos.
|
||||
* Then we check in loops to see where the current Unicode version
|
||||
* actually stops assigning such Jamos. We start each loop
|
||||
* at the end of the per-Jamo-block assignments in Unicode 4 or earlier.
|
||||
* (These have not changed since Unicode 2.)
|
||||
*/
|
||||
sa->add(sa->set, 0x1100);
|
||||
value=U_HST_LEADING_JAMO;
|
||||
for(c=0x115a; c<=0x115f; ++c) {
|
||||
value2=uchar_getHST(c);
|
||||
if(value!=value2) {
|
||||
value=value2;
|
||||
sa->add(sa->set, c);
|
||||
}
|
||||
}
|
||||
|
||||
sa->add(sa->set, 0x1160);
|
||||
value=U_HST_VOWEL_JAMO;
|
||||
for(c=0x11a3; c<=0x11a7; ++c) {
|
||||
value2=uchar_getHST(c);
|
||||
if(value!=value2) {
|
||||
value=value2;
|
||||
sa->add(sa->set, c);
|
||||
}
|
||||
}
|
||||
|
||||
sa->add(sa->set, 0x11a8);
|
||||
value=U_HST_TRAILING_JAMO;
|
||||
for(c=0x11fa; c<=0x11ff; ++c) {
|
||||
value2=uchar_getHST(c);
|
||||
if(value!=value2) {
|
||||
value=value2;
|
||||
sa->add(sa->set, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,6 +31,8 @@
|
|||
#include "uvector.h"
|
||||
#include "uprops.h"
|
||||
#include "propname.h"
|
||||
#include "unormimp.h"
|
||||
#include "ucase.h"
|
||||
#include "charstr.h"
|
||||
#include "ustrfmt.h"
|
||||
#include "mutex.h"
|
||||
|
@ -149,7 +151,7 @@ static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */
|
|||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
static UnicodeSet* INCLUSIONS = NULL; // cached uprv_getInclusions()
|
||||
static UnicodeSet *INCLUSIONS[UPROPS_SRC_COUNT] = { NULL }; // cached getInclusions()
|
||||
|
||||
static Hashtable* CASE_EQUIV_HASH = NULL; // for closeOver(USET_CASE)
|
||||
|
||||
|
@ -1016,6 +1018,7 @@ static UBool intPropertyFilter(UChar32 ch, void* context) {
|
|||
*/
|
||||
void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
|
||||
void* context,
|
||||
int32_t src,
|
||||
UErrorCode &status) {
|
||||
// Walk through all Unicode characters, noting the start
|
||||
// and end of each range for which filter.contain(c) is
|
||||
|
@ -1031,7 +1034,7 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter,
|
|||
// those properties. Scanning code points is slow.
|
||||
if (U_FAILURE(status)) return;
|
||||
|
||||
const UnicodeSet* inclusions = getInclusions(status);
|
||||
const UnicodeSet* inclusions = getInclusions(src, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
@ -1101,38 +1104,10 @@ UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec)
|
|||
if (U_FAILURE(ec)) return *this;
|
||||
|
||||
if (prop == UCHAR_GENERAL_CATEGORY_MASK) {
|
||||
applyFilter(generalCategoryMaskFilter, &value, ec);
|
||||
#if UCONFIG_NO_NORMALIZATION
|
||||
} else if(prop == UCHAR_HANGUL_SYLLABLE_TYPE) {
|
||||
/*
|
||||
* Special code for when normalization is off.
|
||||
* HST is still available because it is hardcoded in uprops.c, but
|
||||
* the inclusions set does not have the necessary code points
|
||||
* for normalization properties.
|
||||
* I am hardcoding HST in this case because it is the only property
|
||||
* that prevents genbrk from compiling char.txt when normalization is off.
|
||||
* This saves me from turning off break iteration or making more
|
||||
* complicated changes in genbrk.
|
||||
*
|
||||
* This code is not efficient. For efficiency turn on normalization.
|
||||
*
|
||||
* markus 20030505
|
||||
*/
|
||||
UChar32 c;
|
||||
|
||||
clear();
|
||||
for(c=0x1100; c<=0xd7a3; ++c) {
|
||||
if(c==0x1200) {
|
||||
c=0xac00;
|
||||
}
|
||||
if(value == u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE)) {
|
||||
add(c);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
applyFilter(generalCategoryMaskFilter, &value, UPROPS_SRC_CHAR, ec);
|
||||
} else {
|
||||
IntPropertyContext c = {prop, value};
|
||||
applyFilter(intPropertyFilter, &c, ec);
|
||||
applyFilter(intPropertyFilter, &c, uprops_getSource(prop), ec);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
@ -1205,7 +1180,7 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
|
|||
if (*end != 0) {
|
||||
FAIL(ec);
|
||||
}
|
||||
applyFilter(numericValueFilter, &value, ec);
|
||||
applyFilter(numericValueFilter, &value, UPROPS_SRC_CHAR, ec);
|
||||
return *this;
|
||||
}
|
||||
break;
|
||||
|
@ -1236,7 +1211,7 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
|
|||
if (!mungeCharName(buf, vname, sizeof(buf))) FAIL(ec);
|
||||
UVersionInfo version;
|
||||
u_versionFromString(version, buf);
|
||||
applyFilter(versionFilter, &version, ec);
|
||||
applyFilter(versionFilter, &version, UPROPS_SRC_CHAR, ec);
|
||||
return *this;
|
||||
}
|
||||
break;
|
||||
|
@ -1274,7 +1249,7 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
|
|||
for (int32_t i=0; i<C99_COUNT; ++i) {
|
||||
int32_t c = uprv_comparePropertyNames(pname, C99_DISPATCH[i].name);
|
||||
if (c == 0) {
|
||||
applyFilter(c99Filter, (void*) &C99_DISPATCH[i], ec);
|
||||
applyFilter(c99Filter, (void*) &C99_DISPATCH[i], UPROPS_SRC_CHAR, ec);
|
||||
return *this;
|
||||
} else if (c < 0) {
|
||||
// Further entries will not match; bail out
|
||||
|
@ -1490,9 +1465,9 @@ _set_addString(USet *set, const UChar *str, int32_t length) {
|
|||
|
||||
U_CDECL_END
|
||||
|
||||
const UnicodeSet* UnicodeSet::getInclusions(UErrorCode &status) {
|
||||
const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
|
||||
umtx_lock(NULL);
|
||||
UBool f = (INCLUSIONS == NULL);
|
||||
UBool f = (INCLUSIONS[src] == NULL);
|
||||
umtx_unlock(NULL);
|
||||
if (f) {
|
||||
UnicodeSet* incl = new UnicodeSet();
|
||||
|
@ -1504,11 +1479,29 @@ const UnicodeSet* UnicodeSet::getInclusions(UErrorCode &status) {
|
|||
};
|
||||
|
||||
if (incl != NULL) {
|
||||
uprv_getInclusions(&sa, &status);
|
||||
switch(src) {
|
||||
case UPROPS_SRC_CHAR:
|
||||
uchar_addPropertyStarts(&sa, &status);
|
||||
break;
|
||||
case UPROPS_SRC_HST:
|
||||
uhst_addPropertyStarts(&sa, &status);
|
||||
break;
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
case UPROPS_SRC_NORM:
|
||||
unorm_addPropertyStarts(&sa, &status);
|
||||
break;
|
||||
#endif
|
||||
case UPROPS_SRC_CASE:
|
||||
ucase_addPropertyStarts(ucase_getSingleton(&status), &sa, &status);
|
||||
break;
|
||||
default:
|
||||
status = U_INTERNAL_PROGRAM_ERROR;
|
||||
break;
|
||||
}
|
||||
if (U_SUCCESS(status)) {
|
||||
umtx_lock(NULL);
|
||||
if (INCLUSIONS == NULL) {
|
||||
INCLUSIONS = incl;
|
||||
if (INCLUSIONS[src] == NULL) {
|
||||
INCLUSIONS[src] = incl;
|
||||
incl = NULL;
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
|
@ -1518,16 +1511,20 @@ const UnicodeSet* UnicodeSet::getInclusions(UErrorCode &status) {
|
|||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
return INCLUSIONS;
|
||||
return INCLUSIONS[src];
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleanup function for UnicodeSet
|
||||
*/
|
||||
U_CFUNC UBool uset_cleanup(void) {
|
||||
if (INCLUSIONS != NULL) {
|
||||
delete INCLUSIONS;
|
||||
INCLUSIONS = NULL;
|
||||
int32_t i;
|
||||
|
||||
for(i = UPROPS_SRC_NONE; i < UPROPS_SRC_COUNT; ++i) {
|
||||
if (INCLUSIONS[i] != NULL) {
|
||||
delete INCLUSIONS[i];
|
||||
INCLUSIONS[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (CASE_EQUIV_HASH != NULL) {
|
||||
|
|
|
@ -42,50 +42,50 @@ static const struct {
|
|||
* and there must be exactly one entry per binary UProperty.
|
||||
*
|
||||
* Properties with mask 0 are handled in code.
|
||||
* Pseudo-column -2 indicates case mapping properties.
|
||||
* For them, column is the UPropertySource value.
|
||||
*/
|
||||
{ 1, U_MASK(UPROPS_ALPHABETIC) },
|
||||
{ 1, U_MASK(UPROPS_ASCII_HEX_DIGIT) },
|
||||
{ 1, U_MASK(UPROPS_BIDI_CONTROL) },
|
||||
{ -1, U_MASK(UPROPS_MIRROR_SHIFT) },
|
||||
{ 1, U_MASK(UPROPS_DASH) },
|
||||
{ 1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT) },
|
||||
{ 1, U_MASK(UPROPS_DEPRECATED) },
|
||||
{ 1, U_MASK(UPROPS_DIACRITIC) },
|
||||
{ 1, U_MASK(UPROPS_EXTENDER) },
|
||||
{ 0, 0 }, /* UCHAR_FULL_COMPOSITION_EXCLUSION */
|
||||
{ 1, U_MASK(UPROPS_GRAPHEME_BASE) },
|
||||
{ 1, U_MASK(UPROPS_GRAPHEME_EXTEND) },
|
||||
{ 1, U_MASK(UPROPS_GRAPHEME_LINK) },
|
||||
{ 1, U_MASK(UPROPS_HEX_DIGIT) },
|
||||
{ 1, U_MASK(UPROPS_HYPHEN) },
|
||||
{ 1, U_MASK(UPROPS_ID_CONTINUE) },
|
||||
{ 1, U_MASK(UPROPS_ID_START) },
|
||||
{ 1, U_MASK(UPROPS_IDEOGRAPHIC) },
|
||||
{ 1, U_MASK(UPROPS_IDS_BINARY_OPERATOR) },
|
||||
{ 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR) },
|
||||
{ 1, U_MASK(UPROPS_JOIN_CONTROL) },
|
||||
{ 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION) },
|
||||
{ -2, 0 }, /* UCHAR_LOWERCASE */
|
||||
{ 1, U_MASK(UPROPS_MATH) },
|
||||
{ 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT) },
|
||||
{ 1, U_MASK(UPROPS_QUOTATION_MARK) },
|
||||
{ 1, U_MASK(UPROPS_RADICAL) },
|
||||
{ -2, 0 }, /* UCHAR_SOFT_DOTTED */
|
||||
{ 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION) },
|
||||
{ 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH) },
|
||||
{ -2, 0 }, /* UCHAR_UPPERCASE */
|
||||
{ 1, U_MASK(UPROPS_WHITE_SPACE) },
|
||||
{ 1, U_MASK(UPROPS_XID_CONTINUE) },
|
||||
{ 1, U_MASK(UPROPS_XID_START) },
|
||||
{ -2, 0 }, /* UCHAR_CASE_SENSITIVE */
|
||||
{ 2, U_MASK(UPROPS_V2_S_TERM) },
|
||||
{ 2, U_MASK(UPROPS_V2_VARIATION_SELECTOR) },
|
||||
{ 0, 0 }, /* UCHAR_NFD_INERT */
|
||||
{ 0, 0 }, /* UCHAR_NFKD_INERT */
|
||||
{ 0, 0 }, /* UCHAR_NFC_INERT */
|
||||
{ 0, 0 }, /* UCHAR_NFKC_INERT */
|
||||
{ 0, 0 } /* UCHAR_SEGMENT_STARTER */
|
||||
{ 1, U_MASK(UPROPS_ALPHABETIC) },
|
||||
{ 1, U_MASK(UPROPS_ASCII_HEX_DIGIT) },
|
||||
{ 1, U_MASK(UPROPS_BIDI_CONTROL) },
|
||||
{ -1, U_MASK(UPROPS_MIRROR_SHIFT) },
|
||||
{ 1, U_MASK(UPROPS_DASH) },
|
||||
{ 1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT) },
|
||||
{ 1, U_MASK(UPROPS_DEPRECATED) },
|
||||
{ 1, U_MASK(UPROPS_DIACRITIC) },
|
||||
{ 1, U_MASK(UPROPS_EXTENDER) },
|
||||
{ UPROPS_SRC_NORM, 0 }, /* UCHAR_FULL_COMPOSITION_EXCLUSION */
|
||||
{ 1, U_MASK(UPROPS_GRAPHEME_BASE) },
|
||||
{ 1, U_MASK(UPROPS_GRAPHEME_EXTEND) },
|
||||
{ 1, U_MASK(UPROPS_GRAPHEME_LINK) },
|
||||
{ 1, U_MASK(UPROPS_HEX_DIGIT) },
|
||||
{ 1, U_MASK(UPROPS_HYPHEN) },
|
||||
{ 1, U_MASK(UPROPS_ID_CONTINUE) },
|
||||
{ 1, U_MASK(UPROPS_ID_START) },
|
||||
{ 1, U_MASK(UPROPS_IDEOGRAPHIC) },
|
||||
{ 1, U_MASK(UPROPS_IDS_BINARY_OPERATOR) },
|
||||
{ 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR) },
|
||||
{ 1, U_MASK(UPROPS_JOIN_CONTROL) },
|
||||
{ 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION) },
|
||||
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_LOWERCASE */
|
||||
{ 1, U_MASK(UPROPS_MATH) },
|
||||
{ 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT) },
|
||||
{ 1, U_MASK(UPROPS_QUOTATION_MARK) },
|
||||
{ 1, U_MASK(UPROPS_RADICAL) },
|
||||
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_SOFT_DOTTED */
|
||||
{ 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION) },
|
||||
{ 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH) },
|
||||
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_UPPERCASE */
|
||||
{ 1, U_MASK(UPROPS_WHITE_SPACE) },
|
||||
{ 1, U_MASK(UPROPS_XID_CONTINUE) },
|
||||
{ 1, U_MASK(UPROPS_XID_START) },
|
||||
{ UPROPS_SRC_CASE, 0 }, /* UCHAR_CASE_SENSITIVE */
|
||||
{ 2, U_MASK(UPROPS_V2_S_TERM) },
|
||||
{ 2, U_MASK(UPROPS_V2_VARIATION_SELECTOR) },
|
||||
{ UPROPS_SRC_NORM, 0 }, /* UCHAR_NFD_INERT */
|
||||
{ UPROPS_SRC_NORM, 0 }, /* UCHAR_NFKD_INERT */
|
||||
{ UPROPS_SRC_NORM, 0 }, /* UCHAR_NFC_INERT */
|
||||
{ UPROPS_SRC_NORM, 0 }, /* UCHAR_NFKC_INERT */
|
||||
{ UPROPS_SRC_NORM, 0 } /* UCHAR_SEGMENT_STARTER */
|
||||
};
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
|
@ -95,45 +95,48 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
|
|||
/* not a known binary property */
|
||||
} else {
|
||||
uint32_t mask=binProps[which].mask;
|
||||
int32_t column=binProps[which].column;
|
||||
if(mask!=0) {
|
||||
/* systematic, directly stored properties */
|
||||
return (u_getUnicodeProperties(c, binProps[which].column)&mask)!=0;
|
||||
} else if(binProps[which].column==-2) {
|
||||
/* case mapping properties */
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
UCaseProps *csp=ucase_getSingleton(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
switch(which) {
|
||||
case UCHAR_LOWERCASE:
|
||||
return (UBool)(UCASE_LOWER==ucase_getType(csp, c));
|
||||
case UCHAR_UPPERCASE:
|
||||
return (UBool)(UCASE_UPPER==ucase_getType(csp, c));
|
||||
case UCHAR_SOFT_DOTTED:
|
||||
return ucase_isSoftDotted(csp, c);
|
||||
case UCHAR_CASE_SENSITIVE:
|
||||
return ucase_isCaseSensitive(csp, c);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return (u_getUnicodeProperties(c, column)&mask)!=0;
|
||||
} else {
|
||||
if(column==UPROPS_SRC_CASE) {
|
||||
/* case mapping properties */
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
UCaseProps *csp=ucase_getSingleton(&errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return FALSE;
|
||||
}
|
||||
switch(which) {
|
||||
case UCHAR_LOWERCASE:
|
||||
return (UBool)(UCASE_LOWER==ucase_getType(csp, c));
|
||||
case UCHAR_UPPERCASE:
|
||||
return (UBool)(UCASE_UPPER==ucase_getType(csp, c));
|
||||
case UCHAR_SOFT_DOTTED:
|
||||
return ucase_isSoftDotted(csp, c);
|
||||
case UCHAR_CASE_SENSITIVE:
|
||||
return ucase_isCaseSensitive(csp, c);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else if(column==UPROPS_SRC_NORM) {
|
||||
#if !UCONFIG_NO_NORMALIZATION
|
||||
/* normalization properties from unorm.icu */
|
||||
switch(which) {
|
||||
case UCHAR_FULL_COMPOSITION_EXCLUSION:
|
||||
return unorm_internalIsFullCompositionExclusion(c);
|
||||
case UCHAR_NFD_INERT:
|
||||
case UCHAR_NFKD_INERT:
|
||||
case UCHAR_NFC_INERT:
|
||||
case UCHAR_NFKC_INERT:
|
||||
return unorm_isNFSkippable(c, (UNormalizationMode)(which-UCHAR_NFD_INERT)+UNORM_NFD);
|
||||
case UCHAR_SEGMENT_STARTER:
|
||||
return unorm_isCanonSafeStart(c);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
/* normalization properties from unorm.icu */
|
||||
switch(which) {
|
||||
case UCHAR_FULL_COMPOSITION_EXCLUSION:
|
||||
return unorm_internalIsFullCompositionExclusion(c);
|
||||
case UCHAR_NFD_INERT:
|
||||
case UCHAR_NFKD_INERT:
|
||||
case UCHAR_NFC_INERT:
|
||||
case UCHAR_NFKC_INERT:
|
||||
return unorm_isNFSkippable(c, (UNormalizationMode)(which-UCHAR_NFD_INERT)+UNORM_NFD);
|
||||
case UCHAR_SEGMENT_STARTER:
|
||||
return unorm_isCanonSafeStart(c);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
return FALSE;
|
||||
|
@ -291,6 +294,40 @@ u_getIntPropertyMaxValue(UProperty which) {
|
|||
}
|
||||
}
|
||||
|
||||
U_CAPI UPropertySource U_EXPORT2
|
||||
uprops_getSource(UProperty which) {
|
||||
if(which<UCHAR_BINARY_START) {
|
||||
return UPROPS_SRC_NONE; /* undefined */
|
||||
} else if(which<UCHAR_BINARY_LIMIT) {
|
||||
if(binProps[which].mask!=0) {
|
||||
return UPROPS_SRC_CHAR;
|
||||
} else {
|
||||
return (UPropertySource)binProps[which].column;
|
||||
}
|
||||
} else if(which<UCHAR_INT_START) {
|
||||
return UPROPS_SRC_NONE; /* undefined */
|
||||
} else if(which<UCHAR_INT_LIMIT) {
|
||||
switch(which) {
|
||||
case UCHAR_HANGUL_SYLLABLE_TYPE:
|
||||
return UPROPS_SRC_HST;
|
||||
case UCHAR_CANONICAL_COMBINING_CLASS:
|
||||
case UCHAR_NFD_QUICK_CHECK:
|
||||
case UCHAR_NFKD_QUICK_CHECK:
|
||||
case UCHAR_NFC_QUICK_CHECK:
|
||||
case UCHAR_NFKC_QUICK_CHECK:
|
||||
case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
|
||||
case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
|
||||
return UPROPS_SRC_NORM;
|
||||
default:
|
||||
return UPROPS_SRC_CHAR;
|
||||
}
|
||||
} else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
|
||||
return UPROPS_SRC_CHAR;
|
||||
} else {
|
||||
return UPROPS_SRC_NONE; /* undefined */
|
||||
}
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------
|
||||
* Inclusions list
|
||||
*----------------------------------------------------------------*/
|
||||
|
@ -377,7 +414,15 @@ u_getIntPropertyMaxValue(UProperty which) {
|
|||
*
|
||||
* Do not use a UnicodeSet pattern because that causes infinite recursion;
|
||||
* UnicodeSet depends on the inclusions set.
|
||||
*
|
||||
* ---
|
||||
*
|
||||
* uprv_getInclusions() is commented out starting 2004-sep-13 because
|
||||
* uniset_props.cpp now calls the uxyz_addPropertyStarts() directly,
|
||||
* and only for the relevant property source.
|
||||
*/
|
||||
#if 0
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uprv_getInclusions(USetAdder *sa, UErrorCode *pErrorCode) {
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
|
@ -390,3 +435,5 @@ uprv_getInclusions(USetAdder *sa, UErrorCode *pErrorCode) {
|
|||
uchar_addPropertyStarts(sa, pErrorCode);
|
||||
ucase_addPropertyStarts(ucase_getSingleton(pErrorCode), sa, pErrorCode);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -337,6 +337,38 @@ uprv_getISOCommentCharacters(USetAdder *sa);
|
|||
*/
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Constants for which data and implementation files provide which properties.
|
||||
* Used by UnicodeSet for service-specific property enumeration.
|
||||
* @internal
|
||||
*/
|
||||
typedef enum UPropertySource {
    /** Not a supported property; there is no source. */
    UPROPS_SRC_NONE,
    /** Provided by uchar.c/uprops.icu */
    UPROPS_SRC_CHAR,
    /** Hangul_Syllable_Type, provided by uchar.c/uprops.icu */
    UPROPS_SRC_HST,
    /** Provided by unames.c/unames.icu */
    UPROPS_SRC_NAMES,
    /** Provided by unorm.cpp/unorm.icu */
    UPROPS_SRC_NORM,
    /** Provided by ucase.c/ucase.icu */
    UPROPS_SRC_CASE,
    /** Provided by ubidi.c/ubidi.icu */
    UPROPS_SRC_BIDI,
    /** One more than the highest UPropertySource (UPROPS_SRC_) constant. */
    UPROPS_SRC_COUNT
} UPropertySource;
|
||||
|
||||
/**
|
||||
* @see UPropertySource
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI UPropertySource U_EXPORT2
|
||||
uprops_getSource(UProperty which);
|
||||
|
||||
/**
|
||||
* Enumerate each core properties data trie and add the
|
||||
* start of each range of same properties to the set.
|
||||
|
@ -345,6 +377,13 @@ uprv_getISOCommentCharacters(USetAdder *sa);
|
|||
U_CAPI void U_EXPORT2
|
||||
uchar_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Same as uchar_addPropertyStarts() but only for Hangul_Syllable_Type.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uhst_addPropertyStarts(USetAdder *sa, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Return a set of characters for property enumeration.
|
||||
* For each two consecutive characters (start, limit) in the set,
|
||||
|
|
Loading…
Add table
Reference in a new issue