mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-1030 move ucol_unsafeCP into ucol_imp.h so it can be shared with string search impl.
X-SVN-Rev: 5444
This commit is contained in:
parent
4a03a13259
commit
a62ee61efb
2 changed files with 33 additions and 22 deletions
|
@ -663,26 +663,6 @@ static const uint16_t *FCD_STAGE_2_;
|
|||
static const uint16_t *FCD_STAGE_3_;
|
||||
|
||||
|
||||
/*
 * Look up whether code unit c is flagged in the collator's "unsafe" bit set.
 *
 * c     - the UTF-16 code unit to test
 * coll  - collator supplying minUnsafeCP and the unsafeCP bit table
 *
 * Returns FALSE for anything below coll->minUnsafeCP, TRUE for every code
 * unit in 0xd800..0xf8ff, and otherwise the value of the matching table bit.
 */
inline UBool ucol_unsafeCP(UChar c, const UCollator *coll) {
    int32_t bitIndex;
    uint8_t tableByte;

    /* Code units below the collator's threshold never need a table lookup. */
    if (c < coll->minUnsafeCP) {
        return FALSE;
    }

    bitIndex = c;
    if (bitIndex >= UCOL_UNSAFECP_TABLE_SIZE*8) {
        /* Surrogates and the private use area are always treated as unsafe. */
        if (0xd800 <= bitIndex && bitIndex <= 0xf8ff) {
            return TRUE;
        }
        /* Too large to index the table directly: fold into the hashed region. */
        bitIndex = (bitIndex & UCOL_UNSAFECP_TABLE_MASK) + 256;
    }

    /* Eight flags per byte: pick the byte, then the bit inside it. */
    tableByte = coll->unsafeCP[bitIndex >> 3];
    return (((tableByte >> (bitIndex & 7)) & 1) == 1);
}
|
||||
|
||||
/**
|
||||
* Approximate determination if a character is at a contraction end.
|
||||
* Guaranteed to be TRUE if a character is at the end of a contraction,
|
||||
|
|
|
@ -211,8 +211,8 @@ struct UCollationElements
|
|||
if((collationSource).CEpos == (collationSource).toReturn) { \
|
||||
(collationSource).CEpos = (collationSource).toReturn = (collationSource).CEs; \
|
||||
} \
|
||||
} else if((collationSource).pos < (collationSource).endp) { \
|
||||
UChar ch = *(collationSource).pos++; \
|
||||
} else if((collationSource).pos < (collationSource).endp) { \
|
||||
UChar ch = *(collationSource).pos++; \
|
||||
if(ch <= 0xFF) { \
|
||||
(order) = (coll)->latinOneMapping[ch]; \
|
||||
} else { \
|
||||
|
@ -656,5 +656,36 @@ U_CAPI int32_t U_EXPORT2 ucol_inv_getPrevCE(uint32_t CE, uint32_t contCE,
|
|||
uint32_t *prevCE, uint32_t *prevContCE,
|
||||
uint32_t strength);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Test whether a character is potentially "unsafe" for use as a collation
|
||||
* starting point. Unsafe chars are those with combining class != 0 plus
|
||||
* those that are the 2nd thru nth character in a contraction sequence.
|
||||
*
|
||||
* Function is in header file because it's used in both collation and string search,
|
||||
* and needs to be inline for performance.
|
||||
*/
|
||||
/* __inline */ static UBool ucol_unsafeCP(UChar c, const UCollator *coll) {
|
||||
int32_t hash;
|
||||
uint8_t htbyte;
|
||||
|
||||
if (c < coll->minUnsafeCP) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
hash = c;
|
||||
if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) {
|
||||
if (hash >= 0xd800 && hash <= 0xf8ff) {
|
||||
/* Part of a surrogate, or in private use area. */
|
||||
/* These are always considered unsafe. */
|
||||
return TRUE;
|
||||
}
|
||||
hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256;
|
||||
}
|
||||
htbyte = coll->unsafeCP[hash>>3];
|
||||
return (((htbyte >> (hash & 7)) & 1) == 1);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue