ICU-8942 use smaller/simpler FCD data rather than building an FCD trie

X-SVN-Rev: 30985
This commit is contained in:
Markus Scherer 2011-11-28 22:59:49 +00:00
parent 8ffe8b6439
commit 524fd241c5
8 changed files with 152 additions and 333 deletions

View file

@ -523,12 +523,7 @@ const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
if(allModes!=NULL) {
allModes->impl.getFCDTrie(errorCode);
return &allModes->fcd;
} else {
return NULL;
}
return allModes!=NULL ? &allModes->fcd : NULL;
}
const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
@ -605,17 +600,6 @@ Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
return &((Normalizer2WithImpl *)norm2)->impl;
}
// Returns the FCD trie obtained from the implementation object of the
// "nfc" Norm2AllModes instance, or NULL if that instance is not available.
const UTrie2 *
Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) {
    Norm2AllModes *nfcModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
    if(nfcModes==NULL) {
        return NULL;
    }
    return nfcModes->impl.getFCDTrie(errorCode);
}
const Normalizer2 *
Normalizer2::getInstance(const char *packageName,
const char *name,
@ -682,7 +666,6 @@ Normalizer2::getInstance(const char *packageName,
case UNORM2_DECOMPOSE:
return &allModes->decomp;
case UNORM2_FCD:
allModes->impl.getFCDTrie(errorCode);
return &allModes->fcd;
case UNORM2_COMPOSE_CONTIGUOUS:
return &allModes->fcc;
@ -960,25 +943,14 @@ unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
}
U_CFUNC uint16_t
unorm_getFCD16Simple(UChar32 c) {
unorm_getFCD16(UChar32 c) {
UErrorCode errorCode=U_ZERO_ERROR;
const UTrie2 *trie=Normalizer2Factory::getFCDTrie(errorCode);
const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
if(U_SUCCESS(errorCode)) {
return UTRIE2_GET16(trie, c);
return impl->getFCD16(c);
} else {
return 0;
}
}
// Returns the index array of the FCD trie and stores the trie's highStart
// in fcdHighStart. Returns NULL, leaving fcdHighStart untouched, when
// *pErrorCode indicates a failure after fetching the trie.
U_CAPI const uint16_t * U_EXPORT2
unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) {
    const UTrie2 *fcdTrie=Normalizer2Factory::getFCDTrie(*pErrorCode);
    if(!U_SUCCESS(*pErrorCode)) {
        return NULL;
    }
    fcdHighStart=fcdTrie->highStart;
    return fcdTrie->index;
}
#endif // !UCONFIG_NO_NORMALIZATION

View file

@ -254,7 +254,6 @@ struct CanonIterData : public UMemory {
Normalizer2Impl::~Normalizer2Impl() {
udata_close(memory);
utrie2_close(normTrie);
UTrie2Singleton(fcdTrieSingleton).deleteInstance();
delete (CanonIterData *)canonIterDataSingleton.fInstance;
}
@ -1507,121 +1506,13 @@ const UChar *Normalizer2Impl::findNextCompBoundary(const UChar *p, const UChar *
return iter.codePointStart;
}
// Helper for creating the FCD trie through UTrie2Singleton::getInstance().
// Carries the Normalizer2Impl whose normalization trie is enumerated,
// the trie under construction, and a reference to the caller's error code.
class FCDTrieSingleton : public UTrie2Singleton {
public:
FCDTrieSingleton(SimpleSingleton &s, Normalizer2Impl &ni, UErrorCode &ec) :
UTrie2Singleton(s), impl(ni), errorCode(ec) {}
// Delegates to the base class, passing createInstance() as the
// instantiation callback and this object as its context.
UTrie2 *getInstance(UErrorCode &errorCode) {
return UTrie2Singleton::getInstance(createInstance, this, errorCode);
}
// Instantiation callback; context is expected to be an FCDTrieSingleton.
static void *createInstance(const void *context, UErrorCode &errorCode);
// Per-range callback: ranges with a non-zero value are forwarded to
// impl.setFCD16FromNorm16(). Returns whether errorCode still indicates success.
UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) {
if(value!=0) {
impl.setFCD16FromNorm16(start, end, (uint16_t)value, newFCDTrie, errorCode);
}
return U_SUCCESS(errorCode);
}
Normalizer2Impl &impl;
// Not set by the constructor; assigned in createInstance().
UTrie2 *newFCDTrie;
UErrorCode &errorCode;
};
U_CDECL_BEGIN
// Range callback for utrie2_enum(): sets the FCD value for a range of
// same-norm16 characters by forwarding to the FCDTrieSingleton in context.
static UBool U_CALLCONV
enumRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) {
    FCDTrieSingleton *singleton=(FCDTrieSingleton *)context;
    return singleton->rangeHandler(start, end, value);
}
// Range callback that accumulates (ORs together) the FCD values of a range
// of supplementary characters on behalf of their lead surrogate code unit.
// context points to the uint32_t accumulator; enumeration always continues.
static UBool U_CALLCONV
enumRangeOrValue(const void *context, UChar32 /*start*/, UChar32 /*end*/, uint32_t value) {
    uint32_t *accumulator=(uint32_t *)context;
    *accumulator|=value;
    return TRUE;
}
U_CDECL_END
// Instantiation callback that builds the FCD trie.
// Opens a new trie, fills it by enumerating the normalization trie,
// sets summary values for the lead surrogate code units, and freezes the
// trie with 16-bit values.
// Returns the new trie on success; on failure closes the trie and returns NULL.
void *FCDTrieSingleton::createInstance(const void *context, UErrorCode &errorCode) {
FCDTrieSingleton *me=(FCDTrieSingleton *)context;
me->newFCDTrie=utrie2_open(0, 0, &errorCode);
if(U_SUCCESS(errorCode)) {
// Set FCD values for every range of the normalization trie.
utrie2_enum(me->impl.getNormTrie(), NULL, enumRangeHandler, me);
// For each lead surrogate code unit, start from its own value and OR in
// the values of the supplementary code points enumerated for that lead.
for(UChar lead=0xd800; lead<0xdc00; ++lead) {
uint32_t oredValue=utrie2_get32(me->newFCDTrie, lead);
utrie2_enumForLeadSurrogate(me->newFCDTrie, lead, NULL, enumRangeOrValue, &oredValue);
if(oredValue!=0) {
// Set a "bad" value for makeFCD() to break the quick check loop
// and look up the value for the supplementary code point.
// If there is any lccc, then set the worst-case lccc of 1.
// The ORed-together value's tccc is already the worst case.
if(oredValue>0xff) {
oredValue=0x100|(oredValue&0xff);
}
utrie2_set32ForLeadSurrogateCodeUnit(me->newFCDTrie, lead, oredValue, &errorCode);
}
}
utrie2_freeze(me->newFCDTrie, UTRIE2_16_VALUE_BITS, &errorCode);
if(U_SUCCESS(errorCode)) {
return me->newFCDTrie;
}
}
// Failure path: reached when opening, setting, or freezing reported an error.
utrie2_close(me->newFCDTrie);
return NULL;
}
// Derives the FCD value (lccc in the upper byte, tccc in the lower byte)
// from norm16 and writes it into newFCDTrie for the range start..end.
// Nothing is written when norm16 indicates no decomposition or a Hangul syllable.
// For algorithmic (delta) mappings, the value is taken from the mapped-to
// code point; a multi-code point range is handled one code point at a time
// via recursive calls.
void Normalizer2Impl::setFCD16FromNorm16(UChar32 start, UChar32 end, uint16_t norm16,
UTrie2 *newFCDTrie, UErrorCode &errorCode) const {
// Only loops for 1:1 algorithmic mappings.
for(;;) {
if(norm16>=MIN_NORMAL_MAYBE_YES) {
// The low byte is used for both the lead and the trail halves.
norm16&=0xff;
norm16|=norm16<<8;
} else if(norm16<=minYesNo || minMaybeYes<=norm16) {
// no decomposition or Hangul syllable, all zeros
break;
} else if(limitNoNo<=norm16) {
int32_t delta=norm16-(minMaybeYes-MAX_DELTA-1);
if(start==end) {
// Single code point: continue the loop with the mapped-to
// code point and its norm16.
start+=delta;
norm16=getNorm16(start);
} else {
// the same delta leads from different original characters to different mappings
do {
UChar32 c=start+delta;
setFCD16FromNorm16(c, c, getNorm16(c), newFCDTrie, errorCode);
} while(++start<=end);
break;
}
} else {
// c decomposes, get everything from the variable-length extra data
const uint16_t *mapping=getMapping(norm16);
uint16_t firstUnit=*mapping;
if((firstUnit&MAPPING_LENGTH_MASK)==0) {
// A character that is deleted (maps to an empty string) must
// get the worst-case lccc and tccc values because arbitrary
// characters on both sides will become adjacent.
norm16=0x1ff;
} else {
norm16=firstUnit>>8; // tccc
if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) {
norm16|=*(mapping-1)&0xff00; // lccc
}
}
}
// At this point norm16 holds the computed FCD value, not the original norm16.
utrie2_setRange32(newFCDTrie, start, end, norm16, TRUE, &errorCode);
break;
}
}
// Returns the FCD trie via an FCDTrieSingleton wrapped around this
// object's fcdTrieSingleton member.
const UTrie2 *Normalizer2Impl::getFCDTrie(UErrorCode &errorCode) const {
    // Logically const: Synchronized instantiation.
    Normalizer2Impl *mutableThis=const_cast<Normalizer2Impl *>(this);
    FCDTrieSingleton singleton(mutableThis->fcdTrieSingleton, *mutableThis, errorCode);
    return singleton.getInstance(errorCode);
}
// Note: normalizer2impl.cpp r30982 (2011-nov-27)
// still had getFCDTrie() which built and cached an FCD trie.
// That provided faster access to FCD data than getFCD16FromNormData()
// but required synchronization and consumed some 10kB of heap memory
// in any process that uses FCD (e.g., via collation).
// tccc180[] and smallFCD[] are intended to help with any loss of performance,
// at least for Latin & CJK.
// Gets the FCD value from the regular normalization data.
uint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const {
@ -1679,7 +1570,7 @@ Normalizer2Impl::makeFCD(const UChar *src, const UChar *limit,
prevBoundary=src;
// We know that the previous character's lccc==0.
// Fetching the fcd16 value was deferred for this below-U+0300 code point.
prevFCD16=getFCD16FromSingleLead(*(src-1));
prevFCD16=getFCD16(*(src-1));
if(prevFCD16>1) {
--prevBoundary;
}
@ -1693,8 +1584,6 @@ Normalizer2Impl::makeFCD(const UChar *src, const UChar *limit,
// The exception is the call to decomposeShort() which uses the buffer
// in the normal way.
const UTrie2 *trie=fcdTrie();
const UChar *prevSrc;
UChar32 c=0;
uint16_t fcd16=0;
@ -1705,24 +1594,24 @@ Normalizer2Impl::makeFCD(const UChar *src, const UChar *limit,
if((c=*src)<MIN_CCC_LCCC_CP) {
prevFCD16=~c;
++src;
} else if((fcd16=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c))<=0xff) {
prevFCD16=fcd16;
} else if(!singleLeadMightHaveNonZeroFCD16(c)) {
prevFCD16=0;
++src;
} else if(!U16_IS_SURROGATE(c)) {
break;
} else {
UChar c2;
if(U16_IS_SURROGATE_LEAD(c)) {
if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
c=U16_GET_SUPPLEMENTARY(c, c2);
}
} else /* trail surrogate */ {
if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
--src;
c=U16_GET_SUPPLEMENTARY(c2, c);
if(U16_IS_SURROGATE(c)) {
UChar c2;
if(U16_IS_SURROGATE_LEAD(c)) {
if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) {
c=U16_GET_SUPPLEMENTARY(c, c2);
}
} else /* trail surrogate */ {
if(prevSrc<src && U16_IS_LEAD(c2=*(src-1))) {
--src;
c=U16_GET_SUPPLEMENTARY(c2, c);
}
}
}
if((fcd16=getFCD16(c))<=0xff) {
if((fcd16=getFCD16FromNormData(c))<=0xff) {
prevFCD16=fcd16;
src+=U16_LENGTH(c);
} else {
@ -1742,7 +1631,8 @@ Normalizer2Impl::makeFCD(const UChar *src, const UChar *limit,
// We know that the previous character's lccc==0.
if(prevFCD16<0) {
// Fetching the fcd16 value was deferred for this below-U+0300 code point.
prevFCD16=getFCD16FromSingleLead((UChar)~prevFCD16);
UChar32 prev=~prevFCD16;
prevFCD16= prev<0x180 ? tccc180[prev] : getFCD16FromNormData(prev);
if(prevFCD16>1) {
--prevBoundary;
}
@ -1752,7 +1642,7 @@ Normalizer2Impl::makeFCD(const UChar *src, const UChar *limit,
--p;
// Need to fetch the previous character's FCD value because
// prevFCD16 was just for the trail surrogate code point.
prevFCD16=getFCD16FromSurrogatePair(p[0], p[1]);
prevFCD16=getFCD16FromNormData(U16_GET_SUPPLEMENTARY(p[0], p[1]));
// Still known to have lccc==0 because its lead surrogate unit had lccc==0.
}
if(prevFCD16>1) {
@ -1840,21 +1730,18 @@ void Normalizer2Impl::makeFCDAndAppend(const UChar *src, const UChar *limit,
}
const UChar *Normalizer2Impl::findPreviousFCDBoundary(const UChar *start, const UChar *p) const {
BackwardUTrie2StringIterator iter(fcdTrie(), start, p);
uint16_t fcd16;
do {
fcd16=iter.previous16();
} while(fcd16>0xff);
return iter.codePointStart;
while(start<p && previousFCD16(start, p)>0xff) {}
return p;
}
const UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *limit) const {
ForwardUTrie2StringIterator iter(fcdTrie(), p, limit);
uint16_t fcd16;
do {
fcd16=iter.next16();
} while(fcd16>0xff);
return iter.codePointStart;
while(p<limit) {
const UChar *codePointStart=p;
if(nextFCD16(p, limit)<=0xff) {
return codePointStart;
}
}
return p;
}
// CanonicalIterator data -------------------------------------------------- ***

View file

@ -216,7 +216,6 @@ private:
class U_COMMON_API Normalizer2Impl : public UMemory {
public:
Normalizer2Impl() : memory(NULL), normTrie(NULL) {
fcdTrieSingleton.fInstance=NULL;
canonIterDataSingleton.fInstance=NULL;
}
~Normalizer2Impl();
@ -229,7 +228,6 @@ public:
// low-level properties ------------------------------------------------ ***
const UTrie2 *getNormTrie() const { return normTrie; }
const UTrie2 *getFCDTrie(UErrorCode &errorCode) const ;
UBool ensureCanonIterData(UErrorCode &errorCode) const;
@ -260,22 +258,80 @@ public:
return norm16>=MIN_NORMAL_MAYBE_YES ? (uint8_t)norm16 : 0;
}
uint16_t getFCD16(UChar32 c) const { return UTRIE2_GET16(fcdTrie(), c); }
uint16_t getFCD16FromSingleLead(UChar c) const {
return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(fcdTrie(), c);
/**
 * Looks up the FCD data for code point c.
 * Negative input yields 0; code points below U+0180 are served from tccc180[];
 * other BMP code points are first filtered through smallFCD[] before the
 * regular normalization data is consulted.
 * @param c A Unicode code point.
 * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
 */
uint16_t getFCD16(UChar32 c) const {
    if(c<0) {
        return 0;
    }
    if(c<0x180) {
        return tccc180[c];
    }
    if(c<=0xffff && !singleLeadMightHaveNonZeroFCD16(c)) {
        return 0;
    }
    return getFCD16FromNormData(c);
}
uint16_t getFCD16FromSupplementary(UChar32 c) const {
return UTRIE2_GET16_FROM_SUPP(fcdTrie(), c);
/**
* Returns the FCD data for the next code point (post-increment).
* Might skip only a lead surrogate rather than the whole surrogate pair if none of
* the supplementary code points associated with the lead surrogate have non-zero FCD data.
* @param s A valid pointer into a string. Requires s!=limit.
* @param limit The end of the string, or NULL.
* @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
*/
uint16_t nextFCD16(const UChar *&s, const UChar *limit) const {
UChar32 c=*s++;
if(c<0x180) {
return tccc180[c];
} else if(!singleLeadMightHaveNonZeroFCD16(c)) {
return 0;
}
UChar c2;
if(U16_IS_LEAD(c) && s!=limit && U16_IS_TRAIL(c2=*s)) {
c=U16_GET_SUPPLEMENTARY(c, c2);
++s;
}
return getFCD16FromNormData(c);
}
uint16_t getFCD16FromSurrogatePair(UChar c, UChar c2) const {
return getFCD16FromSupplementary(U16_GET_SUPPLEMENTARY(c, c2));
/**
* Returns the FCD data for the previous code point (pre-decrement).
* @param start The start of the string.
* @param s A valid pointer into a string. Requires start<s.
* @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
*/
uint16_t previousFCD16(const UChar *start, const UChar *&s) const {
UChar32 c=*--s;
if(c<0x180) {
return tccc180[c];
}
if(!U16_IS_TRAIL(c)) {
if(!singleLeadMightHaveNonZeroFCD16(c)) {
return 0;
}
} else {
UChar c2;
if(start<s && U16_IS_LEAD(c2=*(s-1))) {
c=U16_GET_SUPPLEMENTARY(c2, c);
--s;
}
}
return getFCD16FromNormData(c);
}
/**
 * Returns the FCD data for U+0000<=c<U+0180.
 * Indexes tccc180[] directly without a range check;
 * the caller must guarantee that c is within that range.
 */
uint16_t getFCD16FromBelow180(UChar32 c) const { return tccc180[c]; }
/**
 * Tests the smallFCD[] bit set for a single-or-lead code unit.
 * Each smallFCD[] byte is selected by lead>>8, and the bit within it by (lead>>5)&7.
 * @param lead A code unit; requires 0<=lead<=0xffff.
 * @return TRUE if the code unit might have non-zero FCD data.
 */
UBool singleLeadMightHaveNonZeroFCD16(UChar32 lead) const {
    const uint8_t blockBits=smallFCD[lead>>8];
    const int bitIndex=(lead>>5)&7;
    return blockBits==0 ? (UBool)false : (UBool)((blockBits>>bitIndex)&1);
}
/** Returns the FCD value from the regular normalization data. */
uint16_t getFCD16FromNormData(UChar32 c) const;
void setFCD16FromNorm16(UChar32 start, UChar32 end, uint16_t norm16,
UTrie2 *newFCDTrie, UErrorCode &errorCode) const;
void makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, uint16_t norm16,
CanonIterData &newData, UErrorCode &errorCode) const;
@ -504,8 +560,6 @@ private:
const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p) const;
const UChar *findNextCompBoundary(const UChar *p, const UChar *limit) const;
const UTrie2 *fcdTrie() const { return (const UTrie2 *)fcdTrieSingleton.fInstance; }
const UChar *findPreviousFCDBoundary(const UChar *start, const UChar *p) const;
const UChar *findNextFCDBoundary(const UChar *p, const UChar *limit) const;
@ -532,7 +586,6 @@ private:
const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0
uint8_t tccc180[0x180]; // tccc values for U+0000..U+017F
SimpleSingleton fcdTrieSingleton;
SimpleSingleton canonIterDataSingleton;
};
@ -565,8 +618,6 @@ public:
// Get the Impl instance of the Normalizer2.
// Must be used only when it is known that norm2 is a Normalizer2WithImpl instance.
static const Normalizer2Impl *getImpl(const Normalizer2 *norm2);
static const UTrie2 *getFCDTrie(UErrorCode &errorCode);
private:
Normalizer2Factory(); // No instantiation.
};
@ -586,101 +637,11 @@ U_CFUNC UNormalizationCheckResult
unorm_getQuickCheck(UChar32 c, UNormalizationMode mode);
/**
* Get the 16-bit FCD value (lead & trail CCs) for a code point, for u_getIntPropertyValue().
* Gets the 16-bit FCD value (lead & trail CCs) for a code point, for u_getIntPropertyValue().
* @internal
*/
U_CFUNC uint16_t
unorm_getFCD16Simple(UChar32 c);
/**
* Internal API, used by collation code.
* Get access to the internal FCD trie table to be able to perform
* incremental, per-code unit, FCD checks in collation.
* One pointer is sufficient because the trie index values are offset
* by the index size, so that the same pointer is used to access the trie data.
* Code points at fcdHighStart and above have a zero FCD value.
* @internal
*/
U_CAPI const uint16_t * U_EXPORT2
unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode);
/**
* Internal API, used by collation code.
* Get the FCD value for a code unit, with
* bits 15..8 lead combining class
* bits 7..0 trail combining class
*
* If c is a lead surrogate and the value is not 0,
* then some of c's associated supplementary code points have a non-zero FCD value.
*
* The value is read directly from fcdTrieIndex using the
* single-or-lead code unit index macro; no pointer or range checks are made.
*
* @param fcdTrieIndex The FCD trie index array.
* @param c A single or lead surrogate code unit.
* @return The 16-bit FCD value stored for c.
* @internal
*/
static inline uint16_t
unorm_getFCD16(const uint16_t *fcdTrieIndex, UChar c) {
return fcdTrieIndex[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(fcdTrieIndex, c)];
}
/**
* Internal API, used by collation code.
* Get the FCD value of the next code point (post-increment), with
* bits 15..8 lead combining class
* bits 7..0 trail combining class
*
* @internal
*/
static inline uint16_t
unorm_nextFCD16(const uint16_t *fcdTrieIndex, UChar32 fcdHighStart,
const UChar *&s, const UChar *limit) {
UChar32 c=*s++;
uint16_t fcd=fcdTrieIndex[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(fcdTrieIndex, c)];
if(fcd!=0 && U16_IS_LEAD(c)) {
UChar c2;
if(s!=limit && U16_IS_TRAIL(c2=*s)) {
++s;
c=U16_GET_SUPPLEMENTARY(c, c2);
if(c<fcdHighStart) {
fcd=fcdTrieIndex[_UTRIE2_INDEX_FROM_SUPP(fcdTrieIndex, c)];
} else {
fcd=0;
}
} else /* unpaired lead surrogate */ {
fcd=0;
}
}
return fcd;
}
/**
* Internal API, used by collation code.
* Get the FCD value of the previous code point (pre-decrement), with
* bits 15..8 lead combining class
* bits 7..0 trail combining class
*
* @internal
*/
static inline uint16_t
unorm_prevFCD16(const uint16_t *fcdTrieIndex, UChar32 fcdHighStart,
const UChar *start, const UChar *&s) {
UChar32 c=*--s;
uint16_t fcd;
if(!U16_IS_SURROGATE(c)) {
fcd=fcdTrieIndex[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(fcdTrieIndex, c)];
} else {
UChar c2;
if(U16_IS_SURROGATE_TRAIL(c) && s!=start && U16_IS_LEAD(c2=*(s-1))) {
--s;
c=U16_GET_SUPPLEMENTARY(c2, c);
if(c<fcdHighStart) {
fcd=fcdTrieIndex[_UTRIE2_INDEX_FROM_SUPP(fcdTrieIndex, c)];
} else {
fcd=0;
}
} else /* unpaired surrogate */ {
fcd=0;
}
}
return fcd;
}
unorm_getFCD16(UChar32 c);
/**
* Format of Normalizer2 .nrm data files.

View file

@ -401,7 +401,7 @@ static int32_t getLeadCombiningClass(const IntProperty &, UChar32, UProperty) {
}
#else
static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
return unorm_getFCD16Simple(c)>>8;
return unorm_getFCD16(c)>>8;
}
#endif
@ -411,7 +411,7 @@ static int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) {
}
#else
static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) {
return unorm_getFCD16Simple(c)&0xff;
return unorm_getFCD16(c)&0xff;
}
#endif

View file

@ -52,13 +52,11 @@ U_NAMESPACE_USE
#define ZERO_CC_LIMIT_ 0xC0
// this is static pointer to the normalizer fcdTrieIndex
// This is a static pointer to the NFC implementation instance.
// It is always the same between calls to u_cleanup
// and therefore writing to it is not synchronized.
// It is cleaned in ucol_cleanup.
static const uint16_t *fcdTrieIndex=NULL;
// Code points at fcdHighStart and above have a zero FCD value.
static UChar32 fcdHighStart = 0;
static const Normalizer2Impl *g_nfcImpl = NULL;
// These are values from UCA required for
// implicit generation and suppressing sort key compression
@ -72,7 +70,7 @@ U_CDECL_BEGIN
static UBool U_CALLCONV
ucol_cleanup(void)
{
fcdTrieIndex = NULL;
g_nfcImpl = NULL;
return TRUE;
}
@ -86,11 +84,13 @@ U_CDECL_END
// init FCD data
static inline
UBool initializeFCD(UErrorCode *status) {
if (fcdTrieIndex != NULL) {
if (g_nfcImpl != NULL) {
return TRUE;
} else {
// The result is constant, until the library is reloaded.
fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
g_nfcImpl = Normalizer2Factory::getNFCImpl(*status);
// Note: Alternatively, we could also store this pointer in each collIterate struct,
// same as Normalizer2Factory::getImpl(collIterate->nfd).
ucln_i18n_registerCleanup(UCLN_I18N_UCOL, ucol_cleanup);
return U_SUCCESS(*status);
}
@ -1433,10 +1433,8 @@ inline UBool collIterFCD(collIterate *collationSource) {
endP = NULL;
}
// Get the trailing combining class of the current character. If it's zero,
// we are OK.
/* trie access */
fcd = unorm_nextFCD16(fcdTrieIndex, fcdHighStart, srcP, endP);
// Get the trailing combining class of the current character. If it's zero, we are OK.
fcd = g_nfcImpl->nextFCD16(srcP, endP);
if (fcd != 0) {
prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
@ -1447,8 +1445,7 @@ inline UBool collIterFCD(collIterate *collationSource) {
{
const UChar *savedSrcP = srcP;
/* trie access */
fcd = unorm_nextFCD16(fcdTrieIndex, fcdHighStart, srcP, endP);
fcd = g_nfcImpl->nextFCD16(srcP, endP);
leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
if (leadingCC == 0) {
srcP = savedSrcP; // Hit char that is not part of combining sequence.
@ -1809,7 +1806,7 @@ inline UBool collPrevIterFCD(collIterate *data)
src = data->pos + 1;
/* Get the trailing combining class of the current character. */
fcd = unorm_prevFCD16(fcdTrieIndex, fcdHighStart, start, src);
fcd = g_nfcImpl->previousFCD16(start, src);
leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
@ -1825,7 +1822,7 @@ inline UBool collPrevIterFCD(collIterate *data)
return result;
}
fcd = unorm_prevFCD16(fcdTrieIndex, fcdHighStart, start, src);
fcd = g_nfcImpl->previousFCD16(start, src);
trailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);

View file

@ -845,8 +845,7 @@ U_CFUNC void ucol_createElements(UColTokenParser *src, tempUCATable *t, UColTokL
UColToken *tok = lh->first;
UColToken *expt = NULL;
uint32_t i = 0, j = 0;
UChar32 fcdHighStart;
const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
const Normalizer2Impl *nfcImpl = Normalizer2Factory::getNFCImpl(*status);
while(tok != NULL && U_SUCCESS(*status)) {
/* first, check if there are any expansions */
@ -942,7 +941,7 @@ U_CFUNC void ucol_createElements(UColTokenParser *src, tempUCATable *t, UColTokL
if (!src->buildCCTabFlag && el.cSize > 0) {
// Check the trailing canonical combining class (tccc) of the last character.
const UChar *s = el.cPoints + el.cSize;
uint16_t fcd = unorm_prevFCD16(fcdTrieIndex, fcdHighStart, el.cPoints, s);
uint16_t fcd = nfcImpl->previousFCD16(el.cPoints, s);
if ((fcd & 0xff) != 0) {
src->buildCCTabFlag = TRUE;
}

View file

@ -743,15 +743,12 @@ uprv_uca_copyCMTable(tempUCATable *t, UChar *cm, uint16_t *index) {
static void uprv_uca_unsafeCPAddCCNZ(tempUCATable *t, UErrorCode *status) {
UChar c;
uint16_t fcd; // Hi byte is lead combining class.
// lo byte is trailing combing class.
const uint16_t *fcdTrieIndex;
UChar32 fcdHighStart;
uint16_t fcd; // Hi byte is lead combining class. lo byte is trailing combing class.
UBool buildCMTable = (t->cmLookup==NULL); // flag for building combining class table
UChar *cm=NULL;
uint16_t index[256];
int32_t count=0;
fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
const Normalizer2Impl *nfcImpl = Normalizer2Factory::getNFCImpl(*status);
if (U_FAILURE(*status)) {
return;
}
@ -767,7 +764,18 @@ static void uprv_uca_unsafeCPAddCCNZ(tempUCATable *t, UErrorCode *status) {
uprv_memset(index, 0, sizeof(index));
}
for (c=0; c<0xffff; c++) {
fcd = unorm_getFCD16(fcdTrieIndex, c);
if (U16_IS_LEAD(c)) {
fcd = 0;
if (nfcImpl->singleLeadMightHaveNonZeroFCD16(c)) {
UChar32 supp = U16_GET_SUPPLEMENTARY(c, 0xdc00);
UChar32 suppLimit = supp + 0x400;
while (supp < suppLimit) {
fcd |= nfcImpl->getFCD16FromNormData(supp++);
}
}
} else {
fcd = nfcImpl->getFCD16(c);
}
if (fcd >= 0x100 || // if the leading combining class(c) > 0 ||
(U16_IS_LEAD(c) && fcd != 0)) {// c is a leading surrogate with some FCD data
if (buildCMTable) {
@ -1785,12 +1793,11 @@ uprv_uca_addMultiCMContractions(tempUCATable *t,
CombinClassTable *cmLookup = t->cmLookup;
UChar newDecomp[256];
int32_t maxComp, newDecLen;
UChar32 fcdHighStart;
const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
const Normalizer2Impl *nfcImpl = Normalizer2Factory::getNFCImpl(*status);
if (U_FAILURE(*status)) {
return;
}
int16_t curClass = (unorm_getFCD16(fcdTrieIndex, c->tailoringCM) & 0xff);
int16_t curClass = nfcImpl->getFCD16(c->tailoringCM) & 0xff;
CompData *precomp = c->precomp;
int32_t compLen = c->compLen;
UChar *comp = c->comp;
@ -1855,12 +1862,11 @@ uprv_uca_addTailCanonicalClosures(tempUCATable *t,
UCAElements *el,
UErrorCode *status) {
CombinClassTable *cmLookup = t->cmLookup;
UChar32 fcdHighStart;
const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
const Normalizer2Impl *nfcImpl = Normalizer2Factory::getNFCImpl(*status);
if (U_FAILURE(*status)) {
return;
}
int16_t maxIndex = (unorm_getFCD16(fcdTrieIndex, cMark) & 0xff );
int16_t maxIndex = nfcImpl->getFCD16(cMark) & 0xff;
UCAElements element;
uint16_t *index;
UChar decomp[256];
@ -1874,8 +1880,8 @@ uprv_uca_addTailCanonicalClosures(tempUCATable *t,
return;
}
index = cmLookup->index;
int32_t cClass=(unorm_getFCD16(fcdTrieIndex, cMark) & 0xff);
maxIndex = (int32_t)index[(unorm_getFCD16(fcdTrieIndex, cMark) & 0xff)-1];
int32_t cClass=nfcImpl->getFCD16(cMark) & 0xff;
maxIndex = (int32_t)index[(nfcImpl->getFCD16(cMark) & 0xff)-1];
c.comp = comp;
c.decomp = decomp;
c.precomp = precomp;
@ -1898,7 +1904,7 @@ uprv_uca_addTailCanonicalClosures(tempUCATable *t,
// other combining mark combinations.
precomp[precompLen].cp=comp[0];
curClass = precomp[precompLen].cClass =
index[unorm_getFCD16(fcdTrieIndex, decomp[1]) & 0xff];
index[nfcImpl->getFCD16(decomp[1]) & 0xff];
precompLen++;
replacedPos=0;
for (decompLen=0; decompLen< (int32_t)el->cSize; decompLen++) {
@ -1938,7 +1944,7 @@ uprv_uca_addTailCanonicalClosures(tempUCATable *t,
// This is a fix for tailoring contractions with accented
// character at the end of contraction string.
if ((len>2) &&
(unorm_getFCD16(fcdTrieIndex, comp[len-2]) & 0xff00)==0) {
(nfcImpl->getFCD16(comp[len-2]) & 0xff00)==0) {
uprv_uca_addFCD4AccentedContractions(t, colEl, comp, len, &element, status);
}
@ -1967,8 +1973,6 @@ uprv_uca_canonicalClosure(tempUCATable *t,
UColToken *tok;
uint32_t i = 0, j = 0;
UChar baseChar, firstCM;
UChar32 fcdHighStart;
const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
context.nfcImpl=Normalizer2Factory::getNFCImpl(*status);
if(U_FAILURE(*status)) {
return 0;
@ -2039,7 +2043,7 @@ uprv_uca_canonicalClosure(tempUCATable *t,
}
if(src->UCA != NULL) {
for(j = 0; j<el.cSize; j++) {
int16_t fcd = unorm_getFCD16(fcdTrieIndex, el.cPoints[j]);
int16_t fcd = context.nfcImpl->getFCD16(el.cPoints[j]);
if ( (fcd & 0xff) == 0 ) {
baseChar = el.cPoints[j]; // last base character
firstCM=0; // reset combining mark value

View file

@ -37,8 +37,7 @@ U_NAMESPACE_USE
#define SECOND_LAST_BYTE_SHIFT_ 8
#define SUPPLEMENTARY_MIN_VALUE_ 0x10000
static const uint16_t *fcdTrieIndex = NULL;
static UChar32 fcdHighStart = 0;
static const Normalizer2Impl *g_nfcImpl = NULL;
// internal methods -------------------------------------------------
@ -103,7 +102,7 @@ inline int hash(uint32_t ce)
U_CDECL_BEGIN
static UBool U_CALLCONV
usearch_cleanup(void) {
fcdTrieIndex = NULL;
g_nfcImpl = NULL;
return TRUE;
}
U_CDECL_END
@ -117,8 +116,8 @@ U_CDECL_END
static
inline void initializeFCD(UErrorCode *status)
{
if (fcdTrieIndex == NULL) {
fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);
if (g_nfcImpl == NULL) {
g_nfcImpl = Normalizer2Factory::getNFCImpl(*status);
ucln_i18n_registerCleanup(UCLN_I18N_USEARCH, usearch_cleanup);
}
}
@ -138,7 +137,7 @@ uint16_t getFCD(const UChar *str, int32_t *offset,
int32_t strlength)
{
const UChar *temp = str + *offset;
uint16_t result = unorm_nextFCD16(fcdTrieIndex, fcdHighStart, temp, str + strlength);
uint16_t result = g_nfcImpl->nextFCD16(temp, str + strlength);
*offset = (int32_t)(temp - str);
return result;
}