ICU-7273 compute FC_NFKC_Closure on the fly

X-SVN-Rev: 27534
This commit is contained in:
Markus Scherer 2010-02-10 23:05:39 +00:00
parent 537b4141fa
commit 1cb38e859b
6 changed files with 60 additions and 41 deletions

View file

@ -510,6 +510,11 @@ Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
return allModes!=NULL ? &allModes->impl : NULL;
}
// Returns a pointer to the impl member of norm2, viewed as a Normalizer2WithImpl.
// The cast is unchecked: the caller must already know that norm2 really is
// a Normalizer2WithImpl instance.
const Normalizer2Impl *
Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
    const Normalizer2WithImpl *withImpl=(const Normalizer2WithImpl *)norm2;
    return &withImpl->impl;
}
const UTrie2 *
Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) {
Norm2AllModes *allModes=

View file

@ -487,6 +487,10 @@ public:
static const Normalizer2Impl *getNFKCImpl(UErrorCode &errorCode);
static const Normalizer2Impl *getNFKC_CFImpl(UErrorCode &errorCode);
// Get the Impl instance of the Normalizer2.
// Must be used only when it is known that norm2 is a Normalizer2WithImpl instance.
static const Normalizer2Impl *getImpl(const Normalizer2 *norm2);
static const UTrie2 *getFCDTrie(UErrorCode &errorCode);
private:
Normalizer2Factory(); // No instantiation.

View file

@ -393,44 +393,6 @@ unorm_getCanonStartSet(UChar32 c, USerializedSet *fillSet) {
return FALSE; /* not found */
}
/*
 * Look up the FC_NFKC_Closure string for c in the auxiliary normalization
 * data and copy it into dest.
 *
 * c             the code point to look up
 * dest          output buffer; may be NULL only if destCapacity is 0
 * destCapacity  capacity of dest in UChars; must not be negative
 * pErrorCode    in/out error code; nothing is done if it is NULL or
 *               already indicates a failure
 *
 * Returns 0 on an argument or incoming error; otherwise the value returned by
 * u_terminateUChars() for the closure length (0 when c has no closure string
 * or when the data is not available).
 */
U_CAPI int32_t U_EXPORT2
u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) {
    const UChar *closure;
    int32_t closureLength;
    uint16_t offset=0;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* offset stays 0 if the data cannot be loaded or there is no auxiliary trie */
    if(_haveData(*pErrorCode) && auxTrie.index!=NULL) {
        offset=UTRIE2_GET16(&auxTrie, c);
        offset&=_NORM_AUX_FNC_MASK;
    }
    if(offset==0) {
        /* no closure string for c */
        return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
    }

    closure=(const UChar *)(extraData+offset);
    if(*closure<0xff00) {
        /* closure points to the single-unit string */
        closureLength=1;
    } else {
        /* the low byte of the first unit is the length; the string follows it */
        closureLength=*closure&0xff;
        ++closure;
    }

    /* copy only if the string fits; the terminate call reports the full length either way */
    if(0<closureLength && closureLength<=destCapacity) {
        uprv_memcpy(dest, closure, closureLength*U_SIZEOF_UCHAR);
    }
    return u_terminateUChars(dest, destCapacity, closureLength, pErrorCode);
}
U_CAPI void U_EXPORT2
unorm_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
UChar c;

View file

@ -124,9 +124,6 @@ U_NAMESPACE_USE
* Note that all of this is only a problem when case-folding _and_
* canonical equivalence come together.
* (Comments in unorm_compare() are more up to date than this TODO.)
*
* This function could be moved to a different source file, at increased cost
* for calling the decomposition access function.
*/
/* stack element for previous-level source/decomposition pointers */

View file

@ -586,6 +586,55 @@ uprops_getSource(UProperty which) {
}
}
// Computes the FC_NFKC_Closure string of c on the fly and writes it to dest.
//
// c             the code point whose closure is requested
// dest          output buffer; may be NULL only if destCapacity is 0
// destCapacity  capacity of dest in UChars; must not be negative
// pErrorCode    in/out error code; nothing is done if it is NULL or
//               already indicates a failure
//
// Returns 0 if the arguments are bad or the normalizer/case data cannot be obtained.
// Otherwise returns the result of extracting the closure string into dest,
// or of terminating an empty string when c has no closure mapping.
U_CAPI int32_t U_EXPORT2
u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) {
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    // The closure is derived at runtime rather than stored:
    // The API exists for complete coverage of Unicode properties, although
    // this value by itself is not useful via API.
    // (What could be useful is a custom normalization table that combines
    // case folding and NFKC.)
    // For the derivation, see Unicode's DerivedNormalizationProps.txt.
    const Normalizer2 *nfkc=Normalizer2Factory::getNFKCInstance(*pErrorCode);
    const UCaseProps *csp=ucase_getSingleton(pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    // Step 1: b = NFKC(Fold(a))
    UnicodeString foldedOnce;
    const UChar *foldedChars;
    int32_t foldResult=ucase_toFullFolding(csp, c, &foldedChars, U_FOLD_CASE_DEFAULT);
    if(foldResult>UCASE_MAX_STRING_LENGTH) {
        // The result is a single code point rather than a string length.
        foldedOnce.setTo(foldResult);
    } else if(foldResult>=0) {
        // The result is the length of the string at foldedChars; alias it read-only.
        foldedOnce.setTo(FALSE, foldedChars, foldResult);
    } else {
        // Negative result: case folding leaves c alone.
        const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc);
        if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) {
            // c does not change at all under CaseFolding+NFKC
            return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
        }
        foldedOnce.setTo(c);
    }
    UnicodeString kc1=nfkc->normalize(foldedOnce, *pErrorCode);

    // Step 2: c = NFKC(Fold(b))
    UnicodeString foldedTwice(kc1);
    foldedTwice.foldCase();
    UnicodeString kc2=nfkc->normalize(foldedTwice, *pErrorCode);

    // Step 3: only if (c != b) is there a mapping from a to c
    if(U_FAILURE(*pErrorCode) || kc1==kc2) {
        return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
    }
    return kc2.extract(dest, destCapacity, *pErrorCode);
}
/*----------------------------------------------------------------
* Inclusions list
*----------------------------------------------------------------*/

View file

@ -1342,6 +1342,8 @@ TestFCNFKCClosure(void) {
UChar32 c;
const UChar s[6];
} tests[]={
{ 0x00C4, { 0 } },
{ 0x00E4, { 0 } },
{ 0x037A, { 0x0020, 0x03B9, 0 } },
{ 0x03D2, { 0x03C5, 0 } },
{ 0x20A8, { 0x0072, 0x0073, 0 } },