mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-7273 compute FC_NFKC_Closure on the fly
X-SVN-Rev: 27534
This commit is contained in:
parent
537b4141fa
commit
1cb38e859b
6 changed files with 60 additions and 41 deletions
|
@ -510,6 +510,11 @@ Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
|
|||
return allModes!=NULL ? &allModes->impl : NULL;
|
||||
}
|
||||
|
||||
// Returns a pointer to the impl member of norm2.
// The cast is unchecked: the caller must already know that norm2
// really is a Normalizer2WithImpl instance.
const Normalizer2Impl *
Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
    const Normalizer2WithImpl *withImpl=(const Normalizer2WithImpl *)norm2;
    return &withImpl->impl;
}
|
||||
|
||||
const UTrie2 *
|
||||
Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) {
|
||||
Norm2AllModes *allModes=
|
||||
|
|
|
@ -487,6 +487,10 @@ public:
|
|||
static const Normalizer2Impl *getNFKCImpl(UErrorCode &errorCode);
|
||||
static const Normalizer2Impl *getNFKC_CFImpl(UErrorCode &errorCode);
|
||||
|
||||
// Get the Impl instance of the Normalizer2.
|
||||
// Must be used only when it is known that norm2 is a Normalizer2WithImpl instance.
|
||||
static const Normalizer2Impl *getImpl(const Normalizer2 *norm2);
|
||||
|
||||
static const UTrie2 *getFCDTrie(UErrorCode &errorCode);
|
||||
private:
|
||||
Normalizer2Factory(); // No instantiation.
|
||||
|
|
|
@ -393,44 +393,6 @@ unorm_getCanonStartSet(UChar32 c, USerializedSet *fillSet) {
|
|||
return FALSE; /* not found */
|
||||
}
|
||||
|
||||
// Table-driven lookup of the FC_NFKC_Closure string for c.
//
// c            code point to look up in auxTrie
// dest         output buffer; may be NULL only if destCapacity is 0
// destCapacity capacity of dest in UChars; must not be negative
// pErrorCode   in/out ICU error code; the function does nothing if it is
//              NULL or already indicates a failure
//
// Returns 0 for bad arguments (after setting U_ILLEGAL_ARGUMENT_ERROR),
// otherwise the result of u_terminateUChars() for the string length found
// (0 when no data is loaded or no closure value is stored for c).
U_CAPI int32_t U_EXPORT2
u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) {
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // Offset of the closure string inside extraData; 0 means "none".
    uint16_t closureOffset=0;
    if(_haveData(*pErrorCode) && auxTrie.index!=NULL) {
        closureOffset=(uint16_t)(UTRIE2_GET16(&auxTrie, c)&_NORM_AUX_FNC_MASK);
    }
    if(closureOffset==0) {
        return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
    }

    const UChar *closure=(const UChar *)(extraData+closureOffset);
    int32_t closureLength=1;  // default: closure points to the single-unit string
    if(*closure>=0xff00) {
        // The first unit is a header: its low byte is the length,
        // and the string itself starts at the next unit.
        closureLength=*closure&0xff;
        ++closure;
    }
    // Copy only if the whole string fits; u_terminateUChars() gets the
    // full length either way.
    if(0<closureLength && closureLength<=destCapacity) {
        uprv_memcpy(dest, closure, closureLength*U_SIZEOF_UCHAR);
    }
    return u_terminateUChars(dest, destCapacity, closureLength, pErrorCode);
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
unorm_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
|
||||
UChar c;
|
||||
|
|
|
@ -124,9 +124,6 @@ U_NAMESPACE_USE
|
|||
* Note that all of this is only a problem when case-folding _and_
|
||||
* canonical equivalence come together.
|
||||
* (Comments in unorm_compare() are more up to date than this TODO.)
|
||||
*
|
||||
* This function could be moved to a different source file, at increased cost
|
||||
* for calling the decomposition access function.
|
||||
*/
|
||||
|
||||
/* stack element for previous-level source/decomposition pointers */
|
||||
|
|
|
@ -586,6 +586,55 @@ uprops_getSource(UProperty which) {
|
|||
}
|
||||
}
|
||||
|
||||
// Computes the FC_NFKC_Closure value of c on the fly instead of reading it
// from a precomputed table.
//
// c            code point whose closure is requested
// dest         output buffer; may be NULL only if destCapacity is 0
// destCapacity capacity of dest in UChars; must not be negative
// pErrorCode   in/out ICU error code; the function does nothing if it is
//              NULL or already indicates a failure
//
// Returns 0 for bad arguments (after setting U_ILLEGAL_ARGUMENT_ERROR) or
// if the NFKC normalizer / case properties cannot be obtained. Otherwise
// returns the result of u_terminateUChars() with length 0 when there is no
// closure mapping, or the result of UnicodeString::extract() for the mapping.
U_CAPI int32_t U_EXPORT2
u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) {
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // The API exists for complete coverage of Unicode properties, although
    // this value by itself is not useful via API. (A custom normalization
    // table that combines case folding and NFKC could be useful.)
    // For the derivation, see Unicode's DerivedNormalizationProps.txt.
    const Normalizer2 *nfkc=Normalizer2Factory::getNFKCInstance(*pErrorCode);
    const UCaseProps *csp=ucase_getSingleton(pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    // Step 1: b = NFKC(Fold(a))
    UnicodeString foldedA;
    const UChar *foldedChars;
    int32_t foldResult=ucase_toFullFolding(csp, c, &foldedChars, U_FOLD_CASE_DEFAULT);
    if(foldResult>UCASE_MAX_STRING_LENGTH) {
        // The result is used as the folded code point itself.
        foldedA.setTo(foldResult);
    } else if(foldResult>=0) {
        // The result is the length of the string at foldedChars;
        // alias it rather than copying.
        foldedA.setTo(FALSE, foldedChars, foldResult);
    } else {
        // Negative result: fall back to c itself, unless the NFKC
        // quick check says c needs no change either.
        const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc);
        if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) {
            // c does not change at all under CaseFolding+NFKC
            return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
        }
        foldedA.setTo(c);
    }
    UnicodeString bString=nfkc->normalize(foldedA, *pErrorCode);

    // Step 2: c = NFKC(Fold(b))
    UnicodeString foldedB(bString);
    UnicodeString cString=nfkc->normalize(foldedB.foldCase(), *pErrorCode);

    // Only when c != b is there a mapping from a to c.
    if(U_FAILURE(*pErrorCode) || bString==cString) {
        return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
    }
    return cString.extract(dest, destCapacity, *pErrorCode);
}
|
||||
|
||||
/*----------------------------------------------------------------
|
||||
* Inclusions list
|
||||
*----------------------------------------------------------------*/
|
||||
|
|
|
@ -1342,6 +1342,8 @@ TestFCNFKCClosure(void) {
|
|||
UChar32 c;
|
||||
const UChar s[6];
|
||||
} tests[]={
|
||||
{ 0x00C4, { 0 } },
|
||||
{ 0x00E4, { 0 } },
|
||||
{ 0x037A, { 0x0020, 0x03B9, 0 } },
|
||||
{ 0x03D2, { 0x03C5, 0 } },
|
||||
{ 0x20A8, { 0x0072, 0x0073, 0 } },
|
||||
|
|
Loading…
Add table
Reference in a new issue