mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 14:05:32 +00:00
ICU-1754 add internal APIs for canonical equivalence
X-SVN-Rev: 7931
This commit is contained in:
parent
fae8399d33
commit
dee8abeeec
2 changed files with 52 additions and 1 deletions
|
@ -31,6 +31,8 @@
|
|||
#include "cmemory.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "umutex.h"
|
||||
#include "utrie.h"
|
||||
#include "uset.h"
|
||||
#include "unormimp.h"
|
||||
|
||||
/*
|
||||
|
@ -116,7 +118,8 @@ static UTrie normTrie={ 0,0,0,0,0,0,0 }, fcdTrie={ 0,0,0,0,0,0,0 }, auxTrie={ 0,
|
|||
* pointers into the memory-mapped unorm.dat
|
||||
*/
|
||||
static const uint16_t *extraData=NULL,
|
||||
*combiningTable=NULL;
|
||||
*combiningTable=NULL,
|
||||
*canonStartSets=NULL;
|
||||
|
||||
static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
|
||||
static UBool formatVersion_2_1=FALSE;
|
||||
|
@ -246,6 +249,9 @@ loadNormData(UErrorCode &errorCode) {
|
|||
extraData=(uint16_t *)((uint8_t *)(p+_NORM_INDEX_TOP)+indexes[_NORM_INDEX_TRIE_SIZE]);
|
||||
combiningTable=extraData+indexes[_NORM_INDEX_UCHAR_COUNT];
|
||||
formatVersion_2_1=formatVersion[0]>2 || (formatVersion[0]==2 && formatVersion[1]>=1);
|
||||
if(formatVersion_2_1) {
|
||||
canonStartSets=combiningTable+(indexes[_NORM_INDEX_FCD_TRIE_SIZE]+indexes[_NORM_INDEX_AUX_TRIE_SIZE])/2;
|
||||
}
|
||||
haveNormData=1;
|
||||
|
||||
/* if a different thread set it first, then close the extra data */
|
||||
|
@ -544,6 +550,36 @@ unorm_internalIsFullCompositionExclusion(UChar32 c) {
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Internal API: report whether the auxiliary trie value for c has none of
 * the _NORM_AUX_UNSAFE_MASK bits set.
 *
 * @param c code point to look up in auxTrie
 * @return TRUE if the data is available, the data format is 2.1 or later,
 *         and (aux value & _NORM_AUX_UNSAFE_MASK) is zero; FALSE otherwise
 */
U_CAPI UBool U_EXPORT2
unorm_isCanonSafeStart(UChar32 c) {
    UErrorCode status=U_ZERO_ERROR;
    uint32_t auxValue;

    /* the aux lookup is only performed with loaded data in format 2.1+ */
    if(!_haveData(status) || !formatVersion_2_1) {
        return FALSE;
    }

    UTRIE_GET32(&auxTrie, c, auxValue);
    if((auxValue&_NORM_AUX_UNSAFE_MASK)!=0) {
        return FALSE;
    }
    return TRUE;
}
|
||||
|
||||
/*
 * Internal API: look up the serialized canonical start set for c.
 *
 * The auxiliary trie value for c, masked with _NORM_AUX_CANON_SET_MASK,
 * is used as an offset into canonStartSets; the data at that offset is
 * handed to uset_getSerializedSet() together with fillSet.
 *
 * @param c       code point to look up in auxTrie
 * @param fillSet set structure passed to uset_getSerializedSet();
 *                if NULL, the function returns FALSE
 * @return FALSE if fillSet is NULL, the data is unavailable, canonStartSets
 *         was not set up (it is only set for data format 2.1 or later),
 *         or the masked aux value is 0; otherwise the result of
 *         uset_getSerializedSet()
 */
U_CAPI UBool U_EXPORT2
unorm_getCanonStartSet(UChar32 c, USerializedSet *fillSet) {
    UErrorCode errorCode=U_ZERO_ERROR;
    if(fillSet!=NULL && _haveData(errorCode) && canonStartSets!=NULL) {
        uint32_t aux32;

        UTRIE_GET32(&auxTrie, c, aux32);
        aux32&=_NORM_AUX_CANON_SET_MASK;

        /*
         * The length argument is the value stored at
         * indexes[_NORM_INDEX_CANON_SET_COUNT] minus the offset just
         * computed. That stored value must be used directly, not as a
         * second subscript into indexes[], which would read an unrelated
         * (or out-of-range) entry.
         * NOTE(review): presumably the count is in 16-bit units of
         * canonStartSets - confirm against the unorm.dat format description.
         */
        return (UBool)(aux32!=0 &&
            uset_getSerializedSet(fillSet,
                                  canonStartSets+aux32,
                                  indexes[_NORM_INDEX_CANON_SET_COUNT]-aux32));
    } else {
        return FALSE;
    }
}
|
||||
|
||||
/* reorder UTF-16 in-place -------------------------------------------------- */
|
||||
|
||||
/*
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include "unicode/uiter.h"
|
||||
#include "unicode/unorm.h"
|
||||
#include "utrie.h"
|
||||
#include "uset.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
/*
|
||||
|
@ -248,6 +249,20 @@ U_NAMESPACE_END
|
|||
U_CAPI UBool U_EXPORT2
|
||||
unorm_internalIsFullCompositionExclusion(UChar32 c);
|
||||
|
||||
/**
|
||||
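* Internal API, used by enumeration of canonically equivalent strings.
* Returns TRUE if the normalization data (format version 2.1 or later) is
* available and the auxiliary trie value for c has no "unsafe" bits set;
* returns FALSE otherwise.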
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
unorm_isCanonSafeStart(UChar32 c);
|
||||
|
||||
/**
|
||||
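* Internal API, used by enumeration of canonically equivalent strings.
* Looks up the canonical start set data for c and passes it, together with
* fillSet, to uset_getSerializedSet(). Returns FALSE if fillSet is NULL,
* the data is unavailable or older than format version 2.1, or c has no
* associated set.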
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
unorm_getCanonStartSet(UChar32 c, USerializedSet *fillSet);
|
||||
|
||||
/**
|
||||
* Description of the format of unorm.dat version 2.1.
|
||||
*
|
||||
|
|
Loading…
Add table
Reference in a new issue