mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-1007 replace old normalization implementation by new one, use unorm_quickCheck(UNORM_FCD) instead of the temporary checkFCD()
X-SVN-Rev: 5489
This commit is contained in:
parent
44283cc553
commit
6eb5998fc1
5 changed files with 258 additions and 1032 deletions
icu4c/source
|
@ -151,59 +151,33 @@ Normalizer::normalize(const UnicodeString& source,
|
|||
EMode mode,
|
||||
int32_t options,
|
||||
UnicodeString& result,
|
||||
UErrorCode &status)
|
||||
{
|
||||
if (quickCheck(source, mode, status) == UNORM_YES)
|
||||
{
|
||||
result = source;
|
||||
return;
|
||||
}
|
||||
|
||||
/* ### TODO: begin new implementation */
|
||||
if(unorm_usesNewImplementation()) {
|
||||
if(source.isBogus()) {
|
||||
UErrorCode &status) {
|
||||
if(source.isBogus()) {
|
||||
result.setToBogus();
|
||||
} else {
|
||||
/* make sure that we do not operate on the same buffer in source and result */
|
||||
result.cloneArrayIfNeeded(-1, source.length()+20, FALSE);
|
||||
result.fLength=unorm_internalNormalize(result.fArray, result.fCapacity,
|
||||
source.fArray, source.fLength,
|
||||
getUNormalizationMode(mode, status), (options&IGNORE_HANGUL)!=0,
|
||||
UnicodeString::growBuffer, &result,
|
||||
&status);
|
||||
if(U_FAILURE(status)) {
|
||||
result.setToBogus();
|
||||
} else {
|
||||
/* make sure that we do not operate on the same buffer in source and result */
|
||||
result.cloneArrayIfNeeded(-1, source.length()+20, FALSE);
|
||||
result.fLength=unorm_internalNormalize(result.fArray, result.fCapacity,
|
||||
source.fArray, source.fLength,
|
||||
getUNormalizationMode(mode, status), (options&IGNORE_HANGUL)!=0,
|
||||
UnicodeString::growBuffer, &result,
|
||||
&status);
|
||||
if(U_FAILURE(status)) {
|
||||
result.setToBogus();
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
/* ### end new implementation */
|
||||
|
||||
switch (mode) {
|
||||
case NO_OP:
|
||||
result = source;
|
||||
break;
|
||||
case COMPOSE:
|
||||
case COMPOSE_COMPAT:
|
||||
compose(source, (mode & COMPAT_BIT) != 0, options, result, status);
|
||||
break;
|
||||
case DECOMP:
|
||||
case DECOMP_COMPAT:
|
||||
decompose(source, (mode & COMPAT_BIT) != 0, options, result, status);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
UNormalizationCheckResult
|
||||
Normalizer::quickCheck(const UnicodeString& source,
|
||||
Normalizer::EMode mode,
|
||||
UErrorCode &status)
|
||||
{
|
||||
if (U_FAILURE(status))
|
||||
return UNORM_MAYBE;
|
||||
UErrorCode &status) {
|
||||
if(U_FAILURE(status)) {
|
||||
return UNORM_MAYBE;
|
||||
}
|
||||
|
||||
return unorm_quickCheck(source.fArray, source.length(),
|
||||
getUNormalizationMode(mode, status), &status);
|
||||
return unorm_quickCheck(source.fArray, source.length(),
|
||||
getUNormalizationMode(mode, status), &status);
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
|
@ -239,165 +213,19 @@ Normalizer::compose(const UnicodeString& source,
|
|||
UBool compat,
|
||||
int32_t options,
|
||||
UnicodeString& result,
|
||||
UErrorCode &status)
|
||||
{
|
||||
/* ### TODO: begin new implementation */
|
||||
if(unorm_usesNewImplementation()) {
|
||||
if(source.isBogus()) {
|
||||
UErrorCode &status) {
|
||||
if(source.isBogus()) {
|
||||
result.setToBogus();
|
||||
} else {
|
||||
/* make sure that we do not operate on the same buffer in source and result */
|
||||
result.cloneArrayIfNeeded(-1, source.length()+20, FALSE);
|
||||
result.fLength=unorm_compose(result.fArray, result.fCapacity,
|
||||
source.fArray, source.fLength,
|
||||
compat, (options&IGNORE_HANGUL)!=0,
|
||||
UnicodeString::growBuffer, &result,
|
||||
&status);
|
||||
if(U_FAILURE(status)) {
|
||||
result.setToBogus();
|
||||
} else {
|
||||
/* make sure that we do not operate on the same buffer in source and result */
|
||||
result.cloneArrayIfNeeded(-1, source.length()+20, FALSE);
|
||||
result.fLength=unorm_compose(result.fArray, result.fCapacity,
|
||||
source.fArray, source.fLength,
|
||||
compat, (options&IGNORE_HANGUL)!=0,
|
||||
UnicodeString::growBuffer, &result,
|
||||
&status);
|
||||
if(U_FAILURE(status)) {
|
||||
result.setToBogus();
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
/* ### end new implementation */
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
result.truncate(0);
|
||||
UnicodeString explodeBuf;
|
||||
|
||||
UTextOffset explodePos = EMPTY; // Position in input buffer
|
||||
UTextOffset basePos = 0; // Position of last base in output string
|
||||
uint16_t baseIndex = 0; // Index of last base in "actions" array
|
||||
uint32_t classesSeen[2]; // Combining classes seen since last base
|
||||
uint16_t action;
|
||||
|
||||
// Compatibility explosions have lower indices; skip them if necessary
|
||||
uint16_t minExplode = (uint16_t)(compat ? 0 : ComposeData::MAX_COMPAT);
|
||||
uint16_t minDecompLocal = (uint16_t)(compat ? 0 : DecompData::MAX_COMPAT);
|
||||
|
||||
UTextOffset i = 0;
|
||||
|
||||
emptyBitmask64(classesSeen);
|
||||
while (i < source.length() || explodePos != EMPTY) {
|
||||
// Get the next char from either the buffer or the source
|
||||
UChar ch;
|
||||
if (explodePos == EMPTY) {
|
||||
ch = source[i++];
|
||||
} else {
|
||||
ch = explodeBuf[explodePos++];
|
||||
if (explodePos >= explodeBuf.length()) {
|
||||
explodePos = EMPTY;
|
||||
explodeBuf.truncate(0);
|
||||
}
|
||||
}
|
||||
|
||||
// Get the basic info for the character
|
||||
uint16_t charInfo = composeLookup(ch);
|
||||
uint16_t type = (uint16_t)(charInfo & ComposeData::TYPE_MASK);
|
||||
uint16_t index = (uint16_t)(charInfo >> ComposeData::INDEX_SHIFT);
|
||||
|
||||
if (type == ComposeData::BASE ||
|
||||
(type == ComposeData::NON_COMPOSING_COMBINING && index < minExplode)) {
|
||||
emptyBitmask64(classesSeen);
|
||||
baseIndex = index;
|
||||
basePos = result.length();
|
||||
result += ch;
|
||||
}
|
||||
else if (type == ComposeData::COMBINING)
|
||||
{
|
||||
uint32_t cclass = ComposeData::typeBit[index]; // 0..63
|
||||
|
||||
// We can only combine a character with the base if we haven't
|
||||
// already seen a combining character with the same canonical class.
|
||||
// We also only combine characters with an index from
|
||||
// 1..COMBINING_COUNT-1. Indices >= COMBINING_COUNT are
|
||||
// non-combining; these formerly had an index of zero.
|
||||
if (index < ComposeData::COMBINING_COUNT
|
||||
&& !isSetBitmask64(classesSeen, cclass)
|
||||
&& (action = composeAction(baseIndex, index)) > 0)
|
||||
{
|
||||
if (action > ComposeData::MAX_COMPOSED) {
|
||||
// Pairwise explosion. Actions above this value are really
|
||||
// indices into an array that in turn contains indices
|
||||
// into the exploding string table
|
||||
// TODO: What if there are unprocessed chars in the explode buffer?
|
||||
UChar newBase = pairExplode(explodeBuf, action);
|
||||
explodePos = 0;
|
||||
result[basePos] = newBase;
|
||||
|
||||
baseIndex = (uint16_t)(composeLookup(newBase) >> ComposeData::INDEX_SHIFT);
|
||||
} else {
|
||||
// Normal pairwise combination. Replace the base char
|
||||
UChar newBase = (UChar) action;
|
||||
result[basePos] = newBase;
|
||||
|
||||
baseIndex = (uint16_t)(composeLookup(newBase) >> ComposeData::INDEX_SHIFT);
|
||||
}
|
||||
//
|
||||
// Since there are Unicode characters that cannot be combined in arbitrary
|
||||
// order, we have to re-process any combining marks that go with this
|
||||
// base character. There are only four characters in Unicode that have
|
||||
// this problem. If they are fixed in Unicode 3.0, this code can go away.
|
||||
//
|
||||
UTextOffset len = result.length();
|
||||
if (len - basePos > 1) {
|
||||
for (UTextOffset j = basePos+1; j < len; j++) {
|
||||
explodeBuf += result[j];
|
||||
}
|
||||
result.truncate(basePos+1);
|
||||
emptyBitmask64(classesSeen);
|
||||
if (explodePos == EMPTY) explodePos = 0;
|
||||
}
|
||||
} else {
|
||||
// No combination with this character
|
||||
bubbleAppend(result, ch, cclass);
|
||||
setBitmask64(classesSeen, cclass);
|
||||
}
|
||||
}
|
||||
else if (index > minExplode) {
|
||||
// Single exploding character
|
||||
explode(explodeBuf, index);
|
||||
explodePos = 0;
|
||||
}
|
||||
else if (type == ComposeData::HANGUL && minExplode == 0) {
|
||||
// If we're in compatibility mode we need to decompose Hangul to Jamo,
|
||||
// because some of the Jamo might have compatibility decompositions.
|
||||
hangulToJamo(ch, explodeBuf, minDecompLocal);
|
||||
explodePos = 0;
|
||||
}
|
||||
else if (type == ComposeData::INITIAL_JAMO) {
|
||||
emptyBitmask64(classesSeen);
|
||||
baseIndex = ComposeData::INITIAL_JAMO_INDEX;
|
||||
basePos = result.length();
|
||||
result += ch;
|
||||
}
|
||||
else if (type == ComposeData::MEDIAL_JAMO
|
||||
&& isEmptyBitmask64(classesSeen)
|
||||
&& baseIndex == ComposeData::INITIAL_JAMO_INDEX) {
|
||||
// If the last character was an initial jamo, we can combine it with this
|
||||
// one to create a Hangul character.
|
||||
uint16_t l = (uint16_t)(result[basePos] - (UChar)JAMO_LBASE);
|
||||
uint16_t v = (uint16_t)(ch - JAMO_VBASE);
|
||||
result[basePos] = (UChar)(HANGUL_BASE + (l*JAMO_VCOUNT + v) * JAMO_TCOUNT);
|
||||
|
||||
baseIndex = ComposeData::MEDIAL_JAMO_INDEX;
|
||||
}
|
||||
else if (type == ComposeData::FINAL_JAMO
|
||||
&& isEmptyBitmask64(classesSeen)
|
||||
&& baseIndex == ComposeData::MEDIAL_JAMO_INDEX) {
|
||||
// If the last character was a medial jamo that we turned into Hangul,
|
||||
// we can add this character too.
|
||||
result[basePos] = (UChar)(result[basePos] + (ch - JAMO_TBASE));
|
||||
|
||||
baseIndex = 0;
|
||||
basePos = -1;
|
||||
emptyBitmask64(classesSeen);
|
||||
} else {
|
||||
baseIndex = 0;
|
||||
basePos = -1;
|
||||
emptyBitmask64(classesSeen);
|
||||
result += ch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -707,68 +535,21 @@ Normalizer::decompose(const UnicodeString& source,
|
|||
UBool compat,
|
||||
int32_t options,
|
||||
UnicodeString& result,
|
||||
UErrorCode &status)
|
||||
{
|
||||
/* ### TODO: begin new implementation */
|
||||
if(unorm_usesNewImplementation()) {
|
||||
if(source.isBogus()) {
|
||||
UErrorCode &status) {
|
||||
if(source.isBogus()) {
|
||||
result.setToBogus();
|
||||
} else {
|
||||
/* make sure that we do not operate on the same buffer in source and result */
|
||||
result.cloneArrayIfNeeded(-1, source.length()+20, FALSE);
|
||||
result.fLength=unorm_decompose(result.fArray, result.fCapacity,
|
||||
source.fArray, source.fLength,
|
||||
compat, (options&IGNORE_HANGUL)!=0,
|
||||
UnicodeString::growBuffer, &result,
|
||||
&status);
|
||||
if(U_FAILURE(status)) {
|
||||
result.setToBogus();
|
||||
} else {
|
||||
/* make sure that we do not operate on the same buffer in source and result */
|
||||
result.cloneArrayIfNeeded(-1, source.length()+20, FALSE);
|
||||
result.fLength=unorm_decompose(result.fArray, result.fCapacity,
|
||||
source.fArray, source.fLength,
|
||||
compat, (options&IGNORE_HANGUL)!=0,
|
||||
UnicodeString::growBuffer, &result,
|
||||
&status);
|
||||
if(U_FAILURE(status)) {
|
||||
result.setToBogus();
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
/* ### end new implementation */
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
UBool hangul = (options & IGNORE_HANGUL) == 0;
|
||||
uint16_t minDecompLocal = (uint16_t)(compat ? 0 : DecompData::MAX_COMPAT);
|
||||
UnicodeString buffer;
|
||||
int32_t i = 0, bufPtr = -1;
|
||||
|
||||
result.truncate(0);
|
||||
|
||||
// Rewritten - Liu
|
||||
while (i < source.length() || bufPtr >= 0) {
|
||||
UChar ch;
|
||||
|
||||
if (bufPtr >= 0) {
|
||||
ch = buffer.charAt(bufPtr++);
|
||||
if (bufPtr == buffer.length()) {
|
||||
bufPtr = -1;
|
||||
}
|
||||
} else {
|
||||
ch = source[i++];
|
||||
}
|
||||
|
||||
uint16_t offset = ucmp16_getu(DecompData::offsets, ch);
|
||||
uint16_t index = (uint16_t)(offset & DecompData::DECOMP_MASK);
|
||||
|
||||
if (index > minDecompLocal) {
|
||||
if ((offset & DecompData::DECOMP_RECURSE) != 0) {
|
||||
buffer.truncate(0);
|
||||
doAppend((const UChar*)DecompData::contents, index, buffer);
|
||||
bufPtr = 0;
|
||||
} else {
|
||||
doAppend((const UChar*)DecompData::contents, index, result);
|
||||
}
|
||||
} else if (ch >= HANGUL_BASE && ch < HANGUL_LIMIT && hangul) {
|
||||
hangulToJamo(ch, result, minDecompLocal);
|
||||
} else {
|
||||
result += ch;
|
||||
}
|
||||
}
|
||||
fixCanonical(result);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -16,6 +16,10 @@
|
|||
* 02/23/01 synwee Modified quickcheck and checkFCE to run through
|
||||
* string for codepoints < 0x300 for the normalization
|
||||
* mode NFC.
|
||||
* 06/20/01+ Markus Scherer total rewrite, implement all normalization here
|
||||
* instead of just wrappers around normlzr.cpp,
|
||||
* load unorm.dat, support Unicode 3.1 with
|
||||
* supplementary code points, etc.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
@ -28,24 +32,7 @@
|
|||
#include "umutex.h"
|
||||
#include "unormimp.h"
|
||||
|
||||
/* added by synwee ### TODO: remove once the new implementation is finished */
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/utf16.h"
|
||||
|
||||
/* ### TODO: remove this once the new implementation is finished */
|
||||
static UBool useNewImplementation=FALSE;
|
||||
|
||||
/*
 * Temporary run-time switch: records in the file-level flag whether the
 * new normalization implementation should be used.
 */
U_CAPI void U_EXPORT2
unorm_setNewImplementation(UBool useNew)
{
    useNewImplementation = useNew;
}
|
||||
|
||||
/*
 * Returns the current value of the file-level flag that was last stored by
 * unorm_setNewImplementation() (initially FALSE).
 */
U_CAPI UBool U_EXPORT2
unorm_usesNewImplementation()
{
    return useNewImplementation;
}
|
||||
|
||||
/* new implementation ------------------------------------------------------- */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/* Korean Hangul and Jamo constants */
|
||||
enum {
|
||||
|
@ -181,6 +168,15 @@ unorm_haveData(UErrorCode *pErrorCode) {
|
|||
return _haveData(*pErrorCode);
|
||||
}
|
||||
|
||||
/*
 * Hands out the FCD trie index array.
 * Access is gated by _haveData(): when it reports that the data is not
 * available, NULL is returned instead of the array.
 */
U_CAPI const uint16_t * U_EXPORT2
unorm_getFCDTrie(UErrorCode *pErrorCode) {
    if(!_haveData(*pErrorCode)) {
        return NULL;
    }
    return fcdTrieIndex;
}
|
||||
|
||||
/* data access primitives --------------------------------------------------- */
|
||||
|
||||
inline uint32_t
|
||||
|
@ -625,8 +621,8 @@ unorm_checkFCD(const UChar *src, int32_t srcLength) {
|
|||
}
|
||||
}
|
||||
|
||||
static UNormalizationCheckResult
|
||||
_unorm_quickCheck(const UChar *src,
|
||||
U_CAPI UNormalizationCheckResult U_EXPORT2
|
||||
unorm_quickCheck(const UChar *src,
|
||||
int32_t srcLength,
|
||||
UNormalizationMode mode,
|
||||
UErrorCode *pErrorCode) {
|
||||
|
@ -751,7 +747,7 @@ U_CFUNC int32_t
|
|||
unorm_decompose(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBool compat, UBool ignoreHangul,
|
||||
GrowBuffer *growBuffer, void *context,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode) {
|
||||
UChar buffer[3];
|
||||
const UChar *limit, *prevSrc, *p;
|
||||
|
@ -1046,7 +1042,7 @@ _findSafeFCD(const UChar *src, const UChar *limit, uint16_t fcd16) {
|
|||
static uint8_t
|
||||
_decomposeFCD(const UChar *src, const UChar *decompLimit, const UChar *limit,
|
||||
UChar *dest, int32_t &destIndex, int32_t &destCapacity,
|
||||
UBool canGrow, GrowBuffer *growBuffer, void *context) {
|
||||
UBool canGrow, UGrowBuffer *growBuffer, void *context) {
|
||||
UChar *reorderStart;
|
||||
const UChar *p;
|
||||
uint32_t norm32;
|
||||
|
@ -1167,7 +1163,7 @@ _decomposeFCD(const UChar *src, const UChar *decompLimit, const UChar *limit,
|
|||
static int32_t
|
||||
unorm_makeFCD(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
GrowBuffer *growBuffer, void *context,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode) {
|
||||
const UChar *limit, *prevSrc, *decompStart;
|
||||
int32_t destIndex, length;
|
||||
|
@ -1989,7 +1985,7 @@ U_CFUNC int32_t
|
|||
unorm_compose(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBool compat, UBool /* ### TODO: need to do this? -- ignoreHangul -- ### */,
|
||||
GrowBuffer *growBuffer, void *context,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode) {
|
||||
UChar stackBuffer[_STACK_BUFFER_CAPACITY];
|
||||
UChar *buffer;
|
||||
|
@ -2271,7 +2267,7 @@ U_CFUNC int32_t
|
|||
unorm_internalNormalize(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UNormalizationMode mode, UBool ignoreHangul,
|
||||
GrowBuffer *growBuffer, void *context,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode) {
|
||||
switch(mode) {
|
||||
case UNORM_NFD:
|
||||
|
@ -2329,638 +2325,36 @@ unorm_internalNormalize(UChar *dest, int32_t destCapacity,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* old implementation ------------------------------------------------------- */
|
||||
|
||||
/* added by synwee for trie manipulation*/
|
||||
#define STAGE_1_SHIFT_ 10
|
||||
#define STAGE_2_SHIFT_ 4
|
||||
#define STAGE_2_MASK_AFTER_SHIFT_ 0x3F
|
||||
#define STAGE_3_MASK_ 0xF
|
||||
#define LAST_BYTE_MASK_ 0xFF
|
||||
#define SECOND_LAST_BYTE_SHIFT_ 8
|
||||
|
||||
/* added by synwee for fast route in quickcheck and fcd */
|
||||
#define NFC_ZERO_CC_BLOCK_LIMIT_ 0x300
|
||||
|
||||
/*
|
||||
* for a description of the file format,
|
||||
* see icu/source/tools/genqchk/genqchk.c
|
||||
*/
|
||||
#define QCHK_DATA_NAME "qchk"
|
||||
#define FCHK_DATA_NAME "fchk"
|
||||
#define DATA_TYPE "dat"
|
||||
|
||||
static UDataMemory *quickcheckData = NULL;
|
||||
static UDataMemory *fcdcheckData = NULL;
|
||||
|
||||
/**
|
||||
* Authentication values
|
||||
*/
|
||||
static const uint8_t QCHK_DATA_FORMAT_[] = {0x71, 0x63, 0x68, 0x6b};
|
||||
static const uint8_t FCHK_DATA_FORMAT_[] = {0x66, 0x63, 0x68, 0x6b};
|
||||
static const uint8_t QCHK_FORMAT_VERSION_[] = {1, 0, 0, 0};
|
||||
static const uint8_t FCHK_FORMAT_VERSION_[] = {1, 0, 0, 0};
|
||||
|
||||
/**
|
||||
* index values loaded from qchk.dat.
|
||||
* static uint16_t indexes[8];
|
||||
*/
|
||||
enum {
|
||||
QCHK_INDEX_STAGE_2_BITS,
|
||||
QCHK_INDEX_STAGE_3_BITS,
|
||||
QCHK_INDEX_MIN_VALUES_SIZE,
|
||||
QCHK_INDEX_STAGE_1_INDEX,
|
||||
QCHK_INDEX_STAGE_2_INDEX,
|
||||
QCHK_INDEX_STAGE_3_INDEX
|
||||
};
|
||||
|
||||
/**
|
||||
* index values loaded from fchk.dat.
|
||||
* static uint16_t indexes[8];
|
||||
*/
|
||||
enum {
|
||||
FCHK_INDEX_STAGE_2_BITS,
|
||||
FCHK_INDEX_STAGE_3_BITS,
|
||||
FCHK_INDEX_STAGE_1_INDEX,
|
||||
FCHK_INDEX_STAGE_2_INDEX,
|
||||
FCHK_INDEX_STAGE_3_INDEX
|
||||
};
|
||||
|
||||
/**
|
||||
* Array of masks for determining normalization quick check values.
|
||||
* Indexes follow the values in UNormalizationMode
|
||||
*/
|
||||
static const uint8_t QCHK_MASK_[] = {0, 0, 0x11, 0x22, 0x44, 0x88};
|
||||
/**
|
||||
* Array of minimum codepoints that have UNORM_MAYBE or UNORM_NO quick check
|
||||
* values. Indexes follow the values in UNormalizationMode.
|
||||
* Generated values! Edit at your own risk.
|
||||
*/
|
||||
static const UChar32 *QCHK_MIN_VALUES_;
|
||||
|
||||
/**
|
||||
* Flag to indicate if data has been loaded
|
||||
*/
|
||||
static UBool isQuickCheckLoaded = FALSE;
|
||||
static UBool isFCDCheckLoaded = FALSE;
|
||||
|
||||
/**
|
||||
* Minimum value to determine if quickcheck value contains a MAYBE
|
||||
*/
|
||||
static const uint8_t MIN_UNORM_MAYBE_ = 0x10;
|
||||
|
||||
/**
|
||||
* Array of normalization form corresponding to the index code point.
|
||||
* Hence codepoint 0xABCD will have normalization form QUICK_CHECK_DATA[0xABCD].
|
||||
* UQUICK_CHECK_DATA[0xABCD] is a byte containing 2 sets of 4 bits information
|
||||
* representing UNORM_MAYBE and UNORM_YES.<br>
|
||||
* bits 1 2 3 4 5678<br>
|
||||
* NFKC NFC NFKD NFD MAYBES NFKC NFC NFKD NFD YES<br>
|
||||
* ie if UQUICK_CHECK_DATA[0xABCD] = 10000001, this means that 0xABCD is in
|
||||
* NFD form and maybe in NFKC form
|
||||
*/
|
||||
static const uint16_t *QCHK_STAGE_1_;
|
||||
static const uint16_t *QCHK_STAGE_2_;
|
||||
static const uint8_t *QCHK_STAGE_3_;
|
||||
|
||||
/**
|
||||
* Trie data for FCD.
|
||||
* Each index corresponds to each code point.
|
||||
* Trie value is the combining class of the first and the last character of the
|
||||
* NFD of the codepoint.
|
||||
* size uint16_t for the first 2 stages instead of uint32_t to reduce size.
|
||||
*/
|
||||
static const uint16_t *FCHK_STAGE_1_;
|
||||
static const uint16_t *FCHK_STAGE_2_;
|
||||
static const uint16_t *FCHK_STAGE_3_;
|
||||
|
||||
/** Public API for normalizing. */
|
||||
U_CAPI int32_t
|
||||
unorm_normalize(const UChar* src,
|
||||
int32_t srcLength,
|
||||
UNormalizationMode mode,
|
||||
int32_t option,
|
||||
UChar* dest,
|
||||
int32_t destCapacity,
|
||||
UErrorCode* pErrorCode)
|
||||
{
|
||||
if(useNewImplementation) {
|
||||
/* check argument values */
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if( destCapacity<0 || (dest==NULL && destCapacity>0) ||
|
||||
src==NULL || srcLength<-1
|
||||
) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* check for overlapping src and destination */
|
||||
/* ### TODO: real API may provide a temp buffer */
|
||||
if( (src>=dest && src<(dest+destCapacity)) ||
|
||||
(srcLength>0 && dest>=src && dest<(src+srcLength))
|
||||
) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return unorm_internalNormalize(dest, destCapacity,
|
||||
src, srcLength,
|
||||
mode, (UBool)((option&UNORM_IGNORE_HANGUL)!=0),
|
||||
NULL, NULL,
|
||||
pErrorCode);
|
||||
unorm_normalize(const UChar *src, int32_t srcLength,
|
||||
UNormalizationMode mode, int32_t option,
|
||||
UChar *dest, int32_t destCapacity,
|
||||
UErrorCode *pErrorCode) {
|
||||
/* check argument values */
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) return -1;
|
||||
if( destCapacity<0 || (dest==NULL && destCapacity>0) ||
|
||||
src==NULL || srcLength<-1
|
||||
) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* synwee : removed hard coded conversion */
|
||||
Normalizer::EMode normMode = Normalizer::getNormalizerEMode(mode, *pErrorCode);
|
||||
if (U_FAILURE(*pErrorCode))
|
||||
return -1;
|
||||
/* check for overlapping src and destination */
|
||||
/* ### TODO: real API may provide a temp buffer */
|
||||
if( (src>=dest && src<(dest+destCapacity)) ||
|
||||
(srcLength>0 && dest>=src && dest<(src+srcLength))
|
||||
) {
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t len = (srcLength == -1 ? u_strlen(src) : srcLength);
|
||||
const UnicodeString source(srcLength == -1, src, len);
|
||||
UnicodeString dst(dest, 0, destCapacity);
|
||||
/* synwee : note quickcheck is added in C ++ normalize method */
|
||||
if ((option & UNORM_IGNORE_HANGUL) != 0)
|
||||
option = Normalizer::IGNORE_HANGUL;
|
||||
Normalizer::normalize(source, normMode, option, dst, *pErrorCode);
|
||||
return uprv_fillOutputString(dst, dest, destCapacity, pErrorCode);
|
||||
}
|
||||
|
||||
/*
 * Acceptance callback handed to udata_openChoice() for the quick check data.
 * A data item is accepted only if its UDataInfo is at least 20 bytes long,
 * matches this platform's endianness and charset family, and carries the
 * expected data format (QCHK_DATA_FORMAT_) and format version
 * (QCHK_FORMAT_VERSION_).
 *
 * @param context unused
 * @param type    unused
 * @param name    unused
 * @param pInfo   header information of the candidate data item
 * @return TRUE if the data item can be used, FALSE otherwise
 */
static UBool U_CALLCONV
isQuickCheckAcceptable(void *context,
                       const char *type, const char *name,
                       const UDataInfo *pInfo) {
    /* the callback signature is fixed; these arguments are not needed */
    (void)context;
    (void)type;
    (void)name;

    if(pInfo->size < 20) {
        return FALSE;
    }
    if(pInfo->isBigEndian != U_IS_BIG_ENDIAN) {
        return FALSE;
    }
    if(pInfo->charsetFamily != U_CHARSET_FAMILY) {
        return FALSE;
    }
    if(uprv_memcmp(pInfo->dataFormat, QCHK_DATA_FORMAT_,
                   sizeof(QCHK_DATA_FORMAT_)) != 0) {
        return FALSE;
    }
    if(uprv_memcmp(pInfo->formatVersion, QCHK_FORMAT_VERSION_,
                   sizeof(QCHK_FORMAT_VERSION_)) != 0) {
        return FALSE;
    }
    return TRUE;
}
|
||||
|
||||
/*
 * Loads the quick check data on first use and publishes the table pointers
 * (QCHK_MIN_VALUES_, QCHK_STAGE_1_/2_/3_) in file-level statics.
 *
 * The data item is opened outside the mutex; the statics are only assigned
 * inside the mutex and only if no data has been installed yet. If the data
 * had already been installed, the item opened here is closed again.
 *
 * @param error in/out error code; nothing is loaded if it already indicates
 *              a failure
 * @return the value of isQuickCheckLoaded after the call
 */
static UBool
loadQuickCheckData(UErrorCode *error) {
    UDataMemory *opened;

    /* already loaded, or not allowed to try because of an earlier failure */
    if (isQuickCheckLoaded || U_FAILURE(*error)) {
        return isQuickCheckLoaded;
    }

    /* open the data outside the mutex block */
    opened = udata_openChoice(NULL, DATA_TYPE, QCHK_DATA_NAME,
                              isQuickCheckAcceptable, NULL, error);
    if (U_FAILURE(*error)) {
        isQuickCheckLoaded = FALSE;
        return FALSE;
    }

    /* inside the mutex block: install the data unless it is already there */
    umtx_lock(NULL);
    if (quickcheckData == NULL) {
        /* the data starts with 8 uint16_t index values, tables follow */
        const uint16_t *header = (const uint16_t *)udata_getMemory(opened);
        const uint16_t *payload = header + 8;

        quickcheckData = opened;

        QCHK_MIN_VALUES_ = (const UChar32 *)payload;
        QCHK_STAGE_1_ = payload + header[QCHK_INDEX_STAGE_1_INDEX];
        QCHK_STAGE_2_ = payload + header[QCHK_INDEX_STAGE_2_INDEX];
        QCHK_STAGE_3_ = (const uint8_t *)(payload +
                                          header[QCHK_INDEX_STAGE_3_INDEX]);

        /* ownership moved to quickcheckData; nothing left to close */
        opened = NULL;
    }
    umtx_unlock(NULL);

    isQuickCheckLoaded = TRUE;

    /* still non-NULL only if the data had been installed before */
    if (opened != NULL) {
        udata_close(opened);
    }

    return isQuickCheckLoaded;
}
|
||||
|
||||
/**
|
||||
* Performing quick check on a string, to quickly determine if the string is
|
||||
* in a particular normalization format.
|
||||
* Three types of result can be returned UNORM_YES, UNORM_NO or
|
||||
* UNORM_MAYBE. Result UNORM_YES indicates that the argument
|
||||
* string is in the desired normalized format, UNORM_NO determines that
|
||||
* argument string is not in the desired normalized format. A
|
||||
* UNORM_MAYBE result indicates that a more thorough check is required,
|
||||
* the user may have to put the string in its normalized form and compare the
|
||||
* results.
|
||||
* @param source string for determining if it is in a normalized format
|
||||
* @param sourcelength length of source to test
|
||||
* @param mode normalization format from the enum UNormalizationMode
|
||||
* @param status A pointer to an UErrorCode to receive any errors
|
||||
* @return UNORM_YES, UNORM_NO or UNORM_MAYBE
|
||||
*/
|
||||
/*
 * Implementation notes for the old, table-driven quick check:
 *  - If useNewImplementation is set, the call is forwarded unchanged to
 *    _unorm_quickCheck().
 *  - The mode is validated BEFORE it is used to index QCHK_MASK_ and
 *    QCHK_MIN_VALUES_ (previously both tables were indexed first, which read
 *    out of bounds for an invalid mode). The mode is additionally checked
 *    against the actual number of entries in QCHK_MASK_.
 *  - For a NUL-terminated source (sourcelength < 0) the end of the string is
 *    located first, so that a real end pointer is available. Previously the
 *    remaining length passed to UTF_NEXT_CHAR was computed from a null end
 *    pointer in that case.
 *  - Returns UNORM_MAYBE together with a failure status if the data cannot
 *    be loaded, the incoming status is a failure, or the mode is invalid.
 */
U_CAPI UNormalizationCheckResult
unorm_quickCheck(const UChar *source,
                 int32_t sourcelength,
                 UNormalizationMode mode,
                 UErrorCode* status)
{
    uint8_t oldcombiningclass = 0;
    uint8_t combiningclass;
    uint8_t quickcheckvalue;
    uint8_t mask;
    UChar32 min;
    UChar32 codepoint;
    UNormalizationCheckResult result = UNORM_YES;
    const UChar *psource;
    const UChar *pend;

    if(useNewImplementation) {
        return _unorm_quickCheck(source, sourcelength, mode, status);
    }

    if (!loadQuickCheckData(status) || U_FAILURE(*status)) {
        return UNORM_MAYBE;
    }

    /* check the mode before it is used as an index into any table */
    if (mode < UNORM_NONE || mode >= UNORM_MODE_COUNT ||
        (uint32_t)mode >= sizeof(QCHK_MASK_) / sizeof(QCHK_MASK_[0])) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return UNORM_MAYBE;
    }

    mask = QCHK_MASK_[mode];
    min = QCHK_MIN_VALUES_[mode];

    /* determine the end of the text for both length conventions */
    psource = source;
    if (sourcelength >= 0) {
        pend = source + sourcelength;
    } else {
        pend = source;
        while (*pend != 0) {
            ++pend;
        }
    }

    /* fast route : since codepoints < min has combining class 0 and YES
       looking at the minimum values, surrogates are not a problem */
    while (psource < pend && *psource < min) {
        ++psource;
    }
    if (psource >= pend) {
        return UNORM_YES;
    }

    while (psource < pend) {
        int count = 0;

        /* reads one code point; count receives the number of units used */
        UTF_NEXT_CHAR(psource, count, pend - psource, codepoint);
        combiningclass = u_getCombiningClass(codepoint);

        /* not in canonical order */
        if (oldcombiningclass > combiningclass && combiningclass != 0) {
            return UNORM_NO;
        }

        oldcombiningclass = combiningclass;

        /* trie access */
        quickcheckvalue = (uint8_t)(QCHK_STAGE_3_[
              QCHK_STAGE_2_[QCHK_STAGE_1_[codepoint >> STAGE_1_SHIFT_] +
                ((codepoint >> STAGE_2_SHIFT_) & STAGE_2_MASK_AFTER_SHIFT_)] +
              (codepoint & STAGE_3_MASK_)] & mask);

        /* value is a byte containing 2 sets of 4 bits information.
           bits 1 2 3 4                        5678
           NFKC NFC NFKD NFD MAYBES            NFKC NFC NFKD NFD YES
           ie if quick[0xABCD] = 10000001, this means that 0xABCD is in NFD
           form and maybe in NFKC form. */
        if (quickcheckvalue == 0) {
            return UNORM_NO;
        }
        if (quickcheckvalue >= MIN_UNORM_MAYBE_) {
            result = UNORM_MAYBE;
        }
        psource += count;
    }

    return result;
}
|
||||
|
||||
/* private methods ---------------------------------------------------------- */
|
||||
|
||||
/*
 * Acceptance callback handed to udata_openChoice() for the FCD check data.
 * A data item is accepted only if its UDataInfo is at least 20 bytes long,
 * matches this platform's endianness and charset family, and carries the
 * expected data format (FCHK_DATA_FORMAT_) and format version
 * (FCHK_FORMAT_VERSION_).
 *
 * @param context unused
 * @param type    unused
 * @param name    unused
 * @param pInfo   header information of the candidate data item
 * @return TRUE if the data item can be used, FALSE otherwise
 */
static UBool U_CALLCONV
isFCDCheckAcceptable(void *context,
                     const char *type, const char *name,
                     const UDataInfo *pInfo) {
    /* the callback signature is fixed; these arguments are not needed */
    (void)context;
    (void)type;
    (void)name;

    if(pInfo->size < 20) {
        return FALSE;
    }
    if(pInfo->isBigEndian != U_IS_BIG_ENDIAN) {
        return FALSE;
    }
    if(pInfo->charsetFamily != U_CHARSET_FAMILY) {
        return FALSE;
    }
    if(uprv_memcmp(pInfo->dataFormat, FCHK_DATA_FORMAT_,
                   sizeof(FCHK_DATA_FORMAT_)) != 0) {
        return FALSE;
    }
    if(uprv_memcmp(pInfo->formatVersion, FCHK_FORMAT_VERSION_,
                   sizeof(FCHK_FORMAT_VERSION_)) != 0) {
        return FALSE;
    }
    return TRUE;
}
|
||||
|
||||
/*
 * Loads the FCD check data on first use and publishes the trie stage
 * pointers (FCHK_STAGE_1_/2_/3_) in file-level statics.
 *
 * The data item is opened outside the mutex; the statics are only assigned
 * inside the mutex and only if no data has been installed yet. If the data
 * had already been installed, the item opened here is closed again.
 *
 * @param error in/out error code; nothing is loaded if it already indicates
 *              a failure
 * @return the value of isFCDCheckLoaded after the call
 */
static UBool
loadFCDCheckData(UErrorCode *error) {
    UDataMemory *opened;

    /* already loaded, or not allowed to try because of an earlier failure */
    if (isFCDCheckLoaded || U_FAILURE(*error)) {
        return isFCDCheckLoaded;
    }

    /* open the data outside the mutex block */
    opened = udata_openChoice(NULL, DATA_TYPE, FCHK_DATA_NAME,
                              isFCDCheckAcceptable, NULL, error);
    if (U_FAILURE(*error)) {
        isFCDCheckLoaded = FALSE;
        return FALSE;
    }

    /* inside the mutex block: install the data unless it is already there */
    umtx_lock(NULL);
    if (fcdcheckData == NULL) {
        /* the data starts with 8 uint16_t index values, tables follow */
        const uint16_t *header = (const uint16_t *)udata_getMemory(opened);
        const uint16_t *payload = header + 8;

        fcdcheckData = opened;

        FCHK_STAGE_1_ = payload + header[FCHK_INDEX_STAGE_1_INDEX];
        FCHK_STAGE_2_ = payload + header[FCHK_INDEX_STAGE_2_INDEX];
        FCHK_STAGE_3_ = (const uint16_t *)(payload +
                                           header[FCHK_INDEX_STAGE_3_INDEX]);

        /* ownership moved to fcdcheckData; nothing left to close */
        opened = NULL;
    }
    umtx_unlock(NULL);

    isFCDCheckLoaded = TRUE;

    /* still non-NULL only if the data had been installed before */
    if (opened != NULL) {
        udata_close(opened);
    }

    return isFCDCheckLoaded;
}
|
||||
|
||||
/**
|
||||
* Gets the stage 1 data for checkFCD.
|
||||
* @param error status
|
||||
* @return checkFCD data stage 1, null if data can not be loaded
|
||||
*/
|
||||
U_CAPI const uint16_t * getFCHK_STAGE_1_(UErrorCode *error)
|
||||
{
|
||||
if (loadFCDCheckData(error)) {
|
||||
return FCHK_STAGE_1_;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the stage 2 data for checkFCD.
|
||||
* @param error status
|
||||
* @return checkFCD data stage 2, null if data can not be loaded
|
||||
*/
|
||||
U_CAPI const uint16_t * getFCHK_STAGE_2_(UErrorCode *error)
|
||||
{
|
||||
if (loadFCDCheckData(error)) {
|
||||
return FCHK_STAGE_2_;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the stage 3 data for checkFCD.
|
||||
* @param error status
|
||||
* @return checkFCD data stage 3, null if data can not be loaded
|
||||
*/
|
||||
U_CAPI const uint16_t * getFCHK_STAGE_3_(UErrorCode *error)
|
||||
{
|
||||
if (loadFCDCheckData(error)) {
|
||||
return FCHK_STAGE_3_;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Private method which performs a quick FCD check on a string, to quickly
|
||||
* determine if a string is in a required FCD format.
|
||||
* FCD is the set of strings such that for each character in the string,
|
||||
* decomposition without any canonical reordering will produce a NFD.
|
||||
* @param source string for determining if it is in a normalized format
|
||||
* @param sourcelength length of source to test
|
||||
* @param mode normalization format from the enum UNormalizationMode
|
||||
* @param status A pointer to an UErrorCode to receive any errors
|
||||
* @return TRUE if source is in FCD format, FALSE otherwise
|
||||
*/
|
||||
U_CAPI UBool
|
||||
checkFCD(const UChar* source, int32_t sourcelength, UErrorCode* status)
|
||||
{
|
||||
if(useNewImplementation) {
|
||||
return UNORM_YES==unorm_quickCheck(source, sourcelength, UNORM_FCD, status);
|
||||
}
|
||||
|
||||
UChar32 codepoint;
|
||||
const UChar *psource;
|
||||
const UChar *pend = 0;
|
||||
uint8_t oldfcdtrail = 0;
|
||||
uint16_t fcd = 0;
|
||||
|
||||
if (!loadFCDCheckData(status) || U_FAILURE(*status)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (sourcelength >= 0) {
|
||||
psource = source;
|
||||
pend = source + sourcelength;
|
||||
for (;;) {
|
||||
if (psource >= pend) {
|
||||
return TRUE;
|
||||
}
|
||||
/* fast route : since codepoints < NFC_ZERO_CC_BLOCK_LIMIT_ have
|
||||
combining class 0.
|
||||
looking at the minimum values, surrogates are not a problem */
|
||||
if (*psource >= NFC_ZERO_CC_BLOCK_LIMIT_) {
|
||||
break;
|
||||
}
|
||||
psource ++;
|
||||
}
|
||||
}
|
||||
else {
|
||||
psource = source;
|
||||
for (;;) {
|
||||
if (*psource == 0) {
|
||||
return TRUE;
|
||||
}
|
||||
/* fast route : since codepoints < min has combining class 0 and YES
|
||||
looking at the minimum values, surrogates are not a problem */
|
||||
if (*psource >= NFC_ZERO_CC_BLOCK_LIMIT_) {
|
||||
break;
|
||||
}
|
||||
psource ++;
|
||||
}
|
||||
}
|
||||
|
||||
/* not end of string and yet failed simple compare
|
||||
safe to shift back one char because the previous char has to be < 0x300 or the
|
||||
start of a string */
|
||||
if (psource == source) {
|
||||
oldfcdtrail = 0;
|
||||
}
|
||||
else {
|
||||
codepoint = *(psource - 1);
|
||||
oldfcdtrail = (uint8_t)(FCHK_STAGE_3_[
|
||||
FCHK_STAGE_2_[FCHK_STAGE_1_[codepoint >> STAGE_1_SHIFT_] +
|
||||
((codepoint >> STAGE_2_SHIFT_) & STAGE_2_MASK_AFTER_SHIFT_)]
|
||||
+ (codepoint & STAGE_3_MASK_)] & LAST_BYTE_MASK_);
|
||||
}
|
||||
|
||||
if (sourcelength >= 0) {
|
||||
for (;;) {
|
||||
int count = 0;
|
||||
uint8_t lead;
|
||||
|
||||
if (psource >= pend) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
UTF_NEXT_CHAR(psource, count, pend - psource, codepoint);
|
||||
|
||||
/* trie access */
|
||||
fcd = FCHK_STAGE_3_[
|
||||
FCHK_STAGE_2_[FCHK_STAGE_1_[codepoint >> STAGE_1_SHIFT_] +
|
||||
((codepoint >> STAGE_2_SHIFT_) & STAGE_2_MASK_AFTER_SHIFT_)] +
|
||||
(codepoint & STAGE_3_MASK_)];
|
||||
lead = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
|
||||
|
||||
if (lead != 0 && oldfcdtrail > lead) {
|
||||
return FALSE;
|
||||
}
|
||||
oldfcdtrail = (uint8_t)(fcd & LAST_BYTE_MASK_);
|
||||
|
||||
psource += count;
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (;;) {
|
||||
int count = 0;
|
||||
uint8_t lead;
|
||||
|
||||
UTF_NEXT_CHAR(psource, count, pend - psource, codepoint);
|
||||
if (codepoint == 0) {
|
||||
return TRUE;
|
||||
}
|
||||
/* trie access */
|
||||
fcd = FCHK_STAGE_3_[
|
||||
FCHK_STAGE_2_[FCHK_STAGE_1_[codepoint >> STAGE_1_SHIFT_] +
|
||||
((codepoint >> STAGE_2_SHIFT_) & STAGE_2_MASK_AFTER_SHIFT_)] +
|
||||
(codepoint & STAGE_3_MASK_)];
|
||||
|
||||
lead = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
|
||||
|
||||
if (lead != 0 && oldfcdtrail > lead) {
|
||||
return FALSE;
|
||||
}
|
||||
oldfcdtrail = (uint8_t)(fcd & LAST_BYTE_MASK_);
|
||||
psource += count;
|
||||
}
|
||||
}
|
||||
return TRUE;
|
||||
return unorm_internalNormalize(dest, destCapacity,
|
||||
src, srcLength,
|
||||
mode, (UBool)((option&UNORM_IGNORE_HANGUL)!=0),
|
||||
NULL, NULL,
|
||||
pErrorCode);
|
||||
}
|
||||
|
|
|
@ -146,7 +146,7 @@ U_CFUNC int32_t
|
|||
unorm_internalNormalize(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UNormalizationMode mode, UBool ignoreHangul,
|
||||
GrowBuffer *growBuffer, void *context,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
|
@ -157,7 +157,7 @@ U_CFUNC int32_t
|
|||
unorm_decompose(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBool compat, UBool ignoreHangul,
|
||||
GrowBuffer *growBuffer, void *context,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
|
@ -168,21 +168,72 @@ U_CFUNC int32_t
|
|||
unorm_compose(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UBool compat, UBool ignoreHangul,
|
||||
GrowBuffer *growBuffer, void *context,
|
||||
UGrowBuffer *growBuffer, void *context,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* internal API, but used by tests
|
||||
* internal API, used by collation code
|
||||
* Get access to the internal FCD trie table to be able to perform
|
||||
* incremental, per-code unit, FCD checks in collation.
|
||||
* One pointer is sufficient because the trie index values are offset
|
||||
* by the index size, so that the same pointer is used to access the trie data.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
unorm_setNewImplementation(UBool useNew);
|
||||
U_CAPI const uint16_t * U_EXPORT2
|
||||
unorm_getFCDTrie(UErrorCode *pErrorCode);
|
||||
|
||||
#ifdef XP_CPLUSPLUS
|
||||
|
||||
/**
|
||||
* internal API, but used by tests
|
||||
* internal API, used by collation code
|
||||
* Get the FCD value for a code unit, with
|
||||
* bits 15..8 lead combining class
|
||||
* bits 7..0 trail combining class
|
||||
*
|
||||
* If c is a lead surrogate and the value is not 0,
|
||||
* then instead of combining classes the value
|
||||
* is used in unorm_getFCD16FromSurrogatePair() to get the real value
|
||||
* of the supplementary code point.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
unorm_usesNewImplementation();
|
||||
inline uint16_t
|
||||
unorm_getFCD16(const uint16_t *fcdTrieIndex, UChar c) {
|
||||
return
|
||||
fcdTrieIndex[
|
||||
fcdTrieIndex[
|
||||
c>>_NORM_TRIE_SHIFT
|
||||
]+
|
||||
(c&_NORM_STAGE_2_MASK)
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* internal API, used by collation code
|
||||
* Get the FCD value for a supplementary code point, with
|
||||
* bits 15..8 lead combining class
|
||||
* bits 7..0 trail combining class
|
||||
*
|
||||
* @param fcd16 The FCD value for the lead surrogate, not 0.
|
||||
* @param c2 The trail surrogate code unit.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
inline uint16_t
|
||||
unorm_getFCD16FromSurrogatePair(const uint16_t *fcdTrieIndex, uint16_t fcd16, UChar c2) {
|
||||
/* the surrogate index in fcd16 is an absolute offset over the start of stage 1 */
|
||||
uint32_t c=
|
||||
((uint32_t)fcd16<<10)|
|
||||
(c2&0x3ff);
|
||||
return
|
||||
fcdTrieIndex[
|
||||
fcdTrieIndex[
|
||||
c>>_NORM_TRIE_SHIFT
|
||||
]+
|
||||
(c&_NORM_STAGE_2_MASK)
|
||||
];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "unicode/unorm.h"
|
||||
#include "unicode/udata.h"
|
||||
|
||||
#include "unormimp.h"
|
||||
#include "cpputils.h"
|
||||
#include "cstring.h"
|
||||
#include "ucmp32.h"
|
||||
|
@ -51,8 +52,6 @@
|
|||
|
||||
static UCollator* UCA = NULL;
|
||||
|
||||
extern "C" UBool checkFCD(const UChar*, int32_t, UErrorCode*);
|
||||
|
||||
U_CDECL_BEGIN
|
||||
static UBool U_CALLCONV
|
||||
isAcceptableUCA(void * /*context*/,
|
||||
|
@ -672,14 +671,7 @@ void ucol_putOptionsToHeader(UCollator* result, UColOptionSet * opts, UErrorCode
|
|||
opts->alternateHandling = result->alternateHandling;
|
||||
}
|
||||
|
||||
|
||||
U_CAPI const uint16_t * getFCHK_STAGE_1_(UErrorCode *);
|
||||
U_CAPI const uint16_t * getFCHK_STAGE_2_(UErrorCode *);
|
||||
U_CAPI const uint16_t * getFCHK_STAGE_3_(UErrorCode *);
|
||||
|
||||
static const uint16_t *FCD_STAGE_1_;
|
||||
static const uint16_t *FCD_STAGE_2_;
|
||||
static const uint16_t *FCD_STAGE_3_;
|
||||
static const uint16_t *fcdTrieIndex=NULL;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -807,14 +799,8 @@ UCollator* ucol_initCollator(const UCATableHeader *image, UCollator *fillIn, UEr
|
|||
result->expansionCESize = (uint8_t*)result->image +
|
||||
result->image->expansionCESize;
|
||||
|
||||
if (FCD_STAGE_1_ == NULL) {
|
||||
FCD_STAGE_1_ = getFCHK_STAGE_1_(status);
|
||||
}
|
||||
if (FCD_STAGE_2_ == NULL) {
|
||||
FCD_STAGE_2_ = getFCHK_STAGE_2_(status);
|
||||
}
|
||||
if (FCD_STAGE_3_ == NULL) {
|
||||
FCD_STAGE_3_ = getFCHK_STAGE_3_(status);
|
||||
if (fcdTrieIndex == NULL) {
|
||||
fcdTrieIndex = unorm_getFCDTrie(status);
|
||||
}
|
||||
|
||||
result->errorCode = *status;
|
||||
|
@ -929,10 +915,8 @@ void collIterNormalize(collIterate *collationSource)
|
|||
/* True because the previous call to this function will have always exited */
|
||||
/* that way, and we get called for every char where cc might be non-zero. */
|
||||
inline UBool collIterFCD(collIterate *collationSource) {
|
||||
UChar32 codepoint;
|
||||
UChar *srcP;
|
||||
int32_t length;
|
||||
int32_t count = 0;
|
||||
UChar c, c2;
|
||||
const UChar *srcP, *endP;
|
||||
uint8_t leadingCC;
|
||||
uint8_t prevTrailingCC = 0;
|
||||
uint16_t fcd;
|
||||
|
@ -940,52 +924,64 @@ inline UBool collIterFCD(collIterate *collationSource) {
|
|||
|
||||
srcP = collationSource->pos-1;
|
||||
|
||||
// If the source string is null terminated, use a fake too-long string length
|
||||
// (needed for UTF_NEXT_CHAR). null will stop everything OK.)
|
||||
length = (collationSource->flags & UCOL_ITER_HASLEN) ? collationSource->endp - srcP : INT32_MAX;
|
||||
if (collationSource->flags & UCOL_ITER_HASLEN) {
|
||||
endP = collationSource->endp;
|
||||
} else {
|
||||
endP = NULL;
|
||||
}
|
||||
|
||||
// Get the trailing combining class of the current character. If it's zero,
|
||||
// we are OK.
|
||||
UTF_NEXT_CHAR(srcP, count, length, codepoint);
|
||||
c = *srcP++;
|
||||
/* trie access */
|
||||
fcd = FCD_STAGE_3_[
|
||||
FCD_STAGE_2_[FCD_STAGE_1_[codepoint >> STAGE_1_SHIFT_] +
|
||||
((codepoint >> STAGE_2_SHIFT_) & STAGE_2_MASK_AFTER_SHIFT_)] +
|
||||
(codepoint & STAGE_3_MASK_)];
|
||||
prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
|
||||
|
||||
if (prevTrailingCC != 0) {
|
||||
// The current char has a non-zero trailing CC. Scan forward until we find
|
||||
// a char with a leading cc of zero.
|
||||
for (;;)
|
||||
{
|
||||
if (count >= length) {
|
||||
break;
|
||||
fcd = unorm_getFCD16(fcdTrieIndex, c);
|
||||
if (fcd != 0) {
|
||||
if (UTF_IS_FIRST_SURROGATE(c)) {
|
||||
if ((endP == NULL || srcP != endP) && UTF_IS_SECOND_SURROGATE(c2=*srcP)) {
|
||||
++srcP;
|
||||
fcd = unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c2);
|
||||
} else {
|
||||
fcd = 0;
|
||||
}
|
||||
int32_t savedCount = count;
|
||||
UTF_NEXT_CHAR(srcP, count, length, codepoint);
|
||||
}
|
||||
|
||||
/* trie access */
|
||||
fcd = FCD_STAGE_3_[
|
||||
FCD_STAGE_2_[FCD_STAGE_1_[codepoint >> STAGE_1_SHIFT_] +
|
||||
((codepoint >> STAGE_2_SHIFT_) & STAGE_2_MASK_AFTER_SHIFT_)] +
|
||||
(codepoint & STAGE_3_MASK_)];
|
||||
leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
|
||||
if (leadingCC == 0) {
|
||||
count = savedCount; // Hit char that is not part of combining sequence.
|
||||
// back up over it. (Could be surrogate pair!)
|
||||
break;
|
||||
prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
|
||||
|
||||
if (prevTrailingCC != 0) {
|
||||
// The current char has a non-zero trailing CC. Scan forward until we find
|
||||
// a char with a leading cc of zero.
|
||||
while (endP == NULL || srcP != endP)
|
||||
{
|
||||
const UChar *savedSrcP = srcP;
|
||||
|
||||
c = *srcP++;
|
||||
/* trie access */
|
||||
fcd = unorm_getFCD16(fcdTrieIndex, c);
|
||||
if (fcd != 0 && UTF_IS_FIRST_SURROGATE(c)) {
|
||||
if ((endP == NULL || srcP != endP) && UTF_IS_SECOND_SURROGATE(c2=*srcP)) {
|
||||
++srcP;
|
||||
fcd = unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c2);
|
||||
} else {
|
||||
fcd = 0;
|
||||
}
|
||||
}
|
||||
leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
|
||||
if (leadingCC == 0) {
|
||||
srcP = savedSrcP; // Hit char that is not part of combining sequence.
|
||||
// back up over it. (Could be surrogate pair!)
|
||||
break;
|
||||
}
|
||||
|
||||
if (leadingCC < prevTrailingCC) {
|
||||
needNormalize = TRUE;
|
||||
}
|
||||
|
||||
prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
|
||||
}
|
||||
|
||||
if (leadingCC < prevTrailingCC) {
|
||||
needNormalize = TRUE;
|
||||
}
|
||||
|
||||
prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
|
||||
}
|
||||
}
|
||||
|
||||
collationSource->fcdPosition = srcP + count;
|
||||
collationSource->fcdPosition = (UChar *)srcP;
|
||||
|
||||
return needNormalize;
|
||||
}
|
||||
|
@ -1208,23 +1204,29 @@ void collPrevIterNormalize(collIterate *data)
|
|||
*/
|
||||
inline UBool collPrevIterFCD(collIterate *data)
|
||||
{
|
||||
UChar32 codepoint;
|
||||
const UChar *src, *start;
|
||||
UChar c, c2;
|
||||
uint8_t leadingCC;
|
||||
uint8_t trailingCC = 0;
|
||||
uint16_t fcd;
|
||||
UBool result = FALSE;
|
||||
int32_t length;
|
||||
|
||||
length = (data->pos + 1) - data->string;
|
||||
start = data->string;
|
||||
src = data->pos + 1;
|
||||
|
||||
/* Get the trailing combining class of the current character. */
|
||||
UTF_PREV_CHAR(data->string, 0, length, codepoint);
|
||||
|
||||
/* trie access */
|
||||
fcd = FCD_STAGE_3_[
|
||||
FCD_STAGE_2_[FCD_STAGE_1_[codepoint >> STAGE_1_SHIFT_] +
|
||||
((codepoint >> STAGE_2_SHIFT_) & STAGE_2_MASK_AFTER_SHIFT_)] +
|
||||
(codepoint & STAGE_3_MASK_)];
|
||||
c = *--src;
|
||||
if (!UTF_IS_SURROGATE(c)) {
|
||||
fcd = unorm_getFCD16(fcdTrieIndex, c);
|
||||
} else if (UTF_IS_SECOND_SURROGATE(c) && start < src && UTF_IS_FIRST_SURROGATE(c2 = *(src - 1))) {
|
||||
--src;
|
||||
fcd = unorm_getFCD16(fcdTrieIndex, c2);
|
||||
if (fcd != 0) {
|
||||
fcd = unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c);
|
||||
}
|
||||
} else /* unpaired surrogate */ {
|
||||
fcd = 0;
|
||||
}
|
||||
|
||||
leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);
|
||||
|
||||
|
@ -1235,18 +1237,23 @@ inline UBool collPrevIterFCD(collIterate *data)
|
|||
*/
|
||||
for (;;)
|
||||
{
|
||||
if (length <= 0) {
|
||||
length = -1;
|
||||
break;
|
||||
if (start == src) {
|
||||
data->fcdPosition = NULL;
|
||||
return result;
|
||||
}
|
||||
|
||||
UTF_PREV_CHAR(data->string, 0, length, codepoint);
|
||||
|
||||
/* trie access */
|
||||
fcd = FCD_STAGE_3_[
|
||||
FCD_STAGE_2_[FCD_STAGE_1_[codepoint >> STAGE_1_SHIFT_] +
|
||||
((codepoint >> STAGE_2_SHIFT_) & STAGE_2_MASK_AFTER_SHIFT_)] +
|
||||
(codepoint & STAGE_3_MASK_)];
|
||||
c = *--src;
|
||||
if (!UTF_IS_SURROGATE(c)) {
|
||||
fcd = unorm_getFCD16(fcdTrieIndex, c);
|
||||
} else if (UTF_IS_SECOND_SURROGATE(c) && start < src && UTF_IS_FIRST_SURROGATE(c2 = *(src - 1))) {
|
||||
--src;
|
||||
fcd = unorm_getFCD16(fcdTrieIndex, c2);
|
||||
if (fcd != 0) {
|
||||
fcd = unorm_getFCD16FromSurrogatePair(fcdTrieIndex, fcd, c);
|
||||
}
|
||||
} else /* unpaired surrogate */ {
|
||||
fcd = 0;
|
||||
}
|
||||
|
||||
trailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);
|
||||
|
||||
|
@ -1262,12 +1269,7 @@ inline UBool collPrevIterFCD(collIterate *data)
|
|||
}
|
||||
}
|
||||
|
||||
if (length < 0) {
|
||||
data->fcdPosition = NULL;
|
||||
}
|
||||
else {
|
||||
data->fcdPosition = data->string + length;
|
||||
}
|
||||
data->fcdPosition = (UChar *)src;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@ -3103,7 +3105,7 @@ ucol_calcSortKey(const UCollator *coll,
|
|||
}
|
||||
} else if((normMode != UCOL_OFF)
|
||||
/* changed by synwee */
|
||||
&& !checkFCD(source, len, status))
|
||||
&& UNORM_YES!=unorm_quickCheck(source, len, UNORM_FCD, status))
|
||||
{
|
||||
normSourceLen = unorm_normalize(source, sourceLength, UNORM_NFD, 0, normSource, normSourceLen, status);
|
||||
if(U_FAILURE(*status)) {
|
||||
|
@ -3595,7 +3597,7 @@ ucol_calcSortKeySimpleTertiary(const UCollator *coll,
|
|||
/* If we need to normalize, we'll do it all at once at the beginning! */
|
||||
UColAttributeValue normMode = coll->normalizationMode;
|
||||
if(normMode != UCOL_OFF) {
|
||||
if (!checkFCD(source, len, status))
|
||||
if (UNORM_YES!=unorm_quickCheck(source, len, UNORM_FCD, status))
|
||||
{
|
||||
normSourceLen = unorm_normalize(source, sourceLength, UNORM_NFD, 0, normSource, normSourceLen, status);
|
||||
if(U_FAILURE(*status)) {
|
||||
|
|
|
@ -26,8 +26,6 @@
|
|||
|
||||
#define ARRAY_LENGTH(array) (sizeof (array) / sizeof (*array))
|
||||
|
||||
extern UBool checkFCD(const UChar *, int32_t, UErrorCode *);
|
||||
|
||||
static UCollator *myCollation;
|
||||
|
||||
static void
|
||||
|
@ -566,7 +564,7 @@ void TestCheckFCD()
|
|||
{0x0061, 0x030A, 0x00E2, 0x0323, 0},
|
||||
{0x0061, 0x0323, 0x00E2, 0x0323, 0},
|
||||
{0x0061, 0x0323, 0x1E05, 0x0302, 0} };
|
||||
const UBool result[] = {TRUE, FALSE, FALSE, TRUE};
|
||||
const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
|
||||
|
||||
const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
|
||||
0x6a,
|
||||
|
@ -581,26 +579,26 @@ void TestCheckFCD()
|
|||
|
||||
int count = 0;
|
||||
|
||||
if (checkFCD(FAST_, 10, &status) != TRUE)
|
||||
log_err("checkFCD failed: expected value for fast checkFCD is TRUE\n");
|
||||
if (checkFCD(FALSE_, 10, &status) != FALSE)
|
||||
log_err("checkFCD failed: expected value for error checkFCD is FALSE\n");
|
||||
if (checkFCD(TRUE_, 10, &status) != TRUE)
|
||||
log_err("checkFCD failed: expected value for correct checkFCD is TRUE\n");
|
||||
if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
|
||||
log_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES\n");
|
||||
if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
|
||||
log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
|
||||
if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
|
||||
log_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES\n");
|
||||
|
||||
if (U_FAILURE(status))
|
||||
log_err("checkFCD failed: %s\n", u_errorName(status));
|
||||
log_err("unorm_quickCheck(FCD) failed: %s\n", u_errorName(status));
|
||||
|
||||
while (count < 4)
|
||||
{
|
||||
UBool fcdresult = checkFCD(datastr[count], 4, &status);
|
||||
UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
log_err("checkFCD failed: exception occured at data set %d\n", count);
|
||||
log_err("unorm_quickCheck(FCD) failed: exception occured at data set %d\n", count);
|
||||
break;
|
||||
}
|
||||
else {
|
||||
if (result[count] != fcdresult) {
|
||||
log_err("checkFCD failed: Data set %d expected value %d\n", count,
|
||||
log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
|
||||
result[count]);
|
||||
}
|
||||
}
|
||||
|
@ -614,7 +612,7 @@ void TestCheckFCD()
|
|||
for (count = 0; count < 50; count ++)
|
||||
{
|
||||
int size = 0;
|
||||
UBool testresult = TRUE;
|
||||
UBool testresult = UNORM_YES;
|
||||
UChar data[20];
|
||||
UChar norm[100];
|
||||
UChar nfd[100];
|
||||
|
@ -627,7 +625,7 @@ void TestCheckFCD()
|
|||
normsize += unorm_normalize(data + size, 1, UCOL_DECOMP_CAN, UCOL_IGNORE_HANGUL,
|
||||
norm + normsize, 100 - normsize, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
log_err("checkFCD failed: exception occured at data generation\n");
|
||||
log_err("unorm_quickCheck(FCD) failed: exception occured at data generation\n");
|
||||
break;
|
||||
}
|
||||
size ++;
|
||||
|
@ -637,21 +635,21 @@ void TestCheckFCD()
|
|||
nfdsize = unorm_normalize(data, size, UCOL_DECOMP_CAN, UCOL_IGNORE_HANGUL,
|
||||
nfd, 100, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
log_err("checkFCD failed: exception occured at normalized data generation\n");
|
||||
log_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation\n");
|
||||
}
|
||||
|
||||
if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
|
||||
testresult = FALSE;
|
||||
testresult = UNORM_NO;
|
||||
}
|
||||
if (testresult == TRUE) {
|
||||
log_verbose("result TRUE\n");
|
||||
if (testresult == UNORM_YES) {
|
||||
log_verbose("result UNORM_YES\n");
|
||||
}
|
||||
else {
|
||||
log_verbose("result FALSE\n");
|
||||
log_verbose("result UNORM_NO\n");
|
||||
}
|
||||
|
||||
if (checkFCD(data, size, &status) != testresult || U_FAILURE(status)) {
|
||||
log_err("checkFCD failed: expected %d for random data\n", testresult);
|
||||
if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
|
||||
log_err("unorm_quickCheck(FCD) failed: expected %d for random data\n", testresult);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue