mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-2757 add APIs for NF*_QC properties
X-SVN-Rev: 14892
This commit is contained in:
parent
d7242682d5
commit
81f1506e2a
8 changed files with 1430 additions and 1246 deletions
|
@ -339,6 +339,18 @@ typedef enum UProperty {
|
|||
/** Enumerated property Hangul_Syllable_Type, new in Unicode 4.
|
||||
Returns UHangulSyllableType values. @draft ICU 2.6 */
|
||||
UCHAR_HANGUL_SYLLABLE_TYPE,
|
||||
/** Enumerated property NFD_Quick_Check.
|
||||
Returns UNormalizationCheckResult values. @draft ICU 3.0 */
|
||||
UCHAR_NFD_QUICK_CHECK,
|
||||
/** Enumerated property NFKD_Quick_Check.
|
||||
Returns UNormalizationCheckResult values. @draft ICU 3.0 */
|
||||
UCHAR_NFKD_QUICK_CHECK,
|
||||
/** Enumerated property NFC_Quick_Check.
|
||||
Returns UNormalizationCheckResult values. @draft ICU 3.0 */
|
||||
UCHAR_NFC_QUICK_CHECK,
|
||||
/** Enumerated property NFKC_Quick_Check.
|
||||
Returns UNormalizationCheckResult values. @draft ICU 3.0 */
|
||||
UCHAR_NFKC_QUICK_CHECK,
|
||||
/** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
|
||||
UCHAR_INT_LIMIT,
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
******************************************************************************
|
||||
* Copyright (c) 1996-2003, International Business Machines
|
||||
* Copyright (c) 1996-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
******************************************************************************
|
||||
* File unorm.cpp
|
||||
|
@ -1163,6 +1163,32 @@ unorm_addPropertyStarts(USet *set, UErrorCode *pErrorCode) {
|
|||
uset_add(set, HANGUL_BASE+HANGUL_COUNT); /* add Hangul+1 to continue with other properties */
|
||||
}
|
||||
|
||||
/**
 * Look up the quick check (NF*_QC) value of a single code point for one
 * normalization mode, based on the code point's norm32 value in normTrie.
 *
 * @param c    code point to look up
 * @param mode normalization mode; a mode whose mask table entry is 0, or a
 *             value outside the mask table, yields UNORM_YES
 * @return UNORM_YES if none of the mode's quick check bits are set or if the
 *         normalization data could not be loaded, UNORM_NO if one of the
 *         "no" bits is set, UNORM_MAYBE otherwise
 */
U_CAPI UNormalizationCheckResult U_EXPORT2
unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
    /*
     * Quick check bit mask, indexed by UNormalizationMode value.
     * Entries beyond the listed initializers are zero-initialized.
     */
    static const uint32_t qcMask[UNORM_MODE_COUNT]={
        0, 0, _NORM_QC_NFD, _NORM_QC_NFKD, _NORM_QC_NFC, _NORM_QC_NFKC
    };

    UErrorCode errorCode=U_ZERO_ERROR;
    uint32_t norm32;

    /* never index qcMask[] outside the table; the unsigned compare also rejects negative values */
    if((uint32_t)mode>=(uint32_t)UNORM_MODE_COUNT) {
        return UNORM_YES;
    }

    /* without data there is nothing to look up; report "yes" */
    if(!_haveData(errorCode)) {
        return UNORM_YES;
    }

    UTRIE_GET32(&normTrie, c, norm32);
    norm32&=qcMask[mode]; /* keep only the bits that belong to this mode */

    if(norm32==0) {
        return UNORM_YES;
    } else if(norm32&_NORM_QC_ANY_NO) {
        return UNORM_NO;
    } else /* _NORM_QC_ANY_MAYBE */ {
        return UNORM_MAYBE;
    }
}
|
||||
|
||||
/* reorder UTF-16 in-place -------------------------------------------------- */
|
||||
|
||||
/*
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2003, International Business Machines
|
||||
* Copyright (C) 2001-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -389,6 +389,13 @@ unorm_swap(const UDataSwapper *ds,
|
|||
const void *inData, int32_t length, void *outData,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Get the NF*_QC property for a code point, for u_getIntPropertyValue().
*
* @param c code point to look up
* @param mode normalization mode whose quick check value is wanted
* @return UNORM_YES, UNORM_NO or UNORM_MAYBE for c under the given mode
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI UNormalizationCheckResult U_EXPORT2
|
||||
unorm_getQuickCheck(UChar32 c, UNormalizationMode mode);
|
||||
|
||||
/**
|
||||
* Description of the format of unorm.dat version 2.2.
|
||||
*
|
||||
|
|
|
@ -349,6 +349,11 @@ u_getIntPropertyValue(UChar32 c, UProperty which) {
|
|||
return c%JAMO_T_COUNT==0 ? U_HST_LV_SYLLABLE : U_HST_LVT_SYLLABLE;
|
||||
}
|
||||
return U_HST_NOT_APPLICABLE;
|
||||
case UCHAR_NFD_QUICK_CHECK:
|
||||
case UCHAR_NFKD_QUICK_CHECK:
|
||||
case UCHAR_NFC_QUICK_CHECK:
|
||||
case UCHAR_NFKC_QUICK_CHECK:
|
||||
return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK)+UNORM_NFD);
|
||||
default:
|
||||
return 0; /* undefined */
|
||||
}
|
||||
|
@ -407,6 +412,12 @@ u_getIntPropertyMaxValue(UProperty which) {
|
|||
return max!=0 ? max : (int32_t)USCRIPT_CODE_LIMIT-1;
|
||||
case UCHAR_HANGUL_SYLLABLE_TYPE:
|
||||
return (int32_t)U_HST_COUNT-1;
|
||||
case UCHAR_NFD_QUICK_CHECK:
|
||||
case UCHAR_NFKD_QUICK_CHECK:
|
||||
return (int32_t)UNORM_YES; /* these are never "maybe", only "no" or "yes" */
|
||||
case UCHAR_NFC_QUICK_CHECK:
|
||||
case UCHAR_NFKC_QUICK_CHECK:
|
||||
return (int32_t)UNORM_MAYBE;
|
||||
default:
|
||||
return -1; /* undefined */
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2003, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2004, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/********************************************************************************
|
||||
|
@ -32,7 +32,7 @@ void addNormTest(TestNode** root) {
|
|||
#include "unicode/unorm.h"
|
||||
#include "cnormtst.h"
|
||||
|
||||
/* Number of elements in a true array (not a pointer); the result has type size_t. */
#define ARRAY_LENGTH(array) (sizeof(array) / sizeof(*(array)))
|
||||
/*
 * Number of elements in a true array (not a pointer), as a signed int32_t
 * so that it compares cleanly with int32_t loop counters.
 * The whole expansion is parenthesized so it behaves as a single operand.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
|
||||
|
||||
static void
|
||||
TestAPI(void);
|
||||
|
@ -51,6 +51,9 @@ static void TestIsNormalized(void);
|
|||
static void
|
||||
TestFCNFKCClosure(void);
|
||||
|
||||
static void
|
||||
TestQuickCheckPerCP(void);
|
||||
|
||||
const static char* canonTests[][3] = {
|
||||
/* Input*/ /*Decomposed*/ /*Composed*/
|
||||
{ "cat", "cat", "cat" },
|
||||
|
@ -121,6 +124,7 @@ void addNormTest(TestNode** root)
|
|||
addTest(root, &TestCompatDecompCompose, "tscoll/cnormtst/CompatDecompCompose");
|
||||
addTest(root, &TestNull, "tscoll/cnormtst/TestNull");
|
||||
addTest(root, &TestQuickCheck, "tscoll/cnormtst/TestQuickCheck");
|
||||
addTest(root, &TestQuickCheckPerCP, "tscoll/cnormtst/TestQuickCheckPerCP");
|
||||
addTest(root, &TestIsNormalized, "tscoll/cnormtst/TestIsNormalized");
|
||||
addTest(root, &TestCheckFCD, "tscoll/cnormtst/TestCheckFCD");
|
||||
addTest(root, &TestNormCoverage, "tscoll/cnormtst/TestNormCoverage");
|
||||
|
@ -137,7 +141,7 @@ void TestDecomp()
|
|||
status = U_ZERO_ERROR;
|
||||
resLen=0;
|
||||
log_verbose("Testing unorm_normalize with Decomp canonical\n");
|
||||
for(x=0; x < ARRAY_LENGTH(canonTests); x++)
|
||||
for(x=0; x < LENGTHOF(canonTests); x++)
|
||||
{
|
||||
source=CharsToUChars(canonTests[x][0]);
|
||||
neededLen= unorm_normalize(source, u_strlen(source), UNORM_NFD, 0, NULL, 0, &status);
|
||||
|
@ -166,7 +170,7 @@ void TestCompatDecomp()
|
|||
status = U_ZERO_ERROR;
|
||||
resLen=0;
|
||||
log_verbose("Testing unorm_normalize with Decomp compat\n");
|
||||
for(x=0; x < ARRAY_LENGTH(compatTests); x++)
|
||||
for(x=0; x < LENGTHOF(compatTests); x++)
|
||||
{
|
||||
source=CharsToUChars(compatTests[x][0]);
|
||||
neededLen= unorm_normalize(source, u_strlen(source), UNORM_NFKD, 0, NULL, 0, &status);
|
||||
|
@ -195,7 +199,7 @@ void TestCanonDecompCompose()
|
|||
status = U_ZERO_ERROR;
|
||||
resLen=0;
|
||||
log_verbose("Testing unorm_normalize with Decomp can compose compat\n");
|
||||
for(x=0; x < ARRAY_LENGTH(canonTests); x++)
|
||||
for(x=0; x < LENGTHOF(canonTests); x++)
|
||||
{
|
||||
source=CharsToUChars(canonTests[x][0]);
|
||||
neededLen= unorm_normalize(source, u_strlen(source), UNORM_NFC, 0, NULL, 0, &status);
|
||||
|
@ -224,7 +228,7 @@ void TestCompatDecompCompose()
|
|||
status = U_ZERO_ERROR;
|
||||
resLen=0;
|
||||
log_verbose("Testing unorm_normalize with compat decomp compose can\n");
|
||||
for(x=0; x < ARRAY_LENGTH(compatTests); x++)
|
||||
for(x=0; x < LENGTHOF(compatTests); x++)
|
||||
{
|
||||
source=CharsToUChars(compatTests[x][0]);
|
||||
neededLen= unorm_normalize(source, u_strlen(source), UNORM_NFKC, 0, NULL, 0, &status);
|
||||
|
@ -503,7 +507,7 @@ static void TestQuickCheckStringResult()
|
|||
UChar *c = NULL;
|
||||
UErrorCode error = U_ZERO_ERROR;
|
||||
|
||||
for (count = 0; count < ARRAY_LENGTH(canonTests); count ++)
|
||||
for (count = 0; count < LENGTHOF(canonTests); count ++)
|
||||
{
|
||||
d = CharsToUChars(canonTests[count][1]);
|
||||
c = CharsToUChars(canonTests[count][2]);
|
||||
|
@ -525,7 +529,7 @@ static void TestQuickCheckStringResult()
|
|||
free(c);
|
||||
}
|
||||
|
||||
for (count = 0; count < ARRAY_LENGTH(compatTests); count ++)
|
||||
for (count = 0; count < LENGTHOF(compatTests); count ++)
|
||||
{
|
||||
d = CharsToUChars(compatTests[count][1]);
|
||||
c = CharsToUChars(compatTests[count][2]);
|
||||
|
@ -607,7 +611,7 @@ static void TestIsNormalized(void) {
|
|||
}
|
||||
|
||||
/* specific cases */
|
||||
for(i=0; i<ARRAY_LENGTH(notNFC); ++i) {
|
||||
for(i=0; i<LENGTHOF(notNFC); ++i) {
|
||||
errorCode=U_ZERO_ERROR;
|
||||
if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
|
||||
log_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s)\n", i, u_errorName(errorCode));
|
||||
|
@ -617,7 +621,7 @@ static void TestIsNormalized(void) {
|
|||
log_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s)\n", i, u_errorName(errorCode));
|
||||
}
|
||||
}
|
||||
for(i=0; i<ARRAY_LENGTH(notNFKC); ++i) {
|
||||
for(i=0; i<LENGTHOF(notNFKC); ++i) {
|
||||
errorCode=U_ZERO_ERROR;
|
||||
if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
|
||||
log_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s)\n", i, u_errorName(errorCode));
|
||||
|
@ -1360,9 +1364,9 @@ TestFCNFKCClosure(void) {
|
|||
UErrorCode errorCode;
|
||||
int32_t i, length;
|
||||
|
||||
for(i=0; i<ARRAY_LENGTH(tests); ++i) {
|
||||
for(i=0; i<LENGTHOF(tests); ++i) {
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=u_getFC_NFKC_Closure(tests[i].c, buffer, ARRAY_LENGTH(buffer), &errorCode);
|
||||
length=u_getFC_NFKC_Closure(tests[i].c, buffer, LENGTHOF(buffer), &errorCode);
|
||||
if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
|
||||
log_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s)\n", tests[i].c, u_errorName(errorCode));
|
||||
}
|
||||
|
@ -1370,15 +1374,71 @@ TestFCNFKCClosure(void) {
|
|||
|
||||
/* error handling */
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=u_getFC_NFKC_Closure(0x5c, NULL, ARRAY_LENGTH(buffer), &errorCode);
|
||||
length=u_getFC_NFKC_Closure(0x5c, NULL, LENGTHOF(buffer), &errorCode);
|
||||
if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
|
||||
log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
|
||||
}
|
||||
|
||||
length=u_getFC_NFKC_Closure(0x5c, buffer, ARRAY_LENGTH(buffer), &errorCode);
|
||||
length=u_getFC_NFKC_Closure(0x5c, buffer, LENGTHOF(buffer), &errorCode);
|
||||
if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
|
||||
log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
TestQuickCheckPerCP() {
|
||||
UErrorCode errorCode;
|
||||
UChar32 c;
|
||||
UChar s[U16_MAX_LENGTH];
|
||||
int32_t length;
|
||||
UNormalizationCheckResult qc1, qc2;
|
||||
|
||||
if(
|
||||
u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
|
||||
u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
|
||||
u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
|
||||
u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE
|
||||
) {
|
||||
log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK)\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* compare the quick check property values for some code points
|
||||
* to the quick check results for checking same-code point strings
|
||||
*/
|
||||
errorCode=U_ZERO_ERROR;
|
||||
c=0;
|
||||
while(c<0x110000) {
|
||||
length=0;
|
||||
U16_APPEND_UNSAFE(s, length, c);
|
||||
|
||||
qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
|
||||
qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
|
||||
if(qc1!=qc2) {
|
||||
log_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x\n", qc1, qc2, c);
|
||||
}
|
||||
|
||||
qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
|
||||
qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
|
||||
if(qc1!=qc2) {
|
||||
log_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x\n", qc1, qc2, c);
|
||||
}
|
||||
|
||||
qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
|
||||
qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
|
||||
if(qc1!=qc2) {
|
||||
log_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x\n", qc1, qc2, c);
|
||||
}
|
||||
|
||||
qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
|
||||
qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
|
||||
if(qc1!=qc2) {
|
||||
log_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x\n", qc1, qc2, c);
|
||||
}
|
||||
|
||||
/* skip some code points */
|
||||
c=(20*c)/19+1;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002-2003, International Business Machines
|
||||
* Copyright (C) 2002-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
|
@ -33,6 +33,7 @@
|
|||
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "unicode/unorm.h"
|
||||
|
||||
class AliasName {
|
||||
public:
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#!/bin/perl -w
|
||||
#*******************************************************************
|
||||
# COPYRIGHT:
|
||||
# Copyright (c) 2002-2003, International Business Machines Corporation and
|
||||
# Copyright (c) 2002-2004, International Business Machines Corporation and
|
||||
# others. All Rights Reserved.
|
||||
#*******************************************************************
|
||||
|
||||
|
@ -94,10 +94,6 @@ my %UNSUPPORTED = (Composition_Exclusion => 1,
|
|||
Expands_On_NFKD => 1,
|
||||
FC_NFKC_Closure => 1,
|
||||
ID_Start_Exceptions => 1,
|
||||
NFC_Quick_Check => 1,
|
||||
NFD_Quick_Check => 1,
|
||||
NFKC_Quick_Check => 1,
|
||||
NFKD_Quick_Check => 1,
|
||||
Special_Case_Condition => 1,
|
||||
);
|
||||
|
||||
|
@ -1199,6 +1195,25 @@ sub read_uchar {
|
|||
|
||||
$in->close();
|
||||
|
||||
# hardcode known values for the normalization quick check properties
|
||||
# see unorm.h for the UNormalizationCheckResult enum
|
||||
|
||||
addDatum($hash, 'NFC_QC', 'UNORM_NO', 'N');
|
||||
addDatum($hash, 'NFC_QC', 'UNORM_YES', 'Y');
|
||||
addDatum($hash, 'NFC_QC', 'UNORM_MAYBE', 'M');
|
||||
|
||||
addDatum($hash, 'NFKC_QC', 'UNORM_NO', 'N');
|
||||
addDatum($hash, 'NFKC_QC', 'UNORM_YES', 'Y');
|
||||
addDatum($hash, 'NFKC_QC', 'UNORM_MAYBE', 'M');
|
||||
|
||||
# no "maybe" values for NF[K]D
|
||||
|
||||
addDatum($hash, 'NFD_QC', 'UNORM_NO', 'N');
|
||||
addDatum($hash, 'NFD_QC', 'UNORM_YES', 'Y');
|
||||
|
||||
addDatum($hash, 'NFKD_QC', 'UNORM_NO', 'N');
|
||||
addDatum($hash, 'NFKD_QC', 'UNORM_YES', 'Y');
|
||||
|
||||
$hash;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue