mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-06 05:55:35 +00:00
ICU-7182 remove support for Unicode Normalization behavior before corrigendum 5 (PRI #29)
X-SVN-Rev: 26952
This commit is contained in:
parent
3b12074b40
commit
9fca8644b5
5 changed files with 34 additions and 119 deletions
|
@ -2090,8 +2090,8 @@ _recompose(UCharBuffer &buffer, int32_t options, const UnicodeSet *nx) {
|
|||
if((combineFlags&_NORM_COMBINES_BACK) && starter!=NULL) {
|
||||
if(combineBackIndex&0x8000) {
|
||||
/* c is a Jamo V/T, see if we can compose it with the previous character */
|
||||
/* for the PRI #29 fix, check that there is no intervening combining mark */
|
||||
if((options&UNORM_BEFORE_PRI_29) || prevCC==0) {
|
||||
/* check that there is no intervening combining mark */
|
||||
if(prevCC==0) {
|
||||
pRemove=NULL; /* NULL while no Hangul composition */
|
||||
combineFlags=0;
|
||||
c2=*starter;
|
||||
|
@ -2104,9 +2104,6 @@ _recompose(UCharBuffer &buffer, int32_t options, const UnicodeSet *nx) {
|
|||
if(p!=limit && (c2=(UChar)(*p-JAMO_T_BASE))<JAMO_T_COUNT) {
|
||||
++p;
|
||||
c+=c2;
|
||||
} else {
|
||||
/* the result is an LV syllable, which is a starter (unlike LVT) */
|
||||
combineFlags=_NORM_COMBINES_FWD;
|
||||
}
|
||||
if(!nx_contains(nx, c)) {
|
||||
*starter=c;
|
||||
|
@ -2119,26 +2116,13 @@ _recompose(UCharBuffer &buffer, int32_t options, const UnicodeSet *nx) {
|
|||
pRemove=NULL;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
/*
|
||||
* Normally, the following cannot occur:
|
||||
* No "else" for Jamo T:
|
||||
* Since the input is in NFD, there are no Hangul LV syllables that
|
||||
* a Jamo T could combine with.
|
||||
* All Jamo Ts are combined above when handling Jamo Vs.
|
||||
*
|
||||
* However, before the PRI #29 fix, this can occur due to
|
||||
* an intervening combining mark between the Hangul LV and the Jamo T.
|
||||
*/
|
||||
} else {
|
||||
/* Jamo T, compose with previous Hangul that does not have a Jamo T */
|
||||
if(isHangulWithoutJamoT(c2)) {
|
||||
c2+=(UChar)(c-JAMO_T_BASE);
|
||||
if(!nx_contains(nx, c2)) {
|
||||
pRemove=p-1;
|
||||
*starter=c2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(pRemove!=NULL) {
|
||||
/* remove the Jamo(s) */
|
||||
|
@ -2152,41 +2136,22 @@ _recompose(UCharBuffer &buffer, int32_t options, const UnicodeSet *nx) {
|
|||
}
|
||||
|
||||
c2=0; /* c2 held *starter temporarily */
|
||||
|
||||
if(combineFlags!=0) {
|
||||
/*
|
||||
* not starter=NULL because the composition is a Hangul LV syllable
|
||||
* and might combine once more (but only before the PRI #29 fix)
|
||||
*/
|
||||
|
||||
/* done? */
|
||||
if(p==limit) {
|
||||
return prevCC;
|
||||
}
|
||||
|
||||
/* the composition is a Hangul LV syllable which is a starter that combines forward */
|
||||
combineFwdIndex=0xfff0;
|
||||
|
||||
/* we combined; continue with looking for compositions */
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
prevCC=0;
|
||||
}
|
||||
|
||||
/*
|
||||
* now: cc==0 and the combining index does not include "forward" ->
|
||||
* the rest of the loop body will reset starter to NULL;
|
||||
* technically, a composed Hangul syllable is a starter, but it
|
||||
* does not combine forward now that we have consumed all eligible Jamos;
|
||||
* for Jamo V/T, combineFlags does not contain _NORM_COMBINES_FWD
|
||||
*/
|
||||
/* done? */
|
||||
if(p==limit) {
|
||||
return prevCC;
|
||||
}
|
||||
|
||||
starter=NULL;
|
||||
continue;
|
||||
} else if(
|
||||
/* the starter is not a Hangul LV or Jamo V/T and */
|
||||
!(combineFwdIndex&0x8000) &&
|
||||
/* the combining mark is not blocked and */
|
||||
((options&UNORM_BEFORE_PRI_29) ?
|
||||
(prevCC!=cc || prevCC==0) :
|
||||
(prevCC<cc || prevCC==0)) &&
|
||||
(prevCC<cc || prevCC==0) &&
|
||||
/* the starter and the combining mark (c, c2) do combine and */
|
||||
0!=(result=_combine(combiningTable+combineFwdIndex, combineBackIndex, value, value2)) &&
|
||||
/* the composition result is not excluded */
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2008, International Business Machines
|
||||
* Copyright (C) 2001-2009, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -188,15 +188,6 @@ enum {
|
|||
UNORM_NX_HANGUL=1,
|
||||
/** Options bit 1, do not decompose CJK compatibility characters. @draft ICU 2.6 */
|
||||
UNORM_NX_CJK_COMPAT=2,
|
||||
/**
|
||||
* Options bit 8, use buggy recomposition described in
|
||||
* Unicode Public Review Issue #29
|
||||
* at http://www.unicode.org/review/resolved-pri.html#pri29
|
||||
*
|
||||
* Used in IDNA implementation according to strict interpretation
|
||||
* of IDNA definition based on Unicode 3.2 which predates PRI #29.
|
||||
*/
|
||||
UNORM_BEFORE_PRI_29=0x100
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -632,20 +632,9 @@ static int32_t
|
|||
usprep_normalize( const UChar* src, int32_t srcLength,
|
||||
UChar* dest, int32_t destCapacity,
|
||||
UErrorCode* status ){
|
||||
/*
|
||||
* Option UNORM_BEFORE_PRI_29:
|
||||
*
|
||||
* IDNA as interpreted by IETF members (see unicode mailing list 2004H1)
|
||||
* requires strict adherence to Unicode 3.2 normalization,
|
||||
* including buggy composition from before fixing Public Review Issue #29.
|
||||
* Note that this causes some valid but nonsensical text to be
|
||||
* either corrupted or rejected, depending on the text.
|
||||
* See http://www.unicode.org/review/resolved-pri.html#pri29
|
||||
* See unorm.cpp and cnormtst.c
|
||||
*/
|
||||
return unorm_normalize(
|
||||
src, srcLength,
|
||||
UNORM_NFKC, UNORM_UNICODE_3_2|UNORM_BEFORE_PRI_29,
|
||||
UNORM_NFKC, UNORM_UNICODE_3_2,
|
||||
dest, destCapacity,
|
||||
status);
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2007, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2009, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
/********************************************************************************
|
||||
|
@ -122,21 +122,21 @@ void addNormTest(TestNode** root);
|
|||
|
||||
void addNormTest(TestNode** root)
|
||||
{
|
||||
addTest(root, &TestAPI, "tscoll/cnormtst/TestAPI");
|
||||
addTest(root, &TestDecomp, "tscoll/cnormtst/TestDecomp");
|
||||
addTest(root, &TestCompatDecomp, "tscoll/cnormtst/TestCompatDecomp");
|
||||
addTest(root, &TestCanonDecompCompose, "tscoll/cnormtst/TestCanonDecompCompose");
|
||||
addTest(root, &TestCompatDecompCompose, "tscoll/cnormtst/CompatDecompCompose");
|
||||
addTest(root, &TestNull, "tscoll/cnormtst/TestNull");
|
||||
addTest(root, &TestQuickCheck, "tscoll/cnormtst/TestQuickCheck");
|
||||
addTest(root, &TestQuickCheckPerCP, "tscoll/cnormtst/TestQuickCheckPerCP");
|
||||
addTest(root, &TestIsNormalized, "tscoll/cnormtst/TestIsNormalized");
|
||||
addTest(root, &TestCheckFCD, "tscoll/cnormtst/TestCheckFCD");
|
||||
addTest(root, &TestNormCoverage, "tscoll/cnormtst/TestNormCoverage");
|
||||
addTest(root, &TestConcatenate, "tscoll/cnormtst/TestConcatenate");
|
||||
addTest(root, &TestNextPrevious, "tscoll/cnormtst/TestNextPrevious");
|
||||
addTest(root, &TestFCNFKCClosure, "tscoll/cnormtst/TestFCNFKCClosure");
|
||||
addTest(root, &TestComposition, "tscoll/cnormtst/TestComposition");
|
||||
addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI");
|
||||
addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp");
|
||||
addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp");
|
||||
addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompose");
|
||||
addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/CompatDecompCompose");
|
||||
addTest(root, &TestNull, "tsnorm/cnormtst/TestNull");
|
||||
addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck");
|
||||
addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP");
|
||||
addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized");
|
||||
addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD");
|
||||
addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage");
|
||||
addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate");
|
||||
addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious");
|
||||
addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure");
|
||||
addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition");
|
||||
}
|
||||
|
||||
void TestDecomp()
|
||||
|
@ -1478,19 +1478,15 @@ TestComposition(void) {
|
|||
} cases[]={
|
||||
/*
|
||||
* special cases for UAX #15 bug
|
||||
* see Unicode Public Review Issue #29
|
||||
* at http://www.unicode.org/review/resolved-pri.html#pri29
|
||||
* see Unicode Corrigendum #5: Normalization Idempotency
|
||||
* at http://unicode.org/versions/corrigendum5.html
|
||||
* (was Public Review Issue #29)
|
||||
*/
|
||||
{ UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0x1100, 0x0300, 0x1161, 0x0327 } },
|
||||
{ UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
|
||||
{ UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac00, 0x0327, 0x0300, 0x11a8 } },
|
||||
{ UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b47, 0x0300, 0x0b3e } },
|
||||
|
||||
{ UNORM_NFC, UNORM_BEFORE_PRI_29, { 0x1100, 0x0300, 0x1161, 0x0327 }, { 0xac00, 0x0300, 0x0327 } },
|
||||
{ UNORM_NFC, UNORM_BEFORE_PRI_29, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0xac01, 0x0300, 0x0327 } },
|
||||
{ UNORM_NFC, UNORM_BEFORE_PRI_29, { 0xac00, 0x0300, 0x0327, 0x11a8 }, { 0xac01, 0x0327, 0x0300 } },
|
||||
{ UNORM_NFC, UNORM_BEFORE_PRI_29, { 0x0b47, 0x0300, 0x0b3e }, { 0x0b4b, 0x0300 } }
|
||||
|
||||
/* TODO: add test cases for UNORM_FCC here (j2151) */
|
||||
};
|
||||
|
||||
|
|
|
@ -33,7 +33,6 @@ static void TestToASCII(void);
|
|||
static void TestIDNToUnicode(void);
|
||||
static void TestIDNToASCII(void);
|
||||
static void TestCompare(void);
|
||||
static void TestUnicode32Norm(void);
|
||||
static void TestJB4490(void);
|
||||
static void TestJB4475(void);
|
||||
static void TestLength(void);
|
||||
|
@ -61,7 +60,6 @@ addIDNATest(TestNode** root)
|
|||
addTest(root, &TestIDNToUnicode, "idna/TestIDNToUnicode");
|
||||
addTest(root, &TestIDNToASCII, "idna/TestIDNToASCII");
|
||||
addTest(root, &TestCompare, "idna/TestCompare");
|
||||
addTest(root, &TestUnicode32Norm,"idna/TestUnicode32Norm");
|
||||
addTest(root, &TestJB4490, "idna/TestJB4490");
|
||||
addTest(root, &TestJB4475, "idna/TestJB4475");
|
||||
addTest(root, &TestLength, "idna/TestLength");
|
||||
|
@ -639,30 +637,6 @@ TestCompare(){
|
|||
}
|
||||
}
|
||||
|
||||
static void TestUnicode32Norm() {
|
||||
/*
|
||||
* test Unicode 3.2 normalization, before Public Review Issue #29
|
||||
* see cnormtst.c TestComposition()
|
||||
*/
|
||||
static const UChar strings[][8]={
|
||||
{ 0x1100, 0x0300, 0x1161, 0x0327 },
|
||||
{ 0x0b47, 0x0300, 0x0b3e, 0x0327 }
|
||||
};
|
||||
|
||||
UChar ascii[20], unicode[20];
|
||||
int32_t i, length;
|
||||
UErrorCode errorCode;
|
||||
|
||||
for(i=0; i<LENGTHOF(strings); ++i) {
|
||||
errorCode=U_ZERO_ERROR;
|
||||
length=uidna_toASCII(strings[i], -1, ascii, LENGTHOF(ascii), 0, NULL, &errorCode);
|
||||
length=uidna_toUnicode(ascii, length, unicode, LENGTHOF(unicode), 0, NULL, &errorCode);
|
||||
if(u_strncmp(ascii, unicode, length)!=0) {
|
||||
log_err("Did not get the correct output\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void TestJB4490(){
|
||||
static const UChar data[][50]= {
|
||||
{0x00F5,0x00dE,0x00dF,0x00dD, 0x0000},
|
||||
|
|
Loading…
Add table
Reference in a new issue