ICU-2758 add APIs for FCD properties lccc & tccc

X-SVN-Rev: 14924
This commit is contained in:
Markus Scherer 2004-04-09 21:58:34 +00:00
parent 72ce99314f
commit efe870f9b6
3 changed files with 42 additions and 18 deletions

View file

@ -649,16 +649,16 @@ public class BasicTest extends TestFmwk {
public void TestQuickCheckPerCP() {
int c, lead, trail;
String s, nfd;
//int lccc1, lccc2, tccc1, tccc2;
int lccc1, lccc2, tccc1, tccc2;
int qc1, qc2;
if(
UCharacter.getIntPropertyMaxValue(UProperty.NFD_QUICK_CHECK)!=1 || // YES
UCharacter.getIntPropertyMaxValue(UProperty.NFKD_QUICK_CHECK)!=1 ||
UCharacter.getIntPropertyMaxValue(UProperty.NFC_QUICK_CHECK)!=2 || // MAYBE
UCharacter.getIntPropertyMaxValue(UProperty.NFKC_QUICK_CHECK)!=2/* ||
UCharacter.getIntPropertyMaxValue(UProperty.LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
UCharacter.getIntPropertyMaxValue(UProperty.TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)*/
UCharacter.getIntPropertyMaxValue(UProperty.NFKC_QUICK_CHECK)!=2 ||
UCharacter.getIntPropertyMaxValue(UProperty.LEAD_CANONICAL_COMBINING_CLASS)!=UCharacter.getIntPropertyMaxValue(UProperty.CANONICAL_COMBINING_CLASS) ||
UCharacter.getIntPropertyMaxValue(UProperty.TRAIL_CANONICAL_COMBINING_CLASS)!=UCharacter.getIntPropertyMaxValue(UProperty.CANONICAL_COMBINING_CLASS)
) {
errln("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS");
}
@ -694,25 +694,23 @@ public class BasicTest extends TestFmwk {
if(qc1!=qc2) {
errln("getIntPropertyValue(NFKD)="+qc1+" != "+qc2+"=quickCheck(NFKD) for U+"+Integer.toHexString(c));
}
/*
length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, LENGTHOF(nfd), &errorCode);
U16_GET(nfd, 0, 0, length, lead);
U16_GET(nfd, 0, length-1, length, trail);
lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
lccc2=u_getCombiningClass(lead);
tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
tccc2=u_getCombiningClass(trail);
nfd=Normalizer.normalize(s, Normalizer.NFD);
lead=UTF16.charAt(nfd, 0);
trail=UTF16.charAt(nfd, nfd.length()-1);
lccc1=UCharacter.getIntPropertyValue(c, UProperty.LEAD_CANONICAL_COMBINING_CLASS);
lccc2=UCharacter.getCombiningClass(lead);
tccc1=UCharacter.getIntPropertyValue(c, UProperty.TRAIL_CANONICAL_COMBINING_CLASS);
tccc2=UCharacter.getCombiningClass(trail);
if(lccc1!=lccc2) {
log_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
lccc1, lccc2, c);
errln("getIntPropertyValue(lccc)="+lccc1+" != "+lccc2+"=getCombiningClass(lead) for U+"+Integer.toHexString(c));
}
if(tccc1!=tccc2) {
log_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
tccc1, tccc2, c);
errln("getIntPropertyValue(tccc)="+tccc1+" != "+tccc2+"=getCombiningClass(trail) for U+"+Integer.toHexString(c));
}
*/
/* skip some code points */
c=(20*c)/19+1;
}

View file

@ -4334,6 +4334,10 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
case UProperty.NFC_QUICK_CHECK:
case UProperty.NFKC_QUICK_CHECK:
return NormalizerImpl.quickCheck(ch, (type-UProperty.NFD_QUICK_CHECK)+2); // 2=UNORM_NFD
case UProperty.LEAD_CANONICAL_COMBINING_CLASS:
return NormalizerImpl.getFCD16(ch)>>8;
case UProperty.TRAIL_CANONICAL_COMBINING_CLASS:
return NormalizerImpl.getFCD16(ch)&0xff;
default:
return 0; /* undefined */
@ -4415,6 +4419,8 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
& BLOCK_MASK_) >> BLOCK_SHIFT_;
return (max!=0) ? max : UnicodeBlock.COUNT - 1;
case UProperty.CANONICAL_COMBINING_CLASS:
case UProperty.LEAD_CANONICAL_COMBINING_CLASS:
case UProperty.TRAIL_CANONICAL_COMBINING_CLASS:
return 0xff; // TODO do we need to be more precise,
// getting the actual maximum?
case UProperty.DECOMPOSITION_TYPE:

View file

@ -402,12 +402,32 @@ public interface UProperty
* @draft ICU 3.0
*/
public static final int NFKC_QUICK_CHECK = 0x100F;
/**
* Enumerated property Lead_Canonical_Combining_Class.
* ICU-specific property for the canonical combining class (ccc) of the
* first code point of the canonical decomposition of a code point:
* {@code lccc(c)=ccc(NFD(c)[0])}.
* Useful for checking for canonically ordered text;
* see Normalizer.FCD and http://www.unicode.org/notes/tn5/#FCD .
* Returns 8-bit numeric values like CANONICAL_COMBINING_CLASS.
* @draft ICU 3.0
*/
public static final int LEAD_CANONICAL_COMBINING_CLASS = 0x1010;
/**
* Enumerated property Trail_Canonical_Combining_Class.
* ICU-specific property for the canonical combining class (ccc) of the
* last code point of the canonical decomposition of a code point:
* {@code tccc(c)=ccc(NFD(c)[last])}.
* Useful for checking for canonically ordered text;
* see Normalizer.FCD and http://www.unicode.org/notes/tn5/#FCD .
* Returns 8-bit numeric values like CANONICAL_COMBINING_CLASS.
* @draft ICU 3.0
*/
public static final int TRAIL_CANONICAL_COMBINING_CLASS = 0x1011;
/**
* One more than the last constant for enumerated/integer Unicode
* properties.
* @draft ICU 2.4
*/
public static final int INT_LIMIT = 0x1010;
public static final int INT_LIMIT = 0x1012;
/**
* Bitmask property General_Category_Mask.