mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-2021 another fix for canonical caseless match, still prototyping
X-SVN-Rev: 11209
This commit is contained in:
parent
e9e4feea24
commit
353f8ee0eb
3 changed files with 226 additions and 121 deletions
|
@ -32,6 +32,8 @@
|
|||
#include "ustr_imp.h"
|
||||
#include "uprops.h"
|
||||
|
||||
/* Number of elements of a true array (not a pointer), as int32_t. The whole expansion is parenthesized so the macro is safe inside any larger expression. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
|
||||
|
||||
/* statically loaded Unicode character properties -------------------------- */
|
||||
|
||||
/* MACHINE-GENERATED: Do not edit (see com.ibm.icu.dev.tools.translit.UnicodeSetCloseOver) */
|
||||
|
@ -1912,6 +1914,54 @@ u_foldCase(UChar32 c, uint32_t options) {
|
|||
return c; /* no mapping - return c itself */
|
||||
}
|
||||
|
||||
#if 0
|
||||
/* ### TODO Turkic-i case folding prototype, j2021 */
|
||||
/* Maximum number of UChars in the Turkic mapping stored in one fold_T row (the table rows reserve 2+FOLD_T_LENGTH slots). */
enum {
|
||||
FOLD_T_LENGTH=3
|
||||
};
|
||||
|
||||
/*
|
||||
* Turkic full case foldings (prototype data, compiled out by the enclosing #if 0).
|
||||
* First UChar is the source, second the default mapping,
|
||||
* then the Turkic mapping.
|
||||
* Each row has 2+FOLD_T_LENGTH UChars: the Turkic mapping occupies up to
|
||||
* FOLD_T_LENGTH slots, and slots not listed in a row are implicitly zero;
|
||||
* the lookup code in this file stops copying at the first zero UChar.
|
||||
* Rows are listed in ascending order of the source UChar; the lookup loops
|
||||
* in this file compare against fold_T[i][0] and the last row's source and
|
||||
* therefore rely on that order.
|
||||
*/
|
||||
static const UChar fold_T[][2+FOLD_T_LENGTH]={
|
||||
{ 0x0049, 0x0069, 0x131 },
|
||||
{ 0x0069, 0x0069, 0x131, 0x307 },
|
||||
{ 0x00cc, 0x00ec, 0x131, 0x300 },
|
||||
{ 0x00cd, 0x00ed, 0x131, 0x301 },
|
||||
{ 0x00ce, 0x00ee, 0x131, 0x302 },
|
||||
{ 0x00cf, 0x00ef, 0x131, 0x308 },
|
||||
{ 0x00ec, 0x00ec, 0x131, 0x300 },
|
||||
{ 0x00ed, 0x00ed, 0x131, 0x301 },
|
||||
{ 0x00ee, 0x00ee, 0x131, 0x302 },
|
||||
{ 0x00ef, 0x00ef, 0x131, 0x308 },
|
||||
{ 0x0128, 0x0129, 0x131, 0x303 },
|
||||
{ 0x0129, 0x0129, 0x131, 0x303 },
|
||||
{ 0x012a, 0x012b, 0x131, 0x304 },
|
||||
{ 0x012b, 0x012b, 0x131, 0x304 },
|
||||
{ 0x012c, 0x012d, 0x131, 0x306 },
|
||||
{ 0x012d, 0x012d, 0x131, 0x306 },
|
||||
{ 0x012e, 0x012f, 0x131, 0x328 },
|
||||
{ 0x012f, 0x012f, 0x131, 0x328 },
|
||||
{ 0x0130, 0, 0x131, 0x307 }, /* normal mapping is 0069 0307 */
|
||||
{ 0x01cf, 0x01d0, 0x131, 0x30c },
|
||||
{ 0x01d0, 0x01d0, 0x131, 0x30c },
|
||||
{ 0x0208, 0x0209, 0x131, 0x30f },
|
||||
{ 0x0209, 0x0209, 0x131, 0x30f },
|
||||
{ 0x020a, 0x020b, 0x131, 0x311 },
|
||||
{ 0x020b, 0x020b, 0x131, 0x311 },
|
||||
{ 0x1e2c, 0x1e2d, 0x131, 0x330 },
|
||||
{ 0x1e2d, 0x1e2d, 0x131, 0x330 },
|
||||
{ 0x1e2e, 0x1e2f, 0x131, 0x308, 0x301 },
|
||||
{ 0x1e2f, 0x1e2f, 0x131, 0x308, 0x301 },
|
||||
{ 0x1ec8, 0x1ec9, 0x131, 0x309 },
|
||||
{ 0x1ec9, 0x1ec9, 0x131, 0x309 },
|
||||
{ 0x1eca, 0x1ecb, 0x131, 0x323 },
|
||||
{ 0x1ecb, 0x1ecb, 0x131, 0x323 }
|
||||
};
|
||||
#endif
|
||||
|
||||
/* internal, see ustr_imp.h */
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_internalFoldCase(UChar32 c,
|
||||
|
@ -1966,9 +2016,37 @@ u_internalFoldCase(UChar32 c,
|
|||
dest[1]=0x307;
|
||||
}
|
||||
return 2;
|
||||
#if 0
|
||||
/* ### TODO Turkic-i case folding prototype, j2021 */
|
||||
} else if(c<=fold_T[LENGTHOF(fold_T)-1][0]) {
|
||||
for(i=0; i<LENGTHOF(fold_T) && c>=fold_T[i][0]; ++i) {
|
||||
if(c==fold_T[i][0]) {
|
||||
result=fold_T[i][1];
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
/* Turkic mappings */
|
||||
#if 0
|
||||
/* ### TODO Turkic-i case folding prototype, j2021 */
|
||||
if(c<=fold_T[LENGTHOF(fold_T)-1][0]) {
|
||||
for(i=0; i<LENGTHOF(fold_T) && c>=fold_T[i][0]; ++i) {
|
||||
if(c==fold_T[i][0]) {
|
||||
const UChar *p=&(fold_T[i][2]);
|
||||
length=0;
|
||||
while(length<FOLD_T_LENGTH && *p!=0) {
|
||||
if(length<destCapacity) {
|
||||
dest[length]=*p++;
|
||||
}
|
||||
++length;
|
||||
}
|
||||
return length;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
if(c==0x49) {
|
||||
/* 0049; T; 0131; # LATIN CAPITAL LETTER I */
|
||||
result=0x131;
|
||||
|
@ -1976,6 +2054,7 @@ u_internalFoldCase(UChar32 c,
|
|||
/* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
|
||||
result=0x69;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
/* return c itself because there is no special mapping for it */
|
||||
/* goto single; */
|
||||
|
|
|
@ -3100,41 +3100,39 @@ unorm_compose(UChar *dest, int32_t destCapacity,
|
|||
/**
|
||||
* Internal API for normalizing.
|
||||
* Does not check for bad input.
|
||||
* Requires _haveData() to be true.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
static int32_t
|
||||
unorm_internalNormalize(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UNormalizationMode mode, int32_t options,
|
||||
UNormalizationMode mode, const UnicodeSet *nx,
|
||||
UErrorCode *pErrorCode) {
|
||||
const UnicodeSet *nx;
|
||||
int32_t destLength;
|
||||
uint8_t trailCC;
|
||||
|
||||
switch(mode) {
|
||||
case UNORM_NFD:
|
||||
return unorm_decompose(dest, destCapacity,
|
||||
src, srcLength,
|
||||
FALSE, options,
|
||||
pErrorCode);
|
||||
destLength=_decompose(dest, destCapacity,
|
||||
src, srcLength,
|
||||
FALSE, nx, trailCC);
|
||||
break;
|
||||
case UNORM_NFKD:
|
||||
return unorm_decompose(dest, destCapacity,
|
||||
src, srcLength,
|
||||
TRUE, options,
|
||||
pErrorCode);
|
||||
destLength=_decompose(dest, destCapacity,
|
||||
src, srcLength,
|
||||
TRUE, nx, trailCC);
|
||||
break;
|
||||
case UNORM_NFC:
|
||||
return unorm_compose(dest, destCapacity,
|
||||
src, srcLength,
|
||||
FALSE, options,
|
||||
pErrorCode);
|
||||
destLength=_compose(dest, destCapacity,
|
||||
src, srcLength,
|
||||
FALSE, nx, pErrorCode);
|
||||
break;
|
||||
case UNORM_NFKC:
|
||||
return unorm_compose(dest, destCapacity,
|
||||
src, srcLength,
|
||||
TRUE, options,
|
||||
pErrorCode);
|
||||
destLength=_compose(dest, destCapacity,
|
||||
src, srcLength,
|
||||
TRUE, nx, pErrorCode);
|
||||
break;
|
||||
case UNORM_FCD:
|
||||
nx=getNX(options, *pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
return unorm_makeFCD(dest, destCapacity,
|
||||
src, srcLength,
|
||||
nx,
|
||||
|
@ -3147,11 +3145,41 @@ unorm_internalNormalize(UChar *dest, int32_t destCapacity,
|
|||
if(srcLength>0 && srcLength<=destCapacity) {
|
||||
uprv_memcpy(dest, src, srcLength*U_SIZEOF_UCHAR);
|
||||
}
|
||||
return u_terminateUChars(dest, destCapacity, srcLength, pErrorCode);
|
||||
destLength=srcLength;
|
||||
break;
|
||||
default:
|
||||
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal API for normalizing.
|
||||
* Does not check for bad input.
|
||||
* Checks _haveData() itself, turns the options word into an exclusion set
|
||||
* via getNX(), and then delegates to the file-local overload that takes
|
||||
* the set (nx) instead of options.
|
||||
* dest/destCapacity, src/srcLength, mode and pErrorCode are forwarded
|
||||
* unchanged to that overload.
|
||||
* Returns 0 if _haveData() reports false or getNX() leaves a failure in
|
||||
* *pErrorCode; otherwise returns whatever the nx overload returns.
|
||||
* @internal
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
unorm_internalNormalize(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
UNormalizationMode mode, int32_t options,
|
||||
UErrorCode *pErrorCode) {
|
||||
const UnicodeSet *nx;
|
||||
|
|
||||
/* the nx overload requires _haveData() to be true, so check it here */
if(!_haveData(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
|
||||
/* resolve options to the set handed to the worker functions */
nx=getNX(options, *pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
|
||||
/* same name, different signature: this calls the overload taking nx */
return unorm_internalNormalize(dest, destCapacity,
|
||||
src, srcLength,
|
||||
mode, nx,
|
||||
pErrorCode);
|
||||
}
|
||||
|
||||
/** Public API for normalizing. */
|
||||
|
@ -4300,6 +4328,7 @@ unorm_compare(const UChar *s1, int32_t length1,
|
|||
UChar fcd1[300], fcd2[300];
|
||||
UChar *d1, *d2;
|
||||
const UnicodeSet *nx;
|
||||
UNormalizationMode mode;
|
||||
int32_t result;
|
||||
|
||||
/* argument checking */
|
||||
|
@ -4341,28 +4370,46 @@ unorm_compare(const UChar *s1, int32_t length1,
|
|||
* case-folding preserves the FCD-ness of a string.
|
||||
* The outer normalization is then only performed by unorm_cmpEquivFold()
|
||||
* when there is a difference.
|
||||
*
|
||||
* Exception: When using the Turkic case-folding option, we do perform
|
||||
* full NFD first. This is because in the Turkic case precomposed characters
|
||||
* with 0049 capital I or 0069 small i fold differently depending on whether
|
||||
* they are first decomposed or not, so an FCD check - a check only for
|
||||
* canonical order - is not sufficient.
|
||||
*/
|
||||
if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) {
|
||||
mode=UNORM_NFD;
|
||||
options&=~UNORM_INPUT_IS_FCD;
|
||||
} else {
|
||||
mode=UNORM_FCD;
|
||||
}
|
||||
|
||||
if(!(options&UNORM_INPUT_IS_FCD)) {
|
||||
int32_t _len1, _len2;
|
||||
UBool isFCD1, isFCD2;
|
||||
|
||||
// check if s1 and/or s2 already fulfill the conditions for mode (FCD, or NFD with the Turkic option)
|
||||
isFCD1=unorm_checkFCD(s1, length1, nx);
|
||||
isFCD2=unorm_checkFCD(s2, length2, nx);
|
||||
isFCD1= UNORM_YES==_quickCheck(s1, length1, mode, TRUE, nx, pErrorCode);
|
||||
isFCD2= UNORM_YES==_quickCheck(s2, length2, mode, TRUE, nx, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(!isFCD1 && !isFCD2) {
|
||||
// if both strings need normalization then make them NFD right away and
|
||||
// turn off normalization in the comparison function
|
||||
uint8_t trailCC;
|
||||
/*
|
||||
* ICU 2.4 had a further optimization:
|
||||
* If both strings were not in FCD, then they were both NFD'ed,
|
||||
* and the _COMPARE_EQUIV option was turned off.
|
||||
* It is not entirely clear that this is valid with the current
|
||||
* definition of the canonical caseless match.
|
||||
* Therefore, ICU 2.6 removes that optimization.
|
||||
*/
|
||||
|
||||
// fully decompose (NFD) s1 and s2
|
||||
|
||||
_len1=_decompose(fcd1, sizeof(fcd1)/U_SIZEOF_UCHAR,
|
||||
s1, length1,
|
||||
FALSE, nx,
|
||||
trailCC);
|
||||
if(_len1<=(int32_t)(sizeof(fcd1)/U_SIZEOF_UCHAR)) {
|
||||
if(!isFCD1) {
|
||||
_len1=unorm_internalNormalize(fcd1, LENGTHOF(fcd1),
|
||||
s1, length1,
|
||||
mode, nx,
|
||||
pErrorCode);
|
||||
if(*pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
|
||||
s1=fcd1;
|
||||
} else {
|
||||
d1=(UChar *)uprv_malloc(_len1*U_SIZEOF_UCHAR);
|
||||
|
@ -4371,20 +4418,26 @@ unorm_compare(const UChar *s1, int32_t length1,
|
|||
goto cleanup;
|
||||
}
|
||||
|
||||
_len1=_decompose(d1, _len1,
|
||||
s1, length1,
|
||||
FALSE, nx,
|
||||
trailCC);
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
_len1=unorm_internalNormalize(d1, _len1,
|
||||
s1, length1,
|
||||
mode, nx,
|
||||
pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
s1=d1;
|
||||
}
|
||||
length1=_len1;
|
||||
}
|
||||
|
||||
_len2=_decompose(fcd2, sizeof(fcd2)/U_SIZEOF_UCHAR,
|
||||
s2, length2,
|
||||
FALSE, nx,
|
||||
trailCC);
|
||||
if(_len2<=(int32_t)(sizeof(fcd2)/U_SIZEOF_UCHAR)) {
|
||||
if(!isFCD2) {
|
||||
_len2=unorm_internalNormalize(fcd2, LENGTHOF(fcd2),
|
||||
s2, length2,
|
||||
mode, nx,
|
||||
pErrorCode);
|
||||
if(*pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
|
||||
s2=fcd2;
|
||||
} else {
|
||||
d2=(UChar *)uprv_malloc(_len2*U_SIZEOF_UCHAR);
|
||||
|
@ -4393,85 +4446,22 @@ unorm_compare(const UChar *s1, int32_t length1,
|
|||
goto cleanup;
|
||||
}
|
||||
|
||||
_len2=_decompose(d2, _len2,
|
||||
s2, length2,
|
||||
FALSE, nx,
|
||||
trailCC);
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
_len2=unorm_internalNormalize(d2, _len2,
|
||||
s2, length2,
|
||||
mode, nx,
|
||||
pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
s2=d2;
|
||||
}
|
||||
length2=_len2;
|
||||
|
||||
// compare NFD strings
|
||||
options&=~_COMPARE_EQUIV;
|
||||
} else {
|
||||
// if at least one string is already in FCD then only makeFCD the other
|
||||
// and compare for equivalence
|
||||
if(!isFCD1) {
|
||||
_len1=unorm_makeFCD(fcd1, sizeof(fcd1)/U_SIZEOF_UCHAR,
|
||||
s1, length1,
|
||||
nx,
|
||||
pErrorCode);
|
||||
if(*pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
|
||||
s1=fcd1;
|
||||
} else {
|
||||
d1=(UChar *)uprv_malloc(_len1*U_SIZEOF_UCHAR);
|
||||
if(d1==0) {
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
_len1=unorm_makeFCD(d1, _len1,
|
||||
s1, length1,
|
||||
nx,
|
||||
pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
s1=d1;
|
||||
}
|
||||
length1=_len1;
|
||||
}
|
||||
|
||||
if(!isFCD2) {
|
||||
_len2=unorm_makeFCD(fcd2, sizeof(fcd2)/U_SIZEOF_UCHAR,
|
||||
s2, length2,
|
||||
nx,
|
||||
pErrorCode);
|
||||
if(*pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
|
||||
s2=fcd2;
|
||||
} else {
|
||||
d2=(UChar *)uprv_malloc(_len2*U_SIZEOF_UCHAR);
|
||||
if(d2==0) {
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
_len2=unorm_makeFCD(d2, _len2,
|
||||
s2, length2,
|
||||
nx,
|
||||
pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
s2=d2;
|
||||
}
|
||||
length2=_len2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
// do nothing
|
||||
} else if(!(options&(_COMPARE_EQUIV|U_COMPARE_IGNORE_CASE))) {
|
||||
// compare NFD strings case-sensitive: just use normal comparison
|
||||
result=uprv_strCompare(s1, length1, s2, length2,
|
||||
FALSE, (UBool)(0!=(options&U_COMPARE_CODE_POINT_ORDER)));
|
||||
} else {
|
||||
if(U_SUCCESS(*pErrorCode)) {
|
||||
result=unorm_cmpEquivFold(s1, length1, s2, length2, options, pErrorCode);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# CaseFolding-3.2.0.txt
|
||||
# Date: 2002-03-22,20:54:33 GMT [MD]
|
||||
# CaseFolding-4.0.0.txt
|
||||
# Date: 2003-02-14,16:49:03 GMT [MD]
|
||||
#
|
||||
# Case Folding Properties
|
||||
#
|
||||
|
@ -70,6 +70,7 @@
|
|||
0058; C; 0078; # LATIN CAPITAL LETTER X
|
||||
0059; C; 0079; # LATIN CAPITAL LETTER Y
|
||||
005A; C; 007A; # LATIN CAPITAL LETTER Z
|
||||
###0069; T; 0131 0307; # LATIN SMALL LETTER I
|
||||
00B5; C; 03BC; # MICRO SIGN
|
||||
00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
|
@ -84,9 +85,13 @@
|
|||
00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
###00CC; T; 0131 0300; # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
###00CD; T; 0131 0301; # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
###00CE; T; 0131 0302; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
###00CF; T; 0131 0308; # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
|
||||
00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
|
||||
00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
|
@ -102,6 +107,10 @@
|
|||
00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
|
||||
00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
|
||||
###00EC; T; 0131 0300; # LATIN SMALL LETTER I WITH GRAVE
|
||||
###00ED; T; 0131 0301; # LATIN SMALL LETTER I WITH ACUTE
|
||||
###00EE; T; 0131 0302; # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
###00EF; T; 0131 0308; # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON
|
||||
0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE
|
||||
0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK
|
||||
|
@ -123,11 +132,20 @@
|
|||
0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
|
||||
0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE
|
||||
0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE
|
||||
###0128; T; 0131 0303; # LATIN CAPITAL LETTER I WITH TILDE
|
||||
###0129; T; 0131 0303; # LATIN SMALL LETTER I WITH TILDE
|
||||
012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON
|
||||
###012A; T; 0131 0304; # LATIN CAPITAL LETTER I WITH MACRON
|
||||
###012B; T; 0131 0304; # LATIN SMALL LETTER I WITH MACRON
|
||||
012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE
|
||||
###012C; T; 0131 0306; # LATIN CAPITAL LETTER I WITH BREVE
|
||||
###012D; T; 0131 0306; # LATIN SMALL LETTER I WITH BREVE
|
||||
012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
###012E; T; 0131 0328; # LATIN CAPITAL LETTER I WITH OGONEK
|
||||
###012F; T; 0131 0328; # LATIN SMALL LETTER I WITH OGONEK
|
||||
0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
###0130; T; 0131 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
0132; C; 0133; # LATIN CAPITAL LIGATURE IJ
|
||||
0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
|
||||
0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA
|
||||
|
@ -212,6 +230,8 @@
|
|||
01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
|
||||
01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON
|
||||
01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON
|
||||
###01CF; T; 0131 030C; # LATIN CAPITAL LETTER I WITH CARON
|
||||
###01D0; T; 0131 030C; # LATIN SMALL LETTER I WITH CARON
|
||||
01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON
|
||||
01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON
|
||||
01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
||||
|
@ -242,7 +262,11 @@
|
|||
0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
|
||||
0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE
|
||||
0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
|
||||
###0208; T; 0131 030F; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
|
||||
###0209; T; 0131 030F; # LATIN SMALL LETTER I WITH DOUBLE GRAVE
|
||||
020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
|
||||
###020A; T; 0131 0311; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
|
||||
###020B; T; 0131 0311; # LATIN SMALL LETTER I WITH INVERTED BREVE
|
||||
020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
|
||||
020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE
|
||||
0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
|
||||
|
@ -318,9 +342,11 @@
|
|||
03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI
|
||||
03F0; C; 03BA; # GREEK KAPPA SYMBOL
|
||||
03F1; C; 03C1; # GREEK RHO SYMBOL
|
||||
03F2; C; 03C3; # GREEK LUNATE SIGMA SYMBOL
|
||||
03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL
|
||||
03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL
|
||||
#03F7; C; 03F8; # GREEK CAPITAL LETTER SHO
|
||||
#03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL
|
||||
#03FA; C; 03FB; # GREEK CAPITAL LETTER SAN
|
||||
0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE
|
||||
0401; C; 0451; # CYRILLIC CAPITAL LETTER IO
|
||||
0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE
|
||||
|
@ -510,7 +536,11 @@
|
|||
1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA
|
||||
1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW
|
||||
1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW
|
||||
###1E2C; T; 0131 0330; # LATIN CAPITAL LETTER I WITH TILDE BELOW
|
||||
###1E2D; T; 0131 0330; # LATIN SMALL LETTER I WITH TILDE BELOW
|
||||
1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
|
||||
###1E2E; T; 0131 0308 0301; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
|
||||
###1E2F; T; 0131 0308 0301; # LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
|
||||
1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE
|
||||
1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW
|
||||
1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW
|
||||
|
@ -589,7 +619,11 @@
|
|||
1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
|
||||
1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
|
||||
1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
|
||||
###1EC8; T; 0131 0309; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
|
||||
###1EC9; T; 0131 0309; # LATIN SMALL LETTER I WITH HOOK ABOVE
|
||||
1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW
|
||||
###1ECA; T; 0131 0323; # LATIN CAPITAL LETTER I WITH DOT BELOW
|
||||
###1ECB; T; 0131 0323; # LATIN SMALL LETTER I WITH DOT BELOW
|
||||
1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW
|
||||
1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE
|
||||
1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
|
||||
|
@ -910,3 +944,5 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
|
|||
10423; C; 1044B; # DESERET CAPITAL LETTER EM
|
||||
10424; C; 1044C; # DESERET CAPITAL LETTER EN
|
||||
10425; C; 1044D; # DESERET CAPITAL LETTER ENG
|
||||
#10426; C; 1044E; # DESERET CAPITAL LETTER OI
|
||||
#10427; C; 1044F; # DESERET CAPITAL LETTER EW
|
||||
|
|
Loading…
Add table
Reference in a new issue