ICU-2021 another fix for canonical caseless match, still prototyping

X-SVN-Rev: 11209
This commit is contained in:
Markus Scherer 2003-03-01 00:13:40 +00:00
parent e9e4feea24
commit 353f8ee0eb
3 changed files with 226 additions and 121 deletions

View file

@ -32,6 +32,8 @@
#include "ustr_imp.h"
#include "uprops.h"
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
/* statically loaded Unicode character properties -------------------------- */
/* MACHINE-GENERATED: Do not edit (see com.ibm.icu.dev.tools.translit.UnicodeSetCloseOver) */
@ -1912,6 +1914,54 @@ u_foldCase(UChar32 c, uint32_t options) {
return c; /* no mapping - return c itself */
}
#if 0
/* ### TODO Turkic-i case folding prototype, j2021 */
/*
 * Everything up to the matching #endif is compiled out; it is prototype data
 * only.
 *
 * FOLD_T_LENGTH is the number of UChar slots reserved for the Turkic mapping
 * in each fold_T row. The longest rows below (0x1e2e and 0x1e2f) use all
 * three slots.
 */
enum {
FOLD_T_LENGTH=3
};
/*
 * Turkic full case foldings.
 *
 * Each row holds 2+FOLD_T_LENGTH UChars:
 *   [0]     the source code unit
 *   [1]     the default mapping; it is 0 in the 0x0130 row because the
 *           normal mapping for that character is the sequence 0069 0307,
 *           which does not fit into this single slot
 *   [2...]  the Turkic mapping, up to FOLD_T_LENGTH units; rows with a
 *           shorter mapping leave the remaining slots zero-initialized,
 *           and the prototype reader stops at the first 0
 *
 * The rows must stay sorted in ascending order of the source value:
 * the prototype lookups in u_internalFoldCase() first compare c against the
 * source of the last row and then scan from the start only while
 * c>=fold_T[i][0].
 */
static const UChar fold_T[][2+FOLD_T_LENGTH]={
{ 0x0049, 0x0069, 0x131 },
{ 0x0069, 0x0069, 0x131, 0x307 },
{ 0x00cc, 0x00ec, 0x131, 0x300 },
{ 0x00cd, 0x00ed, 0x131, 0x301 },
{ 0x00ce, 0x00ee, 0x131, 0x302 },
{ 0x00cf, 0x00ef, 0x131, 0x308 },
{ 0x00ec, 0x00ec, 0x131, 0x300 },
{ 0x00ed, 0x00ed, 0x131, 0x301 },
{ 0x00ee, 0x00ee, 0x131, 0x302 },
{ 0x00ef, 0x00ef, 0x131, 0x308 },
{ 0x0128, 0x0129, 0x131, 0x303 },
{ 0x0129, 0x0129, 0x131, 0x303 },
{ 0x012a, 0x012b, 0x131, 0x304 },
{ 0x012b, 0x012b, 0x131, 0x304 },
{ 0x012c, 0x012d, 0x131, 0x306 },
{ 0x012d, 0x012d, 0x131, 0x306 },
{ 0x012e, 0x012f, 0x131, 0x328 },
{ 0x012f, 0x012f, 0x131, 0x328 },
{ 0x0130, 0, 0x131, 0x307 }, /* normal mapping is 0069 0307 */
{ 0x01cf, 0x01d0, 0x131, 0x30c },
{ 0x01d0, 0x01d0, 0x131, 0x30c },
{ 0x0208, 0x0209, 0x131, 0x30f },
{ 0x0209, 0x0209, 0x131, 0x30f },
{ 0x020a, 0x020b, 0x131, 0x311 },
{ 0x020b, 0x020b, 0x131, 0x311 },
{ 0x1e2c, 0x1e2d, 0x131, 0x330 },
{ 0x1e2d, 0x1e2d, 0x131, 0x330 },
{ 0x1e2e, 0x1e2f, 0x131, 0x308, 0x301 },
{ 0x1e2f, 0x1e2f, 0x131, 0x308, 0x301 },
{ 0x1ec8, 0x1ec9, 0x131, 0x309 },
{ 0x1ec9, 0x1ec9, 0x131, 0x309 },
{ 0x1eca, 0x1ecb, 0x131, 0x323 },
{ 0x1ecb, 0x1ecb, 0x131, 0x323 }
};
#endif
/* internal, see ustr_imp.h */
U_CAPI int32_t U_EXPORT2
u_internalFoldCase(UChar32 c,
@ -1966,9 +2016,37 @@ u_internalFoldCase(UChar32 c,
dest[1]=0x307;
}
return 2;
#if 0
/* ### TODO Turkic-i case folding prototype, j2021 */
} else if(c<=fold_T[LENGTHOF(fold_T)-1][0]) {
for(i=0; i<LENGTHOF(fold_T) && c>=fold_T[i][0]; ++i) {
if(c==fold_T[i][0]) {
result=fold_T[i][1];
break;
}
}
#endif
}
} else {
/* Turkic mappings */
#if 0
/* ### TODO Turkic-i case folding prototype, j2021 */
if(c<=fold_T[LENGTHOF(fold_T)-1][0]) {
for(i=0; i<LENGTHOF(fold_T) && c>=fold_T[i][0]; ++i) {
if(c==fold_T[i][0]) {
const UChar *p=&(fold_T[i][2]);
length=0;
while(length<FOLD_T_LENGTH && *p!=0) {
if(length<destCapacity) {
dest[length]=*p++;
}
++length;
}
return length;
}
}
}
#else
if(c==0x49) {
/* 0049; T; 0131; # LATIN CAPITAL LETTER I */
result=0x131;
@ -1976,6 +2054,7 @@ u_internalFoldCase(UChar32 c,
/* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
result=0x69;
}
#endif
}
/* return c itself because there is no special mapping for it */
/* goto single; */

View file

@ -3100,41 +3100,39 @@ unorm_compose(UChar *dest, int32_t destCapacity,
/**
* Internal API for normalizing.
* Does not check for bad input.
* Requires _haveData() to be true.
* @internal
*/
U_CAPI int32_t U_EXPORT2
static int32_t
unorm_internalNormalize(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UNormalizationMode mode, int32_t options,
UNormalizationMode mode, const UnicodeSet *nx,
UErrorCode *pErrorCode) {
const UnicodeSet *nx;
int32_t destLength;
uint8_t trailCC;
switch(mode) {
case UNORM_NFD:
return unorm_decompose(dest, destCapacity,
src, srcLength,
FALSE, options,
pErrorCode);
destLength=_decompose(dest, destCapacity,
src, srcLength,
FALSE, nx, trailCC);
break;
case UNORM_NFKD:
return unorm_decompose(dest, destCapacity,
src, srcLength,
TRUE, options,
pErrorCode);
destLength=_decompose(dest, destCapacity,
src, srcLength,
TRUE, nx, trailCC);
break;
case UNORM_NFC:
return unorm_compose(dest, destCapacity,
src, srcLength,
FALSE, options,
pErrorCode);
destLength=_compose(dest, destCapacity,
src, srcLength,
FALSE, nx, pErrorCode);
break;
case UNORM_NFKC:
return unorm_compose(dest, destCapacity,
src, srcLength,
TRUE, options,
pErrorCode);
destLength=_compose(dest, destCapacity,
src, srcLength,
TRUE, nx, pErrorCode);
break;
case UNORM_FCD:
nx=getNX(options, *pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return 0;
}
return unorm_makeFCD(dest, destCapacity,
src, srcLength,
nx,
@ -3147,11 +3145,41 @@ unorm_internalNormalize(UChar *dest, int32_t destCapacity,
if(srcLength>0 && srcLength<=destCapacity) {
uprv_memcpy(dest, src, srcLength*U_SIZEOF_UCHAR);
}
return u_terminateUChars(dest, destCapacity, srcLength, pErrorCode);
destLength=srcLength;
break;
default:
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}
/**
 * Internal API for normalizing, taking an options bit set.
 *
 * Checks _haveData(), obtains the UnicodeSet for the given options via
 * getNX(), and then delegates to the unorm_internalNormalize() overload
 * that accepts that set directly.
 * Does not check for bad input.
 *
 * @return the value returned by the delegate, or 0 if _haveData() reports
 *         false or getNX() leaves a failure code in *pErrorCode
 * @internal
 */
U_CAPI int32_t U_EXPORT2
unorm_internalNormalize(UChar *dest, int32_t destCapacity,
                        const UChar *src, int32_t srcLength,
                        UNormalizationMode mode, int32_t options,
                        UErrorCode *pErrorCode) {
    if(_haveData(*pErrorCode)) {
        const UnicodeSet *nx=getNX(options, *pErrorCode);

        if(!U_FAILURE(*pErrorCode)) {
            /* hand over to the overload that works with the set itself */
            return unorm_internalNormalize(dest, destCapacity,
                                           src, srcLength,
                                           mode, nx,
                                           pErrorCode);
        }
    }

    /* either _haveData() was false or getNX() failed */
    return 0;
}
/** Public API for normalizing. */
@ -4300,6 +4328,7 @@ unorm_compare(const UChar *s1, int32_t length1,
UChar fcd1[300], fcd2[300];
UChar *d1, *d2;
const UnicodeSet *nx;
UNormalizationMode mode;
int32_t result;
/* argument checking */
@ -4341,28 +4370,46 @@ unorm_compare(const UChar *s1, int32_t length1,
* case-folding preserves the FCD-ness of a string.
* The outer normalization is then only performed by unorm_cmpEquivFold()
* when there is a difference.
*
* Exception: When using the Turkic case-folding option, we do perform
* full NFD first. This is because in the Turkic case precomposed characters
* with 0049 capital I or 0069 small i fold differently depending on whether
* they are first decomposed or not, so an FCD check - a check only for
* canonical order - is not sufficient.
*/
if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) {
mode=UNORM_NFD;
options&=~UNORM_INPUT_IS_FCD;
} else {
mode=UNORM_FCD;
}
if(!(options&UNORM_INPUT_IS_FCD)) {
int32_t _len1, _len2;
UBool isFCD1, isFCD2;
// check if s1 and/or s2 fulfill the FCD conditions
isFCD1=unorm_checkFCD(s1, length1, nx);
isFCD2=unorm_checkFCD(s2, length2, nx);
isFCD1= UNORM_YES==_quickCheck(s1, length1, mode, TRUE, nx, pErrorCode);
isFCD2= UNORM_YES==_quickCheck(s2, length2, mode, TRUE, nx, pErrorCode);
if(U_FAILURE(*pErrorCode)) {
return 0;
}
if(!isFCD1 && !isFCD2) {
// if both strings need normalization then make them NFD right away and
// turn off normalization in the comparison function
uint8_t trailCC;
/*
* ICU 2.4 had a further optimization:
* If both strings were not in FCD, then they were both NFD'ed,
* and the _COMPARE_EQUIV option was turned off.
* It is not entirely clear that this is valid with the current
* definition of the canonical caseless match.
* Therefore, ICU 2.6 removes that optimization.
*/
// fully decompose (NFD) s1 and s2
_len1=_decompose(fcd1, sizeof(fcd1)/U_SIZEOF_UCHAR,
s1, length1,
FALSE, nx,
trailCC);
if(_len1<=(int32_t)(sizeof(fcd1)/U_SIZEOF_UCHAR)) {
if(!isFCD1) {
_len1=unorm_internalNormalize(fcd1, LENGTHOF(fcd1),
s1, length1,
mode, nx,
pErrorCode);
if(*pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
s1=fcd1;
} else {
d1=(UChar *)uprv_malloc(_len1*U_SIZEOF_UCHAR);
@ -4371,20 +4418,26 @@ unorm_compare(const UChar *s1, int32_t length1,
goto cleanup;
}
_len1=_decompose(d1, _len1,
s1, length1,
FALSE, nx,
trailCC);
*pErrorCode=U_ZERO_ERROR;
_len1=unorm_internalNormalize(d1, _len1,
s1, length1,
mode, nx,
pErrorCode);
if(U_FAILURE(*pErrorCode)) {
goto cleanup;
}
s1=d1;
}
length1=_len1;
}
_len2=_decompose(fcd2, sizeof(fcd2)/U_SIZEOF_UCHAR,
s2, length2,
FALSE, nx,
trailCC);
if(_len2<=(int32_t)(sizeof(fcd2)/U_SIZEOF_UCHAR)) {
if(!isFCD2) {
_len2=unorm_internalNormalize(fcd2, LENGTHOF(fcd2),
s2, length2,
mode, nx,
pErrorCode);
if(*pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
s2=fcd2;
} else {
d2=(UChar *)uprv_malloc(_len2*U_SIZEOF_UCHAR);
@ -4393,85 +4446,22 @@ unorm_compare(const UChar *s1, int32_t length1,
goto cleanup;
}
_len2=_decompose(d2, _len2,
s2, length2,
FALSE, nx,
trailCC);
*pErrorCode=U_ZERO_ERROR;
_len2=unorm_internalNormalize(d2, _len2,
s2, length2,
mode, nx,
pErrorCode);
if(U_FAILURE(*pErrorCode)) {
goto cleanup;
}
s2=d2;
}
length2=_len2;
// compare NFD strings
options&=~_COMPARE_EQUIV;
} else {
// if at least one string is already in FCD then only makeFCD the other
// and compare for equivalence
if(!isFCD1) {
_len1=unorm_makeFCD(fcd1, sizeof(fcd1)/U_SIZEOF_UCHAR,
s1, length1,
nx,
pErrorCode);
if(*pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
s1=fcd1;
} else {
d1=(UChar *)uprv_malloc(_len1*U_SIZEOF_UCHAR);
if(d1==0) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
goto cleanup;
}
*pErrorCode=U_ZERO_ERROR;
_len1=unorm_makeFCD(d1, _len1,
s1, length1,
nx,
pErrorCode);
if(U_FAILURE(*pErrorCode)) {
goto cleanup;
}
s1=d1;
}
length1=_len1;
}
if(!isFCD2) {
_len2=unorm_makeFCD(fcd2, sizeof(fcd2)/U_SIZEOF_UCHAR,
s2, length2,
nx,
pErrorCode);
if(*pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
s2=fcd2;
} else {
d2=(UChar *)uprv_malloc(_len2*U_SIZEOF_UCHAR);
if(d2==0) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
goto cleanup;
}
*pErrorCode=U_ZERO_ERROR;
_len2=unorm_makeFCD(d2, _len2,
s2, length2,
nx,
pErrorCode);
if(U_FAILURE(*pErrorCode)) {
goto cleanup;
}
s2=d2;
}
length2=_len2;
}
}
}
if(U_FAILURE(*pErrorCode)) {
// do nothing
} else if(!(options&(_COMPARE_EQUIV|U_COMPARE_IGNORE_CASE))) {
// compare NFD strings case-sensitive: just use normal comparison
result=uprv_strCompare(s1, length1, s2, length2,
FALSE, (UBool)(0!=(options&U_COMPARE_CODE_POINT_ORDER)));
} else {
if(U_SUCCESS(*pErrorCode)) {
result=unorm_cmpEquivFold(s1, length1, s2, length2, options, pErrorCode);
}

View file

@ -1,5 +1,5 @@
# CaseFolding-3.2.0.txt
# Date: 2002-03-22,20:54:33 GMT [MD]
# CaseFolding-4.0.0.txt
# Date: 2003-02-14,16:49:03 GMT [MD]
#
# Case Folding Properties
#
@ -70,6 +70,7 @@
0058; C; 0078; # LATIN CAPITAL LETTER X
0059; C; 0079; # LATIN CAPITAL LETTER Y
005A; C; 007A; # LATIN CAPITAL LETTER Z
###0069; T; 0131 0307; # LATIN SMALL LETTER I
00B5; C; 03BC; # MICRO SIGN
00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
@ -84,9 +85,13 @@
00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
###00CC; T; 0131 0300; # LATIN CAPITAL LETTER I WITH GRAVE
00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
###00CD; T; 0131 0301; # LATIN CAPITAL LETTER I WITH ACUTE
00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
###00CE; T; 0131 0302; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
###00CF; T; 0131 0308; # LATIN CAPITAL LETTER I WITH DIAERESIS
00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
@ -102,6 +107,10 @@
00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
###00EC; T; 0131 0300; # LATIN SMALL LETTER I WITH GRAVE
###00ED; T; 0131 0301; # LATIN SMALL LETTER I WITH ACUTE
###00EE; T; 0131 0302; # LATIN SMALL LETTER I WITH CIRCUMFLEX
###00EF; T; 0131 0308; # LATIN SMALL LETTER I WITH DIAERESIS
0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON
0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE
0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK
@ -123,11 +132,20 @@
0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE
0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE
###0128; T; 0131 0303; # LATIN CAPITAL LETTER I WITH TILDE
###0129; T; 0131 0303; # LATIN SMALL LETTER I WITH TILDE
012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON
###012A; T; 0131 0304; # LATIN CAPITAL LETTER I WITH MACRON
###012B; T; 0131 0304; # LATIN SMALL LETTER I WITH MACRON
012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE
###012C; T; 0131 0306; # LATIN CAPITAL LETTER I WITH BREVE
###012D; T; 0131 0306; # LATIN SMALL LETTER I WITH BREVE
012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK
###012E; T; 0131 0328; # LATIN CAPITAL LETTER I WITH OGONEK
###012F; T; 0131 0328; # LATIN SMALL LETTER I WITH OGONEK
0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
###0130; T; 0131 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
0132; C; 0133; # LATIN CAPITAL LIGATURE IJ
0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA
@ -212,6 +230,8 @@
01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON
01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON
###01CF; T; 0131 030C; # LATIN CAPITAL LETTER I WITH CARON
###01D0; T; 0131 030C; # LATIN SMALL LETTER I WITH CARON
01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON
01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON
01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
@ -242,7 +262,11 @@
0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE
0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
###0208; T; 0131 030F; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
###0209; T; 0131 030F; # LATIN SMALL LETTER I WITH DOUBLE GRAVE
020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
###020A; T; 0131 0311; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
###020B; T; 0131 0311; # LATIN SMALL LETTER I WITH INVERTED BREVE
020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE
0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
@ -318,9 +342,11 @@
03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI
03F0; C; 03BA; # GREEK KAPPA SYMBOL
03F1; C; 03C1; # GREEK RHO SYMBOL
03F2; C; 03C3; # GREEK LUNATE SIGMA SYMBOL
03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL
03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL
#03F7; C; 03F8; # GREEK CAPITAL LETTER SHO
#03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL
#03FA; C; 03FB; # GREEK CAPITAL LETTER SAN
0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE
0401; C; 0451; # CYRILLIC CAPITAL LETTER IO
0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE
@ -510,7 +536,11 @@
1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA
1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW
1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW
###1E2C; T; 0131 0330; # LATIN CAPITAL LETTER I WITH TILDE BELOW
###1E2D; T; 0131 0330; # LATIN SMALL LETTER I WITH TILDE BELOW
1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
###1E2E; T; 0131 0308 0301; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
###1E2F; T; 0131 0308 0301; # LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE
1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW
1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW
@ -589,7 +619,11 @@
1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
###1EC8; T; 0131 0309; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
###1EC9; T; 0131 0309; # LATIN SMALL LETTER I WITH HOOK ABOVE
1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW
###1ECA; T; 0131 0323; # LATIN CAPITAL LETTER I WITH DOT BELOW
###1ECB; T; 0131 0323; # LATIN SMALL LETTER I WITH DOT BELOW
1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW
1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE
1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
@ -910,3 +944,5 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
10423; C; 1044B; # DESERET CAPITAL LETTER EM
10424; C; 1044C; # DESERET CAPITAL LETTER EN
10425; C; 1044D; # DESERET CAPITAL LETTER ENG
#10426; C; 1044E; # DESERET CAPITAL LETTER OI
#10427; C; 1044F; # DESERET CAPITAL LETTER EW