ICU-8972 parse ppucd.txt case mappings

X-SVN-Rev: 31182
This commit is contained in:
Markus Scherer 2012-01-06 01:02:02 +00:00
parent 9cd2b8cbd6
commit 28a30d3c2d
2 changed files with 55 additions and 12 deletions

View file

@ -42,6 +42,7 @@ PropertyNames::getPropertyValueEnum(int32_t property, const char *name) const {
UniProps::UniProps()
: start(U_SENTINEL), end(U_SENTINEL),
bmg(U_SENTINEL),
scf(U_SENTINEL), slc(U_SENTINEL), stc(U_SENTINEL), suc(U_SENTINEL),
digitValue(-1), numericValue(NULL),
name(NULL), nameAlias(NULL) {
memset(binProps, 0, sizeof(binProps));
@ -258,7 +259,9 @@ static const struct {
const char *name;
int32_t prop;
} ppucdProperties[]={
{ "Name_Alias", PPUCD_NAME_ALIAS }
{ "Name_Alias", PPUCD_NAME_ALIAS },
{ "Conditional_Case_Mappings", PPUCD_CONDITIONAL_CASE_MAPPINGS },
{ "Turkic_Case_Folding", PPUCD_TURKIC_CASE_FOLDING }
};
// Returns TRUE for "ok to continue parsing fields".
@ -345,21 +348,33 @@ PreparsedUCD::parseProperty(UniProps &props, const char *field, UnicodeSet &newV
case UCHAR_BIDI_MIRRORING_GLYPH:
props.bmg=U_SENTINEL;
break;
case UCHAR_SIMPLE_CASE_FOLDING:
props.scf=U_SENTINEL;
break;
case UCHAR_SIMPLE_LOWERCASE_MAPPING:
props.slc=U_SENTINEL;
break;
case UCHAR_SIMPLE_TITLECASE_MAPPING:
props.stc=U_SENTINEL;
break;
case UCHAR_SIMPLE_UPPERCASE_MAPPING:
props.suc=U_SENTINEL;
break;
case UCHAR_CASE_FOLDING:
props.cf.remove();
break;
case UCHAR_LOWERCASE_MAPPING:
props.lc.remove();
break;
case UCHAR_TITLECASE_MAPPING:
props.tc.remove();
break;
case UCHAR_UPPERCASE_MAPPING:
props.uc.remove();
break;
case UCHAR_SCRIPT_EXTENSIONS:
props.scx.clear();
break;
case UCHAR_LOWERCASE_MAPPING:
case UCHAR_SIMPLE_CASE_FOLDING:
case UCHAR_SIMPLE_LOWERCASE_MAPPING:
case UCHAR_SIMPLE_TITLECASE_MAPPING:
case UCHAR_SIMPLE_UPPERCASE_MAPPING:
case UCHAR_TITLECASE_MAPPING:
case UCHAR_UPPERCASE_MAPPING:
// Ignore unhandled properties.
break;
default:
fprintf(stderr,
"error in preparsed UCD: '%s' is not a valid default value on line %ld\n",
@ -387,12 +402,37 @@ PreparsedUCD::parseProperty(UniProps &props, const char *field, UnicodeSet &newV
case UCHAR_BIDI_MIRRORING_GLYPH:
props.bmg=parseCodePoint(v, errorCode);
break;
case UCHAR_SIMPLE_CASE_FOLDING:
props.scf=parseCodePoint(v, errorCode);
break;
case UCHAR_SIMPLE_LOWERCASE_MAPPING:
props.slc=parseCodePoint(v, errorCode);
break;
case UCHAR_SIMPLE_TITLECASE_MAPPING:
props.stc=parseCodePoint(v, errorCode);
break;
case UCHAR_SIMPLE_UPPERCASE_MAPPING:
props.suc=parseCodePoint(v, errorCode);
break;
case UCHAR_CASE_FOLDING:
parseString(v, props.cf, errorCode);
break;
case UCHAR_LOWERCASE_MAPPING:
parseString(v, props.lc, errorCode);
break;
case UCHAR_TITLECASE_MAPPING:
parseString(v, props.tc, errorCode);
break;
case UCHAR_UPPERCASE_MAPPING:
parseString(v, props.uc, errorCode);
break;
case PPUCD_NAME_ALIAS:
props.nameAlias=v;
break;
case PPUCD_CONDITIONAL_CASE_MAPPINGS:
case PPUCD_TURKIC_CASE_FOLDING:
// No need to parse their values: They are hardcoded in the runtime library.
break;
case UCHAR_SCRIPT_EXTENSIONS:
parseScriptExtensions(v, props.scx, errorCode);
break;

View file

@ -24,7 +24,9 @@
/**
 * Additions to the uchar.h enum UProperty.
 * The values start at UCHAR_STRING_LIMIT and are matched by name
 * through the ppucdProperties[] table in the parser.
 */
enum {
/** Name_Alias */
PPUCD_NAME_ALIAS=UCHAR_STRING_LIMIT
PPUCD_NAME_ALIAS=UCHAR_STRING_LIMIT,
/** Conditional_Case_Mappings; recognized by the parser, but its value is not parsed. */
PPUCD_CONDITIONAL_CASE_MAPPINGS,
/** Turkic_Case_Folding; recognized by the parser, but its value is not parsed. */
PPUCD_TURKIC_CASE_FOLDING
};
U_NAMESPACE_BEGIN
@ -47,11 +49,12 @@ struct U_TOOLUTIL_API UniProps {
int32_t intProps[UCHAR_INT_LIMIT-UCHAR_INT_START];
UVersionInfo age;
UChar32 bmg;
UChar32 scf, slc, stc, suc;
int32_t digitValue;
const char *numericValue;
const char *name;
const char *nameAlias;
UnicodeString cf;
UnicodeString cf, lc, tc, uc;
UnicodeSet scx;
};