ICU-2427 add Han numeric values

X-SVN-Rev: 11322
This commit is contained in:
Markus Scherer 2003-03-14 22:00:19 +00:00
parent aea8920b73
commit b89a8122f2
5 changed files with 1255 additions and 3 deletions

File diff suppressed because it is too large Load diff

View file

@ -2461,30 +2461,43 @@ TestNumericProperties(void) {
} values[]={
{ 0x0F33, U_NT_NUMERIC, -1./2. },
{ 0x0C66, U_NT_DECIMAL, 0 },
{ 0x96f6, U_NT_NUMERIC, 0 },
{ 0x2159, U_NT_NUMERIC, 1./6. },
{ 0x00BD, U_NT_NUMERIC, 1./2. },
{ 0x0031, U_NT_DECIMAL, 1. },
{ 0x4e00, U_NT_NUMERIC, 1. },
{ 0x58f1, U_NT_NUMERIC, 1. },
{ 0x10320, U_NT_NUMERIC, 1. },
{ 0x0F2B, U_NT_NUMERIC, 3./2. },
{ 0x00B2, U_NT_DIGIT, 2. },
{ 0x5f10, U_NT_NUMERIC, 2. },
{ 0x1813, U_NT_DECIMAL, 3. },
{ 0x5f0e, U_NT_NUMERIC, 3. },
{ 0x2173, U_NT_NUMERIC, 4. },
{ 0x8086, U_NT_NUMERIC, 4. },
{ 0x278E, U_NT_DIGIT, 5. },
{ 0x1D7F2, U_NT_DECIMAL, 6. },
{ 0x247A, U_NT_DIGIT, 7. },
{ 0x7396, U_NT_NUMERIC, 9. },
{ 0x1372, U_NT_NUMERIC, 10. },
{ 0x216B, U_NT_NUMERIC, 12. },
{ 0x16EE, U_NT_NUMERIC, 17. },
{ 0x249A, U_NT_NUMERIC, 19. },
{ 0x303A, U_NT_NUMERIC, 30. },
{ 0x5345, U_NT_NUMERIC, 30. },
{ 0x32B2, U_NT_NUMERIC, 37. },
{ 0x1375, U_NT_NUMERIC, 40. },
{ 0x10323, U_NT_NUMERIC, 50. },
{ 0x0BF1, U_NT_NUMERIC, 100. },
{ 0x964c, U_NT_NUMERIC, 100. },
{ 0x217E, U_NT_NUMERIC, 500. },
{ 0x2180, U_NT_NUMERIC, 1000. },
{ 0x4edf, U_NT_NUMERIC, 1000. },
{ 0x2181, U_NT_NUMERIC, 5000. },
{ 0x137C, U_NT_NUMERIC, 10000. },
{ 0x4e07, U_NT_NUMERIC, 10000. },
{ 0x4ebf, U_NT_NUMERIC, 100000000. },
{ 0x5146, U_NT_NUMERIC, 1000000000000. },
{ 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
{ 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
{ 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },

View file

@ -83,6 +83,9 @@ makeProps(Props *p);
/*
 * Store the 32-bit properties word props for the single code point c.
 * NOTE(review): semantics inferred from the call in numericLineFn(), which
 * passes the result of makeProps(); the definition is not visible here - confirm.
 */
extern void
addProps(uint32_t c, uint32_t props);
/*
 * Return the 32-bit properties word currently stored for code point c.
 * The definition reads it from the properties trie with utrie_get32().
 */
extern uint32_t
getProps(uint32_t c);
/*
 * Presumably stores props for every code point in the range first..last.
 * TODO confirm against the definition whether last is inclusive.
 */
extern void
repeatProps(uint32_t first, uint32_t last, uint32_t props);

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2002, International Business Machines
* Copyright (C) 2002-2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -77,6 +77,31 @@ ageLineFn(void *context,
char *fields[][2], int32_t fieldCount,
UErrorCode *pErrorCode);
/*
 * Parse one semicolon-delimited UCD file that has several fields per line.
 *
 * filename/basename: the path buffer and the position in it that is handed to
 *     writeUCDFilename() together with ucdFile and suffix
 *     (presumably the file name is composed at basename - confirm).
 * fieldCount: number of fields per line; must not exceed 20, the capacity
 *     of the local start/limit pointer table passed to the parser.
 * lineFn: callback invoked by u_parseDelimitedFile(); it gets a NULL context.
 * pErrorCode: in/out ICU error code; nothing is done if it is NULL or
 *     already indicates a failure. A parse failure is reported on stderr and
 *     left in *pErrorCode for the caller.
 */
static void
parseMultiFieldFile(char *filename, char *basename,
                    const char *ucdFile, const char *suffix,
                    int32_t fieldCount,
                    UParseLineFn *lineFn,
                    UErrorCode *pErrorCode) {
    /* one [start, limit] pointer pair per field */
    char *fieldBounds[20][2];

    if(pErrorCode!=NULL && !U_FAILURE(*pErrorCode)) {
        writeUCDFilename(basename, ucdFile, suffix);

        u_parseDelimitedFile(filename, ';', fieldBounds, fieldCount, lineFn, NULL, pErrorCode);

        if(U_FAILURE(*pErrorCode)) {
            fprintf(stderr, "error parsing %s.txt: %s\n", ucdFile, u_errorName(*pErrorCode));
        }
    }
}
/*
 * Forward declaration of the line callback for DerivedNumericValues.txt.
 * It is passed to parseMultiFieldFile() with a field count of 3 from
 * generateAdditionalProperties().
 */
static void U_CALLCONV
numericLineFn(void *context,
char *fields[][2], int32_t fieldCount,
UErrorCode *pErrorCode);
/* parse files with single enumerated properties ---------------------------- */
struct SingleEnum {
@ -367,6 +392,10 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr
basename=filename+uprv_strlen(filename);
/* process various UCD .txt files */
/* add Han numeric types & values */
parseMultiFieldFile(filename, basename, "DerivedNumericValues", suffix, 3, numericLineFn, pErrorCode);
parseTwoFieldFile(filename, basename, "DerivedAge", suffix, ageLineFn, pErrorCode);
/*
@ -491,6 +520,112 @@ ageLineFn(void *context,
}
}
/* DerivedNumericValues.txt ------------------------------------------------- */
/*
 * Line callback for DerivedNumericValues.txt.
 *
 * Expected fields (each given as a [start, limit] pointer pair):
 *   field 0: code point or code point range
 *   field 1: numeric value in decimal notation
 *   field 2: numeric type name
 *
 * For every code point in the range that does not yet have a numeric type,
 * the existing properties word is read back with getProps(), a Props
 * structure is rebuilt from it with the newly parsed numeric type and value,
 * and the result of makeProps() is stored with addProps().
 * Code points that already have the same numeric type are left untouched.
 *
 * context and fieldCount are not used.
 * Any syntax error or unsupported case (a conflicting numeric type, a code
 * point that already carries some other value, or a new fractional value)
 * is reported on stderr and terminates the program via exit().
 */
static void U_CALLCONV
numericLineFn(void *context,
              char *fields[][2], int32_t fieldCount,
              UErrorCode *pErrorCode) {
    Props newProps;
    char *s, *end;
    unsigned long parsedValue;
    uint32_t start, limit, value, oldProps32;
    int32_t type, oldType;
    char c;
    UBool isFraction;

    /* get the code point range */
    u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode);
    if(U_FAILURE(*pErrorCode)) {
        fprintf(stderr, "genprops: syntax error in DerivedNumericValues.txt field 0 at %s\n", fields[0][0]);
        exit(*pErrorCode);
    }
    ++limit; /* turn the inclusive end into an exclusive limit for the loop below */

    /* check if the numeric value is a fraction (this code does not handle any) */
    isFraction=FALSE;
    s=uprv_strchr(fields[1][0], '.');
    /* only a decimal point inside field 1 counts, not one found later on the line */
    if(s!=NULL && s<fields[1][1]) {
        end=s+1;
        while('0'<=(c=*end++) && c<='9') {
            if(c!='0') {
                /* a non-zero digit after the decimal point */
                isFraction=TRUE;
                break;
            }
        }
    }

    if(isFraction) {
        value=0;
    } else {
        /* parse numeric value */
        s=(char *)u_skipWhitespace(fields[1][0]);

        /* try large powers of 10 first, may otherwise overflow strtoul() */
        if(0==uprv_strncmp(s, "10000000000", 11)) {
            /* large powers of 10 are encoded in a special way, see store.c */
            value=0x7fffff00;
            end=s;
            while(*(++end)=='0') {
                ++value; /* one increment per zero digit */
            }
            parsedValue=value;
        } else {
            /*
             * normal number parsing;
             * keep the full unsigned long result so that the range check
             * below sees values that do not fit into 32 bits
             */
            parsedValue=uprv_strtoul(s, &end, 10);
        }

        /*
         * The number must be non-empty, must be followed by either a decimal
         * point or (after optional white space) the end of field 1,
         * and must fit into 31 bits.
         */
        if(end<=s || (*end!='.' && u_skipWhitespace(end)!=fields[1][1]) || parsedValue>=0x80000000) {
            fprintf(stderr, "genprops: syntax error in DerivedNumericValues.txt field 1 at %s\n", fields[0][0]);
            exit(U_PARSE_ERROR);
        }
        value=(uint32_t)parsedValue;
    }

    /* parse numeric type */
    s=trimTerminateField(fields[2][0], fields[2][1]);
    type=u_getPropertyValueEnum(UCHAR_NUMERIC_TYPE, s);
    if(type<=0) {
        /* rejects unknown names (negative result) as well as the value 0 */
        fprintf(stderr, "genprops error: unknown numeric type in DerivedNumericValues.txt field 2 at %s\n", s);
        exit(U_PARSE_ERROR);
    }

    for(; start<limit; ++start) {
        oldProps32=getProps(start);
        oldType=(int32_t)GET_NUMERIC_TYPE(oldProps32);
        if(oldType==type) {
            /* this code point was already listed with its numeric value in UnicodeData.txt */
            continue;
        }
        if(oldType!=0) {
            /* the numeric type differs from what we got from UnicodeData.txt */
            fprintf(stderr, "genprops error: new numeric value for an already numeric character in DerivedNumericValues.txt at %s\n", fields[0][0]);
            exit(U_PARSE_ERROR);
        }
        if(GET_UNSIGNED_VALUE(oldProps32)!=0) {
            /* the code below is not prepared to maintain values and exceptions */
            fprintf(stderr, "genprops error: new numeric value for a character with some other value in DerivedNumericValues.txt at %s\n", fields[0][0]);
            exit(U_PARSE_ERROR);
        }
        if(isFraction) {
            fprintf(stderr, "genprops: not prepared for new fractions in DerivedNumericValues.txt field 1 at %s\n", fields[1][0]);
            exit(U_PARSE_ERROR);
        }

        if(beVerbose) {
            /* explicit casts keep the arguments in sync with the conversion specifiers */
            printf("adding U+%04lx numeric type %d value %lu\n",
                   (unsigned long)start, (int)type, (unsigned long)value);
        }

        /* reconstruct the properties and set the new numeric type and value */
        uprv_memset(&newProps, 0, sizeof(newProps));
        newProps.code=start;
        newProps.generalCategory=(uint8_t)GET_CATEGORY(oldProps32);
        newProps.bidi=(uint8_t)((oldProps32>>UPROPS_BIDI_SHIFT)&0x1f);
        newProps.isMirrored=(uint8_t)(oldProps32&(1UL<<UPROPS_MIRROR_SHIFT) ? TRUE : FALSE);
        newProps.numericType=(uint8_t)type;     /* newly parsed numeric type */
        newProps.numericValue=(int32_t)value;   /* newly parsed numeric value */
        addProps(start, makeProps(&newProps));
    }
}
/* data serialization ------------------------------------------------------- */
U_CFUNC int32_t

View file

@ -457,10 +457,10 @@ makeProps(Props *p) {
printf("*** code 0x%06x needs an exception because it is irregular\n", p->code);
*/
} else if(value<UPROPS_MIN_VALUE || UPROPS_MAX_VALUE<value) {
printf("*** code 0x%06x needs an exception because its value is out-of-bounds at %ld (not [%ld..%ld]\n",
printf("*** U+%04x needs an exception because its value is out-of-bounds at %ld (not [%ld..%ld]\n",
p->code, (long)value, (long)UPROPS_MIN_VALUE, (long)UPROPS_MAX_VALUE);
} else {
printf("*** code 0x%06x needs an exception because it has %u values\n", p->code, count);
printf("*** U+%04x needs an exception because it has %u values\n", p->code, count);
}
}
@ -625,6 +625,11 @@ addCaseSensitive(UChar32 first, UChar32 last) {
}
}
extern uint32_t
getProps(uint32_t c) {
return utrie_get32(pTrie, (UChar32)c, NULL);
}
/* areas of same properties ------------------------------------------------- */
extern void