mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-2427 add Han numeric values
X-SVN-Rev: 11322
This commit is contained in:
parent
aea8920b73
commit
b89a8122f2
5 changed files with 1255 additions and 3 deletions
1096
icu4c/source/data/unidata/DerivedNumericValues.txt
Normal file
1096
icu4c/source/data/unidata/DerivedNumericValues.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -2461,30 +2461,43 @@ TestNumericProperties(void) {
|
|||
} values[]={
|
||||
{ 0x0F33, U_NT_NUMERIC, -1./2. },
|
||||
{ 0x0C66, U_NT_DECIMAL, 0 },
|
||||
{ 0x96f6, U_NT_NUMERIC, 0 },
|
||||
{ 0x2159, U_NT_NUMERIC, 1./6. },
|
||||
{ 0x00BD, U_NT_NUMERIC, 1./2. },
|
||||
{ 0x0031, U_NT_DECIMAL, 1. },
|
||||
{ 0x4e00, U_NT_NUMERIC, 1. },
|
||||
{ 0x58f1, U_NT_NUMERIC, 1. },
|
||||
{ 0x10320, U_NT_NUMERIC, 1. },
|
||||
{ 0x0F2B, U_NT_NUMERIC, 3./2. },
|
||||
{ 0x00B2, U_NT_DIGIT, 2. },
|
||||
{ 0x5f10, U_NT_NUMERIC, 2. },
|
||||
{ 0x1813, U_NT_DECIMAL, 3. },
|
||||
{ 0x5f0e, U_NT_NUMERIC, 3. },
|
||||
{ 0x2173, U_NT_NUMERIC, 4. },
|
||||
{ 0x8086, U_NT_NUMERIC, 4. },
|
||||
{ 0x278E, U_NT_DIGIT, 5. },
|
||||
{ 0x1D7F2, U_NT_DECIMAL, 6. },
|
||||
{ 0x247A, U_NT_DIGIT, 7. },
|
||||
{ 0x7396, U_NT_NUMERIC, 9. },
|
||||
{ 0x1372, U_NT_NUMERIC, 10. },
|
||||
{ 0x216B, U_NT_NUMERIC, 12. },
|
||||
{ 0x16EE, U_NT_NUMERIC, 17. },
|
||||
{ 0x249A, U_NT_NUMERIC, 19. },
|
||||
{ 0x303A, U_NT_NUMERIC, 30. },
|
||||
{ 0x5345, U_NT_NUMERIC, 30. },
|
||||
{ 0x32B2, U_NT_NUMERIC, 37. },
|
||||
{ 0x1375, U_NT_NUMERIC, 40. },
|
||||
{ 0x10323, U_NT_NUMERIC, 50. },
|
||||
{ 0x0BF1, U_NT_NUMERIC, 100. },
|
||||
{ 0x964c, U_NT_NUMERIC, 100. },
|
||||
{ 0x217E, U_NT_NUMERIC, 500. },
|
||||
{ 0x2180, U_NT_NUMERIC, 1000. },
|
||||
{ 0x4edf, U_NT_NUMERIC, 1000. },
|
||||
{ 0x2181, U_NT_NUMERIC, 5000. },
|
||||
{ 0x137C, U_NT_NUMERIC, 10000. },
|
||||
{ 0x4e07, U_NT_NUMERIC, 10000. },
|
||||
{ 0x4ebf, U_NT_NUMERIC, 100000000. },
|
||||
{ 0x5146, U_NT_NUMERIC, 1000000000000. },
|
||||
{ 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
|
||||
{ 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
|
||||
{ 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
|
||||
|
|
|
@ -83,6 +83,9 @@ makeProps(Props *p);
|
|||
extern void
|
||||
addProps(uint32_t c, uint32_t props);
|
||||
|
||||
extern uint32_t
|
||||
getProps(uint32_t c);
|
||||
|
||||
extern void
|
||||
repeatProps(uint32_t first, uint32_t last, uint32_t props);
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002, International Business Machines
|
||||
* Copyright (C) 2002-2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -77,6 +77,31 @@ ageLineFn(void *context,
|
|||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
static void
|
||||
parseMultiFieldFile(char *filename, char *basename,
|
||||
const char *ucdFile, const char *suffix,
|
||||
int32_t fieldCount,
|
||||
UParseLineFn *lineFn,
|
||||
UErrorCode *pErrorCode) {
|
||||
char *fields[20][2];
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
writeUCDFilename(basename, ucdFile, suffix);
|
||||
|
||||
u_parseDelimitedFile(filename, ';', fields, fieldCount, lineFn, NULL, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "error parsing %s.txt: %s\n", ucdFile, u_errorName(*pErrorCode));
|
||||
}
|
||||
}
|
||||
|
||||
static void U_CALLCONV
|
||||
numericLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/* parse files with single enumerated properties ---------------------------- */
|
||||
|
||||
struct SingleEnum {
|
||||
|
@ -367,6 +392,10 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr
|
|||
basename=filename+uprv_strlen(filename);
|
||||
|
||||
/* process various UCD .txt files */
|
||||
|
||||
/* add Han numeric types & values */
|
||||
parseMultiFieldFile(filename, basename, "DerivedNumericValues", suffix, 3, numericLineFn, pErrorCode);
|
||||
|
||||
parseTwoFieldFile(filename, basename, "DerivedAge", suffix, ageLineFn, pErrorCode);
|
||||
|
||||
/*
|
||||
|
@ -491,6 +520,112 @@ ageLineFn(void *context,
|
|||
}
|
||||
}
|
||||
|
||||
/* DerivedNumericValues.txt ------------------------------------------------- */
|
||||
|
||||
static void U_CALLCONV
|
||||
numericLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
Props newProps;
|
||||
char *s, *end;
|
||||
uint32_t start, limit, value, oldProps32;
|
||||
int32_t type, oldType;
|
||||
char c;
|
||||
UBool isFraction;
|
||||
|
||||
/* get the code point range */
|
||||
u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genprops: syntax error in DerivedNumericValues.txt field 0 at %s\n", fields[0][0]);
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
++limit;
|
||||
|
||||
/* check if the numeric value is a fraction (this code does not handle any) */
|
||||
isFraction=FALSE;
|
||||
s=uprv_strchr(fields[1][0], '.');
|
||||
if(s!=NULL) {
|
||||
end=s+1;
|
||||
while('0'<=(c=*end++) && c<='9') {
|
||||
if(c!='0') {
|
||||
isFraction=TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(isFraction) {
|
||||
value=0;
|
||||
} else {
|
||||
/* parse numeric value */
|
||||
s=(char *)u_skipWhitespace(fields[1][0]);
|
||||
|
||||
/* try large powers of 10 first, may otherwise overflow strtoul() */
|
||||
if(0==uprv_strncmp(s, "10000000000", 11)) {
|
||||
/* large powers of 10 are encoded in a special way, see store.c */
|
||||
value=0x7fffff00;
|
||||
end=s;
|
||||
while(*(++end)=='0') {
|
||||
++value;
|
||||
}
|
||||
} else {
|
||||
/* normal number parsing */
|
||||
value=(uint32_t)uprv_strtoul(s, &end, 10);
|
||||
}
|
||||
if(end<=s || (*end!='.' && u_skipWhitespace(end)!=fields[1][1]) || value>=0x80000000) {
|
||||
fprintf(stderr, "genprops: syntax error in DerivedNumericValues.txt field 1 at %s\n", fields[0][0]);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
/* parse numeric type */
|
||||
s=trimTerminateField(fields[2][0], fields[2][1]);
|
||||
type=u_getPropertyValueEnum(UCHAR_NUMERIC_TYPE, s);
|
||||
if(type<=0) {
|
||||
fprintf(stderr, "genprops error: unknown numeric type in DerivedNumericValues.txt field 1 at %s\n", s);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
for(; start<limit; ++start) {
|
||||
oldProps32=getProps(start);
|
||||
oldType=(int32_t)GET_NUMERIC_TYPE(oldProps32);
|
||||
if(oldType==type) {
|
||||
/* this code point was already listed with its numeric value in UnicodeData.txt */
|
||||
continue;
|
||||
}
|
||||
if(oldType!=0) {
|
||||
/* the numeric type differs from what we got from UnicodeData.txt */
|
||||
fprintf(stderr, "genprops error: new numeric value for an already numeric character in DerivedNumericValues.txt at %s\n", fields[0][0]);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
if(GET_UNSIGNED_VALUE(oldProps32)!=0) {
|
||||
/* the code below is not prepared to maintain values and exceptions */
|
||||
fprintf(stderr, "genprops error: new numeric value for a character with some other value in DerivedNumericValues.txt at %s\n", fields[0][0]);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
if(isFraction) {
|
||||
fprintf(stderr, "genprops: not prepared for new fractions in DerivedNumericValues.txt field 1 at %s\n", fields[1][0]);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
if(beVerbose) {
|
||||
printf("adding U+%04x numeric type %d value %lu\n", start, type, value);
|
||||
}
|
||||
|
||||
/* reconstruct the properties and set the new numeric type and value */
|
||||
uprv_memset(&newProps, 0, sizeof(newProps));
|
||||
newProps.code=start;
|
||||
newProps.generalCategory=(uint8_t)GET_CATEGORY(oldProps32);
|
||||
newProps.bidi=(uint8_t)((oldProps32>>UPROPS_BIDI_SHIFT)&0x1f);
|
||||
newProps.isMirrored=(uint8_t)(oldProps32&(1UL<<UPROPS_MIRROR_SHIFT) ? TRUE : FALSE);
|
||||
newProps.numericType=(uint8_t)type; /* newly parsed numeric type */
|
||||
newProps.numericValue=(int32_t)value; /* newly parsed numeric value */
|
||||
addProps(start, makeProps(&newProps));
|
||||
}
|
||||
}
|
||||
|
||||
/* data serialization ------------------------------------------------------- */
|
||||
|
||||
U_CFUNC int32_t
|
||||
|
|
|
@ -457,10 +457,10 @@ makeProps(Props *p) {
|
|||
printf("*** code 0x%06x needs an exception because it is irregular\n", p->code);
|
||||
*/
|
||||
} else if(value<UPROPS_MIN_VALUE || UPROPS_MAX_VALUE<value) {
|
||||
printf("*** code 0x%06x needs an exception because its value is out-of-bounds at %ld (not [%ld..%ld]\n",
|
||||
printf("*** U+%04x needs an exception because its value is out-of-bounds at %ld (not [%ld..%ld]\n",
|
||||
p->code, (long)value, (long)UPROPS_MIN_VALUE, (long)UPROPS_MAX_VALUE);
|
||||
} else {
|
||||
printf("*** code 0x%06x needs an exception because it has %u values\n", p->code, count);
|
||||
printf("*** U+%04x needs an exception because it has %u values\n", p->code, count);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -625,6 +625,11 @@ addCaseSensitive(UChar32 first, UChar32 last) {
|
|||
}
|
||||
}
|
||||
|
||||
extern uint32_t
|
||||
getProps(uint32_t c) {
|
||||
return utrie_get32(pTrie, (UChar32)c, NULL);
|
||||
}
|
||||
|
||||
/* areas of same properties ------------------------------------------------- */
|
||||
|
||||
extern void
|
||||
|
|
Loading…
Add table
Reference in a new issue