From 1ec18324283a3601d7b1cee9cd8b5c2e874c2a5f Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Thu, 22 Dec 2011 06:28:59 +0000 Subject: [PATCH] ICU-8972 bug fixes X-SVN-Rev: 31166 --- tools/unicode/c/genprops/corepropsbuilder.cpp | 2 +- tools/unicode/c/genprops/pnames_data.h | 144 +++++++++--------- tools/unicode/c/genprops/pnamesbuilder.cpp | 33 ++-- tools/unicode/py/preparseucd.py | 12 +- 4 files changed, 102 insertions(+), 89 deletions(-) diff --git a/tools/unicode/c/genprops/corepropsbuilder.cpp b/tools/unicode/c/genprops/corepropsbuilder.cpp index 0facdfa0807..db33e815268 100644 --- a/tools/unicode/c/genprops/corepropsbuilder.cpp +++ b/tools/unicode/c/genprops/corepropsbuilder.cpp @@ -726,7 +726,7 @@ CorePropsBuilder::writeCSourceFile(const char *path, UErrorCode &errorCode) { int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS; FILE *f=usrc_createFromGenerator(path, "uchar_props_data.h", - "icu/tools/src/unicode/c/genprops/corepropsbuilder.cpp"); + "icu/tools/unicode/c/genprops/corepropsbuilder.cpp"); if(f==NULL) { errorCode=U_FILE_ACCESS_ERROR; return; diff --git a/tools/unicode/c/genprops/pnames_data.h b/tools/unicode/c/genprops/pnames_data.h index 7d9c6bd48e0..cb46c9ad25e 100644 --- a/tools/unicode/c/genprops/pnames_data.h +++ b/tools/unicode/c/genprops/pnames_data.h @@ -948,63 +948,63 @@ const Value VALUES_gcm[] = { const int32_t PROPERTIES_COUNT = 94; const Property PROPERTIES[] = { - Property(UCHAR_ALPHABETIC, "Alpha Alphabetic", NULL, 0), - Property(UCHAR_ASCII_HEX_DIGIT, "AHex ASCII_Hex_Digit", NULL, 0), - Property(UCHAR_BIDI_CONTROL, "Bidi_C Bidi_Control", NULL, 0), - Property(UCHAR_BIDI_MIRRORED, "Bidi_M Bidi_Mirrored", NULL, 0), - Property(UCHAR_DASH, "Dash Dash", NULL, 0), - Property(UCHAR_DEFAULT_IGNORABLE_CODE_POINT, "DI Default_Ignorable_Code_Point", NULL, 0), - Property(UCHAR_DEPRECATED, "Dep Deprecated", NULL, 0), - Property(UCHAR_DIACRITIC, "Dia Diacritic", NULL, 0), - Property(UCHAR_EXTENDER, "Ext Extender", NULL, 0), - Property(UCHAR_FULL_COMPOSITION_EXCLUSION, "Comp_Ex Full_Composition_Exclusion", NULL, 0), - Property(UCHAR_GRAPHEME_BASE, "Gr_Base Grapheme_Base", NULL, 0), - Property(UCHAR_GRAPHEME_EXTEND, "Gr_Ext Grapheme_Extend", NULL, 0), - Property(UCHAR_GRAPHEME_LINK, "Gr_Link Grapheme_Link", NULL, 0), - Property(UCHAR_HEX_DIGIT, "Hex Hex_Digit", NULL, 0), - Property(UCHAR_HYPHEN, "Hyphen Hyphen", NULL, 0), - Property(UCHAR_ID_CONTINUE, "IDC ID_Continue", NULL, 0), - Property(UCHAR_ID_START, "IDS ID_Start", NULL, 0), - Property(UCHAR_IDEOGRAPHIC, "Ideo Ideographic", NULL, 0), - Property(UCHAR_IDS_BINARY_OPERATOR, "IDSB IDS_Binary_Operator", NULL, 0), - Property(UCHAR_IDS_TRINARY_OPERATOR, "IDST IDS_Trinary_Operator", NULL, 0), - Property(UCHAR_JOIN_CONTROL, "Join_C Join_Control", NULL, 0), - Property(UCHAR_LOGICAL_ORDER_EXCEPTION, "LOE Logical_Order_Exception", NULL, 0), - Property(UCHAR_LOWERCASE, "Lower Lowercase", NULL, 0), - Property(UCHAR_MATH, "Math Math", NULL, 0), - Property(UCHAR_NONCHARACTER_CODE_POINT, "NChar Noncharacter_Code_Point", NULL, 0), - Property(UCHAR_QUOTATION_MARK, "QMark Quotation_Mark", NULL, 0), - Property(UCHAR_RADICAL, "Radical Radical", NULL, 0), - Property(UCHAR_SOFT_DOTTED, "SD Soft_Dotted", NULL, 0), - Property(UCHAR_TERMINAL_PUNCTUATION, "Term Terminal_Punctuation", NULL, 0), - Property(UCHAR_UNIFIED_IDEOGRAPH, "UIdeo Unified_Ideograph", NULL, 0), - Property(UCHAR_UPPERCASE, "Upper Uppercase", NULL, 0), - Property(UCHAR_WHITE_SPACE, "WSpace White_Space space", NULL, 0), - Property(UCHAR_XID_CONTINUE, "XIDC XID_Continue", NULL, 0), - Property(UCHAR_XID_START, "XIDS XID_Start", NULL, 0), - Property(UCHAR_CASE_SENSITIVE, "Sensitive Case_Sensitive", NULL, 0), - Property(UCHAR_S_TERM, "STerm STerm", NULL, 0), - Property(UCHAR_VARIATION_SELECTOR, "VS Variation_Selector", NULL, 0), - Property(UCHAR_NFD_INERT, "nfdinert NFD_Inert", NULL, 0), - Property(UCHAR_NFKD_INERT, "nfkdinert NFKD_Inert", NULL, 0), - Property(UCHAR_NFC_INERT, "nfcinert NFC_Inert", NULL, 0), - Property(UCHAR_NFKC_INERT, "nfkcinert NFKC_Inert", NULL, 0), - Property(UCHAR_SEGMENT_STARTER, "segstart Segment_Starter", NULL, 0), - Property(UCHAR_PATTERN_SYNTAX, "Pat_Syn Pattern_Syntax", NULL, 0), - Property(UCHAR_PATTERN_WHITE_SPACE, "Pat_WS Pattern_White_Space", NULL, 0), - Property(UCHAR_POSIX_ALNUM, " alnum", NULL, 0), - Property(UCHAR_POSIX_BLANK, " blank", NULL, 0), - Property(UCHAR_POSIX_GRAPH, " graph", NULL, 0), - Property(UCHAR_POSIX_PRINT, " print", NULL, 0), - Property(UCHAR_POSIX_XDIGIT, " xdigit", NULL, 0), - Property(UCHAR_CASED, "Cased Cased", NULL, 0), - Property(UCHAR_CASE_IGNORABLE, "CI Case_Ignorable", NULL, 0), - Property(UCHAR_CHANGES_WHEN_LOWERCASED, "CWL Changes_When_Lowercased", NULL, 0), - Property(UCHAR_CHANGES_WHEN_UPPERCASED, "CWU Changes_When_Uppercased", NULL, 0), - Property(UCHAR_CHANGES_WHEN_TITLECASED, "CWT Changes_When_Titlecased", NULL, 0), - Property(UCHAR_CHANGES_WHEN_CASEFOLDED, "CWCF Changes_When_Casefolded", NULL, 0), - Property(UCHAR_CHANGES_WHEN_CASEMAPPED, "CWCM Changes_When_Casemapped", NULL, 0), - Property(UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED, "CWKCF Changes_When_NFKC_Casefolded", NULL, 0), + Property(UCHAR_ALPHABETIC, "Alpha Alphabetic"), + Property(UCHAR_ASCII_HEX_DIGIT, "AHex ASCII_Hex_Digit"), + Property(UCHAR_BIDI_CONTROL, "Bidi_C Bidi_Control"), + Property(UCHAR_BIDI_MIRRORED, "Bidi_M Bidi_Mirrored"), + Property(UCHAR_DASH, "Dash Dash"), + Property(UCHAR_DEFAULT_IGNORABLE_CODE_POINT, "DI Default_Ignorable_Code_Point"), + Property(UCHAR_DEPRECATED, "Dep Deprecated"), + Property(UCHAR_DIACRITIC, "Dia Diacritic"), + Property(UCHAR_EXTENDER, "Ext Extender"), + Property(UCHAR_FULL_COMPOSITION_EXCLUSION, "Comp_Ex Full_Composition_Exclusion"), + Property(UCHAR_GRAPHEME_BASE, "Gr_Base Grapheme_Base"), + Property(UCHAR_GRAPHEME_EXTEND, "Gr_Ext Grapheme_Extend"), + Property(UCHAR_GRAPHEME_LINK, "Gr_Link Grapheme_Link"), + Property(UCHAR_HEX_DIGIT, "Hex Hex_Digit"), + Property(UCHAR_HYPHEN, "Hyphen Hyphen"), + Property(UCHAR_ID_CONTINUE, "IDC ID_Continue"), + Property(UCHAR_ID_START, "IDS ID_Start"), + Property(UCHAR_IDEOGRAPHIC, "Ideo Ideographic"), + Property(UCHAR_IDS_BINARY_OPERATOR, "IDSB IDS_Binary_Operator"), + Property(UCHAR_IDS_TRINARY_OPERATOR, "IDST IDS_Trinary_Operator"), + Property(UCHAR_JOIN_CONTROL, "Join_C Join_Control"), + Property(UCHAR_LOGICAL_ORDER_EXCEPTION, "LOE Logical_Order_Exception"), + Property(UCHAR_LOWERCASE, "Lower Lowercase"), + Property(UCHAR_MATH, "Math Math"), + Property(UCHAR_NONCHARACTER_CODE_POINT, "NChar Noncharacter_Code_Point"), + Property(UCHAR_QUOTATION_MARK, "QMark Quotation_Mark"), + Property(UCHAR_RADICAL, "Radical Radical"), + Property(UCHAR_SOFT_DOTTED, "SD Soft_Dotted"), + Property(UCHAR_TERMINAL_PUNCTUATION, "Term Terminal_Punctuation"), + Property(UCHAR_UNIFIED_IDEOGRAPH, "UIdeo Unified_Ideograph"), + Property(UCHAR_UPPERCASE, "Upper Uppercase"), + Property(UCHAR_WHITE_SPACE, "WSpace White_Space space"), + Property(UCHAR_XID_CONTINUE, "XIDC XID_Continue"), + Property(UCHAR_XID_START, "XIDS XID_Start"), + Property(UCHAR_CASE_SENSITIVE, "Sensitive Case_Sensitive"), + Property(UCHAR_S_TERM, "STerm STerm"), + Property(UCHAR_VARIATION_SELECTOR, "VS Variation_Selector"), + Property(UCHAR_NFD_INERT, "nfdinert NFD_Inert"), + Property(UCHAR_NFKD_INERT, "nfkdinert NFKD_Inert"), + Property(UCHAR_NFC_INERT, "nfcinert NFC_Inert"), + Property(UCHAR_NFKC_INERT, "nfkcinert NFKC_Inert"), + Property(UCHAR_SEGMENT_STARTER, "segstart Segment_Starter"), + Property(UCHAR_PATTERN_SYNTAX, "Pat_Syn Pattern_Syntax"), + Property(UCHAR_PATTERN_WHITE_SPACE, "Pat_WS Pattern_White_Space"), + Property(UCHAR_POSIX_ALNUM, " alnum"), + Property(UCHAR_POSIX_BLANK, " blank"), + Property(UCHAR_POSIX_GRAPH, " graph"), + Property(UCHAR_POSIX_PRINT, " print"), + Property(UCHAR_POSIX_XDIGIT, " xdigit"), + Property(UCHAR_CASED, "Cased Cased"), + Property(UCHAR_CASE_IGNORABLE, "CI Case_Ignorable"), + Property(UCHAR_CHANGES_WHEN_LOWERCASED, "CWL Changes_When_Lowercased"), + Property(UCHAR_CHANGES_WHEN_UPPERCASED, "CWU Changes_When_Uppercased"), + Property(UCHAR_CHANGES_WHEN_TITLECASED, "CWT Changes_When_Titlecased"), + Property(UCHAR_CHANGES_WHEN_CASEFOLDED, "CWCF Changes_When_Casefolded"), + Property(UCHAR_CHANGES_WHEN_CASEMAPPED, "CWCM Changes_When_Casemapped"), + Property(UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED, "CWKCF Changes_When_NFKC_Casefolded"), Property(UCHAR_BIDI_CLASS, "bc Bidi_Class", VALUES_bc, VALUES_bc_COUNT), Property(UCHAR_BLOCK, "blk Block", VALUES_blk, VALUES_blk_COUNT), Property(UCHAR_CANONICAL_COMBINING_CLASS, "ccc Canonical_Combining_Class", VALUES_ccc, VALUES_ccc_COUNT), @@ -1027,21 +1027,21 @@ const Property PROPERTIES[] = { Property(UCHAR_SENTENCE_BREAK, "SB Sentence_Break", VALUES_SB, VALUES_SB_COUNT), Property(UCHAR_WORD_BREAK, "WB Word_Break", VALUES_WB, VALUES_WB_COUNT), Property(UCHAR_GENERAL_CATEGORY_MASK, "gcm General_Category_Mask", VALUES_gcm, VALUES_gcm_COUNT), - Property(UCHAR_NUMERIC_VALUE, "nv Numeric_Value", NULL, 0), - Property(UCHAR_AGE, "age Age", NULL, 0), - Property(UCHAR_BIDI_MIRRORING_GLYPH, "bmg Bidi_Mirroring_Glyph", NULL, 0), - Property(UCHAR_CASE_FOLDING, "cf Case_Folding", NULL, 0), - Property(UCHAR_ISO_COMMENT, "isc ISO_Comment", NULL, 0), - Property(UCHAR_LOWERCASE_MAPPING, "lc Lowercase_Mapping", NULL, 0), - Property(UCHAR_NAME, "na Name", NULL, 0), - Property(UCHAR_SIMPLE_CASE_FOLDING, "scf Simple_Case_Folding sfc", NULL, 0), - Property(UCHAR_SIMPLE_LOWERCASE_MAPPING, "slc Simple_Lowercase_Mapping", NULL, 0), - Property(UCHAR_SIMPLE_TITLECASE_MAPPING, "stc Simple_Titlecase_Mapping", NULL, 0), - Property(UCHAR_SIMPLE_UPPERCASE_MAPPING, "suc Simple_Uppercase_Mapping", NULL, 0), - Property(UCHAR_TITLECASE_MAPPING, "tc Titlecase_Mapping", NULL, 0), - Property(UCHAR_UNICODE_1_NAME, "na1 Unicode_1_Name", NULL, 0), - Property(UCHAR_UPPERCASE_MAPPING, "uc Uppercase_Mapping", NULL, 0), - Property(UCHAR_SCRIPT_EXTENSIONS, "scx Script_Extensions", NULL, 0), + Property(UCHAR_NUMERIC_VALUE, "nv Numeric_Value"), + Property(UCHAR_AGE, "age Age"), + Property(UCHAR_BIDI_MIRRORING_GLYPH, "bmg Bidi_Mirroring_Glyph"), + Property(UCHAR_CASE_FOLDING, "cf Case_Folding"), + Property(UCHAR_ISO_COMMENT, "isc ISO_Comment"), + Property(UCHAR_LOWERCASE_MAPPING, "lc Lowercase_Mapping"), + Property(UCHAR_NAME, "na Name"), + Property(UCHAR_SIMPLE_CASE_FOLDING, "scf Simple_Case_Folding sfc"), + Property(UCHAR_SIMPLE_LOWERCASE_MAPPING, "slc Simple_Lowercase_Mapping"), + Property(UCHAR_SIMPLE_TITLECASE_MAPPING, "stc Simple_Titlecase_Mapping"), + Property(UCHAR_SIMPLE_UPPERCASE_MAPPING, "suc Simple_Uppercase_Mapping"), + Property(UCHAR_TITLECASE_MAPPING, "tc Titlecase_Mapping"), + Property(UCHAR_UNICODE_1_NAME, "na1 Unicode_1_Name"), + Property(UCHAR_UPPERCASE_MAPPING, "uc Uppercase_Mapping"), + Property(UCHAR_SCRIPT_EXTENSIONS, "scx Script_Extensions"), }; const int32_t MAX_ALIASES = 4; diff --git a/tools/unicode/c/genprops/pnamesbuilder.cpp b/tools/unicode/c/genprops/pnamesbuilder.cpp index 108f766c60a..fe013440557 100644 --- a/tools/unicode/c/genprops/pnamesbuilder.cpp +++ b/tools/unicode/c/genprops/pnamesbuilder.cpp @@ -55,15 +55,17 @@ U_NAMESPACE_USE // So we define a second constant and at runtime check that it's >=MAX_ALIASES. static const int32_t VALUE_MAX_ALIASES=4; +static const int32_t JOINED_ALIASES_CAPACITY=100; + class Value { public: Value(int32_t enumValue, const char *joinedAliases) : enumValue(enumValue), joinedAliases(joinedAliases), count(0) { - if(uprv_strlen(joinedAliases)>=LENGTHOF(aliasesBuffer)) { + if(uprv_strlen(joinedAliases)>=JOINED_ALIASES_CAPACITY) { fprintf(stderr, "genprops error: pnamesbuilder.cpp Value::Value(%ld, \"%s\"): " - "joined aliases too long: make Value::aliasesBuffer[] larger, " - "at least %ld\n", + "joined aliases too long: " + "increase JOINED_ALIASES_CAPACITY, to at least %ld\n", (long)enumValue, joinedAliases, uprv_strlen(joinedAliases)+1); exit(U_BUFFER_OVERFLOW_ERROR); } @@ -76,14 +78,12 @@ public: do { aliases[count]=a; normalized[count++]=n; - char c; - while((c=*j)!=' ' && c!=0) { + while((c=*j++)!=' ' && c!=0) { *a++=c; // Ignore delimiters '-' and '_'. if(!(c=='-' || c=='_')) { *n++=uprv_tolower(c); } - ++j; } *a++=0; *n++=0; @@ -117,8 +117,8 @@ public: int32_t enumValue; const char *joinedAliases; - char aliasesBuffer[100]; - char normalizedBuffer[100]; // Same capacity as aliasesBuffer! + char aliasesBuffer[JOINED_ALIASES_CAPACITY]; + char normalizedBuffer[JOINED_ALIASES_CAPACITY]; const char *aliases[VALUE_MAX_ALIASES]; const char *normalized[VALUE_MAX_ALIASES]; int32_t count; @@ -126,10 +126,13 @@ public: class Property : public Value { public: + // A property with a values array. Property(int32_t enumValue, const char *joinedAliases, const Value *values, int32_t valueCount) : Value(enumValue, joinedAliases), values(values), valueCount(valueCount) {} + // A binary property (enumValue