ICU-8972 bug fixes

X-SVN-Rev: 31166
This commit is contained in:
Markus Scherer 2011-12-22 06:28:59 +00:00
parent f3fd941998
commit 1ec1832428
4 changed files with 102 additions and 89 deletions

View file

@ -726,7 +726,7 @@ CorePropsBuilder::writeCSourceFile(const char *path, UErrorCode &errorCode) {
int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;
FILE *f=usrc_createFromGenerator(path, "uchar_props_data.h",
"icu/tools/src/unicode/c/genprops/corepropsbuilder.cpp");
"icu/tools/unicode/c/genprops/corepropsbuilder.cpp");
if(f==NULL) {
errorCode=U_FILE_ACCESS_ERROR;
return;

View file

@ -948,63 +948,63 @@ const Value VALUES_gcm[] = {
const int32_t PROPERTIES_COUNT = 94;
const Property PROPERTIES[] = {
Property(UCHAR_ALPHABETIC, "Alpha Alphabetic", NULL, 0),
Property(UCHAR_ASCII_HEX_DIGIT, "AHex ASCII_Hex_Digit", NULL, 0),
Property(UCHAR_BIDI_CONTROL, "Bidi_C Bidi_Control", NULL, 0),
Property(UCHAR_BIDI_MIRRORED, "Bidi_M Bidi_Mirrored", NULL, 0),
Property(UCHAR_DASH, "Dash Dash", NULL, 0),
Property(UCHAR_DEFAULT_IGNORABLE_CODE_POINT, "DI Default_Ignorable_Code_Point", NULL, 0),
Property(UCHAR_DEPRECATED, "Dep Deprecated", NULL, 0),
Property(UCHAR_DIACRITIC, "Dia Diacritic", NULL, 0),
Property(UCHAR_EXTENDER, "Ext Extender", NULL, 0),
Property(UCHAR_FULL_COMPOSITION_EXCLUSION, "Comp_Ex Full_Composition_Exclusion", NULL, 0),
Property(UCHAR_GRAPHEME_BASE, "Gr_Base Grapheme_Base", NULL, 0),
Property(UCHAR_GRAPHEME_EXTEND, "Gr_Ext Grapheme_Extend", NULL, 0),
Property(UCHAR_GRAPHEME_LINK, "Gr_Link Grapheme_Link", NULL, 0),
Property(UCHAR_HEX_DIGIT, "Hex Hex_Digit", NULL, 0),
Property(UCHAR_HYPHEN, "Hyphen Hyphen", NULL, 0),
Property(UCHAR_ID_CONTINUE, "IDC ID_Continue", NULL, 0),
Property(UCHAR_ID_START, "IDS ID_Start", NULL, 0),
Property(UCHAR_IDEOGRAPHIC, "Ideo Ideographic", NULL, 0),
Property(UCHAR_IDS_BINARY_OPERATOR, "IDSB IDS_Binary_Operator", NULL, 0),
Property(UCHAR_IDS_TRINARY_OPERATOR, "IDST IDS_Trinary_Operator", NULL, 0),
Property(UCHAR_JOIN_CONTROL, "Join_C Join_Control", NULL, 0),
Property(UCHAR_LOGICAL_ORDER_EXCEPTION, "LOE Logical_Order_Exception", NULL, 0),
Property(UCHAR_LOWERCASE, "Lower Lowercase", NULL, 0),
Property(UCHAR_MATH, "Math Math", NULL, 0),
Property(UCHAR_NONCHARACTER_CODE_POINT, "NChar Noncharacter_Code_Point", NULL, 0),
Property(UCHAR_QUOTATION_MARK, "QMark Quotation_Mark", NULL, 0),
Property(UCHAR_RADICAL, "Radical Radical", NULL, 0),
Property(UCHAR_SOFT_DOTTED, "SD Soft_Dotted", NULL, 0),
Property(UCHAR_TERMINAL_PUNCTUATION, "Term Terminal_Punctuation", NULL, 0),
Property(UCHAR_UNIFIED_IDEOGRAPH, "UIdeo Unified_Ideograph", NULL, 0),
Property(UCHAR_UPPERCASE, "Upper Uppercase", NULL, 0),
Property(UCHAR_WHITE_SPACE, "WSpace White_Space space", NULL, 0),
Property(UCHAR_XID_CONTINUE, "XIDC XID_Continue", NULL, 0),
Property(UCHAR_XID_START, "XIDS XID_Start", NULL, 0),
Property(UCHAR_CASE_SENSITIVE, "Sensitive Case_Sensitive", NULL, 0),
Property(UCHAR_S_TERM, "STerm STerm", NULL, 0),
Property(UCHAR_VARIATION_SELECTOR, "VS Variation_Selector", NULL, 0),
Property(UCHAR_NFD_INERT, "nfdinert NFD_Inert", NULL, 0),
Property(UCHAR_NFKD_INERT, "nfkdinert NFKD_Inert", NULL, 0),
Property(UCHAR_NFC_INERT, "nfcinert NFC_Inert", NULL, 0),
Property(UCHAR_NFKC_INERT, "nfkcinert NFKC_Inert", NULL, 0),
Property(UCHAR_SEGMENT_STARTER, "segstart Segment_Starter", NULL, 0),
Property(UCHAR_PATTERN_SYNTAX, "Pat_Syn Pattern_Syntax", NULL, 0),
Property(UCHAR_PATTERN_WHITE_SPACE, "Pat_WS Pattern_White_Space", NULL, 0),
Property(UCHAR_POSIX_ALNUM, " alnum", NULL, 0),
Property(UCHAR_POSIX_BLANK, " blank", NULL, 0),
Property(UCHAR_POSIX_GRAPH, " graph", NULL, 0),
Property(UCHAR_POSIX_PRINT, " print", NULL, 0),
Property(UCHAR_POSIX_XDIGIT, " xdigit", NULL, 0),
Property(UCHAR_CASED, "Cased Cased", NULL, 0),
Property(UCHAR_CASE_IGNORABLE, "CI Case_Ignorable", NULL, 0),
Property(UCHAR_CHANGES_WHEN_LOWERCASED, "CWL Changes_When_Lowercased", NULL, 0),
Property(UCHAR_CHANGES_WHEN_UPPERCASED, "CWU Changes_When_Uppercased", NULL, 0),
Property(UCHAR_CHANGES_WHEN_TITLECASED, "CWT Changes_When_Titlecased", NULL, 0),
Property(UCHAR_CHANGES_WHEN_CASEFOLDED, "CWCF Changes_When_Casefolded", NULL, 0),
Property(UCHAR_CHANGES_WHEN_CASEMAPPED, "CWCM Changes_When_Casemapped", NULL, 0),
Property(UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED, "CWKCF Changes_When_NFKC_Casefolded", NULL, 0),
Property(UCHAR_ALPHABETIC, "Alpha Alphabetic"),
Property(UCHAR_ASCII_HEX_DIGIT, "AHex ASCII_Hex_Digit"),
Property(UCHAR_BIDI_CONTROL, "Bidi_C Bidi_Control"),
Property(UCHAR_BIDI_MIRRORED, "Bidi_M Bidi_Mirrored"),
Property(UCHAR_DASH, "Dash Dash"),
Property(UCHAR_DEFAULT_IGNORABLE_CODE_POINT, "DI Default_Ignorable_Code_Point"),
Property(UCHAR_DEPRECATED, "Dep Deprecated"),
Property(UCHAR_DIACRITIC, "Dia Diacritic"),
Property(UCHAR_EXTENDER, "Ext Extender"),
Property(UCHAR_FULL_COMPOSITION_EXCLUSION, "Comp_Ex Full_Composition_Exclusion"),
Property(UCHAR_GRAPHEME_BASE, "Gr_Base Grapheme_Base"),
Property(UCHAR_GRAPHEME_EXTEND, "Gr_Ext Grapheme_Extend"),
Property(UCHAR_GRAPHEME_LINK, "Gr_Link Grapheme_Link"),
Property(UCHAR_HEX_DIGIT, "Hex Hex_Digit"),
Property(UCHAR_HYPHEN, "Hyphen Hyphen"),
Property(UCHAR_ID_CONTINUE, "IDC ID_Continue"),
Property(UCHAR_ID_START, "IDS ID_Start"),
Property(UCHAR_IDEOGRAPHIC, "Ideo Ideographic"),
Property(UCHAR_IDS_BINARY_OPERATOR, "IDSB IDS_Binary_Operator"),
Property(UCHAR_IDS_TRINARY_OPERATOR, "IDST IDS_Trinary_Operator"),
Property(UCHAR_JOIN_CONTROL, "Join_C Join_Control"),
Property(UCHAR_LOGICAL_ORDER_EXCEPTION, "LOE Logical_Order_Exception"),
Property(UCHAR_LOWERCASE, "Lower Lowercase"),
Property(UCHAR_MATH, "Math Math"),
Property(UCHAR_NONCHARACTER_CODE_POINT, "NChar Noncharacter_Code_Point"),
Property(UCHAR_QUOTATION_MARK, "QMark Quotation_Mark"),
Property(UCHAR_RADICAL, "Radical Radical"),
Property(UCHAR_SOFT_DOTTED, "SD Soft_Dotted"),
Property(UCHAR_TERMINAL_PUNCTUATION, "Term Terminal_Punctuation"),
Property(UCHAR_UNIFIED_IDEOGRAPH, "UIdeo Unified_Ideograph"),
Property(UCHAR_UPPERCASE, "Upper Uppercase"),
Property(UCHAR_WHITE_SPACE, "WSpace White_Space space"),
Property(UCHAR_XID_CONTINUE, "XIDC XID_Continue"),
Property(UCHAR_XID_START, "XIDS XID_Start"),
Property(UCHAR_CASE_SENSITIVE, "Sensitive Case_Sensitive"),
Property(UCHAR_S_TERM, "STerm STerm"),
Property(UCHAR_VARIATION_SELECTOR, "VS Variation_Selector"),
Property(UCHAR_NFD_INERT, "nfdinert NFD_Inert"),
Property(UCHAR_NFKD_INERT, "nfkdinert NFKD_Inert"),
Property(UCHAR_NFC_INERT, "nfcinert NFC_Inert"),
Property(UCHAR_NFKC_INERT, "nfkcinert NFKC_Inert"),
Property(UCHAR_SEGMENT_STARTER, "segstart Segment_Starter"),
Property(UCHAR_PATTERN_SYNTAX, "Pat_Syn Pattern_Syntax"),
Property(UCHAR_PATTERN_WHITE_SPACE, "Pat_WS Pattern_White_Space"),
Property(UCHAR_POSIX_ALNUM, " alnum"),
Property(UCHAR_POSIX_BLANK, " blank"),
Property(UCHAR_POSIX_GRAPH, " graph"),
Property(UCHAR_POSIX_PRINT, " print"),
Property(UCHAR_POSIX_XDIGIT, " xdigit"),
Property(UCHAR_CASED, "Cased Cased"),
Property(UCHAR_CASE_IGNORABLE, "CI Case_Ignorable"),
Property(UCHAR_CHANGES_WHEN_LOWERCASED, "CWL Changes_When_Lowercased"),
Property(UCHAR_CHANGES_WHEN_UPPERCASED, "CWU Changes_When_Uppercased"),
Property(UCHAR_CHANGES_WHEN_TITLECASED, "CWT Changes_When_Titlecased"),
Property(UCHAR_CHANGES_WHEN_CASEFOLDED, "CWCF Changes_When_Casefolded"),
Property(UCHAR_CHANGES_WHEN_CASEMAPPED, "CWCM Changes_When_Casemapped"),
Property(UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED, "CWKCF Changes_When_NFKC_Casefolded"),
Property(UCHAR_BIDI_CLASS, "bc Bidi_Class", VALUES_bc, VALUES_bc_COUNT),
Property(UCHAR_BLOCK, "blk Block", VALUES_blk, VALUES_blk_COUNT),
Property(UCHAR_CANONICAL_COMBINING_CLASS, "ccc Canonical_Combining_Class", VALUES_ccc, VALUES_ccc_COUNT),
@ -1027,21 +1027,21 @@ const Property PROPERTIES[] = {
Property(UCHAR_SENTENCE_BREAK, "SB Sentence_Break", VALUES_SB, VALUES_SB_COUNT),
Property(UCHAR_WORD_BREAK, "WB Word_Break", VALUES_WB, VALUES_WB_COUNT),
Property(UCHAR_GENERAL_CATEGORY_MASK, "gcm General_Category_Mask", VALUES_gcm, VALUES_gcm_COUNT),
Property(UCHAR_NUMERIC_VALUE, "nv Numeric_Value", NULL, 0),
Property(UCHAR_AGE, "age Age", NULL, 0),
Property(UCHAR_BIDI_MIRRORING_GLYPH, "bmg Bidi_Mirroring_Glyph", NULL, 0),
Property(UCHAR_CASE_FOLDING, "cf Case_Folding", NULL, 0),
Property(UCHAR_ISO_COMMENT, "isc ISO_Comment", NULL, 0),
Property(UCHAR_LOWERCASE_MAPPING, "lc Lowercase_Mapping", NULL, 0),
Property(UCHAR_NAME, "na Name", NULL, 0),
Property(UCHAR_SIMPLE_CASE_FOLDING, "scf Simple_Case_Folding sfc", NULL, 0),
Property(UCHAR_SIMPLE_LOWERCASE_MAPPING, "slc Simple_Lowercase_Mapping", NULL, 0),
Property(UCHAR_SIMPLE_TITLECASE_MAPPING, "stc Simple_Titlecase_Mapping", NULL, 0),
Property(UCHAR_SIMPLE_UPPERCASE_MAPPING, "suc Simple_Uppercase_Mapping", NULL, 0),
Property(UCHAR_TITLECASE_MAPPING, "tc Titlecase_Mapping", NULL, 0),
Property(UCHAR_UNICODE_1_NAME, "na1 Unicode_1_Name", NULL, 0),
Property(UCHAR_UPPERCASE_MAPPING, "uc Uppercase_Mapping", NULL, 0),
Property(UCHAR_SCRIPT_EXTENSIONS, "scx Script_Extensions", NULL, 0),
Property(UCHAR_NUMERIC_VALUE, "nv Numeric_Value"),
Property(UCHAR_AGE, "age Age"),
Property(UCHAR_BIDI_MIRRORING_GLYPH, "bmg Bidi_Mirroring_Glyph"),
Property(UCHAR_CASE_FOLDING, "cf Case_Folding"),
Property(UCHAR_ISO_COMMENT, "isc ISO_Comment"),
Property(UCHAR_LOWERCASE_MAPPING, "lc Lowercase_Mapping"),
Property(UCHAR_NAME, "na Name"),
Property(UCHAR_SIMPLE_CASE_FOLDING, "scf Simple_Case_Folding sfc"),
Property(UCHAR_SIMPLE_LOWERCASE_MAPPING, "slc Simple_Lowercase_Mapping"),
Property(UCHAR_SIMPLE_TITLECASE_MAPPING, "stc Simple_Titlecase_Mapping"),
Property(UCHAR_SIMPLE_UPPERCASE_MAPPING, "suc Simple_Uppercase_Mapping"),
Property(UCHAR_TITLECASE_MAPPING, "tc Titlecase_Mapping"),
Property(UCHAR_UNICODE_1_NAME, "na1 Unicode_1_Name"),
Property(UCHAR_UPPERCASE_MAPPING, "uc Uppercase_Mapping"),
Property(UCHAR_SCRIPT_EXTENSIONS, "scx Script_Extensions"),
};
const int32_t MAX_ALIASES = 4;

View file

@ -55,15 +55,17 @@ U_NAMESPACE_USE
// So we define a second constant and at runtime check that it's >=MAX_ALIASES.
static const int32_t VALUE_MAX_ALIASES=4;
static const int32_t JOINED_ALIASES_CAPACITY=100;
class Value {
public:
Value(int32_t enumValue, const char *joinedAliases)
: enumValue(enumValue), joinedAliases(joinedAliases), count(0) {
if(uprv_strlen(joinedAliases)>=LENGTHOF(aliasesBuffer)) {
if(uprv_strlen(joinedAliases)>=JOINED_ALIASES_CAPACITY) {
fprintf(stderr,
"genprops error: pnamesbuilder.cpp Value::Value(%ld, \"%s\"): "
"joined aliases too long: make Value::aliasesBuffer[] larger, "
"at least %ld\n",
"joined aliases too long: "
"increase JOINED_ALIASES_CAPACITY, to at least %ld\n",
(long)enumValue, joinedAliases, uprv_strlen(joinedAliases)+1);
exit(U_BUFFER_OVERFLOW_ERROR);
}
@ -76,14 +78,12 @@ public:
do {
aliases[count]=a;
normalized[count++]=n;
char c;
while((c=*j)!=' ' && c!=0) {
while((c=*j++)!=' ' && c!=0) {
*a++=c;
// Ignore delimiters '-' and '_'.
if(!(c=='-' || c=='_')) {
*n++=uprv_tolower(c);
}
++j;
}
*a++=0;
*n++=0;
@ -117,8 +117,8 @@ public:
int32_t enumValue;
const char *joinedAliases;
char aliasesBuffer[100];
char normalizedBuffer[100]; // Same capacity as aliasesBuffer!
char aliasesBuffer[JOINED_ALIASES_CAPACITY];
char normalizedBuffer[JOINED_ALIASES_CAPACITY];
const char *aliases[VALUE_MAX_ALIASES];
const char *normalized[VALUE_MAX_ALIASES];
int32_t count;
@ -126,10 +126,13 @@ public:
class Property : public Value {
public:
// A property with a values array.
Property(int32_t enumValue, const char *joinedAliases,
const Value *values, int32_t valueCount)
: Value(enumValue, joinedAliases),
values(values), valueCount(valueCount) {}
// A binary property (enumValue<UCHAR_BINARY_LIMIT), or one without values.
Property(int32_t enumValue, const char *joinedAliases);
const Value *values;
int32_t valueCount;
@ -138,6 +141,11 @@ public:
// *** Include the data header ***
#include "pnames_data.h"
// Constructor for a binary property or for a property without named values.
// A binary property (enumValue<UCHAR_BINARY_LIMIT) uses the shared
// VALUES_binprop table; any other property gets no values (NULL, count 0).
Property::Property(int32_t enumValue, const char *joinedAliases)
: Value(enumValue, joinedAliases), values(NULL), valueCount(0) {
if(enumValue<UCHAR_BINARY_LIMIT) {
// Binary property: point at the common binary-property value list.
values=VALUES_binprop;
valueCount=VALUES_binprop_COUNT;
}
}
// END DATA
//----------------------------------------------------------------------
@ -219,6 +227,12 @@ public:
valueMaps.addElement(bytesTrieOffset, errorCode);
buildValueMap(VALUES_binprop, VALUES_binprop_COUNT, errorCode);
// Note: It is slightly wasteful to store binary properties like all others.
// Since we know that they are in the lowest range of property enum values
// and share the same name group and BytesTrie,
// we could just store those two indexes once.
// (This would save 8 bytes per binary property, or about half a kilobyte.)
// Build the known-repeated canonical combining class properties once.
int32_t cccValueMapOffset=valueMaps.size();
bytesTrieOffset=buildValuesBytesTrie(VALUES_ccc, VALUES_ccc_COUNT, errorCode);
@ -281,7 +295,6 @@ public:
int32_t writeValueAliases(const Value &value, UErrorCode &errorCode) {
int32_t nameOffset=uhash_geti(nameGroupToOffset, (void *)value.joinedAliases);
if(nameOffset!=0) {
printf("* duplicate joinedAliases: \"%s\"\n", value.joinedAliases);
// The same list of aliases has been written already.
return nameOffset-1; // Was incremented to reserve 0 for "not found".
}
@ -476,7 +489,7 @@ void
PNamesBuilderImpl::writeCSourceFile(const char *path, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return; }
FILE *f=usrc_createFromGenerator(path, "propname_data.h",
"icu/tools/src/unicode/c/genprops/pnamesbuilder.cpp");
"icu/tools/unicode/c/genprops/pnamesbuilder.cpp");
if(f==NULL) {
errorCode=U_FILE_ACCESS_ERROR;
return; // usrc_create() reported an error.

View file

@ -1724,15 +1724,15 @@ def WritePNamesDataHeader(out_path):
out_file.write("const Property PROPERTIES[] = {\n")
for (enum, pname, values) in _icu_properties:
prop = _properties[pname]
aliases = prop[1]
if values: # Property with named values.
if prop[0] == "Binary": pname = "binprop"
aliases = " ".join(prop[1])
if prop[0] == "Binary":
out_file.write(' Property(%s, "%s"),\n' % (enum, aliases))
elif values: # Property with named values.
out_file.write(
' Property(%s, "%s", VALUES_%s, VALUES_%s_COUNT),\n' %
(enum, " ".join(aliases), pname, pname))
(enum, aliases, pname, pname))
else:
out_file.write(' Property(%s, "%s", NULL, 0),\n' %
(enum, " ".join(aliases)))
out_file.write(' Property(%s, "%s"),\n' % (enum, aliases))
out_file.write("};\n\n")
out_file.write("const int32_t MAX_ALIASES = %d;\n" % max_aliases)