mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-8972 bug fixes
X-SVN-Rev: 31166
This commit is contained in:
parent
f3fd941998
commit
1ec1832428
4 changed files with 102 additions and 89 deletions
tools/unicode
|
@ -726,7 +726,7 @@ CorePropsBuilder::writeCSourceFile(const char *path, UErrorCode &errorCode) {
|
|||
int32_t pvCount=pvRows*UPROPS_VECTOR_WORDS;
|
||||
|
||||
FILE *f=usrc_createFromGenerator(path, "uchar_props_data.h",
|
||||
"icu/tools/src/unicode/c/genprops/corepropsbuilder.cpp");
|
||||
"icu/tools/unicode/c/genprops/corepropsbuilder.cpp");
|
||||
if(f==NULL) {
|
||||
errorCode=U_FILE_ACCESS_ERROR;
|
||||
return;
|
||||
|
|
|
@ -948,63 +948,63 @@ const Value VALUES_gcm[] = {
|
|||
const int32_t PROPERTIES_COUNT = 94;
|
||||
|
||||
const Property PROPERTIES[] = {
|
||||
Property(UCHAR_ALPHABETIC, "Alpha Alphabetic", NULL, 0),
|
||||
Property(UCHAR_ASCII_HEX_DIGIT, "AHex ASCII_Hex_Digit", NULL, 0),
|
||||
Property(UCHAR_BIDI_CONTROL, "Bidi_C Bidi_Control", NULL, 0),
|
||||
Property(UCHAR_BIDI_MIRRORED, "Bidi_M Bidi_Mirrored", NULL, 0),
|
||||
Property(UCHAR_DASH, "Dash Dash", NULL, 0),
|
||||
Property(UCHAR_DEFAULT_IGNORABLE_CODE_POINT, "DI Default_Ignorable_Code_Point", NULL, 0),
|
||||
Property(UCHAR_DEPRECATED, "Dep Deprecated", NULL, 0),
|
||||
Property(UCHAR_DIACRITIC, "Dia Diacritic", NULL, 0),
|
||||
Property(UCHAR_EXTENDER, "Ext Extender", NULL, 0),
|
||||
Property(UCHAR_FULL_COMPOSITION_EXCLUSION, "Comp_Ex Full_Composition_Exclusion", NULL, 0),
|
||||
Property(UCHAR_GRAPHEME_BASE, "Gr_Base Grapheme_Base", NULL, 0),
|
||||
Property(UCHAR_GRAPHEME_EXTEND, "Gr_Ext Grapheme_Extend", NULL, 0),
|
||||
Property(UCHAR_GRAPHEME_LINK, "Gr_Link Grapheme_Link", NULL, 0),
|
||||
Property(UCHAR_HEX_DIGIT, "Hex Hex_Digit", NULL, 0),
|
||||
Property(UCHAR_HYPHEN, "Hyphen Hyphen", NULL, 0),
|
||||
Property(UCHAR_ID_CONTINUE, "IDC ID_Continue", NULL, 0),
|
||||
Property(UCHAR_ID_START, "IDS ID_Start", NULL, 0),
|
||||
Property(UCHAR_IDEOGRAPHIC, "Ideo Ideographic", NULL, 0),
|
||||
Property(UCHAR_IDS_BINARY_OPERATOR, "IDSB IDS_Binary_Operator", NULL, 0),
|
||||
Property(UCHAR_IDS_TRINARY_OPERATOR, "IDST IDS_Trinary_Operator", NULL, 0),
|
||||
Property(UCHAR_JOIN_CONTROL, "Join_C Join_Control", NULL, 0),
|
||||
Property(UCHAR_LOGICAL_ORDER_EXCEPTION, "LOE Logical_Order_Exception", NULL, 0),
|
||||
Property(UCHAR_LOWERCASE, "Lower Lowercase", NULL, 0),
|
||||
Property(UCHAR_MATH, "Math Math", NULL, 0),
|
||||
Property(UCHAR_NONCHARACTER_CODE_POINT, "NChar Noncharacter_Code_Point", NULL, 0),
|
||||
Property(UCHAR_QUOTATION_MARK, "QMark Quotation_Mark", NULL, 0),
|
||||
Property(UCHAR_RADICAL, "Radical Radical", NULL, 0),
|
||||
Property(UCHAR_SOFT_DOTTED, "SD Soft_Dotted", NULL, 0),
|
||||
Property(UCHAR_TERMINAL_PUNCTUATION, "Term Terminal_Punctuation", NULL, 0),
|
||||
Property(UCHAR_UNIFIED_IDEOGRAPH, "UIdeo Unified_Ideograph", NULL, 0),
|
||||
Property(UCHAR_UPPERCASE, "Upper Uppercase", NULL, 0),
|
||||
Property(UCHAR_WHITE_SPACE, "WSpace White_Space space", NULL, 0),
|
||||
Property(UCHAR_XID_CONTINUE, "XIDC XID_Continue", NULL, 0),
|
||||
Property(UCHAR_XID_START, "XIDS XID_Start", NULL, 0),
|
||||
Property(UCHAR_CASE_SENSITIVE, "Sensitive Case_Sensitive", NULL, 0),
|
||||
Property(UCHAR_S_TERM, "STerm STerm", NULL, 0),
|
||||
Property(UCHAR_VARIATION_SELECTOR, "VS Variation_Selector", NULL, 0),
|
||||
Property(UCHAR_NFD_INERT, "nfdinert NFD_Inert", NULL, 0),
|
||||
Property(UCHAR_NFKD_INERT, "nfkdinert NFKD_Inert", NULL, 0),
|
||||
Property(UCHAR_NFC_INERT, "nfcinert NFC_Inert", NULL, 0),
|
||||
Property(UCHAR_NFKC_INERT, "nfkcinert NFKC_Inert", NULL, 0),
|
||||
Property(UCHAR_SEGMENT_STARTER, "segstart Segment_Starter", NULL, 0),
|
||||
Property(UCHAR_PATTERN_SYNTAX, "Pat_Syn Pattern_Syntax", NULL, 0),
|
||||
Property(UCHAR_PATTERN_WHITE_SPACE, "Pat_WS Pattern_White_Space", NULL, 0),
|
||||
Property(UCHAR_POSIX_ALNUM, " alnum", NULL, 0),
|
||||
Property(UCHAR_POSIX_BLANK, " blank", NULL, 0),
|
||||
Property(UCHAR_POSIX_GRAPH, " graph", NULL, 0),
|
||||
Property(UCHAR_POSIX_PRINT, " print", NULL, 0),
|
||||
Property(UCHAR_POSIX_XDIGIT, " xdigit", NULL, 0),
|
||||
Property(UCHAR_CASED, "Cased Cased", NULL, 0),
|
||||
Property(UCHAR_CASE_IGNORABLE, "CI Case_Ignorable", NULL, 0),
|
||||
Property(UCHAR_CHANGES_WHEN_LOWERCASED, "CWL Changes_When_Lowercased", NULL, 0),
|
||||
Property(UCHAR_CHANGES_WHEN_UPPERCASED, "CWU Changes_When_Uppercased", NULL, 0),
|
||||
Property(UCHAR_CHANGES_WHEN_TITLECASED, "CWT Changes_When_Titlecased", NULL, 0),
|
||||
Property(UCHAR_CHANGES_WHEN_CASEFOLDED, "CWCF Changes_When_Casefolded", NULL, 0),
|
||||
Property(UCHAR_CHANGES_WHEN_CASEMAPPED, "CWCM Changes_When_Casemapped", NULL, 0),
|
||||
Property(UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED, "CWKCF Changes_When_NFKC_Casefolded", NULL, 0),
|
||||
Property(UCHAR_ALPHABETIC, "Alpha Alphabetic"),
|
||||
Property(UCHAR_ASCII_HEX_DIGIT, "AHex ASCII_Hex_Digit"),
|
||||
Property(UCHAR_BIDI_CONTROL, "Bidi_C Bidi_Control"),
|
||||
Property(UCHAR_BIDI_MIRRORED, "Bidi_M Bidi_Mirrored"),
|
||||
Property(UCHAR_DASH, "Dash Dash"),
|
||||
Property(UCHAR_DEFAULT_IGNORABLE_CODE_POINT, "DI Default_Ignorable_Code_Point"),
|
||||
Property(UCHAR_DEPRECATED, "Dep Deprecated"),
|
||||
Property(UCHAR_DIACRITIC, "Dia Diacritic"),
|
||||
Property(UCHAR_EXTENDER, "Ext Extender"),
|
||||
Property(UCHAR_FULL_COMPOSITION_EXCLUSION, "Comp_Ex Full_Composition_Exclusion"),
|
||||
Property(UCHAR_GRAPHEME_BASE, "Gr_Base Grapheme_Base"),
|
||||
Property(UCHAR_GRAPHEME_EXTEND, "Gr_Ext Grapheme_Extend"),
|
||||
Property(UCHAR_GRAPHEME_LINK, "Gr_Link Grapheme_Link"),
|
||||
Property(UCHAR_HEX_DIGIT, "Hex Hex_Digit"),
|
||||
Property(UCHAR_HYPHEN, "Hyphen Hyphen"),
|
||||
Property(UCHAR_ID_CONTINUE, "IDC ID_Continue"),
|
||||
Property(UCHAR_ID_START, "IDS ID_Start"),
|
||||
Property(UCHAR_IDEOGRAPHIC, "Ideo Ideographic"),
|
||||
Property(UCHAR_IDS_BINARY_OPERATOR, "IDSB IDS_Binary_Operator"),
|
||||
Property(UCHAR_IDS_TRINARY_OPERATOR, "IDST IDS_Trinary_Operator"),
|
||||
Property(UCHAR_JOIN_CONTROL, "Join_C Join_Control"),
|
||||
Property(UCHAR_LOGICAL_ORDER_EXCEPTION, "LOE Logical_Order_Exception"),
|
||||
Property(UCHAR_LOWERCASE, "Lower Lowercase"),
|
||||
Property(UCHAR_MATH, "Math Math"),
|
||||
Property(UCHAR_NONCHARACTER_CODE_POINT, "NChar Noncharacter_Code_Point"),
|
||||
Property(UCHAR_QUOTATION_MARK, "QMark Quotation_Mark"),
|
||||
Property(UCHAR_RADICAL, "Radical Radical"),
|
||||
Property(UCHAR_SOFT_DOTTED, "SD Soft_Dotted"),
|
||||
Property(UCHAR_TERMINAL_PUNCTUATION, "Term Terminal_Punctuation"),
|
||||
Property(UCHAR_UNIFIED_IDEOGRAPH, "UIdeo Unified_Ideograph"),
|
||||
Property(UCHAR_UPPERCASE, "Upper Uppercase"),
|
||||
Property(UCHAR_WHITE_SPACE, "WSpace White_Space space"),
|
||||
Property(UCHAR_XID_CONTINUE, "XIDC XID_Continue"),
|
||||
Property(UCHAR_XID_START, "XIDS XID_Start"),
|
||||
Property(UCHAR_CASE_SENSITIVE, "Sensitive Case_Sensitive"),
|
||||
Property(UCHAR_S_TERM, "STerm STerm"),
|
||||
Property(UCHAR_VARIATION_SELECTOR, "VS Variation_Selector"),
|
||||
Property(UCHAR_NFD_INERT, "nfdinert NFD_Inert"),
|
||||
Property(UCHAR_NFKD_INERT, "nfkdinert NFKD_Inert"),
|
||||
Property(UCHAR_NFC_INERT, "nfcinert NFC_Inert"),
|
||||
Property(UCHAR_NFKC_INERT, "nfkcinert NFKC_Inert"),
|
||||
Property(UCHAR_SEGMENT_STARTER, "segstart Segment_Starter"),
|
||||
Property(UCHAR_PATTERN_SYNTAX, "Pat_Syn Pattern_Syntax"),
|
||||
Property(UCHAR_PATTERN_WHITE_SPACE, "Pat_WS Pattern_White_Space"),
|
||||
Property(UCHAR_POSIX_ALNUM, " alnum"),
|
||||
Property(UCHAR_POSIX_BLANK, " blank"),
|
||||
Property(UCHAR_POSIX_GRAPH, " graph"),
|
||||
Property(UCHAR_POSIX_PRINT, " print"),
|
||||
Property(UCHAR_POSIX_XDIGIT, " xdigit"),
|
||||
Property(UCHAR_CASED, "Cased Cased"),
|
||||
Property(UCHAR_CASE_IGNORABLE, "CI Case_Ignorable"),
|
||||
Property(UCHAR_CHANGES_WHEN_LOWERCASED, "CWL Changes_When_Lowercased"),
|
||||
Property(UCHAR_CHANGES_WHEN_UPPERCASED, "CWU Changes_When_Uppercased"),
|
||||
Property(UCHAR_CHANGES_WHEN_TITLECASED, "CWT Changes_When_Titlecased"),
|
||||
Property(UCHAR_CHANGES_WHEN_CASEFOLDED, "CWCF Changes_When_Casefolded"),
|
||||
Property(UCHAR_CHANGES_WHEN_CASEMAPPED, "CWCM Changes_When_Casemapped"),
|
||||
Property(UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED, "CWKCF Changes_When_NFKC_Casefolded"),
|
||||
Property(UCHAR_BIDI_CLASS, "bc Bidi_Class", VALUES_bc, VALUES_bc_COUNT),
|
||||
Property(UCHAR_BLOCK, "blk Block", VALUES_blk, VALUES_blk_COUNT),
|
||||
Property(UCHAR_CANONICAL_COMBINING_CLASS, "ccc Canonical_Combining_Class", VALUES_ccc, VALUES_ccc_COUNT),
|
||||
|
@ -1027,21 +1027,21 @@ const Property PROPERTIES[] = {
|
|||
Property(UCHAR_SENTENCE_BREAK, "SB Sentence_Break", VALUES_SB, VALUES_SB_COUNT),
|
||||
Property(UCHAR_WORD_BREAK, "WB Word_Break", VALUES_WB, VALUES_WB_COUNT),
|
||||
Property(UCHAR_GENERAL_CATEGORY_MASK, "gcm General_Category_Mask", VALUES_gcm, VALUES_gcm_COUNT),
|
||||
Property(UCHAR_NUMERIC_VALUE, "nv Numeric_Value", NULL, 0),
|
||||
Property(UCHAR_AGE, "age Age", NULL, 0),
|
||||
Property(UCHAR_BIDI_MIRRORING_GLYPH, "bmg Bidi_Mirroring_Glyph", NULL, 0),
|
||||
Property(UCHAR_CASE_FOLDING, "cf Case_Folding", NULL, 0),
|
||||
Property(UCHAR_ISO_COMMENT, "isc ISO_Comment", NULL, 0),
|
||||
Property(UCHAR_LOWERCASE_MAPPING, "lc Lowercase_Mapping", NULL, 0),
|
||||
Property(UCHAR_NAME, "na Name", NULL, 0),
|
||||
Property(UCHAR_SIMPLE_CASE_FOLDING, "scf Simple_Case_Folding sfc", NULL, 0),
|
||||
Property(UCHAR_SIMPLE_LOWERCASE_MAPPING, "slc Simple_Lowercase_Mapping", NULL, 0),
|
||||
Property(UCHAR_SIMPLE_TITLECASE_MAPPING, "stc Simple_Titlecase_Mapping", NULL, 0),
|
||||
Property(UCHAR_SIMPLE_UPPERCASE_MAPPING, "suc Simple_Uppercase_Mapping", NULL, 0),
|
||||
Property(UCHAR_TITLECASE_MAPPING, "tc Titlecase_Mapping", NULL, 0),
|
||||
Property(UCHAR_UNICODE_1_NAME, "na1 Unicode_1_Name", NULL, 0),
|
||||
Property(UCHAR_UPPERCASE_MAPPING, "uc Uppercase_Mapping", NULL, 0),
|
||||
Property(UCHAR_SCRIPT_EXTENSIONS, "scx Script_Extensions", NULL, 0),
|
||||
Property(UCHAR_NUMERIC_VALUE, "nv Numeric_Value"),
|
||||
Property(UCHAR_AGE, "age Age"),
|
||||
Property(UCHAR_BIDI_MIRRORING_GLYPH, "bmg Bidi_Mirroring_Glyph"),
|
||||
Property(UCHAR_CASE_FOLDING, "cf Case_Folding"),
|
||||
Property(UCHAR_ISO_COMMENT, "isc ISO_Comment"),
|
||||
Property(UCHAR_LOWERCASE_MAPPING, "lc Lowercase_Mapping"),
|
||||
Property(UCHAR_NAME, "na Name"),
|
||||
Property(UCHAR_SIMPLE_CASE_FOLDING, "scf Simple_Case_Folding sfc"),
|
||||
Property(UCHAR_SIMPLE_LOWERCASE_MAPPING, "slc Simple_Lowercase_Mapping"),
|
||||
Property(UCHAR_SIMPLE_TITLECASE_MAPPING, "stc Simple_Titlecase_Mapping"),
|
||||
Property(UCHAR_SIMPLE_UPPERCASE_MAPPING, "suc Simple_Uppercase_Mapping"),
|
||||
Property(UCHAR_TITLECASE_MAPPING, "tc Titlecase_Mapping"),
|
||||
Property(UCHAR_UNICODE_1_NAME, "na1 Unicode_1_Name"),
|
||||
Property(UCHAR_UPPERCASE_MAPPING, "uc Uppercase_Mapping"),
|
||||
Property(UCHAR_SCRIPT_EXTENSIONS, "scx Script_Extensions"),
|
||||
};
|
||||
|
||||
const int32_t MAX_ALIASES = 4;
|
||||
|
|
|
@ -55,15 +55,17 @@ U_NAMESPACE_USE
|
|||
// So we define a second constant and at runtime check that it's >=MAX_ALIASES.
|
||||
static const int32_t VALUE_MAX_ALIASES=4;
|
||||
|
||||
// Capacity, in chars, of the aliasesBuffer[] and normalizedBuffer[] members of Value.
// The Value constructor reports an error and exits when
// uprv_strlen(joinedAliases) is greater than or equal to this capacity.
static const int32_t JOINED_ALIASES_CAPACITY=100;
|
||||
|
||||
class Value {
|
||||
public:
|
||||
Value(int32_t enumValue, const char *joinedAliases)
|
||||
: enumValue(enumValue), joinedAliases(joinedAliases), count(0) {
|
||||
if(uprv_strlen(joinedAliases)>=LENGTHOF(aliasesBuffer)) {
|
||||
if(uprv_strlen(joinedAliases)>=JOINED_ALIASES_CAPACITY) {
|
||||
fprintf(stderr,
|
||||
"genprops error: pnamesbuilder.cpp Value::Value(%ld, \"%s\"): "
|
||||
"joined aliases too long: make Value::aliasesBuffer[] larger, "
|
||||
"at least %ld\n",
|
||||
"joined aliases too long: "
|
||||
"increase JOINED_ALIASES_CAPACITY, to at least %ld\n",
|
||||
(long)enumValue, joinedAliases, uprv_strlen(joinedAliases)+1);
|
||||
exit(U_BUFFER_OVERFLOW_ERROR);
|
||||
}
|
||||
|
@ -76,14 +78,12 @@ public:
|
|||
do {
|
||||
aliases[count]=a;
|
||||
normalized[count++]=n;
|
||||
char c;
|
||||
while((c=*j)!=' ' && c!=0) {
|
||||
while((c=*j++)!=' ' && c!=0) {
|
||||
*a++=c;
|
||||
// Ignore delimiters '-' and '_'.
|
||||
if(!(c=='-' || c=='_')) {
|
||||
*n++=uprv_tolower(c);
|
||||
}
|
||||
++j;
|
||||
}
|
||||
*a++=0;
|
||||
*n++=0;
|
||||
|
@ -117,8 +117,8 @@ public:
|
|||
|
||||
int32_t enumValue;
|
||||
const char *joinedAliases;
|
||||
char aliasesBuffer[100];
|
||||
char normalizedBuffer[100]; // Same capacity as aliasesBuffer!
|
||||
char aliasesBuffer[JOINED_ALIASES_CAPACITY];
|
||||
char normalizedBuffer[JOINED_ALIASES_CAPACITY];
|
||||
const char *aliases[VALUE_MAX_ALIASES];
|
||||
const char *normalized[VALUE_MAX_ALIASES];
|
||||
int32_t count;
|
||||
|
@ -126,10 +126,13 @@ public:
|
|||
|
||||
class Property : public Value {
|
||||
public:
|
||||
// A property with a values array.
|
||||
Property(int32_t enumValue, const char *joinedAliases,
|
||||
const Value *values, int32_t valueCount)
|
||||
: Value(enumValue, joinedAliases),
|
||||
values(values), valueCount(valueCount) {}
|
||||
// A binary property (enumValue<UCHAR_BINARY_LIMIT), or one without values.
|
||||
Property(int32_t enumValue, const char *joinedAliases);
|
||||
|
||||
const Value *values;
|
||||
int32_t valueCount;
|
||||
|
@ -138,6 +141,11 @@ public:
|
|||
// *** Include the data header ***
|
||||
#include "pnames_data.h"
|
||||
|
||||
// Two-argument Property constructor, for a binary property or a property without values.
// If enumValue<UCHAR_BINARY_LIMIT, the property gets the VALUES_binprop array
// with VALUES_binprop_COUNT entries; otherwise values=NULL and valueCount=0.
// NOTE(review): presumably defined out of line, after the #include of "pnames_data.h",
// because VALUES_binprop comes from that data header -- confirm.
Property::Property(int32_t enumValue, const char *joinedAliases)
|
||||
: Value(enumValue, joinedAliases),
|
||||
values(enumValue<UCHAR_BINARY_LIMIT ? VALUES_binprop : NULL),
|
||||
valueCount(enumValue<UCHAR_BINARY_LIMIT ? VALUES_binprop_COUNT : 0) {}
|
||||
|
||||
// END DATA
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
|
@ -219,6 +227,12 @@ public:
|
|||
valueMaps.addElement(bytesTrieOffset, errorCode);
|
||||
buildValueMap(VALUES_binprop, VALUES_binprop_COUNT, errorCode);
|
||||
|
||||
// Note: It is slightly wasteful to store binary properties like all others.
|
||||
// Since we know that they are in the lowest range of property enum values
|
||||
// and share the same name group and BytesTrie,
|
||||
// we could just store those two indexes once.
|
||||
// (This would save 8 bytes per binary property, or about half a kilobyte.)
|
||||
|
||||
// Build the known-repeated canonical combining class properties once.
|
||||
int32_t cccValueMapOffset=valueMaps.size();
|
||||
bytesTrieOffset=buildValuesBytesTrie(VALUES_ccc, VALUES_ccc_COUNT, errorCode);
|
||||
|
@ -281,7 +295,6 @@ public:
|
|||
int32_t writeValueAliases(const Value &value, UErrorCode &errorCode) {
|
||||
int32_t nameOffset=uhash_geti(nameGroupToOffset, (void *)value.joinedAliases);
|
||||
if(nameOffset!=0) {
|
||||
printf("* duplicate joinedAliases: \"%s\"\n", value.joinedAliases);
|
||||
// The same list of aliases has been written already.
|
||||
return nameOffset-1; // Was incremented to reserve 0 for "not found".
|
||||
}
|
||||
|
@ -476,7 +489,7 @@ void
|
|||
PNamesBuilderImpl::writeCSourceFile(const char *path, UErrorCode &errorCode) {
|
||||
if(U_FAILURE(errorCode)) { return; }
|
||||
FILE *f=usrc_createFromGenerator(path, "propname_data.h",
|
||||
"icu/tools/src/unicode/c/genprops/pnamesbuilder.cpp");
|
||||
"icu/tools/unicode/c/genprops/pnamesbuilder.cpp");
|
||||
if(f==NULL) {
|
||||
errorCode=U_FILE_ACCESS_ERROR;
|
||||
return; // usrc_create() reported an error.
|
||||
|
|
|
@ -1724,15 +1724,15 @@ def WritePNamesDataHeader(out_path):
|
|||
out_file.write("const Property PROPERTIES[] = {\n")
|
||||
for (enum, pname, values) in _icu_properties:
|
||||
prop = _properties[pname]
|
||||
aliases = prop[1]
|
||||
if values: # Property with named values.
|
||||
if prop[0] == "Binary": pname = "binprop"
|
||||
aliases = " ".join(prop[1])
|
||||
if prop[0] == "Binary":
|
||||
out_file.write(' Property(%s, "%s"),\n' % (enum, aliases))
|
||||
elif values: # Property with named values.
|
||||
out_file.write(
|
||||
' Property(%s, "%s", VALUES_%s, VALUES_%s_COUNT),\n' %
|
||||
(enum, " ".join(aliases), pname, pname))
|
||||
(enum, aliases, pname, pname))
|
||||
else:
|
||||
out_file.write(' Property(%s, "%s", NULL, 0),\n' %
|
||||
(enum, " ".join(aliases)))
|
||||
out_file.write(' Property(%s, "%s"),\n' % (enum, aliases))
|
||||
out_file.write("};\n\n")
|
||||
|
||||
out_file.write("const int32_t MAX_ALIASES = %d;\n" % max_aliases)
|
||||
|
|
Loading…
Add table
Reference in a new issue