ICU-10128 encode new properties bpt & bpb in ubidi.icu format version 2.1

X-SVN-Rev: 33557
This commit is contained in:
Markus Scherer 2013-04-26 23:45:27 +00:00
parent c6902ad571
commit dabb8350c7
4 changed files with 44 additions and 14 deletions

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2004-2012, International Business Machines
* Copyright (C) 2004-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -43,7 +43,7 @@ the udata API for loading ICU data. Especially, a UDataInfo structure
precedes the actual data. It contains platform property values and the
file format version.
The following is a description of format version 2.0 .
The following is a description of format version 2.1 .
The file contains the following structures:
@ -80,7 +80,7 @@ Bits
12 is mirrored
11 Bidi_Control
10 Join_Control
9.. 8 reserved (set to 0)
9.. 8 Bidi_Paired_Bracket_Type(bpt) -- new in format version 2.1
7.. 5 Joining_Type
4.. 0 BiDi category
@ -120,6 +120,11 @@ containing the Joining_Group value.
All code points outside of this range have No_Joining_Group (0).
--- Changes in format version 2.1 ---
Addition of Bidi_Paired_Bracket_Type(bpt) values.
(Trie data bits 9..8 were reserved.)
--- Changes in format version 2 ---
Change from UTrie to UTrie2.
@ -140,7 +145,7 @@ static UDataInfo dataInfo={
/* dataFormat="BiDi" */
{ UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 },
{ 2, 0, 0, 0 }, /* formatVersion */
{ 2, 1, 0, 0 }, /* formatVersion */
{ 6, 0, 0, 0 } /* dataVersion */
};
@ -252,6 +257,15 @@ BiDiPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
UChar32 start=props.start;
UChar32 end=props.end;
// The runtime code relies on this invariant for returning both bmg and bpb
// from the same data.
int32_t bpt=props.getIntProp(UCHAR_BIDI_PAIRED_BRACKET_TYPE);
if(!(bpt==0 ? props.bpb==U_SENTINEL : props.bpb==props.bmg)) {
fprintf(stderr,
"genprops error: invariant not true: "
"if(bpt==None) then bpb=<none> else bpb=bmg\n");
return;
}
int32_t delta=encodeBidiMirroringGlyph(start, end, props.bmg, errorCode);
uint32_t value=(uint32_t)delta<<UBIDI_MIRROR_DELTA_SHIFT;
if(props.binProps[UCHAR_BIDI_MIRRORED]) {
@ -263,6 +277,7 @@ BiDiPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
if(props.binProps[UCHAR_JOIN_CONTROL]) {
value|=U_MASK(UBIDI_JOIN_CONTROL_SHIFT);
}
value|=(uint32_t)bpt<<UBIDI_BPT_SHIFT;
value|=(uint32_t)props.getIntProp(UCHAR_JOINING_TYPE)<<UBIDI_JT_SHIFT;
value|=(uint32_t)props.getIntProp(UCHAR_BIDI_CLASS);
utrie2_setRange32(pTrie, start, end, value, TRUE, &errorCode);

View file

@ -1,18 +1,18 @@
/**
* Copyright (C) 2002-2012, International Business Machines Corporation and
* Copyright (C) 2002-2013, International Business Machines Corporation and
* others. All Rights Reserved.
*
* machine-generated by: icu/tools/unicode/py/preparseucd.py
*/
#define UNICODE_VERSION { 6, 2, 0, 0 }
#define UNICODE_VERSION { 6, 3, 0, 0 }
/*
 * Name table for the two values 0 and 1.
 * Each string lists the aliases of one value, separated by single spaces
 * (e.g. "N No F False" for 0).
 * NOTE(review): presumably shared by all binary properties -- confirm against
 * the generator script named in the file header.
 */
static const Value VALUES_binprop[2] = {
Value(0, "N No F False"),
Value(1, "Y Yes T True"),
};
static const Value VALUES_bc[19] = {
static const Value VALUES_bc[23] = {
Value(U_LEFT_TO_RIGHT, "L Left_To_Right"),
Value(U_RIGHT_TO_LEFT, "R Right_To_Left"),
Value(U_EUROPEAN_NUMBER, "EN European_Number"),
@ -32,6 +32,10 @@ static const Value VALUES_bc[19] = {
Value(U_POP_DIRECTIONAL_FORMAT, "PDF Pop_Directional_Format"),
Value(U_DIR_NON_SPACING_MARK, "NSM Nonspacing_Mark"),
Value(U_BOUNDARY_NEUTRAL, "BN Boundary_Neutral"),
Value(U_FIRST_STRONG_ISOLATE, "FSI First_Strong_Isolate"),
Value(U_LEFT_TO_RIGHT_ISOLATE, "LRI Left_To_Right_Isolate"),
Value(U_RIGHT_TO_LEFT_ISOLATE, "RLI Right_To_Left_Isolate"),
Value(U_POP_DIRECTIONAL_ISOLATE, "PDI Pop_Directional_Isolate"),
};
static const Value VALUES_blk[221] = {
@ -848,7 +852,7 @@ static const Value VALUES_SB[15] = {
Value(U_SB_SCONTINUE, "SC SContinue"),
};
static const Value VALUES_WB[14] = {
static const Value VALUES_WB[17] = {
Value(U_WB_OTHER, "XX Other"),
Value(U_WB_ALETTER, "LE ALetter"),
Value(U_WB_FORMAT, "FO Format"),
@ -863,6 +867,15 @@ static const Value VALUES_WB[14] = {
Value(U_WB_MIDNUMLET, "MB MidNumLet"),
Value(U_WB_NEWLINE, "NL Newline"),
Value(U_WB_REGIONAL_INDICATOR, "RI Regional_Indicator"),
Value(U_WB_HEBREW_LETTER, "HL Hebrew_Letter"),
Value(U_WB_SINGLE_QUOTE, "SQ Single_Quote"),
Value(U_WB_DOUBLE_QUOTE, "DQ Double_Quote"),
};
/*
 * Name table for the three U_BPT_* constants (none, open, close).
 * Each string holds a short alias followed by a long alias, separated by a
 * single space (e.g. "o Open").
 * The array length (3) is repeated wherever this table is registered and
 * must be kept in sync with it.
 */
static const Value VALUES_bpt[3] = {
Value(U_BPT_NONE, "n None"),
Value(U_BPT_OPEN, "o Open"),
Value(U_BPT_CLOSE, "c Close"),
};
static const Value VALUES_gcm[38] = {
@ -906,7 +919,7 @@ static const Value VALUES_gcm[38] = {
Value((int32_t)U_GC_ZS_MASK, "Zs Space_Separator"),
};
static const Property PROPERTIES[94] = {
static const Property PROPERTIES[96] = {
Property(UCHAR_ALPHABETIC, "Alpha Alphabetic"),
Property(UCHAR_ASCII_HEX_DIGIT, "AHex ASCII_Hex_Digit"),
Property(UCHAR_BIDI_CONTROL, "Bidi_C Bidi_Control"),
@ -964,7 +977,7 @@ static const Property PROPERTIES[94] = {
Property(UCHAR_CHANGES_WHEN_CASEFOLDED, "CWCF Changes_When_Casefolded"),
Property(UCHAR_CHANGES_WHEN_CASEMAPPED, "CWCM Changes_When_Casemapped"),
Property(UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED, "CWKCF Changes_When_NFKC_Casefolded"),
Property(UCHAR_BIDI_CLASS, "bc Bidi_Class", VALUES_bc, 19),
Property(UCHAR_BIDI_CLASS, "bc Bidi_Class", VALUES_bc, 23),
Property(UCHAR_BLOCK, "blk Block", VALUES_blk, 221),
Property(UCHAR_CANONICAL_COMBINING_CLASS, "ccc Canonical_Combining_Class", VALUES_ccc, 57),
Property(UCHAR_DECOMPOSITION_TYPE, "dt Decomposition_Type", VALUES_dt, 18),
@ -984,7 +997,8 @@ static const Property PROPERTIES[94] = {
Property(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS, "tccc Trail_Canonical_Combining_Class", VALUES_tccc, 57),
Property(UCHAR_GRAPHEME_CLUSTER_BREAK, "GCB Grapheme_Cluster_Break", VALUES_GCB, 13),
Property(UCHAR_SENTENCE_BREAK, "SB Sentence_Break", VALUES_SB, 15),
Property(UCHAR_WORD_BREAK, "WB Word_Break", VALUES_WB, 14),
Property(UCHAR_WORD_BREAK, "WB Word_Break", VALUES_WB, 17),
Property(UCHAR_BIDI_PAIRED_BRACKET_TYPE, "bpt Bidi_Paired_Bracket_Type", VALUES_bpt, 3),
Property(UCHAR_GENERAL_CATEGORY_MASK, "gcm General_Category_Mask", VALUES_gcm, 38),
Property(UCHAR_NUMERIC_VALUE, "nv Numeric_Value"),
Property(UCHAR_AGE, "age Age"),
@ -1000,6 +1014,7 @@ static const Property PROPERTIES[94] = {
Property(UCHAR_TITLECASE_MAPPING, "tc Titlecase_Mapping"),
Property(UCHAR_UNICODE_1_NAME, "na1 Unicode_1_Name"),
Property(UCHAR_UPPERCASE_MAPPING, "uc Uppercase_Mapping"),
Property(UCHAR_BIDI_PAIRED_BRACKET, "bpb Bidi_Paired_Bracket"),
Property(UCHAR_SCRIPT_EXTENSIONS, "scx Script_Extensions"),
};

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2000-2012, International Business Machines
* Copyright (C) 2000-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -705,7 +705,7 @@ UCAElements *readAnElement(FILE *data, tempUCATable *t, UCAConstants *consts, Le
u_versionToString(UCAVersion, uca);
u_versionToString(UCDVersion, ucd);
// Warning, not error, to permit bootstrapping during a version upgrade.
fprintf(stderr, "warning: UCA version %s != UCD version %s (temporarily change the FractionalUCA.txt UCA version during Unicode version upgrade)\n", uca, ucd);
fprintf(stderr, "warning: UCA version %s != UCD version %s\n", uca, ucd);
// *status = U_INVALID_FORMAT_ERROR;
// return NULL;
}

View file

@ -1781,7 +1781,7 @@ _ublock_re = re.compile(" *(UBLOCK_[0-9A-Z_]+) *= *[0-9]+,")
# Sample line to match:
# U_EA_AMBIGUOUS,
_prop_and_value_re = re.compile(
" *(U_(DT|EA|GCB|HST|LB|JG|JT|NT|SB|WB)_([0-9A-Z_]+))")
" *(U_(BPT|DT|EA|GCB|HST|LB|JG|JT|NT|SB|WB)_([0-9A-Z_]+))")
# Sample line to match if it has matched _prop_and_value_re
# (we want to exclude aliases):