ICU-12526 initial Unicode 9 data

X-SVN-Rev: 38698
This commit is contained in:
Markus Scherer 2016-05-04 23:54:37 +00:00
parent 36eb56695c
commit dbebd188e7
2 changed files with 46 additions and 17 deletions

View file

@ -5,7 +5,7 @@
* machine-generated by: icu/tools/unicode/py/preparseucd.py
*/
#define UNICODE_VERSION { 8, 0, 0, 0 }
#define UNICODE_VERSION { 9, 0, 0, 0 }
static const Value VALUES_binprop[2] = {
Value(0, "N No F False"),
@ -38,7 +38,7 @@ static const Value VALUES_bc[23] = {
Value(U_POP_DIRECTIONAL_ISOLATE, "PDI Pop_Directional_Isolate"),
};
static const Value VALUES_blk[263] = {
static const Value VALUES_blk[274] = {
Value(UBLOCK_NO_BLOCK, "NB No_Block"),
Value(UBLOCK_BASIC_LATIN, "ASCII Basic_Latin"),
Value(UBLOCK_LATIN_1_SUPPLEMENT, "Latin_1_Sup Latin_1_Supplement Latin_1"),
@ -302,6 +302,17 @@ static const Value VALUES_blk[263] = {
Value(UBLOCK_OLD_HUNGARIAN, "Old_Hungarian Old_Hungarian"),
Value(UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS, "Sup_Symbols_And_Pictographs Supplemental_Symbols_And_Pictographs"),
Value(UBLOCK_SUTTON_SIGNWRITING, "Sutton_SignWriting Sutton_SignWriting"),
Value(UBLOCK_ADLAM, "Adlam Adlam"),
Value(UBLOCK_BHAIKSUKI, "Bhaiksuki Bhaiksuki"),
Value(UBLOCK_CYRILLIC_EXTENDED_C, "Cyrillic_Ext_C Cyrillic_Extended_C"),
Value(UBLOCK_GLAGOLITIC_SUPPLEMENT, "Glagolitic_Sup Glagolitic_Supplement"),
Value(UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION, "Ideographic_Symbols Ideographic_Symbols_And_Punctuation"),
Value(UBLOCK_MARCHEN, "Marchen Marchen"),
Value(UBLOCK_MONGOLIAN_SUPPLEMENT, "Mongolian_Sup Mongolian_Supplement"),
Value(UBLOCK_NEWA, "Newa Newa"),
Value(UBLOCK_OSAGE, "Osage Osage"),
Value(UBLOCK_TANGUT, "Tangut Tangut"),
Value(UBLOCK_TANGUT_COMPONENTS, "Tangut_Components Tangut_Components"),
};
static const Value VALUES_ccc[57] = {
@ -427,7 +438,7 @@ static const Value VALUES_gc[30] = {
Value(U_FINAL_PUNCTUATION, "Pf Final_Punctuation"),
};
static const Value VALUES_jg[86] = {
static const Value VALUES_jg[89] = {
Value(U_JG_NO_JOINING_GROUP, "No_Joining_Group No_Joining_Group"),
Value(U_JG_AIN, "Ain Ain"),
Value(U_JG_ALAPH, "Alaph Alaph"),
@ -514,6 +525,9 @@ static const Value VALUES_jg[86] = {
Value(U_JG_MANICHAEAN_YODH, "Manichaean_Yodh Manichaean_Yodh"),
Value(U_JG_MANICHAEAN_ZAYIN, "Manichaean_Zayin Manichaean_Zayin"),
Value(U_JG_STRAIGHT_WAW, "Straight_Waw Straight_Waw"),
Value(U_JG_AFRICAN_FEH, "African_Feh African_Feh"),
Value(U_JG_AFRICAN_NOON, "African_Noon African_Noon"),
Value(U_JG_AFRICAN_QAF, "African_Qaf African_Qaf"),
};
static const Value VALUES_jt[6] = {
@ -525,7 +539,7 @@ static const Value VALUES_jt[6] = {
Value(U_JT_TRANSPARENT, "T Transparent"),
};
static const Value VALUES_lb[40] = {
static const Value VALUES_lb[43] = {
Value(U_LB_UNKNOWN, "XX Unknown"),
Value(U_LB_AMBIGUOUS, "AI Ambiguous"),
Value(U_LB_ALPHABETIC, "AL Alphabetic"),
@ -566,6 +580,9 @@ static const Value VALUES_lb[40] = {
Value(U_LB_CONDITIONAL_JAPANESE_STARTER, "CJ Conditional_Japanese_Starter"),
Value(U_LB_HEBREW_LETTER, "HL Hebrew_Letter"),
Value(U_LB_REGIONAL_INDICATOR, "RI Regional_Indicator"),
Value(U_LB_E_BASE, "EB E_Base"),
Value(U_LB_E_MODIFIER, "EM E_Modifier"),
Value(U_LB_ZWJ, "ZWJ ZWJ"),
};
static const Value VALUES_nt[4] = {
@ -730,7 +747,7 @@ static const Value VALUES_sc[175] = {
Value(USCRIPT_SHARADA, "Shrd Sharada"),
Value(USCRIPT_SORA_SOMPENG, "Sora Sora_Sompeng"),
Value(USCRIPT_TAKRI, "Takr Takri"),
Value(USCRIPT_TANGUT, "Tang Tang"),
Value(USCRIPT_TANGUT, "Tang Tangut"),
Value(USCRIPT_WOLEAI, "Wole Wole"),
Value(USCRIPT_ANATOLIAN_HIEROGLYPHS, "Hluw Anatolian_Hieroglyphs"),
Value(USCRIPT_KHOJKI, "Khoj Khojki"),
@ -904,7 +921,7 @@ static const Value VALUES_tccc[57] = {
Value(240, "IS Iota_Subscript"),
};
static const Value VALUES_GCB[13] = {
static const Value VALUES_GCB[18] = {
Value(U_GCB_OTHER, "XX Other"),
Value(U_GCB_CONTROL, "CN Control"),
Value(U_GCB_CR, "CR CR"),
@ -918,6 +935,11 @@ static const Value VALUES_GCB[13] = {
Value(U_GCB_SPACING_MARK, "SM SpacingMark"),
Value(U_GCB_PREPEND, "PP Prepend"),
Value(U_GCB_REGIONAL_INDICATOR, "RI Regional_Indicator"),
Value(U_GCB_E_BASE, "EB E_Base"),
Value(U_GCB_E_BASE_GAZ, "EBG E_Base_GAZ"),
Value(U_GCB_E_MODIFIER, "EM E_Modifier"),
Value(U_GCB_GLUE_AFTER_ZWJ, "GAZ Glue_After_Zwj"),
Value(U_GCB_ZWJ, "ZWJ ZWJ"),
};
static const Value VALUES_SB[15] = {
@ -938,7 +960,7 @@ static const Value VALUES_SB[15] = {
Value(U_SB_SCONTINUE, "SC SContinue"),
};
static const Value VALUES_WB[17] = {
static const Value VALUES_WB[22] = {
Value(U_WB_OTHER, "XX Other"),
Value(U_WB_ALETTER, "LE ALetter"),
Value(U_WB_FORMAT, "FO Format"),
@ -956,6 +978,11 @@ static const Value VALUES_WB[17] = {
Value(U_WB_HEBREW_LETTER, "HL Hebrew_Letter"),
Value(U_WB_SINGLE_QUOTE, "SQ Single_Quote"),
Value(U_WB_DOUBLE_QUOTE, "DQ Double_Quote"),
Value(U_WB_E_BASE, "EB E_Base"),
Value(U_WB_E_BASE_GAZ, "EBG E_Base_GAZ"),
Value(U_WB_E_MODIFIER, "EM E_Modifier"),
Value(U_WB_GLUE_AFTER_ZWJ, "GAZ Glue_After_Zwj"),
Value(U_WB_ZWJ, "ZWJ ZWJ"),
};
static const Value VALUES_bpt[3] = {
@ -1041,7 +1068,7 @@ static const Property PROPERTIES[100] = {
Property(UCHAR_XID_CONTINUE, "XIDC XID_Continue"),
Property(UCHAR_XID_START, "XIDS XID_Start"),
Property(UCHAR_CASE_SENSITIVE, "Sensitive Case_Sensitive"),
Property(UCHAR_S_TERM, "STerm STerm"),
Property(UCHAR_S_TERM, "STerm Sentence_Terminal"),
Property(UCHAR_VARIATION_SELECTOR, "VS Variation_Selector"),
Property(UCHAR_NFD_INERT, "nfdinert NFD_Inert"),
Property(UCHAR_NFKD_INERT, "nfkdinert NFKD_Inert"),
@ -1068,14 +1095,14 @@ static const Property PROPERTIES[100] = {
Property(UCHAR_EMOJI_MODIFIER, "Emoji_Modifier Emoji_Modifier"),
Property(UCHAR_EMOJI_MODIFIER_BASE, "Emoji_Modifier_Base Emoji_Modifier_Base"),
Property(UCHAR_BIDI_CLASS, "bc Bidi_Class", VALUES_bc, 23),
Property(UCHAR_BLOCK, "blk Block", VALUES_blk, 263),
Property(UCHAR_BLOCK, "blk Block", VALUES_blk, 274),
Property(UCHAR_CANONICAL_COMBINING_CLASS, "ccc Canonical_Combining_Class", VALUES_ccc, 57),
Property(UCHAR_DECOMPOSITION_TYPE, "dt Decomposition_Type", VALUES_dt, 18),
Property(UCHAR_EAST_ASIAN_WIDTH, "ea East_Asian_Width", VALUES_ea, 6),
Property(UCHAR_GENERAL_CATEGORY, "gc General_Category", VALUES_gc, 30),
Property(UCHAR_JOINING_GROUP, "jg Joining_Group", VALUES_jg, 86),
Property(UCHAR_JOINING_GROUP, "jg Joining_Group", VALUES_jg, 89),
Property(UCHAR_JOINING_TYPE, "jt Joining_Type", VALUES_jt, 6),
Property(UCHAR_LINE_BREAK, "lb Line_Break", VALUES_lb, 40),
Property(UCHAR_LINE_BREAK, "lb Line_Break", VALUES_lb, 43),
Property(UCHAR_NUMERIC_TYPE, "nt Numeric_Type", VALUES_nt, 4),
Property(UCHAR_SCRIPT, "sc Script", VALUES_sc, 175),
Property(UCHAR_HANGUL_SYLLABLE_TYPE, "hst Hangul_Syllable_Type", VALUES_hst, 6),
@ -1085,9 +1112,9 @@ static const Property PROPERTIES[100] = {
Property(UCHAR_NFKC_QUICK_CHECK, "NFKC_QC NFKC_Quick_Check", VALUES_NFKC_QC, 3),
Property(UCHAR_LEAD_CANONICAL_COMBINING_CLASS, "lccc Lead_Canonical_Combining_Class", VALUES_lccc, 57),
Property(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS, "tccc Trail_Canonical_Combining_Class", VALUES_tccc, 57),
Property(UCHAR_GRAPHEME_CLUSTER_BREAK, "GCB Grapheme_Cluster_Break", VALUES_GCB, 13),
Property(UCHAR_GRAPHEME_CLUSTER_BREAK, "GCB Grapheme_Cluster_Break", VALUES_GCB, 18),
Property(UCHAR_SENTENCE_BREAK, "SB Sentence_Break", VALUES_SB, 15),
Property(UCHAR_WORD_BREAK, "WB Word_Break", VALUES_WB, 17),
Property(UCHAR_WORD_BREAK, "WB Word_Break", VALUES_WB, 22),
Property(UCHAR_BIDI_PAIRED_BRACKET_TYPE, "bpt Bidi_Paired_Bracket_Type", VALUES_bpt, 3),
Property(UCHAR_GENERAL_CATEGORY_MASK, "gcm General_Category_Mask", VALUES_gcm, 38),
Property(UCHAR_NUMERIC_VALUE, "nv Numeric_Value"),

View file

@ -49,11 +49,11 @@ _current_year = datetime.date.today().strftime("%Y")
# Tuple of four-letter script-code strings.
# Presumably these are ISO 15924 codes that have no corresponding Unicode
# Script property value (going by the variable name) -- TODO confirm against
# the code that reads this tuple.
# NOTE(review): several codes occur on two adjacent lines here (e.g. "Hans",
# "Hant", "Jpan", "Teng"); this span looks like a diff rendering that shows
# both the removed and the added lines -- verify against the real file.
_scripts_only_in_iso15924 = (
"Afak", "Blis", "Cirt", "Cyrs",
"Egyd", "Egyh", "Geok",
"Hans", "Hant",
"Inds", "Jpan", "Jurc", "Kore", "Kpel", "Latf", "Latg", "Loma",
"Hanb", "Hans", "Hant",
"Inds", "Jamo", "Jpan", "Jurc", "Kore", "Kpel", "Latf", "Latg", "Loma",
"Maya", "Moon", "Nkgb", "Nshu", "Phlv", "Roro",
"Sara", "Syre", "Syrj", "Syrn",
"Tang", "Teng", "Visp", "Wole", "Zmth", "Zsym", "Zxxx"
"Teng", "Visp", "Wole", "Zmth", "Zsye", "Zsym", "Zxxx"
)
# Properties --------------------------------------------------------------- ***
@ -654,7 +654,9 @@ def ParseUnicodeData(in_file):
range_first = -1
# Remember algorithmic name ranges.
if "Ideograph" in name:
_alg_names_ranges.append([c, end, "han", "CJK UNIFIED IDEOGRAPH-"])
prefix = "CJK UNIFIED IDEOGRAPH-"
if c == 0x17000: prefix = "TANGUT IDEOGRAPH-"
_alg_names_ranges.append([c, end, "han", prefix])
elif name == "Hangul Syllable":
_alg_names_ranges.append([c, end, "hangul"])
name = ""