mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-07 14:31:31 +00:00
ICU-11574 Unicode 8 updates
X-SVN-Rev: 37353
This commit is contained in:
parent
90b2bf6959
commit
99c4dfa565
4 changed files with 61 additions and 33 deletions
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2012, International Business Machines
|
||||
* Copyright (C) 1999-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -431,7 +431,7 @@ CorePropsBuilder::setGcAndNumeric(const UniProps &props, const UnicodeSet &newVa
|
|||
}
|
||||
|
||||
int32_t ntv=UPROPS_NTV_NONE; // numeric type & value
|
||||
if(nvString!=NULL) {
|
||||
if(nvString!=NULL && uprv_strcmp(nvString, "NaN")!=0) {
|
||||
int32_t digitValue=props.digitValue;
|
||||
if( type<=U_NT_NONE || U_NT_NUMERIC<type ||
|
||||
((type==U_NT_DECIMAL || type==U_NT_DIGIT) && digitValue<0)
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2012, International Business Machines
|
||||
* Copyright (C) 1999-2015, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -129,7 +129,7 @@
|
|||
#include "unewdata.h"
|
||||
#include "uoptions.h"
|
||||
|
||||
#define STRING_STORE_SIZE 1000000
|
||||
#define STRING_STORE_SIZE 2000000
|
||||
#define GROUP_STORE_SIZE 5000
|
||||
|
||||
#define GROUP_SHIFT 5
|
||||
|
@ -1097,7 +1097,7 @@ allocLine(int32_t length) {
|
|||
uint8_t *p;
|
||||
|
||||
if(top>wordBottom) {
|
||||
fprintf(stderr, "gennames: out of memory\n");
|
||||
fprintf(stderr, "gennames allocLine(): out of memory\n");
|
||||
exit(U_MEMORY_ALLOCATION_ERROR);
|
||||
}
|
||||
p=stringStore+lineTop;
|
||||
|
@ -1110,7 +1110,7 @@ allocWord(uint32_t length) {
|
|||
uint32_t bottom=wordBottom-length;
|
||||
|
||||
if(lineTop>bottom) {
|
||||
fprintf(stderr, "gennames: out of memory\n");
|
||||
fprintf(stderr, "gennames allocWord(): out of memory\n");
|
||||
exit(U_MEMORY_ALLOCATION_ERROR);
|
||||
}
|
||||
wordBottom=bottom;
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
/**
|
||||
* Copyright (C) 2002-2014, International Business Machines Corporation and
|
||||
* Copyright (C) 2002-2015, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*
|
||||
* machine-generated by: icu/tools/unicode/py/preparseucd.py
|
||||
*/
|
||||
|
||||
#define UNICODE_VERSION { 7, 0, 0, 0 }
|
||||
#define UNICODE_VERSION { 8, 0, 0, 0 }
|
||||
|
||||
static const Value VALUES_binprop[2] = {
|
||||
Value(0, "N No F False"),
|
||||
|
@ -38,7 +38,7 @@ static const Value VALUES_bc[23] = {
|
|||
Value(U_POP_DIRECTIONAL_ISOLATE, "PDI Pop_Directional_Isolate"),
|
||||
};
|
||||
|
||||
static const Value VALUES_blk[253] = {
|
||||
static const Value VALUES_blk[263] = {
|
||||
Value(UBLOCK_NO_BLOCK, "NB No_Block"),
|
||||
Value(UBLOCK_BASIC_LATIN, "ASCII Basic_Latin"),
|
||||
Value(UBLOCK_LATIN_1_SUPPLEMENT, "Latin_1_Sup Latin_1_Supplement Latin_1"),
|
||||
|
@ -292,6 +292,16 @@ static const Value VALUES_blk[253] = {
|
|||
Value(UBLOCK_SUPPLEMENTAL_ARROWS_C, "Sup_Arrows_C Supplemental_Arrows_C"),
|
||||
Value(UBLOCK_TIRHUTA, "Tirhuta Tirhuta"),
|
||||
Value(UBLOCK_WARANG_CITI, "Warang_Citi Warang_Citi"),
|
||||
Value(UBLOCK_AHOM, "Ahom Ahom"),
|
||||
Value(UBLOCK_ANATOLIAN_HIEROGLYPHS, "Anatolian_Hieroglyphs Anatolian_Hieroglyphs"),
|
||||
Value(UBLOCK_CHEROKEE_SUPPLEMENT, "Cherokee_Sup Cherokee_Supplement"),
|
||||
Value(UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E, "CJK_Ext_E CJK_Unified_Ideographs_Extension_E"),
|
||||
Value(UBLOCK_EARLY_DYNASTIC_CUNEIFORM, "Early_Dynastic_Cuneiform Early_Dynastic_Cuneiform"),
|
||||
Value(UBLOCK_HATRAN, "Hatran Hatran"),
|
||||
Value(UBLOCK_MULTANI, "Multani Multani"),
|
||||
Value(UBLOCK_OLD_HUNGARIAN, "Old_Hungarian Old_Hungarian"),
|
||||
Value(UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS, "Sup_Symbols_And_Pictographs Supplemental_Symbols_And_Pictographs"),
|
||||
Value(UBLOCK_SUTTON_SIGNWRITING, "Sutton_SignWriting Sutton_SignWriting"),
|
||||
};
|
||||
|
||||
static const Value VALUES_ccc[57] = {
|
||||
|
@ -642,7 +652,7 @@ static const Value VALUES_sc[167] = {
|
|||
Value(USCRIPT_SIMPLIFIED_HAN, "Hans Hans"),
|
||||
Value(USCRIPT_TRADITIONAL_HAN, "Hant Hant"),
|
||||
Value(USCRIPT_PAHAWH_HMONG, "Hmng Pahawh_Hmong"),
|
||||
Value(USCRIPT_OLD_HUNGARIAN, "Hung Hung"),
|
||||
Value(USCRIPT_OLD_HUNGARIAN, "Hung Old_Hungarian"),
|
||||
Value(USCRIPT_HARAPPAN_INDUS, "Inds Inds"),
|
||||
Value(USCRIPT_JAVANESE, "Java Javanese"),
|
||||
Value(USCRIPT_KAYAH_LI, "Kali Kayah_Li"),
|
||||
|
@ -678,7 +688,7 @@ static const Value VALUES_sc[167] = {
|
|||
Value(USCRIPT_OL_CHIKI, "Olck Ol_Chiki"),
|
||||
Value(USCRIPT_REJANG, "Rjng Rejang"),
|
||||
Value(USCRIPT_SAURASHTRA, "Saur Saurashtra"),
|
||||
Value(USCRIPT_SIGN_WRITING, "Sgnw Sgnw"),
|
||||
Value(USCRIPT_SIGN_WRITING, "Sgnw SignWriting"),
|
||||
Value(USCRIPT_SUNDANESE, "Sund Sundanese"),
|
||||
Value(USCRIPT_MOON, "Moon Moon"),
|
||||
Value(USCRIPT_MEITEI_MAYEK, "Mtei Meetei_Mayek"),
|
||||
|
@ -701,7 +711,7 @@ static const Value VALUES_sc[167] = {
|
|||
Value(USCRIPT_NAKHI_GEBA, "Nkgb Nkgb"),
|
||||
Value(USCRIPT_OLD_SOUTH_ARABIAN, "Sarb Old_South_Arabian"),
|
||||
Value(USCRIPT_BASSA_VAH, "Bass Bassa_Vah"),
|
||||
Value(USCRIPT_DUPLOYAN_SHORTAND, "Dupl Duployan"),
|
||||
Value(USCRIPT_DUPLOYAN, "Dupl Duployan"),
|
||||
Value(USCRIPT_ELBASAN, "Elba Elbasan"),
|
||||
Value(USCRIPT_GRANTHA, "Gran Grantha"),
|
||||
Value(USCRIPT_KPELLE, "Kpel Kpel"),
|
||||
|
@ -722,15 +732,15 @@ static const Value VALUES_sc[167] = {
|
|||
Value(USCRIPT_TAKRI, "Takr Takri"),
|
||||
Value(USCRIPT_TANGUT, "Tang Tang"),
|
||||
Value(USCRIPT_WOLEAI, "Wole Wole"),
|
||||
Value(USCRIPT_ANATOLIAN_HIEROGLYPHS, "Hluw Hluw"),
|
||||
Value(USCRIPT_ANATOLIAN_HIEROGLYPHS, "Hluw Anatolian_Hieroglyphs"),
|
||||
Value(USCRIPT_KHOJKI, "Khoj Khojki"),
|
||||
Value(USCRIPT_TIRHUTA, "Tirh Tirhuta"),
|
||||
Value(USCRIPT_CAUCASIAN_ALBANIAN, "Aghb Caucasian_Albanian"),
|
||||
Value(USCRIPT_MAHAJANI, "Mahj Mahajani"),
|
||||
Value(USCRIPT_AHOM, "Ahom Ahom"),
|
||||
Value(USCRIPT_HATRAN, "Hatr Hatr"),
|
||||
Value(USCRIPT_HATRAN, "Hatr Hatran"),
|
||||
Value(USCRIPT_MODI, "Modi Modi"),
|
||||
Value(USCRIPT_MULTANI, "Mult Mult"),
|
||||
Value(USCRIPT_MULTANI, "Mult Multani"),
|
||||
Value(USCRIPT_PAU_CIN_HAU, "Pauc Pau_Cin_Hau"),
|
||||
Value(USCRIPT_SIDDHAM, "Sidd Siddham"),
|
||||
};
|
||||
|
@ -1046,7 +1056,7 @@ static const Property PROPERTIES[96] = {
|
|||
Property(UCHAR_CHANGES_WHEN_CASEMAPPED, "CWCM Changes_When_Casemapped"),
|
||||
Property(UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED, "CWKCF Changes_When_NFKC_Casefolded"),
|
||||
Property(UCHAR_BIDI_CLASS, "bc Bidi_Class", VALUES_bc, 23),
|
||||
Property(UCHAR_BLOCK, "blk Block", VALUES_blk, 253),
|
||||
Property(UCHAR_BLOCK, "blk Block", VALUES_blk, 263),
|
||||
Property(UCHAR_CANONICAL_COMBINING_CLASS, "ccc Canonical_Combining_Class", VALUES_ccc, 57),
|
||||
Property(UCHAR_DECOMPOSITION_TYPE, "dt Decomposition_Type", VALUES_dt, 18),
|
||||
Property(UCHAR_EAST_ASIAN_WIDTH, "ea East_Asian_Width", VALUES_ea, 6),
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2009-2014 International Business Machines
|
||||
# Copyright (c) 2009-2015 International Business Machines
|
||||
# Corporation and others. All Rights Reserved.
|
||||
#
|
||||
# file name: preparseucd.py
|
||||
|
@ -47,12 +47,12 @@ _current_year = datetime.date.today().strftime("%Y")
|
|||
# Script codes from ISO 15924 http://www.unicode.org/iso15924/codechanges.html
|
||||
# that are not yet in the UCD.
|
||||
_scripts_only_in_iso15924 = (
|
||||
"Afak", "Ahom", "Blis", "Cirt", "Cyrs",
|
||||
"Afak", "Blis", "Cirt", "Cyrs",
|
||||
"Egyd", "Egyh", "Geok",
|
||||
"Hans", "Hant", "Hatr", "Hluw", "Hung",
|
||||
"Hans", "Hant",
|
||||
"Inds", "Jpan", "Jurc", "Kore", "Kpel", "Latf", "Latg", "Loma",
|
||||
"Maya", "Moon", "Mult", "Nkgb", "Nshu", "Phlv", "Roro",
|
||||
"Sara", "Sgnw", "Syre", "Syrj", "Syrn",
|
||||
"Maya", "Moon", "Nkgb", "Nshu", "Phlv", "Roro",
|
||||
"Sara", "Syre", "Syrj", "Syrn",
|
||||
"Tang", "Teng", "Visp", "Wole", "Zmth", "Zsym", "Zxxx"
|
||||
)
|
||||
|
||||
|
@ -680,6 +680,23 @@ def ParseUnicodeData(in_file):
|
|||
if (decimal and decimal != nv) or (digit and digit != nv):
|
||||
raise SyntaxError("error: numeric values differ at\n %s\n" % line)
|
||||
if nv:
|
||||
# Reduce fractions that are not in lowest terms (n/12) to lowest terms.
|
||||
# U+109F7 MEROITIC CURSIVE FRACTION TWO TWELFTHS
|
||||
# .. U+109FF MEROITIC CURSIVE FRACTION TEN TWELFTHS
|
||||
if nv == "2/12":
|
||||
nv = "1/6"
|
||||
elif nv == "3/12":
|
||||
nv = "1/4"
|
||||
elif nv == "4/12":
|
||||
nv = "1/3"
|
||||
elif nv == "6/12":
|
||||
nv = "1/2"
|
||||
elif nv == "8/12":
|
||||
nv = "2/3"
|
||||
elif nv == "9/12":
|
||||
nv = "3/4"
|
||||
elif nv == "10/12":
|
||||
nv = "5/6"
|
||||
props["nv"] = nv
|
||||
props["nt"] = "De" if decimal else "Di" if digit else "Nu"
|
||||
if fields[9] == "Y": props["Bidi_M"] = True
|
||||
|
@ -773,7 +790,7 @@ def ParseDerivedJoiningGroup(in_file): ParseOneProperty(in_file, "jg")
|
|||
def ParseDerivedJoiningType(in_file): ParseOneProperty(in_file, "jt")
|
||||
def ParseEastAsianWidth(in_file): ParseOneProperty(in_file, "ea")
|
||||
def ParseGraphemeBreakProperty(in_file): ParseOneProperty(in_file, "GCB")
|
||||
def ParseIndicMatraCategory(in_file): ParseOneProperty(in_file, "InMC")
|
||||
def ParseIndicPositionalCategory(in_file): ParseOneProperty(in_file, "InPC")
|
||||
def ParseIndicSyllabicCategory(in_file): ParseOneProperty(in_file, "InSC")
|
||||
def ParseLineBreak(in_file): ParseOneProperty(in_file, "lb")
|
||||
def ParseScripts(in_file): ParseOneProperty(in_file, "sc")
|
||||
|
@ -824,8 +841,8 @@ def NeedToSetNumericValue(nv, start, end, c_props):
|
|||
assert "nt" not in c_props
|
||||
return True
|
||||
if nv != c_nv:
|
||||
raise ValueError("UnicodeData.txt has nv=%s for %04lX..%04lX " +
|
||||
"but DerivedNumericValues.txt has nv=%s" %
|
||||
raise ValueError(("UnicodeData.txt has nv=%s for %04lX..%04lX " +
|
||||
"but DerivedNumericValues.txt has nv=%s") %
|
||||
(c_nv, start, end, nv))
|
||||
return False
|
||||
|
||||
|
@ -920,31 +937,32 @@ def CompactBlock(b, i):
|
|||
assert b[0] == _starts[i]
|
||||
orig_i = i
|
||||
# Count the number of occurrences of each property's value in this block.
|
||||
num_cp_so_far = 0
|
||||
# To minimize the output, count the number of ranges,
|
||||
# not the number of code points.
|
||||
num_ranges_so_far = 0
|
||||
prop_counters = {}
|
||||
while True:
|
||||
start = _starts[i]
|
||||
if start > b[1]: break
|
||||
num_cp_in_this_range = _starts[i + 1] - start
|
||||
props = _props[i]
|
||||
for (pname, value) in props.iteritems():
|
||||
if pname in prop_counters:
|
||||
counter = prop_counters[pname]
|
||||
else:
|
||||
counter = {_null_or_defaults[pname]: num_cp_so_far}
|
||||
counter = {_null_or_defaults[pname]: num_ranges_so_far}
|
||||
prop_counters[pname] = counter
|
||||
if value in counter:
|
||||
counter[value] += num_cp_in_this_range
|
||||
counter[value] += 1
|
||||
else:
|
||||
counter[value] = num_cp_in_this_range
|
||||
counter[value] = 1
|
||||
# Also count default values for properties that do not occur in a range.
|
||||
for pname in prop_counters:
|
||||
if pname not in props:
|
||||
counter = prop_counters[pname]
|
||||
value = _null_or_defaults[pname]
|
||||
counter[value] += num_cp_in_this_range
|
||||
num_cp_so_far += num_cp_in_this_range
|
||||
# Invariant: For each counter, the sum of counts must equal num_cp_so_far.
|
||||
counter[value] += 1
|
||||
num_ranges_so_far += 1
|
||||
# Invariant: For each counter, the sum of counts must equal num_ranges_so_far.
|
||||
i += 1
|
||||
# For each property that occurs within this block,
|
||||
# set the most common value as a block property value.
|
||||
|
@ -1519,7 +1537,7 @@ _files = {
|
|||
"EastAsianWidth.txt": (DontCopy, ParseEastAsianWidth),
|
||||
"GraphemeBreakProperty.txt": (DontCopy, ParseGraphemeBreakProperty),
|
||||
"GraphemeBreakTest.txt": (PrependBOM, "testdata"),
|
||||
"IndicMatraCategory.txt": (DontCopy, ParseIndicMatraCategory),
|
||||
"IndicPositionalCategory.txt": (DontCopy, ParseIndicPositionalCategory),
|
||||
"IndicSyllabicCategory.txt": (DontCopy, ParseIndicSyllabicCategory),
|
||||
"LineBreak.txt": (DontCopy, ParseLineBreak),
|
||||
"LineBreakTest.txt": (PrependBOM, "testdata"),
|
||||
|
|
Loading…
Add table
Reference in a new issue